icuSources/test/cintltst/nucnvtst.c

   1 /********************************************************************
   2  * COPYRIGHT:
   3  * Copyright (c) 1997-2010, International Business Machines Corporation and
   4  * others. All Rights Reserved.
   5  ********************************************************************/
   6 /*******************************************************************************
   7 *
   8 * File CCONVTST.C
   9 *
  10 * Modification History:
  11 *        Name                     Description
  12 *    Steven R. Loomis     7/8/1999      Adding input buffer test
  13 ********************************************************************************
  14 */
  15 #include <stdio.h>
  16 #include "cstring.h"
  17 #include "unicode/uloc.h"
  18 #include "unicode/ucnv.h"
  19 #include "unicode/ucnv_err.h"
  20 #include "unicode/ucnv_cb.h"
  21 #include "cintltst.h"
  22 #include "unicode/utypes.h"
  23 #include "unicode/ustring.h"
  24 #include "unicode/ucol.h"
  25 #include "cmemory.h"
  26 #include "nucnvtst.h"
  27
  28 static void TestNextUChar(UConverter* cnv, const char* source, const char* limit, const int32_t results[], const char* message);
  29 static void TestNextUCharError(UConverter* cnv, const char* source, const char* limit, UErrorCode expected, const char* message);
  30 #if !UCONFIG_NO_COLLATION
  31 static void TestJitterbug981(void);
  32 #endif
  33 static void TestJitterbug1293(void);
  34 static void TestNewConvertWithBufferSizes(int32_t osize, int32_t isize) ;
  35 static void TestConverterTypesAndStarters(void);
  36 static void TestAmbiguous(void);
  37 static void TestSignatureDetection(void);
  38 static void TestUTF7(void);
  39 static void TestIMAP(void);
  40 static void TestUTF8(void);
  41 static void TestCESU8(void);
  42 static void TestUTF16(void);
  43 static void TestUTF16BE(void);
  44 static void TestUTF16LE(void);
  45 static void TestUTF32(void);
  46 static void TestUTF32BE(void);
  47 static void TestUTF32LE(void);
  48 static void TestLATIN1(void);
  49
  50 #if !UCONFIG_NO_LEGACY_CONVERSION
  51 static void TestSBCS(void);
  52 static void TestDBCS(void);
  53 static void TestMBCS(void);
  54 #if !UCONFIG_NO_LEGACY_CONVERSION && !UCONFIG_NO_FILE_IO
  55 static void TestICCRunout(void);
  56 #endif
  57
  58 #ifdef U_ENABLE_GENERIC_ISO_2022
  59 static void TestISO_2022(void);
  60 #endif
  61
  62 static void TestISO_2022_JP(void);
  63 static void TestISO_2022_JP_1(void);
  64 static void TestISO_2022_JP_2(void);
  65 static void TestISO_2022_KR(void);
  66 static void TestISO_2022_KR_1(void);
  67 static void TestISO_2022_CN(void);
  68 #if 0
  69    /*
  70     * ICU 4.4 (ticket #7314) removes mappings for CNS 11643 planes 3..7
  71     */
  72 static void TestISO_2022_CN_EXT(void);
  73 #endif
  74 static void TestJIS(void);
  75 static void TestHZ(void);
  76 #endif
  77
  78 static void TestSCSU(void);
  79
  80 #if !UCONFIG_NO_LEGACY_CONVERSION
  81 static void TestEBCDIC_STATEFUL(void);
  82 static void TestGB18030(void);
  83 static void TestLMBCS(void);
  84 static void TestJitterbug255(void);
  85 static void TestEBCDICUS4XML(void);
  86 #if 0
  87    /*
  88     * ICU 4.4 (ticket #7314) removes mappings for CNS 11643 planes 3..7
  89     */
  90 static void TestJitterbug915(void);
  91 #endif
  92 static void TestISCII(void);
  93
  94 static void TestCoverageMBCS(void);
  95 static void TestJitterbug2346(void);
  96 static void TestJitterbug2411(void);
  97 static void TestJB5275(void);
  98 static void TestJB5275_1(void);
  99 static void TestJitterbug6175(void);
 100 #endif
 101
 102 static void TestInBufSizes(void);
 103
 104 static void TestRoundTrippingAllUTF(void);
 105 static void TestConv(const uint16_t in[],
 106                      int len,
 107                      const char* conv,
 108                      const char* lang,
 109                      char byteArr[],
 110                      int byteArrLen);
 111
 112 /* open a converter, using test data if it begins with '@' */
 113 static UConverter *my_ucnv_open(const char *cnv, UErrorCode *err);
 114
 115
 116 #define NEW_MAX_BUFFER 999
 117
 118 static int32_t  gInBufferSize = NEW_MAX_BUFFER;
 119 static int32_t  gOutBufferSize = NEW_MAX_BUFFER;
 120 static char     gNuConvTestName[1024];
 121
 122 #define nct_min(x,y)  ((x<y) ? x : y)
 123
 124 static UConverter *my_ucnv_open(const char *cnv, UErrorCode *err)
 125 {
 126   if(cnv && cnv[0] == '@') {
 127     return ucnv_openPackage(loadTestData(err), cnv+1, err);
 128   } else {
 129     return ucnv_open(cnv, err);
 130   }
 131 }
 132
 133 static void printSeq(const unsigned char* a, int len)
 134 {
 135     int i=0;
 136     log_verbose("{");
 137     while (i<len)
 138         log_verbose("0x%02x ", a[i++]);
 139     log_verbose("}\n");
 140 }
 141
 142 static void printUSeq(const UChar* a, int len)
 143 {
 144     int i=0;
 145     log_verbose("{U+");
 146     while (i<len) log_verbose("0x%04x ", a[i++]);
 147     log_verbose("}\n");
 148 }
 149
 150 static void printSeqErr(const unsigned char* a, int len)
 151 {
 152     int i=0;
 153     fprintf(stderr, "{");
 154     while (i<len)
 155         fprintf(stderr, "0x%02x ", a[i++]);
 156     fprintf(stderr, "}\n");
 157 }
 158
 159 static void printUSeqErr(const UChar* a, int len)
 160 {
 161     int i=0;
 162     fprintf(stderr, "{U+");
 163     while (i<len)
 164         fprintf(stderr, "0x%04x ", a[i++]);
 165     fprintf(stderr,"}\n");
 166 }
 167
 168 static void
 169 TestNextUChar(UConverter* cnv, const char* source, const char* limit, const int32_t results[], const char* message)
 170 {
 171      const char* s0;
 172      const char* s=(char*)source;
 173      const int32_t *r=results;
 174      UErrorCode errorCode=U_ZERO_ERROR;
 175      UChar32 c;
 176
 177      while(s<limit) {
 178         s0=s;
 179         c=ucnv_getNextUChar(cnv, &s, limit, &errorCode);
 180         if(errorCode==U_INDEX_OUTOFBOUNDS_ERROR) {
 181             break; /* no more significant input */
 182         } else if(U_FAILURE(errorCode)) {
 183             log_err("%s ucnv_getNextUChar() failed: %s\n", message, u_errorName(errorCode));
 184             break;
 185         } else if(
 186             /* test the expected number of input bytes only if >=0 */
 187             (*r>=0 && (int32_t)(s-s0)!=*r) ||
 188             c!=*(r+1)
 189         ) {
 190             log_err("%s ucnv_getNextUChar() result %lx from %d bytes, should have been %lx from %d bytes.\n",
 191                 message, c, (s-s0), *(r+1), *r);
 192             break;
 193         }
 194         r+=2;
 195     }
 196 }
 197
 198 static void
 199 TestNextUCharError(UConverter* cnv, const char* source, const char* limit, UErrorCode expected, const char* message)
 200 {
 201      const char* s=(char*)source;
 202      UErrorCode errorCode=U_ZERO_ERROR;
 203      uint32_t c;
 204      c=ucnv_getNextUChar(cnv, &s, limit, &errorCode);
 205      if(errorCode != expected){
 206         log_err("FAIL: Expected:%s when %s-----Got:%s\n", myErrorName(expected), message, myErrorName(errorCode));
 207      }
 208      if(c != 0xFFFD && c != 0xffff){
 209         log_err("FAIL: Expected return value of 0xfffd or 0xffff when %s-----Got 0x%lx\n", message, c);
 210      }
 211
 212 }
 213
 214 static void TestInBufSizes(void)
 215 {
 216   TestNewConvertWithBufferSizes(NEW_MAX_BUFFER,1);
 217 #if 1
 218   TestNewConvertWithBufferSizes(NEW_MAX_BUFFER,2);
 219   TestNewConvertWithBufferSizes(NEW_MAX_BUFFER,3);
 220   TestNewConvertWithBufferSizes(NEW_MAX_BUFFER,4);
 221   TestNewConvertWithBufferSizes(NEW_MAX_BUFFER,5);
 222   TestNewConvertWithBufferSizes(NEW_MAX_BUFFER,6);
 223   TestNewConvertWithBufferSizes(1,1);
 224   TestNewConvertWithBufferSizes(2,3);
 225   TestNewConvertWithBufferSizes(3,2);
 226 #endif
 227 }
 228
 229 static void TestOutBufSizes(void)
 230 {
 231 #if 1
 232   TestNewConvertWithBufferSizes(NEW_MAX_BUFFER,NEW_MAX_BUFFER);
 233   TestNewConvertWithBufferSizes(1,NEW_MAX_BUFFER);
 234   TestNewConvertWithBufferSizes(2,NEW_MAX_BUFFER);
 235   TestNewConvertWithBufferSizes(3,NEW_MAX_BUFFER);
 236   TestNewConvertWithBufferSizes(4,NEW_MAX_BUFFER);
 237   TestNewConvertWithBufferSizes(5,NEW_MAX_BUFFER);
 238
 239 #endif
 240 }
 241
 242
 243 void addTestNewConvert(TestNode** root)
 244 {
 245 #if !UCONFIG_NO_FILE_IO
 246    addTest(root, &TestInBufSizes, "tsconv/nucnvtst/TestInBufSizes");
 247    addTest(root, &TestOutBufSizes, "tsconv/nucnvtst/TestOutBufSizes");
 248 #endif
 249    addTest(root, &TestConverterTypesAndStarters, "tsconv/nucnvtst/TestConverterTypesAndStarters");
 250    addTest(root, &TestAmbiguous, "tsconv/nucnvtst/TestAmbiguous");
 251    addTest(root, &TestSignatureDetection, "tsconv/nucnvtst/TestSignatureDetection");
 252    addTest(root, &TestUTF7, "tsconv/nucnvtst/TestUTF7");
 253    addTest(root, &TestIMAP, "tsconv/nucnvtst/TestIMAP");
 254    addTest(root, &TestUTF8, "tsconv/nucnvtst/TestUTF8");
 255
 256    /* test ucnv_getNextUChar() for charsets that encode single surrogates with complete byte sequences */
 257    addTest(root, &TestCESU8, "tsconv/nucnvtst/TestCESU8");
 258    addTest(root, &TestUTF16, "tsconv/nucnvtst/TestUTF16");
 259    addTest(root, &TestUTF16BE, "tsconv/nucnvtst/TestUTF16BE");
 260    addTest(root, &TestUTF16LE, "tsconv/nucnvtst/TestUTF16LE");
 261    addTest(root, &TestUTF32, "tsconv/nucnvtst/TestUTF32");
 262    addTest(root, &TestUTF32BE, "tsconv/nucnvtst/TestUTF32BE");
 263    addTest(root, &TestUTF32LE, "tsconv/nucnvtst/TestUTF32LE");
 264
 265 #if !UCONFIG_NO_LEGACY_CONVERSION
 266    addTest(root, &TestLMBCS, "tsconv/nucnvtst/TestLMBCS");
 267 #endif
 268
 269    addTest(root, &TestLATIN1, "tsconv/nucnvtst/TestLATIN1");
 270
 271 #if !UCONFIG_NO_LEGACY_CONVERSION
 272    addTest(root, &TestSBCS, "tsconv/nucnvtst/TestSBCS");
 273 #if !UCONFIG_NO_FILE_IO
 274    addTest(root, &TestDBCS, "tsconv/nucnvtst/TestDBCS");
 275    addTest(root, &TestICCRunout, "tsconv/nucnvtst/TestICCRunout");
 276 #endif
 277    addTest(root, &TestMBCS, "tsconv/nucnvtst/TestMBCS");
 278
 279 #ifdef U_ENABLE_GENERIC_ISO_2022
 280    addTest(root, &TestISO_2022, "tsconv/nucnvtst/TestISO_2022");
 281 #endif
 282
 283    addTest(root, &TestISO_2022_JP, "tsconv/nucnvtst/TestISO_2022_JP");
 284    addTest(root, &TestJIS, "tsconv/nucnvtst/TestJIS");
 285    addTest(root, &TestISO_2022_JP_1, "tsconv/nucnvtst/TestISO_2022_JP_1");
 286    addTest(root, &TestISO_2022_JP_2, "tsconv/nucnvtst/TestISO_2022_JP_2");
 287    addTest(root, &TestISO_2022_KR, "tsconv/nucnvtst/TestISO_2022_KR");
 288    addTest(root, &TestISO_2022_KR_1, "tsconv/nucnvtst/TestISO_2022_KR_1");
 289    addTest(root, &TestISO_2022_CN, "tsconv/nucnvtst/TestISO_2022_CN");
 290    /*
 291     * ICU 4.4 (ticket #7314) removes mappings for CNS 11643 planes 3..7
 292    addTest(root, &TestISO_2022_CN_EXT, "tsconv/nucnvtst/TestISO_2022_CN_EXT");
 293    addTest(root, &TestJitterbug915, "tsconv/nucnvtst/TestJitterbug915");
 294     */
 295    addTest(root, &TestHZ, "tsconv/nucnvtst/TestHZ");
 296 #endif
 297
 298    addTest(root, &TestSCSU, "tsconv/nucnvtst/TestSCSU");
 299
 300 #if !UCONFIG_NO_LEGACY_CONVERSION
 301    addTest(root, &TestEBCDIC_STATEFUL, "tsconv/nucnvtst/TestEBCDIC_STATEFUL");
 302    addTest(root, &TestGB18030, "tsconv/nucnvtst/TestGB18030");
 303    addTest(root, &TestJitterbug255, "tsconv/nucnvtst/TestJitterbug255");
 304    addTest(root, &TestEBCDICUS4XML, "tsconv/nucnvtst/TestEBCDICUS4XML");
 305    addTest(root, &TestISCII, "tsconv/nucnvtst/TestISCII");
 306    addTest(root, &TestJB5275, "tsconv/nucnvtst/TestJB5275");
 307    addTest(root, &TestJB5275_1, "tsconv/nucnvtst/TestJB5275_1");
 308 #if !UCONFIG_NO_COLLATION
 309    addTest(root, &TestJitterbug981, "tsconv/nucnvtst/TestJitterbug981");
 310 #endif
 311
 312    addTest(root, &TestJitterbug1293, "tsconv/nucnvtst/TestJitterbug1293");
 313 #endif
 314
 315
 316 #if !UCONFIG_NO_LEGACY_CONVERSION && !UCONFIG_NO_FILE_IO
 317    addTest(root, &TestCoverageMBCS, "tsconv/nucnvtst/TestCoverageMBCS");
 318 #endif
 319
 320    addTest(root, &TestRoundTrippingAllUTF, "tsconv/nucnvtst/TestRoundTrippingAllUTF");
 321
 322 #if !UCONFIG_NO_LEGACY_CONVERSION
 323    addTest(root, &TestJitterbug2346, "tsconv/nucnvtst/TestJitterbug2346");
 324    addTest(root, &TestJitterbug2411, "tsconv/nucnvtst/TestJitterbug2411");
 325    addTest(root, &TestJitterbug6175, "tsconv/nucnvtst/TestJitterbug6175");
 326 #endif
 327
 328 }
 329
 330
 331 /* Note that this test already makes use of statics, so it's not really
 332    multithread safe.
 333    This convenience function lets us make the error messages actually useful.
 334 */
 335
 336 static void setNuConvTestName(const char *codepage, const char *direction)
 337 {
 338     sprintf(gNuConvTestName, "[Testing %s %s Unicode, InputBufSiz=%d, OutputBufSiz=%d]",
 339         codepage,
 340         direction,
 341         (int)gInBufferSize,
 342         (int)gOutBufferSize);
 343 }
 344
 345 typedef enum
 346 {
 347   TC_OK       = 0,  /* test was OK */
 348   TC_MISMATCH = 1,  /* Match failed - err was printed */
 349   TC_FAIL     = 2   /* Test failed, don't print an err because it was already printed. */
 350 } ETestConvertResult;
 351
 352 /* Note: This function uses global variables and it will not do offset
 353 checking without gOutBufferSize and gInBufferSize set to NEW_MAX_BUFFER */
 354 static ETestConvertResult testConvertFromU( const UChar *source, int sourceLen,  const uint8_t *expect, int expectLen,
 355                 const char *codepage, const int32_t *expectOffsets , UBool useFallback)
 356 {
 357     UErrorCode status = U_ZERO_ERROR;
 358     UConverter *conv = 0;
 359     char    junkout[NEW_MAX_BUFFER]; /* FIX */
 360     int32_t    junokout[NEW_MAX_BUFFER]; /* FIX */
 361     char *p;
 362     const UChar *src;
 363     char *end;
 364     char *targ;
 365     int32_t *offs;
 366     int i;
 367     int32_t   realBufferSize;
 368     char *realBufferEnd;
 369     const UChar *realSourceEnd;
 370     const UChar *sourceLimit;
 371     UBool checkOffsets = TRUE;
 372     UBool doFlush;
 373
 374     for(i=0;i<NEW_MAX_BUFFER;i++)
 375         junkout[i] = (char)0xF0;
 376     for(i=0;i<NEW_MAX_BUFFER;i++)
 377         junokout[i] = 0xFF;
 378
 379     setNuConvTestName(codepage, "FROM");
 380
 381     log_verbose("\n=========  %s\n", gNuConvTestName);
 382
 383     conv = my_ucnv_open(codepage, &status);
 384
 385     if(U_FAILURE(status))
 386     {
 387         log_data_err("Couldn't open converter %s\n",codepage);
 388         return TC_FAIL;
 389     }
 390     if(useFallback){
 391         ucnv_setFallback(conv,useFallback);
 392     }
 393
 394     log_verbose("Converter opened..\n");
 395
 396     src = source;
 397     targ = junkout;
 398     offs = junokout;
 399
 400     realBufferSize = (sizeof(junkout)/sizeof(junkout[0]));
 401     realBufferEnd = junkout + realBufferSize;
 402     realSourceEnd = source + sourceLen;
 403
 404     if ( gOutBufferSize != realBufferSize || gInBufferSize != NEW_MAX_BUFFER )
 405         checkOffsets = FALSE;
 406
 407     do
 408     {
 409       end = nct_min(targ + gOutBufferSize, realBufferEnd);
 410       sourceLimit = nct_min(src + gInBufferSize, realSourceEnd);
 411
 412       doFlush = (UBool)(sourceLimit == realSourceEnd);
 413
 414       if(targ == realBufferEnd) {
 415         log_err("Error, overflowed the real buffer while about to call fromUnicode! targ=%08lx %s", targ, gNuConvTestName);
 416         return TC_FAIL;
 417       }
 418       log_verbose("calling fromUnicode @ SOURCE:%08lx to %08lx  TARGET: %08lx to %08lx, flush=%s\n", src,sourceLimit, targ,end, doFlush?"TRUE":"FALSE");
 419
 420
 421       status = U_ZERO_ERROR;
 422
 423       ucnv_fromUnicode (conv,
 424                         &targ,
 425                         end,
 426                         &src,
 427                         sourceLimit,
 428                         checkOffsets ? offs : NULL,
 429                         doFlush, /* flush if we're at the end of the input data */
 430                         &status);
 431     } while ( (status == U_BUFFER_OVERFLOW_ERROR) || (U_SUCCESS(status) && sourceLimit < realSourceEnd) );
 432
 433     if(U_FAILURE(status)) {
 434       log_err("Problem doing fromUnicode to %s, errcode %s %s\n", codepage, myErrorName(status), gNuConvTestName);
 435       return TC_FAIL;
 436     }
 437
 438     log_verbose("\nConversion done [%d uchars in -> %d chars out]. \nResult :",
 439                 sourceLen, targ-junkout);
 440
 441     if(getTestOption(VERBOSITY_OPTION))
 442     {
 443       char junk[9999];
 444       char offset_str[9999];
 445       char *ptr;
 446
 447       junk[0] = 0;
 448       offset_str[0] = 0;
 449       for(ptr = junkout;ptr<targ;ptr++) {
 450         sprintf(junk + strlen(junk), "0x%02x, ", (int)(0xFF & *ptr));
 451         sprintf(offset_str + strlen(offset_str), "0x%02x, ", (int)(0xFF & junokout[ptr-junkout]));
 452       }
 453
 454       log_verbose(junk);
 455       printSeq((const uint8_t *)expect, expectLen);
 456       if ( checkOffsets ) {
 457         log_verbose("\nOffsets:");
 458         log_verbose(offset_str);
 459       }
 460       log_verbose("\n");
 461     }
 462     ucnv_close(conv);
 463
 464     if(expectLen != targ-junkout) {
 465       log_err("Expected %d chars out, got %d %s\n", expectLen, targ-junkout, gNuConvTestName);
 466       log_verbose("Expected %d chars out, got %d %s\n", expectLen, targ-junkout, gNuConvTestName);
 467       printf("\nGot:");
 468       printSeqErr((const unsigned char*)junkout, (int32_t)(targ-junkout));
 469       printf("\nExpected:");
 470       printSeqErr((const unsigned char*)expect, expectLen);
 471       return TC_MISMATCH;
 472     }
 473
 474     if (checkOffsets && (expectOffsets != 0) ) {
 475       log_verbose("comparing %d offsets..\n", targ-junkout);
 476       if(memcmp(junokout,expectOffsets,(targ-junkout) * sizeof(int32_t) )){
 477         log_err("did not get the expected offsets. %s\n", gNuConvTestName);
 478         printSeqErr((const unsigned char*)junkout, (int32_t)(targ-junkout));
 479         log_err("\n");
 480         log_err("Got  :     ");
 481         for(p=junkout;p<targ;p++) {
 482           log_err("%d,", junokout[p-junkout]);
 483         }
 484         log_err("\n");
 485         log_err("Expected:  ");
 486         for(i=0; i<(targ-junkout); i++) {
 487           log_err("%d,", expectOffsets[i]);
 488         }
 489         log_err("\n");
 490       }
 491     }
 492
 493     log_verbose("comparing..\n");
 494     if(!memcmp(junkout, expect, expectLen)) {
 495       log_verbose("Matches!\n");
 496       return TC_OK;
 497     } else {
 498       log_err("String does not match u->%s\n", gNuConvTestName);
 499       printUSeqErr(source, sourceLen);
 500       printf("\nGot:");
 501       printSeqErr((const unsigned char *)junkout, expectLen);
 502       printf("\nExpected:");
 503       printSeqErr((const unsigned char *)expect, expectLen);
 504
 505       return TC_MISMATCH;
 506     }
 507 }
 508
 509 /* Note: This function uses global variables and it will not do offset
 510 checking without gOutBufferSize and gInBufferSize set to NEW_MAX_BUFFER */
 511 static ETestConvertResult testConvertToU( const uint8_t *source, int sourcelen, const UChar *expect, int expectlen,
 512                                           const char *codepage, const int32_t *expectOffsets, UBool useFallback)
 513 {
 514     UErrorCode status = U_ZERO_ERROR;
 515     UConverter *conv = 0;
 516     UChar    junkout[NEW_MAX_BUFFER]; /* FIX */
 517     int32_t    junokout[NEW_MAX_BUFFER]; /* FIX */
 518     const char *src;
 519     const char *realSourceEnd;
 520     const char *srcLimit;
 521     UChar *p;
 522     UChar *targ;
 523     UChar *end;
 524     int32_t *offs;
 525     int i;
 526     UBool   checkOffsets = TRUE;
 527
 528     int32_t   realBufferSize;
 529     UChar *realBufferEnd;
 530
 531
 532     for(i=0;i<NEW_MAX_BUFFER;i++)
 533         junkout[i] = 0xFFFE;
 534
 535     for(i=0;i<NEW_MAX_BUFFER;i++)
 536         junokout[i] = -1;
 537
 538     setNuConvTestName(codepage, "TO");
 539
 540     log_verbose("\n=========  %s\n", gNuConvTestName);
 541
 542     conv = my_ucnv_open(codepage, &status);
 543
 544     if(U_FAILURE(status))
 545     {
 546         log_data_err("Couldn't open converter %s\n",gNuConvTestName);
 547         return TC_FAIL;
 548     }
 549     if(useFallback){
 550         ucnv_setFallback(conv,useFallback);
 551     }
 552     log_verbose("Converter opened..\n");
 553
 554     src = (const char *)source;
 555     targ = junkout;
 556     offs = junokout;
 557
 558     realBufferSize = (sizeof(junkout)/sizeof(junkout[0]));
 559     realBufferEnd = junkout + realBufferSize;
 560     realSourceEnd = src + sourcelen;
 561
 562     if ( gOutBufferSize != realBufferSize ||  gInBufferSize != NEW_MAX_BUFFER )
 563         checkOffsets = FALSE;
 564
 565     do
 566     {
 567         end = nct_min( targ + gOutBufferSize, realBufferEnd);
 568         srcLimit = nct_min(realSourceEnd, src + gInBufferSize);
 569
 570         if(targ == realBufferEnd)
 571         {
 572             log_err("Error, the end would overflow the real output buffer while about to call toUnicode! tarjet=%08lx %s",targ,gNuConvTestName);
 573             return TC_FAIL;
 574         }
 575         log_verbose("calling toUnicode @ %08lx to %08lx\n", targ,end);
 576
 577         /* oldTarg = targ; */
 578
 579         status = U_ZERO_ERROR;
 580
 581         ucnv_toUnicode (conv,
 582                 &targ,
 583                 end,
 584                 &src,
 585                 srcLimit,
 586                 checkOffsets ? offs : NULL,
 587                 (UBool)(srcLimit == realSourceEnd), /* flush if we're at the end of hte source data */
 588                 &status);
 589
 590         /*        offs += (targ-oldTarg); */
 591
 592       } while ( (status == U_BUFFER_OVERFLOW_ERROR) || (U_SUCCESS(status) && (srcLimit < realSourceEnd)) ); /* while we just need another buffer */
 593
 594     if(U_FAILURE(status))
 595     {
 596         log_err("Problem doing %s toUnicode, errcode %s %s\n", codepage, myErrorName(status), gNuConvTestName);
 597         return TC_FAIL;
 598     }
 599
 600     log_verbose("\nConversion done. %d bytes -> %d chars.\nResult :",
 601         sourcelen, targ-junkout);
 602     if(getTestOption(VERBOSITY_OPTION))
 603     {
 604         char junk[9999];
 605         char offset_str[9999];
 606         UChar *ptr;
 607
 608         junk[0] = 0;
 609         offset_str[0] = 0;
 610
 611         for(ptr = junkout;ptr<targ;ptr++)
 612         {
 613             sprintf(junk + strlen(junk), "0x%04x, ", (0xFFFF) & (unsigned int)*ptr);
 614             sprintf(offset_str + strlen(offset_str), "0x%04x, ", (0xFFFF) & (unsigned int)junokout[ptr-junkout]);
 615         }
 616
 617         log_verbose(junk);
 618         printUSeq(expect, expectlen);
 619         if ( checkOffsets )
 620           {
 621             log_verbose("\nOffsets:");
 622             log_verbose(offset_str);
 623           }
 624         log_verbose("\n");
 625     }
 626     ucnv_close(conv);
 627
 628     log_verbose("comparing %d uchars (%d bytes)..\n",expectlen,expectlen*2);
 629
 630     if (checkOffsets && (expectOffsets != 0))
 631     {
 632         if(memcmp(junokout,expectOffsets,(targ-junkout) * sizeof(int32_t))){
 633             log_err("did not get the expected offsets. %s\n",gNuConvTestName);
 634             log_err("Got:      ");
 635             for(p=junkout;p<targ;p++) {
 636                 log_err("%d,", junokout[p-junkout]);
 637             }
 638             log_err("\n");
 639             log_err("Expected: ");
 640             for(i=0; i<(targ-junkout); i++) {
 641                 log_err("%d,", expectOffsets[i]);
 642             }
 643             log_err("\n");
 644             log_err("output:   ");
 645             for(i=0; i<(targ-junkout); i++) {
 646                 log_err("%X,", junkout[i]);
 647             }
 648             log_err("\n");
 649             log_err("input:    ");
 650             for(i=0; i<(src-(const char *)source); i++) {
 651                 log_err("%X,", (unsigned char)source[i]);
 652             }
 653             log_err("\n");
 654         }
 655     }
 656
 657     if(!memcmp(junkout, expect, expectlen*2))
 658     {
 659         log_verbose("Matches!\n");
 660         return TC_OK;
 661     }
 662     else
 663     {
 664         log_err("String does not match. %s\n", gNuConvTestName);
 665         log_verbose("String does not match. %s\n", gNuConvTestName);
 666         printf("\nGot:");
 667         printUSeqErr(junkout, expectlen);
 668         printf("\nExpected:");
 669         printUSeqErr(expect, expectlen);
 670         return TC_MISMATCH;
 671     }
 672 }
 673
 674
 675 static void TestNewConvertWithBufferSizes(int32_t outsize, int32_t insize )
 676 {
 677 /** test chars #1 */
 678     /*  1 2 3  1Han 2Han 3Han .  */
 679     static const UChar   sampleText[] =
 680      { 0x0031, 0x0032, 0x0033, 0x0000, 0x4e00, 0x4e8c, 0x4e09, 0x002E, 0xD840, 0xDC21 };
 681     static const UChar sampleTextRoundTripUnmappable[] =
 682     { 0x0031, 0x0032, 0x0033, 0x0000, 0x4e00, 0x4e8c, 0x4e09, 0x002E, 0xfffd };
 683
 684
 685     static const uint8_t expectedUTF8[] =
 686      { 0x31, 0x32, 0x33, 0x00, 0xe4, 0xb8, 0x80, 0xe4, 0xba, 0x8c, 0xe4, 0xb8, 0x89, 0x2E, 0xf0, 0xa0, 0x80, 0xa1 };
 687     static const int32_t toUTF8Offs[] =
 688      { 0x00, 0x01, 0x02, 0x03, 0x04, 0x04, 0x04, 0x05, 0x05, 0x05, 0x06, 0x06, 0x06, 0x07, 0x08, 0x08, 0x08, 0x08 };
 689     static const int32_t fmUTF8Offs[] =
 690      { 0x0000, 0x0001, 0x0002, 0x0003, 0x0004, 0x0007, 0x000a, 0x000d, 0x000e, 0x000e };
 691
 692 #ifdef U_ENABLE_GENERIC_ISO_2022
 693     /* Same as UTF8, but with ^[%B preceeding */
 694     static const const uint8_t expectedISO2022[] =
 695      { 0x1b, 0x25, 0x42, 0x31, 0x32, 0x33, 0x00, 0xe4, 0xb8, 0x80, 0xe4, 0xba, 0x8c, 0xe4, 0xb8, 0x89, 0x2E };
 696     static const int32_t toISO2022Offs[]     =
 697      { -1, -1, -1, 0x00, 0x01, 0x02, 0x03, 0x04, 0x04,
 698        0x04, 0x05, 0x05, 0x05, 0x06, 0x06, 0x06, 0x07 }; /* right? */
 699     static const int32_t fmISO2022Offs[] =
 700      { 0x0003, 0x0004, 0x0005, 0x0006, 0x0007, 0x000a, 0x000d, 0x0010 }; /* is this right? */
 701 #endif
 702
 703     /*  1 2 3 0, <SO> h1 h2 h3 <SI> . EBCDIC_STATEFUL */
 704     static const uint8_t expectedIBM930[] =
 705      { 0xF1, 0xF2, 0xF3, 0x00, 0x0E, 0x45, 0x41, 0x45, 0x42, 0x45, 0x43, 0x0F, 0x4B, 0x0e, 0xfe, 0xfe, 0x0f };
 706     static const int32_t toIBM930Offs[] =
 707      { 0x00, 0x01, 0x02, 0x03, 0x04, 0x04, 0x04, 0x05, 0x05, 0x06, 0x06, 0x07, 0x07, 0x08, 0x08, 0x08, -1 };
 708     static const int32_t fmIBM930Offs[] =
 709      { 0x0000, 0x0001, 0x0002, 0x0003, 0x0005, 0x0007, 0x0009, 0x000c, 0x000e };
 710
 711     /* 1 2 3 0 h1 h2 h3 . MBCS*/
 712     static const uint8_t expectedIBM943[] =
 713      {  0x31, 0x32, 0x33, 0x00, 0x88, 0xea, 0x93, 0xf1, 0x8e, 0x4f, 0x2e, 0xfc, 0xfc };
 714     static const int32_t toIBM943Offs    [] =
 715      {  0x00, 0x01, 0x02, 0x03, 0x04, 0x04, 0x05, 0x05, 0x06, 0x06, 0x07, 0x08, 0x08 };
 716     static const int32_t fmIBM943Offs[] =
 717      { 0x0000, 0x0001, 0x0002, 0x0003, 0x0004, 0x0006, 0x0008, 0x000a, 0x000b };
 718
 719     /* 1 2 3 0 h1 h2 h3 . DBCS*/
 720     static const uint8_t expectedIBM9027[] =
 721      {  0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0x4c, 0x41, 0x4c, 0x48, 0x4c, 0x55, 0xfe, 0xfe, 0xfe, 0xfe };
 722     static const int32_t toIBM9027Offs    [] =
 723      {  0x00, 0x00, 0x01, 0x01, 0x02, 0x02, 0x03, 0x03, 0x04, 0x04, 0x05, 0x05, 0x06, 0x06, 0x07, 0x07, 0x08, 0x08 };
 724
 725      /* 1 2 3 0 <?> <?> <?> . SBCS*/
 726     static const uint8_t expectedIBM920[] =
 727      {  0x31, 0x32, 0x33, 0x00, 0x1a, 0x1a, 0x1a, 0x2e, 0x1a };
 728     static const int32_t toIBM920Offs    [] =
 729      {  0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08 };
 730
 731     /* 1 2 3 0 <?> <?> <?> . SBCS*/
 732     static const uint8_t expectedISO88593[] =
 733      { 0x31, 0x32, 0x33, 0x00, 0x1a, 0x1a, 0x1a, 0x2E, 0x1a };
 734     static const int32_t toISO88593Offs[]     =
 735      { 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08 };
 736
 737     /* 1 2 3 0 <?> <?> <?> . <?> LATIN_1*/
 738     static const uint8_t expectedLATIN1[] =
 739      { 0x31, 0x32, 0x33, 0x00, 0x1a, 0x1a, 0x1a, 0x2E, 0x1a };
 740     static const int32_t toLATIN1Offs[]     =
 741      { 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08 };
 742
 743
 744     /*  etc */
 745     static const uint8_t expectedUTF16BE[] =
 746      { 0x00, 0x31, 0x00, 0x32, 0x00, 0x33, 0x00, 0x00, 0x4e, 0x00, 0x4e, 0x8c, 0x4e, 0x09, 0x00, 0x2e, 0xd8, 0x40, 0xdc, 0x21 };
 747     static const int32_t toUTF16BEOffs[]=
 748      { 0x00, 0x00, 0x01, 0x01, 0x02, 0x02, 0x03, 0x03, 0x04, 0x04, 0x05, 0x05, 0x06, 0x06, 0x07, 0x07, 0x08, 0x08, 0x08, 0x08 };
 749     static const int32_t fmUTF16BEOffs[] =
 750      { 0x0000, 0x0002, 0x0004, 0x0006, 0x0008, 0x000a, 0x000c,  0x000e, 0x0010, 0x0010 };
 751
 752     static const uint8_t expectedUTF16LE[] =
 753      { 0x31, 0x00, 0x32, 0x00, 0x33, 0x00, 0x00, 0x00, 0x00, 0x4e, 0x8c, 0x4e, 0x09, 0x4e, 0x2e, 0x00, 0x40, 0xd8, 0x21, 0xdc };
 754     static const int32_t toUTF16LEOffs[]=
 755      { 0x00, 0x00, 0x01, 0x01, 0x02, 0x02, 0x03, 0x03, 0x04, 0x04, 0x05, 0x05, 0x06, 0x06,  0x07, 0x07, 0x08, 0x08, 0x08, 0x08 };
 756     static const int32_t fmUTF16LEOffs[] =
 757      { 0x0000, 0x0002, 0x0004, 0x0006, 0x0008, 0x000a, 0x000c, 0x000e, 0x0010, 0x0010 };
 758
 759     static const uint8_t expectedUTF32BE[] =
 760      { 0x00, 0x00, 0x00, 0x31,
 761        0x00, 0x00, 0x00, 0x32,
 762        0x00, 0x00, 0x00, 0x33,
 763        0x00, 0x00, 0x00, 0x00,
 764        0x00, 0x00, 0x4e, 0x00,
 765        0x00, 0x00, 0x4e, 0x8c,
 766        0x00, 0x00, 0x4e, 0x09,
 767        0x00, 0x00, 0x00, 0x2e,
 768        0x00, 0x02, 0x00, 0x21 };
 769     static const int32_t toUTF32BEOffs[]=
 770      { 0x00, 0x00, 0x00, 0x00,
 771        0x01, 0x01, 0x01, 0x01,
 772        0x02, 0x02, 0x02, 0x02,
 773        0x03, 0x03, 0x03, 0x03,
 774        0x04, 0x04, 0x04, 0x04,
 775        0x05, 0x05, 0x05, 0x05,
 776        0x06, 0x06, 0x06, 0x06,
 777        0x07, 0x07, 0x07, 0x07,
 778        0x08, 0x08, 0x08, 0x08,
 779        0x08, 0x08, 0x08, 0x08 };
 780     static const int32_t fmUTF32BEOffs[] =
 781      { 0x0000, 0x0004, 0x0008, 0x000c, 0x0010, 0x0014, 0x0018,  0x001c, 0x0020, 0x0020 };
 782
 783     static const uint8_t expectedUTF32LE[] =
 784      { 0x31, 0x00, 0x00, 0x00,
 785        0x32, 0x00, 0x00, 0x00,
 786        0x33, 0x00, 0x00, 0x00,
 787        0x00, 0x00, 0x00, 0x00,
 788        0x00, 0x4e, 0x00, 0x00,
 789        0x8c, 0x4e, 0x00, 0x00,
 790        0x09, 0x4e, 0x00, 0x00,
 791        0x2e, 0x00, 0x00, 0x00,
 792        0x21, 0x00, 0x02, 0x00 };
 793     static const int32_t toUTF32LEOffs[]=
 794      { 0x00, 0x00, 0x00, 0x00,
 795        0x01, 0x01, 0x01, 0x01,
 796        0x02, 0x02, 0x02, 0x02,
 797        0x03, 0x03, 0x03, 0x03,
 798        0x04, 0x04, 0x04, 0x04,
 799        0x05, 0x05, 0x05, 0x05,
 800        0x06, 0x06, 0x06, 0x06,
 801        0x07, 0x07, 0x07, 0x07,
 802        0x08, 0x08, 0x08, 0x08,
 803        0x08, 0x08, 0x08, 0x08 };
 804     static const int32_t fmUTF32LEOffs[] =
 805      { 0x0000, 0x0004, 0x0008, 0x000c, 0x0010, 0x0014, 0x0018, 0x001c, 0x0020, 0x0020 };
 806
 807
 808
 809
 810 /** Test chars #2 **/
 811
 812     /* Sahha [health],  slashed h's */
 813     static const UChar malteseUChars[] = { 0x0053, 0x0061, 0x0127, 0x0127, 0x0061 };
 814     static const uint8_t expectedMaltese913[] = { 0x53, 0x61, 0xB1, 0xB1, 0x61 };
 815
 816     /* LMBCS */
 817     static const UChar LMBCSUChars[]     = { 0x0027, 0x010A, 0x0000, 0x0127, 0x2666, 0x0220 };
 818     static const uint8_t expectedLMBCS[] = { 0x27, 0x06, 0x04, 0x00, 0x01, 0x73, 0x01, 0x04, 0x14, 0x02, 0x20 };
 819     static const int32_t toLMBCSOffs[]   = { 0x00, 0x01, 0x01, 0x02, 0x03, 0x03, 0x04, 0x04 , 0x05, 0x05, 0x05 };
 820     static const int32_t fmLMBCSOffs[]   = { 0x0000, 0x0001, 0x0003, 0x0004, 0x0006, 0x0008};
 821     /*********************************** START OF CODE finally *************/
 822
 823     gInBufferSize = insize;
 824     gOutBufferSize = outsize;
 825
 826     log_verbose("\n\n\nTesting conversions with InputBufferSize = %d, OutputBufferSize = %d\n", gInBufferSize, gOutBufferSize);
 827
 828
 829     /*UTF-8*/
 830     testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
 831         expectedUTF8, sizeof(expectedUTF8), "UTF8", toUTF8Offs,FALSE );
 832
 833     log_verbose("Test surrogate behaviour for UTF8\n");
 834     {
 835         static const UChar testinput[]={ 0x20ac, 0xd801, 0xdc01, 0xdc01 };
 836         static const uint8_t expectedUTF8test2[]= { 0xe2, 0x82, 0xac,
 837                            0xf0, 0x90, 0x90, 0x81,
 838                            0xef, 0xbf, 0xbd
 839         };
 840         static const int32_t offsets[]={ 0, 0, 0, 1, 1, 1, 1, 3, 3, 3 };
 841         testConvertFromU(testinput, sizeof(testinput)/sizeof(testinput[0]),
 842                          expectedUTF8test2, sizeof(expectedUTF8test2), "UTF8", offsets,FALSE );
 843
 844
 845     }
 846
 847 #if !UCONFIG_NO_LEGACY_CONVERSION && defined(U_ENABLE_GENERIC_ISO_2022)
 848     /*ISO-2022*/
 849     testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
 850         expectedISO2022, sizeof(expectedISO2022), "ISO_2022", toISO2022Offs,FALSE );
 851 #endif
 852
 853     /*UTF16 LE*/
 854     testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
 855         expectedUTF16LE, sizeof(expectedUTF16LE), "utf-16le", toUTF16LEOffs,FALSE );
 856     /*UTF16 BE*/
 857     testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
 858         expectedUTF16BE, sizeof(expectedUTF16BE), "utf-16be", toUTF16BEOffs,FALSE );
 859     /*UTF32 LE*/
 860     testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
 861         expectedUTF32LE, sizeof(expectedUTF32LE), "utf-32le", toUTF32LEOffs,FALSE );
 862     /*UTF32 BE*/
 863     testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
 864         expectedUTF32BE, sizeof(expectedUTF32BE), "utf-32be", toUTF32BEOffs,FALSE );
 865
 866     /*LATIN_1*/
 867     testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
 868         expectedLATIN1, sizeof(expectedLATIN1), "LATIN_1", toLATIN1Offs,FALSE );
 869
 870 #if !UCONFIG_NO_LEGACY_CONVERSION
 871     /*EBCDIC_STATEFUL*/
 872     testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
 873         expectedIBM930, sizeof(expectedIBM930), "ibm-930", toIBM930Offs,FALSE );
 874
 875     testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
 876         expectedISO88593, sizeof(expectedISO88593), "iso-8859-3", toISO88593Offs,FALSE );
 877
 878     /*MBCS*/
 879
 880     testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
 881         expectedIBM943, sizeof(expectedIBM943), "ibm-943", toIBM943Offs,FALSE );
 882     /*DBCS*/
 883     testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
 884         expectedIBM9027, sizeof(expectedIBM9027), "@ibm9027", toIBM9027Offs,FALSE );
 885     /*SBCS*/
 886     testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
 887         expectedIBM920, sizeof(expectedIBM920), "ibm-920", toIBM920Offs,FALSE );
 888     /*SBCS*/
 889     testConvertFromU(sampleText, sizeof(sampleText)/sizeof(sampleText[0]),
 890         expectedISO88593, sizeof(expectedISO88593), "iso-8859-3", toISO88593Offs,FALSE );
 891 #endif
 892
 893
 894 /****/
 895
 896     /*UTF-8*/
 897     testConvertToU(expectedUTF8, sizeof(expectedUTF8),
 898         sampleText, sizeof(sampleText)/sizeof(sampleText[0]), "utf8", fmUTF8Offs,FALSE);
 899 #if !UCONFIG_NO_LEGACY_CONVERSION && defined(U_ENABLE_GENERIC_ISO_2022)
 900     /*ISO-2022*/
 901     testConvertToU(expectedISO2022, sizeof(expectedISO2022),
 902         sampleText, sizeof(sampleText)/sizeof(sampleText[0]), "ISO_2022", fmISO2022Offs,FALSE);
 903 #endif
 904
 905     /*UTF16 LE*/
 906     testConvertToU(expectedUTF16LE, sizeof(expectedUTF16LE),
 907         sampleText, sizeof(sampleText)/sizeof(sampleText[0]), "utf-16le", fmUTF16LEOffs,FALSE);
 908     /*UTF16 BE*/
 909     testConvertToU(expectedUTF16BE, sizeof(expectedUTF16BE),
 910         sampleText, sizeof(sampleText)/sizeof(sampleText[0]), "utf-16be", fmUTF16BEOffs,FALSE);
 911     /*UTF32 LE*/
 912     testConvertToU(expectedUTF32LE, sizeof(expectedUTF32LE),
 913         sampleText, sizeof(sampleText)/sizeof(sampleText[0]), "utf-32le", fmUTF32LEOffs,FALSE);
 914     /*UTF32 BE*/
 915     testConvertToU(expectedUTF32BE, sizeof(expectedUTF32BE),
 916         sampleText, sizeof(sampleText)/sizeof(sampleText[0]), "utf-32be", fmUTF32BEOffs,FALSE);
 917
 918 #if !UCONFIG_NO_LEGACY_CONVERSION
 919     /*EBCDIC_STATEFUL*/
 920     testConvertToU(expectedIBM930, sizeof(expectedIBM930), sampleTextRoundTripUnmappable,
 921             sizeof(sampleTextRoundTripUnmappable)/sizeof(sampleTextRoundTripUnmappable[0]), "ibm-930", fmIBM930Offs,FALSE);
 922     /*MBCS*/
 923     testConvertToU(expectedIBM943, sizeof(expectedIBM943),sampleTextRoundTripUnmappable,
 924             sizeof(sampleTextRoundTripUnmappable)/sizeof(sampleTextRoundTripUnmappable[0]), "ibm-943", fmIBM943Offs,FALSE);
 925 #endif
 926
 927     /* Try it again to make sure it still works */
 928     testConvertToU(expectedUTF16LE, sizeof(expectedUTF16LE),
 929         sampleText, sizeof(sampleText)/sizeof(sampleText[0]), "utf-16le", fmUTF16LEOffs,FALSE);
 930
 931 #if !UCONFIG_NO_LEGACY_CONVERSION
 932     testConvertToU(expectedMaltese913, sizeof(expectedMaltese913),
 933         malteseUChars, sizeof(malteseUChars)/sizeof(malteseUChars[0]), "latin3", NULL,FALSE);
 934
 935     testConvertFromU(malteseUChars, sizeof(malteseUChars)/sizeof(malteseUChars[0]),
 936         expectedMaltese913, sizeof(expectedMaltese913), "iso-8859-3", NULL,FALSE );
 937
 938     /*LMBCS*/
 939     testConvertFromU(LMBCSUChars, sizeof(LMBCSUChars)/sizeof(LMBCSUChars[0]),
 940         expectedLMBCS, sizeof(expectedLMBCS), "LMBCS-1", toLMBCSOffs,FALSE );
 941     testConvertToU(expectedLMBCS, sizeof(expectedLMBCS),
 942         LMBCSUChars, sizeof(LMBCSUChars)/sizeof(LMBCSUChars[0]), "LMBCS-1", fmLMBCSOffs,FALSE);
 943 #endif
 944
 945     /* UTF-7 examples are mostly from http://www.imc.org/rfc2152 */
 946     {
 947         /* encode directly set D and set O */
 948         static const uint8_t utf7[] = {
 949             /*
 950                 Hi Mom -+Jjo--!
 951                 A+ImIDkQ.
 952                 +-
 953                 +ZeVnLIqe
 954             */
 955             0x48, 0x69, 0x20, 0x4d, 0x6f, 0x6d, 0x20, 0x2d, 0x2b, 0x4a, 0x6a, 0x6f, 0x2d, 0x2d, 0x21,
 956             0x41, 0x2b, 0x49, 0x6d, 0x49, 0x44, 0x6b, 0x51, 0x2e,
 957             0x2b, 0x2d,
 958             0x2b, 0x5a, 0x65, 0x56, 0x6e, 0x4c, 0x49, 0x71, 0x65
 959         };
 960         static const UChar unicode[] = {
 961             /*
 962                 Hi Mom -<WHITE SMILING FACE>-!
 963                 A<NOT IDENTICAL TO><ALPHA>.
 964                 +
 965                 [Japanese word "nihongo"]
 966             */
 967             0x48, 0x69, 0x20, 0x4d, 0x6f, 0x6d, 0x20, 0x2d, 0x263a, 0x2d, 0x21,
 968             0x41, 0x2262, 0x0391, 0x2e,
 969             0x2b,
 970             0x65e5, 0x672c, 0x8a9e
 971         };
 972         static const int32_t toUnicodeOffsets[] = {
 973             0, 1, 2, 3, 4, 5, 6, 7, 9, 13, 14,
 974             15, 17, 19, 23,
 975             24,
 976             27, 29, 32
 977         };
 978         static const int32_t fromUnicodeOffsets[] = {
 979             0, 1, 2, 3, 4, 5, 6, 7, 8, 8, 8, 8, 8, 9, 10,
 980             11, 12, 12, 12, 13, 13, 13, 13, 14,
 981             15, 15,
 982             16, 16, 16, 17, 17, 17, 18, 18, 18
 983         };
 984
 985         /* same but escaping set O (the exclamation mark) */
 986         static const uint8_t utf7Restricted[] = {
 987             /*
 988                 Hi Mom -+Jjo--+ACE-
 989                 A+ImIDkQ.
 990                 +-
 991                 +ZeVnLIqe
 992             */
 993             0x48, 0x69, 0x20, 0x4d, 0x6f, 0x6d, 0x20, 0x2d, 0x2b, 0x4a, 0x6a, 0x6f, 0x2d, 0x2d, 0x2b, 0x41, 0x43, 0x45, 0x2d,
 994             0x41, 0x2b, 0x49, 0x6d, 0x49, 0x44, 0x6b, 0x51, 0x2e,
 995             0x2b, 0x2d,
 996             0x2b, 0x5a, 0x65, 0x56, 0x6e, 0x4c, 0x49, 0x71, 0x65
 997         };
 998         static const int32_t toUnicodeOffsetsR[] = {
 999             0, 1, 2, 3, 4, 5, 6, 7, 9, 13, 15,
1000             19, 21, 23, 27,
1001             28,
1002             31, 33, 36
1003         };
1004         static const int32_t fromUnicodeOffsetsR[] = {
1005             0, 1, 2, 3, 4, 5, 6, 7, 8, 8, 8, 8, 8, 9, 10, 10, 10, 10, 10,
1006             11, 12, 12, 12, 13, 13, 13, 13, 14,
1007             15, 15,
1008             16, 16, 16, 17, 17, 17, 18, 18, 18
1009         };
1010
1011         testConvertFromU(unicode, sizeof(unicode)/U_SIZEOF_UCHAR, utf7, sizeof(utf7), "UTF-7", fromUnicodeOffsets,FALSE);
1012
1013         testConvertToU(utf7, sizeof(utf7), unicode, sizeof(unicode)/U_SIZEOF_UCHAR, "UTF-7", toUnicodeOffsets,FALSE);
1014
1015         testConvertFromU(unicode, sizeof(unicode)/U_SIZEOF_UCHAR, utf7Restricted, sizeof(utf7Restricted), "UTF-7,version=1", fromUnicodeOffsetsR,FALSE);
1016
1017         testConvertToU(utf7Restricted, sizeof(utf7Restricted), unicode, sizeof(unicode)/U_SIZEOF_UCHAR, "UTF-7,version=1", toUnicodeOffsetsR,FALSE);
1018     }
1019
1020     /*
1021      * IMAP-mailbox-name examples are mostly from http://www.imc.org/rfc2152,
1022      * modified according to RFC 2060,
1023      * and supplemented with the one example in RFC 2060 itself.
1024      */
1025     {
1026         static const uint8_t imap[] = {
1027             /*  Hi Mom -&Jjo--!
1028                 A&ImIDkQ-.
1029                 &-
1030                 &ZeVnLIqe-
1031                 \
1032                 ~peter
1033                 /mail
1034                 /&ZeVnLIqe-
1035                 /&U,BTFw-
1036             */
1037             0x48, 0x69, 0x20, 0x4d, 0x6f, 0x6d, 0x20, 0x2d, 0x26, 0x4a, 0x6a, 0x6f, 0x2d, 0x2d, 0x21,
1038             0x41, 0x26, 0x49, 0x6d, 0x49, 0x44, 0x6b, 0x51, 0x2d, 0x2e,
1039             0x26, 0x2d,
1040             0x26, 0x5a, 0x65, 0x56, 0x6e, 0x4c, 0x49, 0x71, 0x65, 0x2d,
1041             0x5c,
1042             0x7e, 0x70, 0x65, 0x74, 0x65, 0x72,
1043             0x2f, 0x6d, 0x61, 0x69, 0x6c,
1044             0x2f, 0x26, 0x5a, 0x65, 0x56, 0x6e, 0x4c, 0x49, 0x71, 0x65, 0x2d,
1045             0x2f, 0x26, 0x55, 0x2c, 0x42, 0x54, 0x46, 0x77, 0x2d
1046         };
1047         static const UChar unicode[] = {
1048             /*  Hi Mom -<WHITE SMILING FACE>-!
1049                 A<NOT IDENTICAL TO><ALPHA>.
1050                 &
1051                 [Japanese word "nihongo"]
1052                 \
1053                 ~peter
1054                 /mail
1055                 /<65e5, 672c, 8a9e>
1056                 /<53f0, 5317>
1057             */
1058             0x48, 0x69, 0x20, 0x4d, 0x6f, 0x6d, 0x20, 0x2d, 0x263a, 0x2d, 0x21,
1059             0x41, 0x2262, 0x0391, 0x2e,
1060             0x26,
1061             0x65e5, 0x672c, 0x8a9e,
1062             0x5c,
1063             0x7e, 0x70, 0x65, 0x74, 0x65, 0x72,
1064             0x2f, 0x6d, 0x61, 0x69, 0x6c,
1065             0x2f, 0x65e5, 0x672c, 0x8a9e,
1066             0x2f, 0x53f0, 0x5317
1067         };
1068         static const int32_t toUnicodeOffsets[] = {
1069             0, 1, 2, 3, 4, 5, 6, 7, 9, 13, 14,
1070             15, 17, 19, 24,
1071             25,
1072             28, 30, 33,
1073             37,
1074             38, 39, 40, 41, 42, 43,
1075             44, 45, 46, 47, 48,
1076             49, 51, 53, 56,
1077             60, 62, 64
1078         };
1079         static const int32_t fromUnicodeOffsets[] = {
1080             0, 1, 2, 3, 4, 5, 6, 7, 8, 8, 8, 8, 8, 9, 10,
1081             11, 12, 12, 12, 13, 13, 13, 13, 13, 14,
1082             15, 15,
1083             16, 16, 16, 17, 17, 17, 18, 18, 18, 18,
1084             19,
1085             20, 21, 22, 23, 24, 25,
1086             26, 27, 28, 29, 30,
1087             31, 32, 32, 32, 33, 33, 33, 34, 34, 34, 34,
1088             35, 36, 36, 36, 37, 37, 37, 37, 37
1089         };
1090
1091         testConvertFromU(unicode, sizeof(unicode)/U_SIZEOF_UCHAR, imap, sizeof(imap), "IMAP-mailbox-name", fromUnicodeOffsets,FALSE);
1092
1093         testConvertToU(imap, sizeof(imap), unicode, sizeof(unicode)/U_SIZEOF_UCHAR, "IMAP-mailbox-name", toUnicodeOffsets,FALSE);
1094     }
1095
1096     /* Test UTF-8 bad data handling*/
1097     {
1098         static const uint8_t utf8[]={
1099             0x61,
1100             0xf7, 0xbf, 0xbf, 0xbf,         /* > 10FFFF */
1101             0x00,
1102             0x62,
1103             0xfb, 0xbf, 0xbf, 0xbf, 0xbf,   /* > 10FFFF */
1104             0xfb, 0xbf, 0xbf, 0xbf, 0xbf,   /* > 10FFFF */
1105             0xf4, 0x8f, 0xbf, 0xbf,         /* 10FFFF */
1106             0xdf, 0xbf,                     /* 7ff */
1107             0xbf,                           /* truncated tail */
1108             0xf4, 0x90, 0x80, 0x80,         /* 11FFFF */
1109             0x02
1110         };
1111
1112         static const uint16_t utf8Expected[]={
1113             0x0061,
1114             0xfffd,
1115             0x0000,
1116             0x0062,
1117             0xfffd,
1118             0xfffd,
1119             0xdbff, 0xdfff,
1120             0x07ff,
1121             0xfffd,
1122             0xfffd,
1123             0x0002
1124         };
1125
1126         static const int32_t utf8Offsets[]={
1127             0, 1, 5, 6, 7, 12, 17, 17, 21, 23, 24, 28
1128         };
1129         testConvertToU(utf8, sizeof(utf8),
1130                        utf8Expected, sizeof(utf8Expected)/sizeof(utf8Expected[0]), "utf-8", utf8Offsets ,FALSE);
1131
1132     }
1133
1134     /* Test UTF-32BE bad data handling*/
1135     {
1136         static const uint8_t utf32[]={
1137             0x00, 0x00, 0x00, 0x61,
1138             0x00, 0x11, 0x00, 0x00,         /* 0x110000 out of range */
1139             0x00, 0x10, 0xff, 0xff,         /* 0x10FFFF in range */
1140             0x00, 0x00, 0x00, 0x62,
1141             0xff, 0xff, 0xff, 0xff,         /* 0xffffffff out of range */
1142             0x7f, 0xff, 0xff, 0xff,         /* 0x7fffffff out of range */
1143             0x00, 0x00, 0x01, 0x62,
1144             0x00, 0x00, 0x02, 0x62
1145         };
1146         static const uint16_t utf32Expected[]={
1147             0x0061,
1148             0xfffd,         /* 0x110000 out of range */
1149             0xDBFF,         /* 0x10FFFF in range */
1150             0xDFFF,
1151             0x0062,
1152             0xfffd,         /* 0xffffffff out of range */
1153             0xfffd,         /* 0x7fffffff out of range */
1154             0x0162,
1155             0x0262
1156         };
1157         static const int32_t utf32Offsets[]={
1158             0, 4, 8, 8, 12, 16, 20, 24, 28
1159         };
1160         static const uint8_t utf32ExpectedBack[]={
1161             0x00, 0x00, 0x00, 0x61,
1162             0x00, 0x00, 0xff, 0xfd,         /* 0x110000 out of range */
1163             0x00, 0x10, 0xff, 0xff,         /* 0x10FFFF in range */
1164             0x00, 0x00, 0x00, 0x62,
1165             0x00, 0x00, 0xff, 0xfd,         /* 0xffffffff out of range */
1166             0x00, 0x00, 0xff, 0xfd,         /* 0x7fffffff out of range */
1167             0x00, 0x00, 0x01, 0x62,
1168             0x00, 0x00, 0x02, 0x62
1169         };
1170         static const int32_t utf32OffsetsBack[]={
1171             0,0,0,0,
1172             1,1,1,1,
1173             2,2,2,2,
1174             4,4,4,4,
1175             5,5,5,5,
1176             6,6,6,6,
1177             7,7,7,7,
1178             8,8,8,8
1179         };
1180
1181         testConvertToU(utf32, sizeof(utf32),
1182                        utf32Expected, sizeof(utf32Expected)/sizeof(utf32Expected[0]), "utf-32be", utf32Offsets ,FALSE);
1183         testConvertFromU(utf32Expected, sizeof(utf32Expected)/sizeof(utf32Expected[0]),
1184             utf32ExpectedBack, sizeof(utf32ExpectedBack), "utf-32be", utf32OffsetsBack, FALSE);
1185     }
1186
1187     /* Test UTF-32LE bad data handling*/
1188     {
1189         static const uint8_t utf32[]={
1190             0x61, 0x00, 0x00, 0x00,
1191             0x00, 0x00, 0x11, 0x00,         /* 0x110000 out of range */
1192             0xff, 0xff, 0x10, 0x00,         /* 0x10FFFF in range */
1193             0x62, 0x00, 0x00, 0x00,
1194             0xff, 0xff, 0xff, 0xff,         /* 0xffffffff out of range */
1195             0xff, 0xff, 0xff, 0x7f,         /* 0x7fffffff out of range */
1196             0x62, 0x01, 0x00, 0x00,
1197             0x62, 0x02, 0x00, 0x00,
1198         };
1199
1200         static const uint16_t utf32Expected[]={
1201             0x0061,
1202             0xfffd,         /* 0x110000 out of range */
1203             0xDBFF,         /* 0x10FFFF in range */
1204             0xDFFF,
1205             0x0062,
1206             0xfffd,         /* 0xffffffff out of range */
1207             0xfffd,         /* 0x7fffffff out of range */
1208             0x0162,
1209             0x0262
1210         };
1211         static const int32_t utf32Offsets[]={
1212             0, 4, 8, 8, 12, 16, 20, 24, 28
1213         };
1214         static const uint8_t utf32ExpectedBack[]={
1215             0x61, 0x00, 0x00, 0x00,
1216             0xfd, 0xff, 0x00, 0x00,         /* 0x110000 out of range */
1217             0xff, 0xff, 0x10, 0x00,         /* 0x10FFFF in range */
1218             0x62, 0x00, 0x00, 0x00,
1219             0xfd, 0xff, 0x00, 0x00,         /* 0xffffffff out of range */
1220             0xfd, 0xff, 0x00, 0x00,         /* 0x7fffffff out of range */
1221             0x62, 0x01, 0x00, 0x00,
1222             0x62, 0x02, 0x00, 0x00
1223         };
1224         static const int32_t utf32OffsetsBack[]={
1225             0,0,0,0,
1226             1,1,1,1,
1227             2,2,2,2,
1228             4,4,4,4,
1229             5,5,5,5,
1230             6,6,6,6,
1231             7,7,7,7,
1232             8,8,8,8
1233         };
1234         testConvertToU(utf32, sizeof(utf32),
1235             utf32Expected, sizeof(utf32Expected)/sizeof(utf32Expected[0]), "utf-32le", utf32Offsets,FALSE );
1236         testConvertFromU(utf32Expected, sizeof(utf32Expected)/sizeof(utf32Expected[0]),
1237             utf32ExpectedBack, sizeof(utf32ExpectedBack), "utf-32le", utf32OffsetsBack, FALSE);
1238     }
1239 }
1240
1241 static void TestCoverageMBCS(){
1242 #if 0
1243     UErrorCode status = U_ZERO_ERROR;
1244     const char *directory = loadTestData(&status);
1245     char* tdpath = NULL;
1246     char* saveDirectory = (char*)malloc(sizeof(char) *(strlen(u_getDataDirectory())+1));
1247     int len = strlen(directory);
1248     char* index=NULL;
1249
1250     tdpath = (char*) malloc(sizeof(char) * (len * 2));
1251     uprv_strcpy(saveDirectory,u_getDataDirectory());
1252     log_verbose("Retrieved data directory %s \n",saveDirectory);
1253     uprv_strcpy(tdpath,directory);
1254     index=strrchr(tdpath,(char)U_FILE_SEP_CHAR);
1255
1256     if((unsigned int)(index-tdpath) != (strlen(tdpath)-1)){
1257             *(index+1)=0;
1258     }
1259     u_setDataDirectory(tdpath);
1260     log_verbose("ICU data directory is set to: %s \n" ,tdpath);
1261 #endif
1262
1263     /*some more test to increase the code coverage in MBCS.  Create an test converter from test1.ucm
1264       which is test file for MBCS conversion with single-byte codepage data.*/
1265     {
1266
1267         /* MBCS with single byte codepage data test1.ucm*/
1268         const UChar unicodeInput[]    = { 0x20ac, 0x0005, 0x0006, 0xdbc4, 0xde34, 0x0003};
1269         const uint8_t expectedtest1[] = { 0x00, 0x05, 0xff, 0x07, 0xff,};
1270         int32_t  totest1Offs[]        = { 0, 1, 2, 3, 5, };
1271
1272         /*from Unicode*/
1273         testConvertFromU(unicodeInput, sizeof(unicodeInput)/sizeof(unicodeInput[0]),
1274             expectedtest1, sizeof(expectedtest1), "@test1", totest1Offs,FALSE );
1275     }
1276
1277     /*some more test to increase the code coverage in MBCS.  Create an test converter from test3.ucm
1278       which is test file for MBCS conversion with three-byte codepage data.*/
1279     {
1280
1281         /* MBCS with three byte codepage data test3.ucm*/
1282         const UChar unicodeInput[]    = { 0x20ac, 0x0005, 0x0006, 0x000b, 0xdbc4, 0xde34, 0xd84d, 0xdc56, 0x000e};
1283         const uint8_t expectedtest3[] = { 0x00, 0x05, 0xff, 0x01, 0x02, 0x0b,  0x07,  0x01, 0x02, 0x0a,  0xff,};
1284         int32_t  totest3Offs[]        = { 0, 1, 2, 3, 3, 3, 4, 6, 6, 6, 8};
1285
1286         const uint8_t test3input[]    = { 0x00, 0x05, 0x06, 0x01, 0x02, 0x0b,  0x07,  0x01, 0x02, 0x0a, 0x01, 0x02, 0x0c,};
1287         const UChar expectedUnicode[] = { 0x20ac, 0x0005, 0x0006, 0x000b, 0xdbc4, 0xde34, 0xd84d, 0xdc56, 0xfffd};
1288         int32_t fromtest3Offs[]       = { 0, 1, 2, 3, 6, 6, 7, 7, 10 };
1289
1290         /*from Unicode*/
1291         testConvertFromU(unicodeInput, sizeof(unicodeInput)/sizeof(unicodeInput[0]),
1292             expectedtest3, sizeof(expectedtest3), "@test3", totest3Offs,FALSE );
1293
1294         /*to Unicode*/
1295         testConvertToU(test3input, sizeof(test3input),
1296             expectedUnicode, sizeof(expectedUnicode)/sizeof(expectedUnicode[0]), "@test3", fromtest3Offs ,FALSE);
1297
1298     }
1299
1300     /*some more test to increase the code coverage in MBCS.  Create an test converter from test4.ucm
1301       which is test file for MBCS conversion with four-byte codepage data.*/
1302     {
1303
1304         /* MBCS with three byte codepage data test4.ucm*/
1305         static const UChar unicodeInput[]    = { 0x20ac, 0x0005, 0x0006, 0x000b, 0xdbc4, 0xde34, 0xd84d, 0xdc56, 0x000e};
1306         static const uint8_t expectedtest4[] = { 0x00, 0x05, 0xff, 0x01, 0x02, 0x03, 0x0b,  0x07,  0x01, 0x02, 0x03, 0x0a,  0xff,};
1307         static const int32_t totest4Offs[]   = { 0, 1, 2, 3, 3, 3, 3, 4, 6, 6, 6, 6, 8,};
1308
1309         static const uint8_t test4input[]    = { 0x00, 0x05, 0x06, 0x01, 0x02, 0x03, 0x0b,  0x07,  0x01, 0x02, 0x03, 0x0a, 0x01, 0x02, 0x03, 0x0c,};
1310         static const UChar expectedUnicode[] = { 0x20ac, 0x0005, 0x0006, 0x000b, 0xdbc4, 0xde34, 0xd84d, 0xdc56, 0xfffd};
1311         static const int32_t fromtest4Offs[] = { 0, 1, 2, 3, 7, 7, 8, 8, 12,};
1312
1313         /*from Unicode*/
1314         testConvertFromU(unicodeInput, sizeof(unicodeInput)/sizeof(unicodeInput[0]),
1315             expectedtest4, sizeof(expectedtest4), "@test4", totest4Offs,FALSE );
1316
1317         /*to Unicode*/
1318         testConvertToU(test4input, sizeof(test4input),
1319             expectedUnicode, sizeof(expectedUnicode)/sizeof(expectedUnicode[0]), "@test4", fromtest4Offs,FALSE );
1320
1321     }
1322 #if 0
1323     free(tdpath);
1324     /* restore the original data directory */
1325     log_verbose("Setting the data directory to %s \n", saveDirectory);
1326     u_setDataDirectory(saveDirectory);
1327     free(saveDirectory);
1328 #endif
1329
1330 }
1331
1332 static void TestConverterType(const char *convName, UConverterType convType) {
1333     UConverter* myConverter;
1334     UErrorCode err = U_ZERO_ERROR;
1335
1336     myConverter = my_ucnv_open(convName, &err);
1337
1338     if (U_FAILURE(err)) {
1339         log_data_err("Failed to create an %s converter\n", convName);
1340         return;
1341     }
1342     else
1343     {
1344         if (ucnv_getType(myConverter)!=convType) {
1345             log_err("ucnv_getType Failed for %s. Got enum value 0x%X\n",
1346                 convName, convType);
1347         }
1348         else {
1349             log_verbose("ucnv_getType %s ok\n", convName);
1350         }
1351     }
1352     ucnv_close(myConverter);
1353 }
1354
1355 static void TestConverterTypesAndStarters()
1356 {
1357 #if !UCONFIG_NO_LEGACY_CONVERSION
1358     UConverter* myConverter;
1359     UErrorCode err = U_ZERO_ERROR;
1360     UBool mystarters[256];
1361
1362 /*    const UBool expectedKSCstarters[256] = {
1363         FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
1364         FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
1365         FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
1366         FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
1367         FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
1368         FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
1369         FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
1370         FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
1371         FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
1372         FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
1373         FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
1374         FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
1375         FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
1376         FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
1377         FALSE, FALSE, FALSE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE,
1378         TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE,
1379         TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE,
1380         TRUE, TRUE, TRUE, FALSE, FALSE, TRUE, TRUE, TRUE, TRUE, TRUE,
1381         TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE,
1382         TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE,
1383         TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE,
1384         TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE,
1385         TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE,
1386         TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE,
1387         TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE,
1388         TRUE, TRUE, TRUE, TRUE, TRUE, TRUE};*/
1389
1390
1391     log_verbose("Testing KSC, ibm-930, ibm-878  for starters and their conversion types.");
1392
1393     myConverter = ucnv_open("ksc", &err);
1394     if (U_FAILURE(err)) {
1395       log_data_err("Failed to create an ibm-ksc converter\n");
1396       return;
1397     }
1398     else
1399     {
1400         if (ucnv_getType(myConverter)!=UCNV_MBCS)
1401             log_err("ucnv_getType Failed for ibm-949\n");
1402         else
1403             log_verbose("ucnv_getType ibm-949 ok\n");
1404
1405         if(myConverter!=NULL)
1406             ucnv_getStarters(myConverter, mystarters, &err);
1407
1408         /*if (memcmp(expectedKSCstarters, mystarters, sizeof(expectedKSCstarters)))
1409           log_err("Failed ucnv_getStarters for ksc\n");
1410           else
1411           log_verbose("ucnv_getStarters ok\n");*/
1412
1413     }
1414     ucnv_close(myConverter);
1415
1416     TestConverterType("ibm-930", UCNV_EBCDIC_STATEFUL);
1417     TestConverterType("ibm-878", UCNV_SBCS);
1418 #endif
1419
1420     TestConverterType("iso-8859-1", UCNV_LATIN_1);
1421
1422     TestConverterType("ibm-1208", UCNV_UTF8);
1423
1424     TestConverterType("utf-8", UCNV_UTF8);
1425     TestConverterType("UTF-16BE", UCNV_UTF16_BigEndian);
1426     TestConverterType("UTF-16LE", UCNV_UTF16_LittleEndian);
1427     TestConverterType("UTF-32BE", UCNV_UTF32_BigEndian);
1428     TestConverterType("UTF-32LE", UCNV_UTF32_LittleEndian);
1429
1430 #if !UCONFIG_NO_LEGACY_CONVERSION
1431
1432 #if defined(U_ENABLE_GENERIC_ISO_2022)
1433     TestConverterType("iso-2022", UCNV_ISO_2022);
1434 #endif
1435
1436     TestConverterType("hz", UCNV_HZ);
1437 #endif
1438
1439     TestConverterType("scsu", UCNV_SCSU);
1440
1441 #if !UCONFIG_NO_LEGACY_CONVERSION
1442     TestConverterType("x-iscii-de", UCNV_ISCII);
1443 #endif
1444
1445     TestConverterType("ascii", UCNV_US_ASCII);
1446     TestConverterType("utf-7", UCNV_UTF7);
1447     TestConverterType("IMAP-mailbox-name", UCNV_IMAP_MAILBOX);
1448     TestConverterType("bocu-1", UCNV_BOCU1);
1449 }
1450
1451 static void
1452 TestAmbiguousConverter(UConverter *cnv) {
1453     static const char inBytes[3]={ 0x61, 0x5B, 0x5c };
1454     UChar outUnicode[20]={ 0, 0, 0, 0 };
1455
1456     const char *s;
1457     UChar *u;
1458     UErrorCode errorCode;
1459     UBool isAmbiguous;
1460
1461     /* try to convert an 'a', a square bracket and a US-ASCII backslash */
1462     errorCode=U_ZERO_ERROR;
1463     s=inBytes;
1464     u=outUnicode;
1465     ucnv_toUnicode(cnv, &u, u+20, &s, s+3, NULL, TRUE, &errorCode);
1466     if(U_FAILURE(errorCode)) {
1467         /* we do not care about general failures in this test; the input may just not be mappable */
1468         return;
1469     }
1470
1471     if(outUnicode[0]!=0x61 || outUnicode[1]!=0x5B || outUnicode[2]==0xfffd) {
1472         /* not a close ASCII-family encoding, or 0x5c is unassigned/illegal: this test is not applicable */
1473         /* There are some encodings that are partially ASCII based,
1474         like the ISO-7 and GSM series of codepages, which we ignore. */
1475         return;
1476     }
1477
1478     isAmbiguous=ucnv_isAmbiguous(cnv);
1479
1480     /* check that outUnicode[1]!=0x5c is exactly the same as ucnv_isAmbiguous() */
1481     if((outUnicode[2]!=0x5c)!=isAmbiguous) {
1482         log_err("error: converter \"%s\" needs a backslash fix: %d but ucnv_isAmbiguous()==%d\n",
1483             ucnv_getName(cnv, &errorCode), outUnicode[2]!=0x5c, isAmbiguous);
1484         return;
1485     }
1486
1487     if(outUnicode[2]!=0x5c) {
1488         /* needs fixup, fix it */
1489         ucnv_fixFileSeparator(cnv, outUnicode, (int32_t)(u-outUnicode));
1490         if(outUnicode[2]!=0x5c) {
1491             /* the fix failed */
1492             log_err("error: ucnv_fixFileSeparator(%s) failed\n", ucnv_getName(cnv, &errorCode));
1493             return;
1494         }
1495     }
1496 }
1497
1498 static void TestAmbiguous()
1499 {
1500     UErrorCode status = U_ZERO_ERROR;
1501     UConverter *ascii_cnv = 0, *sjis_cnv = 0, *cnv;
1502     static const char target[] = {
1503         /* "\\usr\\local\\share\\data\\icutest.txt" */
1504         0x5c, 0x75, 0x73, 0x72,
1505         0x5c, 0x6c, 0x6f, 0x63, 0x61, 0x6c,
1506         0x5c, 0x73, 0x68, 0x61, 0x72, 0x65,
1507         0x5c, 0x64, 0x61, 0x74, 0x61,
1508         0x5c, 0x69, 0x63, 0x75, 0x74, 0x65, 0x73, 0x74, 0x2e, 0x74, 0x78, 0x74,
1509         0
1510     };
1511     UChar asciiResult[200], sjisResult[200];
1512     int32_t /*asciiLength = 0,*/ sjisLength = 0, i;
1513     const char *name;
1514
1515     /* enumerate all converters */
1516     status=U_ZERO_ERROR;
1517     for(i=0; (name=ucnv_getAvailableName(i))!=NULL; ++i) {
1518         cnv=ucnv_open(name, &status);
1519         if(U_SUCCESS(status)) {
1520             TestAmbiguousConverter(cnv);
1521             ucnv_close(cnv);
1522         } else {
1523             log_err("error: unable to open available converter \"%s\"\n", name);
1524             status=U_ZERO_ERROR;
1525         }
1526     }
1527
1528 #if !UCONFIG_NO_LEGACY_CONVERSION
1529     sjis_cnv = ucnv_open("ibm-943", &status);
1530     if (U_FAILURE(status))
1531     {
1532         log_data_err("Failed to create a SJIS converter\n");
1533         return;
1534     }
1535     ascii_cnv = ucnv_open("LATIN-1", &status);
1536     if (U_FAILURE(status))
1537     {
1538         log_data_err("Failed to create a LATIN-1 converter\n");
1539         ucnv_close(sjis_cnv);
1540         return;
1541     }
1542     /* convert target from SJIS to Unicode */
1543     sjisLength = ucnv_toUChars(sjis_cnv, sjisResult, sizeof(sjisResult)/U_SIZEOF_UCHAR, target, (int32_t)strlen(target), &status);
1544     if (U_FAILURE(status))
1545     {
1546         log_err("Failed to convert the SJIS string.\n");
1547         ucnv_close(sjis_cnv);
1548         ucnv_close(ascii_cnv);
1549         return;
1550     }
1551     /* convert target from Latin-1 to Unicode */
1552     /*asciiLength =*/ ucnv_toUChars(ascii_cnv, asciiResult, sizeof(asciiResult)/U_SIZEOF_UCHAR, target, (int32_t)strlen(target), &status);
1553     if (U_FAILURE(status))
1554     {
1555         log_err("Failed to convert the Latin-1 string.\n");
1556         ucnv_close(sjis_cnv);
1557         ucnv_close(ascii_cnv);
1558         return;
1559     }
1560     if (!ucnv_isAmbiguous(sjis_cnv))
1561     {
1562         log_err("SJIS converter should contain ambiguous character mappings.\n");
1563         ucnv_close(sjis_cnv);
1564         ucnv_close(ascii_cnv);
1565         return;
1566     }
1567     if (u_strcmp(sjisResult, asciiResult) == 0)
1568     {
1569         log_err("File separators for SJIS don't need to be fixed.\n");
1570     }
1571     ucnv_fixFileSeparator(sjis_cnv, sjisResult, sjisLength);
1572     if (u_strcmp(sjisResult, asciiResult) != 0)
1573     {
1574         log_err("Fixing file separator for SJIS failed.\n");
1575     }
1576     ucnv_close(sjis_cnv);
1577     ucnv_close(ascii_cnv);
1578 #endif
1579 }
1580
1581 static void
1582 TestSignatureDetection(){
1583     /* with null terminated strings */
1584     {
1585         static const char* data[] = {
1586                 "\xFE\xFF\x00\x00",     /* UTF-16BE */
1587                 "\xFF\xFE\x00\x00",     /* UTF-16LE */
1588                 "\xEF\xBB\xBF\x00",     /* UTF-8    */
1589                 "\x0E\xFE\xFF\x00",     /* SCSU     */
1590
1591                 "\xFE\xFF",             /* UTF-16BE */
1592                 "\xFF\xFE",             /* UTF-16LE */
1593                 "\xEF\xBB\xBF",         /* UTF-8    */
1594                 "\x0E\xFE\xFF",         /* SCSU     */
1595
1596                 "\xFE\xFF\x41\x42",     /* UTF-16BE */
1597                 "\xFF\xFE\x41\x41",     /* UTF-16LE */
1598                 "\xEF\xBB\xBF\x41",     /* UTF-8    */
1599                 "\x0E\xFE\xFF\x41",     /* SCSU     */
1600
1601                 "\x2B\x2F\x76\x38\x2D", /* UTF-7    */
1602                 "\x2B\x2F\x76\x38\x41", /* UTF-7    */
1603                 "\x2B\x2F\x76\x39\x41", /* UTF-7    */
1604                 "\x2B\x2F\x76\x2B\x41", /* UTF-7    */
1605                 "\x2B\x2F\x76\x2F\x41",  /* UTF-7    */
1606
1607                 "\xDD\x73\x66\x73"      /* UTF-EBCDIC */
1608         };
1609         static const char* expected[] = {
1610                 "UTF-16BE",
1611                 "UTF-16LE",
1612                 "UTF-8",
1613                 "SCSU",
1614
1615                 "UTF-16BE",
1616                 "UTF-16LE",
1617                 "UTF-8",
1618                 "SCSU",
1619
1620                 "UTF-16BE",
1621                 "UTF-16LE",
1622                 "UTF-8",
1623                 "SCSU",
1624
1625                 "UTF-7",
1626                 "UTF-7",
1627                 "UTF-7",
1628                 "UTF-7",
1629                 "UTF-7",
1630                 "UTF-EBCDIC"
1631         };
1632         static const int32_t expectedLength[] ={
1633             2,
1634             2,
1635             3,
1636             3,
1637
1638             2,
1639             2,
1640             3,
1641             3,
1642
1643             2,
1644             2,
1645             3,
1646             3,
1647
1648             5,
1649             4,
1650             4,
1651             4,
1652             4,
1653             4
1654         };
1655         int i=0;
1656         UErrorCode err;
1657         int32_t signatureLength = -1;
1658         const char* source = NULL;
1659         const char* enc = NULL;
1660         for( ; i<sizeof(data)/sizeof(char*); i++){
1661             err = U_ZERO_ERROR;
1662             source = data[i];
1663             enc = ucnv_detectUnicodeSignature(source, -1 , &signatureLength, &err);
1664             if(U_FAILURE(err)){
1665                 log_err("ucnv_detectUnicodeSignature failed for source : %s at index :%i. Error: %s\n", source,i,u_errorName(err));
1666                 continue;
1667             }
1668             if(enc == NULL || strcmp(enc,expected[i]) !=0){
1669                 log_err("ucnv_detectUnicodeSignature failed for source : %s at index :%i. Expected: %s. Got: %s\n",source,i,expected[i],enc);
1670                 continue;
1671             }
1672             if(signatureLength != expectedLength[i]){
1673                 log_err("ucnv_detectUnicodeSignature failed for source : %s at index :%i.Expected Length: %i. Got length: %i\n",source,i,signatureLength,expectedLength[i]);
1674             }
1675         }
1676     }
1677     {
1678         static const char* data[] = {
1679                 "\xFE\xFF\x00",         /* UTF-16BE */
1680                 "\xFF\xFE\x00",         /* UTF-16LE */
1681                 "\xEF\xBB\xBF\x00",     /* UTF-8    */
1682                 "\x0E\xFE\xFF\x00",     /* SCSU     */
1683                 "\x00\x00\xFE\xFF",     /* UTF-32BE */
1684                 "\xFF\xFE\x00\x00",     /* UTF-32LE */
1685                 "\xFE\xFF",             /* UTF-16BE */
1686                 "\xFF\xFE",             /* UTF-16LE */
1687                 "\xEF\xBB\xBF",         /* UTF-8    */
1688                 "\x0E\xFE\xFF",         /* SCSU     */
1689                 "\x00\x00\xFE\xFF",     /* UTF-32BE */
1690                 "\xFF\xFE\x00\x00",     /* UTF-32LE */
1691                 "\xFE\xFF\x41\x42",     /* UTF-16BE */
1692                 "\xFF\xFE\x41\x41",     /* UTF-16LE */
1693                 "\xEF\xBB\xBF\x41",     /* UTF-8    */
1694                 "\x0E\xFE\xFF\x41",     /* SCSU     */
1695                 "\x00\x00\xFE\xFF\x41", /* UTF-32BE */
1696                 "\xFF\xFE\x00\x00\x42", /* UTF-32LE */
1697                 "\xFB\xEE\x28",         /* BOCU-1   */
1698                 "\xFF\x41\x42"          /* NULL     */
1699         };
1700         static const int len[] = {
1701             3,
1702             3,
1703             4,
1704             4,
1705             4,
1706             4,
1707             2,
1708             2,
1709             3,
1710             3,
1711             4,
1712             4,
1713             4,
1714             4,
1715             4,
1716             4,
1717             5,
1718             5,
1719             3,
1720             3
1721         };
1722
1723         static const char* expected[] = {
1724                 "UTF-16BE",
1725                 "UTF-16LE",
1726                 "UTF-8",
1727                 "SCSU",
1728                 "UTF-32BE",
1729                 "UTF-32LE",
1730                 "UTF-16BE",
1731                 "UTF-16LE",
1732                 "UTF-8",
1733                 "SCSU",
1734                 "UTF-32BE",
1735                 "UTF-32LE",
1736                 "UTF-16BE",
1737                 "UTF-16LE",
1738                 "UTF-8",
1739                 "SCSU",
1740                 "UTF-32BE",
1741                 "UTF-32LE",
1742                 "BOCU-1",
1743                 NULL
1744         };
1745         static const int32_t expectedLength[] ={
1746             2,
1747             2,
1748             3,
1749             3,
1750             4,
1751             4,
1752             2,
1753             2,
1754             3,
1755             3,
1756             4,
1757             4,
1758             2,
1759             2,
1760             3,
1761             3,
1762             4,
1763             4,
1764             3,
1765             0
1766         };
1767         int i=0;
1768         UErrorCode err;
1769         int32_t signatureLength = -1;
1770         int32_t sourceLength=-1;
1771         const char* source = NULL;
1772         const char* enc = NULL;
1773         for( ; i<sizeof(data)/sizeof(char*); i++){
1774             err = U_ZERO_ERROR;
1775             source = data[i];
1776             sourceLength = len[i];
1777             enc = ucnv_detectUnicodeSignature(source, sourceLength , &signatureLength, &err);
1778             if(U_FAILURE(err)){
1779                 log_err("ucnv_detectUnicodeSignature test2 failed for source : %s at index :%i. Error: %s\n", source,i,u_errorName(err));
1780                 continue;
1781             }
1782             if(enc == NULL || strcmp(enc,expected[i]) !=0){
1783                 if(expected[i] !=NULL){
1784                  log_err("ucnv_detectUnicodeSignature test2 failed for source : %s at index :%i. Expected: %s. Got: %s\n",source,i,expected[i],enc);
1785                  continue;
1786                 }
1787             }
1788             if(signatureLength != expectedLength[i]){
1789                 log_err("ucnv_detectUnicodeSignature test2 failed for source : %s at index :%i.Expected Length: %i. Got length: %i\n",source,i,signatureLength,expectedLength[i]);
1790             }
1791         }
1792     }
1793 }
1794
1795 static void TestUTF7() {
1796     /* test input */
1797     static const uint8_t in[]={
1798         /* H - +Jjo- - ! +- +2AHcAQ */
1799         0x48,
1800         0x2d,
1801         0x2b, 0x4a, 0x6a, 0x6f,
1802         0x2d, 0x2d,
1803         0x21,
1804         0x2b, 0x2d,
1805         0x2b, 0x32, 0x41, 0x48, 0x63, 0x41, 0x51
1806     };
1807
1808     /* expected test results */
1809     static const int32_t results[]={
1810         /* number of bytes read, code point */
1811         1, 0x48,
1812         1, 0x2d,
1813         4, 0x263a, /* <WHITE SMILING FACE> */
1814         2, 0x2d,
1815         1, 0x21,
1816         2, 0x2b,
1817         7, 0x10401
1818     };
1819
1820     const char *cnvName;
1821     const char *source=(const char *)in, *limit=(const char *)in+sizeof(in);
1822     UErrorCode errorCode=U_ZERO_ERROR;
1823     UConverter *cnv=ucnv_open("UTF-7", &errorCode);
1824     if(U_FAILURE(errorCode)) {
1825         log_err("Unable to open a UTF-7 converter: %s\n", u_errorName(errorCode)); /* sholdn't be a data err */
1826         return;
1827     }
1828     TestNextUChar(cnv, source, limit, results, "UTF-7");
1829     /* Test the condition when source >= sourceLimit */
1830     TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source");
1831     cnvName = ucnv_getName(cnv, &errorCode);
1832     if (U_FAILURE(errorCode) || uprv_strcmp(cnvName, "UTF-7") != 0) {
1833         log_err("UTF-7 converter is called %s: %s\n", cnvName, u_errorName(errorCode));
1834     }
1835     ucnv_close(cnv);
1836 }
1837
1838 static void TestIMAP() {
1839     /* test input */
1840     static const uint8_t in[]={
1841         /* H - &Jjo- - ! &- &2AHcAQ- \ */
1842         0x48,
1843         0x2d,
1844         0x26, 0x4a, 0x6a, 0x6f,
1845         0x2d, 0x2d,
1846         0x21,
1847         0x26, 0x2d,
1848         0x26, 0x32, 0x41, 0x48, 0x63, 0x41, 0x51, 0x2d
1849     };
1850
1851     /* expected test results */
1852     static const int32_t results[]={
1853         /* number of bytes read, code point */
1854         1, 0x48,
1855         1, 0x2d,
1856         4, 0x263a, /* <WHITE SMILING FACE> */
1857         2, 0x2d,
1858         1, 0x21,
1859         2, 0x26,
1860         7, 0x10401
1861     };
1862
1863     const char *cnvName;
1864     const char *source=(const char *)in, *limit=(const char *)in+sizeof(in);
1865     UErrorCode errorCode=U_ZERO_ERROR;
1866     UConverter *cnv=ucnv_open("IMAP-mailbox-name", &errorCode);
1867     if(U_FAILURE(errorCode)) {
1868         log_err("Unable to open a IMAP-mailbox-name converter: %s\n", u_errorName(errorCode)); /* sholdn't be a data err */
1869         return;
1870     }
1871     TestNextUChar(cnv, source, limit, results, "IMAP-mailbox-name");
1872     /* Test the condition when source >= sourceLimit */
1873     TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source");
1874     cnvName = ucnv_getName(cnv, &errorCode);
1875     if (U_FAILURE(errorCode) || uprv_strcmp(cnvName, "IMAP-mailbox-name") != 0) {
1876         log_err("IMAP-mailbox-name converter is called %s: %s\n", cnvName, u_errorName(errorCode));
1877     }
1878     ucnv_close(cnv);
1879 }
1880
1881 static void TestUTF8() {
1882     /* test input */
1883     static const uint8_t in[]={
1884         0x61,
1885         0xc2, 0x80,
1886         0xe0, 0xa0, 0x80,
1887         0xf0, 0x90, 0x80, 0x80,
1888         0xf4, 0x84, 0x8c, 0xa1,
1889         0xf0, 0x90, 0x90, 0x81
1890     };
1891
1892     /* expected test results */
1893     static const int32_t results[]={
1894         /* number of bytes read, code point */
1895         1, 0x61,
1896         2, 0x80,
1897         3, 0x800,
1898         4, 0x10000,
1899         4, 0x104321,
1900         4, 0x10401
1901     };
1902
1903     /* error test input */
1904     static const uint8_t in2[]={
1905         0x61,
1906         0xc0, 0x80,                     /* illegal non-shortest form */
1907         0xe0, 0x80, 0x80,               /* illegal non-shortest form */
1908         0xf0, 0x80, 0x80, 0x80,         /* illegal non-shortest form */
1909         0xc0, 0xc0,                     /* illegal trail byte */
1910         0xf4, 0x90, 0x80, 0x80,         /* 0x110000 out of range */
1911         0xf8, 0x80, 0x80, 0x80, 0x80,   /* too long */
1912         0xfe,                           /* illegal byte altogether */
1913         0x62
1914     };
1915
1916     /* expected error test results */
1917     static const int32_t results2[]={
1918         /* number of bytes read, code point */
1919         1, 0x61,
1920         22, 0x62
1921     };
1922
1923     UConverterToUCallback cb;
1924     const void *p;
1925
1926     const char *source=(const char *)in,*limit=(const char *)in+sizeof(in);
1927     UErrorCode errorCode=U_ZERO_ERROR;
1928     UConverter *cnv=ucnv_open("UTF-8", &errorCode);
1929     if(U_FAILURE(errorCode)) {
1930         log_err("Unable to open a UTF-8 converter: %s\n", u_errorName(errorCode));
1931         return;
1932     }
1933     TestNextUChar(cnv, source, limit, results, "UTF-8");
1934     /* Test the condition when source >= sourceLimit */
1935     TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source");
1936
1937     /* test error behavior with a skip callback */
1938     ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_SKIP, NULL, &cb, &p, &errorCode);
1939     source=(const char *)in2;
1940     limit=(const char *)(in2+sizeof(in2));
1941     TestNextUChar(cnv, source, limit, results2, "UTF-8");
1942
1943     ucnv_close(cnv);
1944 }
1945
1946 static void TestCESU8() {
1947     /* test input */
1948     static const uint8_t in[]={
1949         0x61,
1950         0xc2, 0x80,
1951         0xe0, 0xa0, 0x80,
1952         0xed, 0xa0, 0x80, 0xed, 0xb0, 0x80,
1953         0xed, 0xb0, 0x81, 0xed, 0xa0, 0x82,
1954         0xed, 0xaf, 0xbf, 0xed, 0xbf, 0xbf,
1955         0xef, 0xbf, 0xbc
1956     };
1957
1958     /* expected test results */
1959     static const int32_t results[]={
1960         /* number of bytes read, code point */
1961         1, 0x61,
1962         2, 0x80,
1963         3, 0x800,
1964         6, 0x10000,
1965         3, 0xdc01,
1966         -1,0xd802,  /* may read 3 or 6 bytes */
1967         -1,0x10ffff,/* may read 0 or 3 bytes */
1968         3, 0xfffc
1969     };
1970
1971     /* error test input */
1972     static const uint8_t in2[]={
1973         0x61,
1974         0xc0, 0x80,                     /* illegal non-shortest form */
1975         0xe0, 0x80, 0x80,               /* illegal non-shortest form */
1976         0xf0, 0x80, 0x80, 0x80,         /* illegal non-shortest form */
1977         0xc0, 0xc0,                     /* illegal trail byte */
1978         0xf0, 0x90, 0x80, 0x80,         /* illegal 4-byte supplementary code point */
1979         0xf4, 0x84, 0x8c, 0xa1,         /* illegal 4-byte supplementary code point */
1980         0xf0, 0x90, 0x90, 0x81,         /* illegal 4-byte supplementary code point */
1981         0xf4, 0x90, 0x80, 0x80,         /* 0x110000 out of range */
1982         0xf8, 0x80, 0x80, 0x80, 0x80,   /* too long */
1983         0xfe,                           /* illegal byte altogether */
1984         0x62
1985     };
1986
1987     /* expected error test results */
1988     static const int32_t results2[]={
1989         /* number of bytes read, code point */
1990         1, 0x61,
1991         34, 0x62
1992     };
1993
1994     UConverterToUCallback cb;
1995     const void *p;
1996
1997     const char *source=(const char *)in,*limit=(const char *)in+sizeof(in);
1998     UErrorCode errorCode=U_ZERO_ERROR;
1999     UConverter *cnv=ucnv_open("CESU-8", &errorCode);
2000     if(U_FAILURE(errorCode)) {
2001         log_err("Unable to open a CESU-8 converter: %s\n", u_errorName(errorCode));
2002         return;
2003     }
2004     TestNextUChar(cnv, source, limit, results, "CESU-8");
2005     /* Test the condition when source >= sourceLimit */
2006     TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source");
2007
2008     /* test error behavior with a skip callback */
2009     ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_SKIP, NULL, &cb, &p, &errorCode);
2010     source=(const char *)in2;
2011     limit=(const char *)(in2+sizeof(in2));
2012     TestNextUChar(cnv, source, limit, results2, "CESU-8");
2013
2014     ucnv_close(cnv);
2015 }
2016
2017 static void TestUTF16() {
2018     /* test input */
2019     static const uint8_t in1[]={
2020         0xfe, 0xff, 0x4e, 0x00, 0xfe, 0xff
2021     };
2022     static const uint8_t in2[]={
2023         0xff, 0xfe, 0x4e, 0x00, 0xfe, 0xff
2024     };
2025     static const uint8_t in3[]={
2026         0xfe, 0xfe, 0x4e, 0x00, 0xfe, 0xff, 0xd8, 0x40, 0xdc, 0x01
2027     };
2028
2029     /* expected test results */
2030     static const int32_t results1[]={
2031         /* number of bytes read, code point */
2032         4, 0x4e00,
2033         2, 0xfeff
2034     };
2035     static const int32_t results2[]={
2036         /* number of bytes read, code point */
2037         4, 0x004e,
2038         2, 0xfffe
2039     };
2040     static const int32_t results3[]={
2041         /* number of bytes read, code point */
2042         2, 0xfefe,
2043         2, 0x4e00,
2044         2, 0xfeff,
2045         4, 0x20001
2046     };
2047
2048     const char *source, *limit;
2049
2050     UErrorCode errorCode=U_ZERO_ERROR;
2051     UConverter *cnv=ucnv_open("UTF-16", &errorCode);
2052     if(U_FAILURE(errorCode)) {
2053         log_err("Unable to open a UTF-16 converter: %s\n", u_errorName(errorCode));
2054         return;
2055     }
2056
2057     source=(const char *)in1, limit=(const char *)in1+sizeof(in1);
2058     TestNextUChar(cnv, source, limit, results1, "UTF-16");
2059
2060     source=(const char *)in2, limit=(const char *)in2+sizeof(in2);
2061     ucnv_resetToUnicode(cnv);
2062     TestNextUChar(cnv, source, limit, results2, "UTF-16");
2063
2064     source=(const char *)in3, limit=(const char *)in3+sizeof(in3);
2065     ucnv_resetToUnicode(cnv);
2066     TestNextUChar(cnv, source, limit, results3, "UTF-16");
2067
2068     /* Test the condition when source >= sourceLimit */
2069     ucnv_resetToUnicode(cnv);
2070     TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source");
2071
2072     ucnv_close(cnv);
2073 }
2074
2075 static void TestUTF16BE() {
2076     /* test input */
2077     static const uint8_t in[]={
2078         0x00, 0x61,
2079         0x00, 0xc0,
2080         0x00, 0x31,
2081         0x00, 0xf4,
2082         0xce, 0xfe,
2083         0xd8, 0x01, 0xdc, 0x01
2084     };
2085
2086     /* expected test results */
2087     static const int32_t results[]={
2088         /* number of bytes read, code point */
2089         2, 0x61,
2090         2, 0xc0,
2091         2, 0x31,
2092         2, 0xf4,
2093         2, 0xcefe,
2094         4, 0x10401
2095     };
2096
2097     const char *source=(const char *)in, *limit=(const char *)in+sizeof(in);
2098     UErrorCode errorCode=U_ZERO_ERROR;
2099     UConverter *cnv=ucnv_open("utf-16be", &errorCode);
2100     if(U_FAILURE(errorCode)) {
2101         log_err("Unable to open a UTF16-BE converter: %s\n", u_errorName(errorCode));
2102         return;
2103     }
2104     TestNextUChar(cnv, source, limit, results, "UTF-16BE");
2105     /* Test the condition when source >= sourceLimit */
2106     TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source");
2107     /*Test for the condition where there is an invalid character*/
2108     {
2109         static const uint8_t source2[]={0x61};
2110         ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_STOP, NULL, NULL, NULL, &errorCode);
2111         TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_TRUNCATED_CHAR_FOUND, "an invalid character");
2112     }
2113 #if 0
2114     /*
2115      * Test disabled because currently the UTF-16BE/LE converters are supposed
2116      * to not set errors for unpaired surrogates.
2117      * This may change with
2118      * Jitterbug 1838 - forbid converting surrogate code points in UTF-16/32
2119      */
2120
2121     /*Test for the condition where there is a surrogate pair*/
2122     {
2123         const uint8_t source2[]={0xd8, 0x01};
2124         TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_TRUNCATED_CHAR_FOUND, "an truncated surrogate character");
2125     }
2126 #endif
2127     ucnv_close(cnv);
2128 }
2129
2130 static void
2131 TestUTF16LE() {
2132     /* test input */
2133     static const uint8_t in[]={
2134         0x61, 0x00,
2135         0x31, 0x00,
2136         0x4e, 0x2e,
2137         0x4e, 0x00,
2138         0x01, 0xd8, 0x01, 0xdc
2139     };
2140
2141     /* expected test results */
2142     static const int32_t results[]={
2143         /* number of bytes read, code point */
2144         2, 0x61,
2145         2, 0x31,
2146         2, 0x2e4e,
2147         2, 0x4e,
2148         4, 0x10401
2149     };
2150
2151     const char *source=(const char *)in, *limit=(const char *)in+sizeof(in);
2152     UErrorCode errorCode=U_ZERO_ERROR;
2153     UConverter *cnv=ucnv_open("utf-16le", &errorCode);
2154     if(U_FAILURE(errorCode)) {
2155         log_err("Unable to open a UTF16-LE converter: %s\n", u_errorName(errorCode));
2156         return;
2157     }
2158     TestNextUChar(cnv, source, limit, results, "UTF-16LE");
2159     /* Test the condition when source >= sourceLimit */
2160     TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source");
2161     /*Test for the condition where there is an invalid character*/
2162     {
2163         static const uint8_t source2[]={0x61};
2164         ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_STOP, NULL, NULL, NULL, &errorCode);
2165         TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_TRUNCATED_CHAR_FOUND, "an invalid character");
2166     }
2167 #if 0
2168     /*
2169      * Test disabled because currently the UTF-16BE/LE converters are supposed
2170      * to not set errors for unpaired surrogates.
2171      * This may change with
2172      * Jitterbug 1838 - forbid converting surrogate code points in UTF-16/32
2173      */
2174
2175     /*Test for the condition where there is a surrogate character*/
2176     {
2177         static const uint8_t source2[]={0x01, 0xd8};
2178         TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_TRUNCATED_CHAR_FOUND, "an truncated surrogate character");
2179     }
2180 #endif
2181
2182     ucnv_close(cnv);
2183 }
2184
2185 static void TestUTF32() {
2186     /* test input */
2187     static const uint8_t in1[]={
2188         0x00, 0x00, 0xfe, 0xff,   0x00, 0x10, 0x0f, 0x00,   0x00, 0x00, 0xfe, 0xff
2189     };
2190     static const uint8_t in2[]={
2191         0xff, 0xfe, 0x00, 0x00,   0x00, 0x10, 0x0f, 0x00,   0xfe, 0xff, 0x00, 0x00
2192     };
2193     static const uint8_t in3[]={
2194         0x00, 0x00, 0xfe, 0xfe,   0x00, 0x10, 0x0f, 0x00,   0x00, 0x00, 0xd8, 0x40,   0x00, 0x00, 0xdc, 0x01
2195     };
2196
2197     /* expected test results */
2198     static const int32_t results1[]={
2199         /* number of bytes read, code point */
2200         8, 0x100f00,
2201         4, 0xfeff
2202     };
2203     static const int32_t results2[]={
2204         /* number of bytes read, code point */
2205         8, 0x0f1000,
2206         4, 0xfffe
2207     };
2208     static const int32_t results3[]={
2209         /* number of bytes read, code point */
2210         4, 0xfefe,
2211         4, 0x100f00,
2212         4, 0xfffd, /* unmatched surrogate */
2213         4, 0xfffd  /* unmatched surrogate */
2214     };
2215
2216     const char *source, *limit;
2217
2218     UErrorCode errorCode=U_ZERO_ERROR;
2219     UConverter *cnv=ucnv_open("UTF-32", &errorCode);
2220     if(U_FAILURE(errorCode)) {
2221         log_err("Unable to open a UTF-32 converter: %s\n", u_errorName(errorCode));
2222         return;
2223     }
2224
2225     source=(const char *)in1, limit=(const char *)in1+sizeof(in1);
2226     TestNextUChar(cnv, source, limit, results1, "UTF-32");
2227
2228     source=(const char *)in2, limit=(const char *)in2+sizeof(in2);
2229     ucnv_resetToUnicode(cnv);
2230     TestNextUChar(cnv, source, limit, results2, "UTF-32");
2231
2232     source=(const char *)in3, limit=(const char *)in3+sizeof(in3);
2233     ucnv_resetToUnicode(cnv);
2234     TestNextUChar(cnv, source, limit, results3, "UTF-32");
2235
2236     /* Test the condition when source >= sourceLimit */
2237     ucnv_resetToUnicode(cnv);
2238     TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source");
2239
2240     ucnv_close(cnv);
2241 }
2242
2243 static void
2244 TestUTF32BE() {
2245     /* test input */
2246     static const uint8_t in[]={
2247         0x00, 0x00, 0x00, 0x61,
2248         0x00, 0x00, 0x30, 0x61,
2249         0x00, 0x00, 0xdc, 0x00,
2250         0x00, 0x00, 0xd8, 0x00,
2251         0x00, 0x00, 0xdf, 0xff,
2252         0x00, 0x00, 0xff, 0xfe,
2253         0x00, 0x10, 0xab, 0xcd,
2254         0x00, 0x10, 0xff, 0xff
2255     };
2256
2257     /* expected test results */
2258     static const int32_t results[]={
2259         /* number of bytes read, code point */
2260         4, 0x61,
2261         4, 0x3061,
2262         4, 0xfffd,
2263         4, 0xfffd,
2264         4, 0xfffd,
2265         4, 0xfffe,
2266         4, 0x10abcd,
2267         4, 0x10ffff
2268     };
2269
2270     /* error test input */
2271     static const uint8_t in2[]={
2272         0x00, 0x00, 0x00, 0x61,
2273         0x00, 0x11, 0x00, 0x00,         /* 0x110000 out of range */
2274         0x00, 0x00, 0x00, 0x62,
2275         0xff, 0xff, 0xff, 0xff,         /* 0xffffffff out of range */
2276         0x7f, 0xff, 0xff, 0xff,         /* 0x7fffffff out of range */
2277         0x00, 0x00, 0x01, 0x62,
2278         0x00, 0x00, 0x02, 0x62
2279     };
2280
2281     /* expected error test results */
2282     static const int32_t results2[]={
2283         /* number of bytes read, code point */
2284         4,  0x61,
2285         8,  0x62,
2286         12, 0x162,
2287         4,  0x262
2288     };
2289
2290     UConverterToUCallback cb;
2291     const void *p;
2292
2293     const char *source=(const char *)in, *limit=(const char *)in+sizeof(in);
2294     UErrorCode errorCode=U_ZERO_ERROR;
2295     UConverter *cnv=ucnv_open("UTF-32BE", &errorCode);
2296     if(U_FAILURE(errorCode)) {
2297         log_err("Unable to open a UTF-32BE converter: %s\n", u_errorName(errorCode));
2298         return;
2299     }
2300     TestNextUChar(cnv, source, limit, results, "UTF-32BE");
2301
2302     /* Test the condition when source >= sourceLimit */
2303     TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source");
2304
2305     /* test error behavior with a skip callback */
2306     ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_SKIP, NULL, &cb, &p, &errorCode);
2307     source=(const char *)in2;
2308     limit=(const char *)(in2+sizeof(in2));
2309     TestNextUChar(cnv, source, limit, results2, "UTF-32BE");
2310
2311     ucnv_close(cnv);
2312 }
2313
2314 static void
2315 TestUTF32LE() {
2316     /* test input */
2317     static const uint8_t in[]={
2318         0x61, 0x00, 0x00, 0x00,
2319         0x61, 0x30, 0x00, 0x00,
2320         0x00, 0xdc, 0x00, 0x00,
2321         0x00, 0xd8, 0x00, 0x00,
2322         0xff, 0xdf, 0x00, 0x00,
2323         0xfe, 0xff, 0x00, 0x00,
2324         0xcd, 0xab, 0x10, 0x00,
2325         0xff, 0xff, 0x10, 0x00
2326     };
2327
2328     /* expected test results */
2329     static const int32_t results[]={
2330         /* number of bytes read, code point */
2331         4, 0x61,
2332         4, 0x3061,
2333         4, 0xfffd,
2334         4, 0xfffd,
2335         4, 0xfffd,
2336         4, 0xfffe,
2337         4, 0x10abcd,
2338         4, 0x10ffff
2339     };
2340
2341     /* error test input */
2342     static const uint8_t in2[]={
2343         0x61, 0x00, 0x00, 0x00,
2344         0x00, 0x00, 0x11, 0x00,         /* 0x110000 out of range */
2345         0x62, 0x00, 0x00, 0x00,
2346         0xff, 0xff, 0xff, 0xff,         /* 0xffffffff out of range */
2347         0xff, 0xff, 0xff, 0x7f,         /* 0x7fffffff out of range */
2348         0x62, 0x01, 0x00, 0x00,
2349         0x62, 0x02, 0x00, 0x00,
2350     };
2351
2352     /* expected error test results */
2353     static const int32_t results2[]={
2354         /* number of bytes read, code point */
2355         4,  0x61,
2356         8,  0x62,
2357         12, 0x162,
2358         4,  0x262,
2359     };
2360
2361     UConverterToUCallback cb;
2362     const void *p;
2363
2364     const char *source=(const char *)in, *limit=(const char *)in+sizeof(in);
2365     UErrorCode errorCode=U_ZERO_ERROR;
2366     UConverter *cnv=ucnv_open("UTF-32LE", &errorCode);
2367     if(U_FAILURE(errorCode)) {
2368         log_err("Unable to open a UTF-32LE converter: %s\n", u_errorName(errorCode));
2369         return;
2370     }
2371     TestNextUChar(cnv, source, limit, results, "UTF-32LE");
2372
2373     /* Test the condition when source >= sourceLimit */
2374     TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source");
2375
2376     /* test error behavior with a skip callback */
2377     ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_SKIP, NULL, &cb, &p, &errorCode);
2378     source=(const char *)in2;
2379     limit=(const char *)(in2+sizeof(in2));
2380     TestNextUChar(cnv, source, limit, results2, "UTF-32LE");
2381
2382     ucnv_close(cnv);
2383 }
2384
2385 static void
2386 TestLATIN1() {
2387     /* test input */
2388     static const uint8_t in[]={
2389        0x61,
2390        0x31,
2391        0x32,
2392        0xc0,
2393        0xf0,
2394        0xf4,
2395     };
2396
2397     /* expected test results */
2398     static const int32_t results[]={
2399         /* number of bytes read, code point */
2400         1, 0x61,
2401         1, 0x31,
2402         1, 0x32,
2403         1, 0xc0,
2404         1, 0xf0,
2405         1, 0xf4,
2406     };
2407     static const uint16_t in1[] = {
2408         0x08, 0x00, 0x1b, 0x4c, 0xea, 0x16, 0xca, 0xd3, 0x94, 0x0f, 0x53, 0xef, 0x61, 0x1b, 0xe5, 0x84,
2409         0xc4, 0x0f, 0x53, 0xef, 0x61, 0x1b, 0xe5, 0x84, 0xc4, 0x16, 0xca, 0xd3, 0x94, 0x08, 0x02, 0x0f,
2410         0x53, 0x4a, 0x4e, 0x16, 0x7d, 0x00, 0x30, 0x82, 0x52, 0x4d, 0x30, 0x6b, 0x6d, 0x41, 0x88, 0x4c,
2411         0xe5, 0x97, 0x9f, 0x08, 0x0c, 0x16, 0xca, 0xd3, 0x94, 0x15, 0xae, 0x0e, 0x6b, 0x4c, 0x08, 0x0d,
2412         0x8c, 0xb4, 0xa3, 0x9f, 0xca, 0x99, 0xcb, 0x8b, 0xc2, 0x97, 0xcc, 0xaa, 0x84, 0x08, 0x02, 0x0e,
2413         0x7c, 0x73, 0xe2, 0x16, 0xa3, 0xb7, 0xcb, 0x93, 0xd3, 0xb4, 0xc5, 0xdc, 0x9f, 0x0e, 0x79, 0x3e,
2414         0x06, 0xae, 0xb1, 0x9d, 0x93, 0xd3, 0x08, 0x0c, 0xbe, 0xa3, 0x8f, 0x08, 0x88, 0xbe, 0xa3, 0x8d,
2415         0xd3, 0xa8, 0xa3, 0x97, 0xc5, 0x17, 0x89, 0x08, 0x0d, 0x15, 0xd2, 0x08, 0x01, 0x93, 0xc8, 0xaa,
2416         0x8f, 0x0e, 0x61, 0x1b, 0x99, 0xcb, 0x0e, 0x4e, 0xba, 0x9f, 0xa1, 0xae, 0x93, 0xa8, 0xa0, 0x08,
2417         0x02, 0x08, 0x0c, 0xe2, 0x16, 0xa3, 0xb7, 0xcb, 0x0f, 0x4f, 0xe1, 0x80, 0x05, 0xec, 0x60, 0x8d,
2418         0xea, 0x06, 0xd3, 0xe6, 0x0f, 0x8a, 0x00, 0x30, 0x44, 0x65, 0xb9, 0xe4, 0xfe, 0xe7, 0xc2, 0x06,
2419         0xcb, 0x82
2420     };
2421     static const uint8_t out1[] = {
2422         0x08, 0x00, 0x1b, 0x4c, 0xea, 0x16, 0xca, 0xd3, 0x94, 0x0f, 0x53, 0xef, 0x61, 0x1b, 0xe5, 0x84,
2423         0xc4, 0x0f, 0x53, 0xef, 0x61, 0x1b, 0xe5, 0x84, 0xc4, 0x16, 0xca, 0xd3, 0x94, 0x08, 0x02, 0x0f,
2424         0x53, 0x4a, 0x4e, 0x16, 0x7d, 0x00, 0x30, 0x82, 0x52, 0x4d, 0x30, 0x6b, 0x6d, 0x41, 0x88, 0x4c,
2425         0xe5, 0x97, 0x9f, 0x08, 0x0c, 0x16, 0xca, 0xd3, 0x94, 0x15, 0xae, 0x0e, 0x6b, 0x4c, 0x08, 0x0d,
2426         0x8c, 0xb4, 0xa3, 0x9f, 0xca, 0x99, 0xcb, 0x8b, 0xc2, 0x97, 0xcc, 0xaa, 0x84, 0x08, 0x02, 0x0e,
2427         0x7c, 0x73, 0xe2, 0x16, 0xa3, 0xb7, 0xcb, 0x93, 0xd3, 0xb4, 0xc5, 0xdc, 0x9f, 0x0e, 0x79, 0x3e,
2428         0x06, 0xae, 0xb1, 0x9d, 0x93, 0xd3, 0x08, 0x0c, 0xbe, 0xa3, 0x8f, 0x08, 0x88, 0xbe, 0xa3, 0x8d,
2429         0xd3, 0xa8, 0xa3, 0x97, 0xc5, 0x17, 0x89, 0x08, 0x0d, 0x15, 0xd2, 0x08, 0x01, 0x93, 0xc8, 0xaa,
2430         0x8f, 0x0e, 0x61, 0x1b, 0x99, 0xcb, 0x0e, 0x4e, 0xba, 0x9f, 0xa1, 0xae, 0x93, 0xa8, 0xa0, 0x08,
2431         0x02, 0x08, 0x0c, 0xe2, 0x16, 0xa3, 0xb7, 0xcb, 0x0f, 0x4f, 0xe1, 0x80, 0x05, 0xec, 0x60, 0x8d,
2432         0xea, 0x06, 0xd3, 0xe6, 0x0f, 0x8a, 0x00, 0x30, 0x44, 0x65, 0xb9, 0xe4, 0xfe, 0xe7, 0xc2, 0x06,
2433         0xcb, 0x82
2434     };
2435     static const uint16_t in2[]={
2436         0x1B, 0x24, 0x29, 0x47, 0x0E, 0x23, 0x21, 0x23, 0x22, 0x23,
2437         0x23, 0x23, 0x24, 0x23, 0x25, 0x23, 0x26, 0x23, 0x27, 0x23,
2438         0x28, 0x23, 0x29, 0x23, 0x2A, 0x23, 0x2B, 0x0F, 0x2F, 0x2A,
2439         0x70, 0x6C, 0x61, 0x6E, 0x65, 0x20, 0x31, 0x20, 0x2A, 0x2F,
2440         0x0D, 0x0A, 0x1B, 0x24, 0x2A, 0x48, 0x1B, 0x4E, 0x22, 0x21,
2441         0x1B, 0x4E, 0x22, 0x22, 0x1B, 0x4E, 0x22, 0x23, 0x1B, 0x4E,
2442         0x22, 0x24, 0x1B, 0x4E, 0x22, 0x25, 0x0F, 0x2F, 0x2A, 0x70,
2443         0x6C, 0x61, 0x6E, 0x65, 0x32, 0x2A, 0x2F, 0x20, 0x0D, 0x0A,
2444         0x1B, 0x24, 0x2B, 0x49, 0x1B, 0x4F, 0x22, 0x44, 0x1B, 0x4F,
2445         0x22, 0x45, 0x1B, 0x4F, 0x22, 0x46, 0x1B, 0x4F, 0x22, 0x47,
2446         0x1B, 0x4F, 0x22, 0x48, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61,
2447         0x6E, 0x65, 0x20, 0x33, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B,
2448         0x24, 0x2B, 0x4A, 0x1B, 0x4F, 0x21, 0x44, 0x1B, 0x4F, 0x21,
2449         0x45, 0x1B, 0x4F, 0x22, 0x6A, 0x1B, 0x4F, 0x22, 0x6B, 0x1B,
2450         0x4F, 0x22, 0x6C, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E,
2451         0x65, 0x20, 0x34, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B, 0x24,
2452         0x2B, 0x4B, 0x1B, 0x4F, 0x21, 0x74, 0x1B, 0x4F, 0x22, 0x50,
2453         0x1B, 0x4F, 0x22, 0x51, 0x1B, 0x4F, 0x23, 0x37, 0x1B, 0x4F,
2454         0x22, 0x5C, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E,
2455         0x65, 0x20, 0x35, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B, 0x24,
2456         0x2B, 0x4C, 0x1B, 0x4F, 0x21, 0x23, 0x1B, 0x4F, 0x22, 0x2C,
2457         0x1B, 0x4F, 0x23, 0x4E, 0x1B, 0x4F, 0x21, 0x6E, 0x1B, 0x4F,
2458         0x23, 0x71, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E, 0x65,
2459         0x20, 0x36, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B, 0x24, 0x2B,
2460         0x4D, 0x1B, 0x4F, 0x22, 0x71, 0x1B, 0x4F, 0x21, 0x4E, 0x1B,
2461         0x4F, 0x21, 0x6A, 0x1B, 0x4F, 0x23, 0x3A, 0x1B, 0x4F, 0x23,
2462         0x6F, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E, 0x65, 0x20,
2463         0x37, 0x20, 0x2A, 0x2F,
2464     };
2465     static const unsigned char out2[]={
2466         0x1B, 0x24, 0x29, 0x47, 0x0E, 0x23, 0x21, 0x23, 0x22, 0x23,
2467         0x23, 0x23, 0x24, 0x23, 0x25, 0x23, 0x26, 0x23, 0x27, 0x23,
2468         0x28, 0x23, 0x29, 0x23, 0x2A, 0x23, 0x2B, 0x0F, 0x2F, 0x2A,
2469         0x70, 0x6C, 0x61, 0x6E, 0x65, 0x20, 0x31, 0x20, 0x2A, 0x2F,
2470         0x0D, 0x0A, 0x1B, 0x24, 0x2A, 0x48, 0x1B, 0x4E, 0x22, 0x21,
2471         0x1B, 0x4E, 0x22, 0x22, 0x1B, 0x4E, 0x22, 0x23, 0x1B, 0x4E,
2472         0x22, 0x24, 0x1B, 0x4E, 0x22, 0x25, 0x0F, 0x2F, 0x2A, 0x70,
2473         0x6C, 0x61, 0x6E, 0x65, 0x32, 0x2A, 0x2F, 0x20, 0x0D, 0x0A,
2474         0x1B, 0x24, 0x2B, 0x49, 0x1B, 0x4F, 0x22, 0x44, 0x1B, 0x4F,
2475         0x22, 0x45, 0x1B, 0x4F, 0x22, 0x46, 0x1B, 0x4F, 0x22, 0x47,
2476         0x1B, 0x4F, 0x22, 0x48, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61,
2477         0x6E, 0x65, 0x20, 0x33, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B,
2478         0x24, 0x2B, 0x4A, 0x1B, 0x4F, 0x21, 0x44, 0x1B, 0x4F, 0x21,
2479         0x45, 0x1B, 0x4F, 0x22, 0x6A, 0x1B, 0x4F, 0x22, 0x6B, 0x1B,
2480         0x4F, 0x22, 0x6C, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E,
2481         0x65, 0x20, 0x34, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B, 0x24,
2482         0x2B, 0x4B, 0x1B, 0x4F, 0x21, 0x74, 0x1B, 0x4F, 0x22, 0x50,
2483         0x1B, 0x4F, 0x22, 0x51, 0x1B, 0x4F, 0x23, 0x37, 0x1B, 0x4F,
2484         0x22, 0x5C, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E,
2485         0x65, 0x20, 0x35, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B, 0x24,
2486         0x2B, 0x4C, 0x1B, 0x4F, 0x21, 0x23, 0x1B, 0x4F, 0x22, 0x2C,
2487         0x1B, 0x4F, 0x23, 0x4E, 0x1B, 0x4F, 0x21, 0x6E, 0x1B, 0x4F,
2488         0x23, 0x71, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E, 0x65,
2489         0x20, 0x36, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B, 0x24, 0x2B,
2490         0x4D, 0x1B, 0x4F, 0x22, 0x71, 0x1B, 0x4F, 0x21, 0x4E, 0x1B,
2491         0x4F, 0x21, 0x6A, 0x1B, 0x4F, 0x23, 0x3A, 0x1B, 0x4F, 0x23,
2492         0x6F, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E, 0x65, 0x20,
2493         0x37, 0x20, 0x2A, 0x2F,
2494     };
2495     const char *source=(const char *)in;
2496     const char *limit=(const char *)in+sizeof(in);
2497
2498     UErrorCode errorCode=U_ZERO_ERROR;
2499     UConverter *cnv=ucnv_open("LATIN_1", &errorCode);
2500     if(U_FAILURE(errorCode)) {
2501         log_data_err("Unable to open a LATIN_1 converter: %s\n", u_errorName(errorCode));
2502         return;
2503     }
2504     TestNextUChar(cnv, source, limit, results, "LATIN_1");
2505     /* Test the condition when source >= sourceLimit */
2506     TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source");
2507     TestConv((uint16_t*)in1,sizeof(in1)/2,"LATIN_1","LATIN-1",(char*)out1,sizeof(out1));
2508     TestConv((uint16_t*)in2,sizeof(in2)/2,"ASCII","ASCII",(char*)out2,sizeof(out2));
2509
2510     ucnv_close(cnv);
2511 }
2512
2513 static void
2514 TestSBCS() {
2515     /* test input */
2516     static const uint8_t in[]={ 0x61, 0xc0, 0x80, 0xe0, 0xf0, 0xf4};
2517     /* expected test results */
2518     static const int32_t results[]={
2519         /* number of bytes read, code point */
2520         1, 0x61,
2521         1, 0xbf,
2522         1, 0xc4,
2523         1, 0x2021,
2524         1, 0xf8ff,
2525         1, 0x00d9
2526     };
2527
2528     const char *source=(const char *)in, *limit=(const char *)in+sizeof(in);
2529     UErrorCode errorCode=U_ZERO_ERROR;
2530     UConverter *cnv=ucnv_open("x-mac-turkish", &errorCode);
2531     if(U_FAILURE(errorCode)) {
2532         log_data_err("Unable to open a SBCS(x-mac-turkish) converter: %s\n", u_errorName(errorCode));
2533         return;
2534     }
2535     TestNextUChar(cnv, source, limit, results, "SBCS(x-mac-turkish)");
2536     /* Test the condition when source >= sourceLimit */
2537     TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source");
2538     /*Test for Illegal character */ /*
2539     {
2540     static const uint8_t input1[]={ 0xA1 };
2541     const char* illegalsource=(const char*)input1;
2542     TestNextUCharError(cnv, illegalsource, illegalsource+sizeof(illegalsource), U_INVALID_CHAR_FOUND, "source has a illegal characte");
2543     }
2544    */
2545     ucnv_close(cnv);
2546 }
2547
2548 static void
2549 TestDBCS() {
2550     /* test input */
2551     static const uint8_t in[]={
2552         0x44, 0x6a,
2553         0xc4, 0x9c,
2554         0x7a, 0x74,
2555         0x46, 0xab,
2556         0x42, 0x5b,
2557
2558     };
2559
2560     /* expected test results */
2561     static const int32_t results[]={
2562         /* number of bytes read, code point */
2563         2, 0x00a7,
2564         2, 0xe1d2,
2565         2, 0x6962,
2566         2, 0xf842,
2567         2, 0xffe5,
2568     };
2569
2570     const char *source=(const char *)in, *limit=(const char *)in+sizeof(in);
2571     UErrorCode errorCode=U_ZERO_ERROR;
2572
2573     UConverter *cnv=my_ucnv_open("@ibm9027", &errorCode);
2574     if(U_FAILURE(errorCode)) {
2575         log_data_err("Unable to open a DBCS(@ibm9027) converter: %s\n", u_errorName(errorCode));
2576         return;
2577     }
2578     TestNextUChar(cnv, source, limit, results, "DBCS(@ibm9027)");
2579     /* Test the condition when source >= sourceLimit */
2580     TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source");
2581     /*Test for the condition where there is an invalid character*/
2582     {
2583         static const uint8_t source2[]={0x1a, 0x1b};
2584         TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_ZERO_ERROR, "an invalid character");
2585     }
2586     /*Test for the condition where we have a truncated char*/
2587     {
2588         static const uint8_t source1[]={0xc4};
2589         ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_STOP, NULL, NULL, NULL, &errorCode);
2590         TestNextUCharError(cnv, (const char*)source1, (const char*)source1+sizeof(source1), U_TRUNCATED_CHAR_FOUND, "a character is truncated");
2591     }
2592     ucnv_close(cnv);
2593 }
2594
2595 static void
2596 TestMBCS() {
2597     /* test input */
2598     static const uint8_t in[]={
2599         0x01,
2600         0xa6, 0xa3,
2601         0x00,
2602         0xa6, 0xa1,
2603         0x08,
2604         0xc2, 0x76,
2605         0xc2, 0x78,
2606
2607     };
2608
2609     /* expected test results */
2610     static const int32_t results[]={
2611         /* number of bytes read, code point */
2612         1, 0x0001,
2613         2, 0x250c,
2614         1, 0x0000,
2615         2, 0x2500,
2616         1, 0x0008,
2617         2, 0xd60c,
2618         2, 0xd60e,
2619     };
2620
2621     const char *source=(const char *)in, *limit=(const char *)in+sizeof(in);
2622     UErrorCode errorCode=U_ZERO_ERROR;
2623
2624     UConverter *cnv=ucnv_open("ibm-1363", &errorCode);
2625     if(U_FAILURE(errorCode)) {
2626         log_data_err("Unable to open a MBCS(ibm-1363) converter: %s\n", u_errorName(errorCode));
2627         return;
2628     }
2629     TestNextUChar(cnv, source, limit, results, "MBCS(ibm-1363)");
2630     /* Test the condition when source >= sourceLimit */
2631     TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source");
2632     /*Test for the condition where there is an invalid character*/
2633     {
2634         static const uint8_t source2[]={0xa1, 0x80};
2635         TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_ZERO_ERROR, "an invalid character");
2636     }
2637     /*Test for the condition where we have a truncated char*/
2638     {
2639         static const uint8_t source1[]={0xc4};
2640         ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_STOP, NULL, NULL, NULL, &errorCode);
2641         TestNextUCharError(cnv, (const char*)source1, (const char*)source1+sizeof(source1), U_TRUNCATED_CHAR_FOUND, "a character is truncated");
2642     }
2643     ucnv_close(cnv);
2644
2645 }
2646
2647 #if !UCONFIG_NO_LEGACY_CONVERSION && !UCONFIG_NO_FILE_IO
2648 static void
2649 TestICCRunout() {
2650 /*    { "ibm-1363", :bin{ a2aea2 }, "\u00a1\u001a", :intvector{ 0, 2 }, :int{1}, :int{0}, "\", "?", :bin{""} } */
2651
2652     const char *cnvName = "ibm-1363";
2653     UErrorCode status = U_ZERO_ERROR;
2654     const char sourceData[] = { (char)0xa2, (char)0xae, (char)0xa2 };
2655     /* UChar   expectUData[] = { 0x00a1, 0x001a }; */
2656     const char *source = sourceData;
2657     const char *sourceLim = sourceData+sizeof(sourceData);
2658     UChar c1, c2, c3;
2659     UConverter *cnv=ucnv_open(cnvName, &status);
2660     if(U_FAILURE(status)) {
2661         log_data_err("Unable to open %s converter: %s\n", cnvName, u_errorName(status));
2662         return;
2663     }
2664
2665 #if 0
2666     {
2667     UChar   targetBuf[256];
2668     UChar   *target = targetBuf;
2669     UChar   *targetLim = target+256;
2670     ucnv_toUnicode(cnv, &target, targetLim, &source, sourceLim, NULL, TRUE, &status);
2671
2672     log_info("After convert: target@%d, source@%d, status%s\n",
2673              target-targetBuf, source-sourceData, u_errorName(status));
2674
2675     if(U_FAILURE(status)) {
2676         log_err("Failed to convert: %s\n", u_errorName(status));
2677     } else {
2678
2679     }
2680     }
2681 #endif
2682
2683     c1=ucnv_getNextUChar(cnv, &source, sourceLim, &status);
2684     log_verbose("c1: U+%04X, source@%d, status %s\n", c1, source-sourceData, u_errorName(status));
2685
2686     c2=ucnv_getNextUChar(cnv, &source, sourceLim, &status);
2687     log_verbose("c2: U+%04X, source@%d, status %s\n", c2, source-sourceData, u_errorName(status));
2688
2689     c3=ucnv_getNextUChar(cnv, &source, sourceLim, &status);
2690     log_verbose("c3: U+%04X, source@%d, status %s\n", c3, source-sourceData, u_errorName(status));
2691
2692     if(status==U_INDEX_OUTOFBOUNDS_ERROR && c3==0xFFFF) {
2693         log_verbose("OK\n");
2694     } else {
2695         log_err("FAIL: c3 was not FFFF or err was not U_INDEXOUTOFBOUNDS_ERROR\n");
2696     }
2697
2698     ucnv_close(cnv);
2699
2700 }
2701 #endif
2702
#ifdef U_ENABLE_GENERIC_ISO_2022

/*
 * Generic "ISO_2022" converter check (compiled only when
 * U_ENABLE_GENERIC_ISO_2022 is defined).
 *
 * Walks an input that starts with the escape sequence 1b 25 42 followed by
 * one- to four-byte sequences, then exercises four error-path calls through
 * TestNextUCharError(): limit before source, empty source range, a lone
 * lead byte with the STOP callback installed, and an invalid byte pair.
 */
static void
TestISO_2022() {
    /* test input */
    static const uint8_t in[]={
        0x1b, 0x25, 0x42,
        0x31,
        0x32,
        0x61,
        0xc2, 0x80,
        0xe0, 0xa0, 0x80,
        0xf0, 0x90, 0x80, 0x80
    };

    /* expected test results */
    static const int32_t results[]={
        /* number of bytes read, code point */
        4, 0x0031,  /* 4 bytes including the escape sequence */
        1, 0x0032,
        1, 0x61,
        2, 0x80,
        3, 0x800,
        4, 0x10000
    };

    const char *source=(const char *)in, *limit=(const char *)in+sizeof(in);
    UErrorCode errorCode=U_ZERO_ERROR;
    UConverter *cnv;

    cnv=ucnv_open("ISO_2022", &errorCode);
    if(U_FAILURE(errorCode)) {
        log_data_err("Unable to open a iso-2022 converter: %s\n", u_errorName(errorCode));
        return;
    }
    TestNextUChar(cnv, source, limit, results, "ISO_2022");

    /*
     * Test the condition when sourceLimit < source.  The pair
     * (source+1, source) is used instead of (source, source-1) because
     * source points at the first element of in[], and forming a pointer
     * before the start of an array is undefined behavior in C.
     */
    TestNextUCharError(cnv, source+1, source, U_ILLEGAL_ARGUMENT_ERROR, "sourceLimit < source");
    /* Test the condition when source >= sourceLimit */
    TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source");
    /*Test for the condition where we have a truncated char*/
    {
        static const uint8_t source1[]={0xc4};
        ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_STOP, NULL, NULL, NULL, &errorCode);
        TestNextUCharError(cnv, (const char*)source1, (const char*)source1+sizeof(source1), U_TRUNCATED_CHAR_FOUND, "a character is truncated");
    }
    /*Test for the condition where there is an invalid character*/
    {
        static const uint8_t source2[]={0xa1, 0x01};
        TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_ILLEGAL_CHAR_FOUND, "an invalid character");
    }
    ucnv_close(cnv);
}

#endif
2760
2761 static void
2762 TestSmallTargetBuffer(const uint16_t* source, const UChar* sourceLimit,UConverter* cnv){
2763     const UChar* uSource;
2764     const UChar* uSourceLimit;
2765     const char* cSource;
2766     const char* cSourceLimit;
2767     UChar *uTargetLimit =NULL;
2768     UChar *uTarget;
2769     char *cTarget;
2770     const char *cTargetLimit;
2771     char *cBuf;
2772     UChar *uBuf; /*,*test;*/
2773     int32_t uBufSize = 120;
2774     int len=0;
2775     int i=2;
2776     UErrorCode errorCode=U_ZERO_ERROR;
2777     uBuf =  (UChar*)malloc(uBufSize * sizeof(UChar)*5);
2778     cBuf =(char*)malloc(uBufSize * sizeof(char) * 10);
2779     ucnv_reset(cnv);
2780     for(;--i>0; ){
2781         uSource = (UChar*) source;
2782         uSourceLimit=(const UChar*)sourceLimit;
2783         cTarget = cBuf;
2784         uTarget = uBuf;
2785         cSource = cBuf;
2786         cTargetLimit = cBuf;
2787         uTargetLimit = uBuf;
2788
2789         do{
2790
2791             cTargetLimit = cTargetLimit+ i;
2792             ucnv_fromUnicode( cnv , &cTarget, cTargetLimit,&uSource,uSourceLimit,NULL,FALSE, &errorCode);
2793             if(errorCode==U_BUFFER_OVERFLOW_ERROR){
2794                errorCode=U_ZERO_ERROR;
2795                 continue;
2796             }
2797
2798             if(U_FAILURE(errorCode)){
2799                 log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode));
2800                 return;
2801             }
2802
2803         }while (uSource<uSourceLimit);
2804
2805         cSourceLimit =cTarget;
2806         do{
2807             uTargetLimit=uTargetLimit+i;
2808             ucnv_toUnicode(cnv,&uTarget,uTargetLimit,&cSource,cSourceLimit,NULL,FALSE,&errorCode);
2809             if(errorCode==U_BUFFER_OVERFLOW_ERROR){
2810                errorCode=U_ZERO_ERROR;
2811                 continue;
2812             }
2813             if(U_FAILURE(errorCode)){
2814                    log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode));
2815                     return;
2816             }
2817         }while(cSource<cSourceLimit);
2818
2819         uSource = source;
2820         /*test =uBuf;*/
2821         for(len=0;len<(int)(source - sourceLimit);len++){
2822             if(uBuf[len]!=uSource[len]){
2823                 log_err("Expected : \\u%04X \t Got: \\u%04X\n",uSource[len],(int)uBuf[len]) ;
2824             }
2825         }
2826     }
2827     free(uBuf);
2828     free(cBuf);
2829 }
2830 /* Test for Jitterbug 778 */
2831 static void TestToAndFromUChars(const uint16_t* source, const UChar* sourceLimit,UConverter* cnv){
2832     const UChar* uSource;
2833     const UChar* uSourceLimit;
2834     const char* cSource;
2835     UChar *uTargetLimit =NULL;
2836     UChar *uTarget;
2837     char *cTarget;
2838     const char *cTargetLimit;
2839     char *cBuf;
2840     UChar *uBuf,*test;
2841     int32_t uBufSize = 120;
2842     int numCharsInTarget=0;
2843     UErrorCode errorCode=U_ZERO_ERROR;
2844     uBuf =  (UChar*)malloc(uBufSize * sizeof(UChar)*5);
2845     cBuf =(char*)malloc(uBufSize * sizeof(char) * 5);
2846     uSource = source;
2847     uSourceLimit=sourceLimit;
2848     cTarget = cBuf;
2849     cTargetLimit = cBuf +uBufSize*5;
2850     uTarget = uBuf;
2851     uTargetLimit = uBuf+ uBufSize*5;
2852     ucnv_reset(cnv);
2853     numCharsInTarget=ucnv_fromUChars(cnv, cTarget, (int32_t)(cTargetLimit-cTarget), uSource, (int32_t)(uSourceLimit-uSource), &errorCode);
2854     if(U_FAILURE(errorCode)){
2855         log_err("ucnv_fromUnicode conversion failed reason %s\n", u_errorName(errorCode));
2856         return;
2857     }
2858     cSource = cBuf;
2859     test =uBuf;
2860     ucnv_toUChars(cnv,uTarget,(int32_t)(uTargetLimit-uTarget),cSource,numCharsInTarget,&errorCode);
2861     if(U_FAILURE(errorCode)){
2862         log_err("ucnv_toUChars conversion failed, reason %s\n", u_errorName(errorCode));
2863         return;
2864     }
2865     uSource = source;
2866     while(uSource<uSourceLimit){
2867         if(*test!=*uSource){
2868
2869             log_err("Expected : \\u%04X \t Got: \\u%04X\n",*uSource,(int)*test) ;
2870         }
2871         uSource++;
2872         test++;
2873     }
2874     free(uBuf);
2875     free(cBuf);
2876 }
2877
2878 static void TestSmallSourceBuffer(const uint16_t* source, const UChar* sourceLimit,UConverter* cnv){
2879     const UChar* uSource;
2880     const UChar* uSourceLimit;
2881     const char* cSource;
2882     const char* cSourceLimit;
2883     UChar *uTargetLimit =NULL;
2884     UChar *uTarget;
2885     char *cTarget;
2886     const char *cTargetLimit;
2887     char *cBuf;
2888     UChar *uBuf; /*,*test;*/
2889     int32_t uBufSize = 120;
2890     int len=0;
2891     int i=2;
2892     const UChar *temp = sourceLimit;
2893     UErrorCode errorCode=U_ZERO_ERROR;
2894     uBuf =  (UChar*)malloc(uBufSize * sizeof(UChar)*5);
2895     cBuf =(char*)malloc(uBufSize * sizeof(char) * 10);
2896
2897     ucnv_reset(cnv);
2898     for(;--i>0;){
2899         uSource = (UChar*) source;
2900         cTarget = cBuf;
2901         uTarget = uBuf;
2902         cSource = cBuf;
2903         cTargetLimit = cBuf;
2904         uTargetLimit = uBuf+uBufSize*5;
2905         cTargetLimit = cTargetLimit+uBufSize*10;
2906         uSourceLimit=uSource;
2907         do{
2908
2909             if (uSourceLimit < sourceLimit) {
2910                 uSourceLimit = uSourceLimit+1;
2911             }
2912             ucnv_fromUnicode( cnv , &cTarget, cTargetLimit,&uSource,uSourceLimit,NULL,FALSE, &errorCode);
2913             if(errorCode==U_BUFFER_OVERFLOW_ERROR){
2914                errorCode=U_ZERO_ERROR;
2915                 continue;
2916             }
2917
2918             if(U_FAILURE(errorCode)){
2919                 log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode));
2920                 return;
2921             }
2922
2923         }while (uSource<temp);
2924
2925         cSourceLimit =cBuf;
2926         do{
2927             if (cSourceLimit < cBuf + (cTarget - cBuf)) {
2928                 cSourceLimit = cSourceLimit+1;
2929             }
2930             ucnv_toUnicode(cnv,&uTarget,uTargetLimit,&cSource,cSourceLimit,NULL,FALSE,&errorCode);
2931             if(errorCode==U_BUFFER_OVERFLOW_ERROR){
2932                errorCode=U_ZERO_ERROR;
2933                 continue;
2934             }
2935             if(U_FAILURE(errorCode)){
2936                    log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode));
2937                     return;
2938             }
2939         }while(cSource<cTarget);
2940
2941         uSource = source;
2942         /*test =uBuf;*/
2943         for(;len<(int)(source - sourceLimit);len++){
2944             if(uBuf[len]!=uSource[len]){
2945                 log_err("Expected : \\u%04X \t Got: \\u%04X\n",uSource[len],(int)uBuf[len]) ;
2946             }
2947         }
2948     }
2949     free(uBuf);
2950     free(cBuf);
2951 }
2952 static void
2953 TestGetNextUChar2022(UConverter* cnv, const char* source, const char* limit,
2954                      const uint16_t results[], const char* message){
2955 /*     const char* s0; */
2956      const char* s=(char*)source;
2957      const uint16_t *r=results;
2958      UErrorCode errorCode=U_ZERO_ERROR;
2959      uint32_t c,exC;
2960      ucnv_reset(cnv);
2961      while(s<limit) {
2962          /* s0=s; */
2963         c=ucnv_getNextUChar(cnv, &s, limit, &errorCode);
2964         if(errorCode==U_INDEX_OUTOFBOUNDS_ERROR) {
2965             break; /* no more significant input */
2966         } else if(U_FAILURE(errorCode)) {
2967             log_err("%s ucnv_getNextUChar() failed: %s\n", message, u_errorName(errorCode));
2968             break;
2969         } else {
2970             if(UTF_IS_FIRST_SURROGATE(*r)){
2971                 int i =0, len = 2;
2972                 UTF_NEXT_CHAR_SAFE(r, i, len, exC, FALSE);
2973                 r++;
2974             }else{
2975                 exC = *r;
2976             }
2977             if(c!=(uint32_t)(exC))
2978                 log_err("%s ucnv_getNextUChar() Expected:  \\u%04X Got:  \\u%04X \n",message,(uint32_t) (*r),c);
2979         }
2980         r++;
2981     }
2982 }
2983
2984 static int TestJitterbug930(const char* enc){
2985     UErrorCode err = U_ZERO_ERROR;
2986     UConverter*converter;
2987     char out[80];
2988     char*target = out;
2989     UChar in[4];
2990     const UChar*source = in;
2991     int32_t off[80];
2992     int32_t* offsets = off;
2993     int numOffWritten=0;
2994     UBool flush = 0;
2995     converter = my_ucnv_open(enc, &err);
2996
2997     in[0] = 0x41;     /* 0x4E00;*/
2998     in[1] = 0x4E01;
2999     in[2] = 0x4E02;
3000     in[3] = 0x4E03;
3001
3002     memset(off, '*', sizeof(off));
3003
3004     ucnv_fromUnicode (converter,
3005             &target,
3006             target+2,
3007             &source,
3008             source+3,
3009             offsets,
3010             flush,
3011             &err);
3012
3013         /* writes three bytes into the output buffer: 41 1B 24
3014         * but offsets contains 0 1 1
3015     */
3016     while(*offsets< off[10]){
3017         numOffWritten++;
3018         offsets++;
3019     }
3020     log_verbose("Testing Jitterbug 930 for encoding %s",enc);
3021     if(numOffWritten!= (int)(target-out)){
3022         log_err("Jitterbug 930 test for enc: %s failed. Expected: %i Got: %i",enc, (int)(target-out),numOffWritten);
3023     }
3024
3025     err = U_ZERO_ERROR;
3026
3027     memset(off,'*' , sizeof(off));
3028
3029     flush = 1;
3030     offsets=off;
3031     ucnv_fromUnicode (converter,
3032             &target,
3033             target+4,
3034             &source,
3035             source,
3036             offsets,
3037             flush,
3038             &err);
3039     numOffWritten=0;
3040     while(*offsets< off[10]){
3041         numOffWritten++;
3042         if(*offsets!= -1){
3043             log_err("Jitterbug 930 test for enc: %s failed. Expected: %i Got: %i",enc,-1,*offsets) ;
3044         }
3045         offsets++;
3046     }
3047
3048     /* writes 42 43 7A into output buffer,
3049      * offsets contains -1 -1 -1
3050      */
3051     ucnv_close(converter);
3052     return 0;
3053 }
3054
3055 static void
3056 TestHZ() {
3057     /* test input */
3058     static const uint16_t in[]={
3059             0x3000, 0x3001, 0x3002, 0x00B7, 0x02C9, 0x02C7, 0x00A8, 0x3003, 0x3005, 0x2014,
3060             0xFF5E, 0x2016, 0x2026, 0x007E, 0x997C, 0x70B3, 0x75C5, 0x5E76, 0x73BB, 0x83E0,
3061             0x64AD, 0x62E8, 0x94B5, 0x000A, 0x6CE2, 0x535A, 0x52C3, 0x640F, 0x94C2, 0x7B94,
3062             0x4F2F, 0x5E1B, 0x8236, 0x000A, 0x8116, 0x818A, 0x6E24, 0x6CCA, 0x9A73, 0x6355,
3063             0x535C, 0x54FA, 0x8865, 0x000A, 0x57E0, 0x4E0D, 0x5E03, 0x6B65, 0x7C3F, 0x90E8,
3064             0x6016, 0x248F, 0x2490, 0x000A, 0x2491, 0x2492, 0x2493, 0x2494, 0x2495, 0x2496,
3065             0x2497, 0x2498, 0x2499, 0x000A, 0x249A, 0x249B, 0x2474, 0x2475, 0x2476, 0x2477,
3066             0x2478, 0x2479, 0x247A, 0x000A, 0x247B, 0x247C, 0x247D, 0x247E, 0x247F, 0x2480,
3067             0x2481, 0x2482, 0x2483, 0x000A, 0x0041, 0x0043, 0x0044, 0x0045, 0x0046, 0x007E,
3068             0x0048, 0x0049, 0x004A, 0x000A, 0x004B, 0x004C, 0x004D, 0x004E, 0x004F, 0x0050,
3069             0x0051, 0x0052, 0x0053, 0x000A, 0x0054, 0x0055, 0x0056, 0x0057, 0x0058, 0x0059,
3070             0x005A, 0x005B, 0x005C, 0x000A
3071       };
3072     const UChar* uSource;
3073     const UChar* uSourceLimit;
3074     const char* cSource;
3075     const char* cSourceLimit;
3076     UChar *uTargetLimit =NULL;
3077     UChar *uTarget;
3078     char *cTarget;
3079     const char *cTargetLimit;
3080     char *cBuf;
3081     UChar *uBuf,*test;
3082     int32_t uBufSize = 120;
3083     UErrorCode errorCode=U_ZERO_ERROR;
3084     UConverter *cnv;
3085     int32_t* offsets = (int32_t*) malloc(uBufSize * sizeof(int32_t) * 5);
3086     int32_t* myOff= offsets;
3087     cnv=ucnv_open("HZ", &errorCode);
3088     if(U_FAILURE(errorCode)) {
3089         log_data_err("Unable to open HZ converter: %s\n", u_errorName(errorCode));
3090         return;
3091     }
3092
3093     uBuf =  (UChar*)malloc(uBufSize * sizeof(UChar)*5);
3094     cBuf =(char*)malloc(uBufSize * sizeof(char) * 5);
3095     uSource = (const UChar*)in;
3096     uSourceLimit=(const UChar*)in + (sizeof(in)/sizeof(in[0]));
3097     cTarget = cBuf;
3098     cTargetLimit = cBuf +uBufSize*5;
3099     uTarget = uBuf;
3100     uTargetLimit = uBuf+ uBufSize*5;
3101     ucnv_fromUnicode( cnv , &cTarget, cTargetLimit,&uSource,uSourceLimit,myOff,TRUE, &errorCode);
3102     if(U_FAILURE(errorCode)){
3103         log_err("ucnv_fromUnicode conversion failed reason %s\n", u_errorName(errorCode));
3104         return;
3105     }
3106     cSource = cBuf;
3107     cSourceLimit =cTarget;
3108     test =uBuf;
3109     myOff=offsets;
3110     ucnv_toUnicode(cnv,&uTarget,uTargetLimit,&cSource,cSourceLimit,myOff,TRUE,&errorCode);
3111     if(U_FAILURE(errorCode)){
3112         log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode));
3113         return;
3114     }
3115     uSource = (const UChar*)in;
3116     while(uSource<uSourceLimit){
3117         if(*test!=*uSource){
3118
3119             log_err("Expected : \\u%04X \t Got: \\u%04X\n",*uSource,(int)*test) ;
3120         }
3121         uSource++;
3122         test++;
3123     }
3124     TestGetNextUChar2022(cnv, cBuf, cTarget, in, "HZ encoding");
3125     TestSmallTargetBuffer(in,(const UChar*)in + (sizeof(in)/sizeof(in[0])),cnv);
3126     TestSmallSourceBuffer(in,(const UChar*)in + (sizeof(in)/sizeof(in[0])),cnv);
3127     TestToAndFromUChars(in,(const UChar*)in + (sizeof(in)/sizeof(in[0])),cnv);
3128     TestJitterbug930("csISO2022JP");
3129     ucnv_close(cnv);
3130     free(offsets);
3131     free(uBuf);
3132     free(cBuf);
3133 }
3134
3135 static void
3136 TestISCII(){
3137         /* test input */
3138     static const uint16_t in[]={
3139         /* test full range of Devanagari */
3140         0x0901,0x0902,0x0903,0x0905,0x0906,0x0907,0x0908,0x0909,0x090A,
3141         0x090B,0x090E,0x090F,0x0910,0x090D,0x0912,0x0913,0x0914,0x0911,
3142         0x0915,0x0916,0x0917,0x0918,0x0919,0x091A,0x091B,0x091C,0x091D,
3143         0x091E,0x091F,0x0920,0x0921,0x0922,0x0923,0x0924,0x0925,0x0926,
3144         0x0927,0x0928,0x0929,0x092A,0x092B,0x092C,0x092D,0x092E,0x092F,
3145         0x095F,0x0930,0x0931,0x0932,0x0933,0x0934,0x0935,0x0936,0x0937,
3146         0x0938,0x0939,0x200D,0x093E,0x093F,0x0940,0x0941,0x0942,0x0943,
3147         0x0946,0x0947,0x0948,0x0945,0x094A,0x094B,0x094C,0x0949,0x094D,
3148         0x093d,0x0966,0x0967,0x0968,0x0969,0x096A,0x096B,0x096C,
3149         0x096D,0x096E,0x096F,
3150         /* test Soft halant*/
3151         0x0915,0x094d, 0x200D,
3152         /* test explicit halant */
3153         0x0915,0x094d, 0x200c,
3154         /* test double danda */
3155         0x965,
3156         /* test ASCII */
3157         0x1B, 0x24, 0x29, 0x47, 0x0E, 0x23, 0x21, 0x23, 0x22, 0x23,
3158         0x23, 0x23, 0x24, 0x23, 0x25, 0x23, 0x26, 0x23, 0x27, 0x23,
3159         0x28, 0x23, 0x29, 0x23, 0x2A, 0x23, 0x2B, 0x0F, 0x2F, 0x2A,
3160         /* tests from Lotus */
3161         0x0061,0x0915,0x000D,0x000A,0x0996,0x0043,
3162         0x0930,0x094D,0x200D,
3163         0x0901,0x000D,0x000A,0x0905,0x0985,0x0043,
3164         0x0915,0x0921,0x002B,0x095F,
3165         /* tamil range */
3166         0x0B86, 0xB87, 0xB88,
3167         /* telugu range */
3168         0x0C05, 0x0C02, 0x0C03,0x0c31,
3169         /* kannada range */
3170         0x0C85, 0xC82, 0x0C83,
3171         /* test Abbr sign and Anudatta */
3172         0x0970, 0x952,
3173        /* 0x0958,
3174         0x0959,
3175         0x095A,
3176         0x095B,
3177         0x095C,
3178         0x095D,
3179         0x095E,
3180         0x095F,*/
3181         0x0960 /* Vocallic RRI 0xAB, 0xE9*/,
3182         0x0944 /* Vowel Sign Vocallic RRI 0xDF, 0xE9 */,
3183         0x090C ,
3184         0x0962,
3185         0x0961 /* Vocallic LL 0xa6, 0xE9 */,
3186         0x0963 /* Vowel Sign Vocallic LL 0xdb, 0xE9, */,
3187         0x0950 /* OM Symbol 0xa1, 0xE9,*/,
3188         0x093D /* Avagraha  0xEA, 0xE9*/,
3189         0x0958,
3190         0x0959,
3191         0x095A,
3192         0x095B,
3193         0x095C,
3194         0x095D,
3195         0x095E,
3196         0x0020, 0x094D, 0x0930, 0x0000, 0x00A0
3197       };
3198     static const unsigned char byteArr[]={
3199
3200         0xa1,0xa2,0xa3,0xa4,0xa5,0xa6,0xa7,0xa8,0xa9,
3201         0xaa,0xab,0xac,0xad,0xae,0xaf,0xb0,0xb1,0xb2,
3202         0xb3,0xb4,0xb5,0xb6,0xb7,0xb8,0xb9,0xba,0xbb,
3203         0xbc,0xbd,0xbe,0xbf,0xc0,0xc1,0xc2,0xc3,0xc4,
3204         0xc5,0xc6,0xc7,0xc8,0xc9,0xca,0xcb,0xcc,0xcd,
3205         0xce,0xcf,0xd0,0xd1,0xd2,0xd3,0xd4,0xd5,0xd6,
3206         0xd7,0xd8,0xd9,0xda,0xdb,0xdc,0xdd,0xde,0xdf,
3207         0xe0,0xe1,0xe2,0xe3,0xe4,0xe5,0xe6,0xe7,0xe8,
3208         0xea,0xe9,0xf1,0xf2,0xf3,0xf4,0xf5,0xf6,0xf7,
3209         0xf8,0xf9,0xfa,
3210         /* test soft halant */
3211         0xb3, 0xE8, 0xE9,
3212         /* test explicit halant */
3213         0xb3, 0xE8, 0xE8,
3214         /* test double danda */
3215         0xea, 0xea,
3216         /* test ASCII */
3217         0x1B, 0x24, 0x29, 0x47, 0x0E, 0x23, 0x21, 0x23, 0x22, 0x23,
3218         0x23, 0x23, 0x24, 0x23, 0x25, 0x23, 0x26, 0x23, 0x27, 0x23,
3219         0x28, 0x23, 0x29, 0x23, 0x2A, 0x23, 0x2B, 0x0F, 0x2F, 0x2A,
3220         /* test ATR code */
3221
3222         /* tests from Lotus */
3223         0x61,0xEF,0x42,0xEF,0x30,0xB3,0x0D,0x0A,0xEF,0x43,0xB4,0x43,
3224         0xEF,0x42,0xCF,0xE8,0xD9,
3225         0xEF,0x42,0xA1,0x0D,0x0A,0xEF,0x42,0xA4,0xEF,0x43,0xA4,0x43,
3226         0xEF,0x42,0xB3,0xBF,0x2B,0xEF,0x42,0xCE,
3227         /* tamil range */
3228         0xEF, 0x44, 0xa5, 0xa6, 0xa7,
3229         /* telugu range */
3230         0xEF, 0x45,0xa4, 0xa2, 0xa3,0xd0,
3231         /* kannada range */
3232         0xEF, 0x48,0xa4, 0xa2, 0xa3,
3233         /* anudatta and abbreviation sign */
3234         0xEF, 0x42, 0xF0, 0xBF, 0xF0, 0xB8,
3235
3236
3237         0xAA, 0xE9,/* RI + NUKTA 0x0960*/
3238
3239         0xDF, 0xE9,/* Vowel sign RI + NUKTA 0x0944*/
3240
3241         0xa6, 0xE9,/* Vowel I + NUKTA 0x090C*/
3242
3243         0xdb, 0xE9,/* Vowel sign I + Nukta 0x0962*/
3244
3245         0xa7, 0xE9,/* Vowel II + NUKTA 0x0961*/
3246
3247         0xdc, 0xE9,/* Vowel sign II + Nukta 0x0963*/
3248
3249         0xa1, 0xE9,/* chandrabindu + Nukta 0x0950*/
3250
3251         0xEA, 0xE9, /* Danda + Nukta 0x093D*/
3252
3253         0xB3, 0xE9, /* Ka + NUKTA */
3254
3255         0xB4, 0xE9, /* Kha + NUKTA */
3256
3257         0xB5, 0xE9, /* Ga + NUKTA */
3258
3259         0xBA, 0xE9,
3260
3261         0xBF, 0xE9,
3262
3263         0xC0, 0xE9,
3264
3265         0xC9, 0xE9,
3266         /* INV halant RA    */
3267         0xD9, 0xE8, 0xCF,
3268         0x00, 0x00A0,
3269         /* just consume unhandled codepoints */
3270         0xEF, 0x30,
3271
3272     };
3273     testConvertToU(byteArr,(sizeof(byteArr)),in,(sizeof(in)/U_SIZEOF_UCHAR),"x-iscii-de",NULL,TRUE);
3274     TestConv(in,(sizeof(in)/2),"ISCII,version=0","hindi", (char *)byteArr,sizeof(byteArr));
3275
3276 }
3277
3278 static void
3279 TestISO_2022_JP() {
3280     /* test input */
3281     static const uint16_t in[]={
3282         0x0041,/*0x00E9,*/0x3000, 0x3001, 0x3002, 0x0020, 0x000D, 0x000A,
3283         0x0043, 0x0044, 0x0045, 0x0046, 0x0047, 0x0048, 0x0049, 0x004A, 0x000D, 0x000A,
3284         0x004B, 0x004C, 0x004D, 0x004E, 0x004F, 0x0050, 0x0051, 0x0052, 0x000D, 0x000A,
3285         0x3005, 0x3006, 0x3007, 0x30FC, 0x2015, 0x2010, 0xFF0F, 0x005C, 0x000D, 0x000A,
3286         0x3013, 0x2018, 0x2026, 0x2025, 0x2018, 0x2019, 0x201C, 0x000D, 0x000A,
3287         0x201D, 0x3014, 0x000D, 0x000A,
3288         0x0053, 0x0054, 0x0055, 0x0056, 0x0057, 0x0058, 0x0059, 0x005A, 0x000D, 0x000A,
3289         0x0053, 0x0054, 0x0055, 0x0056, 0x0057, 0x0058, 0x0059, 0x005A, 0x000D, 0x000A,
3290         };
3291     const UChar* uSource;
3292     const UChar* uSourceLimit;
3293     const char* cSource;
3294     const char* cSourceLimit;
3295     UChar *uTargetLimit =NULL;
3296     UChar *uTarget;
3297     char *cTarget;
3298     const char *cTargetLimit;
3299     char *cBuf;
3300     UChar *uBuf,*test;
3301     int32_t uBufSize = 120;
3302     UErrorCode errorCode=U_ZERO_ERROR;
3303     UConverter *cnv;
3304     int32_t* offsets = (int32_t*) malloc(uBufSize * sizeof(int32_t) * 5);
3305     int32_t* myOff= offsets;
3306     cnv=ucnv_open("ISO_2022_JP_1", &errorCode);
3307     if(U_FAILURE(errorCode)) {
3308         log_data_err("Unable to open an ISO_2022_JP_1 converter: %s\n", u_errorName(errorCode));
3309         return;
3310     }
3311
3312     uBuf =  (UChar*)malloc(uBufSize * sizeof(UChar)*5);
3313     cBuf =(char*)malloc(uBufSize * sizeof(char) * 5);
3314     uSource = (const UChar*)in;
3315     uSourceLimit=(const UChar*)in + (sizeof(in)/sizeof(in[0]));
3316     cTarget = cBuf;
3317     cTargetLimit = cBuf +uBufSize*5;
3318     uTarget = uBuf;
3319     uTargetLimit = uBuf+ uBufSize*5;
3320     ucnv_fromUnicode( cnv , &cTarget, cTargetLimit,&uSource,uSourceLimit,myOff,TRUE, &errorCode);
3321     if(U_FAILURE(errorCode)){
3322         log_err("ucnv_fromUnicode conversion failed reason %s\n", u_errorName(errorCode));
3323         return;
3324     }
3325     cSource = cBuf;
3326     cSourceLimit =cTarget;
3327     test =uBuf;
3328     myOff=offsets;
3329     ucnv_toUnicode(cnv,&uTarget,uTargetLimit,&cSource,cSourceLimit,myOff,TRUE,&errorCode);
3330     if(U_FAILURE(errorCode)){
3331         log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode));
3332         return;
3333     }
3334
3335     uSource = (const UChar*)in;
3336     while(uSource<uSourceLimit){
3337         if(*test!=*uSource){
3338
3339             log_err("Expected : \\u%04X \t Got: \\u%04X\n",*uSource,(int)*test) ;
3340         }
3341         uSource++;
3342         test++;
3343     }
3344
3345     TestSmallTargetBuffer(in,(const UChar*)in + (sizeof(in)/sizeof(in[0])),cnv);
3346     TestSmallSourceBuffer(in,(const UChar*)in + (sizeof(in)/sizeof(in[0])),cnv);
3347     TestGetNextUChar2022(cnv, cBuf, cTarget, in, "ISO-2022-JP encoding");
3348     TestToAndFromUChars(in,(const UChar*)in + (sizeof(in)/sizeof(in[0])),cnv);
3349     TestJitterbug930("csISO2022JP");
3350     ucnv_close(cnv);
3351     free(uBuf);
3352     free(cBuf);
3353     free(offsets);
3354 }
3355
3356 static void TestConv(const uint16_t in[],int len, const char* conv, const char* lang, char byteArr[],int byteArrLen){
3357     const UChar* uSource;
3358     const UChar* uSourceLimit;
3359     const char* cSource;
3360     const char* cSourceLimit;
3361     UChar *uTargetLimit =NULL;
3362     UChar *uTarget;
3363     char *cTarget;
3364     const char *cTargetLimit;
3365     char *cBuf;
3366     UChar *uBuf,*test;
3367     int32_t uBufSize = 120*10;
3368     UErrorCode errorCode=U_ZERO_ERROR;
3369     UConverter *cnv;
3370     int32_t* offsets = (int32_t*) malloc(uBufSize * sizeof(int32_t) );
3371     int32_t* myOff= offsets;
3372     cnv=my_ucnv_open(conv, &errorCode);
3373     if(U_FAILURE(errorCode)) {
3374         log_data_err("Unable to open a %s converter: %s\n", conv, u_errorName(errorCode));
3375         return;
3376     }
3377
3378     uBuf =  (UChar*)malloc(uBufSize * sizeof(UChar));
3379     cBuf =(char*)malloc(uBufSize * sizeof(char));
3380     uSource = (const UChar*)in;
3381     uSourceLimit=uSource+len;
3382     cTarget = cBuf;
3383     cTargetLimit = cBuf +uBufSize;
3384     uTarget = uBuf;
3385     uTargetLimit = uBuf+ uBufSize;
3386     ucnv_fromUnicode( cnv , &cTarget, cTargetLimit,&uSource,uSourceLimit,myOff,TRUE, &errorCode);
3387     if(U_FAILURE(errorCode)){
3388         log_err("ucnv_fromUnicode conversion failed reason %s\n", u_errorName(errorCode));
3389         return;
3390     }
3391     /*log_verbose("length of compressed string for language %s using %s:%i \n",conv,lang,(cTarget-cBuf));*/
3392     cSource = cBuf;
3393     cSourceLimit =cTarget;
3394     test =uBuf;
3395     myOff=offsets;
3396     ucnv_toUnicode(cnv,&uTarget,uTargetLimit,&cSource,cSourceLimit,myOff,TRUE,&errorCode);
3397     if(U_FAILURE(errorCode)){
3398         log_err("ucnv_toUnicode conversion failed, reason: %s\n", u_errorName(errorCode));
3399         return;
3400     }
3401
3402     uSource = (const UChar*)in;
3403     while(uSource<uSourceLimit){
3404         if(*test!=*uSource){
3405             log_err("for codepage %s : Expected : \\u%04X \t Got: \\u%04X\n",conv,*uSource,(int)*test) ;
3406         }
3407         uSource++;
3408         test++;
3409     }
3410     TestSmallTargetBuffer(in,(const UChar*)&in[len],cnv);
3411     TestSmallSourceBuffer(in,(const UChar*)&in[len],cnv);
3412     TestGetNextUChar2022(cnv, cBuf, cTarget, in, conv);
3413     if(byteArr && byteArrLen!=0){
3414         TestGetNextUChar2022(cnv, byteArr, (byteArr+byteArrLen), in, lang);
3415         TestToAndFromUChars(in,(const UChar*)&in[len],cnv);
3416         {
3417             cSource = byteArr;
3418             cSourceLimit = cSource+byteArrLen;
3419             test=uBuf;
3420             myOff = offsets;
3421             ucnv_toUnicode(cnv,&uTarget,uTargetLimit,&cSource,cSourceLimit,myOff,TRUE,&errorCode);
3422             if(U_FAILURE(errorCode)){
3423                 log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode));
3424                 return;
3425             }
3426
3427             uSource = (const UChar*)in;
3428             while(uSource<uSourceLimit){
3429                 if(*test!=*uSource){
3430                     log_err("Expected : \\u%04X \t Got: \\u%04X\n",*uSource,(int)*test) ;
3431                 }
3432                 uSource++;
3433                 test++;
3434             }
3435         }
3436     }
3437
3438     ucnv_close(cnv);
3439     free(uBuf);
3440     free(cBuf);
3441     free(offsets);
3442 }
3443 static UChar U_CALLCONV
3444 _charAt(int32_t offset, void *context) {
3445     return ((char*)context)[offset];
3446 }
3447
3448 static int32_t
3449 unescape(UChar* dst, int32_t dstLen,const char* src,int32_t srcLen,UErrorCode *status){
3450     int32_t srcIndex=0;
3451     int32_t dstIndex=0;
3452     if(U_FAILURE(*status)){
3453         return 0;
3454     }
3455     if((dst==NULL && dstLen>0) || (src==NULL ) || dstLen < -1 || srcLen <-1 ){
3456         *status = U_ILLEGAL_ARGUMENT_ERROR;
3457         return 0;
3458     }
3459     if(srcLen==-1){
3460         srcLen = (int32_t)uprv_strlen(src);
3461     }
3462
3463     for (; srcIndex<srcLen; ) {
3464         UChar32 c = src[srcIndex++];
3465         if (c == 0x005C /*'\\'*/) {
3466             c = u_unescapeAt(_charAt,&srcIndex,srcLen,(void*)src); /* advances i*/
3467             if (c == (UChar32)0xFFFFFFFF) {
3468                 *status=U_INVALID_CHAR_FOUND; /* return empty string */
3469                 break; /* invalid escape sequence */
3470             }
3471         }
3472         if(dstIndex < dstLen){
3473             if(c>0xFFFF){
3474                dst[dstIndex++] = UTF16_LEAD(c);
3475                if(dstIndex<dstLen){
3476                     dst[dstIndex]=UTF16_TRAIL(c);
3477                }else{
3478                    *status=U_BUFFER_OVERFLOW_ERROR;
3479                }
3480             }else{
3481                 dst[dstIndex]=(UChar)c;
3482             }
3483
3484         }else{
3485             *status = U_BUFFER_OVERFLOW_ERROR;
3486         }
3487         dstIndex++; /* for preflighting */
3488     }
3489     return dstIndex;
3490 }
3491
3492 static void
3493 TestFullRoundtrip(const char* cp){
3494     UChar usource[10] ={0};
3495     UChar nsrc[10] = {0};
3496     uint32_t i=1;
3497     int len=0, ulen;
3498     nsrc[0]=0x0061;
3499     /* Test codepoint 0 */
3500     TestConv(usource,1,cp,"",NULL,0);
3501     TestConv(usource,2,cp,"",NULL,0);
3502     nsrc[2]=0x5555;
3503     TestConv(nsrc,3,cp,"",NULL,0);
3504
3505     for(;i<=0x10FFFF;i++){
3506         if(i==0xD800){
3507             i=0xDFFF;
3508             continue;
3509         }
3510         if(i<=0xFFFF){
3511             usource[0] =(UChar) i;
3512             len=1;
3513         }else{
3514             usource[0]=UTF16_LEAD(i);
3515             usource[1]=UTF16_TRAIL(i);
3516             len=2;
3517         }
3518         ulen=len;
3519         if(i==0x80) {
3520             usource[2]=0;
3521         }
3522         /* Test only single code points */
3523         TestConv(usource,ulen,cp,"",NULL,0);
3524         /* Test codepoint repeated twice */
3525         usource[ulen]=usource[0];
3526         usource[ulen+1]=usource[1];
3527         ulen+=len;
3528         TestConv(usource,ulen,cp,"",NULL,0);
3529         /* Test codepoint repeated 3 times */
3530         usource[ulen]=usource[0];
3531         usource[ulen+1]=usource[1];
3532         ulen+=len;
3533         TestConv(usource,ulen,cp,"",NULL,0);
3534         /* Test codepoint in between 2 codepoints */
3535         nsrc[1]=usource[0];
3536         nsrc[2]=usource[1];
3537         nsrc[len+1]=0x5555;
3538         TestConv(nsrc,len+2,cp,"",NULL,0);
3539         uprv_memset(usource,0,sizeof(UChar)*10);
3540     }
3541 }
3542
3543 static void
3544 TestRoundTrippingAllUTF(void){
3545     if(!getTestOption(QUICK_OPTION)){
3546         log_verbose("Running exhaustive round trip test for BOCU-1\n");
3547         TestFullRoundtrip("BOCU-1");
3548         log_verbose("Running exhaustive round trip test for SCSU\n");
3549         TestFullRoundtrip("SCSU");
3550         log_verbose("Running exhaustive round trip test for UTF-8\n");
3551         TestFullRoundtrip("UTF-8");
3552         log_verbose("Running exhaustive round trip test for CESU-8\n");
3553         TestFullRoundtrip("CESU-8");
3554         log_verbose("Running exhaustive round trip test for UTF-16BE\n");
3555         TestFullRoundtrip("UTF-16BE");
3556         log_verbose("Running exhaustive round trip test for UTF-16LE\n");
3557         TestFullRoundtrip("UTF-16LE");
3558         log_verbose("Running exhaustive round trip test for UTF-16\n");
3559         TestFullRoundtrip("UTF-16");
3560         log_verbose("Running exhaustive round trip test for UTF-32BE\n");
3561         TestFullRoundtrip("UTF-32BE");
3562         log_verbose("Running exhaustive round trip test for UTF-32LE\n");
3563         TestFullRoundtrip("UTF-32LE");
3564         log_verbose("Running exhaustive round trip test for UTF-32\n");
3565         TestFullRoundtrip("UTF-32");
3566         log_verbose("Running exhaustive round trip test for UTF-7\n");
3567         TestFullRoundtrip("UTF-7");
3568         log_verbose("Running exhaustive round trip test for UTF-7\n");
3569         TestFullRoundtrip("UTF-7,version=1");
3570         log_verbose("Running exhaustive round trip test for IMAP-mailbox-name\n");
3571         TestFullRoundtrip("IMAP-mailbox-name");
3572         log_verbose("Running exhaustive round trip test for GB18030\n");
3573         TestFullRoundtrip("GB18030");
3574     }
3575 }
3576
3577 static void
3578 TestSCSU() {
3579
3580     static const uint16_t germanUTF16[]={
3581         0x00d6, 0x006c, 0x0020, 0x0066, 0x006c, 0x0069, 0x0065, 0x00df, 0x0074
3582     };
3583
3584     static const uint8_t germanSCSU[]={
3585         0xd6, 0x6c, 0x20, 0x66, 0x6c, 0x69, 0x65, 0xdf, 0x74
3586     };
3587
3588     static const uint16_t russianUTF16[]={
3589         0x041c, 0x043e, 0x0441, 0x043a, 0x0432, 0x0430
3590     };
3591
3592     static const uint8_t russianSCSU[]={
3593         0x12, 0x9c, 0xbe, 0xc1, 0xba, 0xb2, 0xb0
3594     };
3595
3596     static const uint16_t japaneseUTF16[]={
3597         0x3000, 0x266a, 0x30ea, 0x30f3, 0x30b4, 0x53ef, 0x611b,
3598         0x3044, 0x3084, 0x53ef, 0x611b, 0x3044, 0x3084, 0x30ea, 0x30f3,
3599         0x30b4, 0x3002, 0x534a, 0x4e16, 0x7d00, 0x3082, 0x524d, 0x306b,
3600         0x6d41, 0x884c, 0x3057, 0x305f, 0x300c, 0x30ea, 0x30f3, 0x30b4,
3601         0x306e, 0x6b4c, 0x300d, 0x304c, 0x3074, 0x3063, 0x305f, 0x308a,
3602         0x3059, 0x308b, 0x304b, 0x3082, 0x3057, 0x308c, 0x306a, 0x3044,
3603         0x3002, 0x7c73, 0x30a2, 0x30c3, 0x30d7, 0x30eb, 0x30b3, 0x30f3,
3604         0x30d4, 0x30e5, 0x30fc, 0x30bf, 0x793e, 0x306e, 0x30d1, 0x30bd,
3605         0x30b3, 0x30f3, 0x300c, 0x30de, 0x30c3, 0x30af, 0xff08, 0x30de,
3606         0x30c3, 0x30ad, 0x30f3, 0x30c8, 0x30c3, 0x30b7, 0x30e5, 0xff09,
3607         0x300d, 0x3092, 0x3001, 0x3053, 0x3088, 0x306a, 0x304f, 0x611b,
3608         0x3059, 0x308b, 0x4eba, 0x305f, 0x3061, 0x306e, 0x3053, 0x3068,
3609         0x3060, 0x3002, 0x300c, 0x30a2, 0x30c3, 0x30d7, 0x30eb, 0x4fe1,
3610         0x8005, 0x300d, 0x306a, 0x3093, 0x3066, 0x8a00, 0x3044, 0x65b9,
3611         0x307e, 0x3067, 0x3042, 0x308b, 0x3002
3612     };
3613
3614     /* SCSUEncoder produces a slightly longer result (179B vs. 178B) because of one different choice:
3615      it uses an SQn once where a longer look-ahead could have shown that SCn is more efficient */
3616     static const uint8_t japaneseSCSU[]={
3617         0x08, 0x00, 0x1b, 0x4c, 0xea, 0x16, 0xca, 0xd3, 0x94, 0x0f, 0x53, 0xef, 0x61, 0x1b, 0xe5, 0x84,
3618         0xc4, 0x0f, 0x53, 0xef, 0x61, 0x1b, 0xe5, 0x84, 0xc4, 0x16, 0xca, 0xd3, 0x94, 0x08, 0x02, 0x0f,
3619         0x53, 0x4a, 0x4e, 0x16, 0x7d, 0x00, 0x30, 0x82, 0x52, 0x4d, 0x30, 0x6b, 0x6d, 0x41, 0x88, 0x4c,
3620         0xe5, 0x97, 0x9f, 0x08, 0x0c, 0x16, 0xca, 0xd3, 0x94, 0x15, 0xae, 0x0e, 0x6b, 0x4c, 0x08, 0x0d,
3621         0x8c, 0xb4, 0xa3, 0x9f, 0xca, 0x99, 0xcb, 0x8b, 0xc2, 0x97, 0xcc, 0xaa, 0x84, 0x08, 0x02, 0x0e,
3622         0x7c, 0x73, 0xe2, 0x16, 0xa3, 0xb7, 0xcb, 0x93, 0xd3, 0xb4, 0xc5, 0xdc, 0x9f, 0x0e, 0x79, 0x3e,
3623         0x06, 0xae, 0xb1, 0x9d, 0x93, 0xd3, 0x08, 0x0c, 0xbe, 0xa3, 0x8f, 0x08, 0x88, 0xbe, 0xa3, 0x8d,
3624         0xd3, 0xa8, 0xa3, 0x97, 0xc5, 0x17, 0x89, 0x08, 0x0d, 0x15, 0xd2, 0x08, 0x01, 0x93, 0xc8, 0xaa,
3625         0x8f, 0x0e, 0x61, 0x1b, 0x99, 0xcb, 0x0e, 0x4e, 0xba, 0x9f, 0xa1, 0xae, 0x93, 0xa8, 0xa0, 0x08,
3626         0x02, 0x08, 0x0c, 0xe2, 0x16, 0xa3, 0xb7, 0xcb, 0x0f, 0x4f, 0xe1, 0x80, 0x05, 0xec, 0x60, 0x8d,
3627         0xea, 0x06, 0xd3, 0xe6, 0x0f, 0x8a, 0x00, 0x30, 0x44, 0x65, 0xb9, 0xe4, 0xfe, 0xe7, 0xc2, 0x06,
3628         0xcb, 0x82
3629     };
3630
3631     static const uint16_t allFeaturesUTF16[]={
3632         0x0041, 0x00df, 0x0401, 0x015f, 0x00df, 0x01df, 0xf000, 0xdbff,
3633         0xdfff, 0x000d, 0x000a, 0x0041, 0x00df, 0x0401, 0x015f, 0x00df,
3634         0x01df, 0xf000, 0xdbff, 0xdfff
3635     };
3636
3637     /* see comment at japaneseSCSU: the same kind of different choice yields a slightly shorter
3638      * result here (34B vs. 35B)
3639      */
3640     static const uint8_t allFeaturesSCSU[]={
3641         0x41, 0xdf, 0x12, 0x81, 0x03, 0x5f, 0x10, 0xdf, 0x1b, 0x03,
3642         0xdf, 0x1c, 0x88, 0x80, 0x0b, 0xbf, 0xff, 0xff, 0x0d, 0x0a,
3643         0x41, 0x10, 0xdf, 0x12, 0x81, 0x03, 0x5f, 0x10, 0xdf, 0x13,
3644         0xdf, 0x14, 0x80, 0x15, 0xff
3645     };
3646     static const uint16_t monkeyIn[]={
3647         0x00A8, 0x3003, 0x3005, 0x2015, 0xFF5E, 0x2016, 0x2026, 0x2018, 0x000D, 0x000A,
3648         0x2019, 0x201C, 0x201D, 0x3014, 0x3015, 0x3008, 0x3009, 0x300A, 0x000D, 0x000A,
3649         0x300B, 0x300C, 0x300D, 0x300E, 0x300F, 0x3016, 0x3017, 0x3010, 0x000D, 0x000A,
3650         0x3011, 0x00B1, 0x00D7, 0x00F7, 0x2236, 0x2227, 0x7FC1, 0x8956, 0x000D, 0x000A,
3651         0x9D2C, 0x9D0E, 0x9EC4, 0x5CA1, 0x6C96, 0x837B, 0x5104, 0x5C4B, 0x000D, 0x000A,
3652         0x61B6, 0x81C6, 0x6876, 0x7261, 0x4E59, 0x4FFA, 0x5378, 0x57F7, 0x000D, 0x000A,
3653         0x57F4, 0x57F9, 0x57FA, 0x57FC, 0x5800, 0x5802, 0x5805, 0x5806, 0x000D, 0x000A,
3654         0x580A, 0x581E, 0x6BB5, 0x6BB7, 0x6BBA, 0x6BBC, 0x9CE2, 0x977C, 0x000D, 0x000A,
3655         0x6BBF, 0x6BC1, 0x6BC5, 0x6BC6, 0x6BCB, 0x6BCD, 0x6BCF, 0x6BD2, 0x000D, 0x000A,
3656         0x6BD3, 0x6BD4, 0x6BD6, 0x6BD7, 0x6BD8, 0x6BDB, 0x6BEB, 0x6BEC, 0x000D, 0x000A,
3657         0x6C05, 0x6C08, 0x6C0F, 0x6C11, 0x6C13, 0x6C23, 0x6C34, 0x0041, 0x000D, 0x000A,
3658         0x0043, 0x0044, 0x0045, 0x0046, 0x0047, 0x0048, 0x0049, 0x004A, 0x000D, 0x000A,
3659         0x004B, 0x004C, 0x004D, 0x004E, 0x004F, 0x0050, 0x0051, 0x0052, 0x000D, 0x000A,
3660         0x0053, 0x0054, 0x0055, 0x0056, 0x0057, 0x0058, 0x0059, 0x005A, 0x000D, 0x000A,
3661         0x005B, 0x9792, 0x9CCC, 0x9CCD, 0x9CCE, 0x9CCF, 0x9CD0, 0x9CD3, 0x000D, 0x000A,
3662         0x9CD4, 0x9CD5, 0x9CD7, 0x9CD8, 0x9CD9, 0x9CDC, 0x9CDD, 0x9CDF, 0x000D, 0x000A,
3663         0x9785, 0x9791, 0x00BD, 0x0390, 0x0385, 0x0386, 0x0388, 0x0389, 0x000D, 0x000A,
3664         0x038E, 0x038F, 0x0390, 0x0391, 0x0392, 0x0393, 0x0394, 0x0395, 0x000D, 0x000A,
3665         0x0396, 0x0397, 0x0398, 0x0399, 0x039A, 0x038A, 0x038C, 0x039C, 0x000D, 0x000A,
3666         /* test non-BMP code points */
3667         0xD869, 0xDE99, 0xD869, 0xDE9C, 0xD869, 0xDE9D, 0xD869, 0xDE9E, 0xD869, 0xDE9F,
3668         0xD869, 0xDEA0, 0xD869, 0xDEA5, 0xD869, 0xDEA6, 0xD869, 0xDEA7, 0xD869, 0xDEA8,
3669         0xD869, 0xDEAB, 0xD869, 0xDEAC, 0xD869, 0xDEAD, 0xD869, 0xDEAE, 0xD869, 0xDEAF,
3670         0xD869, 0xDEB0, 0xD869, 0xDEB1, 0xD869, 0xDEB3, 0xD869, 0xDEB5, 0xD869, 0xDEB6,
3671         0xD869, 0xDEB7, 0xD869, 0xDEB8, 0xD869, 0xDEB9, 0xD869, 0xDEBA, 0xD869, 0xDEBB,
3672         0xD869, 0xDEBC, 0xD869, 0xDEBD, 0xD869, 0xDEBE, 0xD869, 0xDEBF, 0xD869, 0xDEC0,
3673         0xD869, 0xDEC1, 0xD869, 0xDEC2, 0xD869, 0xDEC3, 0xD869, 0xDEC4, 0xD869, 0xDEC8,
3674         0xD869, 0xDECA, 0xD869, 0xDECB, 0xD869, 0xDECD, 0xD869, 0xDECE, 0xD869, 0xDECF,
3675         0xD869, 0xDED0, 0xD869, 0xDED1, 0xD869, 0xDED2, 0xD869, 0xDED3, 0xD869, 0xDED4,
3676         0xD869, 0xDED5, 0xD800, 0xDC00, 0xD800, 0xDC00, 0xD800, 0xDC00, 0xDBFF, 0xDFFF,
3677         0xDBFF, 0xDFFF, 0xDBFF, 0xDFFF,
3678
3679
3680         0x4DB3, 0x4DB4, 0x4DB5, 0x4E00, 0x4E00, 0x4E01, 0x4E02, 0x4E03, 0x000D, 0x000A,
3681         0x0392, 0x0393, 0x0394, 0x0395, 0x0396, 0x0397, 0x33E0, 0x33E6, 0x000D, 0x000A,
3682         0x4E05, 0x4E07, 0x4E04, 0x4E08, 0x4E08, 0x4E09, 0x4E0A, 0x4E0B, 0x000D, 0x000A,
3683         0x4E0C, 0x0021, 0x0022, 0x0023, 0x0024, 0xFF40, 0xFF41, 0xFF42, 0x000D, 0x000A,
3684         0xFF43, 0xFF44, 0xFF45, 0xFF46, 0xFF47, 0xFF48, 0xFF49, 0xFF4A, 0x000D, 0x000A,
3685     };
3686     static const char *fTestCases [] = {
3687           "\\ud800\\udc00", /* smallest surrogate*/
3688           "\\ud8ff\\udcff",
3689           "\\udBff\\udFff", /* largest surrogate pair*/
3690           "\\ud834\\udc00",
3691           "\\U0010FFFF",
3692           "Hello \\u9292 \\u9192 World!",
3693           "Hell\\u0429o \\u9292 \\u9192 W\\u00e4rld!",
3694           "Hell\\u0429o \\u9292 \\u9292W\\u00e4rld!",
3695
3696           "\\u0648\\u06c8", /* catch missing reset*/
3697           "\\u0648\\u06c8",
3698
3699           "\\u4444\\uE001", /* lowest quotable*/
3700           "\\u4444\\uf2FF", /* highest quotable*/
3701           "\\u4444\\uf188\\u4444",
3702           "\\u4444\\uf188\\uf288",
3703           "\\u4444\\uf188abc\\u0429\\uf288",
3704           "\\u9292\\u2222",
3705           "Hell\\u0429\\u04230o \\u9292 \\u9292W\\u00e4\\u0192rld!",
3706           "Hell\\u0429o \\u9292 \\u9292W\\u00e4rld!",
3707           "Hello World!123456",
3708           "Hello W\\u0081\\u011f\\u0082!", /* Latin 1 run*/
3709
3710           "abc\\u0301\\u0302",  /* uses SQn for u301 u302*/
3711           "abc\\u4411d",      /* uses SQU*/
3712           "abc\\u4411\\u4412d",/* uses SCU*/
3713           "abc\\u0401\\u0402\\u047f\\u00a5\\u0405", /* uses SQn for ua5*/
3714           "\\u9191\\u9191\\u3041\\u9191\\u3041\\u3041\\u3000", /* SJIS like data*/
3715           "\\u9292\\u2222",
3716           "\\u9191\\u9191\\u3041\\u9191\\u3041\\u3041\\u3000",
3717           "\\u9999\\u3051\\u300c\\u9999\\u9999\\u3060\\u9999\\u3065\\u3065\\u3065\\u300c",
3718           "\\u3000\\u266a\\u30ea\\u30f3\\u30b4\\u53ef\\u611b\\u3044\\u3084\\u53ef\\u611b\\u3044\\u3084\\u30ea\\u30f3\\u30b4\\u3002",
3719
3720           "", /* empty input*/
3721           "\\u0000", /* smallest BMP character*/
3722           "\\uFFFF", /* largest BMP character*/
3723
3724           /* regression tests*/
3725           "\\u6441\\ub413\\ua733\\uf8fe\\ueedb\\u587f\\u195f\\u4899\\uf23d\\u49fd\\u0aac\\u5792\\ufc22\\ufc3c\\ufc46\\u00aa",
3726           "\\u00df\\u01df\\uf000\\udbff\\udfff\\u000d\n\\u0041\\u00df\\u0401\\u015f\\u00df\\u01df\\uf000\\udbff\\udfff",
3727           "\\u30f9\\u8321\\u05e5\\u181c\\ud72b\\u2019\\u99c9\\u2f2f\\uc10c\\u82e1\\u2c4d\\u1ebc\\u6013\\u66dc\\ubbde\\u94a5\\u4726\\u74af\\u3083\\u55b9\\u000c",
3728           "\\u0041\\u00df\\u0401\\u015f",
3729           "\\u9066\\u2123abc",
3730           "\\ud266\\u43d7\\u\\ue386\\uc9c0\\u4a6b\\u9222\\u901f\\u7410\\ua63f\\u539b\\u9596\\u482e\\u9d47\\ucfe4\\u7b71\\uc280\\uf26a\\u982f\\u862a\\u4edd\\uf513\\ufda6\\u869d\\u2ee0\\ua216\\u3ff6\\u3c70\\u89c0\\u9576\\ud5ec\\ubfda\\u6cca\\u5bb3\\ubcea\\u554c\\u914e\\ufa4a\\uede3\\u2990\\ud2f5\\u2729\\u5141\\u0f26\\uccd8\\u5413\\ud196\\ubbe2\\u51b9\\u9b48\\u0dc8\\u2195\\u21a2\\u21e9\\u00e4\\u9d92\\u0bc0\\u06c5",
3731           "\\uf95b\\u2458\\u2468\\u0e20\\uf51b\\ue36e\\ubfc1\\u0080\\u02dd\\uf1b5\\u0cf3\\u6059\\u7489",
3732     };
3733     int i=0;
3734     for(;i<sizeof(fTestCases)/sizeof(*fTestCases);i++){
3735         const char* cSrc = fTestCases[i];
3736         UErrorCode status = U_ZERO_ERROR;
3737         int32_t cSrcLen,srcLen;
3738         UChar* src;
3739         /* UConverter* cnv = ucnv_open("SCSU",&status); */
3740         cSrcLen = srcLen = (int32_t)uprv_strlen(fTestCases[i]);
3741         src = (UChar*) malloc((sizeof(UChar) * srcLen) + sizeof(UChar));
3742         srcLen=unescape(src,srcLen,cSrc,cSrcLen,&status);
3743         log_verbose("Testing roundtrip for src: %s at index :%d\n",cSrc,i);
3744         TestConv(src,srcLen,"SCSU","Coverage",NULL,0);
3745         free(src);
3746     }
3747     TestConv(allFeaturesUTF16,(sizeof(allFeaturesUTF16)/2),"SCSU","all features", (char *)allFeaturesSCSU,sizeof(allFeaturesSCSU));
3748     TestConv(allFeaturesUTF16,(sizeof(allFeaturesUTF16)/2),"SCSU","all features",(char *)allFeaturesSCSU,sizeof(allFeaturesSCSU));
3749     TestConv(japaneseUTF16,(sizeof(japaneseUTF16)/2),"SCSU","japaneese",(char *)japaneseSCSU,sizeof(japaneseSCSU));
3750     TestConv(japaneseUTF16,(sizeof(japaneseUTF16)/2),"SCSU,locale=ja","japaneese",(char *)japaneseSCSU,sizeof(japaneseSCSU));
3751     TestConv(germanUTF16,(sizeof(germanUTF16)/2),"SCSU","german",(char *)germanSCSU,sizeof(germanSCSU));
3752     TestConv(russianUTF16,(sizeof(russianUTF16)/2), "SCSU","russian",(char *)russianSCSU,sizeof(russianSCSU));
3753     TestConv(monkeyIn,(sizeof(monkeyIn)/2),"SCSU","monkey",NULL,0);
3754 }
3755
3756 #if !UCONFIG_NO_LEGACY_CONVERSION
3757 static void TestJitterbug2346(){
3758     char source[] = { 0x1b,0x24,0x42,0x3d,0x45,0x1b,0x28,0x4a,0x0d,0x0a,
3759                       0x1b,0x24,0x42,0x3d,0x45,0x1b,0x28,0x4a,0x0d,0x0a};
3760     uint16_t expected[] = {0x91CD,0x000D,0x000A,0x91CD,0x000D,0x000A};
3761
3762     UChar uTarget[500]={'\0'};
3763     UChar* utarget=uTarget;
3764     UChar* utargetLimit=uTarget+sizeof(uTarget)/2;
3765
3766     char cTarget[500]={'\0'};
3767     char* ctarget=cTarget;
3768     char* ctargetLimit=cTarget+sizeof(cTarget);
3769     const char* csource=source;
3770     UChar* temp = expected;
3771     UErrorCode err=U_ZERO_ERROR;
3772
3773     UConverter* conv =ucnv_open("ISO_2022_JP",&err);
3774     if(U_FAILURE(err)) {
3775         log_data_err("Unable to open a iso-2022 converter: %s\n", u_errorName(err));
3776         return;
3777     }
3778     ucnv_toUnicode(conv,&utarget,utargetLimit,&csource,csource+sizeof(source),NULL,TRUE,&err);
3779     if(U_FAILURE(err)) {
3780         log_err("ISO_2022_JP to Unicode conversion failed: %s\n", u_errorName(err));
3781         return;
3782     }
3783     utargetLimit=utarget;
3784     utarget = uTarget;
3785     while(utarget<utargetLimit){
3786         if(*temp!=*utarget){
3787
3788             log_err("Expected : \\u%04X \t Got: \\u%04X\n",*utarget,(int)*temp) ;
3789         }
3790         utarget++;
3791         temp++;
3792     }
3793     ucnv_fromUnicode(conv,&ctarget,ctargetLimit,(const UChar**)&utarget,utargetLimit,NULL,TRUE,&err);
3794     if(U_FAILURE(err)) {
3795         log_err("ISO_2022_JP from Unicode conversion failed: %s\n", u_errorName(err));
3796         return;
3797     }
3798     ctargetLimit=ctarget;
3799     ctarget =cTarget;
3800     ucnv_close(conv);
3801
3802
3803 }
3804
3805 static void
3806 TestISO_2022_JP_1() {
3807     /* test input */
3808     static const uint16_t in[]={
3809         0x3000, 0x3001, 0x3002, 0x0020, 0xFF0E, 0x30FB, 0xFF1A, 0xFF1B, 0x000D, 0x000A,
3810         0x0043, 0x0044, 0x0045, 0x0046, 0x0047, 0x0048, 0x0049, 0x004A, 0x000D, 0x000A,
3811         0x52C8, 0x52CC, 0x52CF, 0x52D1, 0x52D4, 0x52D6, 0x52DB, 0x52DC, 0x000D, 0x000A,
3812         0x004B, 0x004C, 0x004D, 0x004E, 0x004F, 0x0050, 0x0051, 0x0052, 0x000D, 0x000A,
3813         0x3005, 0x3006, 0x3007, 0x30FC, 0x2015, 0x2010, 0xFF0F, 0x005C, 0x000D, 0x000A,
3814         0x3013, 0x2018, 0x2026, 0x2025, 0x2018, 0x2019, 0x201C, 0x000D, 0x000A,
3815         0x201D, 0x000D, 0x000A,
3816         0x0053, 0x0054, 0x0055, 0x0056, 0x0057, 0x0058, 0x0059, 0x005A, 0x000D, 0x000A,
3817         0x4F94, 0x4F97, 0x52BA, 0x52BB, 0x52BD, 0x52C0, 0x52C4, 0x52C6, 0x000D, 0x000A,
3818         0x0053, 0x0054, 0x0055, 0x0056, 0x0057, 0x0058, 0x0059, 0x005A, 0x000D, 0x000A,
3819         0x4F78, 0x4F79, 0x4F7A, 0x4F7D, 0x4F7E, 0x4F81, 0x4F82, 0x4F84, 0x000D, 0x000A,
3820         0x4F85, 0x4F89, 0x4F8A, 0x4F8C, 0x4F8E, 0x4F90, 0x4F92, 0x4F93, 0x000D, 0x000A,
3821         0x52E1, 0x52E5, 0x52E8, 0x52E9, 0x000D, 0x000A
3822       };
3823     const UChar* uSource;
3824     const UChar* uSourceLimit;
3825     const char* cSource;
3826     const char* cSourceLimit;
3827     UChar *uTargetLimit =NULL;
3828     UChar *uTarget;
3829     char *cTarget;
3830     const char *cTargetLimit;
3831     char *cBuf;
3832     UChar *uBuf,*test;
3833     int32_t uBufSize = 120;
3834     UErrorCode errorCode=U_ZERO_ERROR;
3835     UConverter *cnv;
3836
3837     cnv=ucnv_open("ISO_2022_JP_1", &errorCode);
3838     if(U_FAILURE(errorCode)) {
3839         log_data_err("Unable to open a iso-2022 converter: %s\n", u_errorName(errorCode));
3840         return;
3841     }
3842
3843     uBuf =  (UChar*)malloc(uBufSize * sizeof(UChar)*5);
3844     cBuf =(char*)malloc(uBufSize * sizeof(char) * 5);
3845     uSource = (const UChar*)in;
3846     uSourceLimit=(const UChar*)in + (sizeof(in)/sizeof(in[0]));
3847     cTarget = cBuf;
3848     cTargetLimit = cBuf +uBufSize*5;
3849     uTarget = uBuf;
3850     uTargetLimit = uBuf+ uBufSize*5;
3851     ucnv_fromUnicode( cnv , &cTarget, cTargetLimit,&uSource,uSourceLimit,NULL,TRUE, &errorCode);
3852     if(U_FAILURE(errorCode)){
3853         log_err("ucnv_fromUnicode conversion failed reason %s\n", u_errorName(errorCode));
3854         return;
3855     }
3856     cSource = cBuf;
3857     cSourceLimit =cTarget;
3858     test =uBuf;
3859     ucnv_toUnicode(cnv,&uTarget,uTargetLimit,&cSource,cSourceLimit,NULL,TRUE,&errorCode);
3860     if(U_FAILURE(errorCode)){
3861         log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode));
3862         return;
3863     }
3864     uSource = (const UChar*)in;
3865     while(uSource<uSourceLimit){
3866         if(*test!=*uSource){
3867
3868             log_err("Expected : \\u%04X \t Got: \\u%04X\n",*uSource,(int)*test) ;
3869         }
3870         uSource++;
3871         test++;
3872     }
3873     /*ucnv_close(cnv);
3874     cnv=ucnv_open("ISO_2022,locale=jp,version=1", &errorCode);*/
3875     /*Test for the condition where there is an invalid character*/
3876     ucnv_reset(cnv);
3877     {
3878         static const uint8_t source2[]={0x0e,0x24,0x053};
3879         TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_ZERO_ERROR, "an invalid character [ISO-2022-JP-1]");
3880     }
3881     TestSmallTargetBuffer(in,(const UChar*)in + (sizeof(in)/sizeof(in[0])),cnv);
3882     TestSmallSourceBuffer(in,(const UChar*)in + (sizeof(in)/sizeof(in[0])),cnv);
3883     ucnv_close(cnv);
3884     free(uBuf);
3885     free(cBuf);
3886 }
3887
3888 static void
3889 TestISO_2022_JP_2() {
3890     /* test input */
3891     static const uint16_t in[]={
3892         0x00A8, 0x3003, 0x3005, 0x2015, 0xFF5E, 0x2016, 0x2026, 0x2018, 0x000D, 0x000A,
3893         0x2019, 0x201C, 0x201D, 0x3014, 0x3015, 0x3008, 0x3009, 0x300A, 0x000D, 0x000A,
3894         0x300B, 0x300C, 0x300D, 0x300E, 0x300F, 0x3016, 0x3017, 0x3010, 0x000D, 0x000A,
3895         0x3011, 0x00B1, 0x00D7, 0x00F7, 0x2236, 0x2227, 0x7FC1, 0x8956, 0x000D, 0x000A,
3896         0x9D2C, 0x9D0E, 0x9EC4, 0x5CA1, 0x6C96, 0x837B, 0x5104, 0x5C4B, 0x000D, 0x000A,
3897         0x61B6, 0x81C6, 0x6876, 0x7261, 0x4E59, 0x4FFA, 0x5378, 0x57F7, 0x000D, 0x000A,
3898         0x57F4, 0x57F9, 0x57FA, 0x57FC, 0x5800, 0x5802, 0x5805, 0x5806, 0x000D, 0x000A,
3899         0x580A, 0x581E, 0x6BB5, 0x6BB7, 0x6BBA, 0x6BBC, 0x9CE2, 0x977C, 0x000D, 0x000A,
3900         0x6BBF, 0x6BC1, 0x6BC5, 0x6BC6, 0x6BCB, 0x6BCD, 0x6BCF, 0x6BD2, 0x000D, 0x000A,
3901         0x6BD3, 0x6BD4, 0x6BD6, 0x6BD7, 0x6BD8, 0x6BDB, 0x6BEB, 0x6BEC, 0x000D, 0x000A,
3902         0x6C05, 0x6C08, 0x6C0F, 0x6C11, 0x6C13, 0x6C23, 0x6C34, 0x0041, 0x000D, 0x000A,
3903         0x0043, 0x0044, 0x0045, 0x0046, 0x0047, 0x0048, 0x0049, 0x004A, 0x000D, 0x000A,
3904         0x004B, 0x004C, 0x004D, 0x004E, 0x004F, 0x0050, 0x0051, 0x0052, 0x000D, 0x000A,
3905         0x0053, 0x0054, 0x0055, 0x0056, 0x0057, 0x0058, 0x0059, 0x005A, 0x000D, 0x000A,
3906         0x005B, 0x9792, 0x9CCC, 0x9CCD, 0x9CCE, 0x9CCF, 0x9CD0, 0x9CD3, 0x000D, 0x000A,
3907         0x9CD4, 0x9CD5, 0x9CD7, 0x9CD8, 0x9CD9, 0x9CDC, 0x9CDD, 0x9CDF, 0x000D, 0x000A,
3908         0x9785, 0x9791, 0x00BD, 0x0390, 0x0385, 0x0386, 0x0388, 0x0389, 0x000D, 0x000A,
3909         0x038E, 0x038F, 0x0390, 0x0391, 0x0392, 0x0393, 0x0394, 0x0395, 0x000D, 0x000A,
3910         0x0396, 0x0397, 0x0398, 0x0399, 0x039A, 0x038A, 0x038C, 0x039C, 0x000D, 0x000A
3911       };
3912     const UChar* uSource;
3913     const UChar* uSourceLimit;
3914     const char* cSource;
3915     const char* cSourceLimit;
3916     UChar *uTargetLimit =NULL;
3917     UChar *uTarget;
3918     char *cTarget;
3919     const char *cTargetLimit;
3920     char *cBuf;
3921     UChar *uBuf,*test;
3922     int32_t uBufSize = 120;
3923     UErrorCode errorCode=U_ZERO_ERROR;
3924     UConverter *cnv;
3925     int32_t* offsets = (int32_t*) malloc(uBufSize * sizeof(int32_t) * 5);
3926     int32_t* myOff= offsets;
3927     cnv=ucnv_open("ISO_2022_JP_2", &errorCode);
3928     if(U_FAILURE(errorCode)) {
3929         log_data_err("Unable to open a iso-2022 converter: %s\n", u_errorName(errorCode));
3930         return;
3931     }
3932
3933     uBuf =  (UChar*)malloc(uBufSize * sizeof(UChar)*5);
3934     cBuf =(char*)malloc(uBufSize * sizeof(char) * 5);
3935     uSource = (const UChar*)in;
3936     uSourceLimit=(const UChar*)in + (sizeof(in)/sizeof(in[0]));
3937     cTarget = cBuf;
3938     cTargetLimit = cBuf +uBufSize*5;
3939     uTarget = uBuf;
3940     uTargetLimit = uBuf+ uBufSize*5;
3941     ucnv_fromUnicode( cnv , &cTarget, cTargetLimit,&uSource,uSourceLimit,myOff,TRUE, &errorCode);
3942     if(U_FAILURE(errorCode)){
3943         log_err("ucnv_fromUnicode conversion failed reason %s\n", u_errorName(errorCode));
3944         return;
3945     }
3946     cSource = cBuf;
3947     cSourceLimit =cTarget;
3948     test =uBuf;
3949     myOff=offsets;
3950     ucnv_toUnicode(cnv,&uTarget,uTargetLimit,&cSource,cSourceLimit,myOff,TRUE,&errorCode);
3951     if(U_FAILURE(errorCode)){
3952         log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode));
3953         return;
3954     }
3955     uSource = (const UChar*)in;
3956     while(uSource<uSourceLimit){
3957         if(*test!=*uSource){
3958
3959             log_err("Expected : \\u%04X \t Got: \\u%04X\n",*uSource,(int)*test) ;
3960         }
3961         uSource++;
3962         test++;
3963     }
3964     TestSmallTargetBuffer(in,(const UChar*)in + (sizeof(in)/sizeof(in[0])),cnv);
3965     TestSmallSourceBuffer(in,(const UChar*)in + (sizeof(in)/sizeof(in[0])),cnv);
3966     TestToAndFromUChars(in,(const UChar*)in + (sizeof(in)/sizeof(in[0])),cnv);
3967     /*Test for the condition where there is an invalid character*/
3968     ucnv_reset(cnv);
3969     {
3970         static const uint8_t source2[]={0x0e,0x24,0x053};
3971         TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_ZERO_ERROR, "an invalid character [ISO-2022-JP-2]");
3972     }
3973     ucnv_close(cnv);
3974     free(uBuf);
3975     free(cBuf);
3976     free(offsets);
3977 }
3978
3979 static void
3980 TestISO_2022_KR() {
3981     /* test input */
3982     static const uint16_t in[]={
3983                     0x9F4B,0x9F4E,0x9F52,0x9F5F,0x9F61,0x9F67,0x9F6A,0x000A,0x000D
3984                    ,0x9F6C,0x9F77,0x9F8D,0x9F90,0x9F95,0x9F9C,0xAC00,0xAC01,0xAC04
3985                    ,0xAC07,0xAC08,0xAC09,0x0025,0x0026,0x0027,0x000A,0x000D,0x0028,0x0029
3986                    ,0x002A,0x002B,0x002C,0x002D,0x002E,0x53C3,0x53C8,0x53C9,0x53CA,0x53CB
3987                    ,0x53CD,0x53D4,0x53D6,0x53D7,0x53DB,0x000A,0x000D,0x53E1,0x53E2
3988                    ,0x53E3,0x53E4,0x000A,0x000D};
3989     const UChar* uSource;
3990     const UChar* uSourceLimit;
3991     const char* cSource;
3992     const char* cSourceLimit;
3993     UChar *uTargetLimit =NULL;
3994     UChar *uTarget;
3995     char *cTarget;
3996     const char *cTargetLimit;
3997     char *cBuf;
3998     UChar *uBuf,*test;
3999     int32_t uBufSize = 120;
4000     UErrorCode errorCode=U_ZERO_ERROR;
4001     UConverter *cnv;
4002     int32_t* offsets = (int32_t*) malloc(uBufSize * sizeof(int32_t) * 5);
4003     int32_t* myOff= offsets;
4004     cnv=ucnv_open("ISO_2022,locale=kr", &errorCode);
4005     if(U_FAILURE(errorCode)) {
4006         log_data_err("Unable to open a iso-2022 converter: %s\n", u_errorName(errorCode));
4007         return;
4008     }
4009
4010     uBuf =  (UChar*)malloc(uBufSize * sizeof(UChar)*5);
4011     cBuf =(char*)malloc(uBufSize * sizeof(char) * 5);
4012     uSource = (const UChar*)in;
4013     uSourceLimit=(const UChar*)in + (sizeof(in)/sizeof(in[0]));
4014     cTarget = cBuf;
4015     cTargetLimit = cBuf +uBufSize*5;
4016     uTarget = uBuf;
4017     uTargetLimit = uBuf+ uBufSize*5;
4018     ucnv_fromUnicode( cnv , &cTarget, cTargetLimit,&uSource,uSourceLimit,myOff,TRUE, &errorCode);
4019     if(U_FAILURE(errorCode)){
4020         log_err("ucnv_fromUnicode conversion failed reason %s\n", u_errorName(errorCode));
4021         return;
4022     }
4023     cSource = cBuf;
4024     cSourceLimit =cTarget;
4025     test =uBuf;
4026     myOff=offsets;
4027     ucnv_toUnicode(cnv,&uTarget,uTargetLimit,&cSource,cSourceLimit,myOff,TRUE,&errorCode);
4028     if(U_FAILURE(errorCode)){
4029         log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode));
4030         return;
4031     }
4032     uSource = (const UChar*)in;
4033     while(uSource<uSourceLimit){
4034         if(*test!=*uSource){
4035             log_err("Expected : \\u%04X \t Got: \\u%04X\n",*uSource,*test) ;
4036         }
4037         uSource++;
4038         test++;
4039     }
4040     TestGetNextUChar2022(cnv, cBuf, cTarget, in, "ISO-2022-KR encoding");
4041     TestSmallTargetBuffer(in,(const UChar*)in + (sizeof(in)/sizeof(in[0])),cnv);
4042     TestSmallSourceBuffer(in,(const UChar*)in + (sizeof(in)/sizeof(in[0])),cnv);
4043     TestToAndFromUChars(in,(const UChar*)in + (sizeof(in)/sizeof(in[0])),cnv);
4044     TestJitterbug930("csISO2022KR");
4045     /*Test for the condition where there is an invalid character*/
4046     ucnv_reset(cnv);
4047     {
4048         static const uint8_t source2[]={0x1b,0x24,0x053};
4049         ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_STOP, NULL, NULL, NULL, &errorCode);
4050         TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_ILLEGAL_ESCAPE_SEQUENCE, "an invalid character [ISO-2022-KR]");
4051     }
4052     ucnv_close(cnv);
4053     free(uBuf);
4054     free(cBuf);
4055     free(offsets);
4056 }
4057
4058 static void
4059 TestISO_2022_KR_1() {
4060     /* test input */
4061     static const uint16_t in[]={
4062                     0x9F4B,0x9F4E,0x9F52,0x9F5F,0x9F61,0x9F67,0x9F6A,0x000A,0x000D
4063                    ,0x9F6C,0x9F77,0x9F8D,0x9F90,0x9F95,0x9F9C,0xAC00,0xAC01,0xAC04
4064                    ,0xAC07,0xAC08,0xAC09,0x0025,0x0026,0x0027,0x000A,0x000D,0x0028,0x0029
4065                    ,0x002A,0x002B,0x002C,0x002D,0x002E,0x53C3,0x53C8,0x53C9,0x53CA,0x53CB
4066                    ,0x53CD,0x53D4,0x53D6,0x53D7,0x53DB,0x000A,0x000D,0x53E1,0x53E2
4067                    ,0x53E3,0x53E4,0x000A,0x000D};
4068     const UChar* uSource;
4069     const UChar* uSourceLimit;
4070     const char* cSource;
4071     const char* cSourceLimit;
4072     UChar *uTargetLimit =NULL;
4073     UChar *uTarget;
4074     char *cTarget;
4075     const char *cTargetLimit;
4076     char *cBuf;
4077     UChar *uBuf,*test;
4078     int32_t uBufSize = 120;
4079     UErrorCode errorCode=U_ZERO_ERROR;
4080     UConverter *cnv;
4081     int32_t* offsets = (int32_t*) malloc(uBufSize * sizeof(int32_t) * 5);
4082     int32_t* myOff= offsets;
4083     cnv=ucnv_open("ibm-25546", &errorCode);
4084     if(U_FAILURE(errorCode)) {
4085         log_data_err("Unable to open a iso-2022 converter: %s\n", u_errorName(errorCode));
4086         return;
4087     }
4088
4089     uBuf =  (UChar*)malloc(uBufSize * sizeof(UChar)*5);
4090     cBuf =(char*)malloc(uBufSize * sizeof(char) * 5);
4091     uSource = (const UChar*)in;
4092     uSourceLimit=(const UChar*)in + (sizeof(in)/sizeof(in[0]));
4093     cTarget = cBuf;
4094     cTargetLimit = cBuf +uBufSize*5;
4095     uTarget = uBuf;
4096     uTargetLimit = uBuf+ uBufSize*5;
4097     ucnv_fromUnicode( cnv , &cTarget, cTargetLimit,&uSource,uSourceLimit,myOff,TRUE, &errorCode);
4098     if(U_FAILURE(errorCode)){
4099         log_err("ucnv_fromUnicode conversion failed reason %s\n", u_errorName(errorCode));
4100         return;
4101     }
4102     cSource = cBuf;
4103     cSourceLimit =cTarget;
4104     test =uBuf;
4105     myOff=offsets;
4106     ucnv_toUnicode(cnv,&uTarget,uTargetLimit,&cSource,cSourceLimit,myOff,TRUE,&errorCode);
4107     if(U_FAILURE(errorCode)){
4108         log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode));
4109         return;
4110     }
4111     uSource = (const UChar*)in;
4112     while(uSource<uSourceLimit){
4113         if(*test!=*uSource){
4114             log_err("Expected : \\u%04X \t Got: \\u%04X\n",*uSource,*test) ;
4115         }
4116         uSource++;
4117         test++;
4118     }
4119     ucnv_reset(cnv);
4120     TestGetNextUChar2022(cnv, cBuf, cTarget, in, "ISO-2022-KR encoding");
4121     TestSmallTargetBuffer(in,(const UChar*)in + (sizeof(in)/sizeof(in[0])),cnv);
4122     TestSmallSourceBuffer(in,(const UChar*)in + (sizeof(in)/sizeof(in[0])),cnv);
4123     ucnv_reset(cnv);
4124     TestToAndFromUChars(in,(const UChar*)in + (sizeof(in)/sizeof(in[0])),cnv);
4125         /*Test for the condition where there is an invalid character*/
4126     ucnv_reset(cnv);
4127     {
4128         static const uint8_t source2[]={0x1b,0x24,0x053};
4129         ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_STOP, NULL, NULL, NULL, &errorCode);
4130         TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_ILLEGAL_ESCAPE_SEQUENCE, "an invalid character [ISO-2022-KR]");
4131     }
4132     ucnv_close(cnv);
4133     free(uBuf);
4134     free(cBuf);
4135     free(offsets);
4136 }
4137
4138 static void TestJitterbug2411(){
4139     static const char* source = "\x1b\x24\x29\x43\x6b\x6b\x6e\x6e\x6a\x68\x70\x6f\x69\x75\x79\x71\x77\x65\x68\x67\x0A"
4140                          "\x1b\x24\x29\x43\x6a\x61\x73\x64\x66\x6a\x61\x73\x64\x66\x68\x6f\x69\x75\x79\x1b\x24\x29\x43";
4141     UConverter* kr=NULL, *kr1=NULL;
4142     UErrorCode errorCode = U_ZERO_ERROR;
4143     UChar tgt[100]={'\0'};
4144     UChar* target = tgt;
4145     UChar* targetLimit = target+100;
4146     kr=ucnv_open("iso-2022-kr", &errorCode);
4147     if(U_FAILURE(errorCode)) {
4148         log_data_err("Unable to open a iso-2022-kr converter: %s\n", u_errorName(errorCode));
4149         return;
4150     }
4151     ucnv_toUnicode(kr,&target,targetLimit,&source,source+uprv_strlen(source),NULL,TRUE,&errorCode);
4152     if(U_FAILURE(errorCode)) {
4153         log_err("iso-2022-kr cannot handle multiple escape sequences : %s\n", u_errorName(errorCode));
4154         return;
4155     }
4156     kr1 = ucnv_open("ibm-25546", &errorCode);
4157     if(U_FAILURE(errorCode)) {
4158         log_data_err("Unable to open a iso-2022-kr_1 converter: %s\n", u_errorName(errorCode));
4159         return;
4160     }
4161     target = tgt;
4162     targetLimit = target+100;
4163     ucnv_toUnicode(kr,&target,targetLimit,&source,source+uprv_strlen(source),NULL,TRUE,&errorCode);
4164
4165     if(U_FAILURE(errorCode)) {
4166         log_err("iso-2022-kr_1 cannot handle multiple escape sequences : %s\n", u_errorName(errorCode));
4167         return;
4168     }
4169
4170     ucnv_close(kr);
4171     ucnv_close(kr1);
4172
4173 }
4174
4175 static void
4176 TestJIS(){
4177     /* From Unicode moved to testdata/conversion.txt */
4178     /*To Unicode*/
4179     {
4180         static const uint8_t sampleTextJIS[] = {
4181             0x1b,0x28,0x48,0x41,0x42, /*jis-Roman*/
4182             0x1b,0x28,0x49,0x41,0x42, /*Katakana Set*/
4183             0x1b,0x26,0x40,0x1b,0x24,0x42,0x21,0x21 /*recognize and ignore <esc>&@*/
4184         };
4185         static const uint16_t expectedISO2022JIS[] = {
4186             0x0041, 0x0042,
4187             0xFF81, 0xFF82,
4188             0x3000
4189         };
4190         static const int32_t  toISO2022JISOffs[]={
4191             3,4,
4192             8,9,
4193             16
4194         };
4195
4196         static const uint8_t sampleTextJIS7[] = {
4197             0x1b,0x28,0x48,0x41,0x42, /*JIS7-Roman*/
4198             0x1b,0x28,0x49,0x41,0x42, /*Katakana Set*/
4199             0x1b,0x24,0x42,0x21,0x21,
4200             0x0e,0x41,0x42,0x0f,      /*Test Katakana set with SI and SO */
4201             0x21,0x22,
4202             0x1b,0x26,0x40,0x1b,0x24,0x42,0x21,0x21 /*recognize and ignore <esc>&@*/
4203         };
4204         static const uint16_t expectedISO2022JIS7[] = {
4205             0x0041, 0x0042,
4206             0xFF81, 0xFF82,
4207             0x3000,
4208             0xFF81, 0xFF82,
4209             0x3001,
4210             0x3000
4211         };
4212         static const int32_t  toISO2022JIS7Offs[]={
4213             3,4,
4214             8,9,
4215             13,16,
4216             17,
4217             19,27
4218         };
4219         static const uint8_t sampleTextJIS8[] = {
4220             0x1b,0x28,0x48,0x41,0x42, /*JIS8-Roman*/
4221             0xa1,0xc8,0xd9,/*Katakana Set*/
4222             0x1b,0x28,0x42,
4223             0x41,0x42,
4224             0xb1,0xc3, /*Katakana Set*/
4225             0x1b,0x24,0x42,0x21,0x21
4226         };
4227         static const uint16_t expectedISO2022JIS8[] = {
4228             0x0041, 0x0042,
4229             0xff61, 0xff88, 0xff99,
4230             0x0041, 0x0042,
4231             0xff71, 0xff83,
4232             0x3000
4233         };
4234         static const int32_t  toISO2022JIS8Offs[]={
4235             3, 4,  5,  6,
4236             7, 11, 12, 13,
4237             14, 18,
4238         };
4239
4240         testConvertToU(sampleTextJIS,sizeof(sampleTextJIS),expectedISO2022JIS,
4241             sizeof(expectedISO2022JIS)/sizeof(expectedISO2022JIS[0]),"JIS", toISO2022JISOffs,TRUE);
4242         testConvertToU(sampleTextJIS7,sizeof(sampleTextJIS7),expectedISO2022JIS7,
4243             sizeof(expectedISO2022JIS7)/sizeof(expectedISO2022JIS7[0]),"JIS7", toISO2022JIS7Offs,TRUE);
4244         testConvertToU(sampleTextJIS8,sizeof(sampleTextJIS8),expectedISO2022JIS8,
4245             sizeof(expectedISO2022JIS8)/sizeof(expectedISO2022JIS8[0]),"JIS8", toISO2022JIS8Offs,TRUE);
4246     }
4247
4248 }
4249
4250
4251 #if 0
4252  ICU 4.4 (ticket #7314) removes mappings for CNS 11643 planes 3..7
4253
4254 static void TestJitterbug915(){
4255 /* tests for roundtripping of the below sequence
4256 \x1b$)G\x0E#!#"###$#%#&#'#(#)#*#+          / *plane 1 * /
4257 \x1b$*H\x1bN"!\x1bN""\x1bN"#\x1bN"$\x1bN"% / *plane 2 * /
4258 \x1b$+I\x1bO"D\x1bO"E\x1bO"F\x1bO"G\x1bO"H / *plane 3 * /
4259 \x1b$+J\x1bO!D\x1bO!E\x1bO"j\x1bO"k\x1bO"l / *plane 4 * /
4260 \x1b$+K\x1bO!t\x1bO"P\x1bO"Q\x1bO#7\x1bO"\ / *plane 5 * /
4261 \x1b$+L\x1bO!#\x1bO",\x1bO#N\x1bO!n\x1bO#q / *plane 6 * /
4262 \x1b$+M\x1bO"q\x1bO!N\x1bO!j\x1bO#:\x1bO#o / *plane 7 * /
4263 */
4264     static const char cSource[]={
4265         0x1B, 0x24, 0x29, 0x47, 0x0E, 0x23, 0x21, 0x23, 0x22, 0x23,
4266         0x23, 0x23, 0x24, 0x23, 0x25, 0x23, 0x26, 0x23, 0x27, 0x23,
4267         0x28, 0x23, 0x29, 0x23, 0x2A, 0x23, 0x2B, 0x0F, 0x2F, 0x2A,
4268         0x70, 0x6C, 0x61, 0x6E, 0x65, 0x20, 0x31, 0x20, 0x2A, 0x2F,
4269         0x0D, 0x0A, 0x1B, 0x24, 0x2A, 0x48, 0x1B, 0x4E, 0x22, 0x21,
4270         0x1B, 0x4E, 0x22, 0x22, 0x1B, 0x4E, 0x22, 0x23, 0x1B, 0x4E,
4271         0x22, 0x24, 0x1B, 0x4E, 0x22, 0x25, 0x2F, 0x2A, 0x70,
4272         0x6C, 0x61, 0x6E, 0x65, 0x32, 0x2A, 0x2F, 0x20, 0x0D, 0x0A,
4273         0x1B, 0x24, 0x2B, 0x49, 0x1B, 0x4F, 0x22, 0x44, 0x1B, 0x4F,
4274         0x22, 0x45, 0x1B, 0x4F, 0x22, 0x46, 0x1B, 0x4F, 0x22, 0x47,
4275         0x1B, 0x4F, 0x22, 0x48, 0x2F, 0x2A, 0x70, 0x6C, 0x61,
4276         0x6E, 0x65, 0x20, 0x33, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B,
4277         0x24, 0x2B, 0x4A, 0x1B, 0x4F, 0x21, 0x44, 0x1B, 0x4F, 0x21,
4278         0x45, 0x1B, 0x4F, 0x22, 0x6A, 0x1B, 0x4F, 0x22, 0x6B, 0x1B,
4279         0x4F, 0x22, 0x6C, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E,
4280         0x65, 0x20, 0x34, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B, 0x24,
4281         0x2B, 0x4B, 0x1B, 0x4F, 0x21, 0x74, 0x1B, 0x4F, 0x22, 0x50,
4282         0x1B, 0x4F, 0x22, 0x51, 0x1B, 0x4F, 0x23, 0x37, 0x1B, 0x4F,
4283         0x22, 0x5C, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E,
4284         0x65, 0x20, 0x35, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B, 0x24,
4285         0x2B, 0x4C, 0x1B, 0x4F, 0x21, 0x23, 0x1B, 0x4F, 0x22, 0x2C,
4286         0x1B, 0x4F, 0x23, 0x4E, 0x1B, 0x4F, 0x21, 0x6E, 0x1B, 0x4F,
4287         0x23, 0x71, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E, 0x65,
4288         0x20, 0x36, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B, 0x24, 0x2B,
4289         0x4D, 0x1B, 0x4F, 0x22, 0x71, 0x1B, 0x4F, 0x21, 0x4E, 0x1B,
4290         0x4F, 0x21, 0x6A, 0x1B, 0x4F, 0x23, 0x3A, 0x1B, 0x4F, 0x23,
4291         0x6F, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E, 0x65, 0x20,
4292         0x37, 0x20, 0x2A, 0x2F
4293     };
4294     UChar uTarget[500]={'\0'};
4295     UChar* utarget=uTarget;
4296     UChar* utargetLimit=uTarget+sizeof(uTarget)/2;
4297
4298     char cTarget[500]={'\0'};
4299     char* ctarget=cTarget;
4300     char* ctargetLimit=cTarget+sizeof(cTarget);
4301     const char* csource=cSource;
4302     const char* tempSrc = cSource;
4303     UErrorCode err=U_ZERO_ERROR;
4304
4305     UConverter* conv =ucnv_open("ISO_2022_CN_EXT",&err);
4306     if(U_FAILURE(err)) {
4307         log_data_err("Unable to open a iso-2022 converter: %s\n", u_errorName(err));
4308         return;
4309     }
4310     ucnv_toUnicode(conv,&utarget,utargetLimit,&csource,csource+sizeof(cSource),NULL,TRUE,&err);
4311     if(U_FAILURE(err)) {
4312         log_err("iso-2022-CN to Unicode conversion failed: %s\n", u_errorName(err));
4313         return;
4314     }
4315     utargetLimit=utarget;
4316     utarget = uTarget;
4317     ucnv_fromUnicode(conv,&ctarget,ctargetLimit,(const UChar**)&utarget,utargetLimit,NULL,TRUE,&err);
4318     if(U_FAILURE(err)) {
4319         log_err("iso-2022-CN from Unicode conversion failed: %s\n", u_errorName(err));
4320         return;
4321     }
4322     ctargetLimit=ctarget;
4323     ctarget =cTarget;
4324     while(ctarget<ctargetLimit){
4325         if(*ctarget != *tempSrc){
4326             log_err("j915[%d] Expected : \\x%02X \t Got: \\x%02X\n", (int)(ctarget-cTarget), *ctarget,(int)*tempSrc) ;
4327         }
4328         ++ctarget;
4329         ++tempSrc;
4330     }
4331
4332     ucnv_close(conv);
4333 }
4334
4335 static void
4336 TestISO_2022_CN_EXT() {
4337     /* test input */
4338     static const uint16_t in[]={
4339                 /* test Non-BMP code points */
4340          0xD869, 0xDE99, 0xD869, 0xDE9C, 0xD869, 0xDE9D, 0xD869, 0xDE9E, 0xD869, 0xDE9F,
4341          0xD869, 0xDEA0, 0xD869, 0xDEA5, 0xD869, 0xDEA6, 0xD869, 0xDEA7, 0xD869, 0xDEA8,
4342          0xD869, 0xDEAB, 0xD869, 0xDEAC, 0xD869, 0xDEAD, 0xD869, 0xDEAE, 0xD869, 0xDEAF,
4343          0xD869, 0xDEB0, 0xD869, 0xDEB1, 0xD869, 0xDEB3, 0xD869, 0xDEB5, 0xD869, 0xDEB6,
4344          0xD869, 0xDEB7, 0xD869, 0xDEB8, 0xD869, 0xDEB9, 0xD869, 0xDEBA, 0xD869, 0xDEBB,
4345          0xD869, 0xDEBC, 0xD869, 0xDEBD, 0xD869, 0xDEBE, 0xD869, 0xDEBF, 0xD869, 0xDEC0,
4346          0xD869, 0xDEC1, 0xD869, 0xDEC2, 0xD869, 0xDEC3, 0xD869, 0xDEC4, 0xD869, 0xDEC8,
4347          0xD869, 0xDECA, 0xD869, 0xDECB, 0xD869, 0xDECD, 0xD869, 0xDECE, 0xD869, 0xDECF,
4348          0xD869, 0xDED0, 0xD869, 0xDED1, 0xD869, 0xDED2, 0xD869, 0xDED3, 0xD869, 0xDED4,
4349          0xD869, 0xDED5,
4350
4351          0x4DB3, 0x4DB4, 0x4DB5, 0x4E00, 0x4E00, 0x4E01, 0x4E02, 0x4E03, 0x000D, 0x000A,
4352          0x0392, 0x0393, 0x0394, 0x0395, 0x0396, 0x0397, 0x33E0, 0x33E6, 0x000D, 0x000A,
4353          0x4E05, 0x4E07, 0x4E04, 0x4E08, 0x4E08, 0x4E09, 0x4E0A, 0x4E0B, 0x000D, 0x000A,
4354          0x4E0C, 0x0021, 0x0022, 0x0023, 0x0024, 0xFF40, 0xFF41, 0xFF42, 0x000D, 0x000A,
4355          0xFF43, 0xFF44, 0xFF45, 0xFF46, 0xFF47, 0xFF48, 0xFF49, 0xFF4A, 0x000D, 0x000A,
4356          0xFF4B, 0xFF4C, 0xFF4D, 0xFF4E, 0xFF4F, 0x6332, 0x63B0, 0x643F, 0x000D, 0x000A,
4357          0x64D8, 0x8004, 0x6BEA, 0x6BF3, 0x6BFD, 0x6BF5, 0x6BF9, 0x6C05, 0x000D, 0x000A,
4358          0x0041, 0x0042, 0x0043, 0x0044, 0x0045, 0x0046, 0x0047, 0x0048, 0x000D, 0x000A,
4359          0x6C07, 0x6C06, 0x6C0D, 0x6C15, 0x9CD9, 0x9CDC, 0x9CDD, 0x9CDF, 0x000D, 0x000A,
4360          0x9CE2, 0x977C, 0x9785, 0x9791, 0x9792, 0x9794, 0x97AF, 0x97AB, 0x000D, 0x000A,
4361          0x97A3, 0x97B2, 0x97B4, 0x9AB1, 0x9AB0, 0x9AB7, 0x9E58, 0x9AB6, 0x000D, 0x000A,
4362          0x9ABA, 0x9ABC, 0x9AC1, 0x9AC0, 0x9AC5, 0x9AC2, 0x9ACB, 0x9ACC, 0x000D, 0x000A,
4363          0x9AD1, 0x9B45, 0x9B43, 0x9B47, 0x9B49, 0x9B48, 0x9B4D, 0x9B51, 0x000D, 0x000A,
4364          0x98E8, 0x990D, 0x992E, 0x9955, 0x9954, 0x9ADF, 0x3443, 0x3444, 0x000D, 0x000A,
4365          0x3445, 0x3449, 0x344A, 0x344B, 0x60F2, 0x60F3, 0x60F4, 0x60F5, 0x000D, 0x000A,
4366          0x60F6, 0x60F7, 0x60F8, 0x60F9, 0x60FA, 0x60FB, 0x60FC, 0x60FD, 0x000D, 0x000A,
4367          0x60FE, 0x60FF, 0x6100, 0x6101, 0x6102, 0x0041, 0x0042, 0x0043, 0x000D, 0x000A,
4368          0x0044, 0x0045, 0x0046, 0x0047, 0x0048, 0x0049, 0x004A, 0x004B, 0x000D, 0x000A,
4369
4370          0x33E7, 0x33E8, 0x33E9, 0x33EA, 0x000D, 0x000A
4371
4372       };
4373
4374     const UChar* uSource;
4375     const UChar* uSourceLimit;
4376     const char* cSource;
4377     const char* cSourceLimit;
4378     UChar *uTargetLimit =NULL;
4379     UChar *uTarget;
4380     char *cTarget;
4381     const char *cTargetLimit;
4382     char *cBuf;
4383     UChar *uBuf,*test;
4384     int32_t uBufSize = 180;
4385     UErrorCode errorCode=U_ZERO_ERROR;
4386     UConverter *cnv;
4387     int32_t* offsets = (int32_t*) malloc(uBufSize * sizeof(int32_t) * 5);
4388     int32_t* myOff= offsets;
4389     cnv=ucnv_open("ISO_2022,locale=cn,version=1", &errorCode);
4390     if(U_FAILURE(errorCode)) {
4391         log_data_err("Unable to open a iso-2022 converter: %s\n", u_errorName(errorCode));
4392         return;
4393     }
4394
4395     uBuf =  (UChar*)malloc(uBufSize * sizeof(UChar)*5);
4396     cBuf =(char*)malloc(uBufSize * sizeof(char) * 10);
4397     uSource = (const UChar*)in;
4398     uSourceLimit=(const UChar*)in + (sizeof(in)/sizeof(in[0]));
4399     cTarget = cBuf;
4400     cTargetLimit = cBuf +uBufSize*5;
4401     uTarget = uBuf;
4402     uTargetLimit = uBuf+ uBufSize*5;
4403     ucnv_fromUnicode( cnv , &cTarget, cTargetLimit,&uSource,uSourceLimit,myOff,TRUE, &errorCode);
4404     if(U_FAILURE(errorCode)){
4405         log_err("ucnv_fromUnicode conversion failed reason %s\n", u_errorName(errorCode));
4406         return;
4407     }
4408     cSource = cBuf;
4409     cSourceLimit =cTarget;
4410     test =uBuf;
4411     myOff=offsets;
4412     ucnv_toUnicode(cnv,&uTarget,uTargetLimit,&cSource,cSourceLimit,myOff,TRUE,&errorCode);
4413     if(U_FAILURE(errorCode)){
4414         log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode));
4415         return;
4416     }
4417     uSource = (const UChar*)in;
4418     while(uSource<uSourceLimit){
4419         if(*test!=*uSource){
4420             log_err("Expected : \\u%04X \t Got: \\u%04X\n",*uSource,(int)*test) ;
4421         }
4422         else{
4423             log_verbose("      Got: \\u%04X\n",(int)*test) ;
4424         }
4425         uSource++;
4426         test++;
4427     }
4428     TestSmallTargetBuffer(in,(const UChar*)in + (sizeof(in)/sizeof(in[0])),cnv);
4429     TestSmallSourceBuffer(in,(const UChar*)in + (sizeof(in)/sizeof(in[0])),cnv);
4430     /*Test for the condition where there is an invalid character*/
4431     ucnv_reset(cnv);
4432     {
4433         static const uint8_t source2[]={0x0e,0x24,0x053};
4434         TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_ZERO_ERROR, "an invalid character [ISO-2022-CN-EXT]");
4435     }
4436     ucnv_close(cnv);
4437     free(uBuf);
4438     free(cBuf);
4439     free(offsets);
4440 }
4441 #endif
4442
4443 static void
4444 TestISO_2022_CN() {
4445     /* test input */
4446     static const uint16_t in[]={
4447          /* jitterbug 951 */
4448          0xFF2D, 0xFF49, 0xFF58, 0xFF45, 0xFF44, 0x0020, 0xFF43, 0xFF48, 0xFF41, 0xFF52,
4449          0x0020, 0xFF06, 0x0020, 0xFF11, 0xFF12, 0xFF13, 0xFF14, 0xFF15, 0xFF16, 0xFF17,
4450          0xFF18, 0xFF19, 0xFF10, 0x0020, 0xFF4E, 0xFF55, 0xFF4D, 0xFF42, 0xFF45, 0xFF52,
4451          0x0020, 0xFF54, 0xFF45, 0xFF53, 0xFF54, 0x0020, 0xFF4C, 0xFF49, 0xFF4E, 0xFF45,
4452          0x0020, 0x0045, 0x004e, 0x0044,
4453          /**/
4454          0x4E00, 0x4E00, 0x4E01, 0x4E03, 0x60F6, 0x60F7, 0x60F8, 0x60FB, 0x000D, 0x000A,
4455          0x0392, 0x0393, 0x0394, 0x0395, 0x0396, 0x0397, 0x60FB, 0x60FC, 0x000D, 0x000A,
4456          0x4E07, 0x4E08, 0x4E08, 0x4E09, 0x4E0A, 0x4E0B, 0x0042, 0x0043, 0x000D, 0x000A,
4457          0x4E0C, 0x0021, 0x0022, 0x0023, 0x0024, 0xFF40, 0xFF41, 0xFF42, 0x000D, 0x000A,
4458          0xFF43, 0xFF44, 0xFF45, 0xFF46, 0xFF47, 0xFF48, 0xFF49, 0xFF4A, 0x000D, 0x000A,
4459          0xFF4B, 0xFF4C, 0xFF4D, 0xFF4E, 0xFF4F, 0x6332, 0x63B0, 0x643F, 0x000D, 0x000A,
4460          0x64D8, 0x8004, 0x6BEA, 0x6BF3, 0x6BFD, 0x6BF5, 0x6BF9, 0x6C05, 0x000D, 0x000A,
4461          0x6C07, 0x6C06, 0x6C0D, 0x6C15, 0x9CD9, 0x9CDC, 0x9CDD, 0x9CDF, 0x000D, 0x000A,
4462          0x9CE2, 0x977C, 0x9785, 0x9791, 0x9792, 0x9794, 0x97AF, 0x97AB, 0x000D, 0x000A,
4463          0x97A3, 0x97B2, 0x97B4, 0x9AB1, 0x9AB0, 0x9AB7, 0x9E58, 0x9AB6, 0x000D, 0x000A,
4464          0x9ABA, 0x9ABC, 0x9AC1, 0x9AC0, 0x9AC5, 0x9AC2, 0x9ACB, 0x9ACC, 0x000D, 0x000A,
4465          0x9AD1, 0x9B45, 0x9B43, 0x9B47, 0x9B49, 0x9B48, 0x9B4D, 0x9B51, 0x000D, 0x000A,
4466          0x98E8, 0x990D, 0x992E, 0x9955, 0x9954, 0x9ADF, 0x60FE, 0x60FF, 0x000D, 0x000A,
4467          0x60F2, 0x60F3, 0x60F4, 0x60F5, 0x000D, 0x000A, 0x60F9, 0x60FA, 0x000D, 0x000A,
4468          0x6100, 0x6101, 0x0041, 0x0044, 0x0045, 0x0046, 0x0047, 0x0048, 0x000D, 0x000A,
4469          0x247D, 0x247E, 0x247F, 0x2480, 0x2481, 0x2482, 0x2483, 0x2484, 0x2485, 0x2486,
4470          0x2487, 0x2460, 0x2461, 0xFF20, 0xFF21, 0xFF22, 0x0049, 0x004A, 0x000D, 0x000A,
4471
4472       };
4473     const UChar* uSource;
4474     const UChar* uSourceLimit;
4475     const char* cSource;
4476     const char* cSourceLimit;
4477     UChar *uTargetLimit =NULL;
4478     UChar *uTarget;
4479     char *cTarget;
4480     const char *cTargetLimit;
4481     char *cBuf;
4482     UChar *uBuf,*test;
4483     int32_t uBufSize = 180;
4484     UErrorCode errorCode=U_ZERO_ERROR;
4485     UConverter *cnv;
4486     int32_t* offsets = (int32_t*) malloc(uBufSize * sizeof(int32_t) * 5);
4487     int32_t* myOff= offsets;
4488     cnv=ucnv_open("ISO_2022,locale=cn,version=0", &errorCode);
4489     if(U_FAILURE(errorCode)) {
4490         log_data_err("Unable to open a iso-2022 converter: %s\n", u_errorName(errorCode));
4491         return;
4492     }
4493
4494     uBuf =  (UChar*)malloc(uBufSize * sizeof(UChar)*5);
4495     cBuf =(char*)malloc(uBufSize * sizeof(char) * 10);
4496     uSource = (const UChar*)in;
4497     uSourceLimit=(const UChar*)in + (sizeof(in)/sizeof(in[0]));
4498     cTarget = cBuf;
4499     cTargetLimit = cBuf +uBufSize*5;
4500     uTarget = uBuf;
4501     uTargetLimit = uBuf+ uBufSize*5;
4502     ucnv_fromUnicode( cnv , &cTarget, cTargetLimit,&uSource,uSourceLimit,myOff,TRUE, &errorCode);
4503     if(U_FAILURE(errorCode)){
4504         log_err("ucnv_fromUnicode conversion failed reason %s\n", u_errorName(errorCode));
4505         return;
4506     }
4507     cSource = cBuf;
4508     cSourceLimit =cTarget;
4509     test =uBuf;
4510     myOff=offsets;
4511     ucnv_toUnicode(cnv,&uTarget,uTargetLimit,&cSource,cSourceLimit,myOff,TRUE,&errorCode);
4512     if(U_FAILURE(errorCode)){
4513         log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode));
4514         return;
4515     }
4516     uSource = (const UChar*)in;
4517     while(uSource<uSourceLimit){
4518         if(*test!=*uSource){
4519             log_err("Expected : \\u%04X \t Got: \\u%04X\n",*uSource,(int)*test) ;
4520         }
4521         else{
4522             log_verbose("      Got: \\u%04X\n",(int)*test) ;
4523         }
4524         uSource++;
4525         test++;
4526     }
4527     TestGetNextUChar2022(cnv, cBuf, cTarget, in, "ISO-2022-CN encoding");
4528     TestSmallTargetBuffer(in,(const UChar*)in + (sizeof(in)/sizeof(in[0])),cnv);
4529     TestSmallSourceBuffer(in,(const UChar*)in + (sizeof(in)/sizeof(in[0])),cnv);
4530     TestToAndFromUChars(in,(const UChar*)in + (sizeof(in)/sizeof(in[0])),cnv);
4531     TestJitterbug930("csISO2022CN");
4532     /*Test for the condition where there is an invalid character*/
4533     ucnv_reset(cnv);
4534     {
4535         static const uint8_t source2[]={0x0e,0x24,0x053};
4536         TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_ZERO_ERROR, "an invalid character [ISO-2022-CN]");
4537     }
4538
4539     ucnv_close(cnv);
4540     free(uBuf);
4541     free(cBuf);
4542     free(offsets);
4543 }
4544
4545 /* Tests for empty segments in ISO-2022-JP/KR/CN, HZ, check that UConverterCallbackReason is UCNV_IRREGULAR */
/*
 * One empty-segment test case: which converter to open and the raw bytes
 * (with their count) that are fed through it.
 */
typedef struct EmptySegmentTest {
    const char *converterName;   /* name passed to ucnv_open(); NULL marks the end of a table */
    const char *inputText;       /* input bytes to convert */
    int         inputTextLength; /* number of bytes in inputText */
} EmptySegmentTest;
4551
4552 /* Callback for TestJitterbug6175, should only get called for empty segment errors */
4553 static void UCNV_TO_U_CALLBACK_EMPTYSEGMENT( const void *context, UConverterToUnicodeArgs *toArgs, const char* codeUnits,
4554                                              int32_t length, UConverterCallbackReason reason, UErrorCode * err ) {
4555     if (reason > UCNV_IRREGULAR) {
4556         return;
4557     }
4558     if (reason != UCNV_IRREGULAR) {
4559         log_err("toUnicode callback invoked for empty segment but reason is not UCNV_IRREGULAR\n");
4560     }
4561     /* Standard stuff below from UCNV_TO_U_CALLBACK_SUBSTITUTE */
4562     *err = U_ZERO_ERROR;
4563     ucnv_cbToUWriteSub(toArgs,0,err);
4564 }
4565
4566 enum { kEmptySegmentToUCharsMax = 64 };
4567 static void TestJitterbug6175(void) {
4568     static const char  iso2022jp_a[] = { 0x61, 0x62, 0x1B,0x24,0x42, 0x1B,0x28,0x42, 0x63, 0x64, 0x0D, 0x0A };
4569     static const char  iso2022kr_a[] = { 0x1B,0x24,0x29,0x43, 0x61, 0x0E, 0x0F, 0x62, 0x0D, 0x0A };
4570     static const char  iso2022cn_a[] = { 0x61, 0x1B,0x24,0x29,0x41, 0x62, 0x0E, 0x0F, 0x1B,0x24,0x2A,0x48, 0x1B,0x4E, 0x6A,0x65, 0x63, 0x0D, 0x0A };
4571     static const char  iso2022cn_b[] = { 0x61, 0x1B,0x24,0x29,0x41, 0x62, 0x0E, 0x1B,0x24,0x29,0x47, 0x68,0x64, 0x0F, 0x63, 0x0D, 0x0A };
4572     static const char  hzGB2312_a[]  = { 0x61, 0x62, 0x7E,0x7B, 0x7E,0x7D, 0x63, 0x64 };
4573     static const EmptySegmentTest emptySegmentTests[] = {
4574         /* converterName inputText    inputTextLength */
4575         { "ISO-2022-JP", iso2022jp_a, sizeof(iso2022jp_a) },
4576         { "ISO-2022-KR", iso2022kr_a, sizeof(iso2022kr_a) },
4577         { "ISO-2022-CN", iso2022cn_a, sizeof(iso2022cn_a) },
4578         { "ISO-2022-CN", iso2022cn_b, sizeof(iso2022cn_b) },
4579         { "HZ-GB-2312",  hzGB2312_a,  sizeof(hzGB2312_a)  },
4580         /* terminator: */
4581         { NULL,          NULL,        0,                  }
4582     };
4583     const EmptySegmentTest * testPtr;
4584     for (testPtr = emptySegmentTests; testPtr->converterName != NULL; ++testPtr) {
4585         UErrorCode   err = U_ZERO_ERROR;
4586         UConverter * cnv = ucnv_open(testPtr->converterName, &err);
4587         if (U_FAILURE(err)) {
4588             log_data_err("Unable to open %s converter: %s\n", testPtr->converterName, u_errorName(err));
4589             return;
4590         }
4591         ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_EMPTYSEGMENT, NULL, NULL, NULL, &err);
4592         if (U_FAILURE(err)) {
4593             log_data_err("Unable to setToUCallBack for %s converter: %s\n", testPtr->converterName, u_errorName(err));
4594             ucnv_close(cnv);
4595             return;
4596         }
4597         {
4598             UChar         toUChars[kEmptySegmentToUCharsMax];
4599             UChar *       toUCharsPtr = toUChars;
4600             const UChar * toUCharsLimit = toUCharsPtr + kEmptySegmentToUCharsMax;
4601             const char *  inCharsPtr = testPtr->inputText;
4602             const char *  inCharsLimit = inCharsPtr + testPtr->inputTextLength;
4603             ucnv_toUnicode(cnv, &toUCharsPtr, toUCharsLimit, &inCharsPtr, inCharsLimit, NULL, TRUE, &err);
4604         }
4605         ucnv_close(cnv);
4606     }
4607 }
4608
4609 static void
4610 TestEBCDIC_STATEFUL() {
4611     /* test input */
4612     static const uint8_t in[]={
4613         0x61,
4614         0x1a,
4615         0x0f, 0x4b,
4616         0x42,
4617         0x40,
4618         0x36,
4619     };
4620
4621     /* expected test results */
4622     static const int32_t results[]={
4623         /* number of bytes read, code point */
4624         1, 0x002f,
4625         1, 0x0092,
4626         2, 0x002e,
4627         1, 0xff62,
4628         1, 0x0020,
4629         1, 0x0096,
4630
4631     };
4632     static const uint8_t in2[]={
4633         0x0f,
4634         0xa1,
4635         0x01
4636     };
4637
4638     /* expected test results */
4639     static const int32_t results2[]={
4640         /* number of bytes read, code point */
4641         2, 0x203E,
4642         1, 0x0001,
4643     };
4644
4645     const char *source=(const char *)in, *limit=(const char *)in+sizeof(in);
4646     UErrorCode errorCode=U_ZERO_ERROR;
4647     UConverter *cnv=ucnv_open("ibm-930", &errorCode);
4648     if(U_FAILURE(errorCode)) {
4649         log_data_err("Unable to open a EBCDIC_STATEFUL(ibm-930) converter: %s\n", u_errorName(errorCode));
4650         return;
4651     }
4652     TestNextUChar(cnv, source, limit, results, "EBCDIC_STATEFUL(ibm-930)");
4653     ucnv_reset(cnv);
4654      /* Test the condition when source >= sourceLimit */
4655     TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source");
4656     ucnv_reset(cnv);
4657     /*Test for the condition where source > sourcelimit after consuming the shift chracter */
4658     {
4659         static const uint8_t source1[]={0x0f};
4660         TestNextUCharError(cnv, (const char*)source1, (const char*)source1+sizeof(source1), U_INDEX_OUTOFBOUNDS_ERROR, "a character is truncated");
4661     }
4662     /*Test for the condition where there is an invalid character*/
4663     ucnv_reset(cnv);
4664     {
4665         static const uint8_t source2[]={0x0e, 0x7F, 0xFF};
4666         TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_ZERO_ERROR, "an invalid character [EBCDIC STATEFUL]");
4667     }
4668     ucnv_reset(cnv);
4669     source=(const char*)in2;
4670     limit=(const char*)in2+sizeof(in2);
4671     TestNextUChar(cnv,source,limit,results2,"EBCDIC_STATEFUL(ibm-930),seq#2");
4672     ucnv_close(cnv);
4673
4674 }
4675
4676 static void
4677 TestGB18030() {
4678     /* test input */
4679     static const uint8_t in[]={
4680         0x24,
4681         0x7f,
4682         0x81, 0x30, 0x81, 0x30,
4683         0xa8, 0xbf,
4684         0xa2, 0xe3,
4685         0xd2, 0xbb,
4686         0x82, 0x35, 0x8f, 0x33,
4687         0x84, 0x31, 0xa4, 0x39,
4688         0x90, 0x30, 0x81, 0x30,
4689         0xe3, 0x32, 0x9a, 0x35
4690 #if 0
4691         /*
4692          * Feature removed   markus 2000-oct-26
4693          * Only some codepages must match surrogate pairs into supplementary code points -
4694          * see javadoc for ucnv_getNextUChar() and implementation notes in ucnvmbcs.c .
4695          * GB 18030 provides direct encodings for supplementary code points, therefore
4696          * it must not combine two single-encoded surrogates into one code point.
4697          */
4698         0x83, 0x36, 0xc8, 0x30, 0x83, 0x37, 0xb0, 0x34 /* separately encoded surrogates */
4699 #endif
4700     };
4701
4702     /* expected test results */
4703     static const int32_t results[]={
4704         /* number of bytes read, code point */
4705         1, 0x24,
4706         1, 0x7f,
4707         4, 0x80,
4708         2, 0x1f9,
4709         2, 0x20ac,
4710         2, 0x4e00,
4711         4, 0x9fa6,
4712         4, 0xffff,
4713         4, 0x10000,
4714         4, 0x10ffff
4715 #if 0
4716         /* Feature removed. See comment above. */
4717         8, 0x10000
4718 #endif
4719     };
4720
4721 /*    const char *source=(const char *)in,*limit=(const char *)in+sizeof(in); */
4722     UErrorCode errorCode=U_ZERO_ERROR;
4723     UConverter *cnv=ucnv_open("gb18030", &errorCode);
4724     if(U_FAILURE(errorCode)) {
4725         log_data_err("Unable to open a gb18030 converter: %s\n", u_errorName(errorCode));
4726         return;
4727     }
4728     TestNextUChar(cnv, (const char *)in, (const char *)in+sizeof(in), results, "gb18030");
4729     ucnv_close(cnv);
4730 }
4731
4732 static void
4733 TestLMBCS() {
4734     /* LMBCS-1 string */
4735     static const uint8_t pszLMBCS[]={
4736         0x61,
4737         0x01, 0x29,
4738         0x81,
4739         0xA0,
4740         0x0F, 0x27,
4741         0x0F, 0x91,
4742         0x14, 0x0a, 0x74,
4743         0x14, 0xF6, 0x02,
4744         0x14, 0xd8, 0x4d, 0x14, 0xdc, 0x56, /* UTF-16 surrogate pair */
4745         0x10, 0x88, 0xA0,
4746     };
4747
4748     /* Unicode UChar32 equivalents */
4749     static const UChar32 pszUnicode32[]={
4750         /* code point */
4751         0x00000061,
4752         0x00002013,
4753         0x000000FC,
4754         0x000000E1,
4755         0x00000007,
4756         0x00000091,
4757         0x00000a74,
4758         0x00000200,
4759         0x00023456, /* code point for surrogate pair */
4760         0x00005516
4761     };
4762
4763 /* Unicode UChar equivalents */
4764     static const UChar pszUnicode[]={
4765         /* code point */
4766         0x0061,
4767         0x2013,
4768         0x00FC,
4769         0x00E1,
4770         0x0007,
4771         0x0091,
4772         0x0a74,
4773         0x0200,
4774         0xD84D, /* low surrogate */
4775         0xDC56, /* high surrogate */
4776         0x5516
4777     };
4778
4779 /* expected test results */
4780     static const int offsets32[]={
4781         /* number of bytes read, code point */
4782         0,
4783         1,
4784         3,
4785         4,
4786         5,
4787         7,
4788         9,
4789         12,
4790         15,
4791         21,
4792         24
4793     };
4794
4795 /* expected test results */
4796     static const int offsets[]={
4797         /* number of bytes read, code point */
4798         0,
4799         1,
4800         3,
4801         4,
4802         5,
4803         7,
4804         9,
4805         12,
4806         15,
4807         18,
4808         21,
4809         24
4810     };
4811
4812
4813     UConverter *cnv;
4814
4815 #define NAME_LMBCS_1 "LMBCS-1"
4816 #define NAME_LMBCS_2 "LMBCS-2"
4817
4818
4819    /* Some basic open/close/property tests on some LMBCS converters */
4820     {
4821
4822       char expected_subchars[] = {0x3F};   /* ANSI Question Mark */
4823       char new_subchars [] = {0x7F};       /* subst char used by SmartSuite..*/
4824       char get_subchars [1];
4825       const char * get_name;
4826       UConverter *cnv1;
4827       UConverter *cnv2;
4828
4829       int8_t len = sizeof(get_subchars);
4830
4831       UErrorCode errorCode=U_ZERO_ERROR;
4832
4833       /* Open */
4834       cnv1=ucnv_open(NAME_LMBCS_1, &errorCode);
4835       if(U_FAILURE(errorCode)) {
4836          log_data_err("Unable to open a LMBCS-1 converter: %s\n", u_errorName(errorCode));
4837          return;
4838       }
4839       cnv2=ucnv_open(NAME_LMBCS_2, &errorCode);
4840       if(U_FAILURE(errorCode)) {
4841          log_data_err("Unable to open a LMBCS-2 converter: %s\n", u_errorName(errorCode));
4842          return;
4843       }
4844
4845       /* Name */
4846       get_name = ucnv_getName (cnv1, &errorCode);
4847       if (strcmp(NAME_LMBCS_1,get_name)){
4848          log_err("Unexpected converter name: %s\n", get_name);
4849       }
4850       get_name = ucnv_getName (cnv2, &errorCode);
4851       if (strcmp(NAME_LMBCS_2,get_name)){
4852          log_err("Unexpected converter name: %s\n", get_name);
4853       }
4854
4855       /* substitution chars */
4856       ucnv_getSubstChars (cnv1, get_subchars, &len, &errorCode);
4857       if(U_FAILURE(errorCode)) {
4858          log_err("Failure on get subst chars: %s\n", u_errorName(errorCode));
4859       }
4860       if (len!=1){
4861          log_err("Unexpected length of sub chars\n");
4862       }
4863       if (get_subchars[0] != expected_subchars[0]){
4864            log_err("Unexpected value of sub chars\n");
4865       }
4866       ucnv_setSubstChars (cnv2,new_subchars, len, &errorCode);
4867       if(U_FAILURE(errorCode)) {
4868          log_err("Failure on set subst chars: %s\n", u_errorName(errorCode));
4869       }
4870       ucnv_getSubstChars (cnv2, get_subchars, &len, &errorCode);
4871       if(U_FAILURE(errorCode)) {
4872          log_err("Failure on get subst chars: %s\n", u_errorName(errorCode));
4873       }
4874       if (len!=1){
4875          log_err("Unexpected length of sub chars\n");
4876       }
4877       if (get_subchars[0] != new_subchars[0]){
4878            log_err("Unexpected value of sub chars\n");
4879       }
4880       ucnv_close(cnv1);
4881       ucnv_close(cnv2);
4882
4883     }
4884
4885     /* LMBCS to Unicode - offsets */
4886     {
4887        UErrorCode errorCode=U_ZERO_ERROR;
4888
4889        const char * pSource = (const char *)pszLMBCS;
4890        const char * sourceLimit = (const char *)pszLMBCS + sizeof(pszLMBCS);
4891
4892        UChar Out [sizeof(pszUnicode) + 1];
4893        UChar * pOut = Out;
4894        UChar * OutLimit = Out + sizeof(pszUnicode)/sizeof(UChar);
4895
4896        int32_t off [sizeof(offsets)];
4897
4898       /* last 'offset' in expected results is just the final size.
4899          (Makes other tests easier). Compensate here: */
4900
4901        off[(sizeof(offsets)/sizeof(offsets[0]))-1] = sizeof(pszLMBCS);
4902
4903
4904
4905       cnv=ucnv_open("lmbcs", &errorCode); /* use generic name for LMBCS-1 */
4906       if(U_FAILURE(errorCode)) {
4907            log_data_err("Unable to open a LMBCS converter: %s\n", u_errorName(errorCode));
4908            return;
4909       }
4910
4911
4912
4913       ucnv_toUnicode (cnv,
4914                       &pOut,
4915                       OutLimit,
4916                       &pSource,
4917                       sourceLimit,
4918                       off,
4919                       TRUE,
4920                       &errorCode);
4921
4922
4923        if (memcmp(off,offsets,sizeof(offsets)))
4924        {
4925          log_err("LMBCS->Uni: Calculated offsets do not match expected results\n");
4926        }
4927        if (memcmp(Out,pszUnicode,sizeof(pszUnicode)))
4928        {
4929          log_err("LMBCS->Uni: Calculated codepoints do not match expected results\n");
4930        }
4931        ucnv_close(cnv);
4932     }
4933     {
4934    /* LMBCS to Unicode - getNextUChar */
4935       const char * sourceStart;
4936       const char *source=(const char *)pszLMBCS;
4937       const char *limit=(const char *)pszLMBCS+sizeof(pszLMBCS);
4938       const UChar32 *results= pszUnicode32;
4939       const int *off = offsets32;
4940
4941       UErrorCode errorCode=U_ZERO_ERROR;
4942       UChar32 uniChar;
4943
4944       cnv=ucnv_open("LMBCS-1", &errorCode);
4945       if(U_FAILURE(errorCode)) {
4946            log_data_err("Unable to open a LMBCS-1 converter: %s\n", u_errorName(errorCode));
4947            return;
4948       }
4949       else
4950       {
4951
4952          while(source<limit) {
4953             sourceStart=source;
4954             uniChar=ucnv_getNextUChar(cnv, &source, source + (off[1] - off[0]), &errorCode);
4955             if(U_FAILURE(errorCode)) {
4956                   log_err("LMBCS-1 ucnv_getNextUChar() failed: %s\n", u_errorName(errorCode));
4957                   break;
4958             } else if(source-sourceStart != off[1] - off[0] || uniChar != *results) {
4959                log_err("LMBCS-1 ucnv_getNextUChar() result %lx from %d bytes, should have been %lx from %d bytes.\n",
4960                    uniChar, (source-sourceStart), *results, *off);
4961                break;
4962             }
4963             results++;
4964             off++;
4965          }
4966        }
4967        ucnv_close(cnv);
4968     }
4969     { /* test locale & optimization group operations: Unicode to LMBCS */
4970
4971       UErrorCode errorCode=U_ZERO_ERROR;
4972       UConverter *cnv16he = ucnv_open("LMBCS-16,locale=he", &errorCode);
4973       UConverter *cnv16jp = ucnv_open("LMBCS-16,locale=ja_JP", &errorCode);
4974       UConverter *cnv01us = ucnv_open("LMBCS-1,locale=us_EN", &errorCode);
4975       UChar uniString [] = {0x0192}; /* Latin Small letter f with hook */
4976       const UChar * pUniOut = uniString;
4977       UChar * pUniIn = uniString;
4978       uint8_t lmbcsString [4];
4979       const char * pLMBCSOut = (const char *)lmbcsString;
4980       char * pLMBCSIn = (char *)lmbcsString;
4981
4982       /* 0192 (hook) converts to both group 3 & group 1. input locale should differentiate */
4983       ucnv_fromUnicode (cnv16he,
4984                         &pLMBCSIn, (pLMBCSIn + sizeof(lmbcsString)/sizeof(lmbcsString[0])),
4985                         &pUniOut, pUniOut + sizeof(uniString)/sizeof(uniString[0]),
4986                         NULL, 1, &errorCode);
4987
4988       if (lmbcsString[0] != 0x3 || lmbcsString[1] != 0x83)
4989       {
4990          log_err("LMBCS-16,locale=he gives unexpected translation\n");
4991       }
4992
4993       pLMBCSIn= (char *)lmbcsString;
4994       pUniOut = uniString;
4995       ucnv_fromUnicode (cnv01us,
4996                         &pLMBCSIn, (const char *)(lmbcsString + sizeof(lmbcsString)/sizeof(lmbcsString[0])),
4997                         &pUniOut, pUniOut + sizeof(uniString)/sizeof(uniString[0]),
4998                         NULL, 1, &errorCode);
4999
5000       if (lmbcsString[0] != 0x9F)
5001       {
5002          log_err("LMBCS-1,locale=US gives unexpected translation\n");
5003       }
5004
5005       /* single byte char from mbcs char set */
5006       lmbcsString[0] = 0xAE;  /* 1/2 width katakana letter small Yo */
5007       pLMBCSOut = (const char *)lmbcsString;
5008       pUniIn = uniString;
5009       ucnv_toUnicode (cnv16jp,
5010                         &pUniIn, pUniIn + 1,
5011                         &pLMBCSOut, (pLMBCSOut + 1),
5012                         NULL, 1, &errorCode);
5013       if (U_FAILURE(errorCode) || pLMBCSOut != (const char *)lmbcsString+1 || pUniIn != uniString+1 || uniString[0] != 0xFF6E)
5014       {
5015            log_err("Unexpected results from LMBCS-16 single byte char\n");
5016       }
5017       /* convert to group 1: should be 3 bytes */
5018       pLMBCSIn = (char *)lmbcsString;
5019       pUniOut = uniString;
5020       ucnv_fromUnicode (cnv01us,
5021                         &pLMBCSIn, (const char *)(pLMBCSIn + 3),
5022                         &pUniOut, pUniOut + 1,
5023                         NULL, 1, &errorCode);
5024       if (U_FAILURE(errorCode) || pLMBCSIn != (const char *)lmbcsString+3 || pUniOut != uniString+1
5025          || lmbcsString[0] != 0x10 || lmbcsString[1] != 0x10 || lmbcsString[2] != 0xAE)
5026       {
5027            log_err("Unexpected results to LMBCS-1 single byte mbcs char\n");
5028       }
5029       pLMBCSOut = (const char *)lmbcsString;
5030       pUniIn = uniString;
5031       ucnv_toUnicode (cnv01us,
5032                         &pUniIn, pUniIn + 1,
5033                         &pLMBCSOut, (const char *)(pLMBCSOut + 3),
5034                         NULL, 1, &errorCode);
5035       if (U_FAILURE(errorCode) || pLMBCSOut != (const char *)lmbcsString+3 || pUniIn != uniString+1 || uniString[0] != 0xFF6E)
5036       {
5037            log_err("Unexpected results from LMBCS-1 single byte mbcs char\n");
5038       }
5039       pLMBCSIn = (char *)lmbcsString;
5040       pUniOut = uniString;
5041       ucnv_fromUnicode (cnv16jp,
5042                         &pLMBCSIn, (const char *)(pLMBCSIn + 1),
5043                         &pUniOut, pUniOut + 1,
5044                         NULL, 1, &errorCode);
5045       if (U_FAILURE(errorCode) || pLMBCSIn != (const char *)lmbcsString+1 || pUniOut != uniString+1 || lmbcsString[0] != 0xAE)
5046       {
5047            log_err("Unexpected results to LMBCS-16 single byte mbcs char\n");
5048       }
5049       ucnv_close(cnv16he);
5050       ucnv_close(cnv16jp);
5051       ucnv_close(cnv01us);
5052     }
5053     {
5054        /* Small source buffer testing, LMBCS -> Unicode */
5055
5056        UErrorCode errorCode=U_ZERO_ERROR;
5057
5058        const char * pSource = (const char *)pszLMBCS;
5059        const char * sourceLimit = (const char *)pszLMBCS + sizeof(pszLMBCS);
5060        int codepointCount = 0;
5061
5062        UChar Out [sizeof(pszUnicode) + 1];
5063        UChar * pOut = Out;
5064        UChar * OutLimit = Out + sizeof(pszUnicode)/sizeof(UChar);
5065
5066
5067        cnv = ucnv_open(NAME_LMBCS_1, &errorCode);
5068        if(U_FAILURE(errorCode)) {
5069            log_err("Unable to open a LMBCS-1 converter: %s\n", u_errorName(errorCode));
5070            return;
5071        }
5072
5073
5074        while ((pSource < sourceLimit) && U_SUCCESS (errorCode))
5075        {
5076            ucnv_toUnicode (cnv,
5077                &pOut,
5078                OutLimit,
5079                &pSource,
5080                (pSource+1), /* claim that this is a 1- byte buffer */
5081                NULL,
5082                FALSE,    /* FALSE means there might be more chars in the next buffer */
5083                &errorCode);
5084
5085            if (U_SUCCESS (errorCode))
5086            {
5087                if ((pSource - (const char *)pszLMBCS) == offsets [codepointCount+1])
5088                {
5089                    /* we are on to the next code point: check value */
5090
5091                    if (Out[0] != pszUnicode[codepointCount]){
5092                        log_err("LMBCS->Uni result %lx should have been %lx \n",
5093                            Out[0], pszUnicode[codepointCount]);
5094                    }
5095
5096                    pOut = Out; /* reset for accumulating next code point */
5097                    codepointCount++;
5098                }
5099            }
5100            else
5101            {
5102                log_err("Unexpected Error on toUnicode: %s\n", u_errorName(errorCode));
5103            }
5104        }
5105        {
5106          /* limits & surrogate error testing */
5107          char LIn [sizeof(pszLMBCS)];
5108          const char * pLIn = LIn;
5109
5110          char LOut [sizeof(pszLMBCS)];
5111          char * pLOut = LOut;
5112
5113          UChar UOut [sizeof(pszUnicode)];
5114          UChar * pUOut = UOut;
5115
5116          UChar UIn [sizeof(pszUnicode)];
5117          const UChar * pUIn = UIn;
5118
5119          int32_t off [sizeof(offsets)];
5120          UChar32 uniChar;
5121
5122          errorCode=U_ZERO_ERROR;
5123
5124          /* negative source request should always return U_ILLEGAL_ARGUMENT_ERROR */
5125          pUIn++;
5126          ucnv_fromUnicode(cnv, &pLOut, pLOut+1, &pUIn, pUIn-1, off, FALSE, &errorCode);
5127          if (errorCode != U_ILLEGAL_ARGUMENT_ERROR)
5128          {
5129             log_err("Unexpected Error on negative source request to ucnv_fromUnicode: %s\n", u_errorName(errorCode));
5130          }
5131          pUIn--;
5132
5133          errorCode=U_ZERO_ERROR;
5134          ucnv_toUnicode(cnv, &pUOut,pUOut+1,(const char **)&pLIn,(const char *)(pLIn-1),off,FALSE, &errorCode);
5135          if (errorCode != U_ILLEGAL_ARGUMENT_ERROR)
5136          {
5137             log_err("Unexpected Error on negative source request to ucnv_toUnicode: %s\n", u_errorName(errorCode));
5138          }
5139          errorCode=U_ZERO_ERROR;
5140
5141          uniChar = ucnv_getNextUChar(cnv, (const char **)&pLIn, (const char *)(pLIn-1), &errorCode);
5142          if (errorCode != U_ILLEGAL_ARGUMENT_ERROR)
5143          {
5144             log_err("Unexpected Error on negative source request to ucnv_getNextUChar: %s\n", u_errorName(errorCode));
5145          }
5146          errorCode=U_ZERO_ERROR;
5147
5148          /* 0 byte source request - no error, no pointer movement */
5149          ucnv_toUnicode(cnv, &pUOut,pUOut+1,(const char **)&pLIn,(const char *)pLIn,off,FALSE, &errorCode);
5150          ucnv_fromUnicode(cnv, &pLOut,pLOut+1,&pUIn,pUIn,off,FALSE, &errorCode);
5151          if(U_FAILURE(errorCode)) {
5152             log_err("0 byte source request: unexpected error: %s\n", u_errorName(errorCode));
5153          }
5154          if ((pUOut != UOut) || (pUIn != UIn) || (pLOut != LOut) || (pLIn != LIn))
5155          {
5156               log_err("Unexpected pointer move in 0 byte source request \n");
5157          }
5158          /*0 byte source request - GetNextUChar : error & value == fffe or ffff */
5159          uniChar = ucnv_getNextUChar(cnv, (const char **)&pLIn, (const char *)pLIn, &errorCode);
5160          if (errorCode != U_INDEX_OUTOFBOUNDS_ERROR)
5161          {
5162             log_err("Unexpected Error on 0-byte source request to ucnv_getnextUChar: %s\n", u_errorName(errorCode));
5163          }
5164          if (((uint32_t)uniChar - 0xfffe) > 1) /* not 0xfffe<=uniChar<=0xffff */
5165          {
5166             log_err("Unexpected value on 0-byte source request to ucnv_getnextUChar \n");
5167          }
5168          errorCode = U_ZERO_ERROR;
5169
5170          /* running out of target room : U_BUFFER_OVERFLOW_ERROR */
5171
5172          pUIn = pszUnicode;
5173          ucnv_fromUnicode(cnv, &pLOut,pLOut+offsets[4],&pUIn,pUIn+sizeof(pszUnicode)/sizeof(UChar),off,FALSE, &errorCode);
5174          if (errorCode != U_BUFFER_OVERFLOW_ERROR || pLOut != LOut + offsets[4] || pUIn != pszUnicode+4 )
5175          {
5176             log_err("Unexpected results on out of target room to ucnv_fromUnicode\n");
5177          }
5178
5179          errorCode = U_ZERO_ERROR;
5180
5181          pLIn = (const char *)pszLMBCS;
5182          ucnv_toUnicode(cnv, &pUOut,pUOut+4,&pLIn,(pLIn+sizeof(pszLMBCS)),off,FALSE, &errorCode);
5183          if (errorCode != U_BUFFER_OVERFLOW_ERROR || pUOut != UOut + 4 || pLIn != (const char *)pszLMBCS+offsets[4])
5184          {
5185             log_err("Unexpected results on out of target room to ucnv_toUnicode\n");
5186          }
5187
5188          /* unpaired or chopped LMBCS surrogates */
5189
5190          /* OK high surrogate, Low surrogate is chopped */
5191          LIn [0] = (char)0x14;
5192          LIn [1] = (char)0xD8;
5193          LIn [2] = (char)0x01;
5194          LIn [3] = (char)0x14;
5195          LIn [4] = (char)0xDC;
5196          pLIn = LIn;
5197          errorCode = U_ZERO_ERROR;
5198          pUOut = UOut;
5199
5200          ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_STOP, NULL, NULL, NULL, &errorCode);
5201          ucnv_toUnicode(cnv, &pUOut,pUOut+sizeof(UOut)/sizeof(UChar),(const char **)&pLIn,(const char *)(pLIn+5),off,TRUE, &errorCode);
5202          if (UOut[0] != 0xD801 || errorCode != U_TRUNCATED_CHAR_FOUND || pUOut != UOut + 1 || pLIn != LIn + 5)
5203          {
5204             log_err("Unexpected results on chopped low surrogate\n");
5205          }
5206
5207          /* chopped at surrogate boundary */
5208          LIn [0] = (char)0x14;
5209          LIn [1] = (char)0xD8;
5210          LIn [2] = (char)0x01;
5211          pLIn = LIn;
5212          errorCode = U_ZERO_ERROR;
5213          pUOut = UOut;
5214
5215          ucnv_toUnicode(cnv, &pUOut,pUOut+sizeof(UOut)/sizeof(UChar),(const char **)&pLIn,(const char *)(pLIn+3),off,TRUE, &errorCode);
5216          if (UOut[0] != 0xD801 || U_FAILURE(errorCode) || pUOut != UOut + 1 || pLIn != LIn + 3)
5217          {
5218             log_err("Unexpected results on chopped at surrogate boundary \n");
5219          }
5220
5221          /* unpaired surrogate plus valid Unichar */
5222          LIn [0] = (char)0x14;
5223          LIn [1] = (char)0xD8;
5224          LIn [2] = (char)0x01;
5225          LIn [3] = (char)0x14;
5226          LIn [4] = (char)0xC9;
5227          LIn [5] = (char)0xD0;
5228          pLIn = LIn;
5229          errorCode = U_ZERO_ERROR;
5230          pUOut = UOut;
5231
5232          ucnv_toUnicode(cnv, &pUOut,pUOut+sizeof(UOut)/sizeof(UChar),(const char **)&pLIn,(const char *)(pLIn+6),off,TRUE, &errorCode);
5233          if (UOut[0] != 0xD801 || UOut[1] != 0xC9D0 || U_FAILURE(errorCode) || pUOut != UOut + 2 || pLIn != LIn + 6)
5234          {
5235             log_err("Unexpected results after unpaired surrogate plus valid Unichar \n");
5236          }
5237
5238       /* unpaired surrogate plus chopped Unichar */
5239          LIn [0] = (char)0x14;
5240          LIn [1] = (char)0xD8;
5241          LIn [2] = (char)0x01;
5242          LIn [3] = (char)0x14;
5243          LIn [4] = (char)0xC9;
5244
5245          pLIn = LIn;
5246          errorCode = U_ZERO_ERROR;
5247          pUOut = UOut;
5248
5249          ucnv_toUnicode(cnv, &pUOut,pUOut+sizeof(UOut)/sizeof(UChar),(const char **)&pLIn,(const char *)(pLIn+5),off,TRUE, &errorCode);
5250          if (UOut[0] != 0xD801 || errorCode != U_TRUNCATED_CHAR_FOUND || pUOut != UOut + 1 || pLIn != LIn + 5)
5251          {
5252             log_err("Unexpected results after unpaired surrogate plus chopped Unichar \n");
5253          }
5254
5255          /* unpaired surrogate plus valid non-Unichar */
5256          LIn [0] = (char)0x14;
5257          LIn [1] = (char)0xD8;
5258          LIn [2] = (char)0x01;
5259          LIn [3] = (char)0x0F;
5260          LIn [4] = (char)0x3B;
5261
5262          pLIn = LIn;
5263          errorCode = U_ZERO_ERROR;
5264          pUOut = UOut;
5265
5266          ucnv_toUnicode(cnv, &pUOut,pUOut+sizeof(UOut)/sizeof(UChar),(const char **)&pLIn,(const char *)(pLIn+5),off,TRUE, &errorCode);
5267          if (UOut[0] != 0xD801 || UOut[1] != 0x1B || U_FAILURE(errorCode) || pUOut != UOut + 2 || pLIn != LIn + 5)
5268          {
5269             log_err("Unexpected results after unpaired surrogate plus valid non-Unichar\n");
5270          }
5271
5272          /* unpaired surrogate plus chopped non-Unichar */
5273          LIn [0] = (char)0x14;
5274          LIn [1] = (char)0xD8;
5275          LIn [2] = (char)0x01;
5276          LIn [3] = (char)0x0F;
5277
5278          pLIn = LIn;
5279          errorCode = U_ZERO_ERROR;
5280          pUOut = UOut;
5281
5282          ucnv_toUnicode(cnv, &pUOut,pUOut+sizeof(UOut)/sizeof(UChar),(const char **)&pLIn,(const char *)(pLIn+4),off,TRUE, &errorCode);
5283
5284          if (UOut[0] != 0xD801 || errorCode != U_TRUNCATED_CHAR_FOUND || pUOut != UOut + 1 || pLIn != LIn + 4)
5285          {
5286             log_err("Unexpected results after unpaired surrogate plus chopped non-Unichar\n");
5287          }
5288        }
5289     }
5290    ucnv_close(cnv);  /* final cleanup */
5291 }
5292
5293
5294 static void TestJitterbug255()
5295 {
5296     static const uint8_t testBytes[] = { 0x95, 0xcf, 0x8a, 0xb7, 0x0d, 0x0a, 0x00 };
5297     const char *testBuffer = (const char *)testBytes;
5298     const char *testEnd = (const char *)testBytes + sizeof(testBytes);
5299     UErrorCode status = U_ZERO_ERROR;
5300     /*UChar32 result;*/
5301     UConverter *cnv = 0;
5302
5303     cnv = ucnv_open("shift-jis", &status);
5304     if (U_FAILURE(status) || cnv == 0) {
5305         log_data_err("Failed to open the converter for SJIS.\n");
5306                 return;
5307     }
5308     while (testBuffer != testEnd)
5309     {
5310         /*result = */ucnv_getNextUChar (cnv, &testBuffer, testEnd , &status);
5311         if (U_FAILURE(status))
5312         {
5313             log_err("Failed to convert the next UChar for SJIS.\n");
5314             break;
5315         }
5316     }
5317     ucnv_close(cnv);
5318 }
5319
5320 static void TestEBCDICUS4XML()
5321 {
5322     UChar unicodes_x[] = {0x0000, 0x0000, 0x0000, 0x0000};
5323     static const UChar toUnicodeMaps_x[] = {0x000A, 0x000A, 0x000D, 0x0000};
5324     static const char fromUnicodeMaps_x[] = {0x25, 0x25, 0x0D, 0x00};
5325     static const char newLines_x[] = {0x25, 0x15, 0x0D, 0x00};
5326     char target_x[] = {0x00, 0x00, 0x00, 0x00};
5327     UChar *unicodes = unicodes_x;
5328     const UChar *toUnicodeMaps = toUnicodeMaps_x;
5329     char *target = target_x;
5330     const char* fromUnicodeMaps = fromUnicodeMaps_x, *newLines = newLines_x;
5331     UErrorCode status = U_ZERO_ERROR;
5332     UConverter *cnv = 0;
5333
5334     cnv = ucnv_open("ebcdic-xml-us", &status);
5335     if (U_FAILURE(status) || cnv == 0) {
5336         log_data_err("Failed to open the converter for EBCDIC-XML-US.\n");
5337         return;
5338     }
5339     ucnv_toUnicode(cnv, &unicodes, unicodes+3, (const char**)&newLines, newLines+3, NULL, TRUE, &status);
5340     if (U_FAILURE(status) || memcmp(unicodes_x, toUnicodeMaps, sizeof(UChar)*3) != 0) {
5341         log_err("To Unicode conversion failed in EBCDICUS4XML test. %s\n",
5342             u_errorName(status));
5343         printUSeqErr(unicodes_x, 3);
5344         printUSeqErr(toUnicodeMaps, 3);
5345     }
5346     status = U_ZERO_ERROR;
5347     ucnv_fromUnicode(cnv, &target, target+3, (const UChar**)&toUnicodeMaps, toUnicodeMaps+3, NULL, TRUE, &status);
5348     if (U_FAILURE(status) || memcmp(target_x, fromUnicodeMaps, sizeof(char)*3) != 0) {
5349         log_err("From Unicode conversion failed in EBCDICUS4XML test. %s\n",
5350             u_errorName(status));
5351         printSeqErr((const unsigned char*)target_x, 3);
5352         printSeqErr((const unsigned char*)fromUnicodeMaps, 3);
5353     }
5354     ucnv_close(cnv);
5355 }
#endif /* #if !UCONFIG_NO_LEGACY_CONVERSION */
5357
5358 #if !UCONFIG_NO_COLLATION
5359
5360 static void TestJitterbug981(){
5361     const UChar* rules;
5362     int32_t rules_length, target_cap, bytes_needed, buff_size;
5363     UErrorCode status = U_ZERO_ERROR;
5364     UConverter *utf8cnv;
5365     UCollator* myCollator;
5366     char *buff;
5367     int numNeeded=0;
5368     utf8cnv = ucnv_open ("utf8", &status);
5369     if(U_FAILURE(status)){
5370         log_err("Could not open UTF-8 converter. Error: %s\n", u_errorName(status));
5371         return;
5372     }
5373     myCollator = ucol_open("zh", &status);
5374     if(U_FAILURE(status)){
5375         log_data_err("Could not open collator for zh locale. Error: %s\n", u_errorName(status));
5376         ucnv_close(utf8cnv);
5377         return;
5378     }
5379
5380     rules = ucol_getRules(myCollator, &rules_length);
5381     buff_size = rules_length * ucnv_getMaxCharSize(utf8cnv);
5382     buff = malloc(buff_size);
5383
5384     target_cap = 0;
5385     do {
5386         ucnv_reset(utf8cnv);
5387         status = U_ZERO_ERROR;
5388         if(target_cap >= buff_size) {
5389             log_err("wanted %d bytes, only %d available\n", target_cap, buff_size);
5390             break;
5391         }
5392         bytes_needed = ucnv_fromUChars(utf8cnv, buff, target_cap,
5393             rules, rules_length, &status);
5394         target_cap = (bytes_needed > target_cap) ? bytes_needed : target_cap +1;
5395         if(numNeeded!=0 && numNeeded!= bytes_needed){
5396             log_err("ucnv_fromUChars returns different values for required capacity in pre-flight and conversion modes");
5397             break;
5398         }
5399         numNeeded = bytes_needed;
5400     } while (status == U_BUFFER_OVERFLOW_ERROR);
5401     ucol_close(myCollator);
5402     ucnv_close(utf8cnv);
5403     free(buff);
5404 }
5405
5406 #endif
5407
5408 static void TestJitterbug1293(){
5409     static const UChar src[] = {0x30DE, 0x30A4, 0x5E83, 0x544A, 0x30BF, 0x30A4, 0x30D7,0x000};
5410     char target[256];
5411     UErrorCode status = U_ZERO_ERROR;
5412     UConverter* conv=NULL;
5413     int32_t target_cap, bytes_needed, numNeeded = 0;
5414     conv = ucnv_open("shift-jis",&status);
5415     if(U_FAILURE(status)){
5416       log_data_err("Could not open Shift-Jis converter. Error: %s", u_errorName(status));
5417       return;
5418     }
5419
5420     do{
5421         target_cap =0;
5422         bytes_needed = ucnv_fromUChars(conv,target,256,src,u_strlen(src),&status);
5423         target_cap = (bytes_needed > target_cap) ? bytes_needed : target_cap +1;
5424         if(numNeeded!=0 && numNeeded!= bytes_needed){
5425           log_err("ucnv_fromUChars returns different values for required capacity in pre-flight and conversion modes");
5426         }
5427         numNeeded = bytes_needed;
5428     } while (status == U_BUFFER_OVERFLOW_ERROR);
5429     if(U_FAILURE(status)){
5430       log_err("An error occured in ucnv_fromUChars. Error: %s", u_errorName(status));
5431       return;
5432     }
5433     ucnv_close(conv);
5434 }
5435 static void TestJB5275_1(){
5436
5437     static const char* data = "\x3B\xB3\x0A" /* Easy characters */
5438                                 "\xC0\xE9\xBF\xE9\xE8\xD8\x0A" /* Gurmukhi test */
5439                                 /* Switch script: */
5440                                 "\xEF\x43\xC0\xE9\xBF\xE9\xE8\xD8\x0A" /* Bengali test */
5441                                 "\x3B\xB3\x0A" /* Easy characters - new line, so should default!*/
5442                                 "\xEF\x40\x3B\xB3\x0A";
5443     static const UChar expected[] ={
5444             0x003b, 0x0a15, 0x000a, /* Easy characters */
5445             0x0a5c, 0x0a4d, 0x0a39, 0x0a5c, 0x0a4d, 0x0a39, 0x000a, /* Gurmukhi test */
5446             0x09dd, 0x09dc, 0x09cd, 0x09b9, 0x000a, /* Switch script: to Bengali*/
5447             0x003b, 0x0a15, 0x000a, /* Easy characters - new line, so should default!*/
5448             0x003b, 0x0a15, 0x000a /* Back to Gurmukhi*/
5449     };
5450
5451     UErrorCode status = U_ZERO_ERROR;
5452     UConverter* conv = ucnv_open("iscii-gur", &status);
5453     UChar dest[100] = {'\0'};
5454     UChar* target = dest;
5455     UChar* targetLimit = dest+100;
5456     const char* source = data;
5457     const char* sourceLimit = data+strlen(data);
5458     const UChar* exp = expected;
5459
5460     if (U_FAILURE(status)) {
5461         log_data_err("Unable to open converter: iscii-gur got errorCode: %s\n", u_errorName(status));
5462         return;
5463     }
5464
5465     log_verbose("Testing switching back to default script when new line is encountered.\n");
5466     ucnv_toUnicode(conv, &target, targetLimit, &source, sourceLimit, NULL, TRUE, &status);
5467     if(U_FAILURE(status)){
5468         log_err("conversion failed: %s \n", u_errorName(status));
5469     }
5470     targetLimit = target;
5471     target = dest;
5472     printUSeq(target, targetLimit-target);
5473     while(target<targetLimit){
5474         if(*exp!=*target){
5475             log_err("did not get the expected output. \\u%04X != \\u%04X (got)\n", *exp, *target);
5476         }
5477         target++;
5478         exp++;
5479     }
5480     ucnv_close(conv);
5481 }
5482
5483 static void TestJB5275(){
5484     static const char* data =
5485     /* "\xEF\x42\xEF\x41\xA4\xD5\xE5\xB3\xEA\x0A"  unsupported sequence \xEF\x41 */
5486     /* "\xEF\x42\xEF\x41\xD4\xDA\xB3\xE8\xEA\x0A"  unsupported sequence \xEF\x41  */
5487     /* "\xEF\x44\xEF\x41\xC8\xE1\x8B\xDB\xB3\xE8 \xB3\xE4\xC1\xE8\x0A"  unsupported sequence \xEF\x41 */
5488         "\xEF\x4B\xC0\xE9\xBF\xE9\xE8\xD8\x0A"  /* Gurmukhi test */
5489         "\xEF\x4A\xC0\xD4\xBF\xD4\xE8\xD8\x0A"  /* Gujarati test */
5490         "\xEF\x48\x38\xB3\x0A"  /* Kannada test */
5491         "\xEF\x49\x39\xB3\x0A"  /* Malayalam test */
5492         "\xEF\x4A\x3A\xB3\x0A"  /* Gujarati test */
5493         "\xEF\x4B\x3B\xB3\x0A"  /* Punjabi test */
5494         /* "\xEF\x4C\x3C\xB3\x0A"  unsupported sequence \xEF\x41 */;
5495     static const UChar expected[] ={
5496         0x0A5C, 0x0A4D, 0x0A39, 0x0A5C, 0x0A4D, 0x0A39, 0x000A, /* Gurmukhi test */
5497         0x0AA2, 0x0AB5, 0x0AA1, 0x0AB5, 0x0ACD, 0x0AB9, 0x000A,     /* Gujarati test */
5498         0x0038, 0x0C95, 0x000A, /* Kannada test */
5499         0x0039, 0x0D15, 0x000A, /* Malayalam test */
5500         0x003A, 0x0A95, 0x000A, /* Gujarati test */
5501         0x003B, 0x0A15, 0x000A, /* Punjabi test */
5502     };
5503
5504     UErrorCode status = U_ZERO_ERROR;
5505     UConverter* conv = ucnv_open("iscii", &status);
5506     UChar dest[100] = {'\0'};
5507     UChar* target = dest;
5508     UChar* targetLimit = dest+100;
5509     const char* source = data;
5510     const char* sourceLimit = data+strlen(data);
5511     const UChar* exp = expected;
5512     ucnv_toUnicode(conv, &target, targetLimit, &source, sourceLimit, NULL, TRUE, &status);
5513     if(U_FAILURE(status)){
5514         log_err("conversion failed: %s \n", u_errorName(status));
5515     }
5516     targetLimit = target;
5517     target = dest;
5518
5519     printUSeq(target, targetLimit-target);
5520
5521     while(target<targetLimit){
5522         if(*exp!=*target){
5523             log_err("did not get the expected output. \\u%04X != \\u%04X (got)\n", *exp, *target);
5524         }
5525         target++;
5526         exp++;
5527     }
5528     ucnv_close(conv);
5529 }