/*
*******************************************************************************
*   Copyright (C) 2003-2004, International Business Machines
*   Corporation and others.  All Rights Reserved.
*******************************************************************************
*   file name:  convtest.cpp
*   tab size:   8 (not used)
*   created on: 2003jul15
*   created by: Markus W. Scherer
*
*   Test file for data-driven conversion tests.
*/
  19 #include "unicode/utypes.h" 
  21 #if !UCONFIG_NO_LEGACY_CONVERSION 
/*
 * Note: Turning off all of convtest.cpp when UCONFIG_NO_LEGACY_CONVERSION is set
 * is slightly unnecessary - it removes tests for Unicode charsets
 * like UTF-8 that should work.
 * However, there is no easy way for the test to detect whether a test case
 * is for a Unicode charset, so it would be difficult to only exclude those.
 * Also, regular testing of ICU is done with all modules on, therefore
 * not testing conversion for a custom configuration like this should be ok.
 */
  32 #include "unicode/ucnv.h" 
  33 #include "unicode/unistr.h" 
  34 #include "unicode/parsepos.h" 
  35 #include "unicode/uniset.h" 
  36 #include "unicode/ustring.h" 
  37 #include "unicode/ures.h" 
  39 #include "unicode/tstdtmod.h" 
  // Element count of a true array (not a pointer) as an int32_t.
  // The whole expansion is parenthesized so that the cast cannot bind to
  // tokens that follow the macro invocation.
  #define LENGTHOF(array) ((int32_t)(sizeof(array)/sizeof((array)[0])))
  46     // characters used in test data for callbacks 
  53 ConversionTest::~ConversionTest() {} 
  56 ConversionTest::runIndexedTest(int32_t index
, UBool exec
, const char *&name
, char * /*par*/) { 
  57     if (exec
) logln("TestSuite ConversionTest: "); 
  59         case 0: name
="TestToUnicode"; if (exec
) TestToUnicode(); break; 
  60         case 1: name
="TestFromUnicode"; if (exec
) TestFromUnicode(); break; 
  61         case 2: name
="TestGetUnicodeSet"; if (exec
) TestGetUnicodeSet(); break; 
  62         default: name
=""; break; //needed to end loop 
// test data interface ----------------------------------------------------- ***
  69 ConversionTest::TestToUnicode() { 
  71     char charset
[100], cbopt
[4]; 
  73     UnicodeString s
, unicode
; 
  74     int32_t offsetsLength
; 
  75     UConverterToUCallback callback
; 
  77     TestDataModule 
*dataModule
; 
  79     const DataMap 
*testCase
; 
  83     errorCode
=U_ZERO_ERROR
; 
  84     dataModule
=TestDataModule::getTestDataModule("conversion", *this, errorCode
); 
  85     if(U_SUCCESS(errorCode
)) { 
  86         testData
=dataModule
->createTestData("toUnicode", errorCode
); 
  87         if(U_SUCCESS(errorCode
)) { 
  88             for(i
=0; testData
->nextCase(testCase
, errorCode
); ++i
) { 
  89                 if(U_FAILURE(errorCode
)) { 
  90                     errln("error retrieving conversion/toUnicode test case %d - %s", 
  91                             i
, u_errorName(errorCode
)); 
  92                     errorCode
=U_ZERO_ERROR
; 
  98                 s
=testCase
->getString("charset", errorCode
); 
  99                 s
.extract(0, 0x7fffffff, charset
, sizeof(charset
), ""); 
 102                 cc
.bytes
=testCase
->getBinary(cc
.bytesLength
, "bytes", errorCode
); 
 103                 unicode
=testCase
->getString("unicode", errorCode
); 
 104                 cc
.unicode
=unicode
.getBuffer(); 
 105                 cc
.unicodeLength
=unicode
.length(); 
 108                 cc
.offsets
=testCase
->getIntVector(offsetsLength
, "offsets", errorCode
); 
 109                 if(offsetsLength
==0) { 
 111                 } else if(offsetsLength
!=unicode
.length()) { 
 112                     errln("toUnicode[%d] unicode[%d] and offsets[%d] must have the same length", 
 113                             i
, unicode
.length(), offsetsLength
); 
 114                     errorCode
=U_ILLEGAL_ARGUMENT_ERROR
; 
 117                 cc
.finalFlush
= 0!=testCase
->getInt28("flush", errorCode
); 
 118                 cc
.fallbacks
= 0!=testCase
->getInt28("fallbacks", errorCode
); 
 120                 s
=testCase
->getString("errorCode", errorCode
); 
 121                 if(s
==UNICODE_STRING("invalid", 7)) { 
 122                     cc
.outErrorCode
=U_INVALID_CHAR_FOUND
; 
 123                 } else if(s
==UNICODE_STRING("illegal", 7)) { 
 124                     cc
.outErrorCode
=U_ILLEGAL_CHAR_FOUND
; 
 125                 } else if(s
==UNICODE_STRING("truncated", 9)) { 
 126                     cc
.outErrorCode
=U_TRUNCATED_CHAR_FOUND
; 
 127                 } else if(s
==UNICODE_STRING("illesc", 6)) { 
 128                     cc
.outErrorCode
=U_ILLEGAL_ESCAPE_SEQUENCE
; 
 129                 } else if(s
==UNICODE_STRING("unsuppesc", 9)) { 
 130                     cc
.outErrorCode
=U_UNSUPPORTED_ESCAPE_SEQUENCE
; 
 132                     cc
.outErrorCode
=U_ZERO_ERROR
; 
 135                 s
=testCase
->getString("callback", errorCode
); 
 136                 s
.extract(0, 0x7fffffff, cbopt
, sizeof(cbopt
), ""); 
 140                     callback
=UCNV_TO_U_CALLBACK_SUBSTITUTE
; 
 143                     callback
=UCNV_TO_U_CALLBACK_SKIP
; 
 146                     callback
=UCNV_TO_U_CALLBACK_STOP
; 
 149                     callback
=UCNV_TO_U_CALLBACK_ESCAPE
; 
 155                 option
=callback
==NULL 
? cbopt 
: cbopt
+1; 
 160                 cc
.invalidChars
=testCase
->getBinary(cc
.invalidLength
, "invalidChars", errorCode
); 
 162                 if(U_FAILURE(errorCode
)) { 
 163                     errln("error parsing conversion/toUnicode test case %d - %s", 
 164                             i
, u_errorName(errorCode
)); 
 165                     errorCode
=U_ZERO_ERROR
; 
 167                     logln("TestToUnicode[%d] %s", i
, charset
); 
 168                     ToUnicodeCase(cc
, callback
, option
); 
 176         errln("Failed: could not load test conversion data"); 
 181 ConversionTest::TestFromUnicode() { 
 183     char charset
[100], cbopt
[4]; 
 185     UnicodeString s
, unicode
, invalidUChars
; 
 186     int32_t offsetsLength
; 
 187     UConverterFromUCallback callback
; 
 189     TestDataModule 
*dataModule
; 
 191     const DataMap 
*testCase
; 
 193     UErrorCode errorCode
; 
 196     errorCode
=U_ZERO_ERROR
; 
 197     dataModule
=TestDataModule::getTestDataModule("conversion", *this, errorCode
); 
 198     if(U_SUCCESS(errorCode
)) { 
 199         testData
=dataModule
->createTestData("fromUnicode", errorCode
); 
 200         if(U_SUCCESS(errorCode
)) { 
 201             for(i
=0; testData
->nextCase(testCase
, errorCode
); ++i
) { 
 202                 if(U_FAILURE(errorCode
)) { 
 203                     errln("error retrieving conversion/fromUnicode test case %d - %s", 
 204                             i
, u_errorName(errorCode
)); 
 205                     errorCode
=U_ZERO_ERROR
; 
 211                 s
=testCase
->getString("charset", errorCode
); 
 212                 s
.extract(0, 0x7fffffff, charset
, sizeof(charset
), ""); 
 215                 unicode
=testCase
->getString("unicode", errorCode
); 
 216                 cc
.unicode
=unicode
.getBuffer(); 
 217                 cc
.unicodeLength
=unicode
.length(); 
 218                 cc
.bytes
=testCase
->getBinary(cc
.bytesLength
, "bytes", errorCode
); 
 221                 cc
.offsets
=testCase
->getIntVector(offsetsLength
, "offsets", errorCode
); 
 222                 if(offsetsLength
==0) { 
 224                 } else if(offsetsLength
!=cc
.bytesLength
) { 
 225                     errln("fromUnicode[%d] bytes[%d] and offsets[%d] must have the same length", 
 226                             i
, cc
.bytesLength
, offsetsLength
); 
 227                     errorCode
=U_ILLEGAL_ARGUMENT_ERROR
; 
 230                 cc
.finalFlush
= 0!=testCase
->getInt28("flush", errorCode
); 
 231                 cc
.fallbacks
= 0!=testCase
->getInt28("fallbacks", errorCode
); 
 233                 s
=testCase
->getString("errorCode", errorCode
); 
 234                 if(s
==UNICODE_STRING("invalid", 7)) { 
 235                     cc
.outErrorCode
=U_INVALID_CHAR_FOUND
; 
 236                 } else if(s
==UNICODE_STRING("illegal", 7)) { 
 237                     cc
.outErrorCode
=U_ILLEGAL_CHAR_FOUND
; 
 238                 } else if(s
==UNICODE_STRING("truncated", 9)) { 
 239                     cc
.outErrorCode
=U_TRUNCATED_CHAR_FOUND
; 
 241                     cc
.outErrorCode
=U_ZERO_ERROR
; 
 244                 s
=testCase
->getString("callback", errorCode
); 
 246                 // read NUL-separated subchar first, if any 
 247                 length
=u_strlen(p
=s
.getTerminatedBuffer()); 
 248                 if(++length
<s
.length()) { 
 249                     // copy the subchar from Latin-1 characters 
 250                     // start after the NUL 
 252                     length
=s
.length()-length
; 
 253                     if(length
>=(int32_t)sizeof(cc
.subchar
)) { 
 254                         errorCode
=U_ILLEGAL_ARGUMENT_ERROR
; 
 258                         for(j
=0; j
<length
; ++j
) { 
 259                             cc
.subchar
[j
]=(char)p
[j
]; 
 261                         // NUL-terminate the subchar 
 265                     // remove the NUL and subchar from s 
 266                     s
.truncate(u_strlen(s
.getBuffer())); 
 272                 s
.extract(0, 0x7fffffff, cbopt
, sizeof(cbopt
), ""); 
 276                     callback
=UCNV_FROM_U_CALLBACK_SUBSTITUTE
; 
 279                     callback
=UCNV_FROM_U_CALLBACK_SKIP
; 
 282                     callback
=UCNV_FROM_U_CALLBACK_STOP
; 
 285                     callback
=UCNV_FROM_U_CALLBACK_ESCAPE
; 
 291                 option
=callback
==NULL 
? cbopt 
: cbopt
+1; 
 296                 invalidUChars
=testCase
->getString("invalidUChars", errorCode
); 
 297                 cc
.invalidUChars
=invalidUChars
.getBuffer(); 
 298                 cc
.invalidLength
=invalidUChars
.length(); 
 300                 if(U_FAILURE(errorCode
)) { 
 301                     errln("error parsing conversion/fromUnicode test case %d - %s", 
 302                             i
, u_errorName(errorCode
)); 
 303                     errorCode
=U_ZERO_ERROR
; 
 305                     logln("TestFromUnicode[%d] %s", i
, charset
); 
 306                     FromUnicodeCase(cc
, callback
, option
); 
 314         errln("Failed: could not load test conversion data"); 
 318 static const UChar ellipsis
[]={ 0x2e, 0x2e, 0x2e }; 
 321 ConversionTest::TestGetUnicodeSet() { 
 323     UnicodeString s
, map
, mapnot
; 
 327     UnicodeSet cnvSet
, mapSet
, mapnotSet
, diffSet
; 
 330     TestDataModule 
*dataModule
; 
 332     const DataMap 
*testCase
; 
 333     UErrorCode errorCode
; 
 336     errorCode
=U_ZERO_ERROR
; 
 337     dataModule
=TestDataModule::getTestDataModule("conversion", *this, errorCode
); 
 338     if(U_SUCCESS(errorCode
)) { 
 339         testData
=dataModule
->createTestData("getUnicodeSet", errorCode
); 
 340         if(U_SUCCESS(errorCode
)) { 
 341             for(i
=0; testData
->nextCase(testCase
, errorCode
); ++i
) { 
 342                 if(U_FAILURE(errorCode
)) { 
 343                     errln("error retrieving conversion/getUnicodeSet test case %d - %s", 
 344                             i
, u_errorName(errorCode
)); 
 345                     errorCode
=U_ZERO_ERROR
; 
 349                 s
=testCase
->getString("charset", errorCode
); 
 350                 s
.extract(0, 0x7fffffff, charset
, sizeof(charset
), ""); 
 352                 map
=testCase
->getString("map", errorCode
); 
 353                 mapnot
=testCase
->getString("mapnot", errorCode
); 
 355                 which
=testCase
->getInt28("which", errorCode
); 
 357                 if(U_FAILURE(errorCode
)) { 
 358                     errln("error parsing conversion/getUnicodeSet test case %d - %s", 
 359                             i
, u_errorName(errorCode
)); 
 360                     errorCode
=U_ZERO_ERROR
; 
 364                 // test this test case 
 369                 mapSet
.applyPattern(map
, pos
, 0, NULL
, errorCode
); 
 370                 if(U_FAILURE(errorCode
) || pos
.getIndex()!=map
.length()) { 
 371                     errln("error creating the map set for conversion/getUnicodeSet test case %d - %s\n" 
 372                           "    error index %d  index %d  U+%04x", 
 373                             i
, u_errorName(errorCode
), pos
.getErrorIndex(), pos
.getIndex(), map
.char32At(pos
.getIndex())); 
 374                     errorCode
=U_ZERO_ERROR
; 
 379                 mapnotSet
.applyPattern(mapnot
, pos
, 0, NULL
, errorCode
); 
 380                 if(U_FAILURE(errorCode
) || pos
.getIndex()!=mapnot
.length()) { 
 381                     errln("error creating the mapnot set for conversion/getUnicodeSet test case %d - %s\n" 
 382                           "    error index %d  index %d  U+%04x", 
 383                             i
, u_errorName(errorCode
), pos
.getErrorIndex(), pos
.getIndex(), mapnot
.char32At(pos
.getIndex())); 
 384                     errorCode
=U_ZERO_ERROR
; 
 388                 logln("TestGetUnicodeSet[%d] %s", i
, charset
); 
 390                 cnv
=cnv_open(charset
, errorCode
); 
 391                 if(U_FAILURE(errorCode
)) { 
 392                     errln("error opening \"%s\" for conversion/getUnicodeSet test case %d - %s", 
 393                             charset
, i
, u_errorName(errorCode
)); 
 394                     errorCode
=U_ZERO_ERROR
; 
 398                 ucnv_getUnicodeSet(cnv
, (USet 
*)&cnvSet
, (UConverterUnicodeSet
)which
, &errorCode
); 
 401                 if(U_FAILURE(errorCode
)) { 
 402                     errln("error in ucnv_getUnicodeSet(\"%s\") for conversion/getUnicodeSet test case %d - %s", 
 403                             charset
, i
, u_errorName(errorCode
)); 
 404                     errorCode
=U_ZERO_ERROR
; 
 408                 // are there items that must be in cnvSet but are not? 
 409                 (diffSet
=mapSet
).removeAll(cnvSet
); 
 410                 if(!diffSet
.isEmpty()) { 
 411                     diffSet
.toPattern(s
, TRUE
); 
 413                         s
.replace(100, 0x7fffffff, ellipsis
, LENGTHOF(ellipsis
)); 
 415                     errln("error: ucnv_getUnicodeSet(\"%s\") is missing items - conversion/getUnicodeSet test case %d", 
 420                 // are there items that must not be in cnvSet but are? 
 421                 (diffSet
=mapnotSet
).retainAll(cnvSet
); 
 422                 if(!diffSet
.isEmpty()) { 
 423                     diffSet
.toPattern(s
, TRUE
); 
 425                         s
.replace(100, 0x7fffffff, ellipsis
, LENGTHOF(ellipsis
)); 
 427                     errln("error: ucnv_getUnicodeSet(\"%s\") contains unexpected items - conversion/getUnicodeSet test case %d", 
 437         errln("Failed: could not load test conversion data"); 
// open testdata or ICU data converter ------------------------------------- ***
 444 ConversionTest::cnv_open(const char *name
, UErrorCode 
&errorCode
) { 
 445     if(name
!=NULL 
&& *name
=='*') { 
 446         /* loadTestData(): set the data directory */ 
 447         return ucnv_openPackage(loadTestData(errorCode
), name
+1, &errorCode
); 
 449         return ucnv_open(name
, &errorCode
); 
// output helpers ---------------------------------------------------------- ***
 /**
  * Converts a nibble value to its lowercase hexadecimal digit character.
  *
  * @param digit value to convert; 0..9 map to '0'..'9', 10..15 map to 'a'..'f'.
  *              Larger values are not range-checked and simply continue
  *              past 'f'.
  * @return the digit character
  */
 static char
 hexDigit(uint8_t digit) {
     return digit<=9 ? (char)('0'+digit) : (char)('a'-10+digit);
 }
 461 printBytes(const uint8_t *bytes
, int32_t length
, char *out
) { 
 467         *out
++=hexDigit((uint8_t)(b
>>4)); 
 468         *out
++=hexDigit((uint8_t)(b
&0xf)); 
 475         *out
++=hexDigit((uint8_t)(b
>>4)); 
 476         *out
++=hexDigit((uint8_t)(b
&0xf)); 
 483 printUnicode(const UChar 
*unicode
, int32_t length
, char *out
) { 
 487     for(i
=0; i
<length
;) { 
 491         U16_NEXT(unicode
, i
, length
, c
); 
 497             *out
++=hexDigit((uint8_t)((c
>>16)&0xf)); 
 499         *out
++=hexDigit((uint8_t)((c
>>12)&0xf)); 
 500         *out
++=hexDigit((uint8_t)((c
>>8)&0xf)); 
 501         *out
++=hexDigit((uint8_t)((c
>>4)&0xf)); 
 502         *out
++=hexDigit((uint8_t)(c
&0xf)); 
 509 printOffsets(const int32_t *offsets
, int32_t length
, char *out
) { 
 516     for(i
=0; i
<length
; ++i
) { 
 522         // print all offsets with 2 characters each (-x, -9..99, xx) 
 528             *out
++=(char)('0'-o
); 
 530             *out
++=(d
=o
/10)==0 ? ' ' : (char)('0'+d
); 
 531             *out
++=(char)('0'+o%10
); 
// toUnicode test worker functions ----------------------------------------- ***
 544 stepToUnicode(ConversionCase 
&cc
, UConverter 
*cnv
, 
 545               UChar 
*result
, int32_t resultCapacity
, 
 546               int32_t *resultOffsets
, /* also resultCapacity */ 
 548               UErrorCode 
*pErrorCode
) { 
 549     const char *source
, *sourceLimit
, *bytesLimit
; 
 550     UChar 
*target
, *targetLimit
, *resultLimit
; 
 553     source
=(const char *)cc
.bytes
; 
 555     bytesLimit
=source
+cc
.bytesLength
; 
 556     resultLimit
=result
+resultCapacity
; 
 559         // call ucnv_toUnicode() with in/out buffers no larger than (step) at a time 
 560         // move only one buffer (in vs. out) at a time to be extra mean 
 561         // step==0 performs bulk conversion and generates offsets 
 563         // initialize the partial limits for the loop 
 565             // use the entire buffers 
 566             sourceLimit
=bytesLimit
; 
 567             targetLimit
=resultLimit
; 
 570             // start with empty partial buffers 
 575             // output offsets only for bulk conversion 
 580             // resetting the opposite conversion direction must not affect this one 
 581             ucnv_resetFromUnicode(cnv
); 
 585                 &target
, targetLimit
, 
 586                 &source
, sourceLimit
, 
 590             // check pointers and errors 
 591             if(source
>sourceLimit 
|| target
>targetLimit
) { 
 592                 *pErrorCode
=U_INTERNAL_PROGRAM_ERROR
; 
 594             } else if(*pErrorCode
==U_BUFFER_OVERFLOW_ERROR
) { 
 595                 if(target
!=targetLimit
) { 
 596                     // buffer overflow must only be set when the target is filled 
 597                     *pErrorCode
=U_INTERNAL_PROGRAM_ERROR
; 
 599                 } else if(targetLimit
==resultLimit
) { 
 600                     // not just a partial overflow 
 604                 // the partial target is filled, set a new limit, reset the error and continue 
 605                 targetLimit
=(resultLimit
-target
)>=step 
? target
+step 
: resultLimit
; 
 606                 *pErrorCode
=U_ZERO_ERROR
; 
 607             } else if(U_FAILURE(*pErrorCode
)) { 
 608                 // some other error occurred, done 
 611                 if(source
!=sourceLimit
) { 
 612                     // when no error occurs, then the input must be consumed 
 613                     *pErrorCode
=U_INTERNAL_PROGRAM_ERROR
; 
 617                 if(sourceLimit
==bytesLimit
) { 
 622                 // the partial conversion succeeded, set a new limit and continue 
 623                 sourceLimit
=(bytesLimit
-source
)>=step 
? source
+step 
: bytesLimit
; 
 624                 flush
=(UBool
)(cc
.finalFlush 
&& sourceLimit
==bytesLimit
); 
 627     } else /* step<0 */ { 
 629          * step==-1: call only ucnv_getNextUChar() 
 630          * otherwise alternate between ucnv_toUnicode() and ucnv_getNextUChar() 
 631          *   if step==-2 or -3, then give ucnv_toUnicode() the whole remaining input, 
 632          *   else give it at most (-step-2)/2 bytes 
 636         // end the loop by getting an index out of bounds error 
 638             // resetting the opposite conversion direction must not affect this one 
 639             ucnv_resetFromUnicode(cnv
); 
 642             if((step
&1)!=0 /* odd: -1, -3, -5, ... */) { 
 643                 sourceLimit
=source
; // use sourceLimit not as a real limit 
 644                                     // but to remember the pre-getNextUChar source pointer 
 645                 c
=ucnv_getNextUChar(cnv
, &source
, bytesLimit
, pErrorCode
); 
 647                 // check pointers and errors 
 648                 if(*pErrorCode
==U_INDEX_OUTOFBOUNDS_ERROR
) { 
 649                     if(source
!=bytesLimit
) { 
 650                         *pErrorCode
=U_INTERNAL_PROGRAM_ERROR
; 
 652                         *pErrorCode
=U_ZERO_ERROR
; 
 655                 } else if(U_FAILURE(*pErrorCode
)) { 
 658                 // source may not move if c is from previous overflow 
 660                 if(target
==resultLimit
) { 
 661                     *pErrorCode
=U_BUFFER_OVERFLOW_ERROR
; 
 667                     *target
++=U16_LEAD(c
); 
 668                     if(target
==resultLimit
) { 
 669                         *pErrorCode
=U_BUFFER_OVERFLOW_ERROR
; 
 672                     *target
++=U16_TRAIL(c
); 
 675                 // alternate between -n-1 and -n but leave -1 alone 
 679             } else /* step is even */ { 
 680                 // allow only one UChar output 
 681                 targetLimit
=target
<resultLimit 
? target
+1 : resultLimit
; 
 683                 // as with ucnv_getNextUChar(), we always flush (if we go to bytesLimit) 
 684                 // and never output offsets 
 686                     sourceLimit
=bytesLimit
; 
 688                     sourceLimit
=source
+(-step
-2)/2; 
 689                     if(sourceLimit
>bytesLimit
) { 
 690                         sourceLimit
=bytesLimit
; 
 695                     &target
, targetLimit
, 
 696                     &source
, sourceLimit
, 
 697                     NULL
, (UBool
)(sourceLimit
==bytesLimit
), pErrorCode
); 
 699                 // check pointers and errors 
 700                 if(*pErrorCode
==U_BUFFER_OVERFLOW_ERROR
) { 
 701                     if(target
!=targetLimit
) { 
 702                         // buffer overflow must only be set when the target is filled 
 703                         *pErrorCode
=U_INTERNAL_PROGRAM_ERROR
; 
 705                     } else if(targetLimit
==resultLimit
) { 
 706                         // not just a partial overflow 
 710                     // the partial target is filled, set a new limit and continue 
 711                     *pErrorCode
=U_ZERO_ERROR
; 
 712                 } else if(U_FAILURE(*pErrorCode
)) { 
 713                     // some other error occurred, done 
 716                     if(source
!=sourceLimit
) { 
 717                         // when no error occurs, then the input must be consumed 
 718                         *pErrorCode
=U_INTERNAL_PROGRAM_ERROR
; 
 722                     // we are done (flush==TRUE) but we continue, to get the index out of bounds error above 
 730     return (int32_t)(target
-result
); 
 734 ConversionTest::ToUnicodeCase(ConversionCase 
&cc
, UConverterToUCallback callback
, const char *option
) { 
 736     UErrorCode errorCode
; 
 738     // open the converter 
 739     errorCode
=U_ZERO_ERROR
; 
 740     cnv
=cnv_open(cc
.charset
, errorCode
); 
 741     if(U_FAILURE(errorCode
)) { 
 742         errln("toUnicode[%d](%s cb=\"%s\" fb=%d flush=%d) ucnv_open() failed - %s", 
 743                 cc
.caseNr
, cc
.charset
, cc
.cbopt
, cc
.fallbacks
, cc
.finalFlush
, u_errorName(errorCode
)); 
 749         ucnv_setToUCallBack(cnv
, callback
, option
, NULL
, NULL
, &errorCode
); 
 750         if(U_FAILURE(errorCode
)) { 
 751             errln("toUnicode[%d](%s cb=\"%s\" fb=%d flush=%d) ucnv_setToUCallBack() failed - %s", 
 752                     cc
.caseNr
, cc
.charset
, cc
.cbopt
, cc
.fallbacks
, cc
.finalFlush
, u_errorName(errorCode
)); 
 758     int32_t resultOffsets
[200]; 
 760     int32_t resultLength
; 
 763     static const struct { 
 767         { 0, "bulk" }, // must be first for offsets to be checked 
 772         { -2, "toU(bulk)+getNext" }, 
 773         { -3, "getNext+toU(bulk)" }, 
 774         { -4, "toU(1)+getNext" }, 
 775         { -5, "getNext+toU(1)" }, 
 776         { -12, "toU(5)+getNext" }, 
 777         { -13, "getNext+toU(5)" }, 
 782     for(i
=0; i
<LENGTHOF(steps
) && ok
; ++i
) { 
 784         if(step
<0 && !cc
.finalFlush
) { 
 785             // skip ucnv_getNextUChar() if !finalFlush because 
 786             // ucnv_getNextUChar() always implies flush 
 790             // bulk test is first, then offsets are not checked any more 
 793         errorCode
=U_ZERO_ERROR
; 
 794         resultLength
=stepToUnicode(cc
, cnv
, 
 795                                 result
, LENGTHOF(result
), 
 796                                 step
==0 ? resultOffsets 
: NULL
, 
 799                 cc
, cnv
, steps
[i
].name
, 
 800                 result
, resultLength
, 
 801                 cc
.offsets
!=NULL 
? resultOffsets 
: NULL
, 
 803         if(U_FAILURE(errorCode
) || !cc
.finalFlush
) { 
 804             // reset if an error occurred or we did not flush 
 805             // otherwise do nothing to make sure that flushing resets 
 806             ucnv_resetToUnicode(cnv
); 
 810     // not a real loop, just a convenience for breaking out of the block 
 811     while(ok 
&& cc
.finalFlush
) { 
 812         // test ucnv_toUChars() 
 813         memset(result
, 0, sizeof(result
)); 
 815         errorCode
=U_ZERO_ERROR
; 
 816         resultLength
=ucnv_toUChars(cnv
, 
 817                         result
, LENGTHOF(result
), 
 818                         (const char *)cc
.bytes
, cc
.bytesLength
, 
 822                 result
, resultLength
, 
 830         // keep the correct result for simple checking 
 831         errorCode
=U_ZERO_ERROR
; 
 832         resultLength
=ucnv_toUChars(cnv
, 
 834                         (const char *)cc
.bytes
, cc
.bytesLength
, 
 836         if(errorCode
==U_STRING_NOT_TERMINATED_WARNING 
|| errorCode
==U_BUFFER_OVERFLOW_ERROR
) { 
 837             errorCode
=U_ZERO_ERROR
; 
 840                 cc
, cnv
, "preflight toUChars", 
 841                 result
, resultLength
, 
 852 ConversionTest::checkToUnicode(ConversionCase 
&cc
, UConverter 
*cnv
, const char *name
, 
 853                                const UChar 
*result
, int32_t resultLength
, 
 854                                const int32_t *resultOffsets
, 
 855                                UErrorCode resultErrorCode
) { 
 856     char resultInvalidChars
[8]; 
 857     int8_t resultInvalidLength
; 
 858     UErrorCode errorCode
; 
 862     // reset the message; NULL will mean "ok" 
 865     errorCode
=U_ZERO_ERROR
; 
 866     resultInvalidLength
=sizeof(resultInvalidChars
); 
 867     ucnv_getInvalidChars(cnv
, resultInvalidChars
, &resultInvalidLength
, &errorCode
); 
 868     if(U_FAILURE(errorCode
)) { 
 869         errln("toUnicode[%d](%s cb=\"%s\" fb=%d flush=%d %s) ucnv_getInvalidChars() failed - %s", 
 870                 cc
.caseNr
, cc
.charset
, cc
.cbopt
, cc
.fallbacks
, cc
.finalFlush
, name
, u_errorName(errorCode
)); 
 874     // check everything that might have gone wrong 
 875     if(cc
.unicodeLength
!=resultLength
) { 
 876         msg
="wrong result length"; 
 877     } else if(0!=u_memcmp(cc
.unicode
, result
, cc
.unicodeLength
)) { 
 878         msg
="wrong result string"; 
 879     } else if(cc
.offsets
!=NULL 
&& 0!=memcmp(cc
.offsets
, resultOffsets
, cc
.unicodeLength
*sizeof(*cc
.offsets
))) { 
 881     } else if(cc
.outErrorCode
!=resultErrorCode
) { 
 882         msg
="wrong error code"; 
 883     } else if(cc
.invalidLength
!=resultInvalidLength
) { 
 884         msg
="wrong length of last invalid input"; 
 885     } else if(0!=memcmp(cc
.invalidChars
, resultInvalidChars
, cc
.invalidLength
)) { 
 886         msg
="wrong last invalid input"; 
 892         char buffer
[2000]; // one buffer for all strings 
 893         char *s
, *bytesString
, *unicodeString
, *resultString
, 
 894             *offsetsString
, *resultOffsetsString
, 
 895             *invalidCharsString
, *resultInvalidCharsString
; 
 897         bytesString
=s
=buffer
; 
 898         s
=printBytes(cc
.bytes
, cc
.bytesLength
, bytesString
); 
 899         s
=printUnicode(cc
.unicode
, cc
.unicodeLength
, unicodeString
=s
); 
 900         s
=printUnicode(result
, resultLength
, resultString
=s
); 
 901         s
=printOffsets(cc
.offsets
, cc
.unicodeLength
, offsetsString
=s
); 
 902         s
=printOffsets(resultOffsets
, resultLength
, resultOffsetsString
=s
); 
 903         s
=printBytes(cc
.invalidChars
, cc
.invalidLength
, invalidCharsString
=s
); 
 904         s
=printBytes((uint8_t *)resultInvalidChars
, resultInvalidLength
, resultInvalidCharsString
=s
); 
 906         if((s
-buffer
)>(int32_t)sizeof(buffer
)) { 
 907             errln("toUnicode[%d](%s cb=\"%s\" fb=%d flush=%d %s) fatal error: checkToUnicode() test output buffer overflow writing %d chars\n", 
 908                     cc
.caseNr
, cc
.charset
, cc
.cbopt
, cc
.fallbacks
, cc
.finalFlush
, name
, (int)(s
-buffer
)); 
 912         errln("toUnicode[%d](%s cb=\"%s\" fb=%d flush=%d %s) failed: %s\n" 
 914               " expected <%s>[%d]\n" 
 917               "  result offsets <%s>\n" 
 918               " error code expected %s got %s\n" 
 919               "  invalidChars expected <%s> got <%s>\n", 
 920               cc
.caseNr
, cc
.charset
, cc
.cbopt
, cc
.fallbacks
, cc
.finalFlush
, name
, msg
, 
 921               bytesString
, cc
.bytesLength
, 
 922               unicodeString
, cc
.unicodeLength
, 
 923               resultString
, resultLength
, 
 926               u_errorName(cc
.outErrorCode
), u_errorName(resultErrorCode
), 
 927               invalidCharsString
, resultInvalidCharsString
); 
// fromUnicode test worker functions --------------------------------------- ***
 936 stepFromUnicode(ConversionCase 
&cc
, UConverter 
*cnv
, 
 937                 char *result
, int32_t resultCapacity
, 
 938                 int32_t *resultOffsets
, /* also resultCapacity */ 
 940                 UErrorCode 
*pErrorCode
) { 
 941     const UChar 
*source
, *sourceLimit
, *unicodeLimit
; 
 942     char *target
, *targetLimit
, *resultLimit
; 
 947     unicodeLimit
=source
+cc
.unicodeLength
; 
 948     resultLimit
=result
+resultCapacity
; 
 950     // call ucnv_fromUnicode() with in/out buffers no larger than (step) at a time 
 951     // move only one buffer (in vs. out) at a time to be extra mean 
 952     // step==0 performs bulk conversion and generates offsets 
 954     // initialize the partial limits for the loop 
 956         // use the entire buffers 
 957         sourceLimit
=unicodeLimit
; 
 958         targetLimit
=resultLimit
; 
 961         // start with empty partial buffers 
 966         // output offsets only for bulk conversion 
 971         // resetting the opposite conversion direction must not affect this one 
 972         ucnv_resetToUnicode(cnv
); 
 975         ucnv_fromUnicode(cnv
, 
 976             &target
, targetLimit
, 
 977             &source
, sourceLimit
, 
 981         // check pointers and errors 
 982         if(source
>sourceLimit 
|| target
>targetLimit
) { 
 983             *pErrorCode
=U_INTERNAL_PROGRAM_ERROR
; 
 985         } else if(*pErrorCode
==U_BUFFER_OVERFLOW_ERROR
) { 
 986             if(target
!=targetLimit
) { 
 987                 // buffer overflow must only be set when the target is filled 
 988                 *pErrorCode
=U_INTERNAL_PROGRAM_ERROR
; 
 990             } else if(targetLimit
==resultLimit
) { 
 991                 // not just a partial overflow 
 995             // the partial target is filled, set a new limit, reset the error and continue 
 996             targetLimit
=(resultLimit
-target
)>=step 
? target
+step 
: resultLimit
; 
 997             *pErrorCode
=U_ZERO_ERROR
; 
 998         } else if(U_FAILURE(*pErrorCode
)) { 
 999             // some other error occurred, done 
1002             if(source
!=sourceLimit
) { 
1003                 // when no error occurs, then the input must be consumed 
1004                 *pErrorCode
=U_INTERNAL_PROGRAM_ERROR
; 
1008             if(sourceLimit
==unicodeLimit
) { 
1013             // the partial conversion succeeded, set a new limit and continue 
1014             sourceLimit
=(unicodeLimit
-source
)>=step 
? source
+step 
: unicodeLimit
; 
1015             flush
=(UBool
)(cc
.finalFlush 
&& sourceLimit
==unicodeLimit
); 
1019     return (int32_t)(target
-result
); 
// Runs one data-driven from-Unicode test case (cc) through a converter and
// validates every variant of the conversion with checkFromUnicode().
//
// Visible sequence:
//   1. open a converter for cc.charset with cnv_open(); report via errln() on failure
//   2. if callback is non-NULL, install it with ucnv_setFromUCallBack(), passing option along
//   3. apply cc.fallbacks with ucnv_setFallback()
//   4. if cc.subchar is a non-empty string, install it with ucnv_setSubstChars()
//   5. for each entry of steps[] (the "bulk" entry first) call stepFromUnicode() and
//      check the output; the loop ends early once a check reports failure
//   6. only when everything passed and cc.finalFlush is set: test ucnv_fromUChars()
//      and then the preflighting variant of ucnv_fromUChars()
//
// Parameters:
//   cc       - the test case: input text, expected bytes/offsets, and option flags
//   callback - from-Unicode callback to install, or NULL to skip installing one
//   option   - passed to ucnv_setFromUCallBack() together with callback
//
// NOTE(review): this listing is missing several original lines (return type, the
// declarations of cnv/result/length/step/i/ok, the rest of steps[], the error-path
// cleanup and the final return). The comments describe only the visible statements.
1023 ConversionTest::FromUnicodeCase(ConversionCase 
&cc
, UConverterFromUCallback callback
, const char *option
) { 
1025     UErrorCode errorCode
; 
1027     // open the converter 
1028     errorCode
=U_ZERO_ERROR
; 
1029     cnv
=cnv_open(cc
.charset
, errorCode
); 
1030     if(U_FAILURE(errorCode
)) { 
1031         errln("fromUnicode[%d](%s cb=\"%s\" fb=%d flush=%d) ucnv_open() failed - %s", 
1032                 cc
.caseNr
, cc
.charset
, cc
.cbopt
, cc
.fallbacks
, cc
.finalFlush
, u_errorName(errorCode
)); 
         // a NULL callback means no ucnv_setFromUCallBack() call is made at all
1037     if(callback
!=NULL
) { 
1038         ucnv_setFromUCallBack(cnv
, callback
, option
, NULL
, NULL
, &errorCode
); 
1039         if(U_FAILURE(errorCode
)) { 
1040             errln("fromUnicode[%d](%s cb=\"%s\" fb=%d flush=%d) ucnv_setFromUCallBack() failed - %s", 
1041                     cc
.caseNr
, cc
.charset
, cc
.cbopt
, cc
.fallbacks
, cc
.finalFlush
, u_errorName(errorCode
)); 
1047     // set the fallbacks flag 
1048     // TODO change with Jitterbug 2401, then add a similar call for toUnicode too 
1049     ucnv_setFallback(cnv
, cc
.fallbacks
); 
         // substitution characters are only overridden when the test case supplies some
         // (cc.subchar is a non-empty C string)
         // NOTE(review): length is narrowed to int8_t for the call; assumes cc.subchar
         // is short enough for that - confirm against the test data parser
         // NOTE(review): the error message below names ucnv_setSubChars() while the
         // function actually called is ucnv_setSubstChars()
1054     if((length
=(int32_t)strlen(cc
.subchar
))!=0) { 
1055         ucnv_setSubstChars(cnv
, cc
.subchar
, (int8_t)length
, &errorCode
); 
1056         if(U_FAILURE(errorCode
)) { 
1057             errln("fromUnicode[%d](%s cb=\"%s\" fb=%d flush=%d) ucnv_setSubChars() failed - %s", 
1058                     cc
.caseNr
, cc
.charset
, cc
.cbopt
, cc
.fallbacks
, cc
.finalFlush
, u_errorName(errorCode
)); 
1064     int32_t resultOffsets
[200]; 
1066     int32_t resultLength
; 
1069     static const struct { 
1073         { 0, "bulk" }, // must be first for offsets to be checked 
         // run every steps[] entry; "&& ok" stops the loop after the first failed check
1081     for(i
=0; i
<LENGTHOF(steps
) && ok
; ++i
) { 
1084             // bulk test is first, then offsets are not checked any more 
1087         errorCode
=U_ZERO_ERROR
; 
             // offsets are requested from stepFromUnicode() only for step==0
1088         resultLength
=stepFromUnicode(cc
, cnv
, 
1089                                 result
, LENGTHOF(result
), 
1090                                 step
==0 ? resultOffsets 
: NULL
, 
             // resultOffsets is handed to the checker only while cc.offsets is non-NULL
1092         ok
=checkFromUnicode( 
1093                 cc
, cnv
, steps
[i
].name
, 
1094                 (uint8_t *)result
, resultLength
, 
1095                 cc
.offsets
!=NULL 
? resultOffsets 
: NULL
, 
1097         if(U_FAILURE(errorCode
) || !cc
.finalFlush
) { 
1098             // reset if an error occurred or we did not flush 
1099             // otherwise do nothing to make sure that flushing resets 
1100             ucnv_resetFromUnicode(cnv
); 
1104     // not a real loop, just a convenience for breaking out of the block 
1105     while(ok 
&& cc
.finalFlush
) { 
1106         // test ucnv_fromUChars() 
             // clear the output buffer so bytes left over from the step tests cannot
             // make this check pass by accident
1107         memset(result
, 0, sizeof(result
)); 
1109         errorCode
=U_ZERO_ERROR
; 
1110         resultLength
=ucnv_fromUChars(cnv
, 
1111                         result
, LENGTHOF(result
), 
1112                         cc
.unicode
, cc
.unicodeLength
, 
1114         ok
=checkFromUnicode( 
1115                 cc
, cnv
, "fromUChars", 
1116                 (uint8_t *)result
, resultLength
, 
1123         // test preflighting 
1124         // keep the correct result for simple checking 
             // U_STRING_NOT_TERMINATED_WARNING and U_BUFFER_OVERFLOW_ERROR are both
             // normalized to U_ZERO_ERROR below before the result is checked
             // NOTE(review): the destination arguments of this call are not visible in
             // this listing
1125         errorCode
=U_ZERO_ERROR
; 
1126         resultLength
=ucnv_fromUChars(cnv
, 
1128                         cc
.unicode
, cc
.unicodeLength
, 
1130         if(errorCode
==U_STRING_NOT_TERMINATED_WARNING 
|| errorCode
==U_BUFFER_OVERFLOW_ERROR
) { 
1131             errorCode
=U_ZERO_ERROR
; 
1133         ok
=checkFromUnicode( 
1134                 cc
, cnv
, "preflight fromUChars", 
1135                 (uint8_t *)result
, resultLength
, 
// Compares the outcome of one from-Unicode conversion with the expectations stored
// in the test case and reports any mismatch through errln().
//
// Parameters:
//   cc              - test case holding the expected bytes, offsets, error code and
//                     last-invalid-input UChars
//   cnv             - the converter that produced the result; queried here with
//                     ucnv_getInvalidUChars()
//   name            - label of the conversion variant, inserted into every message
//   result          - actual output bytes
//   resultLength    - number of actual output bytes
//   resultOffsets   - actual offsets; compared only when cc.offsets is non-NULL
//   resultErrorCode - error code the conversion ended with
//
// Checks run in this order and only the first mismatch is recorded in msg:
// result length, result bytes, offsets, error code, invalid-input length,
// invalid-input contents.
//
// NOTE(review): this listing is missing several original lines (return type, the
// declaration of msg, the success path, parts of the final format string and its
// arguments, and the closing lines). The comments describe only what is visible.
1146 ConversionTest::checkFromUnicode(ConversionCase 
&cc
, UConverter 
*cnv
, const char *name
, 
1147                                  const uint8_t *result
, int32_t resultLength
, 
1148                                  const int32_t *resultOffsets
, 
1149                                  UErrorCode resultErrorCode
) { 
1150     UChar resultInvalidUChars
[8]; 
1151     int8_t resultInvalidLength
; 
1152     UErrorCode errorCode
; 
1156     // reset the message; NULL will mean "ok" 
         // fetch the converter's last invalid input: resultInvalidLength carries the
         // array capacity in and is compared against cc.invalidLength afterwards
1159     errorCode
=U_ZERO_ERROR
; 
1160     resultInvalidLength
=LENGTHOF(resultInvalidUChars
); 
1161     ucnv_getInvalidUChars(cnv
, resultInvalidUChars
, &resultInvalidLength
, &errorCode
); 
1162     if(U_FAILURE(errorCode
)) { 
1163         errln("fromUnicode[%d](%s cb=\"%s\" fb=%d flush=%d %s) ucnv_getInvalidUChars() failed - %s", 
1164                 cc
.caseNr
, cc
.charset
, cc
.cbopt
, cc
.fallbacks
, cc
.finalFlush
, name
, u_errorName(errorCode
)); 
1168     // check everything that might have gone wrong 
         // the tests are chained with else-if, so later comparisons can rely on the
         // earlier ones having matched (e.g. memcmp uses cc.bytesLength only after the
         // lengths were found equal); offsets are compared one per expected byte
1169     if(cc
.bytesLength
!=resultLength
) { 
1170         msg
="wrong result length"; 
1171     } else if(0!=memcmp(cc
.bytes
, result
, cc
.bytesLength
)) { 
1172         msg
="wrong result string"; 
1173     } else if(cc
.offsets
!=NULL 
&& 0!=memcmp(cc
.offsets
, resultOffsets
, cc
.bytesLength
*sizeof(*cc
.offsets
))) { 
1174         msg
="wrong offsets"; 
1175     } else if(cc
.outErrorCode
!=resultErrorCode
) { 
1176         msg
="wrong error code"; 
1177     } else if(cc
.invalidLength
!=resultInvalidLength
) { 
1178         msg
="wrong length of last invalid input"; 
1179     } else if(0!=u_memcmp(cc
.invalidUChars
, resultInvalidUChars
, cc
.invalidLength
)) { 
1180         msg
="wrong last invalid input"; 
1186         char buffer
[2000]; // one buffer for all strings 
1187         char *s
, *unicodeString
, *bytesString
, *resultString
, 
1188             *offsetsString
, *resultOffsetsString
, 
1189             *invalidCharsString
, *resultInvalidUCharsString
; 
             // each print helper is given the current position s and its return value
             // becomes the next s, so all strings end up back to back in buffer
             // (presumably the helpers write at the pointer passed and return the
             // position just past their output - confirm against their definitions)
1191         unicodeString
=s
=buffer
; 
1192         s
=printUnicode(cc
.unicode
, cc
.unicodeLength
, unicodeString
); 
1193         s
=printBytes(cc
.bytes
, cc
.bytesLength
, bytesString
=s
); 
1194         s
=printBytes(result
, resultLength
, resultString
=s
); 
1195         s
=printOffsets(cc
.offsets
, cc
.bytesLength
, offsetsString
=s
); 
1196         s
=printOffsets(resultOffsets
, resultLength
, resultOffsetsString
=s
); 
1197         s
=printUnicode(cc
.invalidUChars
, cc
.invalidLength
, invalidCharsString
=s
); 
1198         s
=printUnicode(resultInvalidUChars
, resultInvalidLength
, resultInvalidUCharsString
=s
); 
             // NOTE(review): this size test runs only after all helpers have returned,
             // so it can report that buffer was overrun but cannot prevent it
1200         if((s
-buffer
)>(int32_t)sizeof(buffer
)) { 
1201             errln("fromUnicode[%d](%s cb=\"%s\" fb=%d flush=%d %s) fatal error: checkFromUnicode() test output buffer overflow writing %d chars\n", 
1202                     cc
.caseNr
, cc
.charset
, cc
.cbopt
, cc
.fallbacks
, cc
.finalFlush
, name
, (int)(s
-buffer
)); 
             // full failure report: test case identification, the reason in msg, then
             // expected versus actual values
1206         errln("fromUnicode[%d](%s cb=\"%s\" fb=%d flush=%d %s) failed: %s\n" 
1207               "  unicode <%s>[%d]\n" 
1208               " expected <%s>[%d]\n" 
1209               "  result  <%s>[%d]\n" 
1211               "  result offsets <%s>\n" 
1212               " error code expected %s got %s\n" 
1213               "  invalidChars expected <%s> got <%s>\n", 
1214               cc
.caseNr
, cc
.charset
, cc
.cbopt
, cc
.fallbacks
, cc
.finalFlush
, name
, msg
, 
1215               unicodeString
, cc
.unicodeLength
, 
1216               bytesString
, cc
.bytesLength
, 
1217               resultString
, resultLength
, 
1219               resultOffsetsString
, 
1220               u_errorName(cc
.outErrorCode
), u_errorName(resultErrorCode
), 
1221               invalidCharsString
, resultInvalidUCharsString
); 
1227 #endif /* #if !UCONFIG_NO_LEGACY_CONVERSION */