1 // © 2016 and later: Unicode, Inc. and others. 
   2 // License & terms of use: http://www.unicode.org/copyright.html 
   4 ********************************************************************** 
   5 *   Copyright (C) 1999-2016, International Business Machines 
   6 *   Corporation and others.  All Rights Reserved. 
   7 ********************************************************************** 
   8 *   Date        Name        Description 
   9 *   11/10/99    aliu        Creation. 
  10 ********************************************************************** 
  13 #include "unicode/utypes.h" 
  15 #if !UCONFIG_NO_TRANSLITERATION 
  18 #include "unicode/locid.h" 
  19 #include "unicode/dtfmtsym.h" 
  20 #include "unicode/normlzr.h" 
  21 #include "unicode/translit.h" 
  22 #include "unicode/uchar.h" 
  23 #include "unicode/unifilt.h" 
  24 #include "unicode/uniset.h" 
  25 #include "unicode/ustring.h" 
  26 #include "unicode/usetiter.h" 
  27 #include "unicode/uscript.h" 
  28 #include "unicode/utf16.h" 
  47 /*********************************************************************** 
  49                      HOW TO USE THIS TEST FILE 
  51                   How I developed on two platforms 
  52                 without losing (too much of) my mind 
  55 1. Add new tests by copying/pasting/changing existing tests.  On Java, 
  56    any public void method named Test...() taking no parameters becomes 
  57    a test.  On C++, you need to modify the header and add a line to 
  58    the runIndexedTest() dispatch method. 
  60 2. Make liberal use of the expect() method; it is your friend. 
  62 3. The tests in this file exactly match those in a sister file on the 
  63    other side.  The two files are: 
  65    icu4j:  src/com/ibm/test/translit/TransliteratorTest.java 
  66    icu4c:  source/test/intltest/transtst.cpp 
  68                   ==> THIS IS THE IMPORTANT PART <== 
  70    When you add a test in this file, add it in TransliteratorTest.java 
  71    too.  Give it the same name and put it in the same relative place. 
  72    This makes maintenance a lot simpler for any poor soul who ends up 
  73    trying to synchronize the tests between icu4j and icu4c. 
  75 4. If you MUST enter a test that is NOT paralleled in the sister file, 
  76    then add it in the special non-mirrored section.  These are 
  85    Make sure you document the reason the test is here and not there. 
  90 ***********************************************************************/ 
  92 // Define character constants thusly to be EBCDIC-friendly 
  94     LEFT_BRACE
=((UChar
)0x007B), /*{*/ 
  95     PIPE      
=((UChar
)0x007C), /*|*/ 
  96     ZERO      
=((UChar
)0x0030), /*0*/ 
  97     UPPER_A   
=((UChar
)0x0041)  /*A*/ 
 100 TransliteratorTest::TransliteratorTest() 
 101 :   DESERET_DEE((UChar32
)0x10414), 
 102     DESERET_dee((UChar32
)0x1043C) 
 106 TransliteratorTest::~TransliteratorTest() {} 
 109 TransliteratorTest::runIndexedTest(int32_t index
, UBool exec
, 
 110                                    const char* &name
, char* /*par*/) { 
 112         TESTCASE(0,TestInstantiation
); 
 113         TESTCASE(1,TestSimpleRules
); 
 114         TESTCASE(2,TestRuleBasedInverse
); 
 115         TESTCASE(3,TestKeyboard
); 
 116         TESTCASE(4,TestKeyboard2
); 
 117         TESTCASE(5,TestKeyboard3
); 
 118         TESTCASE(6,TestArabic
); 
 119         TESTCASE(7,TestCompoundKana
); 
 120         TESTCASE(8,TestCompoundHex
); 
 121         TESTCASE(9,TestFiltering
); 
 122         TESTCASE(10,TestInlineSet
); 
 123         TESTCASE(11,TestPatternQuoting
); 
 124         TESTCASE(12,TestJ277
); 
 125         TESTCASE(13,TestJ243
); 
 126         TESTCASE(14,TestJ329
); 
 127         TESTCASE(15,TestSegments
); 
 128         TESTCASE(16,TestCursorOffset
); 
 129         TESTCASE(17,TestArbitraryVariableValues
); 
 130         TESTCASE(18,TestPositionHandling
); 
 131         TESTCASE(19,TestHiraganaKatakana
); 
 132         TESTCASE(20,TestCopyJ476
); 
 133         TESTCASE(21,TestAnchors
); 
 134         TESTCASE(22,TestInterIndic
); 
 135         TESTCASE(23,TestFilterIDs
); 
 136         TESTCASE(24,TestCaseMap
); 
 137         TESTCASE(25,TestNameMap
); 
 138         TESTCASE(26,TestLiberalizedID
); 
 139         TESTCASE(27,TestCreateInstance
); 
 140         TESTCASE(28,TestNormalizationTransliterator
); 
 141         TESTCASE(29,TestCompoundRBT
); 
 142         TESTCASE(30,TestCompoundFilter
); 
 143         TESTCASE(31,TestRemove
); 
 144         TESTCASE(32,TestToRules
); 
 145         TESTCASE(33,TestContext
); 
 146         TESTCASE(34,TestSupplemental
); 
 147         TESTCASE(35,TestQuantifier
); 
 148         TESTCASE(36,TestSTV
); 
 149         TESTCASE(37,TestCompoundInverse
); 
 150         TESTCASE(38,TestNFDChainRBT
); 
 151         TESTCASE(39,TestNullInverse
); 
 152         TESTCASE(40,TestAliasInverseID
); 
 153         TESTCASE(41,TestCompoundInverseID
); 
 154         TESTCASE(42,TestUndefinedVariable
); 
 155         TESTCASE(43,TestEmptyContext
); 
 156         TESTCASE(44,TestCompoundFilterID
); 
 157         TESTCASE(45,TestPropertySet
); 
 158         TESTCASE(46,TestNewEngine
); 
 159         TESTCASE(47,TestQuantifiedSegment
); 
 160         TESTCASE(48,TestDevanagariLatinRT
); 
 161         TESTCASE(49,TestTeluguLatinRT
); 
 162         TESTCASE(50,TestCompoundLatinRT
); 
 163         TESTCASE(51,TestSanskritLatinRT
); 
 164         TESTCASE(52,TestLocaleInstantiation
); 
 165         TESTCASE(53,TestTitleAccents
); 
 166         TESTCASE(54,TestLocaleResource
); 
 167         TESTCASE(55,TestParseError
); 
 168         TESTCASE(56,TestOutputSet
); 
 169         TESTCASE(57,TestVariableRange
); 
 170         TESTCASE(58,TestInvalidPostContext
); 
 171         TESTCASE(59,TestIDForms
); 
 172         TESTCASE(60,TestToRulesMark
); 
 173         TESTCASE(61,TestEscape
); 
 174         TESTCASE(62,TestAnchorMasking
); 
 175         TESTCASE(63,TestDisplayName
); 
 176         TESTCASE(64,TestSpecialCases
); 
 177 #if !UCONFIG_NO_FILE_IO 
 178         TESTCASE(65,TestIncrementalProgress
); 
 180         TESTCASE(66,TestSurrogateCasing
); 
 181         TESTCASE(67,TestFunction
); 
 182         TESTCASE(68,TestInvalidBackRef
); 
 183         TESTCASE(69,TestMulticharStringSet
); 
 184         TESTCASE(70,TestUserFunction
); 
 185         TESTCASE(71,TestAnyX
); 
 186         TESTCASE(72,TestSourceTargetSet
); 
 187         TESTCASE(73,TestGurmukhiDevanagari
); 
 188         TESTCASE(74,TestPatternWhiteSpace
); 
 189         TESTCASE(75,TestAllCodepoints
); 
 190         TESTCASE(76,TestBoilerplate
); 
 191         TESTCASE(77,TestAlternateSyntax
); 
 192         TESTCASE(78,TestBeginEnd
); 
 193         TESTCASE(79,TestBeginEndToRules
); 
 194         TESTCASE(80,TestRegisterAlias
); 
 195         TESTCASE(81,TestRuleStripping
); 
 196         TESTCASE(82,TestHalfwidthFullwidth
); 
 197         TESTCASE(83,TestThai
); 
 198         TESTCASE(84,TestAny
); 
 199         default: name 
= ""; break; 
 204  * Make sure every system transliterator can be instantiated. 
 206  * ALSO test that the result of toRules() for each rule is a valid 
 207  * rule.  Do this here so we don't have to have another test that 
 208  * instantiates everything as well. 
 210 void TransliteratorTest::TestInstantiation() { 
 211     UErrorCode ec 
= U_ZERO_ERROR
; 
 212     StringEnumeration
* avail 
= Transliterator::getAvailableIDs(ec
); 
 213     assertSuccess("getAvailableIDs()", ec
); 
 214     assertTrue("getAvailableIDs()!=NULL", avail
!=NULL
); 
 215     int32_t n 
= Transliterator::countAvailableIDs(); 
 216     assertTrue("getAvailableIDs().count()==countAvailableIDs()", 
 217                avail
->count(ec
) == n
); 
 218     assertSuccess("count()", ec
); 
 220     for (int32_t i
=0; i
<n
; ++i
) { 
 221         const UnicodeString
& id 
= *avail
->snext(ec
); 
 222         if (!assertSuccess("snext()", ec
) || 
 223             !assertTrue("snext()!=NULL", (&id
)!=NULL
, TRUE
)) { 
 226         UnicodeString id2 
= Transliterator::getAvailableID(i
); 
 227         if (id
.length() < 1) { 
 228             errln(UnicodeString("FAIL: getAvailableID(") + 
 229                   i 
+ ") returned empty string"); 
 233             errln(UnicodeString("FAIL: getAvailableID(") + 
 234                   i 
+ ") != getAvailableIDs().snext()"); 
 237         UParseError parseError
; 
 238         UErrorCode status 
= U_ZERO_ERROR
; 
 239         Transliterator
* t 
= Transliterator::createInstance(id
, 
 240                               UTRANS_FORWARD
, parseError
,status
); 
 242         Transliterator::getDisplayName(id
, name
); 
 244 #if UCONFIG_NO_BREAK_ITERATION 
 245             // If UCONFIG_NO_BREAK_ITERATION is on, then only Thai should fail. 
 246             if (id
.compare((UnicodeString
)"Thai-Latn") != 0 && 
 247                 id
.compare((UnicodeString
)"Thai-Latin") != 0) 
 249                 dataerrln(UnicodeString("FAIL: Couldn't create ") + id 
+ 
 250                       /*", parse error " + parseError.code +*/ 
 251                       ", line " + parseError
.line 
+ 
 252                       ", offset " + parseError
.offset 
+ 
 253                       ", pre-context " + prettify(parseError
.preContext
, TRUE
) + 
 254                       ", post-context " +prettify(parseError
.postContext
,TRUE
) + 
 255                       ", Error: " + u_errorName(status
)); 
 256                 // When createInstance fails, it deletes the failing 
 257                 // entry from the available ID list.  We detect this 
 258                 // here by looking for a change in countAvailableIDs. 
 259             int32_t nn 
= Transliterator::countAvailableIDs(); 
 262                 --i
; // Compensate for deleted entry 
 265             logln(UnicodeString("OK: ") + name 
+ " (" + id 
+ ")"); 
 269             t
->toRules(rules
, TRUE
); 
 270             Transliterator 
*u 
= Transliterator::createFromRules("x", 
 271                                     rules
, UTRANS_FORWARD
, parseError
,status
); 
 273                 errln(UnicodeString("FAIL: ") + id 
+ 
 274                       ".createFromRules() => bad rules" + 
 275                       /*", parse error " + parseError.code +*/ 
 276                       ", line " + parseError
.line 
+ 
 277                       ", offset " + parseError
.offset 
+ 
 278                       ", context " + prettify(parseError
.preContext
, TRUE
) + 
 279                       ", rules: " + prettify(rules
, TRUE
)); 
 286     assertTrue("snext()==NULL", avail
->snext(ec
)==NULL
); 
 287     assertSuccess("snext()", ec
); 
 290     // Now test the failure path 
 291     UParseError parseError
; 
 292     UErrorCode status 
= U_ZERO_ERROR
; 
 293     UnicodeString 
id("<Not a valid Transliterator ID>"); 
 294     Transliterator
* t 
= Transliterator::createInstance(id
, UTRANS_FORWARD
, parseError
, status
); 
 296         errln("FAIL: " + id 
+ " returned a transliterator"); 
 299         logln("OK: Bogus ID handled properly"); 
 303 void TransliteratorTest::TestSimpleRules(void) { 
 304     /* Example: rules 1. ab>x|y 
  307      * []|eabcd  start - no match, copy e to translated buffer 
 308      * [e]|abcd  match rule 1 - copy output & adjust cursor 
 309      * [ex|y]cd  match rule 2 - copy output & adjust cursor 
 310      * [exz]|d   no match, copy d to transliterated buffer 
 313     expect(UnicodeString("ab>x|y;", "") + 
 317     /* Another set of rules: 
 329     expect(UnicodeString("ab>x|yzacw;") + 
 337     UErrorCode status 
= U_ZERO_ERROR
; 
 338     UParseError parseError
; 
 339     Transliterator 
*t 
= Transliterator::createFromRules( 
 341         UnicodeString("$dummy=").append((UChar
)0xE100) + 
 343                       "$vowel=[aeiouAEIOU];" 
 345                       "$vowel } $lu > '!';" 
 350         UTRANS_FORWARD
, parseError
, 
 352     if (U_FAILURE(status
)) { 
 353         dataerrln("FAIL: RBT constructor failed - %s", u_errorName(status
)); 
 356     expect(*t
, "abcdefgABCDEFGU", "&bcd&fg!^**!^*&"); 
 361  * Test inline set syntax and set variable syntax. 
 363 void TransliteratorTest::TestInlineSet(void) { 
 364     expect("{ [:Ll:] } x > y; [:Ll:] > z;", "aAbxq", "zAyzz"); 
 365     expect("a[0-9]b > qrs", "1a7b9", "1qrs9"); 
 367     expect(UnicodeString( 
 370            "$alphanumeric = [$digit $alpha];" // *** 
 371            "$special = [^$alphanumeric];"     // *** 
 372            "$alphanumeric > '-';" 
 373            "$special > '*';", ""), 
 375            "thx-1138", "---*----"); 
 379  * Create some inverses and confirm that they work.  We have to be 
 380  * careful how we do this, since the inverses will not be true 
 381  * inverses -- we can't throw any random string at the composition 
 382  * of the transliterators and expect the identity function.  F x 
 383  * F' != I.  However, if we are careful about the input, we will 
 384  * get the expected results. 
 386 void TransliteratorTest::TestRuleBasedInverse(void) { 
 387     UnicodeString RULES 
= 
 388         UnicodeString("abc>zyx;") + 
 406     const char* DATA
[] = { 
 407         // Careful here -- random strings will not work.  If we keep 
 408         // the left side to the domain and the right side to the range 
 409         // we will be okay though (left, abc; right xyz). 
 411         "abcacab", "zyxxxyy", 
 415     int32_t DATA_length 
= UPRV_LENGTHOF(DATA
); 
 417     UErrorCode status 
= U_ZERO_ERROR
; 
 418     UParseError parseError
; 
 419     Transliterator 
*fwd 
= Transliterator::createFromRules("<ID>", RULES
, 
 420                                 UTRANS_FORWARD
, parseError
, status
); 
 421     Transliterator 
*rev 
= Transliterator::createFromRules("<ID>", RULES
, 
 422                                 UTRANS_REVERSE
, parseError
, status
); 
 423     if (U_FAILURE(status
)) { 
 424         errln("FAIL: RBT constructor failed"); 
 427     for (int32_t i
=0; i
<DATA_length
; i
+=2) { 
 428         expect(*fwd
, DATA
[i
], DATA
[i
+1]); 
 429         expect(*rev
, DATA
[i
+1], DATA
[i
]); 
 436  * Basic test of keyboard. 
 438 void TransliteratorTest::TestKeyboard(void) { 
 439     UParseError parseError
; 
 440     UErrorCode status 
= U_ZERO_ERROR
; 
 441     Transliterator 
*t 
= Transliterator::createFromRules("<ID>", 
 442                               UnicodeString("psch>Y;") 
 446                               UTRANS_FORWARD
, parseError
, 
 448     if (U_FAILURE(status
)) { 
 449         errln("FAIL: RBT constructor failed"); 
 452     const char* DATA
[] = { 
 460         0, "AycAY", // null means finishKeyboardTransliteration 
 463     keyboardAux(*t
, DATA
, UPRV_LENGTHOF(DATA
)); 
 468  * Basic test of keyboard with cursor. 
 470 void TransliteratorTest::TestKeyboard2(void) { 
 471     UParseError parseError
; 
 472     UErrorCode status 
= U_ZERO_ERROR
; 
 473     Transliterator 
*t 
= Transliterator::createFromRules("<ID>", 
 474                               UnicodeString("ych>Y;") 
 478                               UTRANS_FORWARD
, parseError
, 
 480     if (U_FAILURE(status
)) { 
 481         errln("FAIL: RBT constructor failed"); 
 484     const char* DATA
[] = { 
 488         "s", "Aps", // modified for rollback - "Ay", 
 489         "c", "Apsc", // modified for rollback - "Ayc", 
 492         "s", "AycAps", // modified for rollback - "AycAy", 
 493         "c", "AycApsc", // modified for rollback - "AycAyc", 
 495         0, "AycAY", // null means finishKeyboardTransliteration 
 498     keyboardAux(*t
, DATA
, UPRV_LENGTHOF(DATA
)); 
 503  * Test keyboard transliteration with back-replacement. 
 505 void TransliteratorTest::TestKeyboard3(void) { 
 506     // We want th>z but t>y.  Furthermore, during keyboard 
 507     // transliteration we want t>y then yh>z if t, then h are 
 509     UnicodeString 
RULES("t>|y;" 
 512     const char* DATA
[] = { 
 513         // Column 1: characters to add to buffer (as if typed) 
 514         // Column 2: expected appearance of buffer after 
 515         //           keyboard xliteration. 
 518         "t", "abt", // modified for rollback - "aby", 
 520         "t", "abyct", // modified for rollback - "abycy", 
 522         0, "abycz", // null means finishKeyboardTransliteration 
 525     UParseError parseError
; 
 526     UErrorCode status 
= U_ZERO_ERROR
; 
 527     Transliterator 
*t 
= Transliterator::createFromRules("<ID>", RULES
, UTRANS_FORWARD
, parseError
, status
); 
 528     if (U_FAILURE(status
)) { 
 529         errln("FAIL: RBT constructor failed"); 
 532     keyboardAux(*t
, DATA
, UPRV_LENGTHOF(DATA
)); 
 536 void TransliteratorTest::keyboardAux(const Transliterator
& t
, 
 537                                      const char* DATA
[], int32_t DATA_length
) { 
 538     UErrorCode status 
= U_ZERO_ERROR
; 
 539     UTransPosition index
={0, 0, 0, 0}; 
 541     for (int32_t i
=0; i
<DATA_length
; i
+=2) { 
 547             t
.transliterate(s
, index
, DATA
[i
], status
); 
 550             t
.finishTransliteration(s
, index
); 
 552         // Show the start index '{' and the cursor '|' 
 553         UnicodeString a
, b
, c
; 
 554         s
.extractBetween(0, index
.contextStart
, a
); 
 555         s
.extractBetween(index
.contextStart
, index
.start
, b
); 
 556         s
.extractBetween(index
.start
, s
.length(), c
); 
 558             append((UChar
)LEFT_BRACE
). 
 562         if (s 
== DATA
[i
+1] && U_SUCCESS(status
)) { 
 565             errln(UnicodeString("FAIL: ") + log 
+ ", expected " + DATA
[i
+1]); 
 570 void TransliteratorTest::TestArabic(void) { 
 571 // Test disabled for 2.0 until new Arabic transliterator can be written. 
 573 //    const char* DATA[] = { 
 574 //        "Arabic", "\u062a\u062a\u0645\u062a\u0639\u0020"+ 
 575 //                  "\u0627\u0644\u0644\u063a\u0629\u0020"+ 
 576 //                  "\u0627\u0644\u0639\u0631\u0628\u0628\u064a\u0629\u0020"+ 
 577 //                  "\u0628\u0628\u0646\u0638\u0645\u0020"+ 
 578 //                  "\u0643\u062a\u0627\u0628\u0628\u064a\u0629\u0020"+ 
 579 //                  "\u062c\u0645\u064a\u0644\u0629", 
 583 //    UChar ar_raw[] = { 
 584 //        0x062a, 0x062a, 0x0645, 0x062a, 0x0639, 0x0020, 0x0627, 
 585 //        0x0644, 0x0644, 0x063a, 0x0629, 0x0020, 0x0627, 0x0644, 
 586 //        0x0639, 0x0631, 0x0628, 0x0628, 0x064a, 0x0629, 0x0020, 
 587 //        0x0628, 0x0628, 0x0646, 0x0638, 0x0645, 0x0020, 0x0643, 
 588 //        0x062a, 0x0627, 0x0628, 0x0628, 0x064a, 0x0629, 0x0020, 
 589 //        0x062c, 0x0645, 0x064a, 0x0644, 0x0629, 0 
 591 //    UnicodeString ar(ar_raw); 
 592 //    UErrorCode status=U_ZERO_ERROR; 
 593 //    UParseError parseError; 
 594 //    Transliterator *t = Transliterator::createInstance("Latin-Arabic", UTRANS_FORWARD, parseError, status); 
 596 //        errln("FAIL: createInstance failed"); 
 599 //    expect(*t, "Arabic", ar); 
 604  * Compose the Kana transliterator forward and reverse and try 
 605  * some strings that should come out unchanged. 
 607 void TransliteratorTest::TestCompoundKana(void) { 
 608     UParseError parseError
; 
 609     UErrorCode status 
= U_ZERO_ERROR
; 
 610     Transliterator
* t 
= Transliterator::createInstance("Latin-Hiragana;Hiragana-Latin", UTRANS_FORWARD
, parseError
, status
); 
 612         dataerrln("FAIL: construction of Latin-Hiragana;Hiragana-Latin failed - %s", u_errorName(status
)); 
 614         expect(*t
, "aaaaa", "aaaaa"); 
 620  * Compose the hex transliterators forward and reverse. 
 622 void TransliteratorTest::TestCompoundHex(void) { 
 623     UParseError parseError
; 
 624     UErrorCode status 
= U_ZERO_ERROR
; 
 625     Transliterator
* a 
= Transliterator::createInstance("Any-Hex", UTRANS_FORWARD
, parseError
, status
); 
 626     Transliterator
* b 
= Transliterator::createInstance("Hex-Any", UTRANS_FORWARD
, parseError
, status
); 
 627     Transliterator
* transab
[] = { a
, b 
}; 
 628     Transliterator
* transba
[] = { b
, a 
}; 
 629     if (a 
== 0 || b 
== 0) { 
 630         errln("FAIL: construction failed"); 
 635     // Do some basic tests of a 
 636     expect(*a
, "01", UnicodeString("\\u0030\\u0031", "")); 
 637     // Do some basic tests of b 
 638     expect(*b
, UnicodeString("\\u0030\\u0031", ""), "01"); 
 640     Transliterator
* ab 
= new CompoundTransliterator(transab
, 2); 
 641     UnicodeString 
s("abcde", ""); 
 644     UnicodeString 
str(s
); 
 645     a
->transliterate(str
); 
 646     Transliterator
* ba 
= new CompoundTransliterator(transba
, 2); 
 647     expect(*ba
, str
, str
); 
 655 int gTestFilterClassID 
= 0; 
 657  * Used by TestFiltering(). 
 659 class TestFilter 
: public UnicodeFilter 
{ 
 660     virtual UnicodeFunctor
* clone() const { 
 661         return new TestFilter(*this); 
 663     virtual UBool 
contains(UChar32 c
) const { 
 664         return c 
!= (UChar
)0x0063 /*c*/; 
 667     virtual UnicodeString
& toPattern(UnicodeString
& result
, 
 668                                      UBool 
/*escapeUnprintable*/) const { 
 671     virtual UBool 
matchesIndexValue(uint8_t /*v*/) const { 
 674     virtual void addMatchSetTo(UnicodeSet
& /*toUnionTo*/) const {} 
 676     UClassID 
getDynamicClassID() const { return (UClassID
)&gTestFilterClassID
; } 
 680  * Do some basic tests of filtering. 
 682 void TransliteratorTest::TestFiltering(void) { 
 683     UParseError parseError
; 
 684     UErrorCode status 
= U_ZERO_ERROR
; 
 685     Transliterator
* hex 
= Transliterator::createInstance("Any-Hex", UTRANS_FORWARD
, parseError
, status
); 
 687         errln("FAIL: createInstance(Any-Hex) failed"); 
 690     hex
->adoptFilter(new TestFilter()); 
 691     UnicodeString 
s("abcde"); 
 692     hex
->transliterate(s
); 
 693     UnicodeString 
exp("\\u0061\\u0062c\\u0064\\u0065", ""); 
 695         logln(UnicodeString("Ok:   \"") + exp 
+ "\""); 
 697         logln(UnicodeString("FAIL: \"") + s 
+ "\", wanted \"" + exp 
+ "\""); 
 700     // ICU4C ONLY. There is no Transliterator.orphanFilter() in ICU4J. 
 701     UnicodeFilter 
*f 
= hex
->orphanFilter(); 
 703         errln("FAIL: orphanFilter() should get a UnicodeFilter"); 
 713 void TransliteratorTest::TestAnchors(void) { 
 714     expect(UnicodeString("^a  > 0; a$ > 2 ; a > 1;", ""), 
 717     expect(UnicodeString("$s=[z$]; $s{a>0; a}$s>2; a>1;", ""), 
 720     expect(UnicodeString("^ab  > 01 ;" 
 728     expect(UnicodeString("$s = [z$] ;" 
 735            "abzababbabxzabxabx", 
 740  * Test pattern quoting and escape mechanisms. 
 742 void TransliteratorTest::TestPatternQuoting(void) { 
 744     // Each item is <rules>, <input>, <expected output> 
 745     const UnicodeString DATA
[] = { 
 746         UnicodeString(UChar(0x4E01)) + ">'[male adult]'", 
 747         UnicodeString(UChar(0x4E01)), 
 751     for (int32_t i
=0; i
<3; i
+=3) { 
 752         logln(UnicodeString("Pattern: ") + prettify(DATA
[i
])); 
 753         UParseError parseError
; 
 754         UErrorCode status 
= U_ZERO_ERROR
; 
 755         Transliterator 
*t 
= Transliterator::createFromRules("<ID>", DATA
[i
], UTRANS_FORWARD
, parseError
, status
); 
 756         if (U_FAILURE(status
)) { 
 757             errln("RBT constructor failed"); 
 759             expect(*t
, DATA
[i
+1], DATA
[i
+2]); 
 766  * Regression test for bugs found in Greek transliteration. 
 768 void TransliteratorTest::TestJ277(void) { 
 769     UErrorCode status 
= U_ZERO_ERROR
; 
 770     UParseError parseError
; 
 771     Transliterator 
*gl 
= Transliterator::createInstance("Greek-Latin; NFD; [:M:]Remove; NFC", UTRANS_FORWARD
, parseError
, status
); 
 773         dataerrln("FAIL: createInstance(Greek-Latin) returned NULL - %s", u_errorName(status
)); 
 778     UChar upsilon 
= 0x3C5; 
 780 //    UChar PHI = 0x3A6; 
 782 //    UChar omega = 0x3C9; 
 783 //    UChar omicron = 0x3BF; 
 784 //    UChar epsilon = 0x3B5; 
 786     // sigma upsilon nu -> syn 
 788     syn
.append(sigma
).append(upsilon
).append(nu
); 
 789     expect(*gl
, syn
, "syn"); 
 791     // sigma alpha upsilon nu -> saun 
 793     sayn
.append(sigma
).append(alpha
).append(upsilon
).append(nu
); 
 794     expect(*gl
, sayn
, "saun"); 
 796     // Again, using a smaller rule set 
 801                 "$ypsilon = \\u03C5;" 
 802                 "$vowel   = [aeiouAEIOU$alpha$ypsilon];" 
 805                 "u <>  $vowel { $ypsilon;" 
 809     Transliterator 
*mini 
= Transliterator::createFromRules("mini", rules
, UTRANS_REVERSE
, parseError
, status
); 
 810     if (U_FAILURE(status
)) { errln("FAIL: Transliterator constructor failed"); return; } 
 811     expect(*mini
, syn
, "syn"); 
 812     expect(*mini
, sayn
, "saun"); 
 816 #if !UCONFIG_NO_FORMATTING 
 817     // Transliterate the Greek locale data 
 819     DateFormatSymbols 
syms(el
, status
); 
 820     if (U_FAILURE(status
)) { errln("FAIL: Transliterator constructor failed"); return; } 
 822     const UnicodeString
* data 
= syms
.getMonths(count
); 
 823     for (i
=0; i
<count
; ++i
) { 
 824         if (data
[i
].length() == 0) { 
 827         UnicodeString 
out(data
[i
]); 
 828         gl
->transliterate(out
); 
 830         if (data
[i
].length() >= 2 && out
.length() >= 2 && 
 831             u_isupper(data
[i
].charAt(0)) && u_islower(data
[i
].charAt(1))) { 
 832             if (!(u_isupper(out
.charAt(0)) && u_islower(out
.charAt(1)))) { 
 837             logln(prettify(data
[i
] + " -> " + out
)); 
 839             errln(UnicodeString("FAIL: ") + prettify(data
[i
] + " -> " + out
)); 
 848  * Prefix, suffix support in hex transliterators 
 850 void TransliteratorTest::TestJ243(void) { 
 851     UErrorCode ec 
= U_ZERO_ERROR
; 
 853     // Test default Hex-Any, which should handle 
 854     // \u, \U, u+, and U+ 
 855     Transliterator 
*hex 
= 
 856         Transliterator::createInstance("Hex-Any", UTRANS_FORWARD
, ec
); 
 857     if (assertSuccess("getInstance", ec
)) { 
 858         expect(*hex
, UnicodeString("\\u0041+\\U00000042,U+0043uU+0044z", ""), "A+B,CuDz"); 
 862 //    // Try a custom Hex-Unicode 
 863 //    // \uXXXX and &#xXXXX; 
 864 //    ec = U_ZERO_ERROR; 
 865 //    HexToUnicodeTransliterator hex2(UnicodeString("\\\\u###0;&\\#x###0\\;", ""), ec); 
 866 //    expect(hex2, UnicodeString("\\u61\\u062\\u0063\\u00645\\u66x0123", ""), 
 867 //           "abcd5fx0123"); 
 868 //    // Try custom Any-Hex (default is tested elsewhere) 
 869 //    ec = U_ZERO_ERROR; 
 870 //    UnicodeToHexTransliterator hex3(UnicodeString("&\\#x###0;", ""), ec); 
 871 //    expect(hex3, "012", "012"); 
 875  * Parsers need better syntax error messages. 
 877 void TransliteratorTest::TestJ329(void) { 
 879     struct { UBool containsErrors
; const char* rule
; } DATA
[] = { 
 880         { FALSE
, "a > b; c > d" }, 
 881         { TRUE
,  "a > b; no operator; c > d" }, 
 883     int32_t DATA_length 
= UPRV_LENGTHOF(DATA
); 
 885     for (int32_t i
=0; i
<DATA_length
; ++i
) { 
 886         UErrorCode status 
= U_ZERO_ERROR
; 
 887         UParseError parseError
; 
 888         Transliterator 
*rbt 
= Transliterator::createFromRules("<ID>", 
 893         UBool gotError 
= U_FAILURE(status
); 
 894         UnicodeString 
desc(DATA
[i
].rule
); 
 895         desc
.append(gotError 
? " -> error" : " -> no error"); 
 897             desc 
= desc 
+ ", ParseError code=" + u_errorName(status
) + 
 898                 " line=" + parseError
.line 
+ 
 899                 " offset=" + parseError
.offset 
+ 
 900                 " context=" + parseError
.preContext
; 
 902         if (gotError 
== DATA
[i
].containsErrors
) { 
 903             logln(UnicodeString("Ok:   ") + desc
); 
 905             errln(UnicodeString("FAIL: ") + desc
); 
 912  * Test segments and segment references. 
 914 void TransliteratorTest::TestSegments(void) { 
 916     // Each item is <rules>, <input>, <expected output> 
 917     UnicodeString DATA
[] = { 
 918         "([a-z]) '.' ([0-9]) > $2 '-' $1", 
 923         "(([a-z])([0-9])) > $1 '.' $2 '.' $3;", 
 927     int32_t DATA_length 
= UPRV_LENGTHOF(DATA
); 
 929     for (int32_t i
=0; i
<DATA_length
; i
+=3) { 
 930         logln("Pattern: " + prettify(DATA
[i
])); 
 931         UParseError parseError
; 
 932         UErrorCode status 
= U_ZERO_ERROR
; 
 933         Transliterator 
*t 
= Transliterator::createFromRules("ID", DATA
[i
], UTRANS_FORWARD
, parseError
, status
); 
 934         if (U_FAILURE(status
)) { 
 935             errln("FAIL: RBT constructor"); 
 937             expect(*t
, DATA
[i
+1], DATA
[i
+2]); 
 944  * Test cursor positioning outside of the key 
 946 void TransliteratorTest::TestCursorOffset(void) { 
 948     // Each item is <rules>, <input>, <expected output> 
 949     UnicodeString DATA
[] = { 
 950         "pre {alpha} post > | @ ALPHA ;" 
 952         "pre {beta} post > BETA @@ | ;" 
 955         "prealphapost prebetapost", 
 957         "prbetaxyz preBETApost", 
 959     int32_t DATA_length 
= UPRV_LENGTHOF(DATA
); 
 961     for (int32_t i
=0; i
<DATA_length
; i
+=3) { 
 962         logln("Pattern: " + prettify(DATA
[i
])); 
 963         UParseError parseError
; 
 964         UErrorCode status 
= U_ZERO_ERROR
; 
 965         Transliterator 
*t 
= Transliterator::createFromRules("<ID>", DATA
[i
], UTRANS_FORWARD
, parseError
, status
); 
 966         if (U_FAILURE(status
)) { 
 967             errln("FAIL: RBT constructor"); 
 969             expect(*t
, DATA
[i
+1], DATA
[i
+2]); 
 976  * Test zero length and > 1 char length variable values.  Test 
 977  * use of variable refs in UnicodeSets. 
 979 void TransliteratorTest::TestArbitraryVariableValues(void) { 
 981     // Each item is <rules>, <input>, <expected output> 
 982     UnicodeString DATA
[] = { 
1000     int32_t DATA_length 
= UPRV_LENGTHOF(DATA
); 
1002     for (int32_t i
=0; i
<DATA_length
; i
+=3) { 
1003         logln("Pattern: " + prettify(DATA
[i
])); 
1004         UParseError parseError
; 
1005         UErrorCode status 
= U_ZERO_ERROR
; 
1006         Transliterator 
*t 
= Transliterator::createFromRules("<ID>", DATA
[i
], UTRANS_FORWARD
, parseError
, status
); 
1007         if (U_FAILURE(status
)) { 
1008             errln("FAIL: RBT constructor"); 
1010             expect(*t
, DATA
[i
+1], DATA
[i
+2]); 
1017  * Confirm that the contextStart, contextLimit, start, and limit 
1018  * behave correctly. J474. 
1020 void TransliteratorTest::TestPositionHandling(void) { 
1021     // Array of 3n items 
1022     // Each item is <rules>, <input>, <expected output> 
1023     const char* DATA
[] = { 
1024         "a{t} > SS ; {t}b > UU ; {t} > TT ;", 
1025         "xtat txtb", // pos 0,9,0,9 
1028         "a{t} > SS ; {t}b > UU ; {t} > TT ; a > A ; b > B ;", 
1029         "xtat txtb", // pos 2,9,3,8 
1032         "a{t} > SS ; {t}b > UU ; {t} > TT ; a > A ; b > B ;", 
1033         "xtat txtb", // pos 3,8,3,8 
1037     // Array of 4n positions -- these go with the DATA array 
1038     // They are: contextStart, contextLimit, start, limit 
1045     int32_t n 
= UPRV_LENGTHOF(DATA
) / 3; 
1046     for (int32_t i
=0; i
<n
; i
++) { 
1047         UErrorCode status 
= U_ZERO_ERROR
; 
1048         UParseError parseError
; 
1049         Transliterator 
*t 
= Transliterator::createFromRules("<ID>", 
1050                                 DATA
[3*i
], UTRANS_FORWARD
, parseError
, status
); 
1051         if (U_FAILURE(status
)) { 
1053             errln("FAIL: RBT constructor"); 
1057         pos
.contextStart
= POS
[4*i
]; 
1058         pos
.contextLimit 
= POS
[4*i
+1]; 
1059         pos
.start 
= POS
[4*i
+2]; 
1060         pos
.limit 
= POS
[4*i
+3]; 
1061         UnicodeString 
rsource(DATA
[3*i
+1]); 
1062         t
->transliterate(rsource
, pos
, status
); 
1063         if (U_FAILURE(status
)) { 
1065             errln("FAIL: transliterate"); 
1068         t
->finishTransliteration(rsource
, pos
); 
1069         expectAux(DATA
[3*i
], 
1078  * Test the Hiragana-Katakana transliterator. 
// Checks the "Hiragana-Katakana" and "Katakana-Hiragana" transliterators.
// Both are obtained with Transliterator::createInstance(); if either pointer
// is 0 the failure is reported through dataerrln() with u_errorName(status).
// For every test case the Hiragana and Katakana strings (DATA[i+1] and
// DATA[i+2]) are unescaped with CharsToUnicodeString() and the case is then
// dispatched on 'h', 'k' or 'b'.
// NOTE(review): the switch expression, the case bodies and the first column
// of DATA are not visible in this listing -- confirm against the full file.
1080 void TransliteratorTest::TestHiraganaKatakana(void) { 
1081     UParseError parseError
; 
1082     UErrorCode status 
= U_ZERO_ERROR
; 
1083     Transliterator
* hk 
= Transliterator::createInstance("Hiragana-Katakana", UTRANS_FORWARD
, parseError
, status
); 
1084     Transliterator
* kh 
= Transliterator::createInstance("Katakana-Hiragana", UTRANS_FORWARD
, parseError
, status
); 
// Either creation failing is reported as a data error, with the status name.
1085     if (hk 
== 0 || kh 
== 0) { 
1086         dataerrln("FAIL: createInstance failed - %s", u_errorName(status
)); 
1092     // Array of 3n items 
1093     // Each item is "hk"|"kh"|"both", <Hiragana>, <Katakana> 
1094     const char* DATA
[] = { 
1096         "\\u3042\\u3090\\u3099\\u3092\\u3050", 
1097         "\\u30A2\\u30F8\\u30F2\\u30B0", 
1100         "\\u307C\\u3051\\u3060\\u3042\\u3093\\u30FC", 
1101         "\\u30DC\\u30F6\\u30C0\\u30FC\\u30F3\\u30FC", 
1103     int32_t DATA_length 
= UPRV_LENGTHOF(DATA
); 
// Step through DATA one triple at a time.
1105     for (int32_t i
=0; i
<DATA_length
; i
+=3) { 
1106         UnicodeString h 
= CharsToUnicodeString(DATA
[i
+1]); 
1107         UnicodeString k 
= CharsToUnicodeString(DATA
[i
+2]); 
// Case labels are spelled as numeric code values; the trailing comments give
// the corresponding letters.
1109         case 0x68: //'h': // Hiragana-Katakana 
1112         case 0x6B: //'k': // Katakana-Hiragana 
1115         case 0x62: //'b': // both 
1126  * Test cloning / copy constructor of RBT. 
// Verifies that a clone of a rule-created transliterator works on its own.
// t1 is built with createFromRules() and cloned into t2; the same
// expectation ("abcfoofoo" -> "ABcbar") is run against t1 and then against t2.
// NOTE(review): per the comment below, t1 is meant to be destroyed before t2
// is exercised; the statements that do so are not visible in this listing.
1128 void TransliteratorTest::TestCopyJ476(void) { 
1129     // The real test here is what happens when the destructors are 
1130     // called.  So we let one object get destructed, and check to 
1131     // see that its copy still works. 
1132     Transliterator 
*t2 
= 0; 
1134         UParseError parseError
; 
1135         UErrorCode status 
= U_ZERO_ERROR
; 
1136         Transliterator 
*t1 
= Transliterator::createFromRules("t1", 
1137             "a>A;b>B;'foo'+>'bar'", UTRANS_FORWARD
, parseError
, status
); 
1138         if (U_FAILURE(status
)) { 
1139             errln("FAIL: RBT constructor"); 
1142         t2 
= t1
->clone(); // Call copy constructor under the covers. 
1143         expect(*t1
, "abcfoofoo", "ABcbar"); 
// The clone must give the same result as the original did above.
1146     expect(*t2
, "abcfoofoo", "ABcbar"); 
1151  * Test inter-Indic transliterators.  These are composed. 
1152  * ICU4C Jitterbug 483. 
// Creates the "Devanagari-Gujarati" transliterator by ID, reports a data
// error (including the status name) when creation returned NULL, reports a
// failure that prints both the requested ID and the value of getID(), and
// finally expects the string dev to be transliterated into the string guj.
// NOTE(review): the conditions guarding the two error reports are not
// visible in this listing -- confirm against the complete file.
1154 void TransliteratorTest::TestInterIndic(void) { 
1155     UnicodeString 
ID("Devanagari-Gujarati", ""); 
1156     UErrorCode status 
= U_ZERO_ERROR
; 
1157     UParseError parseError
; 
1158     Transliterator
* dg 
= Transliterator::createInstance(ID
, UTRANS_FORWARD
, parseError
, status
); 
1160         dataerrln("FAIL: createInstance(" + ID 
+ ") returned NULL - " + u_errorName(status
)); 
// The ID reported by the created object is captured for the check below.
1163     UnicodeString id 
= dg
->getID(); 
1165         errln("FAIL: createInstance(" + ID 
+ ")->getID() => " + id
); 
// Input and expected output are written as escapes and unescaped here.
1167     UnicodeString dev 
= CharsToUnicodeString("\\u0901\\u090B\\u0925"); 
1168     UnicodeString guj 
= CharsToUnicodeString("\\u0A81\\u0A8B\\u0AA5"); 
1169     expect(*dg
, dev
, guj
); 
1174  * Test filter syntax in IDs. (J918) 
// Tests IDs that carry an inline filter such as "[aeiou]Any-Hex".
// Each case supplies four strings: the ID, the expected inverse ID, an input
// and the expected output. For every case the transliterator is created from
// the ID, run through expect() with the input/output pair, its getID() is
// compared with the requested ID, and the ID of its createInverse() result is
// compared with the expected inverse ID.
// NOTE(review): some table rows and the null checks on t and u are not
// visible in this listing -- confirm against the complete file.
1176 void TransliteratorTest::TestFilterIDs(void) { 
1177     // Array of 4n strings: 
1178     // <id>, <inverse id>, <input>, <expected output> 
1179     const char* DATA
[] = { 
1180         "[aeiou]Any-Hex", // ID 
1181         "[aeiou]Hex-Any", // expected inverse ID 
1183         "q\\u0075\\u0069zz\\u0069c\\u0061l", // expected ID.translit(src) 
1185         "[aeiou]Any-Hex;[^5]Hex-Any", 
1186         "[^5]Any-Hex;[aeiou]Hex-Any", 
1195     enum { DATA_length 
= UPRV_LENGTHOF(DATA
) }; 
// Four strings per case, hence the stride of 4.
1197     for (int i
=0; i
<DATA_length
; i
+=4) { 
1198         UnicodeString 
ID(DATA
[i
], ""); 
1199         UnicodeString 
uID(DATA
[i
+1], ""); 
1200         UnicodeString 
data2(DATA
[i
+2], ""); 
1201         UnicodeString 
data3(DATA
[i
+3], ""); 
1202         UParseError parseError
; 
1203         UErrorCode status 
= U_ZERO_ERROR
; 
1204         Transliterator 
*t 
= Transliterator::createInstance(ID
, UTRANS_FORWARD
, parseError
, status
); 
1206             errln("FAIL: createInstance(" + ID 
+ ") returned NULL"); 
1209         expect(*t
, data2
, data3
); 
// The created object must report exactly the ID it was requested with.
1212         if (ID 
!= t
->getID()) { 
1213             errln("FAIL: createInstance(" + ID 
+ ").getID() => " + 
1217         // Check the inverse 
1218         Transliterator 
*u 
= t
->createInverse(status
); 
1220             errln("FAIL: " + ID 
+ ".createInverse() returned NULL"); 
1221         } else if (u
->getID() != uID
) { 
1222             errln("FAIL: " + ID 
+ ".createInverse().getID() => " + 
1223                   u
->getID() + ", expected " + uID
); 
1232  * Test the case mapping transliterators. 
// Tests the Any-Upper, Any-Lower and Any-Title transliterators, each created
// with the trailing filter [^xyzXYZ]. If any creation returns 0 a failure is
// reported. In the expected strings the letters x, y, z, X, Y and Z keep the
// case they have in the input (e.g. "FOx", "LAzy", "foX", "LaZy") while the
// other letters are case-mapped.
1234 void TransliteratorTest::TestCaseMap(void) { 
1235     UParseError parseError
; 
1236     UErrorCode status 
= U_ZERO_ERROR
; 
1237     Transliterator
* toUpper 
= 
1238         Transliterator::createInstance("Any-Upper[^xyzXYZ]", UTRANS_FORWARD
, parseError
, status
); 
1239     Transliterator
* toLower 
= 
1240         Transliterator::createInstance("Any-Lower[^xyzXYZ]", UTRANS_FORWARD
, parseError
, status
); 
1241     Transliterator
* toTitle 
= 
1242         Transliterator::createInstance("Any-Title[^xyzXYZ]", UTRANS_FORWARD
, parseError
, status
); 
// All three instances are required; a single null pointer fails the test.
1243     if (toUpper
==0 || toLower
==0 || toTitle
==0) { 
1244         errln("FAIL: createInstance returned NULL"); 
1251     expect(*toUpper
, "The quick brown fox jumped over the lazy dogs.", 
1252            "THE QUICK BROWN FOx JUMPED OVER THE LAzy DOGS."); 
1253     expect(*toLower
, "The quIck brown fOX jUMPED OVER THE LAzY dogs.", 
1254            "the quick brown foX jumped over the lazY dogs."); 
1255     expect(*toTitle
, "the quick brown foX can't jump over the laZy dogs.", 
1256            "The Quick Brown FoX Can't Jump Over The LaZy Dogs."); 
1264  * Test the name mapping transliterators. 
// Tests the character-name transliterators.
// uni2name is created from "Any-Name[^abc]" and name2uni from "Name-Any";
// a null result from either is reported as a failure. The first expect()
// feeds code points to uni2name and expects \N{...} names with "abc" left
// as-is; the second feeds \N{...} text to name2uni, including spacing
// variants and the unterminated fragments "\N{x" and a trailing "\N{", which
// appear unchanged in the expected output. A compound "Any-Name;Name-Any"
// instance is created afterwards.
// NOTE(review): the variable receiving the compound instance and the use of
// s are not visible in this listing -- confirm against the complete file.
1266 void TransliteratorTest::TestNameMap(void) { 
1267     UParseError parseError
; 
1268     UErrorCode status 
= U_ZERO_ERROR
; 
1269     Transliterator
* uni2name 
= 
1270         Transliterator::createInstance("Any-Name[^abc]", UTRANS_FORWARD
, parseError
, status
); 
1271     Transliterator
* name2uni 
= 
1272         Transliterator::createInstance("Name-Any", UTRANS_FORWARD
, parseError
, status
); 
1273     if (uni2name
==0 || name2uni
==0) { 
1274         errln("FAIL: createInstance returned NULL"); 
1280     // Careful:  CharsToUS will convert "\\N" => "N"; use "\\\\N" for \N 
1281     expect(*uni2name
, CharsToUnicodeString("\\u00A0abc\\u4E01\\u00B5\\u0A81\\uFFFD\\u0004\\u0009\\u0081\\uFFFF"), 
1282            CharsToUnicodeString("\\\\N{NO-BREAK SPACE}abc\\\\N{CJK UNIFIED IDEOGRAPH-4E01}\\\\N{MICRO SIGN}\\\\N{GUJARATI SIGN CANDRABINDU}\\\\N{REPLACEMENT CHARACTER}\\\\N{<control-0004>}\\\\N{<control-0009>}\\\\N{<control-0081>}\\\\N{<noncharacter-FFFF>}")); 
// The input here goes through UNICODE_STRING_SIMPLE rather than
// CharsToUnicodeString, so "\\N" in the literal is a single backslash + N.
1283     expect(*name2uni
, UNICODE_STRING_SIMPLE("{\\N { NO-BREAK SPACE}abc\\N{  CJK UNIFIED  IDEOGRAPH-4E01  }\\N{x\\N{MICRO SIGN}\\N{GUJARATI SIGN CANDRABINDU}\\N{REPLACEMENT CHARACTER}\\N{<control-0004>}\\N{<control-0009>}\\N{<control-0081>}\\N{<noncharacter-FFFF>}\\N{<control-0004>}\\N{"), 
1284            CharsToUnicodeString("{\\u00A0abc\\u4E01\\\\N{x\\u00B5\\u0A81\\uFFFD\\u0004\\u0009\\u0081\\uFFFF\\u0004\\\\N{")); 
1291         Transliterator::createInstance("Any-Name;Name-Any", UTRANS_FORWARD
, parseError
, status
); 
1293         errln("FAIL: createInstance returned NULL"); 
1298     // Careful:  CharsToUS will convert "\\N" => "N"; use "\\\\N" for \N 
1299     UnicodeString s 
= CharsToUnicodeString("{\\u00A0abc\\u4E01\\\\N{x\\u00B5\\u0A81\\uFFFD\\u0004\\u0009\\u0081\\uFFFF\\u0004\\\\N{"); 
1305  * Test liberalized ID syntax.  1006c 
// Tests that createInstance() accepts loosely written IDs (different case,
// surrounding whitespace, an inline filter after the source name, whitespace
// in a compound ID). DATA holds triples: the ID to create, the expected
// getID() value (or NULL to skip that check), and a label used in messages.
// A creation failure is reported through dataerrln() with the status name;
// otherwise the case is logged as Ok when exp is empty or equals getID(),
// and reported through errln() when it does not.
// NOTE(review): the declaration of exp and the conditions around its
// assignment are not visible in this listing -- confirm against the full file.
1307 void TransliteratorTest::TestLiberalizedID(void) { 
1308     // Some test cases have an expected getID() value of NULL.  This 
1309     // means I have disabled the test case for now.  This stuff is 
1310     // still under development, and I haven't decided whether to make 
1311     // getID() return canonical case yet.  It will all get rewritten 
1312     // with the move to Source-Target/Variant IDs anyway. [aliu] 
1313     const char* DATA
[] = { 
1314         "latin-greek", NULL 
/*"Latin-Greek"*/, "case insensitivity", 
1315         "  Null  ", "Null", "whitespace", 
1316         " Latin[a-z]-Greek  ", "[a-z]Latin-Greek", "inline filter", 
1317         "  null  ; latin-greek  ", NULL 
/*"Null;Latin-Greek"*/, "compound whitespace", 
1319     const int32_t DATA_length 
= UPRV_LENGTHOF(DATA
); 
1320     UParseError parseError
; 
// status is declared once, outside the loop, and shared by every iteration.
1321     UErrorCode status
= U_ZERO_ERROR
; 
1322     for (int32_t i
=0; i
<DATA_length
; i
+=3) { 
1323         Transliterator 
*t 
= Transliterator::createInstance(DATA
[i
], UTRANS_FORWARD
, parseError
, status
); 
1325             dataerrln(UnicodeString("FAIL: ") + DATA
[i
+2] + 
1326                   " cannot create ID \"" + DATA
[i
] + "\" - " + u_errorName(status
)); 
1330                 exp 
= UnicodeString(DATA
[i
+1], ""); 
1332             // Don't worry about getID() if the expected char* 
1333             // is NULL -- see above. 
// An empty exp counts as a pass without comparing against getID().
1334             if (exp
.length() == 0 || exp 
== t
->getID()) { 
1335                 logln(UnicodeString("Ok: ") + DATA
[i
+2] + 
1336                       " create ID \"" + DATA
[i
] + "\" => \"" + 
1339                 errln(UnicodeString("FAIL: ") + DATA
[i
+2] + 
1340                       " create ID \"" + DATA
[i
] + "\" => \"" + 
1341                       t
->getID() + "\", exp \"" + exp 
+ "\""); 
1348 /* test for Jitterbug 912 */ 
// Tests Transliterator::createInstance() with an explicit direction,
// including invalid simple and compound IDs (see the JB#2689 rows).
// DATA holds triples: the ID, the direction (the FORWARD or REVERSE pointer)
// and the expected resulting ID, with "" meaning creation is expected to
// fail. The outcome is logged as Ok when newID equals expID and reported
// through dataerrln() otherwise.
// NOTE(review): the loop stops at the first null DATA[i]; the terminating
// entry, and the declarations of err and of the created transliterator, are
// not visible in this listing -- confirm against the complete file.
1349 void TransliteratorTest::TestCreateInstance(){ 
1350     const char* FORWARD 
= "F"; 
1351     const char* REVERSE 
= "R"; 
1352     const char* DATA
[] = { 
1354         // Column 2: direction 
1355         // Column 3: expected ID, or "" if expect failure 
1356         "Latin-Hangul", REVERSE
, "Hangul-Latin", // JB#912 
1358         // JB#2689: bad compound causes crash 
1359         "InvalidSource-InvalidTarget", FORWARD
, "", 
1360         "InvalidSource-InvalidTarget", REVERSE
, "", 
1361         "Hex-Any;InvalidSource-InvalidTarget", FORWARD
, "", 
1362         "Hex-Any;InvalidSource-InvalidTarget", REVERSE
, "", 
1363         "InvalidSource-InvalidTarget;Hex-Any", FORWARD
, "", 
1364         "InvalidSource-InvalidTarget;Hex-Any", REVERSE
, "", 
1369     for (int32_t i
=0; DATA
[i
]; i
+=3) { 
1371         UErrorCode ec 
= U_ZERO_ERROR
; 
1372         UnicodeString 
id(DATA
[i
]); 
// The direction column is compared by pointer identity, not by string
// content, so table rows must use the FORWARD/REVERSE variables themselves.
1373         UTransDirection dir 
= (DATA
[i
+1]==FORWARD
)? 
1374             UTRANS_FORWARD
:UTRANS_REVERSE
; 
1375         UnicodeString 
expID(DATA
[i
+2]); 
1377             Transliterator::createInstance(id
,dir
,err
,ec
); 
1378         UnicodeString newID
; 
1382         UBool ok 
= (newID 
== expID
); 
// newID is replaced by the error name so that the messages below can show it.
1384             newID 
= u_errorName(ec
); 
1387             logln((UnicodeString
)"Ok: createInstance(" + 
1388                   id 
+ "," + DATA
[i
+1] + ") => " + newID
); 
1390             dataerrln((UnicodeString
)"FAIL: createInstance(" + 
1391                   id 
+ "," + DATA
[i
+1] + ") => " + newID 
+ 
1392                   ", expected " + expID
); 
1399  * Test the normalization transliterator. 
// Tests the NFD, NFC, NFKD and NFKC transliterators against two tables.
// Each table row is a triple: input, expected decomposed form, expected
// composed form (all written as escapes and unescaped with
// CharsToUnicodeString). CANON rows are checked with NFD and NFC, COMPAT rows
// with NFKD and NFKC. Finally a compound "NFD; [x]Remove" instance is
// expected to turn U+010D followed by 'x' into 'c' followed by U+030C.
// NOTE(review): both loops stop at the first null table entry; the
// terminating entries, the declaration of i and some null checks are not
// visible in this listing -- confirm against the complete file.
1401 void TransliteratorTest::TestNormalizationTransliterator() { 
1402     // THE FOLLOWING TWO TABLES ARE COPIED FROM com.ibm.test.normalizer.BasicTest 
1403     // PLEASE KEEP THEM IN SYNC WITH BasicTest. 
1404     const char* CANON
[] = { 
1405         // Input               Decomposed            Composed 
1406         "cat",                "cat",                "cat"               , 
1407         "\\u00e0ardvark",      "a\\u0300ardvark",     "\\u00e0ardvark"    , 
1409         "\\u1e0a",             "D\\u0307",            "\\u1e0a"            , // D-dot_above 
1410         "D\\u0307",            "D\\u0307",            "\\u1e0a"            , // D dot_above 
1412         "\\u1e0c\\u0307",       "D\\u0323\\u0307",      "\\u1e0c\\u0307"      , // D-dot_below dot_above 
1413         "\\u1e0a\\u0323",       "D\\u0323\\u0307",      "\\u1e0c\\u0307"      , // D-dot_above dot_below 
1414         "D\\u0307\\u0323",      "D\\u0323\\u0307",      "\\u1e0c\\u0307"      , // D dot_below dot_above 
1416         "\\u1e10\\u0307\\u0323", "D\\u0327\\u0323\\u0307","\\u1e10\\u0323\\u0307", // D dot_below cedilla dot_above 
1417         "D\\u0307\\u0328\\u0323","D\\u0328\\u0323\\u0307","\\u1e0c\\u0328\\u0307", // D dot_above ogonek dot_below 
1419         "\\u1E14",             "E\\u0304\\u0300",      "\\u1E14"            , // E-macron-grave 
1420         "\\u0112\\u0300",       "E\\u0304\\u0300",      "\\u1E14"            , // E-macron + grave 
1421         "\\u00c8\\u0304",       "E\\u0300\\u0304",      "\\u00c8\\u0304"      , // E-grave + macron 
1423         "\\u212b",             "A\\u030a",            "\\u00c5"            , // angstrom_sign 
1424         "\\u00c5",             "A\\u030a",            "\\u00c5"            , // A-ring 
1426         "\\u00fdffin",         "y\\u0301ffin",        "\\u00fdffin"        ,    //updated with 3.0 
1427         "\\u00fd\\uFB03n",      "y\\u0301\\uFB03n",     "\\u00fd\\uFB03n"     , //updated with 3.0 
1429         "Henry IV",           "Henry IV",           "Henry IV"          , 
1430         "Henry \\u2163",       "Henry \\u2163",       "Henry \\u2163"      , 
1432         "\\u30AC",             "\\u30AB\\u3099",       "\\u30AC"            , // ga (Katakana) 
1433         "\\u30AB\\u3099",       "\\u30AB\\u3099",       "\\u30AC"            , // ka + ten 
1434         "\\uFF76\\uFF9E",       "\\uFF76\\uFF9E",       "\\uFF76\\uFF9E"      , // hw_ka + hw_ten 
1435         "\\u30AB\\uFF9E",       "\\u30AB\\uFF9E",       "\\u30AB\\uFF9E"      , // ka + hw_ten 
1436         "\\uFF76\\u3099",       "\\uFF76\\u3099",       "\\uFF76\\u3099"      , // hw_ka + ten 
1438         "A\\u0300\\u0316",      "A\\u0316\\u0300",      "\\u00C0\\u0316"      , 
1442     const char* COMPAT
[] = { 
1443         // Input               Decomposed            Composed 
1444         "\\uFB4f",             "\\u05D0\\u05DC",       "\\u05D0\\u05DC"     , // Alef-Lamed vs. Alef, Lamed 
1446         "\\u00fdffin",         "y\\u0301ffin",        "\\u00fdffin"        ,    //updated for 3.0 
1447         "\\u00fd\\uFB03n",      "y\\u0301ffin",        "\\u00fdffin"        , // ffi ligature -> f + f + i 
1449         "Henry IV",           "Henry IV",           "Henry IV"          , 
1450         "Henry \\u2163",       "Henry IV",           "Henry IV"          , 
1452         "\\u30AC",             "\\u30AB\\u3099",       "\\u30AC"            , // ga (Katakana) 
1453         "\\u30AB\\u3099",       "\\u30AB\\u3099",       "\\u30AC"            , // ka + ten 
1455         "\\uFF76\\u3099",       "\\u30AB\\u3099",       "\\u30AC"            , // hw_ka + ten 
1460     UParseError parseError
; 
1461     UErrorCode status 
= U_ZERO_ERROR
; 
1462     Transliterator
* NFD 
= Transliterator::createInstance("NFD", UTRANS_FORWARD
, parseError
, status
); 
1463     Transliterator
* NFC 
= Transliterator::createInstance("NFC", UTRANS_FORWARD
, parseError
, status
); 
1465         dataerrln("FAIL: createInstance failed: %s", u_errorName(status
)); 
// Canonical table: column 2 is checked with NFD, column 3 with NFC.
1470     for (i
=0; CANON
[i
]; i
+=3) { 
1471         UnicodeString in 
= CharsToUnicodeString(CANON
[i
]); 
1472         UnicodeString expd 
= CharsToUnicodeString(CANON
[i
+1]); 
1473         UnicodeString expc 
= CharsToUnicodeString(CANON
[i
+2]); 
1474         expect(*NFD
, in
, expd
); 
1475         expect(*NFC
, in
, expc
); 
1480     Transliterator
* NFKD 
= Transliterator::createInstance("NFKD", UTRANS_FORWARD
, parseError
, status
); 
1481     Transliterator
* NFKC 
= Transliterator::createInstance("NFKC", UTRANS_FORWARD
, parseError
, status
); 
1482     if (!NFKD 
|| !NFKC
) { 
1483         dataerrln("FAIL: createInstance failed"); 
// Compatibility table: column 2 is checked with NFKD, column 3 with NFKC.
1488     for (i
=0; COMPAT
[i
]; i
+=3) { 
1489         UnicodeString in 
= CharsToUnicodeString(COMPAT
[i
]); 
1490         UnicodeString expkd 
= CharsToUnicodeString(COMPAT
[i
+1]); 
1491         UnicodeString expkc 
= CharsToUnicodeString(COMPAT
[i
+2]); 
1492         expect(*NFKD
, in
, expkd
); 
1493         expect(*NFKC
, in
, expkc
); 
// status is reset before the compound instance is created.
1499     status 
= U_ZERO_ERROR
; 
1500     Transliterator 
*t 
= Transliterator::createInstance("NFD; [x]Remove", 
1504         errln("FAIL: createInstance failed"); 
1506     expect(*t
, CharsToUnicodeString("\\u010dx"), 
1507            CharsToUnicodeString("c\\u030C")); 
1512  * Test compound RBT rules. 
// Tests rule sets that contain "::ID" lines, and compound IDs in general.
// Steps visible here:
//   1. build a transliterator from a rule string containing ::Hex-Any and
//      ::[^t]Any-Upper, check its output, and compare toRules(r, TRUE) with
//      the original rule string;
//   2. create "Greek-Latin; Latin-Cyrillic" by ID and expect toRules() to
//      give "::Greek-Latin;\n::Latin-Cyrillic;";
//   3. rebuild a transliterator from that toRules() output and check
//      toRules() again;
//   4. create "Upper(Lower);(NFKC)", check getID(), then expect the ID of
//      its inverse to be "NFKC();Lower(Upper)".
// NOTE(review): the declaration of r, the middle lines of the rule string and
// the conditions guarding each report are not visible in this listing --
// confirm against the complete file.
1514 void TransliteratorTest::TestCompoundRBT(void) { 
1515     // Careful with spacing and ';' here:  Phrase this exactly 
1516     // as toRules() is going to return it.  If toRules() changes 
1517     // with regard to spacing or ';', then adjust this string. 
1518     UnicodeString 
rule("::Hex-Any;\n" 
1522                        "::[^t]Any-Upper;", ""); 
1523     UParseError parseError
; 
1524     UErrorCode status 
= U_ZERO_ERROR
; 
1525     Transliterator 
*t 
= Transliterator::createFromRules("Test", rule
, UTRANS_FORWARD
, parseError
, status
); 
1527         errln("FAIL: createFromRules failed"); 
// In the expected output every 't' is still lower case, matching the [^t]
// filter on Any-Upper in the rule string.
1530     expect(*t
, UNICODE_STRING_SIMPLE("\\u0043at in the hat, bat on the mat"), 
1531            "C.A.t IN tHE H.A.t, .B..A.t ON tHE M.A.t"); 
1533     t
->toRules(r
, TRUE
); 
1535         logln((UnicodeString
)"OK: toRules() => " + r
); 
1537         errln((UnicodeString
)"FAIL: toRules() => " + r 
+ 
1538               ", expected " + rule
); 
1543     t 
= Transliterator::createInstance("Greek-Latin; Latin-Cyrillic", UTRANS_FORWARD
, parseError
, status
); 
1545         dataerrln("FAIL: createInstance failed - %s", u_errorName(status
)); 
1548     UnicodeString 
exp("::Greek-Latin;\n::Latin-Cyrillic;"); 
1549     t
->toRules(r
, TRUE
); 
1551         errln((UnicodeString
)"FAIL: toRules() => " + r 
+ 
1552               ", expected " + exp
); 
1554         logln((UnicodeString
)"OK: toRules() => " + r
); 
1558     // Round trip the result of toRules 
1559     t 
= Transliterator::createFromRules("Test", r
, UTRANS_FORWARD
, parseError
, status
); 
1561         errln("FAIL: createFromRules #2 failed"); 
1564         logln((UnicodeString
)"OK: createFromRules(" + r 
+ ") succeeded"); 
1567     // Test toRules again 
1568     t
->toRules(r
, TRUE
); 
1570         errln((UnicodeString
)"FAIL: toRules() => " + r 
+ 
1571               ", expected " + exp
); 
1573         logln((UnicodeString
)"OK: toRules() => " + r
); 
1578     // Test Foo(Bar) IDs.  Careful with spacing in id; make it conform 
1579     // to what the regenerated ID will look like. 
1580     UnicodeString 
id("Upper(Lower);(NFKC)", ""); 
1581     t 
= Transliterator::createInstance(id
, UTRANS_FORWARD
, parseError
, status
); 
1583         errln("FAIL: createInstance #2 failed"); 
1586     if (t
->getID() == id
) { 
1587         logln((UnicodeString
)"OK: created " + id
); 
1589         errln((UnicodeString
)"FAIL: createInstance(" + id 
+ 
1590               ").getID() => " + t
->getID()); 
1593     Transliterator 
*u 
= t
->createInverse(status
); 
1595         errln("FAIL: createInverse failed"); 
// exp is reused here to hold the expected ID of the inverse.
1599     exp 
= "NFKC();Lower(Upper)"; 
1600     if (u
->getID() == exp
) { 
1601         logln((UnicodeString
)"OK: createInverse(" + id 
+ ") => " + 
1604         errln((UnicodeString
)"FAIL: createInverse(" + id 
+ ") => " + 
1612  * Compound filter semantics were originally not implemented 
1613  * correctly.  Originally, each component filter f(i) is replaced by 
1614  * f'(i) = f(i) && g, where g is the filter for the compound 
1619  * Suppose I have a transliterator X. Internally X is 
1620  * "Greek-Latin; Latin-Cyrillic; Any-Lower". I use a filter [^A]. 
1622  * The compound should convert all greek characters (through latin) to 
1623  * cyrillic, then lowercase the result. The filter should say "don't 
1624  * touch 'A' in the original". But because an intermediate result 
1625  * happens to go through "A", the Greek Alpha gets hung up. 
// Tests a filter applied to a compound transliterator as a whole.
// The compound "Greek-Latin; Latin-Greek; Lower" is created by ID and given
// the filter [^A] through adoptFilter(). The expected output keeps the 'A' at
// index 1 of the input unchanged while the other three characters change.
// NOTE(review): the null check on t and the first line of the expect() call
// are not visible in this listing -- confirm against the complete file.
1627 void TransliteratorTest::TestCompoundFilter(void) { 
1628     UParseError parseError
; 
1629     UErrorCode status 
= U_ZERO_ERROR
; 
1630     Transliterator 
*t 
= Transliterator::createInstance
 
1631         ("Greek-Latin; Latin-Greek; Lower", UTRANS_FORWARD
, parseError
, status
); 
1633         dataerrln("FAIL: createInstance failed - %s", u_errorName(status
)); 
// The UnicodeSet is constructed with status, which is checked right after.
1636     t
->adoptFilter(new UnicodeSet("[^A]", status
)); 
1637     if (U_FAILURE(status
)) { 
1638         errln("FAIL: UnicodeSet ct failed"); 
1643     // Only the 'A' at index 1 should remain unchanged 
1645            CharsToUnicodeString("BA\\u039A\\u0391"), 
1646            CharsToUnicodeString("\\u03b2A\\u03ba\\u03b1")); 
// Tests the "Remove[abc]" transliterator: in the expected string the lower
// case letters a, b and c are gone from the input while the capital 'A'
// remains. The same expectation is then run on a clone() of the instance.
// NOTE(review): the null check on t and any cleanup statements are not
// visible in this listing -- confirm against the complete file.
1650 void TransliteratorTest::TestRemove(void) { 
1651     UParseError parseError
; 
1652     UErrorCode status 
= U_ZERO_ERROR
; 
1653     Transliterator 
*t 
= Transliterator::createInstance("Remove[abc]", UTRANS_FORWARD
, parseError
, status
); 
1655         errln("FAIL: createInstance failed"); 
1659     expect(*t
, "Able bodied baker's cats", "Ale odied ker's ts"); 
1661     // extra test for RemoveTransliterator::clone(), which at one point wasn't 
1662     // duplicating the filter 
1663     Transliterator
* t2 
= t
->clone(); 
1664     expect(*t2
, "Able bodied baker's cats", "Ale odied ker's ts"); 
// Tests Transliterator::toRules() and UnicodeSet::toPattern().
// DATA is processed three entries at a time. When the first entry is the RBT
// pointer, the second entry is compiled with createFromRules() and the third
// is the expected toRules() text: unescaped through CharsToUnicodeString()
// for toRules(rules, FALSE), and taken literally for
// toRules(escapedRules, TRUE). In the other branch the second entry is a
// UnicodeSet pattern and the third is the expected toPattern() result.
// NOTE(review): the kind column of DATA and the else-branch header are not
// visible in this listing; the kind is compared by pointer identity with
// RBT, so rows presumably use the RBT/SET variables themselves -- confirm.
1670 void TransliteratorTest::TestToRules(void) { 
1671     const char* RBT 
= "rbt"; 
1672     const char* SET 
= "set"; 
1673     static const char* DATA
[] = { 
1675         "$a=\\u4E61; [$a] > A;", 
1679         "$white=[[:Zs:][:Zl:]]; $white{a} > A;", 
1680         "[[:Zs:][:Zl:]]{a} > A;", 
1707         "$white=[:Zs:]; $black=[^$white]; $black{a} > A;", 
1708         "[^[:Zs:]]{a} > A;", 
1711         "$a=[:Zs:]; $b=[[a-z]-$a]; $b{a} > A;", 
1712         "[[a-z]-[:Zs:]]{a} > A;", 
1715         "$a=[:Zs:]; $b=[$a&[a-z]]; $b{a} > A;", 
1716         "[[:Zs:]&[a-z]]{a} > A;", 
1719         "$a=[:Zs:]; $b=[x$a]; $b{a} > A;", 
1720         "[x[:Zs:]]{a} > A;", 
1723         "$accentMinus = [ [\\u0300-\\u0345] & [:M:] - [\\u0338]] ;" 
1724         "$macron = \\u0304 ;" 
1725         "$evowel = [aeiouyAEIOUY] ;" 
1726         "$iotasub = \\u0345 ;"  
1727         "($evowel $macron $accentMinus *) i > | $1 $iotasub ;", 
1728         "([AEIOUYaeiouy]\\u0304[[\\u0300-\\u0345]&[:M:]-[\\u0338]]*)i > | $1 \\u0345;", 
1731         "([AEIOUYaeiouy]\\u0304[[:M:]-[\\u0304\\u0345]]*)i > | $1 \\u0345;", 
1732         "([AEIOUYaeiouy]\\u0304[[:M:]-[\\u0304\\u0345]]*)i > | $1 \\u0345;", 
1734     static const int32_t DATA_length 
= UPRV_LENGTHOF(DATA
); 
1736     for (int32_t d
=0; d 
< DATA_length
; d
+=3) { 
// Pointer comparison: true only when the entry is the RBT variable itself.
1737         if (DATA
[d
] == RBT
) { 
1738             // Transliterator test 
1739             UParseError parseError
; 
1740             UErrorCode status 
= U_ZERO_ERROR
; 
1741             Transliterator 
*t 
= Transliterator::createFromRules("ID", 
1742                                                                 UnicodeString(DATA
[d
+1], -1, US_INV
), UTRANS_FORWARD
, parseError
, status
); 
1744                 dataerrln("FAIL: createFromRules failed - %s", u_errorName(status
)); 
// Both forms of toRules() are collected: FALSE into rules, TRUE into
// escapedRules.
1747             UnicodeString rules
, escapedRules
; 
1748             t
->toRules(rules
, FALSE
); 
1749             t
->toRules(escapedRules
, TRUE
); 
// The same expected text serves both checks: unescaped for the FALSE form,
// literal for the TRUE form.
1750             UnicodeString expRules 
= CharsToUnicodeString(DATA
[d
+2]); 
1751             UnicodeString 
expEscapedRules(DATA
[d
+2], -1, US_INV
); 
1752             if (rules 
== expRules
) { 
1753                 logln((UnicodeString
)"Ok: " + UnicodeString(DATA
[d
+1], -1, US_INV
) + 
1756                 errln((UnicodeString
)"FAIL: " + UnicodeString(DATA
[d
+1], -1, US_INV
) + 
1757                       " => " + rules 
+ ", exp " + expRules
); 
1759             if (escapedRules 
== expEscapedRules
) { 
1760                 logln((UnicodeString
)"Ok: " + UnicodeString(DATA
[d
+1], -1, US_INV
) + 
1761                       " => " + escapedRules
); 
1763                 errln((UnicodeString
)"FAIL: " + UnicodeString(DATA
[d
+1], -1, US_INV
) + 
1764                       " => " + escapedRules 
+ ", exp " + expEscapedRules
); 
// UnicodeSet branch: build a set from the pattern and compare toPattern().
1770             UErrorCode status 
= U_ZERO_ERROR
; 
1771             UnicodeString 
pat(DATA
[d
+1], -1, US_INV
); 
1772             UnicodeString 
expToPat(DATA
[d
+2], -1, US_INV
); 
1773             UnicodeSet 
set(pat
, status
); 
1774             if (U_FAILURE(status
)) { 
1775                 errln("FAIL: UnicodeSet ct failed"); 
1778             // Adjust spacing etc. as necessary. 
1779             UnicodeString toPat
; 
1780             set
.toPattern(toPat
); 
1781             if (expToPat 
== toPat
) { 
1782                 logln((UnicodeString
)"Ok: " + pat 
+ 
// NOTE(review): the message below prints pat after "exp", although the value
// compared above is expToPat -- looks like the wrong variable; confirm.
1785                 errln((UnicodeString
)"FAIL: " + pat 
+ 
1786                       " => " + prettify(toPat
, TRUE
) + 
1787                       ", exp " + prettify(pat
, TRUE
)); 
// Tests rules with context, using the explicit position pos initialised to
// {0, 2, 0, 1} (contextStart, contextLimit, start, limit per the "cs cl s l"
// note) and two rule strings passed to expect().
// NOTE(review): the remaining expect() arguments are not visible in this
// listing -- confirm against the complete file.
1793 void TransliteratorTest::TestContext() { 
1794     UTransPosition pos 
= {0, 2, 0, 1}; // cs cl s l 
1795     expect("de > x; {d}e > y;", 
1800     expect("ab{c} > z;", 
// Tests handling of code points above U+FFFF (written as \UXXXXXXXX escapes)
// in rules, in sets, with cursor repositioning (@), and in the Any-Hex
// variants /Unicode, /C, /Perl, /Java, /XML and /XML10, ending with a
// filtered Remove over [\U000E0000-\U000E0FFF].
// NOTE(review): several lines, including the first line of the name test
// near embedded line 1833, are not visible in this listing.
// NOTE(review): the expected strings of the Any-Hex/XML and Any-Hex/XML10
// cases appear here as raw characters rather than escape text; presumably
// character references were decoded when this listing was produced --
// confirm against the original file.
1805 void TransliteratorTest::TestSupplemental() {  
1807     expect(CharsToUnicodeString("$a=\\U00010300; $s=[\\U00010300-\\U00010323];" 
1809            CharsToUnicodeString("ab\\U0001030Fx"), 
1810            CharsToUnicodeString("\\U00010300bix")); 
// Segment references with sets that mix BMP and supplementary ranges.
1812     expect(CharsToUnicodeString("$a=[a-z\\U00010300-\\U00010323];" 
1813                                 "$b=[A-Z\\U00010400-\\U0001044D];" 
1814                                 "($a)($b) > $2 $1;"), 
1815            CharsToUnicodeString("aB\\U00010300\\U00010400c\\U00010401\\U00010301D"), 
1816            CharsToUnicodeString("Ba\\U00010400\\U00010300\\U00010401cD\\U00010301")); 
1818     // k|ax\\U00010300xm 
1820     // k|a\\U00010400\\U00010300xm 
1821     // ky|\\U00010400\\U00010300xm 
1822     // ky\\U00010400|\\U00010300xm 
1824     // ky\\U00010400|\\U00010300\\U00010400m 
1825     // ky\\U00010400y|\\U00010400m 
1826     expect(CharsToUnicodeString("$a=[a\\U00010300-\\U00010323];" 
1827                                 "$a {x} > | @ \\U00010400;" 
1828                                 "{$a} [^\\u0000-\\uFFFF] > y;"), 
1829            CharsToUnicodeString("kax\\U00010300xm"), 
1830            CharsToUnicodeString("ky\\U00010400y\\U00010400m")); 
1833            CharsToUnicodeString("\\U00010330\\U000E0061\\u00A0"), 
1834            UNICODE_STRING_SIMPLE("\\N{GOTHIC LETTER AHSA}\\N{TAG LATIN SMALL LETTER A}\\N{NO-BREAK SPACE}")); 
// The Any-Hex variants below all receive the same four-code-point input.
1836     expectT("Any-Hex/Unicode", 
1837            CharsToUnicodeString("\\U00010330\\U0010FF00\\U000E0061\\u00A0"), 
1838            UNICODE_STRING_SIMPLE("U+10330U+10FF00U+E0061U+00A0")); 
1840     expectT("Any-Hex/C", 
1841            CharsToUnicodeString("\\U00010330\\U0010FF00\\U000E0061\\u00A0"), 
1842            UNICODE_STRING_SIMPLE("\\U00010330\\U0010FF00\\U000E0061\\u00A0")); 
1844     expectT("Any-Hex/Perl", 
1845            CharsToUnicodeString("\\U00010330\\U0010FF00\\U000E0061\\u00A0"), 
1846            UNICODE_STRING_SIMPLE("\\x{10330}\\x{10FF00}\\x{E0061}\\x{A0}")); 
1848     expectT("Any-Hex/Java", 
1849            CharsToUnicodeString("\\U00010330\\U0010FF00\\U000E0061\\u00A0"), 
1850            UNICODE_STRING_SIMPLE("\\uD800\\uDF30\\uDBFF\\uDF00\\uDB40\\uDC61\\u00A0")); 
1852     expectT("Any-Hex/XML", 
1853            CharsToUnicodeString("\\U00010330\\U0010FF00\\U000E0061\\u00A0"), 
1854            "𐌰􏼀󠁡 "); 
1856     expectT("Any-Hex/XML10", 
1857            CharsToUnicodeString("\\U00010330\\U0010FF00\\U000E0061\\u00A0"), 
1858            "𐌰􏼀󠁡 "); 
// The expected output drops only \U000E0061, the one input code point inside
// the filter range.
1860     expectT(UNICODE_STRING_SIMPLE("[\\U000E0000-\\U000E0FFF] Remove"), 
1861            CharsToUnicodeString("\\U00010330\\U0010FF00\\U000E0061\\u00A0"), 
1862            CharsToUnicodeString("\\U00010330\\U0010FF00\\u00A0")); 
// Tests quantifiers (+, *, ?) in rules: combined with the cursor markers
// @ and | in ante/post context, applied to segments, sets, quoted strings
// and variable references, and with explicit positions pos and pos2.
// NOTE(review): most input/expected arguments of the expect() calls are not
// visible in this listing -- confirm against the complete file.
1865 void TransliteratorTest::TestQuantifier() {  
1867     // Make sure @ in a quantified anteContext works 
1868     expect("a+ {b} > | @@ c; A > a; (a+ c) > '(' $1 ')';", 
1872     // Make sure @ in a quantified postContext works 
1873     expect("{b} a+ > c @@ |; (a+) > '(' $1 ')';", 
1877     // Make sure @ in a quantified postContext with seg ref works 
1878     expect("{(b)} a+ > $1 @@ |; (a+) > '(' $1 ')';", 
1882     // Make sure @ past ante context doesn't enter ante context 
1883     UTransPosition pos 
= {0, 5, 3, 5}; 
1884     expect("a+ {b} > | @@ c; x > y; (a+ c) > '(' $1 ')';", 
1889     // Make sure @ past post context doesn't pass limit 
1890     UTransPosition pos2 
= {0, 4, 0, 2}; 
1891     expect("{b} a+ > c @@ |; x > y; a > A;", 
1896     // Make sure @ past post context doesn't enter post context 
1897     expect("{b} a+ > c @@ |; x > y; a > A;", 
1901     expect("(ab)? c > d;", 
1905     // NOTE: The (ab)+ when referenced just yields a single "ab", 
1906     // not the full sequence of them.  This accords with perl behavior. 
1907     expect("(ab)+ {x} > '(' $1 ')';", 
1909            "x ab(ab) abab(ab)y"); 
1912            "ac abc abbc abbbc", 
1915     expect("[abc]+ > x;", 
1916            "qac abrc abbcs abtbbc", 
1919     expect("q{(ab)+} > x;", 
1920            "qa qab qaba qababc qaba", 
1921            "qa qx qxa qxc qxa"); 
1923     expect("q(ab)* > x;", 
1924            "qa qab qaba qababc", 
1927     // NOTE: The (ab)* when referenced just yields a single "ab", 
1928     // not the full sequence of them.  This accords with perl behavior. 
1929     expect("q(ab)* > '(' $1 ')';", 
1930            "qa qab qaba qababc", 
1931            "()a (ab) (ab)a (ab)c"); 
1933     // 'foo'+ and 'foo'* -- the quantifier should apply to the entire 
1935     expect("'ab'+ > x;", 
1939     // $foo+ and $foo* -- the quantifier should apply to the entire 
1940     // variable reference 
1941     expect("$var = ab; $var+ > x;", 
// Minimal Transliterator subclass; TestSTV instantiates it with
// new TestTrans(...) and passes it to Transliterator::registerInstance().
//   - The constructor forwards the ID to Transliterator with 0 as the second
//     argument.
//   - clone() returns a new TestTrans constructed from this object's getID().
//   - handleTransliterate() never touches the text (the parameter is
//     unnamed); it only sets offsets.start to offsets.limit.
//   - getDynamicClassID()/getStaticClassID() are declared here; the
//     UOBJECT_DEFINE_RTTI_IMPLEMENTATION(TestTrans) line follows the class.
// NOTE(review): access specifiers and closing braces are not visible in this
// listing -- confirm against the complete file.
1946 class TestTrans 
: public Transliterator 
{ 
1948     TestTrans(const UnicodeString
& id
) : Transliterator(id
, 0) { 
1950     virtual Transliterator
* clone(void) const { 
1951         return new TestTrans(getID()); 
1953     virtual void handleTransliterate(Replaceable
& /*text*/, UTransPosition
& offsets
, 
1954         UBool 
/*isIncremental*/) const 
1956         offsets
.start 
= offsets
.limit
; 
1958     virtual UClassID 
getDynamicClassID() const; 
1959     static UClassID U_EXPORT2 
getStaticClassID(); 
1961 UOBJECT_DEFINE_RTTI_IMPLEMENTATION(TestTrans
) 
1964  * Test Source-Target/Variant. 
1966 void TransliteratorTest::TestSTV(void) { 
1967     int32_t ns 
= Transliterator::countAvailableSources(); 
1968     if (ns 
< 0 || ns 
> 255) { 
1969         errln((UnicodeString
)"FAIL: Bad source count: " + ns
); 
1973     for (i
=0; i
<ns
; ++i
) { 
1974         UnicodeString source
; 
1975         Transliterator::getAvailableSource(i
, source
); 
1976         logln((UnicodeString
)"" + i 
+ ": " + source
); 
1977         if (source
.length() == 0) { 
1978             errln("FAIL: empty source"); 
1981         int32_t nt 
= Transliterator::countAvailableTargets(source
); 
1982         if (nt 
< 0 || nt 
> 255) { 
1983             errln((UnicodeString
)"FAIL: Bad target count: " + nt
); 
1986         for (int32_t j
=0; j
<nt
; ++j
) { 
1987             UnicodeString target
; 
1988             Transliterator::getAvailableTarget(j
, source
, target
); 
1989             logln((UnicodeString
)" " + j 
+ ": " + target
); 
1990             if (target
.length() == 0) { 
1991                 errln("FAIL: empty target"); 
1994             int32_t nv 
= Transliterator::countAvailableVariants(source
, target
); 
1995             if (nv 
< 0 || nv 
> 255) { 
1996                 errln((UnicodeString
)"FAIL: Bad variant count: " + nv
); 
1999             for (int32_t k
=0; k
<nv
; ++k
) { 
2000                 UnicodeString variant
; 
2001                 Transliterator::getAvailableVariant(k
, source
, target
, variant
); 
2002                 if (variant
.length() == 0) {  
2003                     logln((UnicodeString
)"  " + k 
+ ": <empty>"); 
2005                     logln((UnicodeString
)"  " + k 
+ ": " + variant
); 
2011     // Test registration 
2012     const char* IDS
[] = { "Fieruwer", "Seoridf-Sweorie", "Oewoir-Oweri/Vsie" }; 
2013     const char* FULL_IDS
[] = { "Any-Fieruwer", "Seoridf-Sweorie", "Oewoir-Oweri/Vsie" }; 
2014     const char* SOURCES
[] = { NULL
, "Seoridf", "Oewoir" }; 
2015     for (i
=0; i
<3; ++i
) { 
2016         Transliterator 
*t 
= new TestTrans(IDS
[i
]); 
2018             errln("FAIL: out of memory"); 
2021         if (t
->getID() != IDS
[i
]) { 
2022             errln((UnicodeString
)"FAIL: ID mismatch for " + IDS
[i
]); 
2026         Transliterator::registerInstance(t
); 
2027         UErrorCode status 
= U_ZERO_ERROR
; 
2028         t 
= Transliterator::createInstance(IDS
[i
], UTRANS_FORWARD
, status
); 
2030             errln((UnicodeString
)"FAIL: Registration/creation failed for ID " + 
2033             logln((UnicodeString
)"Ok: Registration/creation succeeded for ID " + 
2037         Transliterator::unregister(IDS
[i
]); 
2038         t 
= Transliterator::createInstance(IDS
[i
], UTRANS_FORWARD
, status
); 
2040             errln((UnicodeString
)"FAIL: Unregistration failed for ID " + 
2046     // Make sure getAvailable API reflects removal 
2047     int32_t n 
= Transliterator::countAvailableIDs(); 
2048     for (i
=0; i
<n
; ++i
) { 
2049         UnicodeString id 
= Transliterator::getAvailableID(i
); 
2050         for (j
=0; j
<3; ++j
) { 
2051             if (id
.caseCompare(FULL_IDS
[j
],0)==0) { 
2052                 errln((UnicodeString
)"FAIL: unregister(" + id 
+ ") failed"); 
2056     n 
= Transliterator::countAvailableTargets("Any"); 
2057     for (i
=0; i
<n
; ++i
) { 
2059         Transliterator::getAvailableTarget(i
, "Any", t
); 
2060         if (t
.caseCompare(IDS
[0],0)==0) { 
2061             errln((UnicodeString
)"FAIL: unregister(Any-" + t 
+ ") failed"); 
2064     n 
= Transliterator::countAvailableSources(); 
2065     for (i
=0; i
<n
; ++i
) { 
2067         Transliterator::getAvailableSource(i
, s
); 
2068         for (j
=0; j
<3; ++j
) { 
2069             if (SOURCES
[j
] == NULL
) continue; 
2070             if (s
.caseCompare(SOURCES
[j
],0)==0) { 
2071                 errln((UnicodeString
)"FAIL: unregister(" + s 
+ "-*) failed"); 
2078  * Test inverse of Greek-Latin; Title() 
// Creates the compound ID "Greek-Latin; Title()" in the REVERSE direction and
// checks that the resulting transliterator reports the ID
// "(Title);Latin-Greek". A creation failure is reported through dataerrln()
// together with the ICU error name; an ID mismatch is reported through errln().
2080 void TransliteratorTest::TestCompoundInverse(void) { 
2081     UParseError parseError
; 
2082     UErrorCode status 
= U_ZERO_ERROR
; 
2083     Transliterator 
*t 
= Transliterator::createInstance
 
2084         ("Greek-Latin; Title()", UTRANS_REVERSE
,parseError
, status
); 
2086         dataerrln("FAIL: createInstance - %s", u_errorName(status
)); 
    // Expected ID of the inverse: the elements appear in reverse order.
2089     UnicodeString 
exp("(Title);Latin-Greek"); 
2090     if (t
->getID() == exp
) { 
2091         logln("Ok: inverse of \"Greek-Latin; Title()\" is \"" + 
2094         errln("FAIL: inverse of \"Greek-Latin; Title()\" is \"" + 
2095               t
->getID() + "\", expected \"" + exp 
+ "\""); 
2101  * Test NFD chaining with RBT 
2103 void TransliteratorTest::TestNFDChainRBT() { 
2105     UErrorCode ec 
= U_ZERO_ERROR
; 
2106     Transliterator
* t 
= Transliterator::createFromRules( 
2107                                "TEST", "::NFD; aa > Q; a > q;", 
2108                                UTRANS_FORWARD
, pe
, ec
); 
2109     if (t 
== NULL 
|| U_FAILURE(ec
)) { 
2110         dataerrln("FAIL: Transliterator::createFromRules failed with %s", u_errorName(ec
)); 
2113     expect(*t
, "aa", "Q"); 
2116     // TEMPORARY TESTS -- BEING DEBUGGED 
2117 //=-    UnicodeString s, s2; 
2118 //=-    t = Transliterator::createInstance("Latin-Devanagari", UTRANS_FORWARD, pe, ec); 
2119 //=-    s = CharsToUnicodeString("rmk\\u1E63\\u0113t"); 
2120 //=-    s2 = CharsToUnicodeString("\\u0930\\u094D\\u092E\\u094D\\u0915\\u094D\\u0937\\u0947\\u0924\\u094D"); 
2121 //=-    expect(*t, s, s2); 
2124 //=-    t = Transliterator::createInstance("Devanagari-Latin", UTRANS_FORWARD, pe, ec); 
2125 //=-    expect(*t, s2, s); 
2128 //=-    t = Transliterator::createInstance("Latin-Devanagari;Devanagari-Latin", UTRANS_FORWARD, pe, ec); 
2129 //=-    s = CharsToUnicodeString("rmk\\u1E63\\u0113t"); 
2130 //=-    expect(*t, s, s); 
2133 //    const char* source[] = { 
2135 //        "\\u015Br\\u012Bmad", 
2136 //        "bhagavadg\\u012Bt\\u0101", 
2139 //        "vi\\u1E63\\u0101da", 
2141 //        "dhr\\u0325tar\\u0101\\u1E63\\u1E6Dra", 
2142 //        "uv\\u0101cr\\u0325", 
2144 //        "rmk\\u1E63\\u0113t", 
2145 //      //"dharmak\\u1E63\\u0113tr\\u0113", 
2147 //        "kuruk\\u1E63\\u0113tr\\u0113", 
2148 //        "samav\\u0113t\\u0101", 
2149 //        "yuyutsava-\\u1E25", 
2150 //        "m\\u0101mak\\u0101-\\u1E25", 
2151 //     // "p\\u0101\\u1E47\\u1E0Dav\\u0101\\u015Bcaiva", 
2153 //        "san\\u0304java", 
2158 //    const char* expected[] = { 
2160 //        "\\u0936\\u094d\\u0930\\u0940\\u092e\\u0926\\u094d", 
2161 //        "\\u092d\\u0917\\u0935\\u0926\\u094d\\u0917\\u0940\\u0924\\u093e", 
2162 //        "\\u0905\\u0927\\u094d\\u092f\\u093e\\u092f", 
2163 //        "\\u0905\\u0930\\u094d\\u091c\\u0941\\u0928", 
2164 //        "\\u0935\\u093f\\u0937\\u093e\\u0926", 
2165 //        "\\u092f\\u094b\\u0917", 
2166 //        "\\u0927\\u0943\\u0924\\u0930\\u093e\\u0937\\u094d\\u091f\\u094d\\u0930", 
2167 //        "\\u0909\\u0935\\u093E\\u091A\\u0943", 
2170 //        //"\\u0927\\u0930\\u094d\\u092e\\u0915\\u094d\\u0937\\u0947\\u0924\\u094d\\u0930\\u0947", 
2172 //        "\\u0915\\u0941\\u0930\\u0941\\u0915\\u094d\\u0937\\u0947\\u0924\\u094d\\u0930\\u0947", 
2173 //        "\\u0938\\u092e\\u0935\\u0947\\u0924\\u093e", 
2174 //        "\\u092f\\u0941\\u092f\\u0941\\u0924\\u094d\\u0938\\u0935\\u0903", 
2175 //        "\\u092e\\u093e\\u092e\\u0915\\u093e\\u0903", 
2176 //    //  "\\u092a\\u093e\\u0923\\u094d\\u0921\\u0935\\u093e\\u0936\\u094d\\u091a\\u0948\\u0935", 
2177 //        "\\u0915\\u093f\\u092e\\u0915\\u0941\\u0930\\u094d\\u0935\\u0924", 
2178 //        "\\u0938\\u0902\\u091c\\u0935", 
2182 //    UErrorCode status = U_ZERO_ERROR; 
2183 //    UParseError parseError; 
2184 //    UnicodeString message; 
2185 //    Transliterator* latinToDevToLatin=Transliterator::createInstance("Latin-Devanagari;Devanagari-Latin", UTRANS_FORWARD, parseError, status); 
2186 //    Transliterator* devToLatinToDev=Transliterator::createInstance("Devanagari-Latin;Latin-Devanagari", UTRANS_FORWARD, parseError, status); 
2187 //    if(U_FAILURE(status)){ 
2188 //        errln("FAIL: construction " +   UnicodeString(" Error: ") + u_errorName(status)); 
2189 //        errln("PreContext: " + prettify(parseError.preContext) + "PostContext: " + prettify( parseError.postContext) ); 
2190 //        delete latinToDevToLatin; 
2191 //        delete devToLatinToDev; 
2194 //    UnicodeString gotResult; 
2195 //    for(int i= 0; source[i] != 0; i++){ 
2196 //        gotResult = source[i]; 
2197 //        expect(*latinToDevToLatin,CharsToUnicodeString(source[i]),CharsToUnicodeString(source[i])); 
2198 //        expect(*devToLatinToDev,CharsToUnicodeString(expected[i]),CharsToUnicodeString(expected[i])); 
2200 //    delete latinToDevToLatin; 
2201 //    delete devToLatinToDev; 
2205  * Inverse of "Null" should be "Null". (J21) 
2207 void TransliteratorTest::TestNullInverse() { 
2209     UErrorCode ec 
= U_ZERO_ERROR
; 
2210     Transliterator 
*t 
= Transliterator::createInstance("Null", UTRANS_FORWARD
, pe
, ec
); 
2211     if (t 
== 0 || U_FAILURE(ec
)) { 
2212         errln("FAIL: createInstance"); 
2215     Transliterator 
*u 
= t
->createInverse(ec
); 
2216     if (u 
== 0 || U_FAILURE(ec
)) { 
2217         errln("FAIL: createInverse"); 
2221     if (u
->getID() != "Null") { 
2222         errln("FAIL: Inverse of Null should be Null"); 
2229  * Check ID of inverse of alias. (J22) 
2231 void TransliteratorTest::TestAliasInverseID() { 
2232     UnicodeString 
ID("Latin-Hangul", ""); // This should be any alias ID with an inverse 
2234     UErrorCode ec 
= U_ZERO_ERROR
; 
2235     Transliterator 
*t 
= Transliterator::createInstance(ID
, UTRANS_FORWARD
, pe
, ec
); 
2236     if (t 
== 0 || U_FAILURE(ec
)) { 
2237         dataerrln("FAIL: createInstance - %s", u_errorName(ec
)); 
2240     Transliterator 
*u 
= t
->createInverse(ec
); 
2241     if (u 
== 0 || U_FAILURE(ec
)) { 
2242         errln("FAIL: createInverse"); 
2246     UnicodeString exp 
= "Hangul-Latin"; 
2247     UnicodeString got 
= u
->getID(); 
2249         errln((UnicodeString
)"FAIL: Inverse of " + ID 
+ " is " + got 
+ 
2250               ", expected " + exp
); 
2257  * Test IDs of inverses of compound transliterators. (J20) 
2259 void TransliteratorTest::TestCompoundInverseID() { 
2260     UnicodeString ID 
= "Latin-Jamo;NFC(NFD)"; 
2262     UErrorCode ec 
= U_ZERO_ERROR
; 
2263     Transliterator 
*t 
= Transliterator::createInstance(ID
, UTRANS_FORWARD
, pe
, ec
); 
2264     if (t 
== 0 || U_FAILURE(ec
)) { 
2265         dataerrln("FAIL: createInstance - %s", u_errorName(ec
)); 
2268     Transliterator 
*u 
= t
->createInverse(ec
); 
2269     if (u 
== 0 || U_FAILURE(ec
)) { 
2270         errln("FAIL: createInverse"); 
2274     UnicodeString exp 
= "NFD(NFC);Jamo-Latin"; 
2275     UnicodeString got 
= u
->getID(); 
2277         errln((UnicodeString
)"FAIL: Inverse of " + ID 
+ " is " + got 
+ 
2278               ", expected " + exp
); 
2285  * Test undefined variable. 
2288 void TransliteratorTest::TestUndefinedVariable() { 
2289     UnicodeString rule 
= "$initial } a <> \\u1161;"; 
2291     UErrorCode ec 
= U_ZERO_ERROR
; 
2292     Transliterator 
*t 
= Transliterator::createFromRules("<ID>", rule
, UTRANS_FORWARD
, pe
, ec
); 
2294     if (U_FAILURE(ec
)) { 
2295         logln((UnicodeString
)"OK: Got exception for " + rule 
+ ", as expected: " + 
2299     errln((UnicodeString
)"Fail: bogus rule " + rule 
+ " compiled with error " + 
2304  * Test empty context. 
2306 void TransliteratorTest::TestEmptyContext() { 
2307     expect(" { a } > b;", "xay a ", "xby b "); 
2311 * Test compound filter ID syntax 
2313 void TransliteratorTest::TestCompoundFilterID(void) { 
2314     static const char* DATA
[] = { 
2315         // Col. 1 = ID or rule set (latter must start with #) 
2317         // = columns > 1 are null if expect col. 1 to be illegal = 
2319         // Col. 2 = direction, "F..." or "R..." 
2320         // Col. 3 = source string 
2321         // Col. 4 = exp result 
2323         "[abc]; [abc]", NULL
, NULL
, NULL
, // multiple filters 
2324         "Latin-Greek; [abc];", NULL
, NULL
, NULL
, // misplaced filter 
2325         "[b]; Latin-Greek; Upper; ([xyz])", "F", "abc", "a\\u0392c", 
2326         "[b]; (Lower); Latin-Greek; Upper(); ([\\u0392])", "R", "\\u0391\\u0392\\u0393", "\\u0391b\\u0393", 
2327         "#\n::[b]; ::Latin-Greek; ::Upper; ::([xyz]);", "F", "abc", "a\\u0392c", 
2328         "#\n::[b]; ::(Lower); ::Latin-Greek; ::Upper(); ::([\\u0392]);", "R", "\\u0391\\u0392\\u0393", "\\u0391b\\u0393", 
2332     for (int32_t i
=0; DATA
[i
]; i
+=4) { 
2333         UnicodeString id 
= CharsToUnicodeString(DATA
[i
]); 
2334         UTransDirection direction 
= (DATA
[i
+1] != NULL 
&& DATA
[i
+1][0] == 'R') ? 
2335             UTRANS_REVERSE 
: UTRANS_FORWARD
; 
2336         UnicodeString source
; 
2338         if (DATA
[i
+2] != NULL
) { 
2339             source 
= CharsToUnicodeString(DATA
[i
+2]); 
2340             exp 
= CharsToUnicodeString(DATA
[i
+3]); 
2342         UBool expOk 
= (DATA
[i
+1] != NULL
); 
2343         Transliterator
* t 
= NULL
; 
2345         UErrorCode ec 
= U_ZERO_ERROR
; 
2346         if (id
.charAt(0) == 0x23/*#*/) { 
2347             t 
= Transliterator::createFromRules("ID", id
, direction
, pe
, ec
); 
2349             t 
= Transliterator::createInstance(id
, direction
, pe
, ec
); 
2351         UBool ok 
= (t 
!= NULL 
&& U_SUCCESS(ec
)); 
2352         UnicodeString transID
; 
2354             transID 
= t
->getID(); 
2357             transID 
= UnicodeString("NULL", ""); 
2360             logln((UnicodeString
)"Ok: " + id 
+ " => " + transID 
+ ", " + 
2362             if (source
.length() != 0) { 
2363                 expect(*t
, source
, exp
); 
2367             dataerrln((UnicodeString
)"FAIL: " + id 
+ " => " + transID 
+ ", " + 
2374  * Test new property set syntax 
2376 void TransliteratorTest::TestPropertySet() { 
2377     expect(UNICODE_STRING_SIMPLE("a>A; \\p{Lu}>x; \\p{ANY}>y;"), "abcDEF", "Ayyxxx"); 
2378     expect("(.+)>'[' $1 ']';", " a stitch \n in time \r saves 9", 
2379            "[ a stitch ]\n[ in time ]\r[ saves 9]"); 
2383  * Test various failure points of the new 2.0 engine. 
// Exercises several engine behaviors in sequence:
//  1. Latin-Hiragana must convert a Latin "a" while leaving the kana that
//     are already present in the input unchanged.
//  2. A CompoundTransliterator built from a three-element array with an
//     [:Ll:] filter set must map "aAaA" to "bAbA" and expose its elements
//     (IDs "a_to_A", "NFD", "A_to_b") through countElements()/getElement().
//  3. Two rule sets are passed to expect(); the second one must turn
//     U+03B1 U+0314 into "ha".
// Creation failures are reported and end the test early.
2385 void TransliteratorTest::TestNewEngine() { 
2387     UErrorCode ec 
= U_ZERO_ERROR
; 
2388     Transliterator 
*t 
= Transliterator::createInstance("Latin-Hiragana", UTRANS_FORWARD
, pe
, ec
); 
2389     if (t 
== 0 || U_FAILURE(ec
)) { 
2390         dataerrln("FAIL: createInstance Latin-Hiragana - %s", u_errorName(ec
)); 
2393     // Katakana should be untouched 
2394     expect(*t
, CharsToUnicodeString("a\\u3042\\u30A2"), 
2395            CharsToUnicodeString("\\u3042\\u3042\\u30A2")); 
2400     // This test will only work if Transliterator.ROLLBACK is 
2401     // true.  Otherwise, this test will fail, revealing a 
2402     // limitation of global filters in incremental mode. 
2404         Transliterator::createFromRules("a_to_A", "a > A;", UTRANS_FORWARD
, pe
, ec
); 
2406         Transliterator::createFromRules("A_to_b", "A > b;", UTRANS_FORWARD
, pe
, ec
); 
2407     if (U_FAILURE(ec
)) { 
2413     Transliterator
* array
[3]; 
2415     array
[1] = Transliterator::createInstance("NFD", UTRANS_FORWARD
, pe
, ec
); 
2417     if (U_FAILURE(ec
)) { 
2418         errln("FAIL: createInstance NFD"); 
    // NOTE(review): the UnicodeSet is heap-allocated and handed straight to the
    // constructor; presumably the CompoundTransliterator adopts it -- confirm.
2425     t 
= new CompoundTransliterator(array
, 3, new UnicodeSet("[:Ll:]", ec
)); 
2426     if (U_FAILURE(ec
)) { 
2427         errln("FAIL: UnicodeSet constructor"); 
2435     expect(*t
, "aAaA", "bAbA"); 
2437     assertTrue("countElements", t
->countElements() == 3); 
2438     assertEquals("getElement(0)", t
->getElement(0, ec
).getID(), "a_to_A"); 
2439     assertEquals("getElement(1)", t
->getElement(1, ec
).getID(), "NFD"); 
2440     assertEquals("getElement(2)", t
->getElement(2, ec
).getID(), "A_to_b"); 
2441     assertSuccess("getElement", ec
); 
2449     expect("$smooth = x; $macron = q; [:^L:] { ([aeiouyAEIOUY] $macron?) } [^aeiouyAEIOUY$smooth$macron] > | $1 $smooth ;", 
2453     UnicodeString gr 
= CharsToUnicodeString( 
2455         "$lcgvowel = [\\u03b1\\u03b5\\u03b7\\u03b9\\u03bf\\u03c5\\u03c9] ;" 
2456         "$rough = \\u0314 ;" 
2457         "($lcgvowel+ $ddot?) $rough > h | $1 ;" 
2461     expect(gr
, CharsToUnicodeString("\\u03B1\\u0314"), "ha"); 
2465  * Test quantified segment behavior.  We want: 
2466  * ([abc])+ > x $1 x; applied to "cba" produces "xax" 
2468 void TransliteratorTest::TestQuantifiedSegment(void) { 
2470     expect("([abc]+) > x $1 x;", "cba", "xcbax"); 
2472     // The tricky case; the quantifier is around the segment 
2473     expect("([abc])+ > x $1 x;", "cba", "xax"); 
2475     // Tricky case in reverse direction 
2476     expect("([abc])+ { q > x $1 x;", "cbaq", "cbaxax"); 
2478     // Check post-context segment 
2479     expect("{q} ([a-d])+ > '(' $1 ')';", "ddqcba", "dd(a)cba"); 
2481     // Test toRule/toPattern for non-quantified segment. 
2482     // Careful with spacing here. 
2483     UnicodeString 
r("([a-c]){q} > x $1 x;"); 
2485     UErrorCode ec 
= U_ZERO_ERROR
; 
2486     Transliterator
* t 
= Transliterator::createFromRules("ID", r
, UTRANS_FORWARD
, pe
, ec
); 
2487     if (U_FAILURE(ec
)) { 
2488         errln("FAIL: createFromRules"); 
2493     t
->toRules(rr
, TRUE
); 
2495         errln((UnicodeString
)"FAIL: \"" + r 
+ "\" x toRules() => \"" + rr 
+ "\""); 
2497         logln((UnicodeString
)"Ok: \"" + r 
+ "\" x toRules() => \"" + rr 
+ "\""); 
2501     // Test toRule/toPattern for quantified segment. 
2502     // Careful with spacing here. 
2503     r 
= "([a-c])+{q} > x $1 x;"; 
2504     t 
= Transliterator::createFromRules("ID", r
, UTRANS_FORWARD
, pe
, ec
); 
2505     if (U_FAILURE(ec
)) { 
2506         errln("FAIL: createFromRules"); 
2510     t
->toRules(rr
, TRUE
); 
2512         errln((UnicodeString
)"FAIL: \"" + r 
+ "\" x toRules() => \"" + rr 
+ "\""); 
2514         logln((UnicodeString
)"Ok: \"" + r 
+ "\" x toRules() => \"" + rr 
+ "\""); 
2519 //====================================================================== 
2521 //====================================================================== 
// Round-trip check for the "Latin-Devanagari" and "Devanagari-Latin"
// transliterators: every source[i] must transliterate to expected[i], and
// every expected[i] must transliterate back to source[i].  Both tables are
// declared with MAX_LEN (52) entries and are walked in parallel; the strings
// are escaped text that is decoded with CharsToUnicodeString().
2522 void TransliteratorTest::TestDevanagariLatinRT(){ 
2523     const int MAX_LEN
= 52; 
2524     const char* const source
[MAX_LEN
] = { 
2539       //"r\\u0323ya", // \u095c is not valid in Devanagari 
2565         "\\u1E6Dh\\u1E6Dha", 
2572         // Not roundtrippable --  
2573         // \\u0939\\u094d\\u094d\\u092E  - hma 
2574         // \\u0939\\u094d\\u092E         - hma 
2575         // CharsToUnicodeString("hma"), 
2580         "san\\u0304j\\u012Bb s\\u0113nagupta", 
2581         "\\u0101nand vaddir\\u0101ju",     
2585     const char* const expected
[MAX_LEN
] = { 
2586         "\\u092D\\u093E\\u0930\\u0924",   /* bha\\u0304rata */ 
2587         "\\u0915\\u094D\\u0930",          /* kra         */ 
2588         "\\u0915\\u094D\\u0937",          /* ks\\u0323a  */ 
2589         "\\u0916\\u094D\\u0930",          /* khra        */ 
2590         "\\u0917\\u094D\\u0930",          /* gra         */ 
2591         "\\u0919\\u094D\\u0930",          /* n\\u0307ra  */ 
2592         "\\u091A\\u094D\\u0930",          /* cra         */ 
2593         "\\u091B\\u094D\\u0930",          /* chra        */ 
2594         "\\u091C\\u094D\\u091E",          /* jn\\u0303a  */ 
2595         "\\u091D\\u094D\\u0930",          /* jhra        */ 
2596         "\\u091E\\u094D\\u0930",          /* n\\u0303ra  */ 
2597         "\\u091F\\u094D\\u092F",          /* t\\u0323ya  */ 
2598         "\\u0920\\u094D\\u0930",          /* t\\u0323hra */ 
2599         "\\u0921\\u094D\\u092F",          /* d\\u0323ya  */ 
2600       //"\\u095C\\u094D\\u092F",        /* r\\u0323ya  */ // \u095c is not valid in Devanagari 
2601         "\\u0922\\u094D\\u092F",          /* d\\u0323hya */ 
2602         "\\u0922\\u093C\\u094D\\u0930",   /* r\\u0323hra */ 
2603         "\\u0923\\u094D\\u0930",          /* n\\u0323ra  */ 
2604         "\\u0924\\u094D\\u0924",          /* tta         */ 
2605         "\\u0925\\u094D\\u0930",          /* thra        */ 
2606         "\\u0926\\u094D\\u0926",          /* dda         */ 
2607         "\\u0927\\u094D\\u0930",          /* dhra        */ 
2608         "\\u0928\\u094D\\u0928",          /* nna         */ 
2609         "\\u092A\\u094D\\u0930",          /* pra         */ 
2610         "\\u092B\\u094D\\u0930",          /* phra        */ 
2611         "\\u092C\\u094D\\u0930",          /* bra         */ 
2612         "\\u092D\\u094D\\u0930",          /* bhra        */ 
2613         "\\u092E\\u094D\\u0930",          /* mra         */ 
2614         "\\u0929\\u094D\\u0930",          /* n\\u0331ra  */ 
2615       //"\\u0934\\u094D\\u0930",        /* l\\u0331ra  */ 
2616         "\\u092F\\u094D\\u0930",          /* yra         */ 
2617         "\\u092F\\u093C\\u094D\\u0930",   /* y\\u0307ra  */ 
2619         "\\u0935\\u094D\\u0930",          /* vra         */ 
2620         "\\u0936\\u094D\\u0930",          /* s\\u0301ra  */ 
2621         "\\u0937\\u094D\\u0930",          /* s\\u0323ra  */ 
2622         "\\u0938\\u094D\\u0930",          /* sra         */ 
2623         "\\u0939\\u094d\\u092E",          /* hma         */ 
2624         "\\u091F\\u094D\\u091F",          /* t\\u0323t\\u0323a  */ 
2625         "\\u091F\\u094D\\u0920",          /* t\\u0323t\\u0323ha */ 
2626         "\\u0920\\u094D\\u0920",          /* t\\u0323ht\\u0323ha*/ 
2627         "\\u0921\\u094D\\u0921",          /* d\\u0323d\\u0323a  */ 
2628         "\\u0921\\u094D\\u0922",          /* d\\u0323d\\u0323ha */ 
2629         "\\u091F\\u094D\\u092F",          /* t\\u0323ya  */ 
2630         "\\u0920\\u094D\\u092F",          /* t\\u0323hya */ 
2631         "\\u0921\\u094D\\u092F",          /* d\\u0323ya  */ 
2632         "\\u0922\\u094D\\u092F",          /* d\\u0323hya */ 
2634         "\\u0939\\u094D\\u092F",          /* hya         */ 
2635         "\\u0936\\u0943",                 /* s\\u0301r\\u0325a  */ 
2636         "\\u0936\\u094D\\u091A",          /* s\\u0301ca  */ 
2637         "\\u090d",                        /* e\\u0306    */ 
2638         "\\u0938\\u0902\\u091C\\u0940\\u092C\\u094D \\u0938\\u0947\\u0928\\u0917\\u0941\\u092A\\u094D\\u0924", 
2639         "\\u0906\\u0928\\u0902\\u0926\\u094d \\u0935\\u0926\\u094d\\u0926\\u093F\\u0930\\u093E\\u091C\\u0941",     
2643     UErrorCode status 
= U_ZERO_ERROR
; 
2644     UParseError parseError
; 
    // NOTE(review): `message` and `gotResult` are declared/assigned below but
    // are not read anywhere in the visible code.
2645     UnicodeString message
; 
    // Both creations share one status; a failure in either is caught by the
    // single U_FAILURE check that follows.
2646     Transliterator
* latinToDev
=Transliterator::createInstance("Latin-Devanagari", UTRANS_FORWARD
, parseError
, status
); 
2647     Transliterator
* devToLatin
=Transliterator::createInstance("Devanagari-Latin", UTRANS_FORWARD
, parseError
, status
); 
2648     if(U_FAILURE(status
)){ 
2649         dataerrln("FAIL: construction " +   UnicodeString(" Error: ") + u_errorName(status
)); 
2650         dataerrln("PreContext: " + prettify(parseError
.preContext
) + " PostContext: " + prettify( parseError
.postContext
) ); 
2653     UnicodeString gotResult
; 
2654     for(int i
= 0; i
<MAX_LEN
; i
++){ 
2655         gotResult 
= source
[i
]; 
2656         expect(*latinToDev
,CharsToUnicodeString(source
[i
]),CharsToUnicodeString(expected
[i
])); 
2657         expect(*devToLatin
,CharsToUnicodeString(expected
[i
]),CharsToUnicodeString(source
[i
])); 
2663 void TransliteratorTest::TestTeluguLatinRT(){ 
2664     const int MAX_LEN
=10; 
2665     const char* const source
[MAX_LEN
] = {    
2666         "raghur\\u0101m vi\\u015Bvan\\u0101dha",                         /* Raghuram Viswanadha    */ 
2667         "\\u0101nand vaddir\\u0101ju",                                   /* Anand Vaddiraju        */ 
2668         "r\\u0101j\\u012Bv ka\\u015Barab\\u0101da",                      /* Rajeev Kasarabada      */ 
2669         "san\\u0304j\\u012Bv ka\\u015Barab\\u0101da",                    /* sanjeev kasarabada     */ 
2670         "san\\u0304j\\u012Bb sen'gupta",                                 /* sanjib sengupata       */ 
2671         "amar\\u0113ndra hanum\\u0101nula",                              /* Amarendra hanumanula   */ 
2672         "ravi kum\\u0101r vi\\u015Bvan\\u0101dha",                       /* Ravi Kumar Viswanadha  */ 
2673         "\\u0101ditya kandr\\u0113gula",                                 /* Aditya Kandregula      */ 
2674         "\\u015Br\\u012Bdhar ka\\u1E47\\u1E6Dama\\u015Be\\u1E6D\\u1E6Di",/* Shridhar Kantamsetty   */ 
2675         "m\\u0101dhav de\\u015Be\\u1E6D\\u1E6Di"                         /* Madhav Desetty         */ 
2678     const char* const expected
[MAX_LEN
] = { 
2679         "\\u0c30\\u0c18\\u0c41\\u0c30\\u0c3e\\u0c2e\\u0c4d \\u0c35\\u0c3f\\u0c36\\u0c4d\\u0c35\\u0c28\\u0c3e\\u0c27",      
2680         "\\u0c06\\u0c28\\u0c02\\u0c26\\u0c4d \\u0C35\\u0C26\\u0C4D\\u0C26\\u0C3F\\u0C30\\u0C3E\\u0C1C\\u0C41",      
2681         "\\u0c30\\u0c3e\\u0c1c\\u0c40\\u0c35\\u0c4d \\u0c15\\u0c36\\u0c30\\u0c2c\\u0c3e\\u0c26", 
2682         "\\u0c38\\u0c02\\u0c1c\\u0c40\\u0c35\\u0c4d \\u0c15\\u0c36\\u0c30\\u0c2c\\u0c3e\\u0c26", 
2683         "\\u0c38\\u0c02\\u0c1c\\u0c40\\u0c2c\\u0c4d \\u0c38\\u0c46\\u0c28\\u0c4d\\u0c17\\u0c41\\u0c2a\\u0c4d\\u0c24", 
2684         "\\u0c05\\u0c2e\\u0c30\\u0c47\\u0c02\\u0c26\\u0c4d\\u0c30 \\u0c39\\u0c28\\u0c41\\u0c2e\\u0c3e\\u0c28\\u0c41\\u0c32", 
2685         "\\u0c30\\u0c35\\u0c3f \\u0c15\\u0c41\\u0c2e\\u0c3e\\u0c30\\u0c4d \\u0c35\\u0c3f\\u0c36\\u0c4d\\u0c35\\u0c28\\u0c3e\\u0c27", 
2686         "\\u0c06\\u0c26\\u0c3f\\u0c24\\u0c4d\\u0c2f \\u0C15\\u0C02\\u0C26\\u0C4D\\u0C30\\u0C47\\u0C17\\u0C41\\u0c32", 
2687         "\\u0c36\\u0c4d\\u0c30\\u0c40\\u0C27\\u0C30\\u0C4D \\u0c15\\u0c02\\u0c1f\\u0c2e\\u0c36\\u0c46\\u0c1f\\u0c4d\\u0c1f\\u0c3f", 
2688         "\\u0c2e\\u0c3e\\u0c27\\u0c35\\u0c4d \\u0c26\\u0c46\\u0c36\\u0c46\\u0c1f\\u0c4d\\u0c1f\\u0c3f", 
2691     UErrorCode status 
= U_ZERO_ERROR
; 
2692     UParseError parseError
; 
2693     UnicodeString message
; 
2694     Transliterator
* latinToDev
=Transliterator::createInstance("Latin-Telugu", UTRANS_FORWARD
, parseError
, status
); 
2695     Transliterator
* devToLatin
=Transliterator::createInstance("Telugu-Latin", UTRANS_FORWARD
, parseError
, status
); 
2696     if(U_FAILURE(status
)){ 
2697         dataerrln("FAIL: construction " +   UnicodeString(" Error: ") + u_errorName(status
)); 
2698         dataerrln("PreContext: " + prettify(parseError
.preContext
) + " PostContext: " + prettify( parseError
.postContext
) ); 
2701     UnicodeString gotResult
; 
2702     for(int i
= 0; i
<MAX_LEN
; i
++){ 
2703         gotResult 
= source
[i
]; 
2704         expect(*latinToDev
,CharsToUnicodeString(source
[i
]),CharsToUnicodeString(expected
[i
])); 
2705         expect(*devToLatin
,CharsToUnicodeString(expected
[i
]),CharsToUnicodeString(source
[i
])); 
2711 void TransliteratorTest::TestSanskritLatinRT(){ 
2712     const int MAX_LEN 
=16; 
2713     const char* const source
[MAX_LEN
] = { 
2714         "rmk\\u1E63\\u0113t", 
2715         "\\u015Br\\u012Bmad", 
2716         "bhagavadg\\u012Bt\\u0101", 
2719         "vi\\u1E63\\u0101da", 
2721         "dhr\\u0325tar\\u0101\\u1E63\\u1E6Dra", 
2722         "uv\\u0101cr\\u0325", 
2723         "dharmak\\u1E63\\u0113tr\\u0113", 
2724         "kuruk\\u1E63\\u0113tr\\u0113", 
2725         "samav\\u0113t\\u0101", 
2727         "m\\u0101mak\\u0101\\u1E25", 
2728     // "p\\u0101\\u1E47\\u1E0Dav\\u0101\\u015Bcaiva", 
2732     const char* const expected
[MAX_LEN
] = { 
2733         "\\u0930\\u094D\\u092E\\u094D\\u0915\\u094D\\u0937\\u0947\\u0924\\u094D", 
2734         "\\u0936\\u094d\\u0930\\u0940\\u092e\\u0926\\u094d", 
2735         "\\u092d\\u0917\\u0935\\u0926\\u094d\\u0917\\u0940\\u0924\\u093e", 
2736         "\\u0905\\u0927\\u094d\\u092f\\u093e\\u092f", 
2737         "\\u0905\\u0930\\u094d\\u091c\\u0941\\u0928", 
2738         "\\u0935\\u093f\\u0937\\u093e\\u0926", 
2739         "\\u092f\\u094b\\u0917", 
2740         "\\u0927\\u0943\\u0924\\u0930\\u093e\\u0937\\u094d\\u091f\\u094d\\u0930", 
2741         "\\u0909\\u0935\\u093E\\u091A\\u0943", 
2742         "\\u0927\\u0930\\u094d\\u092e\\u0915\\u094d\\u0937\\u0947\\u0924\\u094d\\u0930\\u0947", 
2743         "\\u0915\\u0941\\u0930\\u0941\\u0915\\u094d\\u0937\\u0947\\u0924\\u094d\\u0930\\u0947", 
2744         "\\u0938\\u092e\\u0935\\u0947\\u0924\\u093e", 
2745         "\\u092f\\u0941\\u092f\\u0941\\u0924\\u094d\\u0938\\u0935\\u0903", 
2746         "\\u092e\\u093e\\u092e\\u0915\\u093e\\u0903", 
2747     //"\\u092a\\u093e\\u0923\\u094d\\u0921\\u0935\\u093e\\u0936\\u094d\\u091a\\u0948\\u0935", 
2748         "\\u0915\\u093f\\u092e\\u0915\\u0941\\u0930\\u094d\\u0935\\u0924", 
2749         "\\u0938\\u0902\\u091c\\u0935", 
2751     UErrorCode status 
= U_ZERO_ERROR
; 
2752     UParseError parseError
; 
2753     UnicodeString message
; 
2754     Transliterator
* latinToDev
=Transliterator::createInstance("Latin-Devanagari", UTRANS_FORWARD
, parseError
, status
); 
2755     Transliterator
* devToLatin
=Transliterator::createInstance("Devanagari-Latin", UTRANS_FORWARD
, parseError
, status
); 
2756     if(U_FAILURE(status
)){ 
2757         dataerrln("FAIL: construction " +   UnicodeString(" Error: ") + u_errorName(status
)); 
2758         dataerrln("PreContext: " + prettify(parseError
.preContext
) + " PostContext: " + prettify( parseError
.postContext
) ); 
2761     UnicodeString gotResult
; 
2762     for(int i
= 0; i
<MAX_LEN
; i
++){ 
2763         gotResult 
= source
[i
]; 
2764         expect(*latinToDev
,CharsToUnicodeString(source
[i
]),CharsToUnicodeString(expected
[i
])); 
2765         expect(*devToLatin
,CharsToUnicodeString(expected
[i
]),CharsToUnicodeString(source
[i
])); 
2772 void TransliteratorTest::TestCompoundLatinRT(){ 
2773     const char* const source
[] = { 
2774         "rmk\\u1E63\\u0113t", 
2775         "\\u015Br\\u012Bmad", 
2776         "bhagavadg\\u012Bt\\u0101", 
2779         "vi\\u1E63\\u0101da", 
2781         "dhr\\u0325tar\\u0101\\u1E63\\u1E6Dra", 
2782         "uv\\u0101cr\\u0325", 
2783         "dharmak\\u1E63\\u0113tr\\u0113", 
2784         "kuruk\\u1E63\\u0113tr\\u0113", 
2785         "samav\\u0113t\\u0101", 
2787         "m\\u0101mak\\u0101\\u1E25", 
2788      // "p\\u0101\\u1E47\\u1E0Dav\\u0101\\u015Bcaiva", 
2792     const int MAX_LEN 
= UPRV_LENGTHOF(source
); 
2793     const char* const expected
[MAX_LEN
] = { 
2794         "\\u0930\\u094D\\u092E\\u094D\\u0915\\u094D\\u0937\\u0947\\u0924\\u094D", 
2795         "\\u0936\\u094d\\u0930\\u0940\\u092e\\u0926\\u094d", 
2796         "\\u092d\\u0917\\u0935\\u0926\\u094d\\u0917\\u0940\\u0924\\u093e", 
2797         "\\u0905\\u0927\\u094d\\u092f\\u093e\\u092f", 
2798         "\\u0905\\u0930\\u094d\\u091c\\u0941\\u0928", 
2799         "\\u0935\\u093f\\u0937\\u093e\\u0926", 
2800         "\\u092f\\u094b\\u0917", 
2801         "\\u0927\\u0943\\u0924\\u0930\\u093e\\u0937\\u094d\\u091f\\u094d\\u0930", 
2802         "\\u0909\\u0935\\u093E\\u091A\\u0943", 
2803         "\\u0927\\u0930\\u094d\\u092e\\u0915\\u094d\\u0937\\u0947\\u0924\\u094d\\u0930\\u0947", 
2804         "\\u0915\\u0941\\u0930\\u0941\\u0915\\u094d\\u0937\\u0947\\u0924\\u094d\\u0930\\u0947", 
2805         "\\u0938\\u092e\\u0935\\u0947\\u0924\\u093e", 
2806         "\\u092f\\u0941\\u092f\\u0941\\u0924\\u094d\\u0938\\u0935\\u0903", 
2807         "\\u092e\\u093e\\u092e\\u0915\\u093e\\u0903", 
2808     //  "\\u092a\\u093e\\u0923\\u094d\\u0921\\u0935\\u093e\\u0936\\u094d\\u091a\\u0948\\u0935", 
2809         "\\u0915\\u093f\\u092e\\u0915\\u0941\\u0930\\u094d\\u0935\\u0924", 
2810         "\\u0938\\u0902\\u091c\\u0935" 
2812     if(MAX_LEN 
!= UPRV_LENGTHOF(expected
)) { 
2813         errln("error in TestCompoundLatinRT: source[] and expected[] have different lengths!"); 
2817     UErrorCode status 
= U_ZERO_ERROR
; 
2818     UParseError parseError
; 
2819     UnicodeString message
; 
2820     Transliterator
* devToLatinToDev  
=Transliterator::createInstance("Devanagari-Latin;Latin-Devanagari", UTRANS_FORWARD
, parseError
, status
); 
2821     Transliterator
* latinToDevToLatin
=Transliterator::createInstance("Latin-Devanagari;Devanagari-Latin", UTRANS_FORWARD
, parseError
, status
); 
2822     Transliterator
* devToTelToDev    
=Transliterator::createInstance("Devanagari-Telugu;Telugu-Devanagari", UTRANS_FORWARD
, parseError
, status
); 
2823     Transliterator
* latinToTelToLatin
=Transliterator::createInstance("Latin-Telugu;Telugu-Latin", UTRANS_FORWARD
, parseError
, status
); 
2825     if(U_FAILURE(status
)){ 
2826         dataerrln("FAIL: construction " +   UnicodeString(" Error: ") + u_errorName(status
)); 
2827         dataerrln("PreContext: " + prettify(parseError
.preContext
) + " PostContext: " + prettify( parseError
.postContext
) ); 
2830     UnicodeString gotResult
; 
2831     for(int i
= 0; i
<MAX_LEN
; i
++){ 
2832         gotResult 
= source
[i
]; 
2833         expect(*devToLatinToDev
,CharsToUnicodeString(expected
[i
]),CharsToUnicodeString(expected
[i
])); 
2834         expect(*latinToDevToLatin
,CharsToUnicodeString(source
[i
]),CharsToUnicodeString(source
[i
])); 
2835         expect(*latinToTelToLatin
,CharsToUnicodeString(source
[i
]),CharsToUnicodeString(source
[i
])); 
2838     delete(latinToDevToLatin
); 
2839     delete(devToLatinToDev
);   
2840     delete(devToTelToDev
);     
2841     delete(latinToTelToLatin
); 
2845  * Test Gurmukhi-Devanagari Tippi and Bindi 
2847 void TransliteratorTest::TestGurmukhiDevanagari(){ 
2849     // (\u0902) (when preceded by vowel)      --->  (\u0A02) 
2850     // (\u0902) (when preceded by consonant)  --->  (\u0A70) 
2851     UErrorCode status 
= U_ZERO_ERROR
; 
2852     UnicodeSet 
vowel(UnicodeString("[\\u0905-\\u090A \\u090F\\u0910\\u0913\\u0914 \\u093e-\\u0942\\u0947\\u0948\\u094B\\u094C\\u094D]", -1, US_INV
).unescape(), status
); 
2853     UnicodeSet 
non_vowel(UnicodeString("[\\u0915-\\u0928\\u092A-\\u0930]", -1, US_INV
).unescape(), status
); 
2854     UParseError parseError
; 
2856     UnicodeSetIterator 
vIter(vowel
); 
2857     UnicodeSetIterator 
nvIter(non_vowel
); 
2858     Transliterator
* trans 
= Transliterator::createInstance("Devanagari-Gurmukhi",UTRANS_FORWARD
, parseError
, status
); 
2859     if(U_FAILURE(status
)) { 
2860       dataerrln("Error creating transliterator %s", u_errorName(status
)); 
2864     UnicodeString 
src (" \\u0902", -1, US_INV
); 
2865     UnicodeString 
expected(" \\u0A02", -1, US_INV
); 
2866     src 
= src
.unescape(); 
2867     expected
= expected
.unescape(); 
2869     while(vIter
.next()){ 
2870         src
.setCharAt(0,(UChar
) vIter
.getCodepoint()); 
2871         expected
.setCharAt(0,(UChar
) (vIter
.getCodepoint()+0x0100)); 
2872         expect(*trans
,src
,expected
); 
2875     expected
.setCharAt(1,0x0A70); 
2876     while(nvIter
.next()){ 
2877         //src.setCharAt(0,(char) nvIter.codepoint); 
2878         src
.setCharAt(0,(UChar
)nvIter
.getCodepoint()); 
2879         expected
.setCharAt(0,(UChar
) (nvIter
.getCodepoint()+0x0100)); 
2880         expect(*trans
,src
,expected
); 
2885  * Test instantiation from a locale. 
2887 void TransliteratorTest::TestLocaleInstantiation(void) { 
2889     UErrorCode ec 
= U_ZERO_ERROR
; 
2890     Transliterator 
*t 
= Transliterator::createInstance("ru_RU-Latin", UTRANS_FORWARD
, pe
, ec
); 
2891     if (U_FAILURE(ec
)) { 
2892         dataerrln("FAIL: createInstance(ru_RU-Latin) - %s", u_errorName(ec
)); 
2896     expect(*t
, CharsToUnicodeString("\\u0430"), "a"); 
2899     t 
= Transliterator::createInstance("en-el", UTRANS_FORWARD
, pe
, ec
); 
2900     if (U_FAILURE(ec
)) { 
2901         errln("FAIL: createInstance(en-el)"); 
2905     expect(*t
, "a", CharsToUnicodeString("\\u03B1")); 
2910  * Test title case handling of accent (should ignore accents) 
2912 void TransliteratorTest::TestTitleAccents(void) { 
2914     UErrorCode ec 
= U_ZERO_ERROR
; 
2915     Transliterator 
*t 
= Transliterator::createInstance("Title", UTRANS_FORWARD
, pe
, ec
); 
2916     if (U_FAILURE(ec
)) { 
2917         errln("FAIL: createInstance(Title)"); 
2921     expect(*t
, CharsToUnicodeString("a\\u0300b can't abe"), CharsToUnicodeString("A\\u0300b Can't Abe")); 
2926  * Basic test of a locale resource based rule. 
2928 void TransliteratorTest::TestLocaleResource() { 
2929     const char* DATA
[] = { 
2931         //"Latin-Greek/UNGEGN",    "b",               "\\u03bc\\u03c0", 
2932         "Latin-el",              "b",               "\\u03bc\\u03c0", 
2933         "Latin-Greek",           "b",               "\\u03B2", 
2934         "Greek-Latin/UNGEGN",    "\\u03B2",         "v", 
2935         "el-Latin",              "\\u03B2",         "v", 
2936         "Greek-Latin",           "\\u03B2",         "b", 
2938     const int32_t DATA_length 
= UPRV_LENGTHOF(DATA
); 
2939     for (int32_t i
=0; i
<DATA_length
; i
+=3) { 
2941         UErrorCode ec 
= U_ZERO_ERROR
; 
2942         Transliterator 
*t 
= Transliterator::createInstance(DATA
[i
], UTRANS_FORWARD
, pe
, ec
); 
2943         if (U_FAILURE(ec
)) { 
2944             dataerrln((UnicodeString
)"FAIL: createInstance(" + DATA
[i
] + ") - " + u_errorName(ec
)); 
2948         expect(*t
, CharsToUnicodeString(DATA
[i
+1]), 
2949                CharsToUnicodeString(DATA
[i
+2])); 
2955  * Make sure parse errors reference the right line. 
2957 void TransliteratorTest::TestParseError() { 
2958     static const char* rule 
= 
2962     UErrorCode ec 
= U_ZERO_ERROR
; 
2964     Transliterator 
*t 
= Transliterator::createFromRules("ID", rule
, UTRANS_FORWARD
, pe
, ec
); 
2966     if (U_FAILURE(ec
)) { 
2967         UnicodeString 
err(pe
.preContext
); 
2968         err
.append((UChar
)124/*|*/).append(pe
.postContext
); 
2969         if (err
.indexOf("d << b") >= 0) { 
2970             logln("Ok: " + err
); 
2972             errln("FAIL: " + err
); 
2976         errln("FAIL: no syntax error"); 
2978     static const char* maskingRule 
= 
2983     delete Transliterator::createFromRules("ID", maskingRule
, UTRANS_FORWARD
, pe
, ec
); 
2984     if (ec 
!= U_RULE_MASK_ERROR
) { 
2985         errln("FAIL: returned %s instead of U_RULE_MASK_ERROR", u_errorName(ec
)); 
2987     else if (UnicodeString("a > x;") != UnicodeString(pe
.preContext
)) { 
2988         errln("FAIL: did not get expected precontext"); 
2990     else if (UnicodeString("ab > y;") != UnicodeString(pe
.postContext
)) { 
2991         errln("FAIL: did not get expected postcontext"); 
2996  * Make sure sets on output are disallowed. 
2998 void TransliteratorTest::TestOutputSet() { 
2999     UnicodeString rule 
= "$set = [a-cm-n]; b > $set;"; 
3000     UErrorCode ec 
= U_ZERO_ERROR
; 
3002     Transliterator 
*t 
= Transliterator::createFromRules("ID", rule
, UTRANS_FORWARD
, pe
, ec
); 
3004     if (U_FAILURE(ec
)) { 
3005         UnicodeString 
err(pe
.preContext
); 
3006         err
.append((UChar
)124/*|*/).append(pe
.postContext
); 
3007         logln("Ok: " + err
); 
3010     errln("FAIL: No syntax error"); 
3014  * Test the use variable range pragma, making sure that use of 
3015  * variable range characters is detected and flagged as an error. 
3017 void TransliteratorTest::TestVariableRange() { 
3018     UnicodeString rule 
= "use variable range 0x70 0x72; a > A; b > B; q > Q;"; 
3019     UErrorCode ec 
= U_ZERO_ERROR
; 
3021     Transliterator 
*t 
= Transliterator::createFromRules("ID", rule
, UTRANS_FORWARD
, pe
, ec
); 
3023     if (U_FAILURE(ec
)) { 
3024         UnicodeString 
err(pe
.preContext
); 
3025         err
.append((UChar
)124/*|*/).append(pe
.postContext
); 
3026         logln("Ok: " + err
); 
3029     errln("FAIL: No syntax error"); 
3033  * Test invalid post context error handling 
3035 void TransliteratorTest::TestInvalidPostContext() { 
3036     UnicodeString rule 
= "a}b{c>d;"; 
3037     UErrorCode ec 
= U_ZERO_ERROR
; 
3039     Transliterator 
*t 
= Transliterator::createFromRules("ID", rule
, UTRANS_FORWARD
, pe
, ec
); 
3041     if (U_FAILURE(ec
)) { 
3042         UnicodeString 
err(pe
.preContext
); 
3043         err
.append((UChar
)124/*|*/).append(pe
.postContext
); 
3044         if (err
.indexOf("a}b{c") >= 0) { 
3045             logln("Ok: " + err
); 
3047             errln("FAIL: " + err
); 
3051     errln("FAIL: No syntax error"); 
3055  * Test ID form variants 
3057 void TransliteratorTest::TestIDForms() { 
3058     const char* DATA
[] = { 
3060         "nfd", NULL
, "NFC", // make sure case is ignored 
3061         "Any-NFKD", NULL
, "Any-NFKC", 
3062         "Null", NULL
, "Null", 
3063         "-nfkc", "nfkc", "NFKD", 
3064         "-nfkc/", "nfkc", "NFKD", 
3065         "Latin-Greek/UNGEGN", NULL
, "Greek-Latin/UNGEGN", 
3066         "Greek/UNGEGN-Latin", "Greek-Latin/UNGEGN", "Latin-Greek/UNGEGN", 
3067         "Bengali-Devanagari/", "Bengali-Devanagari", "Devanagari-Bengali", 
3068         "Source-", NULL
, NULL
, 
3069         "Source/Variant-", NULL
, NULL
, 
3070         "Source-/Variant", NULL
, NULL
, 
3071         "/Variant", NULL
, NULL
, 
3072         "/Variant-", NULL
, NULL
, 
3073         "-/Variant", NULL
, NULL
, 
3078     const int32_t DATA_length 
= UPRV_LENGTHOF(DATA
); 
3080     for (int32_t i
=0; i
<DATA_length
; i
+=3) { 
3081         const char* ID 
= DATA
[i
]; 
3082         const char* expID 
= DATA
[i
+1]; 
3083         const char* expInvID 
= DATA
[i
+2]; 
3084         UBool expValid 
= (expInvID 
!= NULL
); 
3085         if (expID 
== NULL
) { 
3089         UErrorCode ec 
= U_ZERO_ERROR
; 
3091             Transliterator::createInstance(ID
, UTRANS_FORWARD
, pe
, ec
); 
3092         if (U_FAILURE(ec
)) { 
3094                 logln((UnicodeString
)"Ok: getInstance(" + ID 
+") => " + u_errorName(ec
)); 
3096                 dataerrln((UnicodeString
)"FAIL: Couldn't create " + ID 
+ " - " + u_errorName(ec
)); 
3101         Transliterator 
*u 
= t
->createInverse(ec
); 
3102         if (U_FAILURE(ec
)) { 
3103             errln((UnicodeString
)"FAIL: Couldn't create inverse of " + ID
); 
3108         if (t
->getID() == expID 
&& 
3109             u
->getID() == expInvID
) { 
3110             logln((UnicodeString
)"Ok: " + ID 
+ ".getInverse() => " + expInvID
); 
3112             errln((UnicodeString
)"FAIL: getInstance(" + ID 
+ ") => " + 
3113                   t
->getID() + " x getInverse() => " + u
->getID() + 
3114                   ", expected " + expInvID
); 
3121 static const UChar SPACE
[]   = {32,0}; 
3122 static const UChar NEWLINE
[] = {10,0}; 
3123 static const UChar RETURN
[]  = {13,0}; 
3124 static const UChar EMPTY
[]   = {0}; 
3126 void TransliteratorTest::checkRules(const UnicodeString
& label
, Transliterator
& t2
, 
3127                                     const UnicodeString
& testRulesForward
) { 
3128     UnicodeString rules2
; t2
.toRules(rules2
, TRUE
); 
3129     //rules2 = TestUtility.replaceAll(rules2, new UnicodeSet("[' '\n\r]"), ""); 
3130     rules2
.findAndReplace(SPACE
, EMPTY
); 
3131     rules2
.findAndReplace(NEWLINE
, EMPTY
); 
3132     rules2
.findAndReplace(RETURN
, EMPTY
); 
3134     UnicodeString 
testRules(testRulesForward
); testRules
.findAndReplace(SPACE
, EMPTY
); 
3136     if (rules2 
!= testRules
) { 
3138         logln((UnicodeString
)"GENERATED RULES: " + rules2
); 
3139         logln((UnicodeString
)"SHOULD BE:       " + testRulesForward
); 
3144  * Mark's toRules test. 
3146 void TransliteratorTest::TestToRulesMark() { 
3147     const char* testRules 
=  
3148         "::[[:Latin:][:Mark:]];" 
3151         "a <> \\u03B1;" // alpha 
3155         "::([[:Greek:][:Mark:]]);" 
3157     const char* testRulesForward 
=  
3158         "::[[:Latin:][:Mark:]];" 
3166     const char* testRulesBackward 
=  
3167         "::[[:Greek:][:Mark:]];" 
3174     UnicodeString source 
= CharsToUnicodeString("\\u00E1"); // a-acute 
3175     UnicodeString target 
= CharsToUnicodeString("\\u03AC"); // alpha-acute 
3178     UErrorCode ec 
= U_ZERO_ERROR
; 
3179     Transliterator 
*t2 
= Transliterator::createFromRules("source-target", UnicodeString(testRules
, -1, US_INV
), UTRANS_FORWARD
, pe
, ec
); 
3180     Transliterator 
*t3 
= Transliterator::createFromRules("target-source", UnicodeString(testRules
, -1, US_INV
), UTRANS_REVERSE
, pe
, ec
); 
3182     if (U_FAILURE(ec
)) { 
3185         dataerrln((UnicodeString
)"FAIL: createFromRules => " + u_errorName(ec
)); 
3189     expect(*t2
, source
, target
); 
3190     expect(*t3
, target
, source
); 
3192     checkRules("Failed toRules FORWARD", *t2
, UnicodeString(testRulesForward
, -1, US_INV
)); 
3193     checkRules("Failed toRules BACKWARD", *t3
, UnicodeString(testRulesBackward
, -1, US_INV
)); 
3200  * Test Escape and Unescape transliterators. 
3202 void TransliteratorTest::TestEscape() { 
3208     t 
= Transliterator::createInstance("Hex-Any", UTRANS_FORWARD
, pe
, ec
); 
3209     if (U_FAILURE(ec
)) { 
3210         errln((UnicodeString
)"FAIL: createInstance"); 
3213                UNICODE_STRING_SIMPLE("\\x{40}\\U000000312Q"), 
3219     t 
= Transliterator::createInstance("Any-Hex/C", UTRANS_FORWARD
, pe
, ec
); 
3220     if (U_FAILURE(ec
)) { 
3221         errln((UnicodeString
)"FAIL: createInstance"); 
3224                CharsToUnicodeString("A\\U0010BEEF\\uFEED"), 
3225                UNICODE_STRING_SIMPLE("\\u0041\\U0010BEEF\\uFEED")); 
3230     t 
= Transliterator::createInstance("Any-Hex/Java", UTRANS_FORWARD
, pe
, ec
); 
3231     if (U_FAILURE(ec
)) { 
3232         errln((UnicodeString
)"FAIL: createInstance"); 
3235                CharsToUnicodeString("A\\U0010BEEF\\uFEED"), 
3236                UNICODE_STRING_SIMPLE("\\u0041\\uDBEF\\uDEEF\\uFEED")); 
3241     t 
= Transliterator::createInstance("Any-Hex/Perl", UTRANS_FORWARD
, pe
, ec
); 
3242     if (U_FAILURE(ec
)) { 
3243         errln((UnicodeString
)"FAIL: createInstance"); 
3246                CharsToUnicodeString("A\\U0010BEEF\\uFEED"), 
3247                UNICODE_STRING_SIMPLE("\\x{41}\\x{10BEEF}\\x{FEED}")); 
3253 void TransliteratorTest::TestAnchorMasking(){ 
3254     UnicodeString 
rule ("^a > Q; a > q;"); 
3255     UErrorCode status
= U_ZERO_ERROR
; 
3256     UParseError parseError
; 
3258     Transliterator
* t 
= Transliterator::createFromRules("ID", rule
, UTRANS_FORWARD
,parseError
,status
); 
3259     if(U_FAILURE(status
)){ 
3260         errln(UnicodeString("FAIL: ") + "ID" + 
3261               ".createFromRules() => bad rules" + 
3262               /*", parse error " + parseError.code +*/ 
3263               ", line " + parseError
.line 
+ 
3264               ", offset " + parseError
.offset 
+ 
3265               ", context " + prettify(parseError
.preContext
, TRUE
) + 
3266               ", rules: " + prettify(rule
, TRUE
)); 
3272  * Make sure display names of variants look reasonable. 
3274 void TransliteratorTest::TestDisplayName() { 
3275 #if UCONFIG_NO_FORMATTING 
3276     logln("Skipping, UCONFIG_NO_FORMATTING is set\n"); 
3279     static const char* DATA
[] = { 
3280         // ID, forward name, reverse name 
3281         // Update the text as necessary -- the important thing is 
3282         // not the text itself, but how various cases are handled. 
3285         "Any-Hex", "Any to Hex Escape", "Hex Escape to Any", 
3288         "Any-Hex/Perl", "Any to Hex Escape/Perl", "Hex Escape to Any/Perl", 
3291         "NFC", "Any to NFC", "Any to NFD", 
3294     int32_t DATA_length 
= UPRV_LENGTHOF(DATA
); 
3296     Locale 
US("en", "US"); 
3298     for (int32_t i
=0; i
<DATA_length
; i
+=3) { 
3300         Transliterator::getDisplayName(DATA
[i
], US
, name
); 
3301         if (name 
!= DATA
[i
+1]) { 
3302             dataerrln((UnicodeString
)"FAIL: " + DATA
[i
] + ".getDisplayName() => " + 
3303                   name 
+ ", expected " + DATA
[i
+1]); 
3305             logln((UnicodeString
)"Ok: " + DATA
[i
] + ".getDisplayName() => " + name
); 
3307         UErrorCode ec 
= U_ZERO_ERROR
; 
3309         Transliterator 
*t 
= Transliterator::createInstance(DATA
[i
], UTRANS_REVERSE
, pe
, ec
); 
3310         if (U_FAILURE(ec
)) { 
3312             dataerrln("FAIL: createInstance failed - %s", u_errorName(ec
)); 
3315         name 
= Transliterator::getDisplayName(t
->getID(), US
, name
); 
3316         if (name 
!= DATA
[i
+2]) { 
3317             dataerrln((UnicodeString
)"FAIL: " + t
->getID() + ".getDisplayName() => " + 
3318                   name 
+ ", expected " + DATA
[i
+2]); 
3320             logln((UnicodeString
)"Ok: " + t
->getID() + ".getDisplayName() => " + name
); 
3327 void TransliteratorTest::TestSpecialCases(void) { 
3328     const UnicodeString registerRules
[] = { 
3329         "Any-Dev1", "x > X; y > Y;", 
3330         "Any-Dev2", "XY > Z", 
3332             CharsToUnicodeString
 
3333             ("[^[:L:][:M:]] { \\u03bc\\u03c0 > b ; \\u03bc\\u03c0 } [^[:L:][:M:]] > b ; [^[:L:][:M:]] { [\\u039c\\u03bc][\\u03a0\\u03c0] > B ; [\\u039c\\u03bc][\\u03a0\\u03c0] } [^[:L:][:M:]] > B ;"), 
3337     const UnicodeString testCases
[] = { 
3339         // should add more test cases 
3340         "NFD" , CharsToUnicodeString("a\\u0300 \\u00E0 \\u1100\\u1161 \\uFF76\\uFF9E\\u03D3"), "", 
3341         "NFC" , CharsToUnicodeString("a\\u0300 \\u00E0 \\u1100\\u1161 \\uFF76\\uFF9E\\u03D3"), "", 
3342         "NFKD", CharsToUnicodeString("a\\u0300 \\u00E0 \\u1100\\u1161 \\uFF76\\uFF9E\\u03D3"), "", 
3343         "NFKC", CharsToUnicodeString("a\\u0300 \\u00E0 \\u1100\\u1161 \\uFF76\\uFF9E\\u03D3"), "", 
3346         "Greek-Latin/UNGEGN", CharsToUnicodeString("(\\u03BC\\u03C0)"), "(b)", 
3347         "Greek-Latin/FAKE", CharsToUnicodeString("(\\u03BC\\u03C0)"), "(b)", 
3349         // check for devanagari bug 
3350         "nfd;Dev1;Dev2;nfc", "xy", "Z", 
3352         // ff, i, dotless-i, I, dotted-I, LJLjlj deseret deeDEE 
3353         "Title", CharsToUnicodeString("ab'cD ffi\\u0131I\\u0130 \\u01C7\\u01C8\\u01C9 ") + DESERET_dee 
+ DESERET_DEE
,  
3354                  CharsToUnicodeString("Ab'cd Ffi\\u0131ii\\u0307 \\u01C8\\u01C9\\u01C9 ") + DESERET_DEE 
+ DESERET_dee
,  
3356         //TODO: enable this test once Titlecase works right 
3358         "Title", CharsToUnicodeString("\\uFB00i\\u0131I\\u0130 \\u01C7\\u01C8\\u01C9 ") + DESERET_dee + DESERET_DEE,  
3359                  CharsToUnicodeString("Ffi\\u0131ii \\u01C8\\u01C9\\u01C9 ") + DESERET_DEE + DESERET_dee,  
3361         "Upper", CharsToUnicodeString("ab'cD \\uFB00i\\u0131I\\u0130 \\u01C7\\u01C8\\u01C9 ") + DESERET_dee 
+ DESERET_DEE
,  
3362                  CharsToUnicodeString("AB'CD FFIII\\u0130 \\u01C7\\u01C7\\u01C7 ") + DESERET_DEE 
+ DESERET_DEE
, 
3363         "Lower", CharsToUnicodeString("ab'cD \\uFB00i\\u0131I\\u0130 \\u01C7\\u01C8\\u01C9 ") + DESERET_dee 
+ DESERET_DEE
,  
3364                  CharsToUnicodeString("ab'cd \\uFB00i\\u0131ii\\u0307 \\u01C9\\u01C9\\u01C9 ") + DESERET_dee 
+ DESERET_dee
, 
3366         "Upper", CharsToUnicodeString("ab'cD \\uFB00i\\u0131I\\u0130 \\u01C7\\u01C8\\u01C9 ") + DESERET_dee 
+ DESERET_DEE
, "", 
3367         "Lower", CharsToUnicodeString("ab'cD \\uFB00i\\u0131I\\u0130 \\u01C7\\u01C8\\u01C9 ") + DESERET_dee 
+ DESERET_DEE
, "", 
3370         "Greek-Latin/UNGEGN",  CharsToUnicodeString("\\u03C3 \\u03C3\\u03C2 \\u03C2\\u03C3"),  
3371                                CharsToUnicodeString("s ss s\\u0331s\\u0331") , 
3372         "Latin-Greek/UNGEGN",  CharsToUnicodeString("s ss s\\u0331s\\u0331"),  
3373                                CharsToUnicodeString("\\u03C3 \\u03C3\\u03C2 \\u03C2\\u03C3") , 
3374         "Greek-Latin",  CharsToUnicodeString("\\u03C3 \\u03C3\\u03C2 \\u03C2\\u03C3"),  
3375                         CharsToUnicodeString("s ss s\\u0331s\\u0331") , 
3376         "Latin-Greek",  CharsToUnicodeString("s ss s\\u0331s\\u0331"),  
3377                         CharsToUnicodeString("\\u03C3 \\u03C3\\u03C2 \\u03C2\\u03C3"), 
3379         // Upper: TAT\\u02B9\\u00C2NA 
3380         // Lower: tat\\u02B9\\u00E2na 
3381         // Title: Tat\\u02B9\\u00E2na 
3382         "Upper", CharsToUnicodeString("tat\\u02B9\\u00E2na"), 
3383                  CharsToUnicodeString("TAT\\u02B9\\u00C2NA"), 
3384         "Lower", CharsToUnicodeString("TAT\\u02B9\\u00C2NA"), 
3385                  CharsToUnicodeString("tat\\u02B9\\u00E2na"), 
3386         "Title", CharsToUnicodeString("tat\\u02B9\\u00E2na"), 
3387                  CharsToUnicodeString("Tat\\u02B9\\u00E2na"), 
3394     for (i 
= 0; registerRules
[i
].length()!=0; i
+=2) { 
3395         UErrorCode status 
= U_ZERO_ERROR
; 
3397         Transliterator 
*t 
= Transliterator::createFromRules(registerRules
[0+i
],  
3398             registerRules
[i
+1], UTRANS_FORWARD
, pos
, status
); 
3399         if (U_FAILURE(status
)) { 
3400             dataerrln("Fails: Unable to create the transliterator from rules. - %s", u_errorName(status
)); 
3402             Transliterator::registerInstance(t
); 
3405     for (i 
= 0; testCases
[i
].length()!=0; i
+=3) { 
3406         UErrorCode ec 
= U_ZERO_ERROR
; 
3408         const UnicodeString
& name 
= testCases
[i
]; 
3409         Transliterator 
*t 
= Transliterator::createInstance(name
, UTRANS_FORWARD
, pe
, ec
); 
3410         if (U_FAILURE(ec
)) { 
3411             dataerrln((UnicodeString
)"FAIL: Couldn't create " + name 
+ " - " + u_errorName(ec
)); 
3415         const UnicodeString
& id 
= t
->getID(); 
3416         const UnicodeString
& source 
= testCases
[i
+1]; 
3417         UnicodeString target
; 
3419         // Automatic generation of targets, to make it simpler to add test cases (and more fail-safe) 
3421         if (testCases
[i
+2].length() > 0) { 
3422             target 
= testCases
[i
+2]; 
3423         } else if (0==id
.caseCompare("NFD", U_FOLD_CASE_DEFAULT
)) { 
3424             Normalizer::normalize(source
, UNORM_NFD
, 0, target
, ec
); 
3425         } else if (0==id
.caseCompare("NFC", U_FOLD_CASE_DEFAULT
)) { 
3426             Normalizer::normalize(source
, UNORM_NFC
, 0, target
, ec
); 
3427         } else if (0==id
.caseCompare("NFKD", U_FOLD_CASE_DEFAULT
)) { 
3428             Normalizer::normalize(source
, UNORM_NFKD
, 0, target
, ec
); 
3429         } else if (0==id
.caseCompare("NFKC", U_FOLD_CASE_DEFAULT
)) { 
3430             Normalizer::normalize(source
, UNORM_NFKC
, 0, target
, ec
); 
3431         } else if (0==id
.caseCompare("Lower", U_FOLD_CASE_DEFAULT
)) { 
3433             target
.toLower(Locale::getUS()); 
3434         } else if (0==id
.caseCompare("Upper", U_FOLD_CASE_DEFAULT
)) { 
3436             target
.toUpper(Locale::getUS()); 
3438         if (U_FAILURE(ec
)) { 
3439             errln((UnicodeString
)"FAIL: Internal error normalizing " + source
); 
3443         expect(*t
, source
, target
); 
3446     for (i 
= 0; registerRules
[i
].length()!=0; i
+=2) { 
3447         Transliterator::unregister(registerRules
[i
]); 
3451 char* Char32ToEscapedChars(UChar32 ch
, char* buffer
) { 
3453         sprintf(buffer
, "\\u%04x", (int)ch
); 
3455         sprintf(buffer
, "\\U%08x", (int)ch
); 
3460 void TransliteratorTest::TestSurrogateCasing (void) { 
3461     // check that casing handles surrogates 
3462     // titlecase is currently defective 
3466     U16_GET(DESERET_dee
,0, 0, DESERET_dee
.length(), dee
); 
3467     UnicodeString 
DEE(u_totitle(dee
)); 
3468     if (DEE 
!= DESERET_DEE
) { 
3469         err("Fails titlecase of surrogates"); 
3470         err(Char32ToEscapedChars(dee
, buffer
));  
3472         errln(Char32ToEscapedChars(DEE
.char32At(0), buffer
)); 
3475     UnicodeString deeDEETest
=DESERET_dee 
+ DESERET_DEE
; 
3476     UnicodeString deedeeTest 
= DESERET_dee 
+ DESERET_dee
; 
3477     UnicodeString DEEDEETest 
= DESERET_DEE 
+ DESERET_DEE
; 
3478     UErrorCode status
= U_ZERO_ERROR
; 
3480     u_strToUpper(buffer2
, 20, deeDEETest
.getBuffer(), deeDEETest
.length(), NULL
, &status
); 
3481     if (U_FAILURE(status
) || (UnicodeString(buffer2
)!= DEEDEETest
)) { 
3482         errln("Fails: Can't uppercase surrogates."); 
3485     status
= U_ZERO_ERROR
; 
3486     u_strToLower(buffer2
, 20, deeDEETest
.getBuffer(), deeDEETest
.length(), NULL
, &status
); 
3487     if (U_FAILURE(status
) || (UnicodeString(buffer2
)!= deedeeTest
)) { 
3488         errln("Fails: Can't lowercase surrogates."); 
3492 static void _trans(Transliterator
& t
, const UnicodeString
& src
, 
3493                    UnicodeString
& result
) { 
3495     t
.transliterate(result
); 
3498 static void _trans(const UnicodeString
& id
, const UnicodeString
& src
, 
3499                    UnicodeString
& result
, UErrorCode ec
) { 
3501     Transliterator 
*t 
= Transliterator::createInstance(id
, UTRANS_FORWARD
, pe
, ec
); 
3502     if (U_SUCCESS(ec
)) { 
3503         _trans(*t
, src
, result
); 
3508 static UnicodeString 
_findMatch(const UnicodeString
& source
, 
3509                                        const UnicodeString
* pairs
) { 
3510     UnicodeString empty
; 
3511     for (int32_t i
=0; pairs
[i
].length() > 0; i
+=2) { 
3512         if (0==source
.caseCompare(pairs
[i
], U_FOLD_CASE_DEFAULT
)) { 
3519 // Check to see that incremental gets at least part way through a reasonable string. 
3521 void TransliteratorTest::TestIncrementalProgress(void) { 
3522     UErrorCode ec 
= U_ZERO_ERROR
; 
3523     UnicodeString latinTest 
= "The Quick Brown Fox."; 
3524     UnicodeString devaTest
; 
3525     _trans("Latin-Devanagari", latinTest
, devaTest
, ec
); 
3526     UnicodeString kataTest
; 
3527     _trans("Latin-Katakana", latinTest
, kataTest
, ec
); 
3528     if (U_FAILURE(ec
)) { 
3529         errln("FAIL: Internal error"); 
3532     const UnicodeString tests
[] = { 
3535         "Halfwidth", latinTest
, 
3536         "Devanagari", devaTest
, 
3537         "Katakana", kataTest
, 
3541     UnicodeString 
test("The Quick Brown Fox Jumped Over The Lazy Dog."); 
3542     int32_t i 
= 0, j
=0, k
=0; 
3543     int32_t sources 
= Transliterator::countAvailableSources(); 
3544     for (i 
= 0; i 
< sources
; i
++) { 
3545         UnicodeString source
; 
3546         Transliterator::getAvailableSource(i
, source
); 
3547         UnicodeString test 
= _findMatch(source
, tests
); 
3548         if (test
.length() == 0) { 
3549             logln((UnicodeString
)"Skipping " + source 
+ "-X"); 
3552         int32_t targets 
= Transliterator::countAvailableTargets(source
); 
3553         for (j 
= 0; j 
< targets
; j
++) { 
3554             UnicodeString target
; 
3555             Transliterator::getAvailableTarget(j
, source
, target
); 
3556             int32_t variants 
= Transliterator::countAvailableVariants(source
, target
); 
3557             for (k 
=0; k
< variants
; k
++) { 
3558                 UnicodeString variant
; 
3560                 UErrorCode status 
= U_ZERO_ERROR
; 
3562                 Transliterator::getAvailableVariant(k
, source
, target
, variant
); 
3563                 UnicodeString id 
= source 
+ "-" + target 
+ "/" + variant
; 
3565                 Transliterator 
*t 
= Transliterator::createInstance(id
, UTRANS_FORWARD
, err
, status
); 
3566                 if (U_FAILURE(status
)) { 
3567                     dataerrln((UnicodeString
)"FAIL: Could not create " + id
); 
3571                 status 
= U_ZERO_ERROR
; 
3572                 CheckIncrementalAux(t
, test
); 
3575                 _trans(*t
, test
, rev
); 
3576                 Transliterator 
*inv 
= t
->createInverse(status
); 
3577                 if (U_FAILURE(status
)) { 
3578                     // The following are forward-only, it is OK that creating an inverse will not work: 
3579                     // 1. Devanagari-Arabic 
3581                     // 2a. Any-*/BGN_1981 
3584                     // If UCONFIG_NO_BREAK_ITERATION is on, Latin-Thai is also not expected to work. 
3585                     if (    id
.compare((UnicodeString
)"Devanagari-Arabic/") != 0 
3586                          && !(id
.startsWith((UnicodeString
)"Any-") && 
3587                                 (id
.endsWith((UnicodeString
)"/BGN") || id
.endsWith((UnicodeString
)"/BGN_1981") || id
.endsWith((UnicodeString
)"/UNGEGN") || id
.endsWith((UnicodeString
)"/MNS")) 
3589 #if UCONFIG_NO_BREAK_ITERATION 
3590                          && id
.compare((UnicodeString
)"Latin-Thai/") != 0 
3594                         errln((UnicodeString
)"FAIL: Could not create inverse of " + id
); 
3600                 CheckIncrementalAux(inv
, rev
); 
3608 void TransliteratorTest::CheckIncrementalAux(const Transliterator
* t
,  
3609                                                       const UnicodeString
& input
) { 
3610     UErrorCode ec 
= U_ZERO_ERROR
; 
3612     UnicodeString test 
= input
; 
3614     pos
.contextStart 
= 0; 
3615     pos
.contextLimit 
= input
.length(); 
3617     pos
.limit 
= input
.length(); 
3619     t
->transliterate(test
, pos
, ec
); 
3620     if (U_FAILURE(ec
)) { 
3621         errln((UnicodeString
)"FAIL: transliterate() error " + u_errorName(ec
)); 
3624     UBool gotError 
= FALSE
; 
3625     (void)gotError
;    // Suppress set but not used warning. 
3627     // we have a few special cases. Any-Remove (pos.start = 0, but also = limit) and U+XXXXX?X? 
3629     if (pos
.start 
== 0 && pos
.limit 
!= 0 && t
->getID() != "Hex-Any/Unicode") { 
3630         errln((UnicodeString
)"No Progress, " + 
3631               t
->getID() + ": " + formatInput(test
, input
, pos
)); 
3634         logln((UnicodeString
)"PASS Progress, " + 
3635               t
->getID() + ": " + formatInput(test
, input
, pos
)); 
3637     t
->finishTransliteration(test
, pos
); 
3638     if (pos
.start 
!= pos
.limit
) { 
3639         errln((UnicodeString
)"Incomplete, " + 
3640               t
->getID() + ": " + formatInput(test
, input
, pos
)); 
3645 void TransliteratorTest::TestFunction() { 
3646     // Careful with spacing and ';' here:  Phrase this exactly 
3647     // as toRules() is going to return it.  If toRules() changes 
3648     // with regard to spacing or ';', then adjust this string. 
3649     UnicodeString rule 
= 
3650         "([:Lu:]) > $1 '(' &Lower( $1 ) '=' &Hex( &Any-Lower( $1 ) ) ')';"; 
3653     UErrorCode ec 
= U_ZERO_ERROR
; 
3654     Transliterator 
*t 
= Transliterator::createFromRules("Test", rule
, UTRANS_FORWARD
, pe
, ec
); 
3656         dataerrln("FAIL: createFromRules failed - %s", u_errorName(ec
)); 
3661     t
->toRules(r
, TRUE
); 
3663         logln((UnicodeString
)"OK: toRules() => " + r
); 
3665         errln((UnicodeString
)"FAIL: toRules() => " + r 
+ 
3666               ", expected " + rule
); 
3669     expect(*t
, "The Quick Brown Fox", 
3670            UNICODE_STRING_SIMPLE("T(t=\\u0074)he Q(q=\\u0071)uick B(b=\\u0062)rown F(f=\\u0066)ox")); 
3675 void TransliteratorTest::TestInvalidBackRef(void) { 
3676     UnicodeString rule 
=  ". > $1;"; 
3677     UnicodeString rule2 
=CharsToUnicodeString("(.) <> &hex/unicode($1) &name($1); . > $1; [{}] >\\u0020;"); 
3679     UErrorCode ec 
= U_ZERO_ERROR
; 
3680     Transliterator 
*t 
= Transliterator::createFromRules("Test", rule
, UTRANS_FORWARD
, pe
, ec
); 
3681     Transliterator 
*t2 
= Transliterator::createFromRules("Test2", rule2
, UTRANS_FORWARD
, pe
, ec
); 
3684         errln("FAIL: createFromRules should have returned NULL"); 
3689         errln("FAIL: createFromRules should have returned NULL"); 
3693     if (U_SUCCESS(ec
)) { 
3694         errln("FAIL: Ok: . > $1; => no error"); 
3696         logln((UnicodeString
)"Ok: . > $1; => " + u_errorName(ec
)); 
3700 void TransliteratorTest::TestMulticharStringSet() { 
3707         "         e } [{fg}] > r;" ; 
3710     UErrorCode ec 
= U_ZERO_ERROR
; 
3711     Transliterator
* t 
= Transliterator::createFromRules("Test", rule
, UTRANS_FORWARD
, pe
, ec
); 
3712     if (t 
== NULL 
|| U_FAILURE(ec
)) { 
3714         errln("FAIL: createFromRules failed"); 
3718     expect(*t
, "a aa ab bc d gd de gde gdefg ddefg", 
3719            "y x yz z d gd de gdq gdqfg ddrfg"); 
3722     // Overlapped string test.  Make sure that when multiple 
3723     // strings can match that the longest one is matched. 
3725         "    [a {ab} {abc}]    > x;" 
3728         " q [t {st} {rst}] { e > p;" ; 
3730     t 
= Transliterator::createFromRules("Test", rule
, UTRANS_FORWARD
, pe
, ec
); 
3731     if (t 
== NULL 
|| U_FAILURE(ec
)) { 
3733         errln("FAIL: createFromRules failed"); 
3737     expect(*t
, "a ab abc qte qste qrste", 
3738            "x x x qtp qstp qrstp"); 
3742 // vvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvv 
3743 // BEGIN TestUserFunction support factory 
3745 Transliterator
* _TUFF
[4]; 
3746 UnicodeString
* _TUFID
[4]; 
3748 static Transliterator
* U_EXPORT2 
_TUFFactory(const UnicodeString
& /*ID*/, 
3749                                    Transliterator::Token context
) { 
3750     return _TUFF
[context
.integer
]->clone(); 
3753 static void _TUFReg(const UnicodeString
& ID
, Transliterator
* t
, int32_t n
) { 
3755     _TUFID
[n
] = new UnicodeString(ID
); 
3756     Transliterator::registerFactory(ID
, _TUFFactory
, Transliterator::integerToken(n
)); 
3759 static void _TUFUnreg(int32_t n
) { 
3760     if (_TUFF
[n
] != NULL
) { 
3761         Transliterator::unregister(*_TUFID
[n
]); 
3767 // END TestUserFunction support factory 
3768 // ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ 
3771  * Test that user-registered transliterators can be used under function 
// Builds four rule-based transliterators (gif, RemoveCurly, hex2, gif2),
// registers each through _TUFReg under an "Any-..." ID in slots 0..3, and
// then checks that they can be created again by ID and called from other
// rule sets with the "&Name(...)" function syntax.
3774 void TransliteratorTest::TestUserFunction() { 
3778     UErrorCode ec 
= U_ZERO_ERROR
; 
3780     // Setup our factory 
3782     for (i
=0; i
<4; ++i
) { 
3786     // There's no need to register inverses if we don't use them 
    // Slot 0: rewrites a literal "\uXXXX" escape into an <img> tag.
3787     t 
= Transliterator::createFromRules("gif", 
3788                                         UNICODE_STRING_SIMPLE("'\\'u(..)(..) > '<img src=\"http://www.unicode.org/gifs/24/' $1 '/U' $1$2 '.gif\">';"), 
3789                                         UTRANS_FORWARD
, pe
, ec
); 
3790     if (t 
== NULL 
|| U_FAILURE(ec
)) { 
3791         dataerrln((UnicodeString
)"FAIL: createFromRules gif " + u_errorName(ec
)); 
3794     _TUFReg("Any-gif", t
, 0); 
    // Slot 1: deletes curly braces and a literal "\N".
3796     t 
= Transliterator::createFromRules("RemoveCurly", 
3797                                         UNICODE_STRING_SIMPLE("[\\{\\}] > ; '\\N' > ;"), 
3798                                         UTRANS_FORWARD
, pe
, ec
); 
3799     if (t 
== NULL 
|| U_FAILURE(ec
)) { 
3800         errln((UnicodeString
)"FAIL: createFromRules RemoveCurly " + u_errorName(ec
)); 
3803     expect(*t
, UNICODE_STRING_SIMPLE("\\N{name}"), "name"); 
3804     _TUFReg("Any-RemoveCurly", t
, 1); 
    // Slot 2: the rule text for hex2 is on a line not shown here.
3806     logln("Trying &hex"); 
3807     t 
= Transliterator::createFromRules("hex2", 
3809                                         UTRANS_FORWARD
, pe
, ec
); 
3810     if (t 
== NULL 
|| U_FAILURE(ec
)) { 
3811         errln("FAIL: createFromRules"); 
3814     logln("Registering"); 
3815     _TUFReg("Any-hex2", t
, 2); 
    // Re-create by ID so the registered factory, not the prototype, is tested.
3816     t 
= Transliterator::createInstance("Any-hex2", UTRANS_FORWARD
, ec
); 
3817     if (t 
== NULL 
|| U_FAILURE(ec
)) { 
3818         errln((UnicodeString
)"FAIL: createInstance Any-hex2 " + u_errorName(ec
)); 
3821     expect(*t
, "abc", UNICODE_STRING_SIMPLE("\\u0061\\u0062\\u0063")); 
    // Slot 3: nests two user functions, &Gif(&Hex2(...)).
3824     logln("Trying &gif"); 
3825     t 
= Transliterator::createFromRules("gif2", 
3826                                         "(.) > &Gif(&Hex2($1));", 
3827                                         UTRANS_FORWARD
, pe
, ec
); 
3828     if (t 
== NULL 
|| U_FAILURE(ec
)) { 
3829         errln((UnicodeString
)"FAIL: createFromRules gif2 " + u_errorName(ec
)); 
3832     logln("Registering"); 
3833     _TUFReg("Any-gif2", t
, 3); 
3834     t 
= Transliterator::createInstance("Any-gif2", UTRANS_FORWARD
, ec
); 
3835     if (t 
== NULL 
|| U_FAILURE(ec
)) { 
3836         errln((UnicodeString
)"FAIL: createInstance Any-gif2 " + u_errorName(ec
)); 
3839     expect(*t
, "ab", "<img src=\"http://www.unicode.org/gifs/24/00/U0061.gif\">" 
3840            "<img src=\"http://www.unicode.org/gifs/24/00/U0062.gif\">"); 
3843     // Test that filters are allowed after & 
3844     t 
= Transliterator::createFromRules("test", 
3845                                         "(.) > &Hex($1) ' ' &RemoveCurly(&Name($1)) ' ';", 
3846                                         UTRANS_FORWARD
, pe
, ec
); 
3847     if (t 
== NULL 
|| U_FAILURE(ec
)) { 
3848         errln((UnicodeString
)"FAIL: createFromRules test " + u_errorName(ec
)); 
3852            UNICODE_STRING_SIMPLE("\\u0061 LATIN SMALL LETTER A \\u0062 LATIN SMALL LETTER B \\u0063 LATIN SMALL LETTER C ")); 
3856     for (i
=0; i
<4; ++i
) { 
3862  * Test the Any-X transliterators. 
// Creates "Any-Latin" and the compound "Any-Latin;Latin-ASCII" by ID and
// reports a data error if creation fails.  The string pairs below are
// sample input (Greek/Hiragana/Cyrillic text, Arabic and Persian digits)
// and the Latin/ASCII output expected for it; the calls that consume them
// start on lines not shown here.
3864 void TransliteratorTest::TestAnyX(void) { 
3865     UParseError parseError
; 
3866     UErrorCode status 
= U_ZERO_ERROR
; 
3867     Transliterator
* anyLatin 
= 
3868         Transliterator::createInstance("Any-Latin", UTRANS_FORWARD
, parseError
, status
); 
3870         dataerrln("FAIL: createInstance returned NULL - %s", u_errorName(status
)); 
3876            CharsToUnicodeString("greek:\\u03B1\\u03B2\\u03BA\\u0391\\u0392\\u039A hiragana:\\u3042\\u3076\\u304F cyrillic:\\u0430\\u0431\\u0446"), 
3877            CharsToUnicodeString("greek:abkABK hiragana:abuku cyrillic:abc")); 
    // status is reset so the second creation is judged on its own.
3881     status 
= U_ZERO_ERROR
; 
3882     Transliterator
* anyASCII 
= 
3883         Transliterator::createInstance("Any-Latin;Latin-ASCII", UTRANS_FORWARD
, parseError
, status
); 
3884     if (U_FAILURE(status
) || anyASCII
==0) { 
3885         dataerrln("FAIL: createInstance returned NULL and/or set status %s", u_errorName(status
)); 
3891            CharsToUnicodeString("ArabicDigits:\\u0660\\u0661\\u0664\\u0669 PersianDigits:\\u06F0\\u06F1\\u06F4\\u06F9"), 
3892            CharsToUnicodeString("ArabicDigits:0149 PersianDigits:0149")); 
3898  * Test Any-X transliterators with sample letters from all scripts. 
// Assembles a test string from alphabetic sample characters of every
// script code below USCRIPT_CODE_LIMIT, runs it through "Any-Latin", and
// logs the string before and after.  Failures while building the sets or
// creating the transliterator are reported with file and line.
3900 void TransliteratorTest::TestAny(void) { 
3901     UErrorCode status 
= U_ZERO_ERROR
; 
3902     // Note: there is a lot of implicit construction of UnicodeStrings from (char *) in 
3903     //       function call parameters going on in this test. 
3904     UnicodeSet 
alphabetic("[:alphabetic:]", status
); 
3905     if (U_FAILURE(status
)) { 
3906         dataerrln("Failure: file %s, line %d, status = %s", __FILE__
, __LINE__
, u_errorName(status
)); 
3909     alphabetic
.freeze(); 
3911     UnicodeString testString
; 
3912     for (int32_t i 
= 0; i 
< USCRIPT_CODE_LIMIT
; i
++) { 
3913         const char *scriptName 
= uscript_getShortName((UScriptCode
)i
); 
3914         if (scriptName 
== NULL
) { 
3915             errln("Failure: file %s, line %d: Script Code %d is invalid, ", __FILE__
, __LINE__
, i
); 
        // sample = characters of this script, narrowed to alphabetic ones.
3920         sample
.applyPropertyAlias("script", scriptName
, status
); 
3921         if (U_FAILURE(status
)) { 
3922             errln("Failure: file %s, line %d, status = %s", __FILE__
, __LINE__
, u_errorName(status
)); 
3925         sample
.retainAll(alphabetic
); 
        // Take the set's elements at indices 0..4 for this script.
3926         for (int32_t count
=0; count
<5; count
++) { 
3927             UChar32 c 
= sample
.charAt(count
); 
3931             testString
.append(c
); 
3935     UParseError parseError
; 
3936     Transliterator
* anyLatin 
= 
3937         Transliterator::createInstance("Any-Latin", UTRANS_FORWARD
, parseError
, status
); 
3938     if (U_FAILURE(status
)) { 
3939         dataerrln("Failure: file %s, line %d, status = %s", __FILE__
, __LINE__
, u_errorName(status
)); 
3943     logln(UnicodeString("Sample set for Any-Latin: ") + testString
); 
3944     anyLatin
->transliterate(testString
); 
3945     logln(UnicodeString("Sample result for Any-Latin: ") + testString
); 
3951  * Test the source and target set API.  These are only implemented 
3952  * for RBT and CompoundTransliterator at this time. 
// Builds a transliterator from the rule string r and checks that
// getSourceSet() and getTargetSet() return exactly the expected sets
// ([arx{lu}] and [bq]).  A match is logged; a mismatch is reported with
// both the actual and the expected pattern for each set.
3954 void TransliteratorTest::TestSourceTargetSet() { 
3955     UErrorCode ec 
= U_ZERO_ERROR
; 
3963     UnicodeSet 
expSrc("[arx{lu}]", ec
); 
3966     UnicodeSet 
expTrg("[bq]", ec
); 
3969     Transliterator
* t 
= Transliterator::createFromRules("test", r
, UTRANS_FORWARD
, pe
, ec
); 
    // ec accumulates failures from the set constructors and the rule parse.
3971     if (U_FAILURE(ec
)) { 
3973         errln("FAIL: Couldn't set up test"); 
3977     UnicodeSet src
; t
->getSourceSet(src
); 
3978     UnicodeSet trg
; t
->getTargetSet(trg
); 
3980     if (src 
== expSrc 
&& trg 
== expTrg
) { 
3982         logln((UnicodeString
)"Ok: " + 
3983               r 
+ " => source = " + src
.toPattern(a
, TRUE
) + 
3984               ", target = " + trg
.toPattern(b
, TRUE
)); 
        // a..d are scratch buffers that toPattern() writes into.
3986         UnicodeString a
, b
, c
, d
; 
3987         errln((UnicodeString
)"FAIL: " + 
3988               r 
+ " => source = " + src
.toPattern(a
, TRUE
) + 
3989               ", expected " + expSrc
.toPattern(b
, TRUE
) + 
3990               "; target = " + trg
.toPattern(c
, TRUE
) + 
3991               ", expected " + expTrg
.toPattern(d
, TRUE
)); 
3998  * Test handling of Pattern_White_Space, for both RBT and UnicodeSet. 
// Checks that U+200E is skipped as pattern white space in two places:
// inside a transliterator rule ("a > <U+200E> b;" must behave like
// "a > b;") and inside a UnicodeSet pattern ("[a <U+200E>]" must not
// contain U+200E).
4000 void TransliteratorTest::TestPatternWhiteSpace() { 
4002     const char* r 
= "a > \\u200E b;"; 
4004     UErrorCode ec 
= U_ZERO_ERROR
; 
    // CharsToUnicodeString turns the \u200E escape into the real character.
4006     Transliterator
* t 
= Transliterator::createFromRules("test", CharsToUnicodeString(r
), UTRANS_FORWARD
, pe
, ec
); 
4008     if (U_FAILURE(ec
)) { 
4009         errln("FAIL: Couldn't set up test"); 
4011         expect(*t
, "a", "b"); 
4017     UnicodeSet 
set(CharsToUnicodeString("[a \\u200E]"), ec
); 
4019     if (U_FAILURE(ec
)) { 
4020         errln("FAIL: Couldn't set up test"); 
4022         if (set
.contains(0x200E)) { 
4023             errln("FAIL: U+200E not being ignored by UnicodeSet"); 
4027 //====================================================================== 
4028 // this method is in TestUScript.java 
4029 //====================================================================== 
// Walks every code point from 0 to 0x10FFFF, looks up its script, and
// builds two transliterator IDs of the form "[:<script>:];NFD" - one from
// the script's long name and one from its short name.  An ID is only
// instantiated when it differs from the one built for the previous code
// point, so each run of same-script code points is tested once.
4030 void TransliteratorTest::TestAllCodepoints(){ 
4031     UScriptCode code
= USCRIPT_INVALID_CODE
; 
4032     char id
[256]={'\0'}; 
4033     char abbr
[256]={'\0'}; 
4034     char newId
[256]={'\0'}; 
4035     char newAbbrId
[256]={'\0'}; 
4036     char oldId
[256]={'\0'}; 
4037     char oldAbbrId
[256]={'\0'}; 
4039     UErrorCode status 
=U_ZERO_ERROR
; 
4042     for(uint32_t i 
= 0; i
<=0x10ffff; i
++){ 
4043         code 
=  uscript_getScript(i
,&status
); 
4044         if(code 
== USCRIPT_INVALID_CODE
){ 
4045             dataerrln("uscript_getScript for codepoint \\U%08X failed.", i
); 
4047         const char* myId 
= uscript_getName(code
); 
4049           dataerrln("Valid script code returned NULL name. Check your data!"); 
4052         uprv_strcpy(id
,myId
); 
4053         uprv_strcpy(abbr
,uscript_getShortName(code
)); 
        // newId = "[:" + long script name + ":];NFD"
4055         uprv_strcpy(newId
,"[:"); 
4056         uprv_strcat(newId
,id
); 
4057         uprv_strcat(newId
,":];NFD"); 
        // newAbbrId = "[:" + short script name + ":];NFD"
4059         uprv_strcpy(newAbbrId
,"[:"); 
4060         uprv_strcat(newAbbrId
,abbr
); 
4061         uprv_strcat(newAbbrId
,":];NFD"); 
        // Only instantiate when the ID changed since the previous code point.
4063         if(uprv_strcmp(newId
,oldId
)!=0){ 
4064             Transliterator
* t 
= Transliterator::createInstance(newId
,UTRANS_FORWARD
,pe
,status
); 
4065             if(t
==NULL 
|| U_FAILURE(status
)){ 
4066                 dataerrln((UnicodeString
)"FAIL: Could not create " + id 
+ " - " + u_errorName(status
)); 
4070         if(uprv_strcmp(newAbbrId
,oldAbbrId
)!=0){ 
4071             Transliterator
* t 
= Transliterator::createInstance(newAbbrId
,UTRANS_FORWARD
,pe
,status
); 
4072             if(t
==NULL 
|| U_FAILURE(status
)){ 
4073                 dataerrln((UnicodeString
)"FAIL: Could not create " + id 
+ " - " + u_errorName(status
)); 
        // Remember this round's IDs for the comparison in the next iteration.
4077         uprv_strcpy(oldId
,newId
); 
4078         uprv_strcpy(oldAbbrId
, newAbbrId
); 
// TEST_TRANSLIT_ID(id, cls): creates the transliterator named by the string
// literal id (forward direction) and reports a data error if that fails;
// otherwise checks that its dynamic class ID equals cls::getStaticClassID().
// id is passed to a printf-style format and cls is stringized, so both must
// be written literally at the call site.
4084 #define TEST_TRANSLIT_ID(id, cls) { \ 
4085   UErrorCode ec = U_ZERO_ERROR; \ 
4086   Transliterator* t = Transliterator::createInstance(id, UTRANS_FORWARD, ec); \ 
4087   if (U_FAILURE(ec)) { \ 
4088     dataerrln("FAIL: Couldn't create %s - %s", id, u_errorName(ec)); \ 
4090     if (t->getDynamicClassID() != cls::getStaticClassID()) { \ 
4091       errln("FAIL: " #cls " dynamic and static class ID mismatch"); \ 
4093     /* *t = *t; */ /*can't do this: coverage test for assignment op*/ \ 
// TEST_TRANSLIT_RULE(rule, cls): same check as TEST_TRANSLIT_ID, but the
// transliterator is built from a rule string with createFromRules.  rule
// must be a string literal because it is concatenated into the failure
// message at compile time.
4098 #define TEST_TRANSLIT_RULE(rule, cls) { \ 
4099   UErrorCode ec = U_ZERO_ERROR; \ 
4101   Transliterator* t = Transliterator::createFromRules("_", rule, UTRANS_FORWARD, pe, ec); \ 
4102   if (U_FAILURE(ec)) { \ 
4103     errln("FAIL: Couldn't create " rule); \ 
4105     if (t->getDynamicClassID() != cls ::getStaticClassID()) { \ 
4106       errln("FAIL: " #cls " dynamic and static class ID mismatch"); \ 
4108     /* *t = *t; */ /*can't do this: coverage test for assignment op*/ \ 
// Checks, for one representative ID (or rule) per transliterator class,
// that the object created has the dynamic class ID of the expected
// concrete class.  Each line pairs an ID with the class it should yield.
4113 void TransliteratorTest::TestBoilerplate() { 
4114     TEST_TRANSLIT_ID("Any-Latin", AnyTransliterator
); 
4115     TEST_TRANSLIT_ID("Any-Hex", EscapeTransliterator
); 
4116     TEST_TRANSLIT_ID("Hex-Any", UnescapeTransliterator
); 
4117     TEST_TRANSLIT_ID("Lower", LowercaseTransliterator
); 
4118     TEST_TRANSLIT_ID("Upper", UppercaseTransliterator
); 
4119     TEST_TRANSLIT_ID("Title", TitlecaseTransliterator
); 
4120     TEST_TRANSLIT_ID("Null", NullTransliterator
); 
4121     TEST_TRANSLIT_ID("Remove", RemoveTransliterator
); 
4122     TEST_TRANSLIT_ID("Any-Name", UnicodeNameTransliterator
); 
4123     TEST_TRANSLIT_ID("Name-Any", NameUnicodeTransliterator
); 
4124     TEST_TRANSLIT_ID("NFD", NormalizationTransliterator
); 
4125     TEST_TRANSLIT_ID("Latin-Greek", CompoundTransliterator
); 
4126     TEST_TRANSLIT_RULE("a>b;", RuleBasedTransliterator
); 
// Exercises rules written with the alternate operator characters
// U+2192, U+2190, U+2194 (arrows) and U+2206 instead of the ASCII
// ">", "<", "<>" and "&".  The sample input in the second case pairs each
// ASCII operator with its alternate character.  Parts of both expect()
// calls are on lines not shown here.
4129 void TransliteratorTest::TestAlternateSyntax() { 
4134     expect(CharsToUnicodeString("a \\u2192 x; b \\u2190 y; c \\u2194 z"), 
4137     expect(CharsToUnicodeString("([:^ASCII:]) \\u2192 \\u2206Name($1);"), 
4138            CharsToUnicodeString("<=\\u2190; >=\\u2192; <>=\\u2194; &=\\u2206"), 
4139            UNICODE_STRING_SIMPLE("<=\\N{LEFTWARDS ARROW}; >=\\N{RIGHTWARDS ARROW}; <>=\\N{LEFT RIGHT ARROW}; &=\\N{INCREMENT}")); 
// Rule sets used by TestBeginEnd and TestBeginEndToRules, addressed by
// index from BEGIN_END_TEST_CASES.  Entries whose test case is disabled
// are kept as empty strings so the indexes of the remaining entries do not
// shift.  Adjacent string literals are concatenated by the compiler; only
// a trailing comma ends an entry.
4142 static const char* BEGIN_END_RULES
[] = { 
4156     "", // test case commented out below, this is here to keep from messing up the indexes 
4165     "", // test case commented out below, this is here to keep from messing up the indexes 
4174     "", // test case commented out below, this is here to keep from messing up the indexes 
4193     "$ws = [[:Separator:][\\u0009-\\u000C]$];" 
4194     "$delim = [\\-$ws];" 
4195     "$ws $delim* > ' ';" 
4196     "'-' $delim* > '-';", 
4200     "$ws = [[:Separator:][\\u0009-\\u000C]$];" 
4201     "$delim = [\\-$ws];" 
4202     "$ws $delim* > ' ';" 
4203     "'-' $delim* > '-';", 
4206     "$ws = [[:Separator:][\\u0009-\\u000C]$];" 
4207     "$delim = [\\-$ws];" 
4208     "$ws $delim* > ' ';" 
4209     "'-' $delim* > '-';" 
4213     "$ws = [[:Separator:][\\u0009-\\u000C]$];" 
4214     "$delim = [\\-$ws];" 
4216     "$ws $delim* > ' ';" 
4217     "'-' $delim* > '-';", 
4222     "$ws = [[:Separator:][\\u0009-\\u000C]$];" 
4223     "$delim = [\\-$ws];" 
4225     "$ws $delim* > ' ';" 
4226     "'-' $delim* > '-';", 
4228     "", // test case commented out below, this is here to keep from messing up the indexes 
4232     "$ws = [[:Separator:][\\u0009-\\u000C]$];" 
4233     "$delim = [\\-$ws];" 
4235     "$ws $delim* > ' ';" 
4236     "'-' $delim* > '-';" 
4239     "", // test case commented out below, this is here to keep from messing up the indexes 
4243     "$ws = [[:Separator:][\\u0009-\\u000C]$];" 
4244     "$delim = [\\-$ws];" 
4247     "$ws $delim* > ' ';" 
4248     "'-' $delim* > '-';" 
4251     "$ab { ' ' } $ab > '-';" 
4258     "", // test case commented out below, this is here to keep from messing up the indexes 
4261     "$ws = [[:Separator:][\\u0009-\\u000C]$];" 
4262     "$delim = [\\-$ws];" 
4265     "$ws $delim* > ' ';" 
4266     "'-' $delim* > '-';" 
4268     "$ab { ' ' } $ab > '-';" 
4284     "", // test case commented out below, this is here to keep from messing up the indexes 
4305     "", // test case commented out below, this is here to keep from messing up the indexes 
4317 (This entire test is commented out below and will need some heavy revision when we re-add 
4318 the ::BEGIN/::END stuff) 
4319 static const char* BOGUS_BEGIN_END_RULES[] = { 
4338 static const int32_t BOGUS_BEGIN_END_RULES_length = UPRV_LENGTHOF(BOGUS_BEGIN_END_RULES); 
// Flat table of test cases consumed three entries at a time:
// { rule set, input text, expected output }.  Disabled cases are commented
// out as whole rows so the stride of three is preserved.
// BEGIN_END_TEST_CASES_length is the total number of array entries, not
// the number of cases.
4341 static const char* BEGIN_END_TEST_CASES
[] = { 
4342     // rules             input                   expected output 
4343     BEGIN_END_RULES
[0],  "abc ababc aba",        "xy zbc z", 
4344 //    BEGIN_END_RULES[1],  "abc ababc aba",        "xy abxy z", 
4345 //    BEGIN_END_RULES[2],  "abc ababc aba",        "xy abxy z", 
4346 //    BEGIN_END_RULES[3],  "abc ababc aba",        "xy abxy z", 
4347     BEGIN_END_RULES
[4],  "abc ababc aba",        "xy abxy z", 
4348     BEGIN_END_RULES
[5],  "abccabaacababcbc",     "PXAARXQBR", 
4350     BEGIN_END_RULES
[6],  "e   e - e---e-  e",    "e e e-e-e", 
4351     BEGIN_END_RULES
[7],  "e   e - e---e-  e",    "e e e-e-e", 
4352     BEGIN_END_RULES
[8],  "e   e - e---e-  e",    "e e e-e-e", 
4353     BEGIN_END_RULES
[9],  "e   e - e---e-  e",    "e e e-e-e", 
4354 //    BEGIN_END_RULES[10],  "e   e - e---e-  e",    "e e e-e-e", 
4355 //    BEGIN_END_RULES[11], "e   e - e---e-  e",    "e e e-e-e", 
4356 //    BEGIN_END_RULES[12], "e   e - e---e-  e",    "e e e-e-e", 
4357 //    BEGIN_END_RULES[12], "a    a    a    a",     "a%a%a%a", 
4358 //    BEGIN_END_RULES[12], "a a-b c b a",          "a%a-b cb-a", 
4359     BEGIN_END_RULES
[13], "e   e - e---e-  e",    "e e e-e-e", 
4360     BEGIN_END_RULES
[13], "a    a    a    a",     "a%a%a%a", 
4361     BEGIN_END_RULES
[13], "a a-b c b a",          "a%a-b cb-a", 
4363 //    BEGIN_END_RULES[14], "abc xy ababc xyz aba", "XY xy ABXY xyz YZ", 
4364     BEGIN_END_RULES
[15], "abc xy ababc xyz aba", "XY xy ABXY xyz YZ", 
4365 //    BEGIN_END_RULES[16], "abc xy ababc xyz aba", "XY xy ABXY xyz YZ", 
4366     BEGIN_END_RULES
[17], "abc xy ababc xyz aba", "XY xy ABXY xyz YZ" 
4368 static const int32_t BEGIN_END_TEST_CASES_length 
= UPRV_LENGTHOF(BEGIN_END_TEST_CASES
); 
// Runs every { rules, input, expected } triple in BEGIN_END_TEST_CASES
// through expect(), then builds BEGIN_END_RULES[17] in the reverse
// direction and checks its output on a fixed sample.  The final section
// (ill-formed rule sets) is disabled in the original source.
4370 void TransliteratorTest::TestBeginEnd() { 
4371     // run through the list of test cases above 
    // i advances by 3 because each case occupies three array entries;
    // i / 3 is therefore the case number used in the label.
4373     for (i 
= 0; i 
< BEGIN_END_TEST_CASES_length
; i 
+= 3) { 
4374         expect((UnicodeString
)"Test case #" + (i 
/ 3), 
4375                UnicodeString(BEGIN_END_TEST_CASES
[i
], -1, US_INV
), 
4376                UnicodeString(BEGIN_END_TEST_CASES
[i 
+ 1], -1, US_INV
), 
4377                UnicodeString(BEGIN_END_TEST_CASES
[i 
+ 2], -1, US_INV
)); 
4380     // instantiate the one reversible rule set in the reverse direction and make sure it does the right thing 
4381     UParseError parseError
; 
4382     UErrorCode status 
= U_ZERO_ERROR
; 
4383     Transliterator
* reversed  
= Transliterator::createFromRules("Reversed", UnicodeString(BEGIN_END_RULES
[17]), 
4384             UTRANS_REVERSE
, parseError
, status
); 
4385     if (reversed 
== 0 || U_FAILURE(status
)) { 
4386         reportParseError(UnicodeString("FAIL: Couldn't create reversed transliterator"), parseError
, status
); 
4388         expect(*reversed
, UnicodeString("xy XY XYZ yz YZ"), UnicodeString("xy abc xaba yz aba")); 
4392     // finally, run through the list of syntactically-ill-formed rule sets above and make sure 
4393     // that all of them cause errors 
4395 (commented out until we have the real ::BEGIN/::END stuff in place 
4396     for (i = 0; i < BOGUS_BEGIN_END_RULES_length; i++) { 
4397         UParseError parseError; 
4398         UErrorCode status = U_ZERO_ERROR; 
4399         Transliterator* t = Transliterator::createFromRules("foo", UnicodeString(BOGUS_BEGIN_END_RULES[i]), 
4400                 UTRANS_FORWARD, parseError, status); 
4401         if (!U_FAILURE(status)) { 
4403             errln((UnicodeString)"Should have gotten syntax error from " + BOGUS_BEGIN_END_RULES[i]); 
// Round-trip check for toRules(): for every case in BEGIN_END_TEST_CASES a
// transliterator is built from the rules, serialised with toRules(), and
// rebuilt from that text.  The same is then done for the reverse-direction
// instance of BEGIN_END_RULES[17].
4409 void TransliteratorTest::TestBeginEndToRules() { 
4410     // run through the same list of test cases we used above, but this time, instead of just 
4411     // instantiating a Transliterator from the rules and running the test against it, we instantiate 
4412     // a Transliterator from the rules, do toRules() on it, instantiate a Transliterator from 
4413     // the resulting set of rules, and make sure that the generated rule set is semantically equivalent 
4414     // to (i.e., does the same thing as) the original rule set 
4415     for (int32_t i 
= 0; i 
< BEGIN_END_TEST_CASES_length
; i 
+= 3) { 
4416         UParseError parseError
; 
4417         UErrorCode status 
= U_ZERO_ERROR
; 
4418         Transliterator
* t 
= Transliterator::createFromRules("--", UnicodeString(BEGIN_END_TEST_CASES
[i
], -1, US_INV
), 
4419                 UTRANS_FORWARD
, parseError
, status
); 
4420         if (U_FAILURE(status
)) { 
4421             reportParseError(UnicodeString("FAIL: Couldn't create transliterator"), parseError
, status
); 
4423             UnicodeString rules
; 
4424             t
->toRules(rules
, TRUE
); 
            // Rebuild from the generated rule text; i / 3 is the case number.
4425             Transliterator
* t2 
= Transliterator::createFromRules((UnicodeString
)"Test case #" + (i 
/ 3), rules
, 
4426                     UTRANS_FORWARD
, parseError
, status
); 
4427             if (U_FAILURE(status
)) { 
4428                 reportParseError(UnicodeString("FAIL: Couldn't create transliterator from generated rules"), 
4429                         parseError
, status
); 
4433                        UnicodeString(BEGIN_END_TEST_CASES
[i 
+ 1], -1, US_INV
), 
4434                        UnicodeString(BEGIN_END_TEST_CASES
[i 
+ 2], -1, US_INV
)); 
4441     // do the same thing for the reversible test case 
4442     UParseError parseError
; 
4443     UErrorCode status 
= U_ZERO_ERROR
; 
4444     Transliterator
* reversed 
= Transliterator::createFromRules("Reversed", UnicodeString(BEGIN_END_RULES
[17]), 
4445             UTRANS_REVERSE
, parseError
, status
); 
4446     if (U_FAILURE(status
)) { 
4447         reportParseError(UnicodeString("FAIL: Couldn't create reversed transliterator"), parseError
, status
); 
4449         UnicodeString rules
; 
4450         reversed
->toRules(rules
, FALSE
); 
        // The serialised rules are already reversed, so they are re-parsed
        // in the FORWARD direction.
4451         Transliterator
* reversed2 
= Transliterator::createFromRules("Reversed", rules
, UTRANS_FORWARD
, 
4452                 parseError
, status
); 
4453         if (U_FAILURE(status
)) { 
4454             reportParseError(UnicodeString("FAIL: Couldn't create reversed transliterator from generated rules"), 
4455                     parseError
, status
); 
4459                    UnicodeString("xy XY XYZ yz YZ"), 
4460                    UnicodeString("xy abc xaba yz aba")); 
// Checks Transliterator::registerAlias() twice: first with an alias for a
// compound ID ("Any-CapVowels" -> "Lower;[aeiou]Upper"), then with an alias
// for a single ID ("Latin-dlgkjdflkjdl" -> "Latin-Greek").  In both rounds
// the aliased and the real transliterator must produce identical toRules()
// text, and the alias is unregistered afterwards.
4467 void TransliteratorTest::TestRegisterAlias() { 
4468     UnicodeString 
longID("Lower;[aeiou]Upper"); 
4469     UnicodeString 
shortID("Any-CapVowels"); 
4470     UnicodeString 
reallyShortID("CapVowels"); 
4472     Transliterator::registerAlias(shortID
, longID
); 
4474     UErrorCode err 
= U_ZERO_ERROR
; 
4475     Transliterator
* t1 
= Transliterator::createInstance(longID
, UTRANS_FORWARD
, err
); 
4476     if (U_FAILURE(err
)) { 
4477         errln("Failed to instantiate transliterator with long ID"); 
4478         Transliterator::unregister(shortID
); 
    // The alias was registered as "Any-CapVowels" but is looked up here
    // without the "Any-" source part.
4481     Transliterator
* t2 
= Transliterator::createInstance(reallyShortID
, UTRANS_FORWARD
, err
); 
4482     if (U_FAILURE(err
)) { 
4483         errln("Failed to instantiate transliterator with short ID"); 
4485         Transliterator::unregister(shortID
); 
4489     if (t1
->getID() != longID
) 
4490         errln("Transliterator instantiated with long ID doesn't have long ID"); 
4491     if (t2
->getID() != reallyShortID
) 
4492         errln("Transliterator instantiated with short ID doesn't have short ID"); 
4494     UnicodeString rules1
; 
4495     UnicodeString rules2
; 
4497     t1
->toRules(rules1
, TRUE
); 
4498     t2
->toRules(rules2
, TRUE
); 
4499     if (rules1 
!= rules2
) 
4500         errln("Alias transliterators aren't the same"); 
4504     Transliterator::unregister(shortID
); 
    // After unregistering, creating by the alias must fail.
4506     t1 
= Transliterator::createInstance(shortID
, UTRANS_FORWARD
, err
); 
4507     if (U_SUCCESS(err
)) { 
4508         errln("Instantiation with short ID succeeded after short ID was unregistered"); 
4512     // try the same thing again, but this time with something other than 
4513     // an instance of CompoundTransliterator 
4514     UnicodeString 
realID("Latin-Greek"); 
4515     UnicodeString 
fakeID("Latin-dlgkjdflkjdl"); 
4516     Transliterator::registerAlias(fakeID
, realID
); 
4519     t1 
= Transliterator::createInstance(realID
, UTRANS_FORWARD
, err
); 
4520     if (U_FAILURE(err
)) { 
4521         dataerrln("Failed to instantiate transliterator with real ID - %s", u_errorName(err
)); 
    // NOTE(review): the alias registered above is fakeID, yet this failure
    // path (and the one below) unregisters realID, i.e. "Latin-Greek"
    // itself.  Looks like fakeID was intended - confirm before changing.
4522         Transliterator::unregister(realID
); 
4525     t2 
= Transliterator::createInstance(fakeID
, UTRANS_FORWARD
, err
); 
4526     if (U_FAILURE(err
)) { 
4527         errln("Failed to instantiate transliterator with fake ID"); 
4529         Transliterator::unregister(realID
); 
4533     t1
->toRules(rules1
, TRUE
); 
4534     t2
->toRules(rules2
, TRUE
); 
4535     if (rules1 
!= rules2
) 
4536         errln("Alias transliterators aren't the same"); 
4540     Transliterator::unregister(fakeID
); 
// Checks utrans_stripRules() on a two-line rule source: a comment-only
// line ("# " followed by CR LF) and a rule with a trailing comment
// (U+E001 > U+0C01 ; " # SIGN").  The expected result is just the rule
// itself, "<U+E001>><U+0C01>;".
4543 void TransliteratorTest::TestRuleStripping() { 
4546 \uE001>\u0C01; # SIGN 
4548     static const UChar rule
[] = { 
4549         0x0023,0x0020,0x000D,0x000A, 
4550         0xE001,0x003E,0x0C01,0x003B,0x0020,0x0023,0x0020,0x0053,0x0049,0x0047,0x004E,0 
4552     static const UChar expectedRule
[] = { 
4553         0xE001,0x003E,0x0C01,0x003B,0 
    // The output buffer is sized like the input array.
4555     UChar result
[UPRV_LENGTHOF(rule
)]; 
4556     UErrorCode status 
= U_ZERO_ERROR
; 
4557     int32_t len 
= utrans_stripRules(rule
, UPRV_LENGTHOF(rule
), result
, &status
); 
4558     if (len 
!= u_strlen(expectedRule
)) { 
4559         errln("utrans_stripRules return len = %d", len
); 
4561     if (u_strncmp(expectedRule
, result
, len
) != 0) { 
4562         errln("utrans_stripRules did not return expected string"); 
4567  * Test the Halfwidth-Fullwidth transliterator (ticket 6281). 
// Creates the Halfwidth-Fullwidth and Fullwidth-Halfwidth transliterators
// and walks the DATA table.  For each group, DATA[i+1] is the halfwidth
// text and DATA[i+2] the fullwidth text; a selector with cases 'h', 'f'
// and 'b' decides which direction(s) are checked.
// NOTE(review): the surviving comment says "2n items", but the loop below
// consumes the table three entries at a time - confirm and fix the comment.
4569 void TransliteratorTest::TestHalfwidthFullwidth(void) { 
4570     UParseError parseError
; 
4571     UErrorCode status 
= U_ZERO_ERROR
; 
4572     Transliterator
* hf 
= Transliterator::createInstance("Halfwidth-Fullwidth", UTRANS_FORWARD
, parseError
, status
); 
4573     Transliterator
* fh 
= Transliterator::createInstance("Fullwidth-Halfwidth", UTRANS_FORWARD
, parseError
, status
); 
4574     if (hf 
== 0 || fh 
== 0) { 
4575         dataerrln("FAIL: createInstance failed - %s", u_errorName(status
)); 
4581     // Array of 2n items 
4583     //   "hf"|"fh"|"both", 
4586     const char* DATA
[] = { 
4588         "\\uFFE9\\uFFEA\\uFFEB\\uFFEC\\u0061\\uFF71\\u00AF\\u0020", 
4589         "\\u2190\\u2191\\u2192\\u2193\\uFF41\\u30A2\\uFFE3\\u3000", 
4591     int32_t DATA_length 
= UPRV_LENGTHOF(DATA
); 
4593     for (int32_t i
=0; i
<DATA_length
; i
+=3) { 
4594         UnicodeString h 
= CharsToUnicodeString(DATA
[i
+1]); 
4595         UnicodeString f 
= CharsToUnicodeString(DATA
[i
+2]); 
        // Case labels are written as hex code units rather than as
        // character literals; the letter each one stands for is in the
        // trailing comment.
4597         case 0x68: //'h': // Halfwidth-Fullwidth only 
4600         case 0x66: //'f': // Fullwidth-Halfwidth only 
4603         case 0x62: //'b': // both directions 
4615      *  Test Thai.  The text is the first paragraph of "What is Unicode" from the Unicode.org web site. 
4616      *              TODO: confirm that the expected results are correct. 
4617      *              For now, test just confirms that C++ and Java give identical results. 
// Transliterates a paragraph of Thai text with "Any-Latin" and compares
// the result with a stored Latin rendering.  Both texts are kept as
// backslash-escaped ASCII and unescaped at run time.  The body is only
// compiled when break iteration is available (UCONFIG_NO_BREAK_ITERATION
// unset).
4619 void TransliteratorTest::TestThai(void) { 
4620 #if !UCONFIG_NO_BREAK_ITERATION 
4621     UParseError parseError
; 
4622     UErrorCode status 
= U_ZERO_ERROR
; 
4623     Transliterator
* tr 
= Transliterator::createInstance("Any-Latin", UTRANS_FORWARD
, parseError
, status
); 
4625         dataerrln("FAIL: createInstance failed - %s", u_errorName(status
)); 
4628     if (U_FAILURE(status
)) { 
4629         errln("FAIL: createInstance failed with %s", u_errorName(status
)); 
    // Input: Thai source text, \u-escaped.
4632     const char *thaiText 
=  
4633         "\\u0e42\\u0e14\\u0e22\\u0e1e\\u0e37\\u0e49\\u0e19\\u0e10\\u0e32\\u0e19\\u0e41\\u0e25\\u0e49\\u0e27, \\u0e04\\u0e2d" 
4634         "\\u0e21\\u0e1e\\u0e34\\u0e27\\u0e40\\u0e15\\u0e2d\\u0e23\\u0e4c\\u0e08\\u0e30\\u0e40\\u0e01\\u0e35\\u0e48\\u0e22" 
4635         "\\u0e27\\u0e02\\u0e49\\u0e2d\\u0e07\\u0e01\\u0e31\\u0e1a\\u0e40\\u0e23\\u0e37\\u0e48\\u0e2d\\u0e07\\u0e02\\u0e2d" 
4636         "\\u0e07\\u0e15\\u0e31\\u0e27\\u0e40\\u0e25\\u0e02. \\u0e04\\u0e2d\\u0e21\\u0e1e\\u0e34\\u0e27\\u0e40\\u0e15\\u0e2d" 
4637         "\\u0e23\\u0e4c\\u0e08\\u0e31\\u0e14\\u0e40\\u0e01\\u0e47\\u0e1a\\u0e15\\u0e31\\u0e27\\u0e2d\\u0e31\\u0e01\\u0e29" 
4638         "\\u0e23\\u0e41\\u0e25\\u0e30\\u0e2d\\u0e31\\u0e01\\u0e02\\u0e23\\u0e30\\u0e2d\\u0e37\\u0e48\\u0e19\\u0e46 \\u0e42" 
4639         "\\u0e14\\u0e22\\u0e01\\u0e32\\u0e23\\u0e01\\u0e33\\u0e2b\\u0e19\\u0e14\\u0e2b\\u0e21\\u0e32\\u0e22\\u0e40\\u0e25" 
4640         "\\u0e02\\u0e43\\u0e2b\\u0e49\\u0e2a\\u0e33\\u0e2b\\u0e23\\u0e31\\u0e1a\\u0e41\\u0e15\\u0e48\\u0e25\\u0e30\\u0e15" 
4641         "\\u0e31\\u0e27. \\u0e01\\u0e48\\u0e2d\\u0e19\\u0e2b\\u0e19\\u0e49\\u0e32\\u0e17\\u0e35\\u0e48\\u0e4a Unicode \\u0e08" 
4642         "\\u0e30\\u0e16\\u0e39\\u0e01\\u0e2a\\u0e23\\u0e49\\u0e32\\u0e07\\u0e02\\u0e36\\u0e49\\u0e19, \\u0e44\\u0e14\\u0e49" 
4643         "\\u0e21\\u0e35\\u0e23\\u0e30\\u0e1a\\u0e1a encoding \\u0e2d\\u0e22\\u0e39\\u0e48\\u0e2b\\u0e25\\u0e32\\u0e22\\u0e23" 
4644         "\\u0e49\\u0e2d\\u0e22\\u0e23\\u0e30\\u0e1a\\u0e1a\\u0e2a\\u0e33\\u0e2b\\u0e23\\u0e31\\u0e1a\\u0e01\\u0e32\\u0e23" 
4645         "\\u0e01\\u0e33\\u0e2b\\u0e19\\u0e14\\u0e2b\\u0e21\\u0e32\\u0e22\\u0e40\\u0e25\\u0e02\\u0e40\\u0e2b\\u0e25\\u0e48" 
4646         "\\u0e32\\u0e19\\u0e35\\u0e49. \\u0e44\\u0e21\\u0e48\\u0e21\\u0e35 encoding \\u0e43\\u0e14\\u0e17\\u0e35\\u0e48" 
4647         "\\u0e21\\u0e35\\u0e08\\u0e33\\u0e19\\u0e27\\u0e19\\u0e15\\u0e31\\u0e27\\u0e2d\\u0e31\\u0e01\\u0e02\\u0e23\\u0e30" 
4648         "\\u0e21\\u0e32\\u0e01\\u0e40\\u0e1e\\u0e35\\u0e22\\u0e07\\u0e1e\\u0e2d: \\u0e22\\u0e01\\u0e15\\u0e31\\u0e27\\u0e2d" 
4649         "\\u0e22\\u0e48\\u0e32\\u0e07\\u0e40\\u0e0a\\u0e48\\u0e19, \\u0e40\\u0e09\\u0e1e\\u0e32\\u0e30\\u0e43\\u0e19\\u0e01" 
4650         "\\u0e25\\u0e38\\u0e48\\u0e21\\u0e2a\\u0e2b\\u0e20\\u0e32\\u0e1e\\u0e22\\u0e38\\u0e42\\u0e23\\u0e1b\\u0e40\\u0e1e" 
4651         "\\u0e35\\u0e22\\u0e07\\u0e41\\u0e2b\\u0e48\\u0e07\\u0e40\\u0e14\\u0e35\\u0e22\\u0e27 \\u0e01\\u0e47\\u0e15\\u0e49" 
4652         "\\u0e2d\\u0e07\\u0e01\\u0e32\\u0e23\\u0e2b\\u0e25\\u0e32\\u0e22 encoding \\u0e43\\u0e19\\u0e01\\u0e32\\u0e23\\u0e04" 
4653         "\\u0e23\\u0e2d\\u0e1a\\u0e04\\u0e25\\u0e38\\u0e21\\u0e17\\u0e38\\u0e01\\u0e20\\u0e32\\u0e29\\u0e32\\u0e43\\u0e19" 
4654         "\\u0e01\\u0e25\\u0e38\\u0e48\\u0e21. \\u0e2b\\u0e23\\u0e37\\u0e2d\\u0e41\\u0e21\\u0e49\\u0e41\\u0e15\\u0e48\\u0e43" 
4655         "\\u0e19\\u0e20\\u0e32\\u0e29\\u0e32\\u0e40\\u0e14\\u0e35\\u0e48\\u0e22\\u0e27 \\u0e40\\u0e0a\\u0e48\\u0e19 \\u0e20" 
4656         "\\u0e32\\u0e29\\u0e32\\u0e2d\\u0e31\\u0e07\\u0e01\\u0e24\\u0e29 \\u0e01\\u0e47\\u0e44\\u0e21\\u0e48\\u0e21\\u0e35" 
4657         " encoding \\u0e43\\u0e14\\u0e17\\u0e35\\u0e48\\u0e40\\u0e1e\\u0e35\\u0e22\\u0e07\\u0e1e\\u0e2d\\u0e2a\\u0e33\\u0e2b" 
4658         "\\u0e23\\u0e31\\u0e1a\\u0e17\\u0e38\\u0e01\\u0e15\\u0e31\\u0e27\\u0e2d\\u0e31\\u0e01\\u0e29\\u0e23, \\u0e40\\u0e04" 
4659         "\\u0e23\\u0e37\\u0e48\\u0e2d\\u0e07\\u0e2b\\u0e21\\u0e32\\u0e22\\u0e27\\u0e23\\u0e23\\u0e04\\u0e15\\u0e2d\\u0e19" 
4660         " \\u0e41\\u0e25\\u0e30\\u0e2a\\u0e31\\u0e0d\\u0e25\\u0e31\\u0e01\\u0e29\\u0e13\\u0e4c\\u0e17\\u0e32\\u0e07\\u0e40" 
4661         "\\u0e17\\u0e04\\u0e19\\u0e34\\u0e04\\u0e17\\u0e35\\u0e48\\u0e43\\u0e0a\\u0e49\\u0e01\\u0e31\\u0e19\\u0e2d\\u0e22" 
4662         "\\u0e39\\u0e48\\u0e17\\u0e31\\u0e48\\u0e27\\u0e44\\u0e1b."; 
    // Expected output: Latin rendering of the text above, \u-escaped.
4664     const char *latinText 
=      
4665         "doy ph\\u1ee5\\u0304\\u0302n \\u1e6d\\u0304h\\u0101n l\\u00e6\\u0302w, khxmphiwtexr\\u0312 ca ke\\u012b\\u0300" 
4666         "ywk\\u0304\\u0125xng k\\u1ea1b re\\u1ee5\\u0304\\u0300xng k\\u0304hxng t\\u1ea1wlek\\u0304h. khxmphiwtexr" 
4667         "\\u0312 c\\u1ea1d k\\u0115b t\\u1ea1w x\\u1ea1ks\\u0304\\u02b9r l\\u00e6a x\\u1ea1kk\\u0304h ra x\\u1ee5\\u0304" 
4668         "\\u0300n\\u00ab doy k\\u0101r k\\u1ea3h\\u0304nd h\\u0304m\\u0101ylek\\u0304h h\\u0304\\u0131\\u0302 s\\u0304" 
4669         "\\u1ea3h\\u0304r\\u1ea1b t\\u00e6\\u0300la t\\u1ea1w. k\\u0300xn h\\u0304n\\u0302\\u0101 th\\u012b\\u0300\\u0301" 
4670         " Unicode ca t\\u0304h\\u016bk s\\u0304r\\u0302\\u0101ng k\\u0304h\\u1ee5\\u0302n, d\\u1ecb\\u0302 m\\u012b " 
4671         "rabb encoding xy\\u016b\\u0300 h\\u0304l\\u0101y r\\u0302xy rabb s\\u0304\\u1ea3h\\u0304r\\u1ea1b k\\u0101" 
4672         "r k\\u1ea3h\\u0304nd h\\u0304m\\u0101ylek\\u0304h h\\u0304el\\u0300\\u0101 n\\u012b\\u0302. m\\u1ecb\\u0300m" 
4673         "\\u012b encoding d\\u0131 th\\u012b\\u0300 m\\u012b c\\u1ea3nwn t\\u1ea1w x\\u1ea1kk\\u0304hra m\\u0101k p" 
4674         "he\\u012byng phx: yk t\\u1ea1wx\\u1ef3\\u0101ng ch\\u00e8n, c\\u0304heph\\u0101a n\\u0131 kl\\u00f9m s\\u0304" 
4675         "h\\u0304p\\u0323h\\u0101ph yurop phe\\u012byng h\\u0304\\u00e6\\u0300ng de\\u012byw k\\u0306 t\\u0302xngk\\u0101" 
4676         "r h\\u0304l\\u0101y encoding n\\u0131 k\\u0101r khrxbkhlum thuk p\\u0323h\\u0101s\\u0304\\u02b9\\u0101 n\\u0131" 
4677         " kl\\u00f9m. h\\u0304r\\u1ee5\\u0304x m\\u00e6\\u0302t\\u00e6\\u0300 n\\u0131 p\\u0323h\\u0101s\\u0304\\u02b9" 
4678         "\\u0101 de\\u012b\\u0300yw ch\\u00e8n p\\u0323h\\u0101s\\u0304\\u02b9\\u0101 x\\u1ea1ngkvs\\u0304\\u02b9 k\\u0306" 
4679         " m\\u1ecb\\u0300m\\u012b encoding d\\u0131 th\\u012b\\u0300 phe\\u012byng phx s\\u0304\\u1ea3h\\u0304r\\u1ea1" 
4680         "b thuk t\\u1ea1w x\\u1ea1ks\\u0304\\u02b9r, kher\\u1ee5\\u0304\\u0300xngh\\u0304m\\u0101y wrrkh txn l\\u00e6" 
4681         "a s\\u0304\\u1ea1\\u1ef5l\\u1ea1ks\\u0304\\u02b9\\u1e47\\u0312 th\\u0101ng thekhnikh th\\u012b\\u0300 ch\\u0131" 
4682         "\\u0302 k\\u1ea1n xy\\u016b\\u0300 th\\u1ea1\\u0300wp\\u1ecb."; 
    // Convert the escaped source to real characters and transliterate it.
4685     UnicodeString  
xlitText(thaiText
); 
4686     xlitText 
= xlitText
.unescape(); 
4687     tr
->transliterate(xlitText
); 
4689     UnicodeString 
expectedText(latinText
); 
4690     expectedText 
= expectedText
.unescape(); 
    // NOTE(review): xlitText has already been transliterated in place above,
    // so expect() is handed the Latin text as its source - confirm this is
    // intended.
4691     expect(*tr
, xlitText
, expectedText
); 
4698 //====================================================================== 
4700 //====================================================================== 
// Test helper: creates a forward transliterator from a system ID and checks
// that it converts source into expectedResult.
//
// @param id              ID handed to Transliterator::createInstance
// @param source          input text passed on to expect()
// @param expectedResult  text the transliteration is expected to produce
//
// NOTE(review): the declaration of pe (used as the parse-error argument
// below) and the statements following the errln call are not visible in this
// excerpt. Confirm that the failure branch returns before *t is dereferenced
// and that t is released on both paths.
4701 void TransliteratorTest::expectT(const UnicodeString
& id
, 
4702                                  const UnicodeString
& source
, 
4703                                  const UnicodeString
& expectedResult
) { 
4704     UErrorCode ec 
= U_ZERO_ERROR
; 
// Build the transliterator in the forward direction; ec receives the outcome.
4706     Transliterator 
*t 
= Transliterator::createInstance(id
, UTRANS_FORWARD
, pe
, ec
); 
4707     if (U_FAILURE(ec
)) { 
// Report the ID that failed together with the symbolic error name.
4708         errln((UnicodeString
)"FAIL: Could not create " + id 
+ " -  " + u_errorName(ec
)); 
// Delegate the actual comparison to the Transliterator-based expect overload.
4712     expect(*t
, source
, expectedResult
); 
// Test helper: formats the details of a rule parse failure into one
// diagnostic string (line, offset, pre-context, post-context, error name).
//
// @param message     caller-supplied text
// @param parseError  parse error record whose line, offset, preContext and
//                    postContext fields are reported
// @param status      error code, rendered through u_errorName
//
// NOTE(review): the start of the reporting call (the function that receives
// this concatenation, and where message is used) is not visible in this
// excerpt; presumably message is the leading part of the output. Confirm.
4716 void TransliteratorTest::reportParseError(const UnicodeString
& message
, 
4717                                           const UParseError
& parseError
, 
4718                                           const UErrorCode
& status
) { 
4720           /*", parse error " + parseError.code +*/ 
4721           ", line " + parseError
.line 
+ 
4722           ", offset " + parseError
.offset 
+ 
// Both context snippets are passed through prettify with TRUE as the
// second argument before being appended.
4723           ", pre-context " + prettify(parseError
.preContext
, TRUE
) + 
4724           ", post-context " + prettify(parseError
.postContext
,TRUE
) + 
4725           ", Error: " + u_errorName(status
)); 
// Convenience overload: checks a rule-based transliteration without naming
// the transliterator. Forwards to the overload that takes an ID, supplying
// the fixed placeholder ID literal <ID>.
//
// @param rules           rule text forwarded unchanged
// @param source          input text forwarded unchanged
// @param expectedResult  expected output forwarded unchanged
// @param pos             position pointer forwarded unchanged
4728 void TransliteratorTest::expect(const UnicodeString
& rules
, 
4729                                 const UnicodeString
& source
, 
4730                                 const UnicodeString
& expectedResult
, 
4731                                 UTransPosition 
*pos
) { 
4732     expect("<ID>", rules
, source
, expectedResult
, pos
); 
// Builds a forward transliterator from a rule string under the given ID and
// checks that it converts source into expectedResult.
//
// @param id              ID given to the rule-based transliterator
// @param rules           rule text handed to Transliterator::createFromRules
// @param source          input text
// @param expectedResult  expected output
// @param pos             position pointer passed through to the
//                        Transliterator-based expect overload
//
// NOTE(review): the lines between the reportParseError call and the expect
// call, and any release of t, are not visible in this excerpt. Presumably the
// expect call sits in an else branch so that *t is only dereferenced after a
// successful creation; confirm.
4735 void TransliteratorTest::expect(const UnicodeString
& id
, 
4736                                 const UnicodeString
& rules
, 
4737                                 const UnicodeString
& source
, 
4738                                 const UnicodeString
& expectedResult
, 
4739                                 UTransPosition 
*pos
) { 
4740     UErrorCode status 
= U_ZERO_ERROR
; 
4741     UParseError parseError
; 
// parseError and status are filled in by createFromRules.
4742     Transliterator
* t 
= Transliterator::createFromRules(id
, rules
, UTRANS_FORWARD
, parseError
, status
); 
4743     if (U_FAILURE(status
)) { 
// Creation failed: report the rule text along with the parse error details.
4744         reportParseError(UnicodeString("Couldn't create transliterator from ") + rules
, parseError
, status
); 
4746         expect(*t
, source
, expectedResult
, pos
); 
// Round-trip check: verifies that t maps source to expectedResult and that
// reverseTransliterator maps expectedResult back to source.
//
// @param t                      transliterator checked in the source to
//                               expectedResult direction
// @param source                 input for t, and expected output of the
//                               reverse check
// @param expectedResult         expected output of t, and input for the
//                               reverse check
// @param reverseTransliterator  transliterator checked in the opposite
//                               direction
4751 void TransliteratorTest::expect(const Transliterator
& t
, 
4752                                 const UnicodeString
& source
, 
4753                                 const UnicodeString
& expectedResult
, 
4754                                 const Transliterator
& reverseTransliterator
) { 
4755     expect(t
, source
, expectedResult
); 
// Same check with the roles of source and expectedResult swapped.
4756     expect(reverseTransliterator
, expectedResult
, source
); 
// Core check used by the other expect overloads. The visible code exercises
// the transliterator in three ways and reports each through expectAux under
// a tag built from t.getID():
//   :String       transliterate a copy of source in one call
//   :Replaceable  transliterate, or finishTransliteration with *pos
//   :Keyboard     incremental transliteration followed by a final
//                 finishTransliteration
//
// @param t               transliterator under test
// @param source          input text
// @param expectedResult  text every mode is expected to produce
// @param pos             position pointer; dereferenced in the
//                        finishTransliteration call of the Replaceable step
//
// NOTE(review): the conditions that choose between the branches (presumably
// tests on whether pos is null), the declaration of log, and the final
// argument of the last expectAux call are not visible in this excerpt.
// Confirm against the full file before relying on the branch descriptions.
4759 void TransliteratorTest::expect(const Transliterator
& t
, 
4760                                 const UnicodeString
& source
, 
4761                                 const UnicodeString
& expectedResult
, 
4762                                 UTransPosition 
*pos
) { 
// String mode: work on a copy so that source stays intact for the report.
4764         UnicodeString 
result(source
); 
4765         t
.transliterate(result
); 
4766         expectAux(t
.getID() + ":String", source
, result
, expectedResult
); 
// Position record used by the incremental calls below; starts zeroed.
4768     UTransPosition index
={0, 0, 0, 0}; 
4773     UnicodeString 
rsource(source
); 
4775         t
.transliterate(rsource
); 
4777         // Do it all at once -- below we do it incrementally 
4778         t
.finishTransliteration(rsource
, *pos
); 
4780     expectAux(t
.getID() + ":Replaceable", source
, rsource
, expectedResult
); 
4782     // Test keyboard (incremental) transliteration -- this result 
4783     // must be the same after we finalize (see below). 
// The log string records the buffer state before and after each incremental
// step, using formatInput to mark the position fields.
4788         formatInput(log
, rsource
, index
); 
// The status set by the incremental call is not inspected in the visible
// lines.
4790         UErrorCode status 
= U_ZERO_ERROR
; 
4791         t
.transliterate(rsource
, index
, status
); 
4792         formatInput(log
, rsource
, index
); 
// Feed source to the transliterator one UTF-16 code unit at a time.
4794         for (int32_t i
=0; i
<source
.length(); ++i
) { 
4798             log
.append(source
.charAt(i
)).append(" -> "); 
4799             UErrorCode status 
= U_ZERO_ERROR
; 
4800             t
.transliterate(rsource
, index
, source
.charAt(i
), status
); 
4801             formatInput(log
, rsource
, index
); 
4805     // As a final step in keyboard transliteration, we must call 
4806     // transliterate to finish off any pending partial matches that 
4807     // were waiting for more input. 
4808     t
.finishTransliteration(rsource
, index
); 
4809     log
.append(" => ").append(rsource
); 
// Keyboard mode passes when the finalized buffer equals expectedResult; the
// accumulated log serves as the summary text.
4811     expectAux(t
.getID() + ":Keyboard", log
, 
4812               rsource 
== expectedResult
, 
4818  * @param appendTo result is appended to this param. 
4819  * @param input the string being transliterated 
4820  * @param pos the index struct 
// Appends a rendering of input annotated with the four fields of pos.
// When the fields are consistent, the text is split into five pieces and
// joined with marker characters; otherwise a diagnostic beginning with
// INVALID UTransPosition is appended instead.
//
// NOTE(review): the else keyword, the end of the diagnostic expression and
// the return statement are not visible in this excerpt; presumably appendTo
// is returned, given the UnicodeString reference return type. Confirm.
4822 UnicodeString
& TransliteratorTest::formatInput(UnicodeString 
&appendTo
, 
4823                                                const UnicodeString
& input
, 
4824                                                const UTransPosition
& pos
) { 
4825     // Output a string of the form aaa{bbb|ccc|ddd}eee, where 
4826     // the {} indicate the context start and limit, and the || 
4827     // indicate the start and limit. 
// Validity check: the fields must be ordered
// 0 <= contextStart <= start <= limit <= contextLimit <= input.length().
4828     if (0 <= pos
.contextStart 
&& 
4829         pos
.contextStart 
<= pos
.start 
&& 
4830         pos
.start 
<= pos
.limit 
&& 
4831         pos
.limit 
<= pos
.contextLimit 
&& 
4832         pos
.contextLimit 
<= input
.length()) { 
// a: text before the context, b: context before start, c: start to limit,
// d: limit to context limit, e: text after the context.
4834         UnicodeString a
, b
, c
, d
, e
; 
4835         input
.extractBetween(0, pos
.contextStart
, a
); 
4836         input
.extractBetween(pos
.contextStart
, pos
.start
, b
); 
4837         input
.extractBetween(pos
.start
, pos
.limit
, c
); 
4838         input
.extractBetween(pos
.limit
, pos
.contextLimit
, d
); 
4839         input
.extractBetween(pos
.contextLimit
, input
.length(), e
); 
// 123 and 125 are the code points of the opening and closing brace; PIPE is
// a constant defined outside this excerpt.
4840         appendTo
.append(a
).append((UChar
)123/*{*/).append(b
). 
4841             append((UChar
)PIPE
).append(c
).append((UChar
)PIPE
).append(d
). 
4842             append((UChar
)125/*}*/).append(e
); 
// Inconsistent position: append the raw field values for diagnosis.
4844         appendTo
.append((UnicodeString
)"INVALID UTransPosition {cs=" + 
4845                         pos
.contextStart 
+ ", s=" + pos
.start 
+ ", l=" + 
4846                         pos
.limit 
+ ", cl=" + pos
.contextLimit 
+ "} on " + 
// Compares an actual transliteration result with the expected one and
// forwards the verdict to the pass/fail overload of expectAux. The summary
// passed on is source, the literal arrow separator, then result; the pass
// flag is result == expectedResult.
//
// @param tag             label identifying the check
// @param source          original input text
// @param result          text actually produced
// @param expectedResult  text that should have been produced
//
// NOTE(review): the final argument of the forwarded call is not visible in
// this excerpt; presumably it is expectedResult. Confirm.
4852 void TransliteratorTest::expectAux(const UnicodeString
& tag
, 
4853                                    const UnicodeString
& source
, 
4854                                    const UnicodeString
& result
, 
4855                                    const UnicodeString
& expectedResult
) { 
4856     expectAux(tag
, source 
+ " -> " + result
, 
4857               result 
== expectedResult
, 
// Reports the outcome of one check. The visible code has two reporting
// calls: logln with the tag in parentheses followed by the prettified
// summary, and dataerrln with a FAIL prefix, the tag, and the prettified
// expected text.
//
// @param tag             label identifying the check
// @param summary         description of what was transliterated
// @param pass            verdict computed by the caller
// @param expectedResult  expected text, included in the failure message
//
// NOTE(review): the condition selecting between the two calls and the middle
// part of the failure message are not visible in this excerpt; presumably
// logln runs when pass is true and dataerrln otherwise. Confirm.
4861 void TransliteratorTest::expectAux(const UnicodeString
& tag
, 
4862                                    const UnicodeString
& summary
, UBool pass
, 
4863                                    const UnicodeString
& expectedResult
) { 
4865         logln(UnicodeString("(")+tag
+") " + prettify(summary
)); 
4867         dataerrln(UnicodeString("FAIL: (")+tag
+") " 
4869               + ", expected " + prettify(expectedResult
)); 
4873 #endif /* #if !UCONFIG_NO_TRANSLITERATION */