icuSources/test/intltest/transtst.cpp

   1 /*
   2 **********************************************************************
   3 *   Copyright (C) 1999-2008, International Business Machines
   4 *   Corporation and others.  All Rights Reserved.
   5 **********************************************************************
   6 *   Date        Name        Description
   7 *   11/10/99    aliu        Creation.
   8 **********************************************************************
   9 */
  10
  11 #include "unicode/utypes.h"
  12
  13 #if !UCONFIG_NO_TRANSLITERATION
  14
  15 #include "transtst.h"
  16 #include "unicode/locid.h"
  17 #include "unicode/dtfmtsym.h"
  18 #include "unicode/normlzr.h"
  19 #include "unicode/translit.h"
  20 #include "unicode/uchar.h"
  21 #include "unicode/unifilt.h"
  22 #include "unicode/uniset.h"
  23 #include "unicode/ustring.h"
  24 #include "unicode/usetiter.h"
  25 #include "unicode/uscript.h"
  26 #include "cpdtrans.h"
  27 #include "nultrans.h"
  28 #include "rbt.h"
  29 #include "rbt_pars.h"
  30 #include "anytrans.h"
  31 #include "esctrn.h"
  32 #include "name2uni.h"
  33 #include "nortrans.h"
  34 #include "remtrans.h"
  35 #include "titletrn.h"
  36 #include "tolowtrn.h"
  37 #include "toupptrn.h"
  38 #include "unesctrn.h"
  39 #include "uni2name.h"
  40 #include "cstring.h"
  41 #include "cmemory.h"
  42 #include <stdio.h>
  43
  44 /***********************************************************************
  45
  46                      HOW TO USE THIS TEST FILE
  47                                -or-
  48                   How I developed on two platforms
  49                 without losing (too much of) my mind
  50
  51
  52 1. Add new tests by copying/pasting/changing existing tests.  On Java,
  53    any public void method named Test...() taking no parameters becomes
  54    a test.  On C++, you need to modify the header and add a line to
  55    the runIndexedTest() dispatch method.
  56
  57 2. Make liberal use of the expect() method; it is your friend.
  58
  59 3. The tests in this file exactly match those in a sister file on the
  60    other side.  The two files are:
  61
  62    icu4j:  src/com/ibm/test/translit/TransliteratorTest.java
  63    icu4c:  source/test/intltest/transtst.cpp
  64
  65                   ==> THIS IS THE IMPORTANT PART <==
  66
  67    When you add a test in this file, add it in TransliteratorTest.java
  68    too.  Give it the same name and put it in the same relative place.
  69    This makes maintenance a lot simpler for any poor soul who ends up
  70    trying to synchronize the tests between icu4j and icu4c.
  71
  72 4. If you MUST enter a test that is NOT paralleled in the sister file,
  73    then add it in the special non-mirrored section.  These are
  74    labeled
  75
  76      "icu4j ONLY"
  77
  78    or
  79
  80      "icu4c ONLY"
  81
  82    Make sure you document the reason the test is here and not there.
  83
  84
  85 Thank you.
  86 The Management
  87 ***********************************************************************/
  88
  89 // Define character constants thusly to be EBCDIC-friendly
  90 enum {
  91     LEFT_BRACE=((UChar)0x007B), /*{*/
  92     PIPE      =((UChar)0x007C), /*|*/
  93     ZERO      =((UChar)0x0030), /*0*/
  94     UPPER_A   =((UChar)0x0041)  /*A*/
  95 };
  96
  97 TransliteratorTest::TransliteratorTest()
  98 :   DESERET_DEE((UChar32)0x10414),
  99     DESERET_dee((UChar32)0x1043C)
 100 {
 101 }
 102
 103 TransliteratorTest::~TransliteratorTest() {}
 104
/**
 * Test dispatch method: maps a test index to one of the Test...() methods
 * of this class.  Every new test needs a TESTCASE line added here (and a
 * declaration in the header).
 *
 * TESTCASE is a macro defined outside this file.
 * NOTE(review): presumably it sets `name` to the test's name and, when
 * `exec` is true, runs the test -- confirm against the macro definition.
 *
 * @param index  index of the requested test
 * @param exec   forwarded to the TESTCASE macro (see note above)
 * @param name   output; set to "" when `index` matches no test
 * (the fourth parameter is unused)
 */
void
TransliteratorTest::runIndexedTest(int32_t index, UBool exec,
                                   const char* &name, char* /*par*/) {
    switch (index) {
        TESTCASE(0,TestInstantiation);
        TESTCASE(1,TestSimpleRules);
        TESTCASE(2,TestRuleBasedInverse);
        TESTCASE(3,TestKeyboard);
        TESTCASE(4,TestKeyboard2);
        TESTCASE(5,TestKeyboard3);
        TESTCASE(6,TestArabic);
        TESTCASE(7,TestCompoundKana);
        TESTCASE(8,TestCompoundHex);
        TESTCASE(9,TestFiltering);
        TESTCASE(10,TestInlineSet);
        TESTCASE(11,TestPatternQuoting);
        TESTCASE(12,TestJ277);
        TESTCASE(13,TestJ243);
        TESTCASE(14,TestJ329);
        TESTCASE(15,TestSegments);
        TESTCASE(16,TestCursorOffset);
        TESTCASE(17,TestArbitraryVariableValues);
        TESTCASE(18,TestPositionHandling);
        TESTCASE(19,TestHiraganaKatakana);
        TESTCASE(20,TestCopyJ476);
        TESTCASE(21,TestAnchors);
        TESTCASE(22,TestInterIndic);
        TESTCASE(23,TestFilterIDs);
        TESTCASE(24,TestCaseMap);
        TESTCASE(25,TestNameMap);
        TESTCASE(26,TestLiberalizedID);
        TESTCASE(27,TestCreateInstance);
        TESTCASE(28,TestNormalizationTransliterator);
        TESTCASE(29,TestCompoundRBT);
        TESTCASE(30,TestCompoundFilter);
        TESTCASE(31,TestRemove);
        TESTCASE(32,TestToRules);
        TESTCASE(33,TestContext);
        TESTCASE(34,TestSupplemental);
        TESTCASE(35,TestQuantifier);
        TESTCASE(36,TestSTV);
        TESTCASE(37,TestCompoundInverse);
        TESTCASE(38,TestNFDChainRBT);
        TESTCASE(39,TestNullInverse);
        TESTCASE(40,TestAliasInverseID);
        TESTCASE(41,TestCompoundInverseID);
        TESTCASE(42,TestUndefinedVariable);
        TESTCASE(43,TestEmptyContext);
        TESTCASE(44,TestCompoundFilterID);
        TESTCASE(45,TestPropertySet);
        TESTCASE(46,TestNewEngine);
        TESTCASE(47,TestQuantifiedSegment);
        TESTCASE(48,TestDevanagariLatinRT);
        TESTCASE(49,TestTeluguLatinRT);
        TESTCASE(50,TestCompoundLatinRT);
        TESTCASE(51,TestSanskritLatinRT);
        TESTCASE(52,TestLocaleInstantiation);
        TESTCASE(53,TestTitleAccents);
        TESTCASE(54,TestLocaleResource);
        TESTCASE(55,TestParseError);
        TESTCASE(56,TestOutputSet);
        TESTCASE(57,TestVariableRange);
        TESTCASE(58,TestInvalidPostContext);
        TESTCASE(59,TestIDForms);
        TESTCASE(60,TestToRulesMark);
        TESTCASE(61,TestEscape);
        TESTCASE(62,TestAnchorMasking);
        TESTCASE(63,TestDisplayName);
        TESTCASE(64,TestSpecialCases);
        TESTCASE(65,TestIncrementalProgress);
        TESTCASE(66,TestSurrogateCasing);
        TESTCASE(67,TestFunction);
        TESTCASE(68,TestInvalidBackRef);
        TESTCASE(69,TestMulticharStringSet);
        TESTCASE(70,TestUserFunction);
        TESTCASE(71,TestAnyX);
        TESTCASE(72,TestSourceTargetSet);
        TESTCASE(73,TestGurmukhiDevanagari);
        TESTCASE(74,TestRuleWhitespace);
        TESTCASE(75,TestAllCodepoints);
        TESTCASE(76,TestBoilerplate);
        TESTCASE(77,TestAlternateSyntax);
        TESTCASE(78,TestBeginEnd);
        TESTCASE(79,TestBeginEndToRules);
        TESTCASE(80,TestRegisterAlias);
        TESTCASE(81,TestRuleStripping);
        TESTCASE(82,TestHalfwidthFullwidth);
        TESTCASE(83,TestThai);
        // Unknown index: report an empty test name.
        default: name = ""; break;
    }
}
 196
// Version quadruple 3.9.4.0.
// NOTE(review): presumably compared against the running ICU version to
// gate version-dependent checks -- confirm at the use sites.
static const UVersionInfo ICU_39 = {3,9,4,0};
 198 /**
 199  * Make sure every system transliterator can be instantiated.
 200  *
 201  * ALSO test that the result of toRules() for each rule is a valid
 202  * rule.  Do this here so we don't have to have another test that
 203  * instantiates everything as well.
 204  */
 205 void TransliteratorTest::TestInstantiation() {
 206     UErrorCode ec = U_ZERO_ERROR;
 207     StringEnumeration* avail = Transliterator::getAvailableIDs(ec);
 208     assertSuccess("getAvailableIDs()", ec);
 209     assertTrue("getAvailableIDs()!=NULL", avail!=NULL);
 210     int32_t n = Transliterator::countAvailableIDs();
 211     assertTrue("getAvailableIDs().count()==countAvailableIDs()",
 212                avail->count(ec) == n);
 213     assertSuccess("count()", ec);
 214     UnicodeString name;
 215     for (int32_t i=0; i<n; ++i) {
 216         const UnicodeString& id = *avail->snext(ec);
 217         if (!assertSuccess("snext()", ec) ||
 218             !assertTrue("snext()!=NULL", (&id)!=NULL, TRUE)) {
 219             break;
 220         }
 221         UnicodeString id2 = Transliterator::getAvailableID(i);
 222         if (id.length() < 1) {
 223             errln(UnicodeString("FAIL: getAvailableID(") +
 224                   i + ") returned empty string");
 225             continue;
 226         }
 227         if (id != id2) {
 228             errln(UnicodeString("FAIL: getAvailableID(") +
 229                   i + ") != getAvailableIDs().snext()");
 230             continue;
 231         }
 232         UParseError parseError;
 233         UErrorCode status = U_ZERO_ERROR;
 234         Transliterator* t = Transliterator::createInstance(id,
 235                               UTRANS_FORWARD, parseError,status);
 236         name.truncate(0);
 237         Transliterator::getDisplayName(id, name);
 238         if (t == 0) {
 239             errln(UnicodeString("FAIL: Couldn't create ") + id +
 240                   /*", parse error " + parseError.code +*/
 241                   ", line " + parseError.line +
 242                   ", offset " + parseError.offset +
 243                   ", pre-context " + prettify(parseError.preContext, TRUE) +
 244                   ", post-context " +prettify(parseError.postContext,TRUE) +
 245                   ", Error: " + u_errorName(status));
 246             // When createInstance fails, it deletes the failing
 247             // entry from the available ID list.  We detect this
 248             // here by looking for a change in countAvailableIDs.
 249             int32_t nn = Transliterator::countAvailableIDs();
 250             if (nn == (n - 1)) {
 251                 n = nn;
 252                 --i; // Compensate for deleted entry
 253             }
 254         } else {
 255             logln(UnicodeString("OK: ") + name + " (" + id + ")");
 256
 257             // Now test toRules
 258             UnicodeString rules;
 259             t->toRules(rules, TRUE);
 260             Transliterator *u = Transliterator::createFromRules("x",
 261                                     rules, UTRANS_FORWARD, parseError,status);
 262             if (u == 0) {
 263                 errln(UnicodeString("FAIL: ") + id +
 264                       ".createFromRules() => bad rules" +
 265                       /*", parse error " + parseError.code +*/
 266                       ", line " + parseError.line +
 267                       ", offset " + parseError.offset +
 268                       ", context " + prettify(parseError.preContext, TRUE) +
 269                       ", rules: " + prettify(rules, TRUE));
 270             } else {
 271                 delete u;
 272             }
 273             delete t;
 274         }
 275     }
 276     assertTrue("snext()==NULL", avail->snext(ec)==NULL);
 277     assertSuccess("snext()", ec);
 278     delete avail;
 279
 280     // Now test the failure path
 281     UParseError parseError;
 282     UErrorCode status = U_ZERO_ERROR;
 283     UnicodeString id("<Not a valid Transliterator ID>");
 284     Transliterator* t = Transliterator::createInstance(id, UTRANS_FORWARD, parseError, status);
 285     if (t != 0) {
 286         errln("FAIL: " + id + " returned a transliterator");
 287         delete t;
 288     } else {
 289         logln("OK: Bogus ID handled properly");
 290     }
 291 }
 292
 293 void TransliteratorTest::TestSimpleRules(void) {
 294     /* Example: rules 1. ab>x|y
 295      *                2. yc>z
 296      *
 297      * []|eabcd  start - no match, copy e to tranlated buffer
 298      * [e]|abcd  match rule 1 - copy output & adjust cursor
 299      * [ex|y]cd  match rule 2 - copy output & adjust cursor
 300      * [exz]|d   no match, copy d to transliterated buffer
 301      * [exzd]|   done
 302      */
 303     expect(UnicodeString("ab>x|y;", "") +
 304            "yc>z",
 305            "eabcd", "exzd");
 306
 307     /* Another set of rules:
 308      *    1. ab>x|yzacw
 309      *    2. za>q
 310      *    3. qc>r
 311      *    4. cw>n
 312      *
 313      * []|ab       Rule 1
 314      * [x|yzacw]   No match
 315      * [xy|zacw]   Rule 2
 316      * [xyq|cw]    Rule 4
 317      * [xyqn]|     Done
 318      */
 319     expect(UnicodeString("ab>x|yzacw;") +
 320            "za>q;" +
 321            "qc>r;" +
 322            "cw>n",
 323            "ab", "xyqn");
 324
 325     /* Test categories
 326      */
 327     UErrorCode status = U_ZERO_ERROR;
 328     UParseError parseError;
 329     Transliterator *t = Transliterator::createFromRules(
 330         "<ID>",
 331         UnicodeString("$dummy=").append((UChar)0xE100) +
 332         UnicodeString(";"
 333                       "$vowel=[aeiouAEIOU];"
 334                       "$lu=[:Lu:];"
 335                       "$vowel } $lu > '!';"
 336                       "$vowel > '&';"
 337                       "'!' { $lu > '^';"
 338                       "$lu > '*';"
 339                       "a > ERROR", ""),
 340         UTRANS_FORWARD, parseError,
 341         status);
 342     if (U_FAILURE(status)) {
 343         errln("FAIL: RBT constructor failed");
 344         return;
 345     }
 346     expect(*t, "abcdefgABCDEFGU", "&bcd&fg!^**!^*&");
 347     delete t;
 348 }
 349
 350 /**
 351  * Test inline set syntax and set variable syntax.
 352  */
 353 void TransliteratorTest::TestInlineSet(void) {
 354     expect("{ [:Ll:] } x > y; [:Ll:] > z;", "aAbxq", "zAyzz");
 355     expect("a[0-9]b > qrs", "1a7b9", "1qrs9");
 356
 357     expect(UnicodeString(
 358            "$digit = [0-9];"
 359            "$alpha = [a-zA-Z];"
 360            "$alphanumeric = [$digit $alpha];" // ***
 361            "$special = [^$alphanumeric];"     // ***
 362            "$alphanumeric > '-';"
 363            "$special > '*';", ""),
 364
 365            "thx-1138", "---*----");
 366 }
 367
 368 /**
 369  * Create some inverses and confirm that they work.  We have to be
 370  * careful how we do this, since the inverses will not be true
 371  * inverses -- we can't throw any random string at the composition
 372  * of the transliterators and expect the identity function.  F x
 373  * F' != I.  However, if we are careful about the input, we will
 374  * get the expected results.
 375  */
 376 void TransliteratorTest::TestRuleBasedInverse(void) {
 377     UnicodeString RULES =
 378         UnicodeString("abc>zyx;") +
 379         "ab>yz;" +
 380         "bc>zx;" +
 381         "ca>xy;" +
 382         "a>x;" +
 383         "b>y;" +
 384         "c>z;" +
 385
 386         "abc<zyx;" +
 387         "ab<yz;" +
 388         "bc<zx;" +
 389         "ca<xy;" +
 390         "a<x;" +
 391         "b<y;" +
 392         "c<z;" +
 393
 394         "";
 395
 396     const char* DATA[] = {
 397         // Careful here -- random strings will not work.  If we keep
 398         // the left side to the domain and the right side to the range
 399         // we will be okay though (left, abc; right xyz).
 400         "a", "x",
 401         "abcacab", "zyxxxyy",
 402         "caccb", "xyzzy",
 403     };
 404
 405     int32_t DATA_length = (int32_t)(sizeof(DATA) / sizeof(DATA[0]));
 406
 407     UErrorCode status = U_ZERO_ERROR;
 408     UParseError parseError;
 409     Transliterator *fwd = Transliterator::createFromRules("<ID>", RULES,
 410                                 UTRANS_FORWARD, parseError, status);
 411     Transliterator *rev = Transliterator::createFromRules("<ID>", RULES,
 412                                 UTRANS_REVERSE, parseError, status);
 413     if (U_FAILURE(status)) {
 414         errln("FAIL: RBT constructor failed");
 415         return;
 416     }
 417     for (int32_t i=0; i<DATA_length; i+=2) {
 418         expect(*fwd, DATA[i], DATA[i+1]);
 419         expect(*rev, DATA[i+1], DATA[i]);
 420     }
 421     delete fwd;
 422     delete rev;
 423 }
 424
 425 /**
 426  * Basic test of keyboard.
 427  */
 428 void TransliteratorTest::TestKeyboard(void) {
 429     UParseError parseError;
 430     UErrorCode status = U_ZERO_ERROR;
 431     Transliterator *t = Transliterator::createFromRules("<ID>",
 432                               UnicodeString("psch>Y;")
 433                               +"ps>y;"
 434                               +"ch>x;"
 435                               +"a>A;",
 436                               UTRANS_FORWARD, parseError,
 437                               status);
 438     if (U_FAILURE(status)) {
 439         errln("FAIL: RBT constructor failed");
 440         return;
 441     }
 442     const char* DATA[] = {
 443         // insertion, buffer
 444         "a", "A",
 445         "p", "Ap",
 446         "s", "Aps",
 447         "c", "Apsc",
 448         "a", "AycA",
 449         "psch", "AycAY",
 450         0, "AycAY", // null means finishKeyboardTransliteration
 451     };
 452
 453     keyboardAux(*t, DATA, (int32_t)(sizeof(DATA)/sizeof(DATA[0])));
 454     delete t;
 455 }
 456
 457 /**
 458  * Basic test of keyboard with cursor.
 459  */
 460 void TransliteratorTest::TestKeyboard2(void) {
 461     UParseError parseError;
 462     UErrorCode status = U_ZERO_ERROR;
 463     Transliterator *t = Transliterator::createFromRules("<ID>",
 464                               UnicodeString("ych>Y;")
 465                               +"ps>|y;"
 466                               +"ch>x;"
 467                               +"a>A;",
 468                               UTRANS_FORWARD, parseError,
 469                               status);
 470     if (U_FAILURE(status)) {
 471         errln("FAIL: RBT constructor failed");
 472         return;
 473     }
 474     const char* DATA[] = {
 475         // insertion, buffer
 476         "a", "A",
 477         "p", "Ap",
 478         "s", "Aps", // modified for rollback - "Ay",
 479         "c", "Apsc", // modified for rollback - "Ayc",
 480         "a", "AycA",
 481         "p", "AycAp",
 482         "s", "AycAps", // modified for rollback - "AycAy",
 483         "c", "AycApsc", // modified for rollback - "AycAyc",
 484         "h", "AycAY",
 485         0, "AycAY", // null means finishKeyboardTransliteration
 486     };
 487
 488     keyboardAux(*t, DATA, (int32_t)(sizeof(DATA)/sizeof(DATA[0])));
 489     delete t;
 490 }
 491
 492 /**
 493  * Test keyboard transliteration with back-replacement.
 494  */
 495 void TransliteratorTest::TestKeyboard3(void) {
 496     // We want th>z but t>y.  Furthermore, during keyboard
 497     // transliteration we want t>y then yh>z if t, then h are
 498     // typed.
 499     UnicodeString RULES("t>|y;"
 500                         "yh>z;");
 501
 502     const char* DATA[] = {
 503         // Column 1: characters to add to buffer (as if typed)
 504         // Column 2: expected appearance of buffer after
 505         //           keyboard xliteration.
 506         "a", "a",
 507         "b", "ab",
 508         "t", "abt", // modified for rollback - "aby",
 509         "c", "abyc",
 510         "t", "abyct", // modified for rollback - "abycy",
 511         "h", "abycz",
 512         0, "abycz", // null means finishKeyboardTransliteration
 513     };
 514
 515     UParseError parseError;
 516     UErrorCode status = U_ZERO_ERROR;
 517     Transliterator *t = Transliterator::createFromRules("<ID>", RULES, UTRANS_FORWARD, parseError, status);
 518     if (U_FAILURE(status)) {
 519         errln("FAIL: RBT constructor failed");
 520         return;
 521     }
 522     keyboardAux(*t, DATA, (int32_t)(sizeof(DATA)/sizeof(DATA[0])));
 523     delete t;
 524 }
 525
 526 void TransliteratorTest::keyboardAux(const Transliterator& t,
 527                                      const char* DATA[], int32_t DATA_length) {
 528     UErrorCode status = U_ZERO_ERROR;
 529     UTransPosition index={0, 0, 0, 0};
 530     UnicodeString s;
 531     for (int32_t i=0; i<DATA_length; i+=2) {
 532         UnicodeString log;
 533         if (DATA[i] != 0) {
 534             log = s + " + "
 535                 + DATA[i]
 536                 + " -> ";
 537             t.transliterate(s, index, DATA[i], status);
 538         } else {
 539             log = s + " => ";
 540             t.finishTransliteration(s, index);
 541         }
 542         // Show the start index '{' and the cursor '|'
 543         UnicodeString a, b, c;
 544         s.extractBetween(0, index.contextStart, a);
 545         s.extractBetween(index.contextStart, index.start, b);
 546         s.extractBetween(index.start, s.length(), c);
 547         log.append(a).
 548             append((UChar)LEFT_BRACE).
 549             append(b).
 550             append((UChar)PIPE).
 551             append(c);
 552         if (s == DATA[i+1] && U_SUCCESS(status)) {
 553             logln(log);
 554         } else {
 555             errln(UnicodeString("FAIL: ") + log + ", expected " + DATA[i+1]);
 556         }
 557     }
 558 }
 559
/**
 * Placeholder for the Latin-Arabic test.  The body currently contains
 * only the commented-out former test, so this method does nothing.
 */
void TransliteratorTest::TestArabic(void) {
// Test disabled for 2.0 until new Arabic transliterator can be written.
//    /*
//    const char* DATA[] = {
//        "Arabic", "\u062a\u062a\u0645\u062a\u0639\u0020"+
//                  "\u0627\u0644\u0644\u063a\u0629\u0020"+
//                  "\u0627\u0644\u0639\u0631\u0628\u0628\u064a\u0629\u0020"+
//                  "\u0628\u0628\u0646\u0638\u0645\u0020"+
//                  "\u0643\u062a\u0627\u0628\u0628\u064a\u0629\u0020"+
//                  "\u062c\u0645\u064a\u0644\u0629",
//    };
//    */
//
//    UChar ar_raw[] = {
//        0x062a, 0x062a, 0x0645, 0x062a, 0x0639, 0x0020, 0x0627,
//        0x0644, 0x0644, 0x063a, 0x0629, 0x0020, 0x0627, 0x0644,
//        0x0639, 0x0631, 0x0628, 0x0628, 0x064a, 0x0629, 0x0020,
//        0x0628, 0x0628, 0x0646, 0x0638, 0x0645, 0x0020, 0x0643,
//        0x062a, 0x0627, 0x0628, 0x0628, 0x064a, 0x0629, 0x0020,
//        0x062c, 0x0645, 0x064a, 0x0644, 0x0629, 0
//    };
//    UnicodeString ar(ar_raw);
//    UErrorCode status=U_ZERO_ERROR;
//    UParseError parseError;
//    Transliterator *t = Transliterator::createInstance("Latin-Arabic", UTRANS_FORWARD, parseError, status);
//    if (t == 0) {
//        errln("FAIL: createInstance failed");
//        return;
//    }
//    expect(*t, "Arabic", ar);
//    delete t;
}
 592
 593 /**
 594  * Compose the Kana transliterator forward and reverse and try
 595  * some strings that should come out unchanged.
 596  */
 597 void TransliteratorTest::TestCompoundKana(void) {
 598     UParseError parseError;
 599     UErrorCode status = U_ZERO_ERROR;
 600     Transliterator* t = Transliterator::createInstance("Latin-Hiragana;Hiragana-Latin", UTRANS_FORWARD, parseError, status);
 601     if (t == 0) {
 602         errln("FAIL: construction of Latin-Hiragana;Hiragana-Latin failed");
 603     } else {
 604         expect(*t, "aaaaa", "aaaaa");
 605         delete t;
 606     }
 607 }
 608
 609 /**
 610  * Compose the hex transliterators forward and reverse.
 611  */
 612 void TransliteratorTest::TestCompoundHex(void) {
 613     UParseError parseError;
 614     UErrorCode status = U_ZERO_ERROR;
 615     Transliterator* a = Transliterator::createInstance("Any-Hex", UTRANS_FORWARD, parseError, status);
 616     Transliterator* b = Transliterator::createInstance("Hex-Any", UTRANS_FORWARD, parseError, status);
 617     Transliterator* transab[] = { a, b };
 618     Transliterator* transba[] = { b, a };
 619     if (a == 0 || b == 0) {
 620         errln("FAIL: construction failed");
 621         delete a;
 622         delete b;
 623         return;
 624     }
 625     // Do some basic tests of a
 626     expect(*a, "01", UnicodeString("\\u0030\\u0031", ""));
 627     // Do some basic tests of b
 628     expect(*b, UnicodeString("\\u0030\\u0031", ""), "01");
 629
 630     Transliterator* ab = new CompoundTransliterator(transab, 2);
 631     UnicodeString s("abcde", "");
 632     expect(*ab, s, s);
 633
 634     UnicodeString str(s);
 635     a->transliterate(str);
 636     Transliterator* ba = new CompoundTransliterator(transba, 2);
 637     expect(*ba, str, str);
 638
 639     delete ab;
 640     delete ba;
 641     delete a;
 642     delete b;
 643 }
 644
 645 int gTestFilterClassID = 0;
 646 /**
 647  * Used by TestFiltering().
 648  */
 649 class TestFilter : public UnicodeFilter {
 650     virtual UnicodeFunctor* clone() const {
 651         return new TestFilter(*this);
 652     }
 653     virtual UBool contains(UChar32 c) const {
 654         return c != (UChar)0x0063 /*c*/;
 655     }
 656     // Stubs
 657     virtual UnicodeString& toPattern(UnicodeString& result,
 658                                      UBool /*escapeUnprintable*/) const {
 659         return result;
 660     }
 661     virtual UBool matchesIndexValue(uint8_t /*v*/) const {
 662         return FALSE;
 663     }
 664     virtual void addMatchSetTo(UnicodeSet& /*toUnionTo*/) const {}
 665 public:
 666     UClassID getDynamicClassID() const { return (UClassID)&gTestFilterClassID; }
 667 };
 668
 669 /**
 670  * Do some basic tests of filtering.
 671  */
 672 void TransliteratorTest::TestFiltering(void) {
 673     UParseError parseError;
 674     UErrorCode status = U_ZERO_ERROR;
 675     Transliterator* hex = Transliterator::createInstance("Any-Hex", UTRANS_FORWARD, parseError, status);
 676     if (hex == 0) {
 677         errln("FAIL: createInstance(Any-Hex) failed");
 678         return;
 679     }
 680     hex->adoptFilter(new TestFilter());
 681     UnicodeString s("abcde");
 682     hex->transliterate(s);
 683     UnicodeString exp("\\u0061\\u0062c\\u0064\\u0065", "");
 684     if (s == exp) {
 685         logln(UnicodeString("Ok:   \"") + exp + "\"");
 686     } else {
 687         logln(UnicodeString("FAIL: \"") + s + "\", wanted \"" + exp + "\"");
 688     }
 689
 690     // ICU4C ONLY. Do not find Transliterator.orphanFilter() in ICU4J.
 691     UnicodeFilter *f = hex->orphanFilter();
 692     if (f == NULL){
 693         errln("FAIL: orphanFilter() should get a UnicodeFilter");
 694     } else {
 695         delete f;
 696     }
 697     delete hex;
 698 }
 699
 700 /**
 701  * Test anchors
 702  */
 703 void TransliteratorTest::TestAnchors(void) {
 704     expect(UnicodeString("^a  > 0; a$ > 2 ; a > 1;", ""),
 705            "aaa",
 706            "012");
 707     expect(UnicodeString("$s=[z$]; $s{a>0; a}$s>2; a>1;", ""),
 708            "aaa",
 709            "012");
 710     expect(UnicodeString("^ab  > 01 ;"
 711            " ab  > |8 ;"
 712            "  b  > k ;"
 713            " 8x$ > 45 ;"
 714            " 8x  > 77 ;", ""),
 715
 716            "ababbabxabx",
 717            "018k7745");
 718     expect(UnicodeString("$s = [z$] ;"
 719            "$s{ab    > 01 ;"
 720            "   ab    > |8 ;"
 721            "    b    > k ;"
 722            "   8x}$s > 45 ;"
 723            "   8x    > 77 ;", ""),
 724
 725            "abzababbabxzabxabx",
 726            "01z018k45z01x45");
 727 }
 728
 729 /**
 730  * Test pattern quoting and escape mechanisms.
 731  */
 732 void TransliteratorTest::TestPatternQuoting(void) {
 733     // Array of 3n items
 734     // Each item is <rules>, <input>, <expected output>
 735     const UnicodeString DATA[] = {
 736         UnicodeString(UChar(0x4E01)) + ">'[male adult]'",
 737         UnicodeString(UChar(0x4E01)),
 738         "[male adult]"
 739     };
 740
 741     for (int32_t i=0; i<3; i+=3) {
 742         logln(UnicodeString("Pattern: ") + prettify(DATA[i]));
 743         UParseError parseError;
 744         UErrorCode status = U_ZERO_ERROR;
 745         Transliterator *t = Transliterator::createFromRules("<ID>", DATA[i], UTRANS_FORWARD, parseError, status);
 746         if (U_FAILURE(status)) {
 747             errln("RBT constructor failed");
 748         } else {
 749             expect(*t, DATA[i+1], DATA[i+2]);
 750         }
 751         delete t;
 752     }
 753 }
 754
 755 /**
 756  * Regression test for bugs found in Greek transliteration.
 757  */
 758 void TransliteratorTest::TestJ277(void) {
 759     UErrorCode status = U_ZERO_ERROR;
 760     UParseError parseError;
 761     Transliterator *gl = Transliterator::createInstance("Greek-Latin; NFD; [:M:]Remove; NFC", UTRANS_FORWARD, parseError, status);
 762     if (gl == NULL) {
 763         errln("FAIL: createInstance(Greek-Latin) returned NULL");
 764         return;
 765     }
 766
 767     UChar sigma = 0x3C3;
 768     UChar upsilon = 0x3C5;
 769     UChar nu = 0x3BD;
 770 //    UChar PHI = 0x3A6;
 771     UChar alpha = 0x3B1;
 772 //    UChar omega = 0x3C9;
 773 //    UChar omicron = 0x3BF;
 774 //    UChar epsilon = 0x3B5;
 775
 776     // sigma upsilon nu -> syn
 777     UnicodeString syn;
 778     syn.append(sigma).append(upsilon).append(nu);
 779     expect(*gl, syn, "syn");
 780
 781     // sigma alpha upsilon nu -> saun
 782     UnicodeString sayn;
 783     sayn.append(sigma).append(alpha).append(upsilon).append(nu);
 784     expect(*gl, sayn, "saun");
 785
 786     // Again, using a smaller rule set
 787     UnicodeString rules(
 788                 "$alpha   = \\u03B1;"
 789                 "$nu      = \\u03BD;"
 790                 "$sigma   = \\u03C3;"
 791                 "$ypsilon = \\u03C5;"
 792                 "$vowel   = [aeiouAEIOU$alpha$ypsilon];"
 793                 "s <>           $sigma;"
 794                 "a <>           $alpha;"
 795                 "u <>  $vowel { $ypsilon;"
 796                 "y <>           $ypsilon;"
 797                 "n <>           $nu;",
 798                 "");
 799     Transliterator *mini = Transliterator::createFromRules("mini", rules, UTRANS_REVERSE, parseError, status);
 800     if (U_FAILURE(status)) { errln("FAIL: Transliterator constructor failed"); return; }
 801     expect(*mini, syn, "syn");
 802     expect(*mini, sayn, "saun");
 803     delete mini;
 804     mini = NULL;
 805
 806 #if !UCONFIG_NO_FORMATTING
 807     // Transliterate the Greek locale data
 808     Locale el("el");
 809     DateFormatSymbols syms(el, status);
 810     if (U_FAILURE(status)) { errln("FAIL: Transliterator constructor failed"); return; }
 811     int32_t i, count;
 812     const UnicodeString* data = syms.getMonths(count);
 813     for (i=0; i<count; ++i) {
 814         if (data[i].length() == 0) {
 815             continue;
 816         }
 817         UnicodeString out(data[i]);
 818         gl->transliterate(out);
 819         UBool ok = TRUE;
 820         if (data[i].length() >= 2 && out.length() >= 2 &&
 821             u_isupper(data[i].charAt(0)) && u_islower(data[i].charAt(1))) {
 822             if (!(u_isupper(out.charAt(0)) && u_islower(out.charAt(1)))) {
 823                 ok = FALSE;
 824             }
 825         }
 826         if (ok) {
 827             logln(prettify(data[i] + " -> " + out));
 828         } else {
 829             errln(UnicodeString("FAIL: ") + prettify(data[i] + " -> " + out));
 830         }
 831     }
 832 #endif
 833
 834     delete gl;
 835 }
 836
 837 /**
 838  * Prefix, suffix support in hex transliterators
 839  */
 840 void TransliteratorTest::TestJ243(void) {
 841     UErrorCode ec = U_ZERO_ERROR;
 842
 843     // Test default Hex-Any, which should handle
 844     // \u, \U, u+, and U+
 845     Transliterator *hex =
 846         Transliterator::createInstance("Hex-Any", UTRANS_FORWARD, ec);
 847     if (assertSuccess("getInstance", ec)) {
 848         expect(*hex, UnicodeString("\\u0041+\\U00000042,U+0043uU+0044z", ""), "A+B,CuDz");
 849     }
 850     delete hex;
 851
 852 //    // Try a custom Hex-Unicode
 853 //    // \uXXXX and &#xXXXX;
 854 //    ec = U_ZERO_ERROR;
 855 //    HexToUnicodeTransliterator hex2(UnicodeString("\\\\u###0;&\\#x###0\\;", ""), ec);
 856 //    expect(hex2, UnicodeString("\\u61\\u062\\u0063\\u00645\\u66x&#x30;&#x031;&#x0032;&#x00033;", ""),
 857 //           "abcd5fx012&#x00033;");
 858 //    // Try custom Any-Hex (default is tested elsewhere)
 859 //    ec = U_ZERO_ERROR;
 860 //    UnicodeToHexTransliterator hex3(UnicodeString("&\\#x###0;", ""), ec);
 861 //    expect(hex3, "012", "&#x30;&#x31;&#x32;");
 862 }
 863
 864 /**
 865  * Parsers need better syntax error messages.
 866  */
 867 void TransliteratorTest::TestJ329(void) {
 868
 869     struct { UBool containsErrors; const char* rule; } DATA[] = {
 870         { FALSE, "a > b; c > d" },
 871         { TRUE,  "a > b; no operator; c > d" },
 872     };
 873     int32_t DATA_length = (int32_t)(sizeof(DATA) / sizeof(DATA[0]));
 874
 875     for (int32_t i=0; i<DATA_length; ++i) {
 876         UErrorCode status = U_ZERO_ERROR;
 877         UParseError parseError;
 878         Transliterator *rbt = Transliterator::createFromRules("<ID>",
 879                                     DATA[i].rule,
 880                                     UTRANS_FORWARD,
 881                                     parseError,
 882                                     status);
 883         UBool gotError = U_FAILURE(status);
 884         UnicodeString desc(DATA[i].rule);
 885         desc.append(gotError ? " -> error" : " -> no error");
 886         if (gotError) {
 887             desc = desc + ", ParseError code=" + u_errorName(status) +
 888                 " line=" + parseError.line +
 889                 " offset=" + parseError.offset +
 890                 " context=" + parseError.preContext;
 891         }
 892         if (gotError == DATA[i].containsErrors) {
 893             logln(UnicodeString("Ok:   ") + desc);
 894         } else {
 895             errln(UnicodeString("FAIL: ") + desc);
 896         }
 897         delete rbt;
 898     }
 899 }
 900
 901 /**
 902  * Test segments and segment references.
 903  */
 904 void TransliteratorTest::TestSegments(void) {
 905     // Array of 3n items
 906     // Each item is <rules>, <input>, <expected output>
 907     UnicodeString DATA[] = {
 908         "([a-z]) '.' ([0-9]) > $2 '-' $1",
 909         "abc.123.xyz.456",
 910         "ab1-c23.xy4-z56",
 911
 912         // nested
 913         "(([a-z])([0-9])) > $1 '.' $2 '.' $3;",
 914         "a1 b2",
 915         "a1.a.1 b2.b.2",
 916     };
 917     int32_t DATA_length = (int32_t)(sizeof(DATA)/sizeof(*DATA));
 918
 919     for (int32_t i=0; i<DATA_length; i+=3) {
 920         logln("Pattern: " + prettify(DATA[i]));
 921         UParseError parseError;
 922         UErrorCode status = U_ZERO_ERROR;
 923         Transliterator *t = Transliterator::createFromRules("ID", DATA[i], UTRANS_FORWARD, parseError, status);
 924         if (U_FAILURE(status)) {
 925             errln("FAIL: RBT constructor");
 926         } else {
 927             expect(*t, DATA[i+1], DATA[i+2]);
 928         }
 929         delete t;
 930     }
 931 }
 932
 933 /**
 934  * Test cursor positioning outside of the key
 935  */
 936 void TransliteratorTest::TestCursorOffset(void) {
 937     // Array of 3n items
 938     // Each item is <rules>, <input>, <expected output>
 939     UnicodeString DATA[] = {
 940         "pre {alpha} post > | @ ALPHA ;"
 941         "eALPHA > beta ;"
 942         "pre {beta} post > BETA @@ | ;"
 943         "post > xyz",
 944
 945         "prealphapost prebetapost",
 946
 947         "prbetaxyz preBETApost",
 948     };
 949     int32_t DATA_length = (int32_t)(sizeof(DATA)/sizeof(*DATA));
 950
 951     for (int32_t i=0; i<DATA_length; i+=3) {
 952         logln("Pattern: " + prettify(DATA[i]));
 953         UParseError parseError;
 954         UErrorCode status = U_ZERO_ERROR;
 955         Transliterator *t = Transliterator::createFromRules("<ID>", DATA[i], UTRANS_FORWARD, parseError, status);
 956         if (U_FAILURE(status)) {
 957             errln("FAIL: RBT constructor");
 958         } else {
 959             expect(*t, DATA[i+1], DATA[i+2]);
 960         }
 961         delete t;
 962     }
 963 }
 964
 965 /**
 966  * Test zero length and > 1 char length variable values.  Test
 967  * use of variable refs in UnicodeSets.
 968  */
 969 void TransliteratorTest::TestArbitraryVariableValues(void) {
 970     // Array of 3n items
 971     // Each item is <rules>, <input>, <expected output>
 972     UnicodeString DATA[] = {
 973         "$abe = ab;"
 974         "$pat = x[yY]z;"
 975         "$ll  = 'a-z';"
 976         "$llZ = [$ll];"
 977         "$llY = [$ll$pat];"
 978         "$emp = ;"
 979
 980         "$abe > ABE;"
 981         "$pat > END;"
 982         "$llZ > 1;"
 983         "$llY > 2;"
 984         "7$emp 8 > 9;"
 985         "",
 986
 987         "ab xYzxyz stY78",
 988         "ABE ENDEND 1129",
 989     };
 990     int32_t DATA_length = (int32_t)(sizeof(DATA)/sizeof(*DATA));
 991
 992     for (int32_t i=0; i<DATA_length; i+=3) {
 993         logln("Pattern: " + prettify(DATA[i]));
 994         UParseError parseError;
 995         UErrorCode status = U_ZERO_ERROR;
 996         Transliterator *t = Transliterator::createFromRules("<ID>", DATA[i], UTRANS_FORWARD, parseError, status);
 997         if (U_FAILURE(status)) {
 998             errln("FAIL: RBT constructor");
 999         } else {
1000             expect(*t, DATA[i+1], DATA[i+2]);
1001         }
1002         delete t;
1003     }
1004 }
1005
1006 /**
1007  * Confirm that the contextStart, contextLimit, start, and limit
1008  * behave correctly. J474.
1009  */
1010 void TransliteratorTest::TestPositionHandling(void) {
1011     // Array of 3n items
1012     // Each item is <rules>, <input>, <expected output>
1013     const char* DATA[] = {
1014         "a{t} > SS ; {t}b > UU ; {t} > TT ;",
1015         "xtat txtb", // pos 0,9,0,9
1016         "xTTaSS TTxUUb",
1017
1018         "a{t} > SS ; {t}b > UU ; {t} > TT ; a > A ; b > B ;",
1019         "xtat txtb", // pos 2,9,3,8
1020         "xtaSS TTxUUb",
1021
1022         "a{t} > SS ; {t}b > UU ; {t} > TT ; a > A ; b > B ;",
1023         "xtat txtb", // pos 3,8,3,8
1024         "xtaTT TTxTTb",
1025     };
1026
1027     // Array of 4n positions -- these go with the DATA array
1028     // They are: contextStart, contextLimit, start, limit
1029     int32_t POS[] = {
1030         0, 9, 0, 9,
1031         2, 9, 3, 8,
1032         3, 8, 3, 8,
1033     };
1034
1035     int32_t n = (int32_t)(sizeof(DATA) / sizeof(DATA[0])) / 3;
1036     for (int32_t i=0; i<n; i++) {
1037         UErrorCode status = U_ZERO_ERROR;
1038         UParseError parseError;
1039         Transliterator *t = Transliterator::createFromRules("<ID>",
1040                                 DATA[3*i], UTRANS_FORWARD, parseError, status);
1041         if (U_FAILURE(status)) {
1042             delete t;
1043             errln("FAIL: RBT constructor");
1044             return;
1045         }
1046         UTransPosition pos;
1047         pos.contextStart= POS[4*i];
1048         pos.contextLimit = POS[4*i+1];
1049         pos.start = POS[4*i+2];
1050         pos.limit = POS[4*i+3];
1051         UnicodeString rsource(DATA[3*i+1]);
1052         t->transliterate(rsource, pos, status);
1053         if (U_FAILURE(status)) {
1054             delete t;
1055             errln("FAIL: transliterate");
1056             return;
1057         }
1058         t->finishTransliteration(rsource, pos);
1059         expectAux(DATA[3*i],
1060                   DATA[3*i+1],
1061                   rsource,
1062                   DATA[3*i+2]);
1063         delete t;
1064     }
1065 }
1066
1067 /**
1068  * Test the Hiragana-Katakana transliterator.
1069  */
1070 void TransliteratorTest::TestHiraganaKatakana(void) {
1071     UParseError parseError;
1072     UErrorCode status = U_ZERO_ERROR;
1073     Transliterator* hk = Transliterator::createInstance("Hiragana-Katakana", UTRANS_FORWARD, parseError, status);
1074     Transliterator* kh = Transliterator::createInstance("Katakana-Hiragana", UTRANS_FORWARD, parseError, status);
1075     if (hk == 0 || kh == 0) {
1076         errln("FAIL: createInstance failed");
1077         delete hk;
1078         delete kh;
1079         return;
1080     }
1081
1082     // Array of 3n items
1083     // Each item is "hk"|"kh"|"both", <Hiragana>, <Katakana>
1084     const char* DATA[] = {
1085         "both",
1086         "\\u3042\\u3090\\u3099\\u3092\\u3050",
1087         "\\u30A2\\u30F8\\u30F2\\u30B0",
1088
1089         "kh",
1090         "\\u307C\\u3051\\u3060\\u3042\\u3093\\u30FC",
1091         "\\u30DC\\u30F6\\u30C0\\u30FC\\u30F3\\u30FC",
1092     };
1093     int32_t DATA_length = (int32_t)(sizeof(DATA) / sizeof(DATA[0]));
1094
1095     for (int32_t i=0; i<DATA_length; i+=3) {
1096         UnicodeString h = CharsToUnicodeString(DATA[i+1]);
1097         UnicodeString k = CharsToUnicodeString(DATA[i+2]);
1098         switch (*DATA[i]) {
1099         case 0x68: //'h': // Hiragana-Katakana
1100             expect(*hk, h, k);
1101             break;
1102         case 0x6B: //'k': // Katakana-Hiragana
1103             expect(*kh, k, h);
1104             break;
1105         case 0x62: //'b': // both
1106             expect(*hk, h, k);
1107             expect(*kh, k, h);
1108             break;
1109         }
1110     }
1111     delete hk;
1112     delete kh;
1113 }
1114
1115 /**
1116  * Test cloning / copy constructor of RBT.
1117  */
1118 void TransliteratorTest::TestCopyJ476(void) {
1119     // The real test here is what happens when the destructors are
1120     // called.  So we let one object get destructed, and check to
1121     // see that its copy still works.
1122     Transliterator *t2 = 0;
1123     {
1124         UParseError parseError;
1125         UErrorCode status = U_ZERO_ERROR;
1126         Transliterator *t1 = Transliterator::createFromRules("t1",
1127             "a>A;b>B;'foo'+>'bar'", UTRANS_FORWARD, parseError, status);
1128         if (U_FAILURE(status)) {
1129             errln("FAIL: RBT constructor");
1130             return;
1131         }
1132         t2 = t1->clone(); // Call copy constructor under the covers.
1133         expect(*t1, "abcfoofoo", "ABcbar");
1134         delete t1;
1135     }
1136     expect(*t2, "abcfoofoo", "ABcbar");
1137     delete t2;
1138 }
1139
1140 /**
1141  * Test inter-Indic transliterators.  These are composed.
1142  * ICU4C Jitterbug 483.
1143  */
1144 void TransliteratorTest::TestInterIndic(void) {
1145     UnicodeString ID("Devanagari-Gujarati", "");
1146     UErrorCode status = U_ZERO_ERROR;
1147     UParseError parseError;
1148     Transliterator* dg = Transliterator::createInstance(ID, UTRANS_FORWARD, parseError, status);
1149     if (dg == 0) {
1150         errln("FAIL: createInstance(" + ID + ") returned NULL");
1151         return;
1152     }
1153     UnicodeString id = dg->getID();
1154     if (id != ID) {
1155         errln("FAIL: createInstance(" + ID + ")->getID() => " + id);
1156     }
1157     UnicodeString dev = CharsToUnicodeString("\\u0901\\u090B\\u0925");
1158     UnicodeString guj = CharsToUnicodeString("\\u0A81\\u0A8B\\u0AA5");
1159     expect(*dg, dev, guj);
1160     delete dg;
1161 }
1162
1163 /**
1164  * Test filter syntax in IDs. (J918)
1165  */
1166 void TransliteratorTest::TestFilterIDs(void) {
1167     // Array of 3n strings:
1168     // <id>, <inverse id>, <input>, <expected output>
1169     const char* DATA[] = {
1170         "[aeiou]Any-Hex", // ID
1171         "[aeiou]Hex-Any", // expected inverse ID
1172         "quizzical",      // src
1173         "q\\u0075\\u0069zz\\u0069c\\u0061l", // expected ID.translit(src)
1174
1175         "[aeiou]Any-Hex;[^5]Hex-Any",
1176         "[^5]Any-Hex;[aeiou]Hex-Any",
1177         "quizzical",
1178         "q\\u0075izzical",
1179
1180         "[abc]Null",
1181         "[abc]Null",
1182         "xyz",
1183         "xyz",
1184     };
1185     enum { DATA_length = sizeof(DATA) / sizeof(DATA[0]) };
1186
1187     for (int i=0; i<DATA_length; i+=4) {
1188         UnicodeString ID(DATA[i], "");
1189         UnicodeString uID(DATA[i+1], "");
1190         UnicodeString data2(DATA[i+2], "");
1191         UnicodeString data3(DATA[i+3], "");
1192         UParseError parseError;
1193         UErrorCode status = U_ZERO_ERROR;
1194         Transliterator *t = Transliterator::createInstance(ID, UTRANS_FORWARD, parseError, status);
1195         if (t == 0) {
1196             errln("FAIL: createInstance(" + ID + ") returned NULL");
1197             return;
1198         }
1199         expect(*t, data2, data3);
1200
1201         // Check the ID
1202         if (ID != t->getID()) {
1203             errln("FAIL: createInstance(" + ID + ").getID() => " +
1204                   t->getID());
1205         }
1206
1207         // Check the inverse
1208         Transliterator *u = t->createInverse(status);
1209         if (u == 0) {
1210             errln("FAIL: " + ID + ".createInverse() returned NULL");
1211         } else if (u->getID() != uID) {
1212             errln("FAIL: " + ID + ".createInverse().getID() => " +
1213                   u->getID() + ", expected " + uID);
1214         }
1215
1216         delete t;
1217         delete u;
1218     }
1219 }
1220
1221 /**
1222  * Test the case mapping transliterators.
1223  */
1224 void TransliteratorTest::TestCaseMap(void) {
1225     UParseError parseError;
1226     UErrorCode status = U_ZERO_ERROR;
1227     Transliterator* toUpper =
1228         Transliterator::createInstance("Any-Upper[^xyzXYZ]", UTRANS_FORWARD, parseError, status);
1229     Transliterator* toLower =
1230         Transliterator::createInstance("Any-Lower[^xyzXYZ]", UTRANS_FORWARD, parseError, status);
1231     Transliterator* toTitle =
1232         Transliterator::createInstance("Any-Title[^xyzXYZ]", UTRANS_FORWARD, parseError, status);
1233     if (toUpper==0 || toLower==0 || toTitle==0) {
1234         errln("FAIL: createInstance returned NULL");
1235         delete toUpper;
1236         delete toLower;
1237         delete toTitle;
1238         return;
1239     }
1240
1241     expect(*toUpper, "The quick brown fox jumped over the lazy dogs.",
1242            "THE QUICK BROWN FOx JUMPED OVER THE LAzy DOGS.");
1243     expect(*toLower, "The quIck brown fOX jUMPED OVER THE LAzY dogs.",
1244            "the quick brown foX jumped over the lazY dogs.");
1245     expect(*toTitle, "the quick brown foX can't jump over the laZy dogs.",
1246            "The Quick Brown FoX Can't Jump Over The LaZy Dogs.");
1247
1248     delete toUpper;
1249     delete toLower;
1250     delete toTitle;
1251 }
1252
1253 /**
1254  * Test the name mapping transliterators.
1255  */
1256 void TransliteratorTest::TestNameMap(void) {
1257     UParseError parseError;
1258     UErrorCode status = U_ZERO_ERROR;
1259     Transliterator* uni2name =
1260         Transliterator::createInstance("Any-Name[^abc]", UTRANS_FORWARD, parseError, status);
1261     Transliterator* name2uni =
1262         Transliterator::createInstance("Name-Any", UTRANS_FORWARD, parseError, status);
1263     if (uni2name==0 || name2uni==0) {
1264         errln("FAIL: createInstance returned NULL");
1265         delete uni2name;
1266         delete name2uni;
1267         return;
1268     }
1269
1270     // Careful:  CharsToUS will convert "\\N" => "N"; use "\\\\N" for \N
1271     expect(*uni2name, CharsToUnicodeString("\\u00A0abc\\u4E01\\u00B5\\u0A81\\uFFFD\\u0004\\u0009\\u0081\\uFFFF"),
1272            CharsToUnicodeString("\\\\N{NO-BREAK SPACE}abc\\\\N{CJK UNIFIED IDEOGRAPH-4E01}\\\\N{MICRO SIGN}\\\\N{GUJARATI SIGN CANDRABINDU}\\\\N{REPLACEMENT CHARACTER}\\\\N{END OF TRANSMISSION}\\\\N{CHARACTER TABULATION}\\\\N{<control-0081>}\\\\N{<noncharacter-FFFF>}"));
1273     expect(*name2uni, UNICODE_STRING_SIMPLE("{\\N { NO-BREAK SPACE}abc\\N{  CJK UNIFIED  IDEOGRAPH-4E01  }\\N{x\\N{MICRO SIGN}\\N{GUJARATI SIGN CANDRABINDU}\\N{REPLACEMENT CHARACTER}\\N{END OF TRANSMISSION}\\N{CHARACTER TABULATION}\\N{<control-0081>}\\N{<noncharacter-FFFF>}\\N{<control-0004>}\\N{"),
1274            CharsToUnicodeString("{\\u00A0abc\\u4E01\\\\N{x\\u00B5\\u0A81\\uFFFD\\u0004\\u0009\\u0081\\uFFFF\\u0004\\\\N{"));
1275
1276     delete uni2name;
1277     delete name2uni;
1278
1279     // round trip
1280     Transliterator* t =
1281         Transliterator::createInstance("Any-Name;Name-Any", UTRANS_FORWARD, parseError, status);
1282     if (t==0) {
1283         errln("FAIL: createInstance returned NULL");
1284         delete t;
1285         return;
1286     }
1287
1288     // Careful:  CharsToUS will convert "\\N" => "N"; use "\\\\N" for \N
1289     UnicodeString s = CharsToUnicodeString("{\\u00A0abc\\u4E01\\\\N{x\\u00B5\\u0A81\\uFFFD\\u0004\\u0009\\u0081\\uFFFF\\u0004\\\\N{");
1290     expect(*t, s, s);
1291     delete t;
1292 }
1293
1294 /**
1295  * Test liberalized ID syntax.  1006c
1296  */
1297 void TransliteratorTest::TestLiberalizedID(void) {
1298     // Some test cases have an expected getID() value of NULL.  This
1299     // means I have disabled the test case for now.  This stuff is
1300     // still under development, and I haven't decided whether to make
1301     // getID() return canonical case yet.  It will all get rewritten
1302     // with the move to Source-Target/Variant IDs anyway. [aliu]
1303     const char* DATA[] = {
1304         "latin-greek", NULL /*"Latin-Greek"*/, "case insensitivity",
1305         "  Null  ", "Null", "whitespace",
1306         " Latin[a-z]-Greek  ", "[a-z]Latin-Greek", "inline filter",
1307         "  null  ; latin-greek  ", NULL /*"Null;Latin-Greek"*/, "compound whitespace",
1308     };
1309     const int32_t DATA_length = sizeof(DATA)/sizeof(DATA[0]);
1310     UParseError parseError;
1311     UErrorCode status= U_ZERO_ERROR;
1312     for (int32_t i=0; i<DATA_length; i+=3) {
1313         Transliterator *t = Transliterator::createInstance(DATA[i], UTRANS_FORWARD, parseError, status);
1314         if (t == 0) {
1315             errln(UnicodeString("FAIL: ") + DATA[i+2] +
1316                   " cannot create ID \"" + DATA[i] + "\"");
1317         } else {
1318             UnicodeString exp;
1319             if (DATA[i+1]) {
1320                 exp = UnicodeString(DATA[i+1], "");
1321             }
1322             // Don't worry about getID() if the expected char*
1323             // is NULL -- see above.
1324             if (exp.length() == 0 || exp == t->getID()) {
1325                 logln(UnicodeString("Ok: ") + DATA[i+2] +
1326                       " create ID \"" + DATA[i] + "\" => \"" +
1327                       exp + "\"");
1328             } else {
1329                 errln(UnicodeString("FAIL: ") + DATA[i+2] +
1330                       " create ID \"" + DATA[i] + "\" => \"" +
1331                       t->getID() + "\", exp \"" + exp + "\"");
1332             }
1333             delete t;
1334         }
1335     }
1336 }
1337
1338 /* test for Jitterbug 912 */
1339 void TransliteratorTest::TestCreateInstance(){
1340     const char* FORWARD = "F";
1341     const char* REVERSE = "R";
1342     const char* DATA[] = {
1343         // Column 1: id
1344         // Column 2: direction
1345         // Column 3: expected ID, or "" if expect failure
1346         "Latin-Hangul", REVERSE, "Hangul-Latin", // JB#912
1347
1348         // JB#2689: bad compound causes crash
1349         "InvalidSource-InvalidTarget", FORWARD, "",
1350         "InvalidSource-InvalidTarget", REVERSE, "",
1351         "Hex-Any;InvalidSource-InvalidTarget", FORWARD, "",
1352         "Hex-Any;InvalidSource-InvalidTarget", REVERSE, "",
1353         "InvalidSource-InvalidTarget;Hex-Any", FORWARD, "",
1354         "InvalidSource-InvalidTarget;Hex-Any", REVERSE, "",
1355
1356         NULL
1357     };
1358
1359     for (int32_t i=0; DATA[i]; i+=3) {
1360         UParseError err;
1361         UErrorCode ec = U_ZERO_ERROR;
1362         UnicodeString id(DATA[i]);
1363         UTransDirection dir = (DATA[i+1]==FORWARD)?
1364             UTRANS_FORWARD:UTRANS_REVERSE;
1365         UnicodeString expID(DATA[i+2]);
1366         Transliterator* t =
1367             Transliterator::createInstance(id,dir,err,ec);
1368         UnicodeString newID;
1369         if (t) {
1370             newID = t->getID();
1371         }
1372         UBool ok = (newID == expID);
1373         if (!t) {
1374             newID = u_errorName(ec);
1375         }
1376         if (ok) {
1377             logln((UnicodeString)"Ok: createInstance(" +
1378                   id + "," + DATA[i+1] + ") => " + newID);
1379         } else {
1380             errln((UnicodeString)"FAIL: createInstance(" +
1381                   id + "," + DATA[i+1] + ") => " + newID +
1382                   ", expected " + expID);
1383         }
1384         delete t;
1385     }
1386 }
1387
1388 /**
1389  * Test the normalization transliterator.
1390  */
1391 void TransliteratorTest::TestNormalizationTransliterator() {
1392     // THE FOLLOWING TWO TABLES ARE COPIED FROM com.ibm.test.normalizer.BasicTest
1393     // PLEASE KEEP THEM IN SYNC WITH BasicTest.
1394     const char* CANON[] = {
1395         // Input               Decomposed            Composed
1396         "cat",                "cat",                "cat"               ,
1397         "\\u00e0ardvark",      "a\\u0300ardvark",     "\\u00e0ardvark"    ,
1398
1399         "\\u1e0a",             "D\\u0307",            "\\u1e0a"            , // D-dot_above
1400         "D\\u0307",            "D\\u0307",            "\\u1e0a"            , // D dot_above
1401
1402         "\\u1e0c\\u0307",       "D\\u0323\\u0307",      "\\u1e0c\\u0307"      , // D-dot_below dot_above
1403         "\\u1e0a\\u0323",       "D\\u0323\\u0307",      "\\u1e0c\\u0307"      , // D-dot_above dot_below
1404         "D\\u0307\\u0323",      "D\\u0323\\u0307",      "\\u1e0c\\u0307"      , // D dot_below dot_above
1405
1406         "\\u1e10\\u0307\\u0323", "D\\u0327\\u0323\\u0307","\\u1e10\\u0323\\u0307", // D dot_below cedilla dot_above
1407         "D\\u0307\\u0328\\u0323","D\\u0328\\u0323\\u0307","\\u1e0c\\u0328\\u0307", // D dot_above ogonek dot_below
1408
1409         "\\u1E14",             "E\\u0304\\u0300",      "\\u1E14"            , // E-macron-grave
1410         "\\u0112\\u0300",       "E\\u0304\\u0300",      "\\u1E14"            , // E-macron + grave
1411         "\\u00c8\\u0304",       "E\\u0300\\u0304",      "\\u00c8\\u0304"      , // E-grave + macron
1412
1413         "\\u212b",             "A\\u030a",            "\\u00c5"            , // angstrom_sign
1414         "\\u00c5",             "A\\u030a",            "\\u00c5"            , // A-ring
1415
1416         "\\u00fdffin",         "y\\u0301ffin",        "\\u00fdffin"        ,    //updated with 3.0
1417         "\\u00fd\\uFB03n",      "y\\u0301\\uFB03n",     "\\u00fd\\uFB03n"     , //updated with 3.0
1418
1419         "Henry IV",           "Henry IV",           "Henry IV"          ,
1420         "Henry \\u2163",       "Henry \\u2163",       "Henry \\u2163"      ,
1421
1422         "\\u30AC",             "\\u30AB\\u3099",       "\\u30AC"            , // ga (Katakana)
1423         "\\u30AB\\u3099",       "\\u30AB\\u3099",       "\\u30AC"            , // ka + ten
1424         "\\uFF76\\uFF9E",       "\\uFF76\\uFF9E",       "\\uFF76\\uFF9E"      , // hw_ka + hw_ten
1425         "\\u30AB\\uFF9E",       "\\u30AB\\uFF9E",       "\\u30AB\\uFF9E"      , // ka + hw_ten
1426         "\\uFF76\\u3099",       "\\uFF76\\u3099",       "\\uFF76\\u3099"      , // hw_ka + ten
1427
1428         "A\\u0300\\u0316",      "A\\u0316\\u0300",      "\\u00C0\\u0316"      ,
1429         0 // end
1430     };
1431
1432     const char* COMPAT[] = {
1433         // Input               Decomposed            Composed
1434         "\\uFB4f",             "\\u05D0\\u05DC",       "\\u05D0\\u05DC"     , // Alef-Lamed vs. Alef, Lamed
1435
1436         "\\u00fdffin",         "y\\u0301ffin",        "\\u00fdffin"        ,    //updated for 3.0
1437         "\\u00fd\\uFB03n",      "y\\u0301ffin",        "\\u00fdffin"        , // ffi ligature -> f + f + i
1438
1439         "Henry IV",           "Henry IV",           "Henry IV"          ,
1440         "Henry \\u2163",       "Henry IV",           "Henry IV"          ,
1441
1442         "\\u30AC",             "\\u30AB\\u3099",       "\\u30AC"            , // ga (Katakana)
1443         "\\u30AB\\u3099",       "\\u30AB\\u3099",       "\\u30AC"            , // ka + ten
1444
1445         "\\uFF76\\u3099",       "\\u30AB\\u3099",       "\\u30AC"            , // hw_ka + ten
1446         0 // end
1447     };
1448
1449     int32_t i;
1450     UParseError parseError;
1451     UErrorCode status = U_ZERO_ERROR;
1452     Transliterator* NFD = Transliterator::createInstance("NFD", UTRANS_FORWARD, parseError, status);
1453     Transliterator* NFC = Transliterator::createInstance("NFC", UTRANS_FORWARD, parseError, status);
1454     if (!NFD || !NFC) {
1455         errln("FAIL: createInstance failed");
1456         delete NFD;
1457         delete NFC;
1458         return;
1459     }
1460     for (i=0; CANON[i]; i+=3) {
1461         UnicodeString in = CharsToUnicodeString(CANON[i]);
1462         UnicodeString expd = CharsToUnicodeString(CANON[i+1]);
1463         UnicodeString expc = CharsToUnicodeString(CANON[i+2]);
1464         expect(*NFD, in, expd);
1465         expect(*NFC, in, expc);
1466     }
1467     delete NFD;
1468     delete NFC;
1469
1470     Transliterator* NFKD = Transliterator::createInstance("NFKD", UTRANS_FORWARD, parseError, status);
1471     Transliterator* NFKC = Transliterator::createInstance("NFKC", UTRANS_FORWARD, parseError, status);
1472     if (!NFKD || !NFKC) {
1473         errln("FAIL: createInstance failed");
1474         delete NFKD;
1475         delete NFKC;
1476         return;
1477     }
1478     for (i=0; COMPAT[i]; i+=3) {
1479         UnicodeString in = CharsToUnicodeString(COMPAT[i]);
1480         UnicodeString expkd = CharsToUnicodeString(COMPAT[i+1]);
1481         UnicodeString expkc = CharsToUnicodeString(COMPAT[i+2]);
1482         expect(*NFKD, in, expkd);
1483         expect(*NFKC, in, expkc);
1484     }
1485     delete NFKD;
1486     delete NFKC;
1487
1488     UParseError pe;
1489     status = U_ZERO_ERROR;
1490     Transliterator *t = Transliterator::createInstance("NFD; [x]Remove",
1491                                                        UTRANS_FORWARD,
1492                                                        pe, status);
1493     if (t == 0) {
1494         errln("FAIL: createInstance failed");
1495     }
1496     expect(*t, CharsToUnicodeString("\\u010dx"),
1497            CharsToUnicodeString("c\\u030C"));
1498     delete t;
1499 }
1500
1501 /**
1502  * Test compound RBT rules.
1503  */
1504 void TransliteratorTest::TestCompoundRBT(void) {
1505     // Careful with spacing and ';' here:  Phrase this exactly
1506     // as toRules() is going to return it.  If toRules() changes
1507     // with regard to spacing or ';', then adjust this string.
1508     UnicodeString rule("::Hex-Any;\n"
1509                        "::Any-Lower;\n"
1510                        "a > '.A.';\n"
1511                        "b > '.B.';\n"
1512                        "::[^t]Any-Upper;", "");
1513     UParseError parseError;
1514     UErrorCode status = U_ZERO_ERROR;
1515     Transliterator *t = Transliterator::createFromRules("Test", rule, UTRANS_FORWARD, parseError, status);
1516     if (t == 0) {
1517         errln("FAIL: createFromRules failed");
1518         return;
1519     }
1520     expect(*t, UNICODE_STRING_SIMPLE("\\u0043at in the hat, bat on the mat"),
1521            "C.A.t IN tHE H.A.t, .B..A.t ON tHE M.A.t");
1522     UnicodeString r;
1523     t->toRules(r, TRUE);
1524     if (r == rule) {
1525         logln((UnicodeString)"OK: toRules() => " + r);
1526     } else {
1527         errln((UnicodeString)"FAIL: toRules() => " + r +
1528               ", expected " + rule);
1529     }
1530     delete t;
1531
1532     // Now test toRules
1533     t = Transliterator::createInstance("Greek-Latin; Latin-Cyrillic", UTRANS_FORWARD, parseError, status);
1534     if (t == 0) {
1535         errln("FAIL: createInstance failed");
1536         return;
1537     }
1538     UnicodeString exp("::Greek-Latin;\n::Latin-Cyrillic;");
1539     t->toRules(r, TRUE);
1540     if (r != exp) {
1541         errln((UnicodeString)"FAIL: toRules() => " + r +
1542               ", expected " + exp);
1543     } else {
1544         logln((UnicodeString)"OK: toRules() => " + r);
1545     }
1546     delete t;
1547
1548     // Round trip the result of toRules
1549     t = Transliterator::createFromRules("Test", r, UTRANS_FORWARD, parseError, status);
1550     if (t == 0) {
1551         errln("FAIL: createFromRules #2 failed");
1552         return;
1553     } else {
1554         logln((UnicodeString)"OK: createFromRules(" + r + ") succeeded");
1555     }
1556
1557     // Test toRules again
1558     t->toRules(r, TRUE);
1559     if (r != exp) {
1560         errln((UnicodeString)"FAIL: toRules() => " + r +
1561               ", expected " + exp);
1562     } else {
1563         logln((UnicodeString)"OK: toRules() => " + r);
1564     }
1565
1566     delete t;
1567
1568     // Test Foo(Bar) IDs.  Careful with spacing in id; make it conform
1569     // to what the regenerated ID will look like.
1570     UnicodeString id("Upper(Lower);(NFKC)", "");
1571     t = Transliterator::createInstance(id, UTRANS_FORWARD, parseError, status);
1572     if (t == 0) {
1573         errln("FAIL: createInstance #2 failed");
1574         return;
1575     }
1576     if (t->getID() == id) {
1577         logln((UnicodeString)"OK: created " + id);
1578     } else {
1579         errln((UnicodeString)"FAIL: createInstance(" + id +
1580               ").getID() => " + t->getID());
1581     }
1582
1583     Transliterator *u = t->createInverse(status);
1584     if (u == 0) {
1585         errln("FAIL: createInverse failed");
1586         delete t;
1587         return;
1588     }
1589     exp = "NFKC();Lower(Upper)";
1590     if (u->getID() == exp) {
1591         logln((UnicodeString)"OK: createInverse(" + id + ") => " +
1592               u->getID());
1593     } else {
1594         errln((UnicodeString)"FAIL: createInverse(" + id + ") => " +
1595               u->getID());
1596     }
1597     delete t;
1598     delete u;
1599 }
1600
1601 /**
1602  * Compound filter semantics were orginially not implemented
1603  * correctly.  Originally, each component filter f(i) is replaced by
1604  * f'(i) = f(i) && g, where g is the filter for the compound
1605  * transliterator.
1606  *
1607  * From Mark:
1608  *
1609  * Suppose and I have a transliterator X. Internally X is
1610  * "Greek-Latin; Latin-Cyrillic; Any-Lower". I use a filter [^A].
1611  *
1612  * The compound should convert all greek characters (through latin) to
1613  * cyrillic, then lowercase the result. The filter should say "don't
1614  * touch 'A' in the original". But because an intermediate result
1615  * happens to go through "A", the Greek Alpha gets hung up.
1616  */
1617 void TransliteratorTest::TestCompoundFilter(void) {
1618     UParseError parseError;
1619     UErrorCode status = U_ZERO_ERROR;
1620     Transliterator *t = Transliterator::createInstance
1621         ("Greek-Latin; Latin-Greek; Lower", UTRANS_FORWARD, parseError, status);
1622     if (t == 0) {
1623         errln("FAIL: createInstance failed");
1624         return;
1625     }
1626     t->adoptFilter(new UnicodeSet("[^A]", status));
1627     if (U_FAILURE(status)) {
1628         errln("FAIL: UnicodeSet ct failed");
1629         delete t;
1630         return;
1631     }
1632
1633     // Only the 'A' at index 1 should remain unchanged
1634     expect(*t,
1635            CharsToUnicodeString("BA\\u039A\\u0391"),
1636            CharsToUnicodeString("\\u03b2A\\u03ba\\u03b1"));
1637     delete t;
1638 }
1639
1640 void TransliteratorTest::TestRemove(void) {
1641     UParseError parseError;
1642     UErrorCode status = U_ZERO_ERROR;
1643     Transliterator *t = Transliterator::createInstance("Remove[abc]", UTRANS_FORWARD, parseError, status);
1644     if (t == 0) {
1645         errln("FAIL: createInstance failed");
1646         return;
1647     }
1648
1649     expect(*t, "Able bodied baker's cats", "Ale odied ker's ts");
1650
1651     // extra test for RemoveTransliterator::clone(), which at one point wasn't
1652     // duplicating the filter
1653     Transliterator* t2 = t->clone();
1654     expect(*t2, "Able bodied baker's cats", "Ale odied ker's ts");
1655
1656     delete t;
1657     delete t2;
1658 }
1659
1660 void TransliteratorTest::TestToRules(void) {
1661     const char* RBT = "rbt";
1662     const char* SET = "set";
1663     static const char* DATA[] = {
1664         RBT,
1665         "$a=\\u4E61; [$a] > A;",
1666         "[\\u4E61] > A;",
1667
1668         RBT,
1669         "$white=[[:Zs:][:Zl:]]; $white{a} > A;",
1670         "[[:Zs:][:Zl:]]{a} > A;",
1671
1672         SET,
1673         "[[:Zs:][:Zl:]]",
1674         "[[:Zs:][:Zl:]]",
1675
1676         SET,
1677         "[:Ps:]",
1678         "[:Ps:]",
1679
1680         SET,
1681         "[:L:]",
1682         "[:L:]",
1683
1684         SET,
1685         "[[:L:]-[A]]",
1686         "[[:L:]-[A]]",
1687
1688         SET,
1689         "[~[:Lu:][:Ll:]]",
1690         "[~[:Lu:][:Ll:]]",
1691
1692         SET,
1693         "[~[a-z]]",
1694         "[~[a-z]]",
1695
1696         RBT,
1697         "$white=[:Zs:]; $black=[^$white]; $black{a} > A;",
1698         "[^[:Zs:]]{a} > A;",
1699
1700         RBT,
1701         "$a=[:Zs:]; $b=[[a-z]-$a]; $b{a} > A;",
1702         "[[a-z]-[:Zs:]]{a} > A;",
1703
1704         RBT,
1705         "$a=[:Zs:]; $b=[$a&[a-z]]; $b{a} > A;",
1706         "[[:Zs:]&[a-z]]{a} > A;",
1707
1708         RBT,
1709         "$a=[:Zs:]; $b=[x$a]; $b{a} > A;",
1710         "[x[:Zs:]]{a} > A;",
1711
1712         RBT,
1713         "$accentMinus = [ [\\u0300-\\u0345] & [:M:] - [\\u0338]] ;"
1714         "$macron = \\u0304 ;"
1715         "$evowel = [aeiouyAEIOUY] ;"
1716         "$iotasub = \\u0345 ;"
1717         "($evowel $macron $accentMinus *) i > | $1 $iotasub ;",
1718         "([AEIOUYaeiouy]\\u0304[[\\u0300-\\u0345]&[:M:]-[\\u0338]]*)i > | $1 \\u0345;",
1719
1720         RBT,
1721         "([AEIOUYaeiouy]\\u0304[[:M:]-[\\u0304\\u0345]]*)i > | $1 \\u0345;",
1722         "([AEIOUYaeiouy]\\u0304[[:M:]-[\\u0304\\u0345]]*)i > | $1 \\u0345;",
1723     };
1724     static const int32_t DATA_length = (int32_t)(sizeof(DATA) / sizeof(DATA[0]));
1725
1726     for (int32_t d=0; d < DATA_length; d+=3) {
1727         if (DATA[d] == RBT) {
1728             // Transliterator test
1729             UParseError parseError;
1730             UErrorCode status = U_ZERO_ERROR;
1731             Transliterator *t = Transliterator::createFromRules("ID",
1732                                                                 UnicodeString(DATA[d+1], -1, US_INV), UTRANS_FORWARD, parseError, status);
1733             if (t == 0) {
1734                 errln("FAIL: createFromRules failed");
1735                 return;
1736             }
1737             UnicodeString rules, escapedRules;
1738             t->toRules(rules, FALSE);
1739             t->toRules(escapedRules, TRUE);
1740             UnicodeString expRules = CharsToUnicodeString(DATA[d+2]);
1741             UnicodeString expEscapedRules(DATA[d+2], -1, US_INV);
1742             if (rules == expRules) {
1743                 logln((UnicodeString)"Ok: " + UnicodeString(DATA[d+1], -1, US_INV) +
1744                       " => " + rules);
1745             } else {
1746                 errln((UnicodeString)"FAIL: " + UnicodeString(DATA[d+1], -1, US_INV) +
1747                       " => " + rules + ", exp " + expRules);
1748             }
1749             if (escapedRules == expEscapedRules) {
1750                 logln((UnicodeString)"Ok: " + UnicodeString(DATA[d+1], -1, US_INV) +
1751                       " => " + escapedRules);
1752             } else {
1753                 errln((UnicodeString)"FAIL: " + UnicodeString(DATA[d+1], -1, US_INV) +
1754                       " => " + escapedRules + ", exp " + expEscapedRules);
1755             }
1756             delete t;
1757
1758         } else {
1759             // UnicodeSet test
1760             UErrorCode status = U_ZERO_ERROR;
1761             UnicodeString pat(DATA[d+1], -1, US_INV);
1762             UnicodeString expToPat(DATA[d+2], -1, US_INV);
1763             UnicodeSet set(pat, status);
1764             if (U_FAILURE(status)) {
1765                 errln("FAIL: UnicodeSet ct failed");
1766                 return;
1767             }
1768             // Adjust spacing etc. as necessary.
1769             UnicodeString toPat;
1770             set.toPattern(toPat);
1771             if (expToPat == toPat) {
1772                 logln((UnicodeString)"Ok: " + pat +
1773                       " => " + toPat);
1774             } else {
1775                 errln((UnicodeString)"FAIL: " + pat +
1776                       " => " + prettify(toPat, TRUE) +
1777                       ", exp " + prettify(pat, TRUE));
1778             }
1779         }
1780     }
1781 }
1782
1783 void TransliteratorTest::TestContext() {
1784     UTransPosition pos = {0, 2, 0, 1}; // cs cl s l
1785     expect("de > x; {d}e > y;",
1786            "de",
1787            "ye",
1788            &pos);
1789
1790     expect("ab{c} > z;",
1791            "xadabdabcy",
1792            "xadabdabzy");
1793 }
1794
1795 void TransliteratorTest::TestSupplemental() {
1796
1797     expect(CharsToUnicodeString("$a=\\U00010300; $s=[\\U00010300-\\U00010323];"
1798                                 "a > $a; $s > i;"),
1799            CharsToUnicodeString("ab\\U0001030Fx"),
1800            CharsToUnicodeString("\\U00010300bix"));
1801
1802     expect(CharsToUnicodeString("$a=[a-z\\U00010300-\\U00010323];"
1803                                 "$b=[A-Z\\U00010400-\\U0001044D];"
1804                                 "($a)($b) > $2 $1;"),
1805            CharsToUnicodeString("aB\\U00010300\\U00010400c\\U00010401\\U00010301D"),
1806            CharsToUnicodeString("Ba\\U00010400\\U00010300\\U00010401cD\\U00010301"));
1807
1808     // k|ax\\U00010300xm
1809
1810     // k|a\\U00010400\\U00010300xm
1811     // ky|\\U00010400\\U00010300xm
1812     // ky\\U00010400|\\U00010300xm
1813
1814     // ky\\U00010400|\\U00010300\\U00010400m
1815     // ky\\U00010400y|\\U00010400m
1816     expect(CharsToUnicodeString("$a=[a\\U00010300-\\U00010323];"
1817                                 "$a {x} > | @ \\U00010400;"
1818                                 "{$a} [^\\u0000-\\uFFFF] > y;"),
1819            CharsToUnicodeString("kax\\U00010300xm"),
1820            CharsToUnicodeString("ky\\U00010400y\\U00010400m"));
1821
1822     expectT("Any-Name",
1823            CharsToUnicodeString("\\U00010330\\U000E0061\\u00A0"),
1824            UNICODE_STRING_SIMPLE("\\N{GOTHIC LETTER AHSA}\\N{TAG LATIN SMALL LETTER A}\\N{NO-BREAK SPACE}"));
1825
1826     expectT("Any-Hex/Unicode",
1827            CharsToUnicodeString("\\U00010330\\U0010FF00\\U000E0061\\u00A0"),
1828            UNICODE_STRING_SIMPLE("U+10330U+10FF00U+E0061U+00A0"));
1829
1830     expectT("Any-Hex/C",
1831            CharsToUnicodeString("\\U00010330\\U0010FF00\\U000E0061\\u00A0"),
1832            UNICODE_STRING_SIMPLE("\\U00010330\\U0010FF00\\U000E0061\\u00A0"));
1833
1834     expectT("Any-Hex/Perl",
1835            CharsToUnicodeString("\\U00010330\\U0010FF00\\U000E0061\\u00A0"),
1836            UNICODE_STRING_SIMPLE("\\x{10330}\\x{10FF00}\\x{E0061}\\x{A0}"));
1837
1838     expectT("Any-Hex/Java",
1839            CharsToUnicodeString("\\U00010330\\U0010FF00\\U000E0061\\u00A0"),
1840            UNICODE_STRING_SIMPLE("\\uD800\\uDF30\\uDBFF\\uDF00\\uDB40\\uDC61\\u00A0"));
1841
1842     expectT("Any-Hex/XML",
1843            CharsToUnicodeString("\\U00010330\\U0010FF00\\U000E0061\\u00A0"),
1844            "&#x10330;&#x10FF00;&#xE0061;&#xA0;");
1845
1846     expectT("Any-Hex/XML10",
1847            CharsToUnicodeString("\\U00010330\\U0010FF00\\U000E0061\\u00A0"),
1848            "&#66352;&#1113856;&#917601;&#160;");
1849
1850     expectT(UNICODE_STRING_SIMPLE("[\\U000E0000-\\U000E0FFF] Remove"),
1851            CharsToUnicodeString("\\U00010330\\U0010FF00\\U000E0061\\u00A0"),
1852            CharsToUnicodeString("\\U00010330\\U0010FF00\\u00A0"));
1853 }
1854
1855 void TransliteratorTest::TestQuantifier() {
1856
1857     // Make sure @ in a quantified anteContext works
1858     expect("a+ {b} > | @@ c; A > a; (a+ c) > '(' $1 ')';",
1859            "AAAAAb",
1860            "aaa(aac)");
1861
1862     // Make sure @ in a quantified postContext works
1863     expect("{b} a+ > c @@ |; (a+) > '(' $1 ')';",
1864            "baaaaa",
1865            "caa(aaa)");
1866
1867     // Make sure @ in a quantified postContext with seg ref works
1868     expect("{(b)} a+ > $1 @@ |; (a+) > '(' $1 ')';",
1869            "baaaaa",
1870            "baa(aaa)");
1871
1872     // Make sure @ past ante context doesn't enter ante context
1873     UTransPosition pos = {0, 5, 3, 5};
1874     expect("a+ {b} > | @@ c; x > y; (a+ c) > '(' $1 ')';",
1875            "xxxab",
1876            "xxx(ac)",
1877            &pos);
1878
1879     // Make sure @ past post context doesn't pass limit
1880     UTransPosition pos2 = {0, 4, 0, 2};
1881     expect("{b} a+ > c @@ |; x > y; a > A;",
1882            "baxx",
1883            "caxx",
1884            &pos2);
1885
1886     // Make sure @ past post context doesn't enter post context
1887     expect("{b} a+ > c @@ |; x > y; a > A;",
1888            "baxx",
1889            "cayy");
1890
1891     expect("(ab)? c > d;",
1892            "c abc ababc",
1893            "d d abd");
1894
1895     // NOTE: The (ab)+ when referenced just yields a single "ab",
1896     // not the full sequence of them.  This accords with perl behavior.
1897     expect("(ab)+ {x} > '(' $1 ')';",
1898            "x abx ababxy",
1899            "x ab(ab) abab(ab)y");
1900
1901     expect("b+ > x;",
1902            "ac abc abbc abbbc",
1903            "ac axc axc axc");
1904
1905     expect("[abc]+ > x;",
1906            "qac abrc abbcs abtbbc",
1907            "qx xrx xs xtx");
1908
1909     expect("q{(ab)+} > x;",
1910            "qa qab qaba qababc qaba",
1911            "qa qx qxa qxc qxa");
1912
1913     expect("q(ab)* > x;",
1914            "qa qab qaba qababc",
1915            "xa x xa xc");
1916
1917     // NOTE: The (ab)+ when referenced just yields a single "ab",
1918     // not the full sequence of them.  This accords with perl behavior.
1919     expect("q(ab)* > '(' $1 ')';",
1920            "qa qab qaba qababc",
1921            "()a (ab) (ab)a (ab)c");
1922
1923     // 'foo'+ and 'foo'* -- the quantifier should apply to the entire
1924     // quoted string
1925     expect("'ab'+ > x;",
1926            "bb ab ababb",
1927            "bb x xb");
1928
1929     // $foo+ and $foo* -- the quantifier should apply to the entire
1930     // variable reference
1931     expect("$var = ab; $var+ > x;",
1932            "bb ab ababb",
1933            "bb x xb");
1934 }
1935
1936 class TestTrans : public Transliterator {
1937 public:
1938     TestTrans(const UnicodeString& id) : Transliterator(id, 0) {
1939     }
1940     virtual Transliterator* clone(void) const {
1941         return new TestTrans(getID());
1942     }
1943     virtual void handleTransliterate(Replaceable& /*text*/, UTransPosition& offsets,
1944         UBool /*isIncremental*/) const
1945     {
1946         offsets.start = offsets.limit;
1947     }
1948     virtual UClassID getDynamicClassID() const;
1949     static UClassID U_EXPORT2 getStaticClassID();
1950 };
1951 UOBJECT_DEFINE_RTTI_IMPLEMENTATION(TestTrans)
1952
1953 /**
1954  * Test Source-Target/Variant.
1955  */
1956 void TransliteratorTest::TestSTV(void) {
1957     int32_t ns = Transliterator::countAvailableSources();
1958     if (ns < 0 || ns > 255) {
1959         errln((UnicodeString)"FAIL: Bad source count: " + ns);
1960         return;
1961     }
1962     int32_t i, j;
1963     for (i=0; i<ns; ++i) {
1964         UnicodeString source;
1965         Transliterator::getAvailableSource(i, source);
1966         logln((UnicodeString)"" + i + ": " + source);
1967         if (source.length() == 0) {
1968             errln("FAIL: empty source");
1969             continue;
1970         }
1971         int32_t nt = Transliterator::countAvailableTargets(source);
1972         if (nt < 0 || nt > 255) {
1973             errln((UnicodeString)"FAIL: Bad target count: " + nt);
1974             continue;
1975         }
1976         for (int32_t j=0; j<nt; ++j) {
1977             UnicodeString target;
1978             Transliterator::getAvailableTarget(j, source, target);
1979             logln((UnicodeString)" " + j + ": " + target);
1980             if (target.length() == 0) {
1981                 errln("FAIL: empty target");
1982                 continue;
1983             }
1984             int32_t nv = Transliterator::countAvailableVariants(source, target);
1985             if (nv < 0 || nv > 255) {
1986                 errln((UnicodeString)"FAIL: Bad variant count: " + nv);
1987                 continue;
1988             }
1989             for (int32_t k=0; k<nv; ++k) {
1990                 UnicodeString variant;
1991                 Transliterator::getAvailableVariant(k, source, target, variant);
1992                 if (variant.length() == 0) {
1993                     logln((UnicodeString)"  " + k + ": <empty>");
1994                 } else {
1995                     logln((UnicodeString)"  " + k + ": " + variant);
1996                 }
1997             }
1998         }
1999     }
2000
2001     // Test registration
2002     const char* IDS[] = { "Fieruwer", "Seoridf-Sweorie", "Oewoir-Oweri/Vsie" };
2003     const char* FULL_IDS[] = { "Any-Fieruwer", "Seoridf-Sweorie", "Oewoir-Oweri/Vsie" };
2004     const char* SOURCES[] = { NULL, "Seoridf", "Oewoir" };
2005     for (i=0; i<3; ++i) {
2006         Transliterator *t = new TestTrans(IDS[i]);
2007         if (t == 0) {
2008             errln("FAIL: out of memory");
2009             return;
2010         }
2011         if (t->getID() != IDS[i]) {
2012             errln((UnicodeString)"FAIL: ID mismatch for " + IDS[i]);
2013             delete t;
2014             return;
2015         }
2016         Transliterator::registerInstance(t);
2017         UErrorCode status = U_ZERO_ERROR;
2018         t = Transliterator::createInstance(IDS[i], UTRANS_FORWARD, status);
2019         if (t == NULL) {
2020             errln((UnicodeString)"FAIL: Registration/creation failed for ID " +
2021                   IDS[i]);
2022         } else {
2023             logln((UnicodeString)"Ok: Registration/creation succeeded for ID " +
2024                   IDS[i]);
2025             delete t;
2026         }
2027         Transliterator::unregister(IDS[i]);
2028         t = Transliterator::createInstance(IDS[i], UTRANS_FORWARD, status);
2029         if (t != NULL) {
2030             errln((UnicodeString)"FAIL: Unregistration failed for ID " +
2031                   IDS[i]);
2032             delete t;
2033         }
2034     }
2035
2036     // Make sure getAvailable API reflects removal
2037     int32_t n = Transliterator::countAvailableIDs();
2038     for (i=0; i<n; ++i) {
2039         UnicodeString id = Transliterator::getAvailableID(i);
2040         for (j=0; j<3; ++j) {
2041             if (id.caseCompare(FULL_IDS[j],0)==0) {
2042                 errln((UnicodeString)"FAIL: unregister(" + id + ") failed");
2043             }
2044         }
2045     }
2046     n = Transliterator::countAvailableTargets("Any");
2047     for (i=0; i<n; ++i) {
2048         UnicodeString t;
2049         Transliterator::getAvailableTarget(i, "Any", t);
2050         if (t.caseCompare(IDS[0],0)==0) {
2051             errln((UnicodeString)"FAIL: unregister(Any-" + t + ") failed");
2052         }
2053     }
2054     n = Transliterator::countAvailableSources();
2055     for (i=0; i<n; ++i) {
2056         UnicodeString s;
2057         Transliterator::getAvailableSource(i, s);
2058         for (j=0; j<3; ++j) {
2059             if (SOURCES[j] == NULL) continue;
2060             if (s.caseCompare(SOURCES[j],0)==0) {
2061                 errln((UnicodeString)"FAIL: unregister(" + s + "-*) failed");
2062             }
2063         }
2064     }
2065 }
2066
2067 /**
2068  * Test inverse of Greek-Latin; Title()
2069  */
2070 void TransliteratorTest::TestCompoundInverse(void) {
2071     UParseError parseError;
2072     UErrorCode status = U_ZERO_ERROR;
2073     Transliterator *t = Transliterator::createInstance
2074         ("Greek-Latin; Title()", UTRANS_REVERSE,parseError, status);
2075     if (t == 0) {
2076         errln("FAIL: createInstance");
2077         return;
2078     }
2079     UnicodeString exp("(Title);Latin-Greek");
2080     if (t->getID() == exp) {
2081         logln("Ok: inverse of \"Greek-Latin; Title()\" is \"" +
2082               t->getID());
2083     } else {
2084         errln("FAIL: inverse of \"Greek-Latin; Title()\" is \"" +
2085               t->getID() + "\", expected \"" + exp + "\"");
2086     }
2087     delete t;
2088 }
2089
2090 /**
2091  * Test NFD chaining with RBT
2092  */
2093 void TransliteratorTest::TestNFDChainRBT() {
2094     UParseError pe;
2095     UErrorCode ec = U_ZERO_ERROR;
2096     Transliterator* t = Transliterator::createFromRules(
2097                                "TEST", "::NFD; aa > Q; a > q;",
2098                                UTRANS_FORWARD, pe, ec);
2099     if (t == NULL || U_FAILURE(ec)) {
2100         errln("FAIL: Transliterator::createFromRules failed with %s", u_errorName(ec));
2101         return;
2102     }
2103     expect(*t, "aa", "Q");
2104     delete t;
2105
2106     // TEMPORARY TESTS -- BEING DEBUGGED
2107 //=-    UnicodeString s, s2;
2108 //=-    t = Transliterator::createInstance("Latin-Devanagari", UTRANS_FORWARD, pe, ec);
2109 //=-    s = CharsToUnicodeString("rmk\\u1E63\\u0113t");
2110 //=-    s2 = CharsToUnicodeString("\\u0930\\u094D\\u092E\\u094D\\u0915\\u094D\\u0937\\u0947\\u0924\\u094D");
2111 //=-    expect(*t, s, s2);
2112 //=-    delete t;
2113 //=-
2114 //=-    t = Transliterator::createInstance("Devanagari-Latin", UTRANS_FORWARD, pe, ec);
2115 //=-    expect(*t, s2, s);
2116 //=-    delete t;
2117 //=-
2118 //=-    t = Transliterator::createInstance("Latin-Devanagari;Devanagari-Latin", UTRANS_FORWARD, pe, ec);
2119 //=-    s = CharsToUnicodeString("rmk\\u1E63\\u0113t");
2120 //=-    expect(*t, s, s);
2121 //=-    delete t;
2122
2123 //    const char* source[] = {
2124 //        /*
2125 //        "\\u015Br\\u012Bmad",
2126 //        "bhagavadg\\u012Bt\\u0101",
2127 //        "adhy\\u0101ya",
2128 //        "arjuna",
2129 //        "vi\\u1E63\\u0101da",
2130 //        "y\\u014Dga",
2131 //        "dhr\\u0325tar\\u0101\\u1E63\\u1E6Dra",
2132 //        "uv\\u0101cr\\u0325",
2133 //        */
2134 //        "rmk\\u1E63\\u0113t",
2135 //      //"dharmak\\u1E63\\u0113tr\\u0113",
2136 //        /*
2137 //        "kuruk\\u1E63\\u0113tr\\u0113",
2138 //        "samav\\u0113t\\u0101",
2139 //        "yuyutsava-\\u1E25",
2140 //        "m\\u0101mak\\u0101-\\u1E25",
2141 //     // "p\\u0101\\u1E47\\u1E0Dav\\u0101\\u015Bcaiva",
2142 //        "kimakurvata",
2143 //        "san\\u0304java",
2144 //        */
2145 //
2146 //        0
2147 //    };
2148 //    const char* expected[] = {
2149 //        /*
2150 //        "\\u0936\\u094d\\u0930\\u0940\\u092e\\u0926\\u094d",
2151 //        "\\u092d\\u0917\\u0935\\u0926\\u094d\\u0917\\u0940\\u0924\\u093e",
2152 //        "\\u0905\\u0927\\u094d\\u092f\\u093e\\u092f",
2153 //        "\\u0905\\u0930\\u094d\\u091c\\u0941\\u0928",
2154 //        "\\u0935\\u093f\\u0937\\u093e\\u0926",
2155 //        "\\u092f\\u094b\\u0917",
2156 //        "\\u0927\\u0943\\u0924\\u0930\\u093e\\u0937\\u094d\\u091f\\u094d\\u0930",
2157 //        "\\u0909\\u0935\\u093E\\u091A\\u0943",
2158 //        */
2159 //        "\\u0927",
2160 //        //"\\u0927\\u0930\\u094d\\u092e\\u0915\\u094d\\u0937\\u0947\\u0924\\u094d\\u0930\\u0947",
2161 //        /*
2162 //        "\\u0915\\u0941\\u0930\\u0941\\u0915\\u094d\\u0937\\u0947\\u0924\\u094d\\u0930\\u0947",
2163 //        "\\u0938\\u092e\\u0935\\u0947\\u0924\\u093e",
2164 //        "\\u092f\\u0941\\u092f\\u0941\\u0924\\u094d\\u0938\\u0935\\u0903",
2165 //        "\\u092e\\u093e\\u092e\\u0915\\u093e\\u0903",
2166 //    //  "\\u092a\\u093e\\u0923\\u094d\\u0921\\u0935\\u093e\\u0936\\u094d\\u091a\\u0948\\u0935",
2167 //        "\\u0915\\u093f\\u092e\\u0915\\u0941\\u0930\\u094d\\u0935\\u0924",
2168 //        "\\u0938\\u0902\\u091c\\u0935",
2169 //        */
2170 //        0
2171 //    };
2172 //    UErrorCode status = U_ZERO_ERROR;
2173 //    UParseError parseError;
2174 //    UnicodeString message;
2175 //    Transliterator* latinToDevToLatin=Transliterator::createInstance("Latin-Devanagari;Devanagari-Latin", UTRANS_FORWARD, parseError, status);
2176 //    Transliterator* devToLatinToDev=Transliterator::createInstance("Devanagari-Latin;Latin-Devanagari", UTRANS_FORWARD, parseError, status);
2177 //    if(U_FAILURE(status)){
2178 //        errln("FAIL: construction " +   UnicodeString(" Error: ") + u_errorName(status));
2179 //        errln("PreContext: " + prettify(parseError.preContext) + "PostContext: " + prettify( parseError.postContext) );
2180 //        delete latinToDevToLatin;
2181 //        delete devToLatinToDev;
2182 //        return;
2183 //    }
2184 //    UnicodeString gotResult;
2185 //    for(int i= 0; source[i] != 0; i++){
2186 //        gotResult = source[i];
2187 //        expect(*latinToDevToLatin,CharsToUnicodeString(source[i]),CharsToUnicodeString(source[i]));
2188 //        expect(*devToLatinToDev,CharsToUnicodeString(expected[i]),CharsToUnicodeString(expected[i]));
2189 //    }
2190 //    delete latinToDevToLatin;
2191 //    delete devToLatinToDev;
2192 }
2193
2194 /**
2195  * Inverse of "Null" should be "Null". (J21)
2196  */
2197 void TransliteratorTest::TestNullInverse() {
2198     UParseError pe;
2199     UErrorCode ec = U_ZERO_ERROR;
2200     Transliterator *t = Transliterator::createInstance("Null", UTRANS_FORWARD, pe, ec);
2201     if (t == 0 || U_FAILURE(ec)) {
2202         errln("FAIL: createInstance");
2203         return;
2204     }
2205     Transliterator *u = t->createInverse(ec);
2206     if (u == 0 || U_FAILURE(ec)) {
2207         errln("FAIL: createInverse");
2208         delete t;
2209         return;
2210     }
2211     if (u->getID() != "Null") {
2212         errln("FAIL: Inverse of Null should be Null");
2213     }
2214     delete t;
2215     delete u;
2216 }
2217
2218 /**
2219  * Check ID of inverse of alias. (J22)
2220  */
2221 void TransliteratorTest::TestAliasInverseID() {
2222     UnicodeString ID("Latin-Hangul", ""); // This should be any alias ID with an inverse
2223     UParseError pe;
2224     UErrorCode ec = U_ZERO_ERROR;
2225     Transliterator *t = Transliterator::createInstance(ID, UTRANS_FORWARD, pe, ec);
2226     if (t == 0 || U_FAILURE(ec)) {
2227         errln("FAIL: createInstance");
2228         return;
2229     }
2230     Transliterator *u = t->createInverse(ec);
2231     if (u == 0 || U_FAILURE(ec)) {
2232         errln("FAIL: createInverse");
2233         delete t;
2234         return;
2235     }
2236     UnicodeString exp = "Hangul-Latin";
2237     UnicodeString got = u->getID();
2238     if (got != exp) {
2239         errln((UnicodeString)"FAIL: Inverse of " + ID + " is " + got +
2240               ", expected " + exp);
2241     }
2242     delete t;
2243     delete u;
2244 }
2245
2246 /**
2247  * Test IDs of inverses of compound transliterators. (J20)
2248  */
2249 void TransliteratorTest::TestCompoundInverseID() {
2250     UnicodeString ID = "Latin-Jamo;NFC(NFD)";
2251     UParseError pe;
2252     UErrorCode ec = U_ZERO_ERROR;
2253     Transliterator *t = Transliterator::createInstance(ID, UTRANS_FORWARD, pe, ec);
2254     if (t == 0 || U_FAILURE(ec)) {
2255         errln("FAIL: createInstance");
2256         return;
2257     }
2258     Transliterator *u = t->createInverse(ec);
2259     if (u == 0 || U_FAILURE(ec)) {
2260         errln("FAIL: createInverse");
2261         delete t;
2262         return;
2263     }
2264     UnicodeString exp = "NFD(NFC);Jamo-Latin";
2265     UnicodeString got = u->getID();
2266     if (got != exp) {
2267         errln((UnicodeString)"FAIL: Inverse of " + ID + " is " + got +
2268               ", expected " + exp);
2269     }
2270     delete t;
2271     delete u;
2272 }
2273
2274 /**
2275  * Test undefined variable.
2276
2277  */
2278 void TransliteratorTest::TestUndefinedVariable() {
2279     UnicodeString rule = "$initial } a <> \\u1161;";
2280     UParseError pe;
2281     UErrorCode ec = U_ZERO_ERROR;
2282     Transliterator *t = Transliterator::createFromRules("<ID>", rule, UTRANS_FORWARD, pe, ec);
2283     delete t;
2284     if (U_FAILURE(ec)) {
2285         logln((UnicodeString)"OK: Got exception for " + rule + ", as expected: " +
2286               u_errorName(ec));
2287         return;
2288     }
2289     errln((UnicodeString)"Fail: bogus rule " + rule + " compiled with error " +
2290           u_errorName(ec));
2291 }
2292
2293 /**
2294  * Test empty context.
2295  */
2296 void TransliteratorTest::TestEmptyContext() {
2297     expect(" { a } > b;", "xay a ", "xby b ");
2298 }
2299
2300 /**
2301 * Test compound filter ID syntax
2302 */
2303 void TransliteratorTest::TestCompoundFilterID(void) {
2304     static const char* DATA[] = {
2305         // Col. 1 = ID or rule set (latter must start with #)
2306
2307         // = columns > 1 are null if expect col. 1 to be illegal =
2308
2309         // Col. 2 = direction, "F..." or "R..."
2310         // Col. 3 = source string
2311         // Col. 4 = exp result
2312
2313         "[abc]; [abc]", NULL, NULL, NULL, // multiple filters
2314         "Latin-Greek; [abc];", NULL, NULL, NULL, // misplaced filter
2315         "[b]; Latin-Greek; Upper; ([xyz])", "F", "abc", "a\\u0392c",
2316         "[b]; (Lower); Latin-Greek; Upper(); ([\\u0392])", "R", "\\u0391\\u0392\\u0393", "\\u0391b\\u0393",
2317         "#\n::[b]; ::Latin-Greek; ::Upper; ::([xyz]);", "F", "abc", "a\\u0392c",
2318         "#\n::[b]; ::(Lower); ::Latin-Greek; ::Upper(); ::([\\u0392]);", "R", "\\u0391\\u0392\\u0393", "\\u0391b\\u0393",
2319         NULL,
2320     };
2321
2322     for (int32_t i=0; DATA[i]; i+=4) {
2323         UnicodeString id = CharsToUnicodeString(DATA[i]);
2324         UTransDirection direction = (DATA[i+1] != NULL && DATA[i+1][0] == 'R') ?
2325             UTRANS_REVERSE : UTRANS_FORWARD;
2326         UnicodeString source;
2327         UnicodeString exp;
2328         if (DATA[i+2] != NULL) {
2329             source = CharsToUnicodeString(DATA[i+2]);
2330             exp = CharsToUnicodeString(DATA[i+3]);
2331         }
2332         UBool expOk = (DATA[i+1] != NULL);
2333         Transliterator* t = NULL;
2334         UParseError pe;
2335         UErrorCode ec = U_ZERO_ERROR;
2336         if (id.charAt(0) == 0x23/*#*/) {
2337             t = Transliterator::createFromRules("ID", id, direction, pe, ec);
2338         } else {
2339             t = Transliterator::createInstance(id, direction, pe, ec);
2340         }
2341         UBool ok = (t != NULL && U_SUCCESS(ec));
2342         UnicodeString transID;
2343         if (t!=0) {
2344             transID = t->getID();
2345         }
2346         else {
2347             transID = UnicodeString("NULL", "");
2348         }
2349         if (ok == expOk) {
2350             logln((UnicodeString)"Ok: " + id + " => " + transID + ", " +
2351                   u_errorName(ec));
2352             if (source.length() != 0) {
2353                 expect(*t, source, exp);
2354             }
2355             delete t;
2356         } else {
2357             errln((UnicodeString)"FAIL: " + id + " => " + transID + ", " +
2358                   u_errorName(ec));
2359         }
2360     }
2361 }
2362
2363 /**
2364  * Test new property set syntax
2365  */
2366 void TransliteratorTest::TestPropertySet() {
2367     expect(UNICODE_STRING_SIMPLE("a>A; \\p{Lu}>x; \\p{ANY}>y;"), "abcDEF", "Ayyxxx");
2368     expect("(.+)>'[' $1 ']';", " a stitch \n in time \r saves 9",
2369            "[ a stitch ]\n[ in time ]\r[ saves 9]");
2370 }
2371
2372 /**
2373  * Test various failure points of the new 2.0 engine.
2374  */
2375 void TransliteratorTest::TestNewEngine() {
2376     UParseError pe;
2377     UErrorCode ec = U_ZERO_ERROR;
2378     Transliterator *t = Transliterator::createInstance("Latin-Hiragana", UTRANS_FORWARD, pe, ec);
2379     if (t == 0 || U_FAILURE(ec)) {
2380         errln("FAIL: createInstance Latin-Hiragana");
2381         return;
2382     }
2383     // Katakana should be untouched
2384     expect(*t, CharsToUnicodeString("a\\u3042\\u30A2"),
2385            CharsToUnicodeString("\\u3042\\u3042\\u30A2"));
2386
2387     delete t;
2388
2389 #if 1
2390     // This test will only work if Transliterator.ROLLBACK is
2391     // true.  Otherwise, this test will fail, revealing a
2392     // limitation of global filters in incremental mode.
2393     Transliterator *a =
2394         Transliterator::createFromRules("a_to_A", "a > A;", UTRANS_FORWARD, pe, ec);
2395     Transliterator *A =
2396         Transliterator::createFromRules("A_to_b", "A > b;", UTRANS_FORWARD, pe, ec);
2397     if (U_FAILURE(ec)) {
2398         delete a;
2399         delete A;
2400         return;
2401     }
2402
2403     Transliterator* array[3];
2404     array[0] = a;
2405     array[1] = Transliterator::createInstance("NFD", UTRANS_FORWARD, pe, ec);
2406     array[2] = A;
2407     if (U_FAILURE(ec)) {
2408         errln("FAIL: createInstance NFD");
2409         delete a;
2410         delete A;
2411         delete array[1];
2412         return;
2413     }
2414
2415     t = new CompoundTransliterator(array, 3, new UnicodeSet("[:Ll:]", ec));
2416     if (U_FAILURE(ec)) {
2417         errln("FAIL: UnicodeSet constructor");
2418         delete a;
2419         delete A;
2420         delete array[1];
2421         delete t;
2422         return;
2423     }
2424
2425     expect(*t, "aAaA", "bAbA");
2426
2427     assertTrue("countElements", t->countElements() == 3);
2428     assertEquals("getElement(0)", t->getElement(0, ec).getID(), "a_to_A");
2429     assertEquals("getElement(1)", t->getElement(1, ec).getID(), "NFD");
2430     assertEquals("getElement(2)", t->getElement(2, ec).getID(), "A_to_b");
2431     assertSuccess("getElement", ec);
2432
2433     delete a;
2434     delete A;
2435     delete array[1];
2436     delete t;
2437 #endif
2438
2439     expect("$smooth = x; $macron = q; [:^L:] { ([aeiouyAEIOUY] $macron?) } [^aeiouyAEIOUY$smooth$macron] > | $1 $smooth ;",
2440            "a",
2441            "ax");
2442
2443     UnicodeString gr = CharsToUnicodeString(
2444         "$ddot = \\u0308 ;"
2445         "$lcgvowel = [\\u03b1\\u03b5\\u03b7\\u03b9\\u03bf\\u03c5\\u03c9] ;"
2446         "$rough = \\u0314 ;"
2447         "($lcgvowel+ $ddot?) $rough > h | $1 ;"
2448         "\\u03b1 <> a ;"
2449         "$rough <> h ;");
2450
2451     expect(gr, CharsToUnicodeString("\\u03B1\\u0314"), "ha");
2452 }
2453
2454 /**
2455  * Test quantified segment behavior.  We want:
2456  * ([abc])+ > x $1 x; applied to "cba" produces "xax"
2457  */
2458 void TransliteratorTest::TestQuantifiedSegment(void) {
2459     // The normal case
2460     expect("([abc]+) > x $1 x;", "cba", "xcbax");
2461
2462     // The tricky case; the quantifier is around the segment
2463     expect("([abc])+ > x $1 x;", "cba", "xax");
2464
2465     // Tricky case in reverse direction
2466     expect("([abc])+ { q > x $1 x;", "cbaq", "cbaxax");
2467
2468     // Check post-context segment
2469     expect("{q} ([a-d])+ > '(' $1 ')';", "ddqcba", "dd(a)cba");
2470
2471     // Test toRule/toPattern for non-quantified segment.
2472     // Careful with spacing here.
2473     UnicodeString r("([a-c]){q} > x $1 x;");
2474     UParseError pe;
2475     UErrorCode ec = U_ZERO_ERROR;
2476     Transliterator* t = Transliterator::createFromRules("ID", r, UTRANS_FORWARD, pe, ec);
2477     if (U_FAILURE(ec)) {
2478         errln("FAIL: createFromRules");
2479         delete t;
2480         return;
2481     }
2482     UnicodeString rr;
2483     t->toRules(rr, TRUE);
2484     if (r != rr) {
2485         errln((UnicodeString)"FAIL: \"" + r + "\" x toRules() => \"" + rr + "\"");
2486     } else {
2487         logln((UnicodeString)"Ok: \"" + r + "\" x toRules() => \"" + rr + "\"");
2488     }
2489     delete t;
2490
2491     // Test toRule/toPattern for quantified segment.
2492     // Careful with spacing here.
2493     r = "([a-c])+{q} > x $1 x;";
2494     t = Transliterator::createFromRules("ID", r, UTRANS_FORWARD, pe, ec);
2495     if (U_FAILURE(ec)) {
2496         errln("FAIL: createFromRules");
2497         delete t;
2498         return;
2499     }
2500     t->toRules(rr, TRUE);
2501     if (r != rr) {
2502         errln((UnicodeString)"FAIL: \"" + r + "\" x toRules() => \"" + rr + "\"");
2503     } else {
2504         logln((UnicodeString)"Ok: \"" + r + "\" x toRules() => \"" + rr + "\"");
2505     }
2506     delete t;
2507 }
2508
2509 //======================================================================
2510 // Ram's tests
2511 //======================================================================
2512 void TransliteratorTest::TestDevanagariLatinRT(){
2513     const int MAX_LEN= 52;
2514     const char* const source[MAX_LEN] = {
2515         "bh\\u0101rata",
2516         "kra",
2517         "k\\u1E63a",
2518         "khra",
2519         "gra",
2520         "\\u1E45ra",
2521         "cra",
2522         "chra",
2523         "j\\u00F1a",
2524         "jhra",
2525         "\\u00F1ra",
2526         "\\u1E6Dya",
2527         "\\u1E6Dhra",
2528         "\\u1E0Dya",
2529       //"r\\u0323ya", // \u095c is not valid in Devanagari
2530         "\\u1E0Dhya",
2531         "\\u1E5Bhra",
2532         "\\u1E47ra",
2533         "tta",
2534         "thra",
2535         "dda",
2536         "dhra",
2537         "nna",
2538         "pra",
2539         "phra",
2540         "bra",
2541         "bhra",
2542         "mra",
2543         "\\u1E49ra",
2544       //"l\\u0331ra",
2545         "yra",
2546         "\\u1E8Fra",
2547       //"l-",
2548         "vra",
2549         "\\u015Bra",
2550         "\\u1E63ra",
2551         "sra",
2552         "hma",
2553         "\\u1E6D\\u1E6Da",
2554         "\\u1E6D\\u1E6Dha",
2555         "\\u1E6Dh\\u1E6Dha",
2556         "\\u1E0D\\u1E0Da",
2557         "\\u1E0D\\u1E0Dha",
2558         "\\u1E6Dya",
2559         "\\u1E6Dhya",
2560         "\\u1E0Dya",
2561         "\\u1E0Dhya",
2562         // Not roundtrippable --
2563         // \\u0939\\u094d\\u094d\\u092E  - hma
2564         // \\u0939\\u094d\\u092E         - hma
2565         // CharsToUnicodeString("hma"),
2566         "hya",
2567         "\\u015Br\\u0325",
2568         "\\u015Bca",
2569         "\\u0115",
2570         "san\\u0304j\\u012Bb s\\u0113nagupta",
2571         "\\u0101nand vaddir\\u0101ju",
2572         "\\u0101",
2573         "a"
2574     };
2575     const char* const expected[MAX_LEN] = {
2576         "\\u092D\\u093E\\u0930\\u0924",   /* bha\\u0304rata */
2577         "\\u0915\\u094D\\u0930",          /* kra         */
2578         "\\u0915\\u094D\\u0937",          /* ks\\u0323a  */
2579         "\\u0916\\u094D\\u0930",          /* khra        */
2580         "\\u0917\\u094D\\u0930",          /* gra         */
2581         "\\u0919\\u094D\\u0930",          /* n\\u0307ra  */
2582         "\\u091A\\u094D\\u0930",          /* cra         */
2583         "\\u091B\\u094D\\u0930",          /* chra        */
2584         "\\u091C\\u094D\\u091E",          /* jn\\u0303a  */
2585         "\\u091D\\u094D\\u0930",          /* jhra        */
2586         "\\u091E\\u094D\\u0930",          /* n\\u0303ra  */
2587         "\\u091F\\u094D\\u092F",          /* t\\u0323ya  */
2588         "\\u0920\\u094D\\u0930",          /* t\\u0323hra */
2589         "\\u0921\\u094D\\u092F",          /* d\\u0323ya  */
2590       //"\\u095C\\u094D\\u092F",        /* r\\u0323ya  */ // \u095c is not valid in Devanagari
2591         "\\u0922\\u094D\\u092F",          /* d\\u0323hya */
2592         "\\u0922\\u093C\\u094D\\u0930",   /* r\\u0323hra */
2593         "\\u0923\\u094D\\u0930",          /* n\\u0323ra  */
2594         "\\u0924\\u094D\\u0924",          /* tta         */
2595         "\\u0925\\u094D\\u0930",          /* thra        */
2596         "\\u0926\\u094D\\u0926",          /* dda         */
2597         "\\u0927\\u094D\\u0930",          /* dhra        */
2598         "\\u0928\\u094D\\u0928",          /* nna         */
2599         "\\u092A\\u094D\\u0930",          /* pra         */
2600         "\\u092B\\u094D\\u0930",          /* phra        */
2601         "\\u092C\\u094D\\u0930",          /* bra         */
2602         "\\u092D\\u094D\\u0930",          /* bhra        */
2603         "\\u092E\\u094D\\u0930",          /* mra         */
2604         "\\u0929\\u094D\\u0930",          /* n\\u0331ra  */
2605       //"\\u0934\\u094D\\u0930",        /* l\\u0331ra  */
2606         "\\u092F\\u094D\\u0930",          /* yra         */
2607         "\\u092F\\u093C\\u094D\\u0930",   /* y\\u0307ra  */
2608       //"l-",
2609         "\\u0935\\u094D\\u0930",          /* vra         */
2610         "\\u0936\\u094D\\u0930",          /* s\\u0301ra  */
2611         "\\u0937\\u094D\\u0930",          /* s\\u0323ra  */
2612         "\\u0938\\u094D\\u0930",          /* sra         */
2613         "\\u0939\\u094d\\u092E",          /* hma         */
2614         "\\u091F\\u094D\\u091F",          /* t\\u0323t\\u0323a  */
2615         "\\u091F\\u094D\\u0920",          /* t\\u0323t\\u0323ha */
2616         "\\u0920\\u094D\\u0920",          /* t\\u0323ht\\u0323ha*/
2617         "\\u0921\\u094D\\u0921",          /* d\\u0323d\\u0323a  */
2618         "\\u0921\\u094D\\u0922",          /* d\\u0323d\\u0323ha */
2619         "\\u091F\\u094D\\u092F",          /* t\\u0323ya  */
2620         "\\u0920\\u094D\\u092F",          /* t\\u0323hya */
2621         "\\u0921\\u094D\\u092F",          /* d\\u0323ya  */
2622         "\\u0922\\u094D\\u092F",          /* d\\u0323hya */
2623      // "hma",                         /* hma         */
2624         "\\u0939\\u094D\\u092F",          /* hya         */
2625         "\\u0936\\u0943",                 /* s\\u0301r\\u0325a  */
2626         "\\u0936\\u094D\\u091A",          /* s\\u0301ca  */
2627         "\\u090d",                        /* e\\u0306    */
2628         "\\u0938\\u0902\\u091C\\u0940\\u092C\\u094D \\u0938\\u0947\\u0928\\u0917\\u0941\\u092A\\u094D\\u0924",
2629         "\\u0906\\u0928\\u0902\\u0926\\u094D \\u0935\\u0926\\u094D\\u0926\\u093F\\u0930\\u093E\\u091C\\u0941",
2630         "\\u0906",
2631         "\\u0905",
2632     };
2633     UErrorCode status = U_ZERO_ERROR;
2634     UParseError parseError;
2635     UnicodeString message;
2636     Transliterator* latinToDev=Transliterator::createInstance("Latin-Devanagari", UTRANS_FORWARD, parseError, status);
2637     Transliterator* devToLatin=Transliterator::createInstance("Devanagari-Latin", UTRANS_FORWARD, parseError, status);
2638     if(U_FAILURE(status)){
2639         errln("FAIL: construction " +   UnicodeString(" Error: ") + u_errorName(status));
2640         errln("PreContext: " + prettify(parseError.preContext) + " PostContext: " + prettify( parseError.postContext) );
2641         return;
2642     }
2643     UnicodeString gotResult;
2644     for(int i= 0; i<MAX_LEN; i++){
2645         gotResult = source[i];
2646         expect(*latinToDev,CharsToUnicodeString(source[i]),CharsToUnicodeString(expected[i]));
2647         expect(*devToLatin,CharsToUnicodeString(expected[i]),CharsToUnicodeString(source[i]));
2648     }
2649     delete latinToDev;
2650     delete devToLatin;
2651 }
2652
2653 void TransliteratorTest::TestTeluguLatinRT(){
2654     const int MAX_LEN=10;
2655     const char* const source[MAX_LEN] = {
2656         "raghur\\u0101m vi\\u015Bvan\\u0101dha",                         /* Raghuram Viswanadha    */
2657         "\\u0101nand vaddir\\u0101ju",                                   /* Anand Vaddiraju        */
2658         "r\\u0101j\\u012Bv ka\\u015Barab\\u0101da",                      /* Rajeev Kasarabada      */
2659         "san\\u0304j\\u012Bv ka\\u015Barab\\u0101da",                    /* sanjeev kasarabada     */
2660         "san\\u0304j\\u012Bb sen'gupta",                                 /* sanjib sengupata       */
2661         "amar\\u0113ndra hanum\\u0101nula",                              /* Amarendra hanumanula   */
2662         "ravi kum\\u0101r vi\\u015Bvan\\u0101dha",                       /* Ravi Kumar Viswanadha  */
2663         "\\u0101ditya kandr\\u0113gula",                                 /* Aditya Kandregula      */
2664         "\\u015Br\\u012Bdhar ka\\u1E47\\u1E6Dama\\u015Be\\u1E6D\\u1E6Di",/* Shridhar Kantamsetty   */
2665         "m\\u0101dhav de\\u015Be\\u1E6D\\u1E6Di"                         /* Madhav Desetty         */
2666     };
2667
2668     const char* const expected[MAX_LEN] = {
2669         "\\u0c30\\u0c18\\u0c41\\u0c30\\u0c3e\\u0c2e\\u0c4d \\u0c35\\u0c3f\\u0c36\\u0c4d\\u0c35\\u0c28\\u0c3e\\u0c27",
2670         "\\u0c06\\u0c28\\u0c02\\u0c26\\u0c4d \\u0C35\\u0C26\\u0C4D\\u0C26\\u0C3F\\u0C30\\u0C3E\\u0C1C\\u0C41",
2671         "\\u0c30\\u0c3e\\u0c1c\\u0c40\\u0c35\\u0c4d \\u0c15\\u0c36\\u0c30\\u0c2c\\u0c3e\\u0c26",
2672         "\\u0c38\\u0c02\\u0c1c\\u0c40\\u0c35\\u0c4d \\u0c15\\u0c36\\u0c30\\u0c2c\\u0c3e\\u0c26",
2673         "\\u0c38\\u0c02\\u0c1c\\u0c40\\u0c2c\\u0c4d \\u0c38\\u0c46\\u0c28\\u0c4d\\u0c17\\u0c41\\u0c2a\\u0c4d\\u0c24",
2674         "\\u0c05\\u0c2e\\u0c30\\u0c47\\u0c02\\u0c26\\u0c4d\\u0c30 \\u0c39\\u0c28\\u0c41\\u0c2e\\u0c3e\\u0c28\\u0c41\\u0c32",
2675         "\\u0c30\\u0c35\\u0c3f \\u0c15\\u0c41\\u0c2e\\u0c3e\\u0c30\\u0c4d \\u0c35\\u0c3f\\u0c36\\u0c4d\\u0c35\\u0c28\\u0c3e\\u0c27",
2676         "\\u0c06\\u0c26\\u0c3f\\u0c24\\u0c4d\\u0c2f \\u0C15\\u0C02\\u0C26\\u0C4D\\u0C30\\u0C47\\u0C17\\u0C41\\u0c32",
2677         "\\u0c36\\u0c4d\\u0c30\\u0c40\\u0C27\\u0C30\\u0C4D \\u0c15\\u0c02\\u0c1f\\u0c2e\\u0c36\\u0c46\\u0c1f\\u0c4d\\u0c1f\\u0c3f",
2678         "\\u0c2e\\u0c3e\\u0c27\\u0c35\\u0c4d \\u0c26\\u0c46\\u0c36\\u0c46\\u0c1f\\u0c4d\\u0c1f\\u0c3f",
2679     };
2680
2681     UErrorCode status = U_ZERO_ERROR;
2682     UParseError parseError;
2683     UnicodeString message;
2684     Transliterator* latinToDev=Transliterator::createInstance("Latin-Telugu", UTRANS_FORWARD, parseError, status);
2685     Transliterator* devToLatin=Transliterator::createInstance("Telugu-Latin", UTRANS_FORWARD, parseError, status);
2686     if(U_FAILURE(status)){
2687         errln("FAIL: construction " +   UnicodeString(" Error: ") + u_errorName(status));
2688         errln("PreContext: " + prettify(parseError.preContext) + " PostContext: " + prettify( parseError.postContext) );
2689         return;
2690     }
2691     UnicodeString gotResult;
2692     for(int i= 0; i<MAX_LEN; i++){
2693         gotResult = source[i];
2694         expect(*latinToDev,CharsToUnicodeString(source[i]),CharsToUnicodeString(expected[i]));
2695         expect(*devToLatin,CharsToUnicodeString(expected[i]),CharsToUnicodeString(source[i]));
2696     }
2697     delete latinToDev;
2698     delete devToLatin;
2699 }
2700
2701 void TransliteratorTest::TestSanskritLatinRT(){
2702     const int MAX_LEN =16;
2703     const char* const source[MAX_LEN] = {
2704         "rmk\\u1E63\\u0113t",
2705         "\\u015Br\\u012Bmad",
2706         "bhagavadg\\u012Bt\\u0101",
2707         "adhy\\u0101ya",
2708         "arjuna",
2709         "vi\\u1E63\\u0101da",
2710         "y\\u014Dga",
2711         "dhr\\u0325tar\\u0101\\u1E63\\u1E6Dra",
2712         "uv\\u0101cr\\u0325",
2713         "dharmak\\u1E63\\u0113tr\\u0113",
2714         "kuruk\\u1E63\\u0113tr\\u0113",
2715         "samav\\u0113t\\u0101",
2716         "yuyutsava\\u1E25",
2717         "m\\u0101mak\\u0101\\u1E25",
2718     // "p\\u0101\\u1E47\\u1E0Dav\\u0101\\u015Bcaiva",
2719         "kimakurvata",
2720         "san\\u0304java",
2721     };
2722     const char* const expected[MAX_LEN] = {
2723         "\\u0930\\u094D\\u092E\\u094D\\u0915\\u094D\\u0937\\u0947\\u0924\\u094D",
2724         "\\u0936\\u094d\\u0930\\u0940\\u092e\\u0926\\u094d",
2725         "\\u092d\\u0917\\u0935\\u0926\\u094d\\u0917\\u0940\\u0924\\u093e",
2726         "\\u0905\\u0927\\u094d\\u092f\\u093e\\u092f",
2727         "\\u0905\\u0930\\u094d\\u091c\\u0941\\u0928",
2728         "\\u0935\\u093f\\u0937\\u093e\\u0926",
2729         "\\u092f\\u094b\\u0917",
2730         "\\u0927\\u0943\\u0924\\u0930\\u093e\\u0937\\u094d\\u091f\\u094d\\u0930",
2731         "\\u0909\\u0935\\u093E\\u091A\\u0943",
2732         "\\u0927\\u0930\\u094d\\u092e\\u0915\\u094d\\u0937\\u0947\\u0924\\u094d\\u0930\\u0947",
2733         "\\u0915\\u0941\\u0930\\u0941\\u0915\\u094d\\u0937\\u0947\\u0924\\u094d\\u0930\\u0947",
2734         "\\u0938\\u092e\\u0935\\u0947\\u0924\\u093e",
2735         "\\u092f\\u0941\\u092f\\u0941\\u0924\\u094d\\u0938\\u0935\\u0903",
2736         "\\u092e\\u093e\\u092e\\u0915\\u093e\\u0903",
2737     //"\\u092a\\u093e\\u0923\\u094d\\u0921\\u0935\\u093e\\u0936\\u094d\\u091a\\u0948\\u0935",
2738         "\\u0915\\u093f\\u092e\\u0915\\u0941\\u0930\\u094d\\u0935\\u0924",
2739         "\\u0938\\u0902\\u091c\\u0935",
2740     };
2741     UErrorCode status = U_ZERO_ERROR;
2742     UParseError parseError;
2743     UnicodeString message;
2744     Transliterator* latinToDev=Transliterator::createInstance("Latin-Devanagari", UTRANS_FORWARD, parseError, status);
2745     Transliterator* devToLatin=Transliterator::createInstance("Devanagari-Latin", UTRANS_FORWARD, parseError, status);
2746     if(U_FAILURE(status)){
2747         errln("FAIL: construction " +   UnicodeString(" Error: ") + u_errorName(status));
2748         errln("PreContext: " + prettify(parseError.preContext) + " PostContext: " + prettify( parseError.postContext) );
2749         return;
2750     }
2751     UnicodeString gotResult;
2752     for(int i= 0; i<MAX_LEN; i++){
2753         gotResult = source[i];
2754         expect(*latinToDev,CharsToUnicodeString(source[i]),CharsToUnicodeString(expected[i]));
2755         expect(*devToLatin,CharsToUnicodeString(expected[i]),CharsToUnicodeString(source[i]));
2756     }
2757     delete latinToDev;
2758     delete devToLatin;
2759 }
2760
2761
2762 void TransliteratorTest::TestCompoundLatinRT(){
2763     const char* const source[] = {
2764         "rmk\\u1E63\\u0113t",
2765         "\\u015Br\\u012Bmad",
2766         "bhagavadg\\u012Bt\\u0101",
2767         "adhy\\u0101ya",
2768         "arjuna",
2769         "vi\\u1E63\\u0101da",
2770         "y\\u014Dga",
2771         "dhr\\u0325tar\\u0101\\u1E63\\u1E6Dra",
2772         "uv\\u0101cr\\u0325",
2773         "dharmak\\u1E63\\u0113tr\\u0113",
2774         "kuruk\\u1E63\\u0113tr\\u0113",
2775         "samav\\u0113t\\u0101",
2776         "yuyutsava\\u1E25",
2777         "m\\u0101mak\\u0101\\u1E25",
2778      // "p\\u0101\\u1E47\\u1E0Dav\\u0101\\u015Bcaiva",
2779         "kimakurvata",
2780         "san\\u0304java"
2781     };
2782     const int MAX_LEN = sizeof(source)/sizeof(source[0]);
2783     const char* const expected[MAX_LEN] = {
2784         "\\u0930\\u094D\\u092E\\u094D\\u0915\\u094D\\u0937\\u0947\\u0924\\u094D",
2785         "\\u0936\\u094d\\u0930\\u0940\\u092e\\u0926\\u094d",
2786         "\\u092d\\u0917\\u0935\\u0926\\u094d\\u0917\\u0940\\u0924\\u093e",
2787         "\\u0905\\u0927\\u094d\\u092f\\u093e\\u092f",
2788         "\\u0905\\u0930\\u094d\\u091c\\u0941\\u0928",
2789         "\\u0935\\u093f\\u0937\\u093e\\u0926",
2790         "\\u092f\\u094b\\u0917",
2791         "\\u0927\\u0943\\u0924\\u0930\\u093e\\u0937\\u094d\\u091f\\u094d\\u0930",
2792         "\\u0909\\u0935\\u093E\\u091A\\u0943",
2793         "\\u0927\\u0930\\u094d\\u092e\\u0915\\u094d\\u0937\\u0947\\u0924\\u094d\\u0930\\u0947",
2794         "\\u0915\\u0941\\u0930\\u0941\\u0915\\u094d\\u0937\\u0947\\u0924\\u094d\\u0930\\u0947",
2795         "\\u0938\\u092e\\u0935\\u0947\\u0924\\u093e",
2796         "\\u092f\\u0941\\u092f\\u0941\\u0924\\u094d\\u0938\\u0935\\u0903",
2797         "\\u092e\\u093e\\u092e\\u0915\\u093e\\u0903",
2798     //  "\\u092a\\u093e\\u0923\\u094d\\u0921\\u0935\\u093e\\u0936\\u094d\\u091a\\u0948\\u0935",
2799         "\\u0915\\u093f\\u092e\\u0915\\u0941\\u0930\\u094d\\u0935\\u0924",
2800         "\\u0938\\u0902\\u091c\\u0935"
2801     };
2802     if(MAX_LEN != sizeof(expected)/sizeof(expected[0])) {
2803         errln("error in TestCompoundLatinRT: source[] and expected[] have different lengths!");
2804         return;
2805     }
2806
2807     UErrorCode status = U_ZERO_ERROR;
2808     UParseError parseError;
2809     UnicodeString message;
2810     Transliterator* devToLatinToDev  =Transliterator::createInstance("Devanagari-Latin;Latin-Devanagari", UTRANS_FORWARD, parseError, status);
2811     Transliterator* latinToDevToLatin=Transliterator::createInstance("Latin-Devanagari;Devanagari-Latin", UTRANS_FORWARD, parseError, status);
2812     Transliterator* devToTelToDev    =Transliterator::createInstance("Devanagari-Telugu;Telugu-Devanagari", UTRANS_FORWARD, parseError, status);
2813     Transliterator* latinToTelToLatin=Transliterator::createInstance("Latin-Telugu;Telugu-Latin", UTRANS_FORWARD, parseError, status);
2814
2815     if(U_FAILURE(status)){
2816         errln("FAIL: construction " +   UnicodeString(" Error: ") + u_errorName(status));
2817         errln("PreContext: " + prettify(parseError.preContext) + " PostContext: " + prettify( parseError.postContext) );
2818         return;
2819     }
2820     UnicodeString gotResult;
2821     for(int i= 0; i<MAX_LEN; i++){
2822         gotResult = source[i];
2823         expect(*devToLatinToDev,CharsToUnicodeString(expected[i]),CharsToUnicodeString(expected[i]));
2824         expect(*latinToDevToLatin,CharsToUnicodeString(source[i]),CharsToUnicodeString(source[i]));
2825         expect(*latinToTelToLatin,CharsToUnicodeString(source[i]),CharsToUnicodeString(source[i]));
2826
2827     }
2828     delete(latinToDevToLatin);
2829     delete(devToLatinToDev);
2830     delete(devToTelToDev);
2831     delete(latinToTelToLatin);
2832 }
2833
2834 /**
2835  * Test Gurmukhi-Devanagari Tippi and Bindi
2836  */
2837 void TransliteratorTest::TestGurmukhiDevanagari(){
2838     // the rule says:
2839     // (\u0902) (when preceded by vowel)      --->  (\u0A02)
2840     // (\u0902) (when preceded by consonant)  --->  (\u0A70)
2841     UErrorCode status = U_ZERO_ERROR;
2842     UnicodeSet vowel(UnicodeString("[\\u0905-\\u090A \\u090F\\u0910\\u0913\\u0914 \\u093e-\\u0942\\u0947\\u0948\\u094B\\u094C\\u094D]", -1, US_INV).unescape(), status);
2843     UnicodeSet non_vowel(UnicodeString("[\\u0915-\\u0928\\u092A-\\u0930]", -1, US_INV).unescape(), status);
2844     UParseError parseError;
2845
2846     UnicodeSetIterator vIter(vowel);
2847     UnicodeSetIterator nvIter(non_vowel);
2848     Transliterator* trans = Transliterator::createInstance("Devanagari-Gurmukhi",UTRANS_FORWARD, parseError, status);
2849     if(U_FAILURE(status)) {
2850       errln("Error creating transliterator %s", u_errorName(status));
2851       delete trans;
2852       return;
2853     }
2854     UnicodeString src (" \\u0902", -1, US_INV);
2855     UnicodeString expected(" \\u0A02", -1, US_INV);
2856     src = src.unescape();
2857     expected= expected.unescape();
2858
2859     while(vIter.next()){
2860         src.setCharAt(0,(UChar) vIter.getCodepoint());
2861         expected.setCharAt(0,(UChar) (vIter.getCodepoint()+0x0100));
2862         expect(*trans,src,expected);
2863     }
2864
2865     expected.setCharAt(1,0x0A70);
2866     while(nvIter.next()){
2867         //src.setCharAt(0,(char) nvIter.codepoint);
2868         src.setCharAt(0,(UChar)nvIter.getCodepoint());
2869         expected.setCharAt(0,(UChar) (nvIter.getCodepoint()+0x0100));
2870         expect(*trans,src,expected);
2871     }
2872     delete trans;
2873 }
2874 /**
2875  * Test instantiation from a locale.
2876  */
2877 void TransliteratorTest::TestLocaleInstantiation(void) {
2878     UParseError pe;
2879     UErrorCode ec = U_ZERO_ERROR;
2880     Transliterator *t = Transliterator::createInstance("ru_RU-Latin", UTRANS_FORWARD, pe, ec);
2881     if (U_FAILURE(ec)) {
2882         errln("FAIL: createInstance(ru_RU-Latin)");
2883         delete t;
2884         return;
2885     }
2886     expect(*t, CharsToUnicodeString("\\u0430"), "a");
2887     delete t;
2888
2889     t = Transliterator::createInstance("en-el", UTRANS_FORWARD, pe, ec);
2890     if (U_FAILURE(ec)) {
2891         errln("FAIL: createInstance(en-el)");
2892         delete t;
2893         return;
2894     }
2895     expect(*t, "a", CharsToUnicodeString("\\u03B1"));
2896     delete t;
2897 }
2898
2899 /**
2900  * Test title case handling of accent (should ignore accents)
2901  */
2902 void TransliteratorTest::TestTitleAccents(void) {
2903     UParseError pe;
2904     UErrorCode ec = U_ZERO_ERROR;
2905     Transliterator *t = Transliterator::createInstance("Title", UTRANS_FORWARD, pe, ec);
2906     if (U_FAILURE(ec)) {
2907         errln("FAIL: createInstance(Title)");
2908         delete t;
2909         return;
2910     }
2911     expect(*t, CharsToUnicodeString("a\\u0300b can't abe"), CharsToUnicodeString("A\\u0300b Can't Abe"));
2912     delete t;
2913 }
2914
2915 /**
2916  * Basic test of a locale resource based rule.
2917  */
2918 void TransliteratorTest::TestLocaleResource() {
2919     const char* DATA[] = {
2920         // id                    from               to
2921         //"Latin-Greek/UNGEGN",    "b",               "\\u03bc\\u03c0",
2922         "Latin-el",              "b",               "\\u03bc\\u03c0",
2923         "Latin-Greek",           "b",               "\\u03B2",
2924         "Greek-Latin/UNGEGN",    "\\u03B2",         "v",
2925         "el-Latin",              "\\u03B2",         "v",
2926         "Greek-Latin",           "\\u03B2",         "b",
2927     };
2928     const int32_t DATA_length = sizeof(DATA) / sizeof(DATA[0]);
2929     for (int32_t i=0; i<DATA_length; i+=3) {
2930         UParseError pe;
2931         UErrorCode ec = U_ZERO_ERROR;
2932         Transliterator *t = Transliterator::createInstance(DATA[i], UTRANS_FORWARD, pe, ec);
2933         if (U_FAILURE(ec)) {
2934             errln((UnicodeString)"FAIL: createInstance(" + DATA[i] + ")");
2935             delete t;
2936             continue;
2937         }
2938         expect(*t, CharsToUnicodeString(DATA[i+1]),
2939                CharsToUnicodeString(DATA[i+2]));
2940         delete t;
2941     }
2942 }
2943
2944 /**
2945  * Make sure parse errors reference the right line.
2946  */
2947 void TransliteratorTest::TestParseError() {
2948     static const char* rule =
2949         "a > b;\n"
2950         "# more stuff\n"
2951         "d << b;";
2952     UErrorCode ec = U_ZERO_ERROR;
2953     UParseError pe;
2954     Transliterator *t = Transliterator::createFromRules("ID", rule, UTRANS_FORWARD, pe, ec);
2955     delete t;
2956     if (U_FAILURE(ec)) {
2957         UnicodeString err(pe.preContext);
2958         err.append((UChar)124/*|*/).append(pe.postContext);
2959         if (err.indexOf("d << b") >= 0) {
2960             logln("Ok: " + err);
2961         } else {
2962             errln("FAIL: " + err);
2963         }
2964     }
2965     else {
2966         errln("FAIL: no syntax error");
2967     }
2968     static const char* maskingRule =
2969         "a>x;\n"
2970         "# more stuff\n"
2971         "ab>y;";
2972     ec = U_ZERO_ERROR;
2973     delete Transliterator::createFromRules("ID", maskingRule, UTRANS_FORWARD, pe, ec);
2974     if (ec != U_RULE_MASK_ERROR) {
2975         errln("FAIL: returned %s instead of U_RULE_MASK_ERROR", u_errorName(ec));
2976     }
2977     else if (UnicodeString("a > x;") != UnicodeString(pe.preContext)) {
2978         errln("FAIL: did not get expected precontext");
2979     }
2980     else if (UnicodeString("ab > y;") != UnicodeString(pe.postContext)) {
2981         errln("FAIL: did not get expected postcontext");
2982     }
2983 }
2984
2985 /**
2986  * Make sure sets on output are disallowed.
2987  */
2988 void TransliteratorTest::TestOutputSet() {
2989     UnicodeString rule = "$set = [a-cm-n]; b > $set;";
2990     UErrorCode ec = U_ZERO_ERROR;
2991     UParseError pe;
2992     Transliterator *t = Transliterator::createFromRules("ID", rule, UTRANS_FORWARD, pe, ec);
2993     delete t;
2994     if (U_FAILURE(ec)) {
2995         UnicodeString err(pe.preContext);
2996         err.append((UChar)124/*|*/).append(pe.postContext);
2997         logln("Ok: " + err);
2998         return;
2999     }
3000     errln("FAIL: No syntax error");
3001 }
3002
3003 /**
3004  * Test the use variable range pragma, making sure that use of
3005  * variable range characters is detected and flagged as an error.
3006  */
3007 void TransliteratorTest::TestVariableRange() {
3008     UnicodeString rule = "use variable range 0x70 0x72; a > A; b > B; q > Q;";
3009     UErrorCode ec = U_ZERO_ERROR;
3010     UParseError pe;
3011     Transliterator *t = Transliterator::createFromRules("ID", rule, UTRANS_FORWARD, pe, ec);
3012     delete t;
3013     if (U_FAILURE(ec)) {
3014         UnicodeString err(pe.preContext);
3015         err.append((UChar)124/*|*/).append(pe.postContext);
3016         logln("Ok: " + err);
3017         return;
3018     }
3019     errln("FAIL: No syntax error");
3020 }
3021
3022 /**
3023  * Test invalid post context error handling
3024  */
3025 void TransliteratorTest::TestInvalidPostContext() {
3026     UnicodeString rule = "a}b{c>d;";
3027     UErrorCode ec = U_ZERO_ERROR;
3028     UParseError pe;
3029     Transliterator *t = Transliterator::createFromRules("ID", rule, UTRANS_FORWARD, pe, ec);
3030     delete t;
3031     if (U_FAILURE(ec)) {
3032         UnicodeString err(pe.preContext);
3033         err.append((UChar)124/*|*/).append(pe.postContext);
3034         if (err.indexOf("a}b{c") >= 0) {
3035             logln("Ok: " + err);
3036         } else {
3037             errln("FAIL: " + err);
3038         }
3039         return;
3040     }
3041     errln("FAIL: No syntax error");
3042 }
3043
3044 /**
3045  * Test ID form variants
3046  */
3047 void TransliteratorTest::TestIDForms() {
3048     const char* DATA[] = {
3049         "NFC", NULL, "NFD",
3050         "nfd", NULL, "NFC", // make sure case is ignored
3051         "Any-NFKD", NULL, "Any-NFKC",
3052         "Null", NULL, "Null",
3053         "-nfkc", "nfkc", "NFKD",
3054         "-nfkc/", "nfkc", "NFKD",
3055         "Latin-Greek/UNGEGN", NULL, "Greek-Latin/UNGEGN",
3056         "Greek/UNGEGN-Latin", "Greek-Latin/UNGEGN", "Latin-Greek/UNGEGN",
3057         "Bengali-Devanagari/", "Bengali-Devanagari", "Devanagari-Bengali",
3058         "Source-", NULL, NULL,
3059         "Source/Variant-", NULL, NULL,
3060         "Source-/Variant", NULL, NULL,
3061         "/Variant", NULL, NULL,
3062         "/Variant-", NULL, NULL,
3063         "-/Variant", NULL, NULL,
3064         "-/", NULL, NULL,
3065         "-", NULL, NULL,
3066         "/", NULL, NULL,
3067     };
3068     const int32_t DATA_length = sizeof(DATA)/sizeof(DATA[0]);
3069
3070     for (int32_t i=0; i<DATA_length; i+=3) {
3071         const char* ID = DATA[i];
3072         const char* expID = DATA[i+1];
3073         const char* expInvID = DATA[i+2];
3074         UBool expValid = (expInvID != NULL);
3075         if (expID == NULL) {
3076             expID = ID;
3077         }
3078         UParseError pe;
3079         UErrorCode ec = U_ZERO_ERROR;
3080         Transliterator *t =
3081             Transliterator::createInstance(ID, UTRANS_FORWARD, pe, ec);
3082         if (U_FAILURE(ec)) {
3083             if (!expValid) {
3084                 logln((UnicodeString)"Ok: getInstance(" + ID +") => " + u_errorName(ec));
3085             } else {
3086                 errln((UnicodeString)"FAIL: Couldn't create " + ID);
3087             }
3088             delete t;
3089             continue;
3090         }
3091         Transliterator *u = t->createInverse(ec);
3092         if (U_FAILURE(ec)) {
3093             errln((UnicodeString)"FAIL: Couldn't create inverse of " + ID);
3094             delete t;
3095             delete u;
3096             continue;
3097         }
3098         if (t->getID() == expID &&
3099             u->getID() == expInvID) {
3100             logln((UnicodeString)"Ok: " + ID + ".getInverse() => " + expInvID);
3101         } else {
3102             errln((UnicodeString)"FAIL: getInstance(" + ID + ") => " +
3103                   t->getID() + " x getInverse() => " + u->getID() +
3104                   ", expected " + expInvID);
3105         }
3106         delete t;
3107         delete u;
3108     }
3109 }
3110
3111 static const UChar SPACE[]   = {32,0};
3112 static const UChar NEWLINE[] = {10,0};
3113 static const UChar RETURN[]  = {13,0};
3114 static const UChar EMPTY[]   = {0};
3115
3116 void TransliteratorTest::checkRules(const UnicodeString& label, Transliterator& t2,
3117                                     const UnicodeString& testRulesForward) {
3118     UnicodeString rules2; t2.toRules(rules2, TRUE);
3119     //rules2 = TestUtility.replaceAll(rules2, new UnicodeSet("[' '\n\r]"), "");
3120     rules2.findAndReplace(SPACE, EMPTY);
3121     rules2.findAndReplace(NEWLINE, EMPTY);
3122     rules2.findAndReplace(RETURN, EMPTY);
3123
3124     UnicodeString testRules(testRulesForward); testRules.findAndReplace(SPACE, EMPTY);
3125
3126     if (rules2 != testRules) {
3127         errln(label);
3128         logln((UnicodeString)"GENERATED RULES: " + rules2);
3129         logln((UnicodeString)"SHOULD BE:       " + testRulesForward);
3130     }
3131 }
3132
3133 /**
3134  * Mark's toRules test.
3135  */
3136 void TransliteratorTest::TestToRulesMark() {
3137     const char* testRules =
3138         "::[[:Latin:][:Mark:]];"
3139         "::NFKD (NFC);"
3140         "::Lower (Lower);"
3141         "a <> \\u03B1;" // alpha
3142         "::NFKC (NFD);"
3143         "::Upper (Lower);"
3144         "::Lower ();"
3145         "::([[:Greek:][:Mark:]]);"
3146         ;
3147     const char* testRulesForward =
3148         "::[[:Latin:][:Mark:]];"
3149         "::NFKD(NFC);"
3150         "::Lower(Lower);"
3151         "a > \\u03B1;"
3152         "::NFKC(NFD);"
3153         "::Upper (Lower);"
3154         "::Lower ();"
3155         ;
3156     const char* testRulesBackward =
3157         "::[[:Greek:][:Mark:]];"
3158         "::Lower (Upper);"
3159         "::NFD(NFKC);"
3160         "\\u03B1 > a;"
3161         "::Lower(Lower);"
3162         "::NFC(NFKD);"
3163         ;
3164     UnicodeString source = CharsToUnicodeString("\\u00E1"); // a-acute
3165     UnicodeString target = CharsToUnicodeString("\\u03AC"); // alpha-acute
3166
3167     UParseError pe;
3168     UErrorCode ec = U_ZERO_ERROR;
3169     Transliterator *t2 = Transliterator::createFromRules("source-target", UnicodeString(testRules, -1, US_INV), UTRANS_FORWARD, pe, ec);
3170     Transliterator *t3 = Transliterator::createFromRules("target-source", UnicodeString(testRules, -1, US_INV), UTRANS_REVERSE, pe, ec);
3171
3172     if (U_FAILURE(ec)) {
3173         delete t2;
3174         delete t3;
3175         errln((UnicodeString)"FAIL: createFromRules => " + u_errorName(ec));
3176         return;
3177     }
3178
3179     expect(*t2, source, target);
3180     expect(*t3, target, source);
3181
3182     checkRules("Failed toRules FORWARD", *t2, UnicodeString(testRulesForward, -1, US_INV));
3183     checkRules("Failed toRules BACKWARD", *t3, UnicodeString(testRulesBackward, -1, US_INV));
3184
3185     delete t2;
3186     delete t3;
3187 }
3188
3189 /**
3190  * Test Escape and Unescape transliterators.
3191  */
3192 void TransliteratorTest::TestEscape() {
3193     UParseError pe;
3194     UErrorCode ec;
3195     Transliterator *t;
3196
3197     ec = U_ZERO_ERROR;
3198     t = Transliterator::createInstance("Hex-Any", UTRANS_FORWARD, pe, ec);
3199     if (U_FAILURE(ec)) {
3200         errln((UnicodeString)"FAIL: createInstance");
3201     } else {
3202         expect(*t,
3203                UNICODE_STRING_SIMPLE("\\x{40}\\U00000031&#x32;&#81;"),
3204                "@12Q");
3205     }
3206     delete t;
3207
3208     ec = U_ZERO_ERROR;
3209     t = Transliterator::createInstance("Any-Hex/C", UTRANS_FORWARD, pe, ec);
3210     if (U_FAILURE(ec)) {
3211         errln((UnicodeString)"FAIL: createInstance");
3212     } else {
3213         expect(*t,
3214                CharsToUnicodeString("A\\U0010BEEF\\uFEED"),
3215                UNICODE_STRING_SIMPLE("\\u0041\\U0010BEEF\\uFEED"));
3216     }
3217     delete t;
3218
3219     ec = U_ZERO_ERROR;
3220     t = Transliterator::createInstance("Any-Hex/Java", UTRANS_FORWARD, pe, ec);
3221     if (U_FAILURE(ec)) {
3222         errln((UnicodeString)"FAIL: createInstance");
3223     } else {
3224         expect(*t,
3225                CharsToUnicodeString("A\\U0010BEEF\\uFEED"),
3226                UNICODE_STRING_SIMPLE("\\u0041\\uDBEF\\uDEEF\\uFEED"));
3227     }
3228     delete t;
3229
3230     ec = U_ZERO_ERROR;
3231     t = Transliterator::createInstance("Any-Hex/Perl", UTRANS_FORWARD, pe, ec);
3232     if (U_FAILURE(ec)) {
3233         errln((UnicodeString)"FAIL: createInstance");
3234     } else {
3235         expect(*t,
3236                CharsToUnicodeString("A\\U0010BEEF\\uFEED"),
3237                UNICODE_STRING_SIMPLE("\\x{41}\\x{10BEEF}\\x{FEED}"));
3238     }
3239     delete t;
3240 }
3241
3242
3243 void TransliteratorTest::TestAnchorMasking(){
3244     UnicodeString rule ("^a > Q; a > q;");
3245     UErrorCode status= U_ZERO_ERROR;
3246     UParseError parseError;
3247
3248     Transliterator* t = Transliterator::createFromRules("ID", rule, UTRANS_FORWARD,parseError,status);
3249     if(U_FAILURE(status)){
3250         errln(UnicodeString("FAIL: ") + "ID" +
3251               ".createFromRules() => bad rules" +
3252               /*", parse error " + parseError.code +*/
3253               ", line " + parseError.line +
3254               ", offset " + parseError.offset +
3255               ", context " + prettify(parseError.preContext, TRUE) +
3256               ", rules: " + prettify(rule, TRUE));
3257     }
3258     delete t;
3259 }
3260
3261 /**
3262  * Make sure display names of variants look reasonable.
3263  */
3264 void TransliteratorTest::TestDisplayName() {
3265 #if UCONFIG_NO_FORMATTING
3266     logln("Skipping, UCONFIG_NO_FORMATTING is set\n");
3267     return;
3268 #else
3269     static const char* DATA[] = {
3270         // ID, forward name, reverse name
3271         // Update the text as necessary -- the important thing is
3272         // not the text itself, but how various cases are handled.
3273
3274         // Basic test
3275         "Any-Hex", "Any to Hex Escape", "Hex Escape to Any",
3276
3277         // Variants
3278         "Any-Hex/Perl", "Any to Hex Escape/Perl", "Hex Escape to Any/Perl",
3279
3280         // Target-only IDs
3281         "NFC", "Any to NFC", "Any to NFD",
3282     };
3283
3284     int32_t DATA_length = sizeof(DATA) / sizeof(DATA[0]);
3285
3286     Locale US("en", "US");
3287
3288     for (int32_t i=0; i<DATA_length; i+=3) {
3289         UnicodeString name;
3290         Transliterator::getDisplayName(DATA[i], US, name);
3291         if (name != DATA[i+1]) {
3292             errln((UnicodeString)"FAIL: " + DATA[i] + ".getDisplayName() => " +
3293                   name + ", expected " + DATA[i+1]);
3294         } else {
3295             logln((UnicodeString)"Ok: " + DATA[i] + ".getDisplayName() => " + name);
3296         }
3297         UErrorCode ec = U_ZERO_ERROR;
3298         UParseError pe;
3299         Transliterator *t = Transliterator::createInstance(DATA[i], UTRANS_REVERSE, pe, ec);
3300         if (U_FAILURE(ec)) {
3301             delete t;
3302             errln("FAIL: createInstance failed");
3303             continue;
3304         }
3305         name = Transliterator::getDisplayName(t->getID(), US, name);
3306         if (name != DATA[i+2]) {
3307             errln((UnicodeString)"FAIL: " + t->getID() + ".getDisplayName() => " +
3308                   name + ", expected " + DATA[i+2]);
3309         } else {
3310             logln((UnicodeString)"Ok: " + t->getID() + ".getDisplayName() => " + name);
3311         }
3312         delete t;
3313     }
3314 #endif
3315 }
3316
3317 void TransliteratorTest::TestSpecialCases(void) {
3318     const UnicodeString registerRules[] = {
3319         "Any-Dev1", "x > X; y > Y;",
3320         "Any-Dev2", "XY > Z",
3321         "Greek-Latin/FAKE",
3322             CharsToUnicodeString
3323             ("[^[:L:][:M:]] { \\u03bc\\u03c0 > b ; \\u03bc\\u03c0 } [^[:L:][:M:]] > b ; [^[:L:][:M:]] { [\\u039c\\u03bc][\\u03a0\\u03c0] > B ; [\\u039c\\u03bc][\\u03a0\\u03c0] } [^[:L:][:M:]] > B ;"),
3324         "" // END MARKER
3325     };
3326
3327     const UnicodeString testCases[] = {
3328         // NORMALIZATION
3329         // should add more test cases
3330         "NFD" , CharsToUnicodeString("a\\u0300 \\u00E0 \\u1100\\u1161 \\uFF76\\uFF9E\\u03D3"), "",
3331         "NFC" , CharsToUnicodeString("a\\u0300 \\u00E0 \\u1100\\u1161 \\uFF76\\uFF9E\\u03D3"), "",
3332         "NFKD", CharsToUnicodeString("a\\u0300 \\u00E0 \\u1100\\u1161 \\uFF76\\uFF9E\\u03D3"), "",
3333         "NFKC", CharsToUnicodeString("a\\u0300 \\u00E0 \\u1100\\u1161 \\uFF76\\uFF9E\\u03D3"), "",
3334
3335         // mp -> b BUG
3336         "Greek-Latin/UNGEGN", CharsToUnicodeString("(\\u03BC\\u03C0)"), "(b)",
3337         "Greek-Latin/FAKE", CharsToUnicodeString("(\\u03BC\\u03C0)"), "(b)",
3338
3339         // check for devanagari bug
3340         "nfd;Dev1;Dev2;nfc", "xy", "Z",
3341
3342         // ff, i, dotless-i, I, dotted-I, LJLjlj deseret deeDEE
3343         "Title", CharsToUnicodeString("ab'cD ffi\\u0131I\\u0130 \\u01C7\\u01C8\\u01C9 ") + DESERET_dee + DESERET_DEE,
3344                  CharsToUnicodeString("Ab'cd Ffi\\u0131ii\\u0307 \\u01C8\\u01C9\\u01C9 ") + DESERET_DEE + DESERET_dee,
3345
3346         //TODO: enable this test once Titlecase works right
3347         /*
3348         "Title", CharsToUnicodeString("\\uFB00i\\u0131I\\u0130 \\u01C7\\u01C8\\u01C9 ") + DESERET_dee + DESERET_DEE,
3349                  CharsToUnicodeString("Ffi\\u0131ii \\u01C8\\u01C9\\u01C9 ") + DESERET_DEE + DESERET_dee,
3350                  */
3351         "Upper", CharsToUnicodeString("ab'cD \\uFB00i\\u0131I\\u0130 \\u01C7\\u01C8\\u01C9 ") + DESERET_dee + DESERET_DEE,
3352                  CharsToUnicodeString("AB'CD FFIII\\u0130 \\u01C7\\u01C7\\u01C7 ") + DESERET_DEE + DESERET_DEE,
3353         "Lower", CharsToUnicodeString("ab'cD \\uFB00i\\u0131I\\u0130 \\u01C7\\u01C8\\u01C9 ") + DESERET_dee + DESERET_DEE,
3354                  CharsToUnicodeString("ab'cd \\uFB00i\\u0131ii\\u0307 \\u01C9\\u01C9\\u01C9 ") + DESERET_dee + DESERET_dee,
3355
3356         "Upper", CharsToUnicodeString("ab'cD \\uFB00i\\u0131I\\u0130 \\u01C7\\u01C8\\u01C9 ") + DESERET_dee + DESERET_DEE, "",
3357         "Lower", CharsToUnicodeString("ab'cD \\uFB00i\\u0131I\\u0130 \\u01C7\\u01C8\\u01C9 ") + DESERET_dee + DESERET_DEE, "",
3358
3359          // FORMS OF S
3360         "Greek-Latin/UNGEGN",  CharsToUnicodeString("\\u03C3 \\u03C3\\u03C2 \\u03C2\\u03C3"),
3361                                CharsToUnicodeString("s ss s\\u0331s\\u0331") ,
3362         "Latin-Greek/UNGEGN",  CharsToUnicodeString("s ss s\\u0331s\\u0331"),
3363                                CharsToUnicodeString("\\u03C3 \\u03C3\\u03C2 \\u03C2\\u03C3") ,
3364         "Greek-Latin",  CharsToUnicodeString("\\u03C3 \\u03C3\\u03C2 \\u03C2\\u03C3"),
3365                         CharsToUnicodeString("s ss s\\u0331s\\u0331") ,
3366         "Latin-Greek",  CharsToUnicodeString("s ss s\\u0331s\\u0331"),
3367                         CharsToUnicodeString("\\u03C3 \\u03C3\\u03C2 \\u03C2\\u03C3"),
3368         // Tatiana bug
3369         // Upper: TAT\\u02B9\\u00C2NA
3370         // Lower: tat\\u02B9\\u00E2na
3371         // Title: Tat\\u02B9\\u00E2na
3372         "Upper", CharsToUnicodeString("tat\\u02B9\\u00E2na"),
3373                  CharsToUnicodeString("TAT\\u02B9\\u00C2NA"),
3374         "Lower", CharsToUnicodeString("TAT\\u02B9\\u00C2NA"),
3375                  CharsToUnicodeString("tat\\u02B9\\u00E2na"),
3376         "Title", CharsToUnicodeString("tat\\u02B9\\u00E2na"),
3377                  CharsToUnicodeString("Tat\\u02B9\\u00E2na"),
3378
3379         "" // END MARKER
3380     };
3381
3382     UParseError pos;
3383     int32_t i;
3384     for (i = 0; registerRules[i].length()!=0; i+=2) {
3385         UErrorCode status = U_ZERO_ERROR;
3386
3387         Transliterator *t = Transliterator::createFromRules(registerRules[0+i],
3388             registerRules[i+1], UTRANS_FORWARD, pos, status);
3389         if (U_FAILURE(status)) {
3390             errln("Fails: Unable to create the transliterator from rules.");
3391         } else {
3392             Transliterator::registerInstance(t);
3393         }
3394     }
3395     for (i = 0; testCases[i].length()!=0; i+=3) {
3396         UErrorCode ec = U_ZERO_ERROR;
3397         UParseError pe;
3398         const UnicodeString& name = testCases[i];
3399         Transliterator *t = Transliterator::createInstance(name, UTRANS_FORWARD, pe, ec);
3400         if (U_FAILURE(ec)) {
3401             errln((UnicodeString)"FAIL: Couldn't create " + name);
3402             delete t;
3403             continue;
3404         }
3405         const UnicodeString& id = t->getID();
3406         const UnicodeString& source = testCases[i+1];
3407         UnicodeString target;
3408
3409         // Automatic generation of targets, to make it simpler to add test cases (and more fail-safe)
3410
3411         if (testCases[i+2].length() > 0) {
3412             target = testCases[i+2];
3413         } else if (0==id.caseCompare("NFD", U_FOLD_CASE_DEFAULT)) {
3414             Normalizer::normalize(source, UNORM_NFD, 0, target, ec);
3415         } else if (0==id.caseCompare("NFC", U_FOLD_CASE_DEFAULT)) {
3416             Normalizer::normalize(source, UNORM_NFC, 0, target, ec);
3417         } else if (0==id.caseCompare("NFKD", U_FOLD_CASE_DEFAULT)) {
3418             Normalizer::normalize(source, UNORM_NFKD, 0, target, ec);
3419         } else if (0==id.caseCompare("NFKC", U_FOLD_CASE_DEFAULT)) {
3420             Normalizer::normalize(source, UNORM_NFKC, 0, target, ec);
3421         } else if (0==id.caseCompare("Lower", U_FOLD_CASE_DEFAULT)) {
3422             target = source;
3423             target.toLower(Locale::getUS());
3424         } else if (0==id.caseCompare("Upper", U_FOLD_CASE_DEFAULT)) {
3425             target = source;
3426             target.toUpper(Locale::getUS());
3427         }
3428         if (U_FAILURE(ec)) {
3429             errln((UnicodeString)"FAIL: Internal error normalizing " + source);
3430             continue;
3431         }
3432
3433         expect(*t, source, target);
3434         delete t;
3435     }
3436     for (i = 0; registerRules[i].length()!=0; i+=2) {
3437         Transliterator::unregister(registerRules[i]);
3438     }
3439 }
3440
3441 char* Char32ToEscapedChars(UChar32 ch, char* buffer) {
3442     if (ch <= 0xFFFF) {
3443         sprintf(buffer, "\\u%04x", (int)ch);
3444     } else {
3445         sprintf(buffer, "\\U%08x", (int)ch);
3446     }
3447     return buffer;
3448 }
3449
3450 void TransliteratorTest::TestSurrogateCasing (void) {
3451     // check that casing handles surrogates
3452     // titlecase is currently defective
3453     char buffer[20];
3454     UChar buffer2[20];
3455     UChar32 dee;
3456     UTF_GET_CHAR(DESERET_dee,0, 0, DESERET_dee.length(), dee);
3457     UnicodeString DEE(u_totitle(dee));
3458     if (DEE != DESERET_DEE) {
3459         err("Fails titlecase of surrogates");
3460         err(Char32ToEscapedChars(dee, buffer));
3461         err(", ");
3462         errln(Char32ToEscapedChars(DEE.char32At(0), buffer));
3463     }
3464
3465     UnicodeString deeDEETest=DESERET_dee + DESERET_DEE;
3466     UnicodeString deedeeTest = DESERET_dee + DESERET_dee;
3467     UnicodeString DEEDEETest = DESERET_DEE + DESERET_DEE;
3468     UErrorCode status= U_ZERO_ERROR;
3469
3470     u_strToUpper(buffer2, 20, deeDEETest.getBuffer(), deeDEETest.length(), NULL, &status);
3471     if (U_FAILURE(status) || (UnicodeString(buffer2)!= DEEDEETest)) {
3472         errln("Fails: Can't uppercase surrogates.");
3473     }
3474
3475     status= U_ZERO_ERROR;
3476     u_strToLower(buffer2, 20, deeDEETest.getBuffer(), deeDEETest.length(), NULL, &status);
3477     if (U_FAILURE(status) || (UnicodeString(buffer2)!= deedeeTest)) {
3478         errln("Fails: Can't lowercase surrogates.");
3479     }
3480 }
3481
3482 static void _trans(Transliterator& t, const UnicodeString& src,
3483                    UnicodeString& result) {
3484     result = src;
3485     t.transliterate(result);
3486 }
3487
3488 static void _trans(const UnicodeString& id, const UnicodeString& src,
3489                    UnicodeString& result, UErrorCode ec) {
3490     UParseError pe;
3491     Transliterator *t = Transliterator::createInstance(id, UTRANS_FORWARD, pe, ec);
3492     if (U_SUCCESS(ec)) {
3493         _trans(*t, src, result);
3494     }
3495     delete t;
3496 }
3497
3498 static UnicodeString _findMatch(const UnicodeString& source,
3499                                        const UnicodeString* pairs) {
3500     UnicodeString empty;
3501     for (int32_t i=0; pairs[i].length() > 0; i+=2) {
3502         if (0==source.caseCompare(pairs[i], U_FOLD_CASE_DEFAULT)) {
3503             return pairs[i+1];
3504         }
3505     }
3506     return empty;
3507 }
3508
3509 // Check to see that incremental gets at least part way through a reasonable string.
3510
3511 void TransliteratorTest::TestIncrementalProgress(void) {
3512     UErrorCode ec = U_ZERO_ERROR;
3513     UnicodeString latinTest = "The Quick Brown Fox.";
3514     UnicodeString devaTest;
3515     _trans("Latin-Devanagari", latinTest, devaTest, ec);
3516     UnicodeString kataTest;
3517     _trans("Latin-Katakana", latinTest, kataTest, ec);
3518     if (U_FAILURE(ec)) {
3519         errln("FAIL: Internal error");
3520         return;
3521     }
3522     const UnicodeString tests[] = {
3523         "Any", latinTest,
3524         "Latin", latinTest,
3525         "Halfwidth", latinTest,
3526         "Devanagari", devaTest,
3527         "Katakana", kataTest,
3528         "" // END MARKER
3529     };
3530
3531     UnicodeString test("The Quick Brown Fox Jumped Over The Lazy Dog.");
3532     int32_t i = 0, j=0, k=0;
3533     int32_t sources = Transliterator::countAvailableSources();
3534     for (i = 0; i < sources; i++) {
3535         UnicodeString source;
3536         Transliterator::getAvailableSource(i, source);
3537         UnicodeString test = _findMatch(source, tests);
3538         if (test.length() == 0) {
3539             logln((UnicodeString)"Skipping " + source + "-X");
3540             continue;
3541         }
3542         int32_t targets = Transliterator::countAvailableTargets(source);
3543         for (j = 0; j < targets; j++) {
3544             UnicodeString target;
3545             Transliterator::getAvailableTarget(j, source, target);
3546             int32_t variants = Transliterator::countAvailableVariants(source, target);
3547             for (k =0; k< variants; k++) {
3548                 UnicodeString variant;
3549                 UParseError err;
3550                 UErrorCode status = U_ZERO_ERROR;
3551
3552                 Transliterator::getAvailableVariant(k, source, target, variant);
3553                 UnicodeString id = source + "-" + target + "/" + variant;
3554
3555                 Transliterator *t = Transliterator::createInstance(id, UTRANS_FORWARD, err, status);
3556                 if (U_FAILURE(status)) {
3557                     errln((UnicodeString)"FAIL: Could not create " + id);
3558                     delete t;
3559                     continue;
3560                 }
3561                 status = U_ZERO_ERROR;
3562                 CheckIncrementalAux(t, test);
3563
3564                 UnicodeString rev;
3565                 _trans(*t, test, rev);
3566                 Transliterator *inv = t->createInverse(status);
3567                 if (U_FAILURE(status)) {
3568                     errln((UnicodeString)"FAIL: Could not create inverse of " + id);
3569                     delete t;
3570                     delete inv;
3571                     continue;
3572                 }
3573                 CheckIncrementalAux(inv, rev);
3574                 delete t;
3575                 delete inv;
3576             }
3577         }
3578     }
3579 }
3580
3581 void TransliteratorTest::CheckIncrementalAux(const Transliterator* t,
3582                                                       const UnicodeString& input) {
3583     UErrorCode ec = U_ZERO_ERROR;
3584     UTransPosition pos;
3585     UnicodeString test = input;
3586
3587     pos.contextStart = 0;
3588     pos.contextLimit = input.length();
3589     pos.start = 0;
3590     pos.limit = input.length();
3591
3592     t->transliterate(test, pos, ec);
3593     if (U_FAILURE(ec)) {
3594         errln((UnicodeString)"FAIL: transliterate() error " + u_errorName(ec));
3595         return;
3596     }
3597     UBool gotError = FALSE;
3598
3599     // we have a few special cases. Any-Remove (pos.start = 0, but also = limit) and U+XXXXX?X?
3600
3601     if (pos.start == 0 && pos.limit != 0 && t->getID() != "Hex-Any/Unicode") {
3602         errln((UnicodeString)"No Progress, " +
3603               t->getID() + ": " + formatInput(test, input, pos));
3604         gotError = TRUE;
3605     } else {
3606         logln((UnicodeString)"PASS Progress, " +
3607               t->getID() + ": " + formatInput(test, input, pos));
3608     }
3609     t->finishTransliteration(test, pos);
3610     if (pos.start != pos.limit) {
3611         errln((UnicodeString)"Incomplete, " +
3612               t->getID() + ": " + formatInput(test, input, pos));
3613         gotError = TRUE;
3614     }
3615 }
3616
3617 void TransliteratorTest::TestFunction() {
3618     // Careful with spacing and ';' here:  Phrase this exactly
3619     // as toRules() is going to return it.  If toRules() changes
3620     // with regard to spacing or ';', then adjust this string.
3621     UnicodeString rule =
3622         "([:Lu:]) > $1 '(' &Lower( $1 ) '=' &Hex( &Any-Lower( $1 ) ) ')';";
3623
3624     UParseError pe;
3625     UErrorCode ec = U_ZERO_ERROR;
3626     Transliterator *t = Transliterator::createFromRules("Test", rule, UTRANS_FORWARD, pe, ec);
3627     if (t == NULL) {
3628         errln("FAIL: createFromRules failed");
3629         return;
3630     }
3631
3632     UnicodeString r;
3633     t->toRules(r, TRUE);
3634     if (r == rule) {
3635         logln((UnicodeString)"OK: toRules() => " + r);
3636     } else {
3637         errln((UnicodeString)"FAIL: toRules() => " + r +
3638               ", expected " + rule);
3639     }
3640
3641     expect(*t, "The Quick Brown Fox",
3642            UNICODE_STRING_SIMPLE("T(t=\\u0074)he Q(q=\\u0071)uick B(b=\\u0062)rown F(f=\\u0066)ox"));
3643
3644     delete t;
3645 }
3646
3647 void TransliteratorTest::TestInvalidBackRef(void) {
3648     UnicodeString rule =  ". > $1;";
3649     UnicodeString rule2 =CharsToUnicodeString("(.) <> &hex/unicode($1) &name($1); . > $1; [{}] >\\u0020;");
3650     UParseError pe;
3651     UErrorCode ec = U_ZERO_ERROR;
3652     Transliterator *t = Transliterator::createFromRules("Test", rule, UTRANS_FORWARD, pe, ec);
3653     Transliterator *t2 = Transliterator::createFromRules("Test2", rule2, UTRANS_FORWARD, pe, ec);
3654
3655     if (t != NULL) {
3656         errln("FAIL: createFromRules should have returned NULL");
3657         delete t;
3658     }
3659
3660     if (t2 != NULL) {
3661         errln("FAIL: createFromRules should have returned NULL");
3662         delete t2;
3663     }
3664
3665     if (U_SUCCESS(ec)) {
3666         errln("FAIL: Ok: . > $1; => no error");
3667     } else {
3668         logln((UnicodeString)"Ok: . > $1; => " + u_errorName(ec));
3669     }
3670 }
3671
3672 void TransliteratorTest::TestMulticharStringSet() {
3673     // Basic testing
3674     const char* rule =
3675         "       [{aa}]       > x;"
3676         "         a          > y;"
3677         "       [b{bc}]      > z;"
3678         "[{gd}] { e          > q;"
3679         "         e } [{fg}] > r;" ;
3680
3681     UParseError pe;
3682     UErrorCode ec = U_ZERO_ERROR;
3683     Transliterator* t = Transliterator::createFromRules("Test", rule, UTRANS_FORWARD, pe, ec);
3684     if (t == NULL || U_FAILURE(ec)) {
3685         delete t;
3686         errln("FAIL: createFromRules failed");
3687         return;
3688     }
3689
3690     expect(*t, "a aa ab bc d gd de gde gdefg ddefg",
3691            "y x yz z d gd de gdq gdqfg ddrfg");
3692     delete t;
3693
3694     // Overlapped string test.  Make sure that when multiple
3695     // strings can match that the longest one is matched.
3696     rule =
3697         "    [a {ab} {abc}]    > x;"
3698         "           b          > y;"
3699         "           c          > z;"
3700         " q [t {st} {rst}] { e > p;" ;
3701
3702     t = Transliterator::createFromRules("Test", rule, UTRANS_FORWARD, pe, ec);
3703     if (t == NULL || U_FAILURE(ec)) {
3704         delete t;
3705         errln("FAIL: createFromRules failed");
3706         return;
3707     }
3708
3709     expect(*t, "a ab abc qte qste qrste",
3710            "x x x qtp qstp qrstp");
3711     delete t;
3712 }
3713
3714 // vvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvv
3715 // BEGIN TestUserFunction support factory
3716
3717 Transliterator* _TUFF[4];
3718 UnicodeString* _TUFID[4];
3719
3720 static Transliterator* U_EXPORT2 _TUFFactory(const UnicodeString& /*ID*/,
3721                                    Transliterator::Token context) {
3722     return _TUFF[context.integer]->clone();
3723 }
3724
3725 static void _TUFReg(const UnicodeString& ID, Transliterator* t, int32_t n) {
3726     _TUFF[n] = t;
3727     _TUFID[n] = new UnicodeString(ID);
3728     Transliterator::registerFactory(ID, _TUFFactory, Transliterator::integerToken(n));
3729 }
3730
3731 static void _TUFUnreg(int32_t n) {
3732     if (_TUFF[n] != NULL) {
3733         Transliterator::unregister(*_TUFID[n]);
3734         delete _TUFF[n];
3735         delete _TUFID[n];
3736     }
3737 }
3738
3739 // END TestUserFunction support factory
3740 // ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
3741
3742 /**
3743  * Test that user-registered transliterators can be used under function
3744  * syntax.
3745  */
3746 void TransliteratorTest::TestUserFunction() {
3747
3748     Transliterator* t;
3749     UParseError pe;
3750     UErrorCode ec = U_ZERO_ERROR;
3751
3752     // Setup our factory
3753     int32_t i;
3754     for (i=0; i<4; ++i) {
3755         _TUFF[i] = NULL;
3756     }
3757
3758     // There's no need to register inverses if we don't use them
3759     t = Transliterator::createFromRules("gif",
3760                                         UNICODE_STRING_SIMPLE("'\\'u(..)(..) > '<img src=\"http://www.unicode.org/gifs/24/' $1 '/U' $1$2 '.gif\">';"),
3761                                         UTRANS_FORWARD, pe, ec);
3762     if (t == NULL || U_FAILURE(ec)) {
3763         errln((UnicodeString)"FAIL: createFromRules gif " + u_errorName(ec));
3764         return;
3765     }
3766     _TUFReg("Any-gif", t, 0);
3767
3768     t = Transliterator::createFromRules("RemoveCurly",
3769                                         UNICODE_STRING_SIMPLE("[\\{\\}] > ; '\\N' > ;"),
3770                                         UTRANS_FORWARD, pe, ec);
3771     if (t == NULL || U_FAILURE(ec)) {
3772         errln((UnicodeString)"FAIL: createFromRules RemoveCurly " + u_errorName(ec));
3773         goto FAIL;
3774     }
3775     expect(*t, UNICODE_STRING_SIMPLE("\\N{name}"), "name");
3776     _TUFReg("Any-RemoveCurly", t, 1);
3777
3778     logln("Trying &hex");
3779     t = Transliterator::createFromRules("hex2",
3780                                         "(.) > &hex($1);",
3781                                         UTRANS_FORWARD, pe, ec);
3782     if (t == NULL || U_FAILURE(ec)) {
3783         errln("FAIL: createFromRules");
3784         goto FAIL;
3785     }
3786     logln("Registering");
3787     _TUFReg("Any-hex2", t, 2);
3788     t = Transliterator::createInstance("Any-hex2", UTRANS_FORWARD, ec);
3789     if (t == NULL || U_FAILURE(ec)) {
3790         errln((UnicodeString)"FAIL: createInstance Any-hex2 " + u_errorName(ec));
3791         goto FAIL;
3792     }
3793     expect(*t, "abc", UNICODE_STRING_SIMPLE("\\u0061\\u0062\\u0063"));
3794     delete t;
3795
3796     logln("Trying &gif");
3797     t = Transliterator::createFromRules("gif2",
3798                                         "(.) > &Gif(&Hex2($1));",
3799                                         UTRANS_FORWARD, pe, ec);
3800     if (t == NULL || U_FAILURE(ec)) {
3801         errln((UnicodeString)"FAIL: createFromRules gif2 " + u_errorName(ec));
3802         goto FAIL;
3803     }
3804     logln("Registering");
3805     _TUFReg("Any-gif2", t, 3);
3806     t = Transliterator::createInstance("Any-gif2", UTRANS_FORWARD, ec);
3807     if (t == NULL || U_FAILURE(ec)) {
3808         errln((UnicodeString)"FAIL: createInstance Any-gif2 " + u_errorName(ec));
3809         goto FAIL;
3810     }
3811     expect(*t, "ab", "<img src=\"http://www.unicode.org/gifs/24/00/U0061.gif\">"
3812            "<img src=\"http://www.unicode.org/gifs/24/00/U0062.gif\">");
3813     delete t;
3814
3815     // Test that filters are allowed after &
3816     t = Transliterator::createFromRules("test",
3817                                         "(.) > &Hex($1) ' ' &RemoveCurly(&Name($1)) ' ';",
3818                                         UTRANS_FORWARD, pe, ec);
3819     if (t == NULL || U_FAILURE(ec)) {
3820         errln((UnicodeString)"FAIL: createFromRules test " + u_errorName(ec));
3821         goto FAIL;
3822     }
3823     expect(*t, "abc",
3824            UNICODE_STRING_SIMPLE("\\u0061 LATIN SMALL LETTER A \\u0062 LATIN SMALL LETTER B \\u0063 LATIN SMALL LETTER C "));
3825     delete t;
3826
3827  FAIL:
3828     for (i=0; i<4; ++i) {
3829         _TUFUnreg(i);
3830     }
3831 }
3832
3833 /**
3834  * Test the Any-X transliterators.
3835  */
3836 void TransliteratorTest::TestAnyX(void) {
3837     UParseError parseError;
3838     UErrorCode status = U_ZERO_ERROR;
3839     Transliterator* anyLatin =
3840         Transliterator::createInstance("Any-Latin", UTRANS_FORWARD, parseError, status);
3841     if (anyLatin==0) {
3842         errln("FAIL: createInstance returned NULL");
3843         delete anyLatin;
3844         return;
3845     }
3846
3847     expect(*anyLatin,
3848            CharsToUnicodeString("greek:\\u03B1\\u03B2\\u03BA\\u0391\\u0392\\u039A hiragana:\\u3042\\u3076\\u304F cyrillic:\\u0430\\u0431\\u0446"),
3849            CharsToUnicodeString("greek:abkABK hiragana:abuku cyrillic:abc"));
3850
3851     delete anyLatin;
3852 }
3853
3854 /**
3855  * Test the source and target set API.  These are only implemented
3856  * for RBT and CompoundTransliterator at this time.
3857  */
3858 void TransliteratorTest::TestSourceTargetSet() {
3859     UErrorCode ec = U_ZERO_ERROR;
3860
3861     // Rules
3862     const char* r =
3863         "a > b; "
3864         "r [x{lu}] > q;";
3865
3866     // Expected source
3867     UnicodeSet expSrc("[arx{lu}]", ec);
3868
3869     // Expected target
3870     UnicodeSet expTrg("[bq]", ec);
3871
3872     UParseError pe;
3873     Transliterator* t = Transliterator::createFromRules("test", r, UTRANS_FORWARD, pe, ec);
3874
3875     if (U_FAILURE(ec)) {
3876         delete t;
3877         errln("FAIL: Couldn't set up test");
3878         return;
3879     }
3880
3881     UnicodeSet src; t->getSourceSet(src);
3882     UnicodeSet trg; t->getTargetSet(trg);
3883
3884     if (src == expSrc && trg == expTrg) {
3885         UnicodeString a, b;
3886         logln((UnicodeString)"Ok: " +
3887               r + " => source = " + src.toPattern(a, TRUE) +
3888               ", target = " + trg.toPattern(b, TRUE));
3889     } else {
3890         UnicodeString a, b, c, d;
3891         errln((UnicodeString)"FAIL: " +
3892               r + " => source = " + src.toPattern(a, TRUE) +
3893               ", expected " + expSrc.toPattern(b, TRUE) +
3894               "; target = " + trg.toPattern(c, TRUE) +
3895               ", expected " + expTrg.toPattern(d, TRUE));
3896     }
3897
3898     delete t;
3899 }
3900
3901 /**
3902  * Test handling of rule whitespace, for both RBT and UnicodeSet.
3903  */
3904 void TransliteratorTest::TestRuleWhitespace() {
3905     // Rules
3906     const char* r = "a > \\u200E b;";
3907
3908     UErrorCode ec = U_ZERO_ERROR;
3909     UParseError pe;
3910     Transliterator* t = Transliterator::createFromRules("test", CharsToUnicodeString(r), UTRANS_FORWARD, pe, ec);
3911
3912     if (U_FAILURE(ec)) {
3913         errln("FAIL: Couldn't set up test");
3914     } else {
3915         expect(*t, "a", "b");
3916     }
3917     delete t;
3918
3919     // UnicodeSet
3920     ec = U_ZERO_ERROR;
3921     UnicodeSet set(CharsToUnicodeString("[a \\u200E]"), ec);
3922
3923     if (U_FAILURE(ec)) {
3924         errln("FAIL: Couldn't set up test");
3925     } else {
3926         if (set.contains(0x200E)) {
3927             errln("FAIL: U+200E not being ignored by UnicodeSet");
3928         }
3929     }
3930 }
3931 //======================================================================
3932 // this method is in TestUScript.java
3933 //======================================================================
3934 void TransliteratorTest::TestAllCodepoints(){
3935     UScriptCode code= USCRIPT_INVALID_CODE;
3936     char id[256]={'\0'};
3937     char abbr[256]={'\0'};
3938     char newId[256]={'\0'};
3939     char newAbbrId[256]={'\0'};
3940     char oldId[256]={'\0'};
3941     char oldAbbrId[256]={'\0'};
3942
3943     UErrorCode status =U_ZERO_ERROR;
3944     UParseError pe;
3945
3946     for(uint32_t i = 0; i<=0x10ffff; i++){
3947         code =  uscript_getScript(i,&status);
3948         if(code == USCRIPT_INVALID_CODE){
3949             errln("uscript_getScript for codepoint \\U%08X failed.\n", i);
3950         }
3951         const char* myId = uscript_getName(code);
3952         if(!myId) {
3953           errln("Valid script code returned NULL name. Check your data!");
3954           return;
3955         }
3956         uprv_strcpy(id,myId);
3957         uprv_strcpy(abbr,uscript_getShortName(code));
3958
3959         uprv_strcpy(newId,"[:");
3960         uprv_strcat(newId,id);
3961         uprv_strcat(newId,":];NFD");
3962
3963         uprv_strcpy(newAbbrId,"[:");
3964         uprv_strcat(newAbbrId,abbr);
3965         uprv_strcat(newAbbrId,":];NFD");
3966
3967         if(uprv_strcmp(newId,oldId)!=0){
3968             Transliterator* t = Transliterator::createInstance(newId,UTRANS_FORWARD,pe,status);
3969             if(t==NULL || U_FAILURE(status)){
3970                 errln((UnicodeString)"FAIL: Could not create " + id);
3971             }
3972             delete t;
3973         }
3974         if(uprv_strcmp(newAbbrId,oldAbbrId)!=0){
3975             Transliterator* t = Transliterator::createInstance(newAbbrId,UTRANS_FORWARD,pe,status);
3976             if(t==NULL || U_FAILURE(status)){
3977                 errln((UnicodeString)"FAIL: Could not create " + id);
3978             }
3979             delete t;
3980         }
3981         uprv_strcpy(oldId,newId);
3982         uprv_strcpy(oldAbbrId, newAbbrId);
3983
3984     }
3985
3986 }
3987
/*
 * Boilerplate check for a transliterator obtained by ID.
 *
 *   id  - string literal naming the transliterator; it is pasted onto the
 *         failure message, so it must be a literal, not a variable.
 *   cls - the class the created instance is expected to be.
 *
 * Creates the transliterator in the forward direction and reports an error if
 * creation fails (failure status or NULL result) or if the instance's dynamic
 * class ID differs from cls's static class ID.  The instance is always deleted.
 */
#define TEST_TRANSLIT_ID(id, cls) { \
  UErrorCode ec = U_ZERO_ERROR; \
  Transliterator* t = Transliterator::createInstance(id, UTRANS_FORWARD, ec); \
  if (t == NULL || U_FAILURE(ec)) { \
    errln("FAIL: Couldn't create " id); \
  } else { \
    if (t->getDynamicClassID() != cls::getStaticClassID()) { \
      errln("FAIL: " #cls " dynamic and static class ID mismatch"); \
    } \
    /* *t = *t; */ /*can't do this: coverage test for assignment op*/ \
  } \
  delete t; \
}
4001
/*
 * Boilerplate check for a transliterator built from a rule string.
 *
 *   rule - string literal holding the rules; it is pasted onto the failure
 *          message, so it must be a literal, not a variable.
 *   cls  - the class the created instance is expected to be.
 *
 * Creates the transliterator (ID "_", forward direction) and reports an error
 * if creation fails (failure status or NULL result) or if the instance's
 * dynamic class ID differs from cls's static class ID.  The instance is always
 * deleted.
 */
#define TEST_TRANSLIT_RULE(rule, cls) { \
  UErrorCode ec = U_ZERO_ERROR; \
  UParseError pe; \
  Transliterator* t = Transliterator::createFromRules("_", rule, UTRANS_FORWARD, pe, ec); \
  if (t == NULL || U_FAILURE(ec)) { \
    errln("FAIL: Couldn't create " rule); \
  } else { \
    if (t->getDynamicClassID() != cls ::getStaticClassID()) { \
      errln("FAIL: " #cls " dynamic and static class ID mismatch"); \
    } \
    /* *t = *t; */ /*can't do this: coverage test for assignment op*/ \
  } \
  delete t; \
}
4016
/**
 * Runs the class-ID boilerplate check once per transliterator class: each
 * line creates a transliterator from the given ID (or, for the last line,
 * from a rule string) and verifies that its dynamic class ID matches the
 * static class ID of the class named in the second argument.
 */
void TransliteratorTest::TestBoilerplate() {
    TEST_TRANSLIT_ID("Any-Latin", AnyTransliterator);
    TEST_TRANSLIT_ID("Any-Hex", EscapeTransliterator);
    TEST_TRANSLIT_ID("Hex-Any", UnescapeTransliterator);
    TEST_TRANSLIT_ID("Lower", LowercaseTransliterator);
    TEST_TRANSLIT_ID("Upper", UppercaseTransliterator);
    TEST_TRANSLIT_ID("Title", TitlecaseTransliterator);
    TEST_TRANSLIT_ID("Null", NullTransliterator);
    TEST_TRANSLIT_ID("Remove", RemoveTransliterator);
    TEST_TRANSLIT_ID("Any-Name", UnicodeNameTransliterator);
    TEST_TRANSLIT_ID("Name-Any", NameUnicodeTransliterator);
    TEST_TRANSLIT_ID("NFD", NormalizationTransliterator);
    TEST_TRANSLIT_ID("Latin-Greek", CompoundTransliterator);
    // Rule-based transliterators are created from rules rather than an ID.
    TEST_TRANSLIT_RULE("a>b;", RuleBasedTransliterator);
}
4032
4033 void TransliteratorTest::TestAlternateSyntax() {
4034     // U+2206 == &
4035     // U+2190 == <
4036     // U+2192 == >
4037     // U+2194 == <>
4038     expect(CharsToUnicodeString("a \\u2192 x; b \\u2190 y; c \\u2194 z"),
4039            "abc",
4040            "xbz");
4041     expect(CharsToUnicodeString("([:^ASCII:]) \\u2192 \\u2206Name($1);"),
4042            CharsToUnicodeString("<=\\u2190; >=\\u2192; <>=\\u2194; &=\\u2206"),
4043            UNICODE_STRING_SIMPLE("<=\\N{LEFTWARDS ARROW}; >=\\N{RIGHTWARDS ARROW}; <>=\\N{LEFT RIGHT ARROW}; &=\\N{INCREMENT}"));
4044 }
4045
/*
 * Rule sets shared by TestBeginEnd() and TestBeginEndToRules().
 *
 * Entries are referenced by numeric index (see BEGIN_END_TEST_CASES and the
 * direct uses of BEGIN_END_RULES[17]), so the position of every entry matters.
 * Rule sets that use the ::BEGIN/::END syntax are disabled: their text is kept
 * inside a comment and an empty string takes their slot so that the indexes of
 * the remaining entries do not shift.
 */
static const char* BEGIN_END_RULES[] = {
    // [0]
    "abc > xy;"
    "aba > z;",

    // [1]
/*
    "::BEGIN;"
    "abc > xy;"
    "::END;"
    "::BEGIN;"
    "aba > z;"
    "::END;",
*/
    "", // test case commented out below, this is here to keep from messing up the indexes

    // [2]
/*
    "abc > xy;"
    "::BEGIN;"
    "aba > z;"
    "::END;",
*/
    "", // test case commented out below, this is here to keep from messing up the indexes

    // [3]
/*
    "::BEGIN;"
    "abc > xy;"
    "::END;"
    "aba > z;",
*/
    "", // test case commented out below, this is here to keep from messing up the indexes

    // [4]
    "abc > xy;"
    "::Null;"
    "aba > z;",

    // [5]
    "::Upper;"
    "ABC > xy;"
    "AB > x;"
    "C > z;"
    "::Upper;"
    "XYZ > p;"
    "XY > q;"
    "Z > r;"
    "::Upper;",

    // [6]
    "$ws = [[:Separator:][\\u0009-\\u000C]$];"
    "$delim = [\\-$ws];"
    "$ws $delim* > ' ';"
    "'-' $delim* > '-';",

    // [7]
    "::Null;"
    "$ws = [[:Separator:][\\u0009-\\u000C]$];"
    "$delim = [\\-$ws];"
    "$ws $delim* > ' ';"
    "'-' $delim* > '-';",

    // [8]
    "$ws = [[:Separator:][\\u0009-\\u000C]$];"
    "$delim = [\\-$ws];"
    "$ws $delim* > ' ';"
    "'-' $delim* > '-';"
    "::Null;",

    // [9]
    "$ws = [[:Separator:][\\u0009-\\u000C]$];"
    "$delim = [\\-$ws];"
    "::Null;"
    "$ws $delim* > ' ';"
    "'-' $delim* > '-';",

    // [10]
/*
    "::BEGIN;"
    "$ws = [[:Separator:][\\u0009-\\u000C]$];"
    "$delim = [\\-$ws];"
    "::END;"
    "$ws $delim* > ' ';"
    "'-' $delim* > '-';",
*/
    "", // test case commented out below, this is here to keep from messing up the indexes

    // [11]
/*
    "$ws = [[:Separator:][\\u0009-\\u000C]$];"
    "$delim = [\\-$ws];"
    "::BEGIN;"
    "$ws $delim* > ' ';"
    "'-' $delim* > '-';"
    "::END;",
*/
    "", // test case commented out below, this is here to keep from messing up the indexes

    // [12]
/*
    "$ws = [[:Separator:][\\u0009-\\u000C]$];"
    "$delim = [\\-$ws];"
    "$ab = [ab];"
    "::BEGIN;"
    "$ws $delim* > ' ';"
    "'-' $delim* > '-';"
    "::END;"
    "::BEGIN;"
    "$ab { ' ' } $ab > '-';"
    "c { ' ' > ;"
    "::END;"
    "::BEGIN;"
    "'a-a' > a\\%|a;"
    "::END;",
*/
    "", // test case commented out below, this is here to keep from messing up the indexes

    // [13]
    "$ws = [[:Separator:][\\u0009-\\u000C]$];"
    "$delim = [\\-$ws];"
    "$ab = [ab];"
    "::Null;"
    "$ws $delim* > ' ';"
    "'-' $delim* > '-';"
    "::Null;"
    "$ab { ' ' } $ab > '-';"
    "c { ' ' > ;"
    "::Null;"
    "'a-a' > a\\%|a;",

    // [14]
/*
    "::[abc];"
    "::BEGIN;"
    "abc > xy;"
    "::END;"
    "::BEGIN;"
    "aba > yz;"
    "::END;"
    "::Upper;",
*/
    "", // test case commented out below, this is here to keep from messing up the indexes

    // [15]
    "::[abc];"
    "abc > xy;"
    "::Null;"
    "aba > yz;"
    "::Upper;",

    // [16]
/*
    "::[abc];"
    "::BEGIN;"
    "abc <> xy;"
    "::END;"
    "::BEGIN;"
    "aba <> yz;"
    "::END;"
    "::Upper(Lower);"
    "::([XYZ]);"
*/
    "", // test case commented out below, this is here to keep from messing up the indexes

    // [17] -- the one reversible rule set; also instantiated with UTRANS_REVERSE by the tests
    "::[abc];"
    "abc <> xy;"
    "::Null;"
    "aba <> yz;"
    "::Upper(Lower);"
    "::([XYZ]);"
};
// Number of entries in BEGIN_END_RULES, including the empty placeholders.
static const int32_t BEGIN_END_RULES_length = (int32_t)(sizeof(BEGIN_END_RULES) / sizeof(BEGIN_END_RULES[0]));
4220
4221 /*
4222 (This entire test is commented out below and will need some heavy revision when we re-add
4223 the ::BEGIN/::END stuff)
4224 static const char* BOGUS_BEGIN_END_RULES[] = {
4225     // [7]
4226     "::BEGIN;"
4227     "abc > xy;"
4228     "::BEGIN;"
4229     "aba > z;"
4230     "::END;"
4231     "::END;",
4232
4233     // [8]
4234     "abc > xy;"
4235     " aba > z;"
4236     "::END;",
4237
4238     // [9]
4239     "::BEGIN;"
4240     "::Upper;"
4241     "::END;"
4242 };
4243 static const int32_t BOGUS_BEGIN_END_RULES_length = (int32_t)(sizeof(BOGUS_BEGIN_END_RULES) / sizeof(BOGUS_BEGIN_END_RULES[0]));
4244 */
4245
/*
 * Test cases for TestBeginEnd() and TestBeginEndToRules(), stored as a flat
 * array of triples: rule set, input text, expected output.  Both tests step
 * through the array three entries at a time.  Lines for the disabled
 * ::BEGIN/::END rule sets are kept as comments.
 */
static const char* BEGIN_END_TEST_CASES[] = {
    // rules             input                   expected output
    BEGIN_END_RULES[0],  "abc ababc aba",        "xy zbc z",
//    BEGIN_END_RULES[1],  "abc ababc aba",        "xy abxy z",
//    BEGIN_END_RULES[2],  "abc ababc aba",        "xy abxy z",
//    BEGIN_END_RULES[3],  "abc ababc aba",        "xy abxy z",
    BEGIN_END_RULES[4],  "abc ababc aba",        "xy abxy z",
    BEGIN_END_RULES[5],  "abccabaacababcbc",     "PXAARXQBR",

    BEGIN_END_RULES[6],  "e   e - e---e-  e",    "e e e-e-e",
    BEGIN_END_RULES[7],  "e   e - e---e-  e",    "e e e-e-e",
    BEGIN_END_RULES[8],  "e   e - e---e-  e",    "e e e-e-e",
    BEGIN_END_RULES[9],  "e   e - e---e-  e",    "e e e-e-e",
//    BEGIN_END_RULES[10],  "e   e - e---e-  e",    "e e e-e-e",
//    BEGIN_END_RULES[11], "e   e - e---e-  e",    "e e e-e-e",
//    BEGIN_END_RULES[12], "e   e - e---e-  e",    "e e e-e-e",
//    BEGIN_END_RULES[12], "a    a    a    a",     "a%a%a%a",
//    BEGIN_END_RULES[12], "a a-b c b a",          "a%a-b cb-a",
    BEGIN_END_RULES[13], "e   e - e---e-  e",    "e e e-e-e",
    BEGIN_END_RULES[13], "a    a    a    a",     "a%a%a%a",
    BEGIN_END_RULES[13], "a a-b c b a",          "a%a-b cb-a",

//    BEGIN_END_RULES[14], "abc xy ababc xyz aba", "XY xy ABXY xyz YZ",
    BEGIN_END_RULES[15], "abc xy ababc xyz aba", "XY xy ABXY xyz YZ",
//    BEGIN_END_RULES[16], "abc xy ababc xyz aba", "XY xy ABXY xyz YZ",
    BEGIN_END_RULES[17], "abc xy ababc xyz aba", "XY xy ABXY xyz YZ"
};
// Total number of array entries (three per test case), not the number of test cases.
static const int32_t BEGIN_END_TEST_CASES_length = (int32_t)(sizeof(BEGIN_END_TEST_CASES) / sizeof(BEGIN_END_TEST_CASES[0]));
4274
4275 void TransliteratorTest::TestBeginEnd() {
4276     // run through the list of test cases above
4277     int32_t i = 0;
4278     for (i = 0; i < BEGIN_END_TEST_CASES_length; i += 3) {
4279         expect((UnicodeString)"Test case #" + (i / 3),
4280                UnicodeString(BEGIN_END_TEST_CASES[i], -1, US_INV),
4281                UnicodeString(BEGIN_END_TEST_CASES[i + 1], -1, US_INV),
4282                UnicodeString(BEGIN_END_TEST_CASES[i + 2], -1, US_INV));
4283     }
4284
4285     // instantiate the one reversible rule set in the reverse direction and make sure it does the right thing
4286     UParseError parseError;
4287     UErrorCode status = U_ZERO_ERROR;
4288     Transliterator* reversed  = Transliterator::createFromRules("Reversed", UnicodeString(BEGIN_END_RULES[17]),
4289             UTRANS_REVERSE, parseError, status);
4290     if (reversed == 0 || U_FAILURE(status)) {
4291         reportParseError(UnicodeString("FAIL: Couldn't create reversed transliterator"), parseError, status);
4292     } else {
4293         expect(*reversed, UnicodeString("xy XY XYZ yz YZ"), UnicodeString("xy abc xaba yz aba"));
4294     }
4295     delete reversed;
4296
4297     // finally, run through the list of syntactically-ill-formed rule sets above and make sure
4298     // that all of them cause errors
4299 /*
4300 (commented out until we have the real ::BEGIN/::END stuff in place
4301     for (i = 0; i < BOGUS_BEGIN_END_RULES_length; i++) {
4302         UParseError parseError;
4303         UErrorCode status = U_ZERO_ERROR;
4304         Transliterator* t = Transliterator::createFromRules("foo", UnicodeString(BOGUS_BEGIN_END_RULES[i]),
4305                 UTRANS_FORWARD, parseError, status);
4306         if (!U_FAILURE(status)) {
4307             delete t;
4308             errln((UnicodeString)"Should have gotten syntax error from " + BOGUS_BEGIN_END_RULES[i]);
4309         }
4310     }
4311 */
4312 }
4313
4314 void TransliteratorTest::TestBeginEndToRules() {
4315     // run through the same list of test cases we used above, but this time, instead of just
4316     // instantiating a Transliterator from the rules and running the test against it, we instantiate
4317     // a Transliterator from the rules, do toRules() on it, instantiate a Transliterator from
4318     // the resulting set of rules, and make sure that the generated rule set is semantically equivalent
4319     // to (i.e., does the same thing as) the original rule set
4320     for (int32_t i = 0; i < BEGIN_END_TEST_CASES_length; i += 3) {
4321         UParseError parseError;
4322         UErrorCode status = U_ZERO_ERROR;
4323         Transliterator* t = Transliterator::createFromRules("--", UnicodeString(BEGIN_END_TEST_CASES[i], -1, US_INV),
4324                 UTRANS_FORWARD, parseError, status);
4325         if (U_FAILURE(status)) {
4326             reportParseError(UnicodeString("FAIL: Couldn't create transliterator"), parseError, status);
4327         } else {
4328             UnicodeString rules;
4329             t->toRules(rules, TRUE);
4330             Transliterator* t2 = Transliterator::createFromRules((UnicodeString)"Test case #" + (i / 3), rules,
4331                     UTRANS_FORWARD, parseError, status);
4332             if (U_FAILURE(status)) {
4333                 reportParseError(UnicodeString("FAIL: Couldn't create transliterator from generated rules"),
4334                         parseError, status);
4335                 delete t;
4336             } else {
4337                 expect(*t2,
4338                        UnicodeString(BEGIN_END_TEST_CASES[i + 1], -1, US_INV),
4339                        UnicodeString(BEGIN_END_TEST_CASES[i + 2], -1, US_INV));
4340                 delete t;
4341                 delete t2;
4342             }
4343         }
4344     }
4345
4346     // do the same thing for the reversible test case
4347     UParseError parseError;
4348     UErrorCode status = U_ZERO_ERROR;
4349     Transliterator* reversed = Transliterator::createFromRules("Reversed", UnicodeString(BEGIN_END_RULES[17]),
4350             UTRANS_REVERSE, parseError, status);
4351     if (U_FAILURE(status)) {
4352         reportParseError(UnicodeString("FAIL: Couldn't create reversed transliterator"), parseError, status);
4353     } else {
4354         UnicodeString rules;
4355         reversed->toRules(rules, FALSE);
4356         Transliterator* reversed2 = Transliterator::createFromRules("Reversed", rules, UTRANS_FORWARD,
4357                 parseError, status);
4358         if (U_FAILURE(status)) {
4359             reportParseError(UnicodeString("FAIL: Couldn't create reversed transliterator from generated rules"),
4360                     parseError, status);
4361             delete reversed;
4362         } else {
4363             expect(*reversed2,
4364                    UnicodeString("xy XY XYZ yz YZ"),
4365                    UnicodeString("xy abc xaba yz aba"));
4366             delete reversed;
4367             delete reversed2;
4368         }
4369     }
4370 }
4371
4372 void TransliteratorTest::TestRegisterAlias() {
4373     UnicodeString longID("Lower;[aeiou]Upper");
4374     UnicodeString shortID("Any-CapVowels");
4375     UnicodeString reallyShortID("CapVowels");
4376
4377     Transliterator::registerAlias(shortID, longID);
4378
4379     UErrorCode err = U_ZERO_ERROR;
4380     Transliterator* t1 = Transliterator::createInstance(longID, UTRANS_FORWARD, err);
4381     if (U_FAILURE(err)) {
4382         errln("Failed to instantiate transliterator with long ID");
4383         Transliterator::unregister(shortID);
4384         return;
4385     }
4386     Transliterator* t2 = Transliterator::createInstance(reallyShortID, UTRANS_FORWARD, err);
4387     if (U_FAILURE(err)) {
4388         errln("Failed to instantiate transliterator with short ID");
4389         delete t1;
4390         Transliterator::unregister(shortID);
4391         return;
4392     }
4393
4394     if (t1->getID() != longID)
4395         errln("Transliterator instantiated with long ID doesn't have long ID");
4396     if (t2->getID() != reallyShortID)
4397         errln("Transliterator instantiated with short ID doesn't have short ID");
4398
4399     UnicodeString rules1;
4400     UnicodeString rules2;
4401
4402     t1->toRules(rules1, TRUE);
4403     t2->toRules(rules2, TRUE);
4404     if (rules1 != rules2)
4405         errln("Alias transliterators aren't the same");
4406
4407     delete t1;
4408     delete t2;
4409     Transliterator::unregister(shortID);
4410
4411     t1 = Transliterator::createInstance(shortID, UTRANS_FORWARD, err);
4412     if (U_SUCCESS(err)) {
4413         errln("Instantiation with short ID succeeded after short ID was unregistered");
4414         delete t1;
4415     }
4416
4417     // try the same thing again, but this time with something other than
4418     // an instance of CompoundTransliterator
4419     UnicodeString realID("Latin-Greek");
4420     UnicodeString fakeID("Latin-dlgkjdflkjdl");
4421     Transliterator::registerAlias(fakeID, realID);
4422
4423     err = U_ZERO_ERROR;
4424     t1 = Transliterator::createInstance(realID, UTRANS_FORWARD, err);
4425     if (U_FAILURE(err)) {
4426         errln("Failed to instantiate transliterator with real ID");
4427         Transliterator::unregister(realID);
4428         return;
4429     }
4430     t2 = Transliterator::createInstance(fakeID, UTRANS_FORWARD, err);
4431     if (U_FAILURE(err)) {
4432         errln("Failed to instantiate transliterator with fake ID");
4433         delete t1;
4434         Transliterator::unregister(realID);
4435         return;
4436     }
4437
4438     t1->toRules(rules1, TRUE);
4439     t2->toRules(rules2, TRUE);
4440     if (rules1 != rules2)
4441         errln("Alias transliterators aren't the same");
4442
4443     delete t1;
4444     delete t2;
4445     Transliterator::unregister(fakeID);
4446 }
4447
4448 void TransliteratorTest::TestRuleStripping() {
4449     /*
4450 #
4451 \uE001>\u0C01; # SIGN
4452     */
4453     static const UChar rule[] = {
4454         0x0023,0x0020,0x000D,0x000A,
4455         0xE001,0x003E,0x0C01,0x003B,0x0020,0x0023,0x0020,0x0053,0x0049,0x0047,0x004E,0
4456     };
4457     static const UChar expectedRule[] = {
4458         0xE001,0x003E,0x0C01,0x003B,0
4459     };
4460     UChar result[sizeof(rule)/sizeof(rule[0])];
4461     UErrorCode status = U_ZERO_ERROR;
4462     int32_t len = utrans_stripRules(rule, (int32_t)(sizeof(rule)/sizeof(rule[0])), result, &status);
4463     if (len != u_strlen(expectedRule)) {
4464         errln("utrans_stripRules return len = %d", len);
4465     }
4466     if (u_strncmp(expectedRule, result, len) != 0) {
4467         errln("utrans_stripRules did not return expected string");
4468     }
4469 }
4470
4471 /**
4472  * Test the Halfwidth-Fullwidth transliterator (ticket 6281).
4473  */
4474 void TransliteratorTest::TestHalfwidthFullwidth(void) {
4475     UParseError parseError;
4476     UErrorCode status = U_ZERO_ERROR;
4477     Transliterator* hf = Transliterator::createInstance("Halfwidth-Fullwidth", UTRANS_FORWARD, parseError, status);
4478     Transliterator* fh = Transliterator::createInstance("Fullwidth-Halfwidth", UTRANS_FORWARD, parseError, status);
4479     if (hf == 0 || fh == 0) {
4480         errln("FAIL: createInstance failed");
4481         delete hf;
4482         delete fh;
4483         return;
4484     }
4485
4486     // Array of 2n items
4487     // Each item is
4488     //   "hf"|"fh"|"both",
4489     //   <Halfwidth>,
4490     //   <Fullwidth>
4491     const char* DATA[] = {
4492         "both",
4493         "\\uFFE9\\uFFEA\\uFFEB\\uFFEC\\u0061\\uFF71\\u00AF\\u0020",
4494         "\\u2190\\u2191\\u2192\\u2193\\uFF41\\u30A2\\uFFE3\\u3000",
4495     };
4496     int32_t DATA_length = (int32_t)(sizeof(DATA) / sizeof(DATA[0]));
4497
4498     for (int32_t i=0; i<DATA_length; i+=3) {
4499         UnicodeString h = CharsToUnicodeString(DATA[i+1]);
4500         UnicodeString f = CharsToUnicodeString(DATA[i+2]);
4501         switch (*DATA[i]) {
4502         case 0x68: //'h': // Halfwidth-Fullwidth only
4503             expect(*hf, h, f);
4504             break;
4505         case 0x66: //'f': // Fullwidth-Halfwidth only
4506             expect(*fh, f, h);
4507             break;
4508         case 0x62: //'b': // both directions
4509             expect(*hf, h, f);
4510             expect(*fh, f, h);
4511             break;
4512         }
4513     }
4514     delete hf;
4515     delete fh;
4516 }
4517
4518
4519     /**
4520      *  Test Thai.  The text is the first paragraph of "What is Unicode" from the Unicode.org web site.
4521      *              TODO: confirm that the expected results are correct.
4522      *              For now, test just confirms that C++ and Java give identical results.
4523      */
4524 void TransliteratorTest::TestThai(void) {
4525     UParseError parseError;
4526     UErrorCode status = U_ZERO_ERROR;
4527     Transliterator* tr = Transliterator::createInstance("Any-Latin", UTRANS_FORWARD, parseError, status);
4528     if (tr == 0) {
4529         errln("FAIL: createInstance failed");
4530         return;
4531     }
4532     if (U_FAILURE(status)) {
4533         errln("FAIL: createInstance failed with %s", u_errorName(status));
4534         return;
4535     }
4536     const char *thaiText =
4537         "\\u0e42\\u0e14\\u0e22\\u0e1e\\u0e37\\u0e49\\u0e19\\u0e10\\u0e32\\u0e19\\u0e41\\u0e25\\u0e49\\u0e27, \\u0e04\\u0e2d"
4538         "\\u0e21\\u0e1e\\u0e34\\u0e27\\u0e40\\u0e15\\u0e2d\\u0e23\\u0e4c\\u0e08\\u0e30\\u0e40\\u0e01\\u0e35\\u0e48\\u0e22"
4539         "\\u0e27\\u0e02\\u0e49\\u0e2d\\u0e07\\u0e01\\u0e31\\u0e1a\\u0e40\\u0e23\\u0e37\\u0e48\\u0e2d\\u0e07\\u0e02\\u0e2d"
4540         "\\u0e07\\u0e15\\u0e31\\u0e27\\u0e40\\u0e25\\u0e02. \\u0e04\\u0e2d\\u0e21\\u0e1e\\u0e34\\u0e27\\u0e40\\u0e15\\u0e2d"
4541         "\\u0e23\\u0e4c\\u0e08\\u0e31\\u0e14\\u0e40\\u0e01\\u0e47\\u0e1a\\u0e15\\u0e31\\u0e27\\u0e2d\\u0e31\\u0e01\\u0e29"
4542         "\\u0e23\\u0e41\\u0e25\\u0e30\\u0e2d\\u0e31\\u0e01\\u0e02\\u0e23\\u0e30\\u0e2d\\u0e37\\u0e48\\u0e19\\u0e46 \\u0e42"
4543         "\\u0e14\\u0e22\\u0e01\\u0e32\\u0e23\\u0e01\\u0e33\\u0e2b\\u0e19\\u0e14\\u0e2b\\u0e21\\u0e32\\u0e22\\u0e40\\u0e25"
4544         "\\u0e02\\u0e43\\u0e2b\\u0e49\\u0e2a\\u0e33\\u0e2b\\u0e23\\u0e31\\u0e1a\\u0e41\\u0e15\\u0e48\\u0e25\\u0e30\\u0e15"
4545         "\\u0e31\\u0e27. \\u0e01\\u0e48\\u0e2d\\u0e19\\u0e2b\\u0e19\\u0e49\\u0e32\\u0e17\\u0e35\\u0e48\\u0e4a Unicode \\u0e08"
4546         "\\u0e30\\u0e16\\u0e39\\u0e01\\u0e2a\\u0e23\\u0e49\\u0e32\\u0e07\\u0e02\\u0e36\\u0e49\\u0e19, \\u0e44\\u0e14\\u0e49"
4547         "\\u0e21\\u0e35\\u0e23\\u0e30\\u0e1a\\u0e1a encoding \\u0e2d\\u0e22\\u0e39\\u0e48\\u0e2b\\u0e25\\u0e32\\u0e22\\u0e23"
4548         "\\u0e49\\u0e2d\\u0e22\\u0e23\\u0e30\\u0e1a\\u0e1a\\u0e2a\\u0e33\\u0e2b\\u0e23\\u0e31\\u0e1a\\u0e01\\u0e32\\u0e23"
4549         "\\u0e01\\u0e33\\u0e2b\\u0e19\\u0e14\\u0e2b\\u0e21\\u0e32\\u0e22\\u0e40\\u0e25\\u0e02\\u0e40\\u0e2b\\u0e25\\u0e48"
4550         "\\u0e32\\u0e19\\u0e35\\u0e49. \\u0e44\\u0e21\\u0e48\\u0e21\\u0e35 encoding \\u0e43\\u0e14\\u0e17\\u0e35\\u0e48"
4551         "\\u0e21\\u0e35\\u0e08\\u0e33\\u0e19\\u0e27\\u0e19\\u0e15\\u0e31\\u0e27\\u0e2d\\u0e31\\u0e01\\u0e02\\u0e23\\u0e30"
4552         "\\u0e21\\u0e32\\u0e01\\u0e40\\u0e1e\\u0e35\\u0e22\\u0e07\\u0e1e\\u0e2d: \\u0e22\\u0e01\\u0e15\\u0e31\\u0e27\\u0e2d"
4553         "\\u0e22\\u0e48\\u0e32\\u0e07\\u0e40\\u0e0a\\u0e48\\u0e19, \\u0e40\\u0e09\\u0e1e\\u0e32\\u0e30\\u0e43\\u0e19\\u0e01"
4554         "\\u0e25\\u0e38\\u0e48\\u0e21\\u0e2a\\u0e2b\\u0e20\\u0e32\\u0e1e\\u0e22\\u0e38\\u0e42\\u0e23\\u0e1b\\u0e40\\u0e1e"
4555         "\\u0e35\\u0e22\\u0e07\\u0e41\\u0e2b\\u0e48\\u0e07\\u0e40\\u0e14\\u0e35\\u0e22\\u0e27 \\u0e01\\u0e47\\u0e15\\u0e49"
4556         "\\u0e2d\\u0e07\\u0e01\\u0e32\\u0e23\\u0e2b\\u0e25\\u0e32\\u0e22 encoding \\u0e43\\u0e19\\u0e01\\u0e32\\u0e23\\u0e04"
4557         "\\u0e23\\u0e2d\\u0e1a\\u0e04\\u0e25\\u0e38\\u0e21\\u0e17\\u0e38\\u0e01\\u0e20\\u0e32\\u0e29\\u0e32\\u0e43\\u0e19"
4558         "\\u0e01\\u0e25\\u0e38\\u0e48\\u0e21. \\u0e2b\\u0e23\\u0e37\\u0e2d\\u0e41\\u0e21\\u0e49\\u0e41\\u0e15\\u0e48\\u0e43"
4559         "\\u0e19\\u0e20\\u0e32\\u0e29\\u0e32\\u0e40\\u0e14\\u0e35\\u0e48\\u0e22\\u0e27 \\u0e40\\u0e0a\\u0e48\\u0e19 \\u0e20"
4560         "\\u0e32\\u0e29\\u0e32\\u0e2d\\u0e31\\u0e07\\u0e01\\u0e24\\u0e29 \\u0e01\\u0e47\\u0e44\\u0e21\\u0e48\\u0e21\\u0e35"
4561         " encoding \\u0e43\\u0e14\\u0e17\\u0e35\\u0e48\\u0e40\\u0e1e\\u0e35\\u0e22\\u0e07\\u0e1e\\u0e2d\\u0e2a\\u0e33\\u0e2b"
4562         "\\u0e23\\u0e31\\u0e1a\\u0e17\\u0e38\\u0e01\\u0e15\\u0e31\\u0e27\\u0e2d\\u0e31\\u0e01\\u0e29\\u0e23, \\u0e40\\u0e04"
4563         "\\u0e23\\u0e37\\u0e48\\u0e2d\\u0e07\\u0e2b\\u0e21\\u0e32\\u0e22\\u0e27\\u0e23\\u0e23\\u0e04\\u0e15\\u0e2d\\u0e19"
4564         " \\u0e41\\u0e25\\u0e30\\u0e2a\\u0e31\\u0e0d\\u0e25\\u0e31\\u0e01\\u0e29\\u0e13\\u0e4c\\u0e17\\u0e32\\u0e07\\u0e40"
4565         "\\u0e17\\u0e04\\u0e19\\u0e34\\u0e04\\u0e17\\u0e35\\u0e48\\u0e43\\u0e0a\\u0e49\\u0e01\\u0e31\\u0e19\\u0e2d\\u0e22"
4566         "\\u0e39\\u0e48\\u0e17\\u0e31\\u0e48\\u0e27\\u0e44\\u0e1b.";
4567
4568     const char *latinText =
4569         "doy ph\\u1ee5\\u0304\\u0302n \\u1e6d\\u0304h\\u0101n l\\u00e6\\u0302w, khxmphiwtexr\\u0312 ca ke\\u012b\\u0300"
4570         "ywk\\u0304\\u0125xng k\\u1ea1b re\\u1ee5\\u0304\\u0300xng k\\u0304hxng t\\u1ea1wlek\\u0304h. khxmphiwtexr"
4571         "\\u0312 c\\u1ea1d k\\u0115b t\\u1ea1w x\\u1ea1ks\\u0304\\u02b9r l\\u00e6a x\\u1ea1kk\\u0304h ra x\\u1ee5\\u0304"
4572         "\\u0300n\\u00ab doy k\\u0101r k\\u1ea3h\\u0304nd h\\u0304m\\u0101ylek\\u0304h h\\u0304\\u0131\\u0302 s\\u0304"
4573         "\\u1ea3h\\u0304r\\u1ea1b t\\u00e6\\u0300la t\\u1ea1w. k\\u0300xn h\\u0304n\\u0302\\u0101 th\\u012b\\u0300\\u0301"
4574         " Unicode ca t\\u0304h\\u016bk s\\u0304r\\u0302\\u0101ng k\\u0304h\\u1ee5\\u0302n, d\\u1ecb\\u0302 m\\u012b "
4575         "rabb encoding xy\\u016b\\u0300 h\\u0304l\\u0101y r\\u0302xy rabb s\\u0304\\u1ea3h\\u0304r\\u1ea1b k\\u0101"
4576         "r k\\u1ea3h\\u0304nd h\\u0304m\\u0101ylek\\u0304h h\\u0304el\\u0300\\u0101 n\\u012b\\u0302. m\\u1ecb\\u0300m"
4577         "\\u012b encoding d\\u0131 th\\u012b\\u0300 m\\u012b c\\u1ea3nwn t\\u1ea1w x\\u1ea1kk\\u0304hra m\\u0101k p"
4578         "he\\u012byng phx: yk t\\u1ea1wx\\u1ef3\\u0101ng ch\\u00e8n, c\\u0304heph\\u0101a n\\u0131 kl\\u00f9m s\\u0304"
4579         "h\\u0304p\\u0323h\\u0101ph yurop phe\\u012byng h\\u0304\\u00e6\\u0300ng de\\u012byw k\\u0306 t\\u0302xngk\\u0101"
4580         "r h\\u0304l\\u0101y encoding n\\u0131 k\\u0101r khrxbkhlum thuk p\\u0323h\\u0101s\\u0304\\u02b9\\u0101 n\\u0131"
4581         " kl\\u00f9m. h\\u0304r\\u1ee5\\u0304x m\\u00e6\\u0302t\\u00e6\\u0300 n\\u0131 p\\u0323h\\u0101s\\u0304\\u02b9"
4582         "\\u0101 de\\u012b\\u0300yw ch\\u00e8n p\\u0323h\\u0101s\\u0304\\u02b9\\u0101 x\\u1ea1ngkvs\\u0304\\u02b9 k\\u0306"
4583         " m\\u1ecb\\u0300m\\u012b encoding d\\u0131 th\\u012b\\u0300 phe\\u012byng phx s\\u0304\\u1ea3h\\u0304r\\u1ea1"
4584         "b thuk t\\u1ea1w x\\u1ea1ks\\u0304\\u02b9r, kher\\u1ee5\\u0304\\u0300xngh\\u0304m\\u0101y wrrkh txn l\\u00e6"
4585         "a s\\u0304\\u1ea1\\u1ef5l\\u1ea1ks\\u0304\\u02b9\\u1e47\\u0312 th\\u0101ng thekhnikh th\\u012b\\u0300 ch\\u0131"
4586         "\\u0302 k\\u1ea1n xy\\u016b\\u0300 th\\u1ea1\\u0300wp\\u1ecb.";
4587
4588
4589     UnicodeString  xlitText(thaiText);
4590     xlitText = xlitText.unescape();
4591     tr->transliterate(xlitText);
4592
4593     UnicodeString expectedText(latinText);
4594     expectedText = expectedText.unescape();
4595     expect(*tr, xlitText, expectedText);
4596
4597     delete tr;
4598 }
4599
4600
4601 //======================================================================
4602 // Support methods
4603 //======================================================================
4604 void TransliteratorTest::expectT(const UnicodeString& id,
4605                                  const UnicodeString& source,
4606                                  const UnicodeString& expectedResult) {
4607     UErrorCode ec = U_ZERO_ERROR;
4608     UParseError pe;
4609     Transliterator *t = Transliterator::createInstance(id, UTRANS_FORWARD, pe, ec);
4610     if (U_FAILURE(ec)) {
4611         errln((UnicodeString)"FAIL: Could not create " + id);
4612         delete t;
4613         return;
4614     }
4615     expect(*t, source, expectedResult);
4616     delete t;
4617 }
4618
4619 void TransliteratorTest::reportParseError(const UnicodeString& message,
4620                                           const UParseError& parseError,
4621                                           const UErrorCode& status) {
4622     errln(message +
4623           /*", parse error " + parseError.code +*/
4624           ", line " + parseError.line +
4625           ", offset " + parseError.offset +
4626           ", pre-context " + prettify(parseError.preContext, TRUE) +
4627           ", post-context " + prettify(parseError.postContext,TRUE) +
4628           ", Error: " + u_errorName(status));
4629 }
4630
/**
 * Convenience overload: same as the five-argument expect() that takes an ID,
 * with the placeholder ID "<ID>" for the transliterator built from `rules`.
 */
void TransliteratorTest::expect(const UnicodeString& rules,
                                const UnicodeString& source,
                                const UnicodeString& expectedResult,
                                UTransPosition *pos) {
    expect("<ID>", rules, source, expectedResult, pos);
}
4637
4638 void TransliteratorTest::expect(const UnicodeString& id,
4639                                 const UnicodeString& rules,
4640                                 const UnicodeString& source,
4641                                 const UnicodeString& expectedResult,
4642                                 UTransPosition *pos) {
4643     UErrorCode status = U_ZERO_ERROR;
4644     UParseError parseError;
4645     Transliterator* t = Transliterator::createFromRules(id, rules, UTRANS_FORWARD, parseError, status);
4646     if (U_FAILURE(status)) {
4647         reportParseError(UnicodeString("Couldn't create transliterator from ") + rules, parseError, status);
4648     } else {
4649         expect(*t, source, expectedResult, pos);
4650     }
4651     delete t;
4652 }
4653
/**
 * Round-trip check: `t` must turn `source` into `expectedResult`, and
 * `reverseTransliterator` must turn `expectedResult` back into `source`.
 */
void TransliteratorTest::expect(const Transliterator& t,
                                const UnicodeString& source,
                                const UnicodeString& expectedResult,
                                const Transliterator& reverseTransliterator) {
    expect(t, source, expectedResult);
    expect(reverseTransliterator, expectedResult, source);
}
4661
4662 void TransliteratorTest::expect(const Transliterator& t,
4663                                 const UnicodeString& source,
4664                                 const UnicodeString& expectedResult,
4665                                 UTransPosition *pos) {
4666     if (pos == 0) {
4667         UnicodeString result(source);
4668         t.transliterate(result);
4669         expectAux(t.getID() + ":String", source, result, expectedResult);
4670     }
4671     UTransPosition index={0, 0, 0, 0};
4672     if (pos != 0) {
4673         index = *pos;
4674     }
4675
4676     UnicodeString rsource(source);
4677     if (pos == 0) {
4678         t.transliterate(rsource);
4679     } else {
4680         // Do it all at once -- below we do it incrementally
4681         t.finishTransliteration(rsource, *pos);
4682     }
4683     expectAux(t.getID() + ":Replaceable", source, rsource, expectedResult);
4684
4685     // Test keyboard (incremental) transliteration -- this result
4686     // must be the same after we finalize (see below).
4687     UnicodeString log;
4688     rsource.remove();
4689     if (pos != 0) {
4690         rsource = source;
4691         formatInput(log, rsource, index);
4692         log.append(" -> ");
4693         UErrorCode status = U_ZERO_ERROR;
4694         t.transliterate(rsource, index, status);
4695         formatInput(log, rsource, index);
4696     } else {
4697         for (int32_t i=0; i<source.length(); ++i) {
4698             if (i != 0) {
4699                 log.append(" + ");
4700             }
4701             log.append(source.charAt(i)).append(" -> ");
4702             UErrorCode status = U_ZERO_ERROR;
4703             t.transliterate(rsource, index, source.charAt(i), status);
4704             formatInput(log, rsource, index);
4705         }
4706     }
4707
4708     // As a final step in keyboard transliteration, we must call
4709     // transliterate to finish off any pending partial matches that
4710     // were waiting for more input.
4711     t.finishTransliteration(rsource, index);
4712     log.append(" => ").append(rsource);
4713
4714     expectAux(t.getID() + ":Keyboard", log,
4715               rsource == expectedResult,
4716               expectedResult);
4717 }
4718
4719
4720 /**
4721  * @param appendTo result is appended to this param.
4722  * @param input the string being transliterated
4723  * @param pos the index struct
4724  */
4725 UnicodeString& TransliteratorTest::formatInput(UnicodeString &appendTo,
4726                                                const UnicodeString& input,
4727                                                const UTransPosition& pos) {
4728     // Output a string of the form aaa{bbb|ccc|ddd}eee, where
4729     // the {} indicate the context start and limit, and the ||
4730     // indicate the start and limit.
4731     if (0 <= pos.contextStart &&
4732         pos.contextStart <= pos.start &&
4733         pos.start <= pos.limit &&
4734         pos.limit <= pos.contextLimit &&
4735         pos.contextLimit <= input.length()) {
4736
4737         UnicodeString a, b, c, d, e;
4738         input.extractBetween(0, pos.contextStart, a);
4739         input.extractBetween(pos.contextStart, pos.start, b);
4740         input.extractBetween(pos.start, pos.limit, c);
4741         input.extractBetween(pos.limit, pos.contextLimit, d);
4742         input.extractBetween(pos.contextLimit, input.length(), e);
4743         appendTo.append(a).append((UChar)123/*{*/).append(b).
4744             append((UChar)PIPE).append(c).append((UChar)PIPE).append(d).
4745             append((UChar)125/*}*/).append(e);
4746     } else {
4747         appendTo.append((UnicodeString)"INVALID UTransPosition {cs=" +
4748                         pos.contextStart + ", s=" + pos.start + ", l=" +
4749                         pos.limit + ", cl=" + pos.contextLimit + "} on " +
4750                         input);
4751     }
4752     return appendTo;
4753 }
4754
4755 void TransliteratorTest::expectAux(const UnicodeString& tag,
4756                                    const UnicodeString& source,
4757                                    const UnicodeString& result,
4758                                    const UnicodeString& expectedResult) {
4759     expectAux(tag, source + " -> " + result,
4760               result == expectedResult,
4761               expectedResult);
4762 }
4763
4764 void TransliteratorTest::expectAux(const UnicodeString& tag,
4765                                    const UnicodeString& summary, UBool pass,
4766                                    const UnicodeString& expectedResult) {
4767     if (pass) {
4768         logln(UnicodeString("(")+tag+") " + prettify(summary));
4769     } else {
4770         errln(UnicodeString("FAIL: (")+tag+") "
4771               + prettify(summary)
4772               + ", expected " + prettify(expectedResult));
4773     }
4774 }
4775
4776 #endif /* #if !UCONFIG_NO_TRANSLITERATION */