icuSources/test/intltest/transtst.cpp

   1 // © 2016 and later: Unicode, Inc. and others.
   2 // License & terms of use: http://www.unicode.org/copyright.html
   3 /*
   4 **********************************************************************
   5 *   Copyright (C) 1999-2016, International Business Machines
   6 *   Corporation and others.  All Rights Reserved.
   7 **********************************************************************
   8 *   Date        Name        Description
   9 *   11/10/99    aliu        Creation.
  10 **********************************************************************
  11 */
  12
  13 #include "unicode/utypes.h"
  14
  15 #if !UCONFIG_NO_TRANSLITERATION
  16
  17 #include "transtst.h"
  18 #include "unicode/locid.h"
  19 #include "unicode/dtfmtsym.h"
  20 #include "unicode/normlzr.h"
  21 #include "unicode/translit.h"
  22 #include "unicode/uchar.h"
  23 #include "unicode/unifilt.h"
  24 #include "unicode/uniset.h"
  25 #include "unicode/ustring.h"
  26 #include "unicode/usetiter.h"
  27 #include "unicode/uscript.h"
  28 #include "unicode/utf16.h"
  29 #include "cpdtrans.h"
  30 #include "nultrans.h"
  31 #include "rbt.h"
  32 #include "rbt_pars.h"
  33 #include "anytrans.h"
  34 #include "esctrn.h"
  35 #include "name2uni.h"
  36 #include "nortrans.h"
  37 #include "remtrans.h"
  38 #include "titletrn.h"
  39 #include "tolowtrn.h"
  40 #include "toupptrn.h"
  41 #include "unesctrn.h"
  42 #include "uni2name.h"
  43 #include "cstring.h"
  44 #include "cmemory.h"
  45 #include <stdio.h>
  46
  47 /***********************************************************************
  48
  49                      HOW TO USE THIS TEST FILE
  50                                -or-
  51                   How I developed on two platforms
  52                 without losing (too much of) my mind
  53
  54
  55 1. Add new tests by copying/pasting/changing existing tests.  On Java,
  56    any public void method named Test...() taking no parameters becomes
  57    a test.  On C++, you need to modify the header and add a line to
  58    the runIndexedTest() dispatch method.
  59
  60 2. Make liberal use of the expect() method; it is your friend.
  61
  62 3. The tests in this file exactly match those in a sister file on the
  63    other side.  The two files are:
  64
  65    icu4j:  src/com/ibm/test/translit/TransliteratorTest.java
  66    icu4c:  source/test/intltest/transtst.cpp
  67
  68                   ==> THIS IS THE IMPORTANT PART <==
  69
  70    When you add a test in this file, add it in TransliteratorTest.java
  71    too.  Give it the same name and put it in the same relative place.
  72    This makes maintenance a lot simpler for any poor soul who ends up
  73    trying to synchronize the tests between icu4j and icu4c.
  74
  75 4. If you MUST enter a test that is NOT paralleled in the sister file,
  76    then add it in the special non-mirrored section.  These are
  77    labeled
  78
  79      "icu4j ONLY"
  80
  81    or
  82
  83      "icu4c ONLY"
  84
  85    Make sure you document the reason the test is here and not there.
  86
  87
  88 Thank you.
  89 The Management
  90 ***********************************************************************/
  91
  92 // Define character constants thusly to be EBCDIC-friendly
  93 enum {
  94     LEFT_BRACE=((UChar)0x007B), /*{*/
  95     PIPE      =((UChar)0x007C), /*|*/
  96     ZERO      =((UChar)0x0030), /*0*/
  97     UPPER_A   =((UChar)0x0041)  /*A*/
  98 };
  99
 100 TransliteratorTest::TransliteratorTest()
 101 :   DESERET_DEE((UChar32)0x10414),
 102     DESERET_dee((UChar32)0x1043C)
 103 {
 104 }
 105
 106 TransliteratorTest::~TransliteratorTest() {}
 107
 108 void
 109 TransliteratorTest::runIndexedTest(int32_t index, UBool exec,
 110                                    const char* &name, char* /*par*/) {
 111     switch (index) {
 112         TESTCASE(0,TestInstantiation);
 113         TESTCASE(1,TestSimpleRules);
 114         TESTCASE(2,TestRuleBasedInverse);
 115         TESTCASE(3,TestKeyboard);
 116         TESTCASE(4,TestKeyboard2);
 117         TESTCASE(5,TestKeyboard3);
 118         TESTCASE(6,TestArabic);
 119         TESTCASE(7,TestCompoundKana);
 120         TESTCASE(8,TestCompoundHex);
 121         TESTCASE(9,TestFiltering);
 122         TESTCASE(10,TestInlineSet);
 123         TESTCASE(11,TestPatternQuoting);
 124         TESTCASE(12,TestJ277);
 125         TESTCASE(13,TestJ243);
 126         TESTCASE(14,TestJ329);
 127         TESTCASE(15,TestSegments);
 128         TESTCASE(16,TestCursorOffset);
 129         TESTCASE(17,TestArbitraryVariableValues);
 130         TESTCASE(18,TestPositionHandling);
 131         TESTCASE(19,TestHiraganaKatakana);
 132         TESTCASE(20,TestCopyJ476);
 133         TESTCASE(21,TestAnchors);
 134         TESTCASE(22,TestInterIndic);
 135         TESTCASE(23,TestFilterIDs);
 136         TESTCASE(24,TestCaseMap);
 137         TESTCASE(25,TestNameMap);
 138         TESTCASE(26,TestLiberalizedID);
 139         TESTCASE(27,TestCreateInstance);
 140         TESTCASE(28,TestNormalizationTransliterator);
 141         TESTCASE(29,TestCompoundRBT);
 142         TESTCASE(30,TestCompoundFilter);
 143         TESTCASE(31,TestRemove);
 144         TESTCASE(32,TestToRules);
 145         TESTCASE(33,TestContext);
 146         TESTCASE(34,TestSupplemental);
 147         TESTCASE(35,TestQuantifier);
 148         TESTCASE(36,TestSTV);
 149         TESTCASE(37,TestCompoundInverse);
 150         TESTCASE(38,TestNFDChainRBT);
 151         TESTCASE(39,TestNullInverse);
 152         TESTCASE(40,TestAliasInverseID);
 153         TESTCASE(41,TestCompoundInverseID);
 154         TESTCASE(42,TestUndefinedVariable);
 155         TESTCASE(43,TestEmptyContext);
 156         TESTCASE(44,TestCompoundFilterID);
 157         TESTCASE(45,TestPropertySet);
 158         TESTCASE(46,TestNewEngine);
 159         TESTCASE(47,TestQuantifiedSegment);
 160         TESTCASE(48,TestDevanagariLatinRT);
 161         TESTCASE(49,TestTeluguLatinRT);
 162         TESTCASE(50,TestCompoundLatinRT);
 163         TESTCASE(51,TestSanskritLatinRT);
 164         TESTCASE(52,TestLocaleInstantiation);
 165         TESTCASE(53,TestTitleAccents);
 166         TESTCASE(54,TestLocaleResource);
 167         TESTCASE(55,TestParseError);
 168         TESTCASE(56,TestOutputSet);
 169         TESTCASE(57,TestVariableRange);
 170         TESTCASE(58,TestInvalidPostContext);
 171         TESTCASE(59,TestIDForms);
 172         TESTCASE(60,TestToRulesMark);
 173         TESTCASE(61,TestEscape);
 174         TESTCASE(62,TestAnchorMasking);
 175         TESTCASE(63,TestDisplayName);
 176         TESTCASE(64,TestSpecialCases);
 177 #if !UCONFIG_NO_FILE_IO
 178         TESTCASE(65,TestIncrementalProgress);
 179 #endif
 180         TESTCASE(66,TestSurrogateCasing);
 181         TESTCASE(67,TestFunction);
 182         TESTCASE(68,TestInvalidBackRef);
 183         TESTCASE(69,TestMulticharStringSet);
 184         TESTCASE(70,TestUserFunction);
 185         TESTCASE(71,TestAnyX);
 186         TESTCASE(72,TestSourceTargetSet);
 187         TESTCASE(73,TestGurmukhiDevanagari);
 188         TESTCASE(74,TestPatternWhiteSpace);
 189         TESTCASE(75,TestAllCodepoints);
 190         TESTCASE(76,TestBoilerplate);
 191         TESTCASE(77,TestAlternateSyntax);
 192         TESTCASE(78,TestBeginEnd);
 193         TESTCASE(79,TestBeginEndToRules);
 194         TESTCASE(80,TestRegisterAlias);
 195         TESTCASE(81,TestRuleStripping);
 196         TESTCASE(82,TestHalfwidthFullwidth);
 197         TESTCASE(83,TestThai);
 198         TESTCASE(84,TestAny);
 199         default: name = ""; break;
 200     }
 201 }
 202
 203 /**
 204  * Make sure every system transliterator can be instantiated.
 205  *
 206  * ALSO test that the result of toRules() for each rule is a valid
 207  * rule.  Do this here so we don't have to have another test that
 208  * instantiates everything as well.
 209  */
 210 void TransliteratorTest::TestInstantiation() {
 211     UErrorCode ec = U_ZERO_ERROR;
 212     StringEnumeration* avail = Transliterator::getAvailableIDs(ec);
 213     assertSuccess("getAvailableIDs()", ec);
 214     assertTrue("getAvailableIDs()!=NULL", avail!=NULL);
 215     int32_t n = Transliterator::countAvailableIDs();
 216     assertTrue("getAvailableIDs().count()==countAvailableIDs()",
 217                avail->count(ec) == n);
 218     assertSuccess("count()", ec);
 219     UnicodeString name;
 220     for (int32_t i=0; i<n; ++i) {
 221         const UnicodeString& id = *avail->snext(ec);
 222         if (!assertSuccess("snext()", ec) ||
 223             !assertTrue("snext()!=NULL", (&id)!=NULL, TRUE)) {
 224             break;
 225         }
 226         UnicodeString id2 = Transliterator::getAvailableID(i);
 227         if (id.length() < 1) {
 228             errln(UnicodeString("FAIL: getAvailableID(") +
 229                   i + ") returned empty string");
 230             continue;
 231         }
 232         if (id != id2) {
 233             errln(UnicodeString("FAIL: getAvailableID(") +
 234                   i + ") != getAvailableIDs().snext()");
 235             continue;
 236         }
 237         UParseError parseError;
 238         UErrorCode status = U_ZERO_ERROR;
 239         Transliterator* t = Transliterator::createInstance(id,
 240                               UTRANS_FORWARD, parseError,status);
 241         name.truncate(0);
 242         Transliterator::getDisplayName(id, name);
 243         if (t == 0) {
 244 #if UCONFIG_NO_BREAK_ITERATION
 245             // If UCONFIG_NO_BREAK_ITERATION is on, then only Thai should fail.
 246             if (id.compare((UnicodeString)"Thai-Latn") != 0 &&
 247                 id.compare((UnicodeString)"Thai-Latin") != 0)
 248 #endif
 249                 dataerrln(UnicodeString("FAIL: Couldn't create ") + id +
 250                       /*", parse error " + parseError.code +*/
 251                       ", line " + parseError.line +
 252                       ", offset " + parseError.offset +
 253                       ", pre-context " + prettify(parseError.preContext, TRUE) +
 254                       ", post-context " +prettify(parseError.postContext,TRUE) +
 255                       ", Error: " + u_errorName(status));
 256                 // When createInstance fails, it deletes the failing
 257                 // entry from the available ID list.  We detect this
 258                 // here by looking for a change in countAvailableIDs.
 259             int32_t nn = Transliterator::countAvailableIDs();
 260             if (nn == (n - 1)) {
 261                 n = nn;
 262                 --i; // Compensate for deleted entry
 263             }
 264         } else {
 265             logln(UnicodeString("OK: ") + name + " (" + id + ")");
 266
 267             // Now test toRules
 268             UnicodeString rules;
 269             t->toRules(rules, TRUE);
 270             Transliterator *u = Transliterator::createFromRules("x",
 271                                     rules, UTRANS_FORWARD, parseError,status);
 272             if (u == 0) {
 273                 errln(UnicodeString("FAIL: ") + id +
 274                       ".createFromRules() => bad rules" +
 275                       /*", parse error " + parseError.code +*/
 276                       ", line " + parseError.line +
 277                       ", offset " + parseError.offset +
 278                       ", context " + prettify(parseError.preContext, TRUE) +
 279                       ", rules: " + prettify(rules, TRUE));
 280             } else {
 281                 delete u;
 282             }
 283             delete t;
 284         }
 285     }
 286     assertTrue("snext()==NULL", avail->snext(ec)==NULL);
 287     assertSuccess("snext()", ec);
 288     delete avail;
 289
 290     // Now test the failure path
 291     UParseError parseError;
 292     UErrorCode status = U_ZERO_ERROR;
 293     UnicodeString id("<Not a valid Transliterator ID>");
 294     Transliterator* t = Transliterator::createInstance(id, UTRANS_FORWARD, parseError, status);
 295     if (t != 0) {
 296         errln("FAIL: " + id + " returned a transliterator");
 297         delete t;
 298     } else {
 299         logln("OK: Bogus ID handled properly");
 300     }
 301 }
 302
 303 void TransliteratorTest::TestSimpleRules(void) {
 304     /* Example: rules 1. ab>x|y
 305      *                2. yc>z
 306      *
 307      * []|eabcd  start - no match, copy e to tranlated buffer
 308      * [e]|abcd  match rule 1 - copy output & adjust cursor
 309      * [ex|y]cd  match rule 2 - copy output & adjust cursor
 310      * [exz]|d   no match, copy d to transliterated buffer
 311      * [exzd]|   done
 312      */
 313     expect(UnicodeString("ab>x|y;", "") +
 314            "yc>z",
 315            "eabcd", "exzd");
 316
 317     /* Another set of rules:
 318      *    1. ab>x|yzacw
 319      *    2. za>q
 320      *    3. qc>r
 321      *    4. cw>n
 322      *
 323      * []|ab       Rule 1
 324      * [x|yzacw]   No match
 325      * [xy|zacw]   Rule 2
 326      * [xyq|cw]    Rule 4
 327      * [xyqn]|     Done
 328      */
 329     expect(UnicodeString("ab>x|yzacw;") +
 330            "za>q;" +
 331            "qc>r;" +
 332            "cw>n",
 333            "ab", "xyqn");
 334
 335     /* Test categories
 336      */
 337     UErrorCode status = U_ZERO_ERROR;
 338     UParseError parseError;
 339     Transliterator *t = Transliterator::createFromRules(
 340         "<ID>",
 341         UnicodeString("$dummy=").append((UChar)0xE100) +
 342         UnicodeString(";"
 343                       "$vowel=[aeiouAEIOU];"
 344                       "$lu=[:Lu:];"
 345                       "$vowel } $lu > '!';"
 346                       "$vowel > '&';"
 347                       "'!' { $lu > '^';"
 348                       "$lu > '*';"
 349                       "a > ERROR", ""),
 350         UTRANS_FORWARD, parseError,
 351         status);
 352     if (U_FAILURE(status)) {
 353         dataerrln("FAIL: RBT constructor failed - %s", u_errorName(status));
 354         return;
 355     }
 356     expect(*t, "abcdefgABCDEFGU", "&bcd&fg!^**!^*&");
 357     delete t;
 358 }
 359
 360 /**
 361  * Test inline set syntax and set variable syntax.
 362  */
 363 void TransliteratorTest::TestInlineSet(void) {
 364     expect("{ [:Ll:] } x > y; [:Ll:] > z;", "aAbxq", "zAyzz");
 365     expect("a[0-9]b > qrs", "1a7b9", "1qrs9");
 366
 367     expect(UnicodeString(
 368            "$digit = [0-9];"
 369            "$alpha = [a-zA-Z];"
 370            "$alphanumeric = [$digit $alpha];" // ***
 371            "$special = [^$alphanumeric];"     // ***
 372            "$alphanumeric > '-';"
 373            "$special > '*';", ""),
 374
 375            "thx-1138", "---*----");
 376 }
 377
 378 /**
 379  * Create some inverses and confirm that they work.  We have to be
 380  * careful how we do this, since the inverses will not be true
 381  * inverses -- we can't throw any random string at the composition
 382  * of the transliterators and expect the identity function.  F x
 383  * F' != I.  However, if we are careful about the input, we will
 384  * get the expected results.
 385  */
 386 void TransliteratorTest::TestRuleBasedInverse(void) {
 387     UnicodeString RULES =
 388         UnicodeString("abc>zyx;") +
 389         "ab>yz;" +
 390         "bc>zx;" +
 391         "ca>xy;" +
 392         "a>x;" +
 393         "b>y;" +
 394         "c>z;" +
 395
 396         "abc<zyx;" +
 397         "ab<yz;" +
 398         "bc<zx;" +
 399         "ca<xy;" +
 400         "a<x;" +
 401         "b<y;" +
 402         "c<z;" +
 403
 404         "";
 405
 406     const char* DATA[] = {
 407         // Careful here -- random strings will not work.  If we keep
 408         // the left side to the domain and the right side to the range
 409         // we will be okay though (left, abc; right xyz).
 410         "a", "x",
 411         "abcacab", "zyxxxyy",
 412         "caccb", "xyzzy",
 413     };
 414
 415     int32_t DATA_length = UPRV_LENGTHOF(DATA);
 416
 417     UErrorCode status = U_ZERO_ERROR;
 418     UParseError parseError;
 419     Transliterator *fwd = Transliterator::createFromRules("<ID>", RULES,
 420                                 UTRANS_FORWARD, parseError, status);
 421     Transliterator *rev = Transliterator::createFromRules("<ID>", RULES,
 422                                 UTRANS_REVERSE, parseError, status);
 423     if (U_FAILURE(status)) {
 424         errln("FAIL: RBT constructor failed");
 425         return;
 426     }
 427     for (int32_t i=0; i<DATA_length; i+=2) {
 428         expect(*fwd, DATA[i], DATA[i+1]);
 429         expect(*rev, DATA[i+1], DATA[i]);
 430     }
 431     delete fwd;
 432     delete rev;
 433 }
 434
 435 /**
 436  * Basic test of keyboard.
 437  */
 438 void TransliteratorTest::TestKeyboard(void) {
 439     UParseError parseError;
 440     UErrorCode status = U_ZERO_ERROR;
 441     Transliterator *t = Transliterator::createFromRules("<ID>",
 442                               UnicodeString("psch>Y;")
 443                               +"ps>y;"
 444                               +"ch>x;"
 445                               +"a>A;",
 446                               UTRANS_FORWARD, parseError,
 447                               status);
 448     if (U_FAILURE(status)) {
 449         errln("FAIL: RBT constructor failed");
 450         return;
 451     }
 452     const char* DATA[] = {
 453         // insertion, buffer
 454         "a", "A",
 455         "p", "Ap",
 456         "s", "Aps",
 457         "c", "Apsc",
 458         "a", "AycA",
 459         "psch", "AycAY",
 460         0, "AycAY", // null means finishKeyboardTransliteration
 461     };
 462
 463     keyboardAux(*t, DATA, UPRV_LENGTHOF(DATA));
 464     delete t;
 465 }
 466
 467 /**
 468  * Basic test of keyboard with cursor.
 469  */
 470 void TransliteratorTest::TestKeyboard2(void) {
 471     UParseError parseError;
 472     UErrorCode status = U_ZERO_ERROR;
 473     Transliterator *t = Transliterator::createFromRules("<ID>",
 474                               UnicodeString("ych>Y;")
 475                               +"ps>|y;"
 476                               +"ch>x;"
 477                               +"a>A;",
 478                               UTRANS_FORWARD, parseError,
 479                               status);
 480     if (U_FAILURE(status)) {
 481         errln("FAIL: RBT constructor failed");
 482         return;
 483     }
 484     const char* DATA[] = {
 485         // insertion, buffer
 486         "a", "A",
 487         "p", "Ap",
 488         "s", "Aps", // modified for rollback - "Ay",
 489         "c", "Apsc", // modified for rollback - "Ayc",
 490         "a", "AycA",
 491         "p", "AycAp",
 492         "s", "AycAps", // modified for rollback - "AycAy",
 493         "c", "AycApsc", // modified for rollback - "AycAyc",
 494         "h", "AycAY",
 495         0, "AycAY", // null means finishKeyboardTransliteration
 496     };
 497
 498     keyboardAux(*t, DATA, UPRV_LENGTHOF(DATA));
 499     delete t;
 500 }
 501
 502 /**
 503  * Test keyboard transliteration with back-replacement.
 504  */
 505 void TransliteratorTest::TestKeyboard3(void) {
 506     // We want th>z but t>y.  Furthermore, during keyboard
 507     // transliteration we want t>y then yh>z if t, then h are
 508     // typed.
 509     UnicodeString RULES("t>|y;"
 510                         "yh>z;");
 511
 512     const char* DATA[] = {
 513         // Column 1: characters to add to buffer (as if typed)
 514         // Column 2: expected appearance of buffer after
 515         //           keyboard xliteration.
 516         "a", "a",
 517         "b", "ab",
 518         "t", "abt", // modified for rollback - "aby",
 519         "c", "abyc",
 520         "t", "abyct", // modified for rollback - "abycy",
 521         "h", "abycz",
 522         0, "abycz", // null means finishKeyboardTransliteration
 523     };
 524
 525     UParseError parseError;
 526     UErrorCode status = U_ZERO_ERROR;
 527     Transliterator *t = Transliterator::createFromRules("<ID>", RULES, UTRANS_FORWARD, parseError, status);
 528     if (U_FAILURE(status)) {
 529         errln("FAIL: RBT constructor failed");
 530         return;
 531     }
 532     keyboardAux(*t, DATA, UPRV_LENGTHOF(DATA));
 533     delete t;
 534 }
 535
 536 void TransliteratorTest::keyboardAux(const Transliterator& t,
 537                                      const char* DATA[], int32_t DATA_length) {
 538     UErrorCode status = U_ZERO_ERROR;
 539     UTransPosition index={0, 0, 0, 0};
 540     UnicodeString s;
 541     for (int32_t i=0; i<DATA_length; i+=2) {
 542         UnicodeString log;
 543         if (DATA[i] != 0) {
 544             log = s + " + "
 545                 + DATA[i]
 546                 + " -> ";
 547             t.transliterate(s, index, DATA[i], status);
 548         } else {
 549             log = s + " => ";
 550             t.finishTransliteration(s, index);
 551         }
 552         // Show the start index '{' and the cursor '|'
 553         UnicodeString a, b, c;
 554         s.extractBetween(0, index.contextStart, a);
 555         s.extractBetween(index.contextStart, index.start, b);
 556         s.extractBetween(index.start, s.length(), c);
 557         log.append(a).
 558             append((UChar)LEFT_BRACE).
 559             append(b).
 560             append((UChar)PIPE).
 561             append(c);
 562         if (s == DATA[i+1] && U_SUCCESS(status)) {
 563             logln(log);
 564         } else {
 565             errln(UnicodeString("FAIL: ") + log + ", expected " + DATA[i+1]);
 566         }
 567     }
 568 }
 569
 570 void TransliteratorTest::TestArabic(void) {
 571 // Test disabled for 2.0 until new Arabic transliterator can be written.
 572 //    /*
 573 //    const char* DATA[] = {
 574 //        "Arabic", "\u062a\u062a\u0645\u062a\u0639\u0020"+
 575 //                  "\u0627\u0644\u0644\u063a\u0629\u0020"+
 576 //                  "\u0627\u0644\u0639\u0631\u0628\u0628\u064a\u0629\u0020"+
 577 //                  "\u0628\u0628\u0646\u0638\u0645\u0020"+
 578 //                  "\u0643\u062a\u0627\u0628\u0628\u064a\u0629\u0020"+
 579 //                  "\u062c\u0645\u064a\u0644\u0629",
 580 //    };
 581 //    */
 582 //
 583 //    UChar ar_raw[] = {
 584 //        0x062a, 0x062a, 0x0645, 0x062a, 0x0639, 0x0020, 0x0627,
 585 //        0x0644, 0x0644, 0x063a, 0x0629, 0x0020, 0x0627, 0x0644,
 586 //        0x0639, 0x0631, 0x0628, 0x0628, 0x064a, 0x0629, 0x0020,
 587 //        0x0628, 0x0628, 0x0646, 0x0638, 0x0645, 0x0020, 0x0643,
 588 //        0x062a, 0x0627, 0x0628, 0x0628, 0x064a, 0x0629, 0x0020,
 589 //        0x062c, 0x0645, 0x064a, 0x0644, 0x0629, 0
 590 //    };
 591 //    UnicodeString ar(ar_raw);
 592 //    UErrorCode status=U_ZERO_ERROR;
 593 //    UParseError parseError;
 594 //    Transliterator *t = Transliterator::createInstance("Latin-Arabic", UTRANS_FORWARD, parseError, status);
 595 //    if (t == 0) {
 596 //        errln("FAIL: createInstance failed");
 597 //        return;
 598 //    }
 599 //    expect(*t, "Arabic", ar);
 600 //    delete t;
 601 }
 602
 603 /**
 604  * Compose the Kana transliterator forward and reverse and try
 605  * some strings that should come out unchanged.
 606  */
 607 void TransliteratorTest::TestCompoundKana(void) {
 608     UParseError parseError;
 609     UErrorCode status = U_ZERO_ERROR;
 610     Transliterator* t = Transliterator::createInstance("Latin-Hiragana;Hiragana-Latin", UTRANS_FORWARD, parseError, status);
 611     if (t == 0) {
 612         dataerrln("FAIL: construction of Latin-Hiragana;Hiragana-Latin failed - %s", u_errorName(status));
 613     } else {
 614         expect(*t, "aaaaa", "aaaaa");
 615         delete t;
 616     }
 617 }
 618
 619 /**
 620  * Compose the hex transliterators forward and reverse.
 621  */
 622 void TransliteratorTest::TestCompoundHex(void) {
 623     UParseError parseError;
 624     UErrorCode status = U_ZERO_ERROR;
 625     Transliterator* a = Transliterator::createInstance("Any-Hex", UTRANS_FORWARD, parseError, status);
 626     Transliterator* b = Transliterator::createInstance("Hex-Any", UTRANS_FORWARD, parseError, status);
 627     Transliterator* transab[] = { a, b };
 628     Transliterator* transba[] = { b, a };
 629     if (a == 0 || b == 0) {
 630         errln("FAIL: construction failed");
 631         delete a;
 632         delete b;
 633         return;
 634     }
 635     // Do some basic tests of a
 636     expect(*a, "01", UnicodeString("\\u0030\\u0031", ""));
 637     // Do some basic tests of b
 638     expect(*b, UnicodeString("\\u0030\\u0031", ""), "01");
 639
 640     Transliterator* ab = new CompoundTransliterator(transab, 2);
 641     UnicodeString s("abcde", "");
 642     expect(*ab, s, s);
 643
 644     UnicodeString str(s);
 645     a->transliterate(str);
 646     Transliterator* ba = new CompoundTransliterator(transba, 2);
 647     expect(*ba, str, str);
 648
 649     delete ab;
 650     delete ba;
 651     delete a;
 652     delete b;
 653 }
 654
 655 int gTestFilterClassID = 0;
 656 /**
 657  * Used by TestFiltering().
 658  */
 659 class TestFilter : public UnicodeFilter {
 660     virtual UnicodeFunctor* clone() const {
 661         return new TestFilter(*this);
 662     }
 663     virtual UBool contains(UChar32 c) const {
 664         return c != (UChar)0x0063 /*c*/;
 665     }
 666     // Stubs
 667     virtual UnicodeString& toPattern(UnicodeString& result,
 668                                      UBool /*escapeUnprintable*/) const {
 669         return result;
 670     }
 671     virtual UBool matchesIndexValue(uint8_t /*v*/) const {
 672         return FALSE;
 673     }
 674     virtual void addMatchSetTo(UnicodeSet& /*toUnionTo*/) const {}
 675 public:
 676     UClassID getDynamicClassID() const { return (UClassID)&gTestFilterClassID; }
 677 };
 678
 679 /**
 680  * Do some basic tests of filtering.
 681  */
 682 void TransliteratorTest::TestFiltering(void) {
 683     UParseError parseError;
 684     UErrorCode status = U_ZERO_ERROR;
 685     Transliterator* hex = Transliterator::createInstance("Any-Hex", UTRANS_FORWARD, parseError, status);
 686     if (hex == 0) {
 687         errln("FAIL: createInstance(Any-Hex) failed");
 688         return;
 689     }
 690     hex->adoptFilter(new TestFilter());
 691     UnicodeString s("abcde");
 692     hex->transliterate(s);
 693     UnicodeString exp("\\u0061\\u0062c\\u0064\\u0065", "");
 694     if (s == exp) {
 695         logln(UnicodeString("Ok:   \"") + exp + "\"");
 696     } else {
 697         logln(UnicodeString("FAIL: \"") + s + "\", wanted \"" + exp + "\"");
 698     }
 699
 700     // ICU4C ONLY. Do not find Transliterator.orphanFilter() in ICU4J.
 701     UnicodeFilter *f = hex->orphanFilter();
 702     if (f == NULL){
 703         errln("FAIL: orphanFilter() should get a UnicodeFilter");
 704     } else {
 705         delete f;
 706     }
 707     delete hex;
 708 }
 709
 710 /**
 711  * Test anchors
 712  */
 713 void TransliteratorTest::TestAnchors(void) {
 714     expect(UnicodeString("^a  > 0; a$ > 2 ; a > 1;", ""),
 715            "aaa",
 716            "012");
 717     expect(UnicodeString("$s=[z$]; $s{a>0; a}$s>2; a>1;", ""),
 718            "aaa",
 719            "012");
 720     expect(UnicodeString("^ab  > 01 ;"
 721            " ab  > |8 ;"
 722            "  b  > k ;"
 723            " 8x$ > 45 ;"
 724            " 8x  > 77 ;", ""),
 725
 726            "ababbabxabx",
 727            "018k7745");
 728     expect(UnicodeString("$s = [z$] ;"
 729            "$s{ab    > 01 ;"
 730            "   ab    > |8 ;"
 731            "    b    > k ;"
 732            "   8x}$s > 45 ;"
 733            "   8x    > 77 ;", ""),
 734
 735            "abzababbabxzabxabx",
 736            "01z018k45z01x45");
 737 }
 738
 739 /**
 740  * Test pattern quoting and escape mechanisms.
 741  */
 742 void TransliteratorTest::TestPatternQuoting(void) {
 743     // Array of 3n items
 744     // Each item is <rules>, <input>, <expected output>
 745     const UnicodeString DATA[] = {
 746         UnicodeString(UChar(0x4E01)) + ">'[male adult]'",
 747         UnicodeString(UChar(0x4E01)),
 748         "[male adult]"
 749     };
 750
 751     for (int32_t i=0; i<3; i+=3) {
 752         logln(UnicodeString("Pattern: ") + prettify(DATA[i]));
 753         UParseError parseError;
 754         UErrorCode status = U_ZERO_ERROR;
 755         Transliterator *t = Transliterator::createFromRules("<ID>", DATA[i], UTRANS_FORWARD, parseError, status);
 756         if (U_FAILURE(status)) {
 757             errln("RBT constructor failed");
 758         } else {
 759             expect(*t, DATA[i+1], DATA[i+2]);
 760         }
 761         delete t;
 762     }
 763 }
 764
 765 /**
 766  * Regression test for bugs found in Greek transliteration.
 767  */
 768 void TransliteratorTest::TestJ277(void) {
 769     UErrorCode status = U_ZERO_ERROR;
 770     UParseError parseError;
 771     Transliterator *gl = Transliterator::createInstance("Greek-Latin; NFD; [:M:]Remove; NFC", UTRANS_FORWARD, parseError, status);
 772     if (gl == NULL) {
 773         dataerrln("FAIL: createInstance(Greek-Latin) returned NULL - %s", u_errorName(status));
 774         return;
 775     }
 776
 777     UChar sigma = 0x3C3;
 778     UChar upsilon = 0x3C5;
 779     UChar nu = 0x3BD;
 780 //    UChar PHI = 0x3A6;
 781     UChar alpha = 0x3B1;
 782 //    UChar omega = 0x3C9;
 783 //    UChar omicron = 0x3BF;
 784 //    UChar epsilon = 0x3B5;
 785
 786     // sigma upsilon nu -> syn
 787     UnicodeString syn;
 788     syn.append(sigma).append(upsilon).append(nu);
 789     expect(*gl, syn, "syn");
 790
 791     // sigma alpha upsilon nu -> saun
 792     UnicodeString sayn;
 793     sayn.append(sigma).append(alpha).append(upsilon).append(nu);
 794     expect(*gl, sayn, "saun");
 795
 796     // Again, using a smaller rule set
 797     UnicodeString rules(
 798                 "$alpha   = \\u03B1;"
 799                 "$nu      = \\u03BD;"
 800                 "$sigma   = \\u03C3;"
 801                 "$ypsilon = \\u03C5;"
 802                 "$vowel   = [aeiouAEIOU$alpha$ypsilon];"
 803                 "s <>           $sigma;"
 804                 "a <>           $alpha;"
 805                 "u <>  $vowel { $ypsilon;"
 806                 "y <>           $ypsilon;"
 807                 "n <>           $nu;",
 808                 "");
 809     Transliterator *mini = Transliterator::createFromRules("mini", rules, UTRANS_REVERSE, parseError, status);
 810     if (U_FAILURE(status)) { errln("FAIL: Transliterator constructor failed"); return; }
 811     expect(*mini, syn, "syn");
 812     expect(*mini, sayn, "saun");
 813     delete mini;
 814     mini = NULL;
 815
 816 #if !UCONFIG_NO_FORMATTING
 817     // Transliterate the Greek locale data
 818     Locale el("el");
 819     DateFormatSymbols syms(el, status);
 820     if (U_FAILURE(status)) { errln("FAIL: Transliterator constructor failed"); return; }
 821     int32_t i, count;
 822     const UnicodeString* data = syms.getMonths(count);
 823     for (i=0; i<count; ++i) {
 824         if (data[i].length() == 0) {
 825             continue;
 826         }
 827         UnicodeString out(data[i]);
 828         gl->transliterate(out);
 829         UBool ok = TRUE;
 830         if (data[i].length() >= 2 && out.length() >= 2 &&
 831             u_isupper(data[i].charAt(0)) && u_islower(data[i].charAt(1))) {
 832             if (!(u_isupper(out.charAt(0)) && u_islower(out.charAt(1)))) {
 833                 ok = FALSE;
 834             }
 835         }
 836         if (ok) {
 837             logln(prettify(data[i] + " -> " + out));
 838         } else {
 839             errln(UnicodeString("FAIL: ") + prettify(data[i] + " -> " + out));
 840         }
 841     }
 842 #endif
 843
 844     delete gl;
 845 }
 846
 847 /**
 848  * J243: prefix and suffix handling in the hex transliterators.
 849  */
 850 void TransliteratorTest::TestJ243(void) {
 851     // The stock Hex-Any transliterator is expected to recognize the
 852     // backslash-u, backslash-U, u+ and U+ escape forms.
 853     UErrorCode ec = U_ZERO_ERROR;
 854     Transliterator *hexToAny =
 855         Transliterator::createInstance("Hex-Any", UTRANS_FORWARD, ec);
 856     if (assertSuccess("getInstance", ec)) {
 857         UnicodeString escaped("\\u0041+\\U00000042,U+0043uU+0044z", "");
 858         expect(*hexToAny, escaped, "A+B,CuDz");
 859     }
 860     delete hexToAny;
 861
 862 //    // Try a custom Hex-Unicode
 863 //    // \uXXXX and &#xXXXX;
 864 //    ec = U_ZERO_ERROR;
 865 //    HexToUnicodeTransliterator hex2(UnicodeString("\\\\u###0;&\\#x###0\\;", ""), ec);
 866 //    expect(hex2, UnicodeString("\\u61\\u062\\u0063\\u00645\\u66x&#x30;&#x031;&#x0032;&#x00033;", ""),
 867 //           "abcd5fx012&#x00033;");
 868 //    // Try custom Any-Hex (default is tested elsewhere)
 869 //    ec = U_ZERO_ERROR;
 870 //    UnicodeToHexTransliterator hex3(UnicodeString("&\\#x###0;", ""), ec);
 871 //    expect(hex3, "012", "&#x30;&#x31;&#x32;");
 872 }
 873
 874 /**
 875  * J329: rule parsers must give useful syntax-error diagnostics.
 876  */
 877 void TransliteratorTest::TestJ329(void) {
 878     struct { UBool containsErrors; const char* rule; } DATA[] = {
 879         { FALSE, "a > b; c > d" },
 880         { TRUE,  "a > b; no operator; c > d" },
 881     };
 882
 883     for (int32_t idx = 0; idx < UPRV_LENGTHOF(DATA); ++idx) {
 884         UParseError parseError;
 885         UErrorCode status = U_ZERO_ERROR;
 886         Transliterator *rbt = Transliterator::createFromRules(
 887             "<ID>", DATA[idx].rule, UTRANS_FORWARD, parseError, status);
 888         const UBool gotError = U_FAILURE(status);
 889
 890         // Describe the outcome; parser diagnostics are added on failure.
 891         UnicodeString desc(DATA[idx].rule);
 892         if (gotError) {
 893             desc.append(" -> error");
 894             desc = desc + ", ParseError code=" + u_errorName(status) +
 895                 " line=" + parseError.line +
 896                 " offset=" + parseError.offset +
 897                 " context=" + parseError.preContext;
 898         } else {
 899             desc.append(" -> no error");
 900         }
 901
 902         if (gotError != DATA[idx].containsErrors) {
 903             errln(UnicodeString("FAIL: ") + desc);
 904         } else {
 905             logln(UnicodeString("Ok:   ") + desc);
 906         }
 907         delete rbt;
 908     }
 909 }
 910
 911 /**
 912  * Exercise rule segments "(...)" and the $n references to them.
 913  */
 914 void TransliteratorTest::TestSegments(void) {
 915     // Flat table of triples: <rules>, <input>, <expected output>.
 916     UnicodeString DATA[] = {
 917         "([a-z]) '.' ([0-9]) > $2 '-' $1",
 918         "abc.123.xyz.456",
 919         "ab1-c23.xy4-z56",
 920
 921         // nested
 922         "(([a-z])([0-9])) > $1 '.' $2 '.' $3;",
 923         "a1 b2",
 924         "a1.a.1 b2.b.2",
 925     };
 926
 927     for (int32_t c = 0; c < UPRV_LENGTHOF(DATA) / 3; ++c) {
 928         const UnicodeString *row = &DATA[3 * c];
 929         logln("Pattern: " + prettify(row[0]));
 930
 931         UErrorCode status = U_ZERO_ERROR;
 932         UParseError parseError;
 933         Transliterator *t = Transliterator::createFromRules("ID", row[0], UTRANS_FORWARD, parseError, status);
 934         if (U_SUCCESS(status)) {
 935             expect(*t, row[1], row[2]);
 936         } else {
 937             errln("FAIL: RBT constructor");
 938         }
 939         delete t;
 940     }
 941 }
 942
 943 /**
 944  * Verify cursor placement (the '|' and '@' markers in the rules)
 945  * outside of the matched key.
 946  */
 947 void TransliteratorTest::TestCursorOffset(void) {
 948     // Triples of <rules>, <input>, <expected output>.
 949     UnicodeString DATA[] = {
 950         "pre {alpha} post > | @ ALPHA ;"
 951         "eALPHA > beta ;"
 952         "pre {beta} post > BETA @@ | ;"
 953         "post > xyz",
 954
 955         "prealphapost prebetapost",
 956
 957         "prbetaxyz preBETApost",
 958     };
 959     const UnicodeString *end = DATA + UPRV_LENGTHOF(DATA);
 960
 961     for (const UnicodeString *row = DATA; row < end; row += 3) {
 962         logln("Pattern: " + prettify(*row));
 963         UErrorCode status = U_ZERO_ERROR;
 964         UParseError parseError;
 965         Transliterator *t = Transliterator::createFromRules("<ID>", row[0], UTRANS_FORWARD, parseError, status);
 966         if (U_SUCCESS(status)) {
 967             expect(*t, row[1], row[2]);
 968         } else {
 969             errln("FAIL: RBT constructor");
 970         }
 971         delete t;
 972     }
 973 }
 974
 975 /**
 976  * Variables may hold empty values or values longer than one
 977  * character, and may be referenced from inside UnicodeSets.
 978  */
 979 void TransliteratorTest::TestArbitraryVariableValues(void) {
 980     // Groups of three: <rules>, <input>, <expected output>.
 981     UnicodeString DATA[] = {
 982         "$abe = ab;"
 983         "$pat = x[yY]z;"
 984         "$ll  = 'a-z';"
 985         "$llZ = [$ll];"
 986         "$llY = [$ll$pat];"
 987         "$emp = ;"
 988
 989         "$abe > ABE;"
 990         "$pat > END;"
 991         "$llZ > 1;"
 992         "$llY > 2;"
 993         "7$emp 8 > 9;"
 994         "",
 995
 996         "ab xYzxyz stY78",
 997         "ABE ENDEND 1129",
 998     };
 999     const int32_t total = UPRV_LENGTHOF(DATA);
1000
1001     for (int32_t base = 0; base < total; base += 3) {
1002         const UnicodeString &rules = DATA[base];
1003         logln("Pattern: " + prettify(rules));
1004         UErrorCode status = U_ZERO_ERROR;
1005         UParseError parseError;
1006         Transliterator *t = Transliterator::createFromRules("<ID>", rules, UTRANS_FORWARD, parseError, status);
1007         if (U_SUCCESS(status)) {
1008             expect(*t, DATA[base + 1], DATA[base + 2]);
1009         } else {
1010             errln("FAIL: RBT constructor");
1011         }
1012         delete t;
1013     }
1014 }
1015
1016 /**
1017  * J474: check that position-based transliteration honors
1018  * contextStart, contextLimit, start, and limit.
1019  */
1020 void TransliteratorTest::TestPositionHandling(void) {
1021     // Triples of <rules>, <input>, <expected output>
1022     // (the trailing comment on each input echoes its POS row).
1023     const char* DATA[] = {
1024         "a{t} > SS ; {t}b > UU ; {t} > TT ;",
1025         "xtat txtb", // pos 0,9,0,9
1026         "xTTaSS TTxUUb",
1027
1028         "a{t} > SS ; {t}b > UU ; {t} > TT ; a > A ; b > B ;",
1029         "xtat txtb", // pos 2,9,3,8
1030         "xtaSS TTxUUb",
1031
1032         "a{t} > SS ; {t}b > UU ; {t} > TT ; a > A ; b > B ;",
1033         "xtat txtb", // pos 3,8,3,8
1034         "xtaTT TTxTTb",
1035     };
1036
1037     // One row of four per DATA triple:
1038     // contextStart, contextLimit, start, limit
1039     int32_t POS[] = {
1040         0, 9, 0, 9,
1041         2, 9, 3, 8,
1042         3, 8, 3, 8,
1043     };
1044
1045     const int32_t caseCount = UPRV_LENGTHOF(DATA) / 3;
1046     for (int32_t c = 0; c < caseCount; ++c) {
1047         const char* const *text = DATA + 3*c;   // rules, input, expected
1048         const int32_t *bounds = POS + 4*c;
1049
1050         UParseError parseError;
1051         UErrorCode status = U_ZERO_ERROR;
1052         Transliterator *t = Transliterator::createFromRules("<ID>",
1053                                 text[0], UTRANS_FORWARD, parseError, status);
1054         if (U_FAILURE(status)) {
1055             delete t;
1056             errln("FAIL: RBT constructor");
1057             return;
1058         }
1059         UTransPosition pos;
1060         pos.contextStart = bounds[0];
1061         pos.contextLimit = bounds[1];
1062         pos.start = bounds[2];
1063         pos.limit = bounds[3];
1064         UnicodeString rsource(text[1]);
1065         t->transliterate(rsource, pos, status);
1066         if (U_FAILURE(status)) {
1067             delete t;
1068             errln("FAIL: transliterate");
1069             return;
1070         }
1071         t->finishTransliteration(rsource, pos);
1072         expectAux(text[0], text[1], rsource, text[2]);
1073         delete t;
1074     }
1075 }
1076
1077 /**
1078  * Check Hiragana-Katakana and Katakana-Hiragana against a small
1079  * table of paired strings.
1080  */
1081 void TransliteratorTest::TestHiraganaKatakana(void) {
1082     // Flat table of triples:
1083     // direction tag "hk"|"kh"|"both", <Hiragana>, <Katakana>
1084     const char* DATA[] = {
1085         "both",
1086         "\\u3042\\u3090\\u3099\\u3092\\u3050",
1087         "\\u30A2\\u30F8\\u30F2\\u30B0",
1088
1089         "kh",
1090         "\\u307C\\u3051\\u3060\\u3042\\u3093\\u30FC",
1091         "\\u30DC\\u30F6\\u30C0\\u30FC\\u30F3\\u30FC",
1092     };
1093     const int32_t DATA_length = UPRV_LENGTHOF(DATA);
1094
1095     UErrorCode status = U_ZERO_ERROR;
1096     UParseError parseError;
1097     Transliterator* hk = Transliterator::createInstance("Hiragana-Katakana", UTRANS_FORWARD, parseError, status);
1098     Transliterator* kh = Transliterator::createInstance("Katakana-Hiragana", UTRANS_FORWARD, parseError, status);
1099     if (hk == 0 || kh == 0) {
1100         dataerrln("FAIL: createInstance failed - %s", u_errorName(status));
1101         delete hk;
1102         delete kh;
1103         return;
1104     }
1105
1106     for (int32_t row = 0; row < DATA_length; row += 3) {
1107         // The first character of the tag picks the direction(s).  It is
1108         // compared by code value: 0x68 'h', 0x6B 'k', 0x62 'b'.  Any
1109         // other tag runs no check at all.
1110         const char tag = *DATA[row];
1111         const UnicodeString hiragana = CharsToUnicodeString(DATA[row+1]);
1112         const UnicodeString katakana = CharsToUnicodeString(DATA[row+2]);
1113
1114         if (tag == 0x68 || tag == 0x62) {   // "hk" or "both"
1115             expect(*hk, hiragana, katakana);
1116         }
1117         if (tag == 0x6B || tag == 0x62) {   // "kh" or "both"
1118             expect(*kh, katakana, hiragana);
1119         }
1120     }
1121     delete hk;
1122     delete kh;
1123 }
1124
1125 /**
1126  * J476: a clone of an RBT must keep working after the original
1127  * has been destroyed (the destructors are the real test here).
1128  */
1129 void TransliteratorTest::TestCopyJ476(void) {
1130     UParseError parseError;
1131     UErrorCode status = U_ZERO_ERROR;
1132     Transliterator *t1 = Transliterator::createFromRules("t1",
1133         "a>A;b>B;'foo'+>'bar'", UTRANS_FORWARD, parseError, status);
1134     if (U_FAILURE(status)) {
1135         errln("FAIL: RBT constructor");
1136         return;
1137     }
1138     Transliterator *t2 = t1->clone(); // Call copy constructor under the covers.
1139     expect(*t1, "abcfoofoo", "ABcbar");
1140     delete t1;
1141     // clone() can return NULL; report that rather than dereferencing it.
1142     if (t2 == 0) {
1143         errln("FAIL: clone() returned NULL");
1144         return;
1145     }
1146     expect(*t2, "abcfoofoo", "ABcbar");
1147     delete t2;
1148 }
1149
1150 /**
1151  * ICU4C Jitterbug 483: the inter-Indic transliterators, which are
1152  * composed, are checked here through Devanagari-Gujarati.
1153  */
1154 void TransliteratorTest::TestInterIndic(void) {
1155     const UnicodeString ID("Devanagari-Gujarati", "");
1156     UParseError parseError;
1157     UErrorCode status = U_ZERO_ERROR;
1158     Transliterator* dg = Transliterator::createInstance(ID, UTRANS_FORWARD, parseError, status);
1159     if (dg == 0) {
1160         dataerrln("FAIL: createInstance(" + ID + ") returned NULL - " + u_errorName(status));
1161         return;
1162     }
1163     // The instance must report the ID it was requested under.
1164     const UnicodeString id = dg->getID();
1165     if (!(id == ID)) {
1166         errln("FAIL: createInstance(" + ID + ")->getID() => " + id);
1167     }
1168     expect(*dg, CharsToUnicodeString("\\u0901\\u090B\\u0925"),
1169                 CharsToUnicodeString("\\u0A81\\u0A8B\\u0AA5"));
1170     delete dg;
1171 }
1172
1173 /**
1174  * J918: filter syntax embedded in transliterator IDs.
1175  */
1176 void TransliteratorTest::TestFilterIDs(void) {
1177     // Groups of four strings:
1178     // <id>, <inverse id>, <input>, <expected output>
1179     const char* DATA[] = {
1180         "[aeiou]Any-Hex", // ID
1181         "[aeiou]Hex-Any", // expected inverse ID
1182         "quizzical",      // src
1183         "q\\u0075\\u0069zz\\u0069c\\u0061l", // expected ID.translit(src)
1184
1185         "[aeiou]Any-Hex;[^5]Hex-Any",
1186         "[^5]Any-Hex;[aeiou]Hex-Any",
1187         "quizzical",
1188         "q\\u0075izzical",
1189
1190         "[abc]Null",
1191         "[abc]Null",
1192         "xyz",
1193         "xyz",
1194     };
1195     const int32_t groupCount = UPRV_LENGTHOF(DATA) / 4;
1196
1197     for (int32_t g = 0; g < groupCount; ++g) {
1198         const char* const *group = DATA + 4*g;
1199         UnicodeString ID(group[0], "");
1200         UnicodeString uID(group[1], "");
1201         UnicodeString source(group[2], "");
1202         UnicodeString expected(group[3], "");
1203         UErrorCode status = U_ZERO_ERROR;
1204         UParseError parseError;
1205         Transliterator *t = Transliterator::createInstance(ID, UTRANS_FORWARD, parseError, status);
1206         if (t == 0) {
1207             errln("FAIL: createInstance(" + ID + ") returned NULL");
1208             return;
1209         }
1210         expect(*t, source, expected);
1211
1212         // The instance should report exactly the requested ID ...
1213         const UnicodeString &actualID = t->getID();
1214         if (ID != actualID) {
1215             errln("FAIL: createInstance(" + ID + ").getID() => " + actualID);
1216         }
1217
1218         // ... and its inverse should report the expected inverse ID.
1219         Transliterator *u = t->createInverse(status);
1220         if (u == 0) {
1221             errln("FAIL: " + ID + ".createInverse() returned NULL");
1222         } else if (u->getID() != uID) {
1223             errln("FAIL: " + ID + ".createInverse().getID() => " +
1224                   u->getID() + ", expected " + uID);
1225         }
1226         delete u;
1227         delete t;
1228     }
1229 }
1230
1231 /**
1232  * Check the Upper, Lower and Title case-mapping transliterators,
1233  * each created with an inline [^xyzXYZ] filter in its ID.
1234  */
1235 void TransliteratorTest::TestCaseMap(void) {
1236     UErrorCode status = U_ZERO_ERROR;
1237     UParseError parseError;
1238     Transliterator* toUpper =
1239         Transliterator::createInstance("Any-Upper[^xyzXYZ]", UTRANS_FORWARD, parseError, status);
1240     Transliterator* toLower =
1241         Transliterator::createInstance("Any-Lower[^xyzXYZ]", UTRANS_FORWARD, parseError, status);
1242     Transliterator* toTitle =
1243         Transliterator::createInstance("Any-Title[^xyzXYZ]", UTRANS_FORWARD, parseError, status);
1244     if (toUpper != 0 && toLower != 0 && toTitle != 0) {
1245         // x, y and z (either case) are excluded by the filter, so the
1246         // expected strings keep them exactly as they appear in the input.
1247         expect(*toUpper, "The quick brown fox jumped over the lazy dogs.",
1248                "THE QUICK BROWN FOx JUMPED OVER THE LAzy DOGS.");
1249         expect(*toLower, "The quIck brown fOX jUMPED OVER THE LAzY dogs.",
1250                "the quick brown foX jumped over the lazY dogs.");
1251         expect(*toTitle, "the quick brown foX can't jump over the laZy dogs.",
1252                "The Quick Brown FoX Can't Jump Over The LaZy Dogs.");
1253     } else {
1254         errln("FAIL: createInstance returned NULL");
1255     }
1256
1257     // Deleting a null pointer is harmless, so one cleanup path suffices.
1258     delete toUpper;
1259     delete toLower;
1260     delete toTitle;
1261 }
1262
1263 /**
1264  * Check Any-Name and Name-Any separately, then chained together
1265  * as a round trip.
1266  */
1267 void TransliteratorTest::TestNameMap(void) {
1268     UErrorCode status = U_ZERO_ERROR;
1269     UParseError parseError;
1270     Transliterator* uni2name =
1271         Transliterator::createInstance("Any-Name[^abc]", UTRANS_FORWARD, parseError, status);
1272     Transliterator* name2uni =
1273         Transliterator::createInstance("Name-Any", UTRANS_FORWARD, parseError, status);
1274     const UBool created = (uni2name != 0 && name2uni != 0);
1275     if (created) {
1276         // Careful:  CharsToUS will convert "\\N" => "N"; use "\\\\N" for \N
1277         expect(*uni2name, CharsToUnicodeString("\\u00A0abc\\u4E01\\u00B5\\u0A81\\uFFFD\\u0004\\u0009\\u0081\\uFFFF"),
1278                CharsToUnicodeString("\\\\N{NO-BREAK SPACE}abc\\\\N{CJK UNIFIED IDEOGRAPH-4E01}\\\\N{MICRO SIGN}\\\\N{GUJARATI SIGN CANDRABINDU}\\\\N{REPLACEMENT CHARACTER}\\\\N{<control-0004>}\\\\N{<control-0009>}\\\\N{<control-0081>}\\\\N{<noncharacter-FFFF>}"));
1279         expect(*name2uni, UNICODE_STRING_SIMPLE("{\\N { NO-BREAK SPACE}abc\\N{  CJK UNIFIED  IDEOGRAPH-4E01  }\\N{x\\N{MICRO SIGN}\\N{GUJARATI SIGN CANDRABINDU}\\N{REPLACEMENT CHARACTER}\\N{<control-0004>}\\N{<control-0009>}\\N{<control-0081>}\\N{<noncharacter-FFFF>}\\N{<control-0004>}\\N{"),
1280                CharsToUnicodeString("{\\u00A0abc\\u4E01\\\\N{x\\u00B5\\u0A81\\uFFFD\\u0004\\u0009\\u0081\\uFFFF\\u0004\\\\N{"));
1281     } else {
1282         errln("FAIL: createInstance returned NULL");
1283     }
1284     delete uni2name;
1285     delete name2uni;
1286     if (!created) {
1287         return;
1288     }
1289
1290     // Round trip: Any-Name chained with Name-Any must leave the text unchanged.
1291     Transliterator* roundTrip =
1292         Transliterator::createInstance("Any-Name;Name-Any", UTRANS_FORWARD, parseError, status);
1293     if (roundTrip == 0) {
1294         errln("FAIL: createInstance returned NULL");
1295         return;
1296     }
1297
1298     // Careful:  CharsToUS will convert "\\N" => "N"; use "\\\\N" for \N
1299     const UnicodeString s = CharsToUnicodeString("{\\u00A0abc\\u4E01\\\\N{x\\u00B5\\u0A81\\uFFFD\\u0004\\u0009\\u0081\\uFFFF\\u0004\\\\N{");
1300     expect(*roundTrip, s, s);
1301     delete roundTrip;
1302 }
1303
1304 /**
1305  * Test liberalized ID syntax (1006c).  DATA rows are
1306  * <ID to create>, <expected getID() or NULL>, <description>.
1307  */
1308 void TransliteratorTest::TestLiberalizedID(void) {
1309     // A NULL expected ID marks a case whose getID() check is disabled:
1310     // whether getID() should return canonical case was undecided. [aliu]
1311     const char* DATA[] = {
1312         "latin-greek", NULL /*"Latin-Greek"*/, "case insensitivity",
1313         "  Null  ", "Null", "whitespace",
1314         " Latin[a-z]-Greek  ", "[a-z]Latin-Greek", "inline filter",
1315         "  null  ; latin-greek  ", NULL /*"Null;Latin-Greek"*/, "compound whitespace",
1316     };
1317     const int32_t DATA_length = UPRV_LENGTHOF(DATA);
1318     UParseError parseError;
1319     for (int32_t i=0; i<DATA_length; i+=3) {
1320         // Reset per case: createInstance() bails out immediately on an
1321         // incoming failure code, so one bad ID would fail all later cases.
1322         UErrorCode status = U_ZERO_ERROR;
1323         Transliterator *t = Transliterator::createInstance(DATA[i], UTRANS_FORWARD, parseError, status);
1324         if (t == 0) {
1325             dataerrln(UnicodeString("FAIL: ") + DATA[i+2] +
1326                   " cannot create ID \"" + DATA[i] + "\" - " + u_errorName(status));
1327         } else {
1328             UnicodeString exp;
1329             if (DATA[i+1]) {
1330                 exp = UnicodeString(DATA[i+1], "");
1331             }
1332             // Don't worry about getID() if the expected char*
1333             // is NULL -- exp is then left empty.
1334             if (exp.length() == 0 || exp == t->getID()) {
1335                 logln(UnicodeString("Ok: ") + DATA[i+2] +
1336                       " create ID \"" + DATA[i] + "\" => \"" +
1337                       exp + "\"");
1338             } else {
1339                 errln(UnicodeString("FAIL: ") + DATA[i+2] +
1340                       " create ID \"" + DATA[i] + "\" => \"" +
1341                       t->getID() + "\", exp \"" + exp + "\"");
1342             }
1343             delete t;
1344         }
1345     }
1346 }
1347
1348 /* Jitterbug 912 and 2689: createInstance() on valid and invalid IDs */
1349 void TransliteratorTest::TestCreateInstance(){
1350     // Directions are matched by pointer identity against these two.
1351     const char* FORWARD = "F";
1352     const char* REVERSE = "R";
1353     const char* DATA[] = {
1354         // Columns: id, direction, expected ID ("" if expect failure)
1355         "Latin-Hangul", REVERSE, "Hangul-Latin", // JB#912
1356
1357         // JB#2689: bad compound causes crash
1358         "InvalidSource-InvalidTarget", FORWARD, "",
1359         "InvalidSource-InvalidTarget", REVERSE, "",
1360         "Hex-Any;InvalidSource-InvalidTarget", FORWARD, "",
1361         "Hex-Any;InvalidSource-InvalidTarget", REVERSE, "",
1362         "InvalidSource-InvalidTarget;Hex-Any", FORWARD, "",
1363         "InvalidSource-InvalidTarget;Hex-Any", REVERSE, "",
1364
1365         NULL
1366     };
1367
1368     for (const char** row = DATA; row[0]; row += 3) {
1369         const UnicodeString id(row[0]);
1370         const UnicodeString expID(row[2]);
1371         const UBool forward = (row[1] == FORWARD);
1372         UErrorCode ec = U_ZERO_ERROR;
1373         UParseError err;
1374         Transliterator* t = Transliterator::createInstance(
1375             id, forward ? UTRANS_FORWARD : UTRANS_REVERSE, err, ec);
1376         // Compare first (a failed creation counts as the empty ID), then
1377         // substitute the error name for display.
1378         UnicodeString newID;
1379         if (t) {
1380             newID = t->getID();
1381         }
1382         const UBool ok = (newID == expID);
1383         if (!t) {
1384             newID = u_errorName(ec);
1385         }
1386         if (ok) {
1387             logln((UnicodeString)"Ok: createInstance(" +
1388                   id + "," + row[1] + ") => " + newID);
1389         } else {
1390             dataerrln((UnicodeString)"FAIL: createInstance(" +
1391                   id + "," + row[1] + ") => " + newID +
1392                   ", expected " + expID);
1393         }
1394         delete t;
1395     }
1396 }
1397
1398 /**
1399  * Test the normalization transliterators: NFD/NFC and NFKD/NFKC against
1400  * <input, decomposed, composed> tables, then NFD chained with a filtered Remove.
1401  */
1402 void TransliteratorTest::TestNormalizationTransliterator() {
1403     // THE FOLLOWING TWO TABLES ARE COPIED FROM com.ibm.test.normalizer.BasicTest
1404     // PLEASE KEEP THEM IN SYNC WITH BasicTest.
1405     const char* CANON[] = {
1406         // Input               Decomposed            Composed
1407         "cat",                "cat",                "cat"               ,
1408         "\\u00e0ardvark",      "a\\u0300ardvark",     "\\u00e0ardvark"    ,
1409
1410         "\\u1e0a",             "D\\u0307",            "\\u1e0a"            , // D-dot_above
1411         "D\\u0307",            "D\\u0307",            "\\u1e0a"            , // D dot_above
1412
1413         "\\u1e0c\\u0307",       "D\\u0323\\u0307",      "\\u1e0c\\u0307"      , // D-dot_below dot_above
1414         "\\u1e0a\\u0323",       "D\\u0323\\u0307",      "\\u1e0c\\u0307"      , // D-dot_above dot_below
1415         "D\\u0307\\u0323",      "D\\u0323\\u0307",      "\\u1e0c\\u0307"      , // D dot_below dot_above
1416
1417         "\\u1e10\\u0307\\u0323", "D\\u0327\\u0323\\u0307","\\u1e10\\u0323\\u0307", // D dot_below cedilla dot_above
1418         "D\\u0307\\u0328\\u0323","D\\u0328\\u0323\\u0307","\\u1e0c\\u0328\\u0307", // D dot_above ogonek dot_below
1419
1420         "\\u1E14",             "E\\u0304\\u0300",      "\\u1E14"            , // E-macron-grave
1421         "\\u0112\\u0300",       "E\\u0304\\u0300",      "\\u1E14"            , // E-macron + grave
1422         "\\u00c8\\u0304",       "E\\u0300\\u0304",      "\\u00c8\\u0304"      , // E-grave + macron
1423
1424         "\\u212b",             "A\\u030a",            "\\u00c5"            , // angstrom_sign
1425         "\\u00c5",             "A\\u030a",            "\\u00c5"            , // A-ring
1426
1427         "\\u00fdffin",         "y\\u0301ffin",        "\\u00fdffin"        ,    //updated with 3.0
1428         "\\u00fd\\uFB03n",      "y\\u0301\\uFB03n",     "\\u00fd\\uFB03n"     , //updated with 3.0
1429
1430         "Henry IV",           "Henry IV",           "Henry IV"          ,
1431         "Henry \\u2163",       "Henry \\u2163",       "Henry \\u2163"      ,
1432
1433         "\\u30AC",             "\\u30AB\\u3099",       "\\u30AC"            , // ga (Katakana)
1434         "\\u30AB\\u3099",       "\\u30AB\\u3099",       "\\u30AC"            , // ka + ten
1435         "\\uFF76\\uFF9E",       "\\uFF76\\uFF9E",       "\\uFF76\\uFF9E"      , // hw_ka + hw_ten
1436         "\\u30AB\\uFF9E",       "\\u30AB\\uFF9E",       "\\u30AB\\uFF9E"      , // ka + hw_ten
1437         "\\uFF76\\u3099",       "\\uFF76\\u3099",       "\\uFF76\\u3099"      , // hw_ka + ten
1438
1439         "A\\u0300\\u0316",      "A\\u0316\\u0300",      "\\u00C0\\u0316"      ,
1440         0 // end
1441     };
1442
1443     const char* COMPAT[] = {
1444         // Input               Decomposed            Composed
1445         "\\uFB4f",             "\\u05D0\\u05DC",       "\\u05D0\\u05DC"     , // Alef-Lamed vs. Alef, Lamed
1446
1447         "\\u00fdffin",         "y\\u0301ffin",        "\\u00fdffin"        ,    //updated for 3.0
1448         "\\u00fd\\uFB03n",      "y\\u0301ffin",        "\\u00fdffin"        , // ffi ligature -> f + f + i
1449
1450         "Henry IV",           "Henry IV",           "Henry IV"          ,
1451         "Henry \\u2163",       "Henry IV",           "Henry IV"          ,
1452
1453         "\\u30AC",             "\\u30AB\\u3099",       "\\u30AC"            , // ga (Katakana)
1454         "\\u30AB\\u3099",       "\\u30AB\\u3099",       "\\u30AC"            , // ka + ten
1455
1456         "\\uFF76\\u3099",       "\\u30AB\\u3099",       "\\u30AC"            , // hw_ka + ten
1457         0 // end
1458     };
1459
1460     UParseError parseError;
1461     UErrorCode status = U_ZERO_ERROR;
1462     Transliterator* NFD = Transliterator::createInstance("NFD", UTRANS_FORWARD, parseError, status);
1463     Transliterator* NFC = Transliterator::createInstance("NFC", UTRANS_FORWARD, parseError, status);
1464     if (!NFD || !NFC) {
1465         dataerrln("FAIL: createInstance failed: %s", u_errorName(status));
1466         delete NFD;
1467         delete NFC;
1468         return;
1469     }
1470     for (int32_t i=0; CANON[i]; i+=3) {   // rows end at the 0 sentinel
1471         UnicodeString in = CharsToUnicodeString(CANON[i]);
1472         UnicodeString expd = CharsToUnicodeString(CANON[i+1]);
1473         UnicodeString expc = CharsToUnicodeString(CANON[i+2]);
1474         expect(*NFD, in, expd);
1475         expect(*NFC, in, expc);
1476     }
1477     delete NFD;
1478     delete NFC;
1479
1480     Transliterator* NFKD = Transliterator::createInstance("NFKD", UTRANS_FORWARD, parseError, status);
1481     Transliterator* NFKC = Transliterator::createInstance("NFKC", UTRANS_FORWARD, parseError, status);
1482     if (!NFKD || !NFKC) {
1483         dataerrln("FAIL: createInstance failed");
1484         delete NFKD;
1485         delete NFKC;
1486         return;
1487     }
1488     for (int32_t i=0; COMPAT[i]; i+=3) {   // rows end at the 0 sentinel
1489         UnicodeString in = CharsToUnicodeString(COMPAT[i]);
1490         UnicodeString expkd = CharsToUnicodeString(COMPAT[i+1]);
1491         UnicodeString expkc = CharsToUnicodeString(COMPAT[i+2]);
1492         expect(*NFKD, in, expkd);
1493         expect(*NFKC, in, expkc);
1494     }
1495     delete NFKD;
1496     delete NFKC;
1497
1498     UParseError pe;
1499     status = U_ZERO_ERROR;   // compound case: NFD, then a Remove filtered to 'x'
1500     Transliterator *t = Transliterator::createInstance("NFD; [x]Remove",
1501                                                        UTRANS_FORWARD, pe, status);
1502     if (t == 0) {
1503         errln("FAIL: createInstance failed");
1504         return;   // t is NULL: it must not be dereferenced below
1505     }
1506     expect(*t, CharsToUnicodeString("\\u010dx"),
1507            CharsToUnicodeString("c\\u030C"));
1508     delete t;
1509 }
1510
1511 /**
1512  * Compound rule-based transliterators: "::ID" lines mixed with rules,
1513  * toRules() round-tripping, and Foo(Bar) style IDs.
1514  */
1515 void TransliteratorTest::TestCompoundRBT(void) {
1516     UParseError parseError;
1517     UErrorCode status = U_ZERO_ERROR;
1518     // Careful with spacing and ';' here:  this text must be phrased
1519     // exactly the way toRules() returns it.  Adjust it if toRules()
1520     // ever changes with regard to spacing or ';'.
1521     const UnicodeString rule("::Hex-Any;\n"
1522                              "::Any-Lower;\n"
1523                              "a > '.A.';\n"
1524                              "b > '.B.';\n"
1525                              "::[^t]Any-Upper;", "");
1526     Transliterator *t = Transliterator::createFromRules("Test", rule, UTRANS_FORWARD, parseError, status);
1527     if (t == 0) {
1528         errln("FAIL: createFromRules failed");
1529         return;
1530     }
1531     expect(*t, UNICODE_STRING_SIMPLE("\\u0043at in the hat, bat on the mat"),
1532            "C.A.t IN tHE H.A.t, .B..A.t ON tHE M.A.t");
1533     UnicodeString r;
1534     t->toRules(r, TRUE);
1535     if (r != rule) {
1536         errln((UnicodeString)"FAIL: toRules() => " + r +
1537               ", expected " + rule);
1538     } else {
1539         logln((UnicodeString)"OK: toRules() => " + r);
1540     }
1541     delete t;
1542
1543     // toRules() of a compound built purely from an ID list
1544     t = Transliterator::createInstance("Greek-Latin; Latin-Cyrillic", UTRANS_FORWARD, parseError, status);
1545     if (t == 0) {
1546         dataerrln("FAIL: createInstance failed - %s", u_errorName(status));
1547         return;
1548     }
1549     UnicodeString exp("::Greek-Latin;\n::Latin-Cyrillic;");
1550     t->toRules(r, TRUE);
1551     if (r == exp) {
1552         logln((UnicodeString)"OK: toRules() => " + r);
1553     } else {
1554         errln((UnicodeString)"FAIL: toRules() => " + r +
1555               ", expected " + exp);
1556     }
1557     delete t;
1558
1559     // Feed that toRules() output back in as a rule set ...
1560     t = Transliterator::createFromRules("Test", r, UTRANS_FORWARD, parseError, status);
1561     if (t == 0) {
1562         errln("FAIL: createFromRules #2 failed");
1563         return;
1564     }
1565     logln((UnicodeString)"OK: createFromRules(" + r + ") succeeded");
1566
1567     // ... and expect toRules() to reproduce the same text once more.
1568     t->toRules(r, TRUE);
1569     if (r == exp) {
1570         logln((UnicodeString)"OK: toRules() => " + r);
1571     } else {
1572         errln((UnicodeString)"FAIL: toRules() => " + r +
1573               ", expected " + exp);
1574     }
1575     delete t;
1576
1577     // Test Foo(Bar) IDs.  Careful with spacing in id; make it conform
1578     // to what the regenerated ID will look like.
1579     const UnicodeString id("Upper(Lower);(NFKC)", "");
1580     t = Transliterator::createInstance(id, UTRANS_FORWARD, parseError, status);
1581     if (t == 0) {
1582         errln("FAIL: createInstance #2 failed");
1583         return;
1584     }
1585     if (t->getID() != id) {
1586         errln((UnicodeString)"FAIL: createInstance(" + id +
1587               ").getID() => " + t->getID());
1588     } else {
1589         logln((UnicodeString)"OK: created " + id);
1590     }
1591
1592     // The inverse is expected to swap each Foo(Bar) pair and reverse the order.
1593     Transliterator *u = t->createInverse(status);
1594     if (u == 0) {
1595         errln("FAIL: createInverse failed");
1596         delete t;
1597         return;
1598     }
1599     exp = "NFKC();Lower(Upper)";
1600     if (u->getID() != exp) {
1601         errln((UnicodeString)"FAIL: createInverse(" + id + ") => " +
1602               u->getID());
1603     } else {
1604         logln((UnicodeString)"OK: createInverse(" + id + ") => " +
1605               u->getID());
1606     }
1607     delete u;
1608     delete t;
1609 }
1610
1611 /**
1612  * Compound filter semantics were originally not implemented
1613  * correctly.  Originally, each component filter f(i) was replaced by
1614  * f'(i) = f(i) && g, where g is the filter of the compound
1615  * transliterator.
1616  *
1617  * Mark's example:
1618  *
1619  * Suppose I have a transliterator X, which internally is
1620  * "Greek-Latin; Latin-Cyrillic; Any-Lower", and I use the filter [^A].
1621  *
1622  * The compound should convert all Greek characters (through Latin) to
1623  * Cyrillic, then lowercase the result.  The filter should say "don't
1624  * touch 'A' in the original".  But because an intermediate result
1625  * happens to go through "A", the Greek Alpha gets hung up.
1626  */
1627 void TransliteratorTest::TestCompoundFilter(void) {
1628     UErrorCode status = U_ZERO_ERROR;
1629     UParseError parseError;
1630     Transliterator *compound = Transliterator::createInstance
1631         ("Greek-Latin; Latin-Greek; Lower", UTRANS_FORWARD, parseError, status);
1632     if (compound == 0) {
1633         dataerrln("FAIL: createInstance failed - %s", u_errorName(status));
1634         return;
1635     }
1636
1637     // The filter is attached to the compound as a whole.
1638     compound->adoptFilter(new UnicodeSet("[^A]", status));
1639     if (U_SUCCESS(status)) {
1640         // Only the 'A' at index 1 should remain unchanged
1641         expect(*compound,
1642                CharsToUnicodeString("BA\\u039A\\u0391"),
1643                CharsToUnicodeString("\\u03b2A\\u03ba\\u03b1"));
1644     } else {
1645         errln("FAIL: UnicodeSet ct failed");
1646     }
1647     delete compound;
1648 }
1649
1650 void TransliteratorTest::TestRemove(void) {
1651     UParseError parseError;
1652     UErrorCode status = U_ZERO_ERROR;
1653     Transliterator *t = Transliterator::createInstance("Remove[abc]", UTRANS_FORWARD, parseError, status);
1654     if (t == 0) {
1655         errln("FAIL: createInstance failed");
1656         return;
1657     }
1658
1659     expect(*t, "Able bodied baker's cats", "Ale odied ker's ts");
1660
1661     // extra test for RemoveTransliterator::clone(), which at one point wasn't
1662     // duplicating the filter
1663     Transliterator* t2 = t->clone();
1664     expect(*t2, "Able bodied baker's cats", "Ale odied ker's ts");
1665
1666     delete t;
1667     delete t2;
1668 }
1669
1670 void TransliteratorTest::TestToRules(void) {
1671     const char* RBT = "rbt";
1672     const char* SET = "set";
1673     static const char* DATA[] = {
1674         RBT,
1675         "$a=\\u4E61; [$a] > A;",
1676         "[\\u4E61] > A;",
1677
1678         RBT,
1679         "$white=[[:Zs:][:Zl:]]; $white{a} > A;",
1680         "[[:Zs:][:Zl:]]{a} > A;",
1681
1682         SET,
1683         "[[:Zs:][:Zl:]]",
1684         "[[:Zs:][:Zl:]]",
1685
1686         SET,
1687         "[:Ps:]",
1688         "[:Ps:]",
1689
1690         SET,
1691         "[:L:]",
1692         "[:L:]",
1693
1694         SET,
1695         "[[:L:]-[A]]",
1696         "[[:L:]-[A]]",
1697
1698         SET,
1699         "[~[:Lu:][:Ll:]]",
1700         "[~[:Lu:][:Ll:]]",
1701
1702         SET,
1703         "[~[a-z]]",
1704         "[~[a-z]]",
1705
1706         RBT,
1707         "$white=[:Zs:]; $black=[^$white]; $black{a} > A;",
1708         "[^[:Zs:]]{a} > A;",
1709
1710         RBT,
1711         "$a=[:Zs:]; $b=[[a-z]-$a]; $b{a} > A;",
1712         "[[a-z]-[:Zs:]]{a} > A;",
1713
1714         RBT,
1715         "$a=[:Zs:]; $b=[$a&[a-z]]; $b{a} > A;",
1716         "[[:Zs:]&[a-z]]{a} > A;",
1717
1718         RBT,
1719         "$a=[:Zs:]; $b=[x$a]; $b{a} > A;",
1720         "[x[:Zs:]]{a} > A;",
1721
1722         RBT,
1723         "$accentMinus = [ [\\u0300-\\u0345] & [:M:] - [\\u0338]] ;"
1724         "$macron = \\u0304 ;"
1725         "$evowel = [aeiouyAEIOUY] ;"
1726         "$iotasub = \\u0345 ;"
1727         "($evowel $macron $accentMinus *) i > | $1 $iotasub ;",
1728         "([AEIOUYaeiouy]\\u0304[[\\u0300-\\u0345]&[:M:]-[\\u0338]]*)i > | $1 \\u0345;",
1729
1730         RBT,
1731         "([AEIOUYaeiouy]\\u0304[[:M:]-[\\u0304\\u0345]]*)i > | $1 \\u0345;",
1732         "([AEIOUYaeiouy]\\u0304[[:M:]-[\\u0304\\u0345]]*)i > | $1 \\u0345;",
1733     };
1734     static const int32_t DATA_length = UPRV_LENGTHOF(DATA);
1735
1736     for (int32_t d=0; d < DATA_length; d+=3) {
1737         if (DATA[d] == RBT) {
1738             // Transliterator test
1739             UParseError parseError;
1740             UErrorCode status = U_ZERO_ERROR;
1741             Transliterator *t = Transliterator::createFromRules("ID",
1742                                                                 UnicodeString(DATA[d+1], -1, US_INV), UTRANS_FORWARD, parseError, status);
1743             if (t == 0) {
1744                 dataerrln("FAIL: createFromRules failed - %s", u_errorName(status));
1745                 return;
1746             }
1747             UnicodeString rules, escapedRules;
1748             t->toRules(rules, FALSE);
1749             t->toRules(escapedRules, TRUE);
1750             UnicodeString expRules = CharsToUnicodeString(DATA[d+2]);
1751             UnicodeString expEscapedRules(DATA[d+2], -1, US_INV);
1752             if (rules == expRules) {
1753                 logln((UnicodeString)"Ok: " + UnicodeString(DATA[d+1], -1, US_INV) +
1754                       " => " + rules);
1755             } else {
1756                 errln((UnicodeString)"FAIL: " + UnicodeString(DATA[d+1], -1, US_INV) +
1757                       " => " + rules + ", exp " + expRules);
1758             }
1759             if (escapedRules == expEscapedRules) {
1760                 logln((UnicodeString)"Ok: " + UnicodeString(DATA[d+1], -1, US_INV) +
1761                       " => " + escapedRules);
1762             } else {
1763                 errln((UnicodeString)"FAIL: " + UnicodeString(DATA[d+1], -1, US_INV) +
1764                       " => " + escapedRules + ", exp " + expEscapedRules);
1765             }
1766             delete t;
1767
1768         } else {
1769             // UnicodeSet test
1770             UErrorCode status = U_ZERO_ERROR;
1771             UnicodeString pat(DATA[d+1], -1, US_INV);
1772             UnicodeString expToPat(DATA[d+2], -1, US_INV);
1773             UnicodeSet set(pat, status);
1774             if (U_FAILURE(status)) {
1775                 errln("FAIL: UnicodeSet ct failed");
1776                 return;
1777             }
1778             // Adjust spacing etc. as necessary.
1779             UnicodeString toPat;
1780             set.toPattern(toPat);
1781             if (expToPat == toPat) {
1782                 logln((UnicodeString)"Ok: " + pat +
1783                       " => " + toPat);
1784             } else {
1785                 errln((UnicodeString)"FAIL: " + pat +
1786                       " => " + prettify(toPat, TRUE) +
1787                       ", exp " + prettify(pat, TRUE));
1788             }
1789         }
1790     }
1791 }
1792
1793 void TransliteratorTest::TestContext() {
1794     UTransPosition pos = {0, 2, 0, 1}; // cs cl s l
1795     expect("de > x; {d}e > y;",
1796            "de",
1797            "ye",
1798            &pos);
1799
1800     expect("ab{c} > z;",
1801            "xadabdabcy",
1802            "xadabdabzy");
1803 }
1804
1805 void TransliteratorTest::TestSupplemental() {
1806
1807     expect(CharsToUnicodeString("$a=\\U00010300; $s=[\\U00010300-\\U00010323];"
1808                                 "a > $a; $s > i;"),
1809            CharsToUnicodeString("ab\\U0001030Fx"),
1810            CharsToUnicodeString("\\U00010300bix"));
1811
1812     expect(CharsToUnicodeString("$a=[a-z\\U00010300-\\U00010323];"
1813                                 "$b=[A-Z\\U00010400-\\U0001044D];"
1814                                 "($a)($b) > $2 $1;"),
1815            CharsToUnicodeString("aB\\U00010300\\U00010400c\\U00010401\\U00010301D"),
1816            CharsToUnicodeString("Ba\\U00010400\\U00010300\\U00010401cD\\U00010301"));
1817
1818     // k|ax\\U00010300xm
1819
1820     // k|a\\U00010400\\U00010300xm
1821     // ky|\\U00010400\\U00010300xm
1822     // ky\\U00010400|\\U00010300xm
1823
1824     // ky\\U00010400|\\U00010300\\U00010400m
1825     // ky\\U00010400y|\\U00010400m
1826     expect(CharsToUnicodeString("$a=[a\\U00010300-\\U00010323];"
1827                                 "$a {x} > | @ \\U00010400;"
1828                                 "{$a} [^\\u0000-\\uFFFF] > y;"),
1829            CharsToUnicodeString("kax\\U00010300xm"),
1830            CharsToUnicodeString("ky\\U00010400y\\U00010400m"));
1831
1832     expectT("Any-Name",
1833            CharsToUnicodeString("\\U00010330\\U000E0061\\u00A0"),
1834            UNICODE_STRING_SIMPLE("\\N{GOTHIC LETTER AHSA}\\N{TAG LATIN SMALL LETTER A}\\N{NO-BREAK SPACE}"));
1835
1836     expectT("Any-Hex/Unicode",
1837            CharsToUnicodeString("\\U00010330\\U0010FF00\\U000E0061\\u00A0"),
1838            UNICODE_STRING_SIMPLE("U+10330U+10FF00U+E0061U+00A0"));
1839
1840     expectT("Any-Hex/C",
1841            CharsToUnicodeString("\\U00010330\\U0010FF00\\U000E0061\\u00A0"),
1842            UNICODE_STRING_SIMPLE("\\U00010330\\U0010FF00\\U000E0061\\u00A0"));
1843
1844     expectT("Any-Hex/Perl",
1845            CharsToUnicodeString("\\U00010330\\U0010FF00\\U000E0061\\u00A0"),
1846            UNICODE_STRING_SIMPLE("\\x{10330}\\x{10FF00}\\x{E0061}\\x{A0}"));
1847
1848     expectT("Any-Hex/Java",
1849            CharsToUnicodeString("\\U00010330\\U0010FF00\\U000E0061\\u00A0"),
1850            UNICODE_STRING_SIMPLE("\\uD800\\uDF30\\uDBFF\\uDF00\\uDB40\\uDC61\\u00A0"));
1851
1852     expectT("Any-Hex/XML",
1853            CharsToUnicodeString("\\U00010330\\U0010FF00\\U000E0061\\u00A0"),
1854            "&#x10330;&#x10FF00;&#xE0061;&#xA0;");
1855
1856     expectT("Any-Hex/XML10",
1857            CharsToUnicodeString("\\U00010330\\U0010FF00\\U000E0061\\u00A0"),
1858            "&#66352;&#1113856;&#917601;&#160;");
1859
1860     expectT(UNICODE_STRING_SIMPLE("[\\U000E0000-\\U000E0FFF] Remove"),
1861            CharsToUnicodeString("\\U00010330\\U0010FF00\\U000E0061\\u00A0"),
1862            CharsToUnicodeString("\\U00010330\\U0010FF00\\u00A0"));
1863 }
1864
1865 void TransliteratorTest::TestQuantifier() {
1866
1867     // Make sure @ in a quantified anteContext works
1868     expect("a+ {b} > | @@ c; A > a; (a+ c) > '(' $1 ')';",
1869            "AAAAAb",
1870            "aaa(aac)");
1871
1872     // Make sure @ in a quantified postContext works
1873     expect("{b} a+ > c @@ |; (a+) > '(' $1 ')';",
1874            "baaaaa",
1875            "caa(aaa)");
1876
1877     // Make sure @ in a quantified postContext with seg ref works
1878     expect("{(b)} a+ > $1 @@ |; (a+) > '(' $1 ')';",
1879            "baaaaa",
1880            "baa(aaa)");
1881
1882     // Make sure @ past ante context doesn't enter ante context
1883     UTransPosition pos = {0, 5, 3, 5};
1884     expect("a+ {b} > | @@ c; x > y; (a+ c) > '(' $1 ')';",
1885            "xxxab",
1886            "xxx(ac)",
1887            &pos);
1888
1889     // Make sure @ past post context doesn't pass limit
1890     UTransPosition pos2 = {0, 4, 0, 2};
1891     expect("{b} a+ > c @@ |; x > y; a > A;",
1892            "baxx",
1893            "caxx",
1894            &pos2);
1895
1896     // Make sure @ past post context doesn't enter post context
1897     expect("{b} a+ > c @@ |; x > y; a > A;",
1898            "baxx",
1899            "cayy");
1900
1901     expect("(ab)? c > d;",
1902            "c abc ababc",
1903            "d d abd");
1904
1905     // NOTE: The (ab)+ when referenced just yields a single "ab",
1906     // not the full sequence of them.  This accords with perl behavior.
1907     expect("(ab)+ {x} > '(' $1 ')';",
1908            "x abx ababxy",
1909            "x ab(ab) abab(ab)y");
1910
1911     expect("b+ > x;",
1912            "ac abc abbc abbbc",
1913            "ac axc axc axc");
1914
1915     expect("[abc]+ > x;",
1916            "qac abrc abbcs abtbbc",
1917            "qx xrx xs xtx");
1918
1919     expect("q{(ab)+} > x;",
1920            "qa qab qaba qababc qaba",
1921            "qa qx qxa qxc qxa");
1922
1923     expect("q(ab)* > x;",
1924            "qa qab qaba qababc",
1925            "xa x xa xc");
1926
1927     // NOTE: The (ab)+ when referenced just yields a single "ab",
1928     // not the full sequence of them.  This accords with perl behavior.
1929     expect("q(ab)* > '(' $1 ')';",
1930            "qa qab qaba qababc",
1931            "()a (ab) (ab)a (ab)c");
1932
1933     // 'foo'+ and 'foo'* -- the quantifier should apply to the entire
1934     // quoted string
1935     expect("'ab'+ > x;",
1936            "bb ab ababb",
1937            "bb x xb");
1938
1939     // $foo+ and $foo* -- the quantifier should apply to the entire
1940     // variable reference
1941     expect("$var = ab; $var+ > x;",
1942            "bb ab ababb",
1943            "bb x xb");
1944 }
1945
1946 class TestTrans : public Transliterator {
1947 public:
1948     TestTrans(const UnicodeString& id) : Transliterator(id, 0) {
1949     }
1950     virtual Transliterator* clone(void) const {
1951         return new TestTrans(getID());
1952     }
1953     virtual void handleTransliterate(Replaceable& /*text*/, UTransPosition& offsets,
1954         UBool /*isIncremental*/) const
1955     {
1956         offsets.start = offsets.limit;
1957     }
1958     virtual UClassID getDynamicClassID() const;
1959     static UClassID U_EXPORT2 getStaticClassID();
1960 };
1961 UOBJECT_DEFINE_RTTI_IMPLEMENTATION(TestTrans)
1962
1963 /**
1964  * Test Source-Target/Variant.
1965  */
1966 void TransliteratorTest::TestSTV(void) {
1967     int32_t ns = Transliterator::countAvailableSources();
1968     if (ns < 0 || ns > 255) {
1969         errln((UnicodeString)"FAIL: Bad source count: " + ns);
1970         return;
1971     }
1972     int32_t i, j;
1973     for (i=0; i<ns; ++i) {
1974         UnicodeString source;
1975         Transliterator::getAvailableSource(i, source);
1976         logln((UnicodeString)"" + i + ": " + source);
1977         if (source.length() == 0) {
1978             errln("FAIL: empty source");
1979             continue;
1980         }
1981         int32_t nt = Transliterator::countAvailableTargets(source);
1982         if (nt < 0 || nt > 255) {
1983             errln((UnicodeString)"FAIL: Bad target count: " + nt);
1984             continue;
1985         }
1986         for (int32_t j=0; j<nt; ++j) {
1987             UnicodeString target;
1988             Transliterator::getAvailableTarget(j, source, target);
1989             logln((UnicodeString)" " + j + ": " + target);
1990             if (target.length() == 0) {
1991                 errln("FAIL: empty target");
1992                 continue;
1993             }
1994             int32_t nv = Transliterator::countAvailableVariants(source, target);
1995             if (nv < 0 || nv > 255) {
1996                 errln((UnicodeString)"FAIL: Bad variant count: " + nv);
1997                 continue;
1998             }
1999             for (int32_t k=0; k<nv; ++k) {
2000                 UnicodeString variant;
2001                 Transliterator::getAvailableVariant(k, source, target, variant);
2002                 if (variant.length() == 0) {
2003                     logln((UnicodeString)"  " + k + ": <empty>");
2004                 } else {
2005                     logln((UnicodeString)"  " + k + ": " + variant);
2006                 }
2007             }
2008         }
2009     }
2010
2011     // Test registration
2012     const char* IDS[] = { "Fieruwer", "Seoridf-Sweorie", "Oewoir-Oweri/Vsie" };
2013     const char* FULL_IDS[] = { "Any-Fieruwer", "Seoridf-Sweorie", "Oewoir-Oweri/Vsie" };
2014     const char* SOURCES[] = { NULL, "Seoridf", "Oewoir" };
2015     for (i=0; i<3; ++i) {
2016         Transliterator *t = new TestTrans(IDS[i]);
2017         if (t == 0) {
2018             errln("FAIL: out of memory");
2019             return;
2020         }
2021         if (t->getID() != IDS[i]) {
2022             errln((UnicodeString)"FAIL: ID mismatch for " + IDS[i]);
2023             delete t;
2024             return;
2025         }
2026         Transliterator::registerInstance(t);
2027         UErrorCode status = U_ZERO_ERROR;
2028         t = Transliterator::createInstance(IDS[i], UTRANS_FORWARD, status);
2029         if (t == NULL) {
2030             errln((UnicodeString)"FAIL: Registration/creation failed for ID " +
2031                   IDS[i]);
2032         } else {
2033             logln((UnicodeString)"Ok: Registration/creation succeeded for ID " +
2034                   IDS[i]);
2035             delete t;
2036         }
2037         Transliterator::unregister(IDS[i]);
2038         t = Transliterator::createInstance(IDS[i], UTRANS_FORWARD, status);
2039         if (t != NULL) {
2040             errln((UnicodeString)"FAIL: Unregistration failed for ID " +
2041                   IDS[i]);
2042             delete t;
2043         }
2044     }
2045
2046     // Make sure getAvailable API reflects removal
2047     int32_t n = Transliterator::countAvailableIDs();
2048     for (i=0; i<n; ++i) {
2049         UnicodeString id = Transliterator::getAvailableID(i);
2050         for (j=0; j<3; ++j) {
2051             if (id.caseCompare(FULL_IDS[j],0)==0) {
2052                 errln((UnicodeString)"FAIL: unregister(" + id + ") failed");
2053             }
2054         }
2055     }
2056     n = Transliterator::countAvailableTargets("Any");
2057     for (i=0; i<n; ++i) {
2058         UnicodeString t;
2059         Transliterator::getAvailableTarget(i, "Any", t);
2060         if (t.caseCompare(IDS[0],0)==0) {
2061             errln((UnicodeString)"FAIL: unregister(Any-" + t + ") failed");
2062         }
2063     }
2064     n = Transliterator::countAvailableSources();
2065     for (i=0; i<n; ++i) {
2066         UnicodeString s;
2067         Transliterator::getAvailableSource(i, s);
2068         for (j=0; j<3; ++j) {
2069             if (SOURCES[j] == NULL) continue;
2070             if (s.caseCompare(SOURCES[j],0)==0) {
2071                 errln((UnicodeString)"FAIL: unregister(" + s + "-*) failed");
2072             }
2073         }
2074     }
2075 }
2076
2077 /**
2078  * Test inverse of Greek-Latin; Title()
2079  */
2080 void TransliteratorTest::TestCompoundInverse(void) {
2081     UParseError parseError;
2082     UErrorCode status = U_ZERO_ERROR;
2083     Transliterator *t = Transliterator::createInstance
2084         ("Greek-Latin; Title()", UTRANS_REVERSE,parseError, status);
2085     if (t == 0) {
2086         dataerrln("FAIL: createInstance - %s", u_errorName(status));
2087         return;
2088     }
2089     UnicodeString exp("(Title);Latin-Greek");
2090     if (t->getID() == exp) {
2091         logln("Ok: inverse of \"Greek-Latin; Title()\" is \"" +
2092               t->getID());
2093     } else {
2094         errln("FAIL: inverse of \"Greek-Latin; Title()\" is \"" +
2095               t->getID() + "\", expected \"" + exp + "\"");
2096     }
2097     delete t;
2098 }
2099
2100 /**
2101  * Test NFD chaining with RBT
2102  */
2103 void TransliteratorTest::TestNFDChainRBT() {
2104     UParseError pe;
2105     UErrorCode ec = U_ZERO_ERROR;
2106     Transliterator* t = Transliterator::createFromRules(
2107                                "TEST", "::NFD; aa > Q; a > q;",
2108                                UTRANS_FORWARD, pe, ec);
2109     if (t == NULL || U_FAILURE(ec)) {
2110         dataerrln("FAIL: Transliterator::createFromRules failed with %s", u_errorName(ec));
2111         return;
2112     }
2113     expect(*t, "aa", "Q");
2114     delete t;
2115
2116     // TEMPORARY TESTS -- BEING DEBUGGED
2117 //=-    UnicodeString s, s2;
2118 //=-    t = Transliterator::createInstance("Latin-Devanagari", UTRANS_FORWARD, pe, ec);
2119 //=-    s = CharsToUnicodeString("rmk\\u1E63\\u0113t");
2120 //=-    s2 = CharsToUnicodeString("\\u0930\\u094D\\u092E\\u094D\\u0915\\u094D\\u0937\\u0947\\u0924\\u094D");
2121 //=-    expect(*t, s, s2);
2122 //=-    delete t;
2123 //=-
2124 //=-    t = Transliterator::createInstance("Devanagari-Latin", UTRANS_FORWARD, pe, ec);
2125 //=-    expect(*t, s2, s);
2126 //=-    delete t;
2127 //=-
2128 //=-    t = Transliterator::createInstance("Latin-Devanagari;Devanagari-Latin", UTRANS_FORWARD, pe, ec);
2129 //=-    s = CharsToUnicodeString("rmk\\u1E63\\u0113t");
2130 //=-    expect(*t, s, s);
2131 //=-    delete t;
2132
2133 //    const char* source[] = {
2134 //        /*
2135 //        "\\u015Br\\u012Bmad",
2136 //        "bhagavadg\\u012Bt\\u0101",
2137 //        "adhy\\u0101ya",
2138 //        "arjuna",
2139 //        "vi\\u1E63\\u0101da",
2140 //        "y\\u014Dga",
2141 //        "dhr\\u0325tar\\u0101\\u1E63\\u1E6Dra",
2142 //        "uv\\u0101cr\\u0325",
2143 //        */
2144 //        "rmk\\u1E63\\u0113t",
2145 //      //"dharmak\\u1E63\\u0113tr\\u0113",
2146 //        /*
2147 //        "kuruk\\u1E63\\u0113tr\\u0113",
2148 //        "samav\\u0113t\\u0101",
2149 //        "yuyutsava-\\u1E25",
2150 //        "m\\u0101mak\\u0101-\\u1E25",
2151 //     // "p\\u0101\\u1E47\\u1E0Dav\\u0101\\u015Bcaiva",
2152 //        "kimakurvata",
2153 //        "san\\u0304java",
2154 //        */
2155 //
2156 //        0
2157 //    };
2158 //    const char* expected[] = {
2159 //        /*
2160 //        "\\u0936\\u094d\\u0930\\u0940\\u092e\\u0926\\u094d",
2161 //        "\\u092d\\u0917\\u0935\\u0926\\u094d\\u0917\\u0940\\u0924\\u093e",
2162 //        "\\u0905\\u0927\\u094d\\u092f\\u093e\\u092f",
2163 //        "\\u0905\\u0930\\u094d\\u091c\\u0941\\u0928",
2164 //        "\\u0935\\u093f\\u0937\\u093e\\u0926",
2165 //        "\\u092f\\u094b\\u0917",
2166 //        "\\u0927\\u0943\\u0924\\u0930\\u093e\\u0937\\u094d\\u091f\\u094d\\u0930",
2167 //        "\\u0909\\u0935\\u093E\\u091A\\u0943",
2168 //        */
2169 //        "\\u0927",
2170 //        //"\\u0927\\u0930\\u094d\\u092e\\u0915\\u094d\\u0937\\u0947\\u0924\\u094d\\u0930\\u0947",
2171 //        /*
2172 //        "\\u0915\\u0941\\u0930\\u0941\\u0915\\u094d\\u0937\\u0947\\u0924\\u094d\\u0930\\u0947",
2173 //        "\\u0938\\u092e\\u0935\\u0947\\u0924\\u093e",
2174 //        "\\u092f\\u0941\\u092f\\u0941\\u0924\\u094d\\u0938\\u0935\\u0903",
2175 //        "\\u092e\\u093e\\u092e\\u0915\\u093e\\u0903",
2176 //    //  "\\u092a\\u093e\\u0923\\u094d\\u0921\\u0935\\u093e\\u0936\\u094d\\u091a\\u0948\\u0935",
2177 //        "\\u0915\\u093f\\u092e\\u0915\\u0941\\u0930\\u094d\\u0935\\u0924",
2178 //        "\\u0938\\u0902\\u091c\\u0935",
2179 //        */
2180 //        0
2181 //    };
2182 //    UErrorCode status = U_ZERO_ERROR;
2183 //    UParseError parseError;
2184 //    UnicodeString message;
2185 //    Transliterator* latinToDevToLatin=Transliterator::createInstance("Latin-Devanagari;Devanagari-Latin", UTRANS_FORWARD, parseError, status);
2186 //    Transliterator* devToLatinToDev=Transliterator::createInstance("Devanagari-Latin;Latin-Devanagari", UTRANS_FORWARD, parseError, status);
2187 //    if(U_FAILURE(status)){
2188 //        errln("FAIL: construction " +   UnicodeString(" Error: ") + u_errorName(status));
2189 //        errln("PreContext: " + prettify(parseError.preContext) + "PostContext: " + prettify( parseError.postContext) );
2190 //        delete latinToDevToLatin;
2191 //        delete devToLatinToDev;
2192 //        return;
2193 //    }
2194 //    UnicodeString gotResult;
2195 //    for(int i= 0; source[i] != 0; i++){
2196 //        gotResult = source[i];
2197 //        expect(*latinToDevToLatin,CharsToUnicodeString(source[i]),CharsToUnicodeString(source[i]));
2198 //        expect(*devToLatinToDev,CharsToUnicodeString(expected[i]),CharsToUnicodeString(expected[i]));
2199 //    }
2200 //    delete latinToDevToLatin;
2201 //    delete devToLatinToDev;
2202 }
2203
2204 /**
2205  * Inverse of "Null" should be "Null". (J21)
2206  */
2207 void TransliteratorTest::TestNullInverse() {
2208     UParseError pe;
2209     UErrorCode ec = U_ZERO_ERROR;
2210     Transliterator *t = Transliterator::createInstance("Null", UTRANS_FORWARD, pe, ec);
2211     if (t == 0 || U_FAILURE(ec)) {
2212         errln("FAIL: createInstance");
2213         return;
2214     }
2215     Transliterator *u = t->createInverse(ec);
2216     if (u == 0 || U_FAILURE(ec)) {
2217         errln("FAIL: createInverse");
2218         delete t;
2219         return;
2220     }
2221     if (u->getID() != "Null") {
2222         errln("FAIL: Inverse of Null should be Null");
2223     }
2224     delete t;
2225     delete u;
2226 }
2227
2228 /**
2229  * Check ID of inverse of alias. (J22)
2230  */
2231 void TransliteratorTest::TestAliasInverseID() {
2232     UnicodeString ID("Latin-Hangul", ""); // This should be any alias ID with an inverse
2233     UParseError pe;
2234     UErrorCode ec = U_ZERO_ERROR;
2235     Transliterator *t = Transliterator::createInstance(ID, UTRANS_FORWARD, pe, ec);
2236     if (t == 0 || U_FAILURE(ec)) {
2237         dataerrln("FAIL: createInstance - %s", u_errorName(ec));
2238         return;
2239     }
2240     Transliterator *u = t->createInverse(ec);
2241     if (u == 0 || U_FAILURE(ec)) {
2242         errln("FAIL: createInverse");
2243         delete t;
2244         return;
2245     }
2246     UnicodeString exp = "Hangul-Latin";
2247     UnicodeString got = u->getID();
2248     if (got != exp) {
2249         errln((UnicodeString)"FAIL: Inverse of " + ID + " is " + got +
2250               ", expected " + exp);
2251     }
2252     delete t;
2253     delete u;
2254 }
2255
2256 /**
2257  * Test IDs of inverses of compound transliterators. (J20)
2258  */
2259 void TransliteratorTest::TestCompoundInverseID() {
2260     UnicodeString ID = "Latin-Jamo;NFC(NFD)";
2261     UParseError pe;
2262     UErrorCode ec = U_ZERO_ERROR;
2263     Transliterator *t = Transliterator::createInstance(ID, UTRANS_FORWARD, pe, ec);
2264     if (t == 0 || U_FAILURE(ec)) {
2265         dataerrln("FAIL: createInstance - %s", u_errorName(ec));
2266         return;
2267     }
2268     Transliterator *u = t->createInverse(ec);
2269     if (u == 0 || U_FAILURE(ec)) {
2270         errln("FAIL: createInverse");
2271         delete t;
2272         return;
2273     }
2274     UnicodeString exp = "NFD(NFC);Jamo-Latin";
2275     UnicodeString got = u->getID();
2276     if (got != exp) {
2277         errln((UnicodeString)"FAIL: Inverse of " + ID + " is " + got +
2278               ", expected " + exp);
2279     }
2280     delete t;
2281     delete u;
2282 }
2283
2284 /**
2285  * Test undefined variable.
2286
2287  */
2288 void TransliteratorTest::TestUndefinedVariable() {
2289     UnicodeString rule = "$initial } a <> \\u1161;";
2290     UParseError pe;
2291     UErrorCode ec = U_ZERO_ERROR;
2292     Transliterator *t = Transliterator::createFromRules("<ID>", rule, UTRANS_FORWARD, pe, ec);
2293     delete t;
2294     if (U_FAILURE(ec)) {
2295         logln((UnicodeString)"OK: Got exception for " + rule + ", as expected: " +
2296               u_errorName(ec));
2297         return;
2298     }
2299     errln((UnicodeString)"Fail: bogus rule " + rule + " compiled with error " +
2300           u_errorName(ec));
2301 }
2302
2303 /**
2304  * Test empty context.
2305  */
2306 void TransliteratorTest::TestEmptyContext() {
2307     expect(" { a } > b;", "xay a ", "xby b ");
2308 }
2309
2310 /**
2311 * Test compound filter ID syntax
2312 */
2313 void TransliteratorTest::TestCompoundFilterID(void) {
2314     static const char* DATA[] = {
2315         // Col. 1 = ID or rule set (latter must start with #)
2316
2317         // = columns > 1 are null if expect col. 1 to be illegal =
2318
2319         // Col. 2 = direction, "F..." or "R..."
2320         // Col. 3 = source string
2321         // Col. 4 = exp result
2322
2323         "[abc]; [abc]", NULL, NULL, NULL, // multiple filters
2324         "Latin-Greek; [abc];", NULL, NULL, NULL, // misplaced filter
2325         "[b]; Latin-Greek; Upper; ([xyz])", "F", "abc", "a\\u0392c",
2326         "[b]; (Lower); Latin-Greek; Upper(); ([\\u0392])", "R", "\\u0391\\u0392\\u0393", "\\u0391b\\u0393",
2327         "#\n::[b]; ::Latin-Greek; ::Upper; ::([xyz]);", "F", "abc", "a\\u0392c",
2328         "#\n::[b]; ::(Lower); ::Latin-Greek; ::Upper(); ::([\\u0392]);", "R", "\\u0391\\u0392\\u0393", "\\u0391b\\u0393",
2329         NULL,
2330     };
2331
2332     for (int32_t i=0; DATA[i]; i+=4) {
2333         UnicodeString id = CharsToUnicodeString(DATA[i]);
2334         UTransDirection direction = (DATA[i+1] != NULL && DATA[i+1][0] == 'R') ?
2335             UTRANS_REVERSE : UTRANS_FORWARD;
2336         UnicodeString source;
2337         UnicodeString exp;
2338         if (DATA[i+2] != NULL) {
2339             source = CharsToUnicodeString(DATA[i+2]);
2340             exp = CharsToUnicodeString(DATA[i+3]);
2341         }
2342         UBool expOk = (DATA[i+1] != NULL);
2343         Transliterator* t = NULL;
2344         UParseError pe;
2345         UErrorCode ec = U_ZERO_ERROR;
2346         if (id.charAt(0) == 0x23/*#*/) {
2347             t = Transliterator::createFromRules("ID", id, direction, pe, ec);
2348         } else {
2349             t = Transliterator::createInstance(id, direction, pe, ec);
2350         }
2351         UBool ok = (t != NULL && U_SUCCESS(ec));
2352         UnicodeString transID;
2353         if (t!=0) {
2354             transID = t->getID();
2355         }
2356         else {
2357             transID = UnicodeString("NULL", "");
2358         }
2359         if (ok == expOk) {
2360             logln((UnicodeString)"Ok: " + id + " => " + transID + ", " +
2361                   u_errorName(ec));
2362             if (source.length() != 0) {
2363                 expect(*t, source, exp);
2364             }
2365             delete t;
2366         } else {
2367             dataerrln((UnicodeString)"FAIL: " + id + " => " + transID + ", " +
2368                   u_errorName(ec));
2369         }
2370     }
2371 }
2372
2373 /**
2374  * Test new property set syntax
2375  */
2376 void TransliteratorTest::TestPropertySet() {
2377     expect(UNICODE_STRING_SIMPLE("a>A; \\p{Lu}>x; \\p{ANY}>y;"), "abcDEF", "Ayyxxx");
2378     expect("(.+)>'[' $1 ']';", " a stitch \n in time \r saves 9",
2379            "[ a stitch ]\n[ in time ]\r[ saves 9]");
2380 }
2381
2382 /**
2383  * Test various failure points of the new 2.0 engine.
2384  */
2385 void TransliteratorTest::TestNewEngine() {
2386     UParseError pe;
2387     UErrorCode ec = U_ZERO_ERROR;
2388     Transliterator *t = Transliterator::createInstance("Latin-Hiragana", UTRANS_FORWARD, pe, ec);
2389     if (t == 0 || U_FAILURE(ec)) {
2390         dataerrln("FAIL: createInstance Latin-Hiragana - %s", u_errorName(ec));
2391         return;
2392     }
2393     // Katakana should be untouched
2394     expect(*t, CharsToUnicodeString("a\\u3042\\u30A2"),
2395            CharsToUnicodeString("\\u3042\\u3042\\u30A2"));
2396
2397     delete t;
2398
2399 #if 1
2400     // This test will only work if Transliterator.ROLLBACK is
2401     // true.  Otherwise, this test will fail, revealing a
2402     // limitation of global filters in incremental mode.
2403     Transliterator *a =
2404         Transliterator::createFromRules("a_to_A", "a > A;", UTRANS_FORWARD, pe, ec);
2405     Transliterator *A =
2406         Transliterator::createFromRules("A_to_b", "A > b;", UTRANS_FORWARD, pe, ec);
2407     if (U_FAILURE(ec)) {
2408         delete a;
2409         delete A;
2410         return;
2411     }
2412
2413     Transliterator* array[3];
2414     array[0] = a;
2415     array[1] = Transliterator::createInstance("NFD", UTRANS_FORWARD, pe, ec);
2416     array[2] = A;
2417     if (U_FAILURE(ec)) {
2418         errln("FAIL: createInstance NFD");
2419         delete a;
2420         delete A;
2421         delete array[1];
2422         return;
2423     }
2424
2425     t = new CompoundTransliterator(array, 3, new UnicodeSet("[:Ll:]", ec));
2426     if (U_FAILURE(ec)) {
2427         errln("FAIL: UnicodeSet constructor");
2428         delete a;
2429         delete A;
2430         delete array[1];
2431         delete t;
2432         return;
2433     }
2434
2435     expect(*t, "aAaA", "bAbA");
2436
2437     assertTrue("countElements", t->countElements() == 3);
2438     assertEquals("getElement(0)", t->getElement(0, ec).getID(), "a_to_A");
2439     assertEquals("getElement(1)", t->getElement(1, ec).getID(), "NFD");
2440     assertEquals("getElement(2)", t->getElement(2, ec).getID(), "A_to_b");
2441     assertSuccess("getElement", ec);
2442
2443     delete a;
2444     delete A;
2445     delete array[1];
2446     delete t;
2447 #endif
2448
2449     expect("$smooth = x; $macron = q; [:^L:] { ([aeiouyAEIOUY] $macron?) } [^aeiouyAEIOUY$smooth$macron] > | $1 $smooth ;",
2450            "a",
2451            "ax");
2452
2453     UnicodeString gr = CharsToUnicodeString(
2454         "$ddot = \\u0308 ;"
2455         "$lcgvowel = [\\u03b1\\u03b5\\u03b7\\u03b9\\u03bf\\u03c5\\u03c9] ;"
2456         "$rough = \\u0314 ;"
2457         "($lcgvowel+ $ddot?) $rough > h | $1 ;"
2458         "\\u03b1 <> a ;"
2459         "$rough <> h ;");
2460
2461     expect(gr, CharsToUnicodeString("\\u03B1\\u0314"), "ha");
2462 }
2463
2464 /**
2465  * Test quantified segment behavior.  We want:
2466  * ([abc])+ > x $1 x; applied to "cba" produces "xax"
2467  */
2468 void TransliteratorTest::TestQuantifiedSegment(void) {
2469     // The normal case
2470     expect("([abc]+) > x $1 x;", "cba", "xcbax");
2471
2472     // The tricky case; the quantifier is around the segment
2473     expect("([abc])+ > x $1 x;", "cba", "xax");
2474
2475     // Tricky case in reverse direction
2476     expect("([abc])+ { q > x $1 x;", "cbaq", "cbaxax");
2477
2478     // Check post-context segment
2479     expect("{q} ([a-d])+ > '(' $1 ')';", "ddqcba", "dd(a)cba");
2480
2481     // Test toRule/toPattern for non-quantified segment.
2482     // Careful with spacing here.
2483     UnicodeString r("([a-c]){q} > x $1 x;");
2484     UParseError pe;
2485     UErrorCode ec = U_ZERO_ERROR;
2486     Transliterator* t = Transliterator::createFromRules("ID", r, UTRANS_FORWARD, pe, ec);
2487     if (U_FAILURE(ec)) {
2488         errln("FAIL: createFromRules");
2489         delete t;
2490         return;
2491     }
2492     UnicodeString rr;
2493     t->toRules(rr, TRUE);
2494     if (r != rr) {
2495         errln((UnicodeString)"FAIL: \"" + r + "\" x toRules() => \"" + rr + "\"");
2496     } else {
2497         logln((UnicodeString)"Ok: \"" + r + "\" x toRules() => \"" + rr + "\"");
2498     }
2499     delete t;
2500
2501     // Test toRule/toPattern for quantified segment.
2502     // Careful with spacing here.
2503     r = "([a-c])+{q} > x $1 x;";
2504     t = Transliterator::createFromRules("ID", r, UTRANS_FORWARD, pe, ec);
2505     if (U_FAILURE(ec)) {
2506         errln("FAIL: createFromRules");
2507         delete t;
2508         return;
2509     }
2510     t->toRules(rr, TRUE);
2511     if (r != rr) {
2512         errln((UnicodeString)"FAIL: \"" + r + "\" x toRules() => \"" + rr + "\"");
2513     } else {
2514         logln((UnicodeString)"Ok: \"" + r + "\" x toRules() => \"" + rr + "\"");
2515     }
2516     delete t;
2517 }
2518
2519 //======================================================================
2520 // Ram's tests
2521 //======================================================================
2522 void TransliteratorTest::TestDevanagariLatinRT(){
2523     const int MAX_LEN= 52;
2524     const char* const source[MAX_LEN] = {
2525         "bh\\u0101rata",
2526         "kra",
2527         "k\\u1E63a",
2528         "khra",
2529         "gra",
2530         "\\u1E45ra",
2531         "cra",
2532         "chra",
2533         "j\\u00F1a",
2534         "jhra",
2535         "\\u00F1ra",
2536         "\\u1E6Dya",
2537         "\\u1E6Dhra",
2538         "\\u1E0Dya",
2539       //"r\\u0323ya", // \u095c is not valid in Devanagari
2540         "\\u1E0Dhya",
2541         "\\u1E5Bhra",
2542         "\\u1E47ra",
2543         "tta",
2544         "thra",
2545         "dda",
2546         "dhra",
2547         "nna",
2548         "pra",
2549         "phra",
2550         "bra",
2551         "bhra",
2552         "mra",
2553         "\\u1E49ra",
2554       //"l\\u0331ra",
2555         "yra",
2556         "\\u1E8Fra",
2557       //"l-",
2558         "vra",
2559         "\\u015Bra",
2560         "\\u1E63ra",
2561         "sra",
2562         "hma",
2563         "\\u1E6D\\u1E6Da",
2564         "\\u1E6D\\u1E6Dha",
2565         "\\u1E6Dh\\u1E6Dha",
2566         "\\u1E0D\\u1E0Da",
2567         "\\u1E0D\\u1E0Dha",
2568         "\\u1E6Dya",
2569         "\\u1E6Dhya",
2570         "\\u1E0Dya",
2571         "\\u1E0Dhya",
2572         // Not roundtrippable --
2573         // \\u0939\\u094d\\u094d\\u092E  - hma
2574         // \\u0939\\u094d\\u092E         - hma
2575         // CharsToUnicodeString("hma"),
2576         "hya",
2577         "\\u015Br\\u0325",
2578         "\\u015Bca",
2579         "\\u0115",
2580         "san\\u0304j\\u012Bb s\\u0113nagupta",
2581         "\\u0101nand vaddir\\u0101ju",
2582         "\\u0101",
2583         "a"
2584     };
2585     const char* const expected[MAX_LEN] = {
2586         "\\u092D\\u093E\\u0930\\u0924",   /* bha\\u0304rata */
2587         "\\u0915\\u094D\\u0930",          /* kra         */
2588         "\\u0915\\u094D\\u0937",          /* ks\\u0323a  */
2589         "\\u0916\\u094D\\u0930",          /* khra        */
2590         "\\u0917\\u094D\\u0930",          /* gra         */
2591         "\\u0919\\u094D\\u0930",          /* n\\u0307ra  */
2592         "\\u091A\\u094D\\u0930",          /* cra         */
2593         "\\u091B\\u094D\\u0930",          /* chra        */
2594         "\\u091C\\u094D\\u091E",          /* jn\\u0303a  */
2595         "\\u091D\\u094D\\u0930",          /* jhra        */
2596         "\\u091E\\u094D\\u0930",          /* n\\u0303ra  */
2597         "\\u091F\\u094D\\u092F",          /* t\\u0323ya  */
2598         "\\u0920\\u094D\\u0930",          /* t\\u0323hra */
2599         "\\u0921\\u094D\\u092F",          /* d\\u0323ya  */
2600       //"\\u095C\\u094D\\u092F",        /* r\\u0323ya  */ // \u095c is not valid in Devanagari
2601         "\\u0922\\u094D\\u092F",          /* d\\u0323hya */
2602         "\\u0922\\u093C\\u094D\\u0930",   /* r\\u0323hra */
2603         "\\u0923\\u094D\\u0930",          /* n\\u0323ra  */
2604         "\\u0924\\u094D\\u0924",          /* tta         */
2605         "\\u0925\\u094D\\u0930",          /* thra        */
2606         "\\u0926\\u094D\\u0926",          /* dda         */
2607         "\\u0927\\u094D\\u0930",          /* dhra        */
2608         "\\u0928\\u094D\\u0928",          /* nna         */
2609         "\\u092A\\u094D\\u0930",          /* pra         */
2610         "\\u092B\\u094D\\u0930",          /* phra        */
2611         "\\u092C\\u094D\\u0930",          /* bra         */
2612         "\\u092D\\u094D\\u0930",          /* bhra        */
2613         "\\u092E\\u094D\\u0930",          /* mra         */
2614         "\\u0929\\u094D\\u0930",          /* n\\u0331ra  */
2615       //"\\u0934\\u094D\\u0930",        /* l\\u0331ra  */
2616         "\\u092F\\u094D\\u0930",          /* yra         */
2617         "\\u092F\\u093C\\u094D\\u0930",   /* y\\u0307ra  */
2618       //"l-",
2619         "\\u0935\\u094D\\u0930",          /* vra         */
2620         "\\u0936\\u094D\\u0930",          /* s\\u0301ra  */
2621         "\\u0937\\u094D\\u0930",          /* s\\u0323ra  */
2622         "\\u0938\\u094D\\u0930",          /* sra         */
2623         "\\u0939\\u094d\\u092E",          /* hma         */
2624         "\\u091F\\u094D\\u091F",          /* t\\u0323t\\u0323a  */
2625         "\\u091F\\u094D\\u0920",          /* t\\u0323t\\u0323ha */
2626         "\\u0920\\u094D\\u0920",          /* t\\u0323ht\\u0323ha*/
2627         "\\u0921\\u094D\\u0921",          /* d\\u0323d\\u0323a  */
2628         "\\u0921\\u094D\\u0922",          /* d\\u0323d\\u0323ha */
2629         "\\u091F\\u094D\\u092F",          /* t\\u0323ya  */
2630         "\\u0920\\u094D\\u092F",          /* t\\u0323hya */
2631         "\\u0921\\u094D\\u092F",          /* d\\u0323ya  */
2632         "\\u0922\\u094D\\u092F",          /* d\\u0323hya */
2633      // "hma",                         /* hma         */
2634         "\\u0939\\u094D\\u092F",          /* hya         */
2635         "\\u0936\\u0943",                 /* s\\u0301r\\u0325a  */
2636         "\\u0936\\u094D\\u091A",          /* s\\u0301ca  */
2637         "\\u090d",                        /* e\\u0306    */
2638         "\\u0938\\u0902\\u091C\\u0940\\u092C\\u094D \\u0938\\u0947\\u0928\\u0917\\u0941\\u092A\\u094D\\u0924",
2639         "\\u0906\\u0928\\u0902\\u0926\\u094D \\u0935\\u0926\\u094D\\u0926\\u093F\\u0930\\u093E\\u091C\\u0941",
2640         "\\u0906",
2641         "\\u0905",
2642     };
2643     UErrorCode status = U_ZERO_ERROR;
2644     UParseError parseError;
2645     UnicodeString message;
2646     Transliterator* latinToDev=Transliterator::createInstance("Latin-Devanagari", UTRANS_FORWARD, parseError, status);
2647     Transliterator* devToLatin=Transliterator::createInstance("Devanagari-Latin", UTRANS_FORWARD, parseError, status);
2648     if(U_FAILURE(status)){
2649         dataerrln("FAIL: construction " +   UnicodeString(" Error: ") + u_errorName(status));
2650         dataerrln("PreContext: " + prettify(parseError.preContext) + " PostContext: " + prettify( parseError.postContext) );
2651         return;
2652     }
2653     UnicodeString gotResult;
2654     for(int i= 0; i<MAX_LEN; i++){
2655         gotResult = source[i];
2656         expect(*latinToDev,CharsToUnicodeString(source[i]),CharsToUnicodeString(expected[i]));
2657         expect(*devToLatin,CharsToUnicodeString(expected[i]),CharsToUnicodeString(source[i]));
2658     }
2659     delete latinToDev;
2660     delete devToLatin;
2661 }
2662
2663 void TransliteratorTest::TestTeluguLatinRT(){
2664     const int MAX_LEN=10;
2665     const char* const source[MAX_LEN] = {
2666         "raghur\\u0101m vi\\u015Bvan\\u0101dha",                         /* Raghuram Viswanadha    */
2667         "\\u0101nand vaddir\\u0101ju",                                   /* Anand Vaddiraju        */
2668         "r\\u0101j\\u012Bv ka\\u015Barab\\u0101da",                      /* Rajeev Kasarabada      */
2669         "san\\u0304j\\u012Bv ka\\u015Barab\\u0101da",                    /* sanjeev kasarabada     */
2670         "san\\u0304j\\u012Bb sen'gupta",                                 /* sanjib sengupata       */
2671         "amar\\u0113ndra hanum\\u0101nula",                              /* Amarendra hanumanula   */
2672         "ravi kum\\u0101r vi\\u015Bvan\\u0101dha",                       /* Ravi Kumar Viswanadha  */
2673         "\\u0101ditya kandr\\u0113gula",                                 /* Aditya Kandregula      */
2674         "\\u015Br\\u012Bdhar ka\\u1E47\\u1E6Dama\\u015Be\\u1E6D\\u1E6Di",/* Shridhar Kantamsetty   */
2675         "m\\u0101dhav de\\u015Be\\u1E6D\\u1E6Di"                         /* Madhav Desetty         */
2676     };
2677
2678     const char* const expected[MAX_LEN] = {
2679         "\\u0c30\\u0c18\\u0c41\\u0c30\\u0c3e\\u0c2e\\u0c4d \\u0c35\\u0c3f\\u0c36\\u0c4d\\u0c35\\u0c28\\u0c3e\\u0c27",
2680         "\\u0c06\\u0c28\\u0c02\\u0c26\\u0c4d \\u0C35\\u0C26\\u0C4D\\u0C26\\u0C3F\\u0C30\\u0C3E\\u0C1C\\u0C41",
2681         "\\u0c30\\u0c3e\\u0c1c\\u0c40\\u0c35\\u0c4d \\u0c15\\u0c36\\u0c30\\u0c2c\\u0c3e\\u0c26",
2682         "\\u0c38\\u0c02\\u0c1c\\u0c40\\u0c35\\u0c4d \\u0c15\\u0c36\\u0c30\\u0c2c\\u0c3e\\u0c26",
2683         "\\u0c38\\u0c02\\u0c1c\\u0c40\\u0c2c\\u0c4d \\u0c38\\u0c46\\u0c28\\u0c4d\\u0c17\\u0c41\\u0c2a\\u0c4d\\u0c24",
2684         "\\u0c05\\u0c2e\\u0c30\\u0c47\\u0c02\\u0c26\\u0c4d\\u0c30 \\u0c39\\u0c28\\u0c41\\u0c2e\\u0c3e\\u0c28\\u0c41\\u0c32",
2685         "\\u0c30\\u0c35\\u0c3f \\u0c15\\u0c41\\u0c2e\\u0c3e\\u0c30\\u0c4d \\u0c35\\u0c3f\\u0c36\\u0c4d\\u0c35\\u0c28\\u0c3e\\u0c27",
2686         "\\u0c06\\u0c26\\u0c3f\\u0c24\\u0c4d\\u0c2f \\u0C15\\u0C02\\u0C26\\u0C4D\\u0C30\\u0C47\\u0C17\\u0C41\\u0c32",
2687         "\\u0c36\\u0c4d\\u0c30\\u0c40\\u0C27\\u0C30\\u0C4D \\u0c15\\u0c02\\u0c1f\\u0c2e\\u0c36\\u0c46\\u0c1f\\u0c4d\\u0c1f\\u0c3f",
2688         "\\u0c2e\\u0c3e\\u0c27\\u0c35\\u0c4d \\u0c26\\u0c46\\u0c36\\u0c46\\u0c1f\\u0c4d\\u0c1f\\u0c3f",
2689     };
2690
2691     UErrorCode status = U_ZERO_ERROR;
2692     UParseError parseError;
2693     UnicodeString message;
2694     Transliterator* latinToDev=Transliterator::createInstance("Latin-Telugu", UTRANS_FORWARD, parseError, status);
2695     Transliterator* devToLatin=Transliterator::createInstance("Telugu-Latin", UTRANS_FORWARD, parseError, status);
2696     if(U_FAILURE(status)){
2697         dataerrln("FAIL: construction " +   UnicodeString(" Error: ") + u_errorName(status));
2698         dataerrln("PreContext: " + prettify(parseError.preContext) + " PostContext: " + prettify( parseError.postContext) );
2699         return;
2700     }
2701     UnicodeString gotResult;
2702     for(int i= 0; i<MAX_LEN; i++){
2703         gotResult = source[i];
2704         expect(*latinToDev,CharsToUnicodeString(source[i]),CharsToUnicodeString(expected[i]));
2705         expect(*devToLatin,CharsToUnicodeString(expected[i]),CharsToUnicodeString(source[i]));
2706     }
2707     delete latinToDev;
2708     delete devToLatin;
2709 }
2710
2711 void TransliteratorTest::TestSanskritLatinRT(){
2712     const int MAX_LEN =16;
2713     const char* const source[MAX_LEN] = {
2714         "rmk\\u1E63\\u0113t",
2715         "\\u015Br\\u012Bmad",
2716         "bhagavadg\\u012Bt\\u0101",
2717         "adhy\\u0101ya",
2718         "arjuna",
2719         "vi\\u1E63\\u0101da",
2720         "y\\u014Dga",
2721         "dhr\\u0325tar\\u0101\\u1E63\\u1E6Dra",
2722         "uv\\u0101cr\\u0325",
2723         "dharmak\\u1E63\\u0113tr\\u0113",
2724         "kuruk\\u1E63\\u0113tr\\u0113",
2725         "samav\\u0113t\\u0101",
2726         "yuyutsava\\u1E25",
2727         "m\\u0101mak\\u0101\\u1E25",
2728     // "p\\u0101\\u1E47\\u1E0Dav\\u0101\\u015Bcaiva",
2729         "kimakurvata",
2730         "san\\u0304java",
2731     };
2732     const char* const expected[MAX_LEN] = {
2733         "\\u0930\\u094D\\u092E\\u094D\\u0915\\u094D\\u0937\\u0947\\u0924\\u094D",
2734         "\\u0936\\u094d\\u0930\\u0940\\u092e\\u0926\\u094d",
2735         "\\u092d\\u0917\\u0935\\u0926\\u094d\\u0917\\u0940\\u0924\\u093e",
2736         "\\u0905\\u0927\\u094d\\u092f\\u093e\\u092f",
2737         "\\u0905\\u0930\\u094d\\u091c\\u0941\\u0928",
2738         "\\u0935\\u093f\\u0937\\u093e\\u0926",
2739         "\\u092f\\u094b\\u0917",
2740         "\\u0927\\u0943\\u0924\\u0930\\u093e\\u0937\\u094d\\u091f\\u094d\\u0930",
2741         "\\u0909\\u0935\\u093E\\u091A\\u0943",
2742         "\\u0927\\u0930\\u094d\\u092e\\u0915\\u094d\\u0937\\u0947\\u0924\\u094d\\u0930\\u0947",
2743         "\\u0915\\u0941\\u0930\\u0941\\u0915\\u094d\\u0937\\u0947\\u0924\\u094d\\u0930\\u0947",
2744         "\\u0938\\u092e\\u0935\\u0947\\u0924\\u093e",
2745         "\\u092f\\u0941\\u092f\\u0941\\u0924\\u094d\\u0938\\u0935\\u0903",
2746         "\\u092e\\u093e\\u092e\\u0915\\u093e\\u0903",
2747     //"\\u092a\\u093e\\u0923\\u094d\\u0921\\u0935\\u093e\\u0936\\u094d\\u091a\\u0948\\u0935",
2748         "\\u0915\\u093f\\u092e\\u0915\\u0941\\u0930\\u094d\\u0935\\u0924",
2749         "\\u0938\\u0902\\u091c\\u0935",
2750     };
2751     UErrorCode status = U_ZERO_ERROR;
2752     UParseError parseError;
2753     UnicodeString message;
2754     Transliterator* latinToDev=Transliterator::createInstance("Latin-Devanagari", UTRANS_FORWARD, parseError, status);
2755     Transliterator* devToLatin=Transliterator::createInstance("Devanagari-Latin", UTRANS_FORWARD, parseError, status);
2756     if(U_FAILURE(status)){
2757         dataerrln("FAIL: construction " +   UnicodeString(" Error: ") + u_errorName(status));
2758         dataerrln("PreContext: " + prettify(parseError.preContext) + " PostContext: " + prettify( parseError.postContext) );
2759         return;
2760     }
2761     UnicodeString gotResult;
2762     for(int i= 0; i<MAX_LEN; i++){
2763         gotResult = source[i];
2764         expect(*latinToDev,CharsToUnicodeString(source[i]),CharsToUnicodeString(expected[i]));
2765         expect(*devToLatin,CharsToUnicodeString(expected[i]),CharsToUnicodeString(source[i]));
2766     }
2767     delete latinToDev;
2768     delete devToLatin;
2769 }
2770
2771
2772 void TransliteratorTest::TestCompoundLatinRT(){
2773     const char* const source[] = {
2774         "rmk\\u1E63\\u0113t",
2775         "\\u015Br\\u012Bmad",
2776         "bhagavadg\\u012Bt\\u0101",
2777         "adhy\\u0101ya",
2778         "arjuna",
2779         "vi\\u1E63\\u0101da",
2780         "y\\u014Dga",
2781         "dhr\\u0325tar\\u0101\\u1E63\\u1E6Dra",
2782         "uv\\u0101cr\\u0325",
2783         "dharmak\\u1E63\\u0113tr\\u0113",
2784         "kuruk\\u1E63\\u0113tr\\u0113",
2785         "samav\\u0113t\\u0101",
2786         "yuyutsava\\u1E25",
2787         "m\\u0101mak\\u0101\\u1E25",
2788      // "p\\u0101\\u1E47\\u1E0Dav\\u0101\\u015Bcaiva",
2789         "kimakurvata",
2790         "san\\u0304java"
2791     };
2792     const int MAX_LEN = UPRV_LENGTHOF(source);
2793     const char* const expected[MAX_LEN] = {
2794         "\\u0930\\u094D\\u092E\\u094D\\u0915\\u094D\\u0937\\u0947\\u0924\\u094D",
2795         "\\u0936\\u094d\\u0930\\u0940\\u092e\\u0926\\u094d",
2796         "\\u092d\\u0917\\u0935\\u0926\\u094d\\u0917\\u0940\\u0924\\u093e",
2797         "\\u0905\\u0927\\u094d\\u092f\\u093e\\u092f",
2798         "\\u0905\\u0930\\u094d\\u091c\\u0941\\u0928",
2799         "\\u0935\\u093f\\u0937\\u093e\\u0926",
2800         "\\u092f\\u094b\\u0917",
2801         "\\u0927\\u0943\\u0924\\u0930\\u093e\\u0937\\u094d\\u091f\\u094d\\u0930",
2802         "\\u0909\\u0935\\u093E\\u091A\\u0943",
2803         "\\u0927\\u0930\\u094d\\u092e\\u0915\\u094d\\u0937\\u0947\\u0924\\u094d\\u0930\\u0947",
2804         "\\u0915\\u0941\\u0930\\u0941\\u0915\\u094d\\u0937\\u0947\\u0924\\u094d\\u0930\\u0947",
2805         "\\u0938\\u092e\\u0935\\u0947\\u0924\\u093e",
2806         "\\u092f\\u0941\\u092f\\u0941\\u0924\\u094d\\u0938\\u0935\\u0903",
2807         "\\u092e\\u093e\\u092e\\u0915\\u093e\\u0903",
2808     //  "\\u092a\\u093e\\u0923\\u094d\\u0921\\u0935\\u093e\\u0936\\u094d\\u091a\\u0948\\u0935",
2809         "\\u0915\\u093f\\u092e\\u0915\\u0941\\u0930\\u094d\\u0935\\u0924",
2810         "\\u0938\\u0902\\u091c\\u0935"
2811     };
2812     if(MAX_LEN != UPRV_LENGTHOF(expected)) {
2813         errln("error in TestCompoundLatinRT: source[] and expected[] have different lengths!");
2814         return;
2815     }
2816
2817     UErrorCode status = U_ZERO_ERROR;
2818     UParseError parseError;
2819     UnicodeString message;
2820     Transliterator* devToLatinToDev  =Transliterator::createInstance("Devanagari-Latin;Latin-Devanagari", UTRANS_FORWARD, parseError, status);
2821     Transliterator* latinToDevToLatin=Transliterator::createInstance("Latin-Devanagari;Devanagari-Latin", UTRANS_FORWARD, parseError, status);
2822     Transliterator* devToTelToDev    =Transliterator::createInstance("Devanagari-Telugu;Telugu-Devanagari", UTRANS_FORWARD, parseError, status);
2823     Transliterator* latinToTelToLatin=Transliterator::createInstance("Latin-Telugu;Telugu-Latin", UTRANS_FORWARD, parseError, status);
2824
2825     if(U_FAILURE(status)){
2826         dataerrln("FAIL: construction " +   UnicodeString(" Error: ") + u_errorName(status));
2827         dataerrln("PreContext: " + prettify(parseError.preContext) + " PostContext: " + prettify( parseError.postContext) );
2828         return;
2829     }
2830     UnicodeString gotResult;
2831     for(int i= 0; i<MAX_LEN; i++){
2832         gotResult = source[i];
2833         expect(*devToLatinToDev,CharsToUnicodeString(expected[i]),CharsToUnicodeString(expected[i]));
2834         expect(*latinToDevToLatin,CharsToUnicodeString(source[i]),CharsToUnicodeString(source[i]));
2835         expect(*latinToTelToLatin,CharsToUnicodeString(source[i]),CharsToUnicodeString(source[i]));
2836
2837     }
2838     delete(latinToDevToLatin);
2839     delete(devToLatinToDev);
2840     delete(devToTelToDev);
2841     delete(latinToTelToLatin);
2842 }
2843
2844 /**
2845  * Test Gurmukhi-Devanagari Tippi and Bindi
2846  */
2847 void TransliteratorTest::TestGurmukhiDevanagari(){
2848     // the rule says:
2849     // (\u0902) (when preceded by vowel)      --->  (\u0A02)
2850     // (\u0902) (when preceded by consonant)  --->  (\u0A70)
2851     UErrorCode status = U_ZERO_ERROR;
2852     UnicodeSet vowel(UnicodeString("[\\u0905-\\u090A \\u090F\\u0910\\u0913\\u0914 \\u093e-\\u0942\\u0947\\u0948\\u094B\\u094C\\u094D]", -1, US_INV).unescape(), status);
2853     UnicodeSet non_vowel(UnicodeString("[\\u0915-\\u0928\\u092A-\\u0930]", -1, US_INV).unescape(), status);
2854     UParseError parseError;
2855
2856     UnicodeSetIterator vIter(vowel);
2857     UnicodeSetIterator nvIter(non_vowel);
2858     Transliterator* trans = Transliterator::createInstance("Devanagari-Gurmukhi",UTRANS_FORWARD, parseError, status);
2859     if(U_FAILURE(status)) {
2860       dataerrln("Error creating transliterator %s", u_errorName(status));
2861       delete trans;
2862       return;
2863     }
2864     UnicodeString src (" \\u0902", -1, US_INV);
2865     UnicodeString expected(" \\u0A02", -1, US_INV);
2866     src = src.unescape();
2867     expected= expected.unescape();
2868
2869     while(vIter.next()){
2870         src.setCharAt(0,(UChar) vIter.getCodepoint());
2871         expected.setCharAt(0,(UChar) (vIter.getCodepoint()+0x0100));
2872         expect(*trans,src,expected);
2873     }
2874
2875     expected.setCharAt(1,0x0A70);
2876     while(nvIter.next()){
2877         //src.setCharAt(0,(char) nvIter.codepoint);
2878         src.setCharAt(0,(UChar)nvIter.getCodepoint());
2879         expected.setCharAt(0,(UChar) (nvIter.getCodepoint()+0x0100));
2880         expect(*trans,src,expected);
2881     }
2882     delete trans;
2883 }
2884 /**
2885  * Test instantiation from a locale.
2886  */
2887 void TransliteratorTest::TestLocaleInstantiation(void) {
2888     UParseError pe;
2889     UErrorCode ec = U_ZERO_ERROR;
2890     Transliterator *t = Transliterator::createInstance("ru_RU-Latin", UTRANS_FORWARD, pe, ec);
2891     if (U_FAILURE(ec)) {
2892         dataerrln("FAIL: createInstance(ru_RU-Latin) - %s", u_errorName(ec));
2893         delete t;
2894         return;
2895     }
2896     expect(*t, CharsToUnicodeString("\\u0430"), "a");
2897     delete t;
2898
2899     t = Transliterator::createInstance("en-el", UTRANS_FORWARD, pe, ec);
2900     if (U_FAILURE(ec)) {
2901         errln("FAIL: createInstance(en-el)");
2902         delete t;
2903         return;
2904     }
2905     expect(*t, "a", CharsToUnicodeString("\\u03B1"));
2906     delete t;
2907 }
2908
2909 /**
2910  * Test title case handling of accent (should ignore accents)
2911  */
2912 void TransliteratorTest::TestTitleAccents(void) {
2913     UParseError pe;
2914     UErrorCode ec = U_ZERO_ERROR;
2915     Transliterator *t = Transliterator::createInstance("Title", UTRANS_FORWARD, pe, ec);
2916     if (U_FAILURE(ec)) {
2917         errln("FAIL: createInstance(Title)");
2918         delete t;
2919         return;
2920     }
2921     expect(*t, CharsToUnicodeString("a\\u0300b can't abe"), CharsToUnicodeString("A\\u0300b Can't Abe"));
2922     delete t;
2923 }
2924
2925 /**
2926  * Basic test of a locale resource based rule.
2927  */
2928 void TransliteratorTest::TestLocaleResource() {
2929     const char* DATA[] = {
2930         // id                    from               to
2931         //"Latin-Greek/UNGEGN",    "b",               "\\u03bc\\u03c0",
2932         "Latin-el",              "b",               "\\u03bc\\u03c0",
2933         "Latin-Greek",           "b",               "\\u03B2",
2934         "Greek-Latin/UNGEGN",    "\\u03B2",         "v",
2935         "el-Latin",              "\\u03B2",         "v",
2936         "Greek-Latin",           "\\u03B2",         "b",
2937     };
2938     const int32_t DATA_length = UPRV_LENGTHOF(DATA);
2939     for (int32_t i=0; i<DATA_length; i+=3) {
2940         UParseError pe;
2941         UErrorCode ec = U_ZERO_ERROR;
2942         Transliterator *t = Transliterator::createInstance(DATA[i], UTRANS_FORWARD, pe, ec);
2943         if (U_FAILURE(ec)) {
2944             dataerrln((UnicodeString)"FAIL: createInstance(" + DATA[i] + ") - " + u_errorName(ec));
2945             delete t;
2946             continue;
2947         }
2948         expect(*t, CharsToUnicodeString(DATA[i+1]),
2949                CharsToUnicodeString(DATA[i+2]));
2950         delete t;
2951     }
2952 }
2953
2954 /**
2955  * Make sure parse errors reference the right line.
2956  */
2957 void TransliteratorTest::TestParseError() {
2958     static const char* rule =
2959         "a > b;\n"
2960         "# more stuff\n"
2961         "d << b;";
2962     UErrorCode ec = U_ZERO_ERROR;
2963     UParseError pe;
2964     Transliterator *t = Transliterator::createFromRules("ID", rule, UTRANS_FORWARD, pe, ec);
2965     delete t;
2966     if (U_FAILURE(ec)) {
2967         UnicodeString err(pe.preContext);
2968         err.append((UChar)124/*|*/).append(pe.postContext);
2969         if (err.indexOf("d << b") >= 0) {
2970             logln("Ok: " + err);
2971         } else {
2972             errln("FAIL: " + err);
2973         }
2974     }
2975     else {
2976         errln("FAIL: no syntax error");
2977     }
2978     static const char* maskingRule =
2979         "a>x;\n"
2980         "# more stuff\n"
2981         "ab>y;";
2982     ec = U_ZERO_ERROR;
2983     delete Transliterator::createFromRules("ID", maskingRule, UTRANS_FORWARD, pe, ec);
2984     if (ec != U_RULE_MASK_ERROR) {
2985         errln("FAIL: returned %s instead of U_RULE_MASK_ERROR", u_errorName(ec));
2986     }
2987     else if (UnicodeString("a > x;") != UnicodeString(pe.preContext)) {
2988         errln("FAIL: did not get expected precontext");
2989     }
2990     else if (UnicodeString("ab > y;") != UnicodeString(pe.postContext)) {
2991         errln("FAIL: did not get expected postcontext");
2992     }
2993 }
2994
2995 /**
2996  * Make sure sets on output are disallowed.
2997  */
2998 void TransliteratorTest::TestOutputSet() {
2999     UnicodeString rule = "$set = [a-cm-n]; b > $set;";
3000     UErrorCode ec = U_ZERO_ERROR;
3001     UParseError pe;
3002     Transliterator *t = Transliterator::createFromRules("ID", rule, UTRANS_FORWARD, pe, ec);
3003     delete t;
3004     if (U_FAILURE(ec)) {
3005         UnicodeString err(pe.preContext);
3006         err.append((UChar)124/*|*/).append(pe.postContext);
3007         logln("Ok: " + err);
3008         return;
3009     }
3010     errln("FAIL: No syntax error");
3011 }
3012
3013 /**
3014  * Test the use variable range pragma, making sure that use of
3015  * variable range characters is detected and flagged as an error.
3016  */
3017 void TransliteratorTest::TestVariableRange() {
3018     UnicodeString rule = "use variable range 0x70 0x72; a > A; b > B; q > Q;";
3019     UErrorCode ec = U_ZERO_ERROR;
3020     UParseError pe;
3021     Transliterator *t = Transliterator::createFromRules("ID", rule, UTRANS_FORWARD, pe, ec);
3022     delete t;
3023     if (U_FAILURE(ec)) {
3024         UnicodeString err(pe.preContext);
3025         err.append((UChar)124/*|*/).append(pe.postContext);
3026         logln("Ok: " + err);
3027         return;
3028     }
3029     errln("FAIL: No syntax error");
3030 }
3031
3032 /**
3033  * Test invalid post context error handling
3034  */
3035 void TransliteratorTest::TestInvalidPostContext() {
3036     UnicodeString rule = "a}b{c>d;";
3037     UErrorCode ec = U_ZERO_ERROR;
3038     UParseError pe;
3039     Transliterator *t = Transliterator::createFromRules("ID", rule, UTRANS_FORWARD, pe, ec);
3040     delete t;
3041     if (U_FAILURE(ec)) {
3042         UnicodeString err(pe.preContext);
3043         err.append((UChar)124/*|*/).append(pe.postContext);
3044         if (err.indexOf("a}b{c") >= 0) {
3045             logln("Ok: " + err);
3046         } else {
3047             errln("FAIL: " + err);
3048         }
3049         return;
3050     }
3051     errln("FAIL: No syntax error");
3052 }
3053
3054 /**
3055  * Test ID form variants
3056  */
3057 void TransliteratorTest::TestIDForms() {
3058     const char* DATA[] = {
3059         "NFC", NULL, "NFD",
3060         "nfd", NULL, "NFC", // make sure case is ignored
3061         "Any-NFKD", NULL, "Any-NFKC",
3062         "Null", NULL, "Null",
3063         "-nfkc", "nfkc", "NFKD",
3064         "-nfkc/", "nfkc", "NFKD",
3065         "Latin-Greek/UNGEGN", NULL, "Greek-Latin/UNGEGN",
3066         "Greek/UNGEGN-Latin", "Greek-Latin/UNGEGN", "Latin-Greek/UNGEGN",
3067         "Bengali-Devanagari/", "Bengali-Devanagari", "Devanagari-Bengali",
3068         "Source-", NULL, NULL,
3069         "Source/Variant-", NULL, NULL,
3070         "Source-/Variant", NULL, NULL,
3071         "/Variant", NULL, NULL,
3072         "/Variant-", NULL, NULL,
3073         "-/Variant", NULL, NULL,
3074         "-/", NULL, NULL,
3075         "-", NULL, NULL,
3076         "/", NULL, NULL,
3077     };
3078     const int32_t DATA_length = UPRV_LENGTHOF(DATA);
3079
3080     for (int32_t i=0; i<DATA_length; i+=3) {
3081         const char* ID = DATA[i];
3082         const char* expID = DATA[i+1];
3083         const char* expInvID = DATA[i+2];
3084         UBool expValid = (expInvID != NULL);
3085         if (expID == NULL) {
3086             expID = ID;
3087         }
3088         UParseError pe;
3089         UErrorCode ec = U_ZERO_ERROR;
3090         Transliterator *t =
3091             Transliterator::createInstance(ID, UTRANS_FORWARD, pe, ec);
3092         if (U_FAILURE(ec)) {
3093             if (!expValid) {
3094                 logln((UnicodeString)"Ok: getInstance(" + ID +") => " + u_errorName(ec));
3095             } else {
3096                 dataerrln((UnicodeString)"FAIL: Couldn't create " + ID + " - " + u_errorName(ec));
3097             }
3098             delete t;
3099             continue;
3100         }
3101         Transliterator *u = t->createInverse(ec);
3102         if (U_FAILURE(ec)) {
3103             errln((UnicodeString)"FAIL: Couldn't create inverse of " + ID);
3104             delete t;
3105             delete u;
3106             continue;
3107         }
3108         if (t->getID() == expID &&
3109             u->getID() == expInvID) {
3110             logln((UnicodeString)"Ok: " + ID + ".getInverse() => " + expInvID);
3111         } else {
3112             errln((UnicodeString)"FAIL: getInstance(" + ID + ") => " +
3113                   t->getID() + " x getInverse() => " + u->getID() +
3114                   ", expected " + expInvID);
3115         }
3116         delete t;
3117         delete u;
3118     }
3119 }
3120
3121 static const UChar SPACE[]   = {32,0};
3122 static const UChar NEWLINE[] = {10,0};
3123 static const UChar RETURN[]  = {13,0};
3124 static const UChar EMPTY[]   = {0};
3125
3126 void TransliteratorTest::checkRules(const UnicodeString& label, Transliterator& t2,
3127                                     const UnicodeString& testRulesForward) {
3128     UnicodeString rules2; t2.toRules(rules2, TRUE);
3129     //rules2 = TestUtility.replaceAll(rules2, new UnicodeSet("[' '\n\r]"), "");
3130     rules2.findAndReplace(SPACE, EMPTY);
3131     rules2.findAndReplace(NEWLINE, EMPTY);
3132     rules2.findAndReplace(RETURN, EMPTY);
3133
3134     UnicodeString testRules(testRulesForward); testRules.findAndReplace(SPACE, EMPTY);
3135
3136     if (rules2 != testRules) {
3137         errln(label);
3138         logln((UnicodeString)"GENERATED RULES: " + rules2);
3139         logln((UnicodeString)"SHOULD BE:       " + testRulesForward);
3140     }
3141 }
3142
3143 /**
3144  * Mark's toRules test.
3145  */
3146 void TransliteratorTest::TestToRulesMark() {
3147     const char* testRules =
3148         "::[[:Latin:][:Mark:]];"
3149         "::NFKD (NFC);"
3150         "::Lower (Lower);"
3151         "a <> \\u03B1;" // alpha
3152         "::NFKC (NFD);"
3153         "::Upper (Lower);"
3154         "::Lower ();"
3155         "::([[:Greek:][:Mark:]]);"
3156         ;
3157     const char* testRulesForward =
3158         "::[[:Latin:][:Mark:]];"
3159         "::NFKD(NFC);"
3160         "::Lower(Lower);"
3161         "a > \\u03B1;"
3162         "::NFKC(NFD);"
3163         "::Upper (Lower);"
3164         "::Lower ();"
3165         ;
3166     const char* testRulesBackward =
3167         "::[[:Greek:][:Mark:]];"
3168         "::Lower (Upper);"
3169         "::NFD(NFKC);"
3170         "\\u03B1 > a;"
3171         "::Lower(Lower);"
3172         "::NFC(NFKD);"
3173         ;
3174     UnicodeString source = CharsToUnicodeString("\\u00E1"); // a-acute
3175     UnicodeString target = CharsToUnicodeString("\\u03AC"); // alpha-acute
3176
3177     UParseError pe;
3178     UErrorCode ec = U_ZERO_ERROR;
3179     Transliterator *t2 = Transliterator::createFromRules("source-target", UnicodeString(testRules, -1, US_INV), UTRANS_FORWARD, pe, ec);
3180     Transliterator *t3 = Transliterator::createFromRules("target-source", UnicodeString(testRules, -1, US_INV), UTRANS_REVERSE, pe, ec);
3181
3182     if (U_FAILURE(ec)) {
3183         delete t2;
3184         delete t3;
3185         dataerrln((UnicodeString)"FAIL: createFromRules => " + u_errorName(ec));
3186         return;
3187     }
3188
3189     expect(*t2, source, target);
3190     expect(*t3, target, source);
3191
3192     checkRules("Failed toRules FORWARD", *t2, UnicodeString(testRulesForward, -1, US_INV));
3193     checkRules("Failed toRules BACKWARD", *t3, UnicodeString(testRulesBackward, -1, US_INV));
3194
3195     delete t2;
3196     delete t3;
3197 }
3198
3199 /**
3200  * Test Escape and Unescape transliterators.
3201  */
3202 void TransliteratorTest::TestEscape() {
3203     UParseError pe;
3204     UErrorCode ec;
3205     Transliterator *t;
3206
3207     ec = U_ZERO_ERROR;
3208     t = Transliterator::createInstance("Hex-Any", UTRANS_FORWARD, pe, ec);
3209     if (U_FAILURE(ec)) {
3210         errln((UnicodeString)"FAIL: createInstance");
3211     } else {
3212         expect(*t,
3213                UNICODE_STRING_SIMPLE("\\x{40}\\U00000031&#x32;&#81;"),
3214                "@12Q");
3215     }
3216     delete t;
3217
3218     ec = U_ZERO_ERROR;
3219     t = Transliterator::createInstance("Any-Hex/C", UTRANS_FORWARD, pe, ec);
3220     if (U_FAILURE(ec)) {
3221         errln((UnicodeString)"FAIL: createInstance");
3222     } else {
3223         expect(*t,
3224                CharsToUnicodeString("A\\U0010BEEF\\uFEED"),
3225                UNICODE_STRING_SIMPLE("\\u0041\\U0010BEEF\\uFEED"));
3226     }
3227     delete t;
3228
3229     ec = U_ZERO_ERROR;
3230     t = Transliterator::createInstance("Any-Hex/Java", UTRANS_FORWARD, pe, ec);
3231     if (U_FAILURE(ec)) {
3232         errln((UnicodeString)"FAIL: createInstance");
3233     } else {
3234         expect(*t,
3235                CharsToUnicodeString("A\\U0010BEEF\\uFEED"),
3236                UNICODE_STRING_SIMPLE("\\u0041\\uDBEF\\uDEEF\\uFEED"));
3237     }
3238     delete t;
3239
3240     ec = U_ZERO_ERROR;
3241     t = Transliterator::createInstance("Any-Hex/Perl", UTRANS_FORWARD, pe, ec);
3242     if (U_FAILURE(ec)) {
3243         errln((UnicodeString)"FAIL: createInstance");
3244     } else {
3245         expect(*t,
3246                CharsToUnicodeString("A\\U0010BEEF\\uFEED"),
3247                UNICODE_STRING_SIMPLE("\\x{41}\\x{10BEEF}\\x{FEED}"));
3248     }
3249     delete t;
3250 }
3251
3252
3253 void TransliteratorTest::TestAnchorMasking(){
3254     UnicodeString rule ("^a > Q; a > q;");
3255     UErrorCode status= U_ZERO_ERROR;
3256     UParseError parseError;
3257
3258     Transliterator* t = Transliterator::createFromRules("ID", rule, UTRANS_FORWARD,parseError,status);
3259     if(U_FAILURE(status)){
3260         errln(UnicodeString("FAIL: ") + "ID" +
3261               ".createFromRules() => bad rules" +
3262               /*", parse error " + parseError.code +*/
3263               ", line " + parseError.line +
3264               ", offset " + parseError.offset +
3265               ", context " + prettify(parseError.preContext, TRUE) +
3266               ", rules: " + prettify(rule, TRUE));
3267     }
3268     delete t;
3269 }
3270
3271 /**
3272  * Make sure display names of variants look reasonable.
3273  */
3274 void TransliteratorTest::TestDisplayName() {
3275 #if UCONFIG_NO_FORMATTING
3276     logln("Skipping, UCONFIG_NO_FORMATTING is set\n");
3277     return;
3278 #else
3279     static const char* DATA[] = {
3280         // ID, forward name, reverse name
3281         // Update the text as necessary -- the important thing is
3282         // not the text itself, but how various cases are handled.
3283
3284         // Basic test
3285         "Any-Hex", "Any to Hex Escape", "Hex Escape to Any",
3286
3287         // Variants
3288         "Any-Hex/Perl", "Any to Hex Escape/Perl", "Hex Escape to Any/Perl",
3289
3290         // Target-only IDs
3291         "NFC", "Any to NFC", "Any to NFD",
3292     };
3293
3294     int32_t DATA_length = UPRV_LENGTHOF(DATA);
3295
3296     Locale US("en", "US");
3297
3298     for (int32_t i=0; i<DATA_length; i+=3) {
3299         UnicodeString name;
3300         Transliterator::getDisplayName(DATA[i], US, name);
3301         if (name != DATA[i+1]) {
3302             dataerrln((UnicodeString)"FAIL: " + DATA[i] + ".getDisplayName() => " +
3303                   name + ", expected " + DATA[i+1]);
3304         } else {
3305             logln((UnicodeString)"Ok: " + DATA[i] + ".getDisplayName() => " + name);
3306         }
3307         UErrorCode ec = U_ZERO_ERROR;
3308         UParseError pe;
3309         Transliterator *t = Transliterator::createInstance(DATA[i], UTRANS_REVERSE, pe, ec);
3310         if (U_FAILURE(ec)) {
3311             delete t;
3312             dataerrln("FAIL: createInstance failed - %s", u_errorName(ec));
3313             continue;
3314         }
3315         name = Transliterator::getDisplayName(t->getID(), US, name);
3316         if (name != DATA[i+2]) {
3317             dataerrln((UnicodeString)"FAIL: " + t->getID() + ".getDisplayName() => " +
3318                   name + ", expected " + DATA[i+2]);
3319         } else {
3320             logln((UnicodeString)"Ok: " + t->getID() + ".getDisplayName() => " + name);
3321         }
3322         delete t;
3323     }
3324 #endif
3325 }
3326
3327 void TransliteratorTest::TestSpecialCases(void) {
3328     const UnicodeString registerRules[] = {
3329         "Any-Dev1", "x > X; y > Y;",
3330         "Any-Dev2", "XY > Z",
3331         "Greek-Latin/FAKE",
3332             CharsToUnicodeString
3333             ("[^[:L:][:M:]] { \\u03bc\\u03c0 > b ; \\u03bc\\u03c0 } [^[:L:][:M:]] > b ; [^[:L:][:M:]] { [\\u039c\\u03bc][\\u03a0\\u03c0] > B ; [\\u039c\\u03bc][\\u03a0\\u03c0] } [^[:L:][:M:]] > B ;"),
3334         "" // END MARKER
3335     };
3336
3337     const UnicodeString testCases[] = {
3338         // NORMALIZATION
3339         // should add more test cases
3340         "NFD" , CharsToUnicodeString("a\\u0300 \\u00E0 \\u1100\\u1161 \\uFF76\\uFF9E\\u03D3"), "",
3341         "NFC" , CharsToUnicodeString("a\\u0300 \\u00E0 \\u1100\\u1161 \\uFF76\\uFF9E\\u03D3"), "",
3342         "NFKD", CharsToUnicodeString("a\\u0300 \\u00E0 \\u1100\\u1161 \\uFF76\\uFF9E\\u03D3"), "",
3343         "NFKC", CharsToUnicodeString("a\\u0300 \\u00E0 \\u1100\\u1161 \\uFF76\\uFF9E\\u03D3"), "",
3344
3345         // mp -> b BUG
3346         "Greek-Latin/UNGEGN", CharsToUnicodeString("(\\u03BC\\u03C0)"), "(b)",
3347         "Greek-Latin/FAKE", CharsToUnicodeString("(\\u03BC\\u03C0)"), "(b)",
3348
3349         // check for devanagari bug
3350         "nfd;Dev1;Dev2;nfc", "xy", "Z",
3351
3352         // ff, i, dotless-i, I, dotted-I, LJLjlj deseret deeDEE
3353         "Title", CharsToUnicodeString("ab'cD ffi\\u0131I\\u0130 \\u01C7\\u01C8\\u01C9 ") + DESERET_dee + DESERET_DEE,
3354                  CharsToUnicodeString("Ab'cd Ffi\\u0131ii\\u0307 \\u01C8\\u01C9\\u01C9 ") + DESERET_DEE + DESERET_dee,
3355
3356         //TODO: enable this test once Titlecase works right
3357         /*
3358         "Title", CharsToUnicodeString("\\uFB00i\\u0131I\\u0130 \\u01C7\\u01C8\\u01C9 ") + DESERET_dee + DESERET_DEE,
3359                  CharsToUnicodeString("Ffi\\u0131ii \\u01C8\\u01C9\\u01C9 ") + DESERET_DEE + DESERET_dee,
3360                  */
3361         "Upper", CharsToUnicodeString("ab'cD \\uFB00i\\u0131I\\u0130 \\u01C7\\u01C8\\u01C9 ") + DESERET_dee + DESERET_DEE,
3362                  CharsToUnicodeString("AB'CD FFIII\\u0130 \\u01C7\\u01C7\\u01C7 ") + DESERET_DEE + DESERET_DEE,
3363         "Lower", CharsToUnicodeString("ab'cD \\uFB00i\\u0131I\\u0130 \\u01C7\\u01C8\\u01C9 ") + DESERET_dee + DESERET_DEE,
3364                  CharsToUnicodeString("ab'cd \\uFB00i\\u0131ii\\u0307 \\u01C9\\u01C9\\u01C9 ") + DESERET_dee + DESERET_dee,
3365
3366         "Upper", CharsToUnicodeString("ab'cD \\uFB00i\\u0131I\\u0130 \\u01C7\\u01C8\\u01C9 ") + DESERET_dee + DESERET_DEE, "",
3367         "Lower", CharsToUnicodeString("ab'cD \\uFB00i\\u0131I\\u0130 \\u01C7\\u01C8\\u01C9 ") + DESERET_dee + DESERET_DEE, "",
3368
3369          // FORMS OF S
3370         "Greek-Latin/UNGEGN",  CharsToUnicodeString("\\u03C3 \\u03C3\\u03C2 \\u03C2\\u03C3"),
3371                                CharsToUnicodeString("s ss s\\u0331s\\u0331") ,
3372         "Latin-Greek/UNGEGN",  CharsToUnicodeString("s ss s\\u0331s\\u0331"),
3373                                CharsToUnicodeString("\\u03C3 \\u03C3\\u03C2 \\u03C2\\u03C3") ,
3374         "Greek-Latin",  CharsToUnicodeString("\\u03C3 \\u03C3\\u03C2 \\u03C2\\u03C3"),
3375                         CharsToUnicodeString("s ss s\\u0331s\\u0331") ,
3376         "Latin-Greek",  CharsToUnicodeString("s ss s\\u0331s\\u0331"),
3377                         CharsToUnicodeString("\\u03C3 \\u03C3\\u03C2 \\u03C2\\u03C3"),
3378         // Tatiana bug
3379         // Upper: TAT\\u02B9\\u00C2NA
3380         // Lower: tat\\u02B9\\u00E2na
3381         // Title: Tat\\u02B9\\u00E2na
3382         "Upper", CharsToUnicodeString("tat\\u02B9\\u00E2na"),
3383                  CharsToUnicodeString("TAT\\u02B9\\u00C2NA"),
3384         "Lower", CharsToUnicodeString("TAT\\u02B9\\u00C2NA"),
3385                  CharsToUnicodeString("tat\\u02B9\\u00E2na"),
3386         "Title", CharsToUnicodeString("tat\\u02B9\\u00E2na"),
3387                  CharsToUnicodeString("Tat\\u02B9\\u00E2na"),
3388
3389         "" // END MARKER
3390     };
3391
3392     UParseError pos;
3393     int32_t i;
3394     for (i = 0; registerRules[i].length()!=0; i+=2) {
3395         UErrorCode status = U_ZERO_ERROR;
3396
3397         Transliterator *t = Transliterator::createFromRules(registerRules[0+i],
3398             registerRules[i+1], UTRANS_FORWARD, pos, status);
3399         if (U_FAILURE(status)) {
3400             dataerrln("Fails: Unable to create the transliterator from rules. - %s", u_errorName(status));
3401         } else {
3402             Transliterator::registerInstance(t);
3403         }
3404     }
3405     for (i = 0; testCases[i].length()!=0; i+=3) {
3406         UErrorCode ec = U_ZERO_ERROR;
3407         UParseError pe;
3408         const UnicodeString& name = testCases[i];
3409         Transliterator *t = Transliterator::createInstance(name, UTRANS_FORWARD, pe, ec);
3410         if (U_FAILURE(ec)) {
3411             dataerrln((UnicodeString)"FAIL: Couldn't create " + name + " - " + u_errorName(ec));
3412             delete t;
3413             continue;
3414         }
3415         const UnicodeString& id = t->getID();
3416         const UnicodeString& source = testCases[i+1];
3417         UnicodeString target;
3418
3419         // Automatic generation of targets, to make it simpler to add test cases (and more fail-safe)
3420
3421         if (testCases[i+2].length() > 0) {
3422             target = testCases[i+2];
3423         } else if (0==id.caseCompare("NFD", U_FOLD_CASE_DEFAULT)) {
3424             Normalizer::normalize(source, UNORM_NFD, 0, target, ec);
3425         } else if (0==id.caseCompare("NFC", U_FOLD_CASE_DEFAULT)) {
3426             Normalizer::normalize(source, UNORM_NFC, 0, target, ec);
3427         } else if (0==id.caseCompare("NFKD", U_FOLD_CASE_DEFAULT)) {
3428             Normalizer::normalize(source, UNORM_NFKD, 0, target, ec);
3429         } else if (0==id.caseCompare("NFKC", U_FOLD_CASE_DEFAULT)) {
3430             Normalizer::normalize(source, UNORM_NFKC, 0, target, ec);
3431         } else if (0==id.caseCompare("Lower", U_FOLD_CASE_DEFAULT)) {
3432             target = source;
3433             target.toLower(Locale::getUS());
3434         } else if (0==id.caseCompare("Upper", U_FOLD_CASE_DEFAULT)) {
3435             target = source;
3436             target.toUpper(Locale::getUS());
3437         }
3438         if (U_FAILURE(ec)) {
3439             errln((UnicodeString)"FAIL: Internal error normalizing " + source);
3440             continue;
3441         }
3442
3443         expect(*t, source, target);
3444         delete t;
3445     }
3446     for (i = 0; registerRules[i].length()!=0; i+=2) {
3447         Transliterator::unregister(registerRules[i]);
3448     }
3449 }
3450
/**
 * Format a code point as an escape sequence into a caller-supplied buffer.
 *
 * Values up to 0xFFFF are written as "\u" plus at least four lowercase hex
 * digits; larger values are written as "\U" plus eight lowercase hex digits.
 *
 * @param ch      the code point to format
 * @param buffer  destination; the caller must provide enough room for the
 *                escape and its terminating NUL
 * @return buffer
 */
char* Char32ToEscapedChars(UChar32 ch, char* buffer) {
    const char* const format = (ch > 0xFFFF) ? "\\U%08x" : "\\u%04x";
    sprintf(buffer, format, (int)ch);
    return buffer;
}
3459
3460 void TransliteratorTest::TestSurrogateCasing (void) {
3461     // check that casing handles surrogates
3462     // titlecase is currently defective
3463     char buffer[20];
3464     UChar buffer2[20];
3465     UChar32 dee;
3466     U16_GET(DESERET_dee,0, 0, DESERET_dee.length(), dee);
3467     UnicodeString DEE(u_totitle(dee));
3468     if (DEE != DESERET_DEE) {
3469         err("Fails titlecase of surrogates");
3470         err(Char32ToEscapedChars(dee, buffer));
3471         err(", ");
3472         errln(Char32ToEscapedChars(DEE.char32At(0), buffer));
3473     }
3474
3475     UnicodeString deeDEETest=DESERET_dee + DESERET_DEE;
3476     UnicodeString deedeeTest = DESERET_dee + DESERET_dee;
3477     UnicodeString DEEDEETest = DESERET_DEE + DESERET_DEE;
3478     UErrorCode status= U_ZERO_ERROR;
3479
3480     u_strToUpper(buffer2, 20, deeDEETest.getBuffer(), deeDEETest.length(), NULL, &status);
3481     if (U_FAILURE(status) || (UnicodeString(buffer2)!= DEEDEETest)) {
3482         errln("Fails: Can't uppercase surrogates.");
3483     }
3484
3485     status= U_ZERO_ERROR;
3486     u_strToLower(buffer2, 20, deeDEETest.getBuffer(), deeDEETest.length(), NULL, &status);
3487     if (U_FAILURE(status) || (UnicodeString(buffer2)!= deedeeTest)) {
3488         errln("Fails: Can't lowercase surrogates.");
3489     }
3490 }
3491
3492 static void _trans(Transliterator& t, const UnicodeString& src,
3493                    UnicodeString& result) {
3494     result = src;
3495     t.transliterate(result);
3496 }
3497
3498 static void _trans(const UnicodeString& id, const UnicodeString& src,
3499                    UnicodeString& result, UErrorCode ec) {
3500     UParseError pe;
3501     Transliterator *t = Transliterator::createInstance(id, UTRANS_FORWARD, pe, ec);
3502     if (U_SUCCESS(ec)) {
3503         _trans(*t, src, result);
3504     }
3505     delete t;
3506 }
3507
3508 static UnicodeString _findMatch(const UnicodeString& source,
3509                                        const UnicodeString* pairs) {
3510     UnicodeString empty;
3511     for (int32_t i=0; pairs[i].length() > 0; i+=2) {
3512         if (0==source.caseCompare(pairs[i], U_FOLD_CASE_DEFAULT)) {
3513             return pairs[i+1];
3514         }
3515     }
3516     return empty;
3517 }
3518
3519 // Check to see that incremental gets at least part way through a reasonable string.
3520
3521 void TransliteratorTest::TestIncrementalProgress(void) {
3522     UErrorCode ec = U_ZERO_ERROR;
3523     UnicodeString latinTest = "The Quick Brown Fox.";
3524     UnicodeString devaTest;
3525     _trans("Latin-Devanagari", latinTest, devaTest, ec);
3526     UnicodeString kataTest;
3527     _trans("Latin-Katakana", latinTest, kataTest, ec);
3528     if (U_FAILURE(ec)) {
3529         errln("FAIL: Internal error");
3530         return;
3531     }
3532     const UnicodeString tests[] = {
3533         "Any", latinTest,
3534         "Latin", latinTest,
3535         "Halfwidth", latinTest,
3536         "Devanagari", devaTest,
3537         "Katakana", kataTest,
3538         "" // END MARKER
3539     };
3540
3541     UnicodeString test("The Quick Brown Fox Jumped Over The Lazy Dog.");
3542     int32_t i = 0, j=0, k=0;
3543     int32_t sources = Transliterator::countAvailableSources();
3544     for (i = 0; i < sources; i++) {
3545         UnicodeString source;
3546         Transliterator::getAvailableSource(i, source);
3547         UnicodeString test = _findMatch(source, tests);
3548         if (test.length() == 0) {
3549             logln((UnicodeString)"Skipping " + source + "-X");
3550             continue;
3551         }
3552         int32_t targets = Transliterator::countAvailableTargets(source);
3553         for (j = 0; j < targets; j++) {
3554             UnicodeString target;
3555             Transliterator::getAvailableTarget(j, source, target);
3556             int32_t variants = Transliterator::countAvailableVariants(source, target);
3557             for (k =0; k< variants; k++) {
3558                 UnicodeString variant;
3559                 UParseError err;
3560                 UErrorCode status = U_ZERO_ERROR;
3561
3562                 Transliterator::getAvailableVariant(k, source, target, variant);
3563                 UnicodeString id = source + "-" + target + "/" + variant;
3564
3565                 Transliterator *t = Transliterator::createInstance(id, UTRANS_FORWARD, err, status);
3566                 if (U_FAILURE(status)) {
3567                     dataerrln((UnicodeString)"FAIL: Could not create " + id);
3568                     delete t;
3569                     continue;
3570                 }
3571                 status = U_ZERO_ERROR;
3572                 CheckIncrementalAux(t, test);
3573
3574                 UnicodeString rev;
3575                 _trans(*t, test, rev);
3576                 Transliterator *inv = t->createInverse(status);
3577                 if (U_FAILURE(status)) {
3578                     // The following are forward-only, it is OK that creating an inverse will not work:
3579                     // 1. Devanagari-Arabic
3580                     // 2. Any-*/BGN
3581                     // 2a. Any-*/BGN_1981
3582                     // 3. Any-*/UNGEGN
3583                     // 4. Any-*/MNS
3584                     // If UCONFIG_NO_BREAK_ITERATION is on, Latin-Thai is also not expected to work.
3585                     if (    id.compare((UnicodeString)"Devanagari-Arabic/") != 0
3586                          && !(id.startsWith((UnicodeString)"Any-") &&
3587                                 (id.endsWith((UnicodeString)"/BGN") || id.endsWith((UnicodeString)"/BGN_1981") || id.endsWith((UnicodeString)"/UNGEGN") || id.endsWith((UnicodeString)"/MNS"))
3588                              )
3589 #if UCONFIG_NO_BREAK_ITERATION
3590                          && id.compare((UnicodeString)"Latin-Thai/") != 0
3591 #endif
3592                        )
3593                     {
3594                         errln((UnicodeString)"FAIL: Could not create inverse of " + id);
3595                     }
3596                     delete t;
3597                     delete inv;
3598                     continue;
3599                 }
3600                 CheckIncrementalAux(inv, rev);
3601                 delete t;
3602                 delete inv;
3603             }
3604         }
3605     }
3606 }
3607
3608 void TransliteratorTest::CheckIncrementalAux(const Transliterator* t,
3609                                                       const UnicodeString& input) {
3610     UErrorCode ec = U_ZERO_ERROR;
3611     UTransPosition pos;
3612     UnicodeString test = input;
3613
3614     pos.contextStart = 0;
3615     pos.contextLimit = input.length();
3616     pos.start = 0;
3617     pos.limit = input.length();
3618
3619     t->transliterate(test, pos, ec);
3620     if (U_FAILURE(ec)) {
3621         errln((UnicodeString)"FAIL: transliterate() error " + u_errorName(ec));
3622         return;
3623     }
3624     UBool gotError = FALSE;
3625     (void)gotError;    // Suppress set but not used warning.
3626
3627     // we have a few special cases. Any-Remove (pos.start = 0, but also = limit) and U+XXXXX?X?
3628
3629     if (pos.start == 0 && pos.limit != 0 && t->getID() != "Hex-Any/Unicode") {
3630         errln((UnicodeString)"No Progress, " +
3631               t->getID() + ": " + formatInput(test, input, pos));
3632         gotError = TRUE;
3633     } else {
3634         logln((UnicodeString)"PASS Progress, " +
3635               t->getID() + ": " + formatInput(test, input, pos));
3636     }
3637     t->finishTransliteration(test, pos);
3638     if (pos.start != pos.limit) {
3639         errln((UnicodeString)"Incomplete, " +
3640               t->getID() + ": " + formatInput(test, input, pos));
3641         gotError = TRUE;
3642     }
3643 }
3644
3645 void TransliteratorTest::TestFunction() {
3646     // Careful with spacing and ';' here:  Phrase this exactly
3647     // as toRules() is going to return it.  If toRules() changes
3648     // with regard to spacing or ';', then adjust this string.
3649     UnicodeString rule =
3650         "([:Lu:]) > $1 '(' &Lower( $1 ) '=' &Hex( &Any-Lower( $1 ) ) ')';";
3651
3652     UParseError pe;
3653     UErrorCode ec = U_ZERO_ERROR;
3654     Transliterator *t = Transliterator::createFromRules("Test", rule, UTRANS_FORWARD, pe, ec);
3655     if (t == NULL) {
3656         dataerrln("FAIL: createFromRules failed - %s", u_errorName(ec));
3657         return;
3658     }
3659
3660     UnicodeString r;
3661     t->toRules(r, TRUE);
3662     if (r == rule) {
3663         logln((UnicodeString)"OK: toRules() => " + r);
3664     } else {
3665         errln((UnicodeString)"FAIL: toRules() => " + r +
3666               ", expected " + rule);
3667     }
3668
3669     expect(*t, "The Quick Brown Fox",
3670            UNICODE_STRING_SIMPLE("T(t=\\u0074)he Q(q=\\u0071)uick B(b=\\u0062)rown F(f=\\u0066)ox"));
3671
3672     delete t;
3673 }
3674
3675 void TransliteratorTest::TestInvalidBackRef(void) {
3676     UnicodeString rule =  ". > $1;";
3677     UnicodeString rule2 =CharsToUnicodeString("(.) <> &hex/unicode($1) &name($1); . > $1; [{}] >\\u0020;");
3678     UParseError pe;
3679     UErrorCode ec = U_ZERO_ERROR;
3680     Transliterator *t = Transliterator::createFromRules("Test", rule, UTRANS_FORWARD, pe, ec);
3681     Transliterator *t2 = Transliterator::createFromRules("Test2", rule2, UTRANS_FORWARD, pe, ec);
3682
3683     if (t != NULL) {
3684         errln("FAIL: createFromRules should have returned NULL");
3685         delete t;
3686     }
3687
3688     if (t2 != NULL) {
3689         errln("FAIL: createFromRules should have returned NULL");
3690         delete t2;
3691     }
3692
3693     if (U_SUCCESS(ec)) {
3694         errln("FAIL: Ok: . > $1; => no error");
3695     } else {
3696         logln((UnicodeString)"Ok: . > $1; => " + u_errorName(ec));
3697     }
3698 }
3699
3700 void TransliteratorTest::TestMulticharStringSet() {
3701     // Basic testing
3702     const char* rule =
3703         "       [{aa}]       > x;"
3704         "         a          > y;"
3705         "       [b{bc}]      > z;"
3706         "[{gd}] { e          > q;"
3707         "         e } [{fg}] > r;" ;
3708
3709     UParseError pe;
3710     UErrorCode ec = U_ZERO_ERROR;
3711     Transliterator* t = Transliterator::createFromRules("Test", rule, UTRANS_FORWARD, pe, ec);
3712     if (t == NULL || U_FAILURE(ec)) {
3713         delete t;
3714         errln("FAIL: createFromRules failed");
3715         return;
3716     }
3717
3718     expect(*t, "a aa ab bc d gd de gde gdefg ddefg",
3719            "y x yz z d gd de gdq gdqfg ddrfg");
3720     delete t;
3721
3722     // Overlapped string test.  Make sure that when multiple
3723     // strings can match that the longest one is matched.
3724     rule =
3725         "    [a {ab} {abc}]    > x;"
3726         "           b          > y;"
3727         "           c          > z;"
3728         " q [t {st} {rst}] { e > p;" ;
3729
3730     t = Transliterator::createFromRules("Test", rule, UTRANS_FORWARD, pe, ec);
3731     if (t == NULL || U_FAILURE(ec)) {
3732         delete t;
3733         errln("FAIL: createFromRules failed");
3734         return;
3735     }
3736
3737     expect(*t, "a ab abc qte qste qrste",
3738            "x x x qtp qstp qrstp");
3739     delete t;
3740 }
3741
3742 // vvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvv
3743 // BEGIN TestUserFunction support factory
3744
3745 Transliterator* _TUFF[4];
3746 UnicodeString* _TUFID[4];
3747
3748 static Transliterator* U_EXPORT2 _TUFFactory(const UnicodeString& /*ID*/,
3749                                    Transliterator::Token context) {
3750     return _TUFF[context.integer]->clone();
3751 }
3752
3753 static void _TUFReg(const UnicodeString& ID, Transliterator* t, int32_t n) {
3754     _TUFF[n] = t;
3755     _TUFID[n] = new UnicodeString(ID);
3756     Transliterator::registerFactory(ID, _TUFFactory, Transliterator::integerToken(n));
3757 }
3758
3759 static void _TUFUnreg(int32_t n) {
3760     if (_TUFF[n] != NULL) {
3761         Transliterator::unregister(*_TUFID[n]);
3762         delete _TUFF[n];
3763         delete _TUFID[n];
3764     }
3765 }
3766
3767 // END TestUserFunction support factory
3768 // ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
3769
3770 /**
3771  * Test that user-registered transliterators can be used under function
3772  * syntax.
3773  */
3774 void TransliteratorTest::TestUserFunction() {
3775
3776     Transliterator* t;
3777     UParseError pe;
3778     UErrorCode ec = U_ZERO_ERROR;
3779
3780     // Setup our factory
3781     int32_t i;
3782     for (i=0; i<4; ++i) {
3783         _TUFF[i] = NULL;
3784     }
3785
3786     // There's no need to register inverses if we don't use them
3787     t = Transliterator::createFromRules("gif",
3788                                         UNICODE_STRING_SIMPLE("'\\'u(..)(..) > '<img src=\"http://www.unicode.org/gifs/24/' $1 '/U' $1$2 '.gif\">';"),
3789                                         UTRANS_FORWARD, pe, ec);
3790     if (t == NULL || U_FAILURE(ec)) {
3791         dataerrln((UnicodeString)"FAIL: createFromRules gif " + u_errorName(ec));
3792         return;
3793     }
3794     _TUFReg("Any-gif", t, 0);
3795
3796     t = Transliterator::createFromRules("RemoveCurly",
3797                                         UNICODE_STRING_SIMPLE("[\\{\\}] > ; '\\N' > ;"),
3798                                         UTRANS_FORWARD, pe, ec);
3799     if (t == NULL || U_FAILURE(ec)) {
3800         errln((UnicodeString)"FAIL: createFromRules RemoveCurly " + u_errorName(ec));
3801         goto FAIL;
3802     }
3803     expect(*t, UNICODE_STRING_SIMPLE("\\N{name}"), "name");
3804     _TUFReg("Any-RemoveCurly", t, 1);
3805
3806     logln("Trying &hex");
3807     t = Transliterator::createFromRules("hex2",
3808                                         "(.) > &hex($1);",
3809                                         UTRANS_FORWARD, pe, ec);
3810     if (t == NULL || U_FAILURE(ec)) {
3811         errln("FAIL: createFromRules");
3812         goto FAIL;
3813     }
3814     logln("Registering");
3815     _TUFReg("Any-hex2", t, 2);
3816     t = Transliterator::createInstance("Any-hex2", UTRANS_FORWARD, ec);
3817     if (t == NULL || U_FAILURE(ec)) {
3818         errln((UnicodeString)"FAIL: createInstance Any-hex2 " + u_errorName(ec));
3819         goto FAIL;
3820     }
3821     expect(*t, "abc", UNICODE_STRING_SIMPLE("\\u0061\\u0062\\u0063"));
3822     delete t;
3823
3824     logln("Trying &gif");
3825     t = Transliterator::createFromRules("gif2",
3826                                         "(.) > &Gif(&Hex2($1));",
3827                                         UTRANS_FORWARD, pe, ec);
3828     if (t == NULL || U_FAILURE(ec)) {
3829         errln((UnicodeString)"FAIL: createFromRules gif2 " + u_errorName(ec));
3830         goto FAIL;
3831     }
3832     logln("Registering");
3833     _TUFReg("Any-gif2", t, 3);
3834     t = Transliterator::createInstance("Any-gif2", UTRANS_FORWARD, ec);
3835     if (t == NULL || U_FAILURE(ec)) {
3836         errln((UnicodeString)"FAIL: createInstance Any-gif2 " + u_errorName(ec));
3837         goto FAIL;
3838     }
3839     expect(*t, "ab", "<img src=\"http://www.unicode.org/gifs/24/00/U0061.gif\">"
3840            "<img src=\"http://www.unicode.org/gifs/24/00/U0062.gif\">");
3841     delete t;
3842
3843     // Test that filters are allowed after &
3844     t = Transliterator::createFromRules("test",
3845                                         "(.) > &Hex($1) ' ' &RemoveCurly(&Name($1)) ' ';",
3846                                         UTRANS_FORWARD, pe, ec);
3847     if (t == NULL || U_FAILURE(ec)) {
3848         errln((UnicodeString)"FAIL: createFromRules test " + u_errorName(ec));
3849         goto FAIL;
3850     }
3851     expect(*t, "abc",
3852            UNICODE_STRING_SIMPLE("\\u0061 LATIN SMALL LETTER A \\u0062 LATIN SMALL LETTER B \\u0063 LATIN SMALL LETTER C "));
3853     delete t;
3854
3855  FAIL:
3856     for (i=0; i<4; ++i) {
3857         _TUFUnreg(i);
3858     }
3859 }
3860
3861 /**
3862  * Test the Any-X transliterators.
3863  */
3864 void TransliteratorTest::TestAnyX(void) {
3865     UParseError parseError;
3866     UErrorCode status = U_ZERO_ERROR;
3867     Transliterator* anyLatin =
3868         Transliterator::createInstance("Any-Latin", UTRANS_FORWARD, parseError, status);
3869     if (anyLatin==0) {
3870         dataerrln("FAIL: createInstance returned NULL - %s", u_errorName(status));
3871         delete anyLatin;
3872         return;
3873     }
3874
3875     expect(*anyLatin,
3876            CharsToUnicodeString("greek:\\u03B1\\u03B2\\u03BA\\u0391\\u0392\\u039A hiragana:\\u3042\\u3076\\u304F cyrillic:\\u0430\\u0431\\u0446"),
3877            CharsToUnicodeString("greek:abkABK hiragana:abuku cyrillic:abc"));
3878
3879     delete anyLatin;
3880
3881     status = U_ZERO_ERROR;
3882     Transliterator* anyASCII =
3883         Transliterator::createInstance("Any-Latin;Latin-ASCII", UTRANS_FORWARD, parseError, status);
3884     if (U_FAILURE(status) || anyASCII==0) {
3885         dataerrln("FAIL: createInstance returned NULL and/or set status %s", u_errorName(status));
3886         delete anyASCII;
3887         return;
3888     }
3889
3890     expect(*anyASCII,
3891            CharsToUnicodeString("ArabicDigits:\\u0660\\u0661\\u0664\\u0669 PersianDigits:\\u06F0\\u06F1\\u06F4\\u06F9"),
3892            CharsToUnicodeString("ArabicDigits:0149 PersianDigits:0149"));
3893
3894     delete anyASCII;
3895 }
3896
3897 /**
3898  * Test Any-X transliterators with sample letters from all scripts.
3899  */
3900 void TransliteratorTest::TestAny(void) {
3901     UErrorCode status = U_ZERO_ERROR;
3902     // Note: there is a lot of implict construction of UnicodeStrings from (char *) in
3903     //       function call parameters going on in this test.
3904     UnicodeSet alphabetic("[:alphabetic:]", status);
3905     if (U_FAILURE(status)) {
3906         dataerrln("Failure: file %s, line %d, status = %s", __FILE__, __LINE__, u_errorName(status));
3907         return;
3908     }
3909     alphabetic.freeze();
3910
3911     UnicodeString testString;
3912     for (int32_t i = 0; i < USCRIPT_CODE_LIMIT; i++) {
3913         const char *scriptName = uscript_getShortName((UScriptCode)i);
3914         if (scriptName == NULL) {
3915             errln("Failure: file %s, line %d: Script Code %d is invalid, ", __FILE__, __LINE__, i);
3916             return;
3917         }
3918
3919         UnicodeSet sample;
3920         sample.applyPropertyAlias("script", scriptName, status);
3921         if (U_FAILURE(status)) {
3922             errln("Failure: file %s, line %d, status = %s", __FILE__, __LINE__, u_errorName(status));
3923             return;
3924         }
3925         sample.retainAll(alphabetic);
3926         for (int32_t count=0; count<5; count++) {
3927             UChar32 c = sample.charAt(count);
3928             if (c == -1) {
3929                 break;
3930             }
3931             testString.append(c);
3932         }
3933     }
3934
3935     UParseError parseError;
3936     Transliterator* anyLatin =
3937         Transliterator::createInstance("Any-Latin", UTRANS_FORWARD, parseError, status);
3938     if (U_FAILURE(status)) {
3939         dataerrln("Failure: file %s, line %d, status = %s", __FILE__, __LINE__, u_errorName(status));
3940         return;
3941     }
3942
3943     logln(UnicodeString("Sample set for Any-Latin: ") + testString);
3944     anyLatin->transliterate(testString);
3945     logln(UnicodeString("Sample result for Any-Latin: ") + testString);
3946     delete anyLatin;
3947 }
3948
3949
3950 /**
3951  * Test the source and target set API.  These are only implemented
3952  * for RBT and CompoundTransliterator at this time.
3953  */
3954 void TransliteratorTest::TestSourceTargetSet() {
3955     UErrorCode ec = U_ZERO_ERROR;
3956
3957     // Rules
3958     const char* r =
3959         "a > b; "
3960         "r [x{lu}] > q;";
3961
3962     // Expected source
3963     UnicodeSet expSrc("[arx{lu}]", ec);
3964
3965     // Expected target
3966     UnicodeSet expTrg("[bq]", ec);
3967
3968     UParseError pe;
3969     Transliterator* t = Transliterator::createFromRules("test", r, UTRANS_FORWARD, pe, ec);
3970
3971     if (U_FAILURE(ec)) {
3972         delete t;
3973         errln("FAIL: Couldn't set up test");
3974         return;
3975     }
3976
3977     UnicodeSet src; t->getSourceSet(src);
3978     UnicodeSet trg; t->getTargetSet(trg);
3979
3980     if (src == expSrc && trg == expTrg) {
3981         UnicodeString a, b;
3982         logln((UnicodeString)"Ok: " +
3983               r + " => source = " + src.toPattern(a, TRUE) +
3984               ", target = " + trg.toPattern(b, TRUE));
3985     } else {
3986         UnicodeString a, b, c, d;
3987         errln((UnicodeString)"FAIL: " +
3988               r + " => source = " + src.toPattern(a, TRUE) +
3989               ", expected " + expSrc.toPattern(b, TRUE) +
3990               "; target = " + trg.toPattern(c, TRUE) +
3991               ", expected " + expTrg.toPattern(d, TRUE));
3992     }
3993
3994     delete t;
3995 }
3996
3997 /**
3998  * Test handling of Pattern_White_Space, for both RBT and UnicodeSet.
3999  */
4000 void TransliteratorTest::TestPatternWhiteSpace() {
4001     // Rules
4002     const char* r = "a > \\u200E b;";
4003
4004     UErrorCode ec = U_ZERO_ERROR;
4005     UParseError pe;
4006     Transliterator* t = Transliterator::createFromRules("test", CharsToUnicodeString(r), UTRANS_FORWARD, pe, ec);
4007
4008     if (U_FAILURE(ec)) {
4009         errln("FAIL: Couldn't set up test");
4010     } else {
4011         expect(*t, "a", "b");
4012     }
4013     delete t;
4014
4015     // UnicodeSet
4016     ec = U_ZERO_ERROR;
4017     UnicodeSet set(CharsToUnicodeString("[a \\u200E]"), ec);
4018
4019     if (U_FAILURE(ec)) {
4020         errln("FAIL: Couldn't set up test");
4021     } else {
4022         if (set.contains(0x200E)) {
4023             errln("FAIL: U+200E not being ignored by UnicodeSet");
4024         }
4025     }
4026 }
4027 //======================================================================
4028 // this method is in TestUScript.java
4029 //======================================================================
4030 void TransliteratorTest::TestAllCodepoints(){
4031     UScriptCode code= USCRIPT_INVALID_CODE;
4032     char id[256]={'\0'};
4033     char abbr[256]={'\0'};
4034     char newId[256]={'\0'};
4035     char newAbbrId[256]={'\0'};
4036     char oldId[256]={'\0'};
4037     char oldAbbrId[256]={'\0'};
4038
4039     UErrorCode status =U_ZERO_ERROR;
4040     UParseError pe;
4041
4042     for(uint32_t i = 0; i<=0x10ffff; i++){
4043         code =  uscript_getScript(i,&status);
4044         if(code == USCRIPT_INVALID_CODE){
4045             dataerrln("uscript_getScript for codepoint \\U%08X failed.", i);
4046         }
4047         const char* myId = uscript_getName(code);
4048         if(!myId) {
4049           dataerrln("Valid script code returned NULL name. Check your data!");
4050           return;
4051         }
4052         uprv_strcpy(id,myId);
4053         uprv_strcpy(abbr,uscript_getShortName(code));
4054
4055         uprv_strcpy(newId,"[:");
4056         uprv_strcat(newId,id);
4057         uprv_strcat(newId,":];NFD");
4058
4059         uprv_strcpy(newAbbrId,"[:");
4060         uprv_strcat(newAbbrId,abbr);
4061         uprv_strcat(newAbbrId,":];NFD");
4062
4063         if(uprv_strcmp(newId,oldId)!=0){
4064             Transliterator* t = Transliterator::createInstance(newId,UTRANS_FORWARD,pe,status);
4065             if(t==NULL || U_FAILURE(status)){
4066                 dataerrln((UnicodeString)"FAIL: Could not create " + id + " - " + u_errorName(status));
4067             }
4068             delete t;
4069         }
4070         if(uprv_strcmp(newAbbrId,oldAbbrId)!=0){
4071             Transliterator* t = Transliterator::createInstance(newAbbrId,UTRANS_FORWARD,pe,status);
4072             if(t==NULL || U_FAILURE(status)){
4073                 dataerrln((UnicodeString)"FAIL: Could not create " + id + " - " + u_errorName(status));
4074             }
4075             delete t;
4076         }
4077         uprv_strcpy(oldId,newId);
4078         uprv_strcpy(oldAbbrId, newAbbrId);
4079
4080     }
4081
4082 }
4083
/*
 * Boilerplate check by ID: create the transliterator registered under `id`
 * and verify that its dynamic class ID equals cls::getStaticClassID().
 * A creation failure is reported through dataerrln().
 */
#define TEST_TRANSLIT_ID(id, cls) { \
  UErrorCode status = U_ZERO_ERROR; \
  Transliterator* translit = Transliterator::createInstance(id, UTRANS_FORWARD, status); \
  if (U_SUCCESS(status)) { \
    if (translit->getDynamicClassID() != cls::getStaticClassID()) { \
      errln("FAIL: " #cls " dynamic and static class ID mismatch"); \
    } \
    /* *translit = *translit; */ /*can't do this: coverage test for assignment op*/ \
  } else { \
    dataerrln("FAIL: Couldn't create %s - %s", id, u_errorName(status)); \
  } \
  delete translit; \
}
4097
/*
 * Boilerplate check by rule: build a transliterator from the string literal
 * `rule` and verify that its dynamic class ID equals cls::getStaticClassID().
 * `rule` must be a literal because it is pasted onto the failure message.
 */
#define TEST_TRANSLIT_RULE(rule, cls) { \
  UParseError parseError; \
  UErrorCode status = U_ZERO_ERROR; \
  Transliterator* translit = Transliterator::createFromRules("_", rule, UTRANS_FORWARD, parseError, status); \
  if (U_SUCCESS(status)) { \
    if (translit->getDynamicClassID() != cls ::getStaticClassID()) { \
      errln("FAIL: " #cls " dynamic and static class ID mismatch"); \
    } \
    /* *translit = *translit; */ /*can't do this: coverage test for assignment op*/ \
  } else { \
    errln("FAIL: Couldn't create " rule); \
  } \
  delete translit; \
}
4112
4113 void TransliteratorTest::TestBoilerplate() {
4114     TEST_TRANSLIT_ID("Any-Latin", AnyTransliterator);
4115     TEST_TRANSLIT_ID("Any-Hex", EscapeTransliterator);
4116     TEST_TRANSLIT_ID("Hex-Any", UnescapeTransliterator);
4117     TEST_TRANSLIT_ID("Lower", LowercaseTransliterator);
4118     TEST_TRANSLIT_ID("Upper", UppercaseTransliterator);
4119     TEST_TRANSLIT_ID("Title", TitlecaseTransliterator);
4120     TEST_TRANSLIT_ID("Null", NullTransliterator);
4121     TEST_TRANSLIT_ID("Remove", RemoveTransliterator);
4122     TEST_TRANSLIT_ID("Any-Name", UnicodeNameTransliterator);
4123     TEST_TRANSLIT_ID("Name-Any", NameUnicodeTransliterator);
4124     TEST_TRANSLIT_ID("NFD", NormalizationTransliterator);
4125     TEST_TRANSLIT_ID("Latin-Greek", CompoundTransliterator);
4126     TEST_TRANSLIT_RULE("a>b;", RuleBasedTransliterator);
4127 }
4128
4129 void TransliteratorTest::TestAlternateSyntax() {
4130     // U+2206 == &
4131     // U+2190 == <
4132     // U+2192 == >
4133     // U+2194 == <>
4134     expect(CharsToUnicodeString("a \\u2192 x; b \\u2190 y; c \\u2194 z"),
4135            "abc",
4136            "xbz");
4137     expect(CharsToUnicodeString("([:^ASCII:]) \\u2192 \\u2206Name($1);"),
4138            CharsToUnicodeString("<=\\u2190; >=\\u2192; <>=\\u2194; &=\\u2206"),
4139            UNICODE_STRING_SIMPLE("<=\\N{LEFTWARDS ARROW}; >=\\N{RIGHTWARDS ARROW}; <>=\\N{LEFT RIGHT ARROW}; &=\\N{INCREMENT}"));
4140 }
4141
// Rule sets used by TestBeginEnd() and TestBeginEndToRules(), addressed by
// index from BEGIN_END_TEST_CASES.  The ::BEGIN/::END variants are disabled;
// each keeps an empty-string placeholder so the remaining indexes do not shift.
static const char* BEGIN_END_RULES[] = {
    // [0]
    "abc > xy;"
    "aba > z;",

    // [1] (disabled)
    //    "::BEGIN;"
    //    "abc > xy;"
    //    "::END;"
    //    "::BEGIN;"
    //    "aba > z;"
    //    "::END;",
    "", // test case commented out below, this is here to keep from messing up the indexes

    // [2] (disabled)
    //    "abc > xy;"
    //    "::BEGIN;"
    //    "aba > z;"
    //    "::END;",
    "", // test case commented out below, this is here to keep from messing up the indexes

    // [3] (disabled)
    //    "::BEGIN;"
    //    "abc > xy;"
    //    "::END;"
    //    "aba > z;",
    "", // test case commented out below, this is here to keep from messing up the indexes

    // [4]
    "abc > xy;"
    "::Null;"
    "aba > z;",

    // [5]
    "::Upper;"
    "ABC > xy;"
    "AB > x;"
    "C > z;"
    "::Upper;"
    "XYZ > p;"
    "XY > q;"
    "Z > r;"
    "::Upper;",

    // [6]
    "$ws = [[:Separator:][\\u0009-\\u000C]$];"
    "$delim = [\\-$ws];"
    "$ws $delim* > ' ';"
    "'-' $delim* > '-';",

    // [7]
    "::Null;"
    "$ws = [[:Separator:][\\u0009-\\u000C]$];"
    "$delim = [\\-$ws];"
    "$ws $delim* > ' ';"
    "'-' $delim* > '-';",

    // [8]
    "$ws = [[:Separator:][\\u0009-\\u000C]$];"
    "$delim = [\\-$ws];"
    "$ws $delim* > ' ';"
    "'-' $delim* > '-';"
    "::Null;",

    // [9]
    "$ws = [[:Separator:][\\u0009-\\u000C]$];"
    "$delim = [\\-$ws];"
    "::Null;"
    "$ws $delim* > ' ';"
    "'-' $delim* > '-';",

    // [10] (disabled)
    //    "::BEGIN;"
    //    "$ws = [[:Separator:][\\u0009-\\u000C]$];"
    //    "$delim = [\\-$ws];"
    //    "::END;"
    //    "$ws $delim* > ' ';"
    //    "'-' $delim* > '-';",
    "", // test case commented out below, this is here to keep from messing up the indexes

    // [11] (disabled)
    //    "$ws = [[:Separator:][\\u0009-\\u000C]$];"
    //    "$delim = [\\-$ws];"
    //    "::BEGIN;"
    //    "$ws $delim* > ' ';"
    //    "'-' $delim* > '-';"
    //    "::END;",
    "", // test case commented out below, this is here to keep from messing up the indexes

    // [12] (disabled)
    //    "$ws = [[:Separator:][\\u0009-\\u000C]$];"
    //    "$delim = [\\-$ws];"
    //    "$ab = [ab];"
    //    "::BEGIN;"
    //    "$ws $delim* > ' ';"
    //    "'-' $delim* > '-';"
    //    "::END;"
    //    "::BEGIN;"
    //    "$ab { ' ' } $ab > '-';"
    //    "c { ' ' > ;"
    //    "::END;"
    //    "::BEGIN;"
    //    "'a-a' > a\\%|a;"
    //    "::END;",
    "", // test case commented out below, this is here to keep from messing up the indexes

    // [13]
    "$ws = [[:Separator:][\\u0009-\\u000C]$];"
    "$delim = [\\-$ws];"
    "$ab = [ab];"
    "::Null;"
    "$ws $delim* > ' ';"
    "'-' $delim* > '-';"
    "::Null;"
    "$ab { ' ' } $ab > '-';"
    "c { ' ' > ;"
    "::Null;"
    "'a-a' > a\\%|a;",

    // [14] (disabled)
    //    "::[abc];"
    //    "::BEGIN;"
    //    "abc > xy;"
    //    "::END;"
    //    "::BEGIN;"
    //    "aba > yz;"
    //    "::END;"
    //    "::Upper;",
    "", // test case commented out below, this is here to keep from messing up the indexes

    // [15]
    "::[abc];"
    "abc > xy;"
    "::Null;"
    "aba > yz;"
    "::Upper;",

    // [16] (disabled)
    //    "::[abc];"
    //    "::BEGIN;"
    //    "abc <> xy;"
    //    "::END;"
    //    "::BEGIN;"
    //    "aba <> yz;"
    //    "::END;"
    //    "::Upper(Lower);"
    //    "::([XYZ]);"
    "", // test case commented out below, this is here to keep from messing up the indexes

    // [17] (the one reversible rule set)
    "::[abc];"
    "abc <> xy;"
    "::Null;"
    "aba <> yz;"
    "::Upper(Lower);"
    "::([XYZ]);"
};
4315
4316 /*
4317 (This entire test is commented out below and will need some heavy revision when we re-add
4318 the ::BEGIN/::END stuff)
4319 static const char* BOGUS_BEGIN_END_RULES[] = {
4320     // [7]
4321     "::BEGIN;"
4322     "abc > xy;"
4323     "::BEGIN;"
4324     "aba > z;"
4325     "::END;"
4326     "::END;",
4327
4328     // [8]
4329     "abc > xy;"
4330     " aba > z;"
4331     "::END;",
4332
4333     // [9]
4334     "::BEGIN;"
4335     "::Upper;"
4336     "::END;"
4337 };
4338 static const int32_t BOGUS_BEGIN_END_RULES_length = UPRV_LENGTHOF(BOGUS_BEGIN_END_RULES);
4339 */
4340
4341 static const char* BEGIN_END_TEST_CASES[] = {
4342     // rules             input                   expected output
4343     BEGIN_END_RULES[0],  "abc ababc aba",        "xy zbc z",
4344 //    BEGIN_END_RULES[1],  "abc ababc aba",        "xy abxy z",
4345 //    BEGIN_END_RULES[2],  "abc ababc aba",        "xy abxy z",
4346 //    BEGIN_END_RULES[3],  "abc ababc aba",        "xy abxy z",
4347     BEGIN_END_RULES[4],  "abc ababc aba",        "xy abxy z",
4348     BEGIN_END_RULES[5],  "abccabaacababcbc",     "PXAARXQBR",
4349
4350     BEGIN_END_RULES[6],  "e   e - e---e-  e",    "e e e-e-e",
4351     BEGIN_END_RULES[7],  "e   e - e---e-  e",    "e e e-e-e",
4352     BEGIN_END_RULES[8],  "e   e - e---e-  e",    "e e e-e-e",
4353     BEGIN_END_RULES[9],  "e   e - e---e-  e",    "e e e-e-e",
4354 //    BEGIN_END_RULES[10],  "e   e - e---e-  e",    "e e e-e-e",
4355 //    BEGIN_END_RULES[11], "e   e - e---e-  e",    "e e e-e-e",
4356 //    BEGIN_END_RULES[12], "e   e - e---e-  e",    "e e e-e-e",
4357 //    BEGIN_END_RULES[12], "a    a    a    a",     "a%a%a%a",
4358 //    BEGIN_END_RULES[12], "a a-b c b a",          "a%a-b cb-a",
4359     BEGIN_END_RULES[13], "e   e - e---e-  e",    "e e e-e-e",
4360     BEGIN_END_RULES[13], "a    a    a    a",     "a%a%a%a",
4361     BEGIN_END_RULES[13], "a a-b c b a",          "a%a-b cb-a",
4362
4363 //    BEGIN_END_RULES[14], "abc xy ababc xyz aba", "XY xy ABXY xyz YZ",
4364     BEGIN_END_RULES[15], "abc xy ababc xyz aba", "XY xy ABXY xyz YZ",
4365 //    BEGIN_END_RULES[16], "abc xy ababc xyz aba", "XY xy ABXY xyz YZ",
4366     BEGIN_END_RULES[17], "abc xy ababc xyz aba", "XY xy ABXY xyz YZ"
4367 };
4368 static const int32_t BEGIN_END_TEST_CASES_length = UPRV_LENGTHOF(BEGIN_END_TEST_CASES);
4369
4370 void TransliteratorTest::TestBeginEnd() {
4371     // run through the list of test cases above
4372     int32_t i = 0;
4373     for (i = 0; i < BEGIN_END_TEST_CASES_length; i += 3) {
4374         expect((UnicodeString)"Test case #" + (i / 3),
4375                UnicodeString(BEGIN_END_TEST_CASES[i], -1, US_INV),
4376                UnicodeString(BEGIN_END_TEST_CASES[i + 1], -1, US_INV),
4377                UnicodeString(BEGIN_END_TEST_CASES[i + 2], -1, US_INV));
4378     }
4379
4380     // instantiate the one reversible rule set in the reverse direction and make sure it does the right thing
4381     UParseError parseError;
4382     UErrorCode status = U_ZERO_ERROR;
4383     Transliterator* reversed  = Transliterator::createFromRules("Reversed", UnicodeString(BEGIN_END_RULES[17]),
4384             UTRANS_REVERSE, parseError, status);
4385     if (reversed == 0 || U_FAILURE(status)) {
4386         reportParseError(UnicodeString("FAIL: Couldn't create reversed transliterator"), parseError, status);
4387     } else {
4388         expect(*reversed, UnicodeString("xy XY XYZ yz YZ"), UnicodeString("xy abc xaba yz aba"));
4389     }
4390     delete reversed;
4391
4392     // finally, run through the list of syntactically-ill-formed rule sets above and make sure
4393     // that all of them cause errors
4394 /*
4395 (commented out until we have the real ::BEGIN/::END stuff in place
4396     for (i = 0; i < BOGUS_BEGIN_END_RULES_length; i++) {
4397         UParseError parseError;
4398         UErrorCode status = U_ZERO_ERROR;
4399         Transliterator* t = Transliterator::createFromRules("foo", UnicodeString(BOGUS_BEGIN_END_RULES[i]),
4400                 UTRANS_FORWARD, parseError, status);
4401         if (!U_FAILURE(status)) {
4402             delete t;
4403             errln((UnicodeString)"Should have gotten syntax error from " + BOGUS_BEGIN_END_RULES[i]);
4404         }
4405     }
4406 */
4407 }
4408
4409 void TransliteratorTest::TestBeginEndToRules() {
4410     // run through the same list of test cases we used above, but this time, instead of just
4411     // instantiating a Transliterator from the rules and running the test against it, we instantiate
4412     // a Transliterator from the rules, do toRules() on it, instantiate a Transliterator from
4413     // the resulting set of rules, and make sure that the generated rule set is semantically equivalent
4414     // to (i.e., does the same thing as) the original rule set
4415     for (int32_t i = 0; i < BEGIN_END_TEST_CASES_length; i += 3) {
4416         UParseError parseError;
4417         UErrorCode status = U_ZERO_ERROR;
4418         Transliterator* t = Transliterator::createFromRules("--", UnicodeString(BEGIN_END_TEST_CASES[i], -1, US_INV),
4419                 UTRANS_FORWARD, parseError, status);
4420         if (U_FAILURE(status)) {
4421             reportParseError(UnicodeString("FAIL: Couldn't create transliterator"), parseError, status);
4422         } else {
4423             UnicodeString rules;
4424             t->toRules(rules, TRUE);
4425             Transliterator* t2 = Transliterator::createFromRules((UnicodeString)"Test case #" + (i / 3), rules,
4426                     UTRANS_FORWARD, parseError, status);
4427             if (U_FAILURE(status)) {
4428                 reportParseError(UnicodeString("FAIL: Couldn't create transliterator from generated rules"),
4429                         parseError, status);
4430                 delete t;
4431             } else {
4432                 expect(*t2,
4433                        UnicodeString(BEGIN_END_TEST_CASES[i + 1], -1, US_INV),
4434                        UnicodeString(BEGIN_END_TEST_CASES[i + 2], -1, US_INV));
4435                 delete t;
4436                 delete t2;
4437             }
4438         }
4439     }
4440
4441     // do the same thing for the reversible test case
4442     UParseError parseError;
4443     UErrorCode status = U_ZERO_ERROR;
4444     Transliterator* reversed = Transliterator::createFromRules("Reversed", UnicodeString(BEGIN_END_RULES[17]),
4445             UTRANS_REVERSE, parseError, status);
4446     if (U_FAILURE(status)) {
4447         reportParseError(UnicodeString("FAIL: Couldn't create reversed transliterator"), parseError, status);
4448     } else {
4449         UnicodeString rules;
4450         reversed->toRules(rules, FALSE);
4451         Transliterator* reversed2 = Transliterator::createFromRules("Reversed", rules, UTRANS_FORWARD,
4452                 parseError, status);
4453         if (U_FAILURE(status)) {
4454             reportParseError(UnicodeString("FAIL: Couldn't create reversed transliterator from generated rules"),
4455                     parseError, status);
4456             delete reversed;
4457         } else {
4458             expect(*reversed2,
4459                    UnicodeString("xy XY XYZ yz YZ"),
4460                    UnicodeString("xy abc xaba yz aba"));
4461             delete reversed;
4462             delete reversed2;
4463         }
4464     }
4465 }
4466
4467 void TransliteratorTest::TestRegisterAlias() {
4468     UnicodeString longID("Lower;[aeiou]Upper");
4469     UnicodeString shortID("Any-CapVowels");
4470     UnicodeString reallyShortID("CapVowels");
4471
4472     Transliterator::registerAlias(shortID, longID);
4473
4474     UErrorCode err = U_ZERO_ERROR;
4475     Transliterator* t1 = Transliterator::createInstance(longID, UTRANS_FORWARD, err);
4476     if (U_FAILURE(err)) {
4477         errln("Failed to instantiate transliterator with long ID");
4478         Transliterator::unregister(shortID);
4479         return;
4480     }
4481     Transliterator* t2 = Transliterator::createInstance(reallyShortID, UTRANS_FORWARD, err);
4482     if (U_FAILURE(err)) {
4483         errln("Failed to instantiate transliterator with short ID");
4484         delete t1;
4485         Transliterator::unregister(shortID);
4486         return;
4487     }
4488
4489     if (t1->getID() != longID)
4490         errln("Transliterator instantiated with long ID doesn't have long ID");
4491     if (t2->getID() != reallyShortID)
4492         errln("Transliterator instantiated with short ID doesn't have short ID");
4493
4494     UnicodeString rules1;
4495     UnicodeString rules2;
4496
4497     t1->toRules(rules1, TRUE);
4498     t2->toRules(rules2, TRUE);
4499     if (rules1 != rules2)
4500         errln("Alias transliterators aren't the same");
4501
4502     delete t1;
4503     delete t2;
4504     Transliterator::unregister(shortID);
4505
4506     t1 = Transliterator::createInstance(shortID, UTRANS_FORWARD, err);
4507     if (U_SUCCESS(err)) {
4508         errln("Instantiation with short ID succeeded after short ID was unregistered");
4509         delete t1;
4510     }
4511
4512     // try the same thing again, but this time with something other than
4513     // an instance of CompoundTransliterator
4514     UnicodeString realID("Latin-Greek");
4515     UnicodeString fakeID("Latin-dlgkjdflkjdl");
4516     Transliterator::registerAlias(fakeID, realID);
4517
4518     err = U_ZERO_ERROR;
4519     t1 = Transliterator::createInstance(realID, UTRANS_FORWARD, err);
4520     if (U_FAILURE(err)) {
4521         dataerrln("Failed to instantiate transliterator with real ID - %s", u_errorName(err));
4522         Transliterator::unregister(realID);
4523         return;
4524     }
4525     t2 = Transliterator::createInstance(fakeID, UTRANS_FORWARD, err);
4526     if (U_FAILURE(err)) {
4527         errln("Failed to instantiate transliterator with fake ID");
4528         delete t1;
4529         Transliterator::unregister(realID);
4530         return;
4531     }
4532
4533     t1->toRules(rules1, TRUE);
4534     t2->toRules(rules2, TRUE);
4535     if (rules1 != rules2)
4536         errln("Alias transliterators aren't the same");
4537
4538     delete t1;
4539     delete t2;
4540     Transliterator::unregister(fakeID);
4541 }
4542
4543 void TransliteratorTest::TestRuleStripping() {
4544     /*
4545 #
4546 \uE001>\u0C01; # SIGN
4547     */
4548     static const UChar rule[] = {
4549         0x0023,0x0020,0x000D,0x000A,
4550         0xE001,0x003E,0x0C01,0x003B,0x0020,0x0023,0x0020,0x0053,0x0049,0x0047,0x004E,0
4551     };
4552     static const UChar expectedRule[] = {
4553         0xE001,0x003E,0x0C01,0x003B,0
4554     };
4555     UChar result[UPRV_LENGTHOF(rule)];
4556     UErrorCode status = U_ZERO_ERROR;
4557     int32_t len = utrans_stripRules(rule, UPRV_LENGTHOF(rule), result, &status);
4558     if (len != u_strlen(expectedRule)) {
4559         errln("utrans_stripRules return len = %d", len);
4560     }
4561     if (u_strncmp(expectedRule, result, len) != 0) {
4562         errln("utrans_stripRules did not return expected string");
4563     }
4564 }
4565
4566 /**
4567  * Test the Halfwidth-Fullwidth transliterator (ticket 6281).
4568  */
4569 void TransliteratorTest::TestHalfwidthFullwidth(void) {
4570     UParseError parseError;
4571     UErrorCode status = U_ZERO_ERROR;
4572     Transliterator* hf = Transliterator::createInstance("Halfwidth-Fullwidth", UTRANS_FORWARD, parseError, status);
4573     Transliterator* fh = Transliterator::createInstance("Fullwidth-Halfwidth", UTRANS_FORWARD, parseError, status);
4574     if (hf == 0 || fh == 0) {
4575         dataerrln("FAIL: createInstance failed - %s", u_errorName(status));
4576         delete hf;
4577         delete fh;
4578         return;
4579     }
4580
4581     // Array of 2n items
4582     // Each item is
4583     //   "hf"|"fh"|"both",
4584     //   <Halfwidth>,
4585     //   <Fullwidth>
4586     const char* DATA[] = {
4587         "both",
4588         "\\uFFE9\\uFFEA\\uFFEB\\uFFEC\\u0061\\uFF71\\u00AF\\u0020",
4589         "\\u2190\\u2191\\u2192\\u2193\\uFF41\\u30A2\\uFFE3\\u3000",
4590     };
4591     int32_t DATA_length = UPRV_LENGTHOF(DATA);
4592
4593     for (int32_t i=0; i<DATA_length; i+=3) {
4594         UnicodeString h = CharsToUnicodeString(DATA[i+1]);
4595         UnicodeString f = CharsToUnicodeString(DATA[i+2]);
4596         switch (*DATA[i]) {
4597         case 0x68: //'h': // Halfwidth-Fullwidth only
4598             expect(*hf, h, f);
4599             break;
4600         case 0x66: //'f': // Fullwidth-Halfwidth only
4601             expect(*fh, f, h);
4602             break;
4603         case 0x62: //'b': // both directions
4604             expect(*hf, h, f);
4605             expect(*fh, f, h);
4606             break;
4607         }
4608     }
4609     delete hf;
4610     delete fh;
4611 }
4612
4613
4614     /**
4615      *  Test Thai.  The text is the first paragraph of "What is Unicode" from the Unicode.org web site.
4616      *              TODO: confirm that the expected results are correct.
4617      *              For now, test just confirms that C++ and Java give identical results.
4618      */
4619 void TransliteratorTest::TestThai(void) {
4620 #if !UCONFIG_NO_BREAK_ITERATION
4621     UParseError parseError;
4622     UErrorCode status = U_ZERO_ERROR;
4623     Transliterator* tr = Transliterator::createInstance("Any-Latin", UTRANS_FORWARD, parseError, status);
4624     if (tr == 0) {
4625         dataerrln("FAIL: createInstance failed - %s", u_errorName(status));
4626         return;
4627     }
4628     if (U_FAILURE(status)) {
4629         errln("FAIL: createInstance failed with %s", u_errorName(status));
4630         return;
4631     }
4632     const char *thaiText =
4633         "\\u0e42\\u0e14\\u0e22\\u0e1e\\u0e37\\u0e49\\u0e19\\u0e10\\u0e32\\u0e19\\u0e41\\u0e25\\u0e49\\u0e27, \\u0e04\\u0e2d"
4634         "\\u0e21\\u0e1e\\u0e34\\u0e27\\u0e40\\u0e15\\u0e2d\\u0e23\\u0e4c\\u0e08\\u0e30\\u0e40\\u0e01\\u0e35\\u0e48\\u0e22"
4635         "\\u0e27\\u0e02\\u0e49\\u0e2d\\u0e07\\u0e01\\u0e31\\u0e1a\\u0e40\\u0e23\\u0e37\\u0e48\\u0e2d\\u0e07\\u0e02\\u0e2d"
4636         "\\u0e07\\u0e15\\u0e31\\u0e27\\u0e40\\u0e25\\u0e02. \\u0e04\\u0e2d\\u0e21\\u0e1e\\u0e34\\u0e27\\u0e40\\u0e15\\u0e2d"
4637         "\\u0e23\\u0e4c\\u0e08\\u0e31\\u0e14\\u0e40\\u0e01\\u0e47\\u0e1a\\u0e15\\u0e31\\u0e27\\u0e2d\\u0e31\\u0e01\\u0e29"
4638         "\\u0e23\\u0e41\\u0e25\\u0e30\\u0e2d\\u0e31\\u0e01\\u0e02\\u0e23\\u0e30\\u0e2d\\u0e37\\u0e48\\u0e19\\u0e46 \\u0e42"
4639         "\\u0e14\\u0e22\\u0e01\\u0e32\\u0e23\\u0e01\\u0e33\\u0e2b\\u0e19\\u0e14\\u0e2b\\u0e21\\u0e32\\u0e22\\u0e40\\u0e25"
4640         "\\u0e02\\u0e43\\u0e2b\\u0e49\\u0e2a\\u0e33\\u0e2b\\u0e23\\u0e31\\u0e1a\\u0e41\\u0e15\\u0e48\\u0e25\\u0e30\\u0e15"
4641         "\\u0e31\\u0e27. \\u0e01\\u0e48\\u0e2d\\u0e19\\u0e2b\\u0e19\\u0e49\\u0e32\\u0e17\\u0e35\\u0e48\\u0e4a Unicode \\u0e08"
4642         "\\u0e30\\u0e16\\u0e39\\u0e01\\u0e2a\\u0e23\\u0e49\\u0e32\\u0e07\\u0e02\\u0e36\\u0e49\\u0e19, \\u0e44\\u0e14\\u0e49"
4643         "\\u0e21\\u0e35\\u0e23\\u0e30\\u0e1a\\u0e1a encoding \\u0e2d\\u0e22\\u0e39\\u0e48\\u0e2b\\u0e25\\u0e32\\u0e22\\u0e23"
4644         "\\u0e49\\u0e2d\\u0e22\\u0e23\\u0e30\\u0e1a\\u0e1a\\u0e2a\\u0e33\\u0e2b\\u0e23\\u0e31\\u0e1a\\u0e01\\u0e32\\u0e23"
4645         "\\u0e01\\u0e33\\u0e2b\\u0e19\\u0e14\\u0e2b\\u0e21\\u0e32\\u0e22\\u0e40\\u0e25\\u0e02\\u0e40\\u0e2b\\u0e25\\u0e48"
4646         "\\u0e32\\u0e19\\u0e35\\u0e49. \\u0e44\\u0e21\\u0e48\\u0e21\\u0e35 encoding \\u0e43\\u0e14\\u0e17\\u0e35\\u0e48"
4647         "\\u0e21\\u0e35\\u0e08\\u0e33\\u0e19\\u0e27\\u0e19\\u0e15\\u0e31\\u0e27\\u0e2d\\u0e31\\u0e01\\u0e02\\u0e23\\u0e30"
4648         "\\u0e21\\u0e32\\u0e01\\u0e40\\u0e1e\\u0e35\\u0e22\\u0e07\\u0e1e\\u0e2d: \\u0e22\\u0e01\\u0e15\\u0e31\\u0e27\\u0e2d"
4649         "\\u0e22\\u0e48\\u0e32\\u0e07\\u0e40\\u0e0a\\u0e48\\u0e19, \\u0e40\\u0e09\\u0e1e\\u0e32\\u0e30\\u0e43\\u0e19\\u0e01"
4650         "\\u0e25\\u0e38\\u0e48\\u0e21\\u0e2a\\u0e2b\\u0e20\\u0e32\\u0e1e\\u0e22\\u0e38\\u0e42\\u0e23\\u0e1b\\u0e40\\u0e1e"
4651         "\\u0e35\\u0e22\\u0e07\\u0e41\\u0e2b\\u0e48\\u0e07\\u0e40\\u0e14\\u0e35\\u0e22\\u0e27 \\u0e01\\u0e47\\u0e15\\u0e49"
4652         "\\u0e2d\\u0e07\\u0e01\\u0e32\\u0e23\\u0e2b\\u0e25\\u0e32\\u0e22 encoding \\u0e43\\u0e19\\u0e01\\u0e32\\u0e23\\u0e04"
4653         "\\u0e23\\u0e2d\\u0e1a\\u0e04\\u0e25\\u0e38\\u0e21\\u0e17\\u0e38\\u0e01\\u0e20\\u0e32\\u0e29\\u0e32\\u0e43\\u0e19"
4654         "\\u0e01\\u0e25\\u0e38\\u0e48\\u0e21. \\u0e2b\\u0e23\\u0e37\\u0e2d\\u0e41\\u0e21\\u0e49\\u0e41\\u0e15\\u0e48\\u0e43"
4655         "\\u0e19\\u0e20\\u0e32\\u0e29\\u0e32\\u0e40\\u0e14\\u0e35\\u0e48\\u0e22\\u0e27 \\u0e40\\u0e0a\\u0e48\\u0e19 \\u0e20"
4656         "\\u0e32\\u0e29\\u0e32\\u0e2d\\u0e31\\u0e07\\u0e01\\u0e24\\u0e29 \\u0e01\\u0e47\\u0e44\\u0e21\\u0e48\\u0e21\\u0e35"
4657         " encoding \\u0e43\\u0e14\\u0e17\\u0e35\\u0e48\\u0e40\\u0e1e\\u0e35\\u0e22\\u0e07\\u0e1e\\u0e2d\\u0e2a\\u0e33\\u0e2b"
4658         "\\u0e23\\u0e31\\u0e1a\\u0e17\\u0e38\\u0e01\\u0e15\\u0e31\\u0e27\\u0e2d\\u0e31\\u0e01\\u0e29\\u0e23, \\u0e40\\u0e04"
4659         "\\u0e23\\u0e37\\u0e48\\u0e2d\\u0e07\\u0e2b\\u0e21\\u0e32\\u0e22\\u0e27\\u0e23\\u0e23\\u0e04\\u0e15\\u0e2d\\u0e19"
4660         " \\u0e41\\u0e25\\u0e30\\u0e2a\\u0e31\\u0e0d\\u0e25\\u0e31\\u0e01\\u0e29\\u0e13\\u0e4c\\u0e17\\u0e32\\u0e07\\u0e40"
4661         "\\u0e17\\u0e04\\u0e19\\u0e34\\u0e04\\u0e17\\u0e35\\u0e48\\u0e43\\u0e0a\\u0e49\\u0e01\\u0e31\\u0e19\\u0e2d\\u0e22"
4662         "\\u0e39\\u0e48\\u0e17\\u0e31\\u0e48\\u0e27\\u0e44\\u0e1b.";
4663
4664     const char *latinText =
4665         "doy ph\\u1ee5\\u0304\\u0302n \\u1e6d\\u0304h\\u0101n l\\u00e6\\u0302w, khxmphiwtexr\\u0312 ca ke\\u012b\\u0300"
4666         "ywk\\u0304\\u0125xng k\\u1ea1b re\\u1ee5\\u0304\\u0300xng k\\u0304hxng t\\u1ea1wlek\\u0304h. khxmphiwtexr"
4667         "\\u0312 c\\u1ea1d k\\u0115b t\\u1ea1w x\\u1ea1ks\\u0304\\u02b9r l\\u00e6a x\\u1ea1kk\\u0304h ra x\\u1ee5\\u0304"
4668         "\\u0300n\\u00ab doy k\\u0101r k\\u1ea3h\\u0304nd h\\u0304m\\u0101ylek\\u0304h h\\u0304\\u0131\\u0302 s\\u0304"
4669         "\\u1ea3h\\u0304r\\u1ea1b t\\u00e6\\u0300la t\\u1ea1w. k\\u0300xn h\\u0304n\\u0302\\u0101 th\\u012b\\u0300\\u0301"
4670         " Unicode ca t\\u0304h\\u016bk s\\u0304r\\u0302\\u0101ng k\\u0304h\\u1ee5\\u0302n, d\\u1ecb\\u0302 m\\u012b "
4671         "rabb encoding xy\\u016b\\u0300 h\\u0304l\\u0101y r\\u0302xy rabb s\\u0304\\u1ea3h\\u0304r\\u1ea1b k\\u0101"
4672         "r k\\u1ea3h\\u0304nd h\\u0304m\\u0101ylek\\u0304h h\\u0304el\\u0300\\u0101 n\\u012b\\u0302. m\\u1ecb\\u0300m"
4673         "\\u012b encoding d\\u0131 th\\u012b\\u0300 m\\u012b c\\u1ea3nwn t\\u1ea1w x\\u1ea1kk\\u0304hra m\\u0101k p"
4674         "he\\u012byng phx: yk t\\u1ea1wx\\u1ef3\\u0101ng ch\\u00e8n, c\\u0304heph\\u0101a n\\u0131 kl\\u00f9m s\\u0304"
4675         "h\\u0304p\\u0323h\\u0101ph yurop phe\\u012byng h\\u0304\\u00e6\\u0300ng de\\u012byw k\\u0306 t\\u0302xngk\\u0101"
4676         "r h\\u0304l\\u0101y encoding n\\u0131 k\\u0101r khrxbkhlum thuk p\\u0323h\\u0101s\\u0304\\u02b9\\u0101 n\\u0131"
4677         " kl\\u00f9m. h\\u0304r\\u1ee5\\u0304x m\\u00e6\\u0302t\\u00e6\\u0300 n\\u0131 p\\u0323h\\u0101s\\u0304\\u02b9"
4678         "\\u0101 de\\u012b\\u0300yw ch\\u00e8n p\\u0323h\\u0101s\\u0304\\u02b9\\u0101 x\\u1ea1ngkvs\\u0304\\u02b9 k\\u0306"
4679         " m\\u1ecb\\u0300m\\u012b encoding d\\u0131 th\\u012b\\u0300 phe\\u012byng phx s\\u0304\\u1ea3h\\u0304r\\u1ea1"
4680         "b thuk t\\u1ea1w x\\u1ea1ks\\u0304\\u02b9r, kher\\u1ee5\\u0304\\u0300xngh\\u0304m\\u0101y wrrkh txn l\\u00e6"
4681         "a s\\u0304\\u1ea1\\u1ef5l\\u1ea1ks\\u0304\\u02b9\\u1e47\\u0312 th\\u0101ng thekhnikh th\\u012b\\u0300 ch\\u0131"
4682         "\\u0302 k\\u1ea1n xy\\u016b\\u0300 th\\u1ea1\\u0300wp\\u1ecb.";
4683
4684
4685     UnicodeString  xlitText(thaiText);
4686     xlitText = xlitText.unescape();
4687     tr->transliterate(xlitText);
4688
4689     UnicodeString expectedText(latinText);
4690     expectedText = expectedText.unescape();
4691     expect(*tr, xlitText, expectedText);
4692
4693     delete tr;
4694 #endif
4695 }
4696
4697
4698 //======================================================================
4699 // Support methods
4700 //======================================================================
4701 void TransliteratorTest::expectT(const UnicodeString& id,
4702                                  const UnicodeString& source,
4703                                  const UnicodeString& expectedResult) {
4704     UErrorCode ec = U_ZERO_ERROR;
4705     UParseError pe;
4706     Transliterator *t = Transliterator::createInstance(id, UTRANS_FORWARD, pe, ec);
4707     if (U_FAILURE(ec)) {
4708         errln((UnicodeString)"FAIL: Could not create " + id + " -  " + u_errorName(ec));
4709         delete t;
4710         return;
4711     }
4712     expect(*t, source, expectedResult);
4713     delete t;
4714 }
4715
4716 void TransliteratorTest::reportParseError(const UnicodeString& message,
4717                                           const UParseError& parseError,
4718                                           const UErrorCode& status) {
4719     dataerrln(message +
4720           /*", parse error " + parseError.code +*/
4721           ", line " + parseError.line +
4722           ", offset " + parseError.offset +
4723           ", pre-context " + prettify(parseError.preContext, TRUE) +
4724           ", post-context " + prettify(parseError.postContext,TRUE) +
4725           ", Error: " + u_errorName(status));
4726 }
4727
4728 void TransliteratorTest::expect(const UnicodeString& rules,
4729                                 const UnicodeString& source,
4730                                 const UnicodeString& expectedResult,
4731                                 UTransPosition *pos) {
4732     expect("<ID>", rules, source, expectedResult, pos);
4733 }
4734
4735 void TransliteratorTest::expect(const UnicodeString& id,
4736                                 const UnicodeString& rules,
4737                                 const UnicodeString& source,
4738                                 const UnicodeString& expectedResult,
4739                                 UTransPosition *pos) {
4740     UErrorCode status = U_ZERO_ERROR;
4741     UParseError parseError;
4742     Transliterator* t = Transliterator::createFromRules(id, rules, UTRANS_FORWARD, parseError, status);
4743     if (U_FAILURE(status)) {
4744         reportParseError(UnicodeString("Couldn't create transliterator from ") + rules, parseError, status);
4745     } else {
4746         expect(*t, source, expectedResult, pos);
4747     }
4748     delete t;
4749 }
4750
4751 void TransliteratorTest::expect(const Transliterator& t,
4752                                 const UnicodeString& source,
4753                                 const UnicodeString& expectedResult,
4754                                 const Transliterator& reverseTransliterator) {
4755     expect(t, source, expectedResult);
4756     expect(reverseTransliterator, expectedResult, source);
4757 }
4758
4759 void TransliteratorTest::expect(const Transliterator& t,
4760                                 const UnicodeString& source,
4761                                 const UnicodeString& expectedResult,
4762                                 UTransPosition *pos) {
4763     if (pos == 0) {
4764         UnicodeString result(source);
4765         t.transliterate(result);
4766         expectAux(t.getID() + ":String", source, result, expectedResult);
4767     }
4768     UTransPosition index={0, 0, 0, 0};
4769     if (pos != 0) {
4770         index = *pos;
4771     }
4772
4773     UnicodeString rsource(source);
4774     if (pos == 0) {
4775         t.transliterate(rsource);
4776     } else {
4777         // Do it all at once -- below we do it incrementally
4778         t.finishTransliteration(rsource, *pos);
4779     }
4780     expectAux(t.getID() + ":Replaceable", source, rsource, expectedResult);
4781
4782     // Test keyboard (incremental) transliteration -- this result
4783     // must be the same after we finalize (see below).
4784     UnicodeString log;
4785     rsource.remove();
4786     if (pos != 0) {
4787         rsource = source;
4788         formatInput(log, rsource, index);
4789         log.append(" -> ");
4790         UErrorCode status = U_ZERO_ERROR;
4791         t.transliterate(rsource, index, status);
4792         formatInput(log, rsource, index);
4793     } else {
4794         for (int32_t i=0; i<source.length(); ++i) {
4795             if (i != 0) {
4796                 log.append(" + ");
4797             }
4798             log.append(source.charAt(i)).append(" -> ");
4799             UErrorCode status = U_ZERO_ERROR;
4800             t.transliterate(rsource, index, source.charAt(i), status);
4801             formatInput(log, rsource, index);
4802         }
4803     }
4804
4805     // As a final step in keyboard transliteration, we must call
4806     // transliterate to finish off any pending partial matches that
4807     // were waiting for more input.
4808     t.finishTransliteration(rsource, index);
4809     log.append(" => ").append(rsource);
4810
4811     expectAux(t.getID() + ":Keyboard", log,
4812               rsource == expectedResult,
4813               expectedResult);
4814 }
4815
4816
4817 /**
4818  * @param appendTo result is appended to this param.
4819  * @param input the string being transliterated
4820  * @param pos the index struct
4821  */
4822 UnicodeString& TransliteratorTest::formatInput(UnicodeString &appendTo,
4823                                                const UnicodeString& input,
4824                                                const UTransPosition& pos) {
4825     // Output a string of the form aaa{bbb|ccc|ddd}eee, where
4826     // the {} indicate the context start and limit, and the ||
4827     // indicate the start and limit.
4828     if (0 <= pos.contextStart &&
4829         pos.contextStart <= pos.start &&
4830         pos.start <= pos.limit &&
4831         pos.limit <= pos.contextLimit &&
4832         pos.contextLimit <= input.length()) {
4833
4834         UnicodeString a, b, c, d, e;
4835         input.extractBetween(0, pos.contextStart, a);
4836         input.extractBetween(pos.contextStart, pos.start, b);
4837         input.extractBetween(pos.start, pos.limit, c);
4838         input.extractBetween(pos.limit, pos.contextLimit, d);
4839         input.extractBetween(pos.contextLimit, input.length(), e);
4840         appendTo.append(a).append((UChar)123/*{*/).append(b).
4841             append((UChar)PIPE).append(c).append((UChar)PIPE).append(d).
4842             append((UChar)125/*}*/).append(e);
4843     } else {
4844         appendTo.append((UnicodeString)"INVALID UTransPosition {cs=" +
4845                         pos.contextStart + ", s=" + pos.start + ", l=" +
4846                         pos.limit + ", cl=" + pos.contextLimit + "} on " +
4847                         input);
4848     }
4849     return appendTo;
4850 }
4851
4852 void TransliteratorTest::expectAux(const UnicodeString& tag,
4853                                    const UnicodeString& source,
4854                                    const UnicodeString& result,
4855                                    const UnicodeString& expectedResult) {
4856     expectAux(tag, source + " -> " + result,
4857               result == expectedResult,
4858               expectedResult);
4859 }
4860
4861 void TransliteratorTest::expectAux(const UnicodeString& tag,
4862                                    const UnicodeString& summary, UBool pass,
4863                                    const UnicodeString& expectedResult) {
4864     if (pass) {
4865         logln(UnicodeString("(")+tag+") " + prettify(summary));
4866     } else {
4867         dataerrln(UnicodeString("FAIL: (")+tag+") "
4868               + prettify(summary)
4869               + ", expected " + prettify(expectedResult));
4870     }
4871 }
4872
4873 #endif /* #if !UCONFIG_NO_TRANSLITERATION */