icuSources/test/intltest/transtst.cpp

   1 // © 2016 and later: Unicode, Inc. and others.
   2 // License & terms of use: http://www.unicode.org/copyright.html
   3 /*
   4 **********************************************************************
   5 *   Copyright (C) 1999-2016, International Business Machines
   6 *   Corporation and others.  All Rights Reserved.
   7 **********************************************************************
   8 *   Date        Name        Description
   9 *   11/10/99    aliu        Creation.
  10 **********************************************************************
  11 */
  12
  13 #include "unicode/utypes.h"
  14
  15 #if !UCONFIG_NO_TRANSLITERATION
  16
  17 #include "transtst.h"
  18 #include "unicode/locid.h"
  19 #include "unicode/dtfmtsym.h"
  20 #include "unicode/normlzr.h"
  21 #include "unicode/translit.h"
  22 #include "unicode/uchar.h"
  23 #include "unicode/unifilt.h"
  24 #include "unicode/uniset.h"
  25 #include "unicode/ustring.h"
  26 #include "unicode/usetiter.h"
  27 #include "unicode/uscript.h"
  28 #include "unicode/utf16.h"
  29 #include "cpdtrans.h"
  30 #include "nultrans.h"
  31 #include "rbt.h"
  32 #include "rbt_pars.h"
  33 #include "anytrans.h"
  34 #include "esctrn.h"
  35 #include "name2uni.h"
  36 #include "nortrans.h"
  37 #include "remtrans.h"
  38 #include "titletrn.h"
  39 #include "tolowtrn.h"
  40 #include "toupptrn.h"
  41 #include "unesctrn.h"
  42 #include "uni2name.h"
  43 #include "cstring.h"
  44 #include "cmemory.h"
  45 #include <stdio.h>
  46
  47 /***********************************************************************
  48
  49                      HOW TO USE THIS TEST FILE
  50                                -or-
  51                   How I developed on two platforms
  52                 without losing (too much of) my mind
  53
  54
  55 1. Add new tests by copying/pasting/changing existing tests.  On Java,
  56    any public void method named Test...() taking no parameters becomes
  57    a test.  On C++, you need to modify the header and add a line to
  58    the runIndexedTest() dispatch method.
  59
  60 2. Make liberal use of the expect() method; it is your friend.
  61
  62 3. The tests in this file exactly match those in a sister file on the
  63    other side.  The two files are:
  64
  65    icu4j:  src/com/ibm/test/translit/TransliteratorTest.java
  66    icu4c:  source/test/intltest/transtst.cpp
  67
  68                   ==> THIS IS THE IMPORTANT PART <==
  69
  70    When you add a test in this file, add it in TransliteratorTest.java
  71    too.  Give it the same name and put it in the same relative place.
  72    This makes maintenance a lot simpler for any poor soul who ends up
  73    trying to synchronize the tests between icu4j and icu4c.
  74
  75 4. If you MUST enter a test that is NOT paralleled in the sister file,
  76    then add it in the special non-mirrored section.  These are
  77    labeled
  78
  79      "icu4j ONLY"
  80
  81    or
  82
  83      "icu4c ONLY"
  84
  85    Make sure you document the reason the test is here and not there.
  86
  87
  88 Thank you.
  89 The Management
  90 ***********************************************************************/
  91
// Define character constants thusly to be EBCDIC-friendly:
// each constant is spelled as an explicit Unicode code unit value
// instead of a character literal.  The trailing comment on each line
// shows the character the value stands for.
enum {
    LEFT_BRACE=((UChar)0x007B), /*{*/
    PIPE      =((UChar)0x007C), /*|*/
    ZERO      =((UChar)0x0030), /*0*/
    UPPER_A   =((UChar)0x0041)  /*A*/
};
  99
 100 TransliteratorTest::TransliteratorTest()
 101 :   DESERET_DEE((UChar32)0x10414),
 102     DESERET_dee((UChar32)0x1043C)
 103 {
 104 }
 105
 106 TransliteratorTest::~TransliteratorTest() {}
 107
/**
 * Test dispatch method: maps a zero-based test index onto one of the
 * Test...() methods of this class via the TESTCASE macro (defined
 * elsewhere; presumably it sets 'name' and runs the test when 'exec'
 * is true -- confirm against the macro definition).
 *
 * Any index without a case label -- including index 65 when
 * UCONFIG_NO_FILE_IO is set -- falls through to the default label,
 * which sets 'name' to the empty string.
 *
 * When adding a test, append a new TESTCASE line with the next unused
 * index (see "HOW TO USE THIS TEST FILE" at the top of this file).
 *
 * @param index zero-based index of the test to select
 * @param exec  passed through to the TESTCASE macro
 * @param name  output; set by TESTCASE, or to "" for an unknown index
 */
void
TransliteratorTest::runIndexedTest(int32_t index, UBool exec,
                                   const char* &name, char* /*par*/) {
    switch (index) {
        TESTCASE(0,TestInstantiation);
        TESTCASE(1,TestSimpleRules);
        TESTCASE(2,TestRuleBasedInverse);
        TESTCASE(3,TestKeyboard);
        TESTCASE(4,TestKeyboard2);
        TESTCASE(5,TestKeyboard3);
        TESTCASE(6,TestArabic);
        TESTCASE(7,TestCompoundKana);
        TESTCASE(8,TestCompoundHex);
        TESTCASE(9,TestFiltering);
        TESTCASE(10,TestInlineSet);
        TESTCASE(11,TestPatternQuoting);
        TESTCASE(12,TestJ277);
        TESTCASE(13,TestJ243);
        TESTCASE(14,TestJ329);
        TESTCASE(15,TestSegments);
        TESTCASE(16,TestCursorOffset);
        TESTCASE(17,TestArbitraryVariableValues);
        TESTCASE(18,TestPositionHandling);
        TESTCASE(19,TestHiraganaKatakana);
        TESTCASE(20,TestCopyJ476);
        TESTCASE(21,TestAnchors);
        TESTCASE(22,TestInterIndic);
        TESTCASE(23,TestFilterIDs);
        TESTCASE(24,TestCaseMap);
        TESTCASE(25,TestNameMap);
        TESTCASE(26,TestLiberalizedID);
        TESTCASE(27,TestCreateInstance);
        TESTCASE(28,TestNormalizationTransliterator);
        TESTCASE(29,TestCompoundRBT);
        TESTCASE(30,TestCompoundFilter);
        TESTCASE(31,TestRemove);
        TESTCASE(32,TestToRules);
        TESTCASE(33,TestContext);
        TESTCASE(34,TestSupplemental);
        TESTCASE(35,TestQuantifier);
        TESTCASE(36,TestSTV);
        TESTCASE(37,TestCompoundInverse);
        TESTCASE(38,TestNFDChainRBT);
        TESTCASE(39,TestNullInverse);
        TESTCASE(40,TestAliasInverseID);
        TESTCASE(41,TestCompoundInverseID);
        TESTCASE(42,TestUndefinedVariable);
        TESTCASE(43,TestEmptyContext);
        TESTCASE(44,TestCompoundFilterID);
        TESTCASE(45,TestPropertySet);
        TESTCASE(46,TestNewEngine);
        TESTCASE(47,TestQuantifiedSegment);
        TESTCASE(48,TestDevanagariLatinRT);
        TESTCASE(49,TestTeluguLatinRT);
        TESTCASE(50,TestCompoundLatinRT);
        TESTCASE(51,TestSanskritLatinRT);
        TESTCASE(52,TestLocaleInstantiation);
        TESTCASE(53,TestTitleAccents);
        TESTCASE(54,TestLocaleResource);
        TESTCASE(55,TestParseError);
        TESTCASE(56,TestOutputSet);
        TESTCASE(57,TestVariableRange);
        TESTCASE(58,TestInvalidPostContext);
        TESTCASE(59,TestIDForms);
        TESTCASE(60,TestToRulesMark);
        TESTCASE(61,TestEscape);
        TESTCASE(62,TestAnchorMasking);
        TESTCASE(63,TestDisplayName);
        TESTCASE(64,TestSpecialCases);
        // Index 65 is only dispatched when file I/O is available.
#if !UCONFIG_NO_FILE_IO
        TESTCASE(65,TestIncrementalProgress);
#endif
        TESTCASE(66,TestSurrogateCasing);
        TESTCASE(67,TestFunction);
        TESTCASE(68,TestInvalidBackRef);
        TESTCASE(69,TestMulticharStringSet);
        TESTCASE(70,TestUserFunction);
        TESTCASE(71,TestAnyX);
        TESTCASE(72,TestSourceTargetSet);
        TESTCASE(73,TestGurmukhiDevanagari);
        TESTCASE(74,TestPatternWhiteSpace);
        TESTCASE(75,TestAllCodepoints);
        TESTCASE(76,TestBoilerplate);
        TESTCASE(77,TestAlternateSyntax);
        TESTCASE(78,TestBeginEnd);
        TESTCASE(79,TestBeginEndToRules);
        TESTCASE(80,TestRegisterAlias);
        TESTCASE(81,TestRuleStripping);
        TESTCASE(82,TestHalfwidthFullwidth);
        TESTCASE(83,TestThai);
        TESTCASE(84,TestAny);
        // Unknown index: report an empty name.
        default: name = ""; break;
    }
}
 202
 203 /**
 204  * Make sure every system transliterator can be instantiated.
 205  *
 206  * ALSO test that the result of toRules() for each rule is a valid
 207  * rule.  Do this here so we don't have to have another test that
 208  * instantiates everything as well.
 209  */
 210 void TransliteratorTest::TestInstantiation() {
 211     UErrorCode ec = U_ZERO_ERROR;
 212     StringEnumeration* avail = Transliterator::getAvailableIDs(ec);
 213     assertSuccess("getAvailableIDs()", ec);
 214     assertTrue("getAvailableIDs()!=NULL", avail!=NULL);
 215     int32_t n = Transliterator::countAvailableIDs();
 216     assertTrue("getAvailableIDs().count()==countAvailableIDs()",
 217                avail->count(ec) == n);
 218     assertSuccess("count()", ec);
 219     UnicodeString name;
 220     for (int32_t i=0; i<n; ++i) {
 221         const UnicodeString& id = *avail->snext(ec);
 222         if (!assertSuccess("snext()", ec) ||
 223             !assertTrue("snext()!=NULL", (&id)!=NULL, TRUE)) {
 224             break;
 225         }
 226         UnicodeString id2 = Transliterator::getAvailableID(i);
 227         if (id.length() < 1) {
 228             errln(UnicodeString("FAIL: getAvailableID(") +
 229                   i + ") returned empty string");
 230             continue;
 231         }
 232         if (id != id2) {
 233             errln(UnicodeString("FAIL: getAvailableID(") +
 234                   i + ") != getAvailableIDs().snext()");
 235             continue;
 236         }
 237         UParseError parseError;
 238         UErrorCode status = U_ZERO_ERROR;
 239         Transliterator* t = Transliterator::createInstance(id,
 240                               UTRANS_FORWARD, parseError,status);
 241         name.truncate(0);
 242         Transliterator::getDisplayName(id, name);
 243         if (t == 0) {
 244 #if UCONFIG_NO_BREAK_ITERATION
 245             // If UCONFIG_NO_BREAK_ITERATION is on, then only Thai should fail.
 246             if (id.compare((UnicodeString)"Thai-Latn") != 0 &&
 247                 id.compare((UnicodeString)"Thai-Latin") != 0)
 248 #endif
 249                 dataerrln(UnicodeString("FAIL: Couldn't create ") + id +
 250                       /*", parse error " + parseError.code +*/
 251                       ", line " + parseError.line +
 252                       ", offset " + parseError.offset +
 253                       ", pre-context " + prettify(parseError.preContext, TRUE) +
 254                       ", post-context " +prettify(parseError.postContext,TRUE) +
 255                       ", Error: " + u_errorName(status));
 256                 // When createInstance fails, it deletes the failing
 257                 // entry from the available ID list.  We detect this
 258                 // here by looking for a change in countAvailableIDs.
 259             int32_t nn = Transliterator::countAvailableIDs();
 260             if (nn == (n - 1)) {
 261                 n = nn;
 262                 --i; // Compensate for deleted entry
 263             }
 264         } else {
 265             logln(UnicodeString("OK: ") + name + " (" + id + ")");
 266
 267             // Now test toRules
 268             UnicodeString rules;
 269             t->toRules(rules, TRUE);
 270             Transliterator *u = Transliterator::createFromRules("x",
 271                                     rules, UTRANS_FORWARD, parseError,status);
 272             if (u == 0) {
 273                 errln(UnicodeString("FAIL: ") + id +
 274                       ".createFromRules() => bad rules" +
 275                       /*", parse error " + parseError.code +*/
 276                       ", line " + parseError.line +
 277                       ", offset " + parseError.offset +
 278                       ", context " + prettify(parseError.preContext, TRUE) +
 279                       ", rules: " + prettify(rules, TRUE));
 280             } else {
 281                 delete u;
 282             }
 283             delete t;
 284         }
 285     }
 286     assertTrue("snext()==NULL", avail->snext(ec)==NULL);
 287     assertSuccess("snext()", ec);
 288     delete avail;
 289
 290     // Now test the failure path
 291     UParseError parseError;
 292     UErrorCode status = U_ZERO_ERROR;
 293     UnicodeString id("<Not a valid Transliterator ID>");
 294     Transliterator* t = Transliterator::createInstance(id, UTRANS_FORWARD, parseError, status);
 295     if (t != 0) {
 296         errln("FAIL: " + id + " returned a transliterator");
 297         delete t;
 298     } else {
 299         logln("OK: Bogus ID handled properly");
 300     }
 301 }
 302
 303 void TransliteratorTest::TestSimpleRules(void) {
 304     /* Example: rules 1. ab>x|y
 305      *                2. yc>z
 306      *
 307      * []|eabcd  start - no match, copy e to tranlated buffer
 308      * [e]|abcd  match rule 1 - copy output & adjust cursor
 309      * [ex|y]cd  match rule 2 - copy output & adjust cursor
 310      * [exz]|d   no match, copy d to transliterated buffer
 311      * [exzd]|   done
 312      */
 313     expect(UnicodeString("ab>x|y;", "") +
 314            "yc>z",
 315            "eabcd", "exzd");
 316
 317     /* Another set of rules:
 318      *    1. ab>x|yzacw
 319      *    2. za>q
 320      *    3. qc>r
 321      *    4. cw>n
 322      *
 323      * []|ab       Rule 1
 324      * [x|yzacw]   No match
 325      * [xy|zacw]   Rule 2
 326      * [xyq|cw]    Rule 4
 327      * [xyqn]|     Done
 328      */
 329     expect(UnicodeString("ab>x|yzacw;") +
 330            "za>q;" +
 331            "qc>r;" +
 332            "cw>n",
 333            "ab", "xyqn");
 334
 335     /* Test categories
 336      */
 337     UErrorCode status = U_ZERO_ERROR;
 338     UParseError parseError;
 339     Transliterator *t = Transliterator::createFromRules(
 340         "<ID>",
 341         UnicodeString("$dummy=").append((UChar)0xE100) +
 342         UnicodeString(";"
 343                       "$vowel=[aeiouAEIOU];"
 344                       "$lu=[:Lu:];"
 345                       "$vowel } $lu > '!';"
 346                       "$vowel > '&';"
 347                       "'!' { $lu > '^';"
 348                       "$lu > '*';"
 349                       "a > ERROR", ""),
 350         UTRANS_FORWARD, parseError,
 351         status);
 352     if (U_FAILURE(status)) {
 353         dataerrln("FAIL: RBT constructor failed - %s", u_errorName(status));
 354         return;
 355     }
 356     expect(*t, "abcdefgABCDEFGU", "&bcd&fg!^**!^*&");
 357     delete t;
 358 }
 359
 360 /**
 361  * Test inline set syntax and set variable syntax.
 362  */
 363 void TransliteratorTest::TestInlineSet(void) {
 364     expect("{ [:Ll:] } x > y; [:Ll:] > z;", "aAbxq", "zAyzz");
 365     expect("a[0-9]b > qrs", "1a7b9", "1qrs9");
 366
 367     expect(UnicodeString(
 368            "$digit = [0-9];"
 369            "$alpha = [a-zA-Z];"
 370            "$alphanumeric = [$digit $alpha];" // ***
 371            "$special = [^$alphanumeric];"     // ***
 372            "$alphanumeric > '-';"
 373            "$special > '*';", ""),
 374
 375            "thx-1138", "---*----");
 376 }
 377
 378 /**
 379  * Create some inverses and confirm that they work.  We have to be
 380  * careful how we do this, since the inverses will not be true
 381  * inverses -- we can't throw any random string at the composition
 382  * of the transliterators and expect the identity function.  F x
 383  * F' != I.  However, if we are careful about the input, we will
 384  * get the expected results.
 385  */
 386 void TransliteratorTest::TestRuleBasedInverse(void) {
 387     UnicodeString RULES =
 388         UnicodeString("abc>zyx;") +
 389         "ab>yz;" +
 390         "bc>zx;" +
 391         "ca>xy;" +
 392         "a>x;" +
 393         "b>y;" +
 394         "c>z;" +
 395
 396         "abc<zyx;" +
 397         "ab<yz;" +
 398         "bc<zx;" +
 399         "ca<xy;" +
 400         "a<x;" +
 401         "b<y;" +
 402         "c<z;" +
 403
 404         "";
 405
 406     const char* DATA[] = {
 407         // Careful here -- random strings will not work.  If we keep
 408         // the left side to the domain and the right side to the range
 409         // we will be okay though (left, abc; right xyz).
 410         "a", "x",
 411         "abcacab", "zyxxxyy",
 412         "caccb", "xyzzy",
 413     };
 414
 415     int32_t DATA_length = UPRV_LENGTHOF(DATA);
 416
 417     UErrorCode status = U_ZERO_ERROR;
 418     UParseError parseError;
 419     Transliterator *fwd = Transliterator::createFromRules("<ID>", RULES,
 420                                 UTRANS_FORWARD, parseError, status);
 421     Transliterator *rev = Transliterator::createFromRules("<ID>", RULES,
 422                                 UTRANS_REVERSE, parseError, status);
 423     if (U_FAILURE(status)) {
 424         errln("FAIL: RBT constructor failed");
 425         return;
 426     }
 427     for (int32_t i=0; i<DATA_length; i+=2) {
 428         expect(*fwd, DATA[i], DATA[i+1]);
 429         expect(*rev, DATA[i+1], DATA[i]);
 430     }
 431     delete fwd;
 432     delete rev;
 433 }
 434
 435 /**
 436  * Basic test of keyboard.
 437  */
 438 void TransliteratorTest::TestKeyboard(void) {
 439     UParseError parseError;
 440     UErrorCode status = U_ZERO_ERROR;
 441     Transliterator *t = Transliterator::createFromRules("<ID>",
 442                               UnicodeString("psch>Y;")
 443                               +"ps>y;"
 444                               +"ch>x;"
 445                               +"a>A;",
 446                               UTRANS_FORWARD, parseError,
 447                               status);
 448     if (U_FAILURE(status)) {
 449         errln("FAIL: RBT constructor failed");
 450         return;
 451     }
 452     const char* DATA[] = {
 453         // insertion, buffer
 454         "a", "A",
 455         "p", "Ap",
 456         "s", "Aps",
 457         "c", "Apsc",
 458         "a", "AycA",
 459         "psch", "AycAY",
 460         0, "AycAY", // null means finishKeyboardTransliteration
 461     };
 462
 463     keyboardAux(*t, DATA, UPRV_LENGTHOF(DATA));
 464     delete t;
 465 }
 466
 467 /**
 468  * Basic test of keyboard with cursor.
 469  */
 470 void TransliteratorTest::TestKeyboard2(void) {
 471     UParseError parseError;
 472     UErrorCode status = U_ZERO_ERROR;
 473     Transliterator *t = Transliterator::createFromRules("<ID>",
 474                               UnicodeString("ych>Y;")
 475                               +"ps>|y;"
 476                               +"ch>x;"
 477                               +"a>A;",
 478                               UTRANS_FORWARD, parseError,
 479                               status);
 480     if (U_FAILURE(status)) {
 481         errln("FAIL: RBT constructor failed");
 482         return;
 483     }
 484     const char* DATA[] = {
 485         // insertion, buffer
 486         "a", "A",
 487         "p", "Ap",
 488         "s", "Aps", // modified for rollback - "Ay",
 489         "c", "Apsc", // modified for rollback - "Ayc",
 490         "a", "AycA",
 491         "p", "AycAp",
 492         "s", "AycAps", // modified for rollback - "AycAy",
 493         "c", "AycApsc", // modified for rollback - "AycAyc",
 494         "h", "AycAY",
 495         0, "AycAY", // null means finishKeyboardTransliteration
 496     };
 497
 498     keyboardAux(*t, DATA, UPRV_LENGTHOF(DATA));
 499     delete t;
 500 }
 501
 502 /**
 503  * Test keyboard transliteration with back-replacement.
 504  */
 505 void TransliteratorTest::TestKeyboard3(void) {
 506     // We want th>z but t>y.  Furthermore, during keyboard
 507     // transliteration we want t>y then yh>z if t, then h are
 508     // typed.
 509     UnicodeString RULES("t>|y;"
 510                         "yh>z;");
 511
 512     const char* DATA[] = {
 513         // Column 1: characters to add to buffer (as if typed)
 514         // Column 2: expected appearance of buffer after
 515         //           keyboard xliteration.
 516         "a", "a",
 517         "b", "ab",
 518         "t", "abt", // modified for rollback - "aby",
 519         "c", "abyc",
 520         "t", "abyct", // modified for rollback - "abycy",
 521         "h", "abycz",
 522         0, "abycz", // null means finishKeyboardTransliteration
 523     };
 524
 525     UParseError parseError;
 526     UErrorCode status = U_ZERO_ERROR;
 527     Transliterator *t = Transliterator::createFromRules("<ID>", RULES, UTRANS_FORWARD, parseError, status);
 528     if (U_FAILURE(status)) {
 529         errln("FAIL: RBT constructor failed");
 530         return;
 531     }
 532     keyboardAux(*t, DATA, UPRV_LENGTHOF(DATA));
 533     delete t;
 534 }
 535
 536 void TransliteratorTest::keyboardAux(const Transliterator& t,
 537                                      const char* DATA[], int32_t DATA_length) {
 538     UErrorCode status = U_ZERO_ERROR;
 539     UTransPosition index={0, 0, 0, 0};
 540     UnicodeString s;
 541     for (int32_t i=0; i<DATA_length; i+=2) {
 542         UnicodeString log;
 543         if (DATA[i] != 0) {
 544             log = s + " + "
 545                 + DATA[i]
 546                 + " -> ";
 547             t.transliterate(s, index, DATA[i], status);
 548         } else {
 549             log = s + " => ";
 550             t.finishTransliteration(s, index);
 551         }
 552         // Show the start index '{' and the cursor '|'
 553         UnicodeString a, b, c;
 554         s.extractBetween(0, index.contextStart, a);
 555         s.extractBetween(index.contextStart, index.start, b);
 556         s.extractBetween(index.start, s.length(), c);
 557         log.append(a).
 558             append((UChar)LEFT_BRACE).
 559             append(b).
 560             append((UChar)PIPE).
 561             append(c);
 562         if (s == DATA[i+1] && U_SUCCESS(status)) {
 563             logln(log);
 564         } else {
 565             errln(UnicodeString("FAIL: ") + log + ", expected " + DATA[i+1]);
 566         }
 567     }
 568 }
 569
/**
 * Placeholder for the Latin-Arabic test.  The whole body is commented
 * out, so this test currently does nothing when run.
 */
void TransliteratorTest::TestArabic(void) {
// Test disabled for 2.0 until new Arabic transliterator can be written.
//    /*
//    const char* DATA[] = {
//        "Arabic", "\u062a\u062a\u0645\u062a\u0639\u0020"+
//                  "\u0627\u0644\u0644\u063a\u0629\u0020"+
//                  "\u0627\u0644\u0639\u0631\u0628\u0628\u064a\u0629\u0020"+
//                  "\u0628\u0628\u0646\u0638\u0645\u0020"+
//                  "\u0643\u062a\u0627\u0628\u0628\u064a\u0629\u0020"+
//                  "\u062c\u0645\u064a\u0644\u0629",
//    };
//    */
//
//    UChar ar_raw[] = {
//        0x062a, 0x062a, 0x0645, 0x062a, 0x0639, 0x0020, 0x0627,
//        0x0644, 0x0644, 0x063a, 0x0629, 0x0020, 0x0627, 0x0644,
//        0x0639, 0x0631, 0x0628, 0x0628, 0x064a, 0x0629, 0x0020,
//        0x0628, 0x0628, 0x0646, 0x0638, 0x0645, 0x0020, 0x0643,
//        0x062a, 0x0627, 0x0628, 0x0628, 0x064a, 0x0629, 0x0020,
//        0x062c, 0x0645, 0x064a, 0x0644, 0x0629, 0
//    };
//    UnicodeString ar(ar_raw);
//    UErrorCode status=U_ZERO_ERROR;
//    UParseError parseError;
//    Transliterator *t = Transliterator::createInstance("Latin-Arabic", UTRANS_FORWARD, parseError, status);
//    if (t == 0) {
//        errln("FAIL: createInstance failed");
//        return;
//    }
//    expect(*t, "Arabic", ar);
//    delete t;
}
 602
 603 /**
 604  * Compose the Kana transliterator forward and reverse and try
 605  * some strings that should come out unchanged.
 606  */
 607 void TransliteratorTest::TestCompoundKana(void) {
 608     UParseError parseError;
 609     UErrorCode status = U_ZERO_ERROR;
 610     Transliterator* t = Transliterator::createInstance("Latin-Hiragana;Hiragana-Latin", UTRANS_FORWARD, parseError, status);
 611     if (t == 0) {
 612         dataerrln("FAIL: construction of Latin-Hiragana;Hiragana-Latin failed - %s", u_errorName(status));
 613     } else {
 614         expect(*t, "aaaaa", "aaaaa");
 615         delete t;
 616     }
 617 }
 618
 619 /**
 620  * Compose the hex transliterators forward and reverse.
 621  */
 622 void TransliteratorTest::TestCompoundHex(void) {
 623     UParseError parseError;
 624     UErrorCode status = U_ZERO_ERROR;
 625     Transliterator* a = Transliterator::createInstance("Any-Hex", UTRANS_FORWARD, parseError, status);
 626     Transliterator* b = Transliterator::createInstance("Hex-Any", UTRANS_FORWARD, parseError, status);
 627     Transliterator* transab[] = { a, b };
 628     Transliterator* transba[] = { b, a };
 629     if (a == 0 || b == 0) {
 630         errln("FAIL: construction failed");
 631         delete a;
 632         delete b;
 633         return;
 634     }
 635     // Do some basic tests of a
 636     expect(*a, "01", UnicodeString("\\u0030\\u0031", ""));
 637     // Do some basic tests of b
 638     expect(*b, UnicodeString("\\u0030\\u0031", ""), "01");
 639
 640     Transliterator* ab = new CompoundTransliterator(transab, 2);
 641     UnicodeString s("abcde", "");
 642     expect(*ab, s, s);
 643
 644     UnicodeString str(s);
 645     a->transliterate(str);
 646     Transliterator* ba = new CompoundTransliterator(transba, 2);
 647     expect(*ba, str, str);
 648
 649     delete ab;
 650     delete ba;
 651     delete a;
 652     delete b;
 653 }
 654
 655 int gTestFilterClassID = 0;
 656 /**
 657  * Used by TestFiltering().
 658  */
 659 class TestFilter : public UnicodeFilter {
 660     virtual UnicodeFunctor* clone() const {
 661         return new TestFilter(*this);
 662     }
 663     virtual UBool contains(UChar32 c) const {
 664         return c != (UChar)0x0063 /*c*/;
 665     }
 666     // Stubs
 667     virtual UnicodeString& toPattern(UnicodeString& result,
 668                                      UBool /*escapeUnprintable*/) const {
 669         return result;
 670     }
 671     virtual UBool matchesIndexValue(uint8_t /*v*/) const {
 672         return FALSE;
 673     }
 674     virtual void addMatchSetTo(UnicodeSet& /*toUnionTo*/) const {}
 675 public:
 676     UClassID getDynamicClassID() const { return (UClassID)&gTestFilterClassID; }
 677 };
 678
 679 /**
 680  * Do some basic tests of filtering.
 681  */
 682 void TransliteratorTest::TestFiltering(void) {
 683     UParseError parseError;
 684     UErrorCode status = U_ZERO_ERROR;
 685     Transliterator* hex = Transliterator::createInstance("Any-Hex", UTRANS_FORWARD, parseError, status);
 686     if (hex == 0) {
 687         errln("FAIL: createInstance(Any-Hex) failed");
 688         return;
 689     }
 690     hex->adoptFilter(new TestFilter());
 691     UnicodeString s("abcde");
 692     hex->transliterate(s);
 693     UnicodeString exp("\\u0061\\u0062c\\u0064\\u0065", "");
 694     if (s == exp) {
 695         logln(UnicodeString("Ok:   \"") + exp + "\"");
 696     } else {
 697         logln(UnicodeString("FAIL: \"") + s + "\", wanted \"" + exp + "\"");
 698     }
 699
 700     // ICU4C ONLY. Do not find Transliterator.orphanFilter() in ICU4J.
 701     UnicodeFilter *f = hex->orphanFilter();
 702     if (f == NULL){
 703         errln("FAIL: orphanFilter() should get a UnicodeFilter");
 704     } else {
 705         delete f;
 706     }
 707     delete hex;
 708 }
 709
 710 /**
 711  * Test anchors
 712  */
 713 void TransliteratorTest::TestAnchors(void) {
 714     expect(UnicodeString("^a  > 0; a$ > 2 ; a > 1;", ""),
 715            "aaa",
 716            "012");
 717     expect(UnicodeString("$s=[z$]; $s{a>0; a}$s>2; a>1;", ""),
 718            "aaa",
 719            "012");
 720     expect(UnicodeString("^ab  > 01 ;"
 721            " ab  > |8 ;"
 722            "  b  > k ;"
 723            " 8x$ > 45 ;"
 724            " 8x  > 77 ;", ""),
 725
 726            "ababbabxabx",
 727            "018k7745");
 728     expect(UnicodeString("$s = [z$] ;"
 729            "$s{ab    > 01 ;"
 730            "   ab    > |8 ;"
 731            "    b    > k ;"
 732            "   8x}$s > 45 ;"
 733            "   8x    > 77 ;", ""),
 734
 735            "abzababbabxzabxabx",
 736            "01z018k45z01x45");
 737 }
 738
 739 /**
 740  * Test pattern quoting and escape mechanisms.
 741  */
 742 void TransliteratorTest::TestPatternQuoting(void) {
 743     // Array of 3n items
 744     // Each item is <rules>, <input>, <expected output>
 745     const UnicodeString DATA[] = {
 746         UnicodeString(UChar(0x4E01)) + ">'[male adult]'",
 747         UnicodeString(UChar(0x4E01)),
 748         "[male adult]"
 749     };
 750
 751     for (int32_t i=0; i<3; i+=3) {
 752         logln(UnicodeString("Pattern: ") + prettify(DATA[i]));
 753         UParseError parseError;
 754         UErrorCode status = U_ZERO_ERROR;
 755         Transliterator *t = Transliterator::createFromRules("<ID>", DATA[i], UTRANS_FORWARD, parseError, status);
 756         if (U_FAILURE(status)) {
 757             errln("RBT constructor failed");
 758         } else {
 759             expect(*t, DATA[i+1], DATA[i+2]);
 760         }
 761         delete t;
 762     }
 763 }
 764
 765 /**
 766  * Regression test for bugs found in Greek transliteration.
 767  */
 768 void TransliteratorTest::TestJ277(void) {
 769     UErrorCode status = U_ZERO_ERROR;
 770     UParseError parseError;
 771     Transliterator *gl = Transliterator::createInstance("Greek-Latin; NFD; [:M:]Remove; NFC", UTRANS_FORWARD, parseError, status);
 772     if (gl == NULL) {
 773         dataerrln("FAIL: createInstance(Greek-Latin) returned NULL - %s", u_errorName(status));
 774         return;
 775     }
 776
 777     UChar sigma = 0x3C3;
 778     UChar upsilon = 0x3C5;
 779     UChar nu = 0x3BD;
 780 //    UChar PHI = 0x3A6;
 781     UChar alpha = 0x3B1;
 782 //    UChar omega = 0x3C9;
 783 //    UChar omicron = 0x3BF;
 784 //    UChar epsilon = 0x3B5;
 785
 786     // sigma upsilon nu -> syn
 787     UnicodeString syn;
 788     syn.append(sigma).append(upsilon).append(nu);
 789     expect(*gl, syn, "syn");
 790
 791     // sigma alpha upsilon nu -> saun
 792     UnicodeString sayn;
 793     sayn.append(sigma).append(alpha).append(upsilon).append(nu);
 794     expect(*gl, sayn, "saun");
 795
 796     // Again, using a smaller rule set
 797     UnicodeString rules(
 798                 "$alpha   = \\u03B1;"
 799                 "$nu      = \\u03BD;"
 800                 "$sigma   = \\u03C3;"
 801                 "$ypsilon = \\u03C5;"
 802                 "$vowel   = [aeiouAEIOU$alpha$ypsilon];"
 803                 "s <>           $sigma;"
 804                 "a <>           $alpha;"
 805                 "u <>  $vowel { $ypsilon;"
 806                 "y <>           $ypsilon;"
 807                 "n <>           $nu;",
 808                 "");
 809     Transliterator *mini = Transliterator::createFromRules("mini", rules, UTRANS_REVERSE, parseError, status);
 810     if (U_FAILURE(status)) { errln("FAIL: Transliterator constructor failed"); return; }
 811     expect(*mini, syn, "syn");
 812     expect(*mini, sayn, "saun");
 813     delete mini;
 814     mini = NULL;
 815
 816 #if !UCONFIG_NO_FORMATTING
 817     // Transliterate the Greek locale data
 818     Locale el("el");
 819     DateFormatSymbols syms(el, status);
 820     if (U_FAILURE(status)) { errln("FAIL: Transliterator constructor failed"); return; }
 821     int32_t i, count;
 822     const UnicodeString* data = syms.getMonths(count);
 823     for (i=0; i<count; ++i) {
 824         if (data[i].length() == 0) {
 825             continue;
 826         }
 827         UnicodeString out(data[i]);
 828         gl->transliterate(out);
 829         UBool ok = TRUE;
 830         if (data[i].length() >= 2 && out.length() >= 2 &&
 831             u_isupper(data[i].charAt(0)) && u_islower(data[i].charAt(1))) {
 832             if (!(u_isupper(out.charAt(0)) && u_islower(out.charAt(1)))) {
 833                 ok = FALSE;
 834             }
 835         }
 836         if (ok) {
 837             logln(prettify(data[i] + " -> " + out));
 838         } else {
 839             errln(UnicodeString("FAIL: ") + prettify(data[i] + " -> " + out));
 840         }
 841     }
 842 #endif
 843
 844     delete gl;
 845 }
 846
 847 /**
 848  * Prefix, suffix support in hex transliterators
 849  */
 850 void TransliteratorTest::TestJ243(void) {
 851     UErrorCode ec = U_ZERO_ERROR;
 852
 853     // Test default Hex-Any, which should handle
 854     // \u, \U, u+, and U+
 855     Transliterator *hex =
 856         Transliterator::createInstance("Hex-Any", UTRANS_FORWARD, ec);
 857     if (assertSuccess("getInstance", ec)) {
 858         expect(*hex, UnicodeString("\\u0041+\\U00000042,U+0043uU+0044z", ""), "A+B,CuDz");
 859     }
 860     delete hex;
 861
 862 //    // Try a custom Hex-Unicode
 863 //    // \uXXXX and &#xXXXX;
 864 //    ec = U_ZERO_ERROR;
 865 //    HexToUnicodeTransliterator hex2(UnicodeString("\\\\u###0;&\\#x###0\\;", ""), ec);
 866 //    expect(hex2, UnicodeString("\\u61\\u062\\u0063\\u00645\\u66x&#x30;&#x031;&#x0032;&#x00033;", ""),
 867 //           "abcd5fx012&#x00033;");
 868 //    // Try custom Any-Hex (default is tested elsewhere)
 869 //    ec = U_ZERO_ERROR;
 870 //    UnicodeToHexTransliterator hex3(UnicodeString("&\\#x###0;", ""), ec);
 871 //    expect(hex3, "012", "&#x30;&#x31;&#x32;");
 872 }
 873
 874 /**
 875  * Parsers need better syntax error messages.
 876  */
 877 void TransliteratorTest::TestJ329(void) {
 878
 879     struct { UBool containsErrors; const char* rule; } DATA[] = {
 880         { FALSE, "a > b; c > d" },
 881         { TRUE,  "a > b; no operator; c > d" },
 882     };
 883     int32_t DATA_length = UPRV_LENGTHOF(DATA);
 884
 885     for (int32_t i=0; i<DATA_length; ++i) {
 886         UErrorCode status = U_ZERO_ERROR;
 887         UParseError parseError;
 888         Transliterator *rbt = Transliterator::createFromRules("<ID>",
 889                                     DATA[i].rule,
 890                                     UTRANS_FORWARD,
 891                                     parseError,
 892                                     status);
 893         UBool gotError = U_FAILURE(status);
 894         UnicodeString desc(DATA[i].rule);
 895         desc.append(gotError ? " -> error" : " -> no error");
 896         if (gotError) {
 897             desc = desc + ", ParseError code=" + u_errorName(status) +
 898                 " line=" + parseError.line +
 899                 " offset=" + parseError.offset +
 900                 " context=" + parseError.preContext;
 901         }
 902         if (gotError == DATA[i].containsErrors) {
 903             logln(UnicodeString("Ok:   ") + desc);
 904         } else {
 905             errln(UnicodeString("FAIL: ") + desc);
 906         }
 907         delete rbt;
 908     }
 909 }
 910
 911 /**
 912  * Test segments and segment references.
 913  */
 914 void TransliteratorTest::TestSegments(void) {
 915     // Array of 3n items
 916     // Each item is <rules>, <input>, <expected output>
 917     UnicodeString DATA[] = {
 918         "([a-z]) '.' ([0-9]) > $2 '-' $1",
 919         "abc.123.xyz.456",
 920         "ab1-c23.xy4-z56",
 921
 922         // nested
 923         "(([a-z])([0-9])) > $1 '.' $2 '.' $3;",
 924         "a1 b2",
 925         "a1.a.1 b2.b.2",
 926     };
 927     int32_t DATA_length = UPRV_LENGTHOF(DATA);
 928
 929     for (int32_t i=0; i<DATA_length; i+=3) {
 930         logln("Pattern: " + prettify(DATA[i]));
 931         UParseError parseError;
 932         UErrorCode status = U_ZERO_ERROR;
 933         Transliterator *t = Transliterator::createFromRules("ID", DATA[i], UTRANS_FORWARD, parseError, status);
 934         if (U_FAILURE(status)) {
 935             errln("FAIL: RBT constructor");
 936         } else {
 937             expect(*t, DATA[i+1], DATA[i+2]);
 938         }
 939         delete t;
 940     }
 941 }
 942
 943 /**
 944  * Test cursor positioning outside of the key
 945  */
 946 void TransliteratorTest::TestCursorOffset(void) {
 947     // Array of 3n items
 948     // Each item is <rules>, <input>, <expected output>
 949     UnicodeString DATA[] = {
 950         "pre {alpha} post > | @ ALPHA ;"
 951         "eALPHA > beta ;"
 952         "pre {beta} post > BETA @@ | ;"
 953         "post > xyz",
 954
 955         "prealphapost prebetapost",
 956
 957         "prbetaxyz preBETApost",
 958     };
 959     int32_t DATA_length = UPRV_LENGTHOF(DATA);
 960
 961     for (int32_t i=0; i<DATA_length; i+=3) {
 962         logln("Pattern: " + prettify(DATA[i]));
 963         UParseError parseError;
 964         UErrorCode status = U_ZERO_ERROR;
 965         Transliterator *t = Transliterator::createFromRules("<ID>", DATA[i], UTRANS_FORWARD, parseError, status);
 966         if (U_FAILURE(status)) {
 967             errln("FAIL: RBT constructor");
 968         } else {
 969             expect(*t, DATA[i+1], DATA[i+2]);
 970         }
 971         delete t;
 972     }
 973 }
 974
 975 /**
 976  * Test zero length and > 1 char length variable values.  Test
 977  * use of variable refs in UnicodeSets.
 978  */
 979 void TransliteratorTest::TestArbitraryVariableValues(void) {
 980     // Array of 3n items
 981     // Each item is <rules>, <input>, <expected output>
 982     UnicodeString DATA[] = {
 983         "$abe = ab;"
 984         "$pat = x[yY]z;"
 985         "$ll  = 'a-z';"
 986         "$llZ = [$ll];"
 987         "$llY = [$ll$pat];"
 988         "$emp = ;"
 989
 990         "$abe > ABE;"
 991         "$pat > END;"
 992         "$llZ > 1;"
 993         "$llY > 2;"
 994         "7$emp 8 > 9;"
 995         "",
 996
 997         "ab xYzxyz stY78",
 998         "ABE ENDEND 1129",
 999     };
1000     int32_t DATA_length = UPRV_LENGTHOF(DATA);
1001
1002     for (int32_t i=0; i<DATA_length; i+=3) {
1003         logln("Pattern: " + prettify(DATA[i]));
1004         UParseError parseError;
1005         UErrorCode status = U_ZERO_ERROR;
1006         Transliterator *t = Transliterator::createFromRules("<ID>", DATA[i], UTRANS_FORWARD, parseError, status);
1007         if (U_FAILURE(status)) {
1008             errln("FAIL: RBT constructor");
1009         } else {
1010             expect(*t, DATA[i+1], DATA[i+2]);
1011         }
1012         delete t;
1013     }
1014 }
1015
1016 /**
1017  * Confirm that the contextStart, contextLimit, start, and limit
1018  * behave correctly. J474.
1019  */
1020 void TransliteratorTest::TestPositionHandling(void) {
1021     // Array of 3n items
1022     // Each item is <rules>, <input>, <expected output>
1023     const char* DATA[] = {
1024         "a{t} > SS ; {t}b > UU ; {t} > TT ;",
1025         "xtat txtb", // pos 0,9,0,9
1026         "xTTaSS TTxUUb",
1027
1028         "a{t} > SS ; {t}b > UU ; {t} > TT ; a > A ; b > B ;",
1029         "xtat txtb", // pos 2,9,3,8
1030         "xtaSS TTxUUb",
1031
1032         "a{t} > SS ; {t}b > UU ; {t} > TT ; a > A ; b > B ;",
1033         "xtat txtb", // pos 3,8,3,8
1034         "xtaTT TTxTTb",
1035     };
1036
1037     // Array of 4n positions -- these go with the DATA array
1038     // They are: contextStart, contextLimit, start, limit
1039     int32_t POS[] = {
1040         0, 9, 0, 9,
1041         2, 9, 3, 8,
1042         3, 8, 3, 8,
1043     };
1044
1045     int32_t n = UPRV_LENGTHOF(DATA) / 3;
1046     for (int32_t i=0; i<n; i++) {
1047         UErrorCode status = U_ZERO_ERROR;
1048         UParseError parseError;
1049         Transliterator *t = Transliterator::createFromRules("<ID>",
1050                                 DATA[3*i], UTRANS_FORWARD, parseError, status);
1051         if (U_FAILURE(status)) {
1052             delete t;
1053             errln("FAIL: RBT constructor");
1054             return;
1055         }
1056         UTransPosition pos;
1057         pos.contextStart= POS[4*i];
1058         pos.contextLimit = POS[4*i+1];
1059         pos.start = POS[4*i+2];
1060         pos.limit = POS[4*i+3];
1061         UnicodeString rsource(DATA[3*i+1]);
1062         t->transliterate(rsource, pos, status);
1063         if (U_FAILURE(status)) {
1064             delete t;
1065             errln("FAIL: transliterate");
1066             return;
1067         }
1068         t->finishTransliteration(rsource, pos);
1069         expectAux(DATA[3*i],
1070                   DATA[3*i+1],
1071                   rsource,
1072                   DATA[3*i+2]);
1073         delete t;
1074     }
1075 }
1076
1077 /**
1078  * Test the Hiragana-Katakana transliterator.
1079  */
1080 void TransliteratorTest::TestHiraganaKatakana(void) {
1081     UParseError parseError;
1082     UErrorCode status = U_ZERO_ERROR;
1083     Transliterator* hk = Transliterator::createInstance("Hiragana-Katakana", UTRANS_FORWARD, parseError, status);
1084     Transliterator* kh = Transliterator::createInstance("Katakana-Hiragana", UTRANS_FORWARD, parseError, status);
1085     if (hk == 0 || kh == 0) {
1086         dataerrln("FAIL: createInstance failed - %s", u_errorName(status));
1087         delete hk;
1088         delete kh;
1089         return;
1090     }
1091
1092     // Array of 3n items
1093     // Each item is "hk"|"kh"|"both", <Hiragana>, <Katakana>
1094     const char* DATA[] = {
1095         "both",
1096         "\\u3042\\u3090\\u3099\\u3092\\u3050",
1097         "\\u30A2\\u30F8\\u30F2\\u30B0",
1098
1099         "kh",
1100         "\\u307C\\u3051\\u3060\\u3042\\u3093\\u30FC",
1101         "\\u30DC\\u30F6\\u30C0\\u30FC\\u30F3\\u30FC",
1102     };
1103     int32_t DATA_length = UPRV_LENGTHOF(DATA);
1104
1105     for (int32_t i=0; i<DATA_length; i+=3) {
1106         UnicodeString h = CharsToUnicodeString(DATA[i+1]);
1107         UnicodeString k = CharsToUnicodeString(DATA[i+2]);
1108         switch (*DATA[i]) {
1109         case 0x68: //'h': // Hiragana-Katakana
1110             expect(*hk, h, k);
1111             break;
1112         case 0x6B: //'k': // Katakana-Hiragana
1113             expect(*kh, k, h);
1114             break;
1115         case 0x62: //'b': // both
1116             expect(*hk, h, k);
1117             expect(*kh, k, h);
1118             break;
1119         }
1120     }
1121     delete hk;
1122     delete kh;
1123 }
1124
1125 /**
1126  * Test cloning / copy constructor of RBT.
1127  */
1128 void TransliteratorTest::TestCopyJ476(void) {
1129     // The real test here is what happens when the destructors are
1130     // called.  So we let one object get destructed, and check to
1131     // see that its copy still works.
1132     Transliterator *t2 = 0;
1133     {
1134         UParseError parseError;
1135         UErrorCode status = U_ZERO_ERROR;
1136         Transliterator *t1 = Transliterator::createFromRules("t1",
1137             "a>A;b>B;'foo'+>'bar'", UTRANS_FORWARD, parseError, status);
1138         if (U_FAILURE(status)) {
1139             errln("FAIL: RBT constructor");
1140             return;
1141         }
1142         t2 = t1->clone(); // Call copy constructor under the covers.
1143         expect(*t1, "abcfoofoo", "ABcbar");
1144         delete t1;
1145     }
1146     expect(*t2, "abcfoofoo", "ABcbar");
1147     delete t2;
1148 }
1149
1150 /**
1151  * Test inter-Indic transliterators.  These are composed.
1152  * ICU4C Jitterbug 483.
1153  */
1154 void TransliteratorTest::TestInterIndic(void) {
1155     UnicodeString ID("Devanagari-Gujarati", "");
1156     UErrorCode status = U_ZERO_ERROR;
1157     UParseError parseError;
1158     Transliterator* dg = Transliterator::createInstance(ID, UTRANS_FORWARD, parseError, status);
1159     if (dg == 0) {
1160         dataerrln("FAIL: createInstance(" + ID + ") returned NULL - " + u_errorName(status));
1161         return;
1162     }
1163     UnicodeString id = dg->getID();
1164     if (id != ID) {
1165         errln("FAIL: createInstance(" + ID + ")->getID() => " + id);
1166     }
1167     UnicodeString dev = CharsToUnicodeString("\\u0901\\u090B\\u0925");
1168     UnicodeString guj = CharsToUnicodeString("\\u0A81\\u0A8B\\u0AA5");
1169     expect(*dg, dev, guj);
1170     delete dg;
1171 }
1172
1173 /**
1174  * Test filter syntax in IDs. (J918)
1175  */
1176 void TransliteratorTest::TestFilterIDs(void) {
1177     // Array of 3n strings:
1178     // <id>, <inverse id>, <input>, <expected output>
1179     const char* DATA[] = {
1180         "[aeiou]Any-Hex", // ID
1181         "[aeiou]Hex-Any", // expected inverse ID
1182         "quizzical",      // src
1183         "q\\u0075\\u0069zz\\u0069c\\u0061l", // expected ID.translit(src)
1184
1185         "[aeiou]Any-Hex;[^5]Hex-Any",
1186         "[^5]Any-Hex;[aeiou]Hex-Any",
1187         "quizzical",
1188         "q\\u0075izzical",
1189
1190         "[abc]Null",
1191         "[abc]Null",
1192         "xyz",
1193         "xyz",
1194     };
1195     enum { DATA_length = UPRV_LENGTHOF(DATA) };
1196
1197     for (int i=0; i<DATA_length; i+=4) {
1198         UnicodeString ID(DATA[i], "");
1199         UnicodeString uID(DATA[i+1], "");
1200         UnicodeString data2(DATA[i+2], "");
1201         UnicodeString data3(DATA[i+3], "");
1202         UParseError parseError;
1203         UErrorCode status = U_ZERO_ERROR;
1204         Transliterator *t = Transliterator::createInstance(ID, UTRANS_FORWARD, parseError, status);
1205         if (t == 0) {
1206             errln("FAIL: createInstance(" + ID + ") returned NULL");
1207             return;
1208         }
1209         expect(*t, data2, data3);
1210
1211         // Check the ID
1212         if (ID != t->getID()) {
1213             errln("FAIL: createInstance(" + ID + ").getID() => " +
1214                   t->getID());
1215         }
1216
1217         // Check the inverse
1218         Transliterator *u = t->createInverse(status);
1219         if (u == 0) {
1220             errln("FAIL: " + ID + ".createInverse() returned NULL");
1221         } else if (u->getID() != uID) {
1222             errln("FAIL: " + ID + ".createInverse().getID() => " +
1223                   u->getID() + ", expected " + uID);
1224         }
1225
1226         delete t;
1227         delete u;
1228     }
1229 }
1230
1231 /**
1232  * Test the case mapping transliterators.
1233  */
1234 void TransliteratorTest::TestCaseMap(void) {
1235     UParseError parseError;
1236     UErrorCode status = U_ZERO_ERROR;
1237     Transliterator* toUpper =
1238         Transliterator::createInstance("Any-Upper[^xyzXYZ]", UTRANS_FORWARD, parseError, status);
1239     Transliterator* toLower =
1240         Transliterator::createInstance("Any-Lower[^xyzXYZ]", UTRANS_FORWARD, parseError, status);
1241     Transliterator* toTitle =
1242         Transliterator::createInstance("Any-Title[^xyzXYZ]", UTRANS_FORWARD, parseError, status);
1243     if (toUpper==0 || toLower==0 || toTitle==0) {
1244         errln("FAIL: createInstance returned NULL");
1245         delete toUpper;
1246         delete toLower;
1247         delete toTitle;
1248         return;
1249     }
1250
1251     expect(*toUpper, "The quick brown fox jumped over the lazy dogs.",
1252            "THE QUICK BROWN FOx JUMPED OVER THE LAzy DOGS.");
1253     expect(*toLower, "The quIck brown fOX jUMPED OVER THE LAzY dogs.",
1254            "the quick brown foX jumped over the lazY dogs.");
1255     expect(*toTitle, "the quick brown foX can't jump over the laZy dogs.",
1256            "The Quick Brown FoX Can't Jump Over The LaZy Dogs.");
1257
1258     delete toUpper;
1259     delete toLower;
1260     delete toTitle;
1261 }
1262
1263 /**
1264  * Test the name mapping transliterators.
1265  */
1266 void TransliteratorTest::TestNameMap(void) {
1267     UParseError parseError;
1268     UErrorCode status = U_ZERO_ERROR;
1269     Transliterator* uni2name =
1270         Transliterator::createInstance("Any-Name[^abc]", UTRANS_FORWARD, parseError, status);
1271     Transliterator* name2uni =
1272         Transliterator::createInstance("Name-Any", UTRANS_FORWARD, parseError, status);
1273     if (uni2name==0 || name2uni==0) {
1274         errln("FAIL: createInstance returned NULL");
1275         delete uni2name;
1276         delete name2uni;
1277         return;
1278     }
1279
1280     // Careful:  CharsToUS will convert "\\N" => "N"; use "\\\\N" for \N
1281     expect(*uni2name, CharsToUnicodeString("\\u00A0abc\\u4E01\\u00B5\\u0A81\\uFFFD\\u0004\\u0009\\u0081\\uFFFF"),
1282            CharsToUnicodeString("\\\\N{NO-BREAK SPACE}abc\\\\N{CJK UNIFIED IDEOGRAPH-4E01}\\\\N{MICRO SIGN}\\\\N{GUJARATI SIGN CANDRABINDU}\\\\N{REPLACEMENT CHARACTER}\\\\N{<control-0004>}\\\\N{<control-0009>}\\\\N{<control-0081>}\\\\N{<noncharacter-FFFF>}"));
1283     expect(*name2uni, UNICODE_STRING_SIMPLE("{\\N { NO-BREAK SPACE}abc\\N{  CJK UNIFIED  IDEOGRAPH-4E01  }\\N{x\\N{MICRO SIGN}\\N{GUJARATI SIGN CANDRABINDU}\\N{REPLACEMENT CHARACTER}\\N{<control-0004>}\\N{<control-0009>}\\N{<control-0081>}\\N{<noncharacter-FFFF>}\\N{<control-0004>}\\N{"),
1284            CharsToUnicodeString("{\\u00A0abc\\u4E01\\\\N{x\\u00B5\\u0A81\\uFFFD\\u0004\\u0009\\u0081\\uFFFF\\u0004\\\\N{"));
1285
1286     delete uni2name;
1287     delete name2uni;
1288
1289     // round trip
1290     Transliterator* t =
1291         Transliterator::createInstance("Any-Name;Name-Any", UTRANS_FORWARD, parseError, status);
1292     if (t==0) {
1293         errln("FAIL: createInstance returned NULL");
1294         delete t;
1295         return;
1296     }
1297
1298     // Careful:  CharsToUS will convert "\\N" => "N"; use "\\\\N" for \N
1299     UnicodeString s = CharsToUnicodeString("{\\u00A0abc\\u4E01\\\\N{x\\u00B5\\u0A81\\uFFFD\\u0004\\u0009\\u0081\\uFFFF\\u0004\\\\N{");
1300     expect(*t, s, s);
1301     delete t;
1302 }
1303
1304 /**
1305  * Test liberalized ID syntax.  1006c
1306  */
1307 void TransliteratorTest::TestLiberalizedID(void) {
1308     // Some test cases have an expected getID() value of NULL.  This
1309     // means I have disabled the test case for now.  This stuff is
1310     // still under development, and I haven't decided whether to make
1311     // getID() return canonical case yet.  It will all get rewritten
1312     // with the move to Source-Target/Variant IDs anyway. [aliu]
1313     const char* DATA[] = {
1314         "latin-greek", NULL /*"Latin-Greek"*/, "case insensitivity",
1315         "  Null  ", "Null", "whitespace",
1316         " Latin[a-z]-Greek  ", "[a-z]Latin-Greek", "inline filter",
1317         "  null  ; latin-greek  ", NULL /*"Null;Latin-Greek"*/, "compound whitespace",
1318     };
1319     const int32_t DATA_length = UPRV_LENGTHOF(DATA);
1320     UParseError parseError;
1321     UErrorCode status= U_ZERO_ERROR;
1322     for (int32_t i=0; i<DATA_length; i+=3) {
1323         Transliterator *t = Transliterator::createInstance(DATA[i], UTRANS_FORWARD, parseError, status);
1324         if (t == 0) {
1325             dataerrln(UnicodeString("FAIL: ") + DATA[i+2] +
1326                   " cannot create ID \"" + DATA[i] + "\" - " + u_errorName(status));
1327         } else {
1328             UnicodeString exp;
1329             if (DATA[i+1]) {
1330                 exp = UnicodeString(DATA[i+1], "");
1331             }
1332             // Don't worry about getID() if the expected char*
1333             // is NULL -- see above.
1334             if (exp.length() == 0 || exp == t->getID()) {
1335                 logln(UnicodeString("Ok: ") + DATA[i+2] +
1336                       " create ID \"" + DATA[i] + "\" => \"" +
1337                       exp + "\"");
1338             } else {
1339                 errln(UnicodeString("FAIL: ") + DATA[i+2] +
1340                       " create ID \"" + DATA[i] + "\" => \"" +
1341                       t->getID() + "\", exp \"" + exp + "\"");
1342             }
1343             delete t;
1344         }
1345     }
1346 }
1347
1348 /* test for Jitterbug 912 */
1349 void TransliteratorTest::TestCreateInstance(){
1350     const char* FORWARD = "F";
1351     const char* REVERSE = "R";
1352     const char* DATA[] = {
1353         // Column 1: id
1354         // Column 2: direction
1355         // Column 3: expected ID, or "" if expect failure
1356         "Latin-Hangul", REVERSE, "Hangul-Latin", // JB#912
1357
1358         // JB#2689: bad compound causes crash
1359         "InvalidSource-InvalidTarget", FORWARD, "",
1360         "InvalidSource-InvalidTarget", REVERSE, "",
1361         "Hex-Any;InvalidSource-InvalidTarget", FORWARD, "",
1362         "Hex-Any;InvalidSource-InvalidTarget", REVERSE, "",
1363         "InvalidSource-InvalidTarget;Hex-Any", FORWARD, "",
1364         "InvalidSource-InvalidTarget;Hex-Any", REVERSE, "",
1365
1366         NULL
1367     };
1368
1369     for (int32_t i=0; DATA[i]; i+=3) {
1370         UParseError err;
1371         UErrorCode ec = U_ZERO_ERROR;
1372         UnicodeString id(DATA[i]);
1373         UTransDirection dir = (DATA[i+1]==FORWARD)?
1374             UTRANS_FORWARD:UTRANS_REVERSE;
1375         UnicodeString expID(DATA[i+2]);
1376         Transliterator* t =
1377             Transliterator::createInstance(id,dir,err,ec);
1378         UnicodeString newID;
1379         if (t) {
1380             newID = t->getID();
1381         }
1382         UBool ok = (newID == expID);
1383         if (!t) {
1384             newID = u_errorName(ec);
1385         }
1386         if (ok) {
1387             logln((UnicodeString)"Ok: createInstance(" +
1388                   id + "," + DATA[i+1] + ") => " + newID);
1389         } else {
1390             dataerrln((UnicodeString)"FAIL: createInstance(" +
1391                   id + "," + DATA[i+1] + ") => " + newID +
1392                   ", expected " + expID);
1393         }
1394         delete t;
1395     }
1396 }
1397
1398 /**
1399  * Test the normalization transliterator.
1400  */
1401 void TransliteratorTest::TestNormalizationTransliterator() {
1402     // THE FOLLOWING TWO TABLES ARE COPIED FROM com.ibm.test.normalizer.BasicTest
1403     // PLEASE KEEP THEM IN SYNC WITH BasicTest.
1404     const char* CANON[] = {
1405         // Input               Decomposed            Composed
1406         "cat",                "cat",                "cat"               ,
1407         "\\u00e0ardvark",      "a\\u0300ardvark",     "\\u00e0ardvark"    ,
1408
1409         "\\u1e0a",             "D\\u0307",            "\\u1e0a"            , // D-dot_above
1410         "D\\u0307",            "D\\u0307",            "\\u1e0a"            , // D dot_above
1411
1412         "\\u1e0c\\u0307",       "D\\u0323\\u0307",      "\\u1e0c\\u0307"      , // D-dot_below dot_above
1413         "\\u1e0a\\u0323",       "D\\u0323\\u0307",      "\\u1e0c\\u0307"      , // D-dot_above dot_below
1414         "D\\u0307\\u0323",      "D\\u0323\\u0307",      "\\u1e0c\\u0307"      , // D dot_below dot_above
1415
1416         "\\u1e10\\u0307\\u0323", "D\\u0327\\u0323\\u0307","\\u1e10\\u0323\\u0307", // D dot_below cedilla dot_above
1417         "D\\u0307\\u0328\\u0323","D\\u0328\\u0323\\u0307","\\u1e0c\\u0328\\u0307", // D dot_above ogonek dot_below
1418
1419         "\\u1E14",             "E\\u0304\\u0300",      "\\u1E14"            , // E-macron-grave
1420         "\\u0112\\u0300",       "E\\u0304\\u0300",      "\\u1E14"            , // E-macron + grave
1421         "\\u00c8\\u0304",       "E\\u0300\\u0304",      "\\u00c8\\u0304"      , // E-grave + macron
1422
1423         "\\u212b",             "A\\u030a",            "\\u00c5"            , // angstrom_sign
1424         "\\u00c5",             "A\\u030a",            "\\u00c5"            , // A-ring
1425
1426         "\\u00fdffin",         "y\\u0301ffin",        "\\u00fdffin"        ,    //updated with 3.0
1427         "\\u00fd\\uFB03n",      "y\\u0301\\uFB03n",     "\\u00fd\\uFB03n"     , //updated with 3.0
1428
1429         "Henry IV",           "Henry IV",           "Henry IV"          ,
1430         "Henry \\u2163",       "Henry \\u2163",       "Henry \\u2163"      ,
1431
1432         "\\u30AC",             "\\u30AB\\u3099",       "\\u30AC"            , // ga (Katakana)
1433         "\\u30AB\\u3099",       "\\u30AB\\u3099",       "\\u30AC"            , // ka + ten
1434         "\\uFF76\\uFF9E",       "\\uFF76\\uFF9E",       "\\uFF76\\uFF9E"      , // hw_ka + hw_ten
1435         "\\u30AB\\uFF9E",       "\\u30AB\\uFF9E",       "\\u30AB\\uFF9E"      , // ka + hw_ten
1436         "\\uFF76\\u3099",       "\\uFF76\\u3099",       "\\uFF76\\u3099"      , // hw_ka + ten
1437
1438         "A\\u0300\\u0316",      "A\\u0316\\u0300",      "\\u00C0\\u0316"      ,
1439         0 // end
1440     };
1441
1442     const char* COMPAT[] = {
1443         // Input               Decomposed            Composed
1444         "\\uFB4f",             "\\u05D0\\u05DC",       "\\u05D0\\u05DC"     , // Alef-Lamed vs. Alef, Lamed
1445
1446         "\\u00fdffin",         "y\\u0301ffin",        "\\u00fdffin"        ,    //updated for 3.0
1447         "\\u00fd\\uFB03n",      "y\\u0301ffin",        "\\u00fdffin"        , // ffi ligature -> f + f + i
1448
1449         "Henry IV",           "Henry IV",           "Henry IV"          ,
1450         "Henry \\u2163",       "Henry IV",           "Henry IV"          ,
1451
1452         "\\u30AC",             "\\u30AB\\u3099",       "\\u30AC"            , // ga (Katakana)
1453         "\\u30AB\\u3099",       "\\u30AB\\u3099",       "\\u30AC"            , // ka + ten
1454
1455         "\\uFF76\\u3099",       "\\u30AB\\u3099",       "\\u30AC"            , // hw_ka + ten
1456         0 // end
1457     };
1458
1459     int32_t i;
1460     UParseError parseError;
1461     UErrorCode status = U_ZERO_ERROR;
1462     Transliterator* NFD = Transliterator::createInstance("NFD", UTRANS_FORWARD, parseError, status);
1463     Transliterator* NFC = Transliterator::createInstance("NFC", UTRANS_FORWARD, parseError, status);
1464     if (!NFD || !NFC) {
1465         dataerrln("FAIL: createInstance failed: %s", u_errorName(status));
1466         delete NFD;
1467         delete NFC;
1468         return;
1469     }
1470     for (i=0; CANON[i]; i+=3) {
1471         UnicodeString in = CharsToUnicodeString(CANON[i]);
1472         UnicodeString expd = CharsToUnicodeString(CANON[i+1]);
1473         UnicodeString expc = CharsToUnicodeString(CANON[i+2]);
1474         expect(*NFD, in, expd);
1475         expect(*NFC, in, expc);
1476     }
1477     delete NFD;
1478     delete NFC;
1479
1480     Transliterator* NFKD = Transliterator::createInstance("NFKD", UTRANS_FORWARD, parseError, status);
1481     Transliterator* NFKC = Transliterator::createInstance("NFKC", UTRANS_FORWARD, parseError, status);
1482     if (!NFKD || !NFKC) {
1483         dataerrln("FAIL: createInstance failed");
1484         delete NFKD;
1485         delete NFKC;
1486         return;
1487     }
1488     for (i=0; COMPAT[i]; i+=3) {
1489         UnicodeString in = CharsToUnicodeString(COMPAT[i]);
1490         UnicodeString expkd = CharsToUnicodeString(COMPAT[i+1]);
1491         UnicodeString expkc = CharsToUnicodeString(COMPAT[i+2]);
1492         expect(*NFKD, in, expkd);
1493         expect(*NFKC, in, expkc);
1494     }
1495     delete NFKD;
1496     delete NFKC;
1497
1498     UParseError pe;
1499     status = U_ZERO_ERROR;
1500     Transliterator *t = Transliterator::createInstance("NFD; [x]Remove",
1501                                                        UTRANS_FORWARD,
1502                                                        pe, status);
1503     if (t == 0) {
1504         errln("FAIL: createInstance failed");
1505     }
1506     expect(*t, CharsToUnicodeString("\\u010dx"),
1507            CharsToUnicodeString("c\\u030C"));
1508     delete t;
1509 }
1510
1511 /**
1512  * Test compound RBT rules.
1513  */
1514 void TransliteratorTest::TestCompoundRBT(void) {
1515     // Careful with spacing and ';' here:  Phrase this exactly
1516     // as toRules() is going to return it.  If toRules() changes
1517     // with regard to spacing or ';', then adjust this string.
1518     UnicodeString rule("::Hex-Any;\n"
1519                        "::Any-Lower;\n"
1520                        "a > '.A.';\n"
1521                        "b > '.B.';\n"
1522                        "::[^t]Any-Upper;", "");
1523     UParseError parseError;
1524     UErrorCode status = U_ZERO_ERROR;
1525     Transliterator *t = Transliterator::createFromRules("Test", rule, UTRANS_FORWARD, parseError, status);
1526     if (t == 0) {
1527         errln("FAIL: createFromRules failed");
1528         return;
1529     }
1530     expect(*t, UNICODE_STRING_SIMPLE("\\u0043at in the hat, bat on the mat"),
1531            "C.A.t IN tHE H.A.t, .B..A.t ON tHE M.A.t");
1532     UnicodeString r;
1533     t->toRules(r, TRUE);
1534     if (r == rule) {
1535         logln((UnicodeString)"OK: toRules() => " + r);
1536     } else {
1537         errln((UnicodeString)"FAIL: toRules() => " + r +
1538               ", expected " + rule);
1539     }
1540     delete t;
1541
1542     // Now test toRules
1543     t = Transliterator::createInstance("Greek-Latin; Latin-Cyrillic", UTRANS_FORWARD, parseError, status);
1544     if (t == 0) {
1545         dataerrln("FAIL: createInstance failed - %s", u_errorName(status));
1546         return;
1547     }
1548     UnicodeString exp("::Greek-Latin;\n::Latin-Cyrillic;");
1549     t->toRules(r, TRUE);
1550     if (r != exp) {
1551         errln((UnicodeString)"FAIL: toRules() => " + r +
1552               ", expected " + exp);
1553     } else {
1554         logln((UnicodeString)"OK: toRules() => " + r);
1555     }
1556     delete t;
1557
1558     // Round trip the result of toRules
1559     t = Transliterator::createFromRules("Test", r, UTRANS_FORWARD, parseError, status);
1560     if (t == 0) {
1561         errln("FAIL: createFromRules #2 failed");
1562         return;
1563     } else {
1564         logln((UnicodeString)"OK: createFromRules(" + r + ") succeeded");
1565     }
1566
1567     // Test toRules again
1568     t->toRules(r, TRUE);
1569     if (r != exp) {
1570         errln((UnicodeString)"FAIL: toRules() => " + r +
1571               ", expected " + exp);
1572     } else {
1573         logln((UnicodeString)"OK: toRules() => " + r);
1574     }
1575
1576     delete t;
1577
1578     // Test Foo(Bar) IDs.  Careful with spacing in id; make it conform
1579     // to what the regenerated ID will look like.
1580     UnicodeString id("Upper(Lower);(NFKC)", "");
1581     t = Transliterator::createInstance(id, UTRANS_FORWARD, parseError, status);
1582     if (t == 0) {
1583         errln("FAIL: createInstance #2 failed");
1584         return;
1585     }
1586     if (t->getID() == id) {
1587         logln((UnicodeString)"OK: created " + id);
1588     } else {
1589         errln((UnicodeString)"FAIL: createInstance(" + id +
1590               ").getID() => " + t->getID());
1591     }
1592
1593     Transliterator *u = t->createInverse(status);
1594     if (u == 0) {
1595         errln("FAIL: createInverse failed");
1596         delete t;
1597         return;
1598     }
1599     exp = "NFKC();Lower(Upper)";
1600     if (u->getID() == exp) {
1601         logln((UnicodeString)"OK: createInverse(" + id + ") => " +
1602               u->getID());
1603     } else {
1604         errln((UnicodeString)"FAIL: createInverse(" + id + ") => " +
1605               u->getID());
1606     }
1607     delete t;
1608     delete u;
1609 }
1610
1611 /**
1612  * Compound filter semantics were orginially not implemented
1613  * correctly.  Originally, each component filter f(i) is replaced by
1614  * f'(i) = f(i) && g, where g is the filter for the compound
1615  * transliterator.
1616  *
1617  * From Mark:
1618  *
1619  * Suppose and I have a transliterator X. Internally X is
1620  * "Greek-Latin; Latin-Cyrillic; Any-Lower". I use a filter [^A].
1621  *
1622  * The compound should convert all greek characters (through latin) to
1623  * cyrillic, then lowercase the result. The filter should say "don't
1624  * touch 'A' in the original". But because an intermediate result
1625  * happens to go through "A", the Greek Alpha gets hung up.
1626  */
1627 void TransliteratorTest::TestCompoundFilter(void) {
1628     UParseError parseError;
1629     UErrorCode status = U_ZERO_ERROR;
1630     Transliterator *t = Transliterator::createInstance
1631         ("Greek-Latin; Latin-Greek; Lower", UTRANS_FORWARD, parseError, status);
1632     if (t == 0) {
1633         dataerrln("FAIL: createInstance failed - %s", u_errorName(status));
1634         return;
1635     }
1636     t->adoptFilter(new UnicodeSet("[^A]", status));
1637     if (U_FAILURE(status)) {
1638         errln("FAIL: UnicodeSet ct failed");
1639         delete t;
1640         return;
1641     }
1642
1643     // Only the 'A' at index 1 should remain unchanged
1644     expect(*t,
1645            CharsToUnicodeString("BA\\u039A\\u0391"),
1646            CharsToUnicodeString("\\u03b2A\\u03ba\\u03b1"));
1647     delete t;
1648 }
1649
1650 void TransliteratorTest::TestRemove(void) {
1651     UParseError parseError;
1652     UErrorCode status = U_ZERO_ERROR;
1653     Transliterator *t = Transliterator::createInstance("Remove[abc]", UTRANS_FORWARD, parseError, status);
1654     if (t == 0) {
1655         errln("FAIL: createInstance failed");
1656         return;
1657     }
1658
1659     expect(*t, "Able bodied baker's cats", "Ale odied ker's ts");
1660
1661     // extra test for RemoveTransliterator::clone(), which at one point wasn't
1662     // duplicating the filter
1663     Transliterator* t2 = t->clone();
1664     expect(*t2, "Able bodied baker's cats", "Ale odied ker's ts");
1665
1666     delete t;
1667     delete t2;
1668 }
1669
1670 void TransliteratorTest::TestToRules(void) {
1671     const char* RBT = "rbt";
1672     const char* SET = "set";
1673     static const char* DATA[] = {
1674         RBT,
1675         "$a=\\u4E61; [$a] > A;",
1676         "[\\u4E61] > A;",
1677
1678         RBT,
1679         "$white=[[:Zs:][:Zl:]]; $white{a} > A;",
1680         "[[:Zs:][:Zl:]]{a} > A;",
1681
1682         SET,
1683         "[[:Zs:][:Zl:]]",
1684         "[[:Zs:][:Zl:]]",
1685
1686         SET,
1687         "[:Ps:]",
1688         "[:Ps:]",
1689
1690         SET,
1691         "[:L:]",
1692         "[:L:]",
1693
1694         SET,
1695         "[[:L:]-[A]]",
1696         "[[:L:]-[A]]",
1697
1698         SET,
1699         "[~[:Lu:][:Ll:]]",
1700         "[~[:Lu:][:Ll:]]",
1701
1702         SET,
1703         "[~[a-z]]",
1704         "[~[a-z]]",
1705
1706         RBT,
1707         "$white=[:Zs:]; $black=[^$white]; $black{a} > A;",
1708         "[^[:Zs:]]{a} > A;",
1709
1710         RBT,
1711         "$a=[:Zs:]; $b=[[a-z]-$a]; $b{a} > A;",
1712         "[[a-z]-[:Zs:]]{a} > A;",
1713
1714         RBT,
1715         "$a=[:Zs:]; $b=[$a&[a-z]]; $b{a} > A;",
1716         "[[:Zs:]&[a-z]]{a} > A;",
1717
1718         RBT,
1719         "$a=[:Zs:]; $b=[x$a]; $b{a} > A;",
1720         "[x[:Zs:]]{a} > A;",
1721
1722         RBT,
1723         "$accentMinus = [ [\\u0300-\\u0345] & [:M:] - [\\u0338]] ;"
1724         "$macron = \\u0304 ;"
1725         "$evowel = [aeiouyAEIOUY] ;"
1726         "$iotasub = \\u0345 ;"
1727         "($evowel $macron $accentMinus *) i > | $1 $iotasub ;",
1728         "([AEIOUYaeiouy]\\u0304[[\\u0300-\\u0345]&[:M:]-[\\u0338]]*)i > | $1 \\u0345;",
1729
1730         RBT,
1731         "([AEIOUYaeiouy]\\u0304[[:M:]-[\\u0304\\u0345]]*)i > | $1 \\u0345;",
1732         "([AEIOUYaeiouy]\\u0304[[:M:]-[\\u0304\\u0345]]*)i > | $1 \\u0345;",
1733     };
1734     static const int32_t DATA_length = UPRV_LENGTHOF(DATA);
1735
1736     for (int32_t d=0; d < DATA_length; d+=3) {
1737         if (DATA[d] == RBT) {
1738             // Transliterator test
1739             UParseError parseError;
1740             UErrorCode status = U_ZERO_ERROR;
1741             Transliterator *t = Transliterator::createFromRules("ID",
1742                                                                 UnicodeString(DATA[d+1], -1, US_INV), UTRANS_FORWARD, parseError, status);
1743             if (t == 0) {
1744                 dataerrln("FAIL: createFromRules failed - %s", u_errorName(status));
1745                 return;
1746             }
1747             UnicodeString rules, escapedRules;
1748             t->toRules(rules, FALSE);
1749             t->toRules(escapedRules, TRUE);
1750             UnicodeString expRules = CharsToUnicodeString(DATA[d+2]);
1751             UnicodeString expEscapedRules(DATA[d+2], -1, US_INV);
1752             if (rules == expRules) {
1753                 logln((UnicodeString)"Ok: " + UnicodeString(DATA[d+1], -1, US_INV) +
1754                       " => " + rules);
1755             } else {
1756                 errln((UnicodeString)"FAIL: " + UnicodeString(DATA[d+1], -1, US_INV) +
1757                       " => " + rules + ", exp " + expRules);
1758             }
1759             if (escapedRules == expEscapedRules) {
1760                 logln((UnicodeString)"Ok: " + UnicodeString(DATA[d+1], -1, US_INV) +
1761                       " => " + escapedRules);
1762             } else {
1763                 errln((UnicodeString)"FAIL: " + UnicodeString(DATA[d+1], -1, US_INV) +
1764                       " => " + escapedRules + ", exp " + expEscapedRules);
1765             }
1766             delete t;
1767
1768         } else {
1769             // UnicodeSet test
1770             UErrorCode status = U_ZERO_ERROR;
1771             UnicodeString pat(DATA[d+1], -1, US_INV);
1772             UnicodeString expToPat(DATA[d+2], -1, US_INV);
1773             UnicodeSet set(pat, status);
1774             if (U_FAILURE(status)) {
1775                 errln("FAIL: UnicodeSet ct failed");
1776                 return;
1777             }
1778             // Adjust spacing etc. as necessary.
1779             UnicodeString toPat;
1780             set.toPattern(toPat);
1781             if (expToPat == toPat) {
1782                 logln((UnicodeString)"Ok: " + pat +
1783                       " => " + toPat);
1784             } else {
1785                 errln((UnicodeString)"FAIL: " + pat +
1786                       " => " + prettify(toPat, TRUE) +
1787                       ", exp " + prettify(pat, TRUE));
1788             }
1789         }
1790     }
1791 }
1792
1793 void TransliteratorTest::TestContext() {
1794     UTransPosition pos = {0, 2, 0, 1}; // cs cl s l
1795     expect("de > x; {d}e > y;",
1796            "de",
1797            "ye",
1798            &pos);
1799
1800     expect("ab{c} > z;",
1801            "xadabdabcy",
1802            "xadabdabzy");
1803 }
1804
1805 void TransliteratorTest::TestSupplemental() {
1806
1807     expect(CharsToUnicodeString("$a=\\U00010300; $s=[\\U00010300-\\U00010323];"
1808                                 "a > $a; $s > i;"),
1809            CharsToUnicodeString("ab\\U0001030Fx"),
1810            CharsToUnicodeString("\\U00010300bix"));
1811
1812     expect(CharsToUnicodeString("$a=[a-z\\U00010300-\\U00010323];"
1813                                 "$b=[A-Z\\U00010400-\\U0001044D];"
1814                                 "($a)($b) > $2 $1;"),
1815            CharsToUnicodeString("aB\\U00010300\\U00010400c\\U00010401\\U00010301D"),
1816            CharsToUnicodeString("Ba\\U00010400\\U00010300\\U00010401cD\\U00010301"));
1817
1818     // k|ax\\U00010300xm
1819
1820     // k|a\\U00010400\\U00010300xm
1821     // ky|\\U00010400\\U00010300xm
1822     // ky\\U00010400|\\U00010300xm
1823
1824     // ky\\U00010400|\\U00010300\\U00010400m
1825     // ky\\U00010400y|\\U00010400m
1826     expect(CharsToUnicodeString("$a=[a\\U00010300-\\U00010323];"
1827                                 "$a {x} > | @ \\U00010400;"
1828                                 "{$a} [^\\u0000-\\uFFFF] > y;"),
1829            CharsToUnicodeString("kax\\U00010300xm"),
1830            CharsToUnicodeString("ky\\U00010400y\\U00010400m"));
1831
1832     expectT("Any-Name",
1833            CharsToUnicodeString("\\U00010330\\U000E0061\\u00A0"),
1834            UNICODE_STRING_SIMPLE("\\N{GOTHIC LETTER AHSA}\\N{TAG LATIN SMALL LETTER A}\\N{NO-BREAK SPACE}"));
1835
1836     expectT("Any-Hex/Unicode",
1837            CharsToUnicodeString("\\U00010330\\U0010FF00\\U000E0061\\u00A0"),
1838            UNICODE_STRING_SIMPLE("U+10330U+10FF00U+E0061U+00A0"));
1839
1840     expectT("Any-Hex/C",
1841            CharsToUnicodeString("\\U00010330\\U0010FF00\\U000E0061\\u00A0"),
1842            UNICODE_STRING_SIMPLE("\\U00010330\\U0010FF00\\U000E0061\\u00A0"));
1843
1844     expectT("Any-Hex/Perl",
1845            CharsToUnicodeString("\\U00010330\\U0010FF00\\U000E0061\\u00A0"),
1846            UNICODE_STRING_SIMPLE("\\x{10330}\\x{10FF00}\\x{E0061}\\x{A0}"));
1847
1848     expectT("Any-Hex/Java",
1849            CharsToUnicodeString("\\U00010330\\U0010FF00\\U000E0061\\u00A0"),
1850            UNICODE_STRING_SIMPLE("\\uD800\\uDF30\\uDBFF\\uDF00\\uDB40\\uDC61\\u00A0"));
1851
1852     expectT("Any-Hex/XML",
1853            CharsToUnicodeString("\\U00010330\\U0010FF00\\U000E0061\\u00A0"),
1854            "&#x10330;&#x10FF00;&#xE0061;&#xA0;");
1855
1856     expectT("Any-Hex/XML10",
1857            CharsToUnicodeString("\\U00010330\\U0010FF00\\U000E0061\\u00A0"),
1858            "&#66352;&#1113856;&#917601;&#160;");
1859
1860     expectT(UNICODE_STRING_SIMPLE("[\\U000E0000-\\U000E0FFF] Remove"),
1861            CharsToUnicodeString("\\U00010330\\U0010FF00\\U000E0061\\u00A0"),
1862            CharsToUnicodeString("\\U00010330\\U0010FF00\\u00A0"));
1863 }
1864
1865 void TransliteratorTest::TestQuantifier() {
1866
1867     // Make sure @ in a quantified anteContext works
1868     expect("a+ {b} > | @@ c; A > a; (a+ c) > '(' $1 ')';",
1869            "AAAAAb",
1870            "aaa(aac)");
1871
1872     // Make sure @ in a quantified postContext works
1873     expect("{b} a+ > c @@ |; (a+) > '(' $1 ')';",
1874            "baaaaa",
1875            "caa(aaa)");
1876
1877     // Make sure @ in a quantified postContext with seg ref works
1878     expect("{(b)} a+ > $1 @@ |; (a+) > '(' $1 ')';",
1879            "baaaaa",
1880            "baa(aaa)");
1881
1882     // Make sure @ past ante context doesn't enter ante context
1883     UTransPosition pos = {0, 5, 3, 5};
1884     expect("a+ {b} > | @@ c; x > y; (a+ c) > '(' $1 ')';",
1885            "xxxab",
1886            "xxx(ac)",
1887            &pos);
1888
1889     // Make sure @ past post context doesn't pass limit
1890     UTransPosition pos2 = {0, 4, 0, 2};
1891     expect("{b} a+ > c @@ |; x > y; a > A;",
1892            "baxx",
1893            "caxx",
1894            &pos2);
1895
1896     // Make sure @ past post context doesn't enter post context
1897     expect("{b} a+ > c @@ |; x > y; a > A;",
1898            "baxx",
1899            "cayy");
1900
1901     expect("(ab)? c > d;",
1902            "c abc ababc",
1903            "d d abd");
1904
1905     // NOTE: The (ab)+ when referenced just yields a single "ab",
1906     // not the full sequence of them.  This accords with perl behavior.
1907     expect("(ab)+ {x} > '(' $1 ')';",
1908            "x abx ababxy",
1909            "x ab(ab) abab(ab)y");
1910
1911     expect("b+ > x;",
1912            "ac abc abbc abbbc",
1913            "ac axc axc axc");
1914
1915     expect("[abc]+ > x;",
1916            "qac abrc abbcs abtbbc",
1917            "qx xrx xs xtx");
1918
1919     expect("q{(ab)+} > x;",
1920            "qa qab qaba qababc qaba",
1921            "qa qx qxa qxc qxa");
1922
1923     expect("q(ab)* > x;",
1924            "qa qab qaba qababc",
1925            "xa x xa xc");
1926
1927     // NOTE: The (ab)+ when referenced just yields a single "ab",
1928     // not the full sequence of them.  This accords with perl behavior.
1929     expect("q(ab)* > '(' $1 ')';",
1930            "qa qab qaba qababc",
1931            "()a (ab) (ab)a (ab)c");
1932
1933     // 'foo'+ and 'foo'* -- the quantifier should apply to the entire
1934     // quoted string
1935     expect("'ab'+ > x;",
1936            "bb ab ababb",
1937            "bb x xb");
1938
1939     // $foo+ and $foo* -- the quantifier should apply to the entire
1940     // variable reference
1941     expect("$var = ab; $var+ > x;",
1942            "bb ab ababb",
1943            "bb x xb");
1944 }
1945
1946 class TestTrans : public Transliterator {
1947 public:
1948     TestTrans(const UnicodeString& id) : Transliterator(id, 0) {
1949     }
1950     virtual Transliterator* clone(void) const {
1951         return new TestTrans(getID());
1952     }
1953     virtual void handleTransliterate(Replaceable& /*text*/, UTransPosition& offsets,
1954         UBool /*isIncremental*/) const
1955     {
1956         offsets.start = offsets.limit;
1957     }
1958     virtual UClassID getDynamicClassID() const;
1959     static UClassID U_EXPORT2 getStaticClassID();
1960 };
1961 UOBJECT_DEFINE_RTTI_IMPLEMENTATION(TestTrans)
1962
1963 /**
1964  * Test Source-Target/Variant.
1965  */
1966 void TransliteratorTest::TestSTV(void) {
1967     int32_t ns = Transliterator::countAvailableSources();
1968     if (ns < 0 || ns > 255) {
1969         errln((UnicodeString)"FAIL: Bad source count: " + ns);
1970         return;
1971     }
1972     int32_t i, j;
1973     for (i=0; i<ns; ++i) {
1974         UnicodeString source;
1975         Transliterator::getAvailableSource(i, source);
1976         logln((UnicodeString)"" + i + ": " + source);
1977         if (source.length() == 0) {
1978             errln("FAIL: empty source");
1979             continue;
1980         }
1981         int32_t nt = Transliterator::countAvailableTargets(source);
1982         if (nt < 0 || nt > 255) {
1983             errln((UnicodeString)"FAIL: Bad target count: " + nt);
1984             continue;
1985         }
1986         for (int32_t j=0; j<nt; ++j) {
1987             UnicodeString target;
1988             Transliterator::getAvailableTarget(j, source, target);
1989             logln((UnicodeString)" " + j + ": " + target);
1990             if (target.length() == 0) {
1991                 errln("FAIL: empty target");
1992                 continue;
1993             }
1994             int32_t nv = Transliterator::countAvailableVariants(source, target);
1995             if (nv < 0 || nv > 255) {
1996                 errln((UnicodeString)"FAIL: Bad variant count: " + nv);
1997                 continue;
1998             }
1999             for (int32_t k=0; k<nv; ++k) {
2000                 UnicodeString variant;
2001                 Transliterator::getAvailableVariant(k, source, target, variant);
2002                 if (variant.length() == 0) {
2003                     logln((UnicodeString)"  " + k + ": <empty>");
2004                 } else {
2005                     logln((UnicodeString)"  " + k + ": " + variant);
2006                 }
2007             }
2008         }
2009     }
2010
2011     // Test registration
2012     const char* IDS[] = { "Fieruwer", "Seoridf-Sweorie", "Oewoir-Oweri/Vsie" };
2013     const char* FULL_IDS[] = { "Any-Fieruwer", "Seoridf-Sweorie", "Oewoir-Oweri/Vsie" };
2014     const char* SOURCES[] = { NULL, "Seoridf", "Oewoir" };
2015     for (i=0; i<3; ++i) {
2016         Transliterator *t = new TestTrans(IDS[i]);
2017         if (t == 0) {
2018             errln("FAIL: out of memory");
2019             return;
2020         }
2021         if (t->getID() != IDS[i]) {
2022             errln((UnicodeString)"FAIL: ID mismatch for " + IDS[i]);
2023             delete t;
2024             return;
2025         }
2026         Transliterator::registerInstance(t);
2027         UErrorCode status = U_ZERO_ERROR;
2028         t = Transliterator::createInstance(IDS[i], UTRANS_FORWARD, status);
2029         if (t == NULL) {
2030             errln((UnicodeString)"FAIL: Registration/creation failed for ID " +
2031                   IDS[i]);
2032         } else {
2033             logln((UnicodeString)"Ok: Registration/creation succeeded for ID " +
2034                   IDS[i]);
2035             delete t;
2036         }
2037         Transliterator::unregister(IDS[i]);
2038         t = Transliterator::createInstance(IDS[i], UTRANS_FORWARD, status);
2039         if (t != NULL) {
2040             errln((UnicodeString)"FAIL: Unregistration failed for ID " +
2041                   IDS[i]);
2042             delete t;
2043         }
2044     }
2045
2046     // Make sure getAvailable API reflects removal
2047     int32_t n = Transliterator::countAvailableIDs();
2048     for (i=0; i<n; ++i) {
2049         UnicodeString id = Transliterator::getAvailableID(i);
2050         for (j=0; j<3; ++j) {
2051             if (id.caseCompare(FULL_IDS[j],0)==0) {
2052                 errln((UnicodeString)"FAIL: unregister(" + id + ") failed");
2053             }
2054         }
2055     }
2056     n = Transliterator::countAvailableTargets("Any");
2057     for (i=0; i<n; ++i) {
2058         UnicodeString t;
2059         Transliterator::getAvailableTarget(i, "Any", t);
2060         if (t.caseCompare(IDS[0],0)==0) {
2061             errln((UnicodeString)"FAIL: unregister(Any-" + t + ") failed");
2062         }
2063     }
2064     n = Transliterator::countAvailableSources();
2065     for (i=0; i<n; ++i) {
2066         UnicodeString s;
2067         Transliterator::getAvailableSource(i, s);
2068         for (j=0; j<3; ++j) {
2069             if (SOURCES[j] == NULL) continue;
2070             if (s.caseCompare(SOURCES[j],0)==0) {
2071                 errln((UnicodeString)"FAIL: unregister(" + s + "-*) failed");
2072             }
2073         }
2074     }
2075 }
2076
2077 /**
2078  * Test inverse of Greek-Latin; Title()
2079  */
2080 void TransliteratorTest::TestCompoundInverse(void) {
2081     UParseError parseError;
2082     UErrorCode status = U_ZERO_ERROR;
2083     Transliterator *t = Transliterator::createInstance
2084         ("Greek-Latin; Title()", UTRANS_REVERSE,parseError, status);
2085     if (t == 0) {
2086         dataerrln("FAIL: createInstance - %s", u_errorName(status));
2087         return;
2088     }
2089     UnicodeString exp("(Title);Latin-Greek");
2090     if (t->getID() == exp) {
2091         logln("Ok: inverse of \"Greek-Latin; Title()\" is \"" +
2092               t->getID());
2093     } else {
2094         errln("FAIL: inverse of \"Greek-Latin; Title()\" is \"" +
2095               t->getID() + "\", expected \"" + exp + "\"");
2096     }
2097     delete t;
2098 }
2099
2100 /**
2101  * Test NFD chaining with RBT
2102  */
2103 void TransliteratorTest::TestNFDChainRBT() {
2104     UParseError pe;
2105     UErrorCode ec = U_ZERO_ERROR;
2106     Transliterator* t = Transliterator::createFromRules(
2107                                "TEST", "::NFD; aa > Q; a > q;",
2108                                UTRANS_FORWARD, pe, ec);
2109     if (t == NULL || U_FAILURE(ec)) {
2110         dataerrln("FAIL: Transliterator::createFromRules failed with %s", u_errorName(ec));
2111         return;
2112     }
2113     expect(*t, "aa", "Q");
2114     delete t;
2115
2116     // TEMPORARY TESTS -- BEING DEBUGGED
2117 //=-    UnicodeString s, s2;
2118 //=-    t = Transliterator::createInstance("Latin-Devanagari", UTRANS_FORWARD, pe, ec);
2119 //=-    s = CharsToUnicodeString("rmk\\u1E63\\u0113t");
2120 //=-    s2 = CharsToUnicodeString("\\u0930\\u094D\\u092E\\u094D\\u0915\\u094D\\u0937\\u0947\\u0924\\u094D");
2121 //=-    expect(*t, s, s2);
2122 //=-    delete t;
2123 //=-
2124 //=-    t = Transliterator::createInstance("Devanagari-Latin", UTRANS_FORWARD, pe, ec);
2125 //=-    expect(*t, s2, s);
2126 //=-    delete t;
2127 //=-
2128 //=-    t = Transliterator::createInstance("Latin-Devanagari;Devanagari-Latin", UTRANS_FORWARD, pe, ec);
2129 //=-    s = CharsToUnicodeString("rmk\\u1E63\\u0113t");
2130 //=-    expect(*t, s, s);
2131 //=-    delete t;
2132
2133 //    const char* source[] = {
2134 //        /*
2135 //        "\\u015Br\\u012Bmad",
2136 //        "bhagavadg\\u012Bt\\u0101",
2137 //        "adhy\\u0101ya",
2138 //        "arjuna",
2139 //        "vi\\u1E63\\u0101da",
2140 //        "y\\u014Dga",
2141 //        "dhr\\u0325tar\\u0101\\u1E63\\u1E6Dra",
2142 //        "uv\\u0101cr\\u0325",
2143 //        */
2144 //        "rmk\\u1E63\\u0113t",
2145 //      //"dharmak\\u1E63\\u0113tr\\u0113",
2146 //        /*
2147 //        "kuruk\\u1E63\\u0113tr\\u0113",
2148 //        "samav\\u0113t\\u0101",
2149 //        "yuyutsava-\\u1E25",
2150 //        "m\\u0101mak\\u0101-\\u1E25",
2151 //     // "p\\u0101\\u1E47\\u1E0Dav\\u0101\\u015Bcaiva",
2152 //        "kimakurvata",
2153 //        "san\\u0304java",
2154 //        */
2155 //
2156 //        0
2157 //    };
2158 //    const char* expected[] = {
2159 //        /*
2160 //        "\\u0936\\u094d\\u0930\\u0940\\u092e\\u0926\\u094d",
2161 //        "\\u092d\\u0917\\u0935\\u0926\\u094d\\u0917\\u0940\\u0924\\u093e",
2162 //        "\\u0905\\u0927\\u094d\\u092f\\u093e\\u092f",
2163 //        "\\u0905\\u0930\\u094d\\u091c\\u0941\\u0928",
2164 //        "\\u0935\\u093f\\u0937\\u093e\\u0926",
2165 //        "\\u092f\\u094b\\u0917",
2166 //        "\\u0927\\u0943\\u0924\\u0930\\u093e\\u0937\\u094d\\u091f\\u094d\\u0930",
2167 //        "\\u0909\\u0935\\u093E\\u091A\\u0943",
2168 //        */
2169 //        "\\u0927",
2170 //        //"\\u0927\\u0930\\u094d\\u092e\\u0915\\u094d\\u0937\\u0947\\u0924\\u094d\\u0930\\u0947",
2171 //        /*
2172 //        "\\u0915\\u0941\\u0930\\u0941\\u0915\\u094d\\u0937\\u0947\\u0924\\u094d\\u0930\\u0947",
2173 //        "\\u0938\\u092e\\u0935\\u0947\\u0924\\u093e",
2174 //        "\\u092f\\u0941\\u092f\\u0941\\u0924\\u094d\\u0938\\u0935\\u0903",
2175 //        "\\u092e\\u093e\\u092e\\u0915\\u093e\\u0903",
2176 //    //  "\\u092a\\u093e\\u0923\\u094d\\u0921\\u0935\\u093e\\u0936\\u094d\\u091a\\u0948\\u0935",
2177 //        "\\u0915\\u093f\\u092e\\u0915\\u0941\\u0930\\u094d\\u0935\\u0924",
2178 //        "\\u0938\\u0902\\u091c\\u0935",
2179 //        */
2180 //        0
2181 //    };
2182 //    UErrorCode status = U_ZERO_ERROR;
2183 //    UParseError parseError;
2184 //    UnicodeString message;
2185 //    Transliterator* latinToDevToLatin=Transliterator::createInstance("Latin-Devanagari;Devanagari-Latin", UTRANS_FORWARD, parseError, status);
2186 //    Transliterator* devToLatinToDev=Transliterator::createInstance("Devanagari-Latin;Latin-Devanagari", UTRANS_FORWARD, parseError, status);
2187 //    if(U_FAILURE(status)){
2188 //        errln("FAIL: construction " +   UnicodeString(" Error: ") + u_errorName(status));
2189 //        errln("PreContext: " + prettify(parseError.preContext) + "PostContext: " + prettify( parseError.postContext) );
2190 //        delete latinToDevToLatin;
2191 //        delete devToLatinToDev;
2192 //        return;
2193 //    }
2194 //    UnicodeString gotResult;
2195 //    for(int i= 0; source[i] != 0; i++){
2196 //        gotResult = source[i];
2197 //        expect(*latinToDevToLatin,CharsToUnicodeString(source[i]),CharsToUnicodeString(source[i]));
2198 //        expect(*devToLatinToDev,CharsToUnicodeString(expected[i]),CharsToUnicodeString(expected[i]));
2199 //    }
2200 //    delete latinToDevToLatin;
2201 //    delete devToLatinToDev;
2202 }
2203
2204 /**
2205  * Inverse of "Null" should be "Null". (J21)
2206  */
2207 void TransliteratorTest::TestNullInverse() {
2208     UParseError pe;
2209     UErrorCode ec = U_ZERO_ERROR;
2210     Transliterator *t = Transliterator::createInstance("Null", UTRANS_FORWARD, pe, ec);
2211     if (t == 0 || U_FAILURE(ec)) {
2212         errln("FAIL: createInstance");
2213         return;
2214     }
2215     Transliterator *u = t->createInverse(ec);
2216     if (u == 0 || U_FAILURE(ec)) {
2217         errln("FAIL: createInverse");
2218         delete t;
2219         return;
2220     }
2221     if (u->getID() != "Null") {
2222         errln("FAIL: Inverse of Null should be Null");
2223     }
2224     delete t;
2225     delete u;
2226 }
2227
2228 /**
2229  * Check ID of inverse of alias. (J22)
2230  */
2231 void TransliteratorTest::TestAliasInverseID() {
2232     UnicodeString ID("Latin-Hangul", ""); // This should be any alias ID with an inverse
2233     UParseError pe;
2234     UErrorCode ec = U_ZERO_ERROR;
2235     Transliterator *t = Transliterator::createInstance(ID, UTRANS_FORWARD, pe, ec);
2236     if (t == 0 || U_FAILURE(ec)) {
2237         dataerrln("FAIL: createInstance - %s", u_errorName(ec));
2238         return;
2239     }
2240     Transliterator *u = t->createInverse(ec);
2241     if (u == 0 || U_FAILURE(ec)) {
2242         errln("FAIL: createInverse");
2243         delete t;
2244         return;
2245     }
2246     UnicodeString exp = "Hangul-Latin";
2247     UnicodeString got = u->getID();
2248     if (got != exp) {
2249         errln((UnicodeString)"FAIL: Inverse of " + ID + " is " + got +
2250               ", expected " + exp);
2251     }
2252     delete t;
2253     delete u;
2254 }
2255
2256 /**
2257  * Test IDs of inverses of compound transliterators. (J20)
2258  */
2259 void TransliteratorTest::TestCompoundInverseID() {
2260     UnicodeString ID = "Latin-Jamo;NFC(NFD)";
2261     UParseError pe;
2262     UErrorCode ec = U_ZERO_ERROR;
2263     Transliterator *t = Transliterator::createInstance(ID, UTRANS_FORWARD, pe, ec);
2264     if (t == 0 || U_FAILURE(ec)) {
2265         dataerrln("FAIL: createInstance - %s", u_errorName(ec));
2266         return;
2267     }
2268     Transliterator *u = t->createInverse(ec);
2269     if (u == 0 || U_FAILURE(ec)) {
2270         errln("FAIL: createInverse");
2271         delete t;
2272         return;
2273     }
2274     UnicodeString exp = "NFD(NFC);Jamo-Latin";
2275     UnicodeString got = u->getID();
2276     if (got != exp) {
2277         errln((UnicodeString)"FAIL: Inverse of " + ID + " is " + got +
2278               ", expected " + exp);
2279     }
2280     delete t;
2281     delete u;
2282 }
2283
2284 /**
2285  * Test undefined variable.
2286
2287  */
2288 void TransliteratorTest::TestUndefinedVariable() {
2289     UnicodeString rule = "$initial } a <> \\u1161;";
2290     UParseError pe;
2291     UErrorCode ec = U_ZERO_ERROR;
2292     Transliterator *t = Transliterator::createFromRules("<ID>", rule, UTRANS_FORWARD, pe, ec);
2293     delete t;
2294     if (U_FAILURE(ec)) {
2295         logln((UnicodeString)"OK: Got exception for " + rule + ", as expected: " +
2296               u_errorName(ec));
2297         return;
2298     }
2299     errln((UnicodeString)"Fail: bogus rule " + rule + " compiled with error " +
2300           u_errorName(ec));
2301 }
2302
2303 /**
2304  * Test empty context.
2305  */
2306 void TransliteratorTest::TestEmptyContext() {
2307     expect(" { a } > b;", "xay a ", "xby b ");
2308 }
2309
2310 /**
2311 * Test compound filter ID syntax
2312 */
2313 void TransliteratorTest::TestCompoundFilterID(void) {
2314     static const char* DATA[] = {
2315         // Col. 1 = ID or rule set (latter must start with #)
2316
2317         // = columns > 1 are null if expect col. 1 to be illegal =
2318
2319         // Col. 2 = direction, "F..." or "R..."
2320         // Col. 3 = source string
2321         // Col. 4 = exp result
2322
2323         "[abc]; [abc]", NULL, NULL, NULL, // multiple filters
2324         "Latin-Greek; [abc];", NULL, NULL, NULL, // misplaced filter
2325         "[b]; Latin-Greek; Upper; ([xyz])", "F", "abc", "a\\u0392c",
2326         "[b]; (Lower); Latin-Greek; Upper(); ([\\u0392])", "R", "\\u0391\\u0392\\u0393", "\\u0391b\\u0393",
2327         "#\n::[b]; ::Latin-Greek; ::Upper; ::([xyz]);", "F", "abc", "a\\u0392c",
2328         "#\n::[b]; ::(Lower); ::Latin-Greek; ::Upper(); ::([\\u0392]);", "R", "\\u0391\\u0392\\u0393", "\\u0391b\\u0393",
2329         NULL,
2330     };
2331
2332     for (int32_t i=0; DATA[i]; i+=4) {
2333         UnicodeString id = CharsToUnicodeString(DATA[i]);
2334         UTransDirection direction = (DATA[i+1] != NULL && DATA[i+1][0] == 'R') ?
2335             UTRANS_REVERSE : UTRANS_FORWARD;
2336         UnicodeString source;
2337         UnicodeString exp;
2338         if (DATA[i+2] != NULL) {
2339             source = CharsToUnicodeString(DATA[i+2]);
2340             exp = CharsToUnicodeString(DATA[i+3]);
2341         }
2342         UBool expOk = (DATA[i+1] != NULL);
2343         Transliterator* t = NULL;
2344         UParseError pe;
2345         UErrorCode ec = U_ZERO_ERROR;
2346         if (id.charAt(0) == 0x23/*#*/) {
2347             t = Transliterator::createFromRules("ID", id, direction, pe, ec);
2348         } else {
2349             t = Transliterator::createInstance(id, direction, pe, ec);
2350         }
2351         UBool ok = (t != NULL && U_SUCCESS(ec));
2352         UnicodeString transID;
2353         if (t!=0) {
2354             transID = t->getID();
2355         }
2356         else {
2357             transID = UnicodeString("NULL", "");
2358         }
2359         if (ok == expOk) {
2360             logln((UnicodeString)"Ok: " + id + " => " + transID + ", " +
2361                   u_errorName(ec));
2362             if (source.length() != 0) {
2363                 expect(*t, source, exp);
2364             }
2365             delete t;
2366         } else {
2367             dataerrln((UnicodeString)"FAIL: " + id + " => " + transID + ", " +
2368                   u_errorName(ec));
2369         }
2370     }
2371 }
2372
2373 /**
2374  * Test new property set syntax
2375  */
2376 void TransliteratorTest::TestPropertySet() {
2377     expect(UNICODE_STRING_SIMPLE("a>A; \\p{Lu}>x; \\p{ANY}>y;"), "abcDEF", "Ayyxxx");
2378     expect("(.+)>'[' $1 ']';", " a stitch \n in time \r saves 9",
2379            "[ a stitch ]\n[ in time ]\r[ saves 9]");
2380 }
2381
2382 /**
2383  * Test various failure points of the new 2.0 engine.
2384  */
2385 void TransliteratorTest::TestNewEngine() {
2386     UParseError pe;
2387     UErrorCode ec = U_ZERO_ERROR;
2388     Transliterator *t = Transliterator::createInstance("Latin-Hiragana", UTRANS_FORWARD, pe, ec);
2389     if (t == 0 || U_FAILURE(ec)) {
2390         dataerrln("FAIL: createInstance Latin-Hiragana - %s", u_errorName(ec));
2391         return;
2392     }
2393     // Katakana should be untouched
2394     expect(*t, CharsToUnicodeString("a\\u3042\\u30A2"),
2395            CharsToUnicodeString("\\u3042\\u3042\\u30A2"));
2396
2397     delete t;
2398
2399 #if 1
2400     // This test will only work if Transliterator.ROLLBACK is
2401     // true.  Otherwise, this test will fail, revealing a
2402     // limitation of global filters in incremental mode.
2403     Transliterator *a =
2404         Transliterator::createFromRules("a_to_A", "a > A;", UTRANS_FORWARD, pe, ec);
2405     Transliterator *A =
2406         Transliterator::createFromRules("A_to_b", "A > b;", UTRANS_FORWARD, pe, ec);
2407     if (U_FAILURE(ec)) {
2408         delete a;
2409         delete A;
2410         return;
2411     }
2412
2413     Transliterator* array[3];
2414     array[0] = a;
2415     array[1] = Transliterator::createInstance("NFD", UTRANS_FORWARD, pe, ec);
2416     array[2] = A;
2417     if (U_FAILURE(ec)) {
2418         errln("FAIL: createInstance NFD");
2419         delete a;
2420         delete A;
2421         delete array[1];
2422         return;
2423     }
2424
2425     t = new CompoundTransliterator(array, 3, new UnicodeSet("[:Ll:]", ec));
2426     if (U_FAILURE(ec)) {
2427         errln("FAIL: UnicodeSet constructor");
2428         delete a;
2429         delete A;
2430         delete array[1];
2431         delete t;
2432         return;
2433     }
2434
2435     expect(*t, "aAaA", "bAbA");
2436
2437     assertTrue("countElements", t->countElements() == 3);
2438     assertEquals("getElement(0)", t->getElement(0, ec).getID(), "a_to_A");
2439     assertEquals("getElement(1)", t->getElement(1, ec).getID(), "NFD");
2440     assertEquals("getElement(2)", t->getElement(2, ec).getID(), "A_to_b");
2441     assertSuccess("getElement", ec);
2442
2443     delete a;
2444     delete A;
2445     delete array[1];
2446     delete t;
2447 #endif
2448
2449     expect("$smooth = x; $macron = q; [:^L:] { ([aeiouyAEIOUY] $macron?) } [^aeiouyAEIOUY$smooth$macron] > | $1 $smooth ;",
2450            "a",
2451            "ax");
2452
2453     UnicodeString gr = CharsToUnicodeString(
2454         "$ddot = \\u0308 ;"
2455         "$lcgvowel = [\\u03b1\\u03b5\\u03b7\\u03b9\\u03bf\\u03c5\\u03c9] ;"
2456         "$rough = \\u0314 ;"
2457         "($lcgvowel+ $ddot?) $rough > h | $1 ;"
2458         "\\u03b1 <> a ;"
2459         "$rough <> h ;");
2460
2461     expect(gr, CharsToUnicodeString("\\u03B1\\u0314"), "ha");
2462 }
2463
2464 /**
2465  * Test quantified segment behavior.  We want:
2466  * ([abc])+ > x $1 x; applied to "cba" produces "xax"
2467  */
2468 void TransliteratorTest::TestQuantifiedSegment(void) {
2469     // The normal case
2470     expect("([abc]+) > x $1 x;", "cba", "xcbax");
2471
2472     // The tricky case; the quantifier is around the segment
2473     expect("([abc])+ > x $1 x;", "cba", "xax");
2474
2475     // Tricky case in reverse direction
2476     expect("([abc])+ { q > x $1 x;", "cbaq", "cbaxax");
2477
2478     // Check post-context segment
2479     expect("{q} ([a-d])+ > '(' $1 ')';", "ddqcba", "dd(a)cba");
2480
2481     // Test toRule/toPattern for non-quantified segment.
2482     // Careful with spacing here.
2483     UnicodeString r("([a-c]){q} > x $1 x;");
2484     UParseError pe;
2485     UErrorCode ec = U_ZERO_ERROR;
2486     Transliterator* t = Transliterator::createFromRules("ID", r, UTRANS_FORWARD, pe, ec);
2487     if (U_FAILURE(ec)) {
2488         errln("FAIL: createFromRules");
2489         delete t;
2490         return;
2491     }
2492     UnicodeString rr;
2493     t->toRules(rr, TRUE);
2494     if (r != rr) {
2495         errln((UnicodeString)"FAIL: \"" + r + "\" x toRules() => \"" + rr + "\"");
2496     } else {
2497         logln((UnicodeString)"Ok: \"" + r + "\" x toRules() => \"" + rr + "\"");
2498     }
2499     delete t;
2500
2501     // Test toRule/toPattern for quantified segment.
2502     // Careful with spacing here.
2503     r = "([a-c])+{q} > x $1 x;";
2504     t = Transliterator::createFromRules("ID", r, UTRANS_FORWARD, pe, ec);
2505     if (U_FAILURE(ec)) {
2506         errln("FAIL: createFromRules");
2507         delete t;
2508         return;
2509     }
2510     t->toRules(rr, TRUE);
2511     if (r != rr) {
2512         errln((UnicodeString)"FAIL: \"" + r + "\" x toRules() => \"" + rr + "\"");
2513     } else {
2514         logln((UnicodeString)"Ok: \"" + r + "\" x toRules() => \"" + rr + "\"");
2515     }
2516     delete t;
2517 }
2518
2519 //======================================================================
2520 // Ram's tests
2521 //======================================================================
2522 void TransliteratorTest::TestDevanagariLatinRT(){
2523     const int MAX_LEN= 52;
2524     const char* const source[MAX_LEN] = {
2525         "bh\\u0101rata",
2526         "kra",
2527         "k\\u1E63a",
2528         "khra",
2529         "gra",
2530         "\\u1E45ra",
2531         "cra",
2532         "chra",
2533         "j\\u00F1a",
2534         "jhra",
2535         "\\u00F1ra",
2536         "\\u1E6Dya",
2537         "\\u1E6Dhra",
2538         "\\u1E0Dya",
2539       //"r\\u0323ya", // \u095c is not valid in Devanagari
2540         "\\u1E0Dhya",
2541         "\\u1E5Bhra",
2542         "\\u1E47ra",
2543         "tta",
2544         "thra",
2545         "dda",
2546         "dhra",
2547         "nna",
2548         "pra",
2549         "phra",
2550         "bra",
2551         "bhra",
2552         "mra",
2553         "\\u1E49ra",
2554       //"l\\u0331ra",
2555         "yra",
2556         "\\u1E8Fra",
2557       //"l-",
2558         "vra",
2559         "\\u015Bra",
2560         "\\u1E63ra",
2561         "sra",
2562         "hma",
2563         "\\u1E6D\\u1E6Da",
2564         "\\u1E6D\\u1E6Dha",
2565         "\\u1E6Dh\\u1E6Dha",
2566         "\\u1E0D\\u1E0Da",
2567         "\\u1E0D\\u1E0Dha",
2568         "\\u1E6Dya",
2569         "\\u1E6Dhya",
2570         "\\u1E0Dya",
2571         "\\u1E0Dhya",
2572         // Not roundtrippable --
2573         // \\u0939\\u094d\\u094d\\u092E  - hma
2574         // \\u0939\\u094d\\u092E         - hma
2575         // CharsToUnicodeString("hma"),
2576         "hya",
2577         "\\u015Br\\u0325",
2578         "\\u015Bca",
2579         "\\u0115",
2580         "san\\u0304j\\u012Bb s\\u0113nagupta",
2581         "\\u0101nand vaddir\\u0101ju",
2582         "\\u0101",
2583         "a"
2584     };
2585     const char* const expected[MAX_LEN] = {
2586         "\\u092D\\u093E\\u0930\\u0924",   /* bha\\u0304rata */
2587         "\\u0915\\u094D\\u0930",          /* kra         */
2588         "\\u0915\\u094D\\u0937",          /* ks\\u0323a  */
2589         "\\u0916\\u094D\\u0930",          /* khra        */
2590         "\\u0917\\u094D\\u0930",          /* gra         */
2591         "\\u0919\\u094D\\u0930",          /* n\\u0307ra  */
2592         "\\u091A\\u094D\\u0930",          /* cra         */
2593         "\\u091B\\u094D\\u0930",          /* chra        */
2594         "\\u091C\\u094D\\u091E",          /* jn\\u0303a  */
2595         "\\u091D\\u094D\\u0930",          /* jhra        */
2596         "\\u091E\\u094D\\u0930",          /* n\\u0303ra  */
2597         "\\u091F\\u094D\\u092F",          /* t\\u0323ya  */
2598         "\\u0920\\u094D\\u0930",          /* t\\u0323hra */
2599         "\\u0921\\u094D\\u092F",          /* d\\u0323ya  */
2600       //"\\u095C\\u094D\\u092F",        /* r\\u0323ya  */ // \u095c is not valid in Devanagari
2601         "\\u0922\\u094D\\u092F",          /* d\\u0323hya */
2602         "\\u0922\\u093C\\u094D\\u0930",   /* r\\u0323hra */
2603         "\\u0923\\u094D\\u0930",          /* n\\u0323ra  */
2604         "\\u0924\\u094D\\u0924",          /* tta         */
2605         "\\u0925\\u094D\\u0930",          /* thra        */
2606         "\\u0926\\u094D\\u0926",          /* dda         */
2607         "\\u0927\\u094D\\u0930",          /* dhra        */
2608         "\\u0928\\u094D\\u0928",          /* nna         */
2609         "\\u092A\\u094D\\u0930",          /* pra         */
2610         "\\u092B\\u094D\\u0930",          /* phra        */
2611         "\\u092C\\u094D\\u0930",          /* bra         */
2612         "\\u092D\\u094D\\u0930",          /* bhra        */
2613         "\\u092E\\u094D\\u0930",          /* mra         */
2614         "\\u0929\\u094D\\u0930",          /* n\\u0331ra  */
2615       //"\\u0934\\u094D\\u0930",        /* l\\u0331ra  */
2616         "\\u092F\\u094D\\u0930",          /* yra         */
2617         "\\u092F\\u093C\\u094D\\u0930",   /* y\\u0307ra  */
2618       //"l-",
2619         "\\u0935\\u094D\\u0930",          /* vra         */
2620         "\\u0936\\u094D\\u0930",          /* s\\u0301ra  */
2621         "\\u0937\\u094D\\u0930",          /* s\\u0323ra  */
2622         "\\u0938\\u094D\\u0930",          /* sra         */
2623         "\\u0939\\u094d\\u092E",          /* hma         */
2624         "\\u091F\\u094D\\u091F",          /* t\\u0323t\\u0323a  */
2625         "\\u091F\\u094D\\u0920",          /* t\\u0323t\\u0323ha */
2626         "\\u0920\\u094D\\u0920",          /* t\\u0323ht\\u0323ha*/
2627         "\\u0921\\u094D\\u0921",          /* d\\u0323d\\u0323a  */
2628         "\\u0921\\u094D\\u0922",          /* d\\u0323d\\u0323ha */
2629         "\\u091F\\u094D\\u092F",          /* t\\u0323ya  */
2630         "\\u0920\\u094D\\u092F",          /* t\\u0323hya */
2631         "\\u0921\\u094D\\u092F",          /* d\\u0323ya  */
2632         "\\u0922\\u094D\\u092F",          /* d\\u0323hya */
2633      // "hma",                         /* hma         */
2634         "\\u0939\\u094D\\u092F",          /* hya         */
2635         "\\u0936\\u0943",                 /* s\\u0301r\\u0325a  */
2636         "\\u0936\\u094D\\u091A",          /* s\\u0301ca  */
2637         "\\u090d",                        /* e\\u0306    */
2638         "\\u0938\\u0902\\u091C\\u0940\\u092C\\u094D \\u0938\\u0947\\u0928\\u0917\\u0941\\u092A\\u094D\\u0924",
2639         "\\u0906\\u0928\\u0902\\u0926\\u094D \\u0935\\u0926\\u094D\\u0926\\u093F\\u0930\\u093E\\u091C\\u0941",
2640         "\\u0906",
2641         "\\u0905",
2642     };
2643     UErrorCode status = U_ZERO_ERROR;
2644     UParseError parseError;
2645     UnicodeString message;
2646     Transliterator* latinToDev=Transliterator::createInstance("Latin-Devanagari", UTRANS_FORWARD, parseError, status);
2647     Transliterator* devToLatin=Transliterator::createInstance("Devanagari-Latin", UTRANS_FORWARD, parseError, status);
2648     if(U_FAILURE(status)){
2649         dataerrln("FAIL: construction " +   UnicodeString(" Error: ") + u_errorName(status));
2650         dataerrln("PreContext: " + prettify(parseError.preContext) + " PostContext: " + prettify( parseError.postContext) );
2651         return;
2652     }
2653     UnicodeString gotResult;
2654     for(int i= 0; i<MAX_LEN; i++){
2655         gotResult = source[i];
2656         expect(*latinToDev,CharsToUnicodeString(source[i]),CharsToUnicodeString(expected[i]));
2657         expect(*devToLatin,CharsToUnicodeString(expected[i]),CharsToUnicodeString(source[i]));
2658     }
2659     delete latinToDev;
2660     delete devToLatin;
2661 }
2662
2663 void TransliteratorTest::TestTeluguLatinRT(){
2664     const int MAX_LEN=10;
2665     const char* const source[MAX_LEN] = {
2666         "raghur\\u0101m vi\\u015Bvan\\u0101dha",                         /* Raghuram Viswanadha    */
2667         "\\u0101nand vaddir\\u0101ju",                                   /* Anand Vaddiraju        */
2668         "r\\u0101j\\u012Bv ka\\u015Barab\\u0101da",                      /* Rajeev Kasarabada      */
2669         "san\\u0304j\\u012Bv ka\\u015Barab\\u0101da",                    /* sanjeev kasarabada     */
2670         "san\\u0304j\\u012Bb sen'gupta",                                 /* sanjib sengupata       */
2671         "amar\\u0113ndra hanum\\u0101nula",                              /* Amarendra hanumanula   */
2672         "ravi kum\\u0101r vi\\u015Bvan\\u0101dha",                       /* Ravi Kumar Viswanadha  */
2673         "\\u0101ditya kandr\\u0113gula",                                 /* Aditya Kandregula      */
2674         "\\u015Br\\u012Bdhar ka\\u1E47\\u1E6Dama\\u015Be\\u1E6D\\u1E6Di",/* Shridhar Kantamsetty   */
2675         "m\\u0101dhav de\\u015Be\\u1E6D\\u1E6Di"                         /* Madhav Desetty         */
2676     };
2677
2678     const char* const expected[MAX_LEN] = {
2679         "\\u0c30\\u0c18\\u0c41\\u0c30\\u0c3e\\u0c2e\\u0c4d \\u0c35\\u0c3f\\u0c36\\u0c4d\\u0c35\\u0c28\\u0c3e\\u0c27",
2680         "\\u0c06\\u0c28\\u0c02\\u0c26\\u0c4d \\u0C35\\u0C26\\u0C4D\\u0C26\\u0C3F\\u0C30\\u0C3E\\u0C1C\\u0C41",
2681         "\\u0c30\\u0c3e\\u0c1c\\u0c40\\u0c35\\u0c4d \\u0c15\\u0c36\\u0c30\\u0c2c\\u0c3e\\u0c26",
2682         "\\u0c38\\u0c02\\u0c1c\\u0c40\\u0c35\\u0c4d \\u0c15\\u0c36\\u0c30\\u0c2c\\u0c3e\\u0c26",
2683         "\\u0c38\\u0c02\\u0c1c\\u0c40\\u0c2c\\u0c4d \\u0c38\\u0c46\\u0c28\\u0c4d\\u0c17\\u0c41\\u0c2a\\u0c4d\\u0c24",
2684         "\\u0c05\\u0c2e\\u0c30\\u0c47\\u0c02\\u0c26\\u0c4d\\u0c30 \\u0c39\\u0c28\\u0c41\\u0c2e\\u0c3e\\u0c28\\u0c41\\u0c32",
2685         "\\u0c30\\u0c35\\u0c3f \\u0c15\\u0c41\\u0c2e\\u0c3e\\u0c30\\u0c4d \\u0c35\\u0c3f\\u0c36\\u0c4d\\u0c35\\u0c28\\u0c3e\\u0c27",
2686         "\\u0c06\\u0c26\\u0c3f\\u0c24\\u0c4d\\u0c2f \\u0C15\\u0C02\\u0C26\\u0C4D\\u0C30\\u0C47\\u0C17\\u0C41\\u0c32",
2687         "\\u0c36\\u0c4d\\u0c30\\u0c40\\u0C27\\u0C30\\u0C4D \\u0c15\\u0c02\\u0c1f\\u0c2e\\u0c36\\u0c46\\u0c1f\\u0c4d\\u0c1f\\u0c3f",
2688         "\\u0c2e\\u0c3e\\u0c27\\u0c35\\u0c4d \\u0c26\\u0c46\\u0c36\\u0c46\\u0c1f\\u0c4d\\u0c1f\\u0c3f",
2689     };
2690
2691     UErrorCode status = U_ZERO_ERROR;
2692     UParseError parseError;
2693     UnicodeString message;
2694     Transliterator* latinToDev=Transliterator::createInstance("Latin-Telugu", UTRANS_FORWARD, parseError, status);
2695     Transliterator* devToLatin=Transliterator::createInstance("Telugu-Latin", UTRANS_FORWARD, parseError, status);
2696     if(U_FAILURE(status)){
2697         dataerrln("FAIL: construction " +   UnicodeString(" Error: ") + u_errorName(status));
2698         dataerrln("PreContext: " + prettify(parseError.preContext) + " PostContext: " + prettify( parseError.postContext) );
2699         return;
2700     }
2701     UnicodeString gotResult;
2702     for(int i= 0; i<MAX_LEN; i++){
2703         gotResult = source[i];
2704         expect(*latinToDev,CharsToUnicodeString(source[i]),CharsToUnicodeString(expected[i]));
2705         expect(*devToLatin,CharsToUnicodeString(expected[i]),CharsToUnicodeString(source[i]));
2706     }
2707     delete latinToDev;
2708     delete devToLatin;
2709 }
2710
2711 void TransliteratorTest::TestSanskritLatinRT(){
2712     const int MAX_LEN =16;
2713     const char* const source[MAX_LEN] = {
2714         "rmk\\u1E63\\u0113t",
2715         "\\u015Br\\u012Bmad",
2716         "bhagavadg\\u012Bt\\u0101",
2717         "adhy\\u0101ya",
2718         "arjuna",
2719         "vi\\u1E63\\u0101da",
2720         "y\\u014Dga",
2721         "dhr\\u0325tar\\u0101\\u1E63\\u1E6Dra",
2722         "uv\\u0101cr\\u0325",
2723         "dharmak\\u1E63\\u0113tr\\u0113",
2724         "kuruk\\u1E63\\u0113tr\\u0113",
2725         "samav\\u0113t\\u0101",
2726         "yuyutsava\\u1E25",
2727         "m\\u0101mak\\u0101\\u1E25",
2728     // "p\\u0101\\u1E47\\u1E0Dav\\u0101\\u015Bcaiva",
2729         "kimakurvata",
2730         "san\\u0304java",
2731     };
2732     const char* const expected[MAX_LEN] = {
2733         "\\u0930\\u094D\\u092E\\u094D\\u0915\\u094D\\u0937\\u0947\\u0924\\u094D",
2734         "\\u0936\\u094d\\u0930\\u0940\\u092e\\u0926\\u094d",
2735         "\\u092d\\u0917\\u0935\\u0926\\u094d\\u0917\\u0940\\u0924\\u093e",
2736         "\\u0905\\u0927\\u094d\\u092f\\u093e\\u092f",
2737         "\\u0905\\u0930\\u094d\\u091c\\u0941\\u0928",
2738         "\\u0935\\u093f\\u0937\\u093e\\u0926",
2739         "\\u092f\\u094b\\u0917",
2740         "\\u0927\\u0943\\u0924\\u0930\\u093e\\u0937\\u094d\\u091f\\u094d\\u0930",
2741         "\\u0909\\u0935\\u093E\\u091A\\u0943",
2742         "\\u0927\\u0930\\u094d\\u092e\\u0915\\u094d\\u0937\\u0947\\u0924\\u094d\\u0930\\u0947",
2743         "\\u0915\\u0941\\u0930\\u0941\\u0915\\u094d\\u0937\\u0947\\u0924\\u094d\\u0930\\u0947",
2744         "\\u0938\\u092e\\u0935\\u0947\\u0924\\u093e",
2745         "\\u092f\\u0941\\u092f\\u0941\\u0924\\u094d\\u0938\\u0935\\u0903",
2746         "\\u092e\\u093e\\u092e\\u0915\\u093e\\u0903",
2747     //"\\u092a\\u093e\\u0923\\u094d\\u0921\\u0935\\u093e\\u0936\\u094d\\u091a\\u0948\\u0935",
2748         "\\u0915\\u093f\\u092e\\u0915\\u0941\\u0930\\u094d\\u0935\\u0924",
2749         "\\u0938\\u0902\\u091c\\u0935",
2750     };
2751     UErrorCode status = U_ZERO_ERROR;
2752     UParseError parseError;
2753     UnicodeString message;
2754     Transliterator* latinToDev=Transliterator::createInstance("Latin-Devanagari", UTRANS_FORWARD, parseError, status);
2755     Transliterator* devToLatin=Transliterator::createInstance("Devanagari-Latin", UTRANS_FORWARD, parseError, status);
2756     if(U_FAILURE(status)){
2757         dataerrln("FAIL: construction " +   UnicodeString(" Error: ") + u_errorName(status));
2758         dataerrln("PreContext: " + prettify(parseError.preContext) + " PostContext: " + prettify( parseError.postContext) );
2759         return;
2760     }
2761     UnicodeString gotResult;
2762     for(int i= 0; i<MAX_LEN; i++){
2763         gotResult = source[i];
2764         expect(*latinToDev,CharsToUnicodeString(source[i]),CharsToUnicodeString(expected[i]));
2765         expect(*devToLatin,CharsToUnicodeString(expected[i]),CharsToUnicodeString(source[i]));
2766     }
2767     delete latinToDev;
2768     delete devToLatin;
2769 }
2770
2771
2772 void TransliteratorTest::TestCompoundLatinRT(){
2773     const char* const source[] = {
2774         "rmk\\u1E63\\u0113t",
2775         "\\u015Br\\u012Bmad",
2776         "bhagavadg\\u012Bt\\u0101",
2777         "adhy\\u0101ya",
2778         "arjuna",
2779         "vi\\u1E63\\u0101da",
2780         "y\\u014Dga",
2781         "dhr\\u0325tar\\u0101\\u1E63\\u1E6Dra",
2782         "uv\\u0101cr\\u0325",
2783         "dharmak\\u1E63\\u0113tr\\u0113",
2784         "kuruk\\u1E63\\u0113tr\\u0113",
2785         "samav\\u0113t\\u0101",
2786         "yuyutsava\\u1E25",
2787         "m\\u0101mak\\u0101\\u1E25",
2788      // "p\\u0101\\u1E47\\u1E0Dav\\u0101\\u015Bcaiva",
2789         "kimakurvata",
2790         "san\\u0304java"
2791     };
2792     const int MAX_LEN = UPRV_LENGTHOF(source);
2793     const char* const expected[MAX_LEN] = {
2794         "\\u0930\\u094D\\u092E\\u094D\\u0915\\u094D\\u0937\\u0947\\u0924\\u094D",
2795         "\\u0936\\u094d\\u0930\\u0940\\u092e\\u0926\\u094d",
2796         "\\u092d\\u0917\\u0935\\u0926\\u094d\\u0917\\u0940\\u0924\\u093e",
2797         "\\u0905\\u0927\\u094d\\u092f\\u093e\\u092f",
2798         "\\u0905\\u0930\\u094d\\u091c\\u0941\\u0928",
2799         "\\u0935\\u093f\\u0937\\u093e\\u0926",
2800         "\\u092f\\u094b\\u0917",
2801         "\\u0927\\u0943\\u0924\\u0930\\u093e\\u0937\\u094d\\u091f\\u094d\\u0930",
2802         "\\u0909\\u0935\\u093E\\u091A\\u0943",
2803         "\\u0927\\u0930\\u094d\\u092e\\u0915\\u094d\\u0937\\u0947\\u0924\\u094d\\u0930\\u0947",
2804         "\\u0915\\u0941\\u0930\\u0941\\u0915\\u094d\\u0937\\u0947\\u0924\\u094d\\u0930\\u0947",
2805         "\\u0938\\u092e\\u0935\\u0947\\u0924\\u093e",
2806         "\\u092f\\u0941\\u092f\\u0941\\u0924\\u094d\\u0938\\u0935\\u0903",
2807         "\\u092e\\u093e\\u092e\\u0915\\u093e\\u0903",
2808     //  "\\u092a\\u093e\\u0923\\u094d\\u0921\\u0935\\u093e\\u0936\\u094d\\u091a\\u0948\\u0935",
2809         "\\u0915\\u093f\\u092e\\u0915\\u0941\\u0930\\u094d\\u0935\\u0924",
2810         "\\u0938\\u0902\\u091c\\u0935"
2811     };
2812     if(MAX_LEN != UPRV_LENGTHOF(expected)) {
2813         errln("error in TestCompoundLatinRT: source[] and expected[] have different lengths!");
2814         return;
2815     }
2816
2817     UErrorCode status = U_ZERO_ERROR;
2818     UParseError parseError;
2819     UnicodeString message;
2820     Transliterator* devToLatinToDev  =Transliterator::createInstance("Devanagari-Latin;Latin-Devanagari", UTRANS_FORWARD, parseError, status);
2821     Transliterator* latinToDevToLatin=Transliterator::createInstance("Latin-Devanagari;Devanagari-Latin", UTRANS_FORWARD, parseError, status);
2822     Transliterator* devToTelToDev    =Transliterator::createInstance("Devanagari-Telugu;Telugu-Devanagari", UTRANS_FORWARD, parseError, status);
2823     Transliterator* latinToTelToLatin=Transliterator::createInstance("Latin-Telugu;Telugu-Latin", UTRANS_FORWARD, parseError, status);
2824
2825     if(U_FAILURE(status)){
2826         dataerrln("FAIL: construction " +   UnicodeString(" Error: ") + u_errorName(status));
2827         dataerrln("PreContext: " + prettify(parseError.preContext) + " PostContext: " + prettify( parseError.postContext) );
2828         return;
2829     }
2830     UnicodeString gotResult;
2831     for(int i= 0; i<MAX_LEN; i++){
2832         gotResult = source[i];
2833         expect(*devToLatinToDev,CharsToUnicodeString(expected[i]),CharsToUnicodeString(expected[i]));
2834         expect(*latinToDevToLatin,CharsToUnicodeString(source[i]),CharsToUnicodeString(source[i]));
2835         expect(*latinToTelToLatin,CharsToUnicodeString(source[i]),CharsToUnicodeString(source[i]));
2836
2837     }
2838     delete(latinToDevToLatin);
2839     delete(devToLatinToDev);
2840     delete(devToTelToDev);
2841     delete(latinToTelToLatin);
2842 }
2843
2844 /**
2845  * Test Gurmukhi-Devanagari Tippi and Bindi
2846  */
2847 void TransliteratorTest::TestGurmukhiDevanagari(){
2848     // the rule says:
2849     // (\u0902) (when preceded by vowel)      --->  (\u0A02)
2850     // (\u0902) (when preceded by consonant)  --->  (\u0A70)
2851     UErrorCode status = U_ZERO_ERROR;
2852     UnicodeSet vowel(UnicodeString("[\\u0905-\\u090A \\u090F\\u0910\\u0913\\u0914 \\u093e-\\u0942\\u0947\\u0948\\u094B\\u094C\\u094D]", -1, US_INV).unescape(), status);
2853     UnicodeSet non_vowel(UnicodeString("[\\u0915-\\u0928\\u092A-\\u0930]", -1, US_INV).unescape(), status);
2854     UParseError parseError;
2855
2856     UnicodeSetIterator vIter(vowel);
2857     UnicodeSetIterator nvIter(non_vowel);
2858     Transliterator* trans = Transliterator::createInstance("Devanagari-Gurmukhi",UTRANS_FORWARD, parseError, status);
2859     if(U_FAILURE(status)) {
2860       dataerrln("Error creating transliterator %s", u_errorName(status));
2861       delete trans;
2862       return;
2863     }
2864     UnicodeString src (" \\u0902", -1, US_INV);
2865     UnicodeString expected(" \\u0A02", -1, US_INV);
2866     src = src.unescape();
2867     expected= expected.unescape();
2868
2869     while(vIter.next()){
2870         src.setCharAt(0,(UChar) vIter.getCodepoint());
2871         expected.setCharAt(0,(UChar) (vIter.getCodepoint()+0x0100));
2872         expect(*trans,src,expected);
2873     }
2874
2875     expected.setCharAt(1,0x0A70);
2876     while(nvIter.next()){
2877         //src.setCharAt(0,(char) nvIter.codepoint);
2878         src.setCharAt(0,(UChar)nvIter.getCodepoint());
2879         expected.setCharAt(0,(UChar) (nvIter.getCodepoint()+0x0100));
2880         expect(*trans,src,expected);
2881     }
2882     delete trans;
2883 }
2884 /**
2885  * Test instantiation from a locale.
2886  */
2887 void TransliteratorTest::TestLocaleInstantiation(void) {
2888     UParseError pe;
2889     UErrorCode ec = U_ZERO_ERROR;
2890     Transliterator *t = Transliterator::createInstance("ru_RU-Latin", UTRANS_FORWARD, pe, ec);
2891     if (U_FAILURE(ec)) {
2892         dataerrln("FAIL: createInstance(ru_RU-Latin) - %s", u_errorName(ec));
2893         delete t;
2894         return;
2895     }
2896     expect(*t, CharsToUnicodeString("\\u0430"), "a");
2897     delete t;
2898
2899     t = Transliterator::createInstance("en-el", UTRANS_FORWARD, pe, ec);
2900     if (U_FAILURE(ec)) {
2901         errln("FAIL: createInstance(en-el)");
2902         delete t;
2903         return;
2904     }
2905     expect(*t, "a", CharsToUnicodeString("\\u03B1"));
2906     delete t;
2907 }
2908
2909 /**
2910  * Test title case handling of accent (should ignore accents)
2911  */
2912 void TransliteratorTest::TestTitleAccents(void) {
2913     UParseError pe;
2914     UErrorCode ec = U_ZERO_ERROR;
2915     Transliterator *t = Transliterator::createInstance("Title", UTRANS_FORWARD, pe, ec);
2916     if (U_FAILURE(ec)) {
2917         errln("FAIL: createInstance(Title)");
2918         delete t;
2919         return;
2920     }
2921     expect(*t, CharsToUnicodeString("a\\u0300b can't abe"), CharsToUnicodeString("A\\u0300b Can't Abe"));
2922     delete t;
2923 }
2924
2925 /**
2926  * Basic test of a locale resource based rule.
2927  */
2928 void TransliteratorTest::TestLocaleResource() {
2929     const char* DATA[] = {
2930         // id                    from               to
2931         //"Latin-Greek/UNGEGN",    "b",               "\\u03bc\\u03c0",
2932         "Latin-el",              "b",               "\\u03bc\\u03c0",
2933         "Latin-Greek",           "b",               "\\u03B2",
2934         "Greek-Latin/UNGEGN",    "\\u03B2",         "v",
2935         "el-Latin",              "\\u03B2",         "v",
2936         "Greek-Latin",           "\\u03B2",         "b",
2937     };
2938     const int32_t DATA_length = UPRV_LENGTHOF(DATA);
2939     for (int32_t i=0; i<DATA_length; i+=3) {
2940         UParseError pe;
2941         UErrorCode ec = U_ZERO_ERROR;
2942         Transliterator *t = Transliterator::createInstance(DATA[i], UTRANS_FORWARD, pe, ec);
2943         if (U_FAILURE(ec)) {
2944             dataerrln((UnicodeString)"FAIL: createInstance(" + DATA[i] + ") - " + u_errorName(ec));
2945             delete t;
2946             continue;
2947         }
2948         expect(*t, CharsToUnicodeString(DATA[i+1]),
2949                CharsToUnicodeString(DATA[i+2]));
2950         delete t;
2951     }
2952 }
2953
2954 /**
2955  * Make sure parse errors reference the right line.
2956  */
2957 void TransliteratorTest::TestParseError() {
2958     static const char* rule =
2959         "a > b;\n"
2960         "# more stuff\n"
2961         "d << b;";
2962     UErrorCode ec = U_ZERO_ERROR;
2963     UParseError pe;
2964     Transliterator *t = Transliterator::createFromRules("ID", rule, UTRANS_FORWARD, pe, ec);
2965     delete t;
2966     if (U_FAILURE(ec)) {
2967         UnicodeString err(pe.preContext);
2968         err.append((UChar)124/*|*/).append(pe.postContext);
2969         if (err.indexOf("d << b") >= 0) {
2970             logln("Ok: " + err);
2971         } else {
2972             errln("FAIL: " + err);
2973         }
2974     }
2975     else {
2976         errln("FAIL: no syntax error");
2977     }
2978     static const char* maskingRule =
2979         "a>x;\n"
2980         "# more stuff\n"
2981         "ab>y;";
2982     ec = U_ZERO_ERROR;
2983     delete Transliterator::createFromRules("ID", maskingRule, UTRANS_FORWARD, pe, ec);
2984     if (ec != U_RULE_MASK_ERROR) {
2985         errln("FAIL: returned %s instead of U_RULE_MASK_ERROR", u_errorName(ec));
2986     }
2987     else if (UnicodeString("a > x;") != UnicodeString(pe.preContext)) {
2988         errln("FAIL: did not get expected precontext");
2989     }
2990     else if (UnicodeString("ab > y;") != UnicodeString(pe.postContext)) {
2991         errln("FAIL: did not get expected postcontext");
2992     }
2993 }
2994
2995 /**
2996  * Make sure sets on output are disallowed.
2997  */
2998 void TransliteratorTest::TestOutputSet() {
2999     UnicodeString rule = "$set = [a-cm-n]; b > $set;";
3000     UErrorCode ec = U_ZERO_ERROR;
3001     UParseError pe;
3002     Transliterator *t = Transliterator::createFromRules("ID", rule, UTRANS_FORWARD, pe, ec);
3003     delete t;
3004     if (U_FAILURE(ec)) {
3005         UnicodeString err(pe.preContext);
3006         err.append((UChar)124/*|*/).append(pe.postContext);
3007         logln("Ok: " + err);
3008         return;
3009     }
3010     errln("FAIL: No syntax error");
3011 }
3012
3013 /**
3014  * Test the use variable range pragma, making sure that use of
3015  * variable range characters is detected and flagged as an error.
3016  */
3017 void TransliteratorTest::TestVariableRange() {
3018     UnicodeString rule = "use variable range 0x70 0x72; a > A; b > B; q > Q;";
3019     UErrorCode ec = U_ZERO_ERROR;
3020     UParseError pe;
3021     Transliterator *t = Transliterator::createFromRules("ID", rule, UTRANS_FORWARD, pe, ec);
3022     delete t;
3023     if (U_FAILURE(ec)) {
3024         UnicodeString err(pe.preContext);
3025         err.append((UChar)124/*|*/).append(pe.postContext);
3026         logln("Ok: " + err);
3027         return;
3028     }
3029     errln("FAIL: No syntax error");
3030 }
3031
3032 /**
3033  * Test invalid post context error handling
3034  */
3035 void TransliteratorTest::TestInvalidPostContext() {
3036     UnicodeString rule = "a}b{c>d;";
3037     UErrorCode ec = U_ZERO_ERROR;
3038     UParseError pe;
3039     Transliterator *t = Transliterator::createFromRules("ID", rule, UTRANS_FORWARD, pe, ec);
3040     delete t;
3041     if (U_FAILURE(ec)) {
3042         UnicodeString err(pe.preContext);
3043         err.append((UChar)124/*|*/).append(pe.postContext);
3044         if (err.indexOf("a}b{c") >= 0) {
3045             logln("Ok: " + err);
3046         } else {
3047             errln("FAIL: " + err);
3048         }
3049         return;
3050     }
3051     errln("FAIL: No syntax error");
3052 }
3053
3054 /**
3055  * Test ID form variants
3056  */
3057 void TransliteratorTest::TestIDForms() {
3058     const char* DATA[] = {
3059         "NFC", NULL, "NFD",
3060         "nfd", NULL, "NFC", // make sure case is ignored
3061         "Any-NFKD", NULL, "Any-NFKC",
3062         "Null", NULL, "Null",
3063         "-nfkc", "nfkc", "NFKD",
3064         "-nfkc/", "nfkc", "NFKD",
3065         "Latin-Greek/UNGEGN", NULL, "Greek-Latin/UNGEGN",
3066         "Greek/UNGEGN-Latin", "Greek-Latin/UNGEGN", "Latin-Greek/UNGEGN",
3067         "Bengali-Devanagari/", "Bengali-Devanagari", "Devanagari-Bengali",
3068         "Source-", NULL, NULL,
3069         "Source/Variant-", NULL, NULL,
3070         "Source-/Variant", NULL, NULL,
3071         "/Variant", NULL, NULL,
3072         "/Variant-", NULL, NULL,
3073         "-/Variant", NULL, NULL,
3074         "-/", NULL, NULL,
3075         "-", NULL, NULL,
3076         "/", NULL, NULL,
3077     };
3078     const int32_t DATA_length = UPRV_LENGTHOF(DATA);
3079
3080     for (int32_t i=0; i<DATA_length; i+=3) {
3081         const char* ID = DATA[i];
3082         const char* expID = DATA[i+1];
3083         const char* expInvID = DATA[i+2];
3084         UBool expValid = (expInvID != NULL);
3085         if (expID == NULL) {
3086             expID = ID;
3087         }
3088         UParseError pe;
3089         UErrorCode ec = U_ZERO_ERROR;
3090         Transliterator *t =
3091             Transliterator::createInstance(ID, UTRANS_FORWARD, pe, ec);
3092         if (U_FAILURE(ec)) {
3093             if (!expValid) {
3094                 logln((UnicodeString)"Ok: getInstance(" + ID +") => " + u_errorName(ec));
3095             } else {
3096                 dataerrln((UnicodeString)"FAIL: Couldn't create " + ID + " - " + u_errorName(ec));
3097             }
3098             delete t;
3099             continue;
3100         }
3101         Transliterator *u = t->createInverse(ec);
3102         if (U_FAILURE(ec)) {
3103             errln((UnicodeString)"FAIL: Couldn't create inverse of " + ID);
3104             delete t;
3105             delete u;
3106             continue;
3107         }
3108         if (t->getID() == expID &&
3109             u->getID() == expInvID) {
3110             logln((UnicodeString)"Ok: " + ID + ".getInverse() => " + expInvID);
3111         } else {
3112             errln((UnicodeString)"FAIL: getInstance(" + ID + ") => " +
3113                   t->getID() + " x getInverse() => " + u->getID() +
3114                   ", expected " + expInvID);
3115         }
3116         delete t;
3117         delete u;
3118     }
3119 }
3120
3121 static const UChar SPACE[]   = {32,0};
3122 static const UChar NEWLINE[] = {10,0};
3123 static const UChar RETURN[]  = {13,0};
3124 static const UChar EMPTY[]   = {0};
3125
3126 void TransliteratorTest::checkRules(const UnicodeString& label, Transliterator& t2,
3127                                     const UnicodeString& testRulesForward) {
3128     UnicodeString rules2; t2.toRules(rules2, TRUE);
3129     //rules2 = TestUtility.replaceAll(rules2, new UnicodeSet("[' '\n\r]"), "");
3130     rules2.findAndReplace(SPACE, EMPTY);
3131     rules2.findAndReplace(NEWLINE, EMPTY);
3132     rules2.findAndReplace(RETURN, EMPTY);
3133
3134     UnicodeString testRules(testRulesForward); testRules.findAndReplace(SPACE, EMPTY);
3135
3136     if (rules2 != testRules) {
3137         errln(label);
3138         logln((UnicodeString)"GENERATED RULES: " + rules2);
3139         logln((UnicodeString)"SHOULD BE:       " + testRulesForward);
3140     }
3141 }
3142
3143 /**
3144  * Mark's toRules test.
3145  */
3146 void TransliteratorTest::TestToRulesMark() {
3147     const char* testRules =
3148         "::[[:Latin:][:Mark:]];"
3149         "::NFKD (NFC);"
3150         "::Lower (Lower);"
3151         "a <> \\u03B1;" // alpha
3152         "::NFKC (NFD);"
3153         "::Upper (Lower);"
3154         "::Lower ();"
3155         "::([[:Greek:][:Mark:]]);"
3156         ;
3157     const char* testRulesForward =
3158         "::[[:Latin:][:Mark:]];"
3159         "::NFKD(NFC);"
3160         "::Lower(Lower);"
3161         "a > \\u03B1;"
3162         "::NFKC(NFD);"
3163         "::Upper (Lower);"
3164         "::Lower ();"
3165         ;
3166     const char* testRulesBackward =
3167         "::[[:Greek:][:Mark:]];"
3168         "::Lower (Upper);"
3169         "::NFD(NFKC);"
3170         "\\u03B1 > a;"
3171         "::Lower(Lower);"
3172         "::NFC(NFKD);"
3173         ;
3174     UnicodeString source = CharsToUnicodeString("\\u00E1"); // a-acute
3175     UnicodeString target = CharsToUnicodeString("\\u03AC"); // alpha-acute
3176
3177     UParseError pe;
3178     UErrorCode ec = U_ZERO_ERROR;
3179     Transliterator *t2 = Transliterator::createFromRules("source-target", UnicodeString(testRules, -1, US_INV), UTRANS_FORWARD, pe, ec);
3180     Transliterator *t3 = Transliterator::createFromRules("target-source", UnicodeString(testRules, -1, US_INV), UTRANS_REVERSE, pe, ec);
3181
3182     if (U_FAILURE(ec)) {
3183         delete t2;
3184         delete t3;
3185         dataerrln((UnicodeString)"FAIL: createFromRules => " + u_errorName(ec));
3186         return;
3187     }
3188
3189     expect(*t2, source, target);
3190     expect(*t3, target, source);
3191
3192     checkRules("Failed toRules FORWARD", *t2, UnicodeString(testRulesForward, -1, US_INV));
3193     checkRules("Failed toRules BACKWARD", *t3, UnicodeString(testRulesBackward, -1, US_INV));
3194
3195     delete t2;
3196     delete t3;
3197 }
3198
3199 /**
3200  * Test Escape and Unescape transliterators.
3201  */
3202 void TransliteratorTest::TestEscape() {
3203     UParseError pe;
3204     UErrorCode ec;
3205     Transliterator *t;
3206
3207     ec = U_ZERO_ERROR;
3208     t = Transliterator::createInstance("Hex-Any", UTRANS_FORWARD, pe, ec);
3209     if (U_FAILURE(ec)) {
3210         errln((UnicodeString)"FAIL: createInstance");
3211     } else {
3212         expect(*t,
3213                UNICODE_STRING_SIMPLE("\\x{40}\\U00000031&#x32;&#81;"),
3214                "@12Q");
3215     }
3216     delete t;
3217
3218     ec = U_ZERO_ERROR;
3219     t = Transliterator::createInstance("Any-Hex/C", UTRANS_FORWARD, pe, ec);
3220     if (U_FAILURE(ec)) {
3221         errln((UnicodeString)"FAIL: createInstance");
3222     } else {
3223         expect(*t,
3224                CharsToUnicodeString("A\\U0010BEEF\\uFEED"),
3225                UNICODE_STRING_SIMPLE("\\u0041\\U0010BEEF\\uFEED"));
3226     }
3227     delete t;
3228
3229     ec = U_ZERO_ERROR;
3230     t = Transliterator::createInstance("Any-Hex/Java", UTRANS_FORWARD, pe, ec);
3231     if (U_FAILURE(ec)) {
3232         errln((UnicodeString)"FAIL: createInstance");
3233     } else {
3234         expect(*t,
3235                CharsToUnicodeString("A\\U0010BEEF\\uFEED"),
3236                UNICODE_STRING_SIMPLE("\\u0041\\uDBEF\\uDEEF\\uFEED"));
3237     }
3238     delete t;
3239
3240     ec = U_ZERO_ERROR;
3241     t = Transliterator::createInstance("Any-Hex/Perl", UTRANS_FORWARD, pe, ec);
3242     if (U_FAILURE(ec)) {
3243         errln((UnicodeString)"FAIL: createInstance");
3244     } else {
3245         expect(*t,
3246                CharsToUnicodeString("A\\U0010BEEF\\uFEED"),
3247                UNICODE_STRING_SIMPLE("\\x{41}\\x{10BEEF}\\x{FEED}"));
3248     }
3249     delete t;
3250 }
3251
3252
3253 void TransliteratorTest::TestAnchorMasking(){
3254     UnicodeString rule ("^a > Q; a > q;");
3255     UErrorCode status= U_ZERO_ERROR;
3256     UParseError parseError;
3257
3258     Transliterator* t = Transliterator::createFromRules("ID", rule, UTRANS_FORWARD,parseError,status);
3259     if(U_FAILURE(status)){
3260         errln(UnicodeString("FAIL: ") + "ID" +
3261               ".createFromRules() => bad rules" +
3262               /*", parse error " + parseError.code +*/
3263               ", line " + parseError.line +
3264               ", offset " + parseError.offset +
3265               ", context " + prettify(parseError.preContext, TRUE) +
3266               ", rules: " + prettify(rule, TRUE));
3267     }
3268     delete t;
3269 }
3270
3271 /**
3272  * Make sure display names of variants look reasonable.
3273  */
3274 void TransliteratorTest::TestDisplayName() {
3275 #if UCONFIG_NO_FORMATTING
3276     logln("Skipping, UCONFIG_NO_FORMATTING is set\n");
3277     return;
3278 #else
3279     static const char* DATA[] = {
3280         // ID, forward name, reverse name
3281         // Update the text as necessary -- the important thing is
3282         // not the text itself, but how various cases are handled.
3283
3284         // Basic test
3285         "Any-Hex", "Any to Hex Escape", "Hex Escape to Any",
3286
3287         // Variants
3288         "Any-Hex/Perl", "Any to Hex Escape/Perl", "Hex Escape to Any/Perl",
3289
3290         // Target-only IDs
3291         "NFC", "Any to NFC", "Any to NFD",
3292     };
3293
3294     int32_t DATA_length = UPRV_LENGTHOF(DATA);
3295
3296     Locale US("en", "US");
3297
3298     for (int32_t i=0; i<DATA_length; i+=3) {
3299         UnicodeString name;
3300         Transliterator::getDisplayName(DATA[i], US, name);
3301         if (name != DATA[i+1]) {
3302             dataerrln((UnicodeString)"FAIL: " + DATA[i] + ".getDisplayName() => " +
3303                   name + ", expected " + DATA[i+1]);
3304         } else {
3305             logln((UnicodeString)"Ok: " + DATA[i] + ".getDisplayName() => " + name);
3306         }
3307         UErrorCode ec = U_ZERO_ERROR;
3308         UParseError pe;
3309         Transliterator *t = Transliterator::createInstance(DATA[i], UTRANS_REVERSE, pe, ec);
3310         if (U_FAILURE(ec)) {
3311             delete t;
3312             dataerrln("FAIL: createInstance failed - %s", u_errorName(ec));
3313             continue;
3314         }
3315         name = Transliterator::getDisplayName(t->getID(), US, name);
3316         if (name != DATA[i+2]) {
3317             dataerrln((UnicodeString)"FAIL: " + t->getID() + ".getDisplayName() => " +
3318                   name + ", expected " + DATA[i+2]);
3319         } else {
3320             logln((UnicodeString)"Ok: " + t->getID() + ".getDisplayName() => " + name);
3321         }
3322         delete t;
3323     }
3324 #endif
3325 }
3326
3327 void TransliteratorTest::TestSpecialCases(void) {
3328     const UnicodeString registerRules[] = {
3329         "Any-Dev1", "x > X; y > Y;",
3330         "Any-Dev2", "XY > Z",
3331         "Greek-Latin/FAKE",
3332             CharsToUnicodeString
3333             ("[^[:L:][:M:]] { \\u03bc\\u03c0 > b ; \\u03bc\\u03c0 } [^[:L:][:M:]] > b ; [^[:L:][:M:]] { [\\u039c\\u03bc][\\u03a0\\u03c0] > B ; [\\u039c\\u03bc][\\u03a0\\u03c0] } [^[:L:][:M:]] > B ;"),
3334         "" // END MARKER
3335     };
3336
3337     const UnicodeString testCases[] = {
3338         // NORMALIZATION
3339         // should add more test cases
3340         "NFD" , CharsToUnicodeString("a\\u0300 \\u00E0 \\u1100\\u1161 \\uFF76\\uFF9E\\u03D3"), "",
3341         "NFC" , CharsToUnicodeString("a\\u0300 \\u00E0 \\u1100\\u1161 \\uFF76\\uFF9E\\u03D3"), "",
3342         "NFKD", CharsToUnicodeString("a\\u0300 \\u00E0 \\u1100\\u1161 \\uFF76\\uFF9E\\u03D3"), "",
3343         "NFKC", CharsToUnicodeString("a\\u0300 \\u00E0 \\u1100\\u1161 \\uFF76\\uFF9E\\u03D3"), "",
3344
3345         // mp -> b BUG
3346         "Greek-Latin/UNGEGN", CharsToUnicodeString("(\\u03BC\\u03C0)"), "(b)",
3347         "Greek-Latin/FAKE", CharsToUnicodeString("(\\u03BC\\u03C0)"), "(b)",
3348
3349         // check for devanagari bug
3350         "nfd;Dev1;Dev2;nfc", "xy", "Z",
3351
3352         // ff, i, dotless-i, I, dotted-I, LJLjlj deseret deeDEE
3353         "Title", CharsToUnicodeString("ab'cD ffi\\u0131I\\u0130 \\u01C7\\u01C8\\u01C9 ") + DESERET_dee + DESERET_DEE,
3354                  CharsToUnicodeString("Ab'cd Ffi\\u0131ii\\u0307 \\u01C8\\u01C9\\u01C9 ") + DESERET_DEE + DESERET_dee,
3355
3356         //TODO: enable this test once Titlecase works right
3357         /*
3358         "Title", CharsToUnicodeString("\\uFB00i\\u0131I\\u0130 \\u01C7\\u01C8\\u01C9 ") + DESERET_dee + DESERET_DEE,
3359                  CharsToUnicodeString("Ffi\\u0131ii \\u01C8\\u01C9\\u01C9 ") + DESERET_DEE + DESERET_dee,
3360                  */
3361         "Upper", CharsToUnicodeString("ab'cD \\uFB00i\\u0131I\\u0130 \\u01C7\\u01C8\\u01C9 ") + DESERET_dee + DESERET_DEE,
3362                  CharsToUnicodeString("AB'CD FFIII\\u0130 \\u01C7\\u01C7\\u01C7 ") + DESERET_DEE + DESERET_DEE,
3363         "Lower", CharsToUnicodeString("ab'cD \\uFB00i\\u0131I\\u0130 \\u01C7\\u01C8\\u01C9 ") + DESERET_dee + DESERET_DEE,
3364                  CharsToUnicodeString("ab'cd \\uFB00i\\u0131ii\\u0307 \\u01C9\\u01C9\\u01C9 ") + DESERET_dee + DESERET_dee,
3365
3366         "Upper", CharsToUnicodeString("ab'cD \\uFB00i\\u0131I\\u0130 \\u01C7\\u01C8\\u01C9 ") + DESERET_dee + DESERET_DEE, "",
3367         "Lower", CharsToUnicodeString("ab'cD \\uFB00i\\u0131I\\u0130 \\u01C7\\u01C8\\u01C9 ") + DESERET_dee + DESERET_DEE, "",
3368
3369          // FORMS OF S
3370         "Greek-Latin/UNGEGN",  CharsToUnicodeString("\\u03C3 \\u03C3\\u03C2 \\u03C2\\u03C3"),
3371                                CharsToUnicodeString("s ss s\\u0331s\\u0331") ,
3372         "Latin-Greek/UNGEGN",  CharsToUnicodeString("s ss s\\u0331s\\u0331"),
3373                                CharsToUnicodeString("\\u03C3 \\u03C3\\u03C2 \\u03C2\\u03C3") ,
3374         "Greek-Latin",  CharsToUnicodeString("\\u03C3 \\u03C3\\u03C2 \\u03C2\\u03C3"),
3375                         CharsToUnicodeString("s ss s\\u0331s\\u0331") ,
3376         "Latin-Greek",  CharsToUnicodeString("s ss s\\u0331s\\u0331"),
3377                         CharsToUnicodeString("\\u03C3 \\u03C3\\u03C2 \\u03C2\\u03C3"),
3378         // Tatiana bug
3379         // Upper: TAT\\u02B9\\u00C2NA
3380         // Lower: tat\\u02B9\\u00E2na
3381         // Title: Tat\\u02B9\\u00E2na
3382         "Upper", CharsToUnicodeString("tat\\u02B9\\u00E2na"),
3383                  CharsToUnicodeString("TAT\\u02B9\\u00C2NA"),
3384         "Lower", CharsToUnicodeString("TAT\\u02B9\\u00C2NA"),
3385                  CharsToUnicodeString("tat\\u02B9\\u00E2na"),
3386         "Title", CharsToUnicodeString("tat\\u02B9\\u00E2na"),
3387                  CharsToUnicodeString("Tat\\u02B9\\u00E2na"),
3388
3389         "" // END MARKER
3390     };
3391
3392     UParseError pos;
3393     int32_t i;
3394     for (i = 0; registerRules[i].length()!=0; i+=2) {
3395         UErrorCode status = U_ZERO_ERROR;
3396
3397         Transliterator *t = Transliterator::createFromRules(registerRules[0+i],
3398             registerRules[i+1], UTRANS_FORWARD, pos, status);
3399         if (U_FAILURE(status)) {
3400             dataerrln("Fails: Unable to create the transliterator from rules. - %s", u_errorName(status));
3401         } else {
3402             Transliterator::registerInstance(t);
3403         }
3404     }
3405     for (i = 0; testCases[i].length()!=0; i+=3) {
3406         UErrorCode ec = U_ZERO_ERROR;
3407         UParseError pe;
3408         const UnicodeString& name = testCases[i];
3409         Transliterator *t = Transliterator::createInstance(name, UTRANS_FORWARD, pe, ec);
3410         if (U_FAILURE(ec)) {
3411             dataerrln((UnicodeString)"FAIL: Couldn't create " + name + " - " + u_errorName(ec));
3412             delete t;
3413             continue;
3414         }
3415         const UnicodeString& id = t->getID();
3416         const UnicodeString& source = testCases[i+1];
3417         UnicodeString target;
3418
3419         // Automatic generation of targets, to make it simpler to add test cases (and more fail-safe)
3420
3421         if (testCases[i+2].length() > 0) {
3422             target = testCases[i+2];
3423         } else if (0==id.caseCompare("NFD", U_FOLD_CASE_DEFAULT)) {
3424             Normalizer::normalize(source, UNORM_NFD, 0, target, ec);
3425         } else if (0==id.caseCompare("NFC", U_FOLD_CASE_DEFAULT)) {
3426             Normalizer::normalize(source, UNORM_NFC, 0, target, ec);
3427         } else if (0==id.caseCompare("NFKD", U_FOLD_CASE_DEFAULT)) {
3428             Normalizer::normalize(source, UNORM_NFKD, 0, target, ec);
3429         } else if (0==id.caseCompare("NFKC", U_FOLD_CASE_DEFAULT)) {
3430             Normalizer::normalize(source, UNORM_NFKC, 0, target, ec);
3431         } else if (0==id.caseCompare("Lower", U_FOLD_CASE_DEFAULT)) {
3432             target = source;
3433             target.toLower(Locale::getUS());
3434         } else if (0==id.caseCompare("Upper", U_FOLD_CASE_DEFAULT)) {
3435             target = source;
3436             target.toUpper(Locale::getUS());
3437         }
3438         if (U_FAILURE(ec)) {
3439             errln((UnicodeString)"FAIL: Internal error normalizing " + source);
3440             continue;
3441         }
3442
3443         expect(*t, source, target);
3444         delete t;
3445     }
3446     for (i = 0; registerRules[i].length()!=0; i+=2) {
3447         Transliterator::unregister(registerRules[i]);
3448     }
3449 }
3450
3451 char* Char32ToEscapedChars(UChar32 ch, char* buffer) {
3452     if (ch <= 0xFFFF) {
3453         sprintf(buffer, "\\u%04x", (int)ch);
3454     } else {
3455         sprintf(buffer, "\\U%08x", (int)ch);
3456     }
3457     return buffer;
3458 }
3459
3460 void TransliteratorTest::TestSurrogateCasing (void) {
3461     // check that casing handles surrogates
3462     // titlecase is currently defective
3463     char buffer[20];
3464     UChar buffer2[20];
3465     UChar32 dee;
3466     U16_GET(DESERET_dee,0, 0, DESERET_dee.length(), dee);
3467     UnicodeString DEE(u_totitle(dee));
3468     if (DEE != DESERET_DEE) {
3469         err("Fails titlecase of surrogates");
3470         err(Char32ToEscapedChars(dee, buffer));
3471         err(", ");
3472         errln(Char32ToEscapedChars(DEE.char32At(0), buffer));
3473     }
3474
3475     UnicodeString deeDEETest=DESERET_dee + DESERET_DEE;
3476     UnicodeString deedeeTest = DESERET_dee + DESERET_dee;
3477     UnicodeString DEEDEETest = DESERET_DEE + DESERET_DEE;
3478     UErrorCode status= U_ZERO_ERROR;
3479
3480     u_strToUpper(buffer2, 20, deeDEETest.getBuffer(), deeDEETest.length(), NULL, &status);
3481     if (U_FAILURE(status) || (UnicodeString(buffer2)!= DEEDEETest)) {
3482         errln("Fails: Can't uppercase surrogates.");
3483     }
3484
3485     status= U_ZERO_ERROR;
3486     u_strToLower(buffer2, 20, deeDEETest.getBuffer(), deeDEETest.length(), NULL, &status);
3487     if (U_FAILURE(status) || (UnicodeString(buffer2)!= deedeeTest)) {
3488         errln("Fails: Can't lowercase surrogates.");
3489     }
3490 }
3491
3492 static void _trans(Transliterator& t, const UnicodeString& src,
3493                    UnicodeString& result) {
3494     result = src;
3495     t.transliterate(result);
3496 }
3497
3498 static void _trans(const UnicodeString& id, const UnicodeString& src,
3499                    UnicodeString& result, UErrorCode ec) {
3500     UParseError pe;
3501     Transliterator *t = Transliterator::createInstance(id, UTRANS_FORWARD, pe, ec);
3502     if (U_SUCCESS(ec)) {
3503         _trans(*t, src, result);
3504     }
3505     delete t;
3506 }
3507
3508 static UnicodeString _findMatch(const UnicodeString& source,
3509                                        const UnicodeString* pairs) {
3510     UnicodeString empty;
3511     for (int32_t i=0; pairs[i].length() > 0; i+=2) {
3512         if (0==source.caseCompare(pairs[i], U_FOLD_CASE_DEFAULT)) {
3513             return pairs[i+1];
3514         }
3515     }
3516     return empty;
3517 }
3518
3519 // Check to see that incremental gets at least part way through a reasonable string.
3520
3521 void TransliteratorTest::TestIncrementalProgress(void) {
3522     UErrorCode ec = U_ZERO_ERROR;
3523     UnicodeString latinTest = "The Quick Brown Fox.";
3524     UnicodeString devaTest;
3525     _trans("Latin-Devanagari", latinTest, devaTest, ec);
3526     UnicodeString kataTest;
3527     _trans("Latin-Katakana", latinTest, kataTest, ec);
3528     if (U_FAILURE(ec)) {
3529         errln("FAIL: Internal error");
3530         return;
3531     }
3532     const UnicodeString tests[] = {
3533         "Any", latinTest,
3534         "Latin", latinTest,
3535         "Halfwidth", latinTest,
3536         "Devanagari", devaTest,
3537         "Katakana", kataTest,
3538         "" // END MARKER
3539     };
3540
3541     UnicodeString test("The Quick Brown Fox Jumped Over The Lazy Dog.");
3542     int32_t i = 0, j=0, k=0;
3543     int32_t sources = Transliterator::countAvailableSources();
3544     for (i = 0; i < sources; i++) {
3545         UnicodeString source;
3546         Transliterator::getAvailableSource(i, source);
3547         UnicodeString test = _findMatch(source, tests);
3548         if (test.length() == 0) {
3549             logln((UnicodeString)"Skipping " + source + "-X");
3550             continue;
3551         }
3552         int32_t targets = Transliterator::countAvailableTargets(source);
3553         for (j = 0; j < targets; j++) {
3554             UnicodeString target;
3555             Transliterator::getAvailableTarget(j, source, target);
3556             int32_t variants = Transliterator::countAvailableVariants(source, target);
3557             for (k =0; k< variants; k++) {
3558                 UnicodeString variant;
3559                 UParseError err;
3560                 UErrorCode status = U_ZERO_ERROR;
3561
3562                 Transliterator::getAvailableVariant(k, source, target, variant);
3563                 UnicodeString id = source + "-" + target + "/" + variant;
3564
3565                 Transliterator *t = Transliterator::createInstance(id, UTRANS_FORWARD, err, status);
3566                 if (U_FAILURE(status)) {
3567                     dataerrln((UnicodeString)"FAIL: Could not create " + id);
3568                     delete t;
3569                     continue;
3570                 }
3571                 status = U_ZERO_ERROR;
3572                 CheckIncrementalAux(t, test);
3573
3574                 UnicodeString rev;
3575                 _trans(*t, test, rev);
3576                 Transliterator *inv = t->createInverse(status);
3577                 if (U_FAILURE(status)) {
3578                     // The following are forward-only, it is OK that creating an inverse will not work:
3579                     // 1. Devanagari-Arabic
3580                     // 2. Any-*/BGN
3581                     // 3. Any-*/UNGEGN
3582                     // If UCONFIG_NO_BREAK_ITERATION is on, Latin-Thai is also not expected to work.
3583                     if (    id.compare((UnicodeString)"Devanagari-Arabic/") != 0
3584                          && !(id.startsWith((UnicodeString)"Any-") &&
3585                                 (id.endsWith((UnicodeString)"/BGN") || id.endsWith((UnicodeString)"/UNGEGN") || id.endsWith((UnicodeString)"/MNS"))
3586                              )
3587 #if UCONFIG_NO_BREAK_ITERATION
3588                          && id.compare((UnicodeString)"Latin-Thai/") != 0
3589 #endif
3590                        )
3591                     {
3592                         errln((UnicodeString)"FAIL: Could not create inverse of " + id);
3593                     }
3594                     delete t;
3595                     delete inv;
3596                     continue;
3597                 }
3598                 CheckIncrementalAux(inv, rev);
3599                 delete t;
3600                 delete inv;
3601             }
3602         }
3603     }
3604 }
3605
3606 void TransliteratorTest::CheckIncrementalAux(const Transliterator* t,
3607                                                       const UnicodeString& input) {
3608     UErrorCode ec = U_ZERO_ERROR;
3609     UTransPosition pos;
3610     UnicodeString test = input;
3611
3612     pos.contextStart = 0;
3613     pos.contextLimit = input.length();
3614     pos.start = 0;
3615     pos.limit = input.length();
3616
3617     t->transliterate(test, pos, ec);
3618     if (U_FAILURE(ec)) {
3619         errln((UnicodeString)"FAIL: transliterate() error " + u_errorName(ec));
3620         return;
3621     }
3622     UBool gotError = FALSE;
3623     (void)gotError;    // Suppress set but not used warning.
3624
3625     // we have a few special cases. Any-Remove (pos.start = 0, but also = limit) and U+XXXXX?X?
3626
3627     if (pos.start == 0 && pos.limit != 0 && t->getID() != "Hex-Any/Unicode") {
3628         errln((UnicodeString)"No Progress, " +
3629               t->getID() + ": " + formatInput(test, input, pos));
3630         gotError = TRUE;
3631     } else {
3632         logln((UnicodeString)"PASS Progress, " +
3633               t->getID() + ": " + formatInput(test, input, pos));
3634     }
3635     t->finishTransliteration(test, pos);
3636     if (pos.start != pos.limit) {
3637         errln((UnicodeString)"Incomplete, " +
3638               t->getID() + ": " + formatInput(test, input, pos));
3639         gotError = TRUE;
3640     }
3641 }
3642
3643 void TransliteratorTest::TestFunction() {
3644     // Careful with spacing and ';' here:  Phrase this exactly
3645     // as toRules() is going to return it.  If toRules() changes
3646     // with regard to spacing or ';', then adjust this string.
3647     UnicodeString rule =
3648         "([:Lu:]) > $1 '(' &Lower( $1 ) '=' &Hex( &Any-Lower( $1 ) ) ')';";
3649
3650     UParseError pe;
3651     UErrorCode ec = U_ZERO_ERROR;
3652     Transliterator *t = Transliterator::createFromRules("Test", rule, UTRANS_FORWARD, pe, ec);
3653     if (t == NULL) {
3654         dataerrln("FAIL: createFromRules failed - %s", u_errorName(ec));
3655         return;
3656     }
3657
3658     UnicodeString r;
3659     t->toRules(r, TRUE);
3660     if (r == rule) {
3661         logln((UnicodeString)"OK: toRules() => " + r);
3662     } else {
3663         errln((UnicodeString)"FAIL: toRules() => " + r +
3664               ", expected " + rule);
3665     }
3666
3667     expect(*t, "The Quick Brown Fox",
3668            UNICODE_STRING_SIMPLE("T(t=\\u0074)he Q(q=\\u0071)uick B(b=\\u0062)rown F(f=\\u0066)ox"));
3669
3670     delete t;
3671 }
3672
3673 void TransliteratorTest::TestInvalidBackRef(void) {
3674     UnicodeString rule =  ". > $1;";
3675     UnicodeString rule2 =CharsToUnicodeString("(.) <> &hex/unicode($1) &name($1); . > $1; [{}] >\\u0020;");
3676     UParseError pe;
3677     UErrorCode ec = U_ZERO_ERROR;
3678     Transliterator *t = Transliterator::createFromRules("Test", rule, UTRANS_FORWARD, pe, ec);
3679     Transliterator *t2 = Transliterator::createFromRules("Test2", rule2, UTRANS_FORWARD, pe, ec);
3680
3681     if (t != NULL) {
3682         errln("FAIL: createFromRules should have returned NULL");
3683         delete t;
3684     }
3685
3686     if (t2 != NULL) {
3687         errln("FAIL: createFromRules should have returned NULL");
3688         delete t2;
3689     }
3690
3691     if (U_SUCCESS(ec)) {
3692         errln("FAIL: Ok: . > $1; => no error");
3693     } else {
3694         logln((UnicodeString)"Ok: . > $1; => " + u_errorName(ec));
3695     }
3696 }
3697
3698 void TransliteratorTest::TestMulticharStringSet() {
3699     // Basic testing
3700     const char* rule =
3701         "       [{aa}]       > x;"
3702         "         a          > y;"
3703         "       [b{bc}]      > z;"
3704         "[{gd}] { e          > q;"
3705         "         e } [{fg}] > r;" ;
3706
3707     UParseError pe;
3708     UErrorCode ec = U_ZERO_ERROR;
3709     Transliterator* t = Transliterator::createFromRules("Test", rule, UTRANS_FORWARD, pe, ec);
3710     if (t == NULL || U_FAILURE(ec)) {
3711         delete t;
3712         errln("FAIL: createFromRules failed");
3713         return;
3714     }
3715
3716     expect(*t, "a aa ab bc d gd de gde gdefg ddefg",
3717            "y x yz z d gd de gdq gdqfg ddrfg");
3718     delete t;
3719
3720     // Overlapped string test.  Make sure that when multiple
3721     // strings can match that the longest one is matched.
3722     rule =
3723         "    [a {ab} {abc}]    > x;"
3724         "           b          > y;"
3725         "           c          > z;"
3726         " q [t {st} {rst}] { e > p;" ;
3727
3728     t = Transliterator::createFromRules("Test", rule, UTRANS_FORWARD, pe, ec);
3729     if (t == NULL || U_FAILURE(ec)) {
3730         delete t;
3731         errln("FAIL: createFromRules failed");
3732         return;
3733     }
3734
3735     expect(*t, "a ab abc qte qste qrste",
3736            "x x x qtp qstp qrstp");
3737     delete t;
3738 }
3739
3740 // vvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvv
3741 // BEGIN TestUserFunction support factory
3742
3743 Transliterator* _TUFF[4];
3744 UnicodeString* _TUFID[4];
3745
3746 static Transliterator* U_EXPORT2 _TUFFactory(const UnicodeString& /*ID*/,
3747                                    Transliterator::Token context) {
3748     return _TUFF[context.integer]->clone();
3749 }
3750
3751 static void _TUFReg(const UnicodeString& ID, Transliterator* t, int32_t n) {
3752     _TUFF[n] = t;
3753     _TUFID[n] = new UnicodeString(ID);
3754     Transliterator::registerFactory(ID, _TUFFactory, Transliterator::integerToken(n));
3755 }
3756
3757 static void _TUFUnreg(int32_t n) {
3758     if (_TUFF[n] != NULL) {
3759         Transliterator::unregister(*_TUFID[n]);
3760         delete _TUFF[n];
3761         delete _TUFID[n];
3762     }
3763 }
3764
3765 // END TestUserFunction support factory
3766 // ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
3767
3768 /**
3769  * Test that user-registered transliterators can be used under function
3770  * syntax.
3771  */
3772 void TransliteratorTest::TestUserFunction() {
3773
3774     Transliterator* t;
3775     UParseError pe;
3776     UErrorCode ec = U_ZERO_ERROR;
3777
3778     // Setup our factory
3779     int32_t i;
3780     for (i=0; i<4; ++i) {
3781         _TUFF[i] = NULL;
3782     }
3783
3784     // There's no need to register inverses if we don't use them
3785     t = Transliterator::createFromRules("gif",
3786                                         UNICODE_STRING_SIMPLE("'\\'u(..)(..) > '<img src=\"http://www.unicode.org/gifs/24/' $1 '/U' $1$2 '.gif\">';"),
3787                                         UTRANS_FORWARD, pe, ec);
3788     if (t == NULL || U_FAILURE(ec)) {
3789         dataerrln((UnicodeString)"FAIL: createFromRules gif " + u_errorName(ec));
3790         return;
3791     }
3792     _TUFReg("Any-gif", t, 0);
3793
3794     t = Transliterator::createFromRules("RemoveCurly",
3795                                         UNICODE_STRING_SIMPLE("[\\{\\}] > ; '\\N' > ;"),
3796                                         UTRANS_FORWARD, pe, ec);
3797     if (t == NULL || U_FAILURE(ec)) {
3798         errln((UnicodeString)"FAIL: createFromRules RemoveCurly " + u_errorName(ec));
3799         goto FAIL;
3800     }
3801     expect(*t, UNICODE_STRING_SIMPLE("\\N{name}"), "name");
3802     _TUFReg("Any-RemoveCurly", t, 1);
3803
3804     logln("Trying &hex");
3805     t = Transliterator::createFromRules("hex2",
3806                                         "(.) > &hex($1);",
3807                                         UTRANS_FORWARD, pe, ec);
3808     if (t == NULL || U_FAILURE(ec)) {
3809         errln("FAIL: createFromRules");
3810         goto FAIL;
3811     }
3812     logln("Registering");
3813     _TUFReg("Any-hex2", t, 2);
3814     t = Transliterator::createInstance("Any-hex2", UTRANS_FORWARD, ec);
3815     if (t == NULL || U_FAILURE(ec)) {
3816         errln((UnicodeString)"FAIL: createInstance Any-hex2 " + u_errorName(ec));
3817         goto FAIL;
3818     }
3819     expect(*t, "abc", UNICODE_STRING_SIMPLE("\\u0061\\u0062\\u0063"));
3820     delete t;
3821
3822     logln("Trying &gif");
3823     t = Transliterator::createFromRules("gif2",
3824                                         "(.) > &Gif(&Hex2($1));",
3825                                         UTRANS_FORWARD, pe, ec);
3826     if (t == NULL || U_FAILURE(ec)) {
3827         errln((UnicodeString)"FAIL: createFromRules gif2 " + u_errorName(ec));
3828         goto FAIL;
3829     }
3830     logln("Registering");
3831     _TUFReg("Any-gif2", t, 3);
3832     t = Transliterator::createInstance("Any-gif2", UTRANS_FORWARD, ec);
3833     if (t == NULL || U_FAILURE(ec)) {
3834         errln((UnicodeString)"FAIL: createInstance Any-gif2 " + u_errorName(ec));
3835         goto FAIL;
3836     }
3837     expect(*t, "ab", "<img src=\"http://www.unicode.org/gifs/24/00/U0061.gif\">"
3838            "<img src=\"http://www.unicode.org/gifs/24/00/U0062.gif\">");
3839     delete t;
3840
3841     // Test that filters are allowed after &
3842     t = Transliterator::createFromRules("test",
3843                                         "(.) > &Hex($1) ' ' &RemoveCurly(&Name($1)) ' ';",
3844                                         UTRANS_FORWARD, pe, ec);
3845     if (t == NULL || U_FAILURE(ec)) {
3846         errln((UnicodeString)"FAIL: createFromRules test " + u_errorName(ec));
3847         goto FAIL;
3848     }
3849     expect(*t, "abc",
3850            UNICODE_STRING_SIMPLE("\\u0061 LATIN SMALL LETTER A \\u0062 LATIN SMALL LETTER B \\u0063 LATIN SMALL LETTER C "));
3851     delete t;
3852
3853  FAIL:
3854     for (i=0; i<4; ++i) {
3855         _TUFUnreg(i);
3856     }
3857 }
3858
3859 /**
3860  * Test the Any-X transliterators.
3861  */
3862 void TransliteratorTest::TestAnyX(void) {
3863     UParseError parseError;
3864     UErrorCode status = U_ZERO_ERROR;
3865     Transliterator* anyLatin =
3866         Transliterator::createInstance("Any-Latin", UTRANS_FORWARD, parseError, status);
3867     if (anyLatin==0) {
3868         dataerrln("FAIL: createInstance returned NULL - %s", u_errorName(status));
3869         delete anyLatin;
3870         return;
3871     }
3872
3873     expect(*anyLatin,
3874            CharsToUnicodeString("greek:\\u03B1\\u03B2\\u03BA\\u0391\\u0392\\u039A hiragana:\\u3042\\u3076\\u304F cyrillic:\\u0430\\u0431\\u0446"),
3875            CharsToUnicodeString("greek:abkABK hiragana:abuku cyrillic:abc"));
3876
3877     delete anyLatin;
3878
3879     status = U_ZERO_ERROR;
3880     Transliterator* anyASCII =
3881         Transliterator::createInstance("Any-Latin;Latin-ASCII", UTRANS_FORWARD, parseError, status);
3882     if (U_FAILURE(status) || anyASCII==0) {
3883         dataerrln("FAIL: createInstance returned NULL and/or set status %s", u_errorName(status));
3884         delete anyASCII;
3885         return;
3886     }
3887
3888     expect(*anyASCII,
3889            CharsToUnicodeString("ArabicDigits:\\u0660\\u0661\\u0664\\u0669 PersianDigits:\\u06F0\\u06F1\\u06F4\\u06F9"),
3890            CharsToUnicodeString("ArabicDigits:0149 PersianDigits:0149"));
3891
3892     delete anyASCII;
3893 }
3894
3895 /**
3896  * Test Any-X transliterators with sample letters from all scripts.
3897  */
3898 void TransliteratorTest::TestAny(void) {
3899     UErrorCode status = U_ZERO_ERROR;
3900     // Note: there is a lot of implict construction of UnicodeStrings from (char *) in
3901     //       function call parameters going on in this test.
3902     UnicodeSet alphabetic("[:alphabetic:]", status);
3903     if (U_FAILURE(status)) {
3904         dataerrln("Failure: file %s, line %d, status = %s", __FILE__, __LINE__, u_errorName(status));
3905         return;
3906     }
3907     alphabetic.freeze();
3908
3909     UnicodeString testString;
3910     for (int32_t i = 0; i < USCRIPT_CODE_LIMIT; i++) {
3911         const char *scriptName = uscript_getShortName((UScriptCode)i);
3912         if (scriptName == NULL) {
3913             errln("Failure: file %s, line %d: Script Code %d is invalid, ", __FILE__, __LINE__, i);
3914             return;
3915         }
3916
3917         UnicodeSet sample;
3918         sample.applyPropertyAlias("script", scriptName, status);
3919         if (U_FAILURE(status)) {
3920             errln("Failure: file %s, line %d, status = %s", __FILE__, __LINE__, u_errorName(status));
3921             return;
3922         }
3923         sample.retainAll(alphabetic);
3924         for (int32_t count=0; count<5; count++) {
3925             UChar32 c = sample.charAt(count);
3926             if (c == -1) {
3927                 break;
3928             }
3929             testString.append(c);
3930         }
3931     }
3932
3933     UParseError parseError;
3934     Transliterator* anyLatin =
3935         Transliterator::createInstance("Any-Latin", UTRANS_FORWARD, parseError, status);
3936     if (U_FAILURE(status)) {
3937         dataerrln("Failure: file %s, line %d, status = %s", __FILE__, __LINE__, u_errorName(status));
3938         return;
3939     }
3940
3941     logln(UnicodeString("Sample set for Any-Latin: ") + testString);
3942     anyLatin->transliterate(testString);
3943     logln(UnicodeString("Sample result for Any-Latin: ") + testString);
3944     delete anyLatin;
3945 }
3946
3947
3948 /**
3949  * Test the source and target set API.  These are only implemented
3950  * for RBT and CompoundTransliterator at this time.
3951  */
3952 void TransliteratorTest::TestSourceTargetSet() {
3953     UErrorCode ec = U_ZERO_ERROR;
3954
3955     // Rules
3956     const char* r =
3957         "a > b; "
3958         "r [x{lu}] > q;";
3959
3960     // Expected source
3961     UnicodeSet expSrc("[arx{lu}]", ec);
3962
3963     // Expected target
3964     UnicodeSet expTrg("[bq]", ec);
3965
3966     UParseError pe;
3967     Transliterator* t = Transliterator::createFromRules("test", r, UTRANS_FORWARD, pe, ec);
3968
3969     if (U_FAILURE(ec)) {
3970         delete t;
3971         errln("FAIL: Couldn't set up test");
3972         return;
3973     }
3974
3975     UnicodeSet src; t->getSourceSet(src);
3976     UnicodeSet trg; t->getTargetSet(trg);
3977
3978     if (src == expSrc && trg == expTrg) {
3979         UnicodeString a, b;
3980         logln((UnicodeString)"Ok: " +
3981               r + " => source = " + src.toPattern(a, TRUE) +
3982               ", target = " + trg.toPattern(b, TRUE));
3983     } else {
3984         UnicodeString a, b, c, d;
3985         errln((UnicodeString)"FAIL: " +
3986               r + " => source = " + src.toPattern(a, TRUE) +
3987               ", expected " + expSrc.toPattern(b, TRUE) +
3988               "; target = " + trg.toPattern(c, TRUE) +
3989               ", expected " + expTrg.toPattern(d, TRUE));
3990     }
3991
3992     delete t;
3993 }
3994
3995 /**
3996  * Test handling of Pattern_White_Space, for both RBT and UnicodeSet.
3997  */
3998 void TransliteratorTest::TestPatternWhiteSpace() {
3999     // Rules
4000     const char* r = "a > \\u200E b;";
4001
4002     UErrorCode ec = U_ZERO_ERROR;
4003     UParseError pe;
4004     Transliterator* t = Transliterator::createFromRules("test", CharsToUnicodeString(r), UTRANS_FORWARD, pe, ec);
4005
4006     if (U_FAILURE(ec)) {
4007         errln("FAIL: Couldn't set up test");
4008     } else {
4009         expect(*t, "a", "b");
4010     }
4011     delete t;
4012
4013     // UnicodeSet
4014     ec = U_ZERO_ERROR;
4015     UnicodeSet set(CharsToUnicodeString("[a \\u200E]"), ec);
4016
4017     if (U_FAILURE(ec)) {
4018         errln("FAIL: Couldn't set up test");
4019     } else {
4020         if (set.contains(0x200E)) {
4021             errln("FAIL: U+200E not being ignored by UnicodeSet");
4022         }
4023     }
4024 }
4025 //======================================================================
4026 // this method is in TestUScript.java
4027 //======================================================================
4028 void TransliteratorTest::TestAllCodepoints(){
4029     UScriptCode code= USCRIPT_INVALID_CODE;
4030     char id[256]={'\0'};
4031     char abbr[256]={'\0'};
4032     char newId[256]={'\0'};
4033     char newAbbrId[256]={'\0'};
4034     char oldId[256]={'\0'};
4035     char oldAbbrId[256]={'\0'};
4036
4037     UErrorCode status =U_ZERO_ERROR;
4038     UParseError pe;
4039
4040     for(uint32_t i = 0; i<=0x10ffff; i++){
4041         code =  uscript_getScript(i,&status);
4042         if(code == USCRIPT_INVALID_CODE){
4043             dataerrln("uscript_getScript for codepoint \\U%08X failed.", i);
4044         }
4045         const char* myId = uscript_getName(code);
4046         if(!myId) {
4047           dataerrln("Valid script code returned NULL name. Check your data!");
4048           return;
4049         }
4050         uprv_strcpy(id,myId);
4051         uprv_strcpy(abbr,uscript_getShortName(code));
4052
4053         uprv_strcpy(newId,"[:");
4054         uprv_strcat(newId,id);
4055         uprv_strcat(newId,":];NFD");
4056
4057         uprv_strcpy(newAbbrId,"[:");
4058         uprv_strcat(newAbbrId,abbr);
4059         uprv_strcat(newAbbrId,":];NFD");
4060
4061         if(uprv_strcmp(newId,oldId)!=0){
4062             Transliterator* t = Transliterator::createInstance(newId,UTRANS_FORWARD,pe,status);
4063             if(t==NULL || U_FAILURE(status)){
4064                 dataerrln((UnicodeString)"FAIL: Could not create " + id + " - " + u_errorName(status));
4065             }
4066             delete t;
4067         }
4068         if(uprv_strcmp(newAbbrId,oldAbbrId)!=0){
4069             Transliterator* t = Transliterator::createInstance(newAbbrId,UTRANS_FORWARD,pe,status);
4070             if(t==NULL || U_FAILURE(status)){
4071                 dataerrln((UnicodeString)"FAIL: Could not create " + id + " - " + u_errorName(status));
4072             }
4073             delete t;
4074         }
4075         uprv_strcpy(oldId,newId);
4076         uprv_strcpy(oldAbbrId, newAbbrId);
4077
4078     }
4079
4080 }
4081
// Creates the forward transliterator for `id` and verifies that the dynamic
// class ID of the created object equals cls::getStaticClassID().
// A creation failure is reported through dataerrln().
#define TEST_TRANSLIT_ID(id, cls) { \
  UErrorCode errorCode = U_ZERO_ERROR; \
  Transliterator* instance = Transliterator::createInstance(id, UTRANS_FORWARD, errorCode); \
  if (U_SUCCESS(errorCode)) { \
    /* A self-assignment would cover the assignment op, but can't be done here. */ \
    if (instance->getDynamicClassID() != cls::getStaticClassID()) { \
      errln("FAIL: " #cls " dynamic and static class ID mismatch"); \
    } \
  } else { \
    dataerrln("FAIL: Couldn't create %s - %s", id, u_errorName(errorCode)); \
  } \
  delete instance; \
}
4095
// Builds a forward transliterator from the rule string literal `rule` and
// verifies that the dynamic class ID of the created object equals
// cls::getStaticClassID().  `rule` must be a string literal because it is
// concatenated into the failure message.
#define TEST_TRANSLIT_RULE(rule, cls) { \
  UErrorCode errorCode = U_ZERO_ERROR; \
  UParseError parseError; \
  Transliterator* instance = \
      Transliterator::createFromRules("_", rule, UTRANS_FORWARD, parseError, errorCode); \
  if (U_SUCCESS(errorCode)) { \
    /* A self-assignment would cover the assignment op, but can't be done here. */ \
    if (instance->getDynamicClassID() != cls ::getStaticClassID()) { \
      errln("FAIL: " #cls " dynamic and static class ID mismatch"); \
    } \
  } else { \
    errln("FAIL: Couldn't create " rule); \
  } \
  delete instance; \
}
4110
// For one representative ID (or rule string) per transliterator class, create
// the transliterator and check, via TEST_TRANSLIT_ID / TEST_TRANSLIT_RULE,
// that its dynamic class ID matches the static class ID of the named class.
4111 void TransliteratorTest::TestBoilerplate() {
4112     TEST_TRANSLIT_ID("Any-Latin", AnyTransliterator);
4113     TEST_TRANSLIT_ID("Any-Hex", EscapeTransliterator);
4114     TEST_TRANSLIT_ID("Hex-Any", UnescapeTransliterator);
4115     TEST_TRANSLIT_ID("Lower", LowercaseTransliterator);
4116     TEST_TRANSLIT_ID("Upper", UppercaseTransliterator);
4117     TEST_TRANSLIT_ID("Title", TitlecaseTransliterator);
4118     TEST_TRANSLIT_ID("Null", NullTransliterator);
4119     TEST_TRANSLIT_ID("Remove", RemoveTransliterator);
4120     TEST_TRANSLIT_ID("Any-Name", UnicodeNameTransliterator);
4121     TEST_TRANSLIT_ID("Name-Any", NameUnicodeTransliterator);
4122     TEST_TRANSLIT_ID("NFD", NormalizationTransliterator);
4123     TEST_TRANSLIT_ID("Latin-Greek", CompoundTransliterator);
4124     TEST_TRANSLIT_RULE("a>b;", RuleBasedTransliterator);
4125 }
4126
4127 void TransliteratorTest::TestAlternateSyntax() {
4128     // U+2206 == &
4129     // U+2190 == <
4130     // U+2192 == >
4131     // U+2194 == <>
4132     expect(CharsToUnicodeString("a \\u2192 x; b \\u2190 y; c \\u2194 z"),
4133            "abc",
4134            "xbz");
4135     expect(CharsToUnicodeString("([:^ASCII:]) \\u2192 \\u2206Name($1);"),
4136            CharsToUnicodeString("<=\\u2190; >=\\u2192; <>=\\u2194; &=\\u2206"),
4137            UNICODE_STRING_SIMPLE("<=\\N{LEFTWARDS ARROW}; >=\\N{RIGHTWARDS ARROW}; <>=\\N{LEFT RIGHT ARROW}; &=\\N{INCREMENT}"));
4138 }
4139
// Rule sets used by TestBeginEnd and TestBeginEndToRules.  Entries are picked
// out by index from BEGIN_END_TEST_CASES, so each rule set that is currently
// commented out (the ::BEGIN/::END variants) is replaced by an empty-string
// placeholder to keep the indexes of the remaining entries stable.
// Entry [17] is also instantiated in the reverse direction by both tests.
4140 static const char* BEGIN_END_RULES[] = {
4141     // [0]
4142     "abc > xy;"
4143     "aba > z;",
4144
4145     // [1]
4146 /*
4147     "::BEGIN;"
4148     "abc > xy;"
4149     "::END;"
4150     "::BEGIN;"
4151     "aba > z;"
4152     "::END;",
4153 */
4154     "", // test case commented out below, this is here to keep from messing up the indexes
4155
4156     // [2]
4157 /*
4158     "abc > xy;"
4159     "::BEGIN;"
4160     "aba > z;"
4161     "::END;",
4162 */
4163     "", // test case commented out below, this is here to keep from messing up the indexes
4164
4165     // [3]
4166 /*
4167     "::BEGIN;"
4168     "abc > xy;"
4169     "::END;"
4170     "aba > z;",
4171 */
4172     "", // test case commented out below, this is here to keep from messing up the indexes
4173
4174     // [4]
4175     "abc > xy;"
4176     "::Null;"
4177     "aba > z;",
4178
4179     // [5]
4180     "::Upper;"
4181     "ABC > xy;"
4182     "AB > x;"
4183     "C > z;"
4184     "::Upper;"
4185     "XYZ > p;"
4186     "XY > q;"
4187     "Z > r;"
4188     "::Upper;",
4189
4190     // [6]
4191     "$ws = [[:Separator:][\\u0009-\\u000C]$];"
4192     "$delim = [\\-$ws];"
4193     "$ws $delim* > ' ';"
4194     "'-' $delim* > '-';",
4195
4196     // [7]
4197     "::Null;"
4198     "$ws = [[:Separator:][\\u0009-\\u000C]$];"
4199     "$delim = [\\-$ws];"
4200     "$ws $delim* > ' ';"
4201     "'-' $delim* > '-';",
4202
4203     // [8]
4204     "$ws = [[:Separator:][\\u0009-\\u000C]$];"
4205     "$delim = [\\-$ws];"
4206     "$ws $delim* > ' ';"
4207     "'-' $delim* > '-';"
4208     "::Null;",
4209
4210     // [9]
4211     "$ws = [[:Separator:][\\u0009-\\u000C]$];"
4212     "$delim = [\\-$ws];"
4213     "::Null;"
4214     "$ws $delim* > ' ';"
4215     "'-' $delim* > '-';",
4216
4217     // [10]
4218 /*
4219     "::BEGIN;"
4220     "$ws = [[:Separator:][\\u0009-\\u000C]$];"
4221     "$delim = [\\-$ws];"
4222     "::END;"
4223     "$ws $delim* > ' ';"
4224     "'-' $delim* > '-';",
4225 */
4226     "", // test case commented out below, this is here to keep from messing up the indexes
4227
4228     // [11]
4229 /*
4230     "$ws = [[:Separator:][\\u0009-\\u000C]$];"
4231     "$delim = [\\-$ws];"
4232     "::BEGIN;"
4233     "$ws $delim* > ' ';"
4234     "'-' $delim* > '-';"
4235     "::END;",
4236 */
4237     "", // test case commented out below, this is here to keep from messing up the indexes
4238
4239     // [12]
4240 /*
4241     "$ws = [[:Separator:][\\u0009-\\u000C]$];"
4242     "$delim = [\\-$ws];"
4243     "$ab = [ab];"
4244     "::BEGIN;"
4245     "$ws $delim* > ' ';"
4246     "'-' $delim* > '-';"
4247     "::END;"
4248     "::BEGIN;"
4249     "$ab { ' ' } $ab > '-';"
4250     "c { ' ' > ;"
4251     "::END;"
4252     "::BEGIN;"
4253     "'a-a' > a\\%|a;"
4254     "::END;",
4255 */
4256     "", // test case commented out below, this is here to keep from messing up the indexes
4257
4258     // [13]
4259     "$ws = [[:Separator:][\\u0009-\\u000C]$];"
4260     "$delim = [\\-$ws];"
4261     "$ab = [ab];"
4262     "::Null;"
4263     "$ws $delim* > ' ';"
4264     "'-' $delim* > '-';"
4265     "::Null;"
4266     "$ab { ' ' } $ab > '-';"
4267     "c { ' ' > ;"
4268     "::Null;"
4269     "'a-a' > a\\%|a;",
4270
4271     // [14]
4272 /*
4273     "::[abc];"
4274     "::BEGIN;"
4275     "abc > xy;"
4276     "::END;"
4277     "::BEGIN;"
4278     "aba > yz;"
4279     "::END;"
4280     "::Upper;",
4281 */
4282     "", // test case commented out below, this is here to keep from messing up the indexes
4283
4284     // [15]
4285     "::[abc];"
4286     "abc > xy;"
4287     "::Null;"
4288     "aba > yz;"
4289     "::Upper;",
4290
4291     // [16]
4292 /*
4293     "::[abc];"
4294     "::BEGIN;"
4295     "abc <> xy;"
4296     "::END;"
4297     "::BEGIN;"
4298     "aba <> yz;"
4299     "::END;"
4300     "::Upper(Lower);"
4301     "::([XYZ]);"
4302 */
4303     "", // test case commented out below, this is here to keep from messing up the indexes
4304
4305     // [17]
4306     "::[abc];"
4307     "abc <> xy;"
4308     "::Null;"
4309     "aba <> yz;"
4310     "::Upper(Lower);"
4311     "::([XYZ]);"
4312 };
4313
4314 /*
4315 (This entire test is commented out below and will need some heavy revision when we re-add
4316 the ::BEGIN/::END stuff)
4317 static const char* BOGUS_BEGIN_END_RULES[] = {
4318     // [7]
4319     "::BEGIN;"
4320     "abc > xy;"
4321     "::BEGIN;"
4322     "aba > z;"
4323     "::END;"
4324     "::END;",
4325
4326     // [8]
4327     "abc > xy;"
4328     " aba > z;"
4329     "::END;",
4330
4331     // [9]
4332     "::BEGIN;"
4333     "::Upper;"
4334     "::END;"
4335 };
4336 static const int32_t BOGUS_BEGIN_END_RULES_length = UPRV_LENGTHOF(BOGUS_BEGIN_END_RULES);
4337 */
4338
// Flat table of test cases, three consecutive entries per case:
// rule set, input text, expected output.  TestBeginEnd and
// TestBeginEndToRules walk it in steps of 3, so the number of entries must
// stay a multiple of 3.  Rows that are commented out refer to rule sets that
// are currently empty placeholders in BEGIN_END_RULES.
4339 static const char* BEGIN_END_TEST_CASES[] = {
4340     // rules             input                   expected output
4341     BEGIN_END_RULES[0],  "abc ababc aba",        "xy zbc z",
4342 //    BEGIN_END_RULES[1],  "abc ababc aba",        "xy abxy z",
4343 //    BEGIN_END_RULES[2],  "abc ababc aba",        "xy abxy z",
4344 //    BEGIN_END_RULES[3],  "abc ababc aba",        "xy abxy z",
4345     BEGIN_END_RULES[4],  "abc ababc aba",        "xy abxy z",
4346     BEGIN_END_RULES[5],  "abccabaacababcbc",     "PXAARXQBR",
4347
4348     BEGIN_END_RULES[6],  "e   e - e---e-  e",    "e e e-e-e",
4349     BEGIN_END_RULES[7],  "e   e - e---e-  e",    "e e e-e-e",
4350     BEGIN_END_RULES[8],  "e   e - e---e-  e",    "e e e-e-e",
4351     BEGIN_END_RULES[9],  "e   e - e---e-  e",    "e e e-e-e",
4352 //    BEGIN_END_RULES[10],  "e   e - e---e-  e",    "e e e-e-e",
4353 //    BEGIN_END_RULES[11], "e   e - e---e-  e",    "e e e-e-e",
4354 //    BEGIN_END_RULES[12], "e   e - e---e-  e",    "e e e-e-e",
4355 //    BEGIN_END_RULES[12], "a    a    a    a",     "a%a%a%a",
4356 //    BEGIN_END_RULES[12], "a a-b c b a",          "a%a-b cb-a",
4357     BEGIN_END_RULES[13], "e   e - e---e-  e",    "e e e-e-e",
4358     BEGIN_END_RULES[13], "a    a    a    a",     "a%a%a%a",
4359     BEGIN_END_RULES[13], "a a-b c b a",          "a%a-b cb-a",
4360
4361 //    BEGIN_END_RULES[14], "abc xy ababc xyz aba", "XY xy ABXY xyz YZ",
4362     BEGIN_END_RULES[15], "abc xy ababc xyz aba", "XY xy ABXY xyz YZ",
4363 //    BEGIN_END_RULES[16], "abc xy ababc xyz aba", "XY xy ABXY xyz YZ",
4364     BEGIN_END_RULES[17], "abc xy ababc xyz aba", "XY xy ABXY xyz YZ"
4365 };
// Number of entries (not test cases) in BEGIN_END_TEST_CASES.
4366 static const int32_t BEGIN_END_TEST_CASES_length = UPRV_LENGTHOF(BEGIN_END_TEST_CASES);
4367
4368 void TransliteratorTest::TestBeginEnd() {
4369     // run through the list of test cases above
4370     int32_t i = 0;
4371     for (i = 0; i < BEGIN_END_TEST_CASES_length; i += 3) {
4372         expect((UnicodeString)"Test case #" + (i / 3),
4373                UnicodeString(BEGIN_END_TEST_CASES[i], -1, US_INV),
4374                UnicodeString(BEGIN_END_TEST_CASES[i + 1], -1, US_INV),
4375                UnicodeString(BEGIN_END_TEST_CASES[i + 2], -1, US_INV));
4376     }
4377
4378     // instantiate the one reversible rule set in the reverse direction and make sure it does the right thing
4379     UParseError parseError;
4380     UErrorCode status = U_ZERO_ERROR;
4381     Transliterator* reversed  = Transliterator::createFromRules("Reversed", UnicodeString(BEGIN_END_RULES[17]),
4382             UTRANS_REVERSE, parseError, status);
4383     if (reversed == 0 || U_FAILURE(status)) {
4384         reportParseError(UnicodeString("FAIL: Couldn't create reversed transliterator"), parseError, status);
4385     } else {
4386         expect(*reversed, UnicodeString("xy XY XYZ yz YZ"), UnicodeString("xy abc xaba yz aba"));
4387     }
4388     delete reversed;
4389
4390     // finally, run through the list of syntactically-ill-formed rule sets above and make sure
4391     // that all of them cause errors
4392 /*
4393 (commented out until we have the real ::BEGIN/::END stuff in place
4394     for (i = 0; i < BOGUS_BEGIN_END_RULES_length; i++) {
4395         UParseError parseError;
4396         UErrorCode status = U_ZERO_ERROR;
4397         Transliterator* t = Transliterator::createFromRules("foo", UnicodeString(BOGUS_BEGIN_END_RULES[i]),
4398                 UTRANS_FORWARD, parseError, status);
4399         if (!U_FAILURE(status)) {
4400             delete t;
4401             errln((UnicodeString)"Should have gotten syntax error from " + BOGUS_BEGIN_END_RULES[i]);
4402         }
4403     }
4404 */
4405 }
4406
4407 void TransliteratorTest::TestBeginEndToRules() {
4408     // run through the same list of test cases we used above, but this time, instead of just
4409     // instantiating a Transliterator from the rules and running the test against it, we instantiate
4410     // a Transliterator from the rules, do toRules() on it, instantiate a Transliterator from
4411     // the resulting set of rules, and make sure that the generated rule set is semantically equivalent
4412     // to (i.e., does the same thing as) the original rule set
4413     for (int32_t i = 0; i < BEGIN_END_TEST_CASES_length; i += 3) {
4414         UParseError parseError;
4415         UErrorCode status = U_ZERO_ERROR;
4416         Transliterator* t = Transliterator::createFromRules("--", UnicodeString(BEGIN_END_TEST_CASES[i], -1, US_INV),
4417                 UTRANS_FORWARD, parseError, status);
4418         if (U_FAILURE(status)) {
4419             reportParseError(UnicodeString("FAIL: Couldn't create transliterator"), parseError, status);
4420         } else {
4421             UnicodeString rules;
4422             t->toRules(rules, TRUE);
4423             Transliterator* t2 = Transliterator::createFromRules((UnicodeString)"Test case #" + (i / 3), rules,
4424                     UTRANS_FORWARD, parseError, status);
4425             if (U_FAILURE(status)) {
4426                 reportParseError(UnicodeString("FAIL: Couldn't create transliterator from generated rules"),
4427                         parseError, status);
4428                 delete t;
4429             } else {
4430                 expect(*t2,
4431                        UnicodeString(BEGIN_END_TEST_CASES[i + 1], -1, US_INV),
4432                        UnicodeString(BEGIN_END_TEST_CASES[i + 2], -1, US_INV));
4433                 delete t;
4434                 delete t2;
4435             }
4436         }
4437     }
4438
4439     // do the same thing for the reversible test case
4440     UParseError parseError;
4441     UErrorCode status = U_ZERO_ERROR;
4442     Transliterator* reversed = Transliterator::createFromRules("Reversed", UnicodeString(BEGIN_END_RULES[17]),
4443             UTRANS_REVERSE, parseError, status);
4444     if (U_FAILURE(status)) {
4445         reportParseError(UnicodeString("FAIL: Couldn't create reversed transliterator"), parseError, status);
4446     } else {
4447         UnicodeString rules;
4448         reversed->toRules(rules, FALSE);
4449         Transliterator* reversed2 = Transliterator::createFromRules("Reversed", rules, UTRANS_FORWARD,
4450                 parseError, status);
4451         if (U_FAILURE(status)) {
4452             reportParseError(UnicodeString("FAIL: Couldn't create reversed transliterator from generated rules"),
4453                     parseError, status);
4454             delete reversed;
4455         } else {
4456             expect(*reversed2,
4457                    UnicodeString("xy XY XYZ yz YZ"),
4458                    UnicodeString("xy abc xaba yz aba"));
4459             delete reversed;
4460             delete reversed2;
4461         }
4462     }
4463 }
4464
4465 void TransliteratorTest::TestRegisterAlias() {
4466     UnicodeString longID("Lower;[aeiou]Upper");
4467     UnicodeString shortID("Any-CapVowels");
4468     UnicodeString reallyShortID("CapVowels");
4469
4470     Transliterator::registerAlias(shortID, longID);
4471
4472     UErrorCode err = U_ZERO_ERROR;
4473     Transliterator* t1 = Transliterator::createInstance(longID, UTRANS_FORWARD, err);
4474     if (U_FAILURE(err)) {
4475         errln("Failed to instantiate transliterator with long ID");
4476         Transliterator::unregister(shortID);
4477         return;
4478     }
4479     Transliterator* t2 = Transliterator::createInstance(reallyShortID, UTRANS_FORWARD, err);
4480     if (U_FAILURE(err)) {
4481         errln("Failed to instantiate transliterator with short ID");
4482         delete t1;
4483         Transliterator::unregister(shortID);
4484         return;
4485     }
4486
4487     if (t1->getID() != longID)
4488         errln("Transliterator instantiated with long ID doesn't have long ID");
4489     if (t2->getID() != reallyShortID)
4490         errln("Transliterator instantiated with short ID doesn't have short ID");
4491
4492     UnicodeString rules1;
4493     UnicodeString rules2;
4494
4495     t1->toRules(rules1, TRUE);
4496     t2->toRules(rules2, TRUE);
4497     if (rules1 != rules2)
4498         errln("Alias transliterators aren't the same");
4499
4500     delete t1;
4501     delete t2;
4502     Transliterator::unregister(shortID);
4503
4504     t1 = Transliterator::createInstance(shortID, UTRANS_FORWARD, err);
4505     if (U_SUCCESS(err)) {
4506         errln("Instantiation with short ID succeeded after short ID was unregistered");
4507         delete t1;
4508     }
4509
4510     // try the same thing again, but this time with something other than
4511     // an instance of CompoundTransliterator
4512     UnicodeString realID("Latin-Greek");
4513     UnicodeString fakeID("Latin-dlgkjdflkjdl");
4514     Transliterator::registerAlias(fakeID, realID);
4515
4516     err = U_ZERO_ERROR;
4517     t1 = Transliterator::createInstance(realID, UTRANS_FORWARD, err);
4518     if (U_FAILURE(err)) {
4519         dataerrln("Failed to instantiate transliterator with real ID - %s", u_errorName(err));
4520         Transliterator::unregister(realID);
4521         return;
4522     }
4523     t2 = Transliterator::createInstance(fakeID, UTRANS_FORWARD, err);
4524     if (U_FAILURE(err)) {
4525         errln("Failed to instantiate transliterator with fake ID");
4526         delete t1;
4527         Transliterator::unregister(realID);
4528         return;
4529     }
4530
4531     t1->toRules(rules1, TRUE);
4532     t2->toRules(rules2, TRUE);
4533     if (rules1 != rules2)
4534         errln("Alias transliterators aren't the same");
4535
4536     delete t1;
4537     delete t2;
4538     Transliterator::unregister(fakeID);
4539 }
4540
4541 void TransliteratorTest::TestRuleStripping() {
4542     /*
4543 #
4544 \uE001>\u0C01; # SIGN
4545     */
4546     static const UChar rule[] = {
4547         0x0023,0x0020,0x000D,0x000A,
4548         0xE001,0x003E,0x0C01,0x003B,0x0020,0x0023,0x0020,0x0053,0x0049,0x0047,0x004E,0
4549     };
4550     static const UChar expectedRule[] = {
4551         0xE001,0x003E,0x0C01,0x003B,0
4552     };
4553     UChar result[UPRV_LENGTHOF(rule)];
4554     UErrorCode status = U_ZERO_ERROR;
4555     int32_t len = utrans_stripRules(rule, UPRV_LENGTHOF(rule), result, &status);
4556     if (len != u_strlen(expectedRule)) {
4557         errln("utrans_stripRules return len = %d", len);
4558     }
4559     if (u_strncmp(expectedRule, result, len) != 0) {
4560         errln("utrans_stripRules did not return expected string");
4561     }
4562 }
4563
4564 /**
4565  * Test the Halfwidth-Fullwidth transliterator (ticket 6281).
4566  */
4567 void TransliteratorTest::TestHalfwidthFullwidth(void) {
4568     UParseError parseError;
4569     UErrorCode status = U_ZERO_ERROR;
4570     Transliterator* hf = Transliterator::createInstance("Halfwidth-Fullwidth", UTRANS_FORWARD, parseError, status);
4571     Transliterator* fh = Transliterator::createInstance("Fullwidth-Halfwidth", UTRANS_FORWARD, parseError, status);
4572     if (hf == 0 || fh == 0) {
4573         dataerrln("FAIL: createInstance failed - %s", u_errorName(status));
4574         delete hf;
4575         delete fh;
4576         return;
4577     }
4578
4579     // Array of 2n items
4580     // Each item is
4581     //   "hf"|"fh"|"both",
4582     //   <Halfwidth>,
4583     //   <Fullwidth>
4584     const char* DATA[] = {
4585         "both",
4586         "\\uFFE9\\uFFEA\\uFFEB\\uFFEC\\u0061\\uFF71\\u00AF\\u0020",
4587         "\\u2190\\u2191\\u2192\\u2193\\uFF41\\u30A2\\uFFE3\\u3000",
4588     };
4589     int32_t DATA_length = UPRV_LENGTHOF(DATA);
4590
4591     for (int32_t i=0; i<DATA_length; i+=3) {
4592         UnicodeString h = CharsToUnicodeString(DATA[i+1]);
4593         UnicodeString f = CharsToUnicodeString(DATA[i+2]);
4594         switch (*DATA[i]) {
4595         case 0x68: //'h': // Halfwidth-Fullwidth only
4596             expect(*hf, h, f);
4597             break;
4598         case 0x66: //'f': // Fullwidth-Halfwidth only
4599             expect(*fh, f, h);
4600             break;
4601         case 0x62: //'b': // both directions
4602             expect(*hf, h, f);
4603             expect(*fh, f, h);
4604             break;
4605         }
4606     }
4607     delete hf;
4608     delete fh;
4609 }
4610
4611
4612     /**
4613      *  Test Thai.  The text is the first paragraph of "What is Unicode" from the Unicode.org web site.
4614      *              TODO: confirm that the expected results are correct.
4615      *              For now, test just confirms that C++ and Java give identical results.
4616      */
4617 void TransliteratorTest::TestThai(void) {
4618 #if !UCONFIG_NO_BREAK_ITERATION
4619     UParseError parseError;
4620     UErrorCode status = U_ZERO_ERROR;
4621     Transliterator* tr = Transliterator::createInstance("Any-Latin", UTRANS_FORWARD, parseError, status);
4622     if (tr == 0) {
4623         dataerrln("FAIL: createInstance failed - %s", u_errorName(status));
4624         return;
4625     }
4626     if (U_FAILURE(status)) {
4627         errln("FAIL: createInstance failed with %s", u_errorName(status));
4628         return;
4629     }
4630     const char *thaiText =
4631         "\\u0e42\\u0e14\\u0e22\\u0e1e\\u0e37\\u0e49\\u0e19\\u0e10\\u0e32\\u0e19\\u0e41\\u0e25\\u0e49\\u0e27, \\u0e04\\u0e2d"
4632         "\\u0e21\\u0e1e\\u0e34\\u0e27\\u0e40\\u0e15\\u0e2d\\u0e23\\u0e4c\\u0e08\\u0e30\\u0e40\\u0e01\\u0e35\\u0e48\\u0e22"
4633         "\\u0e27\\u0e02\\u0e49\\u0e2d\\u0e07\\u0e01\\u0e31\\u0e1a\\u0e40\\u0e23\\u0e37\\u0e48\\u0e2d\\u0e07\\u0e02\\u0e2d"
4634         "\\u0e07\\u0e15\\u0e31\\u0e27\\u0e40\\u0e25\\u0e02. \\u0e04\\u0e2d\\u0e21\\u0e1e\\u0e34\\u0e27\\u0e40\\u0e15\\u0e2d"
4635         "\\u0e23\\u0e4c\\u0e08\\u0e31\\u0e14\\u0e40\\u0e01\\u0e47\\u0e1a\\u0e15\\u0e31\\u0e27\\u0e2d\\u0e31\\u0e01\\u0e29"
4636         "\\u0e23\\u0e41\\u0e25\\u0e30\\u0e2d\\u0e31\\u0e01\\u0e02\\u0e23\\u0e30\\u0e2d\\u0e37\\u0e48\\u0e19\\u0e46 \\u0e42"
4637         "\\u0e14\\u0e22\\u0e01\\u0e32\\u0e23\\u0e01\\u0e33\\u0e2b\\u0e19\\u0e14\\u0e2b\\u0e21\\u0e32\\u0e22\\u0e40\\u0e25"
4638         "\\u0e02\\u0e43\\u0e2b\\u0e49\\u0e2a\\u0e33\\u0e2b\\u0e23\\u0e31\\u0e1a\\u0e41\\u0e15\\u0e48\\u0e25\\u0e30\\u0e15"
4639         "\\u0e31\\u0e27. \\u0e01\\u0e48\\u0e2d\\u0e19\\u0e2b\\u0e19\\u0e49\\u0e32\\u0e17\\u0e35\\u0e48\\u0e4a Unicode \\u0e08"
4640         "\\u0e30\\u0e16\\u0e39\\u0e01\\u0e2a\\u0e23\\u0e49\\u0e32\\u0e07\\u0e02\\u0e36\\u0e49\\u0e19, \\u0e44\\u0e14\\u0e49"
4641         "\\u0e21\\u0e35\\u0e23\\u0e30\\u0e1a\\u0e1a encoding \\u0e2d\\u0e22\\u0e39\\u0e48\\u0e2b\\u0e25\\u0e32\\u0e22\\u0e23"
4642         "\\u0e49\\u0e2d\\u0e22\\u0e23\\u0e30\\u0e1a\\u0e1a\\u0e2a\\u0e33\\u0e2b\\u0e23\\u0e31\\u0e1a\\u0e01\\u0e32\\u0e23"
4643         "\\u0e01\\u0e33\\u0e2b\\u0e19\\u0e14\\u0e2b\\u0e21\\u0e32\\u0e22\\u0e40\\u0e25\\u0e02\\u0e40\\u0e2b\\u0e25\\u0e48"
4644         "\\u0e32\\u0e19\\u0e35\\u0e49. \\u0e44\\u0e21\\u0e48\\u0e21\\u0e35 encoding \\u0e43\\u0e14\\u0e17\\u0e35\\u0e48"
4645         "\\u0e21\\u0e35\\u0e08\\u0e33\\u0e19\\u0e27\\u0e19\\u0e15\\u0e31\\u0e27\\u0e2d\\u0e31\\u0e01\\u0e02\\u0e23\\u0e30"
4646         "\\u0e21\\u0e32\\u0e01\\u0e40\\u0e1e\\u0e35\\u0e22\\u0e07\\u0e1e\\u0e2d: \\u0e22\\u0e01\\u0e15\\u0e31\\u0e27\\u0e2d"
4647         "\\u0e22\\u0e48\\u0e32\\u0e07\\u0e40\\u0e0a\\u0e48\\u0e19, \\u0e40\\u0e09\\u0e1e\\u0e32\\u0e30\\u0e43\\u0e19\\u0e01"
4648         "\\u0e25\\u0e38\\u0e48\\u0e21\\u0e2a\\u0e2b\\u0e20\\u0e32\\u0e1e\\u0e22\\u0e38\\u0e42\\u0e23\\u0e1b\\u0e40\\u0e1e"
4649         "\\u0e35\\u0e22\\u0e07\\u0e41\\u0e2b\\u0e48\\u0e07\\u0e40\\u0e14\\u0e35\\u0e22\\u0e27 \\u0e01\\u0e47\\u0e15\\u0e49"
4650         "\\u0e2d\\u0e07\\u0e01\\u0e32\\u0e23\\u0e2b\\u0e25\\u0e32\\u0e22 encoding \\u0e43\\u0e19\\u0e01\\u0e32\\u0e23\\u0e04"
4651         "\\u0e23\\u0e2d\\u0e1a\\u0e04\\u0e25\\u0e38\\u0e21\\u0e17\\u0e38\\u0e01\\u0e20\\u0e32\\u0e29\\u0e32\\u0e43\\u0e19"
4652         "\\u0e01\\u0e25\\u0e38\\u0e48\\u0e21. \\u0e2b\\u0e23\\u0e37\\u0e2d\\u0e41\\u0e21\\u0e49\\u0e41\\u0e15\\u0e48\\u0e43"
4653         "\\u0e19\\u0e20\\u0e32\\u0e29\\u0e32\\u0e40\\u0e14\\u0e35\\u0e48\\u0e22\\u0e27 \\u0e40\\u0e0a\\u0e48\\u0e19 \\u0e20"
4654         "\\u0e32\\u0e29\\u0e32\\u0e2d\\u0e31\\u0e07\\u0e01\\u0e24\\u0e29 \\u0e01\\u0e47\\u0e44\\u0e21\\u0e48\\u0e21\\u0e35"
4655         " encoding \\u0e43\\u0e14\\u0e17\\u0e35\\u0e48\\u0e40\\u0e1e\\u0e35\\u0e22\\u0e07\\u0e1e\\u0e2d\\u0e2a\\u0e33\\u0e2b"
4656         "\\u0e23\\u0e31\\u0e1a\\u0e17\\u0e38\\u0e01\\u0e15\\u0e31\\u0e27\\u0e2d\\u0e31\\u0e01\\u0e29\\u0e23, \\u0e40\\u0e04"
4657         "\\u0e23\\u0e37\\u0e48\\u0e2d\\u0e07\\u0e2b\\u0e21\\u0e32\\u0e22\\u0e27\\u0e23\\u0e23\\u0e04\\u0e15\\u0e2d\\u0e19"
4658         " \\u0e41\\u0e25\\u0e30\\u0e2a\\u0e31\\u0e0d\\u0e25\\u0e31\\u0e01\\u0e29\\u0e13\\u0e4c\\u0e17\\u0e32\\u0e07\\u0e40"
4659         "\\u0e17\\u0e04\\u0e19\\u0e34\\u0e04\\u0e17\\u0e35\\u0e48\\u0e43\\u0e0a\\u0e49\\u0e01\\u0e31\\u0e19\\u0e2d\\u0e22"
4660         "\\u0e39\\u0e48\\u0e17\\u0e31\\u0e48\\u0e27\\u0e44\\u0e1b.";
4661
4662     const char *latinText =
4663         "doy ph\\u1ee5\\u0304\\u0302n \\u1e6d\\u0304h\\u0101n l\\u00e6\\u0302w, khxmphiwtexr\\u0312 ca ke\\u012b\\u0300"
4664         "ywk\\u0304\\u0125xng k\\u1ea1b re\\u1ee5\\u0304\\u0300xng k\\u0304hxng t\\u1ea1wlek\\u0304h. khxmphiwtexr"
4665         "\\u0312 c\\u1ea1d k\\u0115b t\\u1ea1w x\\u1ea1ks\\u0304\\u02b9r l\\u00e6a x\\u1ea1kk\\u0304h ra x\\u1ee5\\u0304"
4666         "\\u0300n\\u00ab doy k\\u0101r k\\u1ea3h\\u0304nd h\\u0304m\\u0101ylek\\u0304h h\\u0304\\u0131\\u0302 s\\u0304"
4667         "\\u1ea3h\\u0304r\\u1ea1b t\\u00e6\\u0300la t\\u1ea1w. k\\u0300xn h\\u0304n\\u0302\\u0101 th\\u012b\\u0300\\u0301"
4668         " Unicode ca t\\u0304h\\u016bk s\\u0304r\\u0302\\u0101ng k\\u0304h\\u1ee5\\u0302n, d\\u1ecb\\u0302 m\\u012b "
4669         "rabb encoding xy\\u016b\\u0300 h\\u0304l\\u0101y r\\u0302xy rabb s\\u0304\\u1ea3h\\u0304r\\u1ea1b k\\u0101"
4670         "r k\\u1ea3h\\u0304nd h\\u0304m\\u0101ylek\\u0304h h\\u0304el\\u0300\\u0101 n\\u012b\\u0302. m\\u1ecb\\u0300m"
4671         "\\u012b encoding d\\u0131 th\\u012b\\u0300 m\\u012b c\\u1ea3nwn t\\u1ea1w x\\u1ea1kk\\u0304hra m\\u0101k p"
4672         "he\\u012byng phx: yk t\\u1ea1wx\\u1ef3\\u0101ng ch\\u00e8n, c\\u0304heph\\u0101a n\\u0131 kl\\u00f9m s\\u0304"
4673         "h\\u0304p\\u0323h\\u0101ph yurop phe\\u012byng h\\u0304\\u00e6\\u0300ng de\\u012byw k\\u0306 t\\u0302xngk\\u0101"
4674         "r h\\u0304l\\u0101y encoding n\\u0131 k\\u0101r khrxbkhlum thuk p\\u0323h\\u0101s\\u0304\\u02b9\\u0101 n\\u0131"
4675         " kl\\u00f9m. h\\u0304r\\u1ee5\\u0304x m\\u00e6\\u0302t\\u00e6\\u0300 n\\u0131 p\\u0323h\\u0101s\\u0304\\u02b9"
4676         "\\u0101 de\\u012b\\u0300yw ch\\u00e8n p\\u0323h\\u0101s\\u0304\\u02b9\\u0101 x\\u1ea1ngkvs\\u0304\\u02b9 k\\u0306"
4677         " m\\u1ecb\\u0300m\\u012b encoding d\\u0131 th\\u012b\\u0300 phe\\u012byng phx s\\u0304\\u1ea3h\\u0304r\\u1ea1"
4678         "b thuk t\\u1ea1w x\\u1ea1ks\\u0304\\u02b9r, kher\\u1ee5\\u0304\\u0300xngh\\u0304m\\u0101y wrrkh txn l\\u00e6"
4679         "a s\\u0304\\u1ea1\\u1ef5l\\u1ea1ks\\u0304\\u02b9\\u1e47\\u0312 th\\u0101ng thekhnikh th\\u012b\\u0300 ch\\u0131"
4680         "\\u0302 k\\u1ea1n xy\\u016b\\u0300 th\\u1ea1\\u0300wp\\u1ecb.";
4681
4682
4683     UnicodeString  xlitText(thaiText);
4684     xlitText = xlitText.unescape();
4685     tr->transliterate(xlitText);
4686
4687     UnicodeString expectedText(latinText);
4688     expectedText = expectedText.unescape();
4689     expect(*tr, xlitText, expectedText);
4690
4691     delete tr;
4692 #endif
4693 }
4694
4695
4696 //======================================================================
4697 // Support methods
4698 //======================================================================
4699 void TransliteratorTest::expectT(const UnicodeString& id,
4700                                  const UnicodeString& source,
4701                                  const UnicodeString& expectedResult) {
4702     UErrorCode ec = U_ZERO_ERROR;
4703     UParseError pe;
4704     Transliterator *t = Transliterator::createInstance(id, UTRANS_FORWARD, pe, ec);
4705     if (U_FAILURE(ec)) {
4706         errln((UnicodeString)"FAIL: Could not create " + id + " -  " + u_errorName(ec));
4707         delete t;
4708         return;
4709     }
4710     expect(*t, source, expectedResult);
4711     delete t;
4712 }
4713
4714 void TransliteratorTest::reportParseError(const UnicodeString& message,
4715                                           const UParseError& parseError,
4716                                           const UErrorCode& status) {
4717     dataerrln(message +
4718           /*", parse error " + parseError.code +*/
4719           ", line " + parseError.line +
4720           ", offset " + parseError.offset +
4721           ", pre-context " + prettify(parseError.preContext, TRUE) +
4722           ", post-context " + prettify(parseError.postContext,TRUE) +
4723           ", Error: " + u_errorName(status));
4724 }
4725
4726 void TransliteratorTest::expect(const UnicodeString& rules,
4727                                 const UnicodeString& source,
4728                                 const UnicodeString& expectedResult,
4729                                 UTransPosition *pos) {
4730     expect("<ID>", rules, source, expectedResult, pos);
4731 }
4732
4733 void TransliteratorTest::expect(const UnicodeString& id,
4734                                 const UnicodeString& rules,
4735                                 const UnicodeString& source,
4736                                 const UnicodeString& expectedResult,
4737                                 UTransPosition *pos) {
4738     UErrorCode status = U_ZERO_ERROR;
4739     UParseError parseError;
4740     Transliterator* t = Transliterator::createFromRules(id, rules, UTRANS_FORWARD, parseError, status);
4741     if (U_FAILURE(status)) {
4742         reportParseError(UnicodeString("Couldn't create transliterator from ") + rules, parseError, status);
4743     } else {
4744         expect(*t, source, expectedResult, pos);
4745     }
4746     delete t;
4747 }
4748
4749 void TransliteratorTest::expect(const Transliterator& t,
4750                                 const UnicodeString& source,
4751                                 const UnicodeString& expectedResult,
4752                                 const Transliterator& reverseTransliterator) {
4753     expect(t, source, expectedResult);
4754     expect(reverseTransliterator, expectedResult, source);
4755 }
4756
4757 void TransliteratorTest::expect(const Transliterator& t,
4758                                 const UnicodeString& source,
4759                                 const UnicodeString& expectedResult,
4760                                 UTransPosition *pos) {
4761     if (pos == 0) {
4762         UnicodeString result(source);
4763         t.transliterate(result);
4764         expectAux(t.getID() + ":String", source, result, expectedResult);
4765     }
4766     UTransPosition index={0, 0, 0, 0};
4767     if (pos != 0) {
4768         index = *pos;
4769     }
4770
4771     UnicodeString rsource(source);
4772     if (pos == 0) {
4773         t.transliterate(rsource);
4774     } else {
4775         // Do it all at once -- below we do it incrementally
4776         t.finishTransliteration(rsource, *pos);
4777     }
4778     expectAux(t.getID() + ":Replaceable", source, rsource, expectedResult);
4779
4780     // Test keyboard (incremental) transliteration -- this result
4781     // must be the same after we finalize (see below).
4782     UnicodeString log;
4783     rsource.remove();
4784     if (pos != 0) {
4785         rsource = source;
4786         formatInput(log, rsource, index);
4787         log.append(" -> ");
4788         UErrorCode status = U_ZERO_ERROR;
4789         t.transliterate(rsource, index, status);
4790         formatInput(log, rsource, index);
4791     } else {
4792         for (int32_t i=0; i<source.length(); ++i) {
4793             if (i != 0) {
4794                 log.append(" + ");
4795             }
4796             log.append(source.charAt(i)).append(" -> ");
4797             UErrorCode status = U_ZERO_ERROR;
4798             t.transliterate(rsource, index, source.charAt(i), status);
4799             formatInput(log, rsource, index);
4800         }
4801     }
4802
4803     // As a final step in keyboard transliteration, we must call
4804     // transliterate to finish off any pending partial matches that
4805     // were waiting for more input.
4806     t.finishTransliteration(rsource, index);
4807     log.append(" => ").append(rsource);
4808
4809     expectAux(t.getID() + ":Keyboard", log,
4810               rsource == expectedResult,
4811               expectedResult);
4812 }
4813
4814
4815 /**
4816  * @param appendTo result is appended to this param.
4817  * @param input the string being transliterated
4818  * @param pos the index struct
4819  */
4820 UnicodeString& TransliteratorTest::formatInput(UnicodeString &appendTo,
4821                                                const UnicodeString& input,
4822                                                const UTransPosition& pos) {
4823     // Output a string of the form aaa{bbb|ccc|ddd}eee, where
4824     // the {} indicate the context start and limit, and the ||
4825     // indicate the start and limit.
4826     if (0 <= pos.contextStart &&
4827         pos.contextStart <= pos.start &&
4828         pos.start <= pos.limit &&
4829         pos.limit <= pos.contextLimit &&
4830         pos.contextLimit <= input.length()) {
4831
4832         UnicodeString a, b, c, d, e;
4833         input.extractBetween(0, pos.contextStart, a);
4834         input.extractBetween(pos.contextStart, pos.start, b);
4835         input.extractBetween(pos.start, pos.limit, c);
4836         input.extractBetween(pos.limit, pos.contextLimit, d);
4837         input.extractBetween(pos.contextLimit, input.length(), e);
4838         appendTo.append(a).append((UChar)123/*{*/).append(b).
4839             append((UChar)PIPE).append(c).append((UChar)PIPE).append(d).
4840             append((UChar)125/*}*/).append(e);
4841     } else {
4842         appendTo.append((UnicodeString)"INVALID UTransPosition {cs=" +
4843                         pos.contextStart + ", s=" + pos.start + ", l=" +
4844                         pos.limit + ", cl=" + pos.contextLimit + "} on " +
4845                         input);
4846     }
4847     return appendTo;
4848 }
4849
4850 void TransliteratorTest::expectAux(const UnicodeString& tag,
4851                                    const UnicodeString& source,
4852                                    const UnicodeString& result,
4853                                    const UnicodeString& expectedResult) {
4854     expectAux(tag, source + " -> " + result,
4855               result == expectedResult,
4856               expectedResult);
4857 }
4858
4859 void TransliteratorTest::expectAux(const UnicodeString& tag,
4860                                    const UnicodeString& summary, UBool pass,
4861                                    const UnicodeString& expectedResult) {
4862     if (pass) {
4863         logln(UnicodeString("(")+tag+") " + prettify(summary));
4864     } else {
4865         dataerrln(UnicodeString("FAIL: (")+tag+") "
4866               + prettify(summary)
4867               + ", expected " + prettify(expectedResult));
4868     }
4869 }
4870
4871 #endif /* #if !UCONFIG_NO_TRANSLITERATION */