icuSources/test/intltest/transtst.cpp

   1 // © 2016 and later: Unicode, Inc. and others.
   2 // License & terms of use: http://www.unicode.org/copyright.html
   3 /*
   4 **********************************************************************
   5 *   Copyright (C) 1999-2016, International Business Machines
   6 *   Corporation and others.  All Rights Reserved.
   7 **********************************************************************
   8 *   Date        Name        Description
   9 *   11/10/99    aliu        Creation.
  10 **********************************************************************
  11 */
  12
  13 #include "unicode/utypes.h"
  14
  15 #if !UCONFIG_NO_TRANSLITERATION
  16
  17 #include "transtst.h"
  18 #include "unicode/locid.h"
  19 #include "unicode/dtfmtsym.h"
  20 #include "unicode/normlzr.h"
  21 #include "unicode/translit.h"
  22 #include "unicode/uchar.h"
  23 #include "unicode/unifilt.h"
  24 #include "unicode/uniset.h"
  25 #include "unicode/ustring.h"
  26 #include "unicode/usetiter.h"
  27 #include "unicode/uscript.h"
  28 #include "unicode/utf16.h"
  29 #include "cpdtrans.h"
  30 #include "nultrans.h"
  31 #include "rbt.h"
  32 #include "rbt_pars.h"
  33 #include "anytrans.h"
  34 #include "esctrn.h"
  35 #include "name2uni.h"
  36 #include "nortrans.h"
  37 #include "remtrans.h"
  38 #include "titletrn.h"
  39 #include "tolowtrn.h"
  40 #include "toupptrn.h"
  41 #include "unesctrn.h"
  42 #include "uni2name.h"
  43 #include "cstring.h"
  44 #include "cmemory.h"
  45 #include <stdio.h>
  46
  47 /***********************************************************************
  48
  49                      HOW TO USE THIS TEST FILE
  50                                -or-
  51                   How I developed on two platforms
  52                 without losing (too much of) my mind
  53
  54
  55 1. Add new tests by copying/pasting/changing existing tests.  On Java,
  56    any public void method named Test...() taking no parameters becomes
  57    a test.  On C++, you need to modify the header and add a line to
  58    the runIndexedTest() dispatch method.
  59
  60 2. Make liberal use of the expect() method; it is your friend.
  61
  62 3. The tests in this file exactly match those in a sister file on the
  63    other side.  The two files are:
  64
  65    icu4j:  src/com/ibm/test/translit/TransliteratorTest.java
  66    icu4c:  source/test/intltest/transtst.cpp
  67
  68                   ==> THIS IS THE IMPORTANT PART <==
  69
  70    When you add a test in this file, add it in TransliteratorTest.java
  71    too.  Give it the same name and put it in the same relative place.
  72    This makes maintenance a lot simpler for any poor soul who ends up
  73    trying to synchronize the tests between icu4j and icu4c.
  74
  75 4. If you MUST enter a test that is NOT paralleled in the sister file,
  76    then add it in the special non-mirrored section.  These are
  77    labeled
  78
  79      "icu4j ONLY"
  80
  81    or
  82
  83      "icu4c ONLY"
  84
  85    Make sure you document the reason the test is here and not there.
  86
  87
  88 Thank you.
  89 The Management
  90 ***********************************************************************/
  91
  92 // Define character constants thusly to be EBCDIC-friendly
  93 enum {
  94     LEFT_BRACE=((UChar)0x007B), /*{*/
  95     PIPE      =((UChar)0x007C), /*|*/
  96     ZERO      =((UChar)0x0030), /*0*/
  97     UPPER_A   =((UChar)0x0041)  /*A*/
  98 };
  99
 100 TransliteratorTest::TransliteratorTest()
 101 :   DESERET_DEE((UChar32)0x10414),
 102     DESERET_dee((UChar32)0x1043C)
 103 {
 104 }
 105
 106 TransliteratorTest::~TransliteratorTest() {}
 107
/**
 * Test dispatcher: associates each zero-based test index with one of the
 * Test...() methods of this class by way of the TESTCASE macro (which is
 * defined outside this file).
 *
 * @param index  index of the requested test
 * @param exec   consumed by TESTCASE; presumably selects between running
 *               the test and only reporting its name -- confirm against
 *               the TESTCASE definition
 * @param name   output; set to "" when index matches no case below
 *
 * To add a test, append a TESTCASE line with the next free index and
 * declare the method in the header.
 */
void
TransliteratorTest::runIndexedTest(int32_t index, UBool exec,
                                   const char* &name, char* /*par*/) {
    switch (index) {
        TESTCASE(0,TestInstantiation);
        TESTCASE(1,TestSimpleRules);
        TESTCASE(2,TestRuleBasedInverse);
        TESTCASE(3,TestKeyboard);
        TESTCASE(4,TestKeyboard2);
        TESTCASE(5,TestKeyboard3);
        TESTCASE(6,TestArabic);
        TESTCASE(7,TestCompoundKana);
        TESTCASE(8,TestCompoundHex);
        TESTCASE(9,TestFiltering);
        TESTCASE(10,TestInlineSet);
        TESTCASE(11,TestPatternQuoting);
        TESTCASE(12,TestJ277);
        TESTCASE(13,TestJ243);
        TESTCASE(14,TestJ329);
        TESTCASE(15,TestSegments);
        TESTCASE(16,TestCursorOffset);
        TESTCASE(17,TestArbitraryVariableValues);
        TESTCASE(18,TestPositionHandling);
        TESTCASE(19,TestHiraganaKatakana);
        TESTCASE(20,TestCopyJ476);
        TESTCASE(21,TestAnchors);
        TESTCASE(22,TestInterIndic);
        TESTCASE(23,TestFilterIDs);
        TESTCASE(24,TestCaseMap);
        TESTCASE(25,TestNameMap);
        TESTCASE(26,TestLiberalizedID);
        TESTCASE(27,TestCreateInstance);
        TESTCASE(28,TestNormalizationTransliterator);
        TESTCASE(29,TestCompoundRBT);
        TESTCASE(30,TestCompoundFilter);
        TESTCASE(31,TestRemove);
        TESTCASE(32,TestToRules);
        TESTCASE(33,TestContext);
        TESTCASE(34,TestSupplemental);
        TESTCASE(35,TestQuantifier);
        TESTCASE(36,TestSTV);
        TESTCASE(37,TestCompoundInverse);
        TESTCASE(38,TestNFDChainRBT);
        TESTCASE(39,TestNullInverse);
        TESTCASE(40,TestAliasInverseID);
        TESTCASE(41,TestCompoundInverseID);
        TESTCASE(42,TestUndefinedVariable);
        TESTCASE(43,TestEmptyContext);
        TESTCASE(44,TestCompoundFilterID);
        TESTCASE(45,TestPropertySet);
        TESTCASE(46,TestNewEngine);
        TESTCASE(47,TestQuantifiedSegment);
        TESTCASE(48,TestDevanagariLatinRT);
        TESTCASE(49,TestTeluguLatinRT);
        TESTCASE(50,TestCompoundLatinRT);
        TESTCASE(51,TestSanskritLatinRT);
        TESTCASE(52,TestLocaleInstantiation);
        TESTCASE(53,TestTitleAccents);
        TESTCASE(54,TestLocaleResource);
        TESTCASE(55,TestParseError);
        TESTCASE(56,TestOutputSet);
        TESTCASE(57,TestVariableRange);
        TESTCASE(58,TestInvalidPostContext);
        TESTCASE(59,TestIDForms);
        TESTCASE(60,TestToRulesMark);
        TESTCASE(61,TestEscape);
        TESTCASE(62,TestAnchorMasking);
        TESTCASE(63,TestDisplayName);
        TESTCASE(64,TestSpecialCases);
        // TestIncrementalProgress is only dispatched when file I/O is
        // compiled in; otherwise index 65 has no case of its own and is
        // handled by the default label below.
#if !UCONFIG_NO_FILE_IO
        TESTCASE(65,TestIncrementalProgress);
#endif
        TESTCASE(66,TestSurrogateCasing);
        TESTCASE(67,TestFunction);
        TESTCASE(68,TestInvalidBackRef);
        TESTCASE(69,TestMulticharStringSet);
        TESTCASE(70,TestUserFunction);
        TESTCASE(71,TestAnyX);
        TESTCASE(72,TestSourceTargetSet);
        TESTCASE(73,TestGurmukhiDevanagari);
        TESTCASE(74,TestPatternWhiteSpace);
        TESTCASE(75,TestAllCodepoints);
        TESTCASE(76,TestBoilerplate);
        TESTCASE(77,TestAlternateSyntax);
        TESTCASE(78,TestBeginEnd);
        TESTCASE(79,TestBeginEndToRules);
        TESTCASE(80,TestRegisterAlias);
        TESTCASE(81,TestRuleStripping);
        TESTCASE(82,TestHalfwidthFullwidth);
        TESTCASE(83,TestThai);
        TESTCASE(84,TestAny);
        TESTCASE(85,TestHansHant);
        // Unknown index: report an empty name.
        default: name = ""; break;
    }
}
 203
 204 /**
 205  * Make sure every system transliterator can be instantiated.
 206  *
 207  * ALSO test that the result of toRules() for each rule is a valid
 208  * rule.  Do this here so we don't have to have another test that
 209  * instantiates everything as well.
 210  */
 211 void TransliteratorTest::TestInstantiation() {
 212     UErrorCode ec = U_ZERO_ERROR;
 213     StringEnumeration* avail = Transliterator::getAvailableIDs(ec);
 214     assertSuccess("getAvailableIDs()", ec);
 215     assertTrue("getAvailableIDs()!=NULL", avail!=NULL);
 216     int32_t n = Transliterator::countAvailableIDs();
 217     assertTrue("getAvailableIDs().count()==countAvailableIDs()",
 218                avail->count(ec) == n);
 219     assertSuccess("count()", ec);
 220     UnicodeString name;
 221     for (int32_t i=0; i<n; ++i) {
 222         const UnicodeString& id = *avail->snext(ec);
 223         if (!assertSuccess("snext()", ec) ||
 224             !assertTrue("snext()!=NULL", (&id)!=NULL, TRUE)) {
 225             break;
 226         }
 227         UnicodeString id2 = Transliterator::getAvailableID(i);
 228         if (id.length() < 1) {
 229             errln(UnicodeString("FAIL: getAvailableID(") +
 230                   i + ") returned empty string");
 231             continue;
 232         }
 233         if (id != id2) {
 234             errln(UnicodeString("FAIL: getAvailableID(") +
 235                   i + ") != getAvailableIDs().snext()");
 236             continue;
 237         }
 238         UParseError parseError;
 239         UErrorCode status = U_ZERO_ERROR;
 240         Transliterator* t = Transliterator::createInstance(id,
 241                               UTRANS_FORWARD, parseError,status);
 242         name.truncate(0);
 243         Transliterator::getDisplayName(id, name);
 244         if (t == 0) {
 245 #if UCONFIG_NO_BREAK_ITERATION
 246             // If UCONFIG_NO_BREAK_ITERATION is on, then only Thai should fail.
 247             if (id.compare((UnicodeString)"Thai-Latn") != 0 &&
 248                 id.compare((UnicodeString)"Thai-Latin") != 0)
 249 #endif
 250                 dataerrln(UnicodeString("FAIL: Couldn't create ") + id +
 251                       /*", parse error " + parseError.code +*/
 252                       ", line " + parseError.line +
 253                       ", offset " + parseError.offset +
 254                       ", pre-context " + prettify(parseError.preContext, TRUE) +
 255                       ", post-context " +prettify(parseError.postContext,TRUE) +
 256                       ", Error: " + u_errorName(status));
 257                 // When createInstance fails, it deletes the failing
 258                 // entry from the available ID list.  We detect this
 259                 // here by looking for a change in countAvailableIDs.
 260             int32_t nn = Transliterator::countAvailableIDs();
 261             if (nn == (n - 1)) {
 262                 n = nn;
 263                 --i; // Compensate for deleted entry
 264             }
 265         } else {
 266             logln(UnicodeString("OK: ") + name + " (" + id + ")");
 267
 268             // Now test toRules
 269             UnicodeString rules;
 270             t->toRules(rules, TRUE);
 271             Transliterator *u = Transliterator::createFromRules("x",
 272                                     rules, UTRANS_FORWARD, parseError,status);
 273             if (u == 0) {
 274                 errln(UnicodeString("FAIL: ") + id +
 275                       ".createFromRules() => bad rules" +
 276                       /*", parse error " + parseError.code +*/
 277                       ", line " + parseError.line +
 278                       ", offset " + parseError.offset +
 279                       ", context " + prettify(parseError.preContext, TRUE) +
 280                       ", rules: " + prettify(rules, TRUE));
 281             } else {
 282                 delete u;
 283             }
 284             delete t;
 285         }
 286     }
 287     assertTrue("snext()==NULL", avail->snext(ec)==NULL);
 288     assertSuccess("snext()", ec);
 289     delete avail;
 290
 291     // Now test the failure path
 292     UParseError parseError;
 293     UErrorCode status = U_ZERO_ERROR;
 294     UnicodeString id("<Not a valid Transliterator ID>");
 295     Transliterator* t = Transliterator::createInstance(id, UTRANS_FORWARD, parseError, status);
 296     if (t != 0) {
 297         errln("FAIL: " + id + " returned a transliterator");
 298         delete t;
 299     } else {
 300         logln("OK: Bogus ID handled properly");
 301     }
 302 }
 303
 304 void TransliteratorTest::TestSimpleRules(void) {
 305     /* Example: rules 1. ab>x|y
 306      *                2. yc>z
 307      *
 308      * []|eabcd  start - no match, copy e to tranlated buffer
 309      * [e]|abcd  match rule 1 - copy output & adjust cursor
 310      * [ex|y]cd  match rule 2 - copy output & adjust cursor
 311      * [exz]|d   no match, copy d to transliterated buffer
 312      * [exzd]|   done
 313      */
 314     expect(UnicodeString("ab>x|y;", "") +
 315            "yc>z",
 316            "eabcd", "exzd");
 317
 318     /* Another set of rules:
 319      *    1. ab>x|yzacw
 320      *    2. za>q
 321      *    3. qc>r
 322      *    4. cw>n
 323      *
 324      * []|ab       Rule 1
 325      * [x|yzacw]   No match
 326      * [xy|zacw]   Rule 2
 327      * [xyq|cw]    Rule 4
 328      * [xyqn]|     Done
 329      */
 330     expect(UnicodeString("ab>x|yzacw;") +
 331            "za>q;" +
 332            "qc>r;" +
 333            "cw>n",
 334            "ab", "xyqn");
 335
 336     /* Test categories
 337      */
 338     UErrorCode status = U_ZERO_ERROR;
 339     UParseError parseError;
 340     Transliterator *t = Transliterator::createFromRules(
 341         "<ID>",
 342         UnicodeString("$dummy=").append((UChar)0xE100) +
 343         UnicodeString(";"
 344                       "$vowel=[aeiouAEIOU];"
 345                       "$lu=[:Lu:];"
 346                       "$vowel } $lu > '!';"
 347                       "$vowel > '&';"
 348                       "'!' { $lu > '^';"
 349                       "$lu > '*';"
 350                       "a > ERROR", ""),
 351         UTRANS_FORWARD, parseError,
 352         status);
 353     if (U_FAILURE(status)) {
 354         dataerrln("FAIL: RBT constructor failed - %s", u_errorName(status));
 355         return;
 356     }
 357     expect(*t, "abcdefgABCDEFGU", "&bcd&fg!^**!^*&");
 358     delete t;
 359 }
 360
 361 /**
 362  * Test inline set syntax and set variable syntax.
 363  */
 364 void TransliteratorTest::TestInlineSet(void) {
 365     expect("{ [:Ll:] } x > y; [:Ll:] > z;", "aAbxq", "zAyzz");
 366     expect("a[0-9]b > qrs", "1a7b9", "1qrs9");
 367
 368     expect(UnicodeString(
 369            "$digit = [0-9];"
 370            "$alpha = [a-zA-Z];"
 371            "$alphanumeric = [$digit $alpha];" // ***
 372            "$special = [^$alphanumeric];"     // ***
 373            "$alphanumeric > '-';"
 374            "$special > '*';", ""),
 375
 376            "thx-1138", "---*----");
 377 }
 378
 379 /**
 380  * Create some inverses and confirm that they work.  We have to be
 381  * careful how we do this, since the inverses will not be true
 382  * inverses -- we can't throw any random string at the composition
 383  * of the transliterators and expect the identity function.  F x
 384  * F' != I.  However, if we are careful about the input, we will
 385  * get the expected results.
 386  */
 387 void TransliteratorTest::TestRuleBasedInverse(void) {
 388     UnicodeString RULES =
 389         UnicodeString("abc>zyx;") +
 390         "ab>yz;" +
 391         "bc>zx;" +
 392         "ca>xy;" +
 393         "a>x;" +
 394         "b>y;" +
 395         "c>z;" +
 396
 397         "abc<zyx;" +
 398         "ab<yz;" +
 399         "bc<zx;" +
 400         "ca<xy;" +
 401         "a<x;" +
 402         "b<y;" +
 403         "c<z;" +
 404
 405         "";
 406
 407     const char* DATA[] = {
 408         // Careful here -- random strings will not work.  If we keep
 409         // the left side to the domain and the right side to the range
 410         // we will be okay though (left, abc; right xyz).
 411         "a", "x",
 412         "abcacab", "zyxxxyy",
 413         "caccb", "xyzzy",
 414     };
 415
 416     int32_t DATA_length = UPRV_LENGTHOF(DATA);
 417
 418     UErrorCode status = U_ZERO_ERROR;
 419     UParseError parseError;
 420     Transliterator *fwd = Transliterator::createFromRules("<ID>", RULES,
 421                                 UTRANS_FORWARD, parseError, status);
 422     Transliterator *rev = Transliterator::createFromRules("<ID>", RULES,
 423                                 UTRANS_REVERSE, parseError, status);
 424     if (U_FAILURE(status)) {
 425         errln("FAIL: RBT constructor failed");
 426         return;
 427     }
 428     for (int32_t i=0; i<DATA_length; i+=2) {
 429         expect(*fwd, DATA[i], DATA[i+1]);
 430         expect(*rev, DATA[i+1], DATA[i]);
 431     }
 432     delete fwd;
 433     delete rev;
 434 }
 435
 436 /**
 437  * Basic test of keyboard.
 438  */
 439 void TransliteratorTest::TestKeyboard(void) {
 440     UParseError parseError;
 441     UErrorCode status = U_ZERO_ERROR;
 442     Transliterator *t = Transliterator::createFromRules("<ID>",
 443                               UnicodeString("psch>Y;")
 444                               +"ps>y;"
 445                               +"ch>x;"
 446                               +"a>A;",
 447                               UTRANS_FORWARD, parseError,
 448                               status);
 449     if (U_FAILURE(status)) {
 450         errln("FAIL: RBT constructor failed");
 451         return;
 452     }
 453     const char* DATA[] = {
 454         // insertion, buffer
 455         "a", "A",
 456         "p", "Ap",
 457         "s", "Aps",
 458         "c", "Apsc",
 459         "a", "AycA",
 460         "psch", "AycAY",
 461         0, "AycAY", // null means finishKeyboardTransliteration
 462     };
 463
 464     keyboardAux(*t, DATA, UPRV_LENGTHOF(DATA));
 465     delete t;
 466 }
 467
 468 /**
 469  * Basic test of keyboard with cursor.
 470  */
 471 void TransliteratorTest::TestKeyboard2(void) {
 472     UParseError parseError;
 473     UErrorCode status = U_ZERO_ERROR;
 474     Transliterator *t = Transliterator::createFromRules("<ID>",
 475                               UnicodeString("ych>Y;")
 476                               +"ps>|y;"
 477                               +"ch>x;"
 478                               +"a>A;",
 479                               UTRANS_FORWARD, parseError,
 480                               status);
 481     if (U_FAILURE(status)) {
 482         errln("FAIL: RBT constructor failed");
 483         return;
 484     }
 485     const char* DATA[] = {
 486         // insertion, buffer
 487         "a", "A",
 488         "p", "Ap",
 489         "s", "Aps", // modified for rollback - "Ay",
 490         "c", "Apsc", // modified for rollback - "Ayc",
 491         "a", "AycA",
 492         "p", "AycAp",
 493         "s", "AycAps", // modified for rollback - "AycAy",
 494         "c", "AycApsc", // modified for rollback - "AycAyc",
 495         "h", "AycAY",
 496         0, "AycAY", // null means finishKeyboardTransliteration
 497     };
 498
 499     keyboardAux(*t, DATA, UPRV_LENGTHOF(DATA));
 500     delete t;
 501 }
 502
 503 /**
 504  * Test keyboard transliteration with back-replacement.
 505  */
 506 void TransliteratorTest::TestKeyboard3(void) {
 507     // We want th>z but t>y.  Furthermore, during keyboard
 508     // transliteration we want t>y then yh>z if t, then h are
 509     // typed.
 510     UnicodeString RULES("t>|y;"
 511                         "yh>z;");
 512
 513     const char* DATA[] = {
 514         // Column 1: characters to add to buffer (as if typed)
 515         // Column 2: expected appearance of buffer after
 516         //           keyboard xliteration.
 517         "a", "a",
 518         "b", "ab",
 519         "t", "abt", // modified for rollback - "aby",
 520         "c", "abyc",
 521         "t", "abyct", // modified for rollback - "abycy",
 522         "h", "abycz",
 523         0, "abycz", // null means finishKeyboardTransliteration
 524     };
 525
 526     UParseError parseError;
 527     UErrorCode status = U_ZERO_ERROR;
 528     Transliterator *t = Transliterator::createFromRules("<ID>", RULES, UTRANS_FORWARD, parseError, status);
 529     if (U_FAILURE(status)) {
 530         errln("FAIL: RBT constructor failed");
 531         return;
 532     }
 533     keyboardAux(*t, DATA, UPRV_LENGTHOF(DATA));
 534     delete t;
 535 }
 536
 537 void TransliteratorTest::keyboardAux(const Transliterator& t,
 538                                      const char* DATA[], int32_t DATA_length) {
 539     UErrorCode status = U_ZERO_ERROR;
 540     UTransPosition index={0, 0, 0, 0};
 541     UnicodeString s;
 542     for (int32_t i=0; i<DATA_length; i+=2) {
 543         UnicodeString log;
 544         if (DATA[i] != 0) {
 545             log = s + " + "
 546                 + DATA[i]
 547                 + " -> ";
 548             t.transliterate(s, index, DATA[i], status);
 549         } else {
 550             log = s + " => ";
 551             t.finishTransliteration(s, index);
 552         }
 553         // Show the start index '{' and the cursor '|'
 554         UnicodeString a, b, c;
 555         s.extractBetween(0, index.contextStart, a);
 556         s.extractBetween(index.contextStart, index.start, b);
 557         s.extractBetween(index.start, s.length(), c);
 558         log.append(a).
 559             append((UChar)LEFT_BRACE).
 560             append(b).
 561             append((UChar)PIPE).
 562             append(c);
 563         if (s == DATA[i+1] && U_SUCCESS(status)) {
 564             logln(log);
 565         } else {
 566             errln(UnicodeString("FAIL: ") + log + ", expected " + DATA[i+1]);
 567         }
 568     }
 569 }
 570
/**
 * Placeholder for the Latin-Arabic test.  The whole body is commented
 * out, so this test currently performs no checks; the disabled code is
 * kept below for reference.
 */
void TransliteratorTest::TestArabic(void) {
// Test disabled for 2.0 until new Arabic transliterator can be written.
//    /*
//    const char* DATA[] = {
//        "Arabic", "\u062a\u062a\u0645\u062a\u0639\u0020"+
//                  "\u0627\u0644\u0644\u063a\u0629\u0020"+
//                  "\u0627\u0644\u0639\u0631\u0628\u0628\u064a\u0629\u0020"+
//                  "\u0628\u0628\u0646\u0638\u0645\u0020"+
//                  "\u0643\u062a\u0627\u0628\u0628\u064a\u0629\u0020"+
//                  "\u062c\u0645\u064a\u0644\u0629",
//    };
//    */
//
//    UChar ar_raw[] = {
//        0x062a, 0x062a, 0x0645, 0x062a, 0x0639, 0x0020, 0x0627,
//        0x0644, 0x0644, 0x063a, 0x0629, 0x0020, 0x0627, 0x0644,
//        0x0639, 0x0631, 0x0628, 0x0628, 0x064a, 0x0629, 0x0020,
//        0x0628, 0x0628, 0x0646, 0x0638, 0x0645, 0x0020, 0x0643,
//        0x062a, 0x0627, 0x0628, 0x0628, 0x064a, 0x0629, 0x0020,
//        0x062c, 0x0645, 0x064a, 0x0644, 0x0629, 0
//    };
//    UnicodeString ar(ar_raw);
//    UErrorCode status=U_ZERO_ERROR;
//    UParseError parseError;
//    Transliterator *t = Transliterator::createInstance("Latin-Arabic", UTRANS_FORWARD, parseError, status);
//    if (t == 0) {
//        errln("FAIL: createInstance failed");
//        return;
//    }
//    expect(*t, "Arabic", ar);
//    delete t;
}
 603
 604 /**
 605  * Compose the Kana transliterator forward and reverse and try
 606  * some strings that should come out unchanged.
 607  */
 608 void TransliteratorTest::TestCompoundKana(void) {
 609     UParseError parseError;
 610     UErrorCode status = U_ZERO_ERROR;
 611     Transliterator* t = Transliterator::createInstance("Latin-Hiragana;Hiragana-Latin", UTRANS_FORWARD, parseError, status);
 612     if (t == 0) {
 613         dataerrln("FAIL: construction of Latin-Hiragana;Hiragana-Latin failed - %s", u_errorName(status));
 614     } else {
 615         expect(*t, "aaaaa", "aaaaa");
 616         delete t;
 617     }
 618 }
 619
 620 /**
 621  * Compose the hex transliterators forward and reverse.
 622  */
 623 void TransliteratorTest::TestCompoundHex(void) {
 624     UParseError parseError;
 625     UErrorCode status = U_ZERO_ERROR;
 626     Transliterator* a = Transliterator::createInstance("Any-Hex", UTRANS_FORWARD, parseError, status);
 627     Transliterator* b = Transliterator::createInstance("Hex-Any", UTRANS_FORWARD, parseError, status);
 628     Transliterator* transab[] = { a, b };
 629     Transliterator* transba[] = { b, a };
 630     if (a == 0 || b == 0) {
 631         errln("FAIL: construction failed");
 632         delete a;
 633         delete b;
 634         return;
 635     }
 636     // Do some basic tests of a
 637     expect(*a, "01", UnicodeString("\\u0030\\u0031", ""));
 638     // Do some basic tests of b
 639     expect(*b, UnicodeString("\\u0030\\u0031", ""), "01");
 640
 641     Transliterator* ab = new CompoundTransliterator(transab, 2);
 642     UnicodeString s("abcde", "");
 643     expect(*ab, s, s);
 644
 645     UnicodeString str(s);
 646     a->transliterate(str);
 647     Transliterator* ba = new CompoundTransliterator(transba, 2);
 648     expect(*ba, str, str);
 649
 650     delete ab;
 651     delete ba;
 652     delete a;
 653     delete b;
 654 }
 655
 656 int gTestFilterClassID = 0;
 657 /**
 658  * Used by TestFiltering().
 659  */
 660 class TestFilter : public UnicodeFilter {
 661     virtual TestFilter* clone() const {
 662         return new TestFilter(*this);
 663     }
 664     virtual UBool contains(UChar32 c) const {
 665         return c != (UChar)0x0063 /*c*/;
 666     }
 667     // Stubs
 668     virtual UnicodeString& toPattern(UnicodeString& result,
 669                                      UBool /*escapeUnprintable*/) const {
 670         return result;
 671     }
 672     virtual UBool matchesIndexValue(uint8_t /*v*/) const {
 673         return FALSE;
 674     }
 675     virtual void addMatchSetTo(UnicodeSet& /*toUnionTo*/) const {}
 676 public:
 677     UClassID getDynamicClassID() const { return (UClassID)&gTestFilterClassID; }
 678 };
 679
 680 /**
 681  * Do some basic tests of filtering.
 682  */
 683 void TransliteratorTest::TestFiltering(void) {
 684     UParseError parseError;
 685     UErrorCode status = U_ZERO_ERROR;
 686     Transliterator* hex = Transliterator::createInstance("Any-Hex", UTRANS_FORWARD, parseError, status);
 687     if (hex == 0) {
 688         errln("FAIL: createInstance(Any-Hex) failed");
 689         return;
 690     }
 691     hex->adoptFilter(new TestFilter());
 692     UnicodeString s("abcde");
 693     hex->transliterate(s);
 694     UnicodeString exp("\\u0061\\u0062c\\u0064\\u0065", "");
 695     if (s == exp) {
 696         logln(UnicodeString("Ok:   \"") + exp + "\"");
 697     } else {
 698         logln(UnicodeString("FAIL: \"") + s + "\", wanted \"" + exp + "\"");
 699     }
 700
 701     // ICU4C ONLY. Do not find Transliterator.orphanFilter() in ICU4J.
 702     UnicodeFilter *f = hex->orphanFilter();
 703     if (f == NULL){
 704         errln("FAIL: orphanFilter() should get a UnicodeFilter");
 705     } else {
 706         delete f;
 707     }
 708     delete hex;
 709 }
 710
 711 /**
 712  * Test anchors
 713  */
 714 void TransliteratorTest::TestAnchors(void) {
 715     expect(UnicodeString("^a  > 0; a$ > 2 ; a > 1;", ""),
 716            "aaa",
 717            "012");
 718     expect(UnicodeString("$s=[z$]; $s{a>0; a}$s>2; a>1;", ""),
 719            "aaa",
 720            "012");
 721     expect(UnicodeString("^ab  > 01 ;"
 722            " ab  > |8 ;"
 723            "  b  > k ;"
 724            " 8x$ > 45 ;"
 725            " 8x  > 77 ;", ""),
 726
 727            "ababbabxabx",
 728            "018k7745");
 729     expect(UnicodeString("$s = [z$] ;"
 730            "$s{ab    > 01 ;"
 731            "   ab    > |8 ;"
 732            "    b    > k ;"
 733            "   8x}$s > 45 ;"
 734            "   8x    > 77 ;", ""),
 735
 736            "abzababbabxzabxabx",
 737            "01z018k45z01x45");
 738 }
 739
 740 /**
 741  * Test pattern quoting and escape mechanisms.
 742  */
 743 void TransliteratorTest::TestPatternQuoting(void) {
 744     // Array of 3n items
 745     // Each item is <rules>, <input>, <expected output>
 746     const UnicodeString DATA[] = {
 747         UnicodeString(UChar(0x4E01)) + ">'[male adult]'",
 748         UnicodeString(UChar(0x4E01)),
 749         "[male adult]"
 750     };
 751
 752     for (int32_t i=0; i<3; i+=3) {
 753         logln(UnicodeString("Pattern: ") + prettify(DATA[i]));
 754         UParseError parseError;
 755         UErrorCode status = U_ZERO_ERROR;
 756         Transliterator *t = Transliterator::createFromRules("<ID>", DATA[i], UTRANS_FORWARD, parseError, status);
 757         if (U_FAILURE(status)) {
 758             errln("RBT constructor failed");
 759         } else {
 760             expect(*t, DATA[i+1], DATA[i+2]);
 761         }
 762         delete t;
 763     }
 764 }
 765
 766 /**
 767  * Regression test for bugs found in Greek transliteration.
 768  */
 769 void TransliteratorTest::TestJ277(void) {
 770     UErrorCode status = U_ZERO_ERROR;
 771     UParseError parseError;
 772     Transliterator *gl = Transliterator::createInstance("Greek-Latin; NFD; [:M:]Remove; NFC", UTRANS_FORWARD, parseError, status);
 773     if (gl == NULL) {
 774         dataerrln("FAIL: createInstance(Greek-Latin) returned NULL - %s", u_errorName(status));
 775         return;
 776     }
 777
 778     UChar sigma = 0x3C3;
 779     UChar upsilon = 0x3C5;
 780     UChar nu = 0x3BD;
 781 //    UChar PHI = 0x3A6;
 782     UChar alpha = 0x3B1;
 783 //    UChar omega = 0x3C9;
 784 //    UChar omicron = 0x3BF;
 785 //    UChar epsilon = 0x3B5;
 786
 787     // sigma upsilon nu -> syn
 788     UnicodeString syn;
 789     syn.append(sigma).append(upsilon).append(nu);
 790     expect(*gl, syn, "syn");
 791
 792     // sigma alpha upsilon nu -> saun
 793     UnicodeString sayn;
 794     sayn.append(sigma).append(alpha).append(upsilon).append(nu);
 795     expect(*gl, sayn, "saun");
 796
 797     // Again, using a smaller rule set
 798     UnicodeString rules(
 799                 "$alpha   = \\u03B1;"
 800                 "$nu      = \\u03BD;"
 801                 "$sigma   = \\u03C3;"
 802                 "$ypsilon = \\u03C5;"
 803                 "$vowel   = [aeiouAEIOU$alpha$ypsilon];"
 804                 "s <>           $sigma;"
 805                 "a <>           $alpha;"
 806                 "u <>  $vowel { $ypsilon;"
 807                 "y <>           $ypsilon;"
 808                 "n <>           $nu;",
 809                 "");
 810     Transliterator *mini = Transliterator::createFromRules("mini", rules, UTRANS_REVERSE, parseError, status);
 811     if (U_FAILURE(status)) { errln("FAIL: Transliterator constructor failed"); return; }
 812     expect(*mini, syn, "syn");
 813     expect(*mini, sayn, "saun");
 814     delete mini;
 815     mini = NULL;
 816
 817 #if !UCONFIG_NO_FORMATTING
 818     // Transliterate the Greek locale data
 819     Locale el("el");
 820     DateFormatSymbols syms(el, status);
 821     if (U_FAILURE(status)) { errln("FAIL: Transliterator constructor failed"); return; }
 822     int32_t i, count;
 823     const UnicodeString* data = syms.getMonths(count);
 824     for (i=0; i<count; ++i) {
 825         if (data[i].length() == 0) {
 826             continue;
 827         }
 828         UnicodeString out(data[i]);
 829         gl->transliterate(out);
 830         UBool ok = TRUE;
 831         if (data[i].length() >= 2 && out.length() >= 2 &&
 832             u_isupper(data[i].charAt(0)) && u_islower(data[i].charAt(1))) {
 833             if (!(u_isupper(out.charAt(0)) && u_islower(out.charAt(1)))) {
 834                 ok = FALSE;
 835             }
 836         }
 837         if (ok) {
 838             logln(prettify(data[i] + " -> " + out));
 839         } else {
 840             errln(UnicodeString("FAIL: ") + prettify(data[i] + " -> " + out));
 841         }
 842     }
 843 #endif
 844
 845     delete gl;
 846 }
 847
 848 /**
 849  * Prefix, suffix support in hex transliterators
 850  */
 851 void TransliteratorTest::TestJ243(void) {
 852     UErrorCode ec = U_ZERO_ERROR;
 853
 854     // Test default Hex-Any, which should handle
 855     // \u, \U, u+, and U+
 856     Transliterator *hex =
 857         Transliterator::createInstance("Hex-Any", UTRANS_FORWARD, ec);
 858     if (assertSuccess("getInstance", ec)) {
 859         expect(*hex, UnicodeString("\\u0041+\\U00000042,U+0043uU+0044z", ""), "A+B,CuDz");
 860     }
 861     delete hex;
 862
 863 //    // Try a custom Hex-Unicode
 864 //    // \uXXXX and &#xXXXX;
 865 //    ec = U_ZERO_ERROR;
 866 //    HexToUnicodeTransliterator hex2(UnicodeString("\\\\u###0;&\\#x###0\\;", ""), ec);
 867 //    expect(hex2, UnicodeString("\\u61\\u062\\u0063\\u00645\\u66x&#x30;&#x031;&#x0032;&#x00033;", ""),
 868 //           "abcd5fx012&#x00033;");
 869 //    // Try custom Any-Hex (default is tested elsewhere)
 870 //    ec = U_ZERO_ERROR;
 871 //    UnicodeToHexTransliterator hex3(UnicodeString("&\\#x###0;", ""), ec);
 872 //    expect(hex3, "012", "&#x30;&#x31;&#x32;");
 873 }
 874
 875 /**
 876  * Parsers need better syntax error messages.
 877  */
 878 void TransliteratorTest::TestJ329(void) {
 879
 880     struct { UBool containsErrors; const char* rule; } DATA[] = {
 881         { FALSE, "a > b; c > d" },
 882         { TRUE,  "a > b; no operator; c > d" },
 883     };
 884     int32_t DATA_length = UPRV_LENGTHOF(DATA);
 885
 886     for (int32_t i=0; i<DATA_length; ++i) {
 887         UErrorCode status = U_ZERO_ERROR;
 888         UParseError parseError;
 889         Transliterator *rbt = Transliterator::createFromRules("<ID>",
 890                                     DATA[i].rule,
 891                                     UTRANS_FORWARD,
 892                                     parseError,
 893                                     status);
 894         UBool gotError = U_FAILURE(status);
 895         UnicodeString desc(DATA[i].rule);
 896         desc.append(gotError ? " -> error" : " -> no error");
 897         if (gotError) {
 898             desc = desc + ", ParseError code=" + u_errorName(status) +
 899                 " line=" + parseError.line +
 900                 " offset=" + parseError.offset +
 901                 " context=" + parseError.preContext;
 902         }
 903         if (gotError == DATA[i].containsErrors) {
 904             logln(UnicodeString("Ok:   ") + desc);
 905         } else {
 906             errln(UnicodeString("FAIL: ") + desc);
 907         }
 908         delete rbt;
 909     }
 910 }
 911
 912 /**
 913  * Test segments and segment references.
 914  */
 915 void TransliteratorTest::TestSegments(void) {
 916     // Array of 3n items
 917     // Each item is <rules>, <input>, <expected output>
 918     UnicodeString DATA[] = {
 919         "([a-z]) '.' ([0-9]) > $2 '-' $1",
 920         "abc.123.xyz.456",
 921         "ab1-c23.xy4-z56",
 922
 923         // nested
 924         "(([a-z])([0-9])) > $1 '.' $2 '.' $3;",
 925         "a1 b2",
 926         "a1.a.1 b2.b.2",
 927     };
 928     int32_t DATA_length = UPRV_LENGTHOF(DATA);
 929
 930     for (int32_t i=0; i<DATA_length; i+=3) {
 931         logln("Pattern: " + prettify(DATA[i]));
 932         UParseError parseError;
 933         UErrorCode status = U_ZERO_ERROR;
 934         Transliterator *t = Transliterator::createFromRules("ID", DATA[i], UTRANS_FORWARD, parseError, status);
 935         if (U_FAILURE(status)) {
 936             errln("FAIL: RBT constructor");
 937         } else {
 938             expect(*t, DATA[i+1], DATA[i+2]);
 939         }
 940         delete t;
 941     }
 942 }
 943
 944 /**
 945  * Test cursor positioning outside of the key
 946  */
 947 void TransliteratorTest::TestCursorOffset(void) {
 948     // Array of 3n items
 949     // Each item is <rules>, <input>, <expected output>
 950     UnicodeString DATA[] = {
 951         "pre {alpha} post > | @ ALPHA ;"
 952         "eALPHA > beta ;"
 953         "pre {beta} post > BETA @@ | ;"
 954         "post > xyz",
 955
 956         "prealphapost prebetapost",
 957
 958         "prbetaxyz preBETApost",
 959     };
 960     int32_t DATA_length = UPRV_LENGTHOF(DATA);
 961
 962     for (int32_t i=0; i<DATA_length; i+=3) {
 963         logln("Pattern: " + prettify(DATA[i]));
 964         UParseError parseError;
 965         UErrorCode status = U_ZERO_ERROR;
 966         Transliterator *t = Transliterator::createFromRules("<ID>", DATA[i], UTRANS_FORWARD, parseError, status);
 967         if (U_FAILURE(status)) {
 968             errln("FAIL: RBT constructor");
 969         } else {
 970             expect(*t, DATA[i+1], DATA[i+2]);
 971         }
 972         delete t;
 973     }
 974 }
 975
 976 /**
 977  * Test zero length and > 1 char length variable values.  Test
 978  * use of variable refs in UnicodeSets.
 979  */
 980 void TransliteratorTest::TestArbitraryVariableValues(void) {
 981     // Array of 3n items
 982     // Each item is <rules>, <input>, <expected output>
 983     UnicodeString DATA[] = {
 984         "$abe = ab;"
 985         "$pat = x[yY]z;"
 986         "$ll  = 'a-z';"
 987         "$llZ = [$ll];"
 988         "$llY = [$ll$pat];"
 989         "$emp = ;"
 990
 991         "$abe > ABE;"
 992         "$pat > END;"
 993         "$llZ > 1;"
 994         "$llY > 2;"
 995         "7$emp 8 > 9;"
 996         "",
 997
 998         "ab xYzxyz stY78",
 999         "ABE ENDEND 1129",
1000     };
1001     int32_t DATA_length = UPRV_LENGTHOF(DATA);
1002
1003     for (int32_t i=0; i<DATA_length; i+=3) {
1004         logln("Pattern: " + prettify(DATA[i]));
1005         UParseError parseError;
1006         UErrorCode status = U_ZERO_ERROR;
1007         Transliterator *t = Transliterator::createFromRules("<ID>", DATA[i], UTRANS_FORWARD, parseError, status);
1008         if (U_FAILURE(status)) {
1009             errln("FAIL: RBT constructor");
1010         } else {
1011             expect(*t, DATA[i+1], DATA[i+2]);
1012         }
1013         delete t;
1014     }
1015 }
1016
1017 /**
1018  * Confirm that the contextStart, contextLimit, start, and limit
1019  * behave correctly. J474.
1020  */
1021 void TransliteratorTest::TestPositionHandling(void) {
1022     // Array of 3n items
1023     // Each item is <rules>, <input>, <expected output>
1024     const char* DATA[] = {
1025         "a{t} > SS ; {t}b > UU ; {t} > TT ;",
1026         "xtat txtb", // pos 0,9,0,9
1027         "xTTaSS TTxUUb",
1028
1029         "a{t} > SS ; {t}b > UU ; {t} > TT ; a > A ; b > B ;",
1030         "xtat txtb", // pos 2,9,3,8
1031         "xtaSS TTxUUb",
1032
1033         "a{t} > SS ; {t}b > UU ; {t} > TT ; a > A ; b > B ;",
1034         "xtat txtb", // pos 3,8,3,8
1035         "xtaTT TTxTTb",
1036     };
1037
1038     // Array of 4n positions -- these go with the DATA array
1039     // They are: contextStart, contextLimit, start, limit
1040     int32_t POS[] = {
1041         0, 9, 0, 9,
1042         2, 9, 3, 8,
1043         3, 8, 3, 8,
1044     };
1045
1046     int32_t n = UPRV_LENGTHOF(DATA) / 3;
1047     for (int32_t i=0; i<n; i++) {
1048         UErrorCode status = U_ZERO_ERROR;
1049         UParseError parseError;
1050         Transliterator *t = Transliterator::createFromRules("<ID>",
1051                                 DATA[3*i], UTRANS_FORWARD, parseError, status);
1052         if (U_FAILURE(status)) {
1053             delete t;
1054             errln("FAIL: RBT constructor");
1055             return;
1056         }
1057         UTransPosition pos;
1058         pos.contextStart= POS[4*i];
1059         pos.contextLimit = POS[4*i+1];
1060         pos.start = POS[4*i+2];
1061         pos.limit = POS[4*i+3];
1062         UnicodeString rsource(DATA[3*i+1]);
1063         t->transliterate(rsource, pos, status);
1064         if (U_FAILURE(status)) {
1065             delete t;
1066             errln("FAIL: transliterate");
1067             return;
1068         }
1069         t->finishTransliteration(rsource, pos);
1070         expectAux(DATA[3*i],
1071                   DATA[3*i+1],
1072                   rsource,
1073                   DATA[3*i+2]);
1074         delete t;
1075     }
1076 }
1077
1078 /**
1079  * Test the Hiragana-Katakana transliterator.
1080  */
1081 void TransliteratorTest::TestHiraganaKatakana(void) {
1082     UParseError parseError;
1083     UErrorCode status = U_ZERO_ERROR;
1084     Transliterator* hk = Transliterator::createInstance("Hiragana-Katakana", UTRANS_FORWARD, parseError, status);
1085     Transliterator* kh = Transliterator::createInstance("Katakana-Hiragana", UTRANS_FORWARD, parseError, status);
1086     if (hk == 0 || kh == 0) {
1087         dataerrln("FAIL: createInstance failed - %s", u_errorName(status));
1088         delete hk;
1089         delete kh;
1090         return;
1091     }
1092
1093     // Array of 3n items
1094     // Each item is "hk"|"kh"|"both", <Hiragana>, <Katakana>
1095     const char* DATA[] = {
1096         "both",
1097         "\\u3042\\u3090\\u3099\\u3092\\u3050",
1098         "\\u30A2\\u30F8\\u30F2\\u30B0",
1099
1100         "kh",
1101         "\\u307C\\u3051\\u3060\\u3042\\u3093\\u30FC",
1102         "\\u30DC\\u30F6\\u30C0\\u30FC\\u30F3\\u30FC",
1103     };
1104     int32_t DATA_length = UPRV_LENGTHOF(DATA);
1105
1106     for (int32_t i=0; i<DATA_length; i+=3) {
1107         UnicodeString h = CharsToUnicodeString(DATA[i+1]);
1108         UnicodeString k = CharsToUnicodeString(DATA[i+2]);
1109         switch (*DATA[i]) {
1110         case 0x68: //'h': // Hiragana-Katakana
1111             expect(*hk, h, k);
1112             break;
1113         case 0x6B: //'k': // Katakana-Hiragana
1114             expect(*kh, k, h);
1115             break;
1116         case 0x62: //'b': // both
1117             expect(*hk, h, k);
1118             expect(*kh, k, h);
1119             break;
1120         }
1121     }
1122     delete hk;
1123     delete kh;
1124 }
1125
1126 /**
1127  * Test cloning / copy constructor of RBT.
1128  */
1129 void TransliteratorTest::TestCopyJ476(void) {
1130     // The real test here is what happens when the destructors are
1131     // called.  So we let one object get destructed, and check to
1132     // see that its copy still works.
1133     Transliterator *t2 = 0;
1134     {
1135         UParseError parseError;
1136         UErrorCode status = U_ZERO_ERROR;
1137         Transliterator *t1 = Transliterator::createFromRules("t1",
1138             "a>A;b>B;'foo'+>'bar'", UTRANS_FORWARD, parseError, status);
1139         if (U_FAILURE(status)) {
1140             errln("FAIL: RBT constructor");
1141             return;
1142         }
1143         t2 = t1->clone(); // Call copy constructor under the covers.
1144         expect(*t1, "abcfoofoo", "ABcbar");
1145         delete t1;
1146     }
1147     expect(*t2, "abcfoofoo", "ABcbar");
1148     delete t2;
1149 }
1150
1151 /**
1152  * Test inter-Indic transliterators.  These are composed.
1153  * ICU4C Jitterbug 483.
1154  */
1155 void TransliteratorTest::TestInterIndic(void) {
1156     UnicodeString ID("Devanagari-Gujarati", "");
1157     UErrorCode status = U_ZERO_ERROR;
1158     UParseError parseError;
1159     Transliterator* dg = Transliterator::createInstance(ID, UTRANS_FORWARD, parseError, status);
1160     if (dg == 0) {
1161         dataerrln("FAIL: createInstance(" + ID + ") returned NULL - " + u_errorName(status));
1162         return;
1163     }
1164     UnicodeString id = dg->getID();
1165     if (id != ID) {
1166         errln("FAIL: createInstance(" + ID + ")->getID() => " + id);
1167     }
1168     UnicodeString dev = CharsToUnicodeString("\\u0901\\u090B\\u0925");
1169     UnicodeString guj = CharsToUnicodeString("\\u0A81\\u0A8B\\u0AA5");
1170     expect(*dg, dev, guj);
1171     delete dg;
1172 }
1173
1174 /**
1175  * Test filter syntax in IDs. (J918)
1176  */
1177 void TransliteratorTest::TestFilterIDs(void) {
1178     // Array of 3n strings:
1179     // <id>, <inverse id>, <input>, <expected output>
1180     const char* DATA[] = {
1181         "[aeiou]Any-Hex", // ID
1182         "[aeiou]Hex-Any", // expected inverse ID
1183         "quizzical",      // src
1184         "q\\u0075\\u0069zz\\u0069c\\u0061l", // expected ID.translit(src)
1185
1186         "[aeiou]Any-Hex;[^5]Hex-Any",
1187         "[^5]Any-Hex;[aeiou]Hex-Any",
1188         "quizzical",
1189         "q\\u0075izzical",
1190
1191         "[abc]Null",
1192         "[abc]Null",
1193         "xyz",
1194         "xyz",
1195     };
1196     enum { DATA_length = UPRV_LENGTHOF(DATA) };
1197
1198     for (int i=0; i<DATA_length; i+=4) {
1199         UnicodeString ID(DATA[i], "");
1200         UnicodeString uID(DATA[i+1], "");
1201         UnicodeString data2(DATA[i+2], "");
1202         UnicodeString data3(DATA[i+3], "");
1203         UParseError parseError;
1204         UErrorCode status = U_ZERO_ERROR;
1205         Transliterator *t = Transliterator::createInstance(ID, UTRANS_FORWARD, parseError, status);
1206         if (t == 0) {
1207             errln("FAIL: createInstance(" + ID + ") returned NULL");
1208             return;
1209         }
1210         expect(*t, data2, data3);
1211
1212         // Check the ID
1213         if (ID != t->getID()) {
1214             errln("FAIL: createInstance(" + ID + ").getID() => " +
1215                   t->getID());
1216         }
1217
1218         // Check the inverse
1219         Transliterator *u = t->createInverse(status);
1220         if (u == 0) {
1221             errln("FAIL: " + ID + ".createInverse() returned NULL");
1222         } else if (u->getID() != uID) {
1223             errln("FAIL: " + ID + ".createInverse().getID() => " +
1224                   u->getID() + ", expected " + uID);
1225         }
1226
1227         delete t;
1228         delete u;
1229     }
1230 }
1231
1232 /**
1233  * Test the case mapping transliterators.
1234  */
1235 void TransliteratorTest::TestCaseMap(void) {
1236     UParseError parseError;
1237     UErrorCode status = U_ZERO_ERROR;
1238     Transliterator* toUpper =
1239         Transliterator::createInstance("Any-Upper[^xyzXYZ]", UTRANS_FORWARD, parseError, status);
1240     Transliterator* toLower =
1241         Transliterator::createInstance("Any-Lower[^xyzXYZ]", UTRANS_FORWARD, parseError, status);
1242     Transliterator* toTitle =
1243         Transliterator::createInstance("Any-Title[^xyzXYZ]", UTRANS_FORWARD, parseError, status);
1244     if (toUpper==0 || toLower==0 || toTitle==0) {
1245         errln("FAIL: createInstance returned NULL");
1246         delete toUpper;
1247         delete toLower;
1248         delete toTitle;
1249         return;
1250     }
1251
1252     expect(*toUpper, "The quick brown fox jumped over the lazy dogs.",
1253            "THE QUICK BROWN FOx JUMPED OVER THE LAzy DOGS.");
1254     expect(*toLower, "The quIck brown fOX jUMPED OVER THE LAzY dogs.",
1255            "the quick brown foX jumped over the lazY dogs.");
1256     expect(*toTitle, "the quick brown foX can't jump over the laZy dogs.",
1257            "The Quick Brown FoX Can't Jump Over The LaZy Dogs.");
1258
1259     delete toUpper;
1260     delete toLower;
1261     delete toTitle;
1262 }
1263
1264 /**
1265  * Test the name mapping transliterators.
1266  */
1267 void TransliteratorTest::TestNameMap(void) {
1268     UParseError parseError;
1269     UErrorCode status = U_ZERO_ERROR;
1270     Transliterator* uni2name =
1271         Transliterator::createInstance("Any-Name[^abc]", UTRANS_FORWARD, parseError, status);
1272     Transliterator* name2uni =
1273         Transliterator::createInstance("Name-Any", UTRANS_FORWARD, parseError, status);
1274     if (uni2name==0 || name2uni==0) {
1275         errln("FAIL: createInstance returned NULL");
1276         delete uni2name;
1277         delete name2uni;
1278         return;
1279     }
1280
1281     // Careful:  CharsToUS will convert "\\N" => "N"; use "\\\\N" for \N
1282     expect(*uni2name, CharsToUnicodeString("\\u00A0abc\\u4E01\\u00B5\\u0A81\\uFFFD\\u0004\\u0009\\u0081\\uFFFF"),
1283            CharsToUnicodeString("\\\\N{NO-BREAK SPACE}abc\\\\N{CJK UNIFIED IDEOGRAPH-4E01}\\\\N{MICRO SIGN}\\\\N{GUJARATI SIGN CANDRABINDU}\\\\N{REPLACEMENT CHARACTER}\\\\N{<control-0004>}\\\\N{<control-0009>}\\\\N{<control-0081>}\\\\N{<noncharacter-FFFF>}"));
1284     expect(*name2uni, UNICODE_STRING_SIMPLE("{\\N { NO-BREAK SPACE}abc\\N{  CJK UNIFIED  IDEOGRAPH-4E01  }\\N{x\\N{MICRO SIGN}\\N{GUJARATI SIGN CANDRABINDU}\\N{REPLACEMENT CHARACTER}\\N{<control-0004>}\\N{<control-0009>}\\N{<control-0081>}\\N{<noncharacter-FFFF>}\\N{<control-0004>}\\N{"),
1285            CharsToUnicodeString("{\\u00A0abc\\u4E01\\\\N{x\\u00B5\\u0A81\\uFFFD\\u0004\\u0009\\u0081\\uFFFF\\u0004\\\\N{"));
1286
1287     delete uni2name;
1288     delete name2uni;
1289
1290     // round trip
1291     Transliterator* t =
1292         Transliterator::createInstance("Any-Name;Name-Any", UTRANS_FORWARD, parseError, status);
1293     if (t==0) {
1294         errln("FAIL: createInstance returned NULL");
1295         delete t;
1296         return;
1297     }
1298
1299     // Careful:  CharsToUS will convert "\\N" => "N"; use "\\\\N" for \N
1300     UnicodeString s = CharsToUnicodeString("{\\u00A0abc\\u4E01\\\\N{x\\u00B5\\u0A81\\uFFFD\\u0004\\u0009\\u0081\\uFFFF\\u0004\\\\N{");
1301     expect(*t, s, s);
1302     delete t;
1303 }
1304
1305 /**
1306  * Test liberalized ID syntax.  1006c
1307  */
1308 void TransliteratorTest::TestLiberalizedID(void) {
1309     // Some test cases have an expected getID() value of NULL.  This
1310     // means I have disabled the test case for now.  This stuff is
1311     // still under development, and I haven't decided whether to make
1312     // getID() return canonical case yet.  It will all get rewritten
1313     // with the move to Source-Target/Variant IDs anyway. [aliu]
1314     const char* DATA[] = {
1315         "latin-greek", NULL /*"Latin-Greek"*/, "case insensitivity",
1316         "  Null  ", "Null", "whitespace",
1317         " Latin[a-z]-Greek  ", "[a-z]Latin-Greek", "inline filter",
1318         "  null  ; latin-greek  ", NULL /*"Null;Latin-Greek"*/, "compound whitespace",
1319     };
1320     const int32_t DATA_length = UPRV_LENGTHOF(DATA);
1321     UParseError parseError;
1322     UErrorCode status= U_ZERO_ERROR;
1323     for (int32_t i=0; i<DATA_length; i+=3) {
1324         Transliterator *t = Transliterator::createInstance(DATA[i], UTRANS_FORWARD, parseError, status);
1325         if (t == 0) {
1326             dataerrln(UnicodeString("FAIL: ") + DATA[i+2] +
1327                   " cannot create ID \"" + DATA[i] + "\" - " + u_errorName(status));
1328         } else {
1329             UnicodeString exp;
1330             if (DATA[i+1]) {
1331                 exp = UnicodeString(DATA[i+1], "");
1332             }
1333             // Don't worry about getID() if the expected char*
1334             // is NULL -- see above.
1335             if (exp.length() == 0 || exp == t->getID()) {
1336                 logln(UnicodeString("Ok: ") + DATA[i+2] +
1337                       " create ID \"" + DATA[i] + "\" => \"" +
1338                       exp + "\"");
1339             } else {
1340                 errln(UnicodeString("FAIL: ") + DATA[i+2] +
1341                       " create ID \"" + DATA[i] + "\" => \"" +
1342                       t->getID() + "\", exp \"" + exp + "\"");
1343             }
1344             delete t;
1345         }
1346     }
1347 }
1348
1349 /* test for Jitterbug 912 */
1350 void TransliteratorTest::TestCreateInstance(){
1351     const char* FORWARD = "F";
1352     const char* REVERSE = "R";
1353     const char* DATA[] = {
1354         // Column 1: id
1355         // Column 2: direction
1356         // Column 3: expected ID, or "" if expect failure
1357         "Latin-Hangul", REVERSE, "Hangul-Latin", // JB#912
1358
1359         // JB#2689: bad compound causes crash
1360         "InvalidSource-InvalidTarget", FORWARD, "",
1361         "InvalidSource-InvalidTarget", REVERSE, "",
1362         "Hex-Any;InvalidSource-InvalidTarget", FORWARD, "",
1363         "Hex-Any;InvalidSource-InvalidTarget", REVERSE, "",
1364         "InvalidSource-InvalidTarget;Hex-Any", FORWARD, "",
1365         "InvalidSource-InvalidTarget;Hex-Any", REVERSE, "",
1366
1367         NULL
1368     };
1369
1370     for (int32_t i=0; DATA[i]; i+=3) {
1371         UParseError err;
1372         UErrorCode ec = U_ZERO_ERROR;
1373         UnicodeString id(DATA[i]);
1374         UTransDirection dir = (DATA[i+1]==FORWARD)?
1375             UTRANS_FORWARD:UTRANS_REVERSE;
1376         UnicodeString expID(DATA[i+2]);
1377         Transliterator* t =
1378             Transliterator::createInstance(id,dir,err,ec);
1379         UnicodeString newID;
1380         if (t) {
1381             newID = t->getID();
1382         }
1383         UBool ok = (newID == expID);
1384         if (!t) {
1385             newID = u_errorName(ec);
1386         }
1387         if (ok) {
1388             logln((UnicodeString)"Ok: createInstance(" +
1389                   id + "," + DATA[i+1] + ") => " + newID);
1390         } else {
1391             dataerrln((UnicodeString)"FAIL: createInstance(" +
1392                   id + "," + DATA[i+1] + ") => " + newID +
1393                   ", expected " + expID);
1394         }
1395         delete t;
1396     }
1397 }
1398
1399 /**
1400  * Test the normalization transliterator.
1401  */
1402 void TransliteratorTest::TestNormalizationTransliterator() {
1403     // THE FOLLOWING TWO TABLES ARE COPIED FROM com.ibm.test.normalizer.BasicTest
1404     // PLEASE KEEP THEM IN SYNC WITH BasicTest.
1405     const char* CANON[] = {
1406         // Input               Decomposed            Composed
1407         "cat",                "cat",                "cat"               ,
1408         "\\u00e0ardvark",      "a\\u0300ardvark",     "\\u00e0ardvark"    ,
1409
1410         "\\u1e0a",             "D\\u0307",            "\\u1e0a"            , // D-dot_above
1411         "D\\u0307",            "D\\u0307",            "\\u1e0a"            , // D dot_above
1412
1413         "\\u1e0c\\u0307",       "D\\u0323\\u0307",      "\\u1e0c\\u0307"      , // D-dot_below dot_above
1414         "\\u1e0a\\u0323",       "D\\u0323\\u0307",      "\\u1e0c\\u0307"      , // D-dot_above dot_below
1415         "D\\u0307\\u0323",      "D\\u0323\\u0307",      "\\u1e0c\\u0307"      , // D dot_below dot_above
1416
1417         "\\u1e10\\u0307\\u0323", "D\\u0327\\u0323\\u0307","\\u1e10\\u0323\\u0307", // D dot_below cedilla dot_above
1418         "D\\u0307\\u0328\\u0323","D\\u0328\\u0323\\u0307","\\u1e0c\\u0328\\u0307", // D dot_above ogonek dot_below
1419
1420         "\\u1E14",             "E\\u0304\\u0300",      "\\u1E14"            , // E-macron-grave
1421         "\\u0112\\u0300",       "E\\u0304\\u0300",      "\\u1E14"            , // E-macron + grave
1422         "\\u00c8\\u0304",       "E\\u0300\\u0304",      "\\u00c8\\u0304"      , // E-grave + macron
1423
1424         "\\u212b",             "A\\u030a",            "\\u00c5"            , // angstrom_sign
1425         "\\u00c5",             "A\\u030a",            "\\u00c5"            , // A-ring
1426
1427         "\\u00fdffin",         "y\\u0301ffin",        "\\u00fdffin"        ,    //updated with 3.0
1428         "\\u00fd\\uFB03n",      "y\\u0301\\uFB03n",     "\\u00fd\\uFB03n"     , //updated with 3.0
1429
1430         "Henry IV",           "Henry IV",           "Henry IV"          ,
1431         "Henry \\u2163",       "Henry \\u2163",       "Henry \\u2163"      ,
1432
1433         "\\u30AC",             "\\u30AB\\u3099",       "\\u30AC"            , // ga (Katakana)
1434         "\\u30AB\\u3099",       "\\u30AB\\u3099",       "\\u30AC"            , // ka + ten
1435         "\\uFF76\\uFF9E",       "\\uFF76\\uFF9E",       "\\uFF76\\uFF9E"      , // hw_ka + hw_ten
1436         "\\u30AB\\uFF9E",       "\\u30AB\\uFF9E",       "\\u30AB\\uFF9E"      , // ka + hw_ten
1437         "\\uFF76\\u3099",       "\\uFF76\\u3099",       "\\uFF76\\u3099"      , // hw_ka + ten
1438
1439         "A\\u0300\\u0316",      "A\\u0316\\u0300",      "\\u00C0\\u0316"      ,
1440         0 // end
1441     };
1442
1443     const char* COMPAT[] = {
1444         // Input               Decomposed            Composed
1445         "\\uFB4f",             "\\u05D0\\u05DC",       "\\u05D0\\u05DC"     , // Alef-Lamed vs. Alef, Lamed
1446
1447         "\\u00fdffin",         "y\\u0301ffin",        "\\u00fdffin"        ,    //updated for 3.0
1448         "\\u00fd\\uFB03n",      "y\\u0301ffin",        "\\u00fdffin"        , // ffi ligature -> f + f + i
1449
1450         "Henry IV",           "Henry IV",           "Henry IV"          ,
1451         "Henry \\u2163",       "Henry IV",           "Henry IV"          ,
1452
1453         "\\u30AC",             "\\u30AB\\u3099",       "\\u30AC"            , // ga (Katakana)
1454         "\\u30AB\\u3099",       "\\u30AB\\u3099",       "\\u30AC"            , // ka + ten
1455
1456         "\\uFF76\\u3099",       "\\u30AB\\u3099",       "\\u30AC"            , // hw_ka + ten
1457         0 // end
1458     };
1459
1460     int32_t i;
1461     UParseError parseError;
1462     UErrorCode status = U_ZERO_ERROR;
1463     Transliterator* NFD = Transliterator::createInstance("NFD", UTRANS_FORWARD, parseError, status);
1464     Transliterator* NFC = Transliterator::createInstance("NFC", UTRANS_FORWARD, parseError, status);
1465     if (!NFD || !NFC) {
1466         dataerrln("FAIL: createInstance failed: %s", u_errorName(status));
1467         delete NFD;
1468         delete NFC;
1469         return;
1470     }
1471     for (i=0; CANON[i]; i+=3) {
1472         UnicodeString in = CharsToUnicodeString(CANON[i]);
1473         UnicodeString expd = CharsToUnicodeString(CANON[i+1]);
1474         UnicodeString expc = CharsToUnicodeString(CANON[i+2]);
1475         expect(*NFD, in, expd);
1476         expect(*NFC, in, expc);
1477     }
1478     delete NFD;
1479     delete NFC;
1480
1481     Transliterator* NFKD = Transliterator::createInstance("NFKD", UTRANS_FORWARD, parseError, status);
1482     Transliterator* NFKC = Transliterator::createInstance("NFKC", UTRANS_FORWARD, parseError, status);
1483     if (!NFKD || !NFKC) {
1484         dataerrln("FAIL: createInstance failed");
1485         delete NFKD;
1486         delete NFKC;
1487         return;
1488     }
1489     for (i=0; COMPAT[i]; i+=3) {
1490         UnicodeString in = CharsToUnicodeString(COMPAT[i]);
1491         UnicodeString expkd = CharsToUnicodeString(COMPAT[i+1]);
1492         UnicodeString expkc = CharsToUnicodeString(COMPAT[i+2]);
1493         expect(*NFKD, in, expkd);
1494         expect(*NFKC, in, expkc);
1495     }
1496     delete NFKD;
1497     delete NFKC;
1498
1499     UParseError pe;
1500     status = U_ZERO_ERROR;
1501     Transliterator *t = Transliterator::createInstance("NFD; [x]Remove",
1502                                                        UTRANS_FORWARD,
1503                                                        pe, status);
1504     if (t == 0) {
1505         errln("FAIL: createInstance failed");
1506     }
1507     expect(*t, CharsToUnicodeString("\\u010dx"),
1508            CharsToUnicodeString("c\\u030C"));
1509     delete t;
1510 }
1511
1512 /**
1513  * Test compound RBT rules.
1514  */
1515 void TransliteratorTest::TestCompoundRBT(void) {
1516     // Careful with spacing and ';' here:  Phrase this exactly
1517     // as toRules() is going to return it.  If toRules() changes
1518     // with regard to spacing or ';', then adjust this string.
1519     UnicodeString rule("::Hex-Any;\n"
1520                        "::Any-Lower;\n"
1521                        "a > '.A.';\n"
1522                        "b > '.B.';\n"
1523                        "::[^t]Any-Upper;", "");
1524     UParseError parseError;
1525     UErrorCode status = U_ZERO_ERROR;
1526     Transliterator *t = Transliterator::createFromRules("Test", rule, UTRANS_FORWARD, parseError, status);
1527     if (t == 0) {
1528         errln("FAIL: createFromRules failed");
1529         return;
1530     }
1531     expect(*t, UNICODE_STRING_SIMPLE("\\u0043at in the hat, bat on the mat"),
1532            "C.A.t IN tHE H.A.t, .B..A.t ON tHE M.A.t");
1533     UnicodeString r;
1534     t->toRules(r, TRUE);
1535     if (r == rule) {
1536         logln((UnicodeString)"OK: toRules() => " + r);
1537     } else {
1538         errln((UnicodeString)"FAIL: toRules() => " + r +
1539               ", expected " + rule);
1540     }
1541     delete t;
1542
1543     // Now test toRules
1544     t = Transliterator::createInstance("Greek-Latin; Latin-Cyrillic", UTRANS_FORWARD, parseError, status);
1545     if (t == 0) {
1546         dataerrln("FAIL: createInstance failed - %s", u_errorName(status));
1547         return;
1548     }
1549     UnicodeString exp("::Greek-Latin;\n::Latin-Cyrillic;");
1550     t->toRules(r, TRUE);
1551     if (r != exp) {
1552         errln((UnicodeString)"FAIL: toRules() => " + r +
1553               ", expected " + exp);
1554     } else {
1555         logln((UnicodeString)"OK: toRules() => " + r);
1556     }
1557     delete t;
1558
1559     // Round trip the result of toRules
1560     t = Transliterator::createFromRules("Test", r, UTRANS_FORWARD, parseError, status);
1561     if (t == 0) {
1562         errln("FAIL: createFromRules #2 failed");
1563         return;
1564     } else {
1565         logln((UnicodeString)"OK: createFromRules(" + r + ") succeeded");
1566     }
1567
1568     // Test toRules again
1569     t->toRules(r, TRUE);
1570     if (r != exp) {
1571         errln((UnicodeString)"FAIL: toRules() => " + r +
1572               ", expected " + exp);
1573     } else {
1574         logln((UnicodeString)"OK: toRules() => " + r);
1575     }
1576
1577     delete t;
1578
1579     // Test Foo(Bar) IDs.  Careful with spacing in id; make it conform
1580     // to what the regenerated ID will look like.
1581     UnicodeString id("Upper(Lower);(NFKC)", "");
1582     t = Transliterator::createInstance(id, UTRANS_FORWARD, parseError, status);
1583     if (t == 0) {
1584         errln("FAIL: createInstance #2 failed");
1585         return;
1586     }
1587     if (t->getID() == id) {
1588         logln((UnicodeString)"OK: created " + id);
1589     } else {
1590         errln((UnicodeString)"FAIL: createInstance(" + id +
1591               ").getID() => " + t->getID());
1592     }
1593
1594     Transliterator *u = t->createInverse(status);
1595     if (u == 0) {
1596         errln("FAIL: createInverse failed");
1597         delete t;
1598         return;
1599     }
1600     exp = "NFKC();Lower(Upper)";
1601     if (u->getID() == exp) {
1602         logln((UnicodeString)"OK: createInverse(" + id + ") => " +
1603               u->getID());
1604     } else {
1605         errln((UnicodeString)"FAIL: createInverse(" + id + ") => " +
1606               u->getID());
1607     }
1608     delete t;
1609     delete u;
1610 }
1611
1612 /**
1613  * Compound filter semantics were orginially not implemented
1614  * correctly.  Originally, each component filter f(i) is replaced by
1615  * f'(i) = f(i) && g, where g is the filter for the compound
1616  * transliterator.
1617  *
1618  * From Mark:
1619  *
1620  * Suppose and I have a transliterator X. Internally X is
1621  * "Greek-Latin; Latin-Cyrillic; Any-Lower". I use a filter [^A].
1622  *
1623  * The compound should convert all greek characters (through latin) to
1624  * cyrillic, then lowercase the result. The filter should say "don't
1625  * touch 'A' in the original". But because an intermediate result
1626  * happens to go through "A", the Greek Alpha gets hung up.
1627  */
1628 void TransliteratorTest::TestCompoundFilter(void) {
1629     UParseError parseError;
1630     UErrorCode status = U_ZERO_ERROR;
1631     Transliterator *t = Transliterator::createInstance
1632         ("Greek-Latin; Latin-Greek; Lower", UTRANS_FORWARD, parseError, status);
1633     if (t == 0) {
1634         dataerrln("FAIL: createInstance failed - %s", u_errorName(status));
1635         return;
1636     }
1637     t->adoptFilter(new UnicodeSet("[^A]", status));
1638     if (U_FAILURE(status)) {
1639         errln("FAIL: UnicodeSet ct failed");
1640         delete t;
1641         return;
1642     }
1643
1644     // Only the 'A' at index 1 should remain unchanged
1645     expect(*t,
1646            CharsToUnicodeString("BA\\u039A\\u0391"),
1647            CharsToUnicodeString("\\u03b2A\\u03ba\\u03b1"));
1648     delete t;
1649 }
1650
1651 void TransliteratorTest::TestRemove(void) {
1652     UParseError parseError;
1653     UErrorCode status = U_ZERO_ERROR;
1654     Transliterator *t = Transliterator::createInstance("Remove[abc]", UTRANS_FORWARD, parseError, status);
1655     if (t == 0) {
1656         errln("FAIL: createInstance failed");
1657         return;
1658     }
1659
1660     expect(*t, "Able bodied baker's cats", "Ale odied ker's ts");
1661
1662     // extra test for RemoveTransliterator::clone(), which at one point wasn't
1663     // duplicating the filter
1664     Transliterator* t2 = t->clone();
1665     expect(*t2, "Able bodied baker's cats", "Ale odied ker's ts");
1666
1667     delete t;
1668     delete t2;
1669 }
1670
1671 void TransliteratorTest::TestToRules(void) {
1672     const char* RBT = "rbt";
1673     const char* SET = "set";
1674     static const char* DATA[] = {
1675         RBT,
1676         "$a=\\u4E61; [$a] > A;",
1677         "[\\u4E61] > A;",
1678
1679         RBT,
1680         "$white=[[:Zs:][:Zl:]]; $white{a} > A;",
1681         "[[:Zs:][:Zl:]]{a} > A;",
1682
1683         SET,
1684         "[[:Zs:][:Zl:]]",
1685         "[[:Zs:][:Zl:]]",
1686
1687         SET,
1688         "[:Ps:]",
1689         "[:Ps:]",
1690
1691         SET,
1692         "[:L:]",
1693         "[:L:]",
1694
1695         SET,
1696         "[[:L:]-[A]]",
1697         "[[:L:]-[A]]",
1698
1699         SET,
1700         "[~[:Lu:][:Ll:]]",
1701         "[~[:Lu:][:Ll:]]",
1702
1703         SET,
1704         "[~[a-z]]",
1705         "[~[a-z]]",
1706
1707         RBT,
1708         "$white=[:Zs:]; $black=[^$white]; $black{a} > A;",
1709         "[^[:Zs:]]{a} > A;",
1710
1711         RBT,
1712         "$a=[:Zs:]; $b=[[a-z]-$a]; $b{a} > A;",
1713         "[[a-z]-[:Zs:]]{a} > A;",
1714
1715         RBT,
1716         "$a=[:Zs:]; $b=[$a&[a-z]]; $b{a} > A;",
1717         "[[:Zs:]&[a-z]]{a} > A;",
1718
1719         RBT,
1720         "$a=[:Zs:]; $b=[x$a]; $b{a} > A;",
1721         "[x[:Zs:]]{a} > A;",
1722
1723         RBT,
1724         "$accentMinus = [ [\\u0300-\\u0345] & [:M:] - [\\u0338]] ;"
1725         "$macron = \\u0304 ;"
1726         "$evowel = [aeiouyAEIOUY] ;"
1727         "$iotasub = \\u0345 ;"
1728         "($evowel $macron $accentMinus *) i > | $1 $iotasub ;",
1729         "([AEIOUYaeiouy]\\u0304[[\\u0300-\\u0345]&[:M:]-[\\u0338]]*)i > | $1 \\u0345;",
1730
1731         RBT,
1732         "([AEIOUYaeiouy]\\u0304[[:M:]-[\\u0304\\u0345]]*)i > | $1 \\u0345;",
1733         "([AEIOUYaeiouy]\\u0304[[:M:]-[\\u0304\\u0345]]*)i > | $1 \\u0345;",
1734     };
1735     static const int32_t DATA_length = UPRV_LENGTHOF(DATA);
1736
1737     for (int32_t d=0; d < DATA_length; d+=3) {
1738         if (DATA[d] == RBT) {
1739             // Transliterator test
1740             UParseError parseError;
1741             UErrorCode status = U_ZERO_ERROR;
1742             Transliterator *t = Transliterator::createFromRules("ID",
1743                                                                 UnicodeString(DATA[d+1], -1, US_INV), UTRANS_FORWARD, parseError, status);
1744             if (t == 0) {
1745                 dataerrln("FAIL: createFromRules failed - %s", u_errorName(status));
1746                 return;
1747             }
1748             UnicodeString rules, escapedRules;
1749             t->toRules(rules, FALSE);
1750             t->toRules(escapedRules, TRUE);
1751             UnicodeString expRules = CharsToUnicodeString(DATA[d+2]);
1752             UnicodeString expEscapedRules(DATA[d+2], -1, US_INV);
1753             if (rules == expRules) {
1754                 logln((UnicodeString)"Ok: " + UnicodeString(DATA[d+1], -1, US_INV) +
1755                       " => " + rules);
1756             } else {
1757                 errln((UnicodeString)"FAIL: " + UnicodeString(DATA[d+1], -1, US_INV) +
1758                       " => " + rules + ", exp " + expRules);
1759             }
1760             if (escapedRules == expEscapedRules) {
1761                 logln((UnicodeString)"Ok: " + UnicodeString(DATA[d+1], -1, US_INV) +
1762                       " => " + escapedRules);
1763             } else {
1764                 errln((UnicodeString)"FAIL: " + UnicodeString(DATA[d+1], -1, US_INV) +
1765                       " => " + escapedRules + ", exp " + expEscapedRules);
1766             }
1767             delete t;
1768
1769         } else {
1770             // UnicodeSet test
1771             UErrorCode status = U_ZERO_ERROR;
1772             UnicodeString pat(DATA[d+1], -1, US_INV);
1773             UnicodeString expToPat(DATA[d+2], -1, US_INV);
1774             UnicodeSet set(pat, status);
1775             if (U_FAILURE(status)) {
1776                 errln("FAIL: UnicodeSet ct failed");
1777                 return;
1778             }
1779             // Adjust spacing etc. as necessary.
1780             UnicodeString toPat;
1781             set.toPattern(toPat);
1782             if (expToPat == toPat) {
1783                 logln((UnicodeString)"Ok: " + pat +
1784                       " => " + toPat);
1785             } else {
1786                 errln((UnicodeString)"FAIL: " + pat +
1787                       " => " + prettify(toPat, TRUE) +
1788                       ", exp " + prettify(pat, TRUE));
1789             }
1790         }
1791     }
1792 }
1793
1794 void TransliteratorTest::TestContext() {
1795     UTransPosition pos = {0, 2, 0, 1}; // cs cl s l
1796     expect("de > x; {d}e > y;",
1797            "de",
1798            "ye",
1799            &pos);
1800
1801     expect("ab{c} > z;",
1802            "xadabdabcy",
1803            "xadabdabzy");
1804 }
1805
1806 void TransliteratorTest::TestSupplemental() {
1807
1808     expect(CharsToUnicodeString("$a=\\U00010300; $s=[\\U00010300-\\U00010323];"
1809                                 "a > $a; $s > i;"),
1810            CharsToUnicodeString("ab\\U0001030Fx"),
1811            CharsToUnicodeString("\\U00010300bix"));
1812
1813     expect(CharsToUnicodeString("$a=[a-z\\U00010300-\\U00010323];"
1814                                 "$b=[A-Z\\U00010400-\\U0001044D];"
1815                                 "($a)($b) > $2 $1;"),
1816            CharsToUnicodeString("aB\\U00010300\\U00010400c\\U00010401\\U00010301D"),
1817            CharsToUnicodeString("Ba\\U00010400\\U00010300\\U00010401cD\\U00010301"));
1818
1819     // k|ax\\U00010300xm
1820
1821     // k|a\\U00010400\\U00010300xm
1822     // ky|\\U00010400\\U00010300xm
1823     // ky\\U00010400|\\U00010300xm
1824
1825     // ky\\U00010400|\\U00010300\\U00010400m
1826     // ky\\U00010400y|\\U00010400m
1827     expect(CharsToUnicodeString("$a=[a\\U00010300-\\U00010323];"
1828                                 "$a {x} > | @ \\U00010400;"
1829                                 "{$a} [^\\u0000-\\uFFFF] > y;"),
1830            CharsToUnicodeString("kax\\U00010300xm"),
1831            CharsToUnicodeString("ky\\U00010400y\\U00010400m"));
1832
1833     expectT("Any-Name",
1834            CharsToUnicodeString("\\U00010330\\U000E0061\\u00A0"),
1835            UNICODE_STRING_SIMPLE("\\N{GOTHIC LETTER AHSA}\\N{TAG LATIN SMALL LETTER A}\\N{NO-BREAK SPACE}"));
1836
1837     expectT("Any-Hex/Unicode",
1838            CharsToUnicodeString("\\U00010330\\U0010FF00\\U000E0061\\u00A0"),
1839            UNICODE_STRING_SIMPLE("U+10330U+10FF00U+E0061U+00A0"));
1840
1841     expectT("Any-Hex/C",
1842            CharsToUnicodeString("\\U00010330\\U0010FF00\\U000E0061\\u00A0"),
1843            UNICODE_STRING_SIMPLE("\\U00010330\\U0010FF00\\U000E0061\\u00A0"));
1844
1845     expectT("Any-Hex/Perl",
1846            CharsToUnicodeString("\\U00010330\\U0010FF00\\U000E0061\\u00A0"),
1847            UNICODE_STRING_SIMPLE("\\x{10330}\\x{10FF00}\\x{E0061}\\x{A0}"));
1848
1849     expectT("Any-Hex/Java",
1850            CharsToUnicodeString("\\U00010330\\U0010FF00\\U000E0061\\u00A0"),
1851            UNICODE_STRING_SIMPLE("\\uD800\\uDF30\\uDBFF\\uDF00\\uDB40\\uDC61\\u00A0"));
1852
1853     expectT("Any-Hex/XML",
1854            CharsToUnicodeString("\\U00010330\\U0010FF00\\U000E0061\\u00A0"),
1855            "&#x10330;&#x10FF00;&#xE0061;&#xA0;");
1856
1857     expectT("Any-Hex/XML10",
1858            CharsToUnicodeString("\\U00010330\\U0010FF00\\U000E0061\\u00A0"),
1859            "&#66352;&#1113856;&#917601;&#160;");
1860
1861     expectT(UNICODE_STRING_SIMPLE("[\\U000E0000-\\U000E0FFF] Remove"),
1862            CharsToUnicodeString("\\U00010330\\U0010FF00\\U000E0061\\u00A0"),
1863            CharsToUnicodeString("\\U00010330\\U0010FF00\\u00A0"));
1864 }
1865
1866 void TransliteratorTest::TestQuantifier() {
1867
1868     // Make sure @ in a quantified anteContext works
1869     expect("a+ {b} > | @@ c; A > a; (a+ c) > '(' $1 ')';",
1870            "AAAAAb",
1871            "aaa(aac)");
1872
1873     // Make sure @ in a quantified postContext works
1874     expect("{b} a+ > c @@ |; (a+) > '(' $1 ')';",
1875            "baaaaa",
1876            "caa(aaa)");
1877
1878     // Make sure @ in a quantified postContext with seg ref works
1879     expect("{(b)} a+ > $1 @@ |; (a+) > '(' $1 ')';",
1880            "baaaaa",
1881            "baa(aaa)");
1882
1883     // Make sure @ past ante context doesn't enter ante context
1884     UTransPosition pos = {0, 5, 3, 5};
1885     expect("a+ {b} > | @@ c; x > y; (a+ c) > '(' $1 ')';",
1886            "xxxab",
1887            "xxx(ac)",
1888            &pos);
1889
1890     // Make sure @ past post context doesn't pass limit
1891     UTransPosition pos2 = {0, 4, 0, 2};
1892     expect("{b} a+ > c @@ |; x > y; a > A;",
1893            "baxx",
1894            "caxx",
1895            &pos2);
1896
1897     // Make sure @ past post context doesn't enter post context
1898     expect("{b} a+ > c @@ |; x > y; a > A;",
1899            "baxx",
1900            "cayy");
1901
1902     expect("(ab)? c > d;",
1903            "c abc ababc",
1904            "d d abd");
1905
1906     // NOTE: The (ab)+ when referenced just yields a single "ab",
1907     // not the full sequence of them.  This accords with perl behavior.
1908     expect("(ab)+ {x} > '(' $1 ')';",
1909            "x abx ababxy",
1910            "x ab(ab) abab(ab)y");
1911
1912     expect("b+ > x;",
1913            "ac abc abbc abbbc",
1914            "ac axc axc axc");
1915
1916     expect("[abc]+ > x;",
1917            "qac abrc abbcs abtbbc",
1918            "qx xrx xs xtx");
1919
1920     expect("q{(ab)+} > x;",
1921            "qa qab qaba qababc qaba",
1922            "qa qx qxa qxc qxa");
1923
1924     expect("q(ab)* > x;",
1925            "qa qab qaba qababc",
1926            "xa x xa xc");
1927
1928     // NOTE: The (ab)+ when referenced just yields a single "ab",
1929     // not the full sequence of them.  This accords with perl behavior.
1930     expect("q(ab)* > '(' $1 ')';",
1931            "qa qab qaba qababc",
1932            "()a (ab) (ab)a (ab)c");
1933
1934     // 'foo'+ and 'foo'* -- the quantifier should apply to the entire
1935     // quoted string
1936     expect("'ab'+ > x;",
1937            "bb ab ababb",
1938            "bb x xb");
1939
1940     // $foo+ and $foo* -- the quantifier should apply to the entire
1941     // variable reference
1942     expect("$var = ab; $var+ > x;",
1943            "bb ab ababb",
1944            "bb x xb");
1945 }
1946
1947 class TestTrans : public Transliterator {
1948 public:
1949     TestTrans(const UnicodeString& id) : Transliterator(id, 0) {
1950     }
1951     virtual TestTrans* clone(void) const {
1952         return new TestTrans(getID());
1953     }
1954     virtual void handleTransliterate(Replaceable& /*text*/, UTransPosition& offsets,
1955         UBool /*isIncremental*/) const
1956     {
1957         offsets.start = offsets.limit;
1958     }
1959     virtual UClassID getDynamicClassID() const;
1960     static UClassID U_EXPORT2 getStaticClassID();
1961 };
1962 UOBJECT_DEFINE_RTTI_IMPLEMENTATION(TestTrans)
1963
1964 /**
1965  * Test Source-Target/Variant.
1966  */
1967 void TransliteratorTest::TestSTV(void) {
1968     int32_t ns = Transliterator::countAvailableSources();
1969     if (ns < 0 || ns > 255) {
1970         errln((UnicodeString)"FAIL: Bad source count: " + ns);
1971         return;
1972     }
1973     int32_t i, j;
1974     for (i=0; i<ns; ++i) {
1975         UnicodeString source;
1976         Transliterator::getAvailableSource(i, source);
1977         logln((UnicodeString)"" + i + ": " + source);
1978         if (source.length() == 0) {
1979             errln("FAIL: empty source");
1980             continue;
1981         }
1982         int32_t nt = Transliterator::countAvailableTargets(source);
1983         if (nt < 0 || nt > 255) {
1984             errln((UnicodeString)"FAIL: Bad target count: " + nt);
1985             continue;
1986         }
1987         for (int32_t j=0; j<nt; ++j) {
1988             UnicodeString target;
1989             Transliterator::getAvailableTarget(j, source, target);
1990             logln((UnicodeString)" " + j + ": " + target);
1991             if (target.length() == 0) {
1992                 errln("FAIL: empty target");
1993                 continue;
1994             }
1995             int32_t nv = Transliterator::countAvailableVariants(source, target);
1996             if (nv < 0 || nv > 255) {
1997                 errln((UnicodeString)"FAIL: Bad variant count: " + nv);
1998                 continue;
1999             }
2000             for (int32_t k=0; k<nv; ++k) {
2001                 UnicodeString variant;
2002                 Transliterator::getAvailableVariant(k, source, target, variant);
2003                 if (variant.length() == 0) {
2004                     logln((UnicodeString)"  " + k + ": <empty>");
2005                 } else {
2006                     logln((UnicodeString)"  " + k + ": " + variant);
2007                 }
2008             }
2009         }
2010     }
2011
2012     // Test registration
2013     const char* IDS[] = { "Fieruwer", "Seoridf-Sweorie", "Oewoir-Oweri/Vsie" };
2014     const char* FULL_IDS[] = { "Any-Fieruwer", "Seoridf-Sweorie", "Oewoir-Oweri/Vsie" };
2015     const char* SOURCES[] = { NULL, "Seoridf", "Oewoir" };
2016     for (i=0; i<3; ++i) {
2017         Transliterator *t = new TestTrans(IDS[i]);
2018         if (t == 0) {
2019             errln("FAIL: out of memory");
2020             return;
2021         }
2022         if (t->getID() != IDS[i]) {
2023             errln((UnicodeString)"FAIL: ID mismatch for " + IDS[i]);
2024             delete t;
2025             return;
2026         }
2027         Transliterator::registerInstance(t);
2028         UErrorCode status = U_ZERO_ERROR;
2029         t = Transliterator::createInstance(IDS[i], UTRANS_FORWARD, status);
2030         if (t == NULL) {
2031             errln((UnicodeString)"FAIL: Registration/creation failed for ID " +
2032                   IDS[i]);
2033         } else {
2034             logln((UnicodeString)"Ok: Registration/creation succeeded for ID " +
2035                   IDS[i]);
2036             delete t;
2037         }
2038         Transliterator::unregister(IDS[i]);
2039         t = Transliterator::createInstance(IDS[i], UTRANS_FORWARD, status);
2040         if (t != NULL) {
2041             errln((UnicodeString)"FAIL: Unregistration failed for ID " +
2042                   IDS[i]);
2043             delete t;
2044         }
2045     }
2046
2047     // Make sure getAvailable API reflects removal
2048     int32_t n = Transliterator::countAvailableIDs();
2049     for (i=0; i<n; ++i) {
2050         UnicodeString id = Transliterator::getAvailableID(i);
2051         for (j=0; j<3; ++j) {
2052             if (id.caseCompare(FULL_IDS[j],0)==0) {
2053                 errln((UnicodeString)"FAIL: unregister(" + id + ") failed");
2054             }
2055         }
2056     }
2057     n = Transliterator::countAvailableTargets("Any");
2058     for (i=0; i<n; ++i) {
2059         UnicodeString t;
2060         Transliterator::getAvailableTarget(i, "Any", t);
2061         if (t.caseCompare(IDS[0],0)==0) {
2062             errln((UnicodeString)"FAIL: unregister(Any-" + t + ") failed");
2063         }
2064     }
2065     n = Transliterator::countAvailableSources();
2066     for (i=0; i<n; ++i) {
2067         UnicodeString s;
2068         Transliterator::getAvailableSource(i, s);
2069         for (j=0; j<3; ++j) {
2070             if (SOURCES[j] == NULL) continue;
2071             if (s.caseCompare(SOURCES[j],0)==0) {
2072                 errln((UnicodeString)"FAIL: unregister(" + s + "-*) failed");
2073             }
2074         }
2075     }
2076 }
2077
2078 /**
2079  * Test inverse of Greek-Latin; Title()
2080  */
2081 void TransliteratorTest::TestCompoundInverse(void) {
2082     UParseError parseError;
2083     UErrorCode status = U_ZERO_ERROR;
2084     Transliterator *t = Transliterator::createInstance
2085         ("Greek-Latin; Title()", UTRANS_REVERSE,parseError, status);
2086     if (t == 0) {
2087         dataerrln("FAIL: createInstance - %s", u_errorName(status));
2088         return;
2089     }
2090     UnicodeString exp("(Title);Latin-Greek");
2091     if (t->getID() == exp) {
2092         logln("Ok: inverse of \"Greek-Latin; Title()\" is \"" +
2093               t->getID());
2094     } else {
2095         errln("FAIL: inverse of \"Greek-Latin; Title()\" is \"" +
2096               t->getID() + "\", expected \"" + exp + "\"");
2097     }
2098     delete t;
2099 }
2100
2101 /**
2102  * Test NFD chaining with RBT
2103  */
2104 void TransliteratorTest::TestNFDChainRBT() {
2105     UParseError pe;
2106     UErrorCode ec = U_ZERO_ERROR;
2107     Transliterator* t = Transliterator::createFromRules(
2108                                "TEST", "::NFD; aa > Q; a > q;",
2109                                UTRANS_FORWARD, pe, ec);
2110     if (t == NULL || U_FAILURE(ec)) {
2111         dataerrln("FAIL: Transliterator::createFromRules failed with %s", u_errorName(ec));
2112         return;
2113     }
2114     expect(*t, "aa", "Q");
2115     delete t;
2116
2117     // TEMPORARY TESTS -- BEING DEBUGGED
2118 //=-    UnicodeString s, s2;
2119 //=-    t = Transliterator::createInstance("Latin-Devanagari", UTRANS_FORWARD, pe, ec);
2120 //=-    s = CharsToUnicodeString("rmk\\u1E63\\u0113t");
2121 //=-    s2 = CharsToUnicodeString("\\u0930\\u094D\\u092E\\u094D\\u0915\\u094D\\u0937\\u0947\\u0924\\u094D");
2122 //=-    expect(*t, s, s2);
2123 //=-    delete t;
2124 //=-
2125 //=-    t = Transliterator::createInstance("Devanagari-Latin", UTRANS_FORWARD, pe, ec);
2126 //=-    expect(*t, s2, s);
2127 //=-    delete t;
2128 //=-
2129 //=-    t = Transliterator::createInstance("Latin-Devanagari;Devanagari-Latin", UTRANS_FORWARD, pe, ec);
2130 //=-    s = CharsToUnicodeString("rmk\\u1E63\\u0113t");
2131 //=-    expect(*t, s, s);
2132 //=-    delete t;
2133
2134 //    const char* source[] = {
2135 //        /*
2136 //        "\\u015Br\\u012Bmad",
2137 //        "bhagavadg\\u012Bt\\u0101",
2138 //        "adhy\\u0101ya",
2139 //        "arjuna",
2140 //        "vi\\u1E63\\u0101da",
2141 //        "y\\u014Dga",
2142 //        "dhr\\u0325tar\\u0101\\u1E63\\u1E6Dra",
2143 //        "uv\\u0101cr\\u0325",
2144 //        */
2145 //        "rmk\\u1E63\\u0113t",
2146 //      //"dharmak\\u1E63\\u0113tr\\u0113",
2147 //        /*
2148 //        "kuruk\\u1E63\\u0113tr\\u0113",
2149 //        "samav\\u0113t\\u0101",
2150 //        "yuyutsava-\\u1E25",
2151 //        "m\\u0101mak\\u0101-\\u1E25",
2152 //     // "p\\u0101\\u1E47\\u1E0Dav\\u0101\\u015Bcaiva",
2153 //        "kimakurvata",
2154 //        "san\\u0304java",
2155 //        */
2156 //
2157 //        0
2158 //    };
2159 //    const char* expected[] = {
2160 //        /*
2161 //        "\\u0936\\u094d\\u0930\\u0940\\u092e\\u0926\\u094d",
2162 //        "\\u092d\\u0917\\u0935\\u0926\\u094d\\u0917\\u0940\\u0924\\u093e",
2163 //        "\\u0905\\u0927\\u094d\\u092f\\u093e\\u092f",
2164 //        "\\u0905\\u0930\\u094d\\u091c\\u0941\\u0928",
2165 //        "\\u0935\\u093f\\u0937\\u093e\\u0926",
2166 //        "\\u092f\\u094b\\u0917",
2167 //        "\\u0927\\u0943\\u0924\\u0930\\u093e\\u0937\\u094d\\u091f\\u094d\\u0930",
2168 //        "\\u0909\\u0935\\u093E\\u091A\\u0943",
2169 //        */
2170 //        "\\u0927",
2171 //        //"\\u0927\\u0930\\u094d\\u092e\\u0915\\u094d\\u0937\\u0947\\u0924\\u094d\\u0930\\u0947",
2172 //        /*
2173 //        "\\u0915\\u0941\\u0930\\u0941\\u0915\\u094d\\u0937\\u0947\\u0924\\u094d\\u0930\\u0947",
2174 //        "\\u0938\\u092e\\u0935\\u0947\\u0924\\u093e",
2175 //        "\\u092f\\u0941\\u092f\\u0941\\u0924\\u094d\\u0938\\u0935\\u0903",
2176 //        "\\u092e\\u093e\\u092e\\u0915\\u093e\\u0903",
2177 //    //  "\\u092a\\u093e\\u0923\\u094d\\u0921\\u0935\\u093e\\u0936\\u094d\\u091a\\u0948\\u0935",
2178 //        "\\u0915\\u093f\\u092e\\u0915\\u0941\\u0930\\u094d\\u0935\\u0924",
2179 //        "\\u0938\\u0902\\u091c\\u0935",
2180 //        */
2181 //        0
2182 //    };
2183 //    UErrorCode status = U_ZERO_ERROR;
2184 //    UParseError parseError;
2185 //    UnicodeString message;
2186 //    Transliterator* latinToDevToLatin=Transliterator::createInstance("Latin-Devanagari;Devanagari-Latin", UTRANS_FORWARD, parseError, status);
2187 //    Transliterator* devToLatinToDev=Transliterator::createInstance("Devanagari-Latin;Latin-Devanagari", UTRANS_FORWARD, parseError, status);
2188 //    if(U_FAILURE(status)){
2189 //        errln("FAIL: construction " +   UnicodeString(" Error: ") + u_errorName(status));
2190 //        errln("PreContext: " + prettify(parseError.preContext) + "PostContext: " + prettify( parseError.postContext) );
2191 //        delete latinToDevToLatin;
2192 //        delete devToLatinToDev;
2193 //        return;
2194 //    }
2195 //    UnicodeString gotResult;
2196 //    for(int i= 0; source[i] != 0; i++){
2197 //        gotResult = source[i];
2198 //        expect(*latinToDevToLatin,CharsToUnicodeString(source[i]),CharsToUnicodeString(source[i]));
2199 //        expect(*devToLatinToDev,CharsToUnicodeString(expected[i]),CharsToUnicodeString(expected[i]));
2200 //    }
2201 //    delete latinToDevToLatin;
2202 //    delete devToLatinToDev;
2203 }
2204
2205 /**
2206  * Inverse of "Null" should be "Null". (J21)
2207  */
2208 void TransliteratorTest::TestNullInverse() {
2209     UParseError pe;
2210     UErrorCode ec = U_ZERO_ERROR;
2211     Transliterator *t = Transliterator::createInstance("Null", UTRANS_FORWARD, pe, ec);
2212     if (t == 0 || U_FAILURE(ec)) {
2213         errln("FAIL: createInstance");
2214         return;
2215     }
2216     Transliterator *u = t->createInverse(ec);
2217     if (u == 0 || U_FAILURE(ec)) {
2218         errln("FAIL: createInverse");
2219         delete t;
2220         return;
2221     }
2222     if (u->getID() != "Null") {
2223         errln("FAIL: Inverse of Null should be Null");
2224     }
2225     delete t;
2226     delete u;
2227 }
2228
2229 /**
2230  * Check ID of inverse of alias. (J22)
2231  */
2232 void TransliteratorTest::TestAliasInverseID() {
2233     UnicodeString ID("Latin-Hangul", ""); // This should be any alias ID with an inverse
2234     UParseError pe;
2235     UErrorCode ec = U_ZERO_ERROR;
2236     Transliterator *t = Transliterator::createInstance(ID, UTRANS_FORWARD, pe, ec);
2237     if (t == 0 || U_FAILURE(ec)) {
2238         dataerrln("FAIL: createInstance - %s", u_errorName(ec));
2239         return;
2240     }
2241     Transliterator *u = t->createInverse(ec);
2242     if (u == 0 || U_FAILURE(ec)) {
2243         errln("FAIL: createInverse");
2244         delete t;
2245         return;
2246     }
2247     UnicodeString exp = "Hangul-Latin";
2248     UnicodeString got = u->getID();
2249     if (got != exp) {
2250         errln((UnicodeString)"FAIL: Inverse of " + ID + " is " + got +
2251               ", expected " + exp);
2252     }
2253     delete t;
2254     delete u;
2255 }
2256
2257 /**
2258  * Test IDs of inverses of compound transliterators. (J20)
2259  */
2260 void TransliteratorTest::TestCompoundInverseID() {
2261     UnicodeString ID = "Latin-Jamo;NFC(NFD)";
2262     UParseError pe;
2263     UErrorCode ec = U_ZERO_ERROR;
2264     Transliterator *t = Transliterator::createInstance(ID, UTRANS_FORWARD, pe, ec);
2265     if (t == 0 || U_FAILURE(ec)) {
2266         dataerrln("FAIL: createInstance - %s", u_errorName(ec));
2267         return;
2268     }
2269     Transliterator *u = t->createInverse(ec);
2270     if (u == 0 || U_FAILURE(ec)) {
2271         errln("FAIL: createInverse");
2272         delete t;
2273         return;
2274     }
2275     UnicodeString exp = "NFD(NFC);Jamo-Latin";
2276     UnicodeString got = u->getID();
2277     if (got != exp) {
2278         errln((UnicodeString)"FAIL: Inverse of " + ID + " is " + got +
2279               ", expected " + exp);
2280     }
2281     delete t;
2282     delete u;
2283 }
2284
2285 /**
2286  * Test undefined variable.
2287
2288  */
2289 void TransliteratorTest::TestUndefinedVariable() {
2290     UnicodeString rule = "$initial } a <> \\u1161;";
2291     UParseError pe;
2292     UErrorCode ec = U_ZERO_ERROR;
2293     Transliterator *t = Transliterator::createFromRules("<ID>", rule, UTRANS_FORWARD, pe, ec);
2294     delete t;
2295     if (U_FAILURE(ec)) {
2296         logln((UnicodeString)"OK: Got exception for " + rule + ", as expected: " +
2297               u_errorName(ec));
2298         return;
2299     }
2300     errln((UnicodeString)"Fail: bogus rule " + rule + " compiled with error " +
2301           u_errorName(ec));
2302 }
2303
2304 /**
2305  * Test empty context.
2306  */
2307 void TransliteratorTest::TestEmptyContext() {
2308     expect(" { a } > b;", "xay a ", "xby b ");
2309 }
2310
2311 /**
2312 * Test compound filter ID syntax
2313 */
2314 void TransliteratorTest::TestCompoundFilterID(void) {
2315     static const char* DATA[] = {
2316         // Col. 1 = ID or rule set (latter must start with #)
2317
2318         // = columns > 1 are null if expect col. 1 to be illegal =
2319
2320         // Col. 2 = direction, "F..." or "R..."
2321         // Col. 3 = source string
2322         // Col. 4 = exp result
2323
2324         "[abc]; [abc]", NULL, NULL, NULL, // multiple filters
2325         "Latin-Greek; [abc];", NULL, NULL, NULL, // misplaced filter
2326         "[b]; Latin-Greek; Upper; ([xyz])", "F", "abc", "a\\u0392c",
2327         "[b]; (Lower); Latin-Greek; Upper(); ([\\u0392])", "R", "\\u0391\\u0392\\u0393", "\\u0391b\\u0393",
2328         "#\n::[b]; ::Latin-Greek; ::Upper; ::([xyz]);", "F", "abc", "a\\u0392c",
2329         "#\n::[b]; ::(Lower); ::Latin-Greek; ::Upper(); ::([\\u0392]);", "R", "\\u0391\\u0392\\u0393", "\\u0391b\\u0393",
2330         NULL,
2331     };
2332
2333     for (int32_t i=0; DATA[i]; i+=4) {
2334         UnicodeString id = CharsToUnicodeString(DATA[i]);
2335         UTransDirection direction = (DATA[i+1] != NULL && DATA[i+1][0] == 'R') ?
2336             UTRANS_REVERSE : UTRANS_FORWARD;
2337         UnicodeString source;
2338         UnicodeString exp;
2339         if (DATA[i+2] != NULL) {
2340             source = CharsToUnicodeString(DATA[i+2]);
2341             exp = CharsToUnicodeString(DATA[i+3]);
2342         }
2343         UBool expOk = (DATA[i+1] != NULL);
2344         LocalPointer<Transliterator> t;
2345         UParseError pe;
2346         UErrorCode ec = U_ZERO_ERROR;
2347         if (id.charAt(0) == 0x23/*#*/) {
2348             t.adoptInstead(Transliterator::createFromRules("ID", id, direction, pe, ec));
2349         } else {
2350             t.adoptInstead(Transliterator::createInstance(id, direction, pe, ec));
2351         }
2352         UBool ok = (t.isValid() && U_SUCCESS(ec));
2353         UnicodeString transID;
2354         if (t.isValid()) {
2355             transID = t->getID();
2356         }
2357         else {
2358             transID = UnicodeString("NULL", "");
2359         }
2360         if (ok == expOk) {
2361             logln((UnicodeString)"Ok: " + id + " => " + transID + ", " +
2362                   u_errorName(ec));
2363             if (source.length() != 0) {
2364                 expect(*t, source, exp);
2365             }
2366         } else {
2367             dataerrln((UnicodeString)"FAIL: " + id + " => " + transID + ", " +
2368                   u_errorName(ec));
2369         }
2370     }
2371 }
2372
2373 /**
2374  * Test new property set syntax
2375  */
2376 void TransliteratorTest::TestPropertySet() {
2377     expect(UNICODE_STRING_SIMPLE("a>A; \\p{Lu}>x; \\p{ANY}>y;"), "abcDEF", "Ayyxxx");
2378     expect("(.+)>'[' $1 ']';", " a stitch \n in time \r saves 9",
2379            "[ a stitch ]\n[ in time ]\r[ saves 9]");
2380 }
2381
2382 /**
2383  * Test various failure points of the new 2.0 engine.
2384  */
2385 void TransliteratorTest::TestNewEngine() {
2386     UParseError pe;
2387     UErrorCode ec = U_ZERO_ERROR;
2388     Transliterator *t = Transliterator::createInstance("Latin-Hiragana", UTRANS_FORWARD, pe, ec);
2389     if (t == 0 || U_FAILURE(ec)) {
2390         dataerrln("FAIL: createInstance Latin-Hiragana - %s", u_errorName(ec));
2391         return;
2392     }
2393     // Katakana should be untouched
2394     expect(*t, CharsToUnicodeString("a\\u3042\\u30A2"),
2395            CharsToUnicodeString("\\u3042\\u3042\\u30A2"));
2396
2397     delete t;
2398
2399 #if 1
2400     // This test will only work if Transliterator.ROLLBACK is
2401     // true.  Otherwise, this test will fail, revealing a
2402     // limitation of global filters in incremental mode.
2403     Transliterator *a =
2404         Transliterator::createFromRules("a_to_A", "a > A;", UTRANS_FORWARD, pe, ec);
2405     Transliterator *A =
2406         Transliterator::createFromRules("A_to_b", "A > b;", UTRANS_FORWARD, pe, ec);
2407     if (U_FAILURE(ec)) {
2408         delete a;
2409         delete A;
2410         return;
2411     }
2412
2413     Transliterator* array[3];
2414     array[0] = a;
2415     array[1] = Transliterator::createInstance("NFD", UTRANS_FORWARD, pe, ec);
2416     array[2] = A;
2417     if (U_FAILURE(ec)) {
2418         errln("FAIL: createInstance NFD");
2419         delete a;
2420         delete A;
2421         delete array[1];
2422         return;
2423     }
2424
2425     t = new CompoundTransliterator(array, 3, new UnicodeSet("[:Ll:]", ec));
2426     if (U_FAILURE(ec)) {
2427         errln("FAIL: UnicodeSet constructor");
2428         delete a;
2429         delete A;
2430         delete array[1];
2431         delete t;
2432         return;
2433     }
2434
2435     expect(*t, "aAaA", "bAbA");
2436
2437     assertTrue("countElements", t->countElements() == 3);
2438     assertEquals("getElement(0)", t->getElement(0, ec).getID(), "a_to_A");
2439     assertEquals("getElement(1)", t->getElement(1, ec).getID(), "NFD");
2440     assertEquals("getElement(2)", t->getElement(2, ec).getID(), "A_to_b");
2441     assertSuccess("getElement", ec);
2442
2443     delete a;
2444     delete A;
2445     delete array[1];
2446     delete t;
2447 #endif
2448
2449     expect("$smooth = x; $macron = q; [:^L:] { ([aeiouyAEIOUY] $macron?) } [^aeiouyAEIOUY$smooth$macron] > | $1 $smooth ;",
2450            "a",
2451            "ax");
2452
2453     UnicodeString gr = CharsToUnicodeString(
2454         "$ddot = \\u0308 ;"
2455         "$lcgvowel = [\\u03b1\\u03b5\\u03b7\\u03b9\\u03bf\\u03c5\\u03c9] ;"
2456         "$rough = \\u0314 ;"
2457         "($lcgvowel+ $ddot?) $rough > h | $1 ;"
2458         "\\u03b1 <> a ;"
2459         "$rough <> h ;");
2460
2461     expect(gr, CharsToUnicodeString("\\u03B1\\u0314"), "ha");
2462 }
2463
2464 /**
2465  * Test quantified segment behavior.  We want:
2466  * ([abc])+ > x $1 x; applied to "cba" produces "xax"
2467  */
2468 void TransliteratorTest::TestQuantifiedSegment(void) {
2469     // The normal case
2470     expect("([abc]+) > x $1 x;", "cba", "xcbax");
2471
2472     // The tricky case; the quantifier is around the segment
2473     expect("([abc])+ > x $1 x;", "cba", "xax");
2474
2475     // Tricky case in reverse direction
2476     expect("([abc])+ { q > x $1 x;", "cbaq", "cbaxax");
2477
2478     // Check post-context segment
2479     expect("{q} ([a-d])+ > '(' $1 ')';", "ddqcba", "dd(a)cba");
2480
2481     // Test toRule/toPattern for non-quantified segment.
2482     // Careful with spacing here.
2483     UnicodeString r("([a-c]){q} > x $1 x;");
2484     UParseError pe;
2485     UErrorCode ec = U_ZERO_ERROR;
2486     Transliterator* t = Transliterator::createFromRules("ID", r, UTRANS_FORWARD, pe, ec);
2487     if (U_FAILURE(ec)) {
2488         errln("FAIL: createFromRules");
2489         delete t;
2490         return;
2491     }
2492     UnicodeString rr;
2493     t->toRules(rr, TRUE);
2494     if (r != rr) {
2495         errln((UnicodeString)"FAIL: \"" + r + "\" x toRules() => \"" + rr + "\"");
2496     } else {
2497         logln((UnicodeString)"Ok: \"" + r + "\" x toRules() => \"" + rr + "\"");
2498     }
2499     delete t;
2500
2501     // Test toRule/toPattern for quantified segment.
2502     // Careful with spacing here.
2503     r = "([a-c])+{q} > x $1 x;";
2504     t = Transliterator::createFromRules("ID", r, UTRANS_FORWARD, pe, ec);
2505     if (U_FAILURE(ec)) {
2506         errln("FAIL: createFromRules");
2507         delete t;
2508         return;
2509     }
2510     t->toRules(rr, TRUE);
2511     if (r != rr) {
2512         errln((UnicodeString)"FAIL: \"" + r + "\" x toRules() => \"" + rr + "\"");
2513     } else {
2514         logln((UnicodeString)"Ok: \"" + r + "\" x toRules() => \"" + rr + "\"");
2515     }
2516     delete t;
2517 }
2518
2519 //======================================================================
2520 // Ram's tests
2521 //======================================================================
2522 void TransliteratorTest::TestDevanagariLatinRT(){
2523     const int MAX_LEN= 52;
2524     const char* const source[MAX_LEN] = {
2525         "bh\\u0101rata",
2526         "kra",
2527         "k\\u1E63a",
2528         "khra",
2529         "gra",
2530         "\\u1E45ra",
2531         "cra",
2532         "chra",
2533         "j\\u00F1a",
2534         "jhra",
2535         "\\u00F1ra",
2536         "\\u1E6Dya",
2537         "\\u1E6Dhra",
2538         "\\u1E0Dya",
2539       //"r\\u0323ya", // \u095c is not valid in Devanagari
2540         "\\u1E0Dhya",
2541         "\\u1E5Bhra",
2542         "\\u1E47ra",
2543         "tta",
2544         "thra",
2545         "dda",
2546         "dhra",
2547         "nna",
2548         "pra",
2549         "phra",
2550         "bra",
2551         "bhra",
2552         "mra",
2553         "\\u1E49ra",
2554       //"l\\u0331ra",
2555         "yra",
2556         "\\u1E8Fra",
2557       //"l-",
2558         "vra",
2559         "\\u015Bra",
2560         "\\u1E63ra",
2561         "sra",
2562         "hma",
2563         "\\u1E6D\\u1E6Da",
2564         "\\u1E6D\\u1E6Dha",
2565         "\\u1E6Dh\\u1E6Dha",
2566         "\\u1E0D\\u1E0Da",
2567         "\\u1E0D\\u1E0Dha",
2568         "\\u1E6Dya",
2569         "\\u1E6Dhya",
2570         "\\u1E0Dya",
2571         "\\u1E0Dhya",
2572         // Not roundtrippable --
2573         // \\u0939\\u094d\\u094d\\u092E  - hma
2574         // \\u0939\\u094d\\u092E         - hma
2575         // CharsToUnicodeString("hma"),
2576         "hya",
2577         "\\u015Br\\u0325",
2578         "\\u015Bca",
2579         "\\u0115",
2580         "san\\u0304j\\u012Bb s\\u0113nagupta",
2581         "\\u0101nand vaddir\\u0101ju",
2582         "\\u0101",
2583         "a"
2584     };
2585     const char* const expected[MAX_LEN] = {
2586         "\\u092D\\u093E\\u0930\\u0924",   /* bha\\u0304rata */
2587         "\\u0915\\u094D\\u0930",          /* kra         */
2588         "\\u0915\\u094D\\u0937",          /* ks\\u0323a  */
2589         "\\u0916\\u094D\\u0930",          /* khra        */
2590         "\\u0917\\u094D\\u0930",          /* gra         */
2591         "\\u0919\\u094D\\u0930",          /* n\\u0307ra  */
2592         "\\u091A\\u094D\\u0930",          /* cra         */
2593         "\\u091B\\u094D\\u0930",          /* chra        */
2594         "\\u091C\\u094D\\u091E",          /* jn\\u0303a  */
2595         "\\u091D\\u094D\\u0930",          /* jhra        */
2596         "\\u091E\\u094D\\u0930",          /* n\\u0303ra  */
2597         "\\u091F\\u094D\\u092F",          /* t\\u0323ya  */
2598         "\\u0920\\u094D\\u0930",          /* t\\u0323hra */
2599         "\\u0921\\u094D\\u092F",          /* d\\u0323ya  */
2600       //"\\u095C\\u094D\\u092F",        /* r\\u0323ya  */ // \u095c is not valid in Devanagari
2601         "\\u0922\\u094D\\u092F",          /* d\\u0323hya */
2602         "\\u0922\\u093C\\u094D\\u0930",   /* r\\u0323hra */
2603         "\\u0923\\u094D\\u0930",          /* n\\u0323ra  */
2604         "\\u0924\\u094D\\u0924",          /* tta         */
2605         "\\u0925\\u094D\\u0930",          /* thra        */
2606         "\\u0926\\u094D\\u0926",          /* dda         */
2607         "\\u0927\\u094D\\u0930",          /* dhra        */
2608         "\\u0928\\u094D\\u0928",          /* nna         */
2609         "\\u092A\\u094D\\u0930",          /* pra         */
2610         "\\u092B\\u094D\\u0930",          /* phra        */
2611         "\\u092C\\u094D\\u0930",          /* bra         */
2612         "\\u092D\\u094D\\u0930",          /* bhra        */
2613         "\\u092E\\u094D\\u0930",          /* mra         */
2614         "\\u0929\\u094D\\u0930",          /* n\\u0331ra  */
2615       //"\\u0934\\u094D\\u0930",        /* l\\u0331ra  */
2616         "\\u092F\\u094D\\u0930",          /* yra         */
2617         "\\u092F\\u093C\\u094D\\u0930",   /* y\\u0307ra  */
2618       //"l-",
2619         "\\u0935\\u094D\\u0930",          /* vra         */
2620         "\\u0936\\u094D\\u0930",          /* s\\u0301ra  */
2621         "\\u0937\\u094D\\u0930",          /* s\\u0323ra  */
2622         "\\u0938\\u094D\\u0930",          /* sra         */
2623         "\\u0939\\u094d\\u092E",          /* hma         */
2624         "\\u091F\\u094D\\u091F",          /* t\\u0323t\\u0323a  */
2625         "\\u091F\\u094D\\u0920",          /* t\\u0323t\\u0323ha */
2626         "\\u0920\\u094D\\u0920",          /* t\\u0323ht\\u0323ha*/
2627         "\\u0921\\u094D\\u0921",          /* d\\u0323d\\u0323a  */
2628         "\\u0921\\u094D\\u0922",          /* d\\u0323d\\u0323ha */
2629         "\\u091F\\u094D\\u092F",          /* t\\u0323ya  */
2630         "\\u0920\\u094D\\u092F",          /* t\\u0323hya */
2631         "\\u0921\\u094D\\u092F",          /* d\\u0323ya  */
2632         "\\u0922\\u094D\\u092F",          /* d\\u0323hya */
2633      // "hma",                         /* hma         */
2634         "\\u0939\\u094D\\u092F",          /* hya         */
2635         "\\u0936\\u0943",                 /* s\\u0301r\\u0325a  */
2636         "\\u0936\\u094D\\u091A",          /* s\\u0301ca  */
2637         "\\u090d",                        /* e\\u0306    */
2638         "\\u0938\\u0902\\u091C\\u0940\\u092C\\u094D \\u0938\\u0947\\u0928\\u0917\\u0941\\u092A\\u094D\\u0924",
2639         "\\u0906\\u0928\\u0902\\u0926\\u094D \\u0935\\u0926\\u094D\\u0926\\u093F\\u0930\\u093E\\u091C\\u0941",
2640         "\\u0906",
2641         "\\u0905",
2642     };
2643     UErrorCode status = U_ZERO_ERROR;
2644     UParseError parseError;
2645     UnicodeString message;
2646     Transliterator* latinToDev=Transliterator::createInstance("Latin-Devanagari", UTRANS_FORWARD, parseError, status);
2647     Transliterator* devToLatin=Transliterator::createInstance("Devanagari-Latin", UTRANS_FORWARD, parseError, status);
2648     if(U_FAILURE(status)){
2649         dataerrln("FAIL: construction " +   UnicodeString(" Error: ") + u_errorName(status));
2650         dataerrln("PreContext: " + prettify(parseError.preContext) + " PostContext: " + prettify( parseError.postContext) );
2651         return;
2652     }
2653     UnicodeString gotResult;
2654     for(int i= 0; i<MAX_LEN; i++){
2655         gotResult = source[i];
2656         expect(*latinToDev,CharsToUnicodeString(source[i]),CharsToUnicodeString(expected[i]));
2657         expect(*devToLatin,CharsToUnicodeString(expected[i]),CharsToUnicodeString(source[i]));
2658     }
2659     delete latinToDev;
2660     delete devToLatin;
2661 }
2662
2663 void TransliteratorTest::TestTeluguLatinRT(){
2664     const int MAX_LEN=10;
2665     const char* const source[MAX_LEN] = {
2666         "raghur\\u0101m vi\\u015Bvan\\u0101dha",                         /* Raghuram Viswanadha    */
2667         "\\u0101nand vaddir\\u0101ju",                                   /* Anand Vaddiraju        */
2668         "r\\u0101j\\u012Bv ka\\u015Barab\\u0101da",                      /* Rajeev Kasarabada      */
2669         "san\\u0304j\\u012Bv ka\\u015Barab\\u0101da",                    /* sanjeev kasarabada     */
2670         "san\\u0304j\\u012Bb sen'gupta",                                 /* sanjib sengupata       */
2671         "amar\\u0113ndra hanum\\u0101nula",                              /* Amarendra hanumanula   */
2672         "ravi kum\\u0101r vi\\u015Bvan\\u0101dha",                       /* Ravi Kumar Viswanadha  */
2673         "\\u0101ditya kandr\\u0113gula",                                 /* Aditya Kandregula      */
2674         "\\u015Br\\u012Bdhar ka\\u1E47\\u1E6Dama\\u015Be\\u1E6D\\u1E6Di",/* Shridhar Kantamsetty   */
2675         "m\\u0101dhav de\\u015Be\\u1E6D\\u1E6Di"                         /* Madhav Desetty         */
2676     };
2677
2678     const char* const expected[MAX_LEN] = {
2679         "\\u0c30\\u0c18\\u0c41\\u0c30\\u0c3e\\u0c2e\\u0c4d \\u0c35\\u0c3f\\u0c36\\u0c4d\\u0c35\\u0c28\\u0c3e\\u0c27",
2680         "\\u0c06\\u0c28\\u0c02\\u0c26\\u0c4d \\u0C35\\u0C26\\u0C4D\\u0C26\\u0C3F\\u0C30\\u0C3E\\u0C1C\\u0C41",
2681         "\\u0c30\\u0c3e\\u0c1c\\u0c40\\u0c35\\u0c4d \\u0c15\\u0c36\\u0c30\\u0c2c\\u0c3e\\u0c26",
2682         "\\u0c38\\u0c02\\u0c1c\\u0c40\\u0c35\\u0c4d \\u0c15\\u0c36\\u0c30\\u0c2c\\u0c3e\\u0c26",
2683         "\\u0c38\\u0c02\\u0c1c\\u0c40\\u0c2c\\u0c4d \\u0c38\\u0c46\\u0c28\\u0c4d\\u0c17\\u0c41\\u0c2a\\u0c4d\\u0c24",
2684         "\\u0c05\\u0c2e\\u0c30\\u0c47\\u0c02\\u0c26\\u0c4d\\u0c30 \\u0c39\\u0c28\\u0c41\\u0c2e\\u0c3e\\u0c28\\u0c41\\u0c32",
2685         "\\u0c30\\u0c35\\u0c3f \\u0c15\\u0c41\\u0c2e\\u0c3e\\u0c30\\u0c4d \\u0c35\\u0c3f\\u0c36\\u0c4d\\u0c35\\u0c28\\u0c3e\\u0c27",
2686         "\\u0c06\\u0c26\\u0c3f\\u0c24\\u0c4d\\u0c2f \\u0C15\\u0C02\\u0C26\\u0C4D\\u0C30\\u0C47\\u0C17\\u0C41\\u0c32",
2687         "\\u0c36\\u0c4d\\u0c30\\u0c40\\u0C27\\u0C30\\u0C4D \\u0c15\\u0c02\\u0c1f\\u0c2e\\u0c36\\u0c46\\u0c1f\\u0c4d\\u0c1f\\u0c3f",
2688         "\\u0c2e\\u0c3e\\u0c27\\u0c35\\u0c4d \\u0c26\\u0c46\\u0c36\\u0c46\\u0c1f\\u0c4d\\u0c1f\\u0c3f",
2689     };
2690
2691     UErrorCode status = U_ZERO_ERROR;
2692     UParseError parseError;
2693     UnicodeString message;
2694     Transliterator* latinToDev=Transliterator::createInstance("Latin-Telugu", UTRANS_FORWARD, parseError, status);
2695     Transliterator* devToLatin=Transliterator::createInstance("Telugu-Latin", UTRANS_FORWARD, parseError, status);
2696     if(U_FAILURE(status)){
2697         dataerrln("FAIL: construction " +   UnicodeString(" Error: ") + u_errorName(status));
2698         dataerrln("PreContext: " + prettify(parseError.preContext) + " PostContext: " + prettify( parseError.postContext) );
2699         return;
2700     }
2701     UnicodeString gotResult;
2702     for(int i= 0; i<MAX_LEN; i++){
2703         gotResult = source[i];
2704         expect(*latinToDev,CharsToUnicodeString(source[i]),CharsToUnicodeString(expected[i]));
2705         expect(*devToLatin,CharsToUnicodeString(expected[i]),CharsToUnicodeString(source[i]));
2706     }
2707     delete latinToDev;
2708     delete devToLatin;
2709 }
2710
2711 void TransliteratorTest::TestSanskritLatinRT(){
2712     const int MAX_LEN =16;
2713     const char* const source[MAX_LEN] = {
2714         "rmk\\u1E63\\u0113t",
2715         "\\u015Br\\u012Bmad",
2716         "bhagavadg\\u012Bt\\u0101",
2717         "adhy\\u0101ya",
2718         "arjuna",
2719         "vi\\u1E63\\u0101da",
2720         "y\\u014Dga",
2721         "dhr\\u0325tar\\u0101\\u1E63\\u1E6Dra",
2722         "uv\\u0101cr\\u0325",
2723         "dharmak\\u1E63\\u0113tr\\u0113",
2724         "kuruk\\u1E63\\u0113tr\\u0113",
2725         "samav\\u0113t\\u0101",
2726         "yuyutsava\\u1E25",
2727         "m\\u0101mak\\u0101\\u1E25",
2728     // "p\\u0101\\u1E47\\u1E0Dav\\u0101\\u015Bcaiva",
2729         "kimakurvata",
2730         "san\\u0304java",
2731     };
2732     const char* const expected[MAX_LEN] = {
2733         "\\u0930\\u094D\\u092E\\u094D\\u0915\\u094D\\u0937\\u0947\\u0924\\u094D",
2734         "\\u0936\\u094d\\u0930\\u0940\\u092e\\u0926\\u094d",
2735         "\\u092d\\u0917\\u0935\\u0926\\u094d\\u0917\\u0940\\u0924\\u093e",
2736         "\\u0905\\u0927\\u094d\\u092f\\u093e\\u092f",
2737         "\\u0905\\u0930\\u094d\\u091c\\u0941\\u0928",
2738         "\\u0935\\u093f\\u0937\\u093e\\u0926",
2739         "\\u092f\\u094b\\u0917",
2740         "\\u0927\\u0943\\u0924\\u0930\\u093e\\u0937\\u094d\\u091f\\u094d\\u0930",
2741         "\\u0909\\u0935\\u093E\\u091A\\u0943",
2742         "\\u0927\\u0930\\u094d\\u092e\\u0915\\u094d\\u0937\\u0947\\u0924\\u094d\\u0930\\u0947",
2743         "\\u0915\\u0941\\u0930\\u0941\\u0915\\u094d\\u0937\\u0947\\u0924\\u094d\\u0930\\u0947",
2744         "\\u0938\\u092e\\u0935\\u0947\\u0924\\u093e",
2745         "\\u092f\\u0941\\u092f\\u0941\\u0924\\u094d\\u0938\\u0935\\u0903",
2746         "\\u092e\\u093e\\u092e\\u0915\\u093e\\u0903",
2747     //"\\u092a\\u093e\\u0923\\u094d\\u0921\\u0935\\u093e\\u0936\\u094d\\u091a\\u0948\\u0935",
2748         "\\u0915\\u093f\\u092e\\u0915\\u0941\\u0930\\u094d\\u0935\\u0924",
2749         "\\u0938\\u0902\\u091c\\u0935",
2750     };
2751     UErrorCode status = U_ZERO_ERROR;
2752     UParseError parseError;
2753     UnicodeString message;
2754     Transliterator* latinToDev=Transliterator::createInstance("Latin-Devanagari", UTRANS_FORWARD, parseError, status);
2755     Transliterator* devToLatin=Transliterator::createInstance("Devanagari-Latin", UTRANS_FORWARD, parseError, status);
2756     if(U_FAILURE(status)){
2757         dataerrln("FAIL: construction " +   UnicodeString(" Error: ") + u_errorName(status));
2758         dataerrln("PreContext: " + prettify(parseError.preContext) + " PostContext: " + prettify( parseError.postContext) );
2759         return;
2760     }
2761     UnicodeString gotResult;
2762     for(int i= 0; i<MAX_LEN; i++){
2763         gotResult = source[i];
2764         expect(*latinToDev,CharsToUnicodeString(source[i]),CharsToUnicodeString(expected[i]));
2765         expect(*devToLatin,CharsToUnicodeString(expected[i]),CharsToUnicodeString(source[i]));
2766     }
2767     delete latinToDev;
2768     delete devToLatin;
2769 }
2770
2771
2772 void TransliteratorTest::TestCompoundLatinRT(){
2773     const char* const source[] = {
2774         "rmk\\u1E63\\u0113t",
2775         "\\u015Br\\u012Bmad",
2776         "bhagavadg\\u012Bt\\u0101",
2777         "adhy\\u0101ya",
2778         "arjuna",
2779         "vi\\u1E63\\u0101da",
2780         "y\\u014Dga",
2781         "dhr\\u0325tar\\u0101\\u1E63\\u1E6Dra",
2782         "uv\\u0101cr\\u0325",
2783         "dharmak\\u1E63\\u0113tr\\u0113",
2784         "kuruk\\u1E63\\u0113tr\\u0113",
2785         "samav\\u0113t\\u0101",
2786         "yuyutsava\\u1E25",
2787         "m\\u0101mak\\u0101\\u1E25",
2788      // "p\\u0101\\u1E47\\u1E0Dav\\u0101\\u015Bcaiva",
2789         "kimakurvata",
2790         "san\\u0304java"
2791     };
2792     const int MAX_LEN = UPRV_LENGTHOF(source);
2793     const char* const expected[MAX_LEN] = {
2794         "\\u0930\\u094D\\u092E\\u094D\\u0915\\u094D\\u0937\\u0947\\u0924\\u094D",
2795         "\\u0936\\u094d\\u0930\\u0940\\u092e\\u0926\\u094d",
2796         "\\u092d\\u0917\\u0935\\u0926\\u094d\\u0917\\u0940\\u0924\\u093e",
2797         "\\u0905\\u0927\\u094d\\u092f\\u093e\\u092f",
2798         "\\u0905\\u0930\\u094d\\u091c\\u0941\\u0928",
2799         "\\u0935\\u093f\\u0937\\u093e\\u0926",
2800         "\\u092f\\u094b\\u0917",
2801         "\\u0927\\u0943\\u0924\\u0930\\u093e\\u0937\\u094d\\u091f\\u094d\\u0930",
2802         "\\u0909\\u0935\\u093E\\u091A\\u0943",
2803         "\\u0927\\u0930\\u094d\\u092e\\u0915\\u094d\\u0937\\u0947\\u0924\\u094d\\u0930\\u0947",
2804         "\\u0915\\u0941\\u0930\\u0941\\u0915\\u094d\\u0937\\u0947\\u0924\\u094d\\u0930\\u0947",
2805         "\\u0938\\u092e\\u0935\\u0947\\u0924\\u093e",
2806         "\\u092f\\u0941\\u092f\\u0941\\u0924\\u094d\\u0938\\u0935\\u0903",
2807         "\\u092e\\u093e\\u092e\\u0915\\u093e\\u0903",
2808     //  "\\u092a\\u093e\\u0923\\u094d\\u0921\\u0935\\u093e\\u0936\\u094d\\u091a\\u0948\\u0935",
2809         "\\u0915\\u093f\\u092e\\u0915\\u0941\\u0930\\u094d\\u0935\\u0924",
2810         "\\u0938\\u0902\\u091c\\u0935"
2811     };
2812     if(MAX_LEN != UPRV_LENGTHOF(expected)) {
2813         errln("error in TestCompoundLatinRT: source[] and expected[] have different lengths!");
2814         return;
2815     }
2816
2817     UErrorCode status = U_ZERO_ERROR;
2818     UParseError parseError;
2819     UnicodeString message;
2820     Transliterator* devToLatinToDev  =Transliterator::createInstance("Devanagari-Latin;Latin-Devanagari", UTRANS_FORWARD, parseError, status);
2821     Transliterator* latinToDevToLatin=Transliterator::createInstance("Latin-Devanagari;Devanagari-Latin", UTRANS_FORWARD, parseError, status);
2822     Transliterator* devToTelToDev    =Transliterator::createInstance("Devanagari-Telugu;Telugu-Devanagari", UTRANS_FORWARD, parseError, status);
2823     Transliterator* latinToTelToLatin=Transliterator::createInstance("Latin-Telugu;Telugu-Latin", UTRANS_FORWARD, parseError, status);
2824
2825     if(U_FAILURE(status)){
2826         dataerrln("FAIL: construction " +   UnicodeString(" Error: ") + u_errorName(status));
2827         dataerrln("PreContext: " + prettify(parseError.preContext) + " PostContext: " + prettify( parseError.postContext) );
2828         return;
2829     }
2830     UnicodeString gotResult;
2831     for(int i= 0; i<MAX_LEN; i++){
2832         gotResult = source[i];
2833         expect(*devToLatinToDev,CharsToUnicodeString(expected[i]),CharsToUnicodeString(expected[i]));
2834         expect(*latinToDevToLatin,CharsToUnicodeString(source[i]),CharsToUnicodeString(source[i]));
2835         expect(*latinToTelToLatin,CharsToUnicodeString(source[i]),CharsToUnicodeString(source[i]));
2836
2837     }
2838     delete(latinToDevToLatin);
2839     delete(devToLatinToDev);
2840     delete(devToTelToDev);
2841     delete(latinToTelToLatin);
2842 }
2843
2844 /**
2845  * Test Gurmukhi-Devanagari Tippi and Bindi
2846  */
2847 void TransliteratorTest::TestGurmukhiDevanagari(){
2848     // the rule says:
2849     // (\u0902) (when preceded by vowel)      --->  (\u0A02)
2850     // (\u0902) (when preceded by consonant)  --->  (\u0A70)
2851     UErrorCode status = U_ZERO_ERROR;
2852     UnicodeSet vowel(UnicodeString("[\\u0905-\\u090A \\u090F\\u0910\\u0913\\u0914 \\u093e-\\u0942\\u0947\\u0948\\u094B\\u094C\\u094D]", -1, US_INV).unescape(), status);
2853     UnicodeSet non_vowel(UnicodeString("[\\u0915-\\u0928\\u092A-\\u0930]", -1, US_INV).unescape(), status);
2854     UParseError parseError;
2855
2856     UnicodeSetIterator vIter(vowel);
2857     UnicodeSetIterator nvIter(non_vowel);
2858     Transliterator* trans = Transliterator::createInstance("Devanagari-Gurmukhi",UTRANS_FORWARD, parseError, status);
2859     if(U_FAILURE(status)) {
2860       dataerrln("Error creating transliterator %s", u_errorName(status));
2861       delete trans;
2862       return;
2863     }
2864     UnicodeString src (" \\u0902", -1, US_INV);
2865     UnicodeString expected(" \\u0A02", -1, US_INV);
2866     src = src.unescape();
2867     expected= expected.unescape();
2868
2869     while(vIter.next()){
2870         src.setCharAt(0,(UChar) vIter.getCodepoint());
2871         expected.setCharAt(0,(UChar) (vIter.getCodepoint()+0x0100));
2872         expect(*trans,src,expected);
2873     }
2874
2875     expected.setCharAt(1,0x0A70);
2876     while(nvIter.next()){
2877         //src.setCharAt(0,(char) nvIter.codepoint);
2878         src.setCharAt(0,(UChar)nvIter.getCodepoint());
2879         expected.setCharAt(0,(UChar) (nvIter.getCodepoint()+0x0100));
2880         expect(*trans,src,expected);
2881     }
2882     delete trans;
2883 }
2884 /**
2885  * Test instantiation from a locale.
2886  */
2887 void TransliteratorTest::TestLocaleInstantiation(void) {
2888     UParseError pe;
2889     UErrorCode ec = U_ZERO_ERROR;
2890     Transliterator *t = Transliterator::createInstance("ru_RU-Latin", UTRANS_FORWARD, pe, ec);
2891     if (U_FAILURE(ec)) {
2892         dataerrln("FAIL: createInstance(ru_RU-Latin) - %s", u_errorName(ec));
2893         delete t;
2894         return;
2895     }
2896     expect(*t, CharsToUnicodeString("\\u0430"), "a");
2897     delete t;
2898
2899     t = Transliterator::createInstance("en-el", UTRANS_FORWARD, pe, ec);
2900     if (U_FAILURE(ec)) {
2901         errln("FAIL: createInstance(en-el)");
2902         delete t;
2903         return;
2904     }
2905     expect(*t, "a", CharsToUnicodeString("\\u03B1"));
2906     delete t;
2907 }
2908
2909 /**
2910  * Test title case handling of accent (should ignore accents)
2911  */
2912 void TransliteratorTest::TestTitleAccents(void) {
2913     UParseError pe;
2914     UErrorCode ec = U_ZERO_ERROR;
2915     Transliterator *t = Transliterator::createInstance("Title", UTRANS_FORWARD, pe, ec);
2916     if (U_FAILURE(ec)) {
2917         errln("FAIL: createInstance(Title)");
2918         delete t;
2919         return;
2920     }
2921     expect(*t, CharsToUnicodeString("a\\u0300b can't abe"), CharsToUnicodeString("A\\u0300b Can't Abe"));
2922     delete t;
2923 }
2924
2925 /**
2926  * Basic test of a locale resource based rule.
2927  */
2928 void TransliteratorTest::TestLocaleResource() {
2929     const char* DATA[] = {
2930         // id                    from               to
2931         //"Latin-Greek/UNGEGN",    "b",               "\\u03bc\\u03c0",
2932         "Latin-el",              "b",               "\\u03bc\\u03c0",
2933         "Latin-Greek",           "b",               "\\u03B2",
2934         "Greek-Latin/UNGEGN",    "\\u03B2",         "v",
2935         "el-Latin",              "\\u03B2",         "v",
2936         "Greek-Latin",           "\\u03B2",         "b",
2937     };
2938     const int32_t DATA_length = UPRV_LENGTHOF(DATA);
2939     for (int32_t i=0; i<DATA_length; i+=3) {
2940         UParseError pe;
2941         UErrorCode ec = U_ZERO_ERROR;
2942         Transliterator *t = Transliterator::createInstance(DATA[i], UTRANS_FORWARD, pe, ec);
2943         if (U_FAILURE(ec)) {
2944             dataerrln((UnicodeString)"FAIL: createInstance(" + DATA[i] + ") - " + u_errorName(ec));
2945             delete t;
2946             continue;
2947         }
2948         expect(*t, CharsToUnicodeString(DATA[i+1]),
2949                CharsToUnicodeString(DATA[i+2]));
2950         delete t;
2951     }
2952 }
2953
2954 /**
2955  * Make sure parse errors reference the right line.
2956  */
2957 void TransliteratorTest::TestParseError() {
2958     static const char* rule =
2959         "a > b;\n"
2960         "# more stuff\n"
2961         "d << b;";
2962     UErrorCode ec = U_ZERO_ERROR;
2963     UParseError pe;
2964     Transliterator *t = Transliterator::createFromRules("ID", rule, UTRANS_FORWARD, pe, ec);
2965     delete t;
2966     if (U_FAILURE(ec)) {
2967         UnicodeString err(pe.preContext);
2968         err.append((UChar)124/*|*/).append(pe.postContext);
2969         if (err.indexOf("d << b") >= 0) {
2970             logln("Ok: " + err);
2971         } else {
2972             errln("FAIL: " + err);
2973         }
2974     }
2975     else {
2976         errln("FAIL: no syntax error");
2977     }
2978     static const char* maskingRule =
2979         "a>x;\n"
2980         "# more stuff\n"
2981         "ab>y;";
2982     ec = U_ZERO_ERROR;
2983     delete Transliterator::createFromRules("ID", maskingRule, UTRANS_FORWARD, pe, ec);
2984     if (ec != U_RULE_MASK_ERROR) {
2985         errln("FAIL: returned %s instead of U_RULE_MASK_ERROR", u_errorName(ec));
2986     }
2987     else if (UnicodeString("a > x;") != UnicodeString(pe.preContext)) {
2988         errln("FAIL: did not get expected precontext");
2989     }
2990     else if (UnicodeString("ab > y;") != UnicodeString(pe.postContext)) {
2991         errln("FAIL: did not get expected postcontext");
2992     }
2993 }
2994
2995 /**
2996  * Make sure sets on output are disallowed.
2997  */
2998 void TransliteratorTest::TestOutputSet() {
2999     UnicodeString rule = "$set = [a-cm-n]; b > $set;";
3000     UErrorCode ec = U_ZERO_ERROR;
3001     UParseError pe;
3002     Transliterator *t = Transliterator::createFromRules("ID", rule, UTRANS_FORWARD, pe, ec);
3003     delete t;
3004     if (U_FAILURE(ec)) {
3005         UnicodeString err(pe.preContext);
3006         err.append((UChar)124/*|*/).append(pe.postContext);
3007         logln("Ok: " + err);
3008         return;
3009     }
3010     errln("FAIL: No syntax error");
3011 }
3012
3013 /**
3014  * Test the use variable range pragma, making sure that use of
3015  * variable range characters is detected and flagged as an error.
3016  */
3017 void TransliteratorTest::TestVariableRange() {
3018     UnicodeString rule = "use variable range 0x70 0x72; a > A; b > B; q > Q;";
3019     UErrorCode ec = U_ZERO_ERROR;
3020     UParseError pe;
3021     Transliterator *t = Transliterator::createFromRules("ID", rule, UTRANS_FORWARD, pe, ec);
3022     delete t;
3023     if (U_FAILURE(ec)) {
3024         UnicodeString err(pe.preContext);
3025         err.append((UChar)124/*|*/).append(pe.postContext);
3026         logln("Ok: " + err);
3027         return;
3028     }
3029     errln("FAIL: No syntax error");
3030 }
3031
3032 /**
3033  * Test invalid post context error handling
3034  */
3035 void TransliteratorTest::TestInvalidPostContext() {
3036     UnicodeString rule = "a}b{c>d;";
3037     UErrorCode ec = U_ZERO_ERROR;
3038     UParseError pe;
3039     Transliterator *t = Transliterator::createFromRules("ID", rule, UTRANS_FORWARD, pe, ec);
3040     delete t;
3041     if (U_FAILURE(ec)) {
3042         UnicodeString err(pe.preContext);
3043         err.append((UChar)124/*|*/).append(pe.postContext);
3044         if (err.indexOf("a}b{c") >= 0) {
3045             logln("Ok: " + err);
3046         } else {
3047             errln("FAIL: " + err);
3048         }
3049         return;
3050     }
3051     errln("FAIL: No syntax error");
3052 }
3053
3054 /**
3055  * Test ID form variants
3056  */
3057 void TransliteratorTest::TestIDForms() {
3058     const char* DATA[] = {
3059         "NFC", NULL, "NFD",
3060         "nfd", NULL, "NFC", // make sure case is ignored
3061         "Any-NFKD", NULL, "Any-NFKC",
3062         "Null", NULL, "Null",
3063         "-nfkc", "nfkc", "NFKD",
3064         "-nfkc/", "nfkc", "NFKD",
3065         "Latin-Greek/UNGEGN", NULL, "Greek-Latin/UNGEGN",
3066         "Greek/UNGEGN-Latin", "Greek-Latin/UNGEGN", "Latin-Greek/UNGEGN",
3067         "Bengali-Devanagari/", "Bengali-Devanagari", "Devanagari-Bengali",
3068         "Source-", NULL, NULL,
3069         "Source/Variant-", NULL, NULL,
3070         "Source-/Variant", NULL, NULL,
3071         "/Variant", NULL, NULL,
3072         "/Variant-", NULL, NULL,
3073         "-/Variant", NULL, NULL,
3074         "-/", NULL, NULL,
3075         "-", NULL, NULL,
3076         "/", NULL, NULL,
3077     };
3078     const int32_t DATA_length = UPRV_LENGTHOF(DATA);
3079
3080     for (int32_t i=0; i<DATA_length; i+=3) {
3081         const char* ID = DATA[i];
3082         const char* expID = DATA[i+1];
3083         const char* expInvID = DATA[i+2];
3084         UBool expValid = (expInvID != NULL);
3085         if (expID == NULL) {
3086             expID = ID;
3087         }
3088         UParseError pe;
3089         UErrorCode ec = U_ZERO_ERROR;
3090         Transliterator *t =
3091             Transliterator::createInstance(ID, UTRANS_FORWARD, pe, ec);
3092         if (U_FAILURE(ec)) {
3093             if (!expValid) {
3094                 logln((UnicodeString)"Ok: getInstance(" + ID +") => " + u_errorName(ec));
3095             } else {
3096                 dataerrln((UnicodeString)"FAIL: Couldn't create " + ID + " - " + u_errorName(ec));
3097             }
3098             delete t;
3099             continue;
3100         }
3101         Transliterator *u = t->createInverse(ec);
3102         if (U_FAILURE(ec)) {
3103             errln((UnicodeString)"FAIL: Couldn't create inverse of " + ID);
3104             delete t;
3105             delete u;
3106             continue;
3107         }
3108         if (t->getID() == expID &&
3109             u->getID() == expInvID) {
3110             logln((UnicodeString)"Ok: " + ID + ".getInverse() => " + expInvID);
3111         } else {
3112             errln((UnicodeString)"FAIL: getInstance(" + ID + ") => " +
3113                   t->getID() + " x getInverse() => " + u->getID() +
3114                   ", expected " + expInvID);
3115         }
3116         delete t;
3117         delete u;
3118     }
3119 }
3120
3121 static const UChar SPACE[]   = {32,0};
3122 static const UChar NEWLINE[] = {10,0};
3123 static const UChar RETURN[]  = {13,0};
3124 static const UChar EMPTY[]   = {0};
3125
3126 void TransliteratorTest::checkRules(const UnicodeString& label, Transliterator& t2,
3127                                     const UnicodeString& testRulesForward) {
3128     UnicodeString rules2; t2.toRules(rules2, TRUE);
3129     //rules2 = TestUtility.replaceAll(rules2, new UnicodeSet("[' '\n\r]"), "");
3130     rules2.findAndReplace(SPACE, EMPTY);
3131     rules2.findAndReplace(NEWLINE, EMPTY);
3132     rules2.findAndReplace(RETURN, EMPTY);
3133
3134     UnicodeString testRules(testRulesForward); testRules.findAndReplace(SPACE, EMPTY);
3135
3136     if (rules2 != testRules) {
3137         errln(label);
3138         logln((UnicodeString)"GENERATED RULES: " + rules2);
3139         logln((UnicodeString)"SHOULD BE:       " + testRulesForward);
3140     }
3141 }
3142
3143 /**
3144  * Mark's toRules test.
3145  */
3146 void TransliteratorTest::TestToRulesMark() {
3147     const char* testRules =
3148         "::[[:Latin:][:Mark:]];"
3149         "::NFKD (NFC);"
3150         "::Lower (Lower);"
3151         "a <> \\u03B1;" // alpha
3152         "::NFKC (NFD);"
3153         "::Upper (Lower);"
3154         "::Lower ();"
3155         "::([[:Greek:][:Mark:]]);"
3156         ;
3157     const char* testRulesForward =
3158         "::[[:Latin:][:Mark:]];"
3159         "::NFKD(NFC);"
3160         "::Lower(Lower);"
3161         "a > \\u03B1;"
3162         "::NFKC(NFD);"
3163         "::Upper (Lower);"
3164         "::Lower ();"
3165         ;
3166     const char* testRulesBackward =
3167         "::[[:Greek:][:Mark:]];"
3168         "::Lower (Upper);"
3169         "::NFD(NFKC);"
3170         "\\u03B1 > a;"
3171         "::Lower(Lower);"
3172         "::NFC(NFKD);"
3173         ;
3174     UnicodeString source = CharsToUnicodeString("\\u00E1"); // a-acute
3175     UnicodeString target = CharsToUnicodeString("\\u03AC"); // alpha-acute
3176
3177     UParseError pe;
3178     UErrorCode ec = U_ZERO_ERROR;
3179     LocalPointer<Transliterator> t2(
3180             Transliterator::createFromRules("source-target", UnicodeString(testRules, -1, US_INV), UTRANS_FORWARD, pe, ec));
3181     LocalPointer<Transliterator> t3(
3182             Transliterator::createFromRules("target-source", UnicodeString(testRules, -1, US_INV), UTRANS_REVERSE, pe, ec));
3183
3184     if (U_FAILURE(ec)) {
3185         dataerrln((UnicodeString)"FAIL: createFromRules => " + u_errorName(ec));
3186         return;
3187     }
3188
3189     expect(*t2, source, target);
3190     expect(*t3, target, source);
3191
3192     checkRules("Failed toRules FORWARD", *t2, UnicodeString(testRulesForward, -1, US_INV));
3193     checkRules("Failed toRules BACKWARD", *t3, UnicodeString(testRulesBackward, -1, US_INV));
3194 }
3195
3196 /**
3197  * Test Escape and Unescape transliterators.
3198  */
3199 void TransliteratorTest::TestEscape() {
3200     UParseError pe;
3201     UErrorCode ec;
3202     Transliterator *t;
3203
3204     ec = U_ZERO_ERROR;
3205     t = Transliterator::createInstance("Hex-Any", UTRANS_FORWARD, pe, ec);
3206     if (U_FAILURE(ec)) {
3207         errln((UnicodeString)"FAIL: createInstance");
3208     } else {
3209         expect(*t,
3210                UNICODE_STRING_SIMPLE("\\x{40}\\U00000031&#x32;&#81;"),
3211                "@12Q");
3212     }
3213     delete t;
3214
3215     ec = U_ZERO_ERROR;
3216     t = Transliterator::createInstance("Any-Hex/C", UTRANS_FORWARD, pe, ec);
3217     if (U_FAILURE(ec)) {
3218         errln((UnicodeString)"FAIL: createInstance");
3219     } else {
3220         expect(*t,
3221                CharsToUnicodeString("A\\U0010BEEF\\uFEED"),
3222                UNICODE_STRING_SIMPLE("\\u0041\\U0010BEEF\\uFEED"));
3223     }
3224     delete t;
3225
3226     ec = U_ZERO_ERROR;
3227     t = Transliterator::createInstance("Any-Hex/Java", UTRANS_FORWARD, pe, ec);
3228     if (U_FAILURE(ec)) {
3229         errln((UnicodeString)"FAIL: createInstance");
3230     } else {
3231         expect(*t,
3232                CharsToUnicodeString("A\\U0010BEEF\\uFEED"),
3233                UNICODE_STRING_SIMPLE("\\u0041\\uDBEF\\uDEEF\\uFEED"));
3234     }
3235     delete t;
3236
3237     ec = U_ZERO_ERROR;
3238     t = Transliterator::createInstance("Any-Hex/Perl", UTRANS_FORWARD, pe, ec);
3239     if (U_FAILURE(ec)) {
3240         errln((UnicodeString)"FAIL: createInstance");
3241     } else {
3242         expect(*t,
3243                CharsToUnicodeString("A\\U0010BEEF\\uFEED"),
3244                UNICODE_STRING_SIMPLE("\\x{41}\\x{10BEEF}\\x{FEED}"));
3245     }
3246     delete t;
3247 }
3248
3249
3250 void TransliteratorTest::TestAnchorMasking(){
3251     UnicodeString rule ("^a > Q; a > q;");
3252     UErrorCode status= U_ZERO_ERROR;
3253     UParseError parseError;
3254
3255     Transliterator* t = Transliterator::createFromRules("ID", rule, UTRANS_FORWARD,parseError,status);
3256     if(U_FAILURE(status)){
3257         errln(UnicodeString("FAIL: ") + "ID" +
3258               ".createFromRules() => bad rules" +
3259               /*", parse error " + parseError.code +*/
3260               ", line " + parseError.line +
3261               ", offset " + parseError.offset +
3262               ", context " + prettify(parseError.preContext, TRUE) +
3263               ", rules: " + prettify(rule, TRUE));
3264     }
3265     delete t;
3266 }
3267
3268 /**
3269  * Make sure display names of variants look reasonable.
3270  */
3271 void TransliteratorTest::TestDisplayName() {
3272 #if UCONFIG_NO_FORMATTING
3273     logln("Skipping, UCONFIG_NO_FORMATTING is set\n");
3274     return;
3275 #else
3276     static const char* DATA[] = {
3277         // ID, forward name, reverse name
3278         // Update the text as necessary -- the important thing is
3279         // not the text itself, but how various cases are handled.
3280
3281         // Basic test
3282         "Any-Hex", "Any to Hex Escape", "Hex Escape to Any",
3283
3284         // Variants
3285         "Any-Hex/Perl", "Any to Hex Escape/Perl", "Hex Escape to Any/Perl",
3286
3287         // Target-only IDs
3288         "NFC", "Any to NFC", "Any to NFD",
3289     };
3290
3291     int32_t DATA_length = UPRV_LENGTHOF(DATA);
3292
3293     Locale US("en", "US");
3294
3295     for (int32_t i=0; i<DATA_length; i+=3) {
3296         UnicodeString name;
3297         Transliterator::getDisplayName(DATA[i], US, name);
3298         if (name != DATA[i+1]) {
3299             dataerrln((UnicodeString)"FAIL: " + DATA[i] + ".getDisplayName() => " +
3300                   name + ", expected " + DATA[i+1]);
3301         } else {
3302             logln((UnicodeString)"Ok: " + DATA[i] + ".getDisplayName() => " + name);
3303         }
3304         UErrorCode ec = U_ZERO_ERROR;
3305         UParseError pe;
3306         Transliterator *t = Transliterator::createInstance(DATA[i], UTRANS_REVERSE, pe, ec);
3307         if (U_FAILURE(ec)) {
3308             delete t;
3309             dataerrln("FAIL: createInstance failed - %s", u_errorName(ec));
3310             continue;
3311         }
3312         name = Transliterator::getDisplayName(t->getID(), US, name);
3313         if (name != DATA[i+2]) {
3314             dataerrln((UnicodeString)"FAIL: " + t->getID() + ".getDisplayName() => " +
3315                   name + ", expected " + DATA[i+2]);
3316         } else {
3317             logln((UnicodeString)"Ok: " + t->getID() + ".getDisplayName() => " + name);
3318         }
3319         delete t;
3320     }
3321 #endif
3322 }
3323
3324 void TransliteratorTest::TestSpecialCases(void) {
3325     const UnicodeString registerRules[] = {
3326         "Any-Dev1", "x > X; y > Y;",
3327         "Any-Dev2", "XY > Z",
3328         "Greek-Latin/FAKE",
3329             CharsToUnicodeString
3330             ("[^[:L:][:M:]] { \\u03bc\\u03c0 > b ; \\u03bc\\u03c0 } [^[:L:][:M:]] > b ; [^[:L:][:M:]] { [\\u039c\\u03bc][\\u03a0\\u03c0] > B ; [\\u039c\\u03bc][\\u03a0\\u03c0] } [^[:L:][:M:]] > B ;"),
3331         "" // END MARKER
3332     };
3333
3334     const UnicodeString testCases[] = {
3335         // NORMALIZATION
3336         // should add more test cases
3337         "NFD" , CharsToUnicodeString("a\\u0300 \\u00E0 \\u1100\\u1161 \\uFF76\\uFF9E\\u03D3"), "",
3338         "NFC" , CharsToUnicodeString("a\\u0300 \\u00E0 \\u1100\\u1161 \\uFF76\\uFF9E\\u03D3"), "",
3339         "NFKD", CharsToUnicodeString("a\\u0300 \\u00E0 \\u1100\\u1161 \\uFF76\\uFF9E\\u03D3"), "",
3340         "NFKC", CharsToUnicodeString("a\\u0300 \\u00E0 \\u1100\\u1161 \\uFF76\\uFF9E\\u03D3"), "",
3341
3342         // mp -> b BUG
3343         "Greek-Latin/UNGEGN", CharsToUnicodeString("(\\u03BC\\u03C0)"), "(b)",
3344         "Greek-Latin/FAKE", CharsToUnicodeString("(\\u03BC\\u03C0)"), "(b)",
3345
3346         // check for devanagari bug
3347         "nfd;Dev1;Dev2;nfc", "xy", "Z",
3348
3349         // ff, i, dotless-i, I, dotted-I, LJLjlj deseret deeDEE
3350         "Title", CharsToUnicodeString("ab'cD ffi\\u0131I\\u0130 \\u01C7\\u01C8\\u01C9 ") + DESERET_dee + DESERET_DEE,
3351                  CharsToUnicodeString("Ab'cd Ffi\\u0131ii\\u0307 \\u01C8\\u01C9\\u01C9 ") + DESERET_DEE + DESERET_dee,
3352
3353         //TODO: enable this test once Titlecase works right
3354         /*
3355         "Title", CharsToUnicodeString("\\uFB00i\\u0131I\\u0130 \\u01C7\\u01C8\\u01C9 ") + DESERET_dee + DESERET_DEE,
3356                  CharsToUnicodeString("Ffi\\u0131ii \\u01C8\\u01C9\\u01C9 ") + DESERET_DEE + DESERET_dee,
3357                  */
3358         "Upper", CharsToUnicodeString("ab'cD \\uFB00i\\u0131I\\u0130 \\u01C7\\u01C8\\u01C9 ") + DESERET_dee + DESERET_DEE,
3359                  CharsToUnicodeString("AB'CD FFIII\\u0130 \\u01C7\\u01C7\\u01C7 ") + DESERET_DEE + DESERET_DEE,
3360         "Lower", CharsToUnicodeString("ab'cD \\uFB00i\\u0131I\\u0130 \\u01C7\\u01C8\\u01C9 ") + DESERET_dee + DESERET_DEE,
3361                  CharsToUnicodeString("ab'cd \\uFB00i\\u0131ii\\u0307 \\u01C9\\u01C9\\u01C9 ") + DESERET_dee + DESERET_dee,
3362
3363         "Upper", CharsToUnicodeString("ab'cD \\uFB00i\\u0131I\\u0130 \\u01C7\\u01C8\\u01C9 ") + DESERET_dee + DESERET_DEE, "",
3364         "Lower", CharsToUnicodeString("ab'cD \\uFB00i\\u0131I\\u0130 \\u01C7\\u01C8\\u01C9 ") + DESERET_dee + DESERET_DEE, "",
3365
3366          // FORMS OF S
3367         "Greek-Latin/UNGEGN",  CharsToUnicodeString("\\u03C3 \\u03C3\\u03C2 \\u03C2\\u03C3"),
3368                                CharsToUnicodeString("s ss s\\u0331s\\u0331") ,
3369         "Latin-Greek/UNGEGN",  CharsToUnicodeString("s ss s\\u0331s\\u0331"),
3370                                CharsToUnicodeString("\\u03C3 \\u03C3\\u03C2 \\u03C2\\u03C3") ,
3371         "Greek-Latin",  CharsToUnicodeString("\\u03C3 \\u03C3\\u03C2 \\u03C2\\u03C3"),
3372                         CharsToUnicodeString("s ss s\\u0331s\\u0331") ,
3373         "Latin-Greek",  CharsToUnicodeString("s ss s\\u0331s\\u0331"),
3374                         CharsToUnicodeString("\\u03C3 \\u03C3\\u03C2 \\u03C2\\u03C3"),
3375         // Tatiana bug
3376         // Upper: TAT\\u02B9\\u00C2NA
3377         // Lower: tat\\u02B9\\u00E2na
3378         // Title: Tat\\u02B9\\u00E2na
3379         "Upper", CharsToUnicodeString("tat\\u02B9\\u00E2na"),
3380                  CharsToUnicodeString("TAT\\u02B9\\u00C2NA"),
3381         "Lower", CharsToUnicodeString("TAT\\u02B9\\u00C2NA"),
3382                  CharsToUnicodeString("tat\\u02B9\\u00E2na"),
3383         "Title", CharsToUnicodeString("tat\\u02B9\\u00E2na"),
3384                  CharsToUnicodeString("Tat\\u02B9\\u00E2na"),
3385
3386         "" // END MARKER
3387     };
3388
3389     UParseError pos;
3390     int32_t i;
3391     for (i = 0; registerRules[i].length()!=0; i+=2) {
3392         UErrorCode status = U_ZERO_ERROR;
3393
3394         Transliterator *t = Transliterator::createFromRules(registerRules[0+i],
3395             registerRules[i+1], UTRANS_FORWARD, pos, status);
3396         if (U_FAILURE(status)) {
3397             dataerrln("Fails: Unable to create the transliterator from rules. - %s", u_errorName(status));
3398         } else {
3399             Transliterator::registerInstance(t);
3400         }
3401     }
3402     for (i = 0; testCases[i].length()!=0; i+=3) {
3403         UErrorCode ec = U_ZERO_ERROR;
3404         UParseError pe;
3405         const UnicodeString& name = testCases[i];
3406         Transliterator *t = Transliterator::createInstance(name, UTRANS_FORWARD, pe, ec);
3407         if (U_FAILURE(ec)) {
3408             dataerrln((UnicodeString)"FAIL: Couldn't create " + name + " - " + u_errorName(ec));
3409             delete t;
3410             continue;
3411         }
3412         const UnicodeString& id = t->getID();
3413         const UnicodeString& source = testCases[i+1];
3414         UnicodeString target;
3415
3416         // Automatic generation of targets, to make it simpler to add test cases (and more fail-safe)
3417
3418         if (testCases[i+2].length() > 0) {
3419             target = testCases[i+2];
3420         } else if (0==id.caseCompare("NFD", U_FOLD_CASE_DEFAULT)) {
3421             Normalizer::normalize(source, UNORM_NFD, 0, target, ec);
3422         } else if (0==id.caseCompare("NFC", U_FOLD_CASE_DEFAULT)) {
3423             Normalizer::normalize(source, UNORM_NFC, 0, target, ec);
3424         } else if (0==id.caseCompare("NFKD", U_FOLD_CASE_DEFAULT)) {
3425             Normalizer::normalize(source, UNORM_NFKD, 0, target, ec);
3426         } else if (0==id.caseCompare("NFKC", U_FOLD_CASE_DEFAULT)) {
3427             Normalizer::normalize(source, UNORM_NFKC, 0, target, ec);
3428         } else if (0==id.caseCompare("Lower", U_FOLD_CASE_DEFAULT)) {
3429             target = source;
3430             target.toLower(Locale::getUS());
3431         } else if (0==id.caseCompare("Upper", U_FOLD_CASE_DEFAULT)) {
3432             target = source;
3433             target.toUpper(Locale::getUS());
3434         }
3435         if (U_FAILURE(ec)) {
3436             errln((UnicodeString)"FAIL: Internal error normalizing " + source);
3437             continue;
3438         }
3439
3440         expect(*t, source, target);
3441         delete t;
3442     }
3443     for (i = 0; registerRules[i].length()!=0; i+=2) {
3444         Transliterator::unregister(registerRules[i]);
3445     }
3446 }
3447
3448 char* Char32ToEscapedChars(UChar32 ch, char* buffer) {
3449     if (ch <= 0xFFFF) {
3450         sprintf(buffer, "\\u%04x", (int)ch);
3451     } else {
3452         sprintf(buffer, "\\U%08x", (int)ch);
3453     }
3454     return buffer;
3455 }
3456
3457 void TransliteratorTest::TestSurrogateCasing (void) {
3458     // check that casing handles surrogates
3459     // titlecase is currently defective
3460     char buffer[20];
3461     UChar buffer2[20];
3462     UChar32 dee;
3463     U16_GET(DESERET_dee,0, 0, DESERET_dee.length(), dee);
3464     UnicodeString DEE(u_totitle(dee));
3465     if (DEE != DESERET_DEE) {
3466         err("Fails titlecase of surrogates");
3467         err(Char32ToEscapedChars(dee, buffer));
3468         err(", ");
3469         errln(Char32ToEscapedChars(DEE.char32At(0), buffer));
3470     }
3471
3472     UnicodeString deeDEETest=DESERET_dee + DESERET_DEE;
3473     UnicodeString deedeeTest = DESERET_dee + DESERET_dee;
3474     UnicodeString DEEDEETest = DESERET_DEE + DESERET_DEE;
3475     UErrorCode status= U_ZERO_ERROR;
3476
3477     u_strToUpper(buffer2, 20, deeDEETest.getBuffer(), deeDEETest.length(), NULL, &status);
3478     if (U_FAILURE(status) || (UnicodeString(buffer2)!= DEEDEETest)) {
3479         errln("Fails: Can't uppercase surrogates.");
3480     }
3481
3482     status= U_ZERO_ERROR;
3483     u_strToLower(buffer2, 20, deeDEETest.getBuffer(), deeDEETest.length(), NULL, &status);
3484     if (U_FAILURE(status) || (UnicodeString(buffer2)!= deedeeTest)) {
3485         errln("Fails: Can't lowercase surrogates.");
3486     }
3487 }
3488
3489 static void _trans(Transliterator& t, const UnicodeString& src,
3490                    UnicodeString& result) {
3491     result = src;
3492     t.transliterate(result);
3493 }
3494
3495 static void _trans(const UnicodeString& id, const UnicodeString& src,
3496                    UnicodeString& result, UErrorCode ec) {
3497     UParseError pe;
3498     Transliterator *t = Transliterator::createInstance(id, UTRANS_FORWARD, pe, ec);
3499     if (U_SUCCESS(ec)) {
3500         _trans(*t, src, result);
3501     }
3502     delete t;
3503 }
3504
3505 static UnicodeString _findMatch(const UnicodeString& source,
3506                                        const UnicodeString* pairs) {
3507     UnicodeString empty;
3508     for (int32_t i=0; pairs[i].length() > 0; i+=2) {
3509         if (0==source.caseCompare(pairs[i], U_FOLD_CASE_DEFAULT)) {
3510             return pairs[i+1];
3511         }
3512     }
3513     return empty;
3514 }
3515
3516 // Check to see that incremental gets at least part way through a reasonable string.
3517
3518 void TransliteratorTest::TestIncrementalProgress(void) {
3519     UErrorCode ec = U_ZERO_ERROR;
3520     UnicodeString latinTest = "The Quick Brown Fox.";
3521     UnicodeString devaTest;
3522     _trans("Latin-Devanagari", latinTest, devaTest, ec);
3523     UnicodeString kataTest;
3524     _trans("Latin-Katakana", latinTest, kataTest, ec);
3525     if (U_FAILURE(ec)) {
3526         errln("FAIL: Internal error");
3527         return;
3528     }
3529     const UnicodeString tests[] = {
3530         "Any", latinTest,
3531         "Latin", latinTest,
3532         "Halfwidth", latinTest,
3533         "Devanagari", devaTest,
3534         "Katakana", kataTest,
3535         "" // END MARKER
3536     };
3537
3538     UnicodeString test("The Quick Brown Fox Jumped Over The Lazy Dog.");
3539     int32_t i = 0, j=0, k=0;
3540     int32_t sources = Transliterator::countAvailableSources();
3541     for (i = 0; i < sources; i++) {
3542         UnicodeString source;
3543         Transliterator::getAvailableSource(i, source);
3544         UnicodeString test = _findMatch(source, tests);
3545         if (test.length() == 0) {
3546             logln((UnicodeString)"Skipping " + source + "-X");
3547             continue;
3548         }
3549         int32_t targets = Transliterator::countAvailableTargets(source);
3550         for (j = 0; j < targets; j++) {
3551             UnicodeString target;
3552             Transliterator::getAvailableTarget(j, source, target);
3553             int32_t variants = Transliterator::countAvailableVariants(source, target);
3554             for (k =0; k< variants; k++) {
3555                 UnicodeString variant;
3556                 UParseError err;
3557                 UErrorCode status = U_ZERO_ERROR;
3558
3559                 Transliterator::getAvailableVariant(k, source, target, variant);
3560                 UnicodeString id = source + "-" + target + "/" + variant;
3561
3562                 Transliterator *t = Transliterator::createInstance(id, UTRANS_FORWARD, err, status);
3563                 if (U_FAILURE(status)) {
3564                     dataerrln((UnicodeString)"FAIL: Could not create " + id);
3565                     delete t;
3566                     continue;
3567                 }
3568                 status = U_ZERO_ERROR;
3569                 CheckIncrementalAux(t, test);
3570
3571                 UnicodeString rev;
3572                 _trans(*t, test, rev);
3573                 Transliterator *inv = t->createInverse(status);
3574                 if (U_FAILURE(status)) {
3575                     // The following are forward-only, it is OK that creating an inverse will not work:
3576                     // 1. Devanagari-Arabic
3577                     // 2. Any-*/BGN
3578                     // 2a. Any-*/BGN_1981
3579                     // 3. Any-*/UNGEGN
3580                     // 4. Any-*/MNS
3581                     // If UCONFIG_NO_BREAK_ITERATION is on, Latin-Thai is also not expected to work.
3582                     if (    id.compare((UnicodeString)"Devanagari-Arabic/") != 0
3583                          && !(id.startsWith((UnicodeString)"Any-") &&
3584                                 (id.endsWith((UnicodeString)"/BGN") || id.endsWith((UnicodeString)"/BGN_1981") || id.endsWith((UnicodeString)"/UNGEGN") || id.endsWith((UnicodeString)"/MNS"))
3585                              )
3586 #if UCONFIG_NO_BREAK_ITERATION
3587                          && id.compare((UnicodeString)"Latin-Thai/") != 0
3588 #endif
3589                        )
3590                     {
3591                         errln((UnicodeString)"FAIL: Could not create inverse of " + id);
3592                     }
3593                     delete t;
3594                     delete inv;
3595                     continue;
3596                 }
3597                 CheckIncrementalAux(inv, rev);
3598                 delete t;
3599                 delete inv;
3600             }
3601         }
3602     }
3603 }
3604
3605 void TransliteratorTest::CheckIncrementalAux(const Transliterator* t,
3606                                                       const UnicodeString& input) {
3607     UErrorCode ec = U_ZERO_ERROR;
3608     UTransPosition pos;
3609     UnicodeString test = input;
3610
3611     pos.contextStart = 0;
3612     pos.contextLimit = input.length();
3613     pos.start = 0;
3614     pos.limit = input.length();
3615
3616     t->transliterate(test, pos, ec);
3617     if (U_FAILURE(ec)) {
3618         errln((UnicodeString)"FAIL: transliterate() error " + u_errorName(ec));
3619         return;
3620     }
3621     UBool gotError = FALSE;
3622     (void)gotError;    // Suppress set but not used warning.
3623
3624     // we have a few special cases. Any-Remove (pos.start = 0, but also = limit) and U+XXXXX?X?
3625
3626     if (pos.start == 0 && pos.limit != 0 && t->getID() != "Hex-Any/Unicode") {
3627         errln((UnicodeString)"No Progress, " +
3628               t->getID() + ": " + formatInput(test, input, pos));
3629         gotError = TRUE;
3630     } else {
3631         logln((UnicodeString)"PASS Progress, " +
3632               t->getID() + ": " + formatInput(test, input, pos));
3633     }
3634     t->finishTransliteration(test, pos);
3635     if (pos.start != pos.limit) {
3636         errln((UnicodeString)"Incomplete, " +
3637               t->getID() + ": " + formatInput(test, input, pos));
3638         gotError = TRUE;
3639     }
3640 }
3641
3642 void TransliteratorTest::TestFunction() {
3643     // Careful with spacing and ';' here:  Phrase this exactly
3644     // as toRules() is going to return it.  If toRules() changes
3645     // with regard to spacing or ';', then adjust this string.
3646     UnicodeString rule =
3647         "([:Lu:]) > $1 '(' &Lower( $1 ) '=' &Hex( &Any-Lower( $1 ) ) ')';";
3648
3649     UParseError pe;
3650     UErrorCode ec = U_ZERO_ERROR;
3651     Transliterator *t = Transliterator::createFromRules("Test", rule, UTRANS_FORWARD, pe, ec);
3652     if (t == NULL) {
3653         dataerrln("FAIL: createFromRules failed - %s", u_errorName(ec));
3654         return;
3655     }
3656
3657     UnicodeString r;
3658     t->toRules(r, TRUE);
3659     if (r == rule) {
3660         logln((UnicodeString)"OK: toRules() => " + r);
3661     } else {
3662         errln((UnicodeString)"FAIL: toRules() => " + r +
3663               ", expected " + rule);
3664     }
3665
3666     expect(*t, "The Quick Brown Fox",
3667            UNICODE_STRING_SIMPLE("T(t=\\u0074)he Q(q=\\u0071)uick B(b=\\u0062)rown F(f=\\u0066)ox"));
3668
3669     delete t;
3670 }
3671
3672 void TransliteratorTest::TestInvalidBackRef(void) {
3673     UnicodeString rule =  ". > $1;";
3674     UnicodeString rule2 =CharsToUnicodeString("(.) <> &hex/unicode($1) &name($1); . > $1; [{}] >\\u0020;");
3675     UParseError pe;
3676     UErrorCode ec = U_ZERO_ERROR;
3677     Transliterator *t = Transliterator::createFromRules("Test", rule, UTRANS_FORWARD, pe, ec);
3678     Transliterator *t2 = Transliterator::createFromRules("Test2", rule2, UTRANS_FORWARD, pe, ec);
3679
3680     if (t != NULL) {
3681         errln("FAIL: createFromRules should have returned NULL");
3682         delete t;
3683     }
3684
3685     if (t2 != NULL) {
3686         errln("FAIL: createFromRules should have returned NULL");
3687         delete t2;
3688     }
3689
3690     if (U_SUCCESS(ec)) {
3691         errln("FAIL: Ok: . > $1; => no error");
3692     } else {
3693         logln((UnicodeString)"Ok: . > $1; => " + u_errorName(ec));
3694     }
3695 }
3696
3697 void TransliteratorTest::TestMulticharStringSet() {
3698     // Basic testing
3699     const char* rule =
3700         "       [{aa}]       > x;"
3701         "         a          > y;"
3702         "       [b{bc}]      > z;"
3703         "[{gd}] { e          > q;"
3704         "         e } [{fg}] > r;" ;
3705
3706     UParseError pe;
3707     UErrorCode ec = U_ZERO_ERROR;
3708     Transliterator* t = Transliterator::createFromRules("Test", rule, UTRANS_FORWARD, pe, ec);
3709     if (t == NULL || U_FAILURE(ec)) {
3710         delete t;
3711         errln("FAIL: createFromRules failed");
3712         return;
3713     }
3714
3715     expect(*t, "a aa ab bc d gd de gde gdefg ddefg",
3716            "y x yz z d gd de gdq gdqfg ddrfg");
3717     delete t;
3718
3719     // Overlapped string test.  Make sure that when multiple
3720     // strings can match that the longest one is matched.
3721     rule =
3722         "    [a {ab} {abc}]    > x;"
3723         "           b          > y;"
3724         "           c          > z;"
3725         " q [t {st} {rst}] { e > p;" ;
3726
3727     t = Transliterator::createFromRules("Test", rule, UTRANS_FORWARD, pe, ec);
3728     if (t == NULL || U_FAILURE(ec)) {
3729         delete t;
3730         errln("FAIL: createFromRules failed");
3731         return;
3732     }
3733
3734     expect(*t, "a ab abc qte qste qrste",
3735            "x x x qtp qstp qrstp");
3736     delete t;
3737 }
3738
3739 // vvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvv
3740 // BEGIN TestUserFunction support factory
3741
3742 Transliterator* _TUFF[4];
3743 UnicodeString* _TUFID[4];
3744
3745 static Transliterator* U_EXPORT2 _TUFFactory(const UnicodeString& /*ID*/,
3746                                    Transliterator::Token context) {
3747     return _TUFF[context.integer]->clone();
3748 }
3749
3750 static void _TUFReg(const UnicodeString& ID, Transliterator* t, int32_t n) {
3751     _TUFF[n] = t;
3752     _TUFID[n] = new UnicodeString(ID);
3753     Transliterator::registerFactory(ID, _TUFFactory, Transliterator::integerToken(n));
3754 }
3755
3756 static void _TUFUnreg(int32_t n) {
3757     if (_TUFF[n] != NULL) {
3758         Transliterator::unregister(*_TUFID[n]);
3759         delete _TUFF[n];
3760         delete _TUFID[n];
3761     }
3762 }
3763
3764 // END TestUserFunction support factory
3765 // ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
3766
3767 /**
3768  * Test that user-registered transliterators can be used under function
3769  * syntax.
3770  */
3771 void TransliteratorTest::TestUserFunction() {
3772
3773     Transliterator* t;
3774     UParseError pe;
3775     UErrorCode ec = U_ZERO_ERROR;
3776
3777     // Setup our factory
3778     int32_t i;
3779     for (i=0; i<4; ++i) {
3780         _TUFF[i] = NULL;
3781     }
3782
3783     // There's no need to register inverses if we don't use them
3784     t = Transliterator::createFromRules("gif",
3785                                         UNICODE_STRING_SIMPLE("'\\'u(..)(..) > '<img src=\"http://www.unicode.org/gifs/24/' $1 '/U' $1$2 '.gif\">';"),
3786                                         UTRANS_FORWARD, pe, ec);
3787     if (t == NULL || U_FAILURE(ec)) {
3788         dataerrln((UnicodeString)"FAIL: createFromRules gif " + u_errorName(ec));
3789         return;
3790     }
3791     _TUFReg("Any-gif", t, 0);
3792
3793     t = Transliterator::createFromRules("RemoveCurly",
3794                                         UNICODE_STRING_SIMPLE("[\\{\\}] > ; '\\N' > ;"),
3795                                         UTRANS_FORWARD, pe, ec);
3796     if (t == NULL || U_FAILURE(ec)) {
3797         errln((UnicodeString)"FAIL: createFromRules RemoveCurly " + u_errorName(ec));
3798         goto FAIL;
3799     }
3800     expect(*t, UNICODE_STRING_SIMPLE("\\N{name}"), "name");
3801     _TUFReg("Any-RemoveCurly", t, 1);
3802
3803     logln("Trying &hex");
3804     t = Transliterator::createFromRules("hex2",
3805                                         "(.) > &hex($1);",
3806                                         UTRANS_FORWARD, pe, ec);
3807     if (t == NULL || U_FAILURE(ec)) {
3808         errln("FAIL: createFromRules");
3809         goto FAIL;
3810     }
3811     logln("Registering");
3812     _TUFReg("Any-hex2", t, 2);
3813     t = Transliterator::createInstance("Any-hex2", UTRANS_FORWARD, ec);
3814     if (t == NULL || U_FAILURE(ec)) {
3815         errln((UnicodeString)"FAIL: createInstance Any-hex2 " + u_errorName(ec));
3816         goto FAIL;
3817     }
3818     expect(*t, "abc", UNICODE_STRING_SIMPLE("\\u0061\\u0062\\u0063"));
3819     delete t;
3820
3821     logln("Trying &gif");
3822     t = Transliterator::createFromRules("gif2",
3823                                         "(.) > &Gif(&Hex2($1));",
3824                                         UTRANS_FORWARD, pe, ec);
3825     if (t == NULL || U_FAILURE(ec)) {
3826         errln((UnicodeString)"FAIL: createFromRules gif2 " + u_errorName(ec));
3827         goto FAIL;
3828     }
3829     logln("Registering");
3830     _TUFReg("Any-gif2", t, 3);
3831     t = Transliterator::createInstance("Any-gif2", UTRANS_FORWARD, ec);
3832     if (t == NULL || U_FAILURE(ec)) {
3833         errln((UnicodeString)"FAIL: createInstance Any-gif2 " + u_errorName(ec));
3834         goto FAIL;
3835     }
3836     expect(*t, "ab", "<img src=\"http://www.unicode.org/gifs/24/00/U0061.gif\">"
3837            "<img src=\"http://www.unicode.org/gifs/24/00/U0062.gif\">");
3838     delete t;
3839
3840     // Test that filters are allowed after &
3841     t = Transliterator::createFromRules("test",
3842                                         "(.) > &Hex($1) ' ' &RemoveCurly(&Name($1)) ' ';",
3843                                         UTRANS_FORWARD, pe, ec);
3844     if (t == NULL || U_FAILURE(ec)) {
3845         errln((UnicodeString)"FAIL: createFromRules test " + u_errorName(ec));
3846         goto FAIL;
3847     }
3848     expect(*t, "abc",
3849            UNICODE_STRING_SIMPLE("\\u0061 LATIN SMALL LETTER A \\u0062 LATIN SMALL LETTER B \\u0063 LATIN SMALL LETTER C "));
3850     delete t;
3851
3852  FAIL:
3853     for (i=0; i<4; ++i) {
3854         _TUFUnreg(i);
3855     }
3856 }
3857
3858 /**
3859  * Test the Any-X transliterators.
3860  */
3861 void TransliteratorTest::TestAnyX(void) {
3862     UParseError parseError;
3863     UErrorCode status = U_ZERO_ERROR;
3864     Transliterator* anyLatin =
3865         Transliterator::createInstance("Any-Latin", UTRANS_FORWARD, parseError, status);
3866     if (anyLatin==0) {
3867         dataerrln("FAIL: createInstance returned NULL - %s", u_errorName(status));
3868         delete anyLatin;
3869         return;
3870     }
3871
3872     expect(*anyLatin,
3873            CharsToUnicodeString("greek:\\u03B1\\u03B2\\u03BA\\u0391\\u0392\\u039A hiragana:\\u3042\\u3076\\u304F cyrillic:\\u0430\\u0431\\u0446"),
3874            CharsToUnicodeString("greek:abkABK hiragana:abuku cyrillic:abc"));
3875
3876     delete anyLatin;
3877
3878     status = U_ZERO_ERROR;
3879     Transliterator* anyASCII =
3880         Transliterator::createInstance("Any-Latin;Latin-ASCII", UTRANS_FORWARD, parseError, status);
3881     if (U_FAILURE(status) || anyASCII==0) {
3882         dataerrln("FAIL: createInstance returned NULL and/or set status %s", u_errorName(status));
3883         delete anyASCII;
3884         return;
3885     }
3886
3887     expect(*anyASCII,
3888            CharsToUnicodeString("ArabicDigits:\\u0660\\u0661\\u0664\\u0669 PersianDigits:\\u06F0\\u06F1\\u06F4\\u06F9"),
3889            CharsToUnicodeString("ArabicDigits:0149 PersianDigits:0149"));
3890
3891     delete anyASCII;
3892 }
3893
3894 /**
3895  * Test Any-X transliterators with sample letters from all scripts.
3896  */
3897 void TransliteratorTest::TestAny(void) {
3898     UErrorCode status = U_ZERO_ERROR;
3899     // Note: there is a lot of implict construction of UnicodeStrings from (char *) in
3900     //       function call parameters going on in this test.
3901     UnicodeSet alphabetic("[:alphabetic:]", status);
3902     if (U_FAILURE(status)) {
3903         dataerrln("Failure: file %s, line %d, status = %s", __FILE__, __LINE__, u_errorName(status));
3904         return;
3905     }
3906     alphabetic.freeze();
3907
3908     UnicodeString testString;
3909     for (int32_t i = 0; i < USCRIPT_CODE_LIMIT; i++) {
3910         const char *scriptName = uscript_getShortName((UScriptCode)i);
3911         if (scriptName == NULL) {
3912             errln("Failure: file %s, line %d: Script Code %d is invalid, ", __FILE__, __LINE__, i);
3913             return;
3914         }
3915
3916         UnicodeSet sample;
3917         sample.applyPropertyAlias("script", scriptName, status);
3918         if (U_FAILURE(status)) {
3919             errln("Failure: file %s, line %d, status = %s", __FILE__, __LINE__, u_errorName(status));
3920             return;
3921         }
3922         sample.retainAll(alphabetic);
3923         for (int32_t count=0; count<5; count++) {
3924             UChar32 c = sample.charAt(count);
3925             if (c == -1) {
3926                 break;
3927             }
3928             testString.append(c);
3929         }
3930     }
3931
3932     UParseError parseError;
3933     Transliterator* anyLatin =
3934         Transliterator::createInstance("Any-Latin", UTRANS_FORWARD, parseError, status);
3935     if (U_FAILURE(status)) {
3936         dataerrln("Failure: file %s, line %d, status = %s", __FILE__, __LINE__, u_errorName(status));
3937         return;
3938     }
3939
3940     logln(UnicodeString("Sample set for Any-Latin: ") + testString);
3941     anyLatin->transliterate(testString);
3942     logln(UnicodeString("Sample result for Any-Latin: ") + testString);
3943     delete anyLatin;
3944 }
3945
3946
3947 /**
3948  * Test the source and target set API.  These are only implemented
3949  * for RBT and CompoundTransliterator at this time.
3950  */
3951 void TransliteratorTest::TestSourceTargetSet() {
3952     UErrorCode ec = U_ZERO_ERROR;
3953
3954     // Rules
3955     const char* r =
3956         "a > b; "
3957         "r [x{lu}] > q;";
3958
3959     // Expected source
3960     UnicodeSet expSrc("[arx{lu}]", ec);
3961
3962     // Expected target
3963     UnicodeSet expTrg("[bq]", ec);
3964
3965     UParseError pe;
3966     Transliterator* t = Transliterator::createFromRules("test", r, UTRANS_FORWARD, pe, ec);
3967
3968     if (U_FAILURE(ec)) {
3969         delete t;
3970         errln("FAIL: Couldn't set up test");
3971         return;
3972     }
3973
3974     UnicodeSet src; t->getSourceSet(src);
3975     UnicodeSet trg; t->getTargetSet(trg);
3976
3977     if (src == expSrc && trg == expTrg) {
3978         UnicodeString a, b;
3979         logln((UnicodeString)"Ok: " +
3980               r + " => source = " + src.toPattern(a, TRUE) +
3981               ", target = " + trg.toPattern(b, TRUE));
3982     } else {
3983         UnicodeString a, b, c, d;
3984         errln((UnicodeString)"FAIL: " +
3985               r + " => source = " + src.toPattern(a, TRUE) +
3986               ", expected " + expSrc.toPattern(b, TRUE) +
3987               "; target = " + trg.toPattern(c, TRUE) +
3988               ", expected " + expTrg.toPattern(d, TRUE));
3989     }
3990
3991     delete t;
3992 }
3993
3994 /**
3995  * Test handling of Pattern_White_Space, for both RBT and UnicodeSet.
3996  */
3997 void TransliteratorTest::TestPatternWhiteSpace() {
3998     // Rules
3999     const char* r = "a > \\u200E b;";
4000
4001     UErrorCode ec = U_ZERO_ERROR;
4002     UParseError pe;
4003     Transliterator* t = Transliterator::createFromRules("test", CharsToUnicodeString(r), UTRANS_FORWARD, pe, ec);
4004
4005     if (U_FAILURE(ec)) {
4006         errln("FAIL: Couldn't set up test");
4007     } else {
4008         expect(*t, "a", "b");
4009     }
4010     delete t;
4011
4012     // UnicodeSet
4013     ec = U_ZERO_ERROR;
4014     UnicodeSet set(CharsToUnicodeString("[a \\u200E]"), ec);
4015
4016     if (U_FAILURE(ec)) {
4017         errln("FAIL: Couldn't set up test");
4018     } else {
4019         if (set.contains(0x200E)) {
4020             errln("FAIL: U+200E not being ignored by UnicodeSet");
4021         }
4022     }
4023 }
4024 //======================================================================
4025 // this method is in TestUScript.java
4026 //======================================================================
4027 void TransliteratorTest::TestAllCodepoints(){
4028     UScriptCode code= USCRIPT_INVALID_CODE;
4029     char id[256]={'\0'};
4030     char abbr[256]={'\0'};
4031     char newId[256]={'\0'};
4032     char newAbbrId[256]={'\0'};
4033     char oldId[256]={'\0'};
4034     char oldAbbrId[256]={'\0'};
4035
4036     UErrorCode status =U_ZERO_ERROR;
4037     UParseError pe;
4038
4039     for(uint32_t i = 0; i<=0x10ffff; i++){
4040         code =  uscript_getScript(i,&status);
4041         if(code == USCRIPT_INVALID_CODE){
4042             dataerrln("uscript_getScript for codepoint \\U%08X failed.", i);
4043         }
4044         const char* myId = uscript_getName(code);
4045         if(!myId) {
4046           dataerrln("Valid script code returned NULL name. Check your data!");
4047           return;
4048         }
4049         uprv_strcpy(id,myId);
4050         uprv_strcpy(abbr,uscript_getShortName(code));
4051
4052         uprv_strcpy(newId,"[:");
4053         uprv_strcat(newId,id);
4054         uprv_strcat(newId,":];NFD");
4055
4056         uprv_strcpy(newAbbrId,"[:");
4057         uprv_strcat(newAbbrId,abbr);
4058         uprv_strcat(newAbbrId,":];NFD");
4059
4060         if(uprv_strcmp(newId,oldId)!=0){
4061             Transliterator* t = Transliterator::createInstance(newId,UTRANS_FORWARD,pe,status);
4062             if(t==NULL || U_FAILURE(status)){
4063                 dataerrln((UnicodeString)"FAIL: Could not create " + id + " - " + u_errorName(status));
4064             }
4065             delete t;
4066         }
4067         if(uprv_strcmp(newAbbrId,oldAbbrId)!=0){
4068             Transliterator* t = Transliterator::createInstance(newAbbrId,UTRANS_FORWARD,pe,status);
4069             if(t==NULL || U_FAILURE(status)){
4070                 dataerrln((UnicodeString)"FAIL: Could not create " + id + " - " + u_errorName(status));
4071             }
4072             delete t;
4073         }
4074         uprv_strcpy(oldId,newId);
4075         uprv_strcpy(oldAbbrId, newAbbrId);
4076
4077     }
4078
4079 }
4080
/*
 * TEST_TRANSLIT_ID(id, cls): instantiate the transliterator registered under
 * `id` and verify that its dynamic class ID equals cls::getStaticClassID().
 * A creation failure is reported through dataerrln together with the ICU
 * error name.  The instance is deleted in every case.
 */
#define TEST_TRANSLIT_ID(id, cls) UPRV_BLOCK_MACRO_BEGIN { \
  UErrorCode idStatus = U_ZERO_ERROR; \
  Transliterator* byId = Transliterator::createInstance(id, UTRANS_FORWARD, idStatus); \
  if (U_SUCCESS(idStatus)) { \
    if (byId->getDynamicClassID() != cls::getStaticClassID()) { \
      errln("FAIL: " #cls " dynamic and static class ID mismatch"); \
    } \
    /* *byId = *byId; */ /*can't do this: coverage test for assignment op*/ \
  } else { \
    dataerrln("FAIL: Couldn't create %s - %s", id, u_errorName(idStatus)); \
  } \
  delete byId; \
} UPRV_BLOCK_MACRO_END
4094
/*
 * TEST_TRANSLIT_RULE(rule, cls): build a transliterator from the rule text
 * `rule` and verify that its dynamic class ID equals
 * cls::getStaticClassID().  The instance is deleted in every case.
 *
 * The rule text is passed to errln as a %s argument instead of being pasted
 * into the format string, so a rule containing '%' cannot be misread as a
 * printf conversion.  The ICU error name is reported as well.
 */
#define TEST_TRANSLIT_RULE(rule, cls) UPRV_BLOCK_MACRO_BEGIN { \
  UErrorCode ec = U_ZERO_ERROR; \
  UParseError pe; \
  Transliterator* t = Transliterator::createFromRules("_", rule, UTRANS_FORWARD, pe, ec); \
  if (U_FAILURE(ec)) { \
    errln("FAIL: Couldn't create %s - %s", rule, u_errorName(ec)); \
  } else { \
    if (t->getDynamicClassID() != cls ::getStaticClassID()) { \
      errln("FAIL: " #cls " dynamic and static class ID mismatch"); \
    } \
    /* *t = *t; */ /*can't do this: coverage test for assignment op*/ \
  } \
  delete t; \
} UPRV_BLOCK_MACRO_END
4109
4110 void TransliteratorTest::TestBoilerplate() {
4111     TEST_TRANSLIT_ID("Any-Latin", AnyTransliterator);
4112     TEST_TRANSLIT_ID("Any-Hex", EscapeTransliterator);
4113     TEST_TRANSLIT_ID("Hex-Any", UnescapeTransliterator);
4114     TEST_TRANSLIT_ID("Lower", LowercaseTransliterator);
4115     TEST_TRANSLIT_ID("Upper", UppercaseTransliterator);
4116     TEST_TRANSLIT_ID("Title", TitlecaseTransliterator);
4117     TEST_TRANSLIT_ID("Null", NullTransliterator);
4118     TEST_TRANSLIT_ID("Remove", RemoveTransliterator);
4119     TEST_TRANSLIT_ID("Any-Name", UnicodeNameTransliterator);
4120     TEST_TRANSLIT_ID("Name-Any", NameUnicodeTransliterator);
4121     TEST_TRANSLIT_ID("NFD", NormalizationTransliterator);
4122     TEST_TRANSLIT_ID("Latin-Greek", CompoundTransliterator);
4123     TEST_TRANSLIT_RULE("a>b;", RuleBasedTransliterator);
4124 }
4125
4126 void TransliteratorTest::TestAlternateSyntax() {
4127     // U+2206 == &
4128     // U+2190 == <
4129     // U+2192 == >
4130     // U+2194 == <>
4131     expect(CharsToUnicodeString("a \\u2192 x; b \\u2190 y; c \\u2194 z"),
4132            "abc",
4133            "xbz");
4134     expect(CharsToUnicodeString("([:^ASCII:]) \\u2192 \\u2206Name($1);"),
4135            CharsToUnicodeString("<=\\u2190; >=\\u2192; <>=\\u2194; &=\\u2206"),
4136            UNICODE_STRING_SIMPLE("<=\\N{LEFTWARDS ARROW}; >=\\N{RIGHTWARDS ARROW}; <>=\\N{LEFT RIGHT ARROW}; &=\\N{INCREMENT}"));
4137 }
4138
// Rule sets shared by TestBeginEnd and TestBeginEndToRules.  Entries are
// referenced by index, so every disabled ::BEGIN/::END variant keeps an
// empty-string placeholder in its slot; the disabled rule text is retained
// in line comments next to the placeholder.
static const char* BEGIN_END_RULES[] = {
    // [0]
    "abc > xy;"
    "aba > z;",

    // [1] (disabled; placeholder keeps the indexes stable)
    //    "::BEGIN;"
    //    "abc > xy;"
    //    "::END;"
    //    "::BEGIN;"
    //    "aba > z;"
    //    "::END;",
    "",

    // [2] (disabled; placeholder keeps the indexes stable)
    //    "abc > xy;"
    //    "::BEGIN;"
    //    "aba > z;"
    //    "::END;",
    "",

    // [3] (disabled; placeholder keeps the indexes stable)
    //    "::BEGIN;"
    //    "abc > xy;"
    //    "::END;"
    //    "aba > z;",
    "",

    // [4]
    "abc > xy;"
    "::Null;"
    "aba > z;",

    // [5]
    "::Upper;"
    "ABC > xy;"
    "AB > x;"
    "C > z;"
    "::Upper;"
    "XYZ > p;"
    "XY > q;"
    "Z > r;"
    "::Upper;",

    // [6]
    "$ws = [[:Separator:][\\u0009-\\u000C]$];"
    "$delim = [\\-$ws];"
    "$ws $delim* > ' ';"
    "'-' $delim* > '-';",

    // [7]
    "::Null;"
    "$ws = [[:Separator:][\\u0009-\\u000C]$];"
    "$delim = [\\-$ws];"
    "$ws $delim* > ' ';"
    "'-' $delim* > '-';",

    // [8]
    "$ws = [[:Separator:][\\u0009-\\u000C]$];"
    "$delim = [\\-$ws];"
    "$ws $delim* > ' ';"
    "'-' $delim* > '-';"
    "::Null;",

    // [9]
    "$ws = [[:Separator:][\\u0009-\\u000C]$];"
    "$delim = [\\-$ws];"
    "::Null;"
    "$ws $delim* > ' ';"
    "'-' $delim* > '-';",

    // [10] (disabled; placeholder keeps the indexes stable)
    //    "::BEGIN;"
    //    "$ws = [[:Separator:][\\u0009-\\u000C]$];"
    //    "$delim = [\\-$ws];"
    //    "::END;"
    //    "$ws $delim* > ' ';"
    //    "'-' $delim* > '-';",
    "",

    // [11] (disabled; placeholder keeps the indexes stable)
    //    "$ws = [[:Separator:][\\u0009-\\u000C]$];"
    //    "$delim = [\\-$ws];"
    //    "::BEGIN;"
    //    "$ws $delim* > ' ';"
    //    "'-' $delim* > '-';"
    //    "::END;",
    "",

    // [12] (disabled; placeholder keeps the indexes stable)
    //    "$ws = [[:Separator:][\\u0009-\\u000C]$];"
    //    "$delim = [\\-$ws];"
    //    "$ab = [ab];"
    //    "::BEGIN;"
    //    "$ws $delim* > ' ';"
    //    "'-' $delim* > '-';"
    //    "::END;"
    //    "::BEGIN;"
    //    "$ab { ' ' } $ab > '-';"
    //    "c { ' ' > ;"
    //    "::END;"
    //    "::BEGIN;"
    //    "'a-a' > a\\%|a;"
    //    "::END;",
    "",

    // [13]
    "$ws = [[:Separator:][\\u0009-\\u000C]$];"
    "$delim = [\\-$ws];"
    "$ab = [ab];"
    "::Null;"
    "$ws $delim* > ' ';"
    "'-' $delim* > '-';"
    "::Null;"
    "$ab { ' ' } $ab > '-';"
    "c { ' ' > ;"
    "::Null;"
    "'a-a' > a\\%|a;",

    // [14] (disabled; placeholder keeps the indexes stable)
    //    "::[abc];"
    //    "::BEGIN;"
    //    "abc > xy;"
    //    "::END;"
    //    "::BEGIN;"
    //    "aba > yz;"
    //    "::END;"
    //    "::Upper;",
    "",

    // [15]
    "::[abc];"
    "abc > xy;"
    "::Null;"
    "aba > yz;"
    "::Upper;",

    // [16] (disabled; placeholder keeps the indexes stable)
    //    "::[abc];"
    //    "::BEGIN;"
    //    "abc <> xy;"
    //    "::END;"
    //    "::BEGIN;"
    //    "aba <> yz;"
    //    "::END;"
    //    "::Upper(Lower);"
    //    "::([XYZ]);"
    "",

    // [17]
    "::[abc];"
    "abc <> xy;"
    "::Null;"
    "aba <> yz;"
    "::Upper(Lower);"
    "::([XYZ]);"
};
4312
4313 /*
4314 (This entire test is commented out below and will need some heavy revision when we re-add
4315 the ::BEGIN/::END stuff)
4316 static const char* BOGUS_BEGIN_END_RULES[] = {
4317     // [7]
4318     "::BEGIN;"
4319     "abc > xy;"
4320     "::BEGIN;"
4321     "aba > z;"
4322     "::END;"
4323     "::END;",
4324
4325     // [8]
4326     "abc > xy;"
4327     " aba > z;"
4328     "::END;",
4329
4330     // [9]
4331     "::BEGIN;"
4332     "::Upper;"
4333     "::END;"
4334 };
4335 static const int32_t BOGUS_BEGIN_END_RULES_length = UPRV_LENGTHOF(BOGUS_BEGIN_END_RULES);
4336 */
4337
4338 static const char* BEGIN_END_TEST_CASES[] = {
4339     // rules             input                   expected output
4340     BEGIN_END_RULES[0],  "abc ababc aba",        "xy zbc z",
4341 //    BEGIN_END_RULES[1],  "abc ababc aba",        "xy abxy z",
4342 //    BEGIN_END_RULES[2],  "abc ababc aba",        "xy abxy z",
4343 //    BEGIN_END_RULES[3],  "abc ababc aba",        "xy abxy z",
4344     BEGIN_END_RULES[4],  "abc ababc aba",        "xy abxy z",
4345     BEGIN_END_RULES[5],  "abccabaacababcbc",     "PXAARXQBR",
4346
4347     BEGIN_END_RULES[6],  "e   e - e---e-  e",    "e e e-e-e",
4348     BEGIN_END_RULES[7],  "e   e - e---e-  e",    "e e e-e-e",
4349     BEGIN_END_RULES[8],  "e   e - e---e-  e",    "e e e-e-e",
4350     BEGIN_END_RULES[9],  "e   e - e---e-  e",    "e e e-e-e",
4351 //    BEGIN_END_RULES[10],  "e   e - e---e-  e",    "e e e-e-e",
4352 //    BEGIN_END_RULES[11], "e   e - e---e-  e",    "e e e-e-e",
4353 //    BEGIN_END_RULES[12], "e   e - e---e-  e",    "e e e-e-e",
4354 //    BEGIN_END_RULES[12], "a    a    a    a",     "a%a%a%a",
4355 //    BEGIN_END_RULES[12], "a a-b c b a",          "a%a-b cb-a",
4356     BEGIN_END_RULES[13], "e   e - e---e-  e",    "e e e-e-e",
4357     BEGIN_END_RULES[13], "a    a    a    a",     "a%a%a%a",
4358     BEGIN_END_RULES[13], "a a-b c b a",          "a%a-b cb-a",
4359
4360 //    BEGIN_END_RULES[14], "abc xy ababc xyz aba", "XY xy ABXY xyz YZ",
4361     BEGIN_END_RULES[15], "abc xy ababc xyz aba", "XY xy ABXY xyz YZ",
4362 //    BEGIN_END_RULES[16], "abc xy ababc xyz aba", "XY xy ABXY xyz YZ",
4363     BEGIN_END_RULES[17], "abc xy ababc xyz aba", "XY xy ABXY xyz YZ"
4364 };
4365 static const int32_t BEGIN_END_TEST_CASES_length = UPRV_LENGTHOF(BEGIN_END_TEST_CASES);
4366
4367 void TransliteratorTest::TestBeginEnd() {
4368     // run through the list of test cases above
4369     int32_t i = 0;
4370     for (i = 0; i < BEGIN_END_TEST_CASES_length; i += 3) {
4371         expect((UnicodeString)"Test case #" + (i / 3),
4372                UnicodeString(BEGIN_END_TEST_CASES[i], -1, US_INV),
4373                UnicodeString(BEGIN_END_TEST_CASES[i + 1], -1, US_INV),
4374                UnicodeString(BEGIN_END_TEST_CASES[i + 2], -1, US_INV));
4375     }
4376
4377     // instantiate the one reversible rule set in the reverse direction and make sure it does the right thing
4378     UParseError parseError;
4379     UErrorCode status = U_ZERO_ERROR;
4380     Transliterator* reversed  = Transliterator::createFromRules("Reversed", UnicodeString(BEGIN_END_RULES[17]),
4381             UTRANS_REVERSE, parseError, status);
4382     if (reversed == 0 || U_FAILURE(status)) {
4383         reportParseError(UnicodeString("FAIL: Couldn't create reversed transliterator"), parseError, status);
4384     } else {
4385         expect(*reversed, UnicodeString("xy XY XYZ yz YZ"), UnicodeString("xy abc xaba yz aba"));
4386     }
4387     delete reversed;
4388
4389     // finally, run through the list of syntactically-ill-formed rule sets above and make sure
4390     // that all of them cause errors
4391 /*
4392 (commented out until we have the real ::BEGIN/::END stuff in place
4393     for (i = 0; i < BOGUS_BEGIN_END_RULES_length; i++) {
4394         UParseError parseError;
4395         UErrorCode status = U_ZERO_ERROR;
4396         Transliterator* t = Transliterator::createFromRules("foo", UnicodeString(BOGUS_BEGIN_END_RULES[i]),
4397                 UTRANS_FORWARD, parseError, status);
4398         if (!U_FAILURE(status)) {
4399             delete t;
4400             errln((UnicodeString)"Should have gotten syntax error from " + BOGUS_BEGIN_END_RULES[i]);
4401         }
4402     }
4403 */
4404 }
4405
4406 void TransliteratorTest::TestBeginEndToRules() {
4407     // run through the same list of test cases we used above, but this time, instead of just
4408     // instantiating a Transliterator from the rules and running the test against it, we instantiate
4409     // a Transliterator from the rules, do toRules() on it, instantiate a Transliterator from
4410     // the resulting set of rules, and make sure that the generated rule set is semantically equivalent
4411     // to (i.e., does the same thing as) the original rule set
4412     for (int32_t i = 0; i < BEGIN_END_TEST_CASES_length; i += 3) {
4413         UParseError parseError;
4414         UErrorCode status = U_ZERO_ERROR;
4415         Transliterator* t = Transliterator::createFromRules("--", UnicodeString(BEGIN_END_TEST_CASES[i], -1, US_INV),
4416                 UTRANS_FORWARD, parseError, status);
4417         if (U_FAILURE(status)) {
4418             reportParseError(UnicodeString("FAIL: Couldn't create transliterator"), parseError, status);
4419         } else {
4420             UnicodeString rules;
4421             t->toRules(rules, TRUE);
4422             Transliterator* t2 = Transliterator::createFromRules((UnicodeString)"Test case #" + (i / 3), rules,
4423                     UTRANS_FORWARD, parseError, status);
4424             if (U_FAILURE(status)) {
4425                 reportParseError(UnicodeString("FAIL: Couldn't create transliterator from generated rules"),
4426                         parseError, status);
4427                 delete t;
4428             } else {
4429                 expect(*t2,
4430                        UnicodeString(BEGIN_END_TEST_CASES[i + 1], -1, US_INV),
4431                        UnicodeString(BEGIN_END_TEST_CASES[i + 2], -1, US_INV));
4432                 delete t;
4433                 delete t2;
4434             }
4435         }
4436     }
4437
4438     // do the same thing for the reversible test case
4439     UParseError parseError;
4440     UErrorCode status = U_ZERO_ERROR;
4441     Transliterator* reversed = Transliterator::createFromRules("Reversed", UnicodeString(BEGIN_END_RULES[17]),
4442             UTRANS_REVERSE, parseError, status);
4443     if (U_FAILURE(status)) {
4444         reportParseError(UnicodeString("FAIL: Couldn't create reversed transliterator"), parseError, status);
4445     } else {
4446         UnicodeString rules;
4447         reversed->toRules(rules, FALSE);
4448         Transliterator* reversed2 = Transliterator::createFromRules("Reversed", rules, UTRANS_FORWARD,
4449                 parseError, status);
4450         if (U_FAILURE(status)) {
4451             reportParseError(UnicodeString("FAIL: Couldn't create reversed transliterator from generated rules"),
4452                     parseError, status);
4453             delete reversed;
4454         } else {
4455             expect(*reversed2,
4456                    UnicodeString("xy XY XYZ yz YZ"),
4457                    UnicodeString("xy abc xaba yz aba"));
4458             delete reversed;
4459             delete reversed2;
4460         }
4461     }
4462 }
4463
4464 void TransliteratorTest::TestRegisterAlias() {
4465     UnicodeString longID("Lower;[aeiou]Upper");
4466     UnicodeString shortID("Any-CapVowels");
4467     UnicodeString reallyShortID("CapVowels");
4468
4469     Transliterator::registerAlias(shortID, longID);
4470
4471     UErrorCode err = U_ZERO_ERROR;
4472     Transliterator* t1 = Transliterator::createInstance(longID, UTRANS_FORWARD, err);
4473     if (U_FAILURE(err)) {
4474         errln("Failed to instantiate transliterator with long ID");
4475         Transliterator::unregister(shortID);
4476         return;
4477     }
4478     Transliterator* t2 = Transliterator::createInstance(reallyShortID, UTRANS_FORWARD, err);
4479     if (U_FAILURE(err)) {
4480         errln("Failed to instantiate transliterator with short ID");
4481         delete t1;
4482         Transliterator::unregister(shortID);
4483         return;
4484     }
4485
4486     if (t1->getID() != longID)
4487         errln("Transliterator instantiated with long ID doesn't have long ID");
4488     if (t2->getID() != reallyShortID)
4489         errln("Transliterator instantiated with short ID doesn't have short ID");
4490
4491     UnicodeString rules1;
4492     UnicodeString rules2;
4493
4494     t1->toRules(rules1, TRUE);
4495     t2->toRules(rules2, TRUE);
4496     if (rules1 != rules2)
4497         errln("Alias transliterators aren't the same");
4498
4499     delete t1;
4500     delete t2;
4501     Transliterator::unregister(shortID);
4502
4503     t1 = Transliterator::createInstance(shortID, UTRANS_FORWARD, err);
4504     if (U_SUCCESS(err)) {
4505         errln("Instantiation with short ID succeeded after short ID was unregistered");
4506         delete t1;
4507     }
4508
4509     // try the same thing again, but this time with something other than
4510     // an instance of CompoundTransliterator
4511     UnicodeString realID("Latin-Greek");
4512     UnicodeString fakeID("Latin-dlgkjdflkjdl");
4513     Transliterator::registerAlias(fakeID, realID);
4514
4515     err = U_ZERO_ERROR;
4516     t1 = Transliterator::createInstance(realID, UTRANS_FORWARD, err);
4517     if (U_FAILURE(err)) {
4518         dataerrln("Failed to instantiate transliterator with real ID - %s", u_errorName(err));
4519         Transliterator::unregister(realID);
4520         return;
4521     }
4522     t2 = Transliterator::createInstance(fakeID, UTRANS_FORWARD, err);
4523     if (U_FAILURE(err)) {
4524         errln("Failed to instantiate transliterator with fake ID");
4525         delete t1;
4526         Transliterator::unregister(realID);
4527         return;
4528     }
4529
4530     t1->toRules(rules1, TRUE);
4531     t2->toRules(rules2, TRUE);
4532     if (rules1 != rules2)
4533         errln("Alias transliterators aren't the same");
4534
4535     delete t1;
4536     delete t2;
4537     Transliterator::unregister(fakeID);
4538 }
4539
4540 void TransliteratorTest::TestRuleStripping() {
4541     /*
4542 #
4543 \uE001>\u0C01; # SIGN
4544     */
4545     static const UChar rule[] = {
4546         0x0023,0x0020,0x000D,0x000A,
4547         0xE001,0x003E,0x0C01,0x003B,0x0020,0x0023,0x0020,0x0053,0x0049,0x0047,0x004E,0
4548     };
4549     static const UChar expectedRule[] = {
4550         0xE001,0x003E,0x0C01,0x003B,0
4551     };
4552     UChar result[UPRV_LENGTHOF(rule)];
4553     UErrorCode status = U_ZERO_ERROR;
4554     int32_t len = utrans_stripRules(rule, UPRV_LENGTHOF(rule), result, &status);
4555     if (len != u_strlen(expectedRule)) {
4556         errln("utrans_stripRules return len = %d", len);
4557     }
4558     if (u_strncmp(expectedRule, result, len) != 0) {
4559         errln("utrans_stripRules did not return expected string");
4560     }
4561 }
4562
4563 /**
4564  * Test the Halfwidth-Fullwidth transliterator (ticket 6281).
4565  */
4566 void TransliteratorTest::TestHalfwidthFullwidth(void) {
4567     UParseError parseError;
4568     UErrorCode status = U_ZERO_ERROR;
4569     Transliterator* hf = Transliterator::createInstance("Halfwidth-Fullwidth", UTRANS_FORWARD, parseError, status);
4570     Transliterator* fh = Transliterator::createInstance("Fullwidth-Halfwidth", UTRANS_FORWARD, parseError, status);
4571     if (hf == 0 || fh == 0) {
4572         dataerrln("FAIL: createInstance failed - %s", u_errorName(status));
4573         delete hf;
4574         delete fh;
4575         return;
4576     }
4577
4578     // Array of 2n items
4579     // Each item is
4580     //   "hf"|"fh"|"both",
4581     //   <Halfwidth>,
4582     //   <Fullwidth>
4583     const char* DATA[] = {
4584         "both",
4585         "\\uFFE9\\uFFEA\\uFFEB\\uFFEC\\u0061\\uFF71\\u00AF\\u0020",
4586         "\\u2190\\u2191\\u2192\\u2193\\uFF41\\u30A2\\uFFE3\\u3000",
4587     };
4588     int32_t DATA_length = UPRV_LENGTHOF(DATA);
4589
4590     for (int32_t i=0; i<DATA_length; i+=3) {
4591         UnicodeString h = CharsToUnicodeString(DATA[i+1]);
4592         UnicodeString f = CharsToUnicodeString(DATA[i+2]);
4593         switch (*DATA[i]) {
4594         case 0x68: //'h': // Halfwidth-Fullwidth only
4595             expect(*hf, h, f);
4596             break;
4597         case 0x66: //'f': // Fullwidth-Halfwidth only
4598             expect(*fh, f, h);
4599             break;
4600         case 0x62: //'b': // both directions
4601             expect(*hf, h, f);
4602             expect(*fh, f, h);
4603             break;
4604         }
4605     }
4606     delete hf;
4607     delete fh;
4608 }
4609
4610
4611     /**
4612      *  Test Thai.  The text is the first paragraph of "What is Unicode" from the Unicode.org web site.
4613      *              TODO: confirm that the expected results are correct.
4614      *              For now, test just confirms that C++ and Java give identical results.
4615      */
4616 void TransliteratorTest::TestThai(void) {
4617 #if !UCONFIG_NO_BREAK_ITERATION
4618     UParseError parseError;
4619     UErrorCode status = U_ZERO_ERROR;
4620     Transliterator* tr = Transliterator::createInstance("Any-Latin", UTRANS_FORWARD, parseError, status);
4621     if (tr == 0) {
4622         dataerrln("FAIL: createInstance failed - %s", u_errorName(status));
4623         return;
4624     }
4625     if (U_FAILURE(status)) {
4626         errln("FAIL: createInstance failed with %s", u_errorName(status));
4627         return;
4628     }
4629     const char *thaiText =
4630         "\\u0e42\\u0e14\\u0e22\\u0e1e\\u0e37\\u0e49\\u0e19\\u0e10\\u0e32\\u0e19\\u0e41\\u0e25\\u0e49\\u0e27, \\u0e04\\u0e2d"
4631         "\\u0e21\\u0e1e\\u0e34\\u0e27\\u0e40\\u0e15\\u0e2d\\u0e23\\u0e4c\\u0e08\\u0e30\\u0e40\\u0e01\\u0e35\\u0e48\\u0e22"
4632         "\\u0e27\\u0e02\\u0e49\\u0e2d\\u0e07\\u0e01\\u0e31\\u0e1a\\u0e40\\u0e23\\u0e37\\u0e48\\u0e2d\\u0e07\\u0e02\\u0e2d"
4633         "\\u0e07\\u0e15\\u0e31\\u0e27\\u0e40\\u0e25\\u0e02. \\u0e04\\u0e2d\\u0e21\\u0e1e\\u0e34\\u0e27\\u0e40\\u0e15\\u0e2d"
4634         "\\u0e23\\u0e4c\\u0e08\\u0e31\\u0e14\\u0e40\\u0e01\\u0e47\\u0e1a\\u0e15\\u0e31\\u0e27\\u0e2d\\u0e31\\u0e01\\u0e29"
4635         "\\u0e23\\u0e41\\u0e25\\u0e30\\u0e2d\\u0e31\\u0e01\\u0e02\\u0e23\\u0e30\\u0e2d\\u0e37\\u0e48\\u0e19\\u0e46 \\u0e42"
4636         "\\u0e14\\u0e22\\u0e01\\u0e32\\u0e23\\u0e01\\u0e33\\u0e2b\\u0e19\\u0e14\\u0e2b\\u0e21\\u0e32\\u0e22\\u0e40\\u0e25"
4637         "\\u0e02\\u0e43\\u0e2b\\u0e49\\u0e2a\\u0e33\\u0e2b\\u0e23\\u0e31\\u0e1a\\u0e41\\u0e15\\u0e48\\u0e25\\u0e30\\u0e15"
4638         "\\u0e31\\u0e27. \\u0e01\\u0e48\\u0e2d\\u0e19\\u0e2b\\u0e19\\u0e49\\u0e32\\u0e17\\u0e35\\u0e48\\u0e4a Unicode \\u0e08"
4639         "\\u0e30\\u0e16\\u0e39\\u0e01\\u0e2a\\u0e23\\u0e49\\u0e32\\u0e07\\u0e02\\u0e36\\u0e49\\u0e19, \\u0e44\\u0e14\\u0e49"
4640         "\\u0e21\\u0e35\\u0e23\\u0e30\\u0e1a\\u0e1a encoding \\u0e2d\\u0e22\\u0e39\\u0e48\\u0e2b\\u0e25\\u0e32\\u0e22\\u0e23"
4641         "\\u0e49\\u0e2d\\u0e22\\u0e23\\u0e30\\u0e1a\\u0e1a\\u0e2a\\u0e33\\u0e2b\\u0e23\\u0e31\\u0e1a\\u0e01\\u0e32\\u0e23"
4642         "\\u0e01\\u0e33\\u0e2b\\u0e19\\u0e14\\u0e2b\\u0e21\\u0e32\\u0e22\\u0e40\\u0e25\\u0e02\\u0e40\\u0e2b\\u0e25\\u0e48"
4643         "\\u0e32\\u0e19\\u0e35\\u0e49. \\u0e44\\u0e21\\u0e48\\u0e21\\u0e35 encoding \\u0e43\\u0e14\\u0e17\\u0e35\\u0e48"
4644         "\\u0e21\\u0e35\\u0e08\\u0e33\\u0e19\\u0e27\\u0e19\\u0e15\\u0e31\\u0e27\\u0e2d\\u0e31\\u0e01\\u0e02\\u0e23\\u0e30"
4645         "\\u0e21\\u0e32\\u0e01\\u0e40\\u0e1e\\u0e35\\u0e22\\u0e07\\u0e1e\\u0e2d: \\u0e22\\u0e01\\u0e15\\u0e31\\u0e27\\u0e2d"
4646         "\\u0e22\\u0e48\\u0e32\\u0e07\\u0e40\\u0e0a\\u0e48\\u0e19, \\u0e40\\u0e09\\u0e1e\\u0e32\\u0e30\\u0e43\\u0e19\\u0e01"
4647         "\\u0e25\\u0e38\\u0e48\\u0e21\\u0e2a\\u0e2b\\u0e20\\u0e32\\u0e1e\\u0e22\\u0e38\\u0e42\\u0e23\\u0e1b\\u0e40\\u0e1e"
4648         "\\u0e35\\u0e22\\u0e07\\u0e41\\u0e2b\\u0e48\\u0e07\\u0e40\\u0e14\\u0e35\\u0e22\\u0e27 \\u0e01\\u0e47\\u0e15\\u0e49"
4649         "\\u0e2d\\u0e07\\u0e01\\u0e32\\u0e23\\u0e2b\\u0e25\\u0e32\\u0e22 encoding \\u0e43\\u0e19\\u0e01\\u0e32\\u0e23\\u0e04"
4650         "\\u0e23\\u0e2d\\u0e1a\\u0e04\\u0e25\\u0e38\\u0e21\\u0e17\\u0e38\\u0e01\\u0e20\\u0e32\\u0e29\\u0e32\\u0e43\\u0e19"
4651         "\\u0e01\\u0e25\\u0e38\\u0e48\\u0e21. \\u0e2b\\u0e23\\u0e37\\u0e2d\\u0e41\\u0e21\\u0e49\\u0e41\\u0e15\\u0e48\\u0e43"
4652         "\\u0e19\\u0e20\\u0e32\\u0e29\\u0e32\\u0e40\\u0e14\\u0e35\\u0e48\\u0e22\\u0e27 \\u0e40\\u0e0a\\u0e48\\u0e19 \\u0e20"
4653         "\\u0e32\\u0e29\\u0e32\\u0e2d\\u0e31\\u0e07\\u0e01\\u0e24\\u0e29 \\u0e01\\u0e47\\u0e44\\u0e21\\u0e48\\u0e21\\u0e35"
4654         " encoding \\u0e43\\u0e14\\u0e17\\u0e35\\u0e48\\u0e40\\u0e1e\\u0e35\\u0e22\\u0e07\\u0e1e\\u0e2d\\u0e2a\\u0e33\\u0e2b"
4655         "\\u0e23\\u0e31\\u0e1a\\u0e17\\u0e38\\u0e01\\u0e15\\u0e31\\u0e27\\u0e2d\\u0e31\\u0e01\\u0e29\\u0e23, \\u0e40\\u0e04"
4656         "\\u0e23\\u0e37\\u0e48\\u0e2d\\u0e07\\u0e2b\\u0e21\\u0e32\\u0e22\\u0e27\\u0e23\\u0e23\\u0e04\\u0e15\\u0e2d\\u0e19"
4657         " \\u0e41\\u0e25\\u0e30\\u0e2a\\u0e31\\u0e0d\\u0e25\\u0e31\\u0e01\\u0e29\\u0e13\\u0e4c\\u0e17\\u0e32\\u0e07\\u0e40"
4658         "\\u0e17\\u0e04\\u0e19\\u0e34\\u0e04\\u0e17\\u0e35\\u0e48\\u0e43\\u0e0a\\u0e49\\u0e01\\u0e31\\u0e19\\u0e2d\\u0e22"
4659         "\\u0e39\\u0e48\\u0e17\\u0e31\\u0e48\\u0e27\\u0e44\\u0e1b.";
4660
4661     const char *latinText =
4662         "doy ph\\u1ee5\\u0304\\u0302n \\u1e6d\\u0304h\\u0101n l\\u00e6\\u0302w, khxmphiwtexr\\u0312 ca ke\\u012b\\u0300"
4663         "ywk\\u0304\\u0125xng k\\u1ea1b re\\u1ee5\\u0304\\u0300xng k\\u0304hxng t\\u1ea1wlek\\u0304h. khxmphiwtexr"
4664         "\\u0312 c\\u1ea1d k\\u0115b t\\u1ea1w x\\u1ea1ks\\u0304\\u02b9r l\\u00e6a x\\u1ea1kk\\u0304h ra x\\u1ee5\\u0304"
4665         "\\u0300n\\u00ab doy k\\u0101r k\\u1ea3h\\u0304nd h\\u0304m\\u0101ylek\\u0304h h\\u0304\\u0131\\u0302 s\\u0304"
4666         "\\u1ea3h\\u0304r\\u1ea1b t\\u00e6\\u0300la t\\u1ea1w. k\\u0300xn h\\u0304n\\u0302\\u0101 th\\u012b\\u0300\\u0301"
4667         " Unicode ca t\\u0304h\\u016bk s\\u0304r\\u0302\\u0101ng k\\u0304h\\u1ee5\\u0302n, d\\u1ecb\\u0302 m\\u012b "
4668         "rabb encoding xy\\u016b\\u0300 h\\u0304l\\u0101y r\\u0302xy rabb s\\u0304\\u1ea3h\\u0304r\\u1ea1b k\\u0101"
4669         "r k\\u1ea3h\\u0304nd h\\u0304m\\u0101ylek\\u0304h h\\u0304el\\u0300\\u0101 n\\u012b\\u0302. m\\u1ecb\\u0300m"
4670         "\\u012b encoding d\\u0131 th\\u012b\\u0300 m\\u012b c\\u1ea3nwn t\\u1ea1w x\\u1ea1kk\\u0304hra m\\u0101k p"
4671         "he\\u012byng phx: yk t\\u1ea1wx\\u1ef3\\u0101ng ch\\u00e8n, c\\u0304heph\\u0101a n\\u0131 kl\\u00f9m s\\u0304"
4672         "h\\u0304p\\u0323h\\u0101ph yurop phe\\u012byng h\\u0304\\u00e6\\u0300ng de\\u012byw k\\u0306 t\\u0302xngk\\u0101"
4673         "r h\\u0304l\\u0101y encoding n\\u0131 k\\u0101r khrxbkhlum thuk p\\u0323h\\u0101s\\u0304\\u02b9\\u0101 n\\u0131"
4674         " kl\\u00f9m. h\\u0304r\\u1ee5\\u0304x m\\u00e6\\u0302t\\u00e6\\u0300 n\\u0131 p\\u0323h\\u0101s\\u0304\\u02b9"
4675         "\\u0101 de\\u012b\\u0300yw ch\\u00e8n p\\u0323h\\u0101s\\u0304\\u02b9\\u0101 x\\u1ea1ngkvs\\u0304\\u02b9 k\\u0306"
4676         " m\\u1ecb\\u0300m\\u012b encoding d\\u0131 th\\u012b\\u0300 phe\\u012byng phx s\\u0304\\u1ea3h\\u0304r\\u1ea1"
4677         "b thuk t\\u1ea1w x\\u1ea1ks\\u0304\\u02b9r, kher\\u1ee5\\u0304\\u0300xngh\\u0304m\\u0101y wrrkh txn l\\u00e6"
4678         "a s\\u0304\\u1ea1\\u1ef5l\\u1ea1ks\\u0304\\u02b9\\u1e47\\u0312 th\\u0101ng thekhnikh th\\u012b\\u0300 ch\\u0131"
4679         "\\u0302 k\\u1ea1n xy\\u016b\\u0300 th\\u1ea1\\u0300wp\\u1ecb.";
4680
4681
4682     UnicodeString  xlitText(thaiText);
4683     xlitText = xlitText.unescape();
4684     tr->transliterate(xlitText);
4685
4686     UnicodeString expectedText(latinText);
4687     expectedText = expectedText.unescape();
4688     expect(*tr, xlitText, expectedText);
4689
4690     delete tr;
4691 #endif
4692 }
4693
4694 /**
4695  * Test for rdar://problem/61817095 (and maybe eventually other Hans-Hant errors)
4696  * Apple-only
4697  * ICU4C only
4698  */
4699 void TransliteratorTest::TestHansHant(void) {
4700     UParseError parseError;
4701     UErrorCode status = U_ZERO_ERROR;
4702     Transliterator* tr = Transliterator::createInstance("Hans-Hant", UTRANS_FORWARD, parseError, status);
4703     if (U_FAILURE(status)) {
4704         errln("FAIL: createInstance failed with %s", u_errorName(status));
4705         return;
4706     }
4707
4708     const char* _sourceText =     "\\u810f \\u5185\\u810f \\u810f\\u5668 \\u4e94\\u810f \\u5fc3\\u810f \\u809d\\u810f \\u813e\\u810f \\u80c3\\u810f \\u80be\\u810f \\u80f0\\u810f \\u810f\\u8151 \\u80ba\\u810f";
4709     const char* _expectedResult = "\\u9ad2 \\u5167\\u81df \\u81df\\u5668 \\u4e94\\u81df \\u5fc3\\u81df \\u809d\\u81df \\u813e\\u81df \\u80c3\\u81df \\u814e\\u81df \\u80f0\\u81df \\u81df\\u8151 \\u80ba\\u81df";
4710
4711     UnicodeString sourceText(_sourceText);
4712     UnicodeString expectedResult(_expectedResult);
4713     sourceText = sourceText.unescape();
4714     expectedResult = expectedResult.unescape();
4715
4716     expect(*tr, sourceText, expectedResult);
4717     delete tr;
4718 }
4719
4720
4721 //======================================================================
4722 // Support methods
4723 //======================================================================
4724 void TransliteratorTest::expectT(const UnicodeString& id,
4725                                  const UnicodeString& source,
4726                                  const UnicodeString& expectedResult) {
4727     UErrorCode ec = U_ZERO_ERROR;
4728     UParseError pe;
4729     Transliterator *t = Transliterator::createInstance(id, UTRANS_FORWARD, pe, ec);
4730     if (U_FAILURE(ec)) {
4731         errln((UnicodeString)"FAIL: Could not create " + id + " -  " + u_errorName(ec));
4732         delete t;
4733         return;
4734     }
4735     expect(*t, source, expectedResult);
4736     delete t;
4737 }
4738
4739 void TransliteratorTest::reportParseError(const UnicodeString& message,
4740                                           const UParseError& parseError,
4741                                           const UErrorCode& status) {
4742     dataerrln(message +
4743           /*", parse error " + parseError.code +*/
4744           ", line " + parseError.line +
4745           ", offset " + parseError.offset +
4746           ", pre-context " + prettify(parseError.preContext, TRUE) +
4747           ", post-context " + prettify(parseError.postContext,TRUE) +
4748           ", Error: " + u_errorName(status));
4749 }
4750
4751 void TransliteratorTest::expect(const UnicodeString& rules,
4752                                 const UnicodeString& source,
4753                                 const UnicodeString& expectedResult,
4754                                 UTransPosition *pos) {
4755     expect("<ID>", rules, source, expectedResult, pos);
4756 }
4757
4758 void TransliteratorTest::expect(const UnicodeString& id,
4759                                 const UnicodeString& rules,
4760                                 const UnicodeString& source,
4761                                 const UnicodeString& expectedResult,
4762                                 UTransPosition *pos) {
4763     UErrorCode status = U_ZERO_ERROR;
4764     UParseError parseError;
4765     Transliterator* t = Transliterator::createFromRules(id, rules, UTRANS_FORWARD, parseError, status);
4766     if (U_FAILURE(status)) {
4767         reportParseError(UnicodeString("Couldn't create transliterator from ") + rules, parseError, status);
4768     } else {
4769         expect(*t, source, expectedResult, pos);
4770     }
4771     delete t;
4772 }
4773
4774 void TransliteratorTest::expect(const Transliterator& t,
4775                                 const UnicodeString& source,
4776                                 const UnicodeString& expectedResult,
4777                                 const Transliterator& reverseTransliterator) {
4778     expect(t, source, expectedResult);
4779     expect(reverseTransliterator, expectedResult, source);
4780 }
4781
4782 void TransliteratorTest::expect(const Transliterator& t,
4783                                 const UnicodeString& source,
4784                                 const UnicodeString& expectedResult,
4785                                 UTransPosition *pos) {
4786     if (pos == 0) {
4787         UnicodeString result(source);
4788         t.transliterate(result);
4789         expectAux(t.getID() + ":String", source, result, expectedResult);
4790     }
4791     UTransPosition index={0, 0, 0, 0};
4792     if (pos != 0) {
4793         index = *pos;
4794     }
4795
4796     UnicodeString rsource(source);
4797     if (pos == 0) {
4798         t.transliterate(rsource);
4799     } else {
4800         // Do it all at once -- below we do it incrementally
4801         t.finishTransliteration(rsource, *pos);
4802     }
4803     expectAux(t.getID() + ":Replaceable", source, rsource, expectedResult);
4804
4805     // Test keyboard (incremental) transliteration -- this result
4806     // must be the same after we finalize (see below).
4807     UnicodeString log;
4808     rsource.remove();
4809     if (pos != 0) {
4810         rsource = source;
4811         formatInput(log, rsource, index);
4812         log.append(" -> ");
4813         UErrorCode status = U_ZERO_ERROR;
4814         t.transliterate(rsource, index, status);
4815         formatInput(log, rsource, index);
4816     } else {
4817         for (int32_t i=0; i<source.length(); ++i) {
4818             if (i != 0) {
4819                 log.append(" + ");
4820             }
4821             log.append(source.charAt(i)).append(" -> ");
4822             UErrorCode status = U_ZERO_ERROR;
4823             t.transliterate(rsource, index, source.charAt(i), status);
4824             formatInput(log, rsource, index);
4825         }
4826     }
4827
4828     // As a final step in keyboard transliteration, we must call
4829     // transliterate to finish off any pending partial matches that
4830     // were waiting for more input.
4831     t.finishTransliteration(rsource, index);
4832     log.append(" => ").append(rsource);
4833
4834     expectAux(t.getID() + ":Keyboard", log,
4835               rsource == expectedResult,
4836               expectedResult);
4837 }
4838
4839
4840 /**
4841  * @param appendTo result is appended to this param.
4842  * @param input the string being transliterated
4843  * @param pos the index struct
4844  */
4845 UnicodeString& TransliteratorTest::formatInput(UnicodeString &appendTo,
4846                                                const UnicodeString& input,
4847                                                const UTransPosition& pos) {
4848     // Output a string of the form aaa{bbb|ccc|ddd}eee, where
4849     // the {} indicate the context start and limit, and the ||
4850     // indicate the start and limit.
4851     if (0 <= pos.contextStart &&
4852         pos.contextStart <= pos.start &&
4853         pos.start <= pos.limit &&
4854         pos.limit <= pos.contextLimit &&
4855         pos.contextLimit <= input.length()) {
4856
4857         UnicodeString a, b, c, d, e;
4858         input.extractBetween(0, pos.contextStart, a);
4859         input.extractBetween(pos.contextStart, pos.start, b);
4860         input.extractBetween(pos.start, pos.limit, c);
4861         input.extractBetween(pos.limit, pos.contextLimit, d);
4862         input.extractBetween(pos.contextLimit, input.length(), e);
4863         appendTo.append(a).append((UChar)123/*{*/).append(b).
4864             append((UChar)PIPE).append(c).append((UChar)PIPE).append(d).
4865             append((UChar)125/*}*/).append(e);
4866     } else {
4867         appendTo.append((UnicodeString)"INVALID UTransPosition {cs=" +
4868                         pos.contextStart + ", s=" + pos.start + ", l=" +
4869                         pos.limit + ", cl=" + pos.contextLimit + "} on " +
4870                         input);
4871     }
4872     return appendTo;
4873 }
4874
4875 void TransliteratorTest::expectAux(const UnicodeString& tag,
4876                                    const UnicodeString& source,
4877                                    const UnicodeString& result,
4878                                    const UnicodeString& expectedResult) {
4879     expectAux(tag, source + " -> " + result,
4880               result == expectedResult,
4881               expectedResult);
4882 }
4883
4884 void TransliteratorTest::expectAux(const UnicodeString& tag,
4885                                    const UnicodeString& summary, UBool pass,
4886                                    const UnicodeString& expectedResult) {
4887     if (pass) {
4888         logln(UnicodeString("(")+tag+") " + prettify(summary));
4889     } else {
4890         dataerrln(UnicodeString("FAIL: (")+tag+") "
4891               + prettify(summary)
4892               + ", expected " + prettify(expectedResult));
4893     }
4894 }
4895
4896 #endif /* #if !UCONFIG_NO_TRANSLITERATION */