icuSources/test/intltest/transtst.cpp

   1 /*
   2 **********************************************************************
   3 *   Copyright (C) 1999-2011, International Business Machines
   4 *   Corporation and others.  All Rights Reserved.
   5 **********************************************************************
   6 *   Date        Name        Description
   7 *   11/10/99    aliu        Creation.
   8 **********************************************************************
   9 */
  10
  11 #include "unicode/utypes.h"
  12
  13 #if !UCONFIG_NO_TRANSLITERATION
  14
  15 #include "transtst.h"
  16 #include "unicode/locid.h"
  17 #include "unicode/dtfmtsym.h"
  18 #include "unicode/normlzr.h"
  19 #include "unicode/translit.h"
  20 #include "unicode/uchar.h"
  21 #include "unicode/unifilt.h"
  22 #include "unicode/uniset.h"
  23 #include "unicode/ustring.h"
  24 #include "unicode/usetiter.h"
  25 #include "unicode/uscript.h"
  26 #include "unicode/utf16.h"
  27 #include "cpdtrans.h"
  28 #include "nultrans.h"
  29 #include "rbt.h"
  30 #include "rbt_pars.h"
  31 #include "anytrans.h"
  32 #include "esctrn.h"
  33 #include "name2uni.h"
  34 #include "nortrans.h"
  35 #include "remtrans.h"
  36 #include "titletrn.h"
  37 #include "tolowtrn.h"
  38 #include "toupptrn.h"
  39 #include "unesctrn.h"
  40 #include "uni2name.h"
  41 #include "cstring.h"
  42 #include "cmemory.h"
  43 #include <stdio.h>
  44
  45 /***********************************************************************
  46
  47                      HOW TO USE THIS TEST FILE
  48                                -or-
  49                   How I developed on two platforms
  50                 without losing (too much of) my mind
  51
  52
  53 1. Add new tests by copying/pasting/changing existing tests.  On Java,
  54    any public void method named Test...() taking no parameters becomes
  55    a test.  On C++, you need to modify the header and add a line to
  56    the runIndexedTest() dispatch method.
  57
  58 2. Make liberal use of the expect() method; it is your friend.
  59
  60 3. The tests in this file exactly match those in a sister file on the
  61    other side.  The two files are:
  62
  63    icu4j:  src/com/ibm/test/translit/TransliteratorTest.java
  64    icu4c:  source/test/intltest/transtst.cpp
  65
  66                   ==> THIS IS THE IMPORTANT PART <==
  67
  68    When you add a test in this file, add it in TransliteratorTest.java
  69    too.  Give it the same name and put it in the same relative place.
  70    This makes maintenance a lot simpler for any poor soul who ends up
  71    trying to synchronize the tests between icu4j and icu4c.
  72
  73 4. If you MUST enter a test that is NOT paralleled in the sister file,
  74    then add it in the special non-mirrored section.  These are
  75    labeled
  76
  77      "icu4j ONLY"
  78
  79    or
  80
  81      "icu4c ONLY"
  82
  83    Make sure you document the reason the test is here and not there.
  84
  85
  86 Thank you.
  87 The Management
  88 ***********************************************************************/
  89
  90 // Define character constants thusly to be EBCDIC-friendly
  91 enum {
  92     LEFT_BRACE=((UChar)0x007B), /*{*/
  93     PIPE      =((UChar)0x007C), /*|*/
  94     ZERO      =((UChar)0x0030), /*0*/
  95     UPPER_A   =((UChar)0x0041)  /*A*/
  96 };
  97
  98 TransliteratorTest::TransliteratorTest()
  99 :   DESERET_DEE((UChar32)0x10414),
 100     DESERET_dee((UChar32)0x1043C)
 101 {
 102 }
 103
 104 TransliteratorTest::~TransliteratorTest() {}
 105
 106 void
 107 TransliteratorTest::runIndexedTest(int32_t index, UBool exec,
 108                                    const char* &name, char* /*par*/) {
 109     switch (index) {
 110         TESTCASE(0,TestInstantiation);
 111         TESTCASE(1,TestSimpleRules);
 112         TESTCASE(2,TestRuleBasedInverse);
 113         TESTCASE(3,TestKeyboard);
 114         TESTCASE(4,TestKeyboard2);
 115         TESTCASE(5,TestKeyboard3);
 116         TESTCASE(6,TestArabic);
 117         TESTCASE(7,TestCompoundKana);
 118         TESTCASE(8,TestCompoundHex);
 119         TESTCASE(9,TestFiltering);
 120         TESTCASE(10,TestInlineSet);
 121         TESTCASE(11,TestPatternQuoting);
 122         TESTCASE(12,TestJ277);
 123         TESTCASE(13,TestJ243);
 124         TESTCASE(14,TestJ329);
 125         TESTCASE(15,TestSegments);
 126         TESTCASE(16,TestCursorOffset);
 127         TESTCASE(17,TestArbitraryVariableValues);
 128         TESTCASE(18,TestPositionHandling);
 129         TESTCASE(19,TestHiraganaKatakana);
 130         TESTCASE(20,TestCopyJ476);
 131         TESTCASE(21,TestAnchors);
 132         TESTCASE(22,TestInterIndic);
 133         TESTCASE(23,TestFilterIDs);
 134         TESTCASE(24,TestCaseMap);
 135         TESTCASE(25,TestNameMap);
 136         TESTCASE(26,TestLiberalizedID);
 137         TESTCASE(27,TestCreateInstance);
 138         TESTCASE(28,TestNormalizationTransliterator);
 139         TESTCASE(29,TestCompoundRBT);
 140         TESTCASE(30,TestCompoundFilter);
 141         TESTCASE(31,TestRemove);
 142         TESTCASE(32,TestToRules);
 143         TESTCASE(33,TestContext);
 144         TESTCASE(34,TestSupplemental);
 145         TESTCASE(35,TestQuantifier);
 146         TESTCASE(36,TestSTV);
 147         TESTCASE(37,TestCompoundInverse);
 148         TESTCASE(38,TestNFDChainRBT);
 149         TESTCASE(39,TestNullInverse);
 150         TESTCASE(40,TestAliasInverseID);
 151         TESTCASE(41,TestCompoundInverseID);
 152         TESTCASE(42,TestUndefinedVariable);
 153         TESTCASE(43,TestEmptyContext);
 154         TESTCASE(44,TestCompoundFilterID);
 155         TESTCASE(45,TestPropertySet);
 156         TESTCASE(46,TestNewEngine);
 157         TESTCASE(47,TestQuantifiedSegment);
 158         TESTCASE(48,TestDevanagariLatinRT);
 159         TESTCASE(49,TestTeluguLatinRT);
 160         TESTCASE(50,TestCompoundLatinRT);
 161         TESTCASE(51,TestSanskritLatinRT);
 162         TESTCASE(52,TestLocaleInstantiation);
 163         TESTCASE(53,TestTitleAccents);
 164         TESTCASE(54,TestLocaleResource);
 165         TESTCASE(55,TestParseError);
 166         TESTCASE(56,TestOutputSet);
 167         TESTCASE(57,TestVariableRange);
 168         TESTCASE(58,TestInvalidPostContext);
 169         TESTCASE(59,TestIDForms);
 170         TESTCASE(60,TestToRulesMark);
 171         TESTCASE(61,TestEscape);
 172         TESTCASE(62,TestAnchorMasking);
 173         TESTCASE(63,TestDisplayName);
 174         TESTCASE(64,TestSpecialCases);
 175 #if !UCONFIG_NO_FILE_IO
 176         TESTCASE(65,TestIncrementalProgress);
 177 #endif
 178         TESTCASE(66,TestSurrogateCasing);
 179         TESTCASE(67,TestFunction);
 180         TESTCASE(68,TestInvalidBackRef);
 181         TESTCASE(69,TestMulticharStringSet);
 182         TESTCASE(70,TestUserFunction);
 183         TESTCASE(71,TestAnyX);
 184         TESTCASE(72,TestSourceTargetSet);
 185         TESTCASE(73,TestGurmukhiDevanagari);
 186         TESTCASE(74,TestPatternWhiteSpace);
 187         TESTCASE(75,TestAllCodepoints);
 188         TESTCASE(76,TestBoilerplate);
 189         TESTCASE(77,TestAlternateSyntax);
 190         TESTCASE(78,TestBeginEnd);
 191         TESTCASE(79,TestBeginEndToRules);
 192         TESTCASE(80,TestRegisterAlias);
 193         TESTCASE(81,TestRuleStripping);
 194         TESTCASE(82,TestHalfwidthFullwidth);
 195         TESTCASE(83,TestThai);
 196         TESTCASE(84,TestAny);
 197         default: name = ""; break;
 198     }
 199 }
 200
 201 static const UVersionInfo ICU_39 = {3,9,4,0};
 202 /**
 203  * Make sure every system transliterator can be instantiated.
 204  *
 205  * ALSO test that the result of toRules() for each rule is a valid
 206  * rule.  Do this here so we don't have to have another test that
 207  * instantiates everything as well.
 208  */
 209 void TransliteratorTest::TestInstantiation() {
 210     UErrorCode ec = U_ZERO_ERROR;
 211     StringEnumeration* avail = Transliterator::getAvailableIDs(ec);
 212     assertSuccess("getAvailableIDs()", ec);
 213     assertTrue("getAvailableIDs()!=NULL", avail!=NULL);
 214     int32_t n = Transliterator::countAvailableIDs();
 215     assertTrue("getAvailableIDs().count()==countAvailableIDs()",
 216                avail->count(ec) == n);
 217     assertSuccess("count()", ec);
 218     UnicodeString name;
 219     for (int32_t i=0; i<n; ++i) {
 220         const UnicodeString& id = *avail->snext(ec);
 221         if (!assertSuccess("snext()", ec) ||
 222             !assertTrue("snext()!=NULL", (&id)!=NULL, TRUE)) {
 223             break;
 224         }
 225         UnicodeString id2 = Transliterator::getAvailableID(i);
 226         if (id.length() < 1) {
 227             errln(UnicodeString("FAIL: getAvailableID(") +
 228                   i + ") returned empty string");
 229             continue;
 230         }
 231         if (id != id2) {
 232             errln(UnicodeString("FAIL: getAvailableID(") +
 233                   i + ") != getAvailableIDs().snext()");
 234             continue;
 235         }
 236         UParseError parseError;
 237         UErrorCode status = U_ZERO_ERROR;
 238         Transliterator* t = Transliterator::createInstance(id,
 239                               UTRANS_FORWARD, parseError,status);
 240         name.truncate(0);
 241         Transliterator::getDisplayName(id, name);
 242         if (t == 0) {
 243 #if UCONFIG_NO_BREAK_ITERATION
 244             // If UCONFIG_NO_BREAK_ITERATION is on, then only Thai should fail.
 245             if (id.compare((UnicodeString)"Thai-Latin") != 0)
 246 #endif
 247                 dataerrln(UnicodeString("FAIL: Couldn't create ") + id +
 248                       /*", parse error " + parseError.code +*/
 249                       ", line " + parseError.line +
 250                       ", offset " + parseError.offset +
 251                       ", pre-context " + prettify(parseError.preContext, TRUE) +
 252                       ", post-context " +prettify(parseError.postContext,TRUE) +
 253                       ", Error: " + u_errorName(status));
 254                 // When createInstance fails, it deletes the failing
 255                 // entry from the available ID list.  We detect this
 256                 // here by looking for a change in countAvailableIDs.
 257             int32_t nn = Transliterator::countAvailableIDs();
 258             if (nn == (n - 1)) {
 259                 n = nn;
 260                 --i; // Compensate for deleted entry
 261             }
 262         } else {
 263             logln(UnicodeString("OK: ") + name + " (" + id + ")");
 264
 265             // Now test toRules
 266             UnicodeString rules;
 267             t->toRules(rules, TRUE);
 268             Transliterator *u = Transliterator::createFromRules("x",
 269                                     rules, UTRANS_FORWARD, parseError,status);
 270             if (u == 0) {
 271                 errln(UnicodeString("FAIL: ") + id +
 272                       ".createFromRules() => bad rules" +
 273                       /*", parse error " + parseError.code +*/
 274                       ", line " + parseError.line +
 275                       ", offset " + parseError.offset +
 276                       ", context " + prettify(parseError.preContext, TRUE) +
 277                       ", rules: " + prettify(rules, TRUE));
 278             } else {
 279                 delete u;
 280             }
 281             delete t;
 282         }
 283     }
 284     assertTrue("snext()==NULL", avail->snext(ec)==NULL);
 285     assertSuccess("snext()", ec);
 286     delete avail;
 287
 288     // Now test the failure path
 289     UParseError parseError;
 290     UErrorCode status = U_ZERO_ERROR;
 291     UnicodeString id("<Not a valid Transliterator ID>");
 292     Transliterator* t = Transliterator::createInstance(id, UTRANS_FORWARD, parseError, status);
 293     if (t != 0) {
 294         errln("FAIL: " + id + " returned a transliterator");
 295         delete t;
 296     } else {
 297         logln("OK: Bogus ID handled properly");
 298     }
 299 }
 300
 301 void TransliteratorTest::TestSimpleRules(void) {
 302     /* Example: rules 1. ab>x|y
 303      *                2. yc>z
 304      *
 305      * []|eabcd  start - no match, copy e to tranlated buffer
 306      * [e]|abcd  match rule 1 - copy output & adjust cursor
 307      * [ex|y]cd  match rule 2 - copy output & adjust cursor
 308      * [exz]|d   no match, copy d to transliterated buffer
 309      * [exzd]|   done
 310      */
 311     expect(UnicodeString("ab>x|y;", "") +
 312            "yc>z",
 313            "eabcd", "exzd");
 314
 315     /* Another set of rules:
 316      *    1. ab>x|yzacw
 317      *    2. za>q
 318      *    3. qc>r
 319      *    4. cw>n
 320      *
 321      * []|ab       Rule 1
 322      * [x|yzacw]   No match
 323      * [xy|zacw]   Rule 2
 324      * [xyq|cw]    Rule 4
 325      * [xyqn]|     Done
 326      */
 327     expect(UnicodeString("ab>x|yzacw;") +
 328            "za>q;" +
 329            "qc>r;" +
 330            "cw>n",
 331            "ab", "xyqn");
 332
 333     /* Test categories
 334      */
 335     UErrorCode status = U_ZERO_ERROR;
 336     UParseError parseError;
 337     Transliterator *t = Transliterator::createFromRules(
 338         "<ID>",
 339         UnicodeString("$dummy=").append((UChar)0xE100) +
 340         UnicodeString(";"
 341                       "$vowel=[aeiouAEIOU];"
 342                       "$lu=[:Lu:];"
 343                       "$vowel } $lu > '!';"
 344                       "$vowel > '&';"
 345                       "'!' { $lu > '^';"
 346                       "$lu > '*';"
 347                       "a > ERROR", ""),
 348         UTRANS_FORWARD, parseError,
 349         status);
 350     if (U_FAILURE(status)) {
 351         dataerrln("FAIL: RBT constructor failed - %s", u_errorName(status));
 352         return;
 353     }
 354     expect(*t, "abcdefgABCDEFGU", "&bcd&fg!^**!^*&");
 355     delete t;
 356 }
 357
 358 /**
 359  * Test inline set syntax and set variable syntax.
 360  */
 361 void TransliteratorTest::TestInlineSet(void) {
 362     expect("{ [:Ll:] } x > y; [:Ll:] > z;", "aAbxq", "zAyzz");
 363     expect("a[0-9]b > qrs", "1a7b9", "1qrs9");
 364
 365     expect(UnicodeString(
 366            "$digit = [0-9];"
 367            "$alpha = [a-zA-Z];"
 368            "$alphanumeric = [$digit $alpha];" // ***
 369            "$special = [^$alphanumeric];"     // ***
 370            "$alphanumeric > '-';"
 371            "$special > '*';", ""),
 372
 373            "thx-1138", "---*----");
 374 }
 375
 376 /**
 377  * Create some inverses and confirm that they work.  We have to be
 378  * careful how we do this, since the inverses will not be true
 379  * inverses -- we can't throw any random string at the composition
 380  * of the transliterators and expect the identity function.  F x
 381  * F' != I.  However, if we are careful about the input, we will
 382  * get the expected results.
 383  */
 384 void TransliteratorTest::TestRuleBasedInverse(void) {
 385     UnicodeString RULES =
 386         UnicodeString("abc>zyx;") +
 387         "ab>yz;" +
 388         "bc>zx;" +
 389         "ca>xy;" +
 390         "a>x;" +
 391         "b>y;" +
 392         "c>z;" +
 393
 394         "abc<zyx;" +
 395         "ab<yz;" +
 396         "bc<zx;" +
 397         "ca<xy;" +
 398         "a<x;" +
 399         "b<y;" +
 400         "c<z;" +
 401
 402         "";
 403
 404     const char* DATA[] = {
 405         // Careful here -- random strings will not work.  If we keep
 406         // the left side to the domain and the right side to the range
 407         // we will be okay though (left, abc; right xyz).
 408         "a", "x",
 409         "abcacab", "zyxxxyy",
 410         "caccb", "xyzzy",
 411     };
 412
 413     int32_t DATA_length = (int32_t)(sizeof(DATA) / sizeof(DATA[0]));
 414
 415     UErrorCode status = U_ZERO_ERROR;
 416     UParseError parseError;
 417     Transliterator *fwd = Transliterator::createFromRules("<ID>", RULES,
 418                                 UTRANS_FORWARD, parseError, status);
 419     Transliterator *rev = Transliterator::createFromRules("<ID>", RULES,
 420                                 UTRANS_REVERSE, parseError, status);
 421     if (U_FAILURE(status)) {
 422         errln("FAIL: RBT constructor failed");
 423         return;
 424     }
 425     for (int32_t i=0; i<DATA_length; i+=2) {
 426         expect(*fwd, DATA[i], DATA[i+1]);
 427         expect(*rev, DATA[i+1], DATA[i]);
 428     }
 429     delete fwd;
 430     delete rev;
 431 }
 432
 433 /**
 434  * Basic test of keyboard.
 435  */
 436 void TransliteratorTest::TestKeyboard(void) {
 437     UParseError parseError;
 438     UErrorCode status = U_ZERO_ERROR;
 439     Transliterator *t = Transliterator::createFromRules("<ID>",
 440                               UnicodeString("psch>Y;")
 441                               +"ps>y;"
 442                               +"ch>x;"
 443                               +"a>A;",
 444                               UTRANS_FORWARD, parseError,
 445                               status);
 446     if (U_FAILURE(status)) {
 447         errln("FAIL: RBT constructor failed");
 448         return;
 449     }
 450     const char* DATA[] = {
 451         // insertion, buffer
 452         "a", "A",
 453         "p", "Ap",
 454         "s", "Aps",
 455         "c", "Apsc",
 456         "a", "AycA",
 457         "psch", "AycAY",
 458         0, "AycAY", // null means finishKeyboardTransliteration
 459     };
 460
 461     keyboardAux(*t, DATA, (int32_t)(sizeof(DATA)/sizeof(DATA[0])));
 462     delete t;
 463 }
 464
 465 /**
 466  * Basic test of keyboard with cursor.
 467  */
 468 void TransliteratorTest::TestKeyboard2(void) {
 469     UParseError parseError;
 470     UErrorCode status = U_ZERO_ERROR;
 471     Transliterator *t = Transliterator::createFromRules("<ID>",
 472                               UnicodeString("ych>Y;")
 473                               +"ps>|y;"
 474                               +"ch>x;"
 475                               +"a>A;",
 476                               UTRANS_FORWARD, parseError,
 477                               status);
 478     if (U_FAILURE(status)) {
 479         errln("FAIL: RBT constructor failed");
 480         return;
 481     }
 482     const char* DATA[] = {
 483         // insertion, buffer
 484         "a", "A",
 485         "p", "Ap",
 486         "s", "Aps", // modified for rollback - "Ay",
 487         "c", "Apsc", // modified for rollback - "Ayc",
 488         "a", "AycA",
 489         "p", "AycAp",
 490         "s", "AycAps", // modified for rollback - "AycAy",
 491         "c", "AycApsc", // modified for rollback - "AycAyc",
 492         "h", "AycAY",
 493         0, "AycAY", // null means finishKeyboardTransliteration
 494     };
 495
 496     keyboardAux(*t, DATA, (int32_t)(sizeof(DATA)/sizeof(DATA[0])));
 497     delete t;
 498 }
 499
 500 /**
 501  * Test keyboard transliteration with back-replacement.
 502  */
 503 void TransliteratorTest::TestKeyboard3(void) {
 504     // We want th>z but t>y.  Furthermore, during keyboard
 505     // transliteration we want t>y then yh>z if t, then h are
 506     // typed.
 507     UnicodeString RULES("t>|y;"
 508                         "yh>z;");
 509
 510     const char* DATA[] = {
 511         // Column 1: characters to add to buffer (as if typed)
 512         // Column 2: expected appearance of buffer after
 513         //           keyboard xliteration.
 514         "a", "a",
 515         "b", "ab",
 516         "t", "abt", // modified for rollback - "aby",
 517         "c", "abyc",
 518         "t", "abyct", // modified for rollback - "abycy",
 519         "h", "abycz",
 520         0, "abycz", // null means finishKeyboardTransliteration
 521     };
 522
 523     UParseError parseError;
 524     UErrorCode status = U_ZERO_ERROR;
 525     Transliterator *t = Transliterator::createFromRules("<ID>", RULES, UTRANS_FORWARD, parseError, status);
 526     if (U_FAILURE(status)) {
 527         errln("FAIL: RBT constructor failed");
 528         return;
 529     }
 530     keyboardAux(*t, DATA, (int32_t)(sizeof(DATA)/sizeof(DATA[0])));
 531     delete t;
 532 }
 533
 534 void TransliteratorTest::keyboardAux(const Transliterator& t,
 535                                      const char* DATA[], int32_t DATA_length) {
 536     UErrorCode status = U_ZERO_ERROR;
 537     UTransPosition index={0, 0, 0, 0};
 538     UnicodeString s;
 539     for (int32_t i=0; i<DATA_length; i+=2) {
 540         UnicodeString log;
 541         if (DATA[i] != 0) {
 542             log = s + " + "
 543                 + DATA[i]
 544                 + " -> ";
 545             t.transliterate(s, index, DATA[i], status);
 546         } else {
 547             log = s + " => ";
 548             t.finishTransliteration(s, index);
 549         }
 550         // Show the start index '{' and the cursor '|'
 551         UnicodeString a, b, c;
 552         s.extractBetween(0, index.contextStart, a);
 553         s.extractBetween(index.contextStart, index.start, b);
 554         s.extractBetween(index.start, s.length(), c);
 555         log.append(a).
 556             append((UChar)LEFT_BRACE).
 557             append(b).
 558             append((UChar)PIPE).
 559             append(c);
 560         if (s == DATA[i+1] && U_SUCCESS(status)) {
 561             logln(log);
 562         } else {
 563             errln(UnicodeString("FAIL: ") + log + ", expected " + DATA[i+1]);
 564         }
 565     }
 566 }
 567
/**
 * Placeholder for the Latin-Arabic test.  The whole body is commented out,
 * so this test currently performs no checks; the old code is kept below
 * for reference.
 */
void TransliteratorTest::TestArabic(void) {
// Test disabled for 2.0 until new Arabic transliterator can be written.
//    /*
//    const char* DATA[] = {
//        "Arabic", "\u062a\u062a\u0645\u062a\u0639\u0020"+
//                  "\u0627\u0644\u0644\u063a\u0629\u0020"+
//                  "\u0627\u0644\u0639\u0631\u0628\u0628\u064a\u0629\u0020"+
//                  "\u0628\u0628\u0646\u0638\u0645\u0020"+
//                  "\u0643\u062a\u0627\u0628\u0628\u064a\u0629\u0020"+
//                  "\u062c\u0645\u064a\u0644\u0629",
//    };
//    */
//
//    UChar ar_raw[] = {
//        0x062a, 0x062a, 0x0645, 0x062a, 0x0639, 0x0020, 0x0627,
//        0x0644, 0x0644, 0x063a, 0x0629, 0x0020, 0x0627, 0x0644,
//        0x0639, 0x0631, 0x0628, 0x0628, 0x064a, 0x0629, 0x0020,
//        0x0628, 0x0628, 0x0646, 0x0638, 0x0645, 0x0020, 0x0643,
//        0x062a, 0x0627, 0x0628, 0x0628, 0x064a, 0x0629, 0x0020,
//        0x062c, 0x0645, 0x064a, 0x0644, 0x0629, 0
//    };
//    UnicodeString ar(ar_raw);
//    UErrorCode status=U_ZERO_ERROR;
//    UParseError parseError;
//    Transliterator *t = Transliterator::createInstance("Latin-Arabic", UTRANS_FORWARD, parseError, status);
//    if (t == 0) {
//        errln("FAIL: createInstance failed");
//        return;
//    }
//    expect(*t, "Arabic", ar);
//    delete t;
}
 600
 601 /**
 602  * Compose the Kana transliterator forward and reverse and try
 603  * some strings that should come out unchanged.
 604  */
 605 void TransliteratorTest::TestCompoundKana(void) {
 606     UParseError parseError;
 607     UErrorCode status = U_ZERO_ERROR;
 608     Transliterator* t = Transliterator::createInstance("Latin-Hiragana;Hiragana-Latin", UTRANS_FORWARD, parseError, status);
 609     if (t == 0) {
 610         dataerrln("FAIL: construction of Latin-Hiragana;Hiragana-Latin failed - %s", u_errorName(status));
 611     } else {
 612         expect(*t, "aaaaa", "aaaaa");
 613         delete t;
 614     }
 615 }
 616
 617 /**
 618  * Compose the hex transliterators forward and reverse.
 619  */
 620 void TransliteratorTest::TestCompoundHex(void) {
 621     UParseError parseError;
 622     UErrorCode status = U_ZERO_ERROR;
 623     Transliterator* a = Transliterator::createInstance("Any-Hex", UTRANS_FORWARD, parseError, status);
 624     Transliterator* b = Transliterator::createInstance("Hex-Any", UTRANS_FORWARD, parseError, status);
 625     Transliterator* transab[] = { a, b };
 626     Transliterator* transba[] = { b, a };
 627     if (a == 0 || b == 0) {
 628         errln("FAIL: construction failed");
 629         delete a;
 630         delete b;
 631         return;
 632     }
 633     // Do some basic tests of a
 634     expect(*a, "01", UnicodeString("\\u0030\\u0031", ""));
 635     // Do some basic tests of b
 636     expect(*b, UnicodeString("\\u0030\\u0031", ""), "01");
 637
 638     Transliterator* ab = new CompoundTransliterator(transab, 2);
 639     UnicodeString s("abcde", "");
 640     expect(*ab, s, s);
 641
 642     UnicodeString str(s);
 643     a->transliterate(str);
 644     Transliterator* ba = new CompoundTransliterator(transba, 2);
 645     expect(*ba, str, str);
 646
 647     delete ab;
 648     delete ba;
 649     delete a;
 650     delete b;
 651 }
 652
 653 int gTestFilterClassID = 0;
 654 /**
 655  * Used by TestFiltering().
 656  */
 657 class TestFilter : public UnicodeFilter {
 658     virtual UnicodeFunctor* clone() const {
 659         return new TestFilter(*this);
 660     }
 661     virtual UBool contains(UChar32 c) const {
 662         return c != (UChar)0x0063 /*c*/;
 663     }
 664     // Stubs
 665     virtual UnicodeString& toPattern(UnicodeString& result,
 666                                      UBool /*escapeUnprintable*/) const {
 667         return result;
 668     }
 669     virtual UBool matchesIndexValue(uint8_t /*v*/) const {
 670         return FALSE;
 671     }
 672     virtual void addMatchSetTo(UnicodeSet& /*toUnionTo*/) const {}
 673 public:
 674     UClassID getDynamicClassID() const { return (UClassID)&gTestFilterClassID; }
 675 };
 676
 677 /**
 678  * Do some basic tests of filtering.
 679  */
 680 void TransliteratorTest::TestFiltering(void) {
 681     UParseError parseError;
 682     UErrorCode status = U_ZERO_ERROR;
 683     Transliterator* hex = Transliterator::createInstance("Any-Hex", UTRANS_FORWARD, parseError, status);
 684     if (hex == 0) {
 685         errln("FAIL: createInstance(Any-Hex) failed");
 686         return;
 687     }
 688     hex->adoptFilter(new TestFilter());
 689     UnicodeString s("abcde");
 690     hex->transliterate(s);
 691     UnicodeString exp("\\u0061\\u0062c\\u0064\\u0065", "");
 692     if (s == exp) {
 693         logln(UnicodeString("Ok:   \"") + exp + "\"");
 694     } else {
 695         logln(UnicodeString("FAIL: \"") + s + "\", wanted \"" + exp + "\"");
 696     }
 697
 698     // ICU4C ONLY. Do not find Transliterator.orphanFilter() in ICU4J.
 699     UnicodeFilter *f = hex->orphanFilter();
 700     if (f == NULL){
 701         errln("FAIL: orphanFilter() should get a UnicodeFilter");
 702     } else {
 703         delete f;
 704     }
 705     delete hex;
 706 }
 707
 708 /**
 709  * Test anchors
 710  */
 711 void TransliteratorTest::TestAnchors(void) {
 712     expect(UnicodeString("^a  > 0; a$ > 2 ; a > 1;", ""),
 713            "aaa",
 714            "012");
 715     expect(UnicodeString("$s=[z$]; $s{a>0; a}$s>2; a>1;", ""),
 716            "aaa",
 717            "012");
 718     expect(UnicodeString("^ab  > 01 ;"
 719            " ab  > |8 ;"
 720            "  b  > k ;"
 721            " 8x$ > 45 ;"
 722            " 8x  > 77 ;", ""),
 723
 724            "ababbabxabx",
 725            "018k7745");
 726     expect(UnicodeString("$s = [z$] ;"
 727            "$s{ab    > 01 ;"
 728            "   ab    > |8 ;"
 729            "    b    > k ;"
 730            "   8x}$s > 45 ;"
 731            "   8x    > 77 ;", ""),
 732
 733            "abzababbabxzabxabx",
 734            "01z018k45z01x45");
 735 }
 736
 737 /**
 738  * Test pattern quoting and escape mechanisms.
 739  */
 740 void TransliteratorTest::TestPatternQuoting(void) {
 741     // Array of 3n items
 742     // Each item is <rules>, <input>, <expected output>
 743     const UnicodeString DATA[] = {
 744         UnicodeString(UChar(0x4E01)) + ">'[male adult]'",
 745         UnicodeString(UChar(0x4E01)),
 746         "[male adult]"
 747     };
 748
 749     for (int32_t i=0; i<3; i+=3) {
 750         logln(UnicodeString("Pattern: ") + prettify(DATA[i]));
 751         UParseError parseError;
 752         UErrorCode status = U_ZERO_ERROR;
 753         Transliterator *t = Transliterator::createFromRules("<ID>", DATA[i], UTRANS_FORWARD, parseError, status);
 754         if (U_FAILURE(status)) {
 755             errln("RBT constructor failed");
 756         } else {
 757             expect(*t, DATA[i+1], DATA[i+2]);
 758         }
 759         delete t;
 760     }
 761 }
 762
 763 /**
 764  * Regression test for bugs found in Greek transliteration.
 765  */
 766 void TransliteratorTest::TestJ277(void) {
 767     UErrorCode status = U_ZERO_ERROR;
 768     UParseError parseError;
 769     Transliterator *gl = Transliterator::createInstance("Greek-Latin; NFD; [:M:]Remove; NFC", UTRANS_FORWARD, parseError, status);
 770     if (gl == NULL) {
 771         dataerrln("FAIL: createInstance(Greek-Latin) returned NULL - %s", u_errorName(status));
 772         return;
 773     }
 774
 775     UChar sigma = 0x3C3;
 776     UChar upsilon = 0x3C5;
 777     UChar nu = 0x3BD;
 778 //    UChar PHI = 0x3A6;
 779     UChar alpha = 0x3B1;
 780 //    UChar omega = 0x3C9;
 781 //    UChar omicron = 0x3BF;
 782 //    UChar epsilon = 0x3B5;
 783
 784     // sigma upsilon nu -> syn
 785     UnicodeString syn;
 786     syn.append(sigma).append(upsilon).append(nu);
 787     expect(*gl, syn, "syn");
 788
 789     // sigma alpha upsilon nu -> saun
 790     UnicodeString sayn;
 791     sayn.append(sigma).append(alpha).append(upsilon).append(nu);
 792     expect(*gl, sayn, "saun");
 793
 794     // Again, using a smaller rule set
 795     UnicodeString rules(
 796                 "$alpha   = \\u03B1;"
 797                 "$nu      = \\u03BD;"
 798                 "$sigma   = \\u03C3;"
 799                 "$ypsilon = \\u03C5;"
 800                 "$vowel   = [aeiouAEIOU$alpha$ypsilon];"
 801                 "s <>           $sigma;"
 802                 "a <>           $alpha;"
 803                 "u <>  $vowel { $ypsilon;"
 804                 "y <>           $ypsilon;"
 805                 "n <>           $nu;",
 806                 "");
 807     Transliterator *mini = Transliterator::createFromRules("mini", rules, UTRANS_REVERSE, parseError, status);
 808     if (U_FAILURE(status)) { errln("FAIL: Transliterator constructor failed"); return; }
 809     expect(*mini, syn, "syn");
 810     expect(*mini, sayn, "saun");
 811     delete mini;
 812     mini = NULL;
 813
 814 #if !UCONFIG_NO_FORMATTING
 815     // Transliterate the Greek locale data
 816     Locale el("el");
 817     DateFormatSymbols syms(el, status);
 818     if (U_FAILURE(status)) { errln("FAIL: Transliterator constructor failed"); return; }
 819     int32_t i, count;
 820     const UnicodeString* data = syms.getMonths(count);
 821     for (i=0; i<count; ++i) {
 822         if (data[i].length() == 0) {
 823             continue;
 824         }
 825         UnicodeString out(data[i]);
 826         gl->transliterate(out);
 827         UBool ok = TRUE;
 828         if (data[i].length() >= 2 && out.length() >= 2 &&
 829             u_isupper(data[i].charAt(0)) && u_islower(data[i].charAt(1))) {
 830             if (!(u_isupper(out.charAt(0)) && u_islower(out.charAt(1)))) {
 831                 ok = FALSE;
 832             }
 833         }
 834         if (ok) {
 835             logln(prettify(data[i] + " -> " + out));
 836         } else {
 837             errln(UnicodeString("FAIL: ") + prettify(data[i] + " -> " + out));
 838         }
 839     }
 840 #endif
 841
 842     delete gl;
 843 }
 844
 845 /**
 846  * Prefix, suffix support in hex transliterators
 847  */
 848 void TransliteratorTest::TestJ243(void) {
 849     UErrorCode ec = U_ZERO_ERROR;
 850
 851     // Test default Hex-Any, which should handle
 852     // \u, \U, u+, and U+
 853     Transliterator *hex =
 854         Transliterator::createInstance("Hex-Any", UTRANS_FORWARD, ec);
 855     if (assertSuccess("getInstance", ec)) {
 856         expect(*hex, UnicodeString("\\u0041+\\U00000042,U+0043uU+0044z", ""), "A+B,CuDz");
 857     }
 858     delete hex;
 859
 860 //    // Try a custom Hex-Unicode
 861 //    // \uXXXX and &#xXXXX;
 862 //    ec = U_ZERO_ERROR;
 863 //    HexToUnicodeTransliterator hex2(UnicodeString("\\\\u###0;&\\#x###0\\;", ""), ec);
 864 //    expect(hex2, UnicodeString("\\u61\\u062\\u0063\\u00645\\u66x&#x30;&#x031;&#x0032;&#x00033;", ""),
 865 //           "abcd5fx012&#x00033;");
 866 //    // Try custom Any-Hex (default is tested elsewhere)
 867 //    ec = U_ZERO_ERROR;
 868 //    UnicodeToHexTransliterator hex3(UnicodeString("&\\#x###0;", ""), ec);
 869 //    expect(hex3, "012", "&#x30;&#x31;&#x32;");
 870 }
 871
 872 /**
 873  * Parsers need better syntax error messages.
 874  */
 875 void TransliteratorTest::TestJ329(void) {
 876
 877     struct { UBool containsErrors; const char* rule; } DATA[] = {
 878         { FALSE, "a > b; c > d" },
 879         { TRUE,  "a > b; no operator; c > d" },
 880     };
 881     int32_t DATA_length = (int32_t)(sizeof(DATA) / sizeof(DATA[0]));
 882
 883     for (int32_t i=0; i<DATA_length; ++i) {
 884         UErrorCode status = U_ZERO_ERROR;
 885         UParseError parseError;
 886         Transliterator *rbt = Transliterator::createFromRules("<ID>",
 887                                     DATA[i].rule,
 888                                     UTRANS_FORWARD,
 889                                     parseError,
 890                                     status);
 891         UBool gotError = U_FAILURE(status);
 892         UnicodeString desc(DATA[i].rule);
 893         desc.append(gotError ? " -> error" : " -> no error");
 894         if (gotError) {
 895             desc = desc + ", ParseError code=" + u_errorName(status) +
 896                 " line=" + parseError.line +
 897                 " offset=" + parseError.offset +
 898                 " context=" + parseError.preContext;
 899         }
 900         if (gotError == DATA[i].containsErrors) {
 901             logln(UnicodeString("Ok:   ") + desc);
 902         } else {
 903             errln(UnicodeString("FAIL: ") + desc);
 904         }
 905         delete rbt;
 906     }
 907 }
 908
 909 /**
 910  * Test segments and segment references.
 911  */
 912 void TransliteratorTest::TestSegments(void) {
 913     // Array of 3n items
 914     // Each item is <rules>, <input>, <expected output>
 915     UnicodeString DATA[] = {
 916         "([a-z]) '.' ([0-9]) > $2 '-' $1",
 917         "abc.123.xyz.456",
 918         "ab1-c23.xy4-z56",
 919
 920         // nested
 921         "(([a-z])([0-9])) > $1 '.' $2 '.' $3;",
 922         "a1 b2",
 923         "a1.a.1 b2.b.2",
 924     };
 925     int32_t DATA_length = (int32_t)(sizeof(DATA)/sizeof(*DATA));
 926
 927     for (int32_t i=0; i<DATA_length; i+=3) {
 928         logln("Pattern: " + prettify(DATA[i]));
 929         UParseError parseError;
 930         UErrorCode status = U_ZERO_ERROR;
 931         Transliterator *t = Transliterator::createFromRules("ID", DATA[i], UTRANS_FORWARD, parseError, status);
 932         if (U_FAILURE(status)) {
 933             errln("FAIL: RBT constructor");
 934         } else {
 935             expect(*t, DATA[i+1], DATA[i+2]);
 936         }
 937         delete t;
 938     }
 939 }
 940
 941 /**
 942  * Test cursor positioning outside of the key
 943  */
 944 void TransliteratorTest::TestCursorOffset(void) {
 945     // Array of 3n items
 946     // Each item is <rules>, <input>, <expected output>
 947     UnicodeString DATA[] = {
 948         "pre {alpha} post > | @ ALPHA ;"
 949         "eALPHA > beta ;"
 950         "pre {beta} post > BETA @@ | ;"
 951         "post > xyz",
 952
 953         "prealphapost prebetapost",
 954
 955         "prbetaxyz preBETApost",
 956     };
 957     int32_t DATA_length = (int32_t)(sizeof(DATA)/sizeof(*DATA));
 958
 959     for (int32_t i=0; i<DATA_length; i+=3) {
 960         logln("Pattern: " + prettify(DATA[i]));
 961         UParseError parseError;
 962         UErrorCode status = U_ZERO_ERROR;
 963         Transliterator *t = Transliterator::createFromRules("<ID>", DATA[i], UTRANS_FORWARD, parseError, status);
 964         if (U_FAILURE(status)) {
 965             errln("FAIL: RBT constructor");
 966         } else {
 967             expect(*t, DATA[i+1], DATA[i+2]);
 968         }
 969         delete t;
 970     }
 971 }
 972
 973 /**
 974  * Test zero length and > 1 char length variable values.  Test
 975  * use of variable refs in UnicodeSets.
 976  */
 977 void TransliteratorTest::TestArbitraryVariableValues(void) {
 978     // Array of 3n items
 979     // Each item is <rules>, <input>, <expected output>
 980     UnicodeString DATA[] = {
 981         "$abe = ab;"
 982         "$pat = x[yY]z;"
 983         "$ll  = 'a-z';"
 984         "$llZ = [$ll];"
 985         "$llY = [$ll$pat];"
 986         "$emp = ;"
 987
 988         "$abe > ABE;"
 989         "$pat > END;"
 990         "$llZ > 1;"
 991         "$llY > 2;"
 992         "7$emp 8 > 9;"
 993         "",
 994
 995         "ab xYzxyz stY78",
 996         "ABE ENDEND 1129",
 997     };
 998     int32_t DATA_length = (int32_t)(sizeof(DATA)/sizeof(*DATA));
 999
1000     for (int32_t i=0; i<DATA_length; i+=3) {
1001         logln("Pattern: " + prettify(DATA[i]));
1002         UParseError parseError;
1003         UErrorCode status = U_ZERO_ERROR;
1004         Transliterator *t = Transliterator::createFromRules("<ID>", DATA[i], UTRANS_FORWARD, parseError, status);
1005         if (U_FAILURE(status)) {
1006             errln("FAIL: RBT constructor");
1007         } else {
1008             expect(*t, DATA[i+1], DATA[i+2]);
1009         }
1010         delete t;
1011     }
1012 }
1013
1014 /**
1015  * Confirm that the contextStart, contextLimit, start, and limit
1016  * behave correctly. J474.
1017  */
1018 void TransliteratorTest::TestPositionHandling(void) {
1019     // Array of 3n items
1020     // Each item is <rules>, <input>, <expected output>
1021     const char* DATA[] = {
1022         "a{t} > SS ; {t}b > UU ; {t} > TT ;",
1023         "xtat txtb", // pos 0,9,0,9
1024         "xTTaSS TTxUUb",
1025
1026         "a{t} > SS ; {t}b > UU ; {t} > TT ; a > A ; b > B ;",
1027         "xtat txtb", // pos 2,9,3,8
1028         "xtaSS TTxUUb",
1029
1030         "a{t} > SS ; {t}b > UU ; {t} > TT ; a > A ; b > B ;",
1031         "xtat txtb", // pos 3,8,3,8
1032         "xtaTT TTxTTb",
1033     };
1034
1035     // Array of 4n positions -- these go with the DATA array
1036     // They are: contextStart, contextLimit, start, limit
1037     int32_t POS[] = {
1038         0, 9, 0, 9,
1039         2, 9, 3, 8,
1040         3, 8, 3, 8,
1041     };
1042
1043     int32_t n = (int32_t)(sizeof(DATA) / sizeof(DATA[0])) / 3;
1044     for (int32_t i=0; i<n; i++) {
1045         UErrorCode status = U_ZERO_ERROR;
1046         UParseError parseError;
1047         Transliterator *t = Transliterator::createFromRules("<ID>",
1048                                 DATA[3*i], UTRANS_FORWARD, parseError, status);
1049         if (U_FAILURE(status)) {
1050             delete t;
1051             errln("FAIL: RBT constructor");
1052             return;
1053         }
1054         UTransPosition pos;
1055         pos.contextStart= POS[4*i];
1056         pos.contextLimit = POS[4*i+1];
1057         pos.start = POS[4*i+2];
1058         pos.limit = POS[4*i+3];
1059         UnicodeString rsource(DATA[3*i+1]);
1060         t->transliterate(rsource, pos, status);
1061         if (U_FAILURE(status)) {
1062             delete t;
1063             errln("FAIL: transliterate");
1064             return;
1065         }
1066         t->finishTransliteration(rsource, pos);
1067         expectAux(DATA[3*i],
1068                   DATA[3*i+1],
1069                   rsource,
1070                   DATA[3*i+2]);
1071         delete t;
1072     }
1073 }
1074
1075 /**
1076  * Test the Hiragana-Katakana transliterator.
1077  */
1078 void TransliteratorTest::TestHiraganaKatakana(void) {
1079     UParseError parseError;
1080     UErrorCode status = U_ZERO_ERROR;
1081     Transliterator* hk = Transliterator::createInstance("Hiragana-Katakana", UTRANS_FORWARD, parseError, status);
1082     Transliterator* kh = Transliterator::createInstance("Katakana-Hiragana", UTRANS_FORWARD, parseError, status);
1083     if (hk == 0 || kh == 0) {
1084         dataerrln("FAIL: createInstance failed - %s", u_errorName(status));
1085         delete hk;
1086         delete kh;
1087         return;
1088     }
1089
1090     // Array of 3n items
1091     // Each item is "hk"|"kh"|"both", <Hiragana>, <Katakana>
1092     const char* DATA[] = {
1093         "both",
1094         "\\u3042\\u3090\\u3099\\u3092\\u3050",
1095         "\\u30A2\\u30F8\\u30F2\\u30B0",
1096
1097         "kh",
1098         "\\u307C\\u3051\\u3060\\u3042\\u3093\\u30FC",
1099         "\\u30DC\\u30F6\\u30C0\\u30FC\\u30F3\\u30FC",
1100     };
1101     int32_t DATA_length = (int32_t)(sizeof(DATA) / sizeof(DATA[0]));
1102
1103     for (int32_t i=0; i<DATA_length; i+=3) {
1104         UnicodeString h = CharsToUnicodeString(DATA[i+1]);
1105         UnicodeString k = CharsToUnicodeString(DATA[i+2]);
1106         switch (*DATA[i]) {
1107         case 0x68: //'h': // Hiragana-Katakana
1108             expect(*hk, h, k);
1109             break;
1110         case 0x6B: //'k': // Katakana-Hiragana
1111             expect(*kh, k, h);
1112             break;
1113         case 0x62: //'b': // both
1114             expect(*hk, h, k);
1115             expect(*kh, k, h);
1116             break;
1117         }
1118     }
1119     delete hk;
1120     delete kh;
1121 }
1122
1123 /**
1124  * Test cloning / copy constructor of RBT.
1125  */
1126 void TransliteratorTest::TestCopyJ476(void) {
1127     // The real test here is what happens when the destructors are
1128     // called.  So we let one object get destructed, and check to
1129     // see that its copy still works.
1130     Transliterator *t2 = 0;
1131     {
1132         UParseError parseError;
1133         UErrorCode status = U_ZERO_ERROR;
1134         Transliterator *t1 = Transliterator::createFromRules("t1",
1135             "a>A;b>B;'foo'+>'bar'", UTRANS_FORWARD, parseError, status);
1136         if (U_FAILURE(status)) {
1137             errln("FAIL: RBT constructor");
1138             return;
1139         }
1140         t2 = t1->clone(); // Call copy constructor under the covers.
1141         expect(*t1, "abcfoofoo", "ABcbar");
1142         delete t1;
1143     }
1144     expect(*t2, "abcfoofoo", "ABcbar");
1145     delete t2;
1146 }
1147
1148 /**
1149  * Test inter-Indic transliterators.  These are composed.
1150  * ICU4C Jitterbug 483.
1151  */
1152 void TransliteratorTest::TestInterIndic(void) {
1153     UnicodeString ID("Devanagari-Gujarati", "");
1154     UErrorCode status = U_ZERO_ERROR;
1155     UParseError parseError;
1156     Transliterator* dg = Transliterator::createInstance(ID, UTRANS_FORWARD, parseError, status);
1157     if (dg == 0) {
1158         dataerrln("FAIL: createInstance(" + ID + ") returned NULL - " + u_errorName(status));
1159         return;
1160     }
1161     UnicodeString id = dg->getID();
1162     if (id != ID) {
1163         errln("FAIL: createInstance(" + ID + ")->getID() => " + id);
1164     }
1165     UnicodeString dev = CharsToUnicodeString("\\u0901\\u090B\\u0925");
1166     UnicodeString guj = CharsToUnicodeString("\\u0A81\\u0A8B\\u0AA5");
1167     expect(*dg, dev, guj);
1168     delete dg;
1169 }
1170
1171 /**
1172  * Test filter syntax in IDs. (J918)
1173  */
1174 void TransliteratorTest::TestFilterIDs(void) {
1175     // Array of 3n strings:
1176     // <id>, <inverse id>, <input>, <expected output>
1177     const char* DATA[] = {
1178         "[aeiou]Any-Hex", // ID
1179         "[aeiou]Hex-Any", // expected inverse ID
1180         "quizzical",      // src
1181         "q\\u0075\\u0069zz\\u0069c\\u0061l", // expected ID.translit(src)
1182
1183         "[aeiou]Any-Hex;[^5]Hex-Any",
1184         "[^5]Any-Hex;[aeiou]Hex-Any",
1185         "quizzical",
1186         "q\\u0075izzical",
1187
1188         "[abc]Null",
1189         "[abc]Null",
1190         "xyz",
1191         "xyz",
1192     };
1193     enum { DATA_length = sizeof(DATA) / sizeof(DATA[0]) };
1194
1195     for (int i=0; i<DATA_length; i+=4) {
1196         UnicodeString ID(DATA[i], "");
1197         UnicodeString uID(DATA[i+1], "");
1198         UnicodeString data2(DATA[i+2], "");
1199         UnicodeString data3(DATA[i+3], "");
1200         UParseError parseError;
1201         UErrorCode status = U_ZERO_ERROR;
1202         Transliterator *t = Transliterator::createInstance(ID, UTRANS_FORWARD, parseError, status);
1203         if (t == 0) {
1204             errln("FAIL: createInstance(" + ID + ") returned NULL");
1205             return;
1206         }
1207         expect(*t, data2, data3);
1208
1209         // Check the ID
1210         if (ID != t->getID()) {
1211             errln("FAIL: createInstance(" + ID + ").getID() => " +
1212                   t->getID());
1213         }
1214
1215         // Check the inverse
1216         Transliterator *u = t->createInverse(status);
1217         if (u == 0) {
1218             errln("FAIL: " + ID + ".createInverse() returned NULL");
1219         } else if (u->getID() != uID) {
1220             errln("FAIL: " + ID + ".createInverse().getID() => " +
1221                   u->getID() + ", expected " + uID);
1222         }
1223
1224         delete t;
1225         delete u;
1226     }
1227 }
1228
1229 /**
1230  * Test the case mapping transliterators.
1231  */
1232 void TransliteratorTest::TestCaseMap(void) {
1233     UParseError parseError;
1234     UErrorCode status = U_ZERO_ERROR;
1235     Transliterator* toUpper =
1236         Transliterator::createInstance("Any-Upper[^xyzXYZ]", UTRANS_FORWARD, parseError, status);
1237     Transliterator* toLower =
1238         Transliterator::createInstance("Any-Lower[^xyzXYZ]", UTRANS_FORWARD, parseError, status);
1239     Transliterator* toTitle =
1240         Transliterator::createInstance("Any-Title[^xyzXYZ]", UTRANS_FORWARD, parseError, status);
1241     if (toUpper==0 || toLower==0 || toTitle==0) {
1242         errln("FAIL: createInstance returned NULL");
1243         delete toUpper;
1244         delete toLower;
1245         delete toTitle;
1246         return;
1247     }
1248
1249     expect(*toUpper, "The quick brown fox jumped over the lazy dogs.",
1250            "THE QUICK BROWN FOx JUMPED OVER THE LAzy DOGS.");
1251     expect(*toLower, "The quIck brown fOX jUMPED OVER THE LAzY dogs.",
1252            "the quick brown foX jumped over the lazY dogs.");
1253     expect(*toTitle, "the quick brown foX can't jump over the laZy dogs.",
1254            "The Quick Brown FoX Can't Jump Over The LaZy Dogs.");
1255
1256     delete toUpper;
1257     delete toLower;
1258     delete toTitle;
1259 }
1260
1261 /**
1262  * Test the name mapping transliterators.
1263  */
1264 void TransliteratorTest::TestNameMap(void) {
1265     UParseError parseError;
1266     UErrorCode status = U_ZERO_ERROR;
1267     Transliterator* uni2name =
1268         Transliterator::createInstance("Any-Name[^abc]", UTRANS_FORWARD, parseError, status);
1269     Transliterator* name2uni =
1270         Transliterator::createInstance("Name-Any", UTRANS_FORWARD, parseError, status);
1271     if (uni2name==0 || name2uni==0) {
1272         errln("FAIL: createInstance returned NULL");
1273         delete uni2name;
1274         delete name2uni;
1275         return;
1276     }
1277
1278     // Careful:  CharsToUS will convert "\\N" => "N"; use "\\\\N" for \N
1279     expect(*uni2name, CharsToUnicodeString("\\u00A0abc\\u4E01\\u00B5\\u0A81\\uFFFD\\u0004\\u0009\\u0081\\uFFFF"),
1280            CharsToUnicodeString("\\\\N{NO-BREAK SPACE}abc\\\\N{CJK UNIFIED IDEOGRAPH-4E01}\\\\N{MICRO SIGN}\\\\N{GUJARATI SIGN CANDRABINDU}\\\\N{REPLACEMENT CHARACTER}\\\\N{<control-0004>}\\\\N{<control-0009>}\\\\N{<control-0081>}\\\\N{<noncharacter-FFFF>}"));
1281     expect(*name2uni, UNICODE_STRING_SIMPLE("{\\N { NO-BREAK SPACE}abc\\N{  CJK UNIFIED  IDEOGRAPH-4E01  }\\N{x\\N{MICRO SIGN}\\N{GUJARATI SIGN CANDRABINDU}\\N{REPLACEMENT CHARACTER}\\N{<control-0004>}\\N{<control-0009>}\\N{<control-0081>}\\N{<noncharacter-FFFF>}\\N{<control-0004>}\\N{"),
1282            CharsToUnicodeString("{\\u00A0abc\\u4E01\\\\N{x\\u00B5\\u0A81\\uFFFD\\u0004\\u0009\\u0081\\uFFFF\\u0004\\\\N{"));
1283
1284     delete uni2name;
1285     delete name2uni;
1286
1287     // round trip
1288     Transliterator* t =
1289         Transliterator::createInstance("Any-Name;Name-Any", UTRANS_FORWARD, parseError, status);
1290     if (t==0) {
1291         errln("FAIL: createInstance returned NULL");
1292         delete t;
1293         return;
1294     }
1295
1296     // Careful:  CharsToUS will convert "\\N" => "N"; use "\\\\N" for \N
1297     UnicodeString s = CharsToUnicodeString("{\\u00A0abc\\u4E01\\\\N{x\\u00B5\\u0A81\\uFFFD\\u0004\\u0009\\u0081\\uFFFF\\u0004\\\\N{");
1298     expect(*t, s, s);
1299     delete t;
1300 }
1301
1302 /**
1303  * Test liberalized ID syntax.  1006c
1304  */
1305 void TransliteratorTest::TestLiberalizedID(void) {
1306     // Some test cases have an expected getID() value of NULL.  This
1307     // means I have disabled the test case for now.  This stuff is
1308     // still under development, and I haven't decided whether to make
1309     // getID() return canonical case yet.  It will all get rewritten
1310     // with the move to Source-Target/Variant IDs anyway. [aliu]
1311     const char* DATA[] = {
1312         "latin-greek", NULL /*"Latin-Greek"*/, "case insensitivity",
1313         "  Null  ", "Null", "whitespace",
1314         " Latin[a-z]-Greek  ", "[a-z]Latin-Greek", "inline filter",
1315         "  null  ; latin-greek  ", NULL /*"Null;Latin-Greek"*/, "compound whitespace",
1316     };
1317     const int32_t DATA_length = sizeof(DATA)/sizeof(DATA[0]);
1318     UParseError parseError;
1319     UErrorCode status= U_ZERO_ERROR;
1320     for (int32_t i=0; i<DATA_length; i+=3) {
1321         Transliterator *t = Transliterator::createInstance(DATA[i], UTRANS_FORWARD, parseError, status);
1322         if (t == 0) {
1323             dataerrln(UnicodeString("FAIL: ") + DATA[i+2] +
1324                   " cannot create ID \"" + DATA[i] + "\" - " + u_errorName(status));
1325         } else {
1326             UnicodeString exp;
1327             if (DATA[i+1]) {
1328                 exp = UnicodeString(DATA[i+1], "");
1329             }
1330             // Don't worry about getID() if the expected char*
1331             // is NULL -- see above.
1332             if (exp.length() == 0 || exp == t->getID()) {
1333                 logln(UnicodeString("Ok: ") + DATA[i+2] +
1334                       " create ID \"" + DATA[i] + "\" => \"" +
1335                       exp + "\"");
1336             } else {
1337                 errln(UnicodeString("FAIL: ") + DATA[i+2] +
1338                       " create ID \"" + DATA[i] + "\" => \"" +
1339                       t->getID() + "\", exp \"" + exp + "\"");
1340             }
1341             delete t;
1342         }
1343     }
1344 }
1345
1346 /* test for Jitterbug 912 */
1347 void TransliteratorTest::TestCreateInstance(){
1348     const char* FORWARD = "F";
1349     const char* REVERSE = "R";
1350     const char* DATA[] = {
1351         // Column 1: id
1352         // Column 2: direction
1353         // Column 3: expected ID, or "" if expect failure
1354         "Latin-Hangul", REVERSE, "Hangul-Latin", // JB#912
1355
1356         // JB#2689: bad compound causes crash
1357         "InvalidSource-InvalidTarget", FORWARD, "",
1358         "InvalidSource-InvalidTarget", REVERSE, "",
1359         "Hex-Any;InvalidSource-InvalidTarget", FORWARD, "",
1360         "Hex-Any;InvalidSource-InvalidTarget", REVERSE, "",
1361         "InvalidSource-InvalidTarget;Hex-Any", FORWARD, "",
1362         "InvalidSource-InvalidTarget;Hex-Any", REVERSE, "",
1363
1364         NULL
1365     };
1366
1367     for (int32_t i=0; DATA[i]; i+=3) {
1368         UParseError err;
1369         UErrorCode ec = U_ZERO_ERROR;
1370         UnicodeString id(DATA[i]);
1371         UTransDirection dir = (DATA[i+1]==FORWARD)?
1372             UTRANS_FORWARD:UTRANS_REVERSE;
1373         UnicodeString expID(DATA[i+2]);
1374         Transliterator* t =
1375             Transliterator::createInstance(id,dir,err,ec);
1376         UnicodeString newID;
1377         if (t) {
1378             newID = t->getID();
1379         }
1380         UBool ok = (newID == expID);
1381         if (!t) {
1382             newID = u_errorName(ec);
1383         }
1384         if (ok) {
1385             logln((UnicodeString)"Ok: createInstance(" +
1386                   id + "," + DATA[i+1] + ") => " + newID);
1387         } else {
1388             dataerrln((UnicodeString)"FAIL: createInstance(" +
1389                   id + "," + DATA[i+1] + ") => " + newID +
1390                   ", expected " + expID);
1391         }
1392         delete t;
1393     }
1394 }
1395
1396 /**
1397  * Test the normalization transliterator.
1398  */
1399 void TransliteratorTest::TestNormalizationTransliterator() {
1400     // THE FOLLOWING TWO TABLES ARE COPIED FROM com.ibm.test.normalizer.BasicTest
1401     // PLEASE KEEP THEM IN SYNC WITH BasicTest.
1402     const char* CANON[] = {
1403         // Input               Decomposed            Composed
1404         "cat",                "cat",                "cat"               ,
1405         "\\u00e0ardvark",      "a\\u0300ardvark",     "\\u00e0ardvark"    ,
1406
1407         "\\u1e0a",             "D\\u0307",            "\\u1e0a"            , // D-dot_above
1408         "D\\u0307",            "D\\u0307",            "\\u1e0a"            , // D dot_above
1409
1410         "\\u1e0c\\u0307",       "D\\u0323\\u0307",      "\\u1e0c\\u0307"      , // D-dot_below dot_above
1411         "\\u1e0a\\u0323",       "D\\u0323\\u0307",      "\\u1e0c\\u0307"      , // D-dot_above dot_below
1412         "D\\u0307\\u0323",      "D\\u0323\\u0307",      "\\u1e0c\\u0307"      , // D dot_below dot_above
1413
1414         "\\u1e10\\u0307\\u0323", "D\\u0327\\u0323\\u0307","\\u1e10\\u0323\\u0307", // D dot_below cedilla dot_above
1415         "D\\u0307\\u0328\\u0323","D\\u0328\\u0323\\u0307","\\u1e0c\\u0328\\u0307", // D dot_above ogonek dot_below
1416
1417         "\\u1E14",             "E\\u0304\\u0300",      "\\u1E14"            , // E-macron-grave
1418         "\\u0112\\u0300",       "E\\u0304\\u0300",      "\\u1E14"            , // E-macron + grave
1419         "\\u00c8\\u0304",       "E\\u0300\\u0304",      "\\u00c8\\u0304"      , // E-grave + macron
1420
1421         "\\u212b",             "A\\u030a",            "\\u00c5"            , // angstrom_sign
1422         "\\u00c5",             "A\\u030a",            "\\u00c5"            , // A-ring
1423
1424         "\\u00fdffin",         "y\\u0301ffin",        "\\u00fdffin"        ,    //updated with 3.0
1425         "\\u00fd\\uFB03n",      "y\\u0301\\uFB03n",     "\\u00fd\\uFB03n"     , //updated with 3.0
1426
1427         "Henry IV",           "Henry IV",           "Henry IV"          ,
1428         "Henry \\u2163",       "Henry \\u2163",       "Henry \\u2163"      ,
1429
1430         "\\u30AC",             "\\u30AB\\u3099",       "\\u30AC"            , // ga (Katakana)
1431         "\\u30AB\\u3099",       "\\u30AB\\u3099",       "\\u30AC"            , // ka + ten
1432         "\\uFF76\\uFF9E",       "\\uFF76\\uFF9E",       "\\uFF76\\uFF9E"      , // hw_ka + hw_ten
1433         "\\u30AB\\uFF9E",       "\\u30AB\\uFF9E",       "\\u30AB\\uFF9E"      , // ka + hw_ten
1434         "\\uFF76\\u3099",       "\\uFF76\\u3099",       "\\uFF76\\u3099"      , // hw_ka + ten
1435
1436         "A\\u0300\\u0316",      "A\\u0316\\u0300",      "\\u00C0\\u0316"      ,
1437         0 // end
1438     };
1439
1440     const char* COMPAT[] = {
1441         // Input               Decomposed            Composed
1442         "\\uFB4f",             "\\u05D0\\u05DC",       "\\u05D0\\u05DC"     , // Alef-Lamed vs. Alef, Lamed
1443
1444         "\\u00fdffin",         "y\\u0301ffin",        "\\u00fdffin"        ,    //updated for 3.0
1445         "\\u00fd\\uFB03n",      "y\\u0301ffin",        "\\u00fdffin"        , // ffi ligature -> f + f + i
1446
1447         "Henry IV",           "Henry IV",           "Henry IV"          ,
1448         "Henry \\u2163",       "Henry IV",           "Henry IV"          ,
1449
1450         "\\u30AC",             "\\u30AB\\u3099",       "\\u30AC"            , // ga (Katakana)
1451         "\\u30AB\\u3099",       "\\u30AB\\u3099",       "\\u30AC"            , // ka + ten
1452
1453         "\\uFF76\\u3099",       "\\u30AB\\u3099",       "\\u30AC"            , // hw_ka + ten
1454         0 // end
1455     };
1456
1457     int32_t i;
1458     UParseError parseError;
1459     UErrorCode status = U_ZERO_ERROR;
1460     Transliterator* NFD = Transliterator::createInstance("NFD", UTRANS_FORWARD, parseError, status);
1461     Transliterator* NFC = Transliterator::createInstance("NFC", UTRANS_FORWARD, parseError, status);
1462     if (!NFD || !NFC) {
1463         dataerrln("FAIL: createInstance failed: %s", u_errorName(status));
1464         delete NFD;
1465         delete NFC;
1466         return;
1467     }
1468     for (i=0; CANON[i]; i+=3) {
1469         UnicodeString in = CharsToUnicodeString(CANON[i]);
1470         UnicodeString expd = CharsToUnicodeString(CANON[i+1]);
1471         UnicodeString expc = CharsToUnicodeString(CANON[i+2]);
1472         expect(*NFD, in, expd);
1473         expect(*NFC, in, expc);
1474     }
1475     delete NFD;
1476     delete NFC;
1477
1478     Transliterator* NFKD = Transliterator::createInstance("NFKD", UTRANS_FORWARD, parseError, status);
1479     Transliterator* NFKC = Transliterator::createInstance("NFKC", UTRANS_FORWARD, parseError, status);
1480     if (!NFKD || !NFKC) {
1481         errln("FAIL: createInstance failed");
1482         delete NFKD;
1483         delete NFKC;
1484         return;
1485     }
1486     for (i=0; COMPAT[i]; i+=3) {
1487         UnicodeString in = CharsToUnicodeString(COMPAT[i]);
1488         UnicodeString expkd = CharsToUnicodeString(COMPAT[i+1]);
1489         UnicodeString expkc = CharsToUnicodeString(COMPAT[i+2]);
1490         expect(*NFKD, in, expkd);
1491         expect(*NFKC, in, expkc);
1492     }
1493     delete NFKD;
1494     delete NFKC;
1495
1496     UParseError pe;
1497     status = U_ZERO_ERROR;
1498     Transliterator *t = Transliterator::createInstance("NFD; [x]Remove",
1499                                                        UTRANS_FORWARD,
1500                                                        pe, status);
1501     if (t == 0) {
1502         errln("FAIL: createInstance failed");
1503     }
1504     expect(*t, CharsToUnicodeString("\\u010dx"),
1505            CharsToUnicodeString("c\\u030C"));
1506     delete t;
1507 }
1508
1509 /**
1510  * Test compound RBT rules.
1511  */
1512 void TransliteratorTest::TestCompoundRBT(void) {
1513     // Careful with spacing and ';' here:  Phrase this exactly
1514     // as toRules() is going to return it.  If toRules() changes
1515     // with regard to spacing or ';', then adjust this string.
1516     UnicodeString rule("::Hex-Any;\n"
1517                        "::Any-Lower;\n"
1518                        "a > '.A.';\n"
1519                        "b > '.B.';\n"
1520                        "::[^t]Any-Upper;", "");
1521     UParseError parseError;
1522     UErrorCode status = U_ZERO_ERROR;
1523     Transliterator *t = Transliterator::createFromRules("Test", rule, UTRANS_FORWARD, parseError, status);
1524     if (t == 0) {
1525         errln("FAIL: createFromRules failed");
1526         return;
1527     }
1528     expect(*t, UNICODE_STRING_SIMPLE("\\u0043at in the hat, bat on the mat"),
1529            "C.A.t IN tHE H.A.t, .B..A.t ON tHE M.A.t");
1530     UnicodeString r;
1531     t->toRules(r, TRUE);
1532     if (r == rule) {
1533         logln((UnicodeString)"OK: toRules() => " + r);
1534     } else {
1535         errln((UnicodeString)"FAIL: toRules() => " + r +
1536               ", expected " + rule);
1537     }
1538     delete t;
1539
1540     // Now test toRules
1541     t = Transliterator::createInstance("Greek-Latin; Latin-Cyrillic", UTRANS_FORWARD, parseError, status);
1542     if (t == 0) {
1543         dataerrln("FAIL: createInstance failed - %s", u_errorName(status));
1544         return;
1545     }
1546     UnicodeString exp("::Greek-Latin;\n::Latin-Cyrillic;");
1547     t->toRules(r, TRUE);
1548     if (r != exp) {
1549         errln((UnicodeString)"FAIL: toRules() => " + r +
1550               ", expected " + exp);
1551     } else {
1552         logln((UnicodeString)"OK: toRules() => " + r);
1553     }
1554     delete t;
1555
1556     // Round trip the result of toRules
1557     t = Transliterator::createFromRules("Test", r, UTRANS_FORWARD, parseError, status);
1558     if (t == 0) {
1559         errln("FAIL: createFromRules #2 failed");
1560         return;
1561     } else {
1562         logln((UnicodeString)"OK: createFromRules(" + r + ") succeeded");
1563     }
1564
1565     // Test toRules again
1566     t->toRules(r, TRUE);
1567     if (r != exp) {
1568         errln((UnicodeString)"FAIL: toRules() => " + r +
1569               ", expected " + exp);
1570     } else {
1571         logln((UnicodeString)"OK: toRules() => " + r);
1572     }
1573
1574     delete t;
1575
1576     // Test Foo(Bar) IDs.  Careful with spacing in id; make it conform
1577     // to what the regenerated ID will look like.
1578     UnicodeString id("Upper(Lower);(NFKC)", "");
1579     t = Transliterator::createInstance(id, UTRANS_FORWARD, parseError, status);
1580     if (t == 0) {
1581         errln("FAIL: createInstance #2 failed");
1582         return;
1583     }
1584     if (t->getID() == id) {
1585         logln((UnicodeString)"OK: created " + id);
1586     } else {
1587         errln((UnicodeString)"FAIL: createInstance(" + id +
1588               ").getID() => " + t->getID());
1589     }
1590
1591     Transliterator *u = t->createInverse(status);
1592     if (u == 0) {
1593         errln("FAIL: createInverse failed");
1594         delete t;
1595         return;
1596     }
1597     exp = "NFKC();Lower(Upper)";
1598     if (u->getID() == exp) {
1599         logln((UnicodeString)"OK: createInverse(" + id + ") => " +
1600               u->getID());
1601     } else {
1602         errln((UnicodeString)"FAIL: createInverse(" + id + ") => " +
1603               u->getID());
1604     }
1605     delete t;
1606     delete u;
1607 }
1608
1609 /**
1610  * Compound filter semantics were orginially not implemented
1611  * correctly.  Originally, each component filter f(i) is replaced by
1612  * f'(i) = f(i) && g, where g is the filter for the compound
1613  * transliterator.
1614  *
1615  * From Mark:
1616  *
1617  * Suppose and I have a transliterator X. Internally X is
1618  * "Greek-Latin; Latin-Cyrillic; Any-Lower". I use a filter [^A].
1619  *
1620  * The compound should convert all greek characters (through latin) to
1621  * cyrillic, then lowercase the result. The filter should say "don't
1622  * touch 'A' in the original". But because an intermediate result
1623  * happens to go through "A", the Greek Alpha gets hung up.
1624  */
1625 void TransliteratorTest::TestCompoundFilter(void) {
1626     UParseError parseError;
1627     UErrorCode status = U_ZERO_ERROR;
1628     Transliterator *t = Transliterator::createInstance
1629         ("Greek-Latin; Latin-Greek; Lower", UTRANS_FORWARD, parseError, status);
1630     if (t == 0) {
1631         dataerrln("FAIL: createInstance failed - %s", u_errorName(status));
1632         return;
1633     }
1634     t->adoptFilter(new UnicodeSet("[^A]", status));
1635     if (U_FAILURE(status)) {
1636         errln("FAIL: UnicodeSet ct failed");
1637         delete t;
1638         return;
1639     }
1640
1641     // Only the 'A' at index 1 should remain unchanged
1642     expect(*t,
1643            CharsToUnicodeString("BA\\u039A\\u0391"),
1644            CharsToUnicodeString("\\u03b2A\\u03ba\\u03b1"));
1645     delete t;
1646 }
1647
1648 void TransliteratorTest::TestRemove(void) {
1649     UParseError parseError;
1650     UErrorCode status = U_ZERO_ERROR;
1651     Transliterator *t = Transliterator::createInstance("Remove[abc]", UTRANS_FORWARD, parseError, status);
1652     if (t == 0) {
1653         errln("FAIL: createInstance failed");
1654         return;
1655     }
1656
1657     expect(*t, "Able bodied baker's cats", "Ale odied ker's ts");
1658
1659     // extra test for RemoveTransliterator::clone(), which at one point wasn't
1660     // duplicating the filter
1661     Transliterator* t2 = t->clone();
1662     expect(*t2, "Able bodied baker's cats", "Ale odied ker's ts");
1663
1664     delete t;
1665     delete t2;
1666 }
1667
1668 void TransliteratorTest::TestToRules(void) {
1669     const char* RBT = "rbt";
1670     const char* SET = "set";
1671     static const char* DATA[] = {
1672         RBT,
1673         "$a=\\u4E61; [$a] > A;",
1674         "[\\u4E61] > A;",
1675
1676         RBT,
1677         "$white=[[:Zs:][:Zl:]]; $white{a} > A;",
1678         "[[:Zs:][:Zl:]]{a} > A;",
1679
1680         SET,
1681         "[[:Zs:][:Zl:]]",
1682         "[[:Zs:][:Zl:]]",
1683
1684         SET,
1685         "[:Ps:]",
1686         "[:Ps:]",
1687
1688         SET,
1689         "[:L:]",
1690         "[:L:]",
1691
1692         SET,
1693         "[[:L:]-[A]]",
1694         "[[:L:]-[A]]",
1695
1696         SET,
1697         "[~[:Lu:][:Ll:]]",
1698         "[~[:Lu:][:Ll:]]",
1699
1700         SET,
1701         "[~[a-z]]",
1702         "[~[a-z]]",
1703
1704         RBT,
1705         "$white=[:Zs:]; $black=[^$white]; $black{a} > A;",
1706         "[^[:Zs:]]{a} > A;",
1707
1708         RBT,
1709         "$a=[:Zs:]; $b=[[a-z]-$a]; $b{a} > A;",
1710         "[[a-z]-[:Zs:]]{a} > A;",
1711
1712         RBT,
1713         "$a=[:Zs:]; $b=[$a&[a-z]]; $b{a} > A;",
1714         "[[:Zs:]&[a-z]]{a} > A;",
1715
1716         RBT,
1717         "$a=[:Zs:]; $b=[x$a]; $b{a} > A;",
1718         "[x[:Zs:]]{a} > A;",
1719
1720         RBT,
1721         "$accentMinus = [ [\\u0300-\\u0345] & [:M:] - [\\u0338]] ;"
1722         "$macron = \\u0304 ;"
1723         "$evowel = [aeiouyAEIOUY] ;"
1724         "$iotasub = \\u0345 ;"
1725         "($evowel $macron $accentMinus *) i > | $1 $iotasub ;",
1726         "([AEIOUYaeiouy]\\u0304[[\\u0300-\\u0345]&[:M:]-[\\u0338]]*)i > | $1 \\u0345;",
1727
1728         RBT,
1729         "([AEIOUYaeiouy]\\u0304[[:M:]-[\\u0304\\u0345]]*)i > | $1 \\u0345;",
1730         "([AEIOUYaeiouy]\\u0304[[:M:]-[\\u0304\\u0345]]*)i > | $1 \\u0345;",
1731     };
1732     static const int32_t DATA_length = (int32_t)(sizeof(DATA) / sizeof(DATA[0]));
1733
1734     for (int32_t d=0; d < DATA_length; d+=3) {
1735         if (DATA[d] == RBT) {
1736             // Transliterator test
1737             UParseError parseError;
1738             UErrorCode status = U_ZERO_ERROR;
1739             Transliterator *t = Transliterator::createFromRules("ID",
1740                                                                 UnicodeString(DATA[d+1], -1, US_INV), UTRANS_FORWARD, parseError, status);
1741             if (t == 0) {
1742                 dataerrln("FAIL: createFromRules failed - %s", u_errorName(status));
1743                 return;
1744             }
1745             UnicodeString rules, escapedRules;
1746             t->toRules(rules, FALSE);
1747             t->toRules(escapedRules, TRUE);
1748             UnicodeString expRules = CharsToUnicodeString(DATA[d+2]);
1749             UnicodeString expEscapedRules(DATA[d+2], -1, US_INV);
1750             if (rules == expRules) {
1751                 logln((UnicodeString)"Ok: " + UnicodeString(DATA[d+1], -1, US_INV) +
1752                       " => " + rules);
1753             } else {
1754                 errln((UnicodeString)"FAIL: " + UnicodeString(DATA[d+1], -1, US_INV) +
1755                       " => " + rules + ", exp " + expRules);
1756             }
1757             if (escapedRules == expEscapedRules) {
1758                 logln((UnicodeString)"Ok: " + UnicodeString(DATA[d+1], -1, US_INV) +
1759                       " => " + escapedRules);
1760             } else {
1761                 errln((UnicodeString)"FAIL: " + UnicodeString(DATA[d+1], -1, US_INV) +
1762                       " => " + escapedRules + ", exp " + expEscapedRules);
1763             }
1764             delete t;
1765
1766         } else {
1767             // UnicodeSet test
1768             UErrorCode status = U_ZERO_ERROR;
1769             UnicodeString pat(DATA[d+1], -1, US_INV);
1770             UnicodeString expToPat(DATA[d+2], -1, US_INV);
1771             UnicodeSet set(pat, status);
1772             if (U_FAILURE(status)) {
1773                 errln("FAIL: UnicodeSet ct failed");
1774                 return;
1775             }
1776             // Adjust spacing etc. as necessary.
1777             UnicodeString toPat;
1778             set.toPattern(toPat);
1779             if (expToPat == toPat) {
1780                 logln((UnicodeString)"Ok: " + pat +
1781                       " => " + toPat);
1782             } else {
1783                 errln((UnicodeString)"FAIL: " + pat +
1784                       " => " + prettify(toPat, TRUE) +
1785                       ", exp " + prettify(pat, TRUE));
1786             }
1787         }
1788     }
1789 }
1790
1791 void TransliteratorTest::TestContext() {
1792     UTransPosition pos = {0, 2, 0, 1}; // cs cl s l
1793     expect("de > x; {d}e > y;",
1794            "de",
1795            "ye",
1796            &pos);
1797
1798     expect("ab{c} > z;",
1799            "xadabdabcy",
1800            "xadabdabzy");
1801 }
1802
1803 void TransliteratorTest::TestSupplemental() {
1804
1805     expect(CharsToUnicodeString("$a=\\U00010300; $s=[\\U00010300-\\U00010323];"
1806                                 "a > $a; $s > i;"),
1807            CharsToUnicodeString("ab\\U0001030Fx"),
1808            CharsToUnicodeString("\\U00010300bix"));
1809
1810     expect(CharsToUnicodeString("$a=[a-z\\U00010300-\\U00010323];"
1811                                 "$b=[A-Z\\U00010400-\\U0001044D];"
1812                                 "($a)($b) > $2 $1;"),
1813            CharsToUnicodeString("aB\\U00010300\\U00010400c\\U00010401\\U00010301D"),
1814            CharsToUnicodeString("Ba\\U00010400\\U00010300\\U00010401cD\\U00010301"));
1815
1816     // k|ax\\U00010300xm
1817
1818     // k|a\\U00010400\\U00010300xm
1819     // ky|\\U00010400\\U00010300xm
1820     // ky\\U00010400|\\U00010300xm
1821
1822     // ky\\U00010400|\\U00010300\\U00010400m
1823     // ky\\U00010400y|\\U00010400m
1824     expect(CharsToUnicodeString("$a=[a\\U00010300-\\U00010323];"
1825                                 "$a {x} > | @ \\U00010400;"
1826                                 "{$a} [^\\u0000-\\uFFFF] > y;"),
1827            CharsToUnicodeString("kax\\U00010300xm"),
1828            CharsToUnicodeString("ky\\U00010400y\\U00010400m"));
1829
1830     expectT("Any-Name",
1831            CharsToUnicodeString("\\U00010330\\U000E0061\\u00A0"),
1832            UNICODE_STRING_SIMPLE("\\N{GOTHIC LETTER AHSA}\\N{TAG LATIN SMALL LETTER A}\\N{NO-BREAK SPACE}"));
1833
1834     expectT("Any-Hex/Unicode",
1835            CharsToUnicodeString("\\U00010330\\U0010FF00\\U000E0061\\u00A0"),
1836            UNICODE_STRING_SIMPLE("U+10330U+10FF00U+E0061U+00A0"));
1837
1838     expectT("Any-Hex/C",
1839            CharsToUnicodeString("\\U00010330\\U0010FF00\\U000E0061\\u00A0"),
1840            UNICODE_STRING_SIMPLE("\\U00010330\\U0010FF00\\U000E0061\\u00A0"));
1841
1842     expectT("Any-Hex/Perl",
1843            CharsToUnicodeString("\\U00010330\\U0010FF00\\U000E0061\\u00A0"),
1844            UNICODE_STRING_SIMPLE("\\x{10330}\\x{10FF00}\\x{E0061}\\x{A0}"));
1845
1846     expectT("Any-Hex/Java",
1847            CharsToUnicodeString("\\U00010330\\U0010FF00\\U000E0061\\u00A0"),
1848            UNICODE_STRING_SIMPLE("\\uD800\\uDF30\\uDBFF\\uDF00\\uDB40\\uDC61\\u00A0"));
1849
1850     expectT("Any-Hex/XML",
1851            CharsToUnicodeString("\\U00010330\\U0010FF00\\U000E0061\\u00A0"),
1852            "&#x10330;&#x10FF00;&#xE0061;&#xA0;");
1853
1854     expectT("Any-Hex/XML10",
1855            CharsToUnicodeString("\\U00010330\\U0010FF00\\U000E0061\\u00A0"),
1856            "&#66352;&#1113856;&#917601;&#160;");
1857
1858     expectT(UNICODE_STRING_SIMPLE("[\\U000E0000-\\U000E0FFF] Remove"),
1859            CharsToUnicodeString("\\U00010330\\U0010FF00\\U000E0061\\u00A0"),
1860            CharsToUnicodeString("\\U00010330\\U0010FF00\\u00A0"));
1861 }
1862
1863 void TransliteratorTest::TestQuantifier() {
1864
1865     // Make sure @ in a quantified anteContext works
1866     expect("a+ {b} > | @@ c; A > a; (a+ c) > '(' $1 ')';",
1867            "AAAAAb",
1868            "aaa(aac)");
1869
1870     // Make sure @ in a quantified postContext works
1871     expect("{b} a+ > c @@ |; (a+) > '(' $1 ')';",
1872            "baaaaa",
1873            "caa(aaa)");
1874
1875     // Make sure @ in a quantified postContext with seg ref works
1876     expect("{(b)} a+ > $1 @@ |; (a+) > '(' $1 ')';",
1877            "baaaaa",
1878            "baa(aaa)");
1879
1880     // Make sure @ past ante context doesn't enter ante context
1881     UTransPosition pos = {0, 5, 3, 5};
1882     expect("a+ {b} > | @@ c; x > y; (a+ c) > '(' $1 ')';",
1883            "xxxab",
1884            "xxx(ac)",
1885            &pos);
1886
1887     // Make sure @ past post context doesn't pass limit
1888     UTransPosition pos2 = {0, 4, 0, 2};
1889     expect("{b} a+ > c @@ |; x > y; a > A;",
1890            "baxx",
1891            "caxx",
1892            &pos2);
1893
1894     // Make sure @ past post context doesn't enter post context
1895     expect("{b} a+ > c @@ |; x > y; a > A;",
1896            "baxx",
1897            "cayy");
1898
1899     expect("(ab)? c > d;",
1900            "c abc ababc",
1901            "d d abd");
1902
1903     // NOTE: The (ab)+ when referenced just yields a single "ab",
1904     // not the full sequence of them.  This accords with perl behavior.
1905     expect("(ab)+ {x} > '(' $1 ')';",
1906            "x abx ababxy",
1907            "x ab(ab) abab(ab)y");
1908
1909     expect("b+ > x;",
1910            "ac abc abbc abbbc",
1911            "ac axc axc axc");
1912
1913     expect("[abc]+ > x;",
1914            "qac abrc abbcs abtbbc",
1915            "qx xrx xs xtx");
1916
1917     expect("q{(ab)+} > x;",
1918            "qa qab qaba qababc qaba",
1919            "qa qx qxa qxc qxa");
1920
1921     expect("q(ab)* > x;",
1922            "qa qab qaba qababc",
1923            "xa x xa xc");
1924
1925     // NOTE: The (ab)+ when referenced just yields a single "ab",
1926     // not the full sequence of them.  This accords with perl behavior.
1927     expect("q(ab)* > '(' $1 ')';",
1928            "qa qab qaba qababc",
1929            "()a (ab) (ab)a (ab)c");
1930
1931     // 'foo'+ and 'foo'* -- the quantifier should apply to the entire
1932     // quoted string
1933     expect("'ab'+ > x;",
1934            "bb ab ababb",
1935            "bb x xb");
1936
1937     // $foo+ and $foo* -- the quantifier should apply to the entire
1938     // variable reference
1939     expect("$var = ab; $var+ > x;",
1940            "bb ab ababb",
1941            "bb x xb");
1942 }
1943
1944 class TestTrans : public Transliterator {
1945 public:
1946     TestTrans(const UnicodeString& id) : Transliterator(id, 0) {
1947     }
1948     virtual Transliterator* clone(void) const {
1949         return new TestTrans(getID());
1950     }
1951     virtual void handleTransliterate(Replaceable& /*text*/, UTransPosition& offsets,
1952         UBool /*isIncremental*/) const
1953     {
1954         offsets.start = offsets.limit;
1955     }
1956     virtual UClassID getDynamicClassID() const;
1957     static UClassID U_EXPORT2 getStaticClassID();
1958 };
1959 UOBJECT_DEFINE_RTTI_IMPLEMENTATION(TestTrans)
1960
1961 /**
1962  * Test Source-Target/Variant.
1963  */
1964 void TransliteratorTest::TestSTV(void) {
1965     int32_t ns = Transliterator::countAvailableSources();
1966     if (ns < 0 || ns > 255) {
1967         errln((UnicodeString)"FAIL: Bad source count: " + ns);
1968         return;
1969     }
1970     int32_t i, j;
1971     for (i=0; i<ns; ++i) {
1972         UnicodeString source;
1973         Transliterator::getAvailableSource(i, source);
1974         logln((UnicodeString)"" + i + ": " + source);
1975         if (source.length() == 0) {
1976             errln("FAIL: empty source");
1977             continue;
1978         }
1979         int32_t nt = Transliterator::countAvailableTargets(source);
1980         if (nt < 0 || nt > 255) {
1981             errln((UnicodeString)"FAIL: Bad target count: " + nt);
1982             continue;
1983         }
1984         for (int32_t j=0; j<nt; ++j) {
1985             UnicodeString target;
1986             Transliterator::getAvailableTarget(j, source, target);
1987             logln((UnicodeString)" " + j + ": " + target);
1988             if (target.length() == 0) {
1989                 errln("FAIL: empty target");
1990                 continue;
1991             }
1992             int32_t nv = Transliterator::countAvailableVariants(source, target);
1993             if (nv < 0 || nv > 255) {
1994                 errln((UnicodeString)"FAIL: Bad variant count: " + nv);
1995                 continue;
1996             }
1997             for (int32_t k=0; k<nv; ++k) {
1998                 UnicodeString variant;
1999                 Transliterator::getAvailableVariant(k, source, target, variant);
2000                 if (variant.length() == 0) {
2001                     logln((UnicodeString)"  " + k + ": <empty>");
2002                 } else {
2003                     logln((UnicodeString)"  " + k + ": " + variant);
2004                 }
2005             }
2006         }
2007     }
2008
2009     // Test registration
2010     const char* IDS[] = { "Fieruwer", "Seoridf-Sweorie", "Oewoir-Oweri/Vsie" };
2011     const char* FULL_IDS[] = { "Any-Fieruwer", "Seoridf-Sweorie", "Oewoir-Oweri/Vsie" };
2012     const char* SOURCES[] = { NULL, "Seoridf", "Oewoir" };
2013     for (i=0; i<3; ++i) {
2014         Transliterator *t = new TestTrans(IDS[i]);
2015         if (t == 0) {
2016             errln("FAIL: out of memory");
2017             return;
2018         }
2019         if (t->getID() != IDS[i]) {
2020             errln((UnicodeString)"FAIL: ID mismatch for " + IDS[i]);
2021             delete t;
2022             return;
2023         }
2024         Transliterator::registerInstance(t);
2025         UErrorCode status = U_ZERO_ERROR;
2026         t = Transliterator::createInstance(IDS[i], UTRANS_FORWARD, status);
2027         if (t == NULL) {
2028             errln((UnicodeString)"FAIL: Registration/creation failed for ID " +
2029                   IDS[i]);
2030         } else {
2031             logln((UnicodeString)"Ok: Registration/creation succeeded for ID " +
2032                   IDS[i]);
2033             delete t;
2034         }
2035         Transliterator::unregister(IDS[i]);
2036         t = Transliterator::createInstance(IDS[i], UTRANS_FORWARD, status);
2037         if (t != NULL) {
2038             errln((UnicodeString)"FAIL: Unregistration failed for ID " +
2039                   IDS[i]);
2040             delete t;
2041         }
2042     }
2043
2044     // Make sure getAvailable API reflects removal
2045     int32_t n = Transliterator::countAvailableIDs();
2046     for (i=0; i<n; ++i) {
2047         UnicodeString id = Transliterator::getAvailableID(i);
2048         for (j=0; j<3; ++j) {
2049             if (id.caseCompare(FULL_IDS[j],0)==0) {
2050                 errln((UnicodeString)"FAIL: unregister(" + id + ") failed");
2051             }
2052         }
2053     }
2054     n = Transliterator::countAvailableTargets("Any");
2055     for (i=0; i<n; ++i) {
2056         UnicodeString t;
2057         Transliterator::getAvailableTarget(i, "Any", t);
2058         if (t.caseCompare(IDS[0],0)==0) {
2059             errln((UnicodeString)"FAIL: unregister(Any-" + t + ") failed");
2060         }
2061     }
2062     n = Transliterator::countAvailableSources();
2063     for (i=0; i<n; ++i) {
2064         UnicodeString s;
2065         Transliterator::getAvailableSource(i, s);
2066         for (j=0; j<3; ++j) {
2067             if (SOURCES[j] == NULL) continue;
2068             if (s.caseCompare(SOURCES[j],0)==0) {
2069                 errln((UnicodeString)"FAIL: unregister(" + s + "-*) failed");
2070             }
2071         }
2072     }
2073 }
2074
2075 /**
2076  * Test inverse of Greek-Latin; Title()
2077  */
2078 void TransliteratorTest::TestCompoundInverse(void) {
2079     UParseError parseError;
2080     UErrorCode status = U_ZERO_ERROR;
2081     Transliterator *t = Transliterator::createInstance
2082         ("Greek-Latin; Title()", UTRANS_REVERSE,parseError, status);
2083     if (t == 0) {
2084         dataerrln("FAIL: createInstance - %s", u_errorName(status));
2085         return;
2086     }
2087     UnicodeString exp("(Title);Latin-Greek");
2088     if (t->getID() == exp) {
2089         logln("Ok: inverse of \"Greek-Latin; Title()\" is \"" +
2090               t->getID());
2091     } else {
2092         errln("FAIL: inverse of \"Greek-Latin; Title()\" is \"" +
2093               t->getID() + "\", expected \"" + exp + "\"");
2094     }
2095     delete t;
2096 }
2097
2098 /**
2099  * Test NFD chaining with RBT
2100  */
2101 void TransliteratorTest::TestNFDChainRBT() {
2102     UParseError pe;
2103     UErrorCode ec = U_ZERO_ERROR;
2104     Transliterator* t = Transliterator::createFromRules(
2105                                "TEST", "::NFD; aa > Q; a > q;",
2106                                UTRANS_FORWARD, pe, ec);
2107     if (t == NULL || U_FAILURE(ec)) {
2108         dataerrln("FAIL: Transliterator::createFromRules failed with %s", u_errorName(ec));
2109         return;
2110     }
2111     expect(*t, "aa", "Q");
2112     delete t;
2113
2114     // TEMPORARY TESTS -- BEING DEBUGGED
2115 //=-    UnicodeString s, s2;
2116 //=-    t = Transliterator::createInstance("Latin-Devanagari", UTRANS_FORWARD, pe, ec);
2117 //=-    s = CharsToUnicodeString("rmk\\u1E63\\u0113t");
2118 //=-    s2 = CharsToUnicodeString("\\u0930\\u094D\\u092E\\u094D\\u0915\\u094D\\u0937\\u0947\\u0924\\u094D");
2119 //=-    expect(*t, s, s2);
2120 //=-    delete t;
2121 //=-
2122 //=-    t = Transliterator::createInstance("Devanagari-Latin", UTRANS_FORWARD, pe, ec);
2123 //=-    expect(*t, s2, s);
2124 //=-    delete t;
2125 //=-
2126 //=-    t = Transliterator::createInstance("Latin-Devanagari;Devanagari-Latin", UTRANS_FORWARD, pe, ec);
2127 //=-    s = CharsToUnicodeString("rmk\\u1E63\\u0113t");
2128 //=-    expect(*t, s, s);
2129 //=-    delete t;
2130
2131 //    const char* source[] = {
2132 //        /*
2133 //        "\\u015Br\\u012Bmad",
2134 //        "bhagavadg\\u012Bt\\u0101",
2135 //        "adhy\\u0101ya",
2136 //        "arjuna",
2137 //        "vi\\u1E63\\u0101da",
2138 //        "y\\u014Dga",
2139 //        "dhr\\u0325tar\\u0101\\u1E63\\u1E6Dra",
2140 //        "uv\\u0101cr\\u0325",
2141 //        */
2142 //        "rmk\\u1E63\\u0113t",
2143 //      //"dharmak\\u1E63\\u0113tr\\u0113",
2144 //        /*
2145 //        "kuruk\\u1E63\\u0113tr\\u0113",
2146 //        "samav\\u0113t\\u0101",
2147 //        "yuyutsava-\\u1E25",
2148 //        "m\\u0101mak\\u0101-\\u1E25",
2149 //     // "p\\u0101\\u1E47\\u1E0Dav\\u0101\\u015Bcaiva",
2150 //        "kimakurvata",
2151 //        "san\\u0304java",
2152 //        */
2153 //
2154 //        0
2155 //    };
2156 //    const char* expected[] = {
2157 //        /*
2158 //        "\\u0936\\u094d\\u0930\\u0940\\u092e\\u0926\\u094d",
2159 //        "\\u092d\\u0917\\u0935\\u0926\\u094d\\u0917\\u0940\\u0924\\u093e",
2160 //        "\\u0905\\u0927\\u094d\\u092f\\u093e\\u092f",
2161 //        "\\u0905\\u0930\\u094d\\u091c\\u0941\\u0928",
2162 //        "\\u0935\\u093f\\u0937\\u093e\\u0926",
2163 //        "\\u092f\\u094b\\u0917",
2164 //        "\\u0927\\u0943\\u0924\\u0930\\u093e\\u0937\\u094d\\u091f\\u094d\\u0930",
2165 //        "\\u0909\\u0935\\u093E\\u091A\\u0943",
2166 //        */
2167 //        "\\u0927",
2168 //        //"\\u0927\\u0930\\u094d\\u092e\\u0915\\u094d\\u0937\\u0947\\u0924\\u094d\\u0930\\u0947",
2169 //        /*
2170 //        "\\u0915\\u0941\\u0930\\u0941\\u0915\\u094d\\u0937\\u0947\\u0924\\u094d\\u0930\\u0947",
2171 //        "\\u0938\\u092e\\u0935\\u0947\\u0924\\u093e",
2172 //        "\\u092f\\u0941\\u092f\\u0941\\u0924\\u094d\\u0938\\u0935\\u0903",
2173 //        "\\u092e\\u093e\\u092e\\u0915\\u093e\\u0903",
2174 //    //  "\\u092a\\u093e\\u0923\\u094d\\u0921\\u0935\\u093e\\u0936\\u094d\\u091a\\u0948\\u0935",
2175 //        "\\u0915\\u093f\\u092e\\u0915\\u0941\\u0930\\u094d\\u0935\\u0924",
2176 //        "\\u0938\\u0902\\u091c\\u0935",
2177 //        */
2178 //        0
2179 //    };
2180 //    UErrorCode status = U_ZERO_ERROR;
2181 //    UParseError parseError;
2182 //    UnicodeString message;
2183 //    Transliterator* latinToDevToLatin=Transliterator::createInstance("Latin-Devanagari;Devanagari-Latin", UTRANS_FORWARD, parseError, status);
2184 //    Transliterator* devToLatinToDev=Transliterator::createInstance("Devanagari-Latin;Latin-Devanagari", UTRANS_FORWARD, parseError, status);
2185 //    if(U_FAILURE(status)){
2186 //        errln("FAIL: construction " +   UnicodeString(" Error: ") + u_errorName(status));
2187 //        errln("PreContext: " + prettify(parseError.preContext) + "PostContext: " + prettify( parseError.postContext) );
2188 //        delete latinToDevToLatin;
2189 //        delete devToLatinToDev;
2190 //        return;
2191 //    }
2192 //    UnicodeString gotResult;
2193 //    for(int i= 0; source[i] != 0; i++){
2194 //        gotResult = source[i];
2195 //        expect(*latinToDevToLatin,CharsToUnicodeString(source[i]),CharsToUnicodeString(source[i]));
2196 //        expect(*devToLatinToDev,CharsToUnicodeString(expected[i]),CharsToUnicodeString(expected[i]));
2197 //    }
2198 //    delete latinToDevToLatin;
2199 //    delete devToLatinToDev;
2200 }
2201
2202 /**
2203  * Inverse of "Null" should be "Null". (J21)
2204  */
2205 void TransliteratorTest::TestNullInverse() {
2206     UParseError pe;
2207     UErrorCode ec = U_ZERO_ERROR;
2208     Transliterator *t = Transliterator::createInstance("Null", UTRANS_FORWARD, pe, ec);
2209     if (t == 0 || U_FAILURE(ec)) {
2210         errln("FAIL: createInstance");
2211         return;
2212     }
2213     Transliterator *u = t->createInverse(ec);
2214     if (u == 0 || U_FAILURE(ec)) {
2215         errln("FAIL: createInverse");
2216         delete t;
2217         return;
2218     }
2219     if (u->getID() != "Null") {
2220         errln("FAIL: Inverse of Null should be Null");
2221     }
2222     delete t;
2223     delete u;
2224 }
2225
2226 /**
2227  * Check ID of inverse of alias. (J22)
2228  */
2229 void TransliteratorTest::TestAliasInverseID() {
2230     UnicodeString ID("Latin-Hangul", ""); // This should be any alias ID with an inverse
2231     UParseError pe;
2232     UErrorCode ec = U_ZERO_ERROR;
2233     Transliterator *t = Transliterator::createInstance(ID, UTRANS_FORWARD, pe, ec);
2234     if (t == 0 || U_FAILURE(ec)) {
2235         dataerrln("FAIL: createInstance - %s", u_errorName(ec));
2236         return;
2237     }
2238     Transliterator *u = t->createInverse(ec);
2239     if (u == 0 || U_FAILURE(ec)) {
2240         errln("FAIL: createInverse");
2241         delete t;
2242         return;
2243     }
2244     UnicodeString exp = "Hangul-Latin";
2245     UnicodeString got = u->getID();
2246     if (got != exp) {
2247         errln((UnicodeString)"FAIL: Inverse of " + ID + " is " + got +
2248               ", expected " + exp);
2249     }
2250     delete t;
2251     delete u;
2252 }
2253
2254 /**
2255  * Test IDs of inverses of compound transliterators. (J20)
2256  */
2257 void TransliteratorTest::TestCompoundInverseID() {
2258     UnicodeString ID = "Latin-Jamo;NFC(NFD)";
2259     UParseError pe;
2260     UErrorCode ec = U_ZERO_ERROR;
2261     Transliterator *t = Transliterator::createInstance(ID, UTRANS_FORWARD, pe, ec);
2262     if (t == 0 || U_FAILURE(ec)) {
2263         dataerrln("FAIL: createInstance - %s", u_errorName(ec));
2264         return;
2265     }
2266     Transliterator *u = t->createInverse(ec);
2267     if (u == 0 || U_FAILURE(ec)) {
2268         errln("FAIL: createInverse");
2269         delete t;
2270         return;
2271     }
2272     UnicodeString exp = "NFD(NFC);Jamo-Latin";
2273     UnicodeString got = u->getID();
2274     if (got != exp) {
2275         errln((UnicodeString)"FAIL: Inverse of " + ID + " is " + got +
2276               ", expected " + exp);
2277     }
2278     delete t;
2279     delete u;
2280 }
2281
2282 /**
2283  * Test undefined variable.
2284
2285  */
2286 void TransliteratorTest::TestUndefinedVariable() {
2287     UnicodeString rule = "$initial } a <> \\u1161;";
2288     UParseError pe;
2289     UErrorCode ec = U_ZERO_ERROR;
2290     Transliterator *t = Transliterator::createFromRules("<ID>", rule, UTRANS_FORWARD, pe, ec);
2291     delete t;
2292     if (U_FAILURE(ec)) {
2293         logln((UnicodeString)"OK: Got exception for " + rule + ", as expected: " +
2294               u_errorName(ec));
2295         return;
2296     }
2297     errln((UnicodeString)"Fail: bogus rule " + rule + " compiled with error " +
2298           u_errorName(ec));
2299 }
2300
2301 /**
2302  * Test empty context.
2303  */
2304 void TransliteratorTest::TestEmptyContext() {
2305     expect(" { a } > b;", "xay a ", "xby b ");
2306 }
2307
2308 /**
2309 * Test compound filter ID syntax
2310 */
2311 void TransliteratorTest::TestCompoundFilterID(void) {
2312     static const char* DATA[] = {
2313         // Col. 1 = ID or rule set (latter must start with #)
2314
2315         // = columns > 1 are null if expect col. 1 to be illegal =
2316
2317         // Col. 2 = direction, "F..." or "R..."
2318         // Col. 3 = source string
2319         // Col. 4 = exp result
2320
2321         "[abc]; [abc]", NULL, NULL, NULL, // multiple filters
2322         "Latin-Greek; [abc];", NULL, NULL, NULL, // misplaced filter
2323         "[b]; Latin-Greek; Upper; ([xyz])", "F", "abc", "a\\u0392c",
2324         "[b]; (Lower); Latin-Greek; Upper(); ([\\u0392])", "R", "\\u0391\\u0392\\u0393", "\\u0391b\\u0393",
2325         "#\n::[b]; ::Latin-Greek; ::Upper; ::([xyz]);", "F", "abc", "a\\u0392c",
2326         "#\n::[b]; ::(Lower); ::Latin-Greek; ::Upper(); ::([\\u0392]);", "R", "\\u0391\\u0392\\u0393", "\\u0391b\\u0393",
2327         NULL,
2328     };
2329
2330     for (int32_t i=0; DATA[i]; i+=4) {
2331         UnicodeString id = CharsToUnicodeString(DATA[i]);
2332         UTransDirection direction = (DATA[i+1] != NULL && DATA[i+1][0] == 'R') ?
2333             UTRANS_REVERSE : UTRANS_FORWARD;
2334         UnicodeString source;
2335         UnicodeString exp;
2336         if (DATA[i+2] != NULL) {
2337             source = CharsToUnicodeString(DATA[i+2]);
2338             exp = CharsToUnicodeString(DATA[i+3]);
2339         }
2340         UBool expOk = (DATA[i+1] != NULL);
2341         Transliterator* t = NULL;
2342         UParseError pe;
2343         UErrorCode ec = U_ZERO_ERROR;
2344         if (id.charAt(0) == 0x23/*#*/) {
2345             t = Transliterator::createFromRules("ID", id, direction, pe, ec);
2346         } else {
2347             t = Transliterator::createInstance(id, direction, pe, ec);
2348         }
2349         UBool ok = (t != NULL && U_SUCCESS(ec));
2350         UnicodeString transID;
2351         if (t!=0) {
2352             transID = t->getID();
2353         }
2354         else {
2355             transID = UnicodeString("NULL", "");
2356         }
2357         if (ok == expOk) {
2358             logln((UnicodeString)"Ok: " + id + " => " + transID + ", " +
2359                   u_errorName(ec));
2360             if (source.length() != 0) {
2361                 expect(*t, source, exp);
2362             }
2363             delete t;
2364         } else {
2365             dataerrln((UnicodeString)"FAIL: " + id + " => " + transID + ", " +
2366                   u_errorName(ec));
2367         }
2368     }
2369 }
2370
2371 /**
2372  * Test new property set syntax
2373  */
2374 void TransliteratorTest::TestPropertySet() {
2375     expect(UNICODE_STRING_SIMPLE("a>A; \\p{Lu}>x; \\p{ANY}>y;"), "abcDEF", "Ayyxxx");
2376     expect("(.+)>'[' $1 ']';", " a stitch \n in time \r saves 9",
2377            "[ a stitch ]\n[ in time ]\r[ saves 9]");
2378 }
2379
2380 /**
2381  * Test various failure points of the new 2.0 engine.
2382  */
2383 void TransliteratorTest::TestNewEngine() {
2384     UParseError pe;
2385     UErrorCode ec = U_ZERO_ERROR;
2386     Transliterator *t = Transliterator::createInstance("Latin-Hiragana", UTRANS_FORWARD, pe, ec);
2387     if (t == 0 || U_FAILURE(ec)) {
2388         dataerrln("FAIL: createInstance Latin-Hiragana - %s", u_errorName(ec));
2389         return;
2390     }
2391     // Katakana should be untouched
2392     expect(*t, CharsToUnicodeString("a\\u3042\\u30A2"),
2393            CharsToUnicodeString("\\u3042\\u3042\\u30A2"));
2394
2395     delete t;
2396
2397 #if 1
2398     // This test will only work if Transliterator.ROLLBACK is
2399     // true.  Otherwise, this test will fail, revealing a
2400     // limitation of global filters in incremental mode.
2401     Transliterator *a =
2402         Transliterator::createFromRules("a_to_A", "a > A;", UTRANS_FORWARD, pe, ec);
2403     Transliterator *A =
2404         Transliterator::createFromRules("A_to_b", "A > b;", UTRANS_FORWARD, pe, ec);
2405     if (U_FAILURE(ec)) {
2406         delete a;
2407         delete A;
2408         return;
2409     }
2410
2411     Transliterator* array[3];
2412     array[0] = a;
2413     array[1] = Transliterator::createInstance("NFD", UTRANS_FORWARD, pe, ec);
2414     array[2] = A;
2415     if (U_FAILURE(ec)) {
2416         errln("FAIL: createInstance NFD");
2417         delete a;
2418         delete A;
2419         delete array[1];
2420         return;
2421     }
2422
2423     t = new CompoundTransliterator(array, 3, new UnicodeSet("[:Ll:]", ec));
2424     if (U_FAILURE(ec)) {
2425         errln("FAIL: UnicodeSet constructor");
2426         delete a;
2427         delete A;
2428         delete array[1];
2429         delete t;
2430         return;
2431     }
2432
2433     expect(*t, "aAaA", "bAbA");
2434
2435     assertTrue("countElements", t->countElements() == 3);
2436     assertEquals("getElement(0)", t->getElement(0, ec).getID(), "a_to_A");
2437     assertEquals("getElement(1)", t->getElement(1, ec).getID(), "NFD");
2438     assertEquals("getElement(2)", t->getElement(2, ec).getID(), "A_to_b");
2439     assertSuccess("getElement", ec);
2440
2441     delete a;
2442     delete A;
2443     delete array[1];
2444     delete t;
2445 #endif
2446
2447     expect("$smooth = x; $macron = q; [:^L:] { ([aeiouyAEIOUY] $macron?) } [^aeiouyAEIOUY$smooth$macron] > | $1 $smooth ;",
2448            "a",
2449            "ax");
2450
2451     UnicodeString gr = CharsToUnicodeString(
2452         "$ddot = \\u0308 ;"
2453         "$lcgvowel = [\\u03b1\\u03b5\\u03b7\\u03b9\\u03bf\\u03c5\\u03c9] ;"
2454         "$rough = \\u0314 ;"
2455         "($lcgvowel+ $ddot?) $rough > h | $1 ;"
2456         "\\u03b1 <> a ;"
2457         "$rough <> h ;");
2458
2459     expect(gr, CharsToUnicodeString("\\u03B1\\u0314"), "ha");
2460 }
2461
2462 /**
2463  * Test quantified segment behavior.  We want:
2464  * ([abc])+ > x $1 x; applied to "cba" produces "xax"
2465  */
2466 void TransliteratorTest::TestQuantifiedSegment(void) {
2467     // The normal case
2468     expect("([abc]+) > x $1 x;", "cba", "xcbax");
2469
2470     // The tricky case; the quantifier is around the segment
2471     expect("([abc])+ > x $1 x;", "cba", "xax");
2472
2473     // Tricky case in reverse direction
2474     expect("([abc])+ { q > x $1 x;", "cbaq", "cbaxax");
2475
2476     // Check post-context segment
2477     expect("{q} ([a-d])+ > '(' $1 ')';", "ddqcba", "dd(a)cba");
2478
2479     // Test toRule/toPattern for non-quantified segment.
2480     // Careful with spacing here.
2481     UnicodeString r("([a-c]){q} > x $1 x;");
2482     UParseError pe;
2483     UErrorCode ec = U_ZERO_ERROR;
2484     Transliterator* t = Transliterator::createFromRules("ID", r, UTRANS_FORWARD, pe, ec);
2485     if (U_FAILURE(ec)) {
2486         errln("FAIL: createFromRules");
2487         delete t;
2488         return;
2489     }
2490     UnicodeString rr;
2491     t->toRules(rr, TRUE);
2492     if (r != rr) {
2493         errln((UnicodeString)"FAIL: \"" + r + "\" x toRules() => \"" + rr + "\"");
2494     } else {
2495         logln((UnicodeString)"Ok: \"" + r + "\" x toRules() => \"" + rr + "\"");
2496     }
2497     delete t;
2498
2499     // Test toRule/toPattern for quantified segment.
2500     // Careful with spacing here.
2501     r = "([a-c])+{q} > x $1 x;";
2502     t = Transliterator::createFromRules("ID", r, UTRANS_FORWARD, pe, ec);
2503     if (U_FAILURE(ec)) {
2504         errln("FAIL: createFromRules");
2505         delete t;
2506         return;
2507     }
2508     t->toRules(rr, TRUE);
2509     if (r != rr) {
2510         errln((UnicodeString)"FAIL: \"" + r + "\" x toRules() => \"" + rr + "\"");
2511     } else {
2512         logln((UnicodeString)"Ok: \"" + r + "\" x toRules() => \"" + rr + "\"");
2513     }
2514     delete t;
2515 }
2516
2517 //======================================================================
2518 // Ram's tests
2519 //======================================================================
2520 void TransliteratorTest::TestDevanagariLatinRT(){
2521     const int MAX_LEN= 52;
2522     const char* const source[MAX_LEN] = {
2523         "bh\\u0101rata",
2524         "kra",
2525         "k\\u1E63a",
2526         "khra",
2527         "gra",
2528         "\\u1E45ra",
2529         "cra",
2530         "chra",
2531         "j\\u00F1a",
2532         "jhra",
2533         "\\u00F1ra",
2534         "\\u1E6Dya",
2535         "\\u1E6Dhra",
2536         "\\u1E0Dya",
2537       //"r\\u0323ya", // \u095c is not valid in Devanagari
2538         "\\u1E0Dhya",
2539         "\\u1E5Bhra",
2540         "\\u1E47ra",
2541         "tta",
2542         "thra",
2543         "dda",
2544         "dhra",
2545         "nna",
2546         "pra",
2547         "phra",
2548         "bra",
2549         "bhra",
2550         "mra",
2551         "\\u1E49ra",
2552       //"l\\u0331ra",
2553         "yra",
2554         "\\u1E8Fra",
2555       //"l-",
2556         "vra",
2557         "\\u015Bra",
2558         "\\u1E63ra",
2559         "sra",
2560         "hma",
2561         "\\u1E6D\\u1E6Da",
2562         "\\u1E6D\\u1E6Dha",
2563         "\\u1E6Dh\\u1E6Dha",
2564         "\\u1E0D\\u1E0Da",
2565         "\\u1E0D\\u1E0Dha",
2566         "\\u1E6Dya",
2567         "\\u1E6Dhya",
2568         "\\u1E0Dya",
2569         "\\u1E0Dhya",
2570         // Not roundtrippable --
2571         // \\u0939\\u094d\\u094d\\u092E  - hma
2572         // \\u0939\\u094d\\u092E         - hma
2573         // CharsToUnicodeString("hma"),
2574         "hya",
2575         "\\u015Br\\u0325",
2576         "\\u015Bca",
2577         "\\u0115",
2578         "san\\u0304j\\u012Bb s\\u0113nagupta",
2579         "\\u0101nand vaddir\\u0101ju",
2580         "\\u0101",
2581         "a"
2582     };
2583     const char* const expected[MAX_LEN] = {
2584         "\\u092D\\u093E\\u0930\\u0924",   /* bha\\u0304rata */
2585         "\\u0915\\u094D\\u0930",          /* kra         */
2586         "\\u0915\\u094D\\u0937",          /* ks\\u0323a  */
2587         "\\u0916\\u094D\\u0930",          /* khra        */
2588         "\\u0917\\u094D\\u0930",          /* gra         */
2589         "\\u0919\\u094D\\u0930",          /* n\\u0307ra  */
2590         "\\u091A\\u094D\\u0930",          /* cra         */
2591         "\\u091B\\u094D\\u0930",          /* chra        */
2592         "\\u091C\\u094D\\u091E",          /* jn\\u0303a  */
2593         "\\u091D\\u094D\\u0930",          /* jhra        */
2594         "\\u091E\\u094D\\u0930",          /* n\\u0303ra  */
2595         "\\u091F\\u094D\\u092F",          /* t\\u0323ya  */
2596         "\\u0920\\u094D\\u0930",          /* t\\u0323hra */
2597         "\\u0921\\u094D\\u092F",          /* d\\u0323ya  */
2598       //"\\u095C\\u094D\\u092F",        /* r\\u0323ya  */ // \u095c is not valid in Devanagari
2599         "\\u0922\\u094D\\u092F",          /* d\\u0323hya */
2600         "\\u0922\\u093C\\u094D\\u0930",   /* r\\u0323hra */
2601         "\\u0923\\u094D\\u0930",          /* n\\u0323ra  */
2602         "\\u0924\\u094D\\u0924",          /* tta         */
2603         "\\u0925\\u094D\\u0930",          /* thra        */
2604         "\\u0926\\u094D\\u0926",          /* dda         */
2605         "\\u0927\\u094D\\u0930",          /* dhra        */
2606         "\\u0928\\u094D\\u0928",          /* nna         */
2607         "\\u092A\\u094D\\u0930",          /* pra         */
2608         "\\u092B\\u094D\\u0930",          /* phra        */
2609         "\\u092C\\u094D\\u0930",          /* bra         */
2610         "\\u092D\\u094D\\u0930",          /* bhra        */
2611         "\\u092E\\u094D\\u0930",          /* mra         */
2612         "\\u0929\\u094D\\u0930",          /* n\\u0331ra  */
2613       //"\\u0934\\u094D\\u0930",        /* l\\u0331ra  */
2614         "\\u092F\\u094D\\u0930",          /* yra         */
2615         "\\u092F\\u093C\\u094D\\u0930",   /* y\\u0307ra  */
2616       //"l-",
2617         "\\u0935\\u094D\\u0930",          /* vra         */
2618         "\\u0936\\u094D\\u0930",          /* s\\u0301ra  */
2619         "\\u0937\\u094D\\u0930",          /* s\\u0323ra  */
2620         "\\u0938\\u094D\\u0930",          /* sra         */
2621         "\\u0939\\u094d\\u092E",          /* hma         */
2622         "\\u091F\\u094D\\u091F",          /* t\\u0323t\\u0323a  */
2623         "\\u091F\\u094D\\u0920",          /* t\\u0323t\\u0323ha */
2624         "\\u0920\\u094D\\u0920",          /* t\\u0323ht\\u0323ha*/
2625         "\\u0921\\u094D\\u0921",          /* d\\u0323d\\u0323a  */
2626         "\\u0921\\u094D\\u0922",          /* d\\u0323d\\u0323ha */
2627         "\\u091F\\u094D\\u092F",          /* t\\u0323ya  */
2628         "\\u0920\\u094D\\u092F",          /* t\\u0323hya */
2629         "\\u0921\\u094D\\u092F",          /* d\\u0323ya  */
2630         "\\u0922\\u094D\\u092F",          /* d\\u0323hya */
2631      // "hma",                         /* hma         */
2632         "\\u0939\\u094D\\u092F",          /* hya         */
2633         "\\u0936\\u0943",                 /* s\\u0301r\\u0325a  */
2634         "\\u0936\\u094D\\u091A",          /* s\\u0301ca  */
2635         "\\u090d",                        /* e\\u0306    */
2636         "\\u0938\\u0902\\u091C\\u0940\\u092C\\u094D \\u0938\\u0947\\u0928\\u0917\\u0941\\u092A\\u094D\\u0924",
2637         "\\u0906\\u0928\\u0902\\u0926\\u094D \\u0935\\u0926\\u094D\\u0926\\u093F\\u0930\\u093E\\u091C\\u0941",
2638         "\\u0906",
2639         "\\u0905",
2640     };
2641     UErrorCode status = U_ZERO_ERROR;
2642     UParseError parseError;
2643     UnicodeString message;
2644     Transliterator* latinToDev=Transliterator::createInstance("Latin-Devanagari", UTRANS_FORWARD, parseError, status);
2645     Transliterator* devToLatin=Transliterator::createInstance("Devanagari-Latin", UTRANS_FORWARD, parseError, status);
2646     if(U_FAILURE(status)){
2647         dataerrln("FAIL: construction " +   UnicodeString(" Error: ") + u_errorName(status));
2648         dataerrln("PreContext: " + prettify(parseError.preContext) + " PostContext: " + prettify( parseError.postContext) );
2649         return;
2650     }
2651     UnicodeString gotResult;
2652     for(int i= 0; i<MAX_LEN; i++){
2653         gotResult = source[i];
2654         expect(*latinToDev,CharsToUnicodeString(source[i]),CharsToUnicodeString(expected[i]));
2655         expect(*devToLatin,CharsToUnicodeString(expected[i]),CharsToUnicodeString(source[i]));
2656     }
2657     delete latinToDev;
2658     delete devToLatin;
2659 }
2660
2661 void TransliteratorTest::TestTeluguLatinRT(){
2662     const int MAX_LEN=10;
2663     const char* const source[MAX_LEN] = {
2664         "raghur\\u0101m vi\\u015Bvan\\u0101dha",                         /* Raghuram Viswanadha    */
2665         "\\u0101nand vaddir\\u0101ju",                                   /* Anand Vaddiraju        */
2666         "r\\u0101j\\u012Bv ka\\u015Barab\\u0101da",                      /* Rajeev Kasarabada      */
2667         "san\\u0304j\\u012Bv ka\\u015Barab\\u0101da",                    /* sanjeev kasarabada     */
2668         "san\\u0304j\\u012Bb sen'gupta",                                 /* sanjib sengupata       */
2669         "amar\\u0113ndra hanum\\u0101nula",                              /* Amarendra hanumanula   */
2670         "ravi kum\\u0101r vi\\u015Bvan\\u0101dha",                       /* Ravi Kumar Viswanadha  */
2671         "\\u0101ditya kandr\\u0113gula",                                 /* Aditya Kandregula      */
2672         "\\u015Br\\u012Bdhar ka\\u1E47\\u1E6Dama\\u015Be\\u1E6D\\u1E6Di",/* Shridhar Kantamsetty   */
2673         "m\\u0101dhav de\\u015Be\\u1E6D\\u1E6Di"                         /* Madhav Desetty         */
2674     };
2675
2676     const char* const expected[MAX_LEN] = {
2677         "\\u0c30\\u0c18\\u0c41\\u0c30\\u0c3e\\u0c2e\\u0c4d \\u0c35\\u0c3f\\u0c36\\u0c4d\\u0c35\\u0c28\\u0c3e\\u0c27",
2678         "\\u0c06\\u0c28\\u0c02\\u0c26\\u0c4d \\u0C35\\u0C26\\u0C4D\\u0C26\\u0C3F\\u0C30\\u0C3E\\u0C1C\\u0C41",
2679         "\\u0c30\\u0c3e\\u0c1c\\u0c40\\u0c35\\u0c4d \\u0c15\\u0c36\\u0c30\\u0c2c\\u0c3e\\u0c26",
2680         "\\u0c38\\u0c02\\u0c1c\\u0c40\\u0c35\\u0c4d \\u0c15\\u0c36\\u0c30\\u0c2c\\u0c3e\\u0c26",
2681         "\\u0c38\\u0c02\\u0c1c\\u0c40\\u0c2c\\u0c4d \\u0c38\\u0c46\\u0c28\\u0c4d\\u0c17\\u0c41\\u0c2a\\u0c4d\\u0c24",
2682         "\\u0c05\\u0c2e\\u0c30\\u0c47\\u0c02\\u0c26\\u0c4d\\u0c30 \\u0c39\\u0c28\\u0c41\\u0c2e\\u0c3e\\u0c28\\u0c41\\u0c32",
2683         "\\u0c30\\u0c35\\u0c3f \\u0c15\\u0c41\\u0c2e\\u0c3e\\u0c30\\u0c4d \\u0c35\\u0c3f\\u0c36\\u0c4d\\u0c35\\u0c28\\u0c3e\\u0c27",
2684         "\\u0c06\\u0c26\\u0c3f\\u0c24\\u0c4d\\u0c2f \\u0C15\\u0C02\\u0C26\\u0C4D\\u0C30\\u0C47\\u0C17\\u0C41\\u0c32",
2685         "\\u0c36\\u0c4d\\u0c30\\u0c40\\u0C27\\u0C30\\u0C4D \\u0c15\\u0c02\\u0c1f\\u0c2e\\u0c36\\u0c46\\u0c1f\\u0c4d\\u0c1f\\u0c3f",
2686         "\\u0c2e\\u0c3e\\u0c27\\u0c35\\u0c4d \\u0c26\\u0c46\\u0c36\\u0c46\\u0c1f\\u0c4d\\u0c1f\\u0c3f",
2687     };
2688
2689     UErrorCode status = U_ZERO_ERROR;
2690     UParseError parseError;
2691     UnicodeString message;
2692     Transliterator* latinToDev=Transliterator::createInstance("Latin-Telugu", UTRANS_FORWARD, parseError, status);
2693     Transliterator* devToLatin=Transliterator::createInstance("Telugu-Latin", UTRANS_FORWARD, parseError, status);
2694     if(U_FAILURE(status)){
2695         dataerrln("FAIL: construction " +   UnicodeString(" Error: ") + u_errorName(status));
2696         dataerrln("PreContext: " + prettify(parseError.preContext) + " PostContext: " + prettify( parseError.postContext) );
2697         return;
2698     }
2699     UnicodeString gotResult;
2700     for(int i= 0; i<MAX_LEN; i++){
2701         gotResult = source[i];
2702         expect(*latinToDev,CharsToUnicodeString(source[i]),CharsToUnicodeString(expected[i]));
2703         expect(*devToLatin,CharsToUnicodeString(expected[i]),CharsToUnicodeString(source[i]));
2704     }
2705     delete latinToDev;
2706     delete devToLatin;
2707 }
2708
2709 void TransliteratorTest::TestSanskritLatinRT(){
2710     const int MAX_LEN =16;
2711     const char* const source[MAX_LEN] = {
2712         "rmk\\u1E63\\u0113t",
2713         "\\u015Br\\u012Bmad",
2714         "bhagavadg\\u012Bt\\u0101",
2715         "adhy\\u0101ya",
2716         "arjuna",
2717         "vi\\u1E63\\u0101da",
2718         "y\\u014Dga",
2719         "dhr\\u0325tar\\u0101\\u1E63\\u1E6Dra",
2720         "uv\\u0101cr\\u0325",
2721         "dharmak\\u1E63\\u0113tr\\u0113",
2722         "kuruk\\u1E63\\u0113tr\\u0113",
2723         "samav\\u0113t\\u0101",
2724         "yuyutsava\\u1E25",
2725         "m\\u0101mak\\u0101\\u1E25",
2726     // "p\\u0101\\u1E47\\u1E0Dav\\u0101\\u015Bcaiva",
2727         "kimakurvata",
2728         "san\\u0304java",
2729     };
2730     const char* const expected[MAX_LEN] = {
2731         "\\u0930\\u094D\\u092E\\u094D\\u0915\\u094D\\u0937\\u0947\\u0924\\u094D",
2732         "\\u0936\\u094d\\u0930\\u0940\\u092e\\u0926\\u094d",
2733         "\\u092d\\u0917\\u0935\\u0926\\u094d\\u0917\\u0940\\u0924\\u093e",
2734         "\\u0905\\u0927\\u094d\\u092f\\u093e\\u092f",
2735         "\\u0905\\u0930\\u094d\\u091c\\u0941\\u0928",
2736         "\\u0935\\u093f\\u0937\\u093e\\u0926",
2737         "\\u092f\\u094b\\u0917",
2738         "\\u0927\\u0943\\u0924\\u0930\\u093e\\u0937\\u094d\\u091f\\u094d\\u0930",
2739         "\\u0909\\u0935\\u093E\\u091A\\u0943",
2740         "\\u0927\\u0930\\u094d\\u092e\\u0915\\u094d\\u0937\\u0947\\u0924\\u094d\\u0930\\u0947",
2741         "\\u0915\\u0941\\u0930\\u0941\\u0915\\u094d\\u0937\\u0947\\u0924\\u094d\\u0930\\u0947",
2742         "\\u0938\\u092e\\u0935\\u0947\\u0924\\u093e",
2743         "\\u092f\\u0941\\u092f\\u0941\\u0924\\u094d\\u0938\\u0935\\u0903",
2744         "\\u092e\\u093e\\u092e\\u0915\\u093e\\u0903",
2745     //"\\u092a\\u093e\\u0923\\u094d\\u0921\\u0935\\u093e\\u0936\\u094d\\u091a\\u0948\\u0935",
2746         "\\u0915\\u093f\\u092e\\u0915\\u0941\\u0930\\u094d\\u0935\\u0924",
2747         "\\u0938\\u0902\\u091c\\u0935",
2748     };
2749     UErrorCode status = U_ZERO_ERROR;
2750     UParseError parseError;
2751     UnicodeString message;
2752     Transliterator* latinToDev=Transliterator::createInstance("Latin-Devanagari", UTRANS_FORWARD, parseError, status);
2753     Transliterator* devToLatin=Transliterator::createInstance("Devanagari-Latin", UTRANS_FORWARD, parseError, status);
2754     if(U_FAILURE(status)){
2755         dataerrln("FAIL: construction " +   UnicodeString(" Error: ") + u_errorName(status));
2756         dataerrln("PreContext: " + prettify(parseError.preContext) + " PostContext: " + prettify( parseError.postContext) );
2757         return;
2758     }
2759     UnicodeString gotResult;
2760     for(int i= 0; i<MAX_LEN; i++){
2761         gotResult = source[i];
2762         expect(*latinToDev,CharsToUnicodeString(source[i]),CharsToUnicodeString(expected[i]));
2763         expect(*devToLatin,CharsToUnicodeString(expected[i]),CharsToUnicodeString(source[i]));
2764     }
2765     delete latinToDev;
2766     delete devToLatin;
2767 }
2768
2769
2770 void TransliteratorTest::TestCompoundLatinRT(){
2771     const char* const source[] = {
2772         "rmk\\u1E63\\u0113t",
2773         "\\u015Br\\u012Bmad",
2774         "bhagavadg\\u012Bt\\u0101",
2775         "adhy\\u0101ya",
2776         "arjuna",
2777         "vi\\u1E63\\u0101da",
2778         "y\\u014Dga",
2779         "dhr\\u0325tar\\u0101\\u1E63\\u1E6Dra",
2780         "uv\\u0101cr\\u0325",
2781         "dharmak\\u1E63\\u0113tr\\u0113",
2782         "kuruk\\u1E63\\u0113tr\\u0113",
2783         "samav\\u0113t\\u0101",
2784         "yuyutsava\\u1E25",
2785         "m\\u0101mak\\u0101\\u1E25",
2786      // "p\\u0101\\u1E47\\u1E0Dav\\u0101\\u015Bcaiva",
2787         "kimakurvata",
2788         "san\\u0304java"
2789     };
2790     const int MAX_LEN = sizeof(source)/sizeof(source[0]);
2791     const char* const expected[MAX_LEN] = {
2792         "\\u0930\\u094D\\u092E\\u094D\\u0915\\u094D\\u0937\\u0947\\u0924\\u094D",
2793         "\\u0936\\u094d\\u0930\\u0940\\u092e\\u0926\\u094d",
2794         "\\u092d\\u0917\\u0935\\u0926\\u094d\\u0917\\u0940\\u0924\\u093e",
2795         "\\u0905\\u0927\\u094d\\u092f\\u093e\\u092f",
2796         "\\u0905\\u0930\\u094d\\u091c\\u0941\\u0928",
2797         "\\u0935\\u093f\\u0937\\u093e\\u0926",
2798         "\\u092f\\u094b\\u0917",
2799         "\\u0927\\u0943\\u0924\\u0930\\u093e\\u0937\\u094d\\u091f\\u094d\\u0930",
2800         "\\u0909\\u0935\\u093E\\u091A\\u0943",
2801         "\\u0927\\u0930\\u094d\\u092e\\u0915\\u094d\\u0937\\u0947\\u0924\\u094d\\u0930\\u0947",
2802         "\\u0915\\u0941\\u0930\\u0941\\u0915\\u094d\\u0937\\u0947\\u0924\\u094d\\u0930\\u0947",
2803         "\\u0938\\u092e\\u0935\\u0947\\u0924\\u093e",
2804         "\\u092f\\u0941\\u092f\\u0941\\u0924\\u094d\\u0938\\u0935\\u0903",
2805         "\\u092e\\u093e\\u092e\\u0915\\u093e\\u0903",
2806     //  "\\u092a\\u093e\\u0923\\u094d\\u0921\\u0935\\u093e\\u0936\\u094d\\u091a\\u0948\\u0935",
2807         "\\u0915\\u093f\\u092e\\u0915\\u0941\\u0930\\u094d\\u0935\\u0924",
2808         "\\u0938\\u0902\\u091c\\u0935"
2809     };
2810     if(MAX_LEN != sizeof(expected)/sizeof(expected[0])) {
2811         errln("error in TestCompoundLatinRT: source[] and expected[] have different lengths!");
2812         return;
2813     }
2814
2815     UErrorCode status = U_ZERO_ERROR;
2816     UParseError parseError;
2817     UnicodeString message;
2818     Transliterator* devToLatinToDev  =Transliterator::createInstance("Devanagari-Latin;Latin-Devanagari", UTRANS_FORWARD, parseError, status);
2819     Transliterator* latinToDevToLatin=Transliterator::createInstance("Latin-Devanagari;Devanagari-Latin", UTRANS_FORWARD, parseError, status);
2820     Transliterator* devToTelToDev    =Transliterator::createInstance("Devanagari-Telugu;Telugu-Devanagari", UTRANS_FORWARD, parseError, status);
2821     Transliterator* latinToTelToLatin=Transliterator::createInstance("Latin-Telugu;Telugu-Latin", UTRANS_FORWARD, parseError, status);
2822
2823     if(U_FAILURE(status)){
2824         dataerrln("FAIL: construction " +   UnicodeString(" Error: ") + u_errorName(status));
2825         dataerrln("PreContext: " + prettify(parseError.preContext) + " PostContext: " + prettify( parseError.postContext) );
2826         return;
2827     }
2828     UnicodeString gotResult;
2829     for(int i= 0; i<MAX_LEN; i++){
2830         gotResult = source[i];
2831         expect(*devToLatinToDev,CharsToUnicodeString(expected[i]),CharsToUnicodeString(expected[i]));
2832         expect(*latinToDevToLatin,CharsToUnicodeString(source[i]),CharsToUnicodeString(source[i]));
2833         expect(*latinToTelToLatin,CharsToUnicodeString(source[i]),CharsToUnicodeString(source[i]));
2834
2835     }
2836     delete(latinToDevToLatin);
2837     delete(devToLatinToDev);
2838     delete(devToTelToDev);
2839     delete(latinToTelToLatin);
2840 }
2841
2842 /**
2843  * Test Gurmukhi-Devanagari Tippi and Bindi
2844  */
2845 void TransliteratorTest::TestGurmukhiDevanagari(){
2846     // the rule says:
2847     // (\u0902) (when preceded by vowel)      --->  (\u0A02)
2848     // (\u0902) (when preceded by consonant)  --->  (\u0A70)
2849     UErrorCode status = U_ZERO_ERROR;
2850     UnicodeSet vowel(UnicodeString("[\\u0905-\\u090A \\u090F\\u0910\\u0913\\u0914 \\u093e-\\u0942\\u0947\\u0948\\u094B\\u094C\\u094D]", -1, US_INV).unescape(), status);
2851     UnicodeSet non_vowel(UnicodeString("[\\u0915-\\u0928\\u092A-\\u0930]", -1, US_INV).unescape(), status);
2852     UParseError parseError;
2853
2854     UnicodeSetIterator vIter(vowel);
2855     UnicodeSetIterator nvIter(non_vowel);
2856     Transliterator* trans = Transliterator::createInstance("Devanagari-Gurmukhi",UTRANS_FORWARD, parseError, status);
2857     if(U_FAILURE(status)) {
2858       dataerrln("Error creating transliterator %s", u_errorName(status));
2859       delete trans;
2860       return;
2861     }
2862     UnicodeString src (" \\u0902", -1, US_INV);
2863     UnicodeString expected(" \\u0A02", -1, US_INV);
2864     src = src.unescape();
2865     expected= expected.unescape();
2866
2867     while(vIter.next()){
2868         src.setCharAt(0,(UChar) vIter.getCodepoint());
2869         expected.setCharAt(0,(UChar) (vIter.getCodepoint()+0x0100));
2870         expect(*trans,src,expected);
2871     }
2872
2873     expected.setCharAt(1,0x0A70);
2874     while(nvIter.next()){
2875         //src.setCharAt(0,(char) nvIter.codepoint);
2876         src.setCharAt(0,(UChar)nvIter.getCodepoint());
2877         expected.setCharAt(0,(UChar) (nvIter.getCodepoint()+0x0100));
2878         expect(*trans,src,expected);
2879     }
2880     delete trans;
2881 }
2882 /**
2883  * Test instantiation from a locale.
2884  */
2885 void TransliteratorTest::TestLocaleInstantiation(void) {
2886     UParseError pe;
2887     UErrorCode ec = U_ZERO_ERROR;
2888     Transliterator *t = Transliterator::createInstance("ru_RU-Latin", UTRANS_FORWARD, pe, ec);
2889     if (U_FAILURE(ec)) {
2890         dataerrln("FAIL: createInstance(ru_RU-Latin) - %s", u_errorName(ec));
2891         delete t;
2892         return;
2893     }
2894     expect(*t, CharsToUnicodeString("\\u0430"), "a");
2895     delete t;
2896
2897     t = Transliterator::createInstance("en-el", UTRANS_FORWARD, pe, ec);
2898     if (U_FAILURE(ec)) {
2899         errln("FAIL: createInstance(en-el)");
2900         delete t;
2901         return;
2902     }
2903     expect(*t, "a", CharsToUnicodeString("\\u03B1"));
2904     delete t;
2905 }
2906
2907 /**
2908  * Test title case handling of accent (should ignore accents)
2909  */
2910 void TransliteratorTest::TestTitleAccents(void) {
2911     UParseError pe;
2912     UErrorCode ec = U_ZERO_ERROR;
2913     Transliterator *t = Transliterator::createInstance("Title", UTRANS_FORWARD, pe, ec);
2914     if (U_FAILURE(ec)) {
2915         errln("FAIL: createInstance(Title)");
2916         delete t;
2917         return;
2918     }
2919     expect(*t, CharsToUnicodeString("a\\u0300b can't abe"), CharsToUnicodeString("A\\u0300b Can't Abe"));
2920     delete t;
2921 }
2922
2923 /**
2924  * Basic test of a locale resource based rule.
2925  */
2926 void TransliteratorTest::TestLocaleResource() {
2927     const char* DATA[] = {
2928         // id                    from               to
2929         //"Latin-Greek/UNGEGN",    "b",               "\\u03bc\\u03c0",
2930         "Latin-el",              "b",               "\\u03bc\\u03c0",
2931         "Latin-Greek",           "b",               "\\u03B2",
2932         "Greek-Latin/UNGEGN",    "\\u03B2",         "v",
2933         "el-Latin",              "\\u03B2",         "v",
2934         "Greek-Latin",           "\\u03B2",         "b",
2935     };
2936     const int32_t DATA_length = sizeof(DATA) / sizeof(DATA[0]);
2937     for (int32_t i=0; i<DATA_length; i+=3) {
2938         UParseError pe;
2939         UErrorCode ec = U_ZERO_ERROR;
2940         Transliterator *t = Transliterator::createInstance(DATA[i], UTRANS_FORWARD, pe, ec);
2941         if (U_FAILURE(ec)) {
2942             dataerrln((UnicodeString)"FAIL: createInstance(" + DATA[i] + ") - " + u_errorName(ec));
2943             delete t;
2944             continue;
2945         }
2946         expect(*t, CharsToUnicodeString(DATA[i+1]),
2947                CharsToUnicodeString(DATA[i+2]));
2948         delete t;
2949     }
2950 }
2951
2952 /**
2953  * Make sure parse errors reference the right line.
2954  */
2955 void TransliteratorTest::TestParseError() {
2956     static const char* rule =
2957         "a > b;\n"
2958         "# more stuff\n"
2959         "d << b;";
2960     UErrorCode ec = U_ZERO_ERROR;
2961     UParseError pe;
2962     Transliterator *t = Transliterator::createFromRules("ID", rule, UTRANS_FORWARD, pe, ec);
2963     delete t;
2964     if (U_FAILURE(ec)) {
2965         UnicodeString err(pe.preContext);
2966         err.append((UChar)124/*|*/).append(pe.postContext);
2967         if (err.indexOf("d << b") >= 0) {
2968             logln("Ok: " + err);
2969         } else {
2970             errln("FAIL: " + err);
2971         }
2972     }
2973     else {
2974         errln("FAIL: no syntax error");
2975     }
2976     static const char* maskingRule =
2977         "a>x;\n"
2978         "# more stuff\n"
2979         "ab>y;";
2980     ec = U_ZERO_ERROR;
2981     delete Transliterator::createFromRules("ID", maskingRule, UTRANS_FORWARD, pe, ec);
2982     if (ec != U_RULE_MASK_ERROR) {
2983         errln("FAIL: returned %s instead of U_RULE_MASK_ERROR", u_errorName(ec));
2984     }
2985     else if (UnicodeString("a > x;") != UnicodeString(pe.preContext)) {
2986         errln("FAIL: did not get expected precontext");
2987     }
2988     else if (UnicodeString("ab > y;") != UnicodeString(pe.postContext)) {
2989         errln("FAIL: did not get expected postcontext");
2990     }
2991 }
2992
2993 /**
2994  * Make sure sets on output are disallowed.
2995  */
2996 void TransliteratorTest::TestOutputSet() {
2997     UnicodeString rule = "$set = [a-cm-n]; b > $set;";
2998     UErrorCode ec = U_ZERO_ERROR;
2999     UParseError pe;
3000     Transliterator *t = Transliterator::createFromRules("ID", rule, UTRANS_FORWARD, pe, ec);
3001     delete t;
3002     if (U_FAILURE(ec)) {
3003         UnicodeString err(pe.preContext);
3004         err.append((UChar)124/*|*/).append(pe.postContext);
3005         logln("Ok: " + err);
3006         return;
3007     }
3008     errln("FAIL: No syntax error");
3009 }
3010
3011 /**
3012  * Test the use variable range pragma, making sure that use of
3013  * variable range characters is detected and flagged as an error.
3014  */
3015 void TransliteratorTest::TestVariableRange() {
3016     UnicodeString rule = "use variable range 0x70 0x72; a > A; b > B; q > Q;";
3017     UErrorCode ec = U_ZERO_ERROR;
3018     UParseError pe;
3019     Transliterator *t = Transliterator::createFromRules("ID", rule, UTRANS_FORWARD, pe, ec);
3020     delete t;
3021     if (U_FAILURE(ec)) {
3022         UnicodeString err(pe.preContext);
3023         err.append((UChar)124/*|*/).append(pe.postContext);
3024         logln("Ok: " + err);
3025         return;
3026     }
3027     errln("FAIL: No syntax error");
3028 }
3029
3030 /**
3031  * Test invalid post context error handling
3032  */
3033 void TransliteratorTest::TestInvalidPostContext() {
3034     UnicodeString rule = "a}b{c>d;";
3035     UErrorCode ec = U_ZERO_ERROR;
3036     UParseError pe;
3037     Transliterator *t = Transliterator::createFromRules("ID", rule, UTRANS_FORWARD, pe, ec);
3038     delete t;
3039     if (U_FAILURE(ec)) {
3040         UnicodeString err(pe.preContext);
3041         err.append((UChar)124/*|*/).append(pe.postContext);
3042         if (err.indexOf("a}b{c") >= 0) {
3043             logln("Ok: " + err);
3044         } else {
3045             errln("FAIL: " + err);
3046         }
3047         return;
3048     }
3049     errln("FAIL: No syntax error");
3050 }
3051
3052 /**
3053  * Test ID form variants
3054  */
3055 void TransliteratorTest::TestIDForms() {
3056     const char* DATA[] = {
3057         "NFC", NULL, "NFD",
3058         "nfd", NULL, "NFC", // make sure case is ignored
3059         "Any-NFKD", NULL, "Any-NFKC",
3060         "Null", NULL, "Null",
3061         "-nfkc", "nfkc", "NFKD",
3062         "-nfkc/", "nfkc", "NFKD",
3063         "Latin-Greek/UNGEGN", NULL, "Greek-Latin/UNGEGN",
3064         "Greek/UNGEGN-Latin", "Greek-Latin/UNGEGN", "Latin-Greek/UNGEGN",
3065         "Bengali-Devanagari/", "Bengali-Devanagari", "Devanagari-Bengali",
3066         "Source-", NULL, NULL,
3067         "Source/Variant-", NULL, NULL,
3068         "Source-/Variant", NULL, NULL,
3069         "/Variant", NULL, NULL,
3070         "/Variant-", NULL, NULL,
3071         "-/Variant", NULL, NULL,
3072         "-/", NULL, NULL,
3073         "-", NULL, NULL,
3074         "/", NULL, NULL,
3075     };
3076     const int32_t DATA_length = sizeof(DATA)/sizeof(DATA[0]);
3077
3078     for (int32_t i=0; i<DATA_length; i+=3) {
3079         const char* ID = DATA[i];
3080         const char* expID = DATA[i+1];
3081         const char* expInvID = DATA[i+2];
3082         UBool expValid = (expInvID != NULL);
3083         if (expID == NULL) {
3084             expID = ID;
3085         }
3086         UParseError pe;
3087         UErrorCode ec = U_ZERO_ERROR;
3088         Transliterator *t =
3089             Transliterator::createInstance(ID, UTRANS_FORWARD, pe, ec);
3090         if (U_FAILURE(ec)) {
3091             if (!expValid) {
3092                 logln((UnicodeString)"Ok: getInstance(" + ID +") => " + u_errorName(ec));
3093             } else {
3094                 dataerrln((UnicodeString)"FAIL: Couldn't create " + ID + " - " + u_errorName(ec));
3095             }
3096             delete t;
3097             continue;
3098         }
3099         Transliterator *u = t->createInverse(ec);
3100         if (U_FAILURE(ec)) {
3101             errln((UnicodeString)"FAIL: Couldn't create inverse of " + ID);
3102             delete t;
3103             delete u;
3104             continue;
3105         }
3106         if (t->getID() == expID &&
3107             u->getID() == expInvID) {
3108             logln((UnicodeString)"Ok: " + ID + ".getInverse() => " + expInvID);
3109         } else {
3110             errln((UnicodeString)"FAIL: getInstance(" + ID + ") => " +
3111                   t->getID() + " x getInverse() => " + u->getID() +
3112                   ", expected " + expInvID);
3113         }
3114         delete t;
3115         delete u;
3116     }
3117 }
3118
3119 static const UChar SPACE[]   = {32,0};
3120 static const UChar NEWLINE[] = {10,0};
3121 static const UChar RETURN[]  = {13,0};
3122 static const UChar EMPTY[]   = {0};
3123
3124 void TransliteratorTest::checkRules(const UnicodeString& label, Transliterator& t2,
3125                                     const UnicodeString& testRulesForward) {
3126     UnicodeString rules2; t2.toRules(rules2, TRUE);
3127     //rules2 = TestUtility.replaceAll(rules2, new UnicodeSet("[' '\n\r]"), "");
3128     rules2.findAndReplace(SPACE, EMPTY);
3129     rules2.findAndReplace(NEWLINE, EMPTY);
3130     rules2.findAndReplace(RETURN, EMPTY);
3131
3132     UnicodeString testRules(testRulesForward); testRules.findAndReplace(SPACE, EMPTY);
3133
3134     if (rules2 != testRules) {
3135         errln(label);
3136         logln((UnicodeString)"GENERATED RULES: " + rules2);
3137         logln((UnicodeString)"SHOULD BE:       " + testRulesForward);
3138     }
3139 }
3140
3141 /**
3142  * Mark's toRules test.
3143  */
3144 void TransliteratorTest::TestToRulesMark() {
3145     const char* testRules =
3146         "::[[:Latin:][:Mark:]];"
3147         "::NFKD (NFC);"
3148         "::Lower (Lower);"
3149         "a <> \\u03B1;" // alpha
3150         "::NFKC (NFD);"
3151         "::Upper (Lower);"
3152         "::Lower ();"
3153         "::([[:Greek:][:Mark:]]);"
3154         ;
3155     const char* testRulesForward =
3156         "::[[:Latin:][:Mark:]];"
3157         "::NFKD(NFC);"
3158         "::Lower(Lower);"
3159         "a > \\u03B1;"
3160         "::NFKC(NFD);"
3161         "::Upper (Lower);"
3162         "::Lower ();"
3163         ;
3164     const char* testRulesBackward =
3165         "::[[:Greek:][:Mark:]];"
3166         "::Lower (Upper);"
3167         "::NFD(NFKC);"
3168         "\\u03B1 > a;"
3169         "::Lower(Lower);"
3170         "::NFC(NFKD);"
3171         ;
3172     UnicodeString source = CharsToUnicodeString("\\u00E1"); // a-acute
3173     UnicodeString target = CharsToUnicodeString("\\u03AC"); // alpha-acute
3174
3175     UParseError pe;
3176     UErrorCode ec = U_ZERO_ERROR;
3177     Transliterator *t2 = Transliterator::createFromRules("source-target", UnicodeString(testRules, -1, US_INV), UTRANS_FORWARD, pe, ec);
3178     Transliterator *t3 = Transliterator::createFromRules("target-source", UnicodeString(testRules, -1, US_INV), UTRANS_REVERSE, pe, ec);
3179
3180     if (U_FAILURE(ec)) {
3181         delete t2;
3182         delete t3;
3183         dataerrln((UnicodeString)"FAIL: createFromRules => " + u_errorName(ec));
3184         return;
3185     }
3186
3187     expect(*t2, source, target);
3188     expect(*t3, target, source);
3189
3190     checkRules("Failed toRules FORWARD", *t2, UnicodeString(testRulesForward, -1, US_INV));
3191     checkRules("Failed toRules BACKWARD", *t3, UnicodeString(testRulesBackward, -1, US_INV));
3192
3193     delete t2;
3194     delete t3;
3195 }
3196
3197 /**
3198  * Test Escape and Unescape transliterators.
3199  */
3200 void TransliteratorTest::TestEscape() {
3201     UParseError pe;
3202     UErrorCode ec;
3203     Transliterator *t;
3204
3205     ec = U_ZERO_ERROR;
3206     t = Transliterator::createInstance("Hex-Any", UTRANS_FORWARD, pe, ec);
3207     if (U_FAILURE(ec)) {
3208         errln((UnicodeString)"FAIL: createInstance");
3209     } else {
3210         expect(*t,
3211                UNICODE_STRING_SIMPLE("\\x{40}\\U00000031&#x32;&#81;"),
3212                "@12Q");
3213     }
3214     delete t;
3215
3216     ec = U_ZERO_ERROR;
3217     t = Transliterator::createInstance("Any-Hex/C", UTRANS_FORWARD, pe, ec);
3218     if (U_FAILURE(ec)) {
3219         errln((UnicodeString)"FAIL: createInstance");
3220     } else {
3221         expect(*t,
3222                CharsToUnicodeString("A\\U0010BEEF\\uFEED"),
3223                UNICODE_STRING_SIMPLE("\\u0041\\U0010BEEF\\uFEED"));
3224     }
3225     delete t;
3226
3227     ec = U_ZERO_ERROR;
3228     t = Transliterator::createInstance("Any-Hex/Java", UTRANS_FORWARD, pe, ec);
3229     if (U_FAILURE(ec)) {
3230         errln((UnicodeString)"FAIL: createInstance");
3231     } else {
3232         expect(*t,
3233                CharsToUnicodeString("A\\U0010BEEF\\uFEED"),
3234                UNICODE_STRING_SIMPLE("\\u0041\\uDBEF\\uDEEF\\uFEED"));
3235     }
3236     delete t;
3237
3238     ec = U_ZERO_ERROR;
3239     t = Transliterator::createInstance("Any-Hex/Perl", UTRANS_FORWARD, pe, ec);
3240     if (U_FAILURE(ec)) {
3241         errln((UnicodeString)"FAIL: createInstance");
3242     } else {
3243         expect(*t,
3244                CharsToUnicodeString("A\\U0010BEEF\\uFEED"),
3245                UNICODE_STRING_SIMPLE("\\x{41}\\x{10BEEF}\\x{FEED}"));
3246     }
3247     delete t;
3248 }
3249
3250
3251 void TransliteratorTest::TestAnchorMasking(){
3252     UnicodeString rule ("^a > Q; a > q;");
3253     UErrorCode status= U_ZERO_ERROR;
3254     UParseError parseError;
3255
3256     Transliterator* t = Transliterator::createFromRules("ID", rule, UTRANS_FORWARD,parseError,status);
3257     if(U_FAILURE(status)){
3258         errln(UnicodeString("FAIL: ") + "ID" +
3259               ".createFromRules() => bad rules" +
3260               /*", parse error " + parseError.code +*/
3261               ", line " + parseError.line +
3262               ", offset " + parseError.offset +
3263               ", context " + prettify(parseError.preContext, TRUE) +
3264               ", rules: " + prettify(rule, TRUE));
3265     }
3266     delete t;
3267 }
3268
3269 /**
3270  * Make sure display names of variants look reasonable.
3271  */
3272 void TransliteratorTest::TestDisplayName() {
3273 #if UCONFIG_NO_FORMATTING
3274     logln("Skipping, UCONFIG_NO_FORMATTING is set\n");
3275     return;
3276 #else
3277     static const char* DATA[] = {
3278         // ID, forward name, reverse name
3279         // Update the text as necessary -- the important thing is
3280         // not the text itself, but how various cases are handled.
3281
3282         // Basic test
3283         "Any-Hex", "Any to Hex Escape", "Hex Escape to Any",
3284
3285         // Variants
3286         "Any-Hex/Perl", "Any to Hex Escape/Perl", "Hex Escape to Any/Perl",
3287
3288         // Target-only IDs
3289         "NFC", "Any to NFC", "Any to NFD",
3290     };
3291
3292     int32_t DATA_length = sizeof(DATA) / sizeof(DATA[0]);
3293
3294     Locale US("en", "US");
3295
3296     for (int32_t i=0; i<DATA_length; i+=3) {
3297         UnicodeString name;
3298         Transliterator::getDisplayName(DATA[i], US, name);
3299         if (name != DATA[i+1]) {
3300             dataerrln((UnicodeString)"FAIL: " + DATA[i] + ".getDisplayName() => " +
3301                   name + ", expected " + DATA[i+1]);
3302         } else {
3303             logln((UnicodeString)"Ok: " + DATA[i] + ".getDisplayName() => " + name);
3304         }
3305         UErrorCode ec = U_ZERO_ERROR;
3306         UParseError pe;
3307         Transliterator *t = Transliterator::createInstance(DATA[i], UTRANS_REVERSE, pe, ec);
3308         if (U_FAILURE(ec)) {
3309             delete t;
3310             dataerrln("FAIL: createInstance failed - %s", u_errorName(ec));
3311             continue;
3312         }
3313         name = Transliterator::getDisplayName(t->getID(), US, name);
3314         if (name != DATA[i+2]) {
3315             dataerrln((UnicodeString)"FAIL: " + t->getID() + ".getDisplayName() => " +
3316                   name + ", expected " + DATA[i+2]);
3317         } else {
3318             logln((UnicodeString)"Ok: " + t->getID() + ".getDisplayName() => " + name);
3319         }
3320         delete t;
3321     }
3322 #endif
3323 }
3324
3325 void TransliteratorTest::TestSpecialCases(void) {
3326     const UnicodeString registerRules[] = {
3327         "Any-Dev1", "x > X; y > Y;",
3328         "Any-Dev2", "XY > Z",
3329         "Greek-Latin/FAKE",
3330             CharsToUnicodeString
3331             ("[^[:L:][:M:]] { \\u03bc\\u03c0 > b ; \\u03bc\\u03c0 } [^[:L:][:M:]] > b ; [^[:L:][:M:]] { [\\u039c\\u03bc][\\u03a0\\u03c0] > B ; [\\u039c\\u03bc][\\u03a0\\u03c0] } [^[:L:][:M:]] > B ;"),
3332         "" // END MARKER
3333     };
3334
3335     const UnicodeString testCases[] = {
3336         // NORMALIZATION
3337         // should add more test cases
3338         "NFD" , CharsToUnicodeString("a\\u0300 \\u00E0 \\u1100\\u1161 \\uFF76\\uFF9E\\u03D3"), "",
3339         "NFC" , CharsToUnicodeString("a\\u0300 \\u00E0 \\u1100\\u1161 \\uFF76\\uFF9E\\u03D3"), "",
3340         "NFKD", CharsToUnicodeString("a\\u0300 \\u00E0 \\u1100\\u1161 \\uFF76\\uFF9E\\u03D3"), "",
3341         "NFKC", CharsToUnicodeString("a\\u0300 \\u00E0 \\u1100\\u1161 \\uFF76\\uFF9E\\u03D3"), "",
3342
3343         // mp -> b BUG
3344         "Greek-Latin/UNGEGN", CharsToUnicodeString("(\\u03BC\\u03C0)"), "(b)",
3345         "Greek-Latin/FAKE", CharsToUnicodeString("(\\u03BC\\u03C0)"), "(b)",
3346
3347         // check for devanagari bug
3348         "nfd;Dev1;Dev2;nfc", "xy", "Z",
3349
3350         // ff, i, dotless-i, I, dotted-I, LJLjlj deseret deeDEE
3351         "Title", CharsToUnicodeString("ab'cD ffi\\u0131I\\u0130 \\u01C7\\u01C8\\u01C9 ") + DESERET_dee + DESERET_DEE,
3352                  CharsToUnicodeString("Ab'cd Ffi\\u0131ii\\u0307 \\u01C8\\u01C9\\u01C9 ") + DESERET_DEE + DESERET_dee,
3353
3354         //TODO: enable this test once Titlecase works right
3355         /*
3356         "Title", CharsToUnicodeString("\\uFB00i\\u0131I\\u0130 \\u01C7\\u01C8\\u01C9 ") + DESERET_dee + DESERET_DEE,
3357                  CharsToUnicodeString("Ffi\\u0131ii \\u01C8\\u01C9\\u01C9 ") + DESERET_DEE + DESERET_dee,
3358                  */
3359         "Upper", CharsToUnicodeString("ab'cD \\uFB00i\\u0131I\\u0130 \\u01C7\\u01C8\\u01C9 ") + DESERET_dee + DESERET_DEE,
3360                  CharsToUnicodeString("AB'CD FFIII\\u0130 \\u01C7\\u01C7\\u01C7 ") + DESERET_DEE + DESERET_DEE,
3361         "Lower", CharsToUnicodeString("ab'cD \\uFB00i\\u0131I\\u0130 \\u01C7\\u01C8\\u01C9 ") + DESERET_dee + DESERET_DEE,
3362                  CharsToUnicodeString("ab'cd \\uFB00i\\u0131ii\\u0307 \\u01C9\\u01C9\\u01C9 ") + DESERET_dee + DESERET_dee,
3363
3364         "Upper", CharsToUnicodeString("ab'cD \\uFB00i\\u0131I\\u0130 \\u01C7\\u01C8\\u01C9 ") + DESERET_dee + DESERET_DEE, "",
3365         "Lower", CharsToUnicodeString("ab'cD \\uFB00i\\u0131I\\u0130 \\u01C7\\u01C8\\u01C9 ") + DESERET_dee + DESERET_DEE, "",
3366
3367          // FORMS OF S
3368         "Greek-Latin/UNGEGN",  CharsToUnicodeString("\\u03C3 \\u03C3\\u03C2 \\u03C2\\u03C3"),
3369                                CharsToUnicodeString("s ss s\\u0331s\\u0331") ,
3370         "Latin-Greek/UNGEGN",  CharsToUnicodeString("s ss s\\u0331s\\u0331"),
3371                                CharsToUnicodeString("\\u03C3 \\u03C3\\u03C2 \\u03C2\\u03C3") ,
3372         "Greek-Latin",  CharsToUnicodeString("\\u03C3 \\u03C3\\u03C2 \\u03C2\\u03C3"),
3373                         CharsToUnicodeString("s ss s\\u0331s\\u0331") ,
3374         "Latin-Greek",  CharsToUnicodeString("s ss s\\u0331s\\u0331"),
3375                         CharsToUnicodeString("\\u03C3 \\u03C3\\u03C2 \\u03C2\\u03C3"),
3376         // Tatiana bug
3377         // Upper: TAT\\u02B9\\u00C2NA
3378         // Lower: tat\\u02B9\\u00E2na
3379         // Title: Tat\\u02B9\\u00E2na
3380         "Upper", CharsToUnicodeString("tat\\u02B9\\u00E2na"),
3381                  CharsToUnicodeString("TAT\\u02B9\\u00C2NA"),
3382         "Lower", CharsToUnicodeString("TAT\\u02B9\\u00C2NA"),
3383                  CharsToUnicodeString("tat\\u02B9\\u00E2na"),
3384         "Title", CharsToUnicodeString("tat\\u02B9\\u00E2na"),
3385                  CharsToUnicodeString("Tat\\u02B9\\u00E2na"),
3386
3387         "" // END MARKER
3388     };
3389
3390     UParseError pos;
3391     int32_t i;
3392     for (i = 0; registerRules[i].length()!=0; i+=2) {
3393         UErrorCode status = U_ZERO_ERROR;
3394
3395         Transliterator *t = Transliterator::createFromRules(registerRules[0+i],
3396             registerRules[i+1], UTRANS_FORWARD, pos, status);
3397         if (U_FAILURE(status)) {
3398             dataerrln("Fails: Unable to create the transliterator from rules. - %s", u_errorName(status));
3399         } else {
3400             Transliterator::registerInstance(t);
3401         }
3402     }
3403     for (i = 0; testCases[i].length()!=0; i+=3) {
3404         UErrorCode ec = U_ZERO_ERROR;
3405         UParseError pe;
3406         const UnicodeString& name = testCases[i];
3407         Transliterator *t = Transliterator::createInstance(name, UTRANS_FORWARD, pe, ec);
3408         if (U_FAILURE(ec)) {
3409             dataerrln((UnicodeString)"FAIL: Couldn't create " + name + " - " + u_errorName(ec));
3410             delete t;
3411             continue;
3412         }
3413         const UnicodeString& id = t->getID();
3414         const UnicodeString& source = testCases[i+1];
3415         UnicodeString target;
3416
3417         // Automatic generation of targets, to make it simpler to add test cases (and more fail-safe)
3418
3419         if (testCases[i+2].length() > 0) {
3420             target = testCases[i+2];
3421         } else if (0==id.caseCompare("NFD", U_FOLD_CASE_DEFAULT)) {
3422             Normalizer::normalize(source, UNORM_NFD, 0, target, ec);
3423         } else if (0==id.caseCompare("NFC", U_FOLD_CASE_DEFAULT)) {
3424             Normalizer::normalize(source, UNORM_NFC, 0, target, ec);
3425         } else if (0==id.caseCompare("NFKD", U_FOLD_CASE_DEFAULT)) {
3426             Normalizer::normalize(source, UNORM_NFKD, 0, target, ec);
3427         } else if (0==id.caseCompare("NFKC", U_FOLD_CASE_DEFAULT)) {
3428             Normalizer::normalize(source, UNORM_NFKC, 0, target, ec);
3429         } else if (0==id.caseCompare("Lower", U_FOLD_CASE_DEFAULT)) {
3430             target = source;
3431             target.toLower(Locale::getUS());
3432         } else if (0==id.caseCompare("Upper", U_FOLD_CASE_DEFAULT)) {
3433             target = source;
3434             target.toUpper(Locale::getUS());
3435         }
3436         if (U_FAILURE(ec)) {
3437             errln((UnicodeString)"FAIL: Internal error normalizing " + source);
3438             continue;
3439         }
3440
3441         expect(*t, source, target);
3442         delete t;
3443     }
3444     for (i = 0; registerRules[i].length()!=0; i+=2) {
3445         Transliterator::unregister(registerRules[i]);
3446     }
3447 }
3448
/**
 * Format a code point as a backslash escape into a caller-supplied buffer.
 *
 * Values up to 0xFFFF are written as "\u" followed by at least four
 * lowercase hex digits; larger values as "\U" followed by eight.
 *
 * @param ch      code point to format
 * @param buffer  destination; must have room for the escape and its NUL
 * @return        buffer
 */
char* Char32ToEscapedChars(UChar32 ch, char* buffer) {
    if (ch > 0xFFFF) {
        // Above the BMP: long form.
        sprintf(buffer, "\\U%08x", (int)ch);
        return buffer;
    }
    sprintf(buffer, "\\u%04x", (int)ch);
    return buffer;
}
3457
3458 void TransliteratorTest::TestSurrogateCasing (void) {
3459     // check that casing handles surrogates
3460     // titlecase is currently defective
3461     char buffer[20];
3462     UChar buffer2[20];
3463     UChar32 dee;
3464     U16_GET(DESERET_dee,0, 0, DESERET_dee.length(), dee);
3465     UnicodeString DEE(u_totitle(dee));
3466     if (DEE != DESERET_DEE) {
3467         err("Fails titlecase of surrogates");
3468         err(Char32ToEscapedChars(dee, buffer));
3469         err(", ");
3470         errln(Char32ToEscapedChars(DEE.char32At(0), buffer));
3471     }
3472
3473     UnicodeString deeDEETest=DESERET_dee + DESERET_DEE;
3474     UnicodeString deedeeTest = DESERET_dee + DESERET_dee;
3475     UnicodeString DEEDEETest = DESERET_DEE + DESERET_DEE;
3476     UErrorCode status= U_ZERO_ERROR;
3477
3478     u_strToUpper(buffer2, 20, deeDEETest.getBuffer(), deeDEETest.length(), NULL, &status);
3479     if (U_FAILURE(status) || (UnicodeString(buffer2)!= DEEDEETest)) {
3480         errln("Fails: Can't uppercase surrogates.");
3481     }
3482
3483     status= U_ZERO_ERROR;
3484     u_strToLower(buffer2, 20, deeDEETest.getBuffer(), deeDEETest.length(), NULL, &status);
3485     if (U_FAILURE(status) || (UnicodeString(buffer2)!= deedeeTest)) {
3486         errln("Fails: Can't lowercase surrogates.");
3487     }
3488 }
3489
3490 static void _trans(Transliterator& t, const UnicodeString& src,
3491                    UnicodeString& result) {
3492     result = src;
3493     t.transliterate(result);
3494 }
3495
3496 static void _trans(const UnicodeString& id, const UnicodeString& src,
3497                    UnicodeString& result, UErrorCode ec) {
3498     UParseError pe;
3499     Transliterator *t = Transliterator::createInstance(id, UTRANS_FORWARD, pe, ec);
3500     if (U_SUCCESS(ec)) {
3501         _trans(*t, src, result);
3502     }
3503     delete t;
3504 }
3505
3506 static UnicodeString _findMatch(const UnicodeString& source,
3507                                        const UnicodeString* pairs) {
3508     UnicodeString empty;
3509     for (int32_t i=0; pairs[i].length() > 0; i+=2) {
3510         if (0==source.caseCompare(pairs[i], U_FOLD_CASE_DEFAULT)) {
3511             return pairs[i+1];
3512         }
3513     }
3514     return empty;
3515 }
3516
3517 // Check to see that incremental gets at least part way through a reasonable string.
3518
3519 void TransliteratorTest::TestIncrementalProgress(void) {
3520     UErrorCode ec = U_ZERO_ERROR;
3521     UnicodeString latinTest = "The Quick Brown Fox.";
3522     UnicodeString devaTest;
3523     _trans("Latin-Devanagari", latinTest, devaTest, ec);
3524     UnicodeString kataTest;
3525     _trans("Latin-Katakana", latinTest, kataTest, ec);
3526     if (U_FAILURE(ec)) {
3527         errln("FAIL: Internal error");
3528         return;
3529     }
3530     const UnicodeString tests[] = {
3531         "Any", latinTest,
3532         "Latin", latinTest,
3533         "Halfwidth", latinTest,
3534         "Devanagari", devaTest,
3535         "Katakana", kataTest,
3536         "" // END MARKER
3537     };
3538
3539     UnicodeString test("The Quick Brown Fox Jumped Over The Lazy Dog.");
3540     int32_t i = 0, j=0, k=0;
3541     int32_t sources = Transliterator::countAvailableSources();
3542     for (i = 0; i < sources; i++) {
3543         UnicodeString source;
3544         Transliterator::getAvailableSource(i, source);
3545         UnicodeString test = _findMatch(source, tests);
3546         if (test.length() == 0) {
3547             logln((UnicodeString)"Skipping " + source + "-X");
3548             continue;
3549         }
3550         int32_t targets = Transliterator::countAvailableTargets(source);
3551         for (j = 0; j < targets; j++) {
3552             UnicodeString target;
3553             Transliterator::getAvailableTarget(j, source, target);
3554             int32_t variants = Transliterator::countAvailableVariants(source, target);
3555             for (k =0; k< variants; k++) {
3556                 UnicodeString variant;
3557                 UParseError err;
3558                 UErrorCode status = U_ZERO_ERROR;
3559
3560                 Transliterator::getAvailableVariant(k, source, target, variant);
3561                 UnicodeString id = source + "-" + target + "/" + variant;
3562
3563                 Transliterator *t = Transliterator::createInstance(id, UTRANS_FORWARD, err, status);
3564                 if (U_FAILURE(status)) {
3565                     dataerrln((UnicodeString)"FAIL: Could not create " + id);
3566                     delete t;
3567                     continue;
3568                 }
3569                 status = U_ZERO_ERROR;
3570                 CheckIncrementalAux(t, test);
3571
3572                 UnicodeString rev;
3573                 _trans(*t, test, rev);
3574                 Transliterator *inv = t->createInverse(status);
3575                 if (U_FAILURE(status)) {
3576 #if UCONFIG_NO_BREAK_ITERATION
3577                     // If UCONFIG_NO_BREAK_ITERATION is on, then only Thai should fail.
3578                     if (id.compare((UnicodeString)"Latin-Thai/") != 0)
3579 #endif
3580                         errln((UnicodeString)"FAIL: Could not create inverse of " + id);
3581
3582                     delete t;
3583                     delete inv;
3584                     continue;
3585                 }
3586                 CheckIncrementalAux(inv, rev);
3587                 delete t;
3588                 delete inv;
3589             }
3590         }
3591     }
3592 }
3593
3594 void TransliteratorTest::CheckIncrementalAux(const Transliterator* t,
3595                                                       const UnicodeString& input) {
3596     UErrorCode ec = U_ZERO_ERROR;
3597     UTransPosition pos;
3598     UnicodeString test = input;
3599
3600     pos.contextStart = 0;
3601     pos.contextLimit = input.length();
3602     pos.start = 0;
3603     pos.limit = input.length();
3604
3605     t->transliterate(test, pos, ec);
3606     if (U_FAILURE(ec)) {
3607         errln((UnicodeString)"FAIL: transliterate() error " + u_errorName(ec));
3608         return;
3609     }
3610     UBool gotError = FALSE;
3611
3612     // we have a few special cases. Any-Remove (pos.start = 0, but also = limit) and U+XXXXX?X?
3613
3614     if (pos.start == 0 && pos.limit != 0 && t->getID() != "Hex-Any/Unicode") {
3615         errln((UnicodeString)"No Progress, " +
3616               t->getID() + ": " + formatInput(test, input, pos));
3617         gotError = TRUE;
3618     } else {
3619         logln((UnicodeString)"PASS Progress, " +
3620               t->getID() + ": " + formatInput(test, input, pos));
3621     }
3622     t->finishTransliteration(test, pos);
3623     if (pos.start != pos.limit) {
3624         errln((UnicodeString)"Incomplete, " +
3625               t->getID() + ": " + formatInput(test, input, pos));
3626         gotError = TRUE;
3627     }
3628 }
3629
3630 void TransliteratorTest::TestFunction() {
3631     // Careful with spacing and ';' here:  Phrase this exactly
3632     // as toRules() is going to return it.  If toRules() changes
3633     // with regard to spacing or ';', then adjust this string.
3634     UnicodeString rule =
3635         "([:Lu:]) > $1 '(' &Lower( $1 ) '=' &Hex( &Any-Lower( $1 ) ) ')';";
3636
3637     UParseError pe;
3638     UErrorCode ec = U_ZERO_ERROR;
3639     Transliterator *t = Transliterator::createFromRules("Test", rule, UTRANS_FORWARD, pe, ec);
3640     if (t == NULL) {
3641         dataerrln("FAIL: createFromRules failed - %s", u_errorName(ec));
3642         return;
3643     }
3644
3645     UnicodeString r;
3646     t->toRules(r, TRUE);
3647     if (r == rule) {
3648         logln((UnicodeString)"OK: toRules() => " + r);
3649     } else {
3650         errln((UnicodeString)"FAIL: toRules() => " + r +
3651               ", expected " + rule);
3652     }
3653
3654     expect(*t, "The Quick Brown Fox",
3655            UNICODE_STRING_SIMPLE("T(t=\\u0074)he Q(q=\\u0071)uick B(b=\\u0062)rown F(f=\\u0066)ox"));
3656
3657     delete t;
3658 }
3659
3660 void TransliteratorTest::TestInvalidBackRef(void) {
3661     UnicodeString rule =  ". > $1;";
3662     UnicodeString rule2 =CharsToUnicodeString("(.) <> &hex/unicode($1) &name($1); . > $1; [{}] >\\u0020;");
3663     UParseError pe;
3664     UErrorCode ec = U_ZERO_ERROR;
3665     Transliterator *t = Transliterator::createFromRules("Test", rule, UTRANS_FORWARD, pe, ec);
3666     Transliterator *t2 = Transliterator::createFromRules("Test2", rule2, UTRANS_FORWARD, pe, ec);
3667
3668     if (t != NULL) {
3669         errln("FAIL: createFromRules should have returned NULL");
3670         delete t;
3671     }
3672
3673     if (t2 != NULL) {
3674         errln("FAIL: createFromRules should have returned NULL");
3675         delete t2;
3676     }
3677
3678     if (U_SUCCESS(ec)) {
3679         errln("FAIL: Ok: . > $1; => no error");
3680     } else {
3681         logln((UnicodeString)"Ok: . > $1; => " + u_errorName(ec));
3682     }
3683 }
3684
3685 void TransliteratorTest::TestMulticharStringSet() {
3686     // Basic testing
3687     const char* rule =
3688         "       [{aa}]       > x;"
3689         "         a          > y;"
3690         "       [b{bc}]      > z;"
3691         "[{gd}] { e          > q;"
3692         "         e } [{fg}] > r;" ;
3693
3694     UParseError pe;
3695     UErrorCode ec = U_ZERO_ERROR;
3696     Transliterator* t = Transliterator::createFromRules("Test", rule, UTRANS_FORWARD, pe, ec);
3697     if (t == NULL || U_FAILURE(ec)) {
3698         delete t;
3699         errln("FAIL: createFromRules failed");
3700         return;
3701     }
3702
3703     expect(*t, "a aa ab bc d gd de gde gdefg ddefg",
3704            "y x yz z d gd de gdq gdqfg ddrfg");
3705     delete t;
3706
3707     // Overlapped string test.  Make sure that when multiple
3708     // strings can match that the longest one is matched.
3709     rule =
3710         "    [a {ab} {abc}]    > x;"
3711         "           b          > y;"
3712         "           c          > z;"
3713         " q [t {st} {rst}] { e > p;" ;
3714
3715     t = Transliterator::createFromRules("Test", rule, UTRANS_FORWARD, pe, ec);
3716     if (t == NULL || U_FAILURE(ec)) {
3717         delete t;
3718         errln("FAIL: createFromRules failed");
3719         return;
3720     }
3721
3722     expect(*t, "a ab abc qte qste qrste",
3723            "x x x qtp qstp qrstp");
3724     delete t;
3725 }
3726
3727 // vvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvv
3728 // BEGIN TestUserFunction support factory
3729
3730 Transliterator* _TUFF[4];
3731 UnicodeString* _TUFID[4];
3732
3733 static Transliterator* U_EXPORT2 _TUFFactory(const UnicodeString& /*ID*/,
3734                                    Transliterator::Token context) {
3735     return _TUFF[context.integer]->clone();
3736 }
3737
3738 static void _TUFReg(const UnicodeString& ID, Transliterator* t, int32_t n) {
3739     _TUFF[n] = t;
3740     _TUFID[n] = new UnicodeString(ID);
3741     Transliterator::registerFactory(ID, _TUFFactory, Transliterator::integerToken(n));
3742 }
3743
3744 static void _TUFUnreg(int32_t n) {
3745     if (_TUFF[n] != NULL) {
3746         Transliterator::unregister(*_TUFID[n]);
3747         delete _TUFF[n];
3748         delete _TUFID[n];
3749     }
3750 }
3751
3752 // END TestUserFunction support factory
3753 // ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
3754
3755 /**
3756  * Test that user-registered transliterators can be used under function
3757  * syntax.
3758  */
3759 void TransliteratorTest::TestUserFunction() {
3760
3761     Transliterator* t;
3762     UParseError pe;
3763     UErrorCode ec = U_ZERO_ERROR;
3764
3765     // Setup our factory
3766     int32_t i;
3767     for (i=0; i<4; ++i) {
3768         _TUFF[i] = NULL;
3769     }
3770
3771     // There's no need to register inverses if we don't use them
3772     t = Transliterator::createFromRules("gif",
3773                                         UNICODE_STRING_SIMPLE("'\\'u(..)(..) > '<img src=\"http://www.unicode.org/gifs/24/' $1 '/U' $1$2 '.gif\">';"),
3774                                         UTRANS_FORWARD, pe, ec);
3775     if (t == NULL || U_FAILURE(ec)) {
3776         dataerrln((UnicodeString)"FAIL: createFromRules gif " + u_errorName(ec));
3777         return;
3778     }
3779     _TUFReg("Any-gif", t, 0);
3780
3781     t = Transliterator::createFromRules("RemoveCurly",
3782                                         UNICODE_STRING_SIMPLE("[\\{\\}] > ; '\\N' > ;"),
3783                                         UTRANS_FORWARD, pe, ec);
3784     if (t == NULL || U_FAILURE(ec)) {
3785         errln((UnicodeString)"FAIL: createFromRules RemoveCurly " + u_errorName(ec));
3786         goto FAIL;
3787     }
3788     expect(*t, UNICODE_STRING_SIMPLE("\\N{name}"), "name");
3789     _TUFReg("Any-RemoveCurly", t, 1);
3790
3791     logln("Trying &hex");
3792     t = Transliterator::createFromRules("hex2",
3793                                         "(.) > &hex($1);",
3794                                         UTRANS_FORWARD, pe, ec);
3795     if (t == NULL || U_FAILURE(ec)) {
3796         errln("FAIL: createFromRules");
3797         goto FAIL;
3798     }
3799     logln("Registering");
3800     _TUFReg("Any-hex2", t, 2);
3801     t = Transliterator::createInstance("Any-hex2", UTRANS_FORWARD, ec);
3802     if (t == NULL || U_FAILURE(ec)) {
3803         errln((UnicodeString)"FAIL: createInstance Any-hex2 " + u_errorName(ec));
3804         goto FAIL;
3805     }
3806     expect(*t, "abc", UNICODE_STRING_SIMPLE("\\u0061\\u0062\\u0063"));
3807     delete t;
3808
3809     logln("Trying &gif");
3810     t = Transliterator::createFromRules("gif2",
3811                                         "(.) > &Gif(&Hex2($1));",
3812                                         UTRANS_FORWARD, pe, ec);
3813     if (t == NULL || U_FAILURE(ec)) {
3814         errln((UnicodeString)"FAIL: createFromRules gif2 " + u_errorName(ec));
3815         goto FAIL;
3816     }
3817     logln("Registering");
3818     _TUFReg("Any-gif2", t, 3);
3819     t = Transliterator::createInstance("Any-gif2", UTRANS_FORWARD, ec);
3820     if (t == NULL || U_FAILURE(ec)) {
3821         errln((UnicodeString)"FAIL: createInstance Any-gif2 " + u_errorName(ec));
3822         goto FAIL;
3823     }
3824     expect(*t, "ab", "<img src=\"http://www.unicode.org/gifs/24/00/U0061.gif\">"
3825            "<img src=\"http://www.unicode.org/gifs/24/00/U0062.gif\">");
3826     delete t;
3827
3828     // Test that filters are allowed after &
3829     t = Transliterator::createFromRules("test",
3830                                         "(.) > &Hex($1) ' ' &RemoveCurly(&Name($1)) ' ';",
3831                                         UTRANS_FORWARD, pe, ec);
3832     if (t == NULL || U_FAILURE(ec)) {
3833         errln((UnicodeString)"FAIL: createFromRules test " + u_errorName(ec));
3834         goto FAIL;
3835     }
3836     expect(*t, "abc",
3837            UNICODE_STRING_SIMPLE("\\u0061 LATIN SMALL LETTER A \\u0062 LATIN SMALL LETTER B \\u0063 LATIN SMALL LETTER C "));
3838     delete t;
3839
3840  FAIL:
3841     for (i=0; i<4; ++i) {
3842         _TUFUnreg(i);
3843     }
3844 }
3845
3846 /**
3847  * Test the Any-X transliterators.
3848  */
3849 void TransliteratorTest::TestAnyX(void) {
3850     UParseError parseError;
3851     UErrorCode status = U_ZERO_ERROR;
3852     Transliterator* anyLatin =
3853         Transliterator::createInstance("Any-Latin", UTRANS_FORWARD, parseError, status);
3854     if (anyLatin==0) {
3855         dataerrln("FAIL: createInstance returned NULL - %s", u_errorName(status));
3856         delete anyLatin;
3857         return;
3858     }
3859
3860     expect(*anyLatin,
3861            CharsToUnicodeString("greek:\\u03B1\\u03B2\\u03BA\\u0391\\u0392\\u039A hiragana:\\u3042\\u3076\\u304F cyrillic:\\u0430\\u0431\\u0446"),
3862            CharsToUnicodeString("greek:abkABK hiragana:abuku cyrillic:abc"));
3863
3864     delete anyLatin;
3865 }
3866
3867 /**
3868  * Test Any-X transliterators with sample letters from all scripts.
3869  */
3870 void TransliteratorTest::TestAny(void) {
3871     UErrorCode status = U_ZERO_ERROR;
3872     // Note: there is a lot of implict construction of UnicodeStrings from (char *) in
3873     //       function call parameters going on in this test.
3874     UnicodeSet alphabetic("[:alphabetic:]", status);
3875     if (U_FAILURE(status)) {
3876         dataerrln("Failure: file %s, line %d, status = %s", __FILE__, __LINE__, u_errorName(status));
3877         return;
3878     }
3879     alphabetic.freeze();
3880
3881     UnicodeString testString;
3882     for (int32_t i = 0; i < USCRIPT_CODE_LIMIT; i++) {
3883         const char *scriptName = uscript_getShortName((UScriptCode)i);
3884         if (scriptName == NULL) {
3885             errln("Failure: file %s, line %d: Script Code %d is invalid, ", __FILE__, __LINE__, i);
3886             return;
3887         }
3888
3889         UnicodeSet sample;
3890         sample.applyPropertyAlias("script", scriptName, status);
3891         if (U_FAILURE(status)) {
3892             errln("Failure: file %s, line %d, status = %s", __FILE__, __LINE__, u_errorName(status));
3893             return;
3894         }
3895         sample.retainAll(alphabetic);
3896         for (int32_t count=0; count<5; count++) {
3897             UChar32 c = sample.charAt(count);
3898             if (c == -1) {
3899                 break;
3900             }
3901             testString.append(c);
3902         }
3903     }
3904
3905     UParseError parseError;
3906     Transliterator* anyLatin =
3907         Transliterator::createInstance("Any-Latin", UTRANS_FORWARD, parseError, status);
3908     if (U_FAILURE(status)) {
3909         dataerrln("Failure: file %s, line %d, status = %s", __FILE__, __LINE__, u_errorName(status));
3910         return;
3911     }
3912
3913     logln(UnicodeString("Sample set for Any-Latin: ") + testString);
3914     anyLatin->transliterate(testString);
3915     logln(UnicodeString("Sample result for Any-Latin: ") + testString);
3916     delete anyLatin;
3917 }
3918
3919
3920 /**
3921  * Test the source and target set API.  These are only implemented
3922  * for RBT and CompoundTransliterator at this time.
3923  */
3924 void TransliteratorTest::TestSourceTargetSet() {
3925     UErrorCode ec = U_ZERO_ERROR;
3926
3927     // Rules
3928     const char* r =
3929         "a > b; "
3930         "r [x{lu}] > q;";
3931
3932     // Expected source
3933     UnicodeSet expSrc("[arx{lu}]", ec);
3934
3935     // Expected target
3936     UnicodeSet expTrg("[bq]", ec);
3937
3938     UParseError pe;
3939     Transliterator* t = Transliterator::createFromRules("test", r, UTRANS_FORWARD, pe, ec);
3940
3941     if (U_FAILURE(ec)) {
3942         delete t;
3943         errln("FAIL: Couldn't set up test");
3944         return;
3945     }
3946
3947     UnicodeSet src; t->getSourceSet(src);
3948     UnicodeSet trg; t->getTargetSet(trg);
3949
3950     if (src == expSrc && trg == expTrg) {
3951         UnicodeString a, b;
3952         logln((UnicodeString)"Ok: " +
3953               r + " => source = " + src.toPattern(a, TRUE) +
3954               ", target = " + trg.toPattern(b, TRUE));
3955     } else {
3956         UnicodeString a, b, c, d;
3957         errln((UnicodeString)"FAIL: " +
3958               r + " => source = " + src.toPattern(a, TRUE) +
3959               ", expected " + expSrc.toPattern(b, TRUE) +
3960               "; target = " + trg.toPattern(c, TRUE) +
3961               ", expected " + expTrg.toPattern(d, TRUE));
3962     }
3963
3964     delete t;
3965 }
3966
3967 /**
3968  * Test handling of Pattern_White_Space, for both RBT and UnicodeSet.
3969  */
3970 void TransliteratorTest::TestPatternWhiteSpace() {
3971     // Rules
3972     const char* r = "a > \\u200E b;";
3973
3974     UErrorCode ec = U_ZERO_ERROR;
3975     UParseError pe;
3976     Transliterator* t = Transliterator::createFromRules("test", CharsToUnicodeString(r), UTRANS_FORWARD, pe, ec);
3977
3978     if (U_FAILURE(ec)) {
3979         errln("FAIL: Couldn't set up test");
3980     } else {
3981         expect(*t, "a", "b");
3982     }
3983     delete t;
3984
3985     // UnicodeSet
3986     ec = U_ZERO_ERROR;
3987     UnicodeSet set(CharsToUnicodeString("[a \\u200E]"), ec);
3988
3989     if (U_FAILURE(ec)) {
3990         errln("FAIL: Couldn't set up test");
3991     } else {
3992         if (set.contains(0x200E)) {
3993             errln("FAIL: U+200E not being ignored by UnicodeSet");
3994         }
3995     }
3996 }
3997 //======================================================================
3998 // this method is in TestUScript.java
3999 //======================================================================
4000 void TransliteratorTest::TestAllCodepoints(){
4001     UScriptCode code= USCRIPT_INVALID_CODE;
4002     char id[256]={'\0'};
4003     char abbr[256]={'\0'};
4004     char newId[256]={'\0'};
4005     char newAbbrId[256]={'\0'};
4006     char oldId[256]={'\0'};
4007     char oldAbbrId[256]={'\0'};
4008
4009     UErrorCode status =U_ZERO_ERROR;
4010     UParseError pe;
4011
4012     for(uint32_t i = 0; i<=0x10ffff; i++){
4013         code =  uscript_getScript(i,&status);
4014         if(code == USCRIPT_INVALID_CODE){
4015             dataerrln("uscript_getScript for codepoint \\U%08X failed.", i);
4016         }
4017         const char* myId = uscript_getName(code);
4018         if(!myId) {
4019           dataerrln("Valid script code returned NULL name. Check your data!");
4020           return;
4021         }
4022         uprv_strcpy(id,myId);
4023         uprv_strcpy(abbr,uscript_getShortName(code));
4024
4025         uprv_strcpy(newId,"[:");
4026         uprv_strcat(newId,id);
4027         uprv_strcat(newId,":];NFD");
4028
4029         uprv_strcpy(newAbbrId,"[:");
4030         uprv_strcat(newAbbrId,abbr);
4031         uprv_strcat(newAbbrId,":];NFD");
4032
4033         if(uprv_strcmp(newId,oldId)!=0){
4034             Transliterator* t = Transliterator::createInstance(newId,UTRANS_FORWARD,pe,status);
4035             if(t==NULL || U_FAILURE(status)){
4036                 dataerrln((UnicodeString)"FAIL: Could not create " + id + " - " + u_errorName(status));
4037             }
4038             delete t;
4039         }
4040         if(uprv_strcmp(newAbbrId,oldAbbrId)!=0){
4041             Transliterator* t = Transliterator::createInstance(newAbbrId,UTRANS_FORWARD,pe,status);
4042             if(t==NULL || U_FAILURE(status)){
4043                 dataerrln((UnicodeString)"FAIL: Could not create " + id + " - " + u_errorName(status));
4044             }
4045             delete t;
4046         }
4047         uprv_strcpy(oldId,newId);
4048         uprv_strcpy(oldAbbrId, newAbbrId);
4049
4050     }
4051
4052 }
4053
/*
 * TEST_TRANSLIT_ID(id, cls)
 *
 * Creates the forward transliterator for `id` and reports a failure when
 * its getDynamicClassID() differs from cls::getStaticClassID().  A creation
 * failure is reported through dataerrln().  `id` is also passed to a "%s"
 * format, so it has to be a C string.  The instance is deleted before the
 * block ends.
 */
#define TEST_TRANSLIT_ID(id, cls) { \
  UErrorCode ec = U_ZERO_ERROR; \
  Transliterator* t = Transliterator::createInstance(id, UTRANS_FORWARD, ec); \
  if (U_FAILURE(ec)) { \
    dataerrln("FAIL: Couldn't create %s - %s", id, u_errorName(ec)); \
  } else { \
    if (t->getDynamicClassID() != cls::getStaticClassID()) { \
      errln("FAIL: " #cls " dynamic and static class ID mismatch"); \
    } \
    /* *t = *t; */ /*can't do this: coverage test for assignment op*/ \
  } \
  delete t; \
}
4067
/*
 * TEST_TRANSLIT_RULE(rule, cls)
 *
 * Same check as the ID-based macro, but the transliterator is built from
 * rule text with createFromRules() under the ID "_".  `rule` must be a
 * string literal: it is pasted next to another literal in the failure
 * message and relies on adjacent-literal concatenation.  Creation failures
 * are reported through errln().
 */
#define TEST_TRANSLIT_RULE(rule, cls) { \
  UErrorCode ec = U_ZERO_ERROR; \
  UParseError pe; \
  Transliterator* t = Transliterator::createFromRules("_", rule, UTRANS_FORWARD, pe, ec); \
  if (U_FAILURE(ec)) { \
    errln("FAIL: Couldn't create " rule); \
  } else { \
    if (t->getDynamicClassID() != cls ::getStaticClassID()) { \
      errln("FAIL: " #cls " dynamic and static class ID mismatch"); \
    } \
    /* *t = *t; */ /*can't do this: coverage test for assignment op*/ \
  } \
  delete t; \
}
4082
/**
 * For each listed transliterator ID (and one rule string), create an
 * instance and check that its dynamic class ID matches the static class ID
 * of the named implementation class.
 */
void TransliteratorTest::TestBoilerplate() {
    TEST_TRANSLIT_ID("Any-Latin", AnyTransliterator);
    TEST_TRANSLIT_ID("Any-Hex", EscapeTransliterator);
    TEST_TRANSLIT_ID("Hex-Any", UnescapeTransliterator);
    TEST_TRANSLIT_ID("Lower", LowercaseTransliterator);
    TEST_TRANSLIT_ID("Upper", UppercaseTransliterator);
    TEST_TRANSLIT_ID("Title", TitlecaseTransliterator);
    TEST_TRANSLIT_ID("Null", NullTransliterator);
    TEST_TRANSLIT_ID("Remove", RemoveTransliterator);
    TEST_TRANSLIT_ID("Any-Name", UnicodeNameTransliterator);
    TEST_TRANSLIT_ID("Name-Any", NameUnicodeTransliterator);
    TEST_TRANSLIT_ID("NFD", NormalizationTransliterator);
    TEST_TRANSLIT_ID("Latin-Greek", CompoundTransliterator);
    // The rule-based class is reached through rule text rather than an ID.
    TEST_TRANSLIT_RULE("a>b;", RuleBasedTransliterator);
}
4098
4099 void TransliteratorTest::TestAlternateSyntax() {
4100     // U+2206 == &
4101     // U+2190 == <
4102     // U+2192 == >
4103     // U+2194 == <>
4104     expect(CharsToUnicodeString("a \\u2192 x; b \\u2190 y; c \\u2194 z"),
4105            "abc",
4106            "xbz");
4107     expect(CharsToUnicodeString("([:^ASCII:]) \\u2192 \\u2206Name($1);"),
4108            CharsToUnicodeString("<=\\u2190; >=\\u2192; <>=\\u2194; &=\\u2206"),
4109            UNICODE_STRING_SIMPLE("<=\\N{LEFTWARDS ARROW}; >=\\N{RIGHTWARDS ARROW}; <>=\\N{LEFT RIGHT ARROW}; &=\\N{INCREMENT}"));
4110 }
4111
// Rule sets referenced by index from BEGIN_END_TEST_CASES and, for entry [17],
// directly by TestBeginEnd() and TestBeginEndToRules().  Each entry is one
// string built from adjacent literals.  Entries whose rules use ::BEGIN/::END
// are commented out and replaced by an empty string so that the bracketed
// index comments keep matching the real array positions.
static const char* BEGIN_END_RULES[] = {
    // [0]
    "abc > xy;"
    "aba > z;",

    // [1]
/*
    "::BEGIN;"
    "abc > xy;"
    "::END;"
    "::BEGIN;"
    "aba > z;"
    "::END;",
*/
    "", // test case commented out below, this is here to keep from messing up the indexes

    // [2]
/*
    "abc > xy;"
    "::BEGIN;"
    "aba > z;"
    "::END;",
*/
    "", // test case commented out below, this is here to keep from messing up the indexes

    // [3]
/*
    "::BEGIN;"
    "abc > xy;"
    "::END;"
    "aba > z;",
*/
    "", // test case commented out below, this is here to keep from messing up the indexes

    // [4]
    "abc > xy;"
    "::Null;"
    "aba > z;",

    // [5]
    "::Upper;"
    "ABC > xy;"
    "AB > x;"
    "C > z;"
    "::Upper;"
    "XYZ > p;"
    "XY > q;"
    "Z > r;"
    "::Upper;",

    // [6]
    "$ws = [[:Separator:][\\u0009-\\u000C]$];"
    "$delim = [\\-$ws];"
    "$ws $delim* > ' ';"
    "'-' $delim* > '-';",

    // [7]
    "::Null;"
    "$ws = [[:Separator:][\\u0009-\\u000C]$];"
    "$delim = [\\-$ws];"
    "$ws $delim* > ' ';"
    "'-' $delim* > '-';",

    // [8]
    "$ws = [[:Separator:][\\u0009-\\u000C]$];"
    "$delim = [\\-$ws];"
    "$ws $delim* > ' ';"
    "'-' $delim* > '-';"
    "::Null;",

    // [9]
    "$ws = [[:Separator:][\\u0009-\\u000C]$];"
    "$delim = [\\-$ws];"
    "::Null;"
    "$ws $delim* > ' ';"
    "'-' $delim* > '-';",

    // [10]
/*
    "::BEGIN;"
    "$ws = [[:Separator:][\\u0009-\\u000C]$];"
    "$delim = [\\-$ws];"
    "::END;"
    "$ws $delim* > ' ';"
    "'-' $delim* > '-';",
*/
    "", // test case commented out below, this is here to keep from messing up the indexes

    // [11]
/*
    "$ws = [[:Separator:][\\u0009-\\u000C]$];"
    "$delim = [\\-$ws];"
    "::BEGIN;"
    "$ws $delim* > ' ';"
    "'-' $delim* > '-';"
    "::END;",
*/
    "", // test case commented out below, this is here to keep from messing up the indexes

    // [12]
/*
    "$ws = [[:Separator:][\\u0009-\\u000C]$];"
    "$delim = [\\-$ws];"
    "$ab = [ab];"
    "::BEGIN;"
    "$ws $delim* > ' ';"
    "'-' $delim* > '-';"
    "::END;"
    "::BEGIN;"
    "$ab { ' ' } $ab > '-';"
    "c { ' ' > ;"
    "::END;"
    "::BEGIN;"
    "'a-a' > a\\%|a;"
    "::END;",
*/
    "", // test case commented out below, this is here to keep from messing up the indexes

    // [13]
    "$ws = [[:Separator:][\\u0009-\\u000C]$];"
    "$delim = [\\-$ws];"
    "$ab = [ab];"
    "::Null;"
    "$ws $delim* > ' ';"
    "'-' $delim* > '-';"
    "::Null;"
    "$ab { ' ' } $ab > '-';"
    "c { ' ' > ;"
    "::Null;"
    "'a-a' > a\\%|a;",

    // [14]
/*
    "::[abc];"
    "::BEGIN;"
    "abc > xy;"
    "::END;"
    "::BEGIN;"
    "aba > yz;"
    "::END;"
    "::Upper;",
*/
    "", // test case commented out below, this is here to keep from messing up the indexes

    // [15]
    "::[abc];"
    "abc > xy;"
    "::Null;"
    "aba > yz;"
    "::Upper;",

    // [16]
/*
    "::[abc];"
    "::BEGIN;"
    "abc <> xy;"
    "::END;"
    "::BEGIN;"
    "aba <> yz;"
    "::END;"
    "::Upper(Lower);"
    "::([XYZ]);"
*/
    "", // test case commented out below, this is here to keep from messing up the indexes

    // [17]
    "::[abc];"
    "abc <> xy;"
    "::Null;"
    "aba <> yz;"
    "::Upper(Lower);"
    "::([XYZ]);"
};
// Number of entries in BEGIN_END_RULES, including the empty placeholders.
static const int32_t BEGIN_END_RULES_length = (int32_t)(sizeof(BEGIN_END_RULES) / sizeof(BEGIN_END_RULES[0]));
4286
4287 /*
4288 (This entire test is commented out below and will need some heavy revision when we re-add
4289 the ::BEGIN/::END stuff)
4290 static const char* BOGUS_BEGIN_END_RULES[] = {
4291     // [7]
4292     "::BEGIN;"
4293     "abc > xy;"
4294     "::BEGIN;"
4295     "aba > z;"
4296     "::END;"
4297     "::END;",
4298
4299     // [8]
4300     "abc > xy;"
4301     " aba > z;"
4302     "::END;",
4303
4304     // [9]
4305     "::BEGIN;"
4306     "::Upper;"
4307     "::END;"
4308 };
4309 static const int32_t BOGUS_BEGIN_END_RULES_length = (int32_t)(sizeof(BOGUS_BEGIN_END_RULES) / sizeof(BOGUS_BEGIN_END_RULES[0]));
4310 */
4311
// Flat table of test cases for TestBeginEnd() and TestBeginEndToRules().
// Every case takes three consecutive slots (rules, input, expected output),
// and both tests step through the array three entries at a time, so the
// number of entries must stay a multiple of three.  Cases that depend on the
// commented-out ::BEGIN/::END rule sets are commented out here as well.
static const char* BEGIN_END_TEST_CASES[] = {
    // rules             input                   expected output
    BEGIN_END_RULES[0],  "abc ababc aba",        "xy zbc z",
//    BEGIN_END_RULES[1],  "abc ababc aba",        "xy abxy z",
//    BEGIN_END_RULES[2],  "abc ababc aba",        "xy abxy z",
//    BEGIN_END_RULES[3],  "abc ababc aba",        "xy abxy z",
    BEGIN_END_RULES[4],  "abc ababc aba",        "xy abxy z",
    BEGIN_END_RULES[5],  "abccabaacababcbc",     "PXAARXQBR",

    BEGIN_END_RULES[6],  "e   e - e---e-  e",    "e e e-e-e",
    BEGIN_END_RULES[7],  "e   e - e---e-  e",    "e e e-e-e",
    BEGIN_END_RULES[8],  "e   e - e---e-  e",    "e e e-e-e",
    BEGIN_END_RULES[9],  "e   e - e---e-  e",    "e e e-e-e",
//    BEGIN_END_RULES[10],  "e   e - e---e-  e",    "e e e-e-e",
//    BEGIN_END_RULES[11], "e   e - e---e-  e",    "e e e-e-e",
//    BEGIN_END_RULES[12], "e   e - e---e-  e",    "e e e-e-e",
//    BEGIN_END_RULES[12], "a    a    a    a",     "a%a%a%a",
//    BEGIN_END_RULES[12], "a a-b c b a",          "a%a-b cb-a",
    BEGIN_END_RULES[13], "e   e - e---e-  e",    "e e e-e-e",
    BEGIN_END_RULES[13], "a    a    a    a",     "a%a%a%a",
    BEGIN_END_RULES[13], "a a-b c b a",          "a%a-b cb-a",

//    BEGIN_END_RULES[14], "abc xy ababc xyz aba", "XY xy ABXY xyz YZ",
    BEGIN_END_RULES[15], "abc xy ababc xyz aba", "XY xy ABXY xyz YZ",
//    BEGIN_END_RULES[16], "abc xy ababc xyz aba", "XY xy ABXY xyz YZ",
    BEGIN_END_RULES[17], "abc xy ababc xyz aba", "XY xy ABXY xyz YZ"
};
// Total number of array slots (three per test case).
static const int32_t BEGIN_END_TEST_CASES_length = (int32_t)(sizeof(BEGIN_END_TEST_CASES) / sizeof(BEGIN_END_TEST_CASES[0]));
4340
4341 void TransliteratorTest::TestBeginEnd() {
4342     // run through the list of test cases above
4343     int32_t i = 0;
4344     for (i = 0; i < BEGIN_END_TEST_CASES_length; i += 3) {
4345         expect((UnicodeString)"Test case #" + (i / 3),
4346                UnicodeString(BEGIN_END_TEST_CASES[i], -1, US_INV),
4347                UnicodeString(BEGIN_END_TEST_CASES[i + 1], -1, US_INV),
4348                UnicodeString(BEGIN_END_TEST_CASES[i + 2], -1, US_INV));
4349     }
4350
4351     // instantiate the one reversible rule set in the reverse direction and make sure it does the right thing
4352     UParseError parseError;
4353     UErrorCode status = U_ZERO_ERROR;
4354     Transliterator* reversed  = Transliterator::createFromRules("Reversed", UnicodeString(BEGIN_END_RULES[17]),
4355             UTRANS_REVERSE, parseError, status);
4356     if (reversed == 0 || U_FAILURE(status)) {
4357         reportParseError(UnicodeString("FAIL: Couldn't create reversed transliterator"), parseError, status);
4358     } else {
4359         expect(*reversed, UnicodeString("xy XY XYZ yz YZ"), UnicodeString("xy abc xaba yz aba"));
4360     }
4361     delete reversed;
4362
4363     // finally, run through the list of syntactically-ill-formed rule sets above and make sure
4364     // that all of them cause errors
4365 /*
4366 (commented out until we have the real ::BEGIN/::END stuff in place
4367     for (i = 0; i < BOGUS_BEGIN_END_RULES_length; i++) {
4368         UParseError parseError;
4369         UErrorCode status = U_ZERO_ERROR;
4370         Transliterator* t = Transliterator::createFromRules("foo", UnicodeString(BOGUS_BEGIN_END_RULES[i]),
4371                 UTRANS_FORWARD, parseError, status);
4372         if (!U_FAILURE(status)) {
4373             delete t;
4374             errln((UnicodeString)"Should have gotten syntax error from " + BOGUS_BEGIN_END_RULES[i]);
4375         }
4376     }
4377 */
4378 }
4379
4380 void TransliteratorTest::TestBeginEndToRules() {
4381     // run through the same list of test cases we used above, but this time, instead of just
4382     // instantiating a Transliterator from the rules and running the test against it, we instantiate
4383     // a Transliterator from the rules, do toRules() on it, instantiate a Transliterator from
4384     // the resulting set of rules, and make sure that the generated rule set is semantically equivalent
4385     // to (i.e., does the same thing as) the original rule set
4386     for (int32_t i = 0; i < BEGIN_END_TEST_CASES_length; i += 3) {
4387         UParseError parseError;
4388         UErrorCode status = U_ZERO_ERROR;
4389         Transliterator* t = Transliterator::createFromRules("--", UnicodeString(BEGIN_END_TEST_CASES[i], -1, US_INV),
4390                 UTRANS_FORWARD, parseError, status);
4391         if (U_FAILURE(status)) {
4392             reportParseError(UnicodeString("FAIL: Couldn't create transliterator"), parseError, status);
4393         } else {
4394             UnicodeString rules;
4395             t->toRules(rules, TRUE);
4396             Transliterator* t2 = Transliterator::createFromRules((UnicodeString)"Test case #" + (i / 3), rules,
4397                     UTRANS_FORWARD, parseError, status);
4398             if (U_FAILURE(status)) {
4399                 reportParseError(UnicodeString("FAIL: Couldn't create transliterator from generated rules"),
4400                         parseError, status);
4401                 delete t;
4402             } else {
4403                 expect(*t2,
4404                        UnicodeString(BEGIN_END_TEST_CASES[i + 1], -1, US_INV),
4405                        UnicodeString(BEGIN_END_TEST_CASES[i + 2], -1, US_INV));
4406                 delete t;
4407                 delete t2;
4408             }
4409         }
4410     }
4411
4412     // do the same thing for the reversible test case
4413     UParseError parseError;
4414     UErrorCode status = U_ZERO_ERROR;
4415     Transliterator* reversed = Transliterator::createFromRules("Reversed", UnicodeString(BEGIN_END_RULES[17]),
4416             UTRANS_REVERSE, parseError, status);
4417     if (U_FAILURE(status)) {
4418         reportParseError(UnicodeString("FAIL: Couldn't create reversed transliterator"), parseError, status);
4419     } else {
4420         UnicodeString rules;
4421         reversed->toRules(rules, FALSE);
4422         Transliterator* reversed2 = Transliterator::createFromRules("Reversed", rules, UTRANS_FORWARD,
4423                 parseError, status);
4424         if (U_FAILURE(status)) {
4425             reportParseError(UnicodeString("FAIL: Couldn't create reversed transliterator from generated rules"),
4426                     parseError, status);
4427             delete reversed;
4428         } else {
4429             expect(*reversed2,
4430                    UnicodeString("xy XY XYZ yz YZ"),
4431                    UnicodeString("xy abc xaba yz aba"));
4432             delete reversed;
4433             delete reversed2;
4434         }
4435     }
4436 }
4437
4438 void TransliteratorTest::TestRegisterAlias() {
4439     UnicodeString longID("Lower;[aeiou]Upper");
4440     UnicodeString shortID("Any-CapVowels");
4441     UnicodeString reallyShortID("CapVowels");
4442
4443     Transliterator::registerAlias(shortID, longID);
4444
4445     UErrorCode err = U_ZERO_ERROR;
4446     Transliterator* t1 = Transliterator::createInstance(longID, UTRANS_FORWARD, err);
4447     if (U_FAILURE(err)) {
4448         errln("Failed to instantiate transliterator with long ID");
4449         Transliterator::unregister(shortID);
4450         return;
4451     }
4452     Transliterator* t2 = Transliterator::createInstance(reallyShortID, UTRANS_FORWARD, err);
4453     if (U_FAILURE(err)) {
4454         errln("Failed to instantiate transliterator with short ID");
4455         delete t1;
4456         Transliterator::unregister(shortID);
4457         return;
4458     }
4459
4460     if (t1->getID() != longID)
4461         errln("Transliterator instantiated with long ID doesn't have long ID");
4462     if (t2->getID() != reallyShortID)
4463         errln("Transliterator instantiated with short ID doesn't have short ID");
4464
4465     UnicodeString rules1;
4466     UnicodeString rules2;
4467
4468     t1->toRules(rules1, TRUE);
4469     t2->toRules(rules2, TRUE);
4470     if (rules1 != rules2)
4471         errln("Alias transliterators aren't the same");
4472
4473     delete t1;
4474     delete t2;
4475     Transliterator::unregister(shortID);
4476
4477     t1 = Transliterator::createInstance(shortID, UTRANS_FORWARD, err);
4478     if (U_SUCCESS(err)) {
4479         errln("Instantiation with short ID succeeded after short ID was unregistered");
4480         delete t1;
4481     }
4482
4483     // try the same thing again, but this time with something other than
4484     // an instance of CompoundTransliterator
4485     UnicodeString realID("Latin-Greek");
4486     UnicodeString fakeID("Latin-dlgkjdflkjdl");
4487     Transliterator::registerAlias(fakeID, realID);
4488
4489     err = U_ZERO_ERROR;
4490     t1 = Transliterator::createInstance(realID, UTRANS_FORWARD, err);
4491     if (U_FAILURE(err)) {
4492         dataerrln("Failed to instantiate transliterator with real ID - %s", u_errorName(err));
4493         Transliterator::unregister(realID);
4494         return;
4495     }
4496     t2 = Transliterator::createInstance(fakeID, UTRANS_FORWARD, err);
4497     if (U_FAILURE(err)) {
4498         errln("Failed to instantiate transliterator with fake ID");
4499         delete t1;
4500         Transliterator::unregister(realID);
4501         return;
4502     }
4503
4504     t1->toRules(rules1, TRUE);
4505     t2->toRules(rules2, TRUE);
4506     if (rules1 != rules2)
4507         errln("Alias transliterators aren't the same");
4508
4509     delete t1;
4510     delete t2;
4511     Transliterator::unregister(fakeID);
4512 }
4513
4514 void TransliteratorTest::TestRuleStripping() {
4515     /*
4516 #
4517 \uE001>\u0C01; # SIGN
4518     */
4519     static const UChar rule[] = {
4520         0x0023,0x0020,0x000D,0x000A,
4521         0xE001,0x003E,0x0C01,0x003B,0x0020,0x0023,0x0020,0x0053,0x0049,0x0047,0x004E,0
4522     };
4523     static const UChar expectedRule[] = {
4524         0xE001,0x003E,0x0C01,0x003B,0
4525     };
4526     UChar result[sizeof(rule)/sizeof(rule[0])];
4527     UErrorCode status = U_ZERO_ERROR;
4528     int32_t len = utrans_stripRules(rule, (int32_t)(sizeof(rule)/sizeof(rule[0])), result, &status);
4529     if (len != u_strlen(expectedRule)) {
4530         errln("utrans_stripRules return len = %d", len);
4531     }
4532     if (u_strncmp(expectedRule, result, len) != 0) {
4533         errln("utrans_stripRules did not return expected string");
4534     }
4535 }
4536
4537 /**
4538  * Test the Halfwidth-Fullwidth transliterator (ticket 6281).
4539  */
4540 void TransliteratorTest::TestHalfwidthFullwidth(void) {
4541     UParseError parseError;
4542     UErrorCode status = U_ZERO_ERROR;
4543     Transliterator* hf = Transliterator::createInstance("Halfwidth-Fullwidth", UTRANS_FORWARD, parseError, status);
4544     Transliterator* fh = Transliterator::createInstance("Fullwidth-Halfwidth", UTRANS_FORWARD, parseError, status);
4545     if (hf == 0 || fh == 0) {
4546         dataerrln("FAIL: createInstance failed - %s", u_errorName(status));
4547         delete hf;
4548         delete fh;
4549         return;
4550     }
4551
4552     // Array of 2n items
4553     // Each item is
4554     //   "hf"|"fh"|"both",
4555     //   <Halfwidth>,
4556     //   <Fullwidth>
4557     const char* DATA[] = {
4558         "both",
4559         "\\uFFE9\\uFFEA\\uFFEB\\uFFEC\\u0061\\uFF71\\u00AF\\u0020",
4560         "\\u2190\\u2191\\u2192\\u2193\\uFF41\\u30A2\\uFFE3\\u3000",
4561     };
4562     int32_t DATA_length = (int32_t)(sizeof(DATA) / sizeof(DATA[0]));
4563
4564     for (int32_t i=0; i<DATA_length; i+=3) {
4565         UnicodeString h = CharsToUnicodeString(DATA[i+1]);
4566         UnicodeString f = CharsToUnicodeString(DATA[i+2]);
4567         switch (*DATA[i]) {
4568         case 0x68: //'h': // Halfwidth-Fullwidth only
4569             expect(*hf, h, f);
4570             break;
4571         case 0x66: //'f': // Fullwidth-Halfwidth only
4572             expect(*fh, f, h);
4573             break;
4574         case 0x62: //'b': // both directions
4575             expect(*hf, h, f);
4576             expect(*fh, f, h);
4577             break;
4578         }
4579     }
4580     delete hf;
4581     delete fh;
4582 }
4583
4584
4585     /**
4586      *  Test Thai.  The text is the first paragraph of "What is Unicode" from the Unicode.org web site.
4587      *              TODO: confirm that the expected results are correct.
4588      *              For now, test just confirms that C++ and Java give identical results.
4589      */
4590 void TransliteratorTest::TestThai(void) {
4591 #if !UCONFIG_NO_BREAK_ITERATION
4592     UParseError parseError;
4593     UErrorCode status = U_ZERO_ERROR;
4594     Transliterator* tr = Transliterator::createInstance("Any-Latin", UTRANS_FORWARD, parseError, status);
4595     if (tr == 0) {
4596         dataerrln("FAIL: createInstance failed - %s", u_errorName(status));
4597         return;
4598     }
4599     if (U_FAILURE(status)) {
4600         errln("FAIL: createInstance failed with %s", u_errorName(status));
4601         return;
4602     }
4603     const char *thaiText =
4604         "\\u0e42\\u0e14\\u0e22\\u0e1e\\u0e37\\u0e49\\u0e19\\u0e10\\u0e32\\u0e19\\u0e41\\u0e25\\u0e49\\u0e27, \\u0e04\\u0e2d"
4605         "\\u0e21\\u0e1e\\u0e34\\u0e27\\u0e40\\u0e15\\u0e2d\\u0e23\\u0e4c\\u0e08\\u0e30\\u0e40\\u0e01\\u0e35\\u0e48\\u0e22"
4606         "\\u0e27\\u0e02\\u0e49\\u0e2d\\u0e07\\u0e01\\u0e31\\u0e1a\\u0e40\\u0e23\\u0e37\\u0e48\\u0e2d\\u0e07\\u0e02\\u0e2d"
4607         "\\u0e07\\u0e15\\u0e31\\u0e27\\u0e40\\u0e25\\u0e02. \\u0e04\\u0e2d\\u0e21\\u0e1e\\u0e34\\u0e27\\u0e40\\u0e15\\u0e2d"
4608         "\\u0e23\\u0e4c\\u0e08\\u0e31\\u0e14\\u0e40\\u0e01\\u0e47\\u0e1a\\u0e15\\u0e31\\u0e27\\u0e2d\\u0e31\\u0e01\\u0e29"
4609         "\\u0e23\\u0e41\\u0e25\\u0e30\\u0e2d\\u0e31\\u0e01\\u0e02\\u0e23\\u0e30\\u0e2d\\u0e37\\u0e48\\u0e19\\u0e46 \\u0e42"
4610         "\\u0e14\\u0e22\\u0e01\\u0e32\\u0e23\\u0e01\\u0e33\\u0e2b\\u0e19\\u0e14\\u0e2b\\u0e21\\u0e32\\u0e22\\u0e40\\u0e25"
4611         "\\u0e02\\u0e43\\u0e2b\\u0e49\\u0e2a\\u0e33\\u0e2b\\u0e23\\u0e31\\u0e1a\\u0e41\\u0e15\\u0e48\\u0e25\\u0e30\\u0e15"
4612         "\\u0e31\\u0e27. \\u0e01\\u0e48\\u0e2d\\u0e19\\u0e2b\\u0e19\\u0e49\\u0e32\\u0e17\\u0e35\\u0e48\\u0e4a Unicode \\u0e08"
4613         "\\u0e30\\u0e16\\u0e39\\u0e01\\u0e2a\\u0e23\\u0e49\\u0e32\\u0e07\\u0e02\\u0e36\\u0e49\\u0e19, \\u0e44\\u0e14\\u0e49"
4614         "\\u0e21\\u0e35\\u0e23\\u0e30\\u0e1a\\u0e1a encoding \\u0e2d\\u0e22\\u0e39\\u0e48\\u0e2b\\u0e25\\u0e32\\u0e22\\u0e23"
4615         "\\u0e49\\u0e2d\\u0e22\\u0e23\\u0e30\\u0e1a\\u0e1a\\u0e2a\\u0e33\\u0e2b\\u0e23\\u0e31\\u0e1a\\u0e01\\u0e32\\u0e23"
4616         "\\u0e01\\u0e33\\u0e2b\\u0e19\\u0e14\\u0e2b\\u0e21\\u0e32\\u0e22\\u0e40\\u0e25\\u0e02\\u0e40\\u0e2b\\u0e25\\u0e48"
4617         "\\u0e32\\u0e19\\u0e35\\u0e49. \\u0e44\\u0e21\\u0e48\\u0e21\\u0e35 encoding \\u0e43\\u0e14\\u0e17\\u0e35\\u0e48"
4618         "\\u0e21\\u0e35\\u0e08\\u0e33\\u0e19\\u0e27\\u0e19\\u0e15\\u0e31\\u0e27\\u0e2d\\u0e31\\u0e01\\u0e02\\u0e23\\u0e30"
4619         "\\u0e21\\u0e32\\u0e01\\u0e40\\u0e1e\\u0e35\\u0e22\\u0e07\\u0e1e\\u0e2d: \\u0e22\\u0e01\\u0e15\\u0e31\\u0e27\\u0e2d"
4620         "\\u0e22\\u0e48\\u0e32\\u0e07\\u0e40\\u0e0a\\u0e48\\u0e19, \\u0e40\\u0e09\\u0e1e\\u0e32\\u0e30\\u0e43\\u0e19\\u0e01"
4621         "\\u0e25\\u0e38\\u0e48\\u0e21\\u0e2a\\u0e2b\\u0e20\\u0e32\\u0e1e\\u0e22\\u0e38\\u0e42\\u0e23\\u0e1b\\u0e40\\u0e1e"
4622         "\\u0e35\\u0e22\\u0e07\\u0e41\\u0e2b\\u0e48\\u0e07\\u0e40\\u0e14\\u0e35\\u0e22\\u0e27 \\u0e01\\u0e47\\u0e15\\u0e49"
4623         "\\u0e2d\\u0e07\\u0e01\\u0e32\\u0e23\\u0e2b\\u0e25\\u0e32\\u0e22 encoding \\u0e43\\u0e19\\u0e01\\u0e32\\u0e23\\u0e04"
4624         "\\u0e23\\u0e2d\\u0e1a\\u0e04\\u0e25\\u0e38\\u0e21\\u0e17\\u0e38\\u0e01\\u0e20\\u0e32\\u0e29\\u0e32\\u0e43\\u0e19"
4625         "\\u0e01\\u0e25\\u0e38\\u0e48\\u0e21. \\u0e2b\\u0e23\\u0e37\\u0e2d\\u0e41\\u0e21\\u0e49\\u0e41\\u0e15\\u0e48\\u0e43"
4626         "\\u0e19\\u0e20\\u0e32\\u0e29\\u0e32\\u0e40\\u0e14\\u0e35\\u0e48\\u0e22\\u0e27 \\u0e40\\u0e0a\\u0e48\\u0e19 \\u0e20"
4627         "\\u0e32\\u0e29\\u0e32\\u0e2d\\u0e31\\u0e07\\u0e01\\u0e24\\u0e29 \\u0e01\\u0e47\\u0e44\\u0e21\\u0e48\\u0e21\\u0e35"
4628         " encoding \\u0e43\\u0e14\\u0e17\\u0e35\\u0e48\\u0e40\\u0e1e\\u0e35\\u0e22\\u0e07\\u0e1e\\u0e2d\\u0e2a\\u0e33\\u0e2b"
4629         "\\u0e23\\u0e31\\u0e1a\\u0e17\\u0e38\\u0e01\\u0e15\\u0e31\\u0e27\\u0e2d\\u0e31\\u0e01\\u0e29\\u0e23, \\u0e40\\u0e04"
4630         "\\u0e23\\u0e37\\u0e48\\u0e2d\\u0e07\\u0e2b\\u0e21\\u0e32\\u0e22\\u0e27\\u0e23\\u0e23\\u0e04\\u0e15\\u0e2d\\u0e19"
4631         " \\u0e41\\u0e25\\u0e30\\u0e2a\\u0e31\\u0e0d\\u0e25\\u0e31\\u0e01\\u0e29\\u0e13\\u0e4c\\u0e17\\u0e32\\u0e07\\u0e40"
4632         "\\u0e17\\u0e04\\u0e19\\u0e34\\u0e04\\u0e17\\u0e35\\u0e48\\u0e43\\u0e0a\\u0e49\\u0e01\\u0e31\\u0e19\\u0e2d\\u0e22"
4633         "\\u0e39\\u0e48\\u0e17\\u0e31\\u0e48\\u0e27\\u0e44\\u0e1b.";
4634
4635     const char *latinText =
4636         "doy ph\\u1ee5\\u0304\\u0302n \\u1e6d\\u0304h\\u0101n l\\u00e6\\u0302w, khxmphiwtexr\\u0312 ca ke\\u012b\\u0300"
4637         "ywk\\u0304\\u0125xng k\\u1ea1b re\\u1ee5\\u0304\\u0300xng k\\u0304hxng t\\u1ea1wlek\\u0304h. khxmphiwtexr"
4638         "\\u0312 c\\u1ea1d k\\u0115b t\\u1ea1w x\\u1ea1ks\\u0304\\u02b9r l\\u00e6a x\\u1ea1kk\\u0304h ra x\\u1ee5\\u0304"
4639         "\\u0300n\\u00ab doy k\\u0101r k\\u1ea3h\\u0304nd h\\u0304m\\u0101ylek\\u0304h h\\u0304\\u0131\\u0302 s\\u0304"
4640         "\\u1ea3h\\u0304r\\u1ea1b t\\u00e6\\u0300la t\\u1ea1w. k\\u0300xn h\\u0304n\\u0302\\u0101 th\\u012b\\u0300\\u0301"
4641         " Unicode ca t\\u0304h\\u016bk s\\u0304r\\u0302\\u0101ng k\\u0304h\\u1ee5\\u0302n, d\\u1ecb\\u0302 m\\u012b "
4642         "rabb encoding xy\\u016b\\u0300 h\\u0304l\\u0101y r\\u0302xy rabb s\\u0304\\u1ea3h\\u0304r\\u1ea1b k\\u0101"
4643         "r k\\u1ea3h\\u0304nd h\\u0304m\\u0101ylek\\u0304h h\\u0304el\\u0300\\u0101 n\\u012b\\u0302. m\\u1ecb\\u0300m"
4644         "\\u012b encoding d\\u0131 th\\u012b\\u0300 m\\u012b c\\u1ea3nwn t\\u1ea1w x\\u1ea1kk\\u0304hra m\\u0101k p"
4645         "he\\u012byng phx: yk t\\u1ea1wx\\u1ef3\\u0101ng ch\\u00e8n, c\\u0304heph\\u0101a n\\u0131 kl\\u00f9m s\\u0304"
4646         "h\\u0304p\\u0323h\\u0101ph yurop phe\\u012byng h\\u0304\\u00e6\\u0300ng de\\u012byw k\\u0306 t\\u0302xngk\\u0101"
4647         "r h\\u0304l\\u0101y encoding n\\u0131 k\\u0101r khrxbkhlum thuk p\\u0323h\\u0101s\\u0304\\u02b9\\u0101 n\\u0131"
4648         " kl\\u00f9m. h\\u0304r\\u1ee5\\u0304x m\\u00e6\\u0302t\\u00e6\\u0300 n\\u0131 p\\u0323h\\u0101s\\u0304\\u02b9"
4649         "\\u0101 de\\u012b\\u0300yw ch\\u00e8n p\\u0323h\\u0101s\\u0304\\u02b9\\u0101 x\\u1ea1ngkvs\\u0304\\u02b9 k\\u0306"
4650         " m\\u1ecb\\u0300m\\u012b encoding d\\u0131 th\\u012b\\u0300 phe\\u012byng phx s\\u0304\\u1ea3h\\u0304r\\u1ea1"
4651         "b thuk t\\u1ea1w x\\u1ea1ks\\u0304\\u02b9r, kher\\u1ee5\\u0304\\u0300xngh\\u0304m\\u0101y wrrkh txn l\\u00e6"
4652         "a s\\u0304\\u1ea1\\u1ef5l\\u1ea1ks\\u0304\\u02b9\\u1e47\\u0312 th\\u0101ng thekhnikh th\\u012b\\u0300 ch\\u0131"
4653         "\\u0302 k\\u1ea1n xy\\u016b\\u0300 th\\u1ea1\\u0300wp\\u1ecb.";
4654
4655
4656     UnicodeString  xlitText(thaiText);
4657     xlitText = xlitText.unescape();
4658     tr->transliterate(xlitText);
4659
4660     UnicodeString expectedText(latinText);
4661     expectedText = expectedText.unescape();
4662     expect(*tr, xlitText, expectedText);
4663
4664     delete tr;
4665 #endif
4666 }
4667
4668
4669 //======================================================================
4670 // Support methods
4671 //======================================================================
4672 void TransliteratorTest::expectT(const UnicodeString& id,
4673                                  const UnicodeString& source,
4674                                  const UnicodeString& expectedResult) {
4675     UErrorCode ec = U_ZERO_ERROR;
4676     UParseError pe;
4677     Transliterator *t = Transliterator::createInstance(id, UTRANS_FORWARD, pe, ec);
4678     if (U_FAILURE(ec)) {
4679         errln((UnicodeString)"FAIL: Could not create " + id + " -  " + u_errorName(ec));
4680         delete t;
4681         return;
4682     }
4683     expect(*t, source, expectedResult);
4684     delete t;
4685 }
4686
4687 void TransliteratorTest::reportParseError(const UnicodeString& message,
4688                                           const UParseError& parseError,
4689                                           const UErrorCode& status) {
4690     dataerrln(message +
4691           /*", parse error " + parseError.code +*/
4692           ", line " + parseError.line +
4693           ", offset " + parseError.offset +
4694           ", pre-context " + prettify(parseError.preContext, TRUE) +
4695           ", post-context " + prettify(parseError.postContext,TRUE) +
4696           ", Error: " + u_errorName(status));
4697 }
4698
4699 void TransliteratorTest::expect(const UnicodeString& rules,
4700                                 const UnicodeString& source,
4701                                 const UnicodeString& expectedResult,
4702                                 UTransPosition *pos) {
4703     expect("<ID>", rules, source, expectedResult, pos);
4704 }
4705
4706 void TransliteratorTest::expect(const UnicodeString& id,
4707                                 const UnicodeString& rules,
4708                                 const UnicodeString& source,
4709                                 const UnicodeString& expectedResult,
4710                                 UTransPosition *pos) {
4711     UErrorCode status = U_ZERO_ERROR;
4712     UParseError parseError;
4713     Transliterator* t = Transliterator::createFromRules(id, rules, UTRANS_FORWARD, parseError, status);
4714     if (U_FAILURE(status)) {
4715         reportParseError(UnicodeString("Couldn't create transliterator from ") + rules, parseError, status);
4716     } else {
4717         expect(*t, source, expectedResult, pos);
4718     }
4719     delete t;
4720 }
4721
4722 void TransliteratorTest::expect(const Transliterator& t,
4723                                 const UnicodeString& source,
4724                                 const UnicodeString& expectedResult,
4725                                 const Transliterator& reverseTransliterator) {
4726     expect(t, source, expectedResult);
4727     expect(reverseTransliterator, expectedResult, source);
4728 }
4729
4730 void TransliteratorTest::expect(const Transliterator& t,
4731                                 const UnicodeString& source,
4732                                 const UnicodeString& expectedResult,
4733                                 UTransPosition *pos) {
4734     if (pos == 0) {
4735         UnicodeString result(source);
4736         t.transliterate(result);
4737         expectAux(t.getID() + ":String", source, result, expectedResult);
4738     }
4739     UTransPosition index={0, 0, 0, 0};
4740     if (pos != 0) {
4741         index = *pos;
4742     }
4743
4744     UnicodeString rsource(source);
4745     if (pos == 0) {
4746         t.transliterate(rsource);
4747     } else {
4748         // Do it all at once -- below we do it incrementally
4749         t.finishTransliteration(rsource, *pos);
4750     }
4751     expectAux(t.getID() + ":Replaceable", source, rsource, expectedResult);
4752
4753     // Test keyboard (incremental) transliteration -- this result
4754     // must be the same after we finalize (see below).
4755     UnicodeString log;
4756     rsource.remove();
4757     if (pos != 0) {
4758         rsource = source;
4759         formatInput(log, rsource, index);
4760         log.append(" -> ");
4761         UErrorCode status = U_ZERO_ERROR;
4762         t.transliterate(rsource, index, status);
4763         formatInput(log, rsource, index);
4764     } else {
4765         for (int32_t i=0; i<source.length(); ++i) {
4766             if (i != 0) {
4767                 log.append(" + ");
4768             }
4769             log.append(source.charAt(i)).append(" -> ");
4770             UErrorCode status = U_ZERO_ERROR;
4771             t.transliterate(rsource, index, source.charAt(i), status);
4772             formatInput(log, rsource, index);
4773         }
4774     }
4775
4776     // As a final step in keyboard transliteration, we must call
4777     // transliterate to finish off any pending partial matches that
4778     // were waiting for more input.
4779     t.finishTransliteration(rsource, index);
4780     log.append(" => ").append(rsource);
4781
4782     expectAux(t.getID() + ":Keyboard", log,
4783               rsource == expectedResult,
4784               expectedResult);
4785 }
4786
4787
4788 /**
4789  * @param appendTo result is appended to this param.
4790  * @param input the string being transliterated
4791  * @param pos the index struct
4792  */
4793 UnicodeString& TransliteratorTest::formatInput(UnicodeString &appendTo,
4794                                                const UnicodeString& input,
4795                                                const UTransPosition& pos) {
4796     // Output a string of the form aaa{bbb|ccc|ddd}eee, where
4797     // the {} indicate the context start and limit, and the ||
4798     // indicate the start and limit.
4799     if (0 <= pos.contextStart &&
4800         pos.contextStart <= pos.start &&
4801         pos.start <= pos.limit &&
4802         pos.limit <= pos.contextLimit &&
4803         pos.contextLimit <= input.length()) {
4804
4805         UnicodeString a, b, c, d, e;
4806         input.extractBetween(0, pos.contextStart, a);
4807         input.extractBetween(pos.contextStart, pos.start, b);
4808         input.extractBetween(pos.start, pos.limit, c);
4809         input.extractBetween(pos.limit, pos.contextLimit, d);
4810         input.extractBetween(pos.contextLimit, input.length(), e);
4811         appendTo.append(a).append((UChar)123/*{*/).append(b).
4812             append((UChar)PIPE).append(c).append((UChar)PIPE).append(d).
4813             append((UChar)125/*}*/).append(e);
4814     } else {
4815         appendTo.append((UnicodeString)"INVALID UTransPosition {cs=" +
4816                         pos.contextStart + ", s=" + pos.start + ", l=" +
4817                         pos.limit + ", cl=" + pos.contextLimit + "} on " +
4818                         input);
4819     }
4820     return appendTo;
4821 }
4822
4823 void TransliteratorTest::expectAux(const UnicodeString& tag,
4824                                    const UnicodeString& source,
4825                                    const UnicodeString& result,
4826                                    const UnicodeString& expectedResult) {
4827     expectAux(tag, source + " -> " + result,
4828               result == expectedResult,
4829               expectedResult);
4830 }
4831
4832 void TransliteratorTest::expectAux(const UnicodeString& tag,
4833                                    const UnicodeString& summary, UBool pass,
4834                                    const UnicodeString& expectedResult) {
4835     if (pass) {
4836         logln(UnicodeString("(")+tag+") " + prettify(summary));
4837     } else {
4838         dataerrln(UnicodeString("FAIL: (")+tag+") "
4839               + prettify(summary)
4840               + ", expected " + prettify(expectedResult));
4841     }
4842 }
4843
4844 #endif /* #if !UCONFIG_NO_TRANSLITERATION */