icuSources/test/intltest/transtst.cpp

   1 /*
   2 **********************************************************************
   3 *   Copyright (C) 1999-2010, International Business Machines
   4 *   Corporation and others.  All Rights Reserved.
   5 **********************************************************************
   6 *   Date        Name        Description
   7 *   11/10/99    aliu        Creation.
   8 **********************************************************************
   9 */
  10
  11 #include "unicode/utypes.h"
  12
  13 #if !UCONFIG_NO_TRANSLITERATION
  14
  15 #include "transtst.h"
  16 #include "unicode/locid.h"
  17 #include "unicode/dtfmtsym.h"
  18 #include "unicode/normlzr.h"
  19 #include "unicode/translit.h"
  20 #include "unicode/uchar.h"
  21 #include "unicode/unifilt.h"
  22 #include "unicode/uniset.h"
  23 #include "unicode/ustring.h"
  24 #include "unicode/usetiter.h"
  25 #include "unicode/uscript.h"
  26 #include "cpdtrans.h"
  27 #include "nultrans.h"
  28 #include "rbt.h"
  29 #include "rbt_pars.h"
  30 #include "anytrans.h"
  31 #include "esctrn.h"
  32 #include "name2uni.h"
  33 #include "nortrans.h"
  34 #include "remtrans.h"
  35 #include "titletrn.h"
  36 #include "tolowtrn.h"
  37 #include "toupptrn.h"
  38 #include "unesctrn.h"
  39 #include "uni2name.h"
  40 #include "cstring.h"
  41 #include "cmemory.h"
  42 #include <stdio.h>
  43
  44 /***********************************************************************
  45
  46                      HOW TO USE THIS TEST FILE
  47                                -or-
  48                   How I developed on two platforms
  49                 without losing (too much of) my mind
  50
  51
  52 1. Add new tests by copying/pasting/changing existing tests.  On Java,
  53    any public void method named Test...() taking no parameters becomes
  54    a test.  On C++, you need to modify the header and add a line to
  55    the runIndexedTest() dispatch method.
  56
  57 2. Make liberal use of the expect() method; it is your friend.
  58
  59 3. The tests in this file exactly match those in a sister file on the
  60    other side.  The two files are:
  61
  62    icu4j:  src/com/ibm/test/translit/TransliteratorTest.java
  63    icu4c:  source/test/intltest/transtst.cpp
  64
  65                   ==> THIS IS THE IMPORTANT PART <==
  66
  67    When you add a test in this file, add it in TransliteratorTest.java
  68    too.  Give it the same name and put it in the same relative place.
  69    This makes maintenance a lot simpler for any poor soul who ends up
  70    trying to synchronize the tests between icu4j and icu4c.
  71
  72 4. If you MUST enter a test that is NOT paralleled in the sister file,
  73    then add it in the special non-mirrored section.  These are
  74    labeled
  75
  76      "icu4j ONLY"
  77
  78    or
  79
  80      "icu4c ONLY"
  81
  82    Make sure you document the reason the test is here and not there.
  83
  84
  85 Thank you.
  86 The Management
  87 ***********************************************************************/
  88
  89 // Define character constants thusly to be EBCDIC-friendly
  90 enum {
  91     LEFT_BRACE=((UChar)0x007B), /*{*/
  92     PIPE      =((UChar)0x007C), /*|*/
  93     ZERO      =((UChar)0x0030), /*0*/
  94     UPPER_A   =((UChar)0x0041)  /*A*/
  95 };
  96
  97 TransliteratorTest::TransliteratorTest()
  98 :   DESERET_DEE((UChar32)0x10414),
  99     DESERET_dee((UChar32)0x1043C)
 100 {
 101 }
 102
 103 TransliteratorTest::~TransliteratorTest() {}
 104
 105 void
 106 TransliteratorTest::runIndexedTest(int32_t index, UBool exec,
 107                                    const char* &name, char* /*par*/) {
 108     switch (index) {
 109         TESTCASE(0,TestInstantiation);
 110         TESTCASE(1,TestSimpleRules);
 111         TESTCASE(2,TestRuleBasedInverse);
 112         TESTCASE(3,TestKeyboard);
 113         TESTCASE(4,TestKeyboard2);
 114         TESTCASE(5,TestKeyboard3);
 115         TESTCASE(6,TestArabic);
 116         TESTCASE(7,TestCompoundKana);
 117         TESTCASE(8,TestCompoundHex);
 118         TESTCASE(9,TestFiltering);
 119         TESTCASE(10,TestInlineSet);
 120         TESTCASE(11,TestPatternQuoting);
 121         TESTCASE(12,TestJ277);
 122         TESTCASE(13,TestJ243);
 123         TESTCASE(14,TestJ329);
 124         TESTCASE(15,TestSegments);
 125         TESTCASE(16,TestCursorOffset);
 126         TESTCASE(17,TestArbitraryVariableValues);
 127         TESTCASE(18,TestPositionHandling);
 128         TESTCASE(19,TestHiraganaKatakana);
 129         TESTCASE(20,TestCopyJ476);
 130         TESTCASE(21,TestAnchors);
 131         TESTCASE(22,TestInterIndic);
 132         TESTCASE(23,TestFilterIDs);
 133         TESTCASE(24,TestCaseMap);
 134         TESTCASE(25,TestNameMap);
 135         TESTCASE(26,TestLiberalizedID);
 136         TESTCASE(27,TestCreateInstance);
 137         TESTCASE(28,TestNormalizationTransliterator);
 138         TESTCASE(29,TestCompoundRBT);
 139         TESTCASE(30,TestCompoundFilter);
 140         TESTCASE(31,TestRemove);
 141         TESTCASE(32,TestToRules);
 142         TESTCASE(33,TestContext);
 143         TESTCASE(34,TestSupplemental);
 144         TESTCASE(35,TestQuantifier);
 145         TESTCASE(36,TestSTV);
 146         TESTCASE(37,TestCompoundInverse);
 147         TESTCASE(38,TestNFDChainRBT);
 148         TESTCASE(39,TestNullInverse);
 149         TESTCASE(40,TestAliasInverseID);
 150         TESTCASE(41,TestCompoundInverseID);
 151         TESTCASE(42,TestUndefinedVariable);
 152         TESTCASE(43,TestEmptyContext);
 153         TESTCASE(44,TestCompoundFilterID);
 154         TESTCASE(45,TestPropertySet);
 155         TESTCASE(46,TestNewEngine);
 156         TESTCASE(47,TestQuantifiedSegment);
 157         TESTCASE(48,TestDevanagariLatinRT);
 158         TESTCASE(49,TestTeluguLatinRT);
 159         TESTCASE(50,TestCompoundLatinRT);
 160         TESTCASE(51,TestSanskritLatinRT);
 161         TESTCASE(52,TestLocaleInstantiation);
 162         TESTCASE(53,TestTitleAccents);
 163         TESTCASE(54,TestLocaleResource);
 164         TESTCASE(55,TestParseError);
 165         TESTCASE(56,TestOutputSet);
 166         TESTCASE(57,TestVariableRange);
 167         TESTCASE(58,TestInvalidPostContext);
 168         TESTCASE(59,TestIDForms);
 169         TESTCASE(60,TestToRulesMark);
 170         TESTCASE(61,TestEscape);
 171         TESTCASE(62,TestAnchorMasking);
 172         TESTCASE(63,TestDisplayName);
 173         TESTCASE(64,TestSpecialCases);
 174 #if !UCONFIG_NO_FILE_IO
 175         TESTCASE(65,TestIncrementalProgress);
 176 #endif
 177         TESTCASE(66,TestSurrogateCasing);
 178         TESTCASE(67,TestFunction);
 179         TESTCASE(68,TestInvalidBackRef);
 180         TESTCASE(69,TestMulticharStringSet);
 181         TESTCASE(70,TestUserFunction);
 182         TESTCASE(71,TestAnyX);
 183         TESTCASE(72,TestSourceTargetSet);
 184         TESTCASE(73,TestGurmukhiDevanagari);
 185         TESTCASE(74,TestRuleWhitespace);
 186         TESTCASE(75,TestAllCodepoints);
 187         TESTCASE(76,TestBoilerplate);
 188         TESTCASE(77,TestAlternateSyntax);
 189         TESTCASE(78,TestBeginEnd);
 190         TESTCASE(79,TestBeginEndToRules);
 191         TESTCASE(80,TestRegisterAlias);
 192         TESTCASE(81,TestRuleStripping);
 193         TESTCASE(82,TestHalfwidthFullwidth);
 194         TESTCASE(83,TestThai);
 195         TESTCASE(84,TestAny);
 196         default: name = ""; break;
 197     }
 198 }
 199
 200 static const UVersionInfo ICU_39 = {3,9,4,0};
 201 /**
 202  * Make sure every system transliterator can be instantiated.
 203  *
 204  * ALSO test that the result of toRules() for each rule is a valid
 205  * rule.  Do this here so we don't have to have another test that
 206  * instantiates everything as well.
 207  */
 208 void TransliteratorTest::TestInstantiation() {
 209     UErrorCode ec = U_ZERO_ERROR;
 210     StringEnumeration* avail = Transliterator::getAvailableIDs(ec);
 211     assertSuccess("getAvailableIDs()", ec);
 212     assertTrue("getAvailableIDs()!=NULL", avail!=NULL);
 213     int32_t n = Transliterator::countAvailableIDs();
 214     assertTrue("getAvailableIDs().count()==countAvailableIDs()",
 215                avail->count(ec) == n);
 216     assertSuccess("count()", ec);
 217     UnicodeString name;
 218     for (int32_t i=0; i<n; ++i) {
 219         const UnicodeString& id = *avail->snext(ec);
 220         if (!assertSuccess("snext()", ec) ||
 221             !assertTrue("snext()!=NULL", (&id)!=NULL, TRUE)) {
 222             break;
 223         }
 224         UnicodeString id2 = Transliterator::getAvailableID(i);
 225         if (id.length() < 1) {
 226             errln(UnicodeString("FAIL: getAvailableID(") +
 227                   i + ") returned empty string");
 228             continue;
 229         }
 230         if (id != id2) {
 231             errln(UnicodeString("FAIL: getAvailableID(") +
 232                   i + ") != getAvailableIDs().snext()");
 233             continue;
 234         }
 235         UParseError parseError;
 236         UErrorCode status = U_ZERO_ERROR;
 237         Transliterator* t = Transliterator::createInstance(id,
 238                               UTRANS_FORWARD, parseError,status);
 239         name.truncate(0);
 240         Transliterator::getDisplayName(id, name);
 241         if (t == 0) {
 242 #if UCONFIG_NO_BREAK_ITERATION
 243             // If UCONFIG_NO_BREAK_ITERATION is on, then only Thai should fail.
 244             if (id.compare((UnicodeString)"Thai-Latin") != 0)
 245 #endif
 246                 dataerrln(UnicodeString("FAIL: Couldn't create ") + id +
 247                       /*", parse error " + parseError.code +*/
 248                       ", line " + parseError.line +
 249                       ", offset " + parseError.offset +
 250                       ", pre-context " + prettify(parseError.preContext, TRUE) +
 251                       ", post-context " +prettify(parseError.postContext,TRUE) +
 252                       ", Error: " + u_errorName(status));
 253                 // When createInstance fails, it deletes the failing
 254                 // entry from the available ID list.  We detect this
 255                 // here by looking for a change in countAvailableIDs.
 256             int32_t nn = Transliterator::countAvailableIDs();
 257             if (nn == (n - 1)) {
 258                 n = nn;
 259                 --i; // Compensate for deleted entry
 260             }
 261         } else {
 262             logln(UnicodeString("OK: ") + name + " (" + id + ")");
 263
 264             // Now test toRules
 265             UnicodeString rules;
 266             t->toRules(rules, TRUE);
 267             Transliterator *u = Transliterator::createFromRules("x",
 268                                     rules, UTRANS_FORWARD, parseError,status);
 269             if (u == 0) {
 270                 errln(UnicodeString("FAIL: ") + id +
 271                       ".createFromRules() => bad rules" +
 272                       /*", parse error " + parseError.code +*/
 273                       ", line " + parseError.line +
 274                       ", offset " + parseError.offset +
 275                       ", context " + prettify(parseError.preContext, TRUE) +
 276                       ", rules: " + prettify(rules, TRUE));
 277             } else {
 278                 delete u;
 279             }
 280             delete t;
 281         }
 282     }
 283     assertTrue("snext()==NULL", avail->snext(ec)==NULL);
 284     assertSuccess("snext()", ec);
 285     delete avail;
 286
 287     // Now test the failure path
 288     UParseError parseError;
 289     UErrorCode status = U_ZERO_ERROR;
 290     UnicodeString id("<Not a valid Transliterator ID>");
 291     Transliterator* t = Transliterator::createInstance(id, UTRANS_FORWARD, parseError, status);
 292     if (t != 0) {
 293         errln("FAIL: " + id + " returned a transliterator");
 294         delete t;
 295     } else {
 296         logln("OK: Bogus ID handled properly");
 297     }
 298 }
 299
 300 void TransliteratorTest::TestSimpleRules(void) {
 301     /* Example: rules 1. ab>x|y
 302      *                2. yc>z
 303      *
 304      * []|eabcd  start - no match, copy e to tranlated buffer
 305      * [e]|abcd  match rule 1 - copy output & adjust cursor
 306      * [ex|y]cd  match rule 2 - copy output & adjust cursor
 307      * [exz]|d   no match, copy d to transliterated buffer
 308      * [exzd]|   done
 309      */
 310     expect(UnicodeString("ab>x|y;", "") +
 311            "yc>z",
 312            "eabcd", "exzd");
 313
 314     /* Another set of rules:
 315      *    1. ab>x|yzacw
 316      *    2. za>q
 317      *    3. qc>r
 318      *    4. cw>n
 319      *
 320      * []|ab       Rule 1
 321      * [x|yzacw]   No match
 322      * [xy|zacw]   Rule 2
 323      * [xyq|cw]    Rule 4
 324      * [xyqn]|     Done
 325      */
 326     expect(UnicodeString("ab>x|yzacw;") +
 327            "za>q;" +
 328            "qc>r;" +
 329            "cw>n",
 330            "ab", "xyqn");
 331
 332     /* Test categories
 333      */
 334     UErrorCode status = U_ZERO_ERROR;
 335     UParseError parseError;
 336     Transliterator *t = Transliterator::createFromRules(
 337         "<ID>",
 338         UnicodeString("$dummy=").append((UChar)0xE100) +
 339         UnicodeString(";"
 340                       "$vowel=[aeiouAEIOU];"
 341                       "$lu=[:Lu:];"
 342                       "$vowel } $lu > '!';"
 343                       "$vowel > '&';"
 344                       "'!' { $lu > '^';"
 345                       "$lu > '*';"
 346                       "a > ERROR", ""),
 347         UTRANS_FORWARD, parseError,
 348         status);
 349     if (U_FAILURE(status)) {
 350         dataerrln("FAIL: RBT constructor failed - %s", u_errorName(status));
 351         return;
 352     }
 353     expect(*t, "abcdefgABCDEFGU", "&bcd&fg!^**!^*&");
 354     delete t;
 355 }
 356
 357 /**
 358  * Test inline set syntax and set variable syntax.
 359  */
 360 void TransliteratorTest::TestInlineSet(void) {
 361     expect("{ [:Ll:] } x > y; [:Ll:] > z;", "aAbxq", "zAyzz");
 362     expect("a[0-9]b > qrs", "1a7b9", "1qrs9");
 363
 364     expect(UnicodeString(
 365            "$digit = [0-9];"
 366            "$alpha = [a-zA-Z];"
 367            "$alphanumeric = [$digit $alpha];" // ***
 368            "$special = [^$alphanumeric];"     // ***
 369            "$alphanumeric > '-';"
 370            "$special > '*';", ""),
 371
 372            "thx-1138", "---*----");
 373 }
 374
 375 /**
 376  * Create some inverses and confirm that they work.  We have to be
 377  * careful how we do this, since the inverses will not be true
 378  * inverses -- we can't throw any random string at the composition
 379  * of the transliterators and expect the identity function.  F x
 380  * F' != I.  However, if we are careful about the input, we will
 381  * get the expected results.
 382  */
 383 void TransliteratorTest::TestRuleBasedInverse(void) {
 384     UnicodeString RULES =
 385         UnicodeString("abc>zyx;") +
 386         "ab>yz;" +
 387         "bc>zx;" +
 388         "ca>xy;" +
 389         "a>x;" +
 390         "b>y;" +
 391         "c>z;" +
 392
 393         "abc<zyx;" +
 394         "ab<yz;" +
 395         "bc<zx;" +
 396         "ca<xy;" +
 397         "a<x;" +
 398         "b<y;" +
 399         "c<z;" +
 400
 401         "";
 402
 403     const char* DATA[] = {
 404         // Careful here -- random strings will not work.  If we keep
 405         // the left side to the domain and the right side to the range
 406         // we will be okay though (left, abc; right xyz).
 407         "a", "x",
 408         "abcacab", "zyxxxyy",
 409         "caccb", "xyzzy",
 410     };
 411
 412     int32_t DATA_length = (int32_t)(sizeof(DATA) / sizeof(DATA[0]));
 413
 414     UErrorCode status = U_ZERO_ERROR;
 415     UParseError parseError;
 416     Transliterator *fwd = Transliterator::createFromRules("<ID>", RULES,
 417                                 UTRANS_FORWARD, parseError, status);
 418     Transliterator *rev = Transliterator::createFromRules("<ID>", RULES,
 419                                 UTRANS_REVERSE, parseError, status);
 420     if (U_FAILURE(status)) {
 421         errln("FAIL: RBT constructor failed");
 422         return;
 423     }
 424     for (int32_t i=0; i<DATA_length; i+=2) {
 425         expect(*fwd, DATA[i], DATA[i+1]);
 426         expect(*rev, DATA[i+1], DATA[i]);
 427     }
 428     delete fwd;
 429     delete rev;
 430 }
 431
 432 /**
 433  * Basic test of keyboard.
 434  */
 435 void TransliteratorTest::TestKeyboard(void) {
 436     UParseError parseError;
 437     UErrorCode status = U_ZERO_ERROR;
 438     Transliterator *t = Transliterator::createFromRules("<ID>",
 439                               UnicodeString("psch>Y;")
 440                               +"ps>y;"
 441                               +"ch>x;"
 442                               +"a>A;",
 443                               UTRANS_FORWARD, parseError,
 444                               status);
 445     if (U_FAILURE(status)) {
 446         errln("FAIL: RBT constructor failed");
 447         return;
 448     }
 449     const char* DATA[] = {
 450         // insertion, buffer
 451         "a", "A",
 452         "p", "Ap",
 453         "s", "Aps",
 454         "c", "Apsc",
 455         "a", "AycA",
 456         "psch", "AycAY",
 457         0, "AycAY", // null means finishKeyboardTransliteration
 458     };
 459
 460     keyboardAux(*t, DATA, (int32_t)(sizeof(DATA)/sizeof(DATA[0])));
 461     delete t;
 462 }
 463
 464 /**
 465  * Basic test of keyboard with cursor.
 466  */
 467 void TransliteratorTest::TestKeyboard2(void) {
 468     UParseError parseError;
 469     UErrorCode status = U_ZERO_ERROR;
 470     Transliterator *t = Transliterator::createFromRules("<ID>",
 471                               UnicodeString("ych>Y;")
 472                               +"ps>|y;"
 473                               +"ch>x;"
 474                               +"a>A;",
 475                               UTRANS_FORWARD, parseError,
 476                               status);
 477     if (U_FAILURE(status)) {
 478         errln("FAIL: RBT constructor failed");
 479         return;
 480     }
 481     const char* DATA[] = {
 482         // insertion, buffer
 483         "a", "A",
 484         "p", "Ap",
 485         "s", "Aps", // modified for rollback - "Ay",
 486         "c", "Apsc", // modified for rollback - "Ayc",
 487         "a", "AycA",
 488         "p", "AycAp",
 489         "s", "AycAps", // modified for rollback - "AycAy",
 490         "c", "AycApsc", // modified for rollback - "AycAyc",
 491         "h", "AycAY",
 492         0, "AycAY", // null means finishKeyboardTransliteration
 493     };
 494
 495     keyboardAux(*t, DATA, (int32_t)(sizeof(DATA)/sizeof(DATA[0])));
 496     delete t;
 497 }
 498
 499 /**
 500  * Test keyboard transliteration with back-replacement.
 501  */
 502 void TransliteratorTest::TestKeyboard3(void) {
 503     // We want th>z but t>y.  Furthermore, during keyboard
 504     // transliteration we want t>y then yh>z if t, then h are
 505     // typed.
 506     UnicodeString RULES("t>|y;"
 507                         "yh>z;");
 508
 509     const char* DATA[] = {
 510         // Column 1: characters to add to buffer (as if typed)
 511         // Column 2: expected appearance of buffer after
 512         //           keyboard xliteration.
 513         "a", "a",
 514         "b", "ab",
 515         "t", "abt", // modified for rollback - "aby",
 516         "c", "abyc",
 517         "t", "abyct", // modified for rollback - "abycy",
 518         "h", "abycz",
 519         0, "abycz", // null means finishKeyboardTransliteration
 520     };
 521
 522     UParseError parseError;
 523     UErrorCode status = U_ZERO_ERROR;
 524     Transliterator *t = Transliterator::createFromRules("<ID>", RULES, UTRANS_FORWARD, parseError, status);
 525     if (U_FAILURE(status)) {
 526         errln("FAIL: RBT constructor failed");
 527         return;
 528     }
 529     keyboardAux(*t, DATA, (int32_t)(sizeof(DATA)/sizeof(DATA[0])));
 530     delete t;
 531 }
 532
 533 void TransliteratorTest::keyboardAux(const Transliterator& t,
 534                                      const char* DATA[], int32_t DATA_length) {
 535     UErrorCode status = U_ZERO_ERROR;
 536     UTransPosition index={0, 0, 0, 0};
 537     UnicodeString s;
 538     for (int32_t i=0; i<DATA_length; i+=2) {
 539         UnicodeString log;
 540         if (DATA[i] != 0) {
 541             log = s + " + "
 542                 + DATA[i]
 543                 + " -> ";
 544             t.transliterate(s, index, DATA[i], status);
 545         } else {
 546             log = s + " => ";
 547             t.finishTransliteration(s, index);
 548         }
 549         // Show the start index '{' and the cursor '|'
 550         UnicodeString a, b, c;
 551         s.extractBetween(0, index.contextStart, a);
 552         s.extractBetween(index.contextStart, index.start, b);
 553         s.extractBetween(index.start, s.length(), c);
 554         log.append(a).
 555             append((UChar)LEFT_BRACE).
 556             append(b).
 557             append((UChar)PIPE).
 558             append(c);
 559         if (s == DATA[i+1] && U_SUCCESS(status)) {
 560             logln(log);
 561         } else {
 562             errln(UnicodeString("FAIL: ") + log + ", expected " + DATA[i+1]);
 563         }
 564     }
 565 }
 566
 567 void TransliteratorTest::TestArabic(void) {
 568 // Test disabled for 2.0 until new Arabic transliterator can be written.
 569 //    /*
 570 //    const char* DATA[] = {
 571 //        "Arabic", "\u062a\u062a\u0645\u062a\u0639\u0020"+
 572 //                  "\u0627\u0644\u0644\u063a\u0629\u0020"+
 573 //                  "\u0627\u0644\u0639\u0631\u0628\u0628\u064a\u0629\u0020"+
 574 //                  "\u0628\u0628\u0646\u0638\u0645\u0020"+
 575 //                  "\u0643\u062a\u0627\u0628\u0628\u064a\u0629\u0020"+
 576 //                  "\u062c\u0645\u064a\u0644\u0629",
 577 //    };
 578 //    */
 579 //
 580 //    UChar ar_raw[] = {
 581 //        0x062a, 0x062a, 0x0645, 0x062a, 0x0639, 0x0020, 0x0627,
 582 //        0x0644, 0x0644, 0x063a, 0x0629, 0x0020, 0x0627, 0x0644,
 583 //        0x0639, 0x0631, 0x0628, 0x0628, 0x064a, 0x0629, 0x0020,
 584 //        0x0628, 0x0628, 0x0646, 0x0638, 0x0645, 0x0020, 0x0643,
 585 //        0x062a, 0x0627, 0x0628, 0x0628, 0x064a, 0x0629, 0x0020,
 586 //        0x062c, 0x0645, 0x064a, 0x0644, 0x0629, 0
 587 //    };
 588 //    UnicodeString ar(ar_raw);
 589 //    UErrorCode status=U_ZERO_ERROR;
 590 //    UParseError parseError;
 591 //    Transliterator *t = Transliterator::createInstance("Latin-Arabic", UTRANS_FORWARD, parseError, status);
 592 //    if (t == 0) {
 593 //        errln("FAIL: createInstance failed");
 594 //        return;
 595 //    }
 596 //    expect(*t, "Arabic", ar);
 597 //    delete t;
 598 }
 599
 600 /**
 601  * Compose the Kana transliterator forward and reverse and try
 602  * some strings that should come out unchanged.
 603  */
 604 void TransliteratorTest::TestCompoundKana(void) {
 605     UParseError parseError;
 606     UErrorCode status = U_ZERO_ERROR;
 607     Transliterator* t = Transliterator::createInstance("Latin-Hiragana;Hiragana-Latin", UTRANS_FORWARD, parseError, status);
 608     if (t == 0) {
 609         dataerrln("FAIL: construction of Latin-Hiragana;Hiragana-Latin failed - %s", u_errorName(status));
 610     } else {
 611         expect(*t, "aaaaa", "aaaaa");
 612         delete t;
 613     }
 614 }
 615
 616 /**
 617  * Compose the hex transliterators forward and reverse.
 618  */
 619 void TransliteratorTest::TestCompoundHex(void) {
 620     UParseError parseError;
 621     UErrorCode status = U_ZERO_ERROR;
 622     Transliterator* a = Transliterator::createInstance("Any-Hex", UTRANS_FORWARD, parseError, status);
 623     Transliterator* b = Transliterator::createInstance("Hex-Any", UTRANS_FORWARD, parseError, status);
 624     Transliterator* transab[] = { a, b };
 625     Transliterator* transba[] = { b, a };
 626     if (a == 0 || b == 0) {
 627         errln("FAIL: construction failed");
 628         delete a;
 629         delete b;
 630         return;
 631     }
 632     // Do some basic tests of a
 633     expect(*a, "01", UnicodeString("\\u0030\\u0031", ""));
 634     // Do some basic tests of b
 635     expect(*b, UnicodeString("\\u0030\\u0031", ""), "01");
 636
 637     Transliterator* ab = new CompoundTransliterator(transab, 2);
 638     UnicodeString s("abcde", "");
 639     expect(*ab, s, s);
 640
 641     UnicodeString str(s);
 642     a->transliterate(str);
 643     Transliterator* ba = new CompoundTransliterator(transba, 2);
 644     expect(*ba, str, str);
 645
 646     delete ab;
 647     delete ba;
 648     delete a;
 649     delete b;
 650 }
 651
 652 int gTestFilterClassID = 0;
 653 /**
 654  * Used by TestFiltering().
 655  */
 656 class TestFilter : public UnicodeFilter {
 657     virtual UnicodeFunctor* clone() const {
 658         return new TestFilter(*this);
 659     }
 660     virtual UBool contains(UChar32 c) const {
 661         return c != (UChar)0x0063 /*c*/;
 662     }
 663     // Stubs
 664     virtual UnicodeString& toPattern(UnicodeString& result,
 665                                      UBool /*escapeUnprintable*/) const {
 666         return result;
 667     }
 668     virtual UBool matchesIndexValue(uint8_t /*v*/) const {
 669         return FALSE;
 670     }
 671     virtual void addMatchSetTo(UnicodeSet& /*toUnionTo*/) const {}
 672 public:
 673     UClassID getDynamicClassID() const { return (UClassID)&gTestFilterClassID; }
 674 };
 675
 676 /**
 677  * Do some basic tests of filtering.
 678  */
 679 void TransliteratorTest::TestFiltering(void) {
 680     UParseError parseError;
 681     UErrorCode status = U_ZERO_ERROR;
 682     Transliterator* hex = Transliterator::createInstance("Any-Hex", UTRANS_FORWARD, parseError, status);
 683     if (hex == 0) {
 684         errln("FAIL: createInstance(Any-Hex) failed");
 685         return;
 686     }
 687     hex->adoptFilter(new TestFilter());
 688     UnicodeString s("abcde");
 689     hex->transliterate(s);
 690     UnicodeString exp("\\u0061\\u0062c\\u0064\\u0065", "");
 691     if (s == exp) {
 692         logln(UnicodeString("Ok:   \"") + exp + "\"");
 693     } else {
 694         logln(UnicodeString("FAIL: \"") + s + "\", wanted \"" + exp + "\"");
 695     }
 696
 697     // ICU4C ONLY. Do not find Transliterator.orphanFilter() in ICU4J.
 698     UnicodeFilter *f = hex->orphanFilter();
 699     if (f == NULL){
 700         errln("FAIL: orphanFilter() should get a UnicodeFilter");
 701     } else {
 702         delete f;
 703     }
 704     delete hex;
 705 }
 706
 707 /**
 708  * Test anchors
 709  */
 710 void TransliteratorTest::TestAnchors(void) {
 711     expect(UnicodeString("^a  > 0; a$ > 2 ; a > 1;", ""),
 712            "aaa",
 713            "012");
 714     expect(UnicodeString("$s=[z$]; $s{a>0; a}$s>2; a>1;", ""),
 715            "aaa",
 716            "012");
 717     expect(UnicodeString("^ab  > 01 ;"
 718            " ab  > |8 ;"
 719            "  b  > k ;"
 720            " 8x$ > 45 ;"
 721            " 8x  > 77 ;", ""),
 722
 723            "ababbabxabx",
 724            "018k7745");
 725     expect(UnicodeString("$s = [z$] ;"
 726            "$s{ab    > 01 ;"
 727            "   ab    > |8 ;"
 728            "    b    > k ;"
 729            "   8x}$s > 45 ;"
 730            "   8x    > 77 ;", ""),
 731
 732            "abzababbabxzabxabx",
 733            "01z018k45z01x45");
 734 }
 735
 736 /**
 737  * Test pattern quoting and escape mechanisms.
 738  */
 739 void TransliteratorTest::TestPatternQuoting(void) {
 740     // Array of 3n items
 741     // Each item is <rules>, <input>, <expected output>
 742     const UnicodeString DATA[] = {
 743         UnicodeString(UChar(0x4E01)) + ">'[male adult]'",
 744         UnicodeString(UChar(0x4E01)),
 745         "[male adult]"
 746     };
 747
 748     for (int32_t i=0; i<3; i+=3) {
 749         logln(UnicodeString("Pattern: ") + prettify(DATA[i]));
 750         UParseError parseError;
 751         UErrorCode status = U_ZERO_ERROR;
 752         Transliterator *t = Transliterator::createFromRules("<ID>", DATA[i], UTRANS_FORWARD, parseError, status);
 753         if (U_FAILURE(status)) {
 754             errln("RBT constructor failed");
 755         } else {
 756             expect(*t, DATA[i+1], DATA[i+2]);
 757         }
 758         delete t;
 759     }
 760 }
 761
 762 /**
 763  * Regression test for bugs found in Greek transliteration.
 764  */
 765 void TransliteratorTest::TestJ277(void) {
 766     UErrorCode status = U_ZERO_ERROR;
 767     UParseError parseError;
 768     Transliterator *gl = Transliterator::createInstance("Greek-Latin; NFD; [:M:]Remove; NFC", UTRANS_FORWARD, parseError, status);
 769     if (gl == NULL) {
 770         dataerrln("FAIL: createInstance(Greek-Latin) returned NULL - %s", u_errorName(status));
 771         return;
 772     }
 773
 774     UChar sigma = 0x3C3;
 775     UChar upsilon = 0x3C5;
 776     UChar nu = 0x3BD;
 777 //    UChar PHI = 0x3A6;
 778     UChar alpha = 0x3B1;
 779 //    UChar omega = 0x3C9;
 780 //    UChar omicron = 0x3BF;
 781 //    UChar epsilon = 0x3B5;
 782
 783     // sigma upsilon nu -> syn
 784     UnicodeString syn;
 785     syn.append(sigma).append(upsilon).append(nu);
 786     expect(*gl, syn, "syn");
 787
 788     // sigma alpha upsilon nu -> saun
 789     UnicodeString sayn;
 790     sayn.append(sigma).append(alpha).append(upsilon).append(nu);
 791     expect(*gl, sayn, "saun");
 792
 793     // Again, using a smaller rule set
 794     UnicodeString rules(
 795                 "$alpha   = \\u03B1;"
 796                 "$nu      = \\u03BD;"
 797                 "$sigma   = \\u03C3;"
 798                 "$ypsilon = \\u03C5;"
 799                 "$vowel   = [aeiouAEIOU$alpha$ypsilon];"
 800                 "s <>           $sigma;"
 801                 "a <>           $alpha;"
 802                 "u <>  $vowel { $ypsilon;"
 803                 "y <>           $ypsilon;"
 804                 "n <>           $nu;",
 805                 "");
 806     Transliterator *mini = Transliterator::createFromRules("mini", rules, UTRANS_REVERSE, parseError, status);
 807     if (U_FAILURE(status)) { errln("FAIL: Transliterator constructor failed"); return; }
 808     expect(*mini, syn, "syn");
 809     expect(*mini, sayn, "saun");
 810     delete mini;
 811     mini = NULL;
 812
 813 #if !UCONFIG_NO_FORMATTING
 814     // Transliterate the Greek locale data
 815     Locale el("el");
 816     DateFormatSymbols syms(el, status);
 817     if (U_FAILURE(status)) { errln("FAIL: Transliterator constructor failed"); return; }
 818     int32_t i, count;
 819     const UnicodeString* data = syms.getMonths(count);
 820     for (i=0; i<count; ++i) {
 821         if (data[i].length() == 0) {
 822             continue;
 823         }
 824         UnicodeString out(data[i]);
 825         gl->transliterate(out);
 826         UBool ok = TRUE;
 827         if (data[i].length() >= 2 && out.length() >= 2 &&
 828             u_isupper(data[i].charAt(0)) && u_islower(data[i].charAt(1))) {
 829             if (!(u_isupper(out.charAt(0)) && u_islower(out.charAt(1)))) {
 830                 ok = FALSE;
 831             }
 832         }
 833         if (ok) {
 834             logln(prettify(data[i] + " -> " + out));
 835         } else {
 836             errln(UnicodeString("FAIL: ") + prettify(data[i] + " -> " + out));
 837         }
 838     }
 839 #endif
 840
 841     delete gl;
 842 }
 843
 844 /**
 845  * Prefix, suffix support in hex transliterators
 846  */
 847 void TransliteratorTest::TestJ243(void) {
 848     UErrorCode ec = U_ZERO_ERROR;
 849
 850     // Test default Hex-Any, which should handle
 851     // \u, \U, u+, and U+
 852     Transliterator *hex =
 853         Transliterator::createInstance("Hex-Any", UTRANS_FORWARD, ec);
 854     if (assertSuccess("getInstance", ec)) {
 855         expect(*hex, UnicodeString("\\u0041+\\U00000042,U+0043uU+0044z", ""), "A+B,CuDz");
 856     }
 857     delete hex;
 858
 859 //    // Try a custom Hex-Unicode
 860 //    // \uXXXX and &#xXXXX;
 861 //    ec = U_ZERO_ERROR;
 862 //    HexToUnicodeTransliterator hex2(UnicodeString("\\\\u###0;&\\#x###0\\;", ""), ec);
 863 //    expect(hex2, UnicodeString("\\u61\\u062\\u0063\\u00645\\u66x&#x30;&#x031;&#x0032;&#x00033;", ""),
 864 //           "abcd5fx012&#x00033;");
 865 //    // Try custom Any-Hex (default is tested elsewhere)
 866 //    ec = U_ZERO_ERROR;
 867 //    UnicodeToHexTransliterator hex3(UnicodeString("&\\#x###0;", ""), ec);
 868 //    expect(hex3, "012", "&#x30;&#x31;&#x32;");
 869 }
 870
 871 /**
 872  * Parsers need better syntax error messages.
 873  */
 874 void TransliteratorTest::TestJ329(void) {
 875
 876     struct { UBool containsErrors; const char* rule; } DATA[] = {
 877         { FALSE, "a > b; c > d" },
 878         { TRUE,  "a > b; no operator; c > d" },
 879     };
 880     int32_t DATA_length = (int32_t)(sizeof(DATA) / sizeof(DATA[0]));
 881
 882     for (int32_t i=0; i<DATA_length; ++i) {
 883         UErrorCode status = U_ZERO_ERROR;
 884         UParseError parseError;
 885         Transliterator *rbt = Transliterator::createFromRules("<ID>",
 886                                     DATA[i].rule,
 887                                     UTRANS_FORWARD,
 888                                     parseError,
 889                                     status);
 890         UBool gotError = U_FAILURE(status);
 891         UnicodeString desc(DATA[i].rule);
 892         desc.append(gotError ? " -> error" : " -> no error");
 893         if (gotError) {
 894             desc = desc + ", ParseError code=" + u_errorName(status) +
 895                 " line=" + parseError.line +
 896                 " offset=" + parseError.offset +
 897                 " context=" + parseError.preContext;
 898         }
 899         if (gotError == DATA[i].containsErrors) {
 900             logln(UnicodeString("Ok:   ") + desc);
 901         } else {
 902             errln(UnicodeString("FAIL: ") + desc);
 903         }
 904         delete rbt;
 905     }
 906 }
 907
 908 /**
 909  * Test segments and segment references.
 910  */
 911 void TransliteratorTest::TestSegments(void) {
 912     // Array of 3n items
 913     // Each item is <rules>, <input>, <expected output>
 914     UnicodeString DATA[] = {
 915         "([a-z]) '.' ([0-9]) > $2 '-' $1",
 916         "abc.123.xyz.456",
 917         "ab1-c23.xy4-z56",
 918
 919         // nested
 920         "(([a-z])([0-9])) > $1 '.' $2 '.' $3;",
 921         "a1 b2",
 922         "a1.a.1 b2.b.2",
 923     };
 924     int32_t DATA_length = (int32_t)(sizeof(DATA)/sizeof(*DATA));
 925
 926     for (int32_t i=0; i<DATA_length; i+=3) {
 927         logln("Pattern: " + prettify(DATA[i]));
 928         UParseError parseError;
 929         UErrorCode status = U_ZERO_ERROR;
 930         Transliterator *t = Transliterator::createFromRules("ID", DATA[i], UTRANS_FORWARD, parseError, status);
 931         if (U_FAILURE(status)) {
 932             errln("FAIL: RBT constructor");
 933         } else {
 934             expect(*t, DATA[i+1], DATA[i+2]);
 935         }
 936         delete t;
 937     }
 938 }
 939
 940 /**
 941  * Test cursor positioning outside of the key
 942  */
 943 void TransliteratorTest::TestCursorOffset(void) {
 944     // Array of 3n items
 945     // Each item is <rules>, <input>, <expected output>
 946     UnicodeString DATA[] = {
 947         "pre {alpha} post > | @ ALPHA ;"
 948         "eALPHA > beta ;"
 949         "pre {beta} post > BETA @@ | ;"
 950         "post > xyz",
 951
 952         "prealphapost prebetapost",
 953
 954         "prbetaxyz preBETApost",
 955     };
 956     int32_t DATA_length = (int32_t)(sizeof(DATA)/sizeof(*DATA));
 957
 958     for (int32_t i=0; i<DATA_length; i+=3) {
 959         logln("Pattern: " + prettify(DATA[i]));
 960         UParseError parseError;
 961         UErrorCode status = U_ZERO_ERROR;
 962         Transliterator *t = Transliterator::createFromRules("<ID>", DATA[i], UTRANS_FORWARD, parseError, status);
 963         if (U_FAILURE(status)) {
 964             errln("FAIL: RBT constructor");
 965         } else {
 966             expect(*t, DATA[i+1], DATA[i+2]);
 967         }
 968         delete t;
 969     }
 970 }
 971
 972 /**
 973  * Test zero length and > 1 char length variable values.  Test
 974  * use of variable refs in UnicodeSets.
 975  */
 976 void TransliteratorTest::TestArbitraryVariableValues(void) {
 977     // Array of 3n items
 978     // Each item is <rules>, <input>, <expected output>
 979     UnicodeString DATA[] = {
 980         "$abe = ab;"
 981         "$pat = x[yY]z;"
 982         "$ll  = 'a-z';"
 983         "$llZ = [$ll];"
 984         "$llY = [$ll$pat];"
 985         "$emp = ;"
 986
 987         "$abe > ABE;"
 988         "$pat > END;"
 989         "$llZ > 1;"
 990         "$llY > 2;"
 991         "7$emp 8 > 9;"
 992         "",
 993
 994         "ab xYzxyz stY78",
 995         "ABE ENDEND 1129",
 996     };
 997     int32_t DATA_length = (int32_t)(sizeof(DATA)/sizeof(*DATA));
 998
 999     for (int32_t i=0; i<DATA_length; i+=3) {
1000         logln("Pattern: " + prettify(DATA[i]));
1001         UParseError parseError;
1002         UErrorCode status = U_ZERO_ERROR;
1003         Transliterator *t = Transliterator::createFromRules("<ID>", DATA[i], UTRANS_FORWARD, parseError, status);
1004         if (U_FAILURE(status)) {
1005             errln("FAIL: RBT constructor");
1006         } else {
1007             expect(*t, DATA[i+1], DATA[i+2]);
1008         }
1009         delete t;
1010     }
1011 }
1012
1013 /**
1014  * Confirm that the contextStart, contextLimit, start, and limit
1015  * behave correctly. J474.
1016  */
1017 void TransliteratorTest::TestPositionHandling(void) {
1018     // Array of 3n items
1019     // Each item is <rules>, <input>, <expected output>
1020     const char* DATA[] = {
1021         "a{t} > SS ; {t}b > UU ; {t} > TT ;",
1022         "xtat txtb", // pos 0,9,0,9
1023         "xTTaSS TTxUUb",
1024
1025         "a{t} > SS ; {t}b > UU ; {t} > TT ; a > A ; b > B ;",
1026         "xtat txtb", // pos 2,9,3,8
1027         "xtaSS TTxUUb",
1028
1029         "a{t} > SS ; {t}b > UU ; {t} > TT ; a > A ; b > B ;",
1030         "xtat txtb", // pos 3,8,3,8
1031         "xtaTT TTxTTb",
1032     };
1033
1034     // Array of 4n positions -- these go with the DATA array
1035     // They are: contextStart, contextLimit, start, limit
1036     int32_t POS[] = {
1037         0, 9, 0, 9,
1038         2, 9, 3, 8,
1039         3, 8, 3, 8,
1040     };
1041
1042     int32_t n = (int32_t)(sizeof(DATA) / sizeof(DATA[0])) / 3;
1043     for (int32_t i=0; i<n; i++) {
1044         UErrorCode status = U_ZERO_ERROR;
1045         UParseError parseError;
1046         Transliterator *t = Transliterator::createFromRules("<ID>",
1047                                 DATA[3*i], UTRANS_FORWARD, parseError, status);
1048         if (U_FAILURE(status)) {
1049             delete t;
1050             errln("FAIL: RBT constructor");
1051             return;
1052         }
1053         UTransPosition pos;
1054         pos.contextStart= POS[4*i];
1055         pos.contextLimit = POS[4*i+1];
1056         pos.start = POS[4*i+2];
1057         pos.limit = POS[4*i+3];
1058         UnicodeString rsource(DATA[3*i+1]);
1059         t->transliterate(rsource, pos, status);
1060         if (U_FAILURE(status)) {
1061             delete t;
1062             errln("FAIL: transliterate");
1063             return;
1064         }
1065         t->finishTransliteration(rsource, pos);
1066         expectAux(DATA[3*i],
1067                   DATA[3*i+1],
1068                   rsource,
1069                   DATA[3*i+2]);
1070         delete t;
1071     }
1072 }
1073
1074 /**
1075  * Test the Hiragana-Katakana transliterator.
1076  */
1077 void TransliteratorTest::TestHiraganaKatakana(void) {
1078     UParseError parseError;
1079     UErrorCode status = U_ZERO_ERROR;
1080     Transliterator* hk = Transliterator::createInstance("Hiragana-Katakana", UTRANS_FORWARD, parseError, status);
1081     Transliterator* kh = Transliterator::createInstance("Katakana-Hiragana", UTRANS_FORWARD, parseError, status);
1082     if (hk == 0 || kh == 0) {
1083         dataerrln("FAIL: createInstance failed - %s", u_errorName(status));
1084         delete hk;
1085         delete kh;
1086         return;
1087     }
1088
1089     // Array of 3n items
1090     // Each item is "hk"|"kh"|"both", <Hiragana>, <Katakana>
1091     const char* DATA[] = {
1092         "both",
1093         "\\u3042\\u3090\\u3099\\u3092\\u3050",
1094         "\\u30A2\\u30F8\\u30F2\\u30B0",
1095
1096         "kh",
1097         "\\u307C\\u3051\\u3060\\u3042\\u3093\\u30FC",
1098         "\\u30DC\\u30F6\\u30C0\\u30FC\\u30F3\\u30FC",
1099     };
1100     int32_t DATA_length = (int32_t)(sizeof(DATA) / sizeof(DATA[0]));
1101
1102     for (int32_t i=0; i<DATA_length; i+=3) {
1103         UnicodeString h = CharsToUnicodeString(DATA[i+1]);
1104         UnicodeString k = CharsToUnicodeString(DATA[i+2]);
1105         switch (*DATA[i]) {
1106         case 0x68: //'h': // Hiragana-Katakana
1107             expect(*hk, h, k);
1108             break;
1109         case 0x6B: //'k': // Katakana-Hiragana
1110             expect(*kh, k, h);
1111             break;
1112         case 0x62: //'b': // both
1113             expect(*hk, h, k);
1114             expect(*kh, k, h);
1115             break;
1116         }
1117     }
1118     delete hk;
1119     delete kh;
1120 }
1121
1122 /**
1123  * Test cloning / copy constructor of RBT.
1124  */
1125 void TransliteratorTest::TestCopyJ476(void) {
1126     // The real test here is what happens when the destructors are
1127     // called.  So we let one object get destructed, and check to
1128     // see that its copy still works.
1129     Transliterator *t2 = 0;
1130     {
1131         UParseError parseError;
1132         UErrorCode status = U_ZERO_ERROR;
1133         Transliterator *t1 = Transliterator::createFromRules("t1",
1134             "a>A;b>B;'foo'+>'bar'", UTRANS_FORWARD, parseError, status);
1135         if (U_FAILURE(status)) {
1136             errln("FAIL: RBT constructor");
1137             return;
1138         }
1139         t2 = t1->clone(); // Call copy constructor under the covers.
1140         expect(*t1, "abcfoofoo", "ABcbar");
1141         delete t1;
1142     }
1143     expect(*t2, "abcfoofoo", "ABcbar");
1144     delete t2;
1145 }
1146
1147 /**
1148  * Test inter-Indic transliterators.  These are composed.
1149  * ICU4C Jitterbug 483.
1150  */
1151 void TransliteratorTest::TestInterIndic(void) {
1152     UnicodeString ID("Devanagari-Gujarati", "");
1153     UErrorCode status = U_ZERO_ERROR;
1154     UParseError parseError;
1155     Transliterator* dg = Transliterator::createInstance(ID, UTRANS_FORWARD, parseError, status);
1156     if (dg == 0) {
1157         dataerrln("FAIL: createInstance(" + ID + ") returned NULL - " + u_errorName(status));
1158         return;
1159     }
1160     UnicodeString id = dg->getID();
1161     if (id != ID) {
1162         errln("FAIL: createInstance(" + ID + ")->getID() => " + id);
1163     }
1164     UnicodeString dev = CharsToUnicodeString("\\u0901\\u090B\\u0925");
1165     UnicodeString guj = CharsToUnicodeString("\\u0A81\\u0A8B\\u0AA5");
1166     expect(*dg, dev, guj);
1167     delete dg;
1168 }
1169
1170 /**
1171  * Test filter syntax in IDs. (J918)
1172  */
1173 void TransliteratorTest::TestFilterIDs(void) {
1174     // Array of 3n strings:
1175     // <id>, <inverse id>, <input>, <expected output>
1176     const char* DATA[] = {
1177         "[aeiou]Any-Hex", // ID
1178         "[aeiou]Hex-Any", // expected inverse ID
1179         "quizzical",      // src
1180         "q\\u0075\\u0069zz\\u0069c\\u0061l", // expected ID.translit(src)
1181
1182         "[aeiou]Any-Hex;[^5]Hex-Any",
1183         "[^5]Any-Hex;[aeiou]Hex-Any",
1184         "quizzical",
1185         "q\\u0075izzical",
1186
1187         "[abc]Null",
1188         "[abc]Null",
1189         "xyz",
1190         "xyz",
1191     };
1192     enum { DATA_length = sizeof(DATA) / sizeof(DATA[0]) };
1193
1194     for (int i=0; i<DATA_length; i+=4) {
1195         UnicodeString ID(DATA[i], "");
1196         UnicodeString uID(DATA[i+1], "");
1197         UnicodeString data2(DATA[i+2], "");
1198         UnicodeString data3(DATA[i+3], "");
1199         UParseError parseError;
1200         UErrorCode status = U_ZERO_ERROR;
1201         Transliterator *t = Transliterator::createInstance(ID, UTRANS_FORWARD, parseError, status);
1202         if (t == 0) {
1203             errln("FAIL: createInstance(" + ID + ") returned NULL");
1204             return;
1205         }
1206         expect(*t, data2, data3);
1207
1208         // Check the ID
1209         if (ID != t->getID()) {
1210             errln("FAIL: createInstance(" + ID + ").getID() => " +
1211                   t->getID());
1212         }
1213
1214         // Check the inverse
1215         Transliterator *u = t->createInverse(status);
1216         if (u == 0) {
1217             errln("FAIL: " + ID + ".createInverse() returned NULL");
1218         } else if (u->getID() != uID) {
1219             errln("FAIL: " + ID + ".createInverse().getID() => " +
1220                   u->getID() + ", expected " + uID);
1221         }
1222
1223         delete t;
1224         delete u;
1225     }
1226 }
1227
1228 /**
1229  * Test the case mapping transliterators.
1230  */
1231 void TransliteratorTest::TestCaseMap(void) {
1232     UParseError parseError;
1233     UErrorCode status = U_ZERO_ERROR;
1234     Transliterator* toUpper =
1235         Transliterator::createInstance("Any-Upper[^xyzXYZ]", UTRANS_FORWARD, parseError, status);
1236     Transliterator* toLower =
1237         Transliterator::createInstance("Any-Lower[^xyzXYZ]", UTRANS_FORWARD, parseError, status);
1238     Transliterator* toTitle =
1239         Transliterator::createInstance("Any-Title[^xyzXYZ]", UTRANS_FORWARD, parseError, status);
1240     if (toUpper==0 || toLower==0 || toTitle==0) {
1241         errln("FAIL: createInstance returned NULL");
1242         delete toUpper;
1243         delete toLower;
1244         delete toTitle;
1245         return;
1246     }
1247
1248     expect(*toUpper, "The quick brown fox jumped over the lazy dogs.",
1249            "THE QUICK BROWN FOx JUMPED OVER THE LAzy DOGS.");
1250     expect(*toLower, "The quIck brown fOX jUMPED OVER THE LAzY dogs.",
1251            "the quick brown foX jumped over the lazY dogs.");
1252     expect(*toTitle, "the quick brown foX can't jump over the laZy dogs.",
1253            "The Quick Brown FoX Can't Jump Over The LaZy Dogs.");
1254
1255     delete toUpper;
1256     delete toLower;
1257     delete toTitle;
1258 }
1259
1260 /**
1261  * Test the name mapping transliterators.
1262  */
1263 void TransliteratorTest::TestNameMap(void) {
1264     UParseError parseError;
1265     UErrorCode status = U_ZERO_ERROR;
1266     Transliterator* uni2name =
1267         Transliterator::createInstance("Any-Name[^abc]", UTRANS_FORWARD, parseError, status);
1268     Transliterator* name2uni =
1269         Transliterator::createInstance("Name-Any", UTRANS_FORWARD, parseError, status);
1270     if (uni2name==0 || name2uni==0) {
1271         errln("FAIL: createInstance returned NULL");
1272         delete uni2name;
1273         delete name2uni;
1274         return;
1275     }
1276
1277     // Careful:  CharsToUS will convert "\\N" => "N"; use "\\\\N" for \N
1278     expect(*uni2name, CharsToUnicodeString("\\u00A0abc\\u4E01\\u00B5\\u0A81\\uFFFD\\u0004\\u0009\\u0081\\uFFFF"),
1279            CharsToUnicodeString("\\\\N{NO-BREAK SPACE}abc\\\\N{CJK UNIFIED IDEOGRAPH-4E01}\\\\N{MICRO SIGN}\\\\N{GUJARATI SIGN CANDRABINDU}\\\\N{REPLACEMENT CHARACTER}\\\\N{END OF TRANSMISSION}\\\\N{CHARACTER TABULATION}\\\\N{<control-0081>}\\\\N{<noncharacter-FFFF>}"));
1280     expect(*name2uni, UNICODE_STRING_SIMPLE("{\\N { NO-BREAK SPACE}abc\\N{  CJK UNIFIED  IDEOGRAPH-4E01  }\\N{x\\N{MICRO SIGN}\\N{GUJARATI SIGN CANDRABINDU}\\N{REPLACEMENT CHARACTER}\\N{END OF TRANSMISSION}\\N{CHARACTER TABULATION}\\N{<control-0081>}\\N{<noncharacter-FFFF>}\\N{<control-0004>}\\N{"),
1281            CharsToUnicodeString("{\\u00A0abc\\u4E01\\\\N{x\\u00B5\\u0A81\\uFFFD\\u0004\\u0009\\u0081\\uFFFF\\u0004\\\\N{"));
1282
1283     delete uni2name;
1284     delete name2uni;
1285
1286     // round trip
1287     Transliterator* t =
1288         Transliterator::createInstance("Any-Name;Name-Any", UTRANS_FORWARD, parseError, status);
1289     if (t==0) {
1290         errln("FAIL: createInstance returned NULL");
1291         delete t;
1292         return;
1293     }
1294
1295     // Careful:  CharsToUS will convert "\\N" => "N"; use "\\\\N" for \N
1296     UnicodeString s = CharsToUnicodeString("{\\u00A0abc\\u4E01\\\\N{x\\u00B5\\u0A81\\uFFFD\\u0004\\u0009\\u0081\\uFFFF\\u0004\\\\N{");
1297     expect(*t, s, s);
1298     delete t;
1299 }
1300
1301 /**
1302  * Test liberalized ID syntax.  1006c
1303  */
1304 void TransliteratorTest::TestLiberalizedID(void) {
1305     // Some test cases have an expected getID() value of NULL.  This
1306     // means I have disabled the test case for now.  This stuff is
1307     // still under development, and I haven't decided whether to make
1308     // getID() return canonical case yet.  It will all get rewritten
1309     // with the move to Source-Target/Variant IDs anyway. [aliu]
1310     const char* DATA[] = {
1311         "latin-greek", NULL /*"Latin-Greek"*/, "case insensitivity",
1312         "  Null  ", "Null", "whitespace",
1313         " Latin[a-z]-Greek  ", "[a-z]Latin-Greek", "inline filter",
1314         "  null  ; latin-greek  ", NULL /*"Null;Latin-Greek"*/, "compound whitespace",
1315     };
1316     const int32_t DATA_length = sizeof(DATA)/sizeof(DATA[0]);
1317     UParseError parseError;
1318     UErrorCode status= U_ZERO_ERROR;
1319     for (int32_t i=0; i<DATA_length; i+=3) {
1320         Transliterator *t = Transliterator::createInstance(DATA[i], UTRANS_FORWARD, parseError, status);
1321         if (t == 0) {
1322             dataerrln(UnicodeString("FAIL: ") + DATA[i+2] +
1323                   " cannot create ID \"" + DATA[i] + "\" - " + u_errorName(status));
1324         } else {
1325             UnicodeString exp;
1326             if (DATA[i+1]) {
1327                 exp = UnicodeString(DATA[i+1], "");
1328             }
1329             // Don't worry about getID() if the expected char*
1330             // is NULL -- see above.
1331             if (exp.length() == 0 || exp == t->getID()) {
1332                 logln(UnicodeString("Ok: ") + DATA[i+2] +
1333                       " create ID \"" + DATA[i] + "\" => \"" +
1334                       exp + "\"");
1335             } else {
1336                 errln(UnicodeString("FAIL: ") + DATA[i+2] +
1337                       " create ID \"" + DATA[i] + "\" => \"" +
1338                       t->getID() + "\", exp \"" + exp + "\"");
1339             }
1340             delete t;
1341         }
1342     }
1343 }
1344
1345 /* test for Jitterbug 912 */
1346 void TransliteratorTest::TestCreateInstance(){
1347     const char* FORWARD = "F";
1348     const char* REVERSE = "R";
1349     const char* DATA[] = {
1350         // Column 1: id
1351         // Column 2: direction
1352         // Column 3: expected ID, or "" if expect failure
1353         "Latin-Hangul", REVERSE, "Hangul-Latin", // JB#912
1354
1355         // JB#2689: bad compound causes crash
1356         "InvalidSource-InvalidTarget", FORWARD, "",
1357         "InvalidSource-InvalidTarget", REVERSE, "",
1358         "Hex-Any;InvalidSource-InvalidTarget", FORWARD, "",
1359         "Hex-Any;InvalidSource-InvalidTarget", REVERSE, "",
1360         "InvalidSource-InvalidTarget;Hex-Any", FORWARD, "",
1361         "InvalidSource-InvalidTarget;Hex-Any", REVERSE, "",
1362
1363         NULL
1364     };
1365
1366     for (int32_t i=0; DATA[i]; i+=3) {
1367         UParseError err;
1368         UErrorCode ec = U_ZERO_ERROR;
1369         UnicodeString id(DATA[i]);
1370         UTransDirection dir = (DATA[i+1]==FORWARD)?
1371             UTRANS_FORWARD:UTRANS_REVERSE;
1372         UnicodeString expID(DATA[i+2]);
1373         Transliterator* t =
1374             Transliterator::createInstance(id,dir,err,ec);
1375         UnicodeString newID;
1376         if (t) {
1377             newID = t->getID();
1378         }
1379         UBool ok = (newID == expID);
1380         if (!t) {
1381             newID = u_errorName(ec);
1382         }
1383         if (ok) {
1384             logln((UnicodeString)"Ok: createInstance(" +
1385                   id + "," + DATA[i+1] + ") => " + newID);
1386         } else {
1387             dataerrln((UnicodeString)"FAIL: createInstance(" +
1388                   id + "," + DATA[i+1] + ") => " + newID +
1389                   ", expected " + expID);
1390         }
1391         delete t;
1392     }
1393 }
1394
1395 /**
1396  * Test the normalization transliterator.
1397  */
1398 void TransliteratorTest::TestNormalizationTransliterator() {
1399     // THE FOLLOWING TWO TABLES ARE COPIED FROM com.ibm.test.normalizer.BasicTest
1400     // PLEASE KEEP THEM IN SYNC WITH BasicTest.
1401     const char* CANON[] = {
1402         // Input               Decomposed            Composed
1403         "cat",                "cat",                "cat"               ,
1404         "\\u00e0ardvark",      "a\\u0300ardvark",     "\\u00e0ardvark"    ,
1405
1406         "\\u1e0a",             "D\\u0307",            "\\u1e0a"            , // D-dot_above
1407         "D\\u0307",            "D\\u0307",            "\\u1e0a"            , // D dot_above
1408
1409         "\\u1e0c\\u0307",       "D\\u0323\\u0307",      "\\u1e0c\\u0307"      , // D-dot_below dot_above
1410         "\\u1e0a\\u0323",       "D\\u0323\\u0307",      "\\u1e0c\\u0307"      , // D-dot_above dot_below
1411         "D\\u0307\\u0323",      "D\\u0323\\u0307",      "\\u1e0c\\u0307"      , // D dot_below dot_above
1412
1413         "\\u1e10\\u0307\\u0323", "D\\u0327\\u0323\\u0307","\\u1e10\\u0323\\u0307", // D dot_below cedilla dot_above
1414         "D\\u0307\\u0328\\u0323","D\\u0328\\u0323\\u0307","\\u1e0c\\u0328\\u0307", // D dot_above ogonek dot_below
1415
1416         "\\u1E14",             "E\\u0304\\u0300",      "\\u1E14"            , // E-macron-grave
1417         "\\u0112\\u0300",       "E\\u0304\\u0300",      "\\u1E14"            , // E-macron + grave
1418         "\\u00c8\\u0304",       "E\\u0300\\u0304",      "\\u00c8\\u0304"      , // E-grave + macron
1419
1420         "\\u212b",             "A\\u030a",            "\\u00c5"            , // angstrom_sign
1421         "\\u00c5",             "A\\u030a",            "\\u00c5"            , // A-ring
1422
1423         "\\u00fdffin",         "y\\u0301ffin",        "\\u00fdffin"        ,    //updated with 3.0
1424         "\\u00fd\\uFB03n",      "y\\u0301\\uFB03n",     "\\u00fd\\uFB03n"     , //updated with 3.0
1425
1426         "Henry IV",           "Henry IV",           "Henry IV"          ,
1427         "Henry \\u2163",       "Henry \\u2163",       "Henry \\u2163"      ,
1428
1429         "\\u30AC",             "\\u30AB\\u3099",       "\\u30AC"            , // ga (Katakana)
1430         "\\u30AB\\u3099",       "\\u30AB\\u3099",       "\\u30AC"            , // ka + ten
1431         "\\uFF76\\uFF9E",       "\\uFF76\\uFF9E",       "\\uFF76\\uFF9E"      , // hw_ka + hw_ten
1432         "\\u30AB\\uFF9E",       "\\u30AB\\uFF9E",       "\\u30AB\\uFF9E"      , // ka + hw_ten
1433         "\\uFF76\\u3099",       "\\uFF76\\u3099",       "\\uFF76\\u3099"      , // hw_ka + ten
1434
1435         "A\\u0300\\u0316",      "A\\u0316\\u0300",      "\\u00C0\\u0316"      ,
1436         0 // end
1437     };
1438
1439     const char* COMPAT[] = {
1440         // Input               Decomposed            Composed
1441         "\\uFB4f",             "\\u05D0\\u05DC",       "\\u05D0\\u05DC"     , // Alef-Lamed vs. Alef, Lamed
1442
1443         "\\u00fdffin",         "y\\u0301ffin",        "\\u00fdffin"        ,    //updated for 3.0
1444         "\\u00fd\\uFB03n",      "y\\u0301ffin",        "\\u00fdffin"        , // ffi ligature -> f + f + i
1445
1446         "Henry IV",           "Henry IV",           "Henry IV"          ,
1447         "Henry \\u2163",       "Henry IV",           "Henry IV"          ,
1448
1449         "\\u30AC",             "\\u30AB\\u3099",       "\\u30AC"            , // ga (Katakana)
1450         "\\u30AB\\u3099",       "\\u30AB\\u3099",       "\\u30AC"            , // ka + ten
1451
1452         "\\uFF76\\u3099",       "\\u30AB\\u3099",       "\\u30AC"            , // hw_ka + ten
1453         0 // end
1454     };
1455
1456     int32_t i;
1457     UParseError parseError;
1458     UErrorCode status = U_ZERO_ERROR;
1459     Transliterator* NFD = Transliterator::createInstance("NFD", UTRANS_FORWARD, parseError, status);
1460     Transliterator* NFC = Transliterator::createInstance("NFC", UTRANS_FORWARD, parseError, status);
1461     if (!NFD || !NFC) {
1462         dataerrln("FAIL: createInstance failed: %s", u_errorName(status));
1463         delete NFD;
1464         delete NFC;
1465         return;
1466     }
1467     for (i=0; CANON[i]; i+=3) {
1468         UnicodeString in = CharsToUnicodeString(CANON[i]);
1469         UnicodeString expd = CharsToUnicodeString(CANON[i+1]);
1470         UnicodeString expc = CharsToUnicodeString(CANON[i+2]);
1471         expect(*NFD, in, expd);
1472         expect(*NFC, in, expc);
1473     }
1474     delete NFD;
1475     delete NFC;
1476
1477     Transliterator* NFKD = Transliterator::createInstance("NFKD", UTRANS_FORWARD, parseError, status);
1478     Transliterator* NFKC = Transliterator::createInstance("NFKC", UTRANS_FORWARD, parseError, status);
1479     if (!NFKD || !NFKC) {
1480         errln("FAIL: createInstance failed");
1481         delete NFKD;
1482         delete NFKC;
1483         return;
1484     }
1485     for (i=0; COMPAT[i]; i+=3) {
1486         UnicodeString in = CharsToUnicodeString(COMPAT[i]);
1487         UnicodeString expkd = CharsToUnicodeString(COMPAT[i+1]);
1488         UnicodeString expkc = CharsToUnicodeString(COMPAT[i+2]);
1489         expect(*NFKD, in, expkd);
1490         expect(*NFKC, in, expkc);
1491     }
1492     delete NFKD;
1493     delete NFKC;
1494
1495     UParseError pe;
1496     status = U_ZERO_ERROR;
1497     Transliterator *t = Transliterator::createInstance("NFD; [x]Remove",
1498                                                        UTRANS_FORWARD,
1499                                                        pe, status);
1500     if (t == 0) {
1501         errln("FAIL: createInstance failed");
1502     }
1503     expect(*t, CharsToUnicodeString("\\u010dx"),
1504            CharsToUnicodeString("c\\u030C"));
1505     delete t;
1506 }
1507
1508 /**
1509  * Test compound RBT rules.
1510  */
1511 void TransliteratorTest::TestCompoundRBT(void) {
1512     // Careful with spacing and ';' here:  Phrase this exactly
1513     // as toRules() is going to return it.  If toRules() changes
1514     // with regard to spacing or ';', then adjust this string.
1515     UnicodeString rule("::Hex-Any;\n"
1516                        "::Any-Lower;\n"
1517                        "a > '.A.';\n"
1518                        "b > '.B.';\n"
1519                        "::[^t]Any-Upper;", "");
1520     UParseError parseError;
1521     UErrorCode status = U_ZERO_ERROR;
1522     Transliterator *t = Transliterator::createFromRules("Test", rule, UTRANS_FORWARD, parseError, status);
1523     if (t == 0) {
1524         errln("FAIL: createFromRules failed");
1525         return;
1526     }
1527     expect(*t, UNICODE_STRING_SIMPLE("\\u0043at in the hat, bat on the mat"),
1528            "C.A.t IN tHE H.A.t, .B..A.t ON tHE M.A.t");
1529     UnicodeString r;
1530     t->toRules(r, TRUE);
1531     if (r == rule) {
1532         logln((UnicodeString)"OK: toRules() => " + r);
1533     } else {
1534         errln((UnicodeString)"FAIL: toRules() => " + r +
1535               ", expected " + rule);
1536     }
1537     delete t;
1538
1539     // Now test toRules
1540     t = Transliterator::createInstance("Greek-Latin; Latin-Cyrillic", UTRANS_FORWARD, parseError, status);
1541     if (t == 0) {
1542         dataerrln("FAIL: createInstance failed - %s", u_errorName(status));
1543         return;
1544     }
1545     UnicodeString exp("::Greek-Latin;\n::Latin-Cyrillic;");
1546     t->toRules(r, TRUE);
1547     if (r != exp) {
1548         errln((UnicodeString)"FAIL: toRules() => " + r +
1549               ", expected " + exp);
1550     } else {
1551         logln((UnicodeString)"OK: toRules() => " + r);
1552     }
1553     delete t;
1554
1555     // Round trip the result of toRules
1556     t = Transliterator::createFromRules("Test", r, UTRANS_FORWARD, parseError, status);
1557     if (t == 0) {
1558         errln("FAIL: createFromRules #2 failed");
1559         return;
1560     } else {
1561         logln((UnicodeString)"OK: createFromRules(" + r + ") succeeded");
1562     }
1563
1564     // Test toRules again
1565     t->toRules(r, TRUE);
1566     if (r != exp) {
1567         errln((UnicodeString)"FAIL: toRules() => " + r +
1568               ", expected " + exp);
1569     } else {
1570         logln((UnicodeString)"OK: toRules() => " + r);
1571     }
1572
1573     delete t;
1574
1575     // Test Foo(Bar) IDs.  Careful with spacing in id; make it conform
1576     // to what the regenerated ID will look like.
1577     UnicodeString id("Upper(Lower);(NFKC)", "");
1578     t = Transliterator::createInstance(id, UTRANS_FORWARD, parseError, status);
1579     if (t == 0) {
1580         errln("FAIL: createInstance #2 failed");
1581         return;
1582     }
1583     if (t->getID() == id) {
1584         logln((UnicodeString)"OK: created " + id);
1585     } else {
1586         errln((UnicodeString)"FAIL: createInstance(" + id +
1587               ").getID() => " + t->getID());
1588     }
1589
1590     Transliterator *u = t->createInverse(status);
1591     if (u == 0) {
1592         errln("FAIL: createInverse failed");
1593         delete t;
1594         return;
1595     }
1596     exp = "NFKC();Lower(Upper)";
1597     if (u->getID() == exp) {
1598         logln((UnicodeString)"OK: createInverse(" + id + ") => " +
1599               u->getID());
1600     } else {
1601         errln((UnicodeString)"FAIL: createInverse(" + id + ") => " +
1602               u->getID());
1603     }
1604     delete t;
1605     delete u;
1606 }
1607
1608 /**
1609  * Compound filter semantics were orginially not implemented
1610  * correctly.  Originally, each component filter f(i) is replaced by
1611  * f'(i) = f(i) && g, where g is the filter for the compound
1612  * transliterator.
1613  *
1614  * From Mark:
1615  *
1616  * Suppose and I have a transliterator X. Internally X is
1617  * "Greek-Latin; Latin-Cyrillic; Any-Lower". I use a filter [^A].
1618  *
1619  * The compound should convert all greek characters (through latin) to
1620  * cyrillic, then lowercase the result. The filter should say "don't
1621  * touch 'A' in the original". But because an intermediate result
1622  * happens to go through "A", the Greek Alpha gets hung up.
1623  */
1624 void TransliteratorTest::TestCompoundFilter(void) {
1625     UParseError parseError;
1626     UErrorCode status = U_ZERO_ERROR;
1627     Transliterator *t = Transliterator::createInstance
1628         ("Greek-Latin; Latin-Greek; Lower", UTRANS_FORWARD, parseError, status);
1629     if (t == 0) {
1630         dataerrln("FAIL: createInstance failed - %s", u_errorName(status));
1631         return;
1632     }
1633     t->adoptFilter(new UnicodeSet("[^A]", status));
1634     if (U_FAILURE(status)) {
1635         errln("FAIL: UnicodeSet ct failed");
1636         delete t;
1637         return;
1638     }
1639
1640     // Only the 'A' at index 1 should remain unchanged
1641     expect(*t,
1642            CharsToUnicodeString("BA\\u039A\\u0391"),
1643            CharsToUnicodeString("\\u03b2A\\u03ba\\u03b1"));
1644     delete t;
1645 }
1646
1647 void TransliteratorTest::TestRemove(void) {
1648     UParseError parseError;
1649     UErrorCode status = U_ZERO_ERROR;
1650     Transliterator *t = Transliterator::createInstance("Remove[abc]", UTRANS_FORWARD, parseError, status);
1651     if (t == 0) {
1652         errln("FAIL: createInstance failed");
1653         return;
1654     }
1655
1656     expect(*t, "Able bodied baker's cats", "Ale odied ker's ts");
1657
1658     // extra test for RemoveTransliterator::clone(), which at one point wasn't
1659     // duplicating the filter
1660     Transliterator* t2 = t->clone();
1661     expect(*t2, "Able bodied baker's cats", "Ale odied ker's ts");
1662
1663     delete t;
1664     delete t2;
1665 }
1666
1667 void TransliteratorTest::TestToRules(void) {
1668     const char* RBT = "rbt";
1669     const char* SET = "set";
1670     static const char* DATA[] = {
1671         RBT,
1672         "$a=\\u4E61; [$a] > A;",
1673         "[\\u4E61] > A;",
1674
1675         RBT,
1676         "$white=[[:Zs:][:Zl:]]; $white{a} > A;",
1677         "[[:Zs:][:Zl:]]{a} > A;",
1678
1679         SET,
1680         "[[:Zs:][:Zl:]]",
1681         "[[:Zs:][:Zl:]]",
1682
1683         SET,
1684         "[:Ps:]",
1685         "[:Ps:]",
1686
1687         SET,
1688         "[:L:]",
1689         "[:L:]",
1690
1691         SET,
1692         "[[:L:]-[A]]",
1693         "[[:L:]-[A]]",
1694
1695         SET,
1696         "[~[:Lu:][:Ll:]]",
1697         "[~[:Lu:][:Ll:]]",
1698
1699         SET,
1700         "[~[a-z]]",
1701         "[~[a-z]]",
1702
1703         RBT,
1704         "$white=[:Zs:]; $black=[^$white]; $black{a} > A;",
1705         "[^[:Zs:]]{a} > A;",
1706
1707         RBT,
1708         "$a=[:Zs:]; $b=[[a-z]-$a]; $b{a} > A;",
1709         "[[a-z]-[:Zs:]]{a} > A;",
1710
1711         RBT,
1712         "$a=[:Zs:]; $b=[$a&[a-z]]; $b{a} > A;",
1713         "[[:Zs:]&[a-z]]{a} > A;",
1714
1715         RBT,
1716         "$a=[:Zs:]; $b=[x$a]; $b{a} > A;",
1717         "[x[:Zs:]]{a} > A;",
1718
1719         RBT,
1720         "$accentMinus = [ [\\u0300-\\u0345] & [:M:] - [\\u0338]] ;"
1721         "$macron = \\u0304 ;"
1722         "$evowel = [aeiouyAEIOUY] ;"
1723         "$iotasub = \\u0345 ;"
1724         "($evowel $macron $accentMinus *) i > | $1 $iotasub ;",
1725         "([AEIOUYaeiouy]\\u0304[[\\u0300-\\u0345]&[:M:]-[\\u0338]]*)i > | $1 \\u0345;",
1726
1727         RBT,
1728         "([AEIOUYaeiouy]\\u0304[[:M:]-[\\u0304\\u0345]]*)i > | $1 \\u0345;",
1729         "([AEIOUYaeiouy]\\u0304[[:M:]-[\\u0304\\u0345]]*)i > | $1 \\u0345;",
1730     };
1731     static const int32_t DATA_length = (int32_t)(sizeof(DATA) / sizeof(DATA[0]));
1732
1733     for (int32_t d=0; d < DATA_length; d+=3) {
1734         if (DATA[d] == RBT) {
1735             // Transliterator test
1736             UParseError parseError;
1737             UErrorCode status = U_ZERO_ERROR;
1738             Transliterator *t = Transliterator::createFromRules("ID",
1739                                                                 UnicodeString(DATA[d+1], -1, US_INV), UTRANS_FORWARD, parseError, status);
1740             if (t == 0) {
1741                 dataerrln("FAIL: createFromRules failed - %s", u_errorName(status));
1742                 return;
1743             }
1744             UnicodeString rules, escapedRules;
1745             t->toRules(rules, FALSE);
1746             t->toRules(escapedRules, TRUE);
1747             UnicodeString expRules = CharsToUnicodeString(DATA[d+2]);
1748             UnicodeString expEscapedRules(DATA[d+2], -1, US_INV);
1749             if (rules == expRules) {
1750                 logln((UnicodeString)"Ok: " + UnicodeString(DATA[d+1], -1, US_INV) +
1751                       " => " + rules);
1752             } else {
1753                 errln((UnicodeString)"FAIL: " + UnicodeString(DATA[d+1], -1, US_INV) +
1754                       " => " + rules + ", exp " + expRules);
1755             }
1756             if (escapedRules == expEscapedRules) {
1757                 logln((UnicodeString)"Ok: " + UnicodeString(DATA[d+1], -1, US_INV) +
1758                       " => " + escapedRules);
1759             } else {
1760                 errln((UnicodeString)"FAIL: " + UnicodeString(DATA[d+1], -1, US_INV) +
1761                       " => " + escapedRules + ", exp " + expEscapedRules);
1762             }
1763             delete t;
1764
1765         } else {
1766             // UnicodeSet test
1767             UErrorCode status = U_ZERO_ERROR;
1768             UnicodeString pat(DATA[d+1], -1, US_INV);
1769             UnicodeString expToPat(DATA[d+2], -1, US_INV);
1770             UnicodeSet set(pat, status);
1771             if (U_FAILURE(status)) {
1772                 errln("FAIL: UnicodeSet ct failed");
1773                 return;
1774             }
1775             // Adjust spacing etc. as necessary.
1776             UnicodeString toPat;
1777             set.toPattern(toPat);
1778             if (expToPat == toPat) {
1779                 logln((UnicodeString)"Ok: " + pat +
1780                       " => " + toPat);
1781             } else {
1782                 errln((UnicodeString)"FAIL: " + pat +
1783                       " => " + prettify(toPat, TRUE) +
1784                       ", exp " + prettify(pat, TRUE));
1785             }
1786         }
1787     }
1788 }
1789
1790 void TransliteratorTest::TestContext() {
1791     UTransPosition pos = {0, 2, 0, 1}; // cs cl s l
1792     expect("de > x; {d}e > y;",
1793            "de",
1794            "ye",
1795            &pos);
1796
1797     expect("ab{c} > z;",
1798            "xadabdabcy",
1799            "xadabdabzy");
1800 }
1801
1802 void TransliteratorTest::TestSupplemental() {
1803
1804     expect(CharsToUnicodeString("$a=\\U00010300; $s=[\\U00010300-\\U00010323];"
1805                                 "a > $a; $s > i;"),
1806            CharsToUnicodeString("ab\\U0001030Fx"),
1807            CharsToUnicodeString("\\U00010300bix"));
1808
1809     expect(CharsToUnicodeString("$a=[a-z\\U00010300-\\U00010323];"
1810                                 "$b=[A-Z\\U00010400-\\U0001044D];"
1811                                 "($a)($b) > $2 $1;"),
1812            CharsToUnicodeString("aB\\U00010300\\U00010400c\\U00010401\\U00010301D"),
1813            CharsToUnicodeString("Ba\\U00010400\\U00010300\\U00010401cD\\U00010301"));
1814
1815     // k|ax\\U00010300xm
1816
1817     // k|a\\U00010400\\U00010300xm
1818     // ky|\\U00010400\\U00010300xm
1819     // ky\\U00010400|\\U00010300xm
1820
1821     // ky\\U00010400|\\U00010300\\U00010400m
1822     // ky\\U00010400y|\\U00010400m
1823     expect(CharsToUnicodeString("$a=[a\\U00010300-\\U00010323];"
1824                                 "$a {x} > | @ \\U00010400;"
1825                                 "{$a} [^\\u0000-\\uFFFF] > y;"),
1826            CharsToUnicodeString("kax\\U00010300xm"),
1827            CharsToUnicodeString("ky\\U00010400y\\U00010400m"));
1828
1829     expectT("Any-Name",
1830            CharsToUnicodeString("\\U00010330\\U000E0061\\u00A0"),
1831            UNICODE_STRING_SIMPLE("\\N{GOTHIC LETTER AHSA}\\N{TAG LATIN SMALL LETTER A}\\N{NO-BREAK SPACE}"));
1832
1833     expectT("Any-Hex/Unicode",
1834            CharsToUnicodeString("\\U00010330\\U0010FF00\\U000E0061\\u00A0"),
1835            UNICODE_STRING_SIMPLE("U+10330U+10FF00U+E0061U+00A0"));
1836
1837     expectT("Any-Hex/C",
1838            CharsToUnicodeString("\\U00010330\\U0010FF00\\U000E0061\\u00A0"),
1839            UNICODE_STRING_SIMPLE("\\U00010330\\U0010FF00\\U000E0061\\u00A0"));
1840
1841     expectT("Any-Hex/Perl",
1842            CharsToUnicodeString("\\U00010330\\U0010FF00\\U000E0061\\u00A0"),
1843            UNICODE_STRING_SIMPLE("\\x{10330}\\x{10FF00}\\x{E0061}\\x{A0}"));
1844
1845     expectT("Any-Hex/Java",
1846            CharsToUnicodeString("\\U00010330\\U0010FF00\\U000E0061\\u00A0"),
1847            UNICODE_STRING_SIMPLE("\\uD800\\uDF30\\uDBFF\\uDF00\\uDB40\\uDC61\\u00A0"));
1848
1849     expectT("Any-Hex/XML",
1850            CharsToUnicodeString("\\U00010330\\U0010FF00\\U000E0061\\u00A0"),
1851            "&#x10330;&#x10FF00;&#xE0061;&#xA0;");
1852
1853     expectT("Any-Hex/XML10",
1854            CharsToUnicodeString("\\U00010330\\U0010FF00\\U000E0061\\u00A0"),
1855            "&#66352;&#1113856;&#917601;&#160;");
1856
1857     expectT(UNICODE_STRING_SIMPLE("[\\U000E0000-\\U000E0FFF] Remove"),
1858            CharsToUnicodeString("\\U00010330\\U0010FF00\\U000E0061\\u00A0"),
1859            CharsToUnicodeString("\\U00010330\\U0010FF00\\u00A0"));
1860 }
1861
1862 void TransliteratorTest::TestQuantifier() {
1863
1864     // Make sure @ in a quantified anteContext works
1865     expect("a+ {b} > | @@ c; A > a; (a+ c) > '(' $1 ')';",
1866            "AAAAAb",
1867            "aaa(aac)");
1868
1869     // Make sure @ in a quantified postContext works
1870     expect("{b} a+ > c @@ |; (a+) > '(' $1 ')';",
1871            "baaaaa",
1872            "caa(aaa)");
1873
1874     // Make sure @ in a quantified postContext with seg ref works
1875     expect("{(b)} a+ > $1 @@ |; (a+) > '(' $1 ')';",
1876            "baaaaa",
1877            "baa(aaa)");
1878
1879     // Make sure @ past ante context doesn't enter ante context
1880     UTransPosition pos = {0, 5, 3, 5};
1881     expect("a+ {b} > | @@ c; x > y; (a+ c) > '(' $1 ')';",
1882            "xxxab",
1883            "xxx(ac)",
1884            &pos);
1885
1886     // Make sure @ past post context doesn't pass limit
1887     UTransPosition pos2 = {0, 4, 0, 2};
1888     expect("{b} a+ > c @@ |; x > y; a > A;",
1889            "baxx",
1890            "caxx",
1891            &pos2);
1892
1893     // Make sure @ past post context doesn't enter post context
1894     expect("{b} a+ > c @@ |; x > y; a > A;",
1895            "baxx",
1896            "cayy");
1897
1898     expect("(ab)? c > d;",
1899            "c abc ababc",
1900            "d d abd");
1901
1902     // NOTE: The (ab)+ when referenced just yields a single "ab",
1903     // not the full sequence of them.  This accords with perl behavior.
1904     expect("(ab)+ {x} > '(' $1 ')';",
1905            "x abx ababxy",
1906            "x ab(ab) abab(ab)y");
1907
1908     expect("b+ > x;",
1909            "ac abc abbc abbbc",
1910            "ac axc axc axc");
1911
1912     expect("[abc]+ > x;",
1913            "qac abrc abbcs abtbbc",
1914            "qx xrx xs xtx");
1915
1916     expect("q{(ab)+} > x;",
1917            "qa qab qaba qababc qaba",
1918            "qa qx qxa qxc qxa");
1919
1920     expect("q(ab)* > x;",
1921            "qa qab qaba qababc",
1922            "xa x xa xc");
1923
1924     // NOTE: The (ab)+ when referenced just yields a single "ab",
1925     // not the full sequence of them.  This accords with perl behavior.
1926     expect("q(ab)* > '(' $1 ')';",
1927            "qa qab qaba qababc",
1928            "()a (ab) (ab)a (ab)c");
1929
1930     // 'foo'+ and 'foo'* -- the quantifier should apply to the entire
1931     // quoted string
1932     expect("'ab'+ > x;",
1933            "bb ab ababb",
1934            "bb x xb");
1935
1936     // $foo+ and $foo* -- the quantifier should apply to the entire
1937     // variable reference
1938     expect("$var = ab; $var+ > x;",
1939            "bb ab ababb",
1940            "bb x xb");
1941 }
1942
1943 class TestTrans : public Transliterator {
1944 public:
1945     TestTrans(const UnicodeString& id) : Transliterator(id, 0) {
1946     }
1947     virtual Transliterator* clone(void) const {
1948         return new TestTrans(getID());
1949     }
1950     virtual void handleTransliterate(Replaceable& /*text*/, UTransPosition& offsets,
1951         UBool /*isIncremental*/) const
1952     {
1953         offsets.start = offsets.limit;
1954     }
1955     virtual UClassID getDynamicClassID() const;
1956     static UClassID U_EXPORT2 getStaticClassID();
1957 };
1958 UOBJECT_DEFINE_RTTI_IMPLEMENTATION(TestTrans)
1959
1960 /**
1961  * Test Source-Target/Variant.
1962  */
1963 void TransliteratorTest::TestSTV(void) {
1964     int32_t ns = Transliterator::countAvailableSources();
1965     if (ns < 0 || ns > 255) {
1966         errln((UnicodeString)"FAIL: Bad source count: " + ns);
1967         return;
1968     }
1969     int32_t i, j;
1970     for (i=0; i<ns; ++i) {
1971         UnicodeString source;
1972         Transliterator::getAvailableSource(i, source);
1973         logln((UnicodeString)"" + i + ": " + source);
1974         if (source.length() == 0) {
1975             errln("FAIL: empty source");
1976             continue;
1977         }
1978         int32_t nt = Transliterator::countAvailableTargets(source);
1979         if (nt < 0 || nt > 255) {
1980             errln((UnicodeString)"FAIL: Bad target count: " + nt);
1981             continue;
1982         }
1983         for (int32_t j=0; j<nt; ++j) {
1984             UnicodeString target;
1985             Transliterator::getAvailableTarget(j, source, target);
1986             logln((UnicodeString)" " + j + ": " + target);
1987             if (target.length() == 0) {
1988                 errln("FAIL: empty target");
1989                 continue;
1990             }
1991             int32_t nv = Transliterator::countAvailableVariants(source, target);
1992             if (nv < 0 || nv > 255) {
1993                 errln((UnicodeString)"FAIL: Bad variant count: " + nv);
1994                 continue;
1995             }
1996             for (int32_t k=0; k<nv; ++k) {
1997                 UnicodeString variant;
1998                 Transliterator::getAvailableVariant(k, source, target, variant);
1999                 if (variant.length() == 0) {
2000                     logln((UnicodeString)"  " + k + ": <empty>");
2001                 } else {
2002                     logln((UnicodeString)"  " + k + ": " + variant);
2003                 }
2004             }
2005         }
2006     }
2007
2008     // Test registration
2009     const char* IDS[] = { "Fieruwer", "Seoridf-Sweorie", "Oewoir-Oweri/Vsie" };
2010     const char* FULL_IDS[] = { "Any-Fieruwer", "Seoridf-Sweorie", "Oewoir-Oweri/Vsie" };
2011     const char* SOURCES[] = { NULL, "Seoridf", "Oewoir" };
2012     for (i=0; i<3; ++i) {
2013         Transliterator *t = new TestTrans(IDS[i]);
2014         if (t == 0) {
2015             errln("FAIL: out of memory");
2016             return;
2017         }
2018         if (t->getID() != IDS[i]) {
2019             errln((UnicodeString)"FAIL: ID mismatch for " + IDS[i]);
2020             delete t;
2021             return;
2022         }
2023         Transliterator::registerInstance(t);
2024         UErrorCode status = U_ZERO_ERROR;
2025         t = Transliterator::createInstance(IDS[i], UTRANS_FORWARD, status);
2026         if (t == NULL) {
2027             errln((UnicodeString)"FAIL: Registration/creation failed for ID " +
2028                   IDS[i]);
2029         } else {
2030             logln((UnicodeString)"Ok: Registration/creation succeeded for ID " +
2031                   IDS[i]);
2032             delete t;
2033         }
2034         Transliterator::unregister(IDS[i]);
2035         t = Transliterator::createInstance(IDS[i], UTRANS_FORWARD, status);
2036         if (t != NULL) {
2037             errln((UnicodeString)"FAIL: Unregistration failed for ID " +
2038                   IDS[i]);
2039             delete t;
2040         }
2041     }
2042
2043     // Make sure getAvailable API reflects removal
2044     int32_t n = Transliterator::countAvailableIDs();
2045     for (i=0; i<n; ++i) {
2046         UnicodeString id = Transliterator::getAvailableID(i);
2047         for (j=0; j<3; ++j) {
2048             if (id.caseCompare(FULL_IDS[j],0)==0) {
2049                 errln((UnicodeString)"FAIL: unregister(" + id + ") failed");
2050             }
2051         }
2052     }
2053     n = Transliterator::countAvailableTargets("Any");
2054     for (i=0; i<n; ++i) {
2055         UnicodeString t;
2056         Transliterator::getAvailableTarget(i, "Any", t);
2057         if (t.caseCompare(IDS[0],0)==0) {
2058             errln((UnicodeString)"FAIL: unregister(Any-" + t + ") failed");
2059         }
2060     }
2061     n = Transliterator::countAvailableSources();
2062     for (i=0; i<n; ++i) {
2063         UnicodeString s;
2064         Transliterator::getAvailableSource(i, s);
2065         for (j=0; j<3; ++j) {
2066             if (SOURCES[j] == NULL) continue;
2067             if (s.caseCompare(SOURCES[j],0)==0) {
2068                 errln((UnicodeString)"FAIL: unregister(" + s + "-*) failed");
2069             }
2070         }
2071     }
2072 }
2073
2074 /**
2075  * Test inverse of Greek-Latin; Title()
2076  */
2077 void TransliteratorTest::TestCompoundInverse(void) {
2078     UParseError parseError;
2079     UErrorCode status = U_ZERO_ERROR;
2080     Transliterator *t = Transliterator::createInstance
2081         ("Greek-Latin; Title()", UTRANS_REVERSE,parseError, status);
2082     if (t == 0) {
2083         dataerrln("FAIL: createInstance - %s", u_errorName(status));
2084         return;
2085     }
2086     UnicodeString exp("(Title);Latin-Greek");
2087     if (t->getID() == exp) {
2088         logln("Ok: inverse of \"Greek-Latin; Title()\" is \"" +
2089               t->getID());
2090     } else {
2091         errln("FAIL: inverse of \"Greek-Latin; Title()\" is \"" +
2092               t->getID() + "\", expected \"" + exp + "\"");
2093     }
2094     delete t;
2095 }
2096
2097 /**
2098  * Test NFD chaining with RBT
2099  */
2100 void TransliteratorTest::TestNFDChainRBT() {
2101     UParseError pe;
2102     UErrorCode ec = U_ZERO_ERROR;
2103     Transliterator* t = Transliterator::createFromRules(
2104                                "TEST", "::NFD; aa > Q; a > q;",
2105                                UTRANS_FORWARD, pe, ec);
2106     if (t == NULL || U_FAILURE(ec)) {
2107         dataerrln("FAIL: Transliterator::createFromRules failed with %s", u_errorName(ec));
2108         return;
2109     }
2110     expect(*t, "aa", "Q");
2111     delete t;
2112
2113     // TEMPORARY TESTS -- BEING DEBUGGED
2114 //=-    UnicodeString s, s2;
2115 //=-    t = Transliterator::createInstance("Latin-Devanagari", UTRANS_FORWARD, pe, ec);
2116 //=-    s = CharsToUnicodeString("rmk\\u1E63\\u0113t");
2117 //=-    s2 = CharsToUnicodeString("\\u0930\\u094D\\u092E\\u094D\\u0915\\u094D\\u0937\\u0947\\u0924\\u094D");
2118 //=-    expect(*t, s, s2);
2119 //=-    delete t;
2120 //=-
2121 //=-    t = Transliterator::createInstance("Devanagari-Latin", UTRANS_FORWARD, pe, ec);
2122 //=-    expect(*t, s2, s);
2123 //=-    delete t;
2124 //=-
2125 //=-    t = Transliterator::createInstance("Latin-Devanagari;Devanagari-Latin", UTRANS_FORWARD, pe, ec);
2126 //=-    s = CharsToUnicodeString("rmk\\u1E63\\u0113t");
2127 //=-    expect(*t, s, s);
2128 //=-    delete t;
2129
2130 //    const char* source[] = {
2131 //        /*
2132 //        "\\u015Br\\u012Bmad",
2133 //        "bhagavadg\\u012Bt\\u0101",
2134 //        "adhy\\u0101ya",
2135 //        "arjuna",
2136 //        "vi\\u1E63\\u0101da",
2137 //        "y\\u014Dga",
2138 //        "dhr\\u0325tar\\u0101\\u1E63\\u1E6Dra",
2139 //        "uv\\u0101cr\\u0325",
2140 //        */
2141 //        "rmk\\u1E63\\u0113t",
2142 //      //"dharmak\\u1E63\\u0113tr\\u0113",
2143 //        /*
2144 //        "kuruk\\u1E63\\u0113tr\\u0113",
2145 //        "samav\\u0113t\\u0101",
2146 //        "yuyutsava-\\u1E25",
2147 //        "m\\u0101mak\\u0101-\\u1E25",
2148 //     // "p\\u0101\\u1E47\\u1E0Dav\\u0101\\u015Bcaiva",
2149 //        "kimakurvata",
2150 //        "san\\u0304java",
2151 //        */
2152 //
2153 //        0
2154 //    };
2155 //    const char* expected[] = {
2156 //        /*
2157 //        "\\u0936\\u094d\\u0930\\u0940\\u092e\\u0926\\u094d",
2158 //        "\\u092d\\u0917\\u0935\\u0926\\u094d\\u0917\\u0940\\u0924\\u093e",
2159 //        "\\u0905\\u0927\\u094d\\u092f\\u093e\\u092f",
2160 //        "\\u0905\\u0930\\u094d\\u091c\\u0941\\u0928",
2161 //        "\\u0935\\u093f\\u0937\\u093e\\u0926",
2162 //        "\\u092f\\u094b\\u0917",
2163 //        "\\u0927\\u0943\\u0924\\u0930\\u093e\\u0937\\u094d\\u091f\\u094d\\u0930",
2164 //        "\\u0909\\u0935\\u093E\\u091A\\u0943",
2165 //        */
2166 //        "\\u0927",
2167 //        //"\\u0927\\u0930\\u094d\\u092e\\u0915\\u094d\\u0937\\u0947\\u0924\\u094d\\u0930\\u0947",
2168 //        /*
2169 //        "\\u0915\\u0941\\u0930\\u0941\\u0915\\u094d\\u0937\\u0947\\u0924\\u094d\\u0930\\u0947",
2170 //        "\\u0938\\u092e\\u0935\\u0947\\u0924\\u093e",
2171 //        "\\u092f\\u0941\\u092f\\u0941\\u0924\\u094d\\u0938\\u0935\\u0903",
2172 //        "\\u092e\\u093e\\u092e\\u0915\\u093e\\u0903",
2173 //    //  "\\u092a\\u093e\\u0923\\u094d\\u0921\\u0935\\u093e\\u0936\\u094d\\u091a\\u0948\\u0935",
2174 //        "\\u0915\\u093f\\u092e\\u0915\\u0941\\u0930\\u094d\\u0935\\u0924",
2175 //        "\\u0938\\u0902\\u091c\\u0935",
2176 //        */
2177 //        0
2178 //    };
2179 //    UErrorCode status = U_ZERO_ERROR;
2180 //    UParseError parseError;
2181 //    UnicodeString message;
2182 //    Transliterator* latinToDevToLatin=Transliterator::createInstance("Latin-Devanagari;Devanagari-Latin", UTRANS_FORWARD, parseError, status);
2183 //    Transliterator* devToLatinToDev=Transliterator::createInstance("Devanagari-Latin;Latin-Devanagari", UTRANS_FORWARD, parseError, status);
2184 //    if(U_FAILURE(status)){
2185 //        errln("FAIL: construction " +   UnicodeString(" Error: ") + u_errorName(status));
2186 //        errln("PreContext: " + prettify(parseError.preContext) + "PostContext: " + prettify( parseError.postContext) );
2187 //        delete latinToDevToLatin;
2188 //        delete devToLatinToDev;
2189 //        return;
2190 //    }
2191 //    UnicodeString gotResult;
2192 //    for(int i= 0; source[i] != 0; i++){
2193 //        gotResult = source[i];
2194 //        expect(*latinToDevToLatin,CharsToUnicodeString(source[i]),CharsToUnicodeString(source[i]));
2195 //        expect(*devToLatinToDev,CharsToUnicodeString(expected[i]),CharsToUnicodeString(expected[i]));
2196 //    }
2197 //    delete latinToDevToLatin;
2198 //    delete devToLatinToDev;
2199 }
2200
2201 /**
2202  * Inverse of "Null" should be "Null". (J21)
2203  */
2204 void TransliteratorTest::TestNullInverse() {
2205     UParseError pe;
2206     UErrorCode ec = U_ZERO_ERROR;
2207     Transliterator *t = Transliterator::createInstance("Null", UTRANS_FORWARD, pe, ec);
2208     if (t == 0 || U_FAILURE(ec)) {
2209         errln("FAIL: createInstance");
2210         return;
2211     }
2212     Transliterator *u = t->createInverse(ec);
2213     if (u == 0 || U_FAILURE(ec)) {
2214         errln("FAIL: createInverse");
2215         delete t;
2216         return;
2217     }
2218     if (u->getID() != "Null") {
2219         errln("FAIL: Inverse of Null should be Null");
2220     }
2221     delete t;
2222     delete u;
2223 }
2224
2225 /**
2226  * Check ID of inverse of alias. (J22)
2227  */
2228 void TransliteratorTest::TestAliasInverseID() {
2229     UnicodeString ID("Latin-Hangul", ""); // This should be any alias ID with an inverse
2230     UParseError pe;
2231     UErrorCode ec = U_ZERO_ERROR;
2232     Transliterator *t = Transliterator::createInstance(ID, UTRANS_FORWARD, pe, ec);
2233     if (t == 0 || U_FAILURE(ec)) {
2234         dataerrln("FAIL: createInstance - %s", u_errorName(ec));
2235         return;
2236     }
2237     Transliterator *u = t->createInverse(ec);
2238     if (u == 0 || U_FAILURE(ec)) {
2239         errln("FAIL: createInverse");
2240         delete t;
2241         return;
2242     }
2243     UnicodeString exp = "Hangul-Latin";
2244     UnicodeString got = u->getID();
2245     if (got != exp) {
2246         errln((UnicodeString)"FAIL: Inverse of " + ID + " is " + got +
2247               ", expected " + exp);
2248     }
2249     delete t;
2250     delete u;
2251 }
2252
2253 /**
2254  * Test IDs of inverses of compound transliterators. (J20)
2255  */
2256 void TransliteratorTest::TestCompoundInverseID() {
2257     UnicodeString ID = "Latin-Jamo;NFC(NFD)";
2258     UParseError pe;
2259     UErrorCode ec = U_ZERO_ERROR;
2260     Transliterator *t = Transliterator::createInstance(ID, UTRANS_FORWARD, pe, ec);
2261     if (t == 0 || U_FAILURE(ec)) {
2262         dataerrln("FAIL: createInstance - %s", u_errorName(ec));
2263         return;
2264     }
2265     Transliterator *u = t->createInverse(ec);
2266     if (u == 0 || U_FAILURE(ec)) {
2267         errln("FAIL: createInverse");
2268         delete t;
2269         return;
2270     }
2271     UnicodeString exp = "NFD(NFC);Jamo-Latin";
2272     UnicodeString got = u->getID();
2273     if (got != exp) {
2274         errln((UnicodeString)"FAIL: Inverse of " + ID + " is " + got +
2275               ", expected " + exp);
2276     }
2277     delete t;
2278     delete u;
2279 }
2280
2281 /**
2282  * Test undefined variable.
2283
2284  */
2285 void TransliteratorTest::TestUndefinedVariable() {
2286     UnicodeString rule = "$initial } a <> \\u1161;";
2287     UParseError pe;
2288     UErrorCode ec = U_ZERO_ERROR;
2289     Transliterator *t = Transliterator::createFromRules("<ID>", rule, UTRANS_FORWARD, pe, ec);
2290     delete t;
2291     if (U_FAILURE(ec)) {
2292         logln((UnicodeString)"OK: Got exception for " + rule + ", as expected: " +
2293               u_errorName(ec));
2294         return;
2295     }
2296     errln((UnicodeString)"Fail: bogus rule " + rule + " compiled with error " +
2297           u_errorName(ec));
2298 }
2299
2300 /**
2301  * Test empty context.
2302  */
2303 void TransliteratorTest::TestEmptyContext() {
2304     expect(" { a } > b;", "xay a ", "xby b ");
2305 }
2306
2307 /**
2308 * Test compound filter ID syntax
2309 */
2310 void TransliteratorTest::TestCompoundFilterID(void) {
2311     static const char* DATA[] = {
2312         // Col. 1 = ID or rule set (latter must start with #)
2313
2314         // = columns > 1 are null if expect col. 1 to be illegal =
2315
2316         // Col. 2 = direction, "F..." or "R..."
2317         // Col. 3 = source string
2318         // Col. 4 = exp result
2319
2320         "[abc]; [abc]", NULL, NULL, NULL, // multiple filters
2321         "Latin-Greek; [abc];", NULL, NULL, NULL, // misplaced filter
2322         "[b]; Latin-Greek; Upper; ([xyz])", "F", "abc", "a\\u0392c",
2323         "[b]; (Lower); Latin-Greek; Upper(); ([\\u0392])", "R", "\\u0391\\u0392\\u0393", "\\u0391b\\u0393",
2324         "#\n::[b]; ::Latin-Greek; ::Upper; ::([xyz]);", "F", "abc", "a\\u0392c",
2325         "#\n::[b]; ::(Lower); ::Latin-Greek; ::Upper(); ::([\\u0392]);", "R", "\\u0391\\u0392\\u0393", "\\u0391b\\u0393",
2326         NULL,
2327     };
2328
2329     for (int32_t i=0; DATA[i]; i+=4) {
2330         UnicodeString id = CharsToUnicodeString(DATA[i]);
2331         UTransDirection direction = (DATA[i+1] != NULL && DATA[i+1][0] == 'R') ?
2332             UTRANS_REVERSE : UTRANS_FORWARD;
2333         UnicodeString source;
2334         UnicodeString exp;
2335         if (DATA[i+2] != NULL) {
2336             source = CharsToUnicodeString(DATA[i+2]);
2337             exp = CharsToUnicodeString(DATA[i+3]);
2338         }
2339         UBool expOk = (DATA[i+1] != NULL);
2340         Transliterator* t = NULL;
2341         UParseError pe;
2342         UErrorCode ec = U_ZERO_ERROR;
2343         if (id.charAt(0) == 0x23/*#*/) {
2344             t = Transliterator::createFromRules("ID", id, direction, pe, ec);
2345         } else {
2346             t = Transliterator::createInstance(id, direction, pe, ec);
2347         }
2348         UBool ok = (t != NULL && U_SUCCESS(ec));
2349         UnicodeString transID;
2350         if (t!=0) {
2351             transID = t->getID();
2352         }
2353         else {
2354             transID = UnicodeString("NULL", "");
2355         }
2356         if (ok == expOk) {
2357             logln((UnicodeString)"Ok: " + id + " => " + transID + ", " +
2358                   u_errorName(ec));
2359             if (source.length() != 0) {
2360                 expect(*t, source, exp);
2361             }
2362             delete t;
2363         } else {
2364             dataerrln((UnicodeString)"FAIL: " + id + " => " + transID + ", " +
2365                   u_errorName(ec));
2366         }
2367     }
2368 }
2369
2370 /**
2371  * Test new property set syntax
2372  */
2373 void TransliteratorTest::TestPropertySet() {
2374     expect(UNICODE_STRING_SIMPLE("a>A; \\p{Lu}>x; \\p{ANY}>y;"), "abcDEF", "Ayyxxx");
2375     expect("(.+)>'[' $1 ']';", " a stitch \n in time \r saves 9",
2376            "[ a stitch ]\n[ in time ]\r[ saves 9]");
2377 }
2378
2379 /**
2380  * Test various failure points of the new 2.0 engine.
2381  */
2382 void TransliteratorTest::TestNewEngine() {
2383     UParseError pe;
2384     UErrorCode ec = U_ZERO_ERROR;
2385     Transliterator *t = Transliterator::createInstance("Latin-Hiragana", UTRANS_FORWARD, pe, ec);
2386     if (t == 0 || U_FAILURE(ec)) {
2387         dataerrln("FAIL: createInstance Latin-Hiragana - %s", u_errorName(ec));
2388         return;
2389     }
2390     // Katakana should be untouched
2391     expect(*t, CharsToUnicodeString("a\\u3042\\u30A2"),
2392            CharsToUnicodeString("\\u3042\\u3042\\u30A2"));
2393
2394     delete t;
2395
2396 #if 1
2397     // This test will only work if Transliterator.ROLLBACK is
2398     // true.  Otherwise, this test will fail, revealing a
2399     // limitation of global filters in incremental mode.
2400     Transliterator *a =
2401         Transliterator::createFromRules("a_to_A", "a > A;", UTRANS_FORWARD, pe, ec);
2402     Transliterator *A =
2403         Transliterator::createFromRules("A_to_b", "A > b;", UTRANS_FORWARD, pe, ec);
2404     if (U_FAILURE(ec)) {
2405         delete a;
2406         delete A;
2407         return;
2408     }
2409
2410     Transliterator* array[3];
2411     array[0] = a;
2412     array[1] = Transliterator::createInstance("NFD", UTRANS_FORWARD, pe, ec);
2413     array[2] = A;
2414     if (U_FAILURE(ec)) {
2415         errln("FAIL: createInstance NFD");
2416         delete a;
2417         delete A;
2418         delete array[1];
2419         return;
2420     }
2421
2422     t = new CompoundTransliterator(array, 3, new UnicodeSet("[:Ll:]", ec));
2423     if (U_FAILURE(ec)) {
2424         errln("FAIL: UnicodeSet constructor");
2425         delete a;
2426         delete A;
2427         delete array[1];
2428         delete t;
2429         return;
2430     }
2431
2432     expect(*t, "aAaA", "bAbA");
2433
2434     assertTrue("countElements", t->countElements() == 3);
2435     assertEquals("getElement(0)", t->getElement(0, ec).getID(), "a_to_A");
2436     assertEquals("getElement(1)", t->getElement(1, ec).getID(), "NFD");
2437     assertEquals("getElement(2)", t->getElement(2, ec).getID(), "A_to_b");
2438     assertSuccess("getElement", ec);
2439
2440     delete a;
2441     delete A;
2442     delete array[1];
2443     delete t;
2444 #endif
2445
2446     expect("$smooth = x; $macron = q; [:^L:] { ([aeiouyAEIOUY] $macron?) } [^aeiouyAEIOUY$smooth$macron] > | $1 $smooth ;",
2447            "a",
2448            "ax");
2449
2450     UnicodeString gr = CharsToUnicodeString(
2451         "$ddot = \\u0308 ;"
2452         "$lcgvowel = [\\u03b1\\u03b5\\u03b7\\u03b9\\u03bf\\u03c5\\u03c9] ;"
2453         "$rough = \\u0314 ;"
2454         "($lcgvowel+ $ddot?) $rough > h | $1 ;"
2455         "\\u03b1 <> a ;"
2456         "$rough <> h ;");
2457
2458     expect(gr, CharsToUnicodeString("\\u03B1\\u0314"), "ha");
2459 }
2460
2461 /**
2462  * Test quantified segment behavior.  We want:
2463  * ([abc])+ > x $1 x; applied to "cba" produces "xax"
2464  * Also checks that toRules() reproduces a rule containing a plain
2465  * segment, and one containing a quantified segment, character for character.
2466  */
2467 void TransliteratorTest::TestQuantifiedSegment(void) {
2468     // Baseline: quantifier inside the segment, so $1 is the whole run
2469     expect("([abc]+) > x $1 x;", "cba", "xcbax");
2470
2471     // Quantifier wrapped around the segment: $1 is the last repetition only
2472     expect("([abc])+ > x $1 x;", "cba", "xax");
2473
2474     // Same quantified segment, placed in the context before the '{'
2475     expect("([abc])+ { q > x $1 x;", "cbaq", "cbaxax");
2476
2477     // Quantified segment in the context after the '}'
2478     expect("{q} ([a-d])+ > '(' $1 ')';", "ddqcba", "dd(a)cba");
2479
2480     // toRules() round trip: first a non-quantified segment, then a
2481     // quantified one.  The regenerated text is compared verbatim, so be
2482     // careful with the spacing inside these literals.
2483     static const char* const ROUND_TRIP_RULES[] = {
2484         "([a-c]){q} > x $1 x;",
2485         "([a-c])+{q} > x $1 x;"
2486     };
2487     const int32_t ruleCount =
2488         (int32_t)(sizeof(ROUND_TRIP_RULES) / sizeof(ROUND_TRIP_RULES[0]));
2489
2490     // The status and the output buffer are shared by both iterations.
2491     UParseError parseErr;
2492     UErrorCode status = U_ZERO_ERROR;
2493     UnicodeString regenerated;
2494     for (int32_t k = 0; k < ruleCount; ++k) {
2495         const UnicodeString rule(ROUND_TRIP_RULES[k]);
2496         Transliterator* xlit =
2497             Transliterator::createFromRules("ID", rule, UTRANS_FORWARD, parseErr, status);
2498         if (U_FAILURE(status)) {
2499             // Abandon the remaining rules on the first construction failure.
2500             errln("FAIL: createFromRules");
2501             delete xlit;
2502             return;
2503         }
2504
2505         // Regenerate the rule text and compare it with what was parsed.
2506         xlit->toRules(regenerated, TRUE);
2507         if (rule == regenerated) {
2508             logln((UnicodeString)"Ok: \"" + rule + "\" x toRules() => \"" + regenerated + "\"");
2509         } else {
2510             errln((UnicodeString)"FAIL: \"" + rule + "\" x toRules() => \"" + regenerated + "\"");
2511         }
2512         delete xlit;
2513     }
2514 }
2515
2516 //======================================================================
2517 // Ram's tests
2518 //======================================================================
2519 // Round trip: source[i] -> expected[i] via Latin-Devanagari and back via Devanagari-Latin.
2520 void TransliteratorTest::TestDevanagariLatinRT(){
2521     const int MAX_LEN= 52;  // number of entries in source[] and in expected[]
2522     const char* const source[MAX_LEN] = {
2523         "bh\\u0101rata",
2524         "kra",
2525         "k\\u1E63a",
2526         "khra",
2527         "gra",
2528         "\\u1E45ra",
2529         "cra",
2530         "chra",
2531         "j\\u00F1a",
2532         "jhra",
2533         "\\u00F1ra",
2534         "\\u1E6Dya",
2535         "\\u1E6Dhra",
2536         "\\u1E0Dya",
2537       //"r\\u0323ya", // \u095c is not valid in Devanagari
2538         "\\u1E0Dhya",
2539         "\\u1E5Bhra",
2540         "\\u1E47ra",
2541         "tta",
2542         "thra",
2543         "dda",
2544         "dhra",
2545         "nna",
2546         "pra",
2547         "phra",
2548         "bra",
2549         "bhra",
2550         "mra",
2551         "\\u1E49ra",
2552       //"l\\u0331ra",
2553         "yra",
2554         "\\u1E8Fra",
2555       //"l-",
2556         "vra",
2557         "\\u015Bra",
2558         "\\u1E63ra",
2559         "sra",
2560         "hma",
2561         "\\u1E6D\\u1E6Da",
2562         "\\u1E6D\\u1E6Dha",
2563         "\\u1E6Dh\\u1E6Dha",
2564         "\\u1E0D\\u1E0Da",
2565         "\\u1E0D\\u1E0Dha",
2566         "\\u1E6Dya",
2567         "\\u1E6Dhya",
2568         "\\u1E0Dya",
2569         "\\u1E0Dhya",
2570         // Not roundtrippable --
2571         // \\u0939\\u094d\\u094d\\u092E  - hma
2572         // \\u0939\\u094d\\u092E         - hma
2573         // CharsToUnicodeString("hma"),
2574         "hya",
2575         "\\u015Br\\u0325",
2576         "\\u015Bca",
2577         "\\u0115",
2578         "san\\u0304j\\u012Bb s\\u0113nagupta",
2579         "\\u0101nand vaddir\\u0101ju",
2580         "\\u0101",
2581         "a"
2582     };
2583     const char* const expected[MAX_LEN] = {
2584         "\\u092D\\u093E\\u0930\\u0924",   /* bha\\u0304rata */
2585         "\\u0915\\u094D\\u0930",          /* kra         */
2586         "\\u0915\\u094D\\u0937",          /* ks\\u0323a  */
2587         "\\u0916\\u094D\\u0930",          /* khra        */
2588         "\\u0917\\u094D\\u0930",          /* gra         */
2589         "\\u0919\\u094D\\u0930",          /* n\\u0307ra  */
2590         "\\u091A\\u094D\\u0930",          /* cra         */
2591         "\\u091B\\u094D\\u0930",          /* chra        */
2592         "\\u091C\\u094D\\u091E",          /* jn\\u0303a  */
2593         "\\u091D\\u094D\\u0930",          /* jhra        */
2594         "\\u091E\\u094D\\u0930",          /* n\\u0303ra  */
2595         "\\u091F\\u094D\\u092F",          /* t\\u0323ya  */
2596         "\\u0920\\u094D\\u0930",          /* t\\u0323hra */
2597         "\\u0921\\u094D\\u092F",          /* d\\u0323ya  */
2598       //"\\u095C\\u094D\\u092F",        /* r\\u0323ya  */ // \u095c is not valid in Devanagari
2599         "\\u0922\\u094D\\u092F",          /* d\\u0323hya */
2600         "\\u0922\\u093C\\u094D\\u0930",   /* r\\u0323hra */
2601         "\\u0923\\u094D\\u0930",          /* n\\u0323ra  */
2602         "\\u0924\\u094D\\u0924",          /* tta         */
2603         "\\u0925\\u094D\\u0930",          /* thra        */
2604         "\\u0926\\u094D\\u0926",          /* dda         */
2605         "\\u0927\\u094D\\u0930",          /* dhra        */
2606         "\\u0928\\u094D\\u0928",          /* nna         */
2607         "\\u092A\\u094D\\u0930",          /* pra         */
2608         "\\u092B\\u094D\\u0930",          /* phra        */
2609         "\\u092C\\u094D\\u0930",          /* bra         */
2610         "\\u092D\\u094D\\u0930",          /* bhra        */
2611         "\\u092E\\u094D\\u0930",          /* mra         */
2612         "\\u0929\\u094D\\u0930",          /* n\\u0331ra  */
2613       //"\\u0934\\u094D\\u0930",        /* l\\u0331ra  */
2614         "\\u092F\\u094D\\u0930",          /* yra         */
2615         "\\u092F\\u093C\\u094D\\u0930",   /* y\\u0307ra  */
2616       //"l-",
2617         "\\u0935\\u094D\\u0930",          /* vra         */
2618         "\\u0936\\u094D\\u0930",          /* s\\u0301ra  */
2619         "\\u0937\\u094D\\u0930",          /* s\\u0323ra  */
2620         "\\u0938\\u094D\\u0930",          /* sra         */
2621         "\\u0939\\u094d\\u092E",          /* hma         */
2622         "\\u091F\\u094D\\u091F",          /* t\\u0323t\\u0323a  */
2623         "\\u091F\\u094D\\u0920",          /* t\\u0323t\\u0323ha */
2624         "\\u0920\\u094D\\u0920",          /* t\\u0323ht\\u0323ha*/
2625         "\\u0921\\u094D\\u0921",          /* d\\u0323d\\u0323a  */
2626         "\\u0921\\u094D\\u0922",          /* d\\u0323d\\u0323ha */
2627         "\\u091F\\u094D\\u092F",          /* t\\u0323ya  */
2628         "\\u0920\\u094D\\u092F",          /* t\\u0323hya */
2629         "\\u0921\\u094D\\u092F",          /* d\\u0323ya  */
2630         "\\u0922\\u094D\\u092F",          /* d\\u0323hya */
2631      // "hma",                         /* hma         */
2632         "\\u0939\\u094D\\u092F",          /* hya         */
2633         "\\u0936\\u0943",                 /* s\\u0301r\\u0325a  */
2634         "\\u0936\\u094D\\u091A",          /* s\\u0301ca  */
2635         "\\u090d",                        /* e\\u0306    */
2636         "\\u0938\\u0902\\u091C\\u0940\\u092C\\u094D \\u0938\\u0947\\u0928\\u0917\\u0941\\u092A\\u094D\\u0924",
2637         "\\u0906\\u0928\\u0902\\u0926\\u094D \\u0935\\u0926\\u094D\\u0926\\u093F\\u0930\\u093E\\u091C\\u0941",
2638         "\\u0906",
2639         "\\u0905",
2640     };
2641     UErrorCode status = U_ZERO_ERROR;
2642     UParseError parseError;
2643     Transliterator* latinToDev=Transliterator::createInstance("Latin-Devanagari", UTRANS_FORWARD, parseError, status);
2644     Transliterator* devToLatin=Transliterator::createInstance("Devanagari-Latin", UTRANS_FORWARD, parseError, status);
2645     if(U_FAILURE(status)){
2646         dataerrln("FAIL: construction " +   UnicodeString(" Error: ") + u_errorName(status));
2647         dataerrln("PreContext: " + prettify(parseError.preContext) + " PostContext: " + prettify( parseError.postContext) );
2648         delete latinToDev;  // may be non-NULL if only the second createInstance() failed
2649         delete devToLatin;
2650         return;
2651     }
2652     for(int i= 0; i<MAX_LEN; i++){
2653         expect(*latinToDev,CharsToUnicodeString(source[i]),CharsToUnicodeString(expected[i]));
2654         expect(*devToLatin,CharsToUnicodeString(expected[i]),CharsToUnicodeString(source[i]));
2655     }
2656     delete latinToDev;
2657     delete devToLatin;
2658 }
2659
2660 // Round trip: source[i] -> expected[i] via Latin-Telugu and back via Telugu-Latin.
2661 void TransliteratorTest::TestTeluguLatinRT(){
2662     const int MAX_LEN=10;  // number of entries in source[] and in expected[]
2663     const char* const source[MAX_LEN] = {
2664         "raghur\\u0101m vi\\u015Bvan\\u0101dha",                         /* Raghuram Viswanadha    */
2665         "\\u0101nand vaddir\\u0101ju",                                   /* Anand Vaddiraju        */
2666         "r\\u0101j\\u012Bv ka\\u015Barab\\u0101da",                      /* Rajeev Kasarabada      */
2667         "san\\u0304j\\u012Bv ka\\u015Barab\\u0101da",                    /* sanjeev kasarabada     */
2668         "san\\u0304j\\u012Bb sen'gupta",                                 /* sanjib sengupata       */
2669         "amar\\u0113ndra hanum\\u0101nula",                              /* Amarendra hanumanula   */
2670         "ravi kum\\u0101r vi\\u015Bvan\\u0101dha",                       /* Ravi Kumar Viswanadha  */
2671         "\\u0101ditya kandr\\u0113gula",                                 /* Aditya Kandregula      */
2672         "\\u015Br\\u012Bdhar ka\\u1E47\\u1E6Dama\\u015Be\\u1E6D\\u1E6Di",/* Shridhar Kantamsetty   */
2673         "m\\u0101dhav de\\u015Be\\u1E6D\\u1E6Di"                         /* Madhav Desetty         */
2674     };
2675
2676     const char* const expected[MAX_LEN] = {
2677         "\\u0c30\\u0c18\\u0c41\\u0c30\\u0c3e\\u0c2e\\u0c4d \\u0c35\\u0c3f\\u0c36\\u0c4d\\u0c35\\u0c28\\u0c3e\\u0c27",
2678         "\\u0c06\\u0c28\\u0c02\\u0c26\\u0c4d \\u0C35\\u0C26\\u0C4D\\u0C26\\u0C3F\\u0C30\\u0C3E\\u0C1C\\u0C41",
2679         "\\u0c30\\u0c3e\\u0c1c\\u0c40\\u0c35\\u0c4d \\u0c15\\u0c36\\u0c30\\u0c2c\\u0c3e\\u0c26",
2680         "\\u0c38\\u0c02\\u0c1c\\u0c40\\u0c35\\u0c4d \\u0c15\\u0c36\\u0c30\\u0c2c\\u0c3e\\u0c26",
2681         "\\u0c38\\u0c02\\u0c1c\\u0c40\\u0c2c\\u0c4d \\u0c38\\u0c46\\u0c28\\u0c4d\\u0c17\\u0c41\\u0c2a\\u0c4d\\u0c24",
2682         "\\u0c05\\u0c2e\\u0c30\\u0c47\\u0c02\\u0c26\\u0c4d\\u0c30 \\u0c39\\u0c28\\u0c41\\u0c2e\\u0c3e\\u0c28\\u0c41\\u0c32",
2683         "\\u0c30\\u0c35\\u0c3f \\u0c15\\u0c41\\u0c2e\\u0c3e\\u0c30\\u0c4d \\u0c35\\u0c3f\\u0c36\\u0c4d\\u0c35\\u0c28\\u0c3e\\u0c27",
2684         "\\u0c06\\u0c26\\u0c3f\\u0c24\\u0c4d\\u0c2f \\u0C15\\u0C02\\u0C26\\u0C4D\\u0C30\\u0C47\\u0C17\\u0C41\\u0c32",
2685         "\\u0c36\\u0c4d\\u0c30\\u0c40\\u0C27\\u0C30\\u0C4D \\u0c15\\u0c02\\u0c1f\\u0c2e\\u0c36\\u0c46\\u0c1f\\u0c4d\\u0c1f\\u0c3f",
2686         "\\u0c2e\\u0c3e\\u0c27\\u0c35\\u0c4d \\u0c26\\u0c46\\u0c36\\u0c46\\u0c1f\\u0c4d\\u0c1f\\u0c3f",
2687     };
2688
2689     UErrorCode status = U_ZERO_ERROR;
2690     UParseError parseError;
2691     Transliterator* latinToTelugu=Transliterator::createInstance("Latin-Telugu", UTRANS_FORWARD, parseError, status);
2692     Transliterator* teluguToLatin=Transliterator::createInstance("Telugu-Latin", UTRANS_FORWARD, parseError, status);
2693     if(U_FAILURE(status)){
2694         dataerrln("FAIL: construction " +   UnicodeString(" Error: ") + u_errorName(status));
2695         dataerrln("PreContext: " + prettify(parseError.preContext) + " PostContext: " + prettify( parseError.postContext) );
2696         delete latinToTelugu;  // may be non-NULL if only the second createInstance() failed
2697         delete teluguToLatin;
2698         return;
2699     }
2700     for(int i= 0; i<MAX_LEN; i++){
2701         expect(*latinToTelugu,CharsToUnicodeString(source[i]),CharsToUnicodeString(expected[i]));
2702         expect(*teluguToLatin,CharsToUnicodeString(expected[i]),CharsToUnicodeString(source[i]));
2703     }
2704     delete latinToTelugu;
2705     delete teluguToLatin;
2706 }
2707
2708 // Round trip of Sanskrit words: Latin-Devanagari forward, Devanagari-Latin back.
2709 void TransliteratorTest::TestSanskritLatinRT(){
2710     const int MAX_LEN =16;  // number of entries in source[] and in expected[]
2711     const char* const source[MAX_LEN] = {
2712         "rmk\\u1E63\\u0113t",
2713         "\\u015Br\\u012Bmad",
2714         "bhagavadg\\u012Bt\\u0101",
2715         "adhy\\u0101ya",
2716         "arjuna",
2717         "vi\\u1E63\\u0101da",
2718         "y\\u014Dga",
2719         "dhr\\u0325tar\\u0101\\u1E63\\u1E6Dra",
2720         "uv\\u0101cr\\u0325",
2721         "dharmak\\u1E63\\u0113tr\\u0113",
2722         "kuruk\\u1E63\\u0113tr\\u0113",
2723         "samav\\u0113t\\u0101",
2724         "yuyutsava\\u1E25",
2725         "m\\u0101mak\\u0101\\u1E25",
2726     // "p\\u0101\\u1E47\\u1E0Dav\\u0101\\u015Bcaiva",
2727         "kimakurvata",
2728         "san\\u0304java",
2729     };
2730     const char* const expected[MAX_LEN] = {
2731         "\\u0930\\u094D\\u092E\\u094D\\u0915\\u094D\\u0937\\u0947\\u0924\\u094D",
2732         "\\u0936\\u094d\\u0930\\u0940\\u092e\\u0926\\u094d",
2733         "\\u092d\\u0917\\u0935\\u0926\\u094d\\u0917\\u0940\\u0924\\u093e",
2734         "\\u0905\\u0927\\u094d\\u092f\\u093e\\u092f",
2735         "\\u0905\\u0930\\u094d\\u091c\\u0941\\u0928",
2736         "\\u0935\\u093f\\u0937\\u093e\\u0926",
2737         "\\u092f\\u094b\\u0917",
2738         "\\u0927\\u0943\\u0924\\u0930\\u093e\\u0937\\u094d\\u091f\\u094d\\u0930",
2739         "\\u0909\\u0935\\u093E\\u091A\\u0943",
2740         "\\u0927\\u0930\\u094d\\u092e\\u0915\\u094d\\u0937\\u0947\\u0924\\u094d\\u0930\\u0947",
2741         "\\u0915\\u0941\\u0930\\u0941\\u0915\\u094d\\u0937\\u0947\\u0924\\u094d\\u0930\\u0947",
2742         "\\u0938\\u092e\\u0935\\u0947\\u0924\\u093e",
2743         "\\u092f\\u0941\\u092f\\u0941\\u0924\\u094d\\u0938\\u0935\\u0903",
2744         "\\u092e\\u093e\\u092e\\u0915\\u093e\\u0903",
2745     //"\\u092a\\u093e\\u0923\\u094d\\u0921\\u0935\\u093e\\u0936\\u094d\\u091a\\u0948\\u0935",
2746         "\\u0915\\u093f\\u092e\\u0915\\u0941\\u0930\\u094d\\u0935\\u0924",
2747         "\\u0938\\u0902\\u091c\\u0935",
2748     };
2749     UErrorCode status = U_ZERO_ERROR;
2750     UParseError parseError;
2751     Transliterator* latinToDev=Transliterator::createInstance("Latin-Devanagari", UTRANS_FORWARD, parseError, status);
2752     Transliterator* devToLatin=Transliterator::createInstance("Devanagari-Latin", UTRANS_FORWARD, parseError, status);
2753     if(U_FAILURE(status)){
2754         dataerrln("FAIL: construction " +   UnicodeString(" Error: ") + u_errorName(status));
2755         dataerrln("PreContext: " + prettify(parseError.preContext) + " PostContext: " + prettify( parseError.postContext) );
2756         delete latinToDev;  // may be non-NULL if only the second createInstance() failed
2757         delete devToLatin;
2758         return;
2759     }
2760     for(int i= 0; i<MAX_LEN; i++){
2761         expect(*latinToDev,CharsToUnicodeString(source[i]),CharsToUnicodeString(expected[i]));
2762         expect(*devToLatin,CharsToUnicodeString(expected[i]),CharsToUnicodeString(source[i]));
2763     }
2764     delete latinToDev;
2765     delete devToLatin;
2766 }
2767
2768
2769 // Runs each string through compound "A-B;B-A" transliterators and expects it back unchanged.
2770 void TransliteratorTest::TestCompoundLatinRT(){
2771     const char* const source[] = {
2772         "rmk\\u1E63\\u0113t",
2773         "\\u015Br\\u012Bmad",
2774         "bhagavadg\\u012Bt\\u0101",
2775         "adhy\\u0101ya",
2776         "arjuna",
2777         "vi\\u1E63\\u0101da",
2778         "y\\u014Dga",
2779         "dhr\\u0325tar\\u0101\\u1E63\\u1E6Dra",
2780         "uv\\u0101cr\\u0325",
2781         "dharmak\\u1E63\\u0113tr\\u0113",
2782         "kuruk\\u1E63\\u0113tr\\u0113",
2783         "samav\\u0113t\\u0101",
2784         "yuyutsava\\u1E25",
2785         "m\\u0101mak\\u0101\\u1E25",
2786      // "p\\u0101\\u1E47\\u1E0Dav\\u0101\\u015Bcaiva",
2787         "kimakurvata",
2788         "san\\u0304java"
2789     };
2790     const int MAX_LEN = sizeof(source)/sizeof(source[0]);
2791     const char* const expected[] = {  // unsized on purpose, so the length check below is meaningful
2792         "\\u0930\\u094D\\u092E\\u094D\\u0915\\u094D\\u0937\\u0947\\u0924\\u094D",
2793         "\\u0936\\u094d\\u0930\\u0940\\u092e\\u0926\\u094d",
2794         "\\u092d\\u0917\\u0935\\u0926\\u094d\\u0917\\u0940\\u0924\\u093e",
2795         "\\u0905\\u0927\\u094d\\u092f\\u093e\\u092f",
2796         "\\u0905\\u0930\\u094d\\u091c\\u0941\\u0928",
2797         "\\u0935\\u093f\\u0937\\u093e\\u0926",
2798         "\\u092f\\u094b\\u0917",
2799         "\\u0927\\u0943\\u0924\\u0930\\u093e\\u0937\\u094d\\u091f\\u094d\\u0930",
2800         "\\u0909\\u0935\\u093E\\u091A\\u0943",
2801         "\\u0927\\u0930\\u094d\\u092e\\u0915\\u094d\\u0937\\u0947\\u0924\\u094d\\u0930\\u0947",
2802         "\\u0915\\u0941\\u0930\\u0941\\u0915\\u094d\\u0937\\u0947\\u0924\\u094d\\u0930\\u0947",
2803         "\\u0938\\u092e\\u0935\\u0947\\u0924\\u093e",
2804         "\\u092f\\u0941\\u092f\\u0941\\u0924\\u094d\\u0938\\u0935\\u0903",
2805         "\\u092e\\u093e\\u092e\\u0915\\u093e\\u0903",
2806     //  "\\u092a\\u093e\\u0923\\u094d\\u0921\\u0935\\u093e\\u0936\\u094d\\u091a\\u0948\\u0935",
2807         "\\u0915\\u093f\\u092e\\u0915\\u0941\\u0930\\u094d\\u0935\\u0924",
2808         "\\u0938\\u0902\\u091c\\u0935"
2809     };
2810     if(MAX_LEN != (int)(sizeof(expected)/sizeof(expected[0]))) {
2811         errln("error in TestCompoundLatinRT: source[] and expected[] have different lengths!");
2812         return;
2813     }
2814
2815     UErrorCode status = U_ZERO_ERROR;
2816     UParseError parseError;
2817     Transliterator* devToLatinToDev  =Transliterator::createInstance("Devanagari-Latin;Latin-Devanagari", UTRANS_FORWARD, parseError, status);
2818     Transliterator* latinToDevToLatin=Transliterator::createInstance("Latin-Devanagari;Devanagari-Latin", UTRANS_FORWARD, parseError, status);
2819     Transliterator* devToTelToDev    =Transliterator::createInstance("Devanagari-Telugu;Telugu-Devanagari", UTRANS_FORWARD, parseError, status);  // only constructed; not exercised below
2820     Transliterator* latinToTelToLatin=Transliterator::createInstance("Latin-Telugu;Telugu-Latin", UTRANS_FORWARD, parseError, status);
2821     if(U_FAILURE(status)){
2822         dataerrln("FAIL: construction " +   UnicodeString(" Error: ") + u_errorName(status));
2823         dataerrln("PreContext: " + prettify(parseError.preContext) + " PostContext: " + prettify( parseError.postContext) );
2824         delete devToLatinToDev;  // earlier instances may exist if a later createInstance() failed
2825         delete latinToDevToLatin;
2826         delete devToTelToDev;
2827         delete latinToTelToLatin;
2828         return;
2829     }
2830     for(int i= 0; i<MAX_LEN; i++){
2831         expect(*devToLatinToDev,CharsToUnicodeString(expected[i]),CharsToUnicodeString(expected[i]));
2832         expect(*latinToDevToLatin,CharsToUnicodeString(source[i]),CharsToUnicodeString(source[i]));
2833         expect(*latinToTelToLatin,CharsToUnicodeString(source[i]),CharsToUnicodeString(source[i]));
2834     }
2835     delete(latinToDevToLatin);
2836     delete(devToLatinToDev);
2837     delete(devToTelToDev);
2838     delete(latinToTelToLatin);
2839 }
2840
2841 /**
2842  * Test Gurmukhi-Devanagari Tippi and Bindi
2843  *
2844  * The rule says: (\u0902) when preceded by a vowel becomes (\u0A02), and
2845  * (\u0902) when preceded by a consonant becomes (\u0A70).
2846  */
2847 void TransliteratorTest::TestGurmukhiDevanagari(){
2848     UErrorCode errorCode = U_ZERO_ERROR;
2849     UParseError parseErr;
2850     UnicodeSet vowelSet(UnicodeString("[\\u0905-\\u090A \\u090F\\u0910\\u0913\\u0914 \\u093e-\\u0942\\u0947\\u0948\\u094B\\u094C\\u094D]", -1, US_INV).unescape(), errorCode);
2851     UnicodeSet consonantSet(UnicodeString("[\\u0915-\\u0928\\u092A-\\u0930]", -1, US_INV).unescape(), errorCode);
2852     Transliterator* toGurmukhi = Transliterator::createInstance("Devanagari-Gurmukhi",UTRANS_FORWARD, parseErr, errorCode);
2853     if(U_FAILURE(errorCode)) {
2854         dataerrln("Error creating transliterator %s", u_errorName(errorCode));
2855         delete toGurmukhi;
2856         return;
2857     }
2858
2859     // Two-character strings; position 0 is rewritten on every iteration.
2860     UnicodeString input = UnicodeString(" \\u0902", -1, US_INV).unescape();
2861     UnicodeString output = UnicodeString(" \\u0A02", -1, US_INV).unescape();
2862
2863     // Vowel set: the expected second character is U+0A02.
2864     for(UnicodeSetIterator vowelIt(vowelSet); vowelIt.next(); ){
2865         const UChar32 cp = vowelIt.getCodepoint();
2866         input.setCharAt(0,(UChar) cp);
2867         output.setCharAt(0,(UChar) (cp+0x0100));
2868         expect(*toGurmukhi,input,output);
2869     }
2870
2871     // Consonant set: the expected second character is U+0A70.
2872     output.setCharAt(1,0x0A70);
2873     for(UnicodeSetIterator consonantIt(consonantSet); consonantIt.next(); ){
2874         const UChar32 cp = consonantIt.getCodepoint();
2875         input.setCharAt(0,(UChar) cp);
2876         output.setCharAt(0,(UChar) (cp+0x0100));
2877         expect(*toGurmukhi,input,output);
2878     }
2879     delete toGurmukhi;
2880 }
2881 /**
2882  * Test instantiation from a locale.
2883  */
2884 void TransliteratorTest::TestLocaleInstantiation(void) {
2885     UParseError parseErr;
2886     UErrorCode status = U_ZERO_ERROR;
2887     // Locale as the source of the ID: U+0430 is expected to become "a".
2888     Transliterator *fromRussian = Transliterator::createInstance("ru_RU-Latin", UTRANS_FORWARD, parseErr, status);
2889     if (U_FAILURE(status)) {
2890         dataerrln("FAIL: createInstance(ru_RU-Latin) - %s", u_errorName(status));
2891         delete fromRussian;
2892         return;
2893     }
2894     expect(*fromRussian, CharsToUnicodeString("\\u0430"), "a");
2895     delete fromRussian;
2896     // Locales on both sides of the ID: "a" is expected to become U+03B1.
2897     Transliterator *toGreek = Transliterator::createInstance("en-el", UTRANS_FORWARD, parseErr, status);
2898     if (U_SUCCESS(status)) {
2899         expect(*toGreek, "a", CharsToUnicodeString("\\u03B1"));
2900     } else {
2901         errln("FAIL: createInstance(en-el)");
2902     }
2903     delete toGreek;
2904 }
2905
2906 /**
2907  * Test title case handling of accent (should ignore accents):
2908  * the letters following U+0300 and the apostrophe are expected to stay lowercase.
2909  */
2910 void TransliteratorTest::TestTitleAccents(void) {
2911     UParseError parseErr;
2912     UErrorCode status = U_ZERO_ERROR;
2913     Transliterator *titler = Transliterator::createInstance("Title", UTRANS_FORWARD, parseErr, status);
2914     if (U_SUCCESS(status)) {
2915         expect(*titler, CharsToUnicodeString("a\\u0300b can't abe"), CharsToUnicodeString("A\\u0300b Can't Abe"));
2916     } else {
2917         errln("FAIL: createInstance(Title)");
2918     }
2919     delete titler;
2920 }
2921
2922 /**
2923  * Basic test of a locale resource based rule.
2924  */
2925 void TransliteratorTest::TestLocaleResource() {
2926     // One row per transliterator ID: the text to convert and the expected result.
2927     static const struct {
2928         const char* id; const char* from; const char* to;
2929     } CASES[] = {
2930         //{ "Latin-Greek/UNGEGN",  "b",               "\\u03bc\\u03c0" },
2931         { "Latin-el",              "b",               "\\u03bc\\u03c0" },
2932         { "Latin-Greek",           "b",               "\\u03B2" },
2933         { "Greek-Latin/UNGEGN",    "\\u03B2",         "v" },
2934         { "el-Latin",              "\\u03B2",         "v" },
2935         { "Greek-Latin",           "\\u03B2",         "b" },
2936     };
2937     const int32_t caseCount = sizeof(CASES) / sizeof(CASES[0]);
2938     for (int32_t row=0; row<caseCount; ++row) {
2939         UParseError parseErr;
2940         UErrorCode status = U_ZERO_ERROR;
2941         Transliterator *xlit = Transliterator::createInstance(CASES[row].id, UTRANS_FORWARD, parseErr, status);
2942         if (U_SUCCESS(status)) {
2943             expect(*xlit, CharsToUnicodeString(CASES[row].from), CharsToUnicodeString(CASES[row].to));
2944         } else {
2945             dataerrln((UnicodeString)"FAIL: createInstance(" + CASES[row].id + ") - " + u_errorName(status));
2946         }
2947         delete xlit;
2948     }
2949 }
2950
2951 /**
2952  * Make sure parse errors reference the right line.
2953  */
2954 void TransliteratorTest::TestParseError() {
2955     UErrorCode status = U_ZERO_ERROR;
2956     UParseError parseErr;
2957     // Part 1: parsing must fail and the reported context must contain "d << b".
2958     static const char* brokenRules =
2959         "a > b;\n"
2960         "# more stuff\n"
2961         "d << b;";
2962     delete Transliterator::createFromRules("ID", brokenRules, UTRANS_FORWARD, parseErr, status);
2963     if (U_SUCCESS(status)) {
2964         errln("FAIL: no syntax error");
2965     } else {
2966         // preContext and postContext joined by '|'
2967         UnicodeString context(parseErr.preContext);
2968         context.append((UChar)124/*|*/).append(parseErr.postContext);
2969         if (context.indexOf("d << b") < 0) {
2970             errln("FAIL: " + context);
2971         } else {
2972             logln("Ok: " + context);
2973         }
2974     }
2975     // Part 2: expect U_RULE_MASK_ERROR, with "a > x;" reported as the
2976     // pre-context and "ab > y;" as the post-context.
2977     static const char* maskedRules =
2978         "a>x;\n"
2979         "# more stuff\n"
2980         "ab>y;";
2981     status = U_ZERO_ERROR;
2982     delete Transliterator::createFromRules("ID", maskedRules, UTRANS_FORWARD, parseErr, status);
2983     if (status != U_RULE_MASK_ERROR) {
2984         errln("FAIL: returned %s instead of U_RULE_MASK_ERROR", u_errorName(status));
2985     } else if (UnicodeString(parseErr.preContext) != UnicodeString("a > x;")) {
2986         errln("FAIL: did not get expected precontext");
2987     } else if (UnicodeString(parseErr.postContext) != UnicodeString("ab > y;")) {
2988         errln("FAIL: did not get expected postcontext");
2989     }
2990 }
2991
2992 /**
2993  * Make sure sets on output are disallowed.
2994  */
2995 void TransliteratorTest::TestOutputSet() {
2996     const UnicodeString badRule("$set = [a-cm-n]; b > $set;");
2997     UErrorCode status = U_ZERO_ERROR;
2998     UParseError parseErr;
2999     delete Transliterator::createFromRules("ID", badRule, UTRANS_FORWARD, parseErr, status);
3000     if (U_SUCCESS(status)) {
3001         errln("FAIL: No syntax error");
3002         return;
3003     }
3004     // Expected outcome: log the error context as preContext|postContext.
3005     UnicodeString context(parseErr.preContext);
3006     context.append((UChar)124/*|*/).append(parseErr.postContext);
3007     logln("Ok: " + context);
3008 }
3009
3010 /**
3011  * Test the use variable range pragma, making sure that use of
3012  * variable range characters is detected and flagged as an error.
3013  */
3014 void TransliteratorTest::TestVariableRange() {
3015     // 'q' (0x71) lies inside the declared range 0x70..0x72 and is used in a rule.
3016     const UnicodeString badRule("use variable range 0x70 0x72; a > A; b > B; q > Q;");
3017     UErrorCode status = U_ZERO_ERROR;
3018     UParseError parseErr;
3019     delete Transliterator::createFromRules("ID", badRule, UTRANS_FORWARD, parseErr, status);
3020     if (U_SUCCESS(status)) {
3021         errln("FAIL: No syntax error");
3022         return;
3023     }
3024     // Expected outcome: log the error context as preContext|postContext.
3025     UnicodeString context(parseErr.preContext);
3026     context.append((UChar)124/*|*/).append(parseErr.postContext);
3027     logln("Ok: " + context);
3028 }
3028
3029 /**
3030  * Test invalid post context error handling
3031  */
3032 void TransliteratorTest::TestInvalidPostContext() {
3033     const UnicodeString badRule("a}b{c>d;");
3034     UErrorCode status = U_ZERO_ERROR;
3035     UParseError parseErr;
3036     delete Transliterator::createFromRules("ID", badRule, UTRANS_FORWARD, parseErr, status);
3037     if (U_SUCCESS(status)) {
3038         errln("FAIL: No syntax error");
3039         return;
3040     }
3041     // The reported context (preContext|postContext) must contain the bad rule text.
3042     UnicodeString context(parseErr.preContext);
3043     context.append((UChar)124/*|*/).append(parseErr.postContext);
3044     if (context.indexOf("a}b{c") < 0) {
3045         errln("FAIL: " + context);
3046     } else {
3047         logln("Ok: " + context);
3048     }
3049 }
3050
3051 /**
3052  * Test ID form variants
3053  *
3054  * Each row holds an ID to instantiate, the ID the instance should report
3055  * (NULL: same as the input) and the ID its inverse should report
3056  * (NULL: the input is expected to be rejected).
3057  */
3058 void TransliteratorTest::TestIDForms() {
3059     static const struct {
3060         const char* input; const char* canonical; const char* inverse;
3061     } FORMS[] = {
3062         { "NFC", NULL, "NFD" },
3063         { "nfd", NULL, "NFC" }, // make sure case is ignored
3064         { "Any-NFKD", NULL, "Any-NFKC" },
3065         { "Null", NULL, "Null" },
3066         { "-nfkc", "nfkc", "NFKD" },
3067         { "-nfkc/", "nfkc", "NFKD" },
3068         { "Latin-Greek/UNGEGN", NULL, "Greek-Latin/UNGEGN" },
3069         { "Greek/UNGEGN-Latin", "Greek-Latin/UNGEGN", "Latin-Greek/UNGEGN" },
3070         { "Bengali-Devanagari/", "Bengali-Devanagari", "Devanagari-Bengali" },
3071         { "Source-", NULL, NULL },
3072         { "Source/Variant-", NULL, NULL },
3073         { "Source-/Variant", NULL, NULL },
3074         { "/Variant", NULL, NULL },
3075         { "/Variant-", NULL, NULL },
3076         { "-/Variant", NULL, NULL },
3077         { "-/", NULL, NULL },
3078         { "-", NULL, NULL },
3079         { "/", NULL, NULL },
3080     };
3081     const int32_t formCount = sizeof(FORMS)/sizeof(FORMS[0]);
3082
3083     for (int32_t row=0; row<formCount; ++row) {
3084         const char* id = FORMS[row].input;
3085         const char* wantInverse = FORMS[row].inverse;
3086         const char* wantID = (FORMS[row].canonical != NULL) ? FORMS[row].canonical : id;
3087         UParseError parseErr;
3088         UErrorCode status = U_ZERO_ERROR;
3089         Transliterator *fwd =
3090             Transliterator::createInstance(id, UTRANS_FORWARD, parseErr, status);
3091         if (U_FAILURE(status)) {
3092             // A failure is the expected outcome for rows without an inverse ID.
3093             if (wantInverse == NULL) {
3094                 logln((UnicodeString)"Ok: getInstance(" + id +") => " + u_errorName(status));
3095             } else {
3096                 dataerrln((UnicodeString)"FAIL: Couldn't create " + id + " - " + u_errorName(status));
3097             }
3098             delete fwd;
3099             continue;
3100         }
3101         // The instance exists: build its inverse and compare both reported IDs.
3102         Transliterator *inv = fwd->createInverse(status);
3103         if (U_FAILURE(status)) {
3104             errln((UnicodeString)"FAIL: Couldn't create inverse of " + id);
3105         } else if (fwd->getID() == wantID &&
3106                    inv->getID() == wantInverse) {
3107             logln((UnicodeString)"Ok: " + id + ".getInverse() => " + wantInverse);
3108         } else {
3109             errln((UnicodeString)"FAIL: getInstance(" + id + ") => " +
3110                   fwd->getID() + " x getInverse() => " + inv->getID() +
3111                   ", expected " + wantInverse);
3112         }
3113         delete fwd;
3114         delete inv;
3115     }
3116 }
3117
3118 static const UChar SPACE[]   = {0x0020, 0};  // " "  (used by checkRules to strip spaces)
3119 static const UChar NEWLINE[] = {0x000A, 0};  // "\n" (line feed)
3120 static const UChar RETURN[]  = {0x000D, 0};  // "\r" (carriage return)
3121 static const UChar EMPTY[]   = {0};          // ""   (replacement text)
3122
3123 // Compares t2.toRules() with testRulesForward after removing spaces from both
3124 // (and CR/LF from the generated text); on mismatch reports label via errln().
3125 void TransliteratorTest::checkRules(const UnicodeString& label, Transliterator& t2,
3126                                     const UnicodeString& testRulesForward) {
3127     UnicodeString generated;
3128     t2.toRules(generated, TRUE);
3129     generated.findAndReplace(SPACE, EMPTY).findAndReplace(NEWLINE, EMPTY).findAndReplace(RETURN, EMPTY);
3130     UnicodeString wanted(testRulesForward);
3131     wanted.findAndReplace(SPACE, EMPTY);
3132     if (generated == wanted) {
3133         return;
3134     }
3135     errln(label);
3136     logln((UnicodeString)"GENERATED RULES: " + generated);
3137     logln((UnicodeString)"SHOULD BE:       " + testRulesForward);
3138 }
3139
3140 /**
3141  * Mark's toRules test.
3142  *
3143  * Builds a forward and a reverse transliterator from one rule set, checks
3144  * a sample conversion each way, then compares the toRules() output of
3145  * each with the expected rule text via checkRules().
3146  */
3147 void TransliteratorTest::TestToRulesMark() {
3148     // Rule set both transliterators are created from.
3149     const UnicodeString bothWays(
3150         "::[[:Latin:][:Mark:]];"
3151         "::NFKD (NFC);"
3152         "::Lower (Lower);"
3153         "a <> \\u03B1;" // alpha
3154         "::NFKC (NFD);"
3155         "::Upper (Lower);"
3156         "::Lower ();"
3157         "::([[:Greek:][:Mark:]]);", -1, US_INV);
3158     // Expected toRules() text of the forward transliterator.
3159     const UnicodeString forwardOnly(
3160         "::[[:Latin:][:Mark:]];"
3161         "::NFKD(NFC);"
3162         "::Lower(Lower);"
3163         "a > \\u03B1;"
3164         "::NFKC(NFD);"
3165         "::Upper (Lower);"
3166         "::Lower ();", -1, US_INV);
3167     // Expected toRules() text of the reverse transliterator.
3168     const UnicodeString backwardOnly(
3169         "::[[:Greek:][:Mark:]];"
3170         "::Lower (Upper);"
3171         "::NFD(NFKC);"
3172         "\\u03B1 > a;"
3173         "::Lower(Lower);"
3174         "::NFC(NFKD);", -1, US_INV);
3175     const UnicodeString latin = CharsToUnicodeString("\\u00E1"); // a-acute
3176     const UnicodeString greek = CharsToUnicodeString("\\u03AC"); // alpha-acute
3177
3178     UParseError parseErr;
3179     UErrorCode status = U_ZERO_ERROR;
3180     Transliterator *forward = Transliterator::createFromRules("source-target", bothWays, UTRANS_FORWARD, parseErr, status);
3181     Transliterator *reverse = Transliterator::createFromRules("target-source", bothWays, UTRANS_REVERSE, parseErr, status);
3182     if (U_FAILURE(status)) {
3183         delete forward;
3184         delete reverse;
3185         dataerrln((UnicodeString)"FAIL: createFromRules => " + u_errorName(status));
3186         return;
3187     }
3188
3189     expect(*forward, latin, greek);
3190     expect(*reverse, greek, latin);
3191     checkRules("Failed toRules FORWARD", *forward, forwardOnly);
3192     checkRules("Failed toRules BACKWARD", *reverse, backwardOnly);
3193     delete forward;
3194     delete reverse;
3195 }
3195
3196 /**
3197  * Test Escape and Unescape transliterators.
3198  */
3199 void TransliteratorTest::TestEscape() {
3200     UParseError pe;
3201     UErrorCode ec;
3202     Transliterator *t;
3203
3204     ec = U_ZERO_ERROR;
3205     t = Transliterator::createInstance("Hex-Any", UTRANS_FORWARD, pe, ec);
3206     if (U_FAILURE(ec)) {
3207         errln((UnicodeString)"FAIL: createInstance");
3208     } else {
3209         expect(*t,
3210                UNICODE_STRING_SIMPLE("\\x{40}\\U00000031&#x32;&#81;"),
3211                "@12Q");
3212     }
3213     delete t;
3214
3215     ec = U_ZERO_ERROR;
3216     t = Transliterator::createInstance("Any-Hex/C", UTRANS_FORWARD, pe, ec);
3217     if (U_FAILURE(ec)) {
3218         errln((UnicodeString)"FAIL: createInstance");
3219     } else {
3220         expect(*t,
3221                CharsToUnicodeString("A\\U0010BEEF\\uFEED"),
3222                UNICODE_STRING_SIMPLE("\\u0041\\U0010BEEF\\uFEED"));
3223     }
3224     delete t;
3225
3226     ec = U_ZERO_ERROR;
3227     t = Transliterator::createInstance("Any-Hex/Java", UTRANS_FORWARD, pe, ec);
3228     if (U_FAILURE(ec)) {
3229         errln((UnicodeString)"FAIL: createInstance");
3230     } else {
3231         expect(*t,
3232                CharsToUnicodeString("A\\U0010BEEF\\uFEED"),
3233                UNICODE_STRING_SIMPLE("\\u0041\\uDBEF\\uDEEF\\uFEED"));
3234     }
3235     delete t;
3236
3237     ec = U_ZERO_ERROR;
3238     t = Transliterator::createInstance("Any-Hex/Perl", UTRANS_FORWARD, pe, ec);
3239     if (U_FAILURE(ec)) {
3240         errln((UnicodeString)"FAIL: createInstance");
3241     } else {
3242         expect(*t,
3243                CharsToUnicodeString("A\\U0010BEEF\\uFEED"),
3244                UNICODE_STRING_SIMPLE("\\x{41}\\x{10BEEF}\\x{FEED}"));
3245     }
3246     delete t;
3247 }
3248
3249
3250 void TransliteratorTest::TestAnchorMasking(){
3251     UnicodeString rule ("^a > Q; a > q;");
3252     UErrorCode status= U_ZERO_ERROR;
3253     UParseError parseError;
3254
3255     Transliterator* t = Transliterator::createFromRules("ID", rule, UTRANS_FORWARD,parseError,status);
3256     if(U_FAILURE(status)){
3257         errln(UnicodeString("FAIL: ") + "ID" +
3258               ".createFromRules() => bad rules" +
3259               /*", parse error " + parseError.code +*/
3260               ", line " + parseError.line +
3261               ", offset " + parseError.offset +
3262               ", context " + prettify(parseError.preContext, TRUE) +
3263               ", rules: " + prettify(rule, TRUE));
3264     }
3265     delete t;
3266 }
3267
3268 /**
3269  * Make sure display names of variants look reasonable.
3270  */
3271 void TransliteratorTest::TestDisplayName() {
3272 #if UCONFIG_NO_FORMATTING
3273     logln("Skipping, UCONFIG_NO_FORMATTING is set\n");
3274     return;
3275 #else
3276     static const char* DATA[] = {
3277         // ID, forward name, reverse name
3278         // Update the text as necessary -- the important thing is
3279         // not the text itself, but how various cases are handled.
3280
3281         // Basic test
3282         "Any-Hex", "Any to Hex Escape", "Hex Escape to Any",
3283
3284         // Variants
3285         "Any-Hex/Perl", "Any to Hex Escape/Perl", "Hex Escape to Any/Perl",
3286
3287         // Target-only IDs
3288         "NFC", "Any to NFC", "Any to NFD",
3289     };
3290
3291     int32_t DATA_length = sizeof(DATA) / sizeof(DATA[0]);
3292
3293     Locale US("en", "US");
3294
3295     for (int32_t i=0; i<DATA_length; i+=3) {
3296         UnicodeString name;
3297         Transliterator::getDisplayName(DATA[i], US, name);
3298         if (name != DATA[i+1]) {
3299             dataerrln((UnicodeString)"FAIL: " + DATA[i] + ".getDisplayName() => " +
3300                   name + ", expected " + DATA[i+1]);
3301         } else {
3302             logln((UnicodeString)"Ok: " + DATA[i] + ".getDisplayName() => " + name);
3303         }
3304         UErrorCode ec = U_ZERO_ERROR;
3305         UParseError pe;
3306         Transliterator *t = Transliterator::createInstance(DATA[i], UTRANS_REVERSE, pe, ec);
3307         if (U_FAILURE(ec)) {
3308             delete t;
3309             dataerrln("FAIL: createInstance failed - %s", u_errorName(ec));
3310             continue;
3311         }
3312         name = Transliterator::getDisplayName(t->getID(), US, name);
3313         if (name != DATA[i+2]) {
3314             dataerrln((UnicodeString)"FAIL: " + t->getID() + ".getDisplayName() => " +
3315                   name + ", expected " + DATA[i+2]);
3316         } else {
3317             logln((UnicodeString)"Ok: " + t->getID() + ".getDisplayName() => " + name);
3318         }
3319         delete t;
3320     }
3321 #endif
3322 }
3323
3324 void TransliteratorTest::TestSpecialCases(void) {
3325     const UnicodeString registerRules[] = {
3326         "Any-Dev1", "x > X; y > Y;",
3327         "Any-Dev2", "XY > Z",
3328         "Greek-Latin/FAKE",
3329             CharsToUnicodeString
3330             ("[^[:L:][:M:]] { \\u03bc\\u03c0 > b ; \\u03bc\\u03c0 } [^[:L:][:M:]] > b ; [^[:L:][:M:]] { [\\u039c\\u03bc][\\u03a0\\u03c0] > B ; [\\u039c\\u03bc][\\u03a0\\u03c0] } [^[:L:][:M:]] > B ;"),
3331         "" // END MARKER
3332     };
3333
3334     const UnicodeString testCases[] = {
3335         // NORMALIZATION
3336         // should add more test cases
3337         "NFD" , CharsToUnicodeString("a\\u0300 \\u00E0 \\u1100\\u1161 \\uFF76\\uFF9E\\u03D3"), "",
3338         "NFC" , CharsToUnicodeString("a\\u0300 \\u00E0 \\u1100\\u1161 \\uFF76\\uFF9E\\u03D3"), "",
3339         "NFKD", CharsToUnicodeString("a\\u0300 \\u00E0 \\u1100\\u1161 \\uFF76\\uFF9E\\u03D3"), "",
3340         "NFKC", CharsToUnicodeString("a\\u0300 \\u00E0 \\u1100\\u1161 \\uFF76\\uFF9E\\u03D3"), "",
3341
3342         // mp -> b BUG
3343         "Greek-Latin/UNGEGN", CharsToUnicodeString("(\\u03BC\\u03C0)"), "(b)",
3344         "Greek-Latin/FAKE", CharsToUnicodeString("(\\u03BC\\u03C0)"), "(b)",
3345
3346         // check for devanagari bug
3347         "nfd;Dev1;Dev2;nfc", "xy", "Z",
3348
3349         // ff, i, dotless-i, I, dotted-I, LJLjlj deseret deeDEE
3350         "Title", CharsToUnicodeString("ab'cD ffi\\u0131I\\u0130 \\u01C7\\u01C8\\u01C9 ") + DESERET_dee + DESERET_DEE,
3351                  CharsToUnicodeString("Ab'cd Ffi\\u0131ii\\u0307 \\u01C8\\u01C9\\u01C9 ") + DESERET_DEE + DESERET_dee,
3352
3353         //TODO: enable this test once Titlecase works right
3354         /*
3355         "Title", CharsToUnicodeString("\\uFB00i\\u0131I\\u0130 \\u01C7\\u01C8\\u01C9 ") + DESERET_dee + DESERET_DEE,
3356                  CharsToUnicodeString("Ffi\\u0131ii \\u01C8\\u01C9\\u01C9 ") + DESERET_DEE + DESERET_dee,
3357                  */
3358         "Upper", CharsToUnicodeString("ab'cD \\uFB00i\\u0131I\\u0130 \\u01C7\\u01C8\\u01C9 ") + DESERET_dee + DESERET_DEE,
3359                  CharsToUnicodeString("AB'CD FFIII\\u0130 \\u01C7\\u01C7\\u01C7 ") + DESERET_DEE + DESERET_DEE,
3360         "Lower", CharsToUnicodeString("ab'cD \\uFB00i\\u0131I\\u0130 \\u01C7\\u01C8\\u01C9 ") + DESERET_dee + DESERET_DEE,
3361                  CharsToUnicodeString("ab'cd \\uFB00i\\u0131ii\\u0307 \\u01C9\\u01C9\\u01C9 ") + DESERET_dee + DESERET_dee,
3362
3363         "Upper", CharsToUnicodeString("ab'cD \\uFB00i\\u0131I\\u0130 \\u01C7\\u01C8\\u01C9 ") + DESERET_dee + DESERET_DEE, "",
3364         "Lower", CharsToUnicodeString("ab'cD \\uFB00i\\u0131I\\u0130 \\u01C7\\u01C8\\u01C9 ") + DESERET_dee + DESERET_DEE, "",
3365
3366          // FORMS OF S
3367         "Greek-Latin/UNGEGN",  CharsToUnicodeString("\\u03C3 \\u03C3\\u03C2 \\u03C2\\u03C3"),
3368                                CharsToUnicodeString("s ss s\\u0331s\\u0331") ,
3369         "Latin-Greek/UNGEGN",  CharsToUnicodeString("s ss s\\u0331s\\u0331"),
3370                                CharsToUnicodeString("\\u03C3 \\u03C3\\u03C2 \\u03C2\\u03C3") ,
3371         "Greek-Latin",  CharsToUnicodeString("\\u03C3 \\u03C3\\u03C2 \\u03C2\\u03C3"),
3372                         CharsToUnicodeString("s ss s\\u0331s\\u0331") ,
3373         "Latin-Greek",  CharsToUnicodeString("s ss s\\u0331s\\u0331"),
3374                         CharsToUnicodeString("\\u03C3 \\u03C3\\u03C2 \\u03C2\\u03C3"),
3375         // Tatiana bug
3376         // Upper: TAT\\u02B9\\u00C2NA
3377         // Lower: tat\\u02B9\\u00E2na
3378         // Title: Tat\\u02B9\\u00E2na
3379         "Upper", CharsToUnicodeString("tat\\u02B9\\u00E2na"),
3380                  CharsToUnicodeString("TAT\\u02B9\\u00C2NA"),
3381         "Lower", CharsToUnicodeString("TAT\\u02B9\\u00C2NA"),
3382                  CharsToUnicodeString("tat\\u02B9\\u00E2na"),
3383         "Title", CharsToUnicodeString("tat\\u02B9\\u00E2na"),
3384                  CharsToUnicodeString("Tat\\u02B9\\u00E2na"),
3385
3386         "" // END MARKER
3387     };
3388
3389     UParseError pos;
3390     int32_t i;
3391     for (i = 0; registerRules[i].length()!=0; i+=2) {
3392         UErrorCode status = U_ZERO_ERROR;
3393
3394         Transliterator *t = Transliterator::createFromRules(registerRules[0+i],
3395             registerRules[i+1], UTRANS_FORWARD, pos, status);
3396         if (U_FAILURE(status)) {
3397             dataerrln("Fails: Unable to create the transliterator from rules. - %s", u_errorName(status));
3398         } else {
3399             Transliterator::registerInstance(t);
3400         }
3401     }
3402     for (i = 0; testCases[i].length()!=0; i+=3) {
3403         UErrorCode ec = U_ZERO_ERROR;
3404         UParseError pe;
3405         const UnicodeString& name = testCases[i];
3406         Transliterator *t = Transliterator::createInstance(name, UTRANS_FORWARD, pe, ec);
3407         if (U_FAILURE(ec)) {
3408             dataerrln((UnicodeString)"FAIL: Couldn't create " + name + " - " + u_errorName(ec));
3409             delete t;
3410             continue;
3411         }
3412         const UnicodeString& id = t->getID();
3413         const UnicodeString& source = testCases[i+1];
3414         UnicodeString target;
3415
3416         // Automatic generation of targets, to make it simpler to add test cases (and more fail-safe)
3417
3418         if (testCases[i+2].length() > 0) {
3419             target = testCases[i+2];
3420         } else if (0==id.caseCompare("NFD", U_FOLD_CASE_DEFAULT)) {
3421             Normalizer::normalize(source, UNORM_NFD, 0, target, ec);
3422         } else if (0==id.caseCompare("NFC", U_FOLD_CASE_DEFAULT)) {
3423             Normalizer::normalize(source, UNORM_NFC, 0, target, ec);
3424         } else if (0==id.caseCompare("NFKD", U_FOLD_CASE_DEFAULT)) {
3425             Normalizer::normalize(source, UNORM_NFKD, 0, target, ec);
3426         } else if (0==id.caseCompare("NFKC", U_FOLD_CASE_DEFAULT)) {
3427             Normalizer::normalize(source, UNORM_NFKC, 0, target, ec);
3428         } else if (0==id.caseCompare("Lower", U_FOLD_CASE_DEFAULT)) {
3429             target = source;
3430             target.toLower(Locale::getUS());
3431         } else if (0==id.caseCompare("Upper", U_FOLD_CASE_DEFAULT)) {
3432             target = source;
3433             target.toUpper(Locale::getUS());
3434         }
3435         if (U_FAILURE(ec)) {
3436             errln((UnicodeString)"FAIL: Internal error normalizing " + source);
3437             continue;
3438         }
3439
3440         expect(*t, source, target);
3441         delete t;
3442     }
3443     for (i = 0; registerRules[i].length()!=0; i+=2) {
3444         Transliterator::unregister(registerRules[i]);
3445     }
3446 }
3447
/**
 * Write ch into buffer as a lowercase-hex escape: "\uXXXX" (at least four
 * digits) when ch <= 0xFFFF, otherwise "\UXXXXXXXX" (eight digits).
 *
 * @param ch      code point to format
 * @param buffer  destination; the caller must supply enough room for the
 *                escape plus the terminating NUL
 * @return        buffer
 */
char* Char32ToEscapedChars(UChar32 ch, char* buffer) {
    const char* const format = (ch <= 0xFFFF) ? "\\u%04x" : "\\U%08x";
    sprintf(buffer, format, (int)ch);
    return buffer;
}
3456
3457 void TransliteratorTest::TestSurrogateCasing (void) {
3458     // check that casing handles surrogates
3459     // titlecase is currently defective
3460     char buffer[20];
3461     UChar buffer2[20];
3462     UChar32 dee;
3463     UTF_GET_CHAR(DESERET_dee,0, 0, DESERET_dee.length(), dee);
3464     UnicodeString DEE(u_totitle(dee));
3465     if (DEE != DESERET_DEE) {
3466         err("Fails titlecase of surrogates");
3467         err(Char32ToEscapedChars(dee, buffer));
3468         err(", ");
3469         errln(Char32ToEscapedChars(DEE.char32At(0), buffer));
3470     }
3471
3472     UnicodeString deeDEETest=DESERET_dee + DESERET_DEE;
3473     UnicodeString deedeeTest = DESERET_dee + DESERET_dee;
3474     UnicodeString DEEDEETest = DESERET_DEE + DESERET_DEE;
3475     UErrorCode status= U_ZERO_ERROR;
3476
3477     u_strToUpper(buffer2, 20, deeDEETest.getBuffer(), deeDEETest.length(), NULL, &status);
3478     if (U_FAILURE(status) || (UnicodeString(buffer2)!= DEEDEETest)) {
3479         errln("Fails: Can't uppercase surrogates.");
3480     }
3481
3482     status= U_ZERO_ERROR;
3483     u_strToLower(buffer2, 20, deeDEETest.getBuffer(), deeDEETest.length(), NULL, &status);
3484     if (U_FAILURE(status) || (UnicodeString(buffer2)!= deedeeTest)) {
3485         errln("Fails: Can't lowercase surrogates.");
3486     }
3487 }
3488
3489 static void _trans(Transliterator& t, const UnicodeString& src,
3490                    UnicodeString& result) {
3491     result = src;
3492     t.transliterate(result);
3493 }
3494
3495 static void _trans(const UnicodeString& id, const UnicodeString& src,
3496                    UnicodeString& result, UErrorCode ec) {
3497     UParseError pe;
3498     Transliterator *t = Transliterator::createInstance(id, UTRANS_FORWARD, pe, ec);
3499     if (U_SUCCESS(ec)) {
3500         _trans(*t, src, result);
3501     }
3502     delete t;
3503 }
3504
3505 static UnicodeString _findMatch(const UnicodeString& source,
3506                                        const UnicodeString* pairs) {
3507     UnicodeString empty;
3508     for (int32_t i=0; pairs[i].length() > 0; i+=2) {
3509         if (0==source.caseCompare(pairs[i], U_FOLD_CASE_DEFAULT)) {
3510             return pairs[i+1];
3511         }
3512     }
3513     return empty;
3514 }
3515
3516 // Check to see that incremental gets at least part way through a reasonable string.
3517
3518 void TransliteratorTest::TestIncrementalProgress(void) {
3519     UErrorCode ec = U_ZERO_ERROR;
3520     UnicodeString latinTest = "The Quick Brown Fox.";
3521     UnicodeString devaTest;
3522     _trans("Latin-Devanagari", latinTest, devaTest, ec);
3523     UnicodeString kataTest;
3524     _trans("Latin-Katakana", latinTest, kataTest, ec);
3525     if (U_FAILURE(ec)) {
3526         errln("FAIL: Internal error");
3527         return;
3528     }
3529     const UnicodeString tests[] = {
3530         "Any", latinTest,
3531         "Latin", latinTest,
3532         "Halfwidth", latinTest,
3533         "Devanagari", devaTest,
3534         "Katakana", kataTest,
3535         "" // END MARKER
3536     };
3537
3538     UnicodeString test("The Quick Brown Fox Jumped Over The Lazy Dog.");
3539     int32_t i = 0, j=0, k=0;
3540     int32_t sources = Transliterator::countAvailableSources();
3541     for (i = 0; i < sources; i++) {
3542         UnicodeString source;
3543         Transliterator::getAvailableSource(i, source);
3544         UnicodeString test = _findMatch(source, tests);
3545         if (test.length() == 0) {
3546             logln((UnicodeString)"Skipping " + source + "-X");
3547             continue;
3548         }
3549         int32_t targets = Transliterator::countAvailableTargets(source);
3550         for (j = 0; j < targets; j++) {
3551             UnicodeString target;
3552             Transliterator::getAvailableTarget(j, source, target);
3553             int32_t variants = Transliterator::countAvailableVariants(source, target);
3554             for (k =0; k< variants; k++) {
3555                 UnicodeString variant;
3556                 UParseError err;
3557                 UErrorCode status = U_ZERO_ERROR;
3558
3559                 Transliterator::getAvailableVariant(k, source, target, variant);
3560                 UnicodeString id = source + "-" + target + "/" + variant;
3561
3562                 Transliterator *t = Transliterator::createInstance(id, UTRANS_FORWARD, err, status);
3563                 if (U_FAILURE(status)) {
3564                     dataerrln((UnicodeString)"FAIL: Could not create " + id);
3565                     delete t;
3566                     continue;
3567                 }
3568                 status = U_ZERO_ERROR;
3569                 CheckIncrementalAux(t, test);
3570
3571                 UnicodeString rev;
3572                 _trans(*t, test, rev);
3573                 Transliterator *inv = t->createInverse(status);
3574                 if (U_FAILURE(status)) {
3575 #if UCONFIG_NO_BREAK_ITERATION
3576                     // If UCONFIG_NO_BREAK_ITERATION is on, then only Thai should fail.
3577                     if (id.compare((UnicodeString)"Latin-Thai/") != 0)
3578 #endif
3579                         errln((UnicodeString)"FAIL: Could not create inverse of " + id);
3580
3581                     delete t;
3582                     delete inv;
3583                     continue;
3584                 }
3585                 CheckIncrementalAux(inv, rev);
3586                 delete t;
3587                 delete inv;
3588             }
3589         }
3590     }
3591 }
3592
3593 void TransliteratorTest::CheckIncrementalAux(const Transliterator* t,
3594                                                       const UnicodeString& input) {
3595     UErrorCode ec = U_ZERO_ERROR;
3596     UTransPosition pos;
3597     UnicodeString test = input;
3598
3599     pos.contextStart = 0;
3600     pos.contextLimit = input.length();
3601     pos.start = 0;
3602     pos.limit = input.length();
3603
3604     t->transliterate(test, pos, ec);
3605     if (U_FAILURE(ec)) {
3606         errln((UnicodeString)"FAIL: transliterate() error " + u_errorName(ec));
3607         return;
3608     }
3609     UBool gotError = FALSE;
3610
3611     // we have a few special cases. Any-Remove (pos.start = 0, but also = limit) and U+XXXXX?X?
3612
3613     if (pos.start == 0 && pos.limit != 0 && t->getID() != "Hex-Any/Unicode") {
3614         errln((UnicodeString)"No Progress, " +
3615               t->getID() + ": " + formatInput(test, input, pos));
3616         gotError = TRUE;
3617     } else {
3618         logln((UnicodeString)"PASS Progress, " +
3619               t->getID() + ": " + formatInput(test, input, pos));
3620     }
3621     t->finishTransliteration(test, pos);
3622     if (pos.start != pos.limit) {
3623         errln((UnicodeString)"Incomplete, " +
3624               t->getID() + ": " + formatInput(test, input, pos));
3625         gotError = TRUE;
3626     }
3627 }
3628
3629 void TransliteratorTest::TestFunction() {
3630     // Careful with spacing and ';' here:  Phrase this exactly
3631     // as toRules() is going to return it.  If toRules() changes
3632     // with regard to spacing or ';', then adjust this string.
3633     UnicodeString rule =
3634         "([:Lu:]) > $1 '(' &Lower( $1 ) '=' &Hex( &Any-Lower( $1 ) ) ')';";
3635
3636     UParseError pe;
3637     UErrorCode ec = U_ZERO_ERROR;
3638     Transliterator *t = Transliterator::createFromRules("Test", rule, UTRANS_FORWARD, pe, ec);
3639     if (t == NULL) {
3640         dataerrln("FAIL: createFromRules failed - %s", u_errorName(ec));
3641         return;
3642     }
3643
3644     UnicodeString r;
3645     t->toRules(r, TRUE);
3646     if (r == rule) {
3647         logln((UnicodeString)"OK: toRules() => " + r);
3648     } else {
3649         errln((UnicodeString)"FAIL: toRules() => " + r +
3650               ", expected " + rule);
3651     }
3652
3653     expect(*t, "The Quick Brown Fox",
3654            UNICODE_STRING_SIMPLE("T(t=\\u0074)he Q(q=\\u0071)uick B(b=\\u0062)rown F(f=\\u0066)ox"));
3655
3656     delete t;
3657 }
3658
3659 void TransliteratorTest::TestInvalidBackRef(void) {
3660     UnicodeString rule =  ". > $1;";
3661     UnicodeString rule2 =CharsToUnicodeString("(.) <> &hex/unicode($1) &name($1); . > $1; [{}] >\\u0020;");
3662     UParseError pe;
3663     UErrorCode ec = U_ZERO_ERROR;
3664     Transliterator *t = Transliterator::createFromRules("Test", rule, UTRANS_FORWARD, pe, ec);
3665     Transliterator *t2 = Transliterator::createFromRules("Test2", rule2, UTRANS_FORWARD, pe, ec);
3666
3667     if (t != NULL) {
3668         errln("FAIL: createFromRules should have returned NULL");
3669         delete t;
3670     }
3671
3672     if (t2 != NULL) {
3673         errln("FAIL: createFromRules should have returned NULL");
3674         delete t2;
3675     }
3676
3677     if (U_SUCCESS(ec)) {
3678         errln("FAIL: Ok: . > $1; => no error");
3679     } else {
3680         logln((UnicodeString)"Ok: . > $1; => " + u_errorName(ec));
3681     }
3682 }
3683
3684 void TransliteratorTest::TestMulticharStringSet() {
3685     // Basic testing
3686     const char* rule =
3687         "       [{aa}]       > x;"
3688         "         a          > y;"
3689         "       [b{bc}]      > z;"
3690         "[{gd}] { e          > q;"
3691         "         e } [{fg}] > r;" ;
3692
3693     UParseError pe;
3694     UErrorCode ec = U_ZERO_ERROR;
3695     Transliterator* t = Transliterator::createFromRules("Test", rule, UTRANS_FORWARD, pe, ec);
3696     if (t == NULL || U_FAILURE(ec)) {
3697         delete t;
3698         errln("FAIL: createFromRules failed");
3699         return;
3700     }
3701
3702     expect(*t, "a aa ab bc d gd de gde gdefg ddefg",
3703            "y x yz z d gd de gdq gdqfg ddrfg");
3704     delete t;
3705
3706     // Overlapped string test.  Make sure that when multiple
3707     // strings can match that the longest one is matched.
3708     rule =
3709         "    [a {ab} {abc}]    > x;"
3710         "           b          > y;"
3711         "           c          > z;"
3712         " q [t {st} {rst}] { e > p;" ;
3713
3714     t = Transliterator::createFromRules("Test", rule, UTRANS_FORWARD, pe, ec);
3715     if (t == NULL || U_FAILURE(ec)) {
3716         delete t;
3717         errln("FAIL: createFromRules failed");
3718         return;
3719     }
3720
3721     expect(*t, "a ab abc qte qste qrste",
3722            "x x x qtp qstp qrstp");
3723     delete t;
3724 }
3725
3726 // vvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvv
3727 // BEGIN TestUserFunction support factory
3728
3729 Transliterator* _TUFF[4];
3730 UnicodeString* _TUFID[4];
3731
3732 static Transliterator* U_EXPORT2 _TUFFactory(const UnicodeString& /*ID*/,
3733                                    Transliterator::Token context) {
3734     return _TUFF[context.integer]->clone();
3735 }
3736
3737 static void _TUFReg(const UnicodeString& ID, Transliterator* t, int32_t n) {
3738     _TUFF[n] = t;
3739     _TUFID[n] = new UnicodeString(ID);
3740     Transliterator::registerFactory(ID, _TUFFactory, Transliterator::integerToken(n));
3741 }
3742
3743 static void _TUFUnreg(int32_t n) {
3744     if (_TUFF[n] != NULL) {
3745         Transliterator::unregister(*_TUFID[n]);
3746         delete _TUFF[n];
3747         delete _TUFID[n];
3748     }
3749 }
3750
3751 // END TestUserFunction support factory
3752 // ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
3753
3754 /**
3755  * Test that user-registered transliterators can be used under function
3756  * syntax.
3757  */
3758 void TransliteratorTest::TestUserFunction() {
3759
3760     Transliterator* t;
3761     UParseError pe;
3762     UErrorCode ec = U_ZERO_ERROR;
3763
3764     // Setup our factory
3765     int32_t i;
3766     for (i=0; i<4; ++i) {
3767         _TUFF[i] = NULL;
3768     }
3769
3770     // There's no need to register inverses if we don't use them
3771     t = Transliterator::createFromRules("gif",
3772                                         UNICODE_STRING_SIMPLE("'\\'u(..)(..) > '<img src=\"http://www.unicode.org/gifs/24/' $1 '/U' $1$2 '.gif\">';"),
3773                                         UTRANS_FORWARD, pe, ec);
3774     if (t == NULL || U_FAILURE(ec)) {
3775         dataerrln((UnicodeString)"FAIL: createFromRules gif " + u_errorName(ec));
3776         return;
3777     }
3778     _TUFReg("Any-gif", t, 0);
3779
3780     t = Transliterator::createFromRules("RemoveCurly",
3781                                         UNICODE_STRING_SIMPLE("[\\{\\}] > ; '\\N' > ;"),
3782                                         UTRANS_FORWARD, pe, ec);
3783     if (t == NULL || U_FAILURE(ec)) {
3784         errln((UnicodeString)"FAIL: createFromRules RemoveCurly " + u_errorName(ec));
3785         goto FAIL;
3786     }
3787     expect(*t, UNICODE_STRING_SIMPLE("\\N{name}"), "name");
3788     _TUFReg("Any-RemoveCurly", t, 1);
3789
3790     logln("Trying &hex");
3791     t = Transliterator::createFromRules("hex2",
3792                                         "(.) > &hex($1);",
3793                                         UTRANS_FORWARD, pe, ec);
3794     if (t == NULL || U_FAILURE(ec)) {
3795         errln("FAIL: createFromRules");
3796         goto FAIL;
3797     }
3798     logln("Registering");
3799     _TUFReg("Any-hex2", t, 2);
3800     t = Transliterator::createInstance("Any-hex2", UTRANS_FORWARD, ec);
3801     if (t == NULL || U_FAILURE(ec)) {
3802         errln((UnicodeString)"FAIL: createInstance Any-hex2 " + u_errorName(ec));
3803         goto FAIL;
3804     }
3805     expect(*t, "abc", UNICODE_STRING_SIMPLE("\\u0061\\u0062\\u0063"));
3806     delete t;
3807
3808     logln("Trying &gif");
3809     t = Transliterator::createFromRules("gif2",
3810                                         "(.) > &Gif(&Hex2($1));",
3811                                         UTRANS_FORWARD, pe, ec);
3812     if (t == NULL || U_FAILURE(ec)) {
3813         errln((UnicodeString)"FAIL: createFromRules gif2 " + u_errorName(ec));
3814         goto FAIL;
3815     }
3816     logln("Registering");
3817     _TUFReg("Any-gif2", t, 3);
3818     t = Transliterator::createInstance("Any-gif2", UTRANS_FORWARD, ec);
3819     if (t == NULL || U_FAILURE(ec)) {
3820         errln((UnicodeString)"FAIL: createInstance Any-gif2 " + u_errorName(ec));
3821         goto FAIL;
3822     }
3823     expect(*t, "ab", "<img src=\"http://www.unicode.org/gifs/24/00/U0061.gif\">"
3824            "<img src=\"http://www.unicode.org/gifs/24/00/U0062.gif\">");
3825     delete t;
3826
3827     // Test that filters are allowed after &
3828     t = Transliterator::createFromRules("test",
3829                                         "(.) > &Hex($1) ' ' &RemoveCurly(&Name($1)) ' ';",
3830                                         UTRANS_FORWARD, pe, ec);
3831     if (t == NULL || U_FAILURE(ec)) {
3832         errln((UnicodeString)"FAIL: createFromRules test " + u_errorName(ec));
3833         goto FAIL;
3834     }
3835     expect(*t, "abc",
3836            UNICODE_STRING_SIMPLE("\\u0061 LATIN SMALL LETTER A \\u0062 LATIN SMALL LETTER B \\u0063 LATIN SMALL LETTER C "));
3837     delete t;
3838
3839  FAIL:
3840     for (i=0; i<4; ++i) {
3841         _TUFUnreg(i);
3842     }
3843 }
3844
3845 /**
3846  * Test the Any-X transliterators.
3847  */
3848 void TransliteratorTest::TestAnyX(void) {
3849     UParseError parseError;
3850     UErrorCode status = U_ZERO_ERROR;
3851     Transliterator* anyLatin =
3852         Transliterator::createInstance("Any-Latin", UTRANS_FORWARD, parseError, status);
3853     if (anyLatin==0) {
3854         dataerrln("FAIL: createInstance returned NULL - %s", u_errorName(status));
3855         delete anyLatin;
3856         return;
3857     }
3858
3859     expect(*anyLatin,
3860            CharsToUnicodeString("greek:\\u03B1\\u03B2\\u03BA\\u0391\\u0392\\u039A hiragana:\\u3042\\u3076\\u304F cyrillic:\\u0430\\u0431\\u0446"),
3861            CharsToUnicodeString("greek:abkABK hiragana:abuku cyrillic:abc"));
3862
3863     delete anyLatin;
3864 }
3865
3866 /**
3867  * Test Any-X transliterators with sample letters from all scripts.
3868  */
3869 void TransliteratorTest::TestAny(void) {
3870     UErrorCode status = U_ZERO_ERROR;
3871     // Note: there is a lot of implict construction of UnicodeStrings from (char *) in
3872     //       function call parameters going on in this test.
3873     UnicodeSet alphabetic("[:alphabetic:]", status);
3874     if (U_FAILURE(status)) {
3875         dataerrln("Failure: file %s, line %d, status = %s", __FILE__, __LINE__, u_errorName(status));
3876         return;
3877     }
3878     alphabetic.freeze();
3879
3880     UnicodeString testString;
3881     for (int32_t i = 0; i < USCRIPT_CODE_LIMIT; i++) {
3882         const char *scriptName = uscript_getShortName((UScriptCode)i);
3883         if (scriptName == NULL) {
3884             errln("Failure: file %s, line %d: Script Code %d is invalid, ", __FILE__, __LINE__, i);
3885             return;
3886         }
3887
3888         UnicodeSet sample;
3889         sample.applyPropertyAlias("script", scriptName, status);
3890         if (U_FAILURE(status)) {
3891             errln("Failure: file %s, line %d, status = %s", __FILE__, __LINE__, u_errorName(status));
3892             return;
3893         }
3894         sample.retainAll(alphabetic);
3895         for (int32_t count=0; count<5; count++) {
3896             UChar32 c = sample.charAt(count);
3897             if (c == -1) {
3898                 break;
3899             }
3900             testString.append(c);
3901         }
3902     }
3903
3904     UParseError parseError;
3905     Transliterator* anyLatin =
3906         Transliterator::createInstance("Any-Latin", UTRANS_FORWARD, parseError, status);
3907     if (U_FAILURE(status)) {
3908         errln("Failure: file %s, line %d, status = %s", __FILE__, __LINE__, u_errorName(status));
3909         return;
3910     }
3911
3912     logln(UnicodeString("Sample set for Any-Latin: ") + testString);
3913     anyLatin->transliterate(testString);
3914     logln(UnicodeString("Sample result for Any-Latin: ") + testString);
3915     delete anyLatin;
3916 }
3917
3918
3919 /**
3920  * Test the source and target set API.  These are only implemented
3921  * for RBT and CompoundTransliterator at this time.
3922  */
3923 void TransliteratorTest::TestSourceTargetSet() {
3924     UErrorCode ec = U_ZERO_ERROR;
3925
3926     // Rules
3927     const char* r =
3928         "a > b; "
3929         "r [x{lu}] > q;";
3930
3931     // Expected source
3932     UnicodeSet expSrc("[arx{lu}]", ec);
3933
3934     // Expected target
3935     UnicodeSet expTrg("[bq]", ec);
3936
3937     UParseError pe;
3938     Transliterator* t = Transliterator::createFromRules("test", r, UTRANS_FORWARD, pe, ec);
3939
3940     if (U_FAILURE(ec)) {
3941         delete t;
3942         errln("FAIL: Couldn't set up test");
3943         return;
3944     }
3945
3946     UnicodeSet src; t->getSourceSet(src);
3947     UnicodeSet trg; t->getTargetSet(trg);
3948
3949     if (src == expSrc && trg == expTrg) {
3950         UnicodeString a, b;
3951         logln((UnicodeString)"Ok: " +
3952               r + " => source = " + src.toPattern(a, TRUE) +
3953               ", target = " + trg.toPattern(b, TRUE));
3954     } else {
3955         UnicodeString a, b, c, d;
3956         errln((UnicodeString)"FAIL: " +
3957               r + " => source = " + src.toPattern(a, TRUE) +
3958               ", expected " + expSrc.toPattern(b, TRUE) +
3959               "; target = " + trg.toPattern(c, TRUE) +
3960               ", expected " + expTrg.toPattern(d, TRUE));
3961     }
3962
3963     delete t;
3964 }
3965
3966 /**
3967  * Test handling of rule whitespace, for both RBT and UnicodeSet.
3968  */
3969 void TransliteratorTest::TestRuleWhitespace() {
3970     // Rules
3971     const char* r = "a > \\u200E b;";
3972
3973     UErrorCode ec = U_ZERO_ERROR;
3974     UParseError pe;
3975     Transliterator* t = Transliterator::createFromRules("test", CharsToUnicodeString(r), UTRANS_FORWARD, pe, ec);
3976
3977     if (U_FAILURE(ec)) {
3978         errln("FAIL: Couldn't set up test");
3979     } else {
3980         expect(*t, "a", "b");
3981     }
3982     delete t;
3983
3984     // UnicodeSet
3985     ec = U_ZERO_ERROR;
3986     UnicodeSet set(CharsToUnicodeString("[a \\u200E]"), ec);
3987
3988     if (U_FAILURE(ec)) {
3989         errln("FAIL: Couldn't set up test");
3990     } else {
3991         if (set.contains(0x200E)) {
3992             errln("FAIL: U+200E not being ignored by UnicodeSet");
3993         }
3994     }
3995 }
3996 //======================================================================
3997 // this method is in TestUScript.java
3998 //======================================================================
3999 void TransliteratorTest::TestAllCodepoints(){
4000     UScriptCode code= USCRIPT_INVALID_CODE;
4001     char id[256]={'\0'};
4002     char abbr[256]={'\0'};
4003     char newId[256]={'\0'};
4004     char newAbbrId[256]={'\0'};
4005     char oldId[256]={'\0'};
4006     char oldAbbrId[256]={'\0'};
4007
4008     UErrorCode status =U_ZERO_ERROR;
4009     UParseError pe;
4010
4011     for(uint32_t i = 0; i<=0x10ffff; i++){
4012         code =  uscript_getScript(i,&status);
4013         if(code == USCRIPT_INVALID_CODE){
4014             errln("uscript_getScript for codepoint \\U%08X failed.\n", i);
4015         }
4016         const char* myId = uscript_getName(code);
4017         if(!myId) {
4018           dataerrln("Valid script code returned NULL name. Check your data!");
4019           return;
4020         }
4021         uprv_strcpy(id,myId);
4022         uprv_strcpy(abbr,uscript_getShortName(code));
4023
4024         uprv_strcpy(newId,"[:");
4025         uprv_strcat(newId,id);
4026         uprv_strcat(newId,":];NFD");
4027
4028         uprv_strcpy(newAbbrId,"[:");
4029         uprv_strcat(newAbbrId,abbr);
4030         uprv_strcat(newAbbrId,":];NFD");
4031
4032         if(uprv_strcmp(newId,oldId)!=0){
4033             Transliterator* t = Transliterator::createInstance(newId,UTRANS_FORWARD,pe,status);
4034             if(t==NULL || U_FAILURE(status)){
4035                 errln((UnicodeString)"FAIL: Could not create " + id);
4036             }
4037             delete t;
4038         }
4039         if(uprv_strcmp(newAbbrId,oldAbbrId)!=0){
4040             Transliterator* t = Transliterator::createInstance(newAbbrId,UTRANS_FORWARD,pe,status);
4041             if(t==NULL || U_FAILURE(status)){
4042                 errln((UnicodeString)"FAIL: Could not create " + id);
4043             }
4044             delete t;
4045         }
4046         uprv_strcpy(oldId,newId);
4047         uprv_strcpy(oldAbbrId, newAbbrId);
4048
4049     }
4050
4051 }
4052
/*
 * TEST_TRANSLIT_ID(id, cls): creates the transliterator named by `id` in the
 * forward direction and checks that its dynamic class ID equals
 * cls::getStaticClassID().  A creation failure is reported with dataerrln(),
 * a class ID mismatch with errln(); both are called unqualified, so the macro
 * has to be expanded where those names are in scope.
 */
#define TEST_TRANSLIT_ID(id, cls) { \
  UErrorCode status_ = U_ZERO_ERROR; \
  Transliterator* xlit_ = Transliterator::createInstance(id, UTRANS_FORWARD, status_); \
  if (U_SUCCESS(status_)) { \
    /* an assignment-operator coverage test is not possible here */ \
    if (xlit_->getDynamicClassID() != cls::getStaticClassID()) { \
      errln("FAIL: " #cls " dynamic and static class ID mismatch"); \
    } \
  } else { \
    dataerrln("FAIL: Couldn't create %s - %s", id, u_errorName(status_)); \
  } \
  delete xlit_; \
}
4066
/*
 * TEST_TRANSLIT_RULE(rule, cls): builds a transliterator with the ID "_" from
 * the rule text `rule` and checks that its dynamic class ID equals
 * cls::getStaticClassID().  `rule` must be a string literal, because the
 * failure message is formed by literal concatenation.  errln() is called
 * unqualified, so the macro has to be expanded where it is in scope.
 */
#define TEST_TRANSLIT_RULE(rule, cls) { \
  UParseError parseErr_; \
  UErrorCode status_ = U_ZERO_ERROR; \
  Transliterator* xlit_ = Transliterator::createFromRules("_", rule, UTRANS_FORWARD, parseErr_, status_); \
  if (U_SUCCESS(status_)) { \
    /* an assignment-operator coverage test is not possible here */ \
    if (xlit_->getDynamicClassID() != cls ::getStaticClassID()) { \
      errln("FAIL: " #cls " dynamic and static class ID mismatch"); \
    } \
  } else { \
    errln("FAIL: Couldn't create " rule); \
  } \
  delete xlit_; \
}
4081
4082 void TransliteratorTest::TestBoilerplate() {
4083     TEST_TRANSLIT_ID("Any-Latin", AnyTransliterator);
4084     TEST_TRANSLIT_ID("Any-Hex", EscapeTransliterator);
4085     TEST_TRANSLIT_ID("Hex-Any", UnescapeTransliterator);
4086     TEST_TRANSLIT_ID("Lower", LowercaseTransliterator);
4087     TEST_TRANSLIT_ID("Upper", UppercaseTransliterator);
4088     TEST_TRANSLIT_ID("Title", TitlecaseTransliterator);
4089     TEST_TRANSLIT_ID("Null", NullTransliterator);
4090     TEST_TRANSLIT_ID("Remove", RemoveTransliterator);
4091     TEST_TRANSLIT_ID("Any-Name", UnicodeNameTransliterator);
4092     TEST_TRANSLIT_ID("Name-Any", NameUnicodeTransliterator);
4093     TEST_TRANSLIT_ID("NFD", NormalizationTransliterator);
4094     TEST_TRANSLIT_ID("Latin-Greek", CompoundTransliterator);
4095     TEST_TRANSLIT_RULE("a>b;", RuleBasedTransliterator);
4096 }
4097
4098 void TransliteratorTest::TestAlternateSyntax() {
4099     // U+2206 == &
4100     // U+2190 == <
4101     // U+2192 == >
4102     // U+2194 == <>
4103     expect(CharsToUnicodeString("a \\u2192 x; b \\u2190 y; c \\u2194 z"),
4104            "abc",
4105            "xbz");
4106     expect(CharsToUnicodeString("([:^ASCII:]) \\u2192 \\u2206Name($1);"),
4107            CharsToUnicodeString("<=\\u2190; >=\\u2192; <>=\\u2194; &=\\u2206"),
4108            UNICODE_STRING_SIMPLE("<=\\N{LEFTWARDS ARROW}; >=\\N{RIGHTWARDS ARROW}; <>=\\N{LEFT RIGHT ARROW}; &=\\N{INCREMENT}"));
4109 }
4110
4111 static const char* BEGIN_END_RULES[] = {
4112     // [0]
4113     "abc > xy;"
4114     "aba > z;",
4115
4116     // [1]
4117 /*
4118     "::BEGIN;"
4119     "abc > xy;"
4120     "::END;"
4121     "::BEGIN;"
4122     "aba > z;"
4123     "::END;",
4124 */
4125     "", // test case commented out below, this is here to keep from messing up the indexes
4126
4127     // [2]
4128 /*
4129     "abc > xy;"
4130     "::BEGIN;"
4131     "aba > z;"
4132     "::END;",
4133 */
4134     "", // test case commented out below, this is here to keep from messing up the indexes
4135
4136     // [3]
4137 /*
4138     "::BEGIN;"
4139     "abc > xy;"
4140     "::END;"
4141     "aba > z;",
4142 */
4143     "", // test case commented out below, this is here to keep from messing up the indexes
4144
4145     // [4]
4146     "abc > xy;"
4147     "::Null;"
4148     "aba > z;",
4149
4150     // [5]
4151     "::Upper;"
4152     "ABC > xy;"
4153     "AB > x;"
4154     "C > z;"
4155     "::Upper;"
4156     "XYZ > p;"
4157     "XY > q;"
4158     "Z > r;"
4159     "::Upper;",
4160
4161     // [6]
4162     "$ws = [[:Separator:][\\u0009-\\u000C]$];"
4163     "$delim = [\\-$ws];"
4164     "$ws $delim* > ' ';"
4165     "'-' $delim* > '-';",
4166
4167     // [7]
4168     "::Null;"
4169     "$ws = [[:Separator:][\\u0009-\\u000C]$];"
4170     "$delim = [\\-$ws];"
4171     "$ws $delim* > ' ';"
4172     "'-' $delim* > '-';",
4173
4174     // [8]
4175     "$ws = [[:Separator:][\\u0009-\\u000C]$];"
4176     "$delim = [\\-$ws];"
4177     "$ws $delim* > ' ';"
4178     "'-' $delim* > '-';"
4179     "::Null;",
4180
4181     // [9]
4182     "$ws = [[:Separator:][\\u0009-\\u000C]$];"
4183     "$delim = [\\-$ws];"
4184     "::Null;"
4185     "$ws $delim* > ' ';"
4186     "'-' $delim* > '-';",
4187
4188     // [10]
4189 /*
4190     "::BEGIN;"
4191     "$ws = [[:Separator:][\\u0009-\\u000C]$];"
4192     "$delim = [\\-$ws];"
4193     "::END;"
4194     "$ws $delim* > ' ';"
4195     "'-' $delim* > '-';",
4196 */
4197     "", // test case commented out below, this is here to keep from messing up the indexes
4198
4199     // [11]
4200 /*
4201     "$ws = [[:Separator:][\\u0009-\\u000C]$];"
4202     "$delim = [\\-$ws];"
4203     "::BEGIN;"
4204     "$ws $delim* > ' ';"
4205     "'-' $delim* > '-';"
4206     "::END;",
4207 */
4208     "", // test case commented out below, this is here to keep from messing up the indexes
4209
4210     // [12]
4211 /*
4212     "$ws = [[:Separator:][\\u0009-\\u000C]$];"
4213     "$delim = [\\-$ws];"
4214     "$ab = [ab];"
4215     "::BEGIN;"
4216     "$ws $delim* > ' ';"
4217     "'-' $delim* > '-';"
4218     "::END;"
4219     "::BEGIN;"
4220     "$ab { ' ' } $ab > '-';"
4221     "c { ' ' > ;"
4222     "::END;"
4223     "::BEGIN;"
4224     "'a-a' > a\\%|a;"
4225     "::END;",
4226 */
4227     "", // test case commented out below, this is here to keep from messing up the indexes
4228
4229     // [13]
4230     "$ws = [[:Separator:][\\u0009-\\u000C]$];"
4231     "$delim = [\\-$ws];"
4232     "$ab = [ab];"
4233     "::Null;"
4234     "$ws $delim* > ' ';"
4235     "'-' $delim* > '-';"
4236     "::Null;"
4237     "$ab { ' ' } $ab > '-';"
4238     "c { ' ' > ;"
4239     "::Null;"
4240     "'a-a' > a\\%|a;",
4241
4242     // [14]
4243 /*
4244     "::[abc];"
4245     "::BEGIN;"
4246     "abc > xy;"
4247     "::END;"
4248     "::BEGIN;"
4249     "aba > yz;"
4250     "::END;"
4251     "::Upper;",
4252 */
4253     "", // test case commented out below, this is here to keep from messing up the indexes
4254
4255     // [15]
4256     "::[abc];"
4257     "abc > xy;"
4258     "::Null;"
4259     "aba > yz;"
4260     "::Upper;",
4261
4262     // [16]
4263 /*
4264     "::[abc];"
4265     "::BEGIN;"
4266     "abc <> xy;"
4267     "::END;"
4268     "::BEGIN;"
4269     "aba <> yz;"
4270     "::END;"
4271     "::Upper(Lower);"
4272     "::([XYZ]);"
4273 */
4274     "", // test case commented out below, this is here to keep from messing up the indexes
4275
4276     // [17]
4277     "::[abc];"
4278     "abc <> xy;"
4279     "::Null;"
4280     "aba <> yz;"
4281     "::Upper(Lower);"
4282     "::([XYZ]);"
4283 };
4284 static const int32_t BEGIN_END_RULES_length = (int32_t)(sizeof(BEGIN_END_RULES) / sizeof(BEGIN_END_RULES[0]));
4285
4286 /*
4287 (This entire test is commented out below and will need some heavy revision when we re-add
4288 the ::BEGIN/::END stuff)
4289 static const char* BOGUS_BEGIN_END_RULES[] = {
4290     // [7]
4291     "::BEGIN;"
4292     "abc > xy;"
4293     "::BEGIN;"
4294     "aba > z;"
4295     "::END;"
4296     "::END;",
4297
4298     // [8]
4299     "abc > xy;"
4300     " aba > z;"
4301     "::END;",
4302
4303     // [9]
4304     "::BEGIN;"
4305     "::Upper;"
4306     "::END;"
4307 };
4308 static const int32_t BOGUS_BEGIN_END_RULES_length = (int32_t)(sizeof(BOGUS_BEGIN_END_RULES) / sizeof(BOGUS_BEGIN_END_RULES[0]));
4309 */
4310
4311 static const char* BEGIN_END_TEST_CASES[] = {
4312     // rules             input                   expected output
4313     BEGIN_END_RULES[0],  "abc ababc aba",        "xy zbc z",
4314 //    BEGIN_END_RULES[1],  "abc ababc aba",        "xy abxy z",
4315 //    BEGIN_END_RULES[2],  "abc ababc aba",        "xy abxy z",
4316 //    BEGIN_END_RULES[3],  "abc ababc aba",        "xy abxy z",
4317     BEGIN_END_RULES[4],  "abc ababc aba",        "xy abxy z",
4318     BEGIN_END_RULES[5],  "abccabaacababcbc",     "PXAARXQBR",
4319
4320     BEGIN_END_RULES[6],  "e   e - e---e-  e",    "e e e-e-e",
4321     BEGIN_END_RULES[7],  "e   e - e---e-  e",    "e e e-e-e",
4322     BEGIN_END_RULES[8],  "e   e - e---e-  e",    "e e e-e-e",
4323     BEGIN_END_RULES[9],  "e   e - e---e-  e",    "e e e-e-e",
4324 //    BEGIN_END_RULES[10],  "e   e - e---e-  e",    "e e e-e-e",
4325 //    BEGIN_END_RULES[11], "e   e - e---e-  e",    "e e e-e-e",
4326 //    BEGIN_END_RULES[12], "e   e - e---e-  e",    "e e e-e-e",
4327 //    BEGIN_END_RULES[12], "a    a    a    a",     "a%a%a%a",
4328 //    BEGIN_END_RULES[12], "a a-b c b a",          "a%a-b cb-a",
4329     BEGIN_END_RULES[13], "e   e - e---e-  e",    "e e e-e-e",
4330     BEGIN_END_RULES[13], "a    a    a    a",     "a%a%a%a",
4331     BEGIN_END_RULES[13], "a a-b c b a",          "a%a-b cb-a",
4332
4333 //    BEGIN_END_RULES[14], "abc xy ababc xyz aba", "XY xy ABXY xyz YZ",
4334     BEGIN_END_RULES[15], "abc xy ababc xyz aba", "XY xy ABXY xyz YZ",
4335 //    BEGIN_END_RULES[16], "abc xy ababc xyz aba", "XY xy ABXY xyz YZ",
4336     BEGIN_END_RULES[17], "abc xy ababc xyz aba", "XY xy ABXY xyz YZ"
4337 };
4338 static const int32_t BEGIN_END_TEST_CASES_length = (int32_t)(sizeof(BEGIN_END_TEST_CASES) / sizeof(BEGIN_END_TEST_CASES[0]));
4339
4340 void TransliteratorTest::TestBeginEnd() {
4341     // run through the list of test cases above
4342     int32_t i = 0;
4343     for (i = 0; i < BEGIN_END_TEST_CASES_length; i += 3) {
4344         expect((UnicodeString)"Test case #" + (i / 3),
4345                UnicodeString(BEGIN_END_TEST_CASES[i], -1, US_INV),
4346                UnicodeString(BEGIN_END_TEST_CASES[i + 1], -1, US_INV),
4347                UnicodeString(BEGIN_END_TEST_CASES[i + 2], -1, US_INV));
4348     }
4349
4350     // instantiate the one reversible rule set in the reverse direction and make sure it does the right thing
4351     UParseError parseError;
4352     UErrorCode status = U_ZERO_ERROR;
4353     Transliterator* reversed  = Transliterator::createFromRules("Reversed", UnicodeString(BEGIN_END_RULES[17]),
4354             UTRANS_REVERSE, parseError, status);
4355     if (reversed == 0 || U_FAILURE(status)) {
4356         reportParseError(UnicodeString("FAIL: Couldn't create reversed transliterator"), parseError, status);
4357     } else {
4358         expect(*reversed, UnicodeString("xy XY XYZ yz YZ"), UnicodeString("xy abc xaba yz aba"));
4359     }
4360     delete reversed;
4361
4362     // finally, run through the list of syntactically-ill-formed rule sets above and make sure
4363     // that all of them cause errors
4364 /*
4365 (commented out until we have the real ::BEGIN/::END stuff in place
4366     for (i = 0; i < BOGUS_BEGIN_END_RULES_length; i++) {
4367         UParseError parseError;
4368         UErrorCode status = U_ZERO_ERROR;
4369         Transliterator* t = Transliterator::createFromRules("foo", UnicodeString(BOGUS_BEGIN_END_RULES[i]),
4370                 UTRANS_FORWARD, parseError, status);
4371         if (!U_FAILURE(status)) {
4372             delete t;
4373             errln((UnicodeString)"Should have gotten syntax error from " + BOGUS_BEGIN_END_RULES[i]);
4374         }
4375     }
4376 */
4377 }
4378
4379 void TransliteratorTest::TestBeginEndToRules() {
4380     // run through the same list of test cases we used above, but this time, instead of just
4381     // instantiating a Transliterator from the rules and running the test against it, we instantiate
4382     // a Transliterator from the rules, do toRules() on it, instantiate a Transliterator from
4383     // the resulting set of rules, and make sure that the generated rule set is semantically equivalent
4384     // to (i.e., does the same thing as) the original rule set
4385     for (int32_t i = 0; i < BEGIN_END_TEST_CASES_length; i += 3) {
4386         UParseError parseError;
4387         UErrorCode status = U_ZERO_ERROR;
4388         Transliterator* t = Transliterator::createFromRules("--", UnicodeString(BEGIN_END_TEST_CASES[i], -1, US_INV),
4389                 UTRANS_FORWARD, parseError, status);
4390         if (U_FAILURE(status)) {
4391             reportParseError(UnicodeString("FAIL: Couldn't create transliterator"), parseError, status);
4392         } else {
4393             UnicodeString rules;
4394             t->toRules(rules, TRUE);
4395             Transliterator* t2 = Transliterator::createFromRules((UnicodeString)"Test case #" + (i / 3), rules,
4396                     UTRANS_FORWARD, parseError, status);
4397             if (U_FAILURE(status)) {
4398                 reportParseError(UnicodeString("FAIL: Couldn't create transliterator from generated rules"),
4399                         parseError, status);
4400                 delete t;
4401             } else {
4402                 expect(*t2,
4403                        UnicodeString(BEGIN_END_TEST_CASES[i + 1], -1, US_INV),
4404                        UnicodeString(BEGIN_END_TEST_CASES[i + 2], -1, US_INV));
4405                 delete t;
4406                 delete t2;
4407             }
4408         }
4409     }
4410
4411     // do the same thing for the reversible test case
4412     UParseError parseError;
4413     UErrorCode status = U_ZERO_ERROR;
4414     Transliterator* reversed = Transliterator::createFromRules("Reversed", UnicodeString(BEGIN_END_RULES[17]),
4415             UTRANS_REVERSE, parseError, status);
4416     if (U_FAILURE(status)) {
4417         reportParseError(UnicodeString("FAIL: Couldn't create reversed transliterator"), parseError, status);
4418     } else {
4419         UnicodeString rules;
4420         reversed->toRules(rules, FALSE);
4421         Transliterator* reversed2 = Transliterator::createFromRules("Reversed", rules, UTRANS_FORWARD,
4422                 parseError, status);
4423         if (U_FAILURE(status)) {
4424             reportParseError(UnicodeString("FAIL: Couldn't create reversed transliterator from generated rules"),
4425                     parseError, status);
4426             delete reversed;
4427         } else {
4428             expect(*reversed2,
4429                    UnicodeString("xy XY XYZ yz YZ"),
4430                    UnicodeString("xy abc xaba yz aba"));
4431             delete reversed;
4432             delete reversed2;
4433         }
4434     }
4435 }
4436
4437 void TransliteratorTest::TestRegisterAlias() {
4438     UnicodeString longID("Lower;[aeiou]Upper");
4439     UnicodeString shortID("Any-CapVowels");
4440     UnicodeString reallyShortID("CapVowels");
4441
4442     Transliterator::registerAlias(shortID, longID);
4443
4444     UErrorCode err = U_ZERO_ERROR;
4445     Transliterator* t1 = Transliterator::createInstance(longID, UTRANS_FORWARD, err);
4446     if (U_FAILURE(err)) {
4447         errln("Failed to instantiate transliterator with long ID");
4448         Transliterator::unregister(shortID);
4449         return;
4450     }
4451     Transliterator* t2 = Transliterator::createInstance(reallyShortID, UTRANS_FORWARD, err);
4452     if (U_FAILURE(err)) {
4453         errln("Failed to instantiate transliterator with short ID");
4454         delete t1;
4455         Transliterator::unregister(shortID);
4456         return;
4457     }
4458
4459     if (t1->getID() != longID)
4460         errln("Transliterator instantiated with long ID doesn't have long ID");
4461     if (t2->getID() != reallyShortID)
4462         errln("Transliterator instantiated with short ID doesn't have short ID");
4463
4464     UnicodeString rules1;
4465     UnicodeString rules2;
4466
4467     t1->toRules(rules1, TRUE);
4468     t2->toRules(rules2, TRUE);
4469     if (rules1 != rules2)
4470         errln("Alias transliterators aren't the same");
4471
4472     delete t1;
4473     delete t2;
4474     Transliterator::unregister(shortID);
4475
4476     t1 = Transliterator::createInstance(shortID, UTRANS_FORWARD, err);
4477     if (U_SUCCESS(err)) {
4478         errln("Instantiation with short ID succeeded after short ID was unregistered");
4479         delete t1;
4480     }
4481
4482     // try the same thing again, but this time with something other than
4483     // an instance of CompoundTransliterator
4484     UnicodeString realID("Latin-Greek");
4485     UnicodeString fakeID("Latin-dlgkjdflkjdl");
4486     Transliterator::registerAlias(fakeID, realID);
4487
4488     err = U_ZERO_ERROR;
4489     t1 = Transliterator::createInstance(realID, UTRANS_FORWARD, err);
4490     if (U_FAILURE(err)) {
4491         dataerrln("Failed to instantiate transliterator with real ID - %s", u_errorName(err));
4492         Transliterator::unregister(realID);
4493         return;
4494     }
4495     t2 = Transliterator::createInstance(fakeID, UTRANS_FORWARD, err);
4496     if (U_FAILURE(err)) {
4497         errln("Failed to instantiate transliterator with fake ID");
4498         delete t1;
4499         Transliterator::unregister(realID);
4500         return;
4501     }
4502
4503     t1->toRules(rules1, TRUE);
4504     t2->toRules(rules2, TRUE);
4505     if (rules1 != rules2)
4506         errln("Alias transliterators aren't the same");
4507
4508     delete t1;
4509     delete t2;
4510     Transliterator::unregister(fakeID);
4511 }
4512
4513 void TransliteratorTest::TestRuleStripping() {
4514     /*
4515 #
4516 \uE001>\u0C01; # SIGN
4517     */
4518     static const UChar rule[] = {
4519         0x0023,0x0020,0x000D,0x000A,
4520         0xE001,0x003E,0x0C01,0x003B,0x0020,0x0023,0x0020,0x0053,0x0049,0x0047,0x004E,0
4521     };
4522     static const UChar expectedRule[] = {
4523         0xE001,0x003E,0x0C01,0x003B,0
4524     };
4525     UChar result[sizeof(rule)/sizeof(rule[0])];
4526     UErrorCode status = U_ZERO_ERROR;
4527     int32_t len = utrans_stripRules(rule, (int32_t)(sizeof(rule)/sizeof(rule[0])), result, &status);
4528     if (len != u_strlen(expectedRule)) {
4529         errln("utrans_stripRules return len = %d", len);
4530     }
4531     if (u_strncmp(expectedRule, result, len) != 0) {
4532         errln("utrans_stripRules did not return expected string");
4533     }
4534 }
4535
4536 /**
4537  * Test the Halfwidth-Fullwidth transliterator (ticket 6281).
4538  */
4539 void TransliteratorTest::TestHalfwidthFullwidth(void) {
4540     UParseError parseError;
4541     UErrorCode status = U_ZERO_ERROR;
4542     Transliterator* hf = Transliterator::createInstance("Halfwidth-Fullwidth", UTRANS_FORWARD, parseError, status);
4543     Transliterator* fh = Transliterator::createInstance("Fullwidth-Halfwidth", UTRANS_FORWARD, parseError, status);
4544     if (hf == 0 || fh == 0) {
4545         dataerrln("FAIL: createInstance failed - %s", u_errorName(status));
4546         delete hf;
4547         delete fh;
4548         return;
4549     }
4550
4551     // Array of 2n items
4552     // Each item is
4553     //   "hf"|"fh"|"both",
4554     //   <Halfwidth>,
4555     //   <Fullwidth>
4556     const char* DATA[] = {
4557         "both",
4558         "\\uFFE9\\uFFEA\\uFFEB\\uFFEC\\u0061\\uFF71\\u00AF\\u0020",
4559         "\\u2190\\u2191\\u2192\\u2193\\uFF41\\u30A2\\uFFE3\\u3000",
4560     };
4561     int32_t DATA_length = (int32_t)(sizeof(DATA) / sizeof(DATA[0]));
4562
4563     for (int32_t i=0; i<DATA_length; i+=3) {
4564         UnicodeString h = CharsToUnicodeString(DATA[i+1]);
4565         UnicodeString f = CharsToUnicodeString(DATA[i+2]);
4566         switch (*DATA[i]) {
4567         case 0x68: //'h': // Halfwidth-Fullwidth only
4568             expect(*hf, h, f);
4569             break;
4570         case 0x66: //'f': // Fullwidth-Halfwidth only
4571             expect(*fh, f, h);
4572             break;
4573         case 0x62: //'b': // both directions
4574             expect(*hf, h, f);
4575             expect(*fh, f, h);
4576             break;
4577         }
4578     }
4579     delete hf;
4580     delete fh;
4581 }
4582
4583
4584     /**
4585      *  Test Thai.  The text is the first paragraph of "What is Unicode" from the Unicode.org web site.
4586      *              TODO: confirm that the expected results are correct.
4587      *              For now, test just confirms that C++ and Java give identical results.
4588      */
4589 void TransliteratorTest::TestThai(void) {
4590 #if !UCONFIG_NO_BREAK_ITERATION
4591     UParseError parseError;
4592     UErrorCode status = U_ZERO_ERROR;
4593     Transliterator* tr = Transliterator::createInstance("Any-Latin", UTRANS_FORWARD, parseError, status);
4594     if (tr == 0) {
4595         dataerrln("FAIL: createInstance failed - %s", u_errorName(status));
4596         return;
4597     }
4598     if (U_FAILURE(status)) {
4599         errln("FAIL: createInstance failed with %s", u_errorName(status));
4600         return;
4601     }
4602     const char *thaiText =
4603         "\\u0e42\\u0e14\\u0e22\\u0e1e\\u0e37\\u0e49\\u0e19\\u0e10\\u0e32\\u0e19\\u0e41\\u0e25\\u0e49\\u0e27, \\u0e04\\u0e2d"
4604         "\\u0e21\\u0e1e\\u0e34\\u0e27\\u0e40\\u0e15\\u0e2d\\u0e23\\u0e4c\\u0e08\\u0e30\\u0e40\\u0e01\\u0e35\\u0e48\\u0e22"
4605         "\\u0e27\\u0e02\\u0e49\\u0e2d\\u0e07\\u0e01\\u0e31\\u0e1a\\u0e40\\u0e23\\u0e37\\u0e48\\u0e2d\\u0e07\\u0e02\\u0e2d"
4606         "\\u0e07\\u0e15\\u0e31\\u0e27\\u0e40\\u0e25\\u0e02. \\u0e04\\u0e2d\\u0e21\\u0e1e\\u0e34\\u0e27\\u0e40\\u0e15\\u0e2d"
4607         "\\u0e23\\u0e4c\\u0e08\\u0e31\\u0e14\\u0e40\\u0e01\\u0e47\\u0e1a\\u0e15\\u0e31\\u0e27\\u0e2d\\u0e31\\u0e01\\u0e29"
4608         "\\u0e23\\u0e41\\u0e25\\u0e30\\u0e2d\\u0e31\\u0e01\\u0e02\\u0e23\\u0e30\\u0e2d\\u0e37\\u0e48\\u0e19\\u0e46 \\u0e42"
4609         "\\u0e14\\u0e22\\u0e01\\u0e32\\u0e23\\u0e01\\u0e33\\u0e2b\\u0e19\\u0e14\\u0e2b\\u0e21\\u0e32\\u0e22\\u0e40\\u0e25"
4610         "\\u0e02\\u0e43\\u0e2b\\u0e49\\u0e2a\\u0e33\\u0e2b\\u0e23\\u0e31\\u0e1a\\u0e41\\u0e15\\u0e48\\u0e25\\u0e30\\u0e15"
4611         "\\u0e31\\u0e27. \\u0e01\\u0e48\\u0e2d\\u0e19\\u0e2b\\u0e19\\u0e49\\u0e32\\u0e17\\u0e35\\u0e48\\u0e4a Unicode \\u0e08"
4612         "\\u0e30\\u0e16\\u0e39\\u0e01\\u0e2a\\u0e23\\u0e49\\u0e32\\u0e07\\u0e02\\u0e36\\u0e49\\u0e19, \\u0e44\\u0e14\\u0e49"
4613         "\\u0e21\\u0e35\\u0e23\\u0e30\\u0e1a\\u0e1a encoding \\u0e2d\\u0e22\\u0e39\\u0e48\\u0e2b\\u0e25\\u0e32\\u0e22\\u0e23"
4614         "\\u0e49\\u0e2d\\u0e22\\u0e23\\u0e30\\u0e1a\\u0e1a\\u0e2a\\u0e33\\u0e2b\\u0e23\\u0e31\\u0e1a\\u0e01\\u0e32\\u0e23"
4615         "\\u0e01\\u0e33\\u0e2b\\u0e19\\u0e14\\u0e2b\\u0e21\\u0e32\\u0e22\\u0e40\\u0e25\\u0e02\\u0e40\\u0e2b\\u0e25\\u0e48"
4616         "\\u0e32\\u0e19\\u0e35\\u0e49. \\u0e44\\u0e21\\u0e48\\u0e21\\u0e35 encoding \\u0e43\\u0e14\\u0e17\\u0e35\\u0e48"
4617         "\\u0e21\\u0e35\\u0e08\\u0e33\\u0e19\\u0e27\\u0e19\\u0e15\\u0e31\\u0e27\\u0e2d\\u0e31\\u0e01\\u0e02\\u0e23\\u0e30"
4618         "\\u0e21\\u0e32\\u0e01\\u0e40\\u0e1e\\u0e35\\u0e22\\u0e07\\u0e1e\\u0e2d: \\u0e22\\u0e01\\u0e15\\u0e31\\u0e27\\u0e2d"
4619         "\\u0e22\\u0e48\\u0e32\\u0e07\\u0e40\\u0e0a\\u0e48\\u0e19, \\u0e40\\u0e09\\u0e1e\\u0e32\\u0e30\\u0e43\\u0e19\\u0e01"
4620         "\\u0e25\\u0e38\\u0e48\\u0e21\\u0e2a\\u0e2b\\u0e20\\u0e32\\u0e1e\\u0e22\\u0e38\\u0e42\\u0e23\\u0e1b\\u0e40\\u0e1e"
4621         "\\u0e35\\u0e22\\u0e07\\u0e41\\u0e2b\\u0e48\\u0e07\\u0e40\\u0e14\\u0e35\\u0e22\\u0e27 \\u0e01\\u0e47\\u0e15\\u0e49"
4622         "\\u0e2d\\u0e07\\u0e01\\u0e32\\u0e23\\u0e2b\\u0e25\\u0e32\\u0e22 encoding \\u0e43\\u0e19\\u0e01\\u0e32\\u0e23\\u0e04"
4623         "\\u0e23\\u0e2d\\u0e1a\\u0e04\\u0e25\\u0e38\\u0e21\\u0e17\\u0e38\\u0e01\\u0e20\\u0e32\\u0e29\\u0e32\\u0e43\\u0e19"
4624         "\\u0e01\\u0e25\\u0e38\\u0e48\\u0e21. \\u0e2b\\u0e23\\u0e37\\u0e2d\\u0e41\\u0e21\\u0e49\\u0e41\\u0e15\\u0e48\\u0e43"
4625         "\\u0e19\\u0e20\\u0e32\\u0e29\\u0e32\\u0e40\\u0e14\\u0e35\\u0e48\\u0e22\\u0e27 \\u0e40\\u0e0a\\u0e48\\u0e19 \\u0e20"
4626         "\\u0e32\\u0e29\\u0e32\\u0e2d\\u0e31\\u0e07\\u0e01\\u0e24\\u0e29 \\u0e01\\u0e47\\u0e44\\u0e21\\u0e48\\u0e21\\u0e35"
4627         " encoding \\u0e43\\u0e14\\u0e17\\u0e35\\u0e48\\u0e40\\u0e1e\\u0e35\\u0e22\\u0e07\\u0e1e\\u0e2d\\u0e2a\\u0e33\\u0e2b"
4628         "\\u0e23\\u0e31\\u0e1a\\u0e17\\u0e38\\u0e01\\u0e15\\u0e31\\u0e27\\u0e2d\\u0e31\\u0e01\\u0e29\\u0e23, \\u0e40\\u0e04"
4629         "\\u0e23\\u0e37\\u0e48\\u0e2d\\u0e07\\u0e2b\\u0e21\\u0e32\\u0e22\\u0e27\\u0e23\\u0e23\\u0e04\\u0e15\\u0e2d\\u0e19"
4630         " \\u0e41\\u0e25\\u0e30\\u0e2a\\u0e31\\u0e0d\\u0e25\\u0e31\\u0e01\\u0e29\\u0e13\\u0e4c\\u0e17\\u0e32\\u0e07\\u0e40"
4631         "\\u0e17\\u0e04\\u0e19\\u0e34\\u0e04\\u0e17\\u0e35\\u0e48\\u0e43\\u0e0a\\u0e49\\u0e01\\u0e31\\u0e19\\u0e2d\\u0e22"
4632         "\\u0e39\\u0e48\\u0e17\\u0e31\\u0e48\\u0e27\\u0e44\\u0e1b.";
4633
4634     const char *latinText =
4635         "doy ph\\u1ee5\\u0304\\u0302n \\u1e6d\\u0304h\\u0101n l\\u00e6\\u0302w, khxmphiwtexr\\u0312 ca ke\\u012b\\u0300"
4636         "ywk\\u0304\\u0125xng k\\u1ea1b re\\u1ee5\\u0304\\u0300xng k\\u0304hxng t\\u1ea1wlek\\u0304h. khxmphiwtexr"
4637         "\\u0312 c\\u1ea1d k\\u0115b t\\u1ea1w x\\u1ea1ks\\u0304\\u02b9r l\\u00e6a x\\u1ea1kk\\u0304h ra x\\u1ee5\\u0304"
4638         "\\u0300n\\u00ab doy k\\u0101r k\\u1ea3h\\u0304nd h\\u0304m\\u0101ylek\\u0304h h\\u0304\\u0131\\u0302 s\\u0304"
4639         "\\u1ea3h\\u0304r\\u1ea1b t\\u00e6\\u0300la t\\u1ea1w. k\\u0300xn h\\u0304n\\u0302\\u0101 th\\u012b\\u0300\\u0301"
4640         " Unicode ca t\\u0304h\\u016bk s\\u0304r\\u0302\\u0101ng k\\u0304h\\u1ee5\\u0302n, d\\u1ecb\\u0302 m\\u012b "
4641         "rabb encoding xy\\u016b\\u0300 h\\u0304l\\u0101y r\\u0302xy rabb s\\u0304\\u1ea3h\\u0304r\\u1ea1b k\\u0101"
4642         "r k\\u1ea3h\\u0304nd h\\u0304m\\u0101ylek\\u0304h h\\u0304el\\u0300\\u0101 n\\u012b\\u0302. m\\u1ecb\\u0300m"
4643         "\\u012b encoding d\\u0131 th\\u012b\\u0300 m\\u012b c\\u1ea3nwn t\\u1ea1w x\\u1ea1kk\\u0304hra m\\u0101k p"
4644         "he\\u012byng phx: yk t\\u1ea1wx\\u1ef3\\u0101ng ch\\u00e8n, c\\u0304heph\\u0101a n\\u0131 kl\\u00f9m s\\u0304"
4645         "h\\u0304p\\u0323h\\u0101ph yurop phe\\u012byng h\\u0304\\u00e6\\u0300ng de\\u012byw k\\u0306 t\\u0302xngk\\u0101"
4646         "r h\\u0304l\\u0101y encoding n\\u0131 k\\u0101r khrxbkhlum thuk p\\u0323h\\u0101s\\u0304\\u02b9\\u0101 n\\u0131"
4647         " kl\\u00f9m. h\\u0304r\\u1ee5\\u0304x m\\u00e6\\u0302t\\u00e6\\u0300 n\\u0131 p\\u0323h\\u0101s\\u0304\\u02b9"
4648         "\\u0101 de\\u012b\\u0300yw ch\\u00e8n p\\u0323h\\u0101s\\u0304\\u02b9\\u0101 x\\u1ea1ngkvs\\u0304\\u02b9 k\\u0306"
4649         " m\\u1ecb\\u0300m\\u012b encoding d\\u0131 th\\u012b\\u0300 phe\\u012byng phx s\\u0304\\u1ea3h\\u0304r\\u1ea1"
4650         "b thuk t\\u1ea1w x\\u1ea1ks\\u0304\\u02b9r, kher\\u1ee5\\u0304\\u0300xngh\\u0304m\\u0101y wrrkh txn l\\u00e6"
4651         "a s\\u0304\\u1ea1\\u1ef5l\\u1ea1ks\\u0304\\u02b9\\u1e47\\u0312 th\\u0101ng thekhnikh th\\u012b\\u0300 ch\\u0131"
4652         "\\u0302 k\\u1ea1n xy\\u016b\\u0300 th\\u1ea1\\u0300wp\\u1ecb.";
4653
4654
4655     UnicodeString  xlitText(thaiText);
4656     xlitText = xlitText.unescape();
4657     tr->transliterate(xlitText);
4658
4659     UnicodeString expectedText(latinText);
4660     expectedText = expectedText.unescape();
4661     expect(*tr, xlitText, expectedText);
4662
4663     delete tr;
4664 #endif
4665 }
4666
4667
4668 //======================================================================
4669 // Support methods
4670 //======================================================================
4671 void TransliteratorTest::expectT(const UnicodeString& id,
4672                                  const UnicodeString& source,
4673                                  const UnicodeString& expectedResult) {
4674     UErrorCode ec = U_ZERO_ERROR;
4675     UParseError pe;
4676     Transliterator *t = Transliterator::createInstance(id, UTRANS_FORWARD, pe, ec);
4677     if (U_FAILURE(ec)) {
4678         errln((UnicodeString)"FAIL: Could not create " + id + " -  " + u_errorName(ec));
4679         delete t;
4680         return;
4681     }
4682     expect(*t, source, expectedResult);
4683     delete t;
4684 }
4685
4686 void TransliteratorTest::reportParseError(const UnicodeString& message,
4687                                           const UParseError& parseError,
4688                                           const UErrorCode& status) {
4689     dataerrln(message +
4690           /*", parse error " + parseError.code +*/
4691           ", line " + parseError.line +
4692           ", offset " + parseError.offset +
4693           ", pre-context " + prettify(parseError.preContext, TRUE) +
4694           ", post-context " + prettify(parseError.postContext,TRUE) +
4695           ", Error: " + u_errorName(status));
4696 }
4697
4698 void TransliteratorTest::expect(const UnicodeString& rules,
4699                                 const UnicodeString& source,
4700                                 const UnicodeString& expectedResult,
4701                                 UTransPosition *pos) {
4702     expect("<ID>", rules, source, expectedResult, pos);
4703 }
4704
4705 void TransliteratorTest::expect(const UnicodeString& id,
4706                                 const UnicodeString& rules,
4707                                 const UnicodeString& source,
4708                                 const UnicodeString& expectedResult,
4709                                 UTransPosition *pos) {
4710     UErrorCode status = U_ZERO_ERROR;
4711     UParseError parseError;
4712     Transliterator* t = Transliterator::createFromRules(id, rules, UTRANS_FORWARD, parseError, status);
4713     if (U_FAILURE(status)) {
4714         reportParseError(UnicodeString("Couldn't create transliterator from ") + rules, parseError, status);
4715     } else {
4716         expect(*t, source, expectedResult, pos);
4717     }
4718     delete t;
4719 }
4720
4721 void TransliteratorTest::expect(const Transliterator& t,
4722                                 const UnicodeString& source,
4723                                 const UnicodeString& expectedResult,
4724                                 const Transliterator& reverseTransliterator) {
4725     expect(t, source, expectedResult);
4726     expect(reverseTransliterator, expectedResult, source);
4727 }
4728
4729 void TransliteratorTest::expect(const Transliterator& t,
4730                                 const UnicodeString& source,
4731                                 const UnicodeString& expectedResult,
4732                                 UTransPosition *pos) {
4733     if (pos == 0) {
4734         UnicodeString result(source);
4735         t.transliterate(result);
4736         expectAux(t.getID() + ":String", source, result, expectedResult);
4737     }
4738     UTransPosition index={0, 0, 0, 0};
4739     if (pos != 0) {
4740         index = *pos;
4741     }
4742
4743     UnicodeString rsource(source);
4744     if (pos == 0) {
4745         t.transliterate(rsource);
4746     } else {
4747         // Do it all at once -- below we do it incrementally
4748         t.finishTransliteration(rsource, *pos);
4749     }
4750     expectAux(t.getID() + ":Replaceable", source, rsource, expectedResult);
4751
4752     // Test keyboard (incremental) transliteration -- this result
4753     // must be the same after we finalize (see below).
4754     UnicodeString log;
4755     rsource.remove();
4756     if (pos != 0) {
4757         rsource = source;
4758         formatInput(log, rsource, index);
4759         log.append(" -> ");
4760         UErrorCode status = U_ZERO_ERROR;
4761         t.transliterate(rsource, index, status);
4762         formatInput(log, rsource, index);
4763     } else {
4764         for (int32_t i=0; i<source.length(); ++i) {
4765             if (i != 0) {
4766                 log.append(" + ");
4767             }
4768             log.append(source.charAt(i)).append(" -> ");
4769             UErrorCode status = U_ZERO_ERROR;
4770             t.transliterate(rsource, index, source.charAt(i), status);
4771             formatInput(log, rsource, index);
4772         }
4773     }
4774
4775     // As a final step in keyboard transliteration, we must call
4776     // transliterate to finish off any pending partial matches that
4777     // were waiting for more input.
4778     t.finishTransliteration(rsource, index);
4779     log.append(" => ").append(rsource);
4780
4781     expectAux(t.getID() + ":Keyboard", log,
4782               rsource == expectedResult,
4783               expectedResult);
4784 }
4785
4786
4787 /**
4788  * @param appendTo result is appended to this param.
4789  * @param input the string being transliterated
4790  * @param pos the index struct
4791  */
4792 UnicodeString& TransliteratorTest::formatInput(UnicodeString &appendTo,
4793                                                const UnicodeString& input,
4794                                                const UTransPosition& pos) {
4795     // Output a string of the form aaa{bbb|ccc|ddd}eee, where
4796     // the {} indicate the context start and limit, and the ||
4797     // indicate the start and limit.
4798     if (0 <= pos.contextStart &&
4799         pos.contextStart <= pos.start &&
4800         pos.start <= pos.limit &&
4801         pos.limit <= pos.contextLimit &&
4802         pos.contextLimit <= input.length()) {
4803
4804         UnicodeString a, b, c, d, e;
4805         input.extractBetween(0, pos.contextStart, a);
4806         input.extractBetween(pos.contextStart, pos.start, b);
4807         input.extractBetween(pos.start, pos.limit, c);
4808         input.extractBetween(pos.limit, pos.contextLimit, d);
4809         input.extractBetween(pos.contextLimit, input.length(), e);
4810         appendTo.append(a).append((UChar)123/*{*/).append(b).
4811             append((UChar)PIPE).append(c).append((UChar)PIPE).append(d).
4812             append((UChar)125/*}*/).append(e);
4813     } else {
4814         appendTo.append((UnicodeString)"INVALID UTransPosition {cs=" +
4815                         pos.contextStart + ", s=" + pos.start + ", l=" +
4816                         pos.limit + ", cl=" + pos.contextLimit + "} on " +
4817                         input);
4818     }
4819     return appendTo;
4820 }
4821
4822 void TransliteratorTest::expectAux(const UnicodeString& tag,
4823                                    const UnicodeString& source,
4824                                    const UnicodeString& result,
4825                                    const UnicodeString& expectedResult) {
4826     expectAux(tag, source + " -> " + result,
4827               result == expectedResult,
4828               expectedResult);
4829 }
4830
4831 void TransliteratorTest::expectAux(const UnicodeString& tag,
4832                                    const UnicodeString& summary, UBool pass,
4833                                    const UnicodeString& expectedResult) {
4834     if (pass) {
4835         logln(UnicodeString("(")+tag+") " + prettify(summary));
4836     } else {
4837         dataerrln(UnicodeString("FAIL: (")+tag+") "
4838               + prettify(summary)
4839               + ", expected " + prettify(expectedResult));
4840     }
4841 }
4842
4843 #endif /* #if !UCONFIG_NO_TRANSLITERATION */