+ UNICODE_STRING_SIMPLE("<=\\N{LEFTWARDS ARROW}; >=\\N{RIGHTWARDS ARROW}; <>=\\N{LEFT RIGHT ARROW}; &=\\N{INCREMENT}"));
+}
+
+// Rule sets exercised by TestBeginEnd() and TestBeginEndToRules(), which reach them
+// through BEGIN_END_TEST_CASES. Each array element is a single rule-set string built
+// from adjacent string literals; the "// [n]" comments give the element's index.
+//
+// Elements whose rules use the ::BEGIN/::END syntax are commented out and replaced by
+// an empty string so that the indexes of the remaining elements do not shift.
+// Element [17] is the rule set the tests also instantiate in the reverse direction.
+static const char* BEGIN_END_RULES[] = {
+ // [0]
+ "abc > xy;"
+ "aba > z;",
+
+ // [1]
+/*
+ "::BEGIN;"
+ "abc > xy;"
+ "::END;"
+ "::BEGIN;"
+ "aba > z;"
+ "::END;",
+*/
+ "", // test case commented out below, this is here to keep from messing up the indexes
+
+ // [2]
+/*
+ "abc > xy;"
+ "::BEGIN;"
+ "aba > z;"
+ "::END;",
+*/
+ "", // test case commented out below, this is here to keep from messing up the indexes
+
+ // [3]
+/*
+ "::BEGIN;"
+ "abc > xy;"
+ "::END;"
+ "aba > z;",
+*/
+ "", // test case commented out below, this is here to keep from messing up the indexes
+
+ // [4]
+ "abc > xy;"
+ "::Null;"
+ "aba > z;",
+
+ // [5]
+ "::Upper;"
+ "ABC > xy;"
+ "AB > x;"
+ "C > z;"
+ "::Upper;"
+ "XYZ > p;"
+ "XY > q;"
+ "Z > r;"
+ "::Upper;",
+
+ // [6]
+ "$ws = [[:Separator:][\\u0009-\\u000C]$];"
+ "$delim = [\\-$ws];"
+ "$ws $delim* > ' ';"
+ "'-' $delim* > '-';",
+
+ // [7]
+ "::Null;"
+ "$ws = [[:Separator:][\\u0009-\\u000C]$];"
+ "$delim = [\\-$ws];"
+ "$ws $delim* > ' ';"
+ "'-' $delim* > '-';",
+
+ // [8]
+ "$ws = [[:Separator:][\\u0009-\\u000C]$];"
+ "$delim = [\\-$ws];"
+ "$ws $delim* > ' ';"
+ "'-' $delim* > '-';"
+ "::Null;",
+
+ // [9]
+ "$ws = [[:Separator:][\\u0009-\\u000C]$];"
+ "$delim = [\\-$ws];"
+ "::Null;"
+ "$ws $delim* > ' ';"
+ "'-' $delim* > '-';",
+
+ // [10]
+/*
+ "::BEGIN;"
+ "$ws = [[:Separator:][\\u0009-\\u000C]$];"
+ "$delim = [\\-$ws];"
+ "::END;"
+ "$ws $delim* > ' ';"
+ "'-' $delim* > '-';",
+*/
+ "", // test case commented out below, this is here to keep from messing up the indexes
+
+ // [11]
+/*
+ "$ws = [[:Separator:][\\u0009-\\u000C]$];"
+ "$delim = [\\-$ws];"
+ "::BEGIN;"
+ "$ws $delim* > ' ';"
+ "'-' $delim* > '-';"
+ "::END;",
+*/
+ "", // test case commented out below, this is here to keep from messing up the indexes
+
+ // [12]
+/*
+ "$ws = [[:Separator:][\\u0009-\\u000C]$];"
+ "$delim = [\\-$ws];"
+ "$ab = [ab];"
+ "::BEGIN;"
+ "$ws $delim* > ' ';"
+ "'-' $delim* > '-';"
+ "::END;"
+ "::BEGIN;"
+ "$ab { ' ' } $ab > '-';"
+ "c { ' ' > ;"
+ "::END;"
+ "::BEGIN;"
+ "'a-a' > a\\%|a;"
+ "::END;",
+*/
+ "", // test case commented out below, this is here to keep from messing up the indexes
+
+ // [13]
+ "$ws = [[:Separator:][\\u0009-\\u000C]$];"
+ "$delim = [\\-$ws];"
+ "$ab = [ab];"
+ "::Null;"
+ "$ws $delim* > ' ';"
+ "'-' $delim* > '-';"
+ "::Null;"
+ "$ab { ' ' } $ab > '-';"
+ "c { ' ' > ;"
+ "::Null;"
+ "'a-a' > a\\%|a;",
+
+ // [14]
+/*
+ "::[abc];"
+ "::BEGIN;"
+ "abc > xy;"
+ "::END;"
+ "::BEGIN;"
+ "aba > yz;"
+ "::END;"
+ "::Upper;",
+*/
+ "", // test case commented out below, this is here to keep from messing up the indexes
+
+ // [15]
+ "::[abc];"
+ "abc > xy;"
+ "::Null;"
+ "aba > yz;"
+ "::Upper;",
+
+ // [16]
+/*
+ "::[abc];"
+ "::BEGIN;"
+ "abc <> xy;"
+ "::END;"
+ "::BEGIN;"
+ "aba <> yz;"
+ "::END;"
+ "::Upper(Lower);"
+ "::([XYZ]);"
+*/
+ "", // test case commented out below, this is here to keep from messing up the indexes
+
+ // [17]
+ "::[abc];"
+ "abc <> xy;"
+ "::Null;"
+ "aba <> yz;"
+ "::Upper(Lower);"
+ "::([XYZ]);"
+};
+
+/*
+(This entire test is commented out below and will need some heavy revision when we re-add
+the ::BEGIN/::END stuff)
+static const char* BOGUS_BEGIN_END_RULES[] = {
+ // [7]
+ "::BEGIN;"
+ "abc > xy;"
+ "::BEGIN;"
+ "aba > z;"
+ "::END;"
+ "::END;",
+
+ // [8]
+ "abc > xy;"
+ " aba > z;"
+ "::END;",
+
+ // [9]
+ "::BEGIN;"
+ "::Upper;"
+ "::END;"
+};
+static const int32_t BOGUS_BEGIN_END_RULES_length = (int32_t)(sizeof(BOGUS_BEGIN_END_RULES) / sizeof(BOGUS_BEGIN_END_RULES[0]));
+*/
+
+// Flat table of test cases for TestBeginEnd() and TestBeginEndToRules(). Every test
+// case occupies three consecutive entries: the rule set (an element of
+// BEGIN_END_RULES), the input text, and the expected output. Both tests walk the
+// table three entries at a time. Rows that refer to the empty placeholder rule sets
+// are commented out.
+static const char* BEGIN_END_TEST_CASES[] = {
+ // rules input expected output
+ BEGIN_END_RULES[0], "abc ababc aba", "xy zbc z",
+// BEGIN_END_RULES[1], "abc ababc aba", "xy abxy z",
+// BEGIN_END_RULES[2], "abc ababc aba", "xy abxy z",
+// BEGIN_END_RULES[3], "abc ababc aba", "xy abxy z",
+ BEGIN_END_RULES[4], "abc ababc aba", "xy abxy z",
+ BEGIN_END_RULES[5], "abccabaacababcbc", "PXAARXQBR",
+
+ BEGIN_END_RULES[6], "e e - e---e- e", "e e e-e-e",
+ BEGIN_END_RULES[7], "e e - e---e- e", "e e e-e-e",
+ BEGIN_END_RULES[8], "e e - e---e- e", "e e e-e-e",
+ BEGIN_END_RULES[9], "e e - e---e- e", "e e e-e-e",
+// BEGIN_END_RULES[10], "e e - e---e- e", "e e e-e-e",
+// BEGIN_END_RULES[11], "e e - e---e- e", "e e e-e-e",
+// BEGIN_END_RULES[12], "e e - e---e- e", "e e e-e-e",
+// BEGIN_END_RULES[12], "a a a a", "a%a%a%a",
+// BEGIN_END_RULES[12], "a a-b c b a", "a%a-b cb-a",
+ BEGIN_END_RULES[13], "e e - e---e- e", "e e e-e-e",
+ BEGIN_END_RULES[13], "a a a a", "a%a%a%a",
+ BEGIN_END_RULES[13], "a a-b c b a", "a%a-b cb-a",
+
+// BEGIN_END_RULES[14], "abc xy ababc xyz aba", "XY xy ABXY xyz YZ",
+ BEGIN_END_RULES[15], "abc xy ababc xyz aba", "XY xy ABXY xyz YZ",
+// BEGIN_END_RULES[16], "abc xy ababc xyz aba", "XY xy ABXY xyz YZ",
+ BEGIN_END_RULES[17], "abc xy ababc xyz aba", "XY xy ABXY xyz YZ"
+};
+// Number of entries in BEGIN_END_TEST_CASES (three per test case), not the number of
+// test cases.
+static const int32_t BEGIN_END_TEST_CASES_length = (int32_t)(sizeof(BEGIN_END_TEST_CASES) / sizeof(BEGIN_END_TEST_CASES[0]));
+
+void TransliteratorTest::TestBeginEnd() {
+    // BEGIN_END_TEST_CASES is a flat table of (rules, input, expected output)
+    // triples, so step through it three entries at a time; offset / 3 is the
+    // zero-based test case number used in the label.
+    for (int32_t offset = 0; offset < BEGIN_END_TEST_CASES_length; offset += 3) {
+        const char* const* testCase = &BEGIN_END_TEST_CASES[offset];
+        const UnicodeString label = (UnicodeString)"Test case #" + (offset / 3);
+        const UnicodeString rules(testCase[0], -1, US_INV);
+        const UnicodeString input(testCase[1], -1, US_INV);
+        const UnicodeString expected(testCase[2], -1, US_INV);
+        expect(label, rules, input, expected);
+    }
+
+    // Rule set [17] is the one reversible rule set: build it in the reverse
+    // direction and check that it produces the expected text.
+    UParseError parseError;
+    UErrorCode status = U_ZERO_ERROR;
+    Transliterator* reversed = Transliterator::createFromRules(
+            "Reversed", UnicodeString(BEGIN_END_RULES[17]),
+            UTRANS_REVERSE, parseError, status);
+    if (reversed != 0 && U_SUCCESS(status)) {
+        const UnicodeString source("xy XY XYZ yz YZ");
+        const UnicodeString target("xy abc xaba yz aba");
+        expect(*reversed, source, target);
+    } else {
+        reportParseError(UnicodeString("FAIL: Couldn't create reversed transliterator"), parseError, status);
+    }
+    delete reversed;
+
+    // The final step -- checking that every syntactically ill-formed rule set in
+    // BOGUS_BEGIN_END_RULES is rejected -- stays disabled until the real
+    // ::BEGIN/::END support is in place.
+/*
+    for (int32_t i = 0; i < BOGUS_BEGIN_END_RULES_length; i++) {
+        UParseError parseError;
+        UErrorCode status = U_ZERO_ERROR;
+        Transliterator* t = Transliterator::createFromRules("foo", UnicodeString(BOGUS_BEGIN_END_RULES[i]),
+                UTRANS_FORWARD, parseError, status);
+        if (!U_FAILURE(status)) {
+            delete t;
+            errln((UnicodeString)"Should have gotten syntax error from " + BOGUS_BEGIN_END_RULES[i]);
+        }
+    }
+*/
+}
+
+void TransliteratorTest::TestBeginEndToRules() {
+    // Round-trip variant of TestBeginEnd(): for each test case, build a
+    // Transliterator from the rules, ask it for its rules via toRules(), build a
+    // second Transliterator from that generated text, and run the test against
+    // the second one. This checks that the generated rule set does the same
+    // thing as the rule set it was generated from.
+    for (int32_t offset = 0; offset < BEGIN_END_TEST_CASES_length; offset += 3) {
+        const char* const* testCase = &BEGIN_END_TEST_CASES[offset];
+        UParseError parseError;
+        UErrorCode status = U_ZERO_ERROR;
+
+        Transliterator* original = Transliterator::createFromRules(
+                "--", UnicodeString(testCase[0], -1, US_INV),
+                UTRANS_FORWARD, parseError, status);
+        if (U_FAILURE(status)) {
+            reportParseError(UnicodeString("FAIL: Couldn't create transliterator"), parseError, status);
+            continue;
+        }
+
+        UnicodeString generatedRules;
+        original->toRules(generatedRules, TRUE);
+        Transliterator* regenerated = Transliterator::createFromRules(
+                (UnicodeString)"Test case #" + (offset / 3), generatedRules,
+                UTRANS_FORWARD, parseError, status);
+        if (U_FAILURE(status)) {
+            reportParseError(UnicodeString("FAIL: Couldn't create transliterator from generated rules"),
+                    parseError, status);
+        } else {
+            const UnicodeString input(testCase[1], -1, US_INV);
+            const UnicodeString expected(testCase[2], -1, US_INV);
+            expect(*regenerated, input, expected);
+            delete regenerated;
+        }
+        // The first transliterator is released on both paths above.
+        delete original;
+    }
+
+    // Same round trip for the reversible rule set [17], built in the reverse
+    // direction; its generated rules are then parsed in the forward direction.
+    UParseError parseError;
+    UErrorCode status = U_ZERO_ERROR;
+    Transliterator* reversed = Transliterator::createFromRules(
+            "Reversed", UnicodeString(BEGIN_END_RULES[17]),
+            UTRANS_REVERSE, parseError, status);
+    if (U_FAILURE(status)) {
+        reportParseError(UnicodeString("FAIL: Couldn't create reversed transliterator"), parseError, status);
+        return;
+    }
+
+    UnicodeString generatedRules;
+    reversed->toRules(generatedRules, FALSE);
+    Transliterator* reversed2 = Transliterator::createFromRules(
+            "Reversed", generatedRules, UTRANS_FORWARD, parseError, status);
+    if (U_FAILURE(status)) {
+        reportParseError(UnicodeString("FAIL: Couldn't create reversed transliterator from generated rules"),
+                parseError, status);
+    } else {
+        const UnicodeString source("xy XY XYZ yz YZ");
+        const UnicodeString target("xy abc xaba yz aba");
+        expect(*reversed2, source, target);
+        delete reversed2;
+    }
+    delete reversed;
+}
+
+void TransliteratorTest::TestRegisterAlias() {
+ UnicodeString longID("Lower;[aeiou]Upper");
+ UnicodeString shortID("Any-CapVowels");
+ UnicodeString reallyShortID("CapVowels");
+
+ Transliterator::registerAlias(shortID, longID);
+
+ UErrorCode err = U_ZERO_ERROR;
+ Transliterator* t1 = Transliterator::createInstance(longID, UTRANS_FORWARD, err);
+ if (U_FAILURE(err)) {
+ errln("Failed to instantiate transliterator with long ID");
+ Transliterator::unregister(shortID);
+ return;
+ }
+ Transliterator* t2 = Transliterator::createInstance(reallyShortID, UTRANS_FORWARD, err);
+ if (U_FAILURE(err)) {
+ errln("Failed to instantiate transliterator with short ID");
+ delete t1;
+ Transliterator::unregister(shortID);
+ return;
+ }
+
+ if (t1->getID() != longID)
+ errln("Transliterator instantiated with long ID doesn't have long ID");
+ if (t2->getID() != reallyShortID)
+ errln("Transliterator instantiated with short ID doesn't have short ID");
+
+ UnicodeString rules1;
+ UnicodeString rules2;
+
+ t1->toRules(rules1, TRUE);
+ t2->toRules(rules2, TRUE);
+ if (rules1 != rules2)
+ errln("Alias transliterators aren't the same");
+
+ delete t1;
+ delete t2;
+ Transliterator::unregister(shortID);
+
+ t1 = Transliterator::createInstance(shortID, UTRANS_FORWARD, err);
+ if (U_SUCCESS(err)) {
+ errln("Instantiation with short ID succeeded after short ID was unregistered");
+ delete t1;
+ }
+
+ // try the same thing again, but this time with something other than
+ // an instance of CompoundTransliterator
+ UnicodeString realID("Latin-Greek");
+ UnicodeString fakeID("Latin-dlgkjdflkjdl");
+ Transliterator::registerAlias(fakeID, realID);
+
+ err = U_ZERO_ERROR;
+ t1 = Transliterator::createInstance(realID, UTRANS_FORWARD, err);
+ if (U_FAILURE(err)) {
+ dataerrln("Failed to instantiate transliterator with real ID - %s", u_errorName(err));
+ Transliterator::unregister(realID);
+ return;
+ }
+ t2 = Transliterator::createInstance(fakeID, UTRANS_FORWARD, err);
+ if (U_FAILURE(err)) {
+ errln("Failed to instantiate transliterator with fake ID");
+ delete t1;
+ Transliterator::unregister(realID);
+ return;
+ }
+
+ t1->toRules(rules1, TRUE);
+ t2->toRules(rules2, TRUE);
+ if (rules1 != rules2)
+ errln("Alias transliterators aren't the same");
+
+ delete t1;
+ delete t2;
+ Transliterator::unregister(fakeID);