+/**
+ * Exercises Transliterator::registerAlias() / Transliterator::unregister().
+ *
+ * Part 1 registers "Any-CapVowels" as an alias for the compound ID
+ * "Lower;[aeiou]Upper", instantiates both, compares their IDs and rules,
+ * then unregisters the alias and checks that it can no longer be created.
+ *
+ * Part 2 repeats the rule comparison with an alias ("Latin-dlgkjdflkjdl")
+ * for "Latin-Greek", i.e. something other than a CompoundTransliterator.
+ *
+ * Every exit path unregisters the alias that was registered by this test
+ * and deletes any transliterator instances it created.
+ */
+void TransliteratorTest::TestRegisterAlias() {
+    UnicodeString longID("Lower;[aeiou]Upper");
+    UnicodeString shortID("Any-CapVowels");
+    // Same alias without the "Any-" source part; used for instantiation below.
+    UnicodeString reallyShortID("CapVowels");
+
+    Transliterator::registerAlias(shortID, longID);
+
+    UErrorCode err = U_ZERO_ERROR;
+    Transliterator* t1 = Transliterator::createInstance(longID, UTRANS_FORWARD, err);
+    if (U_FAILURE(err)) {
+        errln("Failed to instantiate transliterator with long ID");
+        Transliterator::unregister(shortID);
+        return;
+    }
+    Transliterator* t2 = Transliterator::createInstance(reallyShortID, UTRANS_FORWARD, err);
+    if (U_FAILURE(err)) {
+        errln("Failed to instantiate transliterator with short ID");
+        delete t1;
+        Transliterator::unregister(shortID);
+        return;
+    }
+
+    // Each instance must report the ID it was requested with.
+    if (t1->getID() != longID)
+        errln("Transliterator instantiated with long ID doesn't have long ID");
+    if (t2->getID() != reallyShortID)
+        errln("Transliterator instantiated with short ID doesn't have short ID");
+
+    UnicodeString rules1;
+    UnicodeString rules2;
+
+    // The alias and its target must serialize to the same rules.
+    t1->toRules(rules1, TRUE);
+    t2->toRules(rules2, TRUE);
+    if (rules1 != rules2)
+        errln("Alias transliterators aren't the same");
+
+    delete t1;
+    delete t2;
+    Transliterator::unregister(shortID);
+
+    // Once unregistered, the alias must no longer be instantiable.
+    t1 = Transliterator::createInstance(shortID, UTRANS_FORWARD, err);
+    if (U_SUCCESS(err)) {
+        errln("Instantiation with short ID succeeded after short ID was unregistered");
+        delete t1;
+    }
+
+    // try the same thing again, but this time with something other than
+    // an instance of CompoundTransliterator
+    UnicodeString realID("Latin-Greek");
+    UnicodeString fakeID("Latin-dlgkjdflkjdl");
+    Transliterator::registerAlias(fakeID, realID);
+
+    err = U_ZERO_ERROR;
+    t1 = Transliterator::createInstance(realID, UTRANS_FORWARD, err);
+    if (U_FAILURE(err)) {
+        dataerrln("Failed to instantiate transliterator with real ID - %s", u_errorName(err));
+        // Clean up the alias this test registered; the real ID is not ours
+        // to unregister.
+        Transliterator::unregister(fakeID);
+        return;
+    }
+    t2 = Transliterator::createInstance(fakeID, UTRANS_FORWARD, err);
+    if (U_FAILURE(err)) {
+        errln("Failed to instantiate transliterator with fake ID");
+        delete t1;
+        // Same as above: remove only the alias, never the real entry.
+        Transliterator::unregister(fakeID);
+        return;
+    }
+
+    t1->toRules(rules1, TRUE);
+    t2->toRules(rules2, TRUE);
+    if (rules1 != rules2)
+        errln("Alias transliterators aren't the same");
+
+    delete t1;
+    delete t2;
+    Transliterator::unregister(fakeID);
+}
+
+/**
+ * Checks utrans_stripRules() on a small rule source made of a comment-only
+ * line followed by one rule with a trailing comment. The expected output is
+ * just the rule text "\uE001>\u0C01;".
+ */
+void TransliteratorTest::TestRuleStripping() {
+    /*
+#
+\uE001>\u0C01; # SIGN
+    */
+    static const UChar rule[] = {
+        0x0023,0x0020,0x000D,0x000A,
+        0xE001,0x003E,0x0C01,0x003B,0x0020,0x0023,0x0020,0x0053,0x0049,0x0047,0x004E,0
+    };
+    static const UChar expectedRule[] = {
+        0xE001,0x003E,0x0C01,0x003B,0
+    };
+    // Element count of rule[], including its terminating 0; also used as the
+    // capacity of the output buffer.
+    const int32_t ruleLength = (int32_t)(sizeof(rule)/sizeof(rule[0]));
+    UChar result[sizeof(rule)/sizeof(rule[0])];
+    UErrorCode status = U_ZERO_ERROR;
+    int32_t len = utrans_stripRules(rule, ruleLength, result, &status);
+    if (U_FAILURE(status)) {
+        // Do not inspect result/len when the call itself reported an error.
+        errln("utrans_stripRules failed - %s", u_errorName(status));
+        return;
+    }
+    if (len != u_strlen(expectedRule)) {
+        errln("utrans_stripRules return len = %d", len);
+    }
+    if (u_strncmp(expectedRule, result, len) != 0) {
+        errln("utrans_stripRules did not return expected string");
+    }
+}
+
+/**
+ * Checks the Halfwidth-Fullwidth and Fullwidth-Halfwidth transliterators
+ * against a table of paired halfwidth/fullwidth strings (ticket 6281).
+ */
+void TransliteratorTest::TestHalfwidthFullwidth(void) {
+    UErrorCode status = U_ZERO_ERROR;
+    UParseError parseError;
+    Transliterator* toFull = Transliterator::createInstance("Halfwidth-Fullwidth", UTRANS_FORWARD, parseError, status);
+    Transliterator* toHalf = Transliterator::createInstance("Fullwidth-Halfwidth", UTRANS_FORWARD, parseError, status);
+    if (toFull == 0 || toHalf == 0) {
+        dataerrln("FAIL: createInstance failed - %s", u_errorName(status));
+        delete toFull;
+        delete toHalf;
+        return;
+    }
+
+    // Flat table of 3n entries; each test case is the triple
+    //   direction tag: "hf" | "fh" | "both",
+    //   <Halfwidth> text,
+    //   <Fullwidth> text
+    const char* cases[] = {
+        "both",
+        "\\uFFE9\\uFFEA\\uFFEB\\uFFEC\\u0061\\uFF71\\u00AF\\u0020",
+        "\\u2190\\u2191\\u2192\\u2193\\uFF41\\u30A2\\uFFE3\\u3000",
+    };
+    const int32_t entryCount = (int32_t)(sizeof(cases) / sizeof(cases[0]));
+
+    for (int32_t base = 0; base < entryCount; base += 3) {
+        UnicodeString half = CharsToUnicodeString(cases[base+1]);
+        UnicodeString full = CharsToUnicodeString(cases[base+2]);
+        // Only the first character of the tag selects the direction(s).
+        const char tag = *cases[base];
+        if (tag == 0x68) {          // 'h': Halfwidth-Fullwidth only
+            expect(*toFull, half, full);
+        } else if (tag == 0x66) {   // 'f': Fullwidth-Halfwidth only
+            expect(*toHalf, full, half);
+        } else if (tag == 0x62) {   // 'b': both directions
+            expect(*toFull, half, full);
+            expect(*toHalf, full, half);
+        }
+    }
+    delete toFull;
+    delete toHalf;
+}
+
+
+ /**
+  * Test Thai.  The text is the first paragraph of "What is Unicode" from the Unicode.org web site.
+  *             TODO:  confirm that the expected results are correct.
+  *             For now, test just confirms that C++ and Java give identical results.
+  *
+  * The whole body is compiled only when break iteration is enabled
+  * (!UCONFIG_NO_BREAK_ITERATION). The "Any-Latin" instance is deleted on
+  * every path that obtained one, including the failure-status path.
+  */
+void TransliteratorTest::TestThai(void) {
+#if !UCONFIG_NO_BREAK_ITERATION
+    UParseError parseError;
+    UErrorCode status = U_ZERO_ERROR;
+    Transliterator* tr = Transliterator::createInstance("Any-Latin", UTRANS_FORWARD, parseError, status);
+    if (tr == 0) {
+        dataerrln("FAIL: createInstance failed - %s", u_errorName(status));
+        return;
+    }
+    if (U_FAILURE(status)) {
+        errln("FAIL: createInstance failed with %s", u_errorName(status));
+        // tr is non-null here, so it must be released before bailing out.
+        delete tr;
+        return;
+    }
+    // Thai source text, stored with \uXXXX escapes and unescaped below.
+    const char *thaiText =
+        "\\u0e42\\u0e14\\u0e22\\u0e1e\\u0e37\\u0e49\\u0e19\\u0e10\\u0e32\\u0e19\\u0e41\\u0e25\\u0e49\\u0e27, \\u0e04\\u0e2d"
+        "\\u0e21\\u0e1e\\u0e34\\u0e27\\u0e40\\u0e15\\u0e2d\\u0e23\\u0e4c\\u0e08\\u0e30\\u0e40\\u0e01\\u0e35\\u0e48\\u0e22"
+        "\\u0e27\\u0e02\\u0e49\\u0e2d\\u0e07\\u0e01\\u0e31\\u0e1a\\u0e40\\u0e23\\u0e37\\u0e48\\u0e2d\\u0e07\\u0e02\\u0e2d"
+        "\\u0e07\\u0e15\\u0e31\\u0e27\\u0e40\\u0e25\\u0e02. \\u0e04\\u0e2d\\u0e21\\u0e1e\\u0e34\\u0e27\\u0e40\\u0e15\\u0e2d"
+        "\\u0e23\\u0e4c\\u0e08\\u0e31\\u0e14\\u0e40\\u0e01\\u0e47\\u0e1a\\u0e15\\u0e31\\u0e27\\u0e2d\\u0e31\\u0e01\\u0e29"
+        "\\u0e23\\u0e41\\u0e25\\u0e30\\u0e2d\\u0e31\\u0e01\\u0e02\\u0e23\\u0e30\\u0e2d\\u0e37\\u0e48\\u0e19\\u0e46 \\u0e42"
+        "\\u0e14\\u0e22\\u0e01\\u0e32\\u0e23\\u0e01\\u0e33\\u0e2b\\u0e19\\u0e14\\u0e2b\\u0e21\\u0e32\\u0e22\\u0e40\\u0e25"
+        "\\u0e02\\u0e43\\u0e2b\\u0e49\\u0e2a\\u0e33\\u0e2b\\u0e23\\u0e31\\u0e1a\\u0e41\\u0e15\\u0e48\\u0e25\\u0e30\\u0e15"
+        "\\u0e31\\u0e27. \\u0e01\\u0e48\\u0e2d\\u0e19\\u0e2b\\u0e19\\u0e49\\u0e32\\u0e17\\u0e35\\u0e48\\u0e4a Unicode \\u0e08"
+        "\\u0e30\\u0e16\\u0e39\\u0e01\\u0e2a\\u0e23\\u0e49\\u0e32\\u0e07\\u0e02\\u0e36\\u0e49\\u0e19, \\u0e44\\u0e14\\u0e49"
+        "\\u0e21\\u0e35\\u0e23\\u0e30\\u0e1a\\u0e1a encoding \\u0e2d\\u0e22\\u0e39\\u0e48\\u0e2b\\u0e25\\u0e32\\u0e22\\u0e23"
+        "\\u0e49\\u0e2d\\u0e22\\u0e23\\u0e30\\u0e1a\\u0e1a\\u0e2a\\u0e33\\u0e2b\\u0e23\\u0e31\\u0e1a\\u0e01\\u0e32\\u0e23"
+        "\\u0e01\\u0e33\\u0e2b\\u0e19\\u0e14\\u0e2b\\u0e21\\u0e32\\u0e22\\u0e40\\u0e25\\u0e02\\u0e40\\u0e2b\\u0e25\\u0e48"
+        "\\u0e32\\u0e19\\u0e35\\u0e49. \\u0e44\\u0e21\\u0e48\\u0e21\\u0e35 encoding \\u0e43\\u0e14\\u0e17\\u0e35\\u0e48"
+        "\\u0e21\\u0e35\\u0e08\\u0e33\\u0e19\\u0e27\\u0e19\\u0e15\\u0e31\\u0e27\\u0e2d\\u0e31\\u0e01\\u0e02\\u0e23\\u0e30"
+        "\\u0e21\\u0e32\\u0e01\\u0e40\\u0e1e\\u0e35\\u0e22\\u0e07\\u0e1e\\u0e2d: \\u0e22\\u0e01\\u0e15\\u0e31\\u0e27\\u0e2d"
+        "\\u0e22\\u0e48\\u0e32\\u0e07\\u0e40\\u0e0a\\u0e48\\u0e19, \\u0e40\\u0e09\\u0e1e\\u0e32\\u0e30\\u0e43\\u0e19\\u0e01"
+        "\\u0e25\\u0e38\\u0e48\\u0e21\\u0e2a\\u0e2b\\u0e20\\u0e32\\u0e1e\\u0e22\\u0e38\\u0e42\\u0e23\\u0e1b\\u0e40\\u0e1e"
+        "\\u0e35\\u0e22\\u0e07\\u0e41\\u0e2b\\u0e48\\u0e07\\u0e40\\u0e14\\u0e35\\u0e22\\u0e27 \\u0e01\\u0e47\\u0e15\\u0e49"
+        "\\u0e2d\\u0e07\\u0e01\\u0e32\\u0e23\\u0e2b\\u0e25\\u0e32\\u0e22 encoding \\u0e43\\u0e19\\u0e01\\u0e32\\u0e23\\u0e04"
+        "\\u0e23\\u0e2d\\u0e1a\\u0e04\\u0e25\\u0e38\\u0e21\\u0e17\\u0e38\\u0e01\\u0e20\\u0e32\\u0e29\\u0e32\\u0e43\\u0e19"
+        "\\u0e01\\u0e25\\u0e38\\u0e48\\u0e21. \\u0e2b\\u0e23\\u0e37\\u0e2d\\u0e41\\u0e21\\u0e49\\u0e41\\u0e15\\u0e48\\u0e43"
+        "\\u0e19\\u0e20\\u0e32\\u0e29\\u0e32\\u0e40\\u0e14\\u0e35\\u0e48\\u0e22\\u0e27 \\u0e40\\u0e0a\\u0e48\\u0e19 \\u0e20"
+        "\\u0e32\\u0e29\\u0e32\\u0e2d\\u0e31\\u0e07\\u0e01\\u0e24\\u0e29 \\u0e01\\u0e47\\u0e44\\u0e21\\u0e48\\u0e21\\u0e35"
+        " encoding \\u0e43\\u0e14\\u0e17\\u0e35\\u0e48\\u0e40\\u0e1e\\u0e35\\u0e22\\u0e07\\u0e1e\\u0e2d\\u0e2a\\u0e33\\u0e2b"
+        "\\u0e23\\u0e31\\u0e1a\\u0e17\\u0e38\\u0e01\\u0e15\\u0e31\\u0e27\\u0e2d\\u0e31\\u0e01\\u0e29\\u0e23, \\u0e40\\u0e04"
+        "\\u0e23\\u0e37\\u0e48\\u0e2d\\u0e07\\u0e2b\\u0e21\\u0e32\\u0e22\\u0e27\\u0e23\\u0e23\\u0e04\\u0e15\\u0e2d\\u0e19"
+        " \\u0e41\\u0e25\\u0e30\\u0e2a\\u0e31\\u0e0d\\u0e25\\u0e31\\u0e01\\u0e29\\u0e13\\u0e4c\\u0e17\\u0e32\\u0e07\\u0e40"
+        "\\u0e17\\u0e04\\u0e19\\u0e34\\u0e04\\u0e17\\u0e35\\u0e48\\u0e43\\u0e0a\\u0e49\\u0e01\\u0e31\\u0e19\\u0e2d\\u0e22"
+        "\\u0e39\\u0e48\\u0e17\\u0e31\\u0e48\\u0e27\\u0e44\\u0e1b.";
+
+    // Expected Latin result, also stored escaped.
+    const char *latinText =
+        "doy ph\\u1ee5\\u0304\\u0302n \\u1e6d\\u0304h\\u0101n l\\u00e6\\u0302w, khxmphiwtexr\\u0312 ca ke\\u012b\\u0300"
+        "ywk\\u0304\\u0125xng k\\u1ea1b re\\u1ee5\\u0304\\u0300xng k\\u0304hxng t\\u1ea1wlek\\u0304h. khxmphiwtexr"
+        "\\u0312 c\\u1ea1d k\\u0115b t\\u1ea1w x\\u1ea1ks\\u0304\\u02b9r l\\u00e6a x\\u1ea1kk\\u0304h ra x\\u1ee5\\u0304"
+        "\\u0300n\\u00ab doy k\\u0101r k\\u1ea3h\\u0304nd h\\u0304m\\u0101ylek\\u0304h h\\u0304\\u0131\\u0302 s\\u0304"
+        "\\u1ea3h\\u0304r\\u1ea1b t\\u00e6\\u0300la t\\u1ea1w. k\\u0300xn h\\u0304n\\u0302\\u0101 th\\u012b\\u0300\\u0301"
+        " Unicode ca t\\u0304h\\u016bk s\\u0304r\\u0302\\u0101ng k\\u0304h\\u1ee5\\u0302n, d\\u1ecb\\u0302 m\\u012b "
+        "rabb encoding xy\\u016b\\u0300 h\\u0304l\\u0101y r\\u0302xy rabb s\\u0304\\u1ea3h\\u0304r\\u1ea1b k\\u0101"
+        "r k\\u1ea3h\\u0304nd h\\u0304m\\u0101ylek\\u0304h h\\u0304el\\u0300\\u0101 n\\u012b\\u0302. m\\u1ecb\\u0300m"
+        "\\u012b encoding d\\u0131 th\\u012b\\u0300 m\\u012b c\\u1ea3nwn t\\u1ea1w x\\u1ea1kk\\u0304hra m\\u0101k p"
+        "he\\u012byng phx: yk t\\u1ea1wx\\u1ef3\\u0101ng ch\\u00e8n, c\\u0304heph\\u0101a n\\u0131 kl\\u00f9m s\\u0304"
+        "h\\u0304p\\u0323h\\u0101ph yurop phe\\u012byng h\\u0304\\u00e6\\u0300ng de\\u012byw k\\u0306 t\\u0302xngk\\u0101"
+        "r h\\u0304l\\u0101y encoding n\\u0131 k\\u0101r khrxbkhlum thuk p\\u0323h\\u0101s\\u0304\\u02b9\\u0101 n\\u0131"
+        " kl\\u00f9m. h\\u0304r\\u1ee5\\u0304x m\\u00e6\\u0302t\\u00e6\\u0300 n\\u0131 p\\u0323h\\u0101s\\u0304\\u02b9"
+        "\\u0101 de\\u012b\\u0300yw ch\\u00e8n p\\u0323h\\u0101s\\u0304\\u02b9\\u0101 x\\u1ea1ngkvs\\u0304\\u02b9 k\\u0306"
+        " m\\u1ecb\\u0300m\\u012b encoding d\\u0131 th\\u012b\\u0300 phe\\u012byng phx s\\u0304\\u1ea3h\\u0304r\\u1ea1"
+        "b thuk t\\u1ea1w x\\u1ea1ks\\u0304\\u02b9r, kher\\u1ee5\\u0304\\u0300xngh\\u0304m\\u0101y wrrkh txn l\\u00e6"
+        "a s\\u0304\\u1ea1\\u1ef5l\\u1ea1ks\\u0304\\u02b9\\u1e47\\u0312 th\\u0101ng thekhnikh th\\u012b\\u0300 ch\\u0131"
+        "\\u0302 k\\u1ea1n xy\\u016b\\u0300 th\\u1ea1\\u0300wp\\u1ecb.";
+
+
+    UnicodeString  xlitText(thaiText);
+    xlitText = xlitText.unescape();
+    // NOTE(review): the text is run through tr here and then handed to
+    // expect() as its input below, so expect() presumably sees text that is
+    // already Latin - confirm this double pass is intended.
+    tr->transliterate(xlitText);
+
+    UnicodeString expectedText(latinText);
+    expectedText = expectedText.unescape();
+    expect(*tr, xlitText, expectedText);
+
+    delete tr;
+#endif
+}
+
+