X-Git-Url: https://git.saurik.com/apple/icu.git/blobdiff_plain/b75a7d8f3b4adbae880cab104ce2c6a50eee4db2..46f4442e9a5a4f3b98b7c1083586332f6a8a99a4:/icuSources/test/intltest/transtst.cpp diff --git a/icuSources/test/intltest/transtst.cpp b/icuSources/test/intltest/transtst.cpp index 7793f48d..556a07c7 100644 --- a/icuSources/test/intltest/transtst.cpp +++ b/icuSources/test/intltest/transtst.cpp @@ -1,6 +1,6 @@ /* ********************************************************************** -* Copyright (C) 1999-2003, International Business Machines +* Copyright (C) 1999-2008, International Business Machines * Corporation and others. All Rights Reserved. ********************************************************************** * Date Name Description @@ -17,20 +17,16 @@ #include "unicode/dtfmtsym.h" #include "unicode/normlzr.h" #include "unicode/translit.h" -#include "unicode/ucnv.h" -#include "unicode/ucnv_err.h" #include "unicode/uchar.h" #include "unicode/unifilt.h" #include "unicode/uniset.h" -#include "unitohex.h" -#include "unicode/utypes.h" #include "unicode/ustring.h" #include "unicode/usetiter.h" #include "unicode/uscript.h" -#include "hextouni.h" #include "cpdtrans.h" #include "nultrans.h" #include "rbt.h" +#include "rbt_pars.h" #include "anytrans.h" #include "esctrn.h" #include "name2uni.h" @@ -42,6 +38,8 @@ #include "unesctrn.h" #include "uni2name.h" #include "cstring.h" +#include "cmemory.h" +#include <stdio.h> /*********************************************************************** @@ -102,6 +100,8 @@ TransliteratorTest::TransliteratorTest() { } +TransliteratorTest::~TransliteratorTest() {} + void TransliteratorTest::runIndexedTest(int32_t index, UBool exec, const char* &name, char* /*par*/) { @@ -184,10 +184,17 @@ TransliteratorTest::runIndexedTest(int32_t index, UBool exec, TESTCASE(75,TestAllCodepoints); TESTCASE(76,TestBoilerplate); TESTCASE(77,TestAlternateSyntax); + TESTCASE(78,TestBeginEnd); + TESTCASE(79,TestBeginEndToRules); + TESTCASE(80,TestRegisterAlias); + TESTCASE(81,TestRuleStripping); + TESTCASE(82,TestHalfwidthFullwidth); + TESTCASE(83,TestThai); default: name = ""; break; } } +static const UVersionInfo ICU_39 = {3,9,4,0}; /** * Make sure every system transliterator can be instantiated. * @@ -196,15 +203,32 @@ TransliteratorTest::runIndexedTest(int32_t index, UBool exec, * instantiates everything as well. */ void TransliteratorTest::TestInstantiation() { + UErrorCode ec = U_ZERO_ERROR; + StringEnumeration* avail = Transliterator::getAvailableIDs(ec); + assertSuccess("getAvailableIDs()", ec); + assertTrue("getAvailableIDs()!=NULL", avail!=NULL); int32_t n = Transliterator::countAvailableIDs(); + assertTrue("getAvailableIDs().count()==countAvailableIDs()", + avail->count(ec) == n); + assertSuccess("count()", ec); UnicodeString name; for (int32_t i=0; i<n; ++i) { - UnicodeString id = Transliterator::getAvailableID(i); + const UnicodeString& id = *avail->snext(ec); + if (!assertSuccess("snext()", ec) || + !assertTrue("snext()!=NULL", (&id)!=NULL, TRUE)) { + break; + } + UnicodeString id2 = Transliterator::getAvailableID(i); if (id.length() < 1) { errln(UnicodeString("FAIL: getAvailableID(") + i + ") returned empty string"); continue; } + if (id != id2) { + errln(UnicodeString("FAIL: getAvailableID(") + + i + ") != getAvailableIDs().snext()"); + continue; + } UParseError parseError; UErrorCode status = U_ZERO_ERROR; Transliterator* t = Transliterator::createInstance(id, @@ -249,6 +273,9 @@ void TransliteratorTest::TestInstantiation() { delete t; } } + assertTrue("snext()==NULL", avail->snext(ec)==NULL); + assertSuccess("snext()", ec); + delete avail; // Now test the failure path UParseError parseError; @@ -298,7 +325,8 @@ void TransliteratorTest::TestSimpleRules(void) { /* Test categories */ UErrorCode status = U_ZERO_ERROR; - RuleBasedTransliterator t( + UParseError parseError; + Transliterator *t = Transliterator::createFromRules( "<ID>", UnicodeString("$dummy=").append((UChar)0xE100) + UnicodeString(";" @@ -309,12 +337,14 @@ void TransliteratorTest::TestSimpleRules(void) { "'!' { $lu > '^';" "$lu > '*';" "a > ERROR", ""), + UTRANS_FORWARD, parseError, status); if (U_FAILURE(status)) { errln("FAIL: RBT constructor failed"); return; } - expect(t, "abcdefgABCDEFGU", "&bcd&fg!^**!^*&"); + expect(*t, "abcdefgABCDEFGU", "&bcd&fg!^**!^*&"); + delete t; } /** @@ -375,29 +405,35 @@ void TransliteratorTest::TestRuleBasedInverse(void) { int32_t DATA_length = (int32_t)(sizeof(DATA) / sizeof(DATA[0])); UErrorCode status = U_ZERO_ERROR; - RuleBasedTransliterator fwd("<ID>", RULES, status); - RuleBasedTransliterator rev("<ID>", RULES, - UTRANS_REVERSE, status); + UParseError parseError; + Transliterator *fwd = Transliterator::createFromRules("<ID>", RULES, + UTRANS_FORWARD, parseError, status); + Transliterator *rev = Transliterator::createFromRules("<ID>", RULES, + UTRANS_REVERSE, parseError, status); if (U_FAILURE(status)) { errln("FAIL: RBT constructor failed"); return; } for (int32_t i=0; i<DATA_length; i+=2) { - expect(fwd, DATA[i], DATA[i+1]); - expect(rev, DATA[i+1], DATA[i]); + expect(*fwd, DATA[i], DATA[i+1]); + expect(*rev, DATA[i+1], DATA[i]); } + delete fwd; + delete rev; } /** * Basic test of keyboard. */ void TransliteratorTest::TestKeyboard(void) { + UParseError parseError; UErrorCode status = U_ZERO_ERROR; - RuleBasedTransliterator t("<ID>", + Transliterator *t = Transliterator::createFromRules("<ID>", UnicodeString("psch>Y;") +"ps>y;" +"ch>x;" +"a>A;", + UTRANS_FORWARD, parseError, status); if (U_FAILURE(status)) { errln("FAIL: RBT constructor failed"); @@ -414,19 +450,22 @@ void TransliteratorTest::TestKeyboard(void) { 0, "AycAY", // null means finishKeyboardTransliteration }; - keyboardAux(t, DATA, (int32_t)(sizeof(DATA)/sizeof(DATA[0]))); + keyboardAux(*t, DATA, (int32_t)(sizeof(DATA)/sizeof(DATA[0]))); + delete t; } /** * Basic test of keyboard with cursor. */ void TransliteratorTest::TestKeyboard2(void) { + UParseError parseError; UErrorCode status = U_ZERO_ERROR; - RuleBasedTransliterator t("<ID>", + Transliterator *t = Transliterator::createFromRules("<ID>", UnicodeString("ych>Y;") +"ps>|y;" +"ch>x;" +"a>A;", + UTRANS_FORWARD, parseError, status); if (U_FAILURE(status)) { errln("FAIL: RBT constructor failed"); @@ -446,7 +485,8 @@ void TransliteratorTest::TestKeyboard2(void) { 0, "AycAY", // null means finishKeyboardTransliteration }; - keyboardAux(t, DATA, (int32_t)(sizeof(DATA)/sizeof(DATA[0]))); + keyboardAux(*t, DATA, (int32_t)(sizeof(DATA)/sizeof(DATA[0]))); + delete t; } /** @@ -472,13 +512,15 @@ void TransliteratorTest::TestKeyboard3(void) { 0, "abycz", // null means finishKeyboardTransliteration }; + UParseError parseError; UErrorCode status = U_ZERO_ERROR; - RuleBasedTransliterator t("<ID>", RULES, status); + Transliterator *t = Transliterator::createFromRules("<ID>", RULES, UTRANS_FORWARD, parseError, status); if (U_FAILURE(status)) { errln("FAIL: RBT constructor failed"); return; } - keyboardAux(t, DATA, (int32_t)(sizeof(DATA)/sizeof(DATA[0]))); + keyboardAux(*t, DATA, (int32_t)(sizeof(DATA)/sizeof(DATA[0]))); + delete t; } void TransliteratorTest::keyboardAux(const Transliterator& t, @@ -644,6 +686,14 @@ void TransliteratorTest::TestFiltering(void) { } else { logln(UnicodeString("FAIL: \"") + s + "\", wanted \"" + exp + "\""); } + + // ICU4C ONLY. Do not find Transliterator.orphanFilter() in ICU4J. + UnicodeFilter *f = hex->orphanFilter(); + if (f == NULL){ + errln("FAIL: orphanFilter() should get a UnicodeFilter"); + } else { + delete f; + } delete hex; } @@ -690,13 +740,15 @@ void TransliteratorTest::TestPatternQuoting(void) { for (int32_t i=0; i<3; i+=3) { logln(UnicodeString("Pattern: ") + prettify(DATA[i])); + UParseError parseError; UErrorCode status = U_ZERO_ERROR; - RuleBasedTransliterator t("<ID>", DATA[i], status); + Transliterator *t = Transliterator::createFromRules("<ID>", DATA[i], UTRANS_FORWARD, parseError, status); if (U_FAILURE(status)) { errln("RBT constructor failed"); } else { - expect(t, DATA[i+1], DATA[i+2]); + expect(*t, DATA[i+1], DATA[i+2]); } + delete t; } } @@ -744,10 +796,12 @@ void TransliteratorTest::TestJ277(void) { "y <> $ypsilon;" "n <> $nu;", ""); - RuleBasedTransliterator mini("mini", rules, UTRANS_REVERSE, status); + Transliterator *mini = Transliterator::createFromRules("mini", rules, UTRANS_REVERSE, parseError, status); if (U_FAILURE(status)) { errln("FAIL: Transliterator constructor failed"); return; } - expect(mini, syn, "syn"); - expect(mini, sayn, "saun"); + expect(*mini, syn, "syn"); + expect(*mini, sayn, "saun"); + delete mini; + mini = NULL; #if !UCONFIG_NO_FORMATTING // Transliterate the Greek locale data @@ -784,22 +838,27 @@ void TransliteratorTest::TestJ277(void) { * Prefix, suffix support in hex transliterators */ void TransliteratorTest::TestJ243(void) { - UErrorCode status = U_ZERO_ERROR; + UErrorCode ec = U_ZERO_ERROR; // Test default Hex-Any, which should handle // \u, \U, u+, and U+ - HexToUnicodeTransliterator hex; - expect(hex, UnicodeString("\\u0041+\\U0042,u+0043uu+0044z", ""), "A+B,CuDz"); - // Try a custom Hex-Unicode - // \uXXXX and &#xXXXX; - status = U_ZERO_ERROR; - HexToUnicodeTransliterator hex2(UnicodeString("\\\\u###0;&\\#x###0\\;", ""), status); - expect(hex2, UnicodeString("\\u61\\u062\\u0063\\u00645\\u66x0123", ""), - "abcd5fx0123"); - // Try custom Any-Hex (default is tested elsewhere) - status = U_ZERO_ERROR; - UnicodeToHexTransliterator hex3(UnicodeString("&\\#x###0;", ""), status); - expect(hex3, "012", "012"); + Transliterator *hex = + Transliterator::createInstance("Hex-Any", UTRANS_FORWARD, ec); + if (assertSuccess("getInstance", ec)) { + expect(*hex, UnicodeString("\\u0041+\\U00000042,U+0043uU+0044z", ""), "A+B,CuDz"); + } + delete hex; + +// // Try a custom Hex-Unicode +// // \uXXXX and &#xXXXX; +// ec = U_ZERO_ERROR; +// HexToUnicodeTransliterator hex2(UnicodeString("\\\\u###0;&\\#x###0\\;", ""), ec); +// expect(hex2, UnicodeString("\\u61\\u062\\u0063\\u00645\\u66x0123", ""), +// "abcd5fx0123"); +// // Try custom Any-Hex (default is tested elsewhere) +// ec = U_ZERO_ERROR; +// UnicodeToHexTransliterator hex3(UnicodeString("&\\#x###0;", ""), ec); +// expect(hex3, "012", "012"); } /** @@ -816,10 +875,9 @@ void TransliteratorTest::TestJ329(void) { for (int32_t i=0; i<DATA_length; ++i) { UErrorCode status = U_ZERO_ERROR; UParseError parseError; - RuleBasedTransliterator rbt("<ID>", + Transliterator *rbt = Transliterator::createFromRules("<ID>", DATA[i].rule, UTRANS_FORWARD, - 0, parseError, status); UBool gotError = U_FAILURE(status); @@ -836,6 +894,7 @@ void TransliteratorTest::TestJ329(void) { } else { errln(UnicodeString("FAIL: ") + desc); } + delete rbt; } } @@ -859,13 +918,15 @@ void TransliteratorTest::TestSegments(void) { for (int32_t i=0; i<DATA_length; i+=3) { logln("Pattern: " + prettify(DATA[i])); + UParseError parseError; UErrorCode status = U_ZERO_ERROR; - RuleBasedTransliterator t("ID", DATA[i], status); + Transliterator *t = Transliterator::createFromRules("ID", DATA[i], UTRANS_FORWARD, parseError, status); if (U_FAILURE(status)) { errln("FAIL: RBT constructor"); } else { - expect(t, DATA[i+1], DATA[i+2]); + expect(*t, DATA[i+1], DATA[i+2]); } + delete t; } } @@ -889,13 +950,15 @@ void TransliteratorTest::TestCursorOffset(void) { for (int32_t i=0; i<DATA_length; i+=3) { logln("Pattern: " + prettify(DATA[i])); + UParseError parseError; UErrorCode status = U_ZERO_ERROR; - RuleBasedTransliterator t("<ID>", DATA[i], status); + Transliterator *t = Transliterator::createFromRules("<ID>", DATA[i], UTRANS_FORWARD, parseError, status); if (U_FAILURE(status)) { errln("FAIL: RBT constructor"); } else { - expect(t, DATA[i+1], DATA[i+2]); + expect(*t, DATA[i+1], DATA[i+2]); } + delete t; } } @@ -928,13 +991,15 @@ void TransliteratorTest::TestArbitraryVariableValues(void) { for (int32_t i=0; i<DATA_length; i+=3) { logln("Pattern: " + prettify(DATA[i])); + UParseError parseError; UErrorCode status = U_ZERO_ERROR; - RuleBasedTransliterator t("<ID>", DATA[i], status); + Transliterator *t = Transliterator::createFromRules("<ID>", DATA[i], UTRANS_FORWARD, parseError, status); if (U_FAILURE(status)) { errln("FAIL: RBT constructor"); } else { - expect(t, DATA[i+1], DATA[i+2]); + expect(*t, DATA[i+1], DATA[i+2]); } + delete t; } } @@ -970,8 +1035,9 @@ void TransliteratorTest::TestPositionHandling(void) { int32_t n = (int32_t)(sizeof(DATA) / sizeof(DATA[0])) / 3; for (int32_t i=0; i<n; i++) { UErrorCode status = U_ZERO_ERROR; - Transliterator *t = new RuleBasedTransliterator("<ID>", - DATA[3*i], status); + UParseError parseError; + Transliterator *t = Transliterator::createFromRules("<ID>", + DATA[3*i], UTRANS_FORWARD, parseError, status); if (U_FAILURE(status)) { delete t; errln("FAIL: RBT constructor"); @@ -1053,18 +1119,21 @@ void TransliteratorTest::TestCopyJ476(void) { // The real test here is what happens when the destructors are // called. So we let one object get destructed, and check to // see that its copy still works. - RuleBasedTransliterator *t2 = 0; + Transliterator *t2 = 0; { + UParseError parseError; UErrorCode status = U_ZERO_ERROR; - RuleBasedTransliterator t1("t1", "a>A;b>B;", status); + Transliterator *t1 = Transliterator::createFromRules("t1", + "a>A;b>B;'foo'+>'bar'", UTRANS_FORWARD, parseError, status); if (U_FAILURE(status)) { errln("FAIL: RBT constructor"); return; } - t2 = new RuleBasedTransliterator(t1); - expect(t1, "abc", "ABc"); + t2 = t1->clone(); // Call copy constructor under the covers. + expect(*t1, "abcfoofoo", "ABcbar"); + delete t1; } - expect(*t2, "abc", "ABc"); + expect(*t2, "abcfoofoo", "ABcbar"); delete t2; } @@ -1201,7 +1270,7 @@ void TransliteratorTest::TestNameMap(void) { // Careful: CharsToUS will convert "\\N" => "N"; use "\\\\N" for \N expect(*uni2name, CharsToUnicodeString("\\u00A0abc\\u4E01\\u00B5\\u0A81\\uFFFD\\u0004\\u0009\\u0081\\uFFFF"), CharsToUnicodeString("\\\\N{NO-BREAK SPACE}abc\\\\N{CJK UNIFIED IDEOGRAPH-4E01}\\\\N{MICRO SIGN}\\\\N{GUJARATI SIGN CANDRABINDU}\\\\N{REPLACEMENT CHARACTER}\\\\N{END OF TRANSMISSION}\\\\N{CHARACTER TABULATION}\\\\N{<control-0081>}\\\\N{<noncharacter-FFFF>}")); - expect(*name2uni, "{\\N { NO-BREAK SPACE}abc\\N{ CJK UNIFIED IDEOGRAPH-4E01 }\\N{x\\N{MICRO SIGN}\\N{GUJARATI SIGN CANDRABINDU}\\N{REPLACEMENT CHARACTER}\\N{END OF TRANSMISSION}\\N{CHARACTER TABULATION}\\N{<control-0081>}\\N{<noncharacter-FFFF>}\\N{<control-0004>}\\N{", + expect(*name2uni, UNICODE_STRING_SIMPLE("{\\N { NO-BREAK SPACE}abc\\N{ CJK UNIFIED IDEOGRAPH-4E01 }\\N{x\\N{MICRO SIGN}\\N{GUJARATI SIGN CANDRABINDU}\\N{REPLACEMENT CHARACTER}\\N{END OF TRANSMISSION}\\N{CHARACTER TABULATION}\\N{<control-0081>}\\N{<noncharacter-FFFF>}\\N{<control-0004>}\\N{"), CharsToUnicodeString("{\\u00A0abc\\u4E01\\\\N{x\\u00B5\\u0A81\\uFFFD\\u0004\\u0009\\u0081\\uFFFF\\u0004\\\\N{")); delete uni2name; @@ -1296,7 +1365,10 @@ void TransliteratorTest::TestCreateInstance(){ UnicodeString expID(DATA[i+2]); Transliterator* t = Transliterator::createInstance(id,dir,err,ec); - UnicodeString newID = t?t->getID():UnicodeString(); + UnicodeString newID; + if (t) { + newID = t->getID(); + } UBool ok = (newID == expID); if (!t) { newID = u_errorName(ec); @@ -1445,7 +1517,7 @@ void TransliteratorTest::TestCompoundRBT(void) { errln("FAIL: createFromRules failed"); return; } - expect(*t, "\\u0043at in the hat, bat on the mat", + expect(*t, UNICODE_STRING_SIMPLE("\\u0043at in the hat, bat on the mat"), "C.A.t IN tHE H.A.t, .B..A.t ON tHE M.A.t"); UnicodeString r; t->toRules(r, TRUE); @@ -1575,7 +1647,14 @@ void TransliteratorTest::TestRemove(void) { } expect(*t, "Able bodied baker's cats", "Ale odied ker's ts"); + + // extra test for RemoveTransliterator::clone(), which at one point wasn't + // duplicating the filter + Transliterator* t2 = t->clone(); + expect(*t2, "Able bodied baker's cats", "Ale odied ker's ts"); + delete t; + delete t2; } void TransliteratorTest::TestToRules(void) { @@ -1650,7 +1729,7 @@ void TransliteratorTest::TestToRules(void) { UParseError parseError; UErrorCode status = U_ZERO_ERROR; Transliterator *t = Transliterator::createFromRules("ID", - DATA[d+1], UTRANS_FORWARD, parseError, status); + UnicodeString(DATA[d+1], -1, US_INV), UTRANS_FORWARD, parseError, status); if (t == 0) { errln("FAIL: createFromRules failed"); return; @@ -1659,19 +1738,19 @@ void TransliteratorTest::TestToRules(void) { t->toRules(rules, FALSE); t->toRules(escapedRules, TRUE); UnicodeString expRules = CharsToUnicodeString(DATA[d+2]); - UnicodeString expEscapedRules(DATA[d+2]); + UnicodeString expEscapedRules(DATA[d+2], -1, US_INV); if (rules == expRules) { - logln((UnicodeString)"Ok: " + DATA[d+1] + + logln((UnicodeString)"Ok: " + UnicodeString(DATA[d+1], -1, US_INV) + " => " + rules); } else { - errln((UnicodeString)"FAIL: " + DATA[d+1] + + errln((UnicodeString)"FAIL: " + UnicodeString(DATA[d+1], -1, US_INV) + " => " + rules + ", exp " + expRules); } if (escapedRules == expEscapedRules) { - logln((UnicodeString)"Ok: " + DATA[d+1] + + logln((UnicodeString)"Ok: " + UnicodeString(DATA[d+1], -1, US_INV) + " => " + escapedRules); } else { - errln((UnicodeString)"FAIL: " + DATA[d+1] + + errln((UnicodeString)"FAIL: " + UnicodeString(DATA[d+1], -1, US_INV) + " => " + escapedRules + ", exp " + expEscapedRules); } delete t; @@ -1679,8 +1758,8 @@ void TransliteratorTest::TestToRules(void) { } else { // UnicodeSet test UErrorCode status = U_ZERO_ERROR; - UnicodeString pat(DATA[d+1]); - UnicodeString expToPat(DATA[d+2]); + UnicodeString pat(DATA[d+1], -1, US_INV); + UnicodeString expToPat(DATA[d+2], -1, US_INV); UnicodeSet set(pat, status); if (U_FAILURE(status)) { errln("FAIL: UnicodeSet ct failed"); @@ -1742,23 +1821,23 @@ void TransliteratorTest::TestSupplemental() { expectT("Any-Name", CharsToUnicodeString("\\U00010330\\U000E0061\\u00A0"), - "\\N{GOTHIC LETTER AHSA}\\N{TAG LATIN SMALL LETTER A}\\N{NO-BREAK SPACE}"); + UNICODE_STRING_SIMPLE("\\N{GOTHIC LETTER AHSA}\\N{TAG LATIN SMALL LETTER A}\\N{NO-BREAK SPACE}")); expectT("Any-Hex/Unicode", CharsToUnicodeString("\\U00010330\\U0010FF00\\U000E0061\\u00A0"), - "U+10330U+10FF00U+E0061U+00A0"); + UNICODE_STRING_SIMPLE("U+10330U+10FF00U+E0061U+00A0")); expectT("Any-Hex/C", CharsToUnicodeString("\\U00010330\\U0010FF00\\U000E0061\\u00A0"), - "\\U00010330\\U0010FF00\\U000E0061\\u00A0"); + UNICODE_STRING_SIMPLE("\\U00010330\\U0010FF00\\U000E0061\\u00A0")); expectT("Any-Hex/Perl", CharsToUnicodeString("\\U00010330\\U0010FF00\\U000E0061\\u00A0"), - "\\x{10330}\\x{10FF00}\\x{E0061}\\x{A0}"); + UNICODE_STRING_SIMPLE("\\x{10330}\\x{10FF00}\\x{E0061}\\x{A0}")); expectT("Any-Hex/Java", CharsToUnicodeString("\\U00010330\\U0010FF00\\U000E0061\\u00A0"), - "\\uD800\\uDF30\\uDBFF\\uDF00\\uDB40\\uDC61\\u00A0"); + UNICODE_STRING_SIMPLE("\\uD800\\uDF30\\uDBFF\\uDF00\\uDB40\\uDC61\\u00A0")); expectT("Any-Hex/XML", CharsToUnicodeString("\\U00010330\\U0010FF00\\U000E0061\\u00A0"), @@ -1768,7 +1847,7 @@ void TransliteratorTest::TestSupplemental() { CharsToUnicodeString("\\U00010330\\U0010FF00\\U000E0061\\u00A0"), "𐌰􏼀󠁡 "); - expectT("[\\U000E0000-\\U000E0FFF] Remove", + expectT(UNICODE_STRING_SIMPLE("[\\U000E0000-\\U000E0FFF] Remove"), CharsToUnicodeString("\\U00010330\\U0010FF00\\U000E0061\\u00A0"), CharsToUnicodeString("\\U00010330\\U0010FF00\\u00A0")); } @@ -1854,12 +1933,22 @@ void TransliteratorTest::TestQuantifier() { "bb x xb"); } -class TestTrans : public NullTransliterator { +class TestTrans : public Transliterator { public: - TestTrans(const UnicodeString& id) { - setID(id); + TestTrans(const UnicodeString& id) : Transliterator(id, 0) { } + virtual Transliterator* clone(void) const { + return new TestTrans(getID()); + } + virtual void handleTransliterate(Replaceable& /*text*/, UTransPosition& offsets, + UBool /*isIncremental*/) const + { + offsets.start = offsets.limit; + } + virtual UClassID getDynamicClassID() const; + static UClassID U_EXPORT2 getStaticClassID(); }; +UOBJECT_DEFINE_RTTI_IMPLEMENTATION(TestTrans) /** * Test Source-Target/Variant. @@ -2190,7 +2279,7 @@ void TransliteratorTest::TestUndefinedVariable() { UnicodeString rule = "$initial } a <> \\u1161;"; UParseError pe; UErrorCode ec = U_ZERO_ERROR; - Transliterator *t = new RuleBasedTransliterator("<ID>", rule, UTRANS_FORWARD, 0, pe, ec); + Transliterator *t = Transliterator::createFromRules("<ID>", rule, UTRANS_FORWARD, pe, ec); delete t; if (U_FAILURE(ec)) { logln((UnicodeString)"OK: Got exception for " + rule + ", as expected: " + @@ -2250,15 +2339,22 @@ void TransliteratorTest::TestCompoundFilterID(void) { t = Transliterator::createInstance(id, direction, pe, ec); } UBool ok = (t != NULL && U_SUCCESS(ec)); + UnicodeString transID; + if (t!=0) { + transID = t->getID(); + } + else { + transID = UnicodeString("NULL", ""); + } if (ok == expOk) { - logln((UnicodeString)"Ok: " + id + " => " + (t!=0?t->getID():(UnicodeString)"NULL") + ", " + + logln((UnicodeString)"Ok: " + id + " => " + transID + ", " + u_errorName(ec)); if (source.length() != 0) { expect(*t, source, exp); } delete t; } else { - errln((UnicodeString)"FAIL: " + id + " => " + (t!=0?t->getID():(UnicodeString)"NULL") + ", " + + errln((UnicodeString)"FAIL: " + id + " => " + transID + ", " + u_errorName(ec)); } } @@ -2268,7 +2364,7 @@ void TransliteratorTest::TestCompoundFilterID(void) { * Test new property set syntax */ void TransliteratorTest::TestPropertySet() { - expect("a>A; \\p{Lu}>x; \\p{ANY}>y;", "abcDEF", "Ayyxxx"); + expect(UNICODE_STRING_SIMPLE("a>A; \\p{Lu}>x; \\p{ANY}>y;"), "abcDEF", "Ayyxxx"); expect("(.+)>'[' $1 ']';", " a stitch \n in time \r saves 9", "[ a stitch ]\n[ in time ]\r[ saves 9]"); } @@ -2295,9 +2391,9 @@ void TransliteratorTest::TestNewEngine() { // true. Otherwise, this test will fail, revealing a // limitation of global filters in incremental mode. Transliterator *a = - Transliterator::createFromRules("a", "a > A;", UTRANS_FORWARD, pe, ec); + Transliterator::createFromRules("a_to_A", "a > A;", UTRANS_FORWARD, pe, ec); Transliterator *A = - Transliterator::createFromRules("A", "A > b;", UTRANS_FORWARD, pe, ec); + Transliterator::createFromRules("A_to_b", "A > b;", UTRANS_FORWARD, pe, ec); if (U_FAILURE(ec)) { delete a; delete A; @@ -2327,6 +2423,13 @@ void TransliteratorTest::TestNewEngine() { } expect(*t, "aAaA", "bAbA"); + + assertTrue("countElements", t->countElements() == 3); + assertEquals("getElement(0)", t->getElement(0, ec).getID(), "a_to_A"); + assertEquals("getElement(1)", t->getElement(1, ec).getID(), "NFD"); + assertEquals("getElement(2)", t->getElement(2, ec).getID(), "A_to_b"); + assertSuccess("getElement", ec); + delete a; delete A; delete array[1]; @@ -2736,8 +2839,8 @@ void TransliteratorTest::TestGurmukhiDevanagari(){ // (\u0902) (when preceded by vowel) ---> (\u0A02) // (\u0902) (when preceded by consonant) ---> (\u0A70) UErrorCode status = U_ZERO_ERROR; - UnicodeSet vowel(UnicodeString("[\\u0905-\\u090A \\u090F\\u0910\\u0913\\u0914 \\u093e-\\u0942\\u0947\\u0948\\u094B\\u094C\\u094D]").unescape(), status); - UnicodeSet non_vowel(UnicodeString("[\\u0915-\\u0928\\u092A-\\u0930]").unescape(), status); + UnicodeSet vowel(UnicodeString("[\\u0905-\\u090A \\u090F\\u0910\\u0913\\u0914 \\u093e-\\u0942\\u0947\\u0948\\u094B\\u094C\\u094D]", -1, US_INV).unescape(), status); + UnicodeSet non_vowel(UnicodeString("[\\u0915-\\u0928\\u092A-\\u0930]", -1, US_INV).unescape(), status); UParseError parseError; UnicodeSetIterator vIter(vowel); @@ -2748,8 +2851,8 @@ void TransliteratorTest::TestGurmukhiDevanagari(){ delete trans; return; } - UnicodeString src (" \\u0902"); - UnicodeString expected(" \\u0A02"); + UnicodeString src (" \\u0902", -1, US_INV); + UnicodeString expected(" \\u0A02", -1, US_INV); src = src.unescape(); expected= expected.unescape(); @@ -2842,7 +2945,7 @@ void TransliteratorTest::TestLocaleResource() { * Make sure parse errors reference the right line. */ void TransliteratorTest::TestParseError() { - const char* rule = + static const char* rule = "a > b;\n" "# more stuff\n" "d << b;"; @@ -2858,9 +2961,25 @@ void TransliteratorTest::TestParseError() { } else { errln("FAIL: " + err); } - return; } - errln("FAIL: no syntax error"); + else { + errln("FAIL: no syntax error"); + } + static const char* maskingRule = + "a>x;\n" + "# more stuff\n" + "ab>y;"; + ec = U_ZERO_ERROR; + delete Transliterator::createFromRules("ID", maskingRule, UTRANS_FORWARD, pe, ec); + if (ec != U_RULE_MASK_ERROR) { + errln("FAIL: returned %s instead of U_RULE_MASK_ERROR", u_errorName(ec)); + } + else if (UnicodeString("a > x;") != UnicodeString(pe.preContext)) { + errln("FAIL: did not get expected precontext"); + } + else if (UnicodeString("ab > y;") != UnicodeString(pe.postContext)) { + errln("FAIL: did not get expected postcontext"); + } } /** @@ -3047,8 +3166,8 @@ void TransliteratorTest::TestToRulesMark() { UParseError pe; UErrorCode ec = U_ZERO_ERROR; - Transliterator *t2 = Transliterator::createFromRules("source-target", testRules, UTRANS_FORWARD, pe, ec); - Transliterator *t3 = Transliterator::createFromRules("target-source", testRules, UTRANS_REVERSE, pe, ec); + Transliterator *t2 = Transliterator::createFromRules("source-target", UnicodeString(testRules, -1, US_INV), UTRANS_FORWARD, pe, ec); + Transliterator *t3 = Transliterator::createFromRules("target-source", UnicodeString(testRules, -1, US_INV), UTRANS_REVERSE, pe, ec); if (U_FAILURE(ec)) { delete t2; @@ -3060,8 +3179,8 @@ void TransliteratorTest::TestToRulesMark() { expect(*t2, source, target); expect(*t3, target, source); - checkRules("Failed toRules FORWARD", *t2, testRulesForward); - checkRules("Failed toRules BACKWARD", *t3, testRulesBackward); + checkRules("Failed toRules FORWARD", *t2, UnicodeString(testRulesForward, -1, US_INV)); + checkRules("Failed toRules BACKWARD", *t3, UnicodeString(testRulesBackward, -1, US_INV)); delete t2; delete t3; @@ -3081,7 +3200,7 @@ void TransliteratorTest::TestEscape() { errln((UnicodeString)"FAIL: createInstance"); } else { expect(*t, - "\\x{40}\\U000000312Q", + UNICODE_STRING_SIMPLE("\\x{40}\\U000000312Q"), "@12Q"); } delete t; @@ -3093,7 +3212,7 @@ void TransliteratorTest::TestEscape() { } else { expect(*t, CharsToUnicodeString("A\\U0010BEEF\\uFEED"), - "\\u0041\\U0010BEEF\\uFEED"); + UNICODE_STRING_SIMPLE("\\u0041\\U0010BEEF\\uFEED")); } delete t; @@ -3104,7 +3223,7 @@ void TransliteratorTest::TestEscape() { } else { expect(*t, CharsToUnicodeString("A\\U0010BEEF\\uFEED"), - "\\u0041\\uDBEF\\uDEEF\\uFEED"); + UNICODE_STRING_SIMPLE("\\u0041\\uDBEF\\uDEEF\\uFEED")); } delete t; @@ -3115,7 +3234,7 @@ void TransliteratorTest::TestEscape() { } else { expect(*t, CharsToUnicodeString("A\\U0010BEEF\\uFEED"), - "\\x{41}\\x{10BEEF}\\x{FEED}"); + UNICODE_STRING_SIMPLE("\\x{41}\\x{10BEEF}\\x{FEED}")); } delete t; } @@ -3321,9 +3440,9 @@ void TransliteratorTest::TestSpecialCases(void) { char* Char32ToEscapedChars(UChar32 ch, char* buffer) { if (ch <= 0xFFFF) { - sprintf(buffer, "\\u%04x", ch); + sprintf(buffer, "\\u%04x", (int)ch); } else { - sprintf(buffer, "\\u%08x", ch); + sprintf(buffer, "\\U%08x", (int)ch); } return buffer; } @@ -3432,7 +3551,7 @@ void TransliteratorTest::TestIncrementalProgress(void) { Transliterator::getAvailableVariant(k, source, target, variant); UnicodeString id = source + "-" + target + "/" + variant; - + Transliterator *t = Transliterator::createInstance(id, UTRANS_FORWARD, err, status); if (U_FAILURE(status)) { errln((UnicodeString)"FAIL: Could not create " + id); @@ -3520,7 +3639,7 @@ void TransliteratorTest::TestFunction() { } expect(*t, "The Quick Brown Fox", - "T(t=\\u0074)he Q(q=\\u0071)uick B(b=\\u0062)rown F(f=\\u0066)ox"); + UNICODE_STRING_SIMPLE("T(t=\\u0074)he Q(q=\\u0071)uick B(b=\\u0062)rown F(f=\\u0066)ox")); delete t; } @@ -3598,7 +3717,7 @@ void TransliteratorTest::TestMulticharStringSet() { Transliterator* _TUFF[4]; UnicodeString* _TUFID[4]; -static Transliterator* _TUFFactory(const UnicodeString& /*ID*/, +static Transliterator* U_EXPORT2 _TUFFactory(const UnicodeString& /*ID*/, Transliterator::Token context) { return _TUFF[context.integer]->clone(); } @@ -3638,7 +3757,7 @@ void TransliteratorTest::TestUserFunction() { // There's no need to register inverses if we don't use them t = Transliterator::createFromRules("gif", - "'\\'u(..)(..) > '<img src=\"http://www.unicode.org/gifs/24/' $1 '/U' $1$2 '.gif\">';", + UNICODE_STRING_SIMPLE("'\\'u(..)(..) > '<img src=\"http://www.unicode.org/gifs/24/' $1 '/U' $1$2 '.gif\">';"), UTRANS_FORWARD, pe, ec); if (t == NULL || U_FAILURE(ec)) { errln((UnicodeString)"FAIL: createFromRules gif " + u_errorName(ec)); @@ -3647,13 +3766,13 @@ void TransliteratorTest::TestUserFunction() { _TUFReg("Any-gif", t, 0); t = Transliterator::createFromRules("RemoveCurly", - "[\\{\\}] > ; '\\N' > ;", + UNICODE_STRING_SIMPLE("[\\{\\}] > ; '\\N' > ;"), UTRANS_FORWARD, pe, ec); if (t == NULL || U_FAILURE(ec)) { errln((UnicodeString)"FAIL: createFromRules RemoveCurly " + u_errorName(ec)); goto FAIL; } - expect(*t, "\\N{name}", "name"); + expect(*t, UNICODE_STRING_SIMPLE("\\N{name}"), "name"); _TUFReg("Any-RemoveCurly", t, 1); logln("Trying &hex"); @@ -3671,7 +3790,7 @@ void TransliteratorTest::TestUserFunction() { errln((UnicodeString)"FAIL: createInstance Any-hex2 " + u_errorName(ec)); goto FAIL; } - expect(*t, "abc", "\\u0061\\u0062\\u0063"); + expect(*t, "abc", UNICODE_STRING_SIMPLE("\\u0061\\u0062\\u0063")); delete t; logln("Trying &gif"); @@ -3702,7 +3821,7 @@ void TransliteratorTest::TestUserFunction() { goto FAIL; } expect(*t, "abc", - "\\u0061 LATIN SMALL LETTER A \\u0062 LATIN SMALL LETTER B \\u0063 LATIN SMALL LETTER C "); + UNICODE_STRING_SIMPLE("\\u0061 LATIN SMALL LETTER A \\u0062 LATIN SMALL LETTER B \\u0063 LATIN SMALL LETTER C ")); delete t; FAIL: @@ -3921,9 +4040,564 @@ void TransliteratorTest::TestAlternateSyntax() { "xbz"); expect(CharsToUnicodeString("([:^ASCII:]) \\u2192 \\u2206Name($1);"), CharsToUnicodeString("<=\\u2190; >=\\u2192; <>=\\u2194; &=\\u2206"), - "<=\\N{LEFTWARDS ARROW}; >=\\N{RIGHTWARDS ARROW}; <>=\\N{LEFT RIGHT ARROW}; &=\\N{INCREMENT}"); + UNICODE_STRING_SIMPLE("<=\\N{LEFTWARDS ARROW}; >=\\N{RIGHTWARDS ARROW}; <>=\\N{LEFT RIGHT ARROW}; &=\\N{INCREMENT}")); +} + +static const char* BEGIN_END_RULES[] = { + // [0] + "abc > xy;" + "aba > z;", + + // [1] +/* + "::BEGIN;" + "abc > xy;" + "::END;" + "::BEGIN;" + "aba > z;" + "::END;", +*/ + "", // test case commented out below, this is here to keep from messing up the indexes + + // [2] +/* + "abc > xy;" + "::BEGIN;" + "aba > z;" + "::END;", +*/ + "", // test case commented out below, this is here to keep from messing up the indexes + + // [3] +/* + "::BEGIN;" + "abc > xy;" + "::END;" + "aba > z;", +*/ + "", // test case commented out below, this is here to keep from messing up the indexes + + // [4] + "abc > xy;" + "::Null;" + "aba > z;", + + // [5] + "::Upper;" + "ABC > xy;" + "AB > x;" + "C > z;" + "::Upper;" + "XYZ > p;" + "XY > q;" + "Z > r;" + "::Upper;", + + // [6] + "$ws = [[:Separator:][\\u0009-\\u000C]$];" + "$delim = [\\-$ws];" + "$ws $delim* > ' ';" + "'-' $delim* > '-';", + + // [7] + "::Null;" + "$ws = [[:Separator:][\\u0009-\\u000C]$];" + "$delim = [\\-$ws];" + "$ws $delim* > ' ';" + "'-' $delim* > '-';", + + // [8] + "$ws = [[:Separator:][\\u0009-\\u000C]$];" + "$delim = [\\-$ws];" + "$ws $delim* > ' ';" + "'-' $delim* > '-';" + "::Null;", + + // [9] + "$ws = [[:Separator:][\\u0009-\\u000C]$];" + "$delim = [\\-$ws];" + "::Null;" + "$ws $delim* > ' ';" + "'-' $delim* > '-';", + + // [10] +/* + "::BEGIN;" + "$ws = [[:Separator:][\\u0009-\\u000C]$];" + "$delim = [\\-$ws];" + "::END;" + "$ws $delim* > ' ';" + "'-' $delim* > '-';", +*/ + "", // test case commented out below, this is here to keep from messing up the indexes + + // [11] +/* + "$ws = [[:Separator:][\\u0009-\\u000C]$];" + "$delim = [\\-$ws];" + "::BEGIN;" + "$ws $delim* > ' ';" + "'-' $delim* > '-';" + "::END;", +*/ + "", // test case commented out below, this is here to keep from messing up the indexes + + // [12] +/* + "$ws = [[:Separator:][\\u0009-\\u000C]$];" + "$delim = [\\-$ws];" + "$ab = [ab];" + "::BEGIN;" + "$ws $delim* > ' ';" + "'-' $delim* > '-';" + "::END;" + "::BEGIN;" + "$ab { ' ' } $ab > '-';" + "c { ' ' > ;" + "::END;" + "::BEGIN;" + "'a-a' > a\\%|a;" + "::END;", +*/ + "", // test case commented out below, this is here to keep from messing up the indexes + + // [13] + "$ws = [[:Separator:][\\u0009-\\u000C]$];" + "$delim = [\\-$ws];" + "$ab = [ab];" + "::Null;" + "$ws $delim* > ' ';" + "'-' $delim* > '-';" + "::Null;" + "$ab { ' ' } $ab > '-';" + "c { ' ' > ;" + "::Null;" + "'a-a' > a\\%|a;", + + // [14] +/* + "::[abc];" + "::BEGIN;" + "abc > xy;" + "::END;" + "::BEGIN;" + "aba > yz;" + "::END;" + "::Upper;", +*/ + "", // test case commented out below, this is here to keep from messing up the indexes + + // [15] + "::[abc];" + "abc > xy;" + "::Null;" + "aba > yz;" + "::Upper;", + + // [16] +/* + "::[abc];" + "::BEGIN;" + "abc <> xy;" + "::END;" + "::BEGIN;" + "aba <> yz;" + "::END;" + "::Upper(Lower);" + "::([XYZ]);" +*/ + "", // test case commented out below, this is here to keep from messing up the indexes + + // [17] + "::[abc];" + "abc <> xy;" + "::Null;" + "aba <> yz;" + "::Upper(Lower);" + "::([XYZ]);" +}; +static const int32_t BEGIN_END_RULES_length = (int32_t)(sizeof(BEGIN_END_RULES) / sizeof(BEGIN_END_RULES[0])); + +/* +(This entire test is commented out below and will need some heavy revision when we re-add +the ::BEGIN/::END stuff) +static const char* BOGUS_BEGIN_END_RULES[] = { + // [7] + "::BEGIN;" + "abc > xy;" + "::BEGIN;" + "aba > z;" + "::END;" + "::END;", + + // [8] + "abc > xy;" + " aba > z;" + "::END;", + + // [9] + "::BEGIN;" + "::Upper;" + "::END;" +}; +static const int32_t BOGUS_BEGIN_END_RULES_length = (int32_t)(sizeof(BOGUS_BEGIN_END_RULES) / sizeof(BOGUS_BEGIN_END_RULES[0])); +*/ + +static const char* BEGIN_END_TEST_CASES[] = { + // rules input expected output + BEGIN_END_RULES[0], "abc ababc aba", "xy zbc z", +// BEGIN_END_RULES[1], "abc ababc aba", "xy abxy z", +// BEGIN_END_RULES[2], "abc ababc aba", "xy abxy z", +// BEGIN_END_RULES[3], "abc ababc aba", "xy abxy z", + BEGIN_END_RULES[4], "abc ababc aba", "xy abxy z", + BEGIN_END_RULES[5], "abccabaacababcbc", "PXAARXQBR", + + BEGIN_END_RULES[6], "e e - e---e- e", "e e e-e-e", + BEGIN_END_RULES[7], "e e - e---e- e", "e e e-e-e", + BEGIN_END_RULES[8], "e e - e---e- e", "e e e-e-e", + BEGIN_END_RULES[9], "e e - e---e- e", "e e e-e-e", +// BEGIN_END_RULES[10], "e e - e---e- e", "e e e-e-e", +// BEGIN_END_RULES[11], "e e - e---e- e", "e e e-e-e", +// BEGIN_END_RULES[12], "e e - e---e- e", "e e e-e-e", +// BEGIN_END_RULES[12], "a a a a", "a%a%a%a", +// BEGIN_END_RULES[12], "a a-b c b a", "a%a-b cb-a", + BEGIN_END_RULES[13], "e e - e---e- e", "e e e-e-e", + BEGIN_END_RULES[13], "a a a a", "a%a%a%a", + BEGIN_END_RULES[13], "a a-b c b a", "a%a-b cb-a", + +// BEGIN_END_RULES[14], "abc xy ababc xyz aba", "XY xy ABXY xyz YZ", + BEGIN_END_RULES[15], "abc xy ababc xyz aba", "XY xy ABXY xyz YZ", +// BEGIN_END_RULES[16], "abc xy ababc xyz aba", "XY xy ABXY xyz YZ", + BEGIN_END_RULES[17], "abc xy ababc xyz aba", "XY xy ABXY xyz YZ" +}; +static const int32_t BEGIN_END_TEST_CASES_length = (int32_t)(sizeof(BEGIN_END_TEST_CASES) / sizeof(BEGIN_END_TEST_CASES[0])); + +void TransliteratorTest::TestBeginEnd() { + // run through the list of test cases above + int32_t i = 0; + for (i = 0; i < BEGIN_END_TEST_CASES_length; i += 3) { + expect((UnicodeString)"Test case #" + (i / 3), + UnicodeString(BEGIN_END_TEST_CASES[i], -1, US_INV), + UnicodeString(BEGIN_END_TEST_CASES[i + 1], -1, US_INV), + UnicodeString(BEGIN_END_TEST_CASES[i + 2], -1, US_INV)); + } + + // instantiate the one reversible rule set in the reverse direction and make sure it does the right thing + UParseError parseError; + UErrorCode status = U_ZERO_ERROR; + Transliterator* reversed = Transliterator::createFromRules("Reversed", UnicodeString(BEGIN_END_RULES[17]), + UTRANS_REVERSE, parseError, status); + if (reversed == 0 || U_FAILURE(status)) { + reportParseError(UnicodeString("FAIL: Couldn't create reversed transliterator"), parseError, status); + } else { + expect(*reversed, UnicodeString("xy XY XYZ yz YZ"), UnicodeString("xy abc xaba yz aba")); + } + delete reversed; + + // finally, run through the list of syntactically-ill-formed rule sets above and make sure + // that all of them cause errors +/* +(commented out until we have the real ::BEGIN/::END stuff in place + for (i = 0; i < BOGUS_BEGIN_END_RULES_length; i++) { + UParseError parseError; + UErrorCode status = U_ZERO_ERROR; + Transliterator* t = Transliterator::createFromRules("foo", UnicodeString(BOGUS_BEGIN_END_RULES[i]), + UTRANS_FORWARD, parseError, status); + if (!U_FAILURE(status)) { + delete t; + errln((UnicodeString)"Should have gotten syntax error from " + BOGUS_BEGIN_END_RULES[i]); + } + } +*/ } +void TransliteratorTest::TestBeginEndToRules() { + // run through the same list of test cases we used above, but this time, instead of just + // instantiating a Transliterator from the rules and running the test against it, we instantiate + // a Transliterator from the rules, do toRules() on it, instantiate a Transliterator from + // the resulting set of rules, and make sure that the generated rule set is semantically equivalent + // to (i.e., does the same thing as) the original rule set + for (int32_t i = 0; i < BEGIN_END_TEST_CASES_length; i += 3) { + UParseError parseError; + UErrorCode status = U_ZERO_ERROR; + Transliterator* t = Transliterator::createFromRules("--", UnicodeString(BEGIN_END_TEST_CASES[i], -1, US_INV), + UTRANS_FORWARD, parseError, status); + if (U_FAILURE(status)) { + reportParseError(UnicodeString("FAIL: Couldn't create transliterator"), parseError, status); + } else { + UnicodeString rules; + t->toRules(rules, TRUE); + Transliterator* t2 = Transliterator::createFromRules((UnicodeString)"Test case #" + (i / 3), rules, + UTRANS_FORWARD, parseError, status); + if (U_FAILURE(status)) { + reportParseError(UnicodeString("FAIL: Couldn't create transliterator from generated rules"), + parseError, status); + delete t; + } else { + expect(*t2, + UnicodeString(BEGIN_END_TEST_CASES[i + 1], -1, US_INV), + UnicodeString(BEGIN_END_TEST_CASES[i + 2], -1, US_INV)); + delete t; + delete t2; + } + } + } + + // do the same thing for the reversible test case + UParseError parseError; + UErrorCode status = U_ZERO_ERROR; + Transliterator* reversed = Transliterator::createFromRules("Reversed", UnicodeString(BEGIN_END_RULES[17]), + UTRANS_REVERSE, parseError, status); + if (U_FAILURE(status)) { + reportParseError(UnicodeString("FAIL: Couldn't create reversed transliterator"), parseError, status); + } else { + UnicodeString rules; + reversed->toRules(rules, FALSE); + Transliterator* reversed2 = Transliterator::createFromRules("Reversed", rules, UTRANS_FORWARD, + parseError, status); + if (U_FAILURE(status)) { + reportParseError(UnicodeString("FAIL: Couldn't create reversed transliterator from generated rules"), + parseError, status); + delete reversed; + } else { + expect(*reversed2, + UnicodeString("xy XY XYZ yz YZ"), + UnicodeString("xy abc xaba yz aba")); + delete reversed; + delete reversed2; + } + } +} + +void TransliteratorTest::TestRegisterAlias() { + UnicodeString longID("Lower;[aeiou]Upper"); + UnicodeString shortID("Any-CapVowels"); + UnicodeString reallyShortID("CapVowels"); + + Transliterator::registerAlias(shortID, longID); + + UErrorCode err = U_ZERO_ERROR; + Transliterator* t1 = Transliterator::createInstance(longID, UTRANS_FORWARD, err); + if (U_FAILURE(err)) { + errln("Failed to instantiate transliterator with long ID"); + Transliterator::unregister(shortID); + return; + } + Transliterator* t2 = Transliterator::createInstance(reallyShortID, UTRANS_FORWARD, err); + if (U_FAILURE(err)) { + errln("Failed to instantiate transliterator with short ID"); + delete t1; + Transliterator::unregister(shortID); + return; + } + + if (t1->getID() != longID) + errln("Transliterator instantiated with long ID doesn't have long ID"); + if (t2->getID() != reallyShortID) + errln("Transliterator instantiated with short ID doesn't have short ID"); + + UnicodeString rules1; + UnicodeString rules2; + + t1->toRules(rules1, TRUE); + t2->toRules(rules2, TRUE); + if (rules1 != rules2) + errln("Alias transliterators aren't the same"); + + delete t1; + delete t2; + Transliterator::unregister(shortID); + + t1 = Transliterator::createInstance(shortID, UTRANS_FORWARD, err); + if (U_SUCCESS(err)) { + errln("Instantiation with short ID succeeded after short ID was unregistered"); + delete t1; + } + + // try the same thing again, but this time with something other than + // an instance of CompoundTransliterator + UnicodeString realID("Latin-Greek"); + UnicodeString fakeID("Latin-dlgkjdflkjdl"); + Transliterator::registerAlias(fakeID, realID); + + err = U_ZERO_ERROR; + t1 = Transliterator::createInstance(realID, UTRANS_FORWARD, err); + if (U_FAILURE(err)) { + errln("Failed to instantiate transliterator with real ID"); + Transliterator::unregister(realID); + return; + } + t2 = Transliterator::createInstance(fakeID, UTRANS_FORWARD, err); + if (U_FAILURE(err)) { + errln("Failed to instantiate transliterator with fake ID"); + delete t1; + Transliterator::unregister(realID); + return; + } + + t1->toRules(rules1, TRUE); + t2->toRules(rules2, TRUE); + if (rules1 != rules2) + errln("Alias transliterators aren't the same"); + + delete t1; + delete t2; + Transliterator::unregister(fakeID); +} + +void TransliteratorTest::TestRuleStripping() { + /* +# +\uE001>\u0C01; # SIGN + */ + static const UChar rule[] = { + 0x0023,0x0020,0x000D,0x000A, + 0xE001,0x003E,0x0C01,0x003B,0x0020,0x0023,0x0020,0x0053,0x0049,0x0047,0x004E,0 + }; + static const UChar expectedRule[] = { + 0xE001,0x003E,0x0C01,0x003B,0 + }; + UChar result[sizeof(rule)/sizeof(rule[0])]; + UErrorCode status = U_ZERO_ERROR; + int32_t len = utrans_stripRules(rule, (int32_t)(sizeof(rule)/sizeof(rule[0])), result, &status); + if (len != u_strlen(expectedRule)) { + errln("utrans_stripRules return len = %d", len); + } + if (u_strncmp(expectedRule, result, len) != 0) { + errln("utrans_stripRules did not return expected string"); + } +} + +/** + * Test the Halfwidth-Fullwidth transliterator (ticket 6281). + */ +void TransliteratorTest::TestHalfwidthFullwidth(void) { + UParseError parseError; + UErrorCode status = U_ZERO_ERROR; + Transliterator* hf = Transliterator::createInstance("Halfwidth-Fullwidth", UTRANS_FORWARD, parseError, status); + Transliterator* fh = Transliterator::createInstance("Fullwidth-Halfwidth", UTRANS_FORWARD, parseError, status); + if (hf == 0 || fh == 0) { + errln("FAIL: createInstance failed"); + delete hf; + delete fh; + return; + } + + // Array of 2n items + // Each item is + // "hf"|"fh"|"both", + // <Halfwidth>, + // <Fullwidth> + const char* DATA[] = { + "both", + "\\uFFE9\\uFFEA\\uFFEB\\uFFEC\\u0061\\uFF71\\u00AF\\u0020", + "\\u2190\\u2191\\u2192\\u2193\\uFF41\\u30A2\\uFFE3\\u3000", + }; + int32_t DATA_length = (int32_t)(sizeof(DATA) / sizeof(DATA[0])); + + for (int32_t i=0; i<DATA_length; i+=3) { + UnicodeString h = CharsToUnicodeString(DATA[i+1]); + UnicodeString f = CharsToUnicodeString(DATA[i+2]); + switch (*DATA[i]) { + case 0x68: //'h': // Halfwidth-Fullwidth only + expect(*hf, h, f); + break; + case 0x66: //'f': // Fullwidth-Halfwidth only + expect(*fh, f, h); + break; + case 0x62: //'b': // both directions + expect(*hf, h, f); + expect(*fh, f, h); + break; + } + } + delete hf; + delete fh; +} + + + /** + * Test Thai. The text is the first paragraph of "What is Unicode" from the Unicode.org web site. + * TODO: confirm that the expected results are correct. + * For now, test just confirms that C++ and Java give identical results. + */ +void TransliteratorTest::TestThai(void) { + UParseError parseError; + UErrorCode status = U_ZERO_ERROR; + Transliterator* tr = Transliterator::createInstance("Any-Latin", UTRANS_FORWARD, parseError, status); + if (tr == 0) { + errln("FAIL: createInstance failed"); + return; + } + if (U_FAILURE(status)) { + errln("FAIL: createInstance failed with %s", u_errorName(status)); + return; + } + const char *thaiText = + "\\u0e42\\u0e14\\u0e22\\u0e1e\\u0e37\\u0e49\\u0e19\\u0e10\\u0e32\\u0e19\\u0e41\\u0e25\\u0e49\\u0e27, \\u0e04\\u0e2d" + "\\u0e21\\u0e1e\\u0e34\\u0e27\\u0e40\\u0e15\\u0e2d\\u0e23\\u0e4c\\u0e08\\u0e30\\u0e40\\u0e01\\u0e35\\u0e48\\u0e22" + "\\u0e27\\u0e02\\u0e49\\u0e2d\\u0e07\\u0e01\\u0e31\\u0e1a\\u0e40\\u0e23\\u0e37\\u0e48\\u0e2d\\u0e07\\u0e02\\u0e2d" + "\\u0e07\\u0e15\\u0e31\\u0e27\\u0e40\\u0e25\\u0e02. \\u0e04\\u0e2d\\u0e21\\u0e1e\\u0e34\\u0e27\\u0e40\\u0e15\\u0e2d" + "\\u0e23\\u0e4c\\u0e08\\u0e31\\u0e14\\u0e40\\u0e01\\u0e47\\u0e1a\\u0e15\\u0e31\\u0e27\\u0e2d\\u0e31\\u0e01\\u0e29" + "\\u0e23\\u0e41\\u0e25\\u0e30\\u0e2d\\u0e31\\u0e01\\u0e02\\u0e23\\u0e30\\u0e2d\\u0e37\\u0e48\\u0e19\\u0e46 \\u0e42" + "\\u0e14\\u0e22\\u0e01\\u0e32\\u0e23\\u0e01\\u0e33\\u0e2b\\u0e19\\u0e14\\u0e2b\\u0e21\\u0e32\\u0e22\\u0e40\\u0e25" + "\\u0e02\\u0e43\\u0e2b\\u0e49\\u0e2a\\u0e33\\u0e2b\\u0e23\\u0e31\\u0e1a\\u0e41\\u0e15\\u0e48\\u0e25\\u0e30\\u0e15" + "\\u0e31\\u0e27. \\u0e01\\u0e48\\u0e2d\\u0e19\\u0e2b\\u0e19\\u0e49\\u0e32\\u0e17\\u0e35\\u0e48\\u0e4a Unicode \\u0e08" + "\\u0e30\\u0e16\\u0e39\\u0e01\\u0e2a\\u0e23\\u0e49\\u0e32\\u0e07\\u0e02\\u0e36\\u0e49\\u0e19, \\u0e44\\u0e14\\u0e49" + "\\u0e21\\u0e35\\u0e23\\u0e30\\u0e1a\\u0e1a encoding \\u0e2d\\u0e22\\u0e39\\u0e48\\u0e2b\\u0e25\\u0e32\\u0e22\\u0e23" + "\\u0e49\\u0e2d\\u0e22\\u0e23\\u0e30\\u0e1a\\u0e1a\\u0e2a\\u0e33\\u0e2b\\u0e23\\u0e31\\u0e1a\\u0e01\\u0e32\\u0e23" + "\\u0e01\\u0e33\\u0e2b\\u0e19\\u0e14\\u0e2b\\u0e21\\u0e32\\u0e22\\u0e40\\u0e25\\u0e02\\u0e40\\u0e2b\\u0e25\\u0e48" + "\\u0e32\\u0e19\\u0e35\\u0e49. \\u0e44\\u0e21\\u0e48\\u0e21\\u0e35 encoding \\u0e43\\u0e14\\u0e17\\u0e35\\u0e48" + "\\u0e21\\u0e35\\u0e08\\u0e33\\u0e19\\u0e27\\u0e19\\u0e15\\u0e31\\u0e27\\u0e2d\\u0e31\\u0e01\\u0e02\\u0e23\\u0e30" + "\\u0e21\\u0e32\\u0e01\\u0e40\\u0e1e\\u0e35\\u0e22\\u0e07\\u0e1e\\u0e2d: \\u0e22\\u0e01\\u0e15\\u0e31\\u0e27\\u0e2d" + "\\u0e22\\u0e48\\u0e32\\u0e07\\u0e40\\u0e0a\\u0e48\\u0e19, \\u0e40\\u0e09\\u0e1e\\u0e32\\u0e30\\u0e43\\u0e19\\u0e01" + "\\u0e25\\u0e38\\u0e48\\u0e21\\u0e2a\\u0e2b\\u0e20\\u0e32\\u0e1e\\u0e22\\u0e38\\u0e42\\u0e23\\u0e1b\\u0e40\\u0e1e" + "\\u0e35\\u0e22\\u0e07\\u0e41\\u0e2b\\u0e48\\u0e07\\u0e40\\u0e14\\u0e35\\u0e22\\u0e27 \\u0e01\\u0e47\\u0e15\\u0e49" + "\\u0e2d\\u0e07\\u0e01\\u0e32\\u0e23\\u0e2b\\u0e25\\u0e32\\u0e22 encoding \\u0e43\\u0e19\\u0e01\\u0e32\\u0e23\\u0e04" + "\\u0e23\\u0e2d\\u0e1a\\u0e04\\u0e25\\u0e38\\u0e21\\u0e17\\u0e38\\u0e01\\u0e20\\u0e32\\u0e29\\u0e32\\u0e43\\u0e19" + "\\u0e01\\u0e25\\u0e38\\u0e48\\u0e21. \\u0e2b\\u0e23\\u0e37\\u0e2d\\u0e41\\u0e21\\u0e49\\u0e41\\u0e15\\u0e48\\u0e43" + "\\u0e19\\u0e20\\u0e32\\u0e29\\u0e32\\u0e40\\u0e14\\u0e35\\u0e48\\u0e22\\u0e27 \\u0e40\\u0e0a\\u0e48\\u0e19 \\u0e20" + "\\u0e32\\u0e29\\u0e32\\u0e2d\\u0e31\\u0e07\\u0e01\\u0e24\\u0e29 \\u0e01\\u0e47\\u0e44\\u0e21\\u0e48\\u0e21\\u0e35" + " encoding \\u0e43\\u0e14\\u0e17\\u0e35\\u0e48\\u0e40\\u0e1e\\u0e35\\u0e22\\u0e07\\u0e1e\\u0e2d\\u0e2a\\u0e33\\u0e2b" + "\\u0e23\\u0e31\\u0e1a\\u0e17\\u0e38\\u0e01\\u0e15\\u0e31\\u0e27\\u0e2d\\u0e31\\u0e01\\u0e29\\u0e23, \\u0e40\\u0e04" + "\\u0e23\\u0e37\\u0e48\\u0e2d\\u0e07\\u0e2b\\u0e21\\u0e32\\u0e22\\u0e27\\u0e23\\u0e23\\u0e04\\u0e15\\u0e2d\\u0e19" + " \\u0e41\\u0e25\\u0e30\\u0e2a\\u0e31\\u0e0d\\u0e25\\u0e31\\u0e01\\u0e29\\u0e13\\u0e4c\\u0e17\\u0e32\\u0e07\\u0e40" + "\\u0e17\\u0e04\\u0e19\\u0e34\\u0e04\\u0e17\\u0e35\\u0e48\\u0e43\\u0e0a\\u0e49\\u0e01\\u0e31\\u0e19\\u0e2d\\u0e22" + "\\u0e39\\u0e48\\u0e17\\u0e31\\u0e48\\u0e27\\u0e44\\u0e1b."; + + const char *latinText = + "doy ph\\u1ee5\\u0304\\u0302n \\u1e6d\\u0304h\\u0101n l\\u00e6\\u0302w, khxmphiwtexr\\u0312 ca ke\\u012b\\u0300" + "ywk\\u0304\\u0125xng k\\u1ea1b re\\u1ee5\\u0304\\u0300xng k\\u0304hxng t\\u1ea1wlek\\u0304h. khxmphiwtexr" + "\\u0312 c\\u1ea1d k\\u0115b t\\u1ea1w x\\u1ea1ks\\u0304\\u02b9r l\\u00e6a x\\u1ea1kk\\u0304h ra x\\u1ee5\\u0304" + "\\u0300n\\u00ab doy k\\u0101r k\\u1ea3h\\u0304nd h\\u0304m\\u0101ylek\\u0304h h\\u0304\\u0131\\u0302 s\\u0304" + "\\u1ea3h\\u0304r\\u1ea1b t\\u00e6\\u0300la t\\u1ea1w. k\\u0300xn h\\u0304n\\u0302\\u0101 th\\u012b\\u0300\\u0301" + " Unicode ca t\\u0304h\\u016bk s\\u0304r\\u0302\\u0101ng k\\u0304h\\u1ee5\\u0302n, d\\u1ecb\\u0302 m\\u012b " + "rabb encoding xy\\u016b\\u0300 h\\u0304l\\u0101y r\\u0302xy rabb s\\u0304\\u1ea3h\\u0304r\\u1ea1b k\\u0101" + "r k\\u1ea3h\\u0304nd h\\u0304m\\u0101ylek\\u0304h h\\u0304el\\u0300\\u0101 n\\u012b\\u0302. m\\u1ecb\\u0300m" + "\\u012b encoding d\\u0131 th\\u012b\\u0300 m\\u012b c\\u1ea3nwn t\\u1ea1w x\\u1ea1kk\\u0304hra m\\u0101k p" + "he\\u012byng phx: yk t\\u1ea1wx\\u1ef3\\u0101ng ch\\u00e8n, c\\u0304heph\\u0101a n\\u0131 kl\\u00f9m s\\u0304" + "h\\u0304p\\u0323h\\u0101ph yurop phe\\u012byng h\\u0304\\u00e6\\u0300ng de\\u012byw k\\u0306 t\\u0302xngk\\u0101" + "r h\\u0304l\\u0101y encoding n\\u0131 k\\u0101r khrxbkhlum thuk p\\u0323h\\u0101s\\u0304\\u02b9\\u0101 n\\u0131" + " kl\\u00f9m. h\\u0304r\\u1ee5\\u0304x m\\u00e6\\u0302t\\u00e6\\u0300 n\\u0131 p\\u0323h\\u0101s\\u0304\\u02b9" + "\\u0101 de\\u012b\\u0300yw ch\\u00e8n p\\u0323h\\u0101s\\u0304\\u02b9\\u0101 x\\u1ea1ngkvs\\u0304\\u02b9 k\\u0306" + " m\\u1ecb\\u0300m\\u012b encoding d\\u0131 th\\u012b\\u0300 phe\\u012byng phx s\\u0304\\u1ea3h\\u0304r\\u1ea1" + "b thuk t\\u1ea1w x\\u1ea1ks\\u0304\\u02b9r, kher\\u1ee5\\u0304\\u0300xngh\\u0304m\\u0101y wrrkh txn l\\u00e6" + "a s\\u0304\\u1ea1\\u1ef5l\\u1ea1ks\\u0304\\u02b9\\u1e47\\u0312 th\\u0101ng thekhnikh th\\u012b\\u0300 ch\\u0131" + "\\u0302 k\\u1ea1n xy\\u016b\\u0300 th\\u1ea1\\u0300wp\\u1ecb."; + + + UnicodeString xlitText(thaiText); + xlitText = xlitText.unescape(); + tr->transliterate(xlitText); + + UnicodeString expectedText(latinText); + expectedText = expectedText.unescape(); + expect(*tr, xlitText, expectedText); + + delete tr; +} + + //====================================================================== // Support methods //====================================================================== @@ -3942,14 +4616,35 @@ void TransliteratorTest::expectT(const UnicodeString& id, delete t; } +void TransliteratorTest::reportParseError(const UnicodeString& message, + const UParseError& parseError, + const UErrorCode& status) { + errln(message + + /*", parse error " + parseError.code +*/ + ", line " + parseError.line + + ", offset " + parseError.offset + + ", pre-context " + prettify(parseError.preContext, TRUE) + + ", post-context " + prettify(parseError.postContext,TRUE) + + ", Error: " + u_errorName(status)); +} + void TransliteratorTest::expect(const UnicodeString& rules, const UnicodeString& source, const UnicodeString& expectedResult, UTransPosition *pos) { + expect("<ID>", rules, source, expectedResult, pos); +} + +void TransliteratorTest::expect(const UnicodeString& id, + const UnicodeString& rules, + const UnicodeString& source, + const UnicodeString& expectedResult, + UTransPosition *pos) { UErrorCode status = U_ZERO_ERROR; - Transliterator *t = new RuleBasedTransliterator("<ID>", rules, status); + UParseError parseError; + Transliterator* t = Transliterator::createFromRules(id, rules, UTRANS_FORWARD, parseError, status); if (U_FAILURE(status)) { - errln("FAIL: Transliterator constructor failed"); + reportParseError(UnicodeString("Couldn't create transliterator from ") + rules, parseError, status); } else { expect(*t, source, expectedResult, pos); } @@ -3973,7 +4668,6 @@ void TransliteratorTest::expect(const Transliterator& t, t.transliterate(result); expectAux(t.getID() + ":String", source, result, expectedResult); } - UTransPosition index={0, 0, 0, 0}; if (pos != 0) { index = *pos;