/*
**********************************************************************
-* Copyright (C) 1999-2003, International Business Machines
+* Copyright (C) 1999-2008, International Business Machines
* Corporation and others. All Rights Reserved.
**********************************************************************
* Date Name Description
#include "unicode/dtfmtsym.h"
#include "unicode/normlzr.h"
#include "unicode/translit.h"
-#include "unicode/ucnv.h"
-#include "unicode/ucnv_err.h"
#include "unicode/uchar.h"
#include "unicode/unifilt.h"
#include "unicode/uniset.h"
-#include "unitohex.h"
-#include "unicode/utypes.h"
#include "unicode/ustring.h"
#include "unicode/usetiter.h"
#include "unicode/uscript.h"
-#include "hextouni.h"
#include "cpdtrans.h"
#include "nultrans.h"
#include "rbt.h"
+#include "rbt_pars.h"
#include "anytrans.h"
#include "esctrn.h"
#include "name2uni.h"
#include "unesctrn.h"
#include "uni2name.h"
#include "cstring.h"
+#include "cmemory.h"
+#include <stdio.h>
/***********************************************************************
{
}
+TransliteratorTest::~TransliteratorTest() {}
+
void
TransliteratorTest::runIndexedTest(int32_t index, UBool exec,
const char* &name, char* /*par*/) {
TESTCASE(75,TestAllCodepoints);
TESTCASE(76,TestBoilerplate);
TESTCASE(77,TestAlternateSyntax);
+ TESTCASE(78,TestBeginEnd);
+ TESTCASE(79,TestBeginEndToRules);
+ TESTCASE(80,TestRegisterAlias);
+ TESTCASE(81,TestRuleStripping);
+ TESTCASE(82,TestHalfwidthFullwidth);
+ TESTCASE(83,TestThai);
default: name = ""; break;
}
}
+static const UVersionInfo ICU_39 = {3,9,4,0};
/**
* Make sure every system transliterator can be instantiated.
*
* instantiates everything as well.
*/
void TransliteratorTest::TestInstantiation() {
+ UErrorCode ec = U_ZERO_ERROR;
+ StringEnumeration* avail = Transliterator::getAvailableIDs(ec);
+ assertSuccess("getAvailableIDs()", ec);
+ assertTrue("getAvailableIDs()!=NULL", avail!=NULL);
int32_t n = Transliterator::countAvailableIDs();
+ assertTrue("getAvailableIDs().count()==countAvailableIDs()",
+ avail->count(ec) == n);
+ assertSuccess("count()", ec);
UnicodeString name;
for (int32_t i=0; i<n; ++i) {
- UnicodeString id = Transliterator::getAvailableID(i);
+ const UnicodeString& id = *avail->snext(ec);
+ if (!assertSuccess("snext()", ec) ||
+ !assertTrue("snext()!=NULL", (&id)!=NULL, TRUE)) {
+ break;
+ }
+ UnicodeString id2 = Transliterator::getAvailableID(i);
if (id.length() < 1) {
errln(UnicodeString("FAIL: getAvailableID(") +
i + ") returned empty string");
continue;
}
+ if (id != id2) {
+ errln(UnicodeString("FAIL: getAvailableID(") +
+ i + ") != getAvailableIDs().snext()");
+ continue;
+ }
UParseError parseError;
UErrorCode status = U_ZERO_ERROR;
Transliterator* t = Transliterator::createInstance(id,
delete t;
}
}
+ assertTrue("snext()==NULL", avail->snext(ec)==NULL);
+ assertSuccess("snext()", ec);
+ delete avail;
// Now test the failure path
UParseError parseError;
/* Test categories
*/
UErrorCode status = U_ZERO_ERROR;
- RuleBasedTransliterator t(
+ UParseError parseError;
+ Transliterator *t = Transliterator::createFromRules(
"<ID>",
UnicodeString("$dummy=").append((UChar)0xE100) +
UnicodeString(";"
"'!' { $lu > '^';"
"$lu > '*';"
"a > ERROR", ""),
+ UTRANS_FORWARD, parseError,
status);
if (U_FAILURE(status)) {
errln("FAIL: RBT constructor failed");
return;
}
- expect(t, "abcdefgABCDEFGU", "&bcd&fg!^**!^*&");
+ expect(*t, "abcdefgABCDEFGU", "&bcd&fg!^**!^*&");
+ delete t;
}
/**
int32_t DATA_length = (int32_t)(sizeof(DATA) / sizeof(DATA[0]));
UErrorCode status = U_ZERO_ERROR;
- RuleBasedTransliterator fwd("<ID>", RULES, status);
- RuleBasedTransliterator rev("<ID>", RULES,
- UTRANS_REVERSE, status);
+ UParseError parseError;
+ Transliterator *fwd = Transliterator::createFromRules("<ID>", RULES,
+ UTRANS_FORWARD, parseError, status);
+ Transliterator *rev = Transliterator::createFromRules("<ID>", RULES,
+ UTRANS_REVERSE, parseError, status);
if (U_FAILURE(status)) {
errln("FAIL: RBT constructor failed");
return;
}
for (int32_t i=0; i<DATA_length; i+=2) {
- expect(fwd, DATA[i], DATA[i+1]);
- expect(rev, DATA[i+1], DATA[i]);
+ expect(*fwd, DATA[i], DATA[i+1]);
+ expect(*rev, DATA[i+1], DATA[i]);
}
+ delete fwd;
+ delete rev;
}
/**
* Basic test of keyboard.
*/
void TransliteratorTest::TestKeyboard(void) {
+ UParseError parseError;
UErrorCode status = U_ZERO_ERROR;
- RuleBasedTransliterator t("<ID>",
+ Transliterator *t = Transliterator::createFromRules("<ID>",
UnicodeString("psch>Y;")
+"ps>y;"
+"ch>x;"
+"a>A;",
+ UTRANS_FORWARD, parseError,
status);
if (U_FAILURE(status)) {
errln("FAIL: RBT constructor failed");
0, "AycAY", // null means finishKeyboardTransliteration
};
- keyboardAux(t, DATA, (int32_t)(sizeof(DATA)/sizeof(DATA[0])));
+ keyboardAux(*t, DATA, (int32_t)(sizeof(DATA)/sizeof(DATA[0])));
+ delete t;
}
/**
* Basic test of keyboard with cursor.
*/
void TransliteratorTest::TestKeyboard2(void) {
+ UParseError parseError;
UErrorCode status = U_ZERO_ERROR;
- RuleBasedTransliterator t("<ID>",
+ Transliterator *t = Transliterator::createFromRules("<ID>",
UnicodeString("ych>Y;")
+"ps>|y;"
+"ch>x;"
+"a>A;",
+ UTRANS_FORWARD, parseError,
status);
if (U_FAILURE(status)) {
errln("FAIL: RBT constructor failed");
0, "AycAY", // null means finishKeyboardTransliteration
};
- keyboardAux(t, DATA, (int32_t)(sizeof(DATA)/sizeof(DATA[0])));
+ keyboardAux(*t, DATA, (int32_t)(sizeof(DATA)/sizeof(DATA[0])));
+ delete t;
}
/**
0, "abycz", // null means finishKeyboardTransliteration
};
+ UParseError parseError;
UErrorCode status = U_ZERO_ERROR;
- RuleBasedTransliterator t("<ID>", RULES, status);
+ Transliterator *t = Transliterator::createFromRules("<ID>", RULES, UTRANS_FORWARD, parseError, status);
if (U_FAILURE(status)) {
errln("FAIL: RBT constructor failed");
return;
}
- keyboardAux(t, DATA, (int32_t)(sizeof(DATA)/sizeof(DATA[0])));
+ keyboardAux(*t, DATA, (int32_t)(sizeof(DATA)/sizeof(DATA[0])));
+ delete t;
}
void TransliteratorTest::keyboardAux(const Transliterator& t,
} else {
logln(UnicodeString("FAIL: \"") + s + "\", wanted \"" + exp + "\"");
}
+
+ // ICU4C ONLY. Do not find Transliterator.orphanFilter() in ICU4J.
+ UnicodeFilter *f = hex->orphanFilter();
+ if (f == NULL){
+ errln("FAIL: orphanFilter() should get a UnicodeFilter");
+ } else {
+ delete f;
+ }
delete hex;
}
for (int32_t i=0; i<3; i+=3) {
logln(UnicodeString("Pattern: ") + prettify(DATA[i]));
+ UParseError parseError;
UErrorCode status = U_ZERO_ERROR;
- RuleBasedTransliterator t("<ID>", DATA[i], status);
+ Transliterator *t = Transliterator::createFromRules("<ID>", DATA[i], UTRANS_FORWARD, parseError, status);
if (U_FAILURE(status)) {
errln("RBT constructor failed");
} else {
- expect(t, DATA[i+1], DATA[i+2]);
+ expect(*t, DATA[i+1], DATA[i+2]);
}
+ delete t;
}
}
"y <> $ypsilon;"
"n <> $nu;",
"");
- RuleBasedTransliterator mini("mini", rules, UTRANS_REVERSE, status);
+ Transliterator *mini = Transliterator::createFromRules("mini", rules, UTRANS_REVERSE, parseError, status);
if (U_FAILURE(status)) { errln("FAIL: Transliterator constructor failed"); return; }
- expect(mini, syn, "syn");
- expect(mini, sayn, "saun");
+ expect(*mini, syn, "syn");
+ expect(*mini, sayn, "saun");
+ delete mini;
+ mini = NULL;
#if !UCONFIG_NO_FORMATTING
// Transliterate the Greek locale data
* Prefix, suffix support in hex transliterators
*/
void TransliteratorTest::TestJ243(void) {
- UErrorCode status = U_ZERO_ERROR;
+ UErrorCode ec = U_ZERO_ERROR;
// Test default Hex-Any, which should handle
// \u, \U, u+, and U+
- HexToUnicodeTransliterator hex;
- expect(hex, UnicodeString("\\u0041+\\U0042,u+0043uu+0044z", ""), "A+B,CuDz");
- // Try a custom Hex-Unicode
- // \uXXXX and &#xXXXX;
- status = U_ZERO_ERROR;
- HexToUnicodeTransliterator hex2(UnicodeString("\\\\u###0;&\\#x###0\\;", ""), status);
- expect(hex2, UnicodeString("\\u61\\u062\\u0063\\u00645\\u66x0123", ""),
- "abcd5fx0123");
- // Try custom Any-Hex (default is tested elsewhere)
- status = U_ZERO_ERROR;
- UnicodeToHexTransliterator hex3(UnicodeString("&\\#x###0;", ""), status);
- expect(hex3, "012", "012");
+ Transliterator *hex =
+ Transliterator::createInstance("Hex-Any", UTRANS_FORWARD, ec);
+ if (assertSuccess("getInstance", ec)) {
+ expect(*hex, UnicodeString("\\u0041+\\U00000042,U+0043uU+0044z", ""), "A+B,CuDz");
+ }
+ delete hex;
+
+// // Try a custom Hex-Unicode
+// // \uXXXX and &#xXXXX;
+// ec = U_ZERO_ERROR;
+// HexToUnicodeTransliterator hex2(UnicodeString("\\\\u###0;&\\#x###0\\;", ""), ec);
+// expect(hex2, UnicodeString("\\u61\\u062\\u0063\\u00645\\u66x0123", ""),
+// "abcd5fx0123");
+// // Try custom Any-Hex (default is tested elsewhere)
+// ec = U_ZERO_ERROR;
+// UnicodeToHexTransliterator hex3(UnicodeString("&\\#x###0;", ""), ec);
+// expect(hex3, "012", "012");
}
/**
for (int32_t i=0; i<DATA_length; ++i) {
UErrorCode status = U_ZERO_ERROR;
UParseError parseError;
- RuleBasedTransliterator rbt("<ID>",
+ Transliterator *rbt = Transliterator::createFromRules("<ID>",
DATA[i].rule,
UTRANS_FORWARD,
- 0,
parseError,
status);
UBool gotError = U_FAILURE(status);
} else {
errln(UnicodeString("FAIL: ") + desc);
}
+ delete rbt;
}
}
for (int32_t i=0; i<DATA_length; i+=3) {
logln("Pattern: " + prettify(DATA[i]));
+ UParseError parseError;
UErrorCode status = U_ZERO_ERROR;
- RuleBasedTransliterator t("ID", DATA[i], status);
+ Transliterator *t = Transliterator::createFromRules("ID", DATA[i], UTRANS_FORWARD, parseError, status);
if (U_FAILURE(status)) {
errln("FAIL: RBT constructor");
} else {
- expect(t, DATA[i+1], DATA[i+2]);
+ expect(*t, DATA[i+1], DATA[i+2]);
}
+ delete t;
}
}
for (int32_t i=0; i<DATA_length; i+=3) {
logln("Pattern: " + prettify(DATA[i]));
+ UParseError parseError;
UErrorCode status = U_ZERO_ERROR;
- RuleBasedTransliterator t("<ID>", DATA[i], status);
+ Transliterator *t = Transliterator::createFromRules("<ID>", DATA[i], UTRANS_FORWARD, parseError, status);
if (U_FAILURE(status)) {
errln("FAIL: RBT constructor");
} else {
- expect(t, DATA[i+1], DATA[i+2]);
+ expect(*t, DATA[i+1], DATA[i+2]);
}
+ delete t;
}
}
for (int32_t i=0; i<DATA_length; i+=3) {
logln("Pattern: " + prettify(DATA[i]));
+ UParseError parseError;
UErrorCode status = U_ZERO_ERROR;
- RuleBasedTransliterator t("<ID>", DATA[i], status);
+ Transliterator *t = Transliterator::createFromRules("<ID>", DATA[i], UTRANS_FORWARD, parseError, status);
if (U_FAILURE(status)) {
errln("FAIL: RBT constructor");
} else {
- expect(t, DATA[i+1], DATA[i+2]);
+ expect(*t, DATA[i+1], DATA[i+2]);
}
+ delete t;
}
}
int32_t n = (int32_t)(sizeof(DATA) / sizeof(DATA[0])) / 3;
for (int32_t i=0; i<n; i++) {
UErrorCode status = U_ZERO_ERROR;
- Transliterator *t = new RuleBasedTransliterator("<ID>",
- DATA[3*i], status);
+ UParseError parseError;
+ Transliterator *t = Transliterator::createFromRules("<ID>",
+ DATA[3*i], UTRANS_FORWARD, parseError, status);
if (U_FAILURE(status)) {
delete t;
errln("FAIL: RBT constructor");
// The real test here is what happens when the destructors are
// called. So we let one object get destructed, and check to
// see that its copy still works.
- RuleBasedTransliterator *t2 = 0;
+ Transliterator *t2 = 0;
{
+ UParseError parseError;
UErrorCode status = U_ZERO_ERROR;
- RuleBasedTransliterator t1("t1", "a>A;b>B;", status);
+ Transliterator *t1 = Transliterator::createFromRules("t1",
+ "a>A;b>B;'foo'+>'bar'", UTRANS_FORWARD, parseError, status);
if (U_FAILURE(status)) {
errln("FAIL: RBT constructor");
return;
}
- t2 = new RuleBasedTransliterator(t1);
- expect(t1, "abc", "ABc");
+ t2 = t1->clone(); // Call copy constructor under the covers.
+ expect(*t1, "abcfoofoo", "ABcbar");
+ delete t1;
}
- expect(*t2, "abc", "ABc");
+ expect(*t2, "abcfoofoo", "ABcbar");
delete t2;
}
// Careful: CharsToUS will convert "\\N" => "N"; use "\\\\N" for \N
expect(*uni2name, CharsToUnicodeString("\\u00A0abc\\u4E01\\u00B5\\u0A81\\uFFFD\\u0004\\u0009\\u0081\\uFFFF"),
CharsToUnicodeString("\\\\N{NO-BREAK SPACE}abc\\\\N{CJK UNIFIED IDEOGRAPH-4E01}\\\\N{MICRO SIGN}\\\\N{GUJARATI SIGN CANDRABINDU}\\\\N{REPLACEMENT CHARACTER}\\\\N{END OF TRANSMISSION}\\\\N{CHARACTER TABULATION}\\\\N{<control-0081>}\\\\N{<noncharacter-FFFF>}"));
- expect(*name2uni, "{\\N { NO-BREAK SPACE}abc\\N{ CJK UNIFIED IDEOGRAPH-4E01 }\\N{x\\N{MICRO SIGN}\\N{GUJARATI SIGN CANDRABINDU}\\N{REPLACEMENT CHARACTER}\\N{END OF TRANSMISSION}\\N{CHARACTER TABULATION}\\N{<control-0081>}\\N{<noncharacter-FFFF>}\\N{<control-0004>}\\N{",
+ expect(*name2uni, UNICODE_STRING_SIMPLE("{\\N { NO-BREAK SPACE}abc\\N{ CJK UNIFIED IDEOGRAPH-4E01 }\\N{x\\N{MICRO SIGN}\\N{GUJARATI SIGN CANDRABINDU}\\N{REPLACEMENT CHARACTER}\\N{END OF TRANSMISSION}\\N{CHARACTER TABULATION}\\N{<control-0081>}\\N{<noncharacter-FFFF>}\\N{<control-0004>}\\N{"),
CharsToUnicodeString("{\\u00A0abc\\u4E01\\\\N{x\\u00B5\\u0A81\\uFFFD\\u0004\\u0009\\u0081\\uFFFF\\u0004\\\\N{"));
delete uni2name;
UnicodeString expID(DATA[i+2]);
Transliterator* t =
Transliterator::createInstance(id,dir,err,ec);
- UnicodeString newID = t?t->getID():UnicodeString();
+ UnicodeString newID;
+ if (t) {
+ newID = t->getID();
+ }
UBool ok = (newID == expID);
if (!t) {
newID = u_errorName(ec);
errln("FAIL: createFromRules failed");
return;
}
- expect(*t, "\\u0043at in the hat, bat on the mat",
+ expect(*t, UNICODE_STRING_SIMPLE("\\u0043at in the hat, bat on the mat"),
"C.A.t IN tHE H.A.t, .B..A.t ON tHE M.A.t");
UnicodeString r;
t->toRules(r, TRUE);
}
expect(*t, "Able bodied baker's cats", "Ale odied ker's ts");
+
+ // extra test for RemoveTransliterator::clone(), which at one point wasn't
+ // duplicating the filter
+ Transliterator* t2 = t->clone();
+ expect(*t2, "Able bodied baker's cats", "Ale odied ker's ts");
+
delete t;
+ delete t2;
}
void TransliteratorTest::TestToRules(void) {
UParseError parseError;
UErrorCode status = U_ZERO_ERROR;
Transliterator *t = Transliterator::createFromRules("ID",
- DATA[d+1], UTRANS_FORWARD, parseError, status);
+ UnicodeString(DATA[d+1], -1, US_INV), UTRANS_FORWARD, parseError, status);
if (t == 0) {
errln("FAIL: createFromRules failed");
return;
t->toRules(rules, FALSE);
t->toRules(escapedRules, TRUE);
UnicodeString expRules = CharsToUnicodeString(DATA[d+2]);
- UnicodeString expEscapedRules(DATA[d+2]);
+ UnicodeString expEscapedRules(DATA[d+2], -1, US_INV);
if (rules == expRules) {
- logln((UnicodeString)"Ok: " + DATA[d+1] +
+ logln((UnicodeString)"Ok: " + UnicodeString(DATA[d+1], -1, US_INV) +
" => " + rules);
} else {
- errln((UnicodeString)"FAIL: " + DATA[d+1] +
+ errln((UnicodeString)"FAIL: " + UnicodeString(DATA[d+1], -1, US_INV) +
" => " + rules + ", exp " + expRules);
}
if (escapedRules == expEscapedRules) {
- logln((UnicodeString)"Ok: " + DATA[d+1] +
+ logln((UnicodeString)"Ok: " + UnicodeString(DATA[d+1], -1, US_INV) +
" => " + escapedRules);
} else {
- errln((UnicodeString)"FAIL: " + DATA[d+1] +
+ errln((UnicodeString)"FAIL: " + UnicodeString(DATA[d+1], -1, US_INV) +
" => " + escapedRules + ", exp " + expEscapedRules);
}
delete t;
} else {
// UnicodeSet test
UErrorCode status = U_ZERO_ERROR;
- UnicodeString pat(DATA[d+1]);
- UnicodeString expToPat(DATA[d+2]);
+ UnicodeString pat(DATA[d+1], -1, US_INV);
+ UnicodeString expToPat(DATA[d+2], -1, US_INV);
UnicodeSet set(pat, status);
if (U_FAILURE(status)) {
errln("FAIL: UnicodeSet ct failed");
expectT("Any-Name",
CharsToUnicodeString("\\U00010330\\U000E0061\\u00A0"),
- "\\N{GOTHIC LETTER AHSA}\\N{TAG LATIN SMALL LETTER A}\\N{NO-BREAK SPACE}");
+ UNICODE_STRING_SIMPLE("\\N{GOTHIC LETTER AHSA}\\N{TAG LATIN SMALL LETTER A}\\N{NO-BREAK SPACE}"));
expectT("Any-Hex/Unicode",
CharsToUnicodeString("\\U00010330\\U0010FF00\\U000E0061\\u00A0"),
- "U+10330U+10FF00U+E0061U+00A0");
+ UNICODE_STRING_SIMPLE("U+10330U+10FF00U+E0061U+00A0"));
expectT("Any-Hex/C",
CharsToUnicodeString("\\U00010330\\U0010FF00\\U000E0061\\u00A0"),
- "\\U00010330\\U0010FF00\\U000E0061\\u00A0");
+ UNICODE_STRING_SIMPLE("\\U00010330\\U0010FF00\\U000E0061\\u00A0"));
expectT("Any-Hex/Perl",
CharsToUnicodeString("\\U00010330\\U0010FF00\\U000E0061\\u00A0"),
- "\\x{10330}\\x{10FF00}\\x{E0061}\\x{A0}");
+ UNICODE_STRING_SIMPLE("\\x{10330}\\x{10FF00}\\x{E0061}\\x{A0}"));
expectT("Any-Hex/Java",
CharsToUnicodeString("\\U00010330\\U0010FF00\\U000E0061\\u00A0"),
- "\\uD800\\uDF30\\uDBFF\\uDF00\\uDB40\\uDC61\\u00A0");
+ UNICODE_STRING_SIMPLE("\\uD800\\uDF30\\uDBFF\\uDF00\\uDB40\\uDC61\\u00A0"));
expectT("Any-Hex/XML",
CharsToUnicodeString("\\U00010330\\U0010FF00\\U000E0061\\u00A0"),
CharsToUnicodeString("\\U00010330\\U0010FF00\\U000E0061\\u00A0"),
"𐌰􏼀󠁡 ");
- expectT("[\\U000E0000-\\U000E0FFF] Remove",
+ expectT(UNICODE_STRING_SIMPLE("[\\U000E0000-\\U000E0FFF] Remove"),
CharsToUnicodeString("\\U00010330\\U0010FF00\\U000E0061\\u00A0"),
CharsToUnicodeString("\\U00010330\\U0010FF00\\u00A0"));
}
"bb x xb");
}
-class TestTrans : public NullTransliterator {
+class TestTrans : public Transliterator {
public:
- TestTrans(const UnicodeString& id) {
- setID(id);
+ TestTrans(const UnicodeString& id) : Transliterator(id, 0) {
}
+ virtual Transliterator* clone(void) const {
+ return new TestTrans(getID());
+ }
+ virtual void handleTransliterate(Replaceable& /*text*/, UTransPosition& offsets,
+ UBool /*isIncremental*/) const
+ {
+ offsets.start = offsets.limit;
+ }
+ virtual UClassID getDynamicClassID() const;
+ static UClassID U_EXPORT2 getStaticClassID();
};
+UOBJECT_DEFINE_RTTI_IMPLEMENTATION(TestTrans)
/**
* Test Source-Target/Variant.
UnicodeString rule = "$initial } a <> \\u1161;";
UParseError pe;
UErrorCode ec = U_ZERO_ERROR;
- Transliterator *t = new RuleBasedTransliterator("<ID>", rule, UTRANS_FORWARD, 0, pe, ec);
+ Transliterator *t = Transliterator::createFromRules("<ID>", rule, UTRANS_FORWARD, pe, ec);
delete t;
if (U_FAILURE(ec)) {
logln((UnicodeString)"OK: Got exception for " + rule + ", as expected: " +
t = Transliterator::createInstance(id, direction, pe, ec);
}
UBool ok = (t != NULL && U_SUCCESS(ec));
+ UnicodeString transID;
+ if (t!=0) {
+ transID = t->getID();
+ }
+ else {
+ transID = UnicodeString("NULL", "");
+ }
if (ok == expOk) {
- logln((UnicodeString)"Ok: " + id + " => " + (t!=0?t->getID():(UnicodeString)"NULL") + ", " +
+ logln((UnicodeString)"Ok: " + id + " => " + transID + ", " +
u_errorName(ec));
if (source.length() != 0) {
expect(*t, source, exp);
}
delete t;
} else {
- errln((UnicodeString)"FAIL: " + id + " => " + (t!=0?t->getID():(UnicodeString)"NULL") + ", " +
+ errln((UnicodeString)"FAIL: " + id + " => " + transID + ", " +
u_errorName(ec));
}
}
* Test new property set syntax
*/
void TransliteratorTest::TestPropertySet() {
- expect("a>A; \\p{Lu}>x; \\p{ANY}>y;", "abcDEF", "Ayyxxx");
+ expect(UNICODE_STRING_SIMPLE("a>A; \\p{Lu}>x; \\p{ANY}>y;"), "abcDEF", "Ayyxxx");
expect("(.+)>'[' $1 ']';", " a stitch \n in time \r saves 9",
"[ a stitch ]\n[ in time ]\r[ saves 9]");
}
// true. Otherwise, this test will fail, revealing a
// limitation of global filters in incremental mode.
Transliterator *a =
- Transliterator::createFromRules("a", "a > A;", UTRANS_FORWARD, pe, ec);
+ Transliterator::createFromRules("a_to_A", "a > A;", UTRANS_FORWARD, pe, ec);
Transliterator *A =
- Transliterator::createFromRules("A", "A > b;", UTRANS_FORWARD, pe, ec);
+ Transliterator::createFromRules("A_to_b", "A > b;", UTRANS_FORWARD, pe, ec);
if (U_FAILURE(ec)) {
delete a;
delete A;
}
expect(*t, "aAaA", "bAbA");
+
+ assertTrue("countElements", t->countElements() == 3);
+ assertEquals("getElement(0)", t->getElement(0, ec).getID(), "a_to_A");
+ assertEquals("getElement(1)", t->getElement(1, ec).getID(), "NFD");
+ assertEquals("getElement(2)", t->getElement(2, ec).getID(), "A_to_b");
+ assertSuccess("getElement", ec);
+
delete a;
delete A;
delete array[1];
// (\u0902) (when preceded by vowel) ---> (\u0A02)
// (\u0902) (when preceded by consonant) ---> (\u0A70)
UErrorCode status = U_ZERO_ERROR;
- UnicodeSet vowel(UnicodeString("[\\u0905-\\u090A \\u090F\\u0910\\u0913\\u0914 \\u093e-\\u0942\\u0947\\u0948\\u094B\\u094C\\u094D]").unescape(), status);
- UnicodeSet non_vowel(UnicodeString("[\\u0915-\\u0928\\u092A-\\u0930]").unescape(), status);
+ UnicodeSet vowel(UnicodeString("[\\u0905-\\u090A \\u090F\\u0910\\u0913\\u0914 \\u093e-\\u0942\\u0947\\u0948\\u094B\\u094C\\u094D]", -1, US_INV).unescape(), status);
+ UnicodeSet non_vowel(UnicodeString("[\\u0915-\\u0928\\u092A-\\u0930]", -1, US_INV).unescape(), status);
UParseError parseError;
UnicodeSetIterator vIter(vowel);
delete trans;
return;
}
- UnicodeString src (" \\u0902");
- UnicodeString expected(" \\u0A02");
+ UnicodeString src (" \\u0902", -1, US_INV);
+ UnicodeString expected(" \\u0A02", -1, US_INV);
src = src.unescape();
expected= expected.unescape();
* Make sure parse errors reference the right line.
*/
void TransliteratorTest::TestParseError() {
- const char* rule =
+ static const char* rule =
"a > b;\n"
"# more stuff\n"
"d << b;";
} else {
errln("FAIL: " + err);
}
- return;
}
- errln("FAIL: no syntax error");
+ else {
+ errln("FAIL: no syntax error");
+ }
+ static const char* maskingRule =
+ "a>x;\n"
+ "# more stuff\n"
+ "ab>y;";
+ ec = U_ZERO_ERROR;
+ delete Transliterator::createFromRules("ID", maskingRule, UTRANS_FORWARD, pe, ec);
+ if (ec != U_RULE_MASK_ERROR) {
+ errln("FAIL: returned %s instead of U_RULE_MASK_ERROR", u_errorName(ec));
+ }
+ else if (UnicodeString("a > x;") != UnicodeString(pe.preContext)) {
+ errln("FAIL: did not get expected precontext");
+ }
+ else if (UnicodeString("ab > y;") != UnicodeString(pe.postContext)) {
+ errln("FAIL: did not get expected postcontext");
+ }
}
/**
UParseError pe;
UErrorCode ec = U_ZERO_ERROR;
- Transliterator *t2 = Transliterator::createFromRules("source-target", testRules, UTRANS_FORWARD, pe, ec);
- Transliterator *t3 = Transliterator::createFromRules("target-source", testRules, UTRANS_REVERSE, pe, ec);
+ Transliterator *t2 = Transliterator::createFromRules("source-target", UnicodeString(testRules, -1, US_INV), UTRANS_FORWARD, pe, ec);
+ Transliterator *t3 = Transliterator::createFromRules("target-source", UnicodeString(testRules, -1, US_INV), UTRANS_REVERSE, pe, ec);
if (U_FAILURE(ec)) {
delete t2;
expect(*t2, source, target);
expect(*t3, target, source);
- checkRules("Failed toRules FORWARD", *t2, testRulesForward);
- checkRules("Failed toRules BACKWARD", *t3, testRulesBackward);
+ checkRules("Failed toRules FORWARD", *t2, UnicodeString(testRulesForward, -1, US_INV));
+ checkRules("Failed toRules BACKWARD", *t3, UnicodeString(testRulesBackward, -1, US_INV));
delete t2;
delete t3;
errln((UnicodeString)"FAIL: createInstance");
} else {
expect(*t,
- "\\x{40}\\U000000312Q",
+ UNICODE_STRING_SIMPLE("\\x{40}\\U000000312Q"),
"@12Q");
}
delete t;
} else {
expect(*t,
CharsToUnicodeString("A\\U0010BEEF\\uFEED"),
- "\\u0041\\U0010BEEF\\uFEED");
+ UNICODE_STRING_SIMPLE("\\u0041\\U0010BEEF\\uFEED"));
}
delete t;
} else {
expect(*t,
CharsToUnicodeString("A\\U0010BEEF\\uFEED"),
- "\\u0041\\uDBEF\\uDEEF\\uFEED");
+ UNICODE_STRING_SIMPLE("\\u0041\\uDBEF\\uDEEF\\uFEED"));
}
delete t;
} else {
expect(*t,
CharsToUnicodeString("A\\U0010BEEF\\uFEED"),
- "\\x{41}\\x{10BEEF}\\x{FEED}");
+ UNICODE_STRING_SIMPLE("\\x{41}\\x{10BEEF}\\x{FEED}"));
}
delete t;
}
char* Char32ToEscapedChars(UChar32 ch, char* buffer) {
if (ch <= 0xFFFF) {
- sprintf(buffer, "\\u%04x", ch);
+ sprintf(buffer, "\\u%04x", (int)ch);
} else {
- sprintf(buffer, "\\u%08x", ch);
+ sprintf(buffer, "\\U%08x", (int)ch);
}
return buffer;
}
Transliterator::getAvailableVariant(k, source, target, variant);
UnicodeString id = source + "-" + target + "/" + variant;
-
+
Transliterator *t = Transliterator::createInstance(id, UTRANS_FORWARD, err, status);
if (U_FAILURE(status)) {
errln((UnicodeString)"FAIL: Could not create " + id);
}
expect(*t, "The Quick Brown Fox",
- "T(t=\\u0074)he Q(q=\\u0071)uick B(b=\\u0062)rown F(f=\\u0066)ox");
+ UNICODE_STRING_SIMPLE("T(t=\\u0074)he Q(q=\\u0071)uick B(b=\\u0062)rown F(f=\\u0066)ox"));
delete t;
}
Transliterator* _TUFF[4];
UnicodeString* _TUFID[4];
-static Transliterator* _TUFFactory(const UnicodeString& /*ID*/,
+static Transliterator* U_EXPORT2 _TUFFactory(const UnicodeString& /*ID*/,
Transliterator::Token context) {
return _TUFF[context.integer]->clone();
}
// There's no need to register inverses if we don't use them
t = Transliterator::createFromRules("gif",
- "'\\'u(..)(..) > '<img src=\"http://www.unicode.org/gifs/24/' $1 '/U' $1$2 '.gif\">';",
+ UNICODE_STRING_SIMPLE("'\\'u(..)(..) > '<img src=\"http://www.unicode.org/gifs/24/' $1 '/U' $1$2 '.gif\">';"),
UTRANS_FORWARD, pe, ec);
if (t == NULL || U_FAILURE(ec)) {
errln((UnicodeString)"FAIL: createFromRules gif " + u_errorName(ec));
_TUFReg("Any-gif", t, 0);
t = Transliterator::createFromRules("RemoveCurly",
- "[\\{\\}] > ; '\\N' > ;",
+ UNICODE_STRING_SIMPLE("[\\{\\}] > ; '\\N' > ;"),
UTRANS_FORWARD, pe, ec);
if (t == NULL || U_FAILURE(ec)) {
errln((UnicodeString)"FAIL: createFromRules RemoveCurly " + u_errorName(ec));
goto FAIL;
}
- expect(*t, "\\N{name}", "name");
+ expect(*t, UNICODE_STRING_SIMPLE("\\N{name}"), "name");
_TUFReg("Any-RemoveCurly", t, 1);
logln("Trying &hex");
errln((UnicodeString)"FAIL: createInstance Any-hex2 " + u_errorName(ec));
goto FAIL;
}
- expect(*t, "abc", "\\u0061\\u0062\\u0063");
+ expect(*t, "abc", UNICODE_STRING_SIMPLE("\\u0061\\u0062\\u0063"));
delete t;
logln("Trying &gif");
goto FAIL;
}
expect(*t, "abc",
- "\\u0061 LATIN SMALL LETTER A \\u0062 LATIN SMALL LETTER B \\u0063 LATIN SMALL LETTER C ");
+ UNICODE_STRING_SIMPLE("\\u0061 LATIN SMALL LETTER A \\u0062 LATIN SMALL LETTER B \\u0063 LATIN SMALL LETTER C "));
delete t;
FAIL:
"xbz");
expect(CharsToUnicodeString("([:^ASCII:]) \\u2192 \\u2206Name($1);"),
CharsToUnicodeString("<=\\u2190; >=\\u2192; <>=\\u2194; &=\\u2206"),
- "<=\\N{LEFTWARDS ARROW}; >=\\N{RIGHTWARDS ARROW}; <>=\\N{LEFT RIGHT ARROW}; &=\\N{INCREMENT}");
+ UNICODE_STRING_SIMPLE("<=\\N{LEFTWARDS ARROW}; >=\\N{RIGHTWARDS ARROW}; <>=\\N{LEFT RIGHT ARROW}; &=\\N{INCREMENT}"));
+}
+
+static const char* BEGIN_END_RULES[] = {
+ // [0]
+ "abc > xy;"
+ "aba > z;",
+
+ // [1]
+/*
+ "::BEGIN;"
+ "abc > xy;"
+ "::END;"
+ "::BEGIN;"
+ "aba > z;"
+ "::END;",
+*/
+ "", // test case commented out below, this is here to keep from messing up the indexes
+
+ // [2]
+/*
+ "abc > xy;"
+ "::BEGIN;"
+ "aba > z;"
+ "::END;",
+*/
+ "", // test case commented out below, this is here to keep from messing up the indexes
+
+ // [3]
+/*
+ "::BEGIN;"
+ "abc > xy;"
+ "::END;"
+ "aba > z;",
+*/
+ "", // test case commented out below, this is here to keep from messing up the indexes
+
+ // [4]
+ "abc > xy;"
+ "::Null;"
+ "aba > z;",
+
+ // [5]
+ "::Upper;"
+ "ABC > xy;"
+ "AB > x;"
+ "C > z;"
+ "::Upper;"
+ "XYZ > p;"
+ "XY > q;"
+ "Z > r;"
+ "::Upper;",
+
+ // [6]
+ "$ws = [[:Separator:][\\u0009-\\u000C]$];"
+ "$delim = [\\-$ws];"
+ "$ws $delim* > ' ';"
+ "'-' $delim* > '-';",
+
+ // [7]
+ "::Null;"
+ "$ws = [[:Separator:][\\u0009-\\u000C]$];"
+ "$delim = [\\-$ws];"
+ "$ws $delim* > ' ';"
+ "'-' $delim* > '-';",
+
+ // [8]
+ "$ws = [[:Separator:][\\u0009-\\u000C]$];"
+ "$delim = [\\-$ws];"
+ "$ws $delim* > ' ';"
+ "'-' $delim* > '-';"
+ "::Null;",
+
+ // [9]
+ "$ws = [[:Separator:][\\u0009-\\u000C]$];"
+ "$delim = [\\-$ws];"
+ "::Null;"
+ "$ws $delim* > ' ';"
+ "'-' $delim* > '-';",
+
+ // [10]
+/*
+ "::BEGIN;"
+ "$ws = [[:Separator:][\\u0009-\\u000C]$];"
+ "$delim = [\\-$ws];"
+ "::END;"
+ "$ws $delim* > ' ';"
+ "'-' $delim* > '-';",
+*/
+ "", // test case commented out below, this is here to keep from messing up the indexes
+
+ // [11]
+/*
+ "$ws = [[:Separator:][\\u0009-\\u000C]$];"
+ "$delim = [\\-$ws];"
+ "::BEGIN;"
+ "$ws $delim* > ' ';"
+ "'-' $delim* > '-';"
+ "::END;",
+*/
+ "", // test case commented out below, this is here to keep from messing up the indexes
+
+ // [12]
+/*
+ "$ws = [[:Separator:][\\u0009-\\u000C]$];"
+ "$delim = [\\-$ws];"
+ "$ab = [ab];"
+ "::BEGIN;"
+ "$ws $delim* > ' ';"
+ "'-' $delim* > '-';"
+ "::END;"
+ "::BEGIN;"
+ "$ab { ' ' } $ab > '-';"
+ "c { ' ' > ;"
+ "::END;"
+ "::BEGIN;"
+ "'a-a' > a\\%|a;"
+ "::END;",
+*/
+ "", // test case commented out below, this is here to keep from messing up the indexes
+
+ // [13]
+ "$ws = [[:Separator:][\\u0009-\\u000C]$];"
+ "$delim = [\\-$ws];"
+ "$ab = [ab];"
+ "::Null;"
+ "$ws $delim* > ' ';"
+ "'-' $delim* > '-';"
+ "::Null;"
+ "$ab { ' ' } $ab > '-';"
+ "c { ' ' > ;"
+ "::Null;"
+ "'a-a' > a\\%|a;",
+
+ // [14]
+/*
+ "::[abc];"
+ "::BEGIN;"
+ "abc > xy;"
+ "::END;"
+ "::BEGIN;"
+ "aba > yz;"
+ "::END;"
+ "::Upper;",
+*/
+ "", // test case commented out below, this is here to keep from messing up the indexes
+
+ // [15]
+ "::[abc];"
+ "abc > xy;"
+ "::Null;"
+ "aba > yz;"
+ "::Upper;",
+
+ // [16]
+/*
+ "::[abc];"
+ "::BEGIN;"
+ "abc <> xy;"
+ "::END;"
+ "::BEGIN;"
+ "aba <> yz;"
+ "::END;"
+ "::Upper(Lower);"
+ "::([XYZ]);"
+*/
+ "", // test case commented out below, this is here to keep from messing up the indexes
+
+ // [17]
+ "::[abc];"
+ "abc <> xy;"
+ "::Null;"
+ "aba <> yz;"
+ "::Upper(Lower);"
+ "::([XYZ]);"
+};
+static const int32_t BEGIN_END_RULES_length = (int32_t)(sizeof(BEGIN_END_RULES) / sizeof(BEGIN_END_RULES[0]));
+
+/*
+(This entire test is commented out below and will need some heavy revision when we re-add
+the ::BEGIN/::END stuff)
+static const char* BOGUS_BEGIN_END_RULES[] = {
+ // [7]
+ "::BEGIN;"
+ "abc > xy;"
+ "::BEGIN;"
+ "aba > z;"
+ "::END;"
+ "::END;",
+
+ // [8]
+ "abc > xy;"
+ " aba > z;"
+ "::END;",
+
+ // [9]
+ "::BEGIN;"
+ "::Upper;"
+ "::END;"
+};
+static const int32_t BOGUS_BEGIN_END_RULES_length = (int32_t)(sizeof(BOGUS_BEGIN_END_RULES) / sizeof(BOGUS_BEGIN_END_RULES[0]));
+*/
+
+static const char* BEGIN_END_TEST_CASES[] = {
+ // rules input expected output
+ BEGIN_END_RULES[0], "abc ababc aba", "xy zbc z",
+// BEGIN_END_RULES[1], "abc ababc aba", "xy abxy z",
+// BEGIN_END_RULES[2], "abc ababc aba", "xy abxy z",
+// BEGIN_END_RULES[3], "abc ababc aba", "xy abxy z",
+ BEGIN_END_RULES[4], "abc ababc aba", "xy abxy z",
+ BEGIN_END_RULES[5], "abccabaacababcbc", "PXAARXQBR",
+
+ BEGIN_END_RULES[6], "e e - e---e- e", "e e e-e-e",
+ BEGIN_END_RULES[7], "e e - e---e- e", "e e e-e-e",
+ BEGIN_END_RULES[8], "e e - e---e- e", "e e e-e-e",
+ BEGIN_END_RULES[9], "e e - e---e- e", "e e e-e-e",
+// BEGIN_END_RULES[10], "e e - e---e- e", "e e e-e-e",
+// BEGIN_END_RULES[11], "e e - e---e- e", "e e e-e-e",
+// BEGIN_END_RULES[12], "e e - e---e- e", "e e e-e-e",
+// BEGIN_END_RULES[12], "a a a a", "a%a%a%a",
+// BEGIN_END_RULES[12], "a a-b c b a", "a%a-b cb-a",
+ BEGIN_END_RULES[13], "e e - e---e- e", "e e e-e-e",
+ BEGIN_END_RULES[13], "a a a a", "a%a%a%a",
+ BEGIN_END_RULES[13], "a a-b c b a", "a%a-b cb-a",
+
+// BEGIN_END_RULES[14], "abc xy ababc xyz aba", "XY xy ABXY xyz YZ",
+ BEGIN_END_RULES[15], "abc xy ababc xyz aba", "XY xy ABXY xyz YZ",
+// BEGIN_END_RULES[16], "abc xy ababc xyz aba", "XY xy ABXY xyz YZ",
+ BEGIN_END_RULES[17], "abc xy ababc xyz aba", "XY xy ABXY xyz YZ"
+};
+static const int32_t BEGIN_END_TEST_CASES_length = (int32_t)(sizeof(BEGIN_END_TEST_CASES) / sizeof(BEGIN_END_TEST_CASES[0]));
+
+void TransliteratorTest::TestBeginEnd() {
+ // run through the list of test cases above
+ int32_t i = 0;
+ for (i = 0; i < BEGIN_END_TEST_CASES_length; i += 3) {
+ expect((UnicodeString)"Test case #" + (i / 3),
+ UnicodeString(BEGIN_END_TEST_CASES[i], -1, US_INV),
+ UnicodeString(BEGIN_END_TEST_CASES[i + 1], -1, US_INV),
+ UnicodeString(BEGIN_END_TEST_CASES[i + 2], -1, US_INV));
+ }
+
+ // instantiate the one reversible rule set in the reverse direction and make sure it does the right thing
+ UParseError parseError;
+ UErrorCode status = U_ZERO_ERROR;
+ Transliterator* reversed = Transliterator::createFromRules("Reversed", UnicodeString(BEGIN_END_RULES[17]),
+ UTRANS_REVERSE, parseError, status);
+ if (reversed == 0 || U_FAILURE(status)) {
+ reportParseError(UnicodeString("FAIL: Couldn't create reversed transliterator"), parseError, status);
+ } else {
+ expect(*reversed, UnicodeString("xy XY XYZ yz YZ"), UnicodeString("xy abc xaba yz aba"));
+ }
+ delete reversed;
+
+ // finally, run through the list of syntactically-ill-formed rule sets above and make sure
+ // that all of them cause errors
+/*
+(commented out until we have the real ::BEGIN/::END stuff in place
+ for (i = 0; i < BOGUS_BEGIN_END_RULES_length; i++) {
+ UParseError parseError;
+ UErrorCode status = U_ZERO_ERROR;
+ Transliterator* t = Transliterator::createFromRules("foo", UnicodeString(BOGUS_BEGIN_END_RULES[i]),
+ UTRANS_FORWARD, parseError, status);
+ if (!U_FAILURE(status)) {
+ delete t;
+ errln((UnicodeString)"Should have gotten syntax error from " + BOGUS_BEGIN_END_RULES[i]);
+ }
+ }
+*/
}
+void TransliteratorTest::TestBeginEndToRules() {
+ // run through the same list of test cases we used above, but this time, instead of just
+ // instantiating a Transliterator from the rules and running the test against it, we instantiate
+ // a Transliterator from the rules, do toRules() on it, instantiate a Transliterator from
+ // the resulting set of rules, and make sure that the generated rule set is semantically equivalent
+ // to (i.e., does the same thing as) the original rule set
+ for (int32_t i = 0; i < BEGIN_END_TEST_CASES_length; i += 3) {
+ UParseError parseError;
+ UErrorCode status = U_ZERO_ERROR;
+ Transliterator* t = Transliterator::createFromRules("--", UnicodeString(BEGIN_END_TEST_CASES[i], -1, US_INV),
+ UTRANS_FORWARD, parseError, status);
+ if (U_FAILURE(status)) {
+ reportParseError(UnicodeString("FAIL: Couldn't create transliterator"), parseError, status);
+ } else {
+ UnicodeString rules;
+ t->toRules(rules, TRUE);
+ Transliterator* t2 = Transliterator::createFromRules((UnicodeString)"Test case #" + (i / 3), rules,
+ UTRANS_FORWARD, parseError, status);
+ if (U_FAILURE(status)) {
+ reportParseError(UnicodeString("FAIL: Couldn't create transliterator from generated rules"),
+ parseError, status);
+ delete t;
+ } else {
+ expect(*t2,
+ UnicodeString(BEGIN_END_TEST_CASES[i + 1], -1, US_INV),
+ UnicodeString(BEGIN_END_TEST_CASES[i + 2], -1, US_INV));
+ delete t;
+ delete t2;
+ }
+ }
+ }
+
+ // do the same thing for the reversible test case
+ UParseError parseError;
+ UErrorCode status = U_ZERO_ERROR;
+ Transliterator* reversed = Transliterator::createFromRules("Reversed", UnicodeString(BEGIN_END_RULES[17]),
+ UTRANS_REVERSE, parseError, status);
+ if (U_FAILURE(status)) {
+ reportParseError(UnicodeString("FAIL: Couldn't create reversed transliterator"), parseError, status);
+ } else {
+ UnicodeString rules;
+ reversed->toRules(rules, FALSE);
+ Transliterator* reversed2 = Transliterator::createFromRules("Reversed", rules, UTRANS_FORWARD,
+ parseError, status);
+ if (U_FAILURE(status)) {
+ reportParseError(UnicodeString("FAIL: Couldn't create reversed transliterator from generated rules"),
+ parseError, status);
+ delete reversed;
+ } else {
+ expect(*reversed2,
+ UnicodeString("xy XY XYZ yz YZ"),
+ UnicodeString("xy abc xaba yz aba"));
+ delete reversed;
+ delete reversed2;
+ }
+ }
+}
+
+void TransliteratorTest::TestRegisterAlias() {
+ UnicodeString longID("Lower;[aeiou]Upper");
+ UnicodeString shortID("Any-CapVowels");
+ UnicodeString reallyShortID("CapVowels");
+
+ Transliterator::registerAlias(shortID, longID);
+
+ UErrorCode err = U_ZERO_ERROR;
+ Transliterator* t1 = Transliterator::createInstance(longID, UTRANS_FORWARD, err);
+ if (U_FAILURE(err)) {
+ errln("Failed to instantiate transliterator with long ID");
+ Transliterator::unregister(shortID);
+ return;
+ }
+ Transliterator* t2 = Transliterator::createInstance(reallyShortID, UTRANS_FORWARD, err);
+ if (U_FAILURE(err)) {
+ errln("Failed to instantiate transliterator with short ID");
+ delete t1;
+ Transliterator::unregister(shortID);
+ return;
+ }
+
+ if (t1->getID() != longID)
+ errln("Transliterator instantiated with long ID doesn't have long ID");
+ if (t2->getID() != reallyShortID)
+ errln("Transliterator instantiated with short ID doesn't have short ID");
+
+ UnicodeString rules1;
+ UnicodeString rules2;
+
+ t1->toRules(rules1, TRUE);
+ t2->toRules(rules2, TRUE);
+ if (rules1 != rules2)
+ errln("Alias transliterators aren't the same");
+
+ delete t1;
+ delete t2;
+ Transliterator::unregister(shortID);
+
+ t1 = Transliterator::createInstance(shortID, UTRANS_FORWARD, err);
+ if (U_SUCCESS(err)) {
+ errln("Instantiation with short ID succeeded after short ID was unregistered");
+ delete t1;
+ }
+
+ // try the same thing again, but this time with something other than
+ // an instance of CompoundTransliterator
+ UnicodeString realID("Latin-Greek");
+ UnicodeString fakeID("Latin-dlgkjdflkjdl");
+ Transliterator::registerAlias(fakeID, realID);
+
+ err = U_ZERO_ERROR;
+ t1 = Transliterator::createInstance(realID, UTRANS_FORWARD, err);
+ if (U_FAILURE(err)) {
+ errln("Failed to instantiate transliterator with real ID");
+ Transliterator::unregister(realID);
+ return;
+ }
+ t2 = Transliterator::createInstance(fakeID, UTRANS_FORWARD, err);
+ if (U_FAILURE(err)) {
+ errln("Failed to instantiate transliterator with fake ID");
+ delete t1;
+ Transliterator::unregister(realID);
+ return;
+ }
+
+ t1->toRules(rules1, TRUE);
+ t2->toRules(rules2, TRUE);
+ if (rules1 != rules2)
+ errln("Alias transliterators aren't the same");
+
+ delete t1;
+ delete t2;
+ Transliterator::unregister(fakeID);
+}
+
+void TransliteratorTest::TestRuleStripping() {
+ /*
+#
+\uE001>\u0C01; # SIGN
+ */
+ static const UChar rule[] = {
+ 0x0023,0x0020,0x000D,0x000A,
+ 0xE001,0x003E,0x0C01,0x003B,0x0020,0x0023,0x0020,0x0053,0x0049,0x0047,0x004E,0
+ };
+ static const UChar expectedRule[] = {
+ 0xE001,0x003E,0x0C01,0x003B,0
+ };
+ UChar result[sizeof(rule)/sizeof(rule[0])];
+ UErrorCode status = U_ZERO_ERROR;
+ int32_t len = utrans_stripRules(rule, (int32_t)(sizeof(rule)/sizeof(rule[0])), result, &status);
+ if (len != u_strlen(expectedRule)) {
+ errln("utrans_stripRules return len = %d", len);
+ }
+ if (u_strncmp(expectedRule, result, len) != 0) {
+ errln("utrans_stripRules did not return expected string");
+ }
+}
+
+/**
+ * Test the Halfwidth-Fullwidth transliterator (ticket 6281).
+ */
+void TransliteratorTest::TestHalfwidthFullwidth(void) {
+ UParseError parseError;
+ UErrorCode status = U_ZERO_ERROR;
+ Transliterator* hf = Transliterator::createInstance("Halfwidth-Fullwidth", UTRANS_FORWARD, parseError, status);
+ Transliterator* fh = Transliterator::createInstance("Fullwidth-Halfwidth", UTRANS_FORWARD, parseError, status);
+ if (hf == 0 || fh == 0) {
+ errln("FAIL: createInstance failed");
+ delete hf;
+ delete fh;
+ return;
+ }
+
+ // Array of 2n items
+ // Each item is
+ // "hf"|"fh"|"both",
+ // <Halfwidth>,
+ // <Fullwidth>
+ const char* DATA[] = {
+ "both",
+ "\\uFFE9\\uFFEA\\uFFEB\\uFFEC\\u0061\\uFF71\\u00AF\\u0020",
+ "\\u2190\\u2191\\u2192\\u2193\\uFF41\\u30A2\\uFFE3\\u3000",
+ };
+ int32_t DATA_length = (int32_t)(sizeof(DATA) / sizeof(DATA[0]));
+
+ for (int32_t i=0; i<DATA_length; i+=3) {
+ UnicodeString h = CharsToUnicodeString(DATA[i+1]);
+ UnicodeString f = CharsToUnicodeString(DATA[i+2]);
+ switch (*DATA[i]) {
+ case 0x68: //'h': // Halfwidth-Fullwidth only
+ expect(*hf, h, f);
+ break;
+ case 0x66: //'f': // Fullwidth-Halfwidth only
+ expect(*fh, f, h);
+ break;
+ case 0x62: //'b': // both directions
+ expect(*hf, h, f);
+ expect(*fh, f, h);
+ break;
+ }
+ }
+ delete hf;
+ delete fh;
+}
+
+
+ /**
+ * Test Thai. The text is the first paragraph of "What is Unicode" from the Unicode.org web site.
+ * TODO: confirm that the expected results are correct.
+ * For now, test just confirms that C++ and Java give identical results.
+ */
+void TransliteratorTest::TestThai(void) {
+ UParseError parseError;
+ UErrorCode status = U_ZERO_ERROR;
+ Transliterator* tr = Transliterator::createInstance("Any-Latin", UTRANS_FORWARD, parseError, status);
+ if (tr == 0) {
+ errln("FAIL: createInstance failed");
+ return;
+ }
+ if (U_FAILURE(status)) {
+ errln("FAIL: createInstance failed with %s", u_errorName(status));
+ return;
+ }
+ const char *thaiText =
+ "\\u0e42\\u0e14\\u0e22\\u0e1e\\u0e37\\u0e49\\u0e19\\u0e10\\u0e32\\u0e19\\u0e41\\u0e25\\u0e49\\u0e27, \\u0e04\\u0e2d"
+ "\\u0e21\\u0e1e\\u0e34\\u0e27\\u0e40\\u0e15\\u0e2d\\u0e23\\u0e4c\\u0e08\\u0e30\\u0e40\\u0e01\\u0e35\\u0e48\\u0e22"
+ "\\u0e27\\u0e02\\u0e49\\u0e2d\\u0e07\\u0e01\\u0e31\\u0e1a\\u0e40\\u0e23\\u0e37\\u0e48\\u0e2d\\u0e07\\u0e02\\u0e2d"
+ "\\u0e07\\u0e15\\u0e31\\u0e27\\u0e40\\u0e25\\u0e02. \\u0e04\\u0e2d\\u0e21\\u0e1e\\u0e34\\u0e27\\u0e40\\u0e15\\u0e2d"
+ "\\u0e23\\u0e4c\\u0e08\\u0e31\\u0e14\\u0e40\\u0e01\\u0e47\\u0e1a\\u0e15\\u0e31\\u0e27\\u0e2d\\u0e31\\u0e01\\u0e29"
+ "\\u0e23\\u0e41\\u0e25\\u0e30\\u0e2d\\u0e31\\u0e01\\u0e02\\u0e23\\u0e30\\u0e2d\\u0e37\\u0e48\\u0e19\\u0e46 \\u0e42"
+ "\\u0e14\\u0e22\\u0e01\\u0e32\\u0e23\\u0e01\\u0e33\\u0e2b\\u0e19\\u0e14\\u0e2b\\u0e21\\u0e32\\u0e22\\u0e40\\u0e25"
+ "\\u0e02\\u0e43\\u0e2b\\u0e49\\u0e2a\\u0e33\\u0e2b\\u0e23\\u0e31\\u0e1a\\u0e41\\u0e15\\u0e48\\u0e25\\u0e30\\u0e15"
+ "\\u0e31\\u0e27. \\u0e01\\u0e48\\u0e2d\\u0e19\\u0e2b\\u0e19\\u0e49\\u0e32\\u0e17\\u0e35\\u0e48\\u0e4a Unicode \\u0e08"
+ "\\u0e30\\u0e16\\u0e39\\u0e01\\u0e2a\\u0e23\\u0e49\\u0e32\\u0e07\\u0e02\\u0e36\\u0e49\\u0e19, \\u0e44\\u0e14\\u0e49"
+ "\\u0e21\\u0e35\\u0e23\\u0e30\\u0e1a\\u0e1a encoding \\u0e2d\\u0e22\\u0e39\\u0e48\\u0e2b\\u0e25\\u0e32\\u0e22\\u0e23"
+ "\\u0e49\\u0e2d\\u0e22\\u0e23\\u0e30\\u0e1a\\u0e1a\\u0e2a\\u0e33\\u0e2b\\u0e23\\u0e31\\u0e1a\\u0e01\\u0e32\\u0e23"
+ "\\u0e01\\u0e33\\u0e2b\\u0e19\\u0e14\\u0e2b\\u0e21\\u0e32\\u0e22\\u0e40\\u0e25\\u0e02\\u0e40\\u0e2b\\u0e25\\u0e48"
+ "\\u0e32\\u0e19\\u0e35\\u0e49. \\u0e44\\u0e21\\u0e48\\u0e21\\u0e35 encoding \\u0e43\\u0e14\\u0e17\\u0e35\\u0e48"
+ "\\u0e21\\u0e35\\u0e08\\u0e33\\u0e19\\u0e27\\u0e19\\u0e15\\u0e31\\u0e27\\u0e2d\\u0e31\\u0e01\\u0e02\\u0e23\\u0e30"
+ "\\u0e21\\u0e32\\u0e01\\u0e40\\u0e1e\\u0e35\\u0e22\\u0e07\\u0e1e\\u0e2d: \\u0e22\\u0e01\\u0e15\\u0e31\\u0e27\\u0e2d"
+ "\\u0e22\\u0e48\\u0e32\\u0e07\\u0e40\\u0e0a\\u0e48\\u0e19, \\u0e40\\u0e09\\u0e1e\\u0e32\\u0e30\\u0e43\\u0e19\\u0e01"
+ "\\u0e25\\u0e38\\u0e48\\u0e21\\u0e2a\\u0e2b\\u0e20\\u0e32\\u0e1e\\u0e22\\u0e38\\u0e42\\u0e23\\u0e1b\\u0e40\\u0e1e"
+ "\\u0e35\\u0e22\\u0e07\\u0e41\\u0e2b\\u0e48\\u0e07\\u0e40\\u0e14\\u0e35\\u0e22\\u0e27 \\u0e01\\u0e47\\u0e15\\u0e49"
+ "\\u0e2d\\u0e07\\u0e01\\u0e32\\u0e23\\u0e2b\\u0e25\\u0e32\\u0e22 encoding \\u0e43\\u0e19\\u0e01\\u0e32\\u0e23\\u0e04"
+ "\\u0e23\\u0e2d\\u0e1a\\u0e04\\u0e25\\u0e38\\u0e21\\u0e17\\u0e38\\u0e01\\u0e20\\u0e32\\u0e29\\u0e32\\u0e43\\u0e19"
+ "\\u0e01\\u0e25\\u0e38\\u0e48\\u0e21. \\u0e2b\\u0e23\\u0e37\\u0e2d\\u0e41\\u0e21\\u0e49\\u0e41\\u0e15\\u0e48\\u0e43"
+ "\\u0e19\\u0e20\\u0e32\\u0e29\\u0e32\\u0e40\\u0e14\\u0e35\\u0e48\\u0e22\\u0e27 \\u0e40\\u0e0a\\u0e48\\u0e19 \\u0e20"
+ "\\u0e32\\u0e29\\u0e32\\u0e2d\\u0e31\\u0e07\\u0e01\\u0e24\\u0e29 \\u0e01\\u0e47\\u0e44\\u0e21\\u0e48\\u0e21\\u0e35"
+ " encoding \\u0e43\\u0e14\\u0e17\\u0e35\\u0e48\\u0e40\\u0e1e\\u0e35\\u0e22\\u0e07\\u0e1e\\u0e2d\\u0e2a\\u0e33\\u0e2b"
+ "\\u0e23\\u0e31\\u0e1a\\u0e17\\u0e38\\u0e01\\u0e15\\u0e31\\u0e27\\u0e2d\\u0e31\\u0e01\\u0e29\\u0e23, \\u0e40\\u0e04"
+ "\\u0e23\\u0e37\\u0e48\\u0e2d\\u0e07\\u0e2b\\u0e21\\u0e32\\u0e22\\u0e27\\u0e23\\u0e23\\u0e04\\u0e15\\u0e2d\\u0e19"
+ " \\u0e41\\u0e25\\u0e30\\u0e2a\\u0e31\\u0e0d\\u0e25\\u0e31\\u0e01\\u0e29\\u0e13\\u0e4c\\u0e17\\u0e32\\u0e07\\u0e40"
+ "\\u0e17\\u0e04\\u0e19\\u0e34\\u0e04\\u0e17\\u0e35\\u0e48\\u0e43\\u0e0a\\u0e49\\u0e01\\u0e31\\u0e19\\u0e2d\\u0e22"
+ "\\u0e39\\u0e48\\u0e17\\u0e31\\u0e48\\u0e27\\u0e44\\u0e1b.";
+
+ const char *latinText =
+ "doy ph\\u1ee5\\u0304\\u0302n \\u1e6d\\u0304h\\u0101n l\\u00e6\\u0302w, khxmphiwtexr\\u0312 ca ke\\u012b\\u0300"
+ "ywk\\u0304\\u0125xng k\\u1ea1b re\\u1ee5\\u0304\\u0300xng k\\u0304hxng t\\u1ea1wlek\\u0304h. khxmphiwtexr"
+ "\\u0312 c\\u1ea1d k\\u0115b t\\u1ea1w x\\u1ea1ks\\u0304\\u02b9r l\\u00e6a x\\u1ea1kk\\u0304h ra x\\u1ee5\\u0304"
+ "\\u0300n\\u00ab doy k\\u0101r k\\u1ea3h\\u0304nd h\\u0304m\\u0101ylek\\u0304h h\\u0304\\u0131\\u0302 s\\u0304"
+ "\\u1ea3h\\u0304r\\u1ea1b t\\u00e6\\u0300la t\\u1ea1w. k\\u0300xn h\\u0304n\\u0302\\u0101 th\\u012b\\u0300\\u0301"
+ " Unicode ca t\\u0304h\\u016bk s\\u0304r\\u0302\\u0101ng k\\u0304h\\u1ee5\\u0302n, d\\u1ecb\\u0302 m\\u012b "
+ "rabb encoding xy\\u016b\\u0300 h\\u0304l\\u0101y r\\u0302xy rabb s\\u0304\\u1ea3h\\u0304r\\u1ea1b k\\u0101"
+ "r k\\u1ea3h\\u0304nd h\\u0304m\\u0101ylek\\u0304h h\\u0304el\\u0300\\u0101 n\\u012b\\u0302. m\\u1ecb\\u0300m"
+ "\\u012b encoding d\\u0131 th\\u012b\\u0300 m\\u012b c\\u1ea3nwn t\\u1ea1w x\\u1ea1kk\\u0304hra m\\u0101k p"
+ "he\\u012byng phx: yk t\\u1ea1wx\\u1ef3\\u0101ng ch\\u00e8n, c\\u0304heph\\u0101a n\\u0131 kl\\u00f9m s\\u0304"
+ "h\\u0304p\\u0323h\\u0101ph yurop phe\\u012byng h\\u0304\\u00e6\\u0300ng de\\u012byw k\\u0306 t\\u0302xngk\\u0101"
+ "r h\\u0304l\\u0101y encoding n\\u0131 k\\u0101r khrxbkhlum thuk p\\u0323h\\u0101s\\u0304\\u02b9\\u0101 n\\u0131"
+ " kl\\u00f9m. h\\u0304r\\u1ee5\\u0304x m\\u00e6\\u0302t\\u00e6\\u0300 n\\u0131 p\\u0323h\\u0101s\\u0304\\u02b9"
+ "\\u0101 de\\u012b\\u0300yw ch\\u00e8n p\\u0323h\\u0101s\\u0304\\u02b9\\u0101 x\\u1ea1ngkvs\\u0304\\u02b9 k\\u0306"
+ " m\\u1ecb\\u0300m\\u012b encoding d\\u0131 th\\u012b\\u0300 phe\\u012byng phx s\\u0304\\u1ea3h\\u0304r\\u1ea1"
+ "b thuk t\\u1ea1w x\\u1ea1ks\\u0304\\u02b9r, kher\\u1ee5\\u0304\\u0300xngh\\u0304m\\u0101y wrrkh txn l\\u00e6"
+ "a s\\u0304\\u1ea1\\u1ef5l\\u1ea1ks\\u0304\\u02b9\\u1e47\\u0312 th\\u0101ng thekhnikh th\\u012b\\u0300 ch\\u0131"
+ "\\u0302 k\\u1ea1n xy\\u016b\\u0300 th\\u1ea1\\u0300wp\\u1ecb.";
+
+
+ UnicodeString xlitText(thaiText);
+ xlitText = xlitText.unescape();
+ tr->transliterate(xlitText);
+
+ UnicodeString expectedText(latinText);
+ expectedText = expectedText.unescape();
+ expect(*tr, xlitText, expectedText);
+
+ delete tr;
+}
+
+
//======================================================================
// Support methods
//======================================================================
delete t;
}
+void TransliteratorTest::reportParseError(const UnicodeString& message,
+ const UParseError& parseError,
+ const UErrorCode& status) {
+ errln(message +
+ /*", parse error " + parseError.code +*/
+ ", line " + parseError.line +
+ ", offset " + parseError.offset +
+ ", pre-context " + prettify(parseError.preContext, TRUE) +
+ ", post-context " + prettify(parseError.postContext,TRUE) +
+ ", Error: " + u_errorName(status));
+}
+
void TransliteratorTest::expect(const UnicodeString& rules,
const UnicodeString& source,
const UnicodeString& expectedResult,
UTransPosition *pos) {
+ expect("<ID>", rules, source, expectedResult, pos);
+}
+
+void TransliteratorTest::expect(const UnicodeString& id,
+ const UnicodeString& rules,
+ const UnicodeString& source,
+ const UnicodeString& expectedResult,
+ UTransPosition *pos) {
UErrorCode status = U_ZERO_ERROR;
- Transliterator *t = new RuleBasedTransliterator("<ID>", rules, status);
+ UParseError parseError;
+ Transliterator* t = Transliterator::createFromRules(id, rules, UTRANS_FORWARD, parseError, status);
if (U_FAILURE(status)) {
- errln("FAIL: Transliterator constructor failed");
+ reportParseError(UnicodeString("Couldn't create transliterator from ") + rules, parseError, status);
} else {
expect(*t, source, expectedResult, pos);
}
t.transliterate(result);
expectAux(t.getID() + ":String", source, result, expectedResult);
}
-
UTransPosition index={0, 0, 0, 0};
if (pos != 0) {
index = *pos;