git.saurik.com Git - apple/icu.git/blame_incremental - icuSources/test/intltest/transtst.cpp

... / ...

Commit	Line	Data
	1	/*
	2	**********************************************************************
	3	* Copyright (C) 1999-2011, International Business Machines
	4	* Corporation and others. All Rights Reserved.
	5	**********************************************************************
	6	* Date Name Description
	7	* 11/10/99 aliu Creation.
	8	**********************************************************************
	9	*/
	10
	11	#include "unicode/utypes.h"
	12
	13	#if !UCONFIG_NO_TRANSLITERATION
	14
	15	#include "transtst.h"
	16	#include "unicode/locid.h"
	17	#include "unicode/dtfmtsym.h"
	18	#include "unicode/normlzr.h"
	19	#include "unicode/translit.h"
	20	#include "unicode/uchar.h"
	21	#include "unicode/unifilt.h"
	22	#include "unicode/uniset.h"
	23	#include "unicode/ustring.h"
	24	#include "unicode/usetiter.h"
	25	#include "unicode/uscript.h"
	26	#include "unicode/utf16.h"
	27	#include "cpdtrans.h"
	28	#include "nultrans.h"
	29	#include "rbt.h"
	30	#include "rbt_pars.h"
	31	#include "anytrans.h"
	32	#include "esctrn.h"
	33	#include "name2uni.h"
	34	#include "nortrans.h"
	35	#include "remtrans.h"
	36	#include "titletrn.h"
	37	#include "tolowtrn.h"
	38	#include "toupptrn.h"
	39	#include "unesctrn.h"
	40	#include "uni2name.h"
	41	#include "cstring.h"
	42	#include "cmemory.h"
	43	#include <stdio.h>
	44
	45	/***********************************************************************
	46
	47	HOW TO USE THIS TEST FILE
	48	-or-
	49	How I developed on two platforms
	50	without losing (too much of) my mind
	51
	52
	53	1. Add new tests by copying/pasting/changing existing tests. On Java,
	54	any public void method named Test...() taking no parameters becomes
	55	a test. On C++, you need to modify the header and add a line to
	56	the runIndexedTest() dispatch method.
	57
	58	2. Make liberal use of the expect() method; it is your friend.
	59
	60	3. The tests in this file exactly match those in a sister file on the
	61	other side. The two files are:
	62
	63	icu4j: src/com/ibm/test/translit/TransliteratorTest.java
	64	icu4c: source/test/intltest/transtst.cpp
	65
	66	==> THIS IS THE IMPORTANT PART <==
	67
	68	When you add a test in this file, add it in TransliteratorTest.java
	69	too. Give it the same name and put it in the same relative place.
	70	This makes maintenance a lot simpler for any poor soul who ends up
	71	trying to synchronize the tests between icu4j and icu4c.
	72
	73	4. If you MUST enter a test that is NOT paralleled in the sister file,
	74	then add it in the special non-mirrored section. These are
	75	labeled
	76
	77	"icu4j ONLY"
	78
	79	or
	80
	81	"icu4c ONLY"
	82
	83	Make sure you document the reason the test is here and not there.
	84
	85
	86	Thank you.
	87	The Management
	88	***********************************************************************/
	89
	90	// Define character constants thusly to be EBCDIC-friendly
	91	enum {
	92	LEFT_BRACE=((UChar)0x007B), /{/
	93	PIPE =((UChar)0x007C), /\|/
	94	ZERO =((UChar)0x0030), /0/
	95	UPPER_A =((UChar)0x0041) /A/
	96	};
	97
	98	TransliteratorTest::TransliteratorTest()
	99	: DESERET_DEE((UChar32)0x10414),
	100	DESERET_dee((UChar32)0x1043C)
	101	{
	102	}
	103
	104	TransliteratorTest::~TransliteratorTest() {}
	105
	106	void
	107	TransliteratorTest::runIndexedTest(int32_t index, UBool exec,
	108	const char* &name, char* /par/) {
	109	switch (index) {
	110	TESTCASE(0,TestInstantiation);
	111	TESTCASE(1,TestSimpleRules);
	112	TESTCASE(2,TestRuleBasedInverse);
	113	TESTCASE(3,TestKeyboard);
	114	TESTCASE(4,TestKeyboard2);
	115	TESTCASE(5,TestKeyboard3);
	116	TESTCASE(6,TestArabic);
	117	TESTCASE(7,TestCompoundKana);
	118	TESTCASE(8,TestCompoundHex);
	119	TESTCASE(9,TestFiltering);
	120	TESTCASE(10,TestInlineSet);
	121	TESTCASE(11,TestPatternQuoting);
	122	TESTCASE(12,TestJ277);
	123	TESTCASE(13,TestJ243);
	124	TESTCASE(14,TestJ329);
	125	TESTCASE(15,TestSegments);
	126	TESTCASE(16,TestCursorOffset);
	127	TESTCASE(17,TestArbitraryVariableValues);
	128	TESTCASE(18,TestPositionHandling);
	129	TESTCASE(19,TestHiraganaKatakana);
	130	TESTCASE(20,TestCopyJ476);
	131	TESTCASE(21,TestAnchors);
	132	TESTCASE(22,TestInterIndic);
	133	TESTCASE(23,TestFilterIDs);
	134	TESTCASE(24,TestCaseMap);
	135	TESTCASE(25,TestNameMap);
	136	TESTCASE(26,TestLiberalizedID);
	137	TESTCASE(27,TestCreateInstance);
	138	TESTCASE(28,TestNormalizationTransliterator);
	139	TESTCASE(29,TestCompoundRBT);
	140	TESTCASE(30,TestCompoundFilter);
	141	TESTCASE(31,TestRemove);
	142	TESTCASE(32,TestToRules);
	143	TESTCASE(33,TestContext);
	144	TESTCASE(34,TestSupplemental);
	145	TESTCASE(35,TestQuantifier);
	146	TESTCASE(36,TestSTV);
	147	TESTCASE(37,TestCompoundInverse);
	148	TESTCASE(38,TestNFDChainRBT);
	149	TESTCASE(39,TestNullInverse);
	150	TESTCASE(40,TestAliasInverseID);
	151	TESTCASE(41,TestCompoundInverseID);
	152	TESTCASE(42,TestUndefinedVariable);
	153	TESTCASE(43,TestEmptyContext);
	154	TESTCASE(44,TestCompoundFilterID);
	155	TESTCASE(45,TestPropertySet);
	156	TESTCASE(46,TestNewEngine);
	157	TESTCASE(47,TestQuantifiedSegment);
	158	TESTCASE(48,TestDevanagariLatinRT);
	159	TESTCASE(49,TestTeluguLatinRT);
	160	TESTCASE(50,TestCompoundLatinRT);
	161	TESTCASE(51,TestSanskritLatinRT);
	162	TESTCASE(52,TestLocaleInstantiation);
	163	TESTCASE(53,TestTitleAccents);
	164	TESTCASE(54,TestLocaleResource);
	165	TESTCASE(55,TestParseError);
	166	TESTCASE(56,TestOutputSet);
	167	TESTCASE(57,TestVariableRange);
	168	TESTCASE(58,TestInvalidPostContext);
	169	TESTCASE(59,TestIDForms);
	170	TESTCASE(60,TestToRulesMark);
	171	TESTCASE(61,TestEscape);
	172	TESTCASE(62,TestAnchorMasking);
	173	TESTCASE(63,TestDisplayName);
	174	TESTCASE(64,TestSpecialCases);
	175	#if !UCONFIG_NO_FILE_IO
	176	TESTCASE(65,TestIncrementalProgress);
	177	#endif
	178	TESTCASE(66,TestSurrogateCasing);
	179	TESTCASE(67,TestFunction);
	180	TESTCASE(68,TestInvalidBackRef);
	181	TESTCASE(69,TestMulticharStringSet);
	182	TESTCASE(70,TestUserFunction);
	183	TESTCASE(71,TestAnyX);
	184	TESTCASE(72,TestSourceTargetSet);
	185	TESTCASE(73,TestGurmukhiDevanagari);
	186	TESTCASE(74,TestPatternWhiteSpace);
	187	TESTCASE(75,TestAllCodepoints);
	188	TESTCASE(76,TestBoilerplate);
	189	TESTCASE(77,TestAlternateSyntax);
	190	TESTCASE(78,TestBeginEnd);
	191	TESTCASE(79,TestBeginEndToRules);
	192	TESTCASE(80,TestRegisterAlias);
	193	TESTCASE(81,TestRuleStripping);
	194	TESTCASE(82,TestHalfwidthFullwidth);
	195	TESTCASE(83,TestThai);
	196	TESTCASE(84,TestAny);
	197	default: name = ""; break;
	198	}
	199	}
	200
	201	static const UVersionInfo ICU_39 = {3,9,4,0};
	202	/**
	203	* Make sure every system transliterator can be instantiated.
	204	*
	205	* ALSO test that the result of toRules() for each rule is a valid
	206	* rule. Do this here so we don't have to have another test that
	207	* instantiates everything as well.
	208	*/
	209	void TransliteratorTest::TestInstantiation() {
	210	UErrorCode ec = U_ZERO_ERROR;
	211	StringEnumeration* avail = Transliterator::getAvailableIDs(ec);
	212	assertSuccess("getAvailableIDs()", ec);
	213	assertTrue("getAvailableIDs()!=NULL", avail!=NULL);
	214	int32_t n = Transliterator::countAvailableIDs();
	215	assertTrue("getAvailableIDs().count()==countAvailableIDs()",
	216	avail->count(ec) == n);
	217	assertSuccess("count()", ec);
	218	UnicodeString name;
	219	for (int32_t i=0; i<n; ++i) {
	220	const UnicodeString& id = *avail->snext(ec);
	221	if (!assertSuccess("snext()", ec) \|\|
	222	!assertTrue("snext()!=NULL", (&id)!=NULL, TRUE)) {
	223	break;
	224	}
	225	UnicodeString id2 = Transliterator::getAvailableID(i);
	226	if (id.length() < 1) {
	227	errln(UnicodeString("FAIL: getAvailableID(") +
	228	i + ") returned empty string");
	229	continue;
	230	}
	231	if (id != id2) {
	232	errln(UnicodeString("FAIL: getAvailableID(") +
	233	i + ") != getAvailableIDs().snext()");
	234	continue;
	235	}
	236	UParseError parseError;
	237	UErrorCode status = U_ZERO_ERROR;
	238	Transliterator* t = Transliterator::createInstance(id,
	239	UTRANS_FORWARD, parseError,status);
	240	name.truncate(0);
	241	Transliterator::getDisplayName(id, name);
	242	if (t == 0) {
	243	#if UCONFIG_NO_BREAK_ITERATION
	244	// If UCONFIG_NO_BREAK_ITERATION is on, then only Thai should fail.
	245	if (id.compare((UnicodeString)"Thai-Latin") != 0)
	246	#endif
	247	dataerrln(UnicodeString("FAIL: Couldn't create ") + id +
	248	/", parse error " + parseError.code +/
	249	", line " + parseError.line +
	250	", offset " + parseError.offset +
	251	", pre-context " + prettify(parseError.preContext, TRUE) +
	252	", post-context " +prettify(parseError.postContext,TRUE) +
	253	", Error: " + u_errorName(status));
	254	// When createInstance fails, it deletes the failing
	255	// entry from the available ID list. We detect this
	256	// here by looking for a change in countAvailableIDs.
	257	int32_t nn = Transliterator::countAvailableIDs();
	258	if (nn == (n - 1)) {
	259	n = nn;
	260	--i; // Compensate for deleted entry
	261	}
	262	} else {
	263	logln(UnicodeString("OK: ") + name + " (" + id + ")");
	264
	265	// Now test toRules
	266	UnicodeString rules;
	267	t->toRules(rules, TRUE);
	268	Transliterator *u = Transliterator::createFromRules("x",
	269	rules, UTRANS_FORWARD, parseError,status);
	270	if (u == 0) {
	271	errln(UnicodeString("FAIL: ") + id +
	272	".createFromRules() => bad rules" +
	273	/", parse error " + parseError.code +/
	274	", line " + parseError.line +
	275	", offset " + parseError.offset +
	276	", context " + prettify(parseError.preContext, TRUE) +
	277	", rules: " + prettify(rules, TRUE));
	278	} else {
	279	delete u;
	280	}
	281	delete t;
	282	}
	283	}
	284	assertTrue("snext()==NULL", avail->snext(ec)==NULL);
	285	assertSuccess("snext()", ec);
	286	delete avail;
	287
	288	// Now test the failure path
	289	UParseError parseError;
	290	UErrorCode status = U_ZERO_ERROR;
	291	UnicodeString id("<Not a valid Transliterator ID>");
	292	Transliterator* t = Transliterator::createInstance(id, UTRANS_FORWARD, parseError, status);
	293	if (t != 0) {
	294	errln("FAIL: " + id + " returned a transliterator");
	295	delete t;
	296	} else {
	297	logln("OK: Bogus ID handled properly");
	298	}
	299	}
	300
	301	void TransliteratorTest::TestSimpleRules(void) {
	302	/* Example: rules 1. ab>x\|y
	303	* 2. yc>z
	304	*
	305	* []\|eabcd start - no match, copy e to tranlated buffer
	306	* [e]\|abcd match rule 1 - copy output & adjust cursor
	307	* [ex\|y]cd match rule 2 - copy output & adjust cursor
	308	* [exz]\|d no match, copy d to transliterated buffer
	309	* [exzd]\| done
	310	*/
	311	expect(UnicodeString("ab>x\|y;", "") +
	312	"yc>z",
	313	"eabcd", "exzd");
	314
	315	/* Another set of rules:
	316	* 1. ab>x\|yzacw
	317	* 2. za>q
	318	* 3. qc>r
	319	* 4. cw>n
	320	*
	321	* []\|ab Rule 1
	322	* [x\|yzacw] No match
	323	* [xy\|zacw] Rule 2
	324	* [xyq\|cw] Rule 4
	325	* [xyqn]\| Done
	326	*/
	327	expect(UnicodeString("ab>x\|yzacw;") +
	328	"za>q;" +
	329	"qc>r;" +
	330	"cw>n",
	331	"ab", "xyqn");
	332
	333	/* Test categories
	334	*/
	335	UErrorCode status = U_ZERO_ERROR;
	336	UParseError parseError;
	337	Transliterator *t = Transliterator::createFromRules(
	338	"<ID>",
	339	UnicodeString("$dummy=").append((UChar)0xE100) +
	340	UnicodeString(";"
	341	"$vowel=[aeiouAEIOU];"
	342	"$lu=[:Lu:];"
	343	"$vowel } $lu > '!';"
	344	"$vowel > '&';"
	345	"'!' { $lu > '^';"
	346	"$lu > '*';"
	347	"a > ERROR", ""),
	348	UTRANS_FORWARD, parseError,
	349	status);
	350	if (U_FAILURE(status)) {
	351	dataerrln("FAIL: RBT constructor failed - %s", u_errorName(status));
	352	return;
	353	}
	354	expect(t, "abcdefgABCDEFGU", "&bcd&fg!^!^&");
	355	delete t;
	356	}
	357
	358	/**
	359	* Test inline set syntax and set variable syntax.
	360	*/
	361	void TransliteratorTest::TestInlineSet(void) {
	362	expect("{ [:Ll:] } x > y; [:Ll:] > z;", "aAbxq", "zAyzz");
	363	expect("a[0-9]b > qrs", "1a7b9", "1qrs9");
	364
	365	expect(UnicodeString(
	366	"$digit = [0-9];"
	367	"$alpha = [a-zA-Z];"
	368	"$alphanumeric = [$digit $alpha];" // ***
	369	"$special = [^$alphanumeric];" // ***
	370	"$alphanumeric > '-';"
	371	"$special > '*';", ""),
	372
	373	"thx-1138", "---*----");
	374	}
	375
	376	/**
	377	* Create some inverses and confirm that they work. We have to be
	378	* careful how we do this, since the inverses will not be true
	379	* inverses -- we can't throw any random string at the composition
	380	* of the transliterators and expect the identity function. F x
	381	* F' != I. However, if we are careful about the input, we will
	382	* get the expected results.
	383	*/
	384	void TransliteratorTest::TestRuleBasedInverse(void) {
	385	UnicodeString RULES =
	386	UnicodeString("abc>zyx;") +
	387	"ab>yz;" +
	388	"bc>zx;" +
	389	"ca>xy;" +
	390	"a>x;" +
	391	"b>y;" +
	392	"c>z;" +
	393
	394	"abc<zyx;" +
	395	"ab<yz;" +
	396	"bc<zx;" +
	397	"ca<xy;" +
	398	"a<x;" +
	399	"b<y;" +
	400	"c<z;" +
	401
	402	"";
	403
	404	const char* DATA[] = {
	405	// Careful here -- random strings will not work. If we keep
	406	// the left side to the domain and the right side to the range
	407	// we will be okay though (left, abc; right xyz).
	408	"a", "x",
	409	"abcacab", "zyxxxyy",
	410	"caccb", "xyzzy",
	411	};
	412
	413	int32_t DATA_length = (int32_t)(sizeof(DATA) / sizeof(DATA[0]));
	414
	415	UErrorCode status = U_ZERO_ERROR;
	416	UParseError parseError;
	417	Transliterator *fwd = Transliterator::createFromRules("<ID>", RULES,
	418	UTRANS_FORWARD, parseError, status);
	419	Transliterator *rev = Transliterator::createFromRules("<ID>", RULES,
	420	UTRANS_REVERSE, parseError, status);
	421	if (U_FAILURE(status)) {
	422	errln("FAIL: RBT constructor failed");
	423	return;
	424	}
	425	for (int32_t i=0; i<DATA_length; i+=2) {
	426	expect(*fwd, DATA[i], DATA[i+1]);
	427	expect(*rev, DATA[i+1], DATA[i]);
	428	}
	429	delete fwd;
	430	delete rev;
	431	}
	432
	433	/**
	434	* Basic test of keyboard.
	435	*/
	436	void TransliteratorTest::TestKeyboard(void) {
	437	UParseError parseError;
	438	UErrorCode status = U_ZERO_ERROR;
	439	Transliterator *t = Transliterator::createFromRules("<ID>",
	440	UnicodeString("psch>Y;")
	441	+"ps>y;"
	442	+"ch>x;"
	443	+"a>A;",
	444	UTRANS_FORWARD, parseError,
	445	status);
	446	if (U_FAILURE(status)) {
	447	errln("FAIL: RBT constructor failed");
	448	return;
	449	}
	450	const char* DATA[] = {
	451	// insertion, buffer
	452	"a", "A",
	453	"p", "Ap",
	454	"s", "Aps",
	455	"c", "Apsc",
	456	"a", "AycA",
	457	"psch", "AycAY",
	458	0, "AycAY", // null means finishKeyboardTransliteration
	459	};
	460
	461	keyboardAux(*t, DATA, (int32_t)(sizeof(DATA)/sizeof(DATA[0])));
	462	delete t;
	463	}
	464
	465	/**
	466	* Basic test of keyboard with cursor.
	467	*/
	468	void TransliteratorTest::TestKeyboard2(void) {
	469	UParseError parseError;
	470	UErrorCode status = U_ZERO_ERROR;
	471	Transliterator *t = Transliterator::createFromRules("<ID>",
	472	UnicodeString("ych>Y;")
	473	+"ps>\|y;"
	474	+"ch>x;"
	475	+"a>A;",
	476	UTRANS_FORWARD, parseError,
	477	status);
	478	if (U_FAILURE(status)) {
	479	errln("FAIL: RBT constructor failed");
	480	return;
	481	}
	482	const char* DATA[] = {
	483	// insertion, buffer
	484	"a", "A",
	485	"p", "Ap",
	486	"s", "Aps", // modified for rollback - "Ay",
	487	"c", "Apsc", // modified for rollback - "Ayc",
	488	"a", "AycA",
	489	"p", "AycAp",
	490	"s", "AycAps", // modified for rollback - "AycAy",
	491	"c", "AycApsc", // modified for rollback - "AycAyc",
	492	"h", "AycAY",
	493	0, "AycAY", // null means finishKeyboardTransliteration
	494	};
	495
	496	keyboardAux(*t, DATA, (int32_t)(sizeof(DATA)/sizeof(DATA[0])));
	497	delete t;
	498	}
	499
	500	/**
	501	* Test keyboard transliteration with back-replacement.
	502	*/
	503	void TransliteratorTest::TestKeyboard3(void) {
	504	// We want th>z but t>y. Furthermore, during keyboard
	505	// transliteration we want t>y then yh>z if t, then h are
	506	// typed.
	507	UnicodeString RULES("t>\|y;"
	508	"yh>z;");
	509
	510	const char* DATA[] = {
	511	// Column 1: characters to add to buffer (as if typed)
	512	// Column 2: expected appearance of buffer after
	513	// keyboard xliteration.
	514	"a", "a",
	515	"b", "ab",
	516	"t", "abt", // modified for rollback - "aby",
	517	"c", "abyc",
	518	"t", "abyct", // modified for rollback - "abycy",
	519	"h", "abycz",
	520	0, "abycz", // null means finishKeyboardTransliteration
	521	};
	522
	523	UParseError parseError;
	524	UErrorCode status = U_ZERO_ERROR;
	525	Transliterator *t = Transliterator::createFromRules("<ID>", RULES, UTRANS_FORWARD, parseError, status);
	526	if (U_FAILURE(status)) {
	527	errln("FAIL: RBT constructor failed");
	528	return;
	529	}
	530	keyboardAux(*t, DATA, (int32_t)(sizeof(DATA)/sizeof(DATA[0])));
	531	delete t;
	532	}
	533
	534	void TransliteratorTest::keyboardAux(const Transliterator& t,
	535	const char* DATA[], int32_t DATA_length) {
	536	UErrorCode status = U_ZERO_ERROR;
	537	UTransPosition index={0, 0, 0, 0};
	538	UnicodeString s;
	539	for (int32_t i=0; i<DATA_length; i+=2) {
	540	UnicodeString log;
	541	if (DATA[i] != 0) {
	542	log = s + " + "
	543	+ DATA[i]
	544	+ " -> ";
	545	t.transliterate(s, index, DATA[i], status);
	546	} else {
	547	log = s + " => ";
	548	t.finishTransliteration(s, index);
	549	}
	550	// Show the start index '{' and the cursor '\|'
	551	UnicodeString a, b, c;
	552	s.extractBetween(0, index.contextStart, a);
	553	s.extractBetween(index.contextStart, index.start, b);
	554	s.extractBetween(index.start, s.length(), c);
	555	log.append(a).
	556	append((UChar)LEFT_BRACE).
	557	append(b).
	558	append((UChar)PIPE).
	559	append(c);
	560	if (s == DATA[i+1] && U_SUCCESS(status)) {
	561	logln(log);
	562	} else {
	563	errln(UnicodeString("FAIL: ") + log + ", expected " + DATA[i+1]);
	564	}
	565	}
	566	}
	567
	568	void TransliteratorTest::TestArabic(void) {
	569	// Test disabled for 2.0 until new Arabic transliterator can be written.
	570	// /*
	571	// const char* DATA[] = {
	572	// "Arabic", "\u062a\u062a\u0645\u062a\u0639\u0020"+
	573	// "\u0627\u0644\u0644\u063a\u0629\u0020"+
	574	// "\u0627\u0644\u0639\u0631\u0628\u0628\u064a\u0629\u0020"+
	575	// "\u0628\u0628\u0646\u0638\u0645\u0020"+
	576	// "\u0643\u062a\u0627\u0628\u0628\u064a\u0629\u0020"+
	577	// "\u062c\u0645\u064a\u0644\u0629",
	578	// };
	579	// */
	580	//
	581	// UChar ar_raw[] = {
	582	// 0x062a, 0x062a, 0x0645, 0x062a, 0x0639, 0x0020, 0x0627,
	583	// 0x0644, 0x0644, 0x063a, 0x0629, 0x0020, 0x0627, 0x0644,
	584	// 0x0639, 0x0631, 0x0628, 0x0628, 0x064a, 0x0629, 0x0020,
	585	// 0x0628, 0x0628, 0x0646, 0x0638, 0x0645, 0x0020, 0x0643,
	586	// 0x062a, 0x0627, 0x0628, 0x0628, 0x064a, 0x0629, 0x0020,
	587	// 0x062c, 0x0645, 0x064a, 0x0644, 0x0629, 0
	588	// };
	589	// UnicodeString ar(ar_raw);
	590	// UErrorCode status=U_ZERO_ERROR;
	591	// UParseError parseError;
	592	// Transliterator *t = Transliterator::createInstance("Latin-Arabic", UTRANS_FORWARD, parseError, status);
	593	// if (t == 0) {
	594	// errln("FAIL: createInstance failed");
	595	// return;
	596	// }
	597	// expect(*t, "Arabic", ar);
	598	// delete t;
	599	}
	600
	601	/**
	602	* Compose the Kana transliterator forward and reverse and try
	603	* some strings that should come out unchanged.
	604	*/
	605	void TransliteratorTest::TestCompoundKana(void) {
	606	UParseError parseError;
	607	UErrorCode status = U_ZERO_ERROR;
	608	Transliterator* t = Transliterator::createInstance("Latin-Hiragana;Hiragana-Latin", UTRANS_FORWARD, parseError, status);
	609	if (t == 0) {
	610	dataerrln("FAIL: construction of Latin-Hiragana;Hiragana-Latin failed - %s", u_errorName(status));
	611	} else {
	612	expect(*t, "aaaaa", "aaaaa");
	613	delete t;
	614	}
	615	}
	616
	617	/**
	618	* Compose the hex transliterators forward and reverse.
	619	*/
	620	void TransliteratorTest::TestCompoundHex(void) {
	621	UParseError parseError;
	622	UErrorCode status = U_ZERO_ERROR;
	623	Transliterator* a = Transliterator::createInstance("Any-Hex", UTRANS_FORWARD, parseError, status);
	624	Transliterator* b = Transliterator::createInstance("Hex-Any", UTRANS_FORWARD, parseError, status);
	625	Transliterator* transab[] = { a, b };
	626	Transliterator* transba[] = { b, a };
	627	if (a == 0 \|\| b == 0) {
	628	errln("FAIL: construction failed");
	629	delete a;
	630	delete b;
	631	return;
	632	}
	633	// Do some basic tests of a
	634	expect(*a, "01", UnicodeString("\\u0030\\u0031", ""));
	635	// Do some basic tests of b
	636	expect(*b, UnicodeString("\\u0030\\u0031", ""), "01");
	637
	638	Transliterator* ab = new CompoundTransliterator(transab, 2);
	639	UnicodeString s("abcde", "");
	640	expect(*ab, s, s);
	641
	642	UnicodeString str(s);
	643	a->transliterate(str);
	644	Transliterator* ba = new CompoundTransliterator(transba, 2);
	645	expect(*ba, str, str);
	646
	647	delete ab;
	648	delete ba;
	649	delete a;
	650	delete b;
	651	}
	652
	653	int gTestFilterClassID = 0;
	654	/**
	655	* Used by TestFiltering().
	656	*/
	657	class TestFilter : public UnicodeFilter {
	658	virtual UnicodeFunctor* clone() const {
	659	return new TestFilter(*this);
	660	}
	661	virtual UBool contains(UChar32 c) const {
	662	return c != (UChar)0x0063 /c/;
	663	}
	664	// Stubs
	665	virtual UnicodeString& toPattern(UnicodeString& result,
	666	UBool /escapeUnprintable/) const {
	667	return result;
	668	}
	669	virtual UBool matchesIndexValue(uint8_t /v/) const {
	670	return FALSE;
	671	}
	672	virtual void addMatchSetTo(UnicodeSet& /toUnionTo/) const {}
	673	public:
	674	UClassID getDynamicClassID() const { return (UClassID)&gTestFilterClassID; }
	675	};
	676
	677	/**
	678	* Do some basic tests of filtering.
	679	*/
	680	void TransliteratorTest::TestFiltering(void) {
	681	UParseError parseError;
	682	UErrorCode status = U_ZERO_ERROR;
	683	Transliterator* hex = Transliterator::createInstance("Any-Hex", UTRANS_FORWARD, parseError, status);
	684	if (hex == 0) {
	685	errln("FAIL: createInstance(Any-Hex) failed");
	686	return;
	687	}
	688	hex->adoptFilter(new TestFilter());
	689	UnicodeString s("abcde");
	690	hex->transliterate(s);
	691	UnicodeString exp("\\u0061\\u0062c\\u0064\\u0065", "");
	692	if (s == exp) {
	693	logln(UnicodeString("Ok: \"") + exp + "\"");
	694	} else {
	695	logln(UnicodeString("FAIL: \"") + s + "\", wanted \"" + exp + "\"");
	696	}
	697
	698	// ICU4C ONLY. Do not find Transliterator.orphanFilter() in ICU4J.
	699	UnicodeFilter *f = hex->orphanFilter();
	700	if (f == NULL){
	701	errln("FAIL: orphanFilter() should get a UnicodeFilter");
	702	} else {
	703	delete f;
	704	}
	705	delete hex;
	706	}
	707
	708	/**
	709	* Test anchors
	710	*/
	711	void TransliteratorTest::TestAnchors(void) {
	712	expect(UnicodeString("^a > 0; a$ > 2 ; a > 1;", ""),
	713	"aaa",
	714	"012");
	715	expect(UnicodeString("$s=[z$]; $s{a>0; a}$s>2; a>1;", ""),
	716	"aaa",
	717	"012");
	718	expect(UnicodeString("^ab > 01 ;"
	719	" ab > \|8 ;"
	720	" b > k ;"
	721	" 8x$ > 45 ;"
	722	" 8x > 77 ;", ""),
	723
	724	"ababbabxabx",
	725	"018k7745");
	726	expect(UnicodeString("$s = [z$] ;"
	727	"$s{ab > 01 ;"
	728	" ab > \|8 ;"
	729	" b > k ;"
	730	" 8x}$s > 45 ;"
	731	" 8x > 77 ;", ""),
	732
	733	"abzababbabxzabxabx",
	734	"01z018k45z01x45");
	735	}
	736
	737	/**
	738	* Test pattern quoting and escape mechanisms.
	739	*/
	740	void TransliteratorTest::TestPatternQuoting(void) {
	741	// Array of 3n items
	742	// Each item is <rules>, <input>, <expected output>
	743	const UnicodeString DATA[] = {
	744	UnicodeString(UChar(0x4E01)) + ">'[male adult]'",
	745	UnicodeString(UChar(0x4E01)),
	746	"[male adult]"
	747	};
	748
	749	for (int32_t i=0; i<3; i+=3) {
	750	logln(UnicodeString("Pattern: ") + prettify(DATA[i]));
	751	UParseError parseError;
	752	UErrorCode status = U_ZERO_ERROR;
	753	Transliterator *t = Transliterator::createFromRules("<ID>", DATA[i], UTRANS_FORWARD, parseError, status);
	754	if (U_FAILURE(status)) {
	755	errln("RBT constructor failed");
	756	} else {
	757	expect(*t, DATA[i+1], DATA[i+2]);
	758	}
	759	delete t;
	760	}
	761	}
	762
	763	/**
	764	* Regression test for bugs found in Greek transliteration.
	765	*/
	766	void TransliteratorTest::TestJ277(void) {
	767	UErrorCode status = U_ZERO_ERROR;
	768	UParseError parseError;
	769	Transliterator *gl = Transliterator::createInstance("Greek-Latin; NFD; [:M:]Remove; NFC", UTRANS_FORWARD, parseError, status);
	770	if (gl == NULL) {
	771	dataerrln("FAIL: createInstance(Greek-Latin) returned NULL - %s", u_errorName(status));
	772	return;
	773	}
	774
	775	UChar sigma = 0x3C3;
	776	UChar upsilon = 0x3C5;
	777	UChar nu = 0x3BD;
	778	// UChar PHI = 0x3A6;
	779	UChar alpha = 0x3B1;
	780	// UChar omega = 0x3C9;
	781	// UChar omicron = 0x3BF;
	782	// UChar epsilon = 0x3B5;
	783
	784	// sigma upsilon nu -> syn
	785	UnicodeString syn;
	786	syn.append(sigma).append(upsilon).append(nu);
	787	expect(*gl, syn, "syn");
	788
	789	// sigma alpha upsilon nu -> saun
	790	UnicodeString sayn;
	791	sayn.append(sigma).append(alpha).append(upsilon).append(nu);
	792	expect(*gl, sayn, "saun");
	793
	794	// Again, using a smaller rule set
	795	UnicodeString rules(
	796	"$alpha = \\u03B1;"
	797	"$nu = \\u03BD;"
	798	"$sigma = \\u03C3;"
	799	"$ypsilon = \\u03C5;"
	800	"$vowel = [aeiouAEIOU$alpha$ypsilon];"
	801	"s <> $sigma;"
	802	"a <> $alpha;"
	803	"u <> $vowel { $ypsilon;"
	804	"y <> $ypsilon;"
	805	"n <> $nu;",
	806	"");
	807	Transliterator *mini = Transliterator::createFromRules("mini", rules, UTRANS_REVERSE, parseError, status);
	808	if (U_FAILURE(status)) { errln("FAIL: Transliterator constructor failed"); return; }
	809	expect(*mini, syn, "syn");
	810	expect(*mini, sayn, "saun");
	811	delete mini;
	812	mini = NULL;
	813
	814	#if !UCONFIG_NO_FORMATTING
	815	// Transliterate the Greek locale data
	816	Locale el("el");
	817	DateFormatSymbols syms(el, status);
	818	if (U_FAILURE(status)) { errln("FAIL: Transliterator constructor failed"); return; }
	819	int32_t i, count;
	820	const UnicodeString* data = syms.getMonths(count);
	821	for (i=0; i<count; ++i) {
	822	if (data[i].length() == 0) {
	823	continue;
	824	}
	825	UnicodeString out(data[i]);
	826	gl->transliterate(out);
	827	UBool ok = TRUE;
	828	if (data[i].length() >= 2 && out.length() >= 2 &&
	829	u_isupper(data[i].charAt(0)) && u_islower(data[i].charAt(1))) {
	830	if (!(u_isupper(out.charAt(0)) && u_islower(out.charAt(1)))) {
	831	ok = FALSE;
	832	}
	833	}
	834	if (ok) {
	835	logln(prettify(data[i] + " -> " + out));
	836	} else {
	837	errln(UnicodeString("FAIL: ") + prettify(data[i] + " -> " + out));
	838	}
	839	}
	840	#endif
	841
	842	delete gl;
	843	}
	844
	845	/**
	846	* Prefix, suffix support in hex transliterators
	847	*/
	848	void TransliteratorTest::TestJ243(void) {
	849	UErrorCode ec = U_ZERO_ERROR;
	850
	851	// Test default Hex-Any, which should handle
	852	// \u, \U, u+, and U+
	853	Transliterator *hex =
	854	Transliterator::createInstance("Hex-Any", UTRANS_FORWARD, ec);
	855	if (assertSuccess("getInstance", ec)) {
	856	expect(*hex, UnicodeString("\\u0041+\\U00000042,U+0043uU+0044z", ""), "A+B,CuDz");
	857	}
	858	delete hex;
	859
	860	// // Try a custom Hex-Unicode
	861	// // \uXXXX and &#xXXXX;
	862	// ec = U_ZERO_ERROR;
	863	// HexToUnicodeTransliterator hex2(UnicodeString("\\\\u###0;&\\#x###0\\;", ""), ec);
	864	// expect(hex2, UnicodeString("\\u61\\u062\\u0063\\u00645\\u66x0123", ""),
	865	// "abcd5fx0123");
	866	// // Try custom Any-Hex (default is tested elsewhere)
	867	// ec = U_ZERO_ERROR;
	868	// UnicodeToHexTransliterator hex3(UnicodeString("&\\#x###0;", ""), ec);
	869	// expect(hex3, "012", "012");
	870	}
	871
	872	/**
	873	* Parsers need better syntax error messages.
	874	*/
	875	void TransliteratorTest::TestJ329(void) {
	876
	877	struct { UBool containsErrors; const char* rule; } DATA[] = {
	878	{ FALSE, "a > b; c > d" },
	879	{ TRUE, "a > b; no operator; c > d" },
	880	};
	881	int32_t DATA_length = (int32_t)(sizeof(DATA) / sizeof(DATA[0]));
	882
	883	for (int32_t i=0; i<DATA_length; ++i) {
	884	UErrorCode status = U_ZERO_ERROR;
	885	UParseError parseError;
	886	Transliterator *rbt = Transliterator::createFromRules("<ID>",
	887	DATA[i].rule,
	888	UTRANS_FORWARD,
	889	parseError,
	890	status);
	891	UBool gotError = U_FAILURE(status);
	892	UnicodeString desc(DATA[i].rule);
	893	desc.append(gotError ? " -> error" : " -> no error");
	894	if (gotError) {
	895	desc = desc + ", ParseError code=" + u_errorName(status) +
	896	" line=" + parseError.line +
	897	" offset=" + parseError.offset +
	898	" context=" + parseError.preContext;
	899	}
	900	if (gotError == DATA[i].containsErrors) {
	901	logln(UnicodeString("Ok: ") + desc);
	902	} else {
	903	errln(UnicodeString("FAIL: ") + desc);
	904	}
	905	delete rbt;
	906	}
	907	}
	908
	909	/**
	910	* Test segments and segment references.
	911	*/
	912	void TransliteratorTest::TestSegments(void) {
	913	// Array of 3n items
	914	// Each item is <rules>, <input>, <expected output>
	915	UnicodeString DATA[] = {
	916	"([a-z]) '.' ([0-9]) > $2 '-' $1",
	917	"abc.123.xyz.456",
	918	"ab1-c23.xy4-z56",
	919
	920	// nested
	921	"(([a-z])([0-9])) > $1 '.' $2 '.' $3;",
	922	"a1 b2",
	923	"a1.a.1 b2.b.2",
	924	};
	925	int32_t DATA_length = (int32_t)(sizeof(DATA)/sizeof(*DATA));
	926
	927	for (int32_t i=0; i<DATA_length; i+=3) {
	928	logln("Pattern: " + prettify(DATA[i]));
	929	UParseError parseError;
	930	UErrorCode status = U_ZERO_ERROR;
	931	Transliterator *t = Transliterator::createFromRules("ID", DATA[i], UTRANS_FORWARD, parseError, status);
	932	if (U_FAILURE(status)) {
	933	errln("FAIL: RBT constructor");
	934	} else {
	935	expect(*t, DATA[i+1], DATA[i+2]);
	936	}
	937	delete t;
	938	}
	939	}
	940
	941	/**
	942	* Test cursor positioning outside of the key
	943	*/
	944	void TransliteratorTest::TestCursorOffset(void) {
	945	// Array of 3n items
	946	// Each item is <rules>, <input>, <expected output>
	947	UnicodeString DATA[] = {
	948	"pre {alpha} post > \| @ ALPHA ;"
	949	"eALPHA > beta ;"
	950	"pre {beta} post > BETA @@ \| ;"
	951	"post > xyz",
	952
	953	"prealphapost prebetapost",
	954
	955	"prbetaxyz preBETApost",
	956	};
	957	int32_t DATA_length = (int32_t)(sizeof(DATA)/sizeof(*DATA));
	958
	959	for (int32_t i=0; i<DATA_length; i+=3) {
	960	logln("Pattern: " + prettify(DATA[i]));
	961	UParseError parseError;
	962	UErrorCode status = U_ZERO_ERROR;
	963	Transliterator *t = Transliterator::createFromRules("<ID>", DATA[i], UTRANS_FORWARD, parseError, status);
	964	if (U_FAILURE(status)) {
	965	errln("FAIL: RBT constructor");
	966	} else {
	967	expect(*t, DATA[i+1], DATA[i+2]);
	968	}
	969	delete t;
	970	}
	971	}
	972
	973	/**
	974	* Test zero length and > 1 char length variable values. Test
	975	* use of variable refs in UnicodeSets.
	976	*/
	977	void TransliteratorTest::TestArbitraryVariableValues(void) {
	978	// Array of 3n items
	979	// Each item is <rules>, <input>, <expected output>
	980	UnicodeString DATA[] = {
	981	"$abe = ab;"
	982	"$pat = x[yY]z;"
	983	"$ll = 'a-z';"
	984	"$llZ = [$ll];"
	985	"$llY = [$ll$pat];"
	986	"$emp = ;"
	987
	988	"$abe > ABE;"
	989	"$pat > END;"
	990	"$llZ > 1;"
	991	"$llY > 2;"
	992	"7$emp 8 > 9;"
	993	"",
	994
	995	"ab xYzxyz stY78",
	996	"ABE ENDEND 1129",
	997	};
	998	int32_t DATA_length = (int32_t)(sizeof(DATA)/sizeof(*DATA));
	999
	1000	for (int32_t i=0; i<DATA_length; i+=3) {
	1001	logln("Pattern: " + prettify(DATA[i]));
	1002	UParseError parseError;
	1003	UErrorCode status = U_ZERO_ERROR;
	1004	Transliterator *t = Transliterator::createFromRules("<ID>", DATA[i], UTRANS_FORWARD, parseError, status);
	1005	if (U_FAILURE(status)) {
	1006	errln("FAIL: RBT constructor");
	1007	} else {
	1008	expect(*t, DATA[i+1], DATA[i+2]);
	1009	}
	1010	delete t;
	1011	}
	1012	}
	1013
	1014	/**
	1015	* Confirm that the contextStart, contextLimit, start, and limit
	1016	* behave correctly. J474.
	1017	*/
	1018	void TransliteratorTest::TestPositionHandling(void) {
	1019	// Array of 3n items
	1020	// Each item is <rules>, <input>, <expected output>
	1021	const char* DATA[] = {
	1022	"a{t} > SS ; {t}b > UU ; {t} > TT ;",
	1023	"xtat txtb", // pos 0,9,0,9
	1024	"xTTaSS TTxUUb",
	1025
	1026	"a{t} > SS ; {t}b > UU ; {t} > TT ; a > A ; b > B ;",
	1027	"xtat txtb", // pos 2,9,3,8
	1028	"xtaSS TTxUUb",
	1029
	1030	"a{t} > SS ; {t}b > UU ; {t} > TT ; a > A ; b > B ;",
	1031	"xtat txtb", // pos 3,8,3,8
	1032	"xtaTT TTxTTb",
	1033	};
	1034
	1035	// Array of 4n positions -- these go with the DATA array
	1036	// They are: contextStart, contextLimit, start, limit
	1037	int32_t POS[] = {
	1038	0, 9, 0, 9,
	1039	2, 9, 3, 8,
	1040	3, 8, 3, 8,
	1041	};
	1042
	1043	int32_t n = (int32_t)(sizeof(DATA) / sizeof(DATA[0])) / 3;
	1044	for (int32_t i=0; i<n; i++) {
	1045	UErrorCode status = U_ZERO_ERROR;
	1046	UParseError parseError;
	1047	Transliterator *t = Transliterator::createFromRules("<ID>",
	1048	DATA[3*i], UTRANS_FORWARD, parseError, status);
	1049	if (U_FAILURE(status)) {
	1050	delete t;
	1051	errln("FAIL: RBT constructor");
	1052	return;
	1053	}
	1054	UTransPosition pos;
	1055	pos.contextStart= POS[4*i];
	1056	pos.contextLimit = POS[4*i+1];
	1057	pos.start = POS[4*i+2];
	1058	pos.limit = POS[4*i+3];
	1059	UnicodeString rsource(DATA[3*i+1]);
	1060	t->transliterate(rsource, pos, status);
	1061	if (U_FAILURE(status)) {
	1062	delete t;
	1063	errln("FAIL: transliterate");
	1064	return;
	1065	}
	1066	t->finishTransliteration(rsource, pos);
	1067	expectAux(DATA[3*i],
	1068	DATA[3*i+1],
	1069	rsource,
	1070	DATA[3*i+2]);
	1071	delete t;
	1072	}
	1073	}
	1074
	1075	/**
	1076	* Test the Hiragana-Katakana transliterator.
	1077	*/
	1078	void TransliteratorTest::TestHiraganaKatakana(void) {
	1079	UParseError parseError;
	1080	UErrorCode status = U_ZERO_ERROR;
	1081	Transliterator* hk = Transliterator::createInstance("Hiragana-Katakana", UTRANS_FORWARD, parseError, status);
	1082	Transliterator* kh = Transliterator::createInstance("Katakana-Hiragana", UTRANS_FORWARD, parseError, status);
	1083	if (hk == 0 \|\| kh == 0) {
	1084	dataerrln("FAIL: createInstance failed - %s", u_errorName(status));
	1085	delete hk;
	1086	delete kh;
	1087	return;
	1088	}
	1089
	1090	// Array of 3n items
	1091	// Each item is "hk"\|"kh"\|"both", <Hiragana>, <Katakana>
	1092	const char* DATA[] = {
	1093	"both",
	1094	"\\u3042\\u3090\\u3099\\u3092\\u3050",
	1095	"\\u30A2\\u30F8\\u30F2\\u30B0",
	1096
	1097	"kh",
	1098	"\\u307C\\u3051\\u3060\\u3042\\u3093\\u30FC",
	1099	"\\u30DC\\u30F6\\u30C0\\u30FC\\u30F3\\u30FC",
	1100	};
	1101	int32_t DATA_length = (int32_t)(sizeof(DATA) / sizeof(DATA[0]));
	1102
	1103	for (int32_t i=0; i<DATA_length; i+=3) {
	1104	UnicodeString h = CharsToUnicodeString(DATA[i+1]);
	1105	UnicodeString k = CharsToUnicodeString(DATA[i+2]);
	1106	switch (*DATA[i]) {
	1107	case 0x68: //'h': // Hiragana-Katakana
	1108	expect(*hk, h, k);
	1109	break;
	1110	case 0x6B: //'k': // Katakana-Hiragana
	1111	expect(*kh, k, h);
	1112	break;
	1113	case 0x62: //'b': // both
	1114	expect(*hk, h, k);
	1115	expect(*kh, k, h);
	1116	break;
	1117	}
	1118	}
	1119	delete hk;
	1120	delete kh;
	1121	}
	1122
	1123	/**
	1124	* Test cloning / copy constructor of RBT.
	1125	*/
	1126	void TransliteratorTest::TestCopyJ476(void) {
	1127	// The real test here is what happens when the destructors are
	1128	// called. So we let one object get destructed, and check to
	1129	// see that its copy still works.
	1130	Transliterator *t2 = 0;
	1131	{
	1132	UParseError parseError;
	1133	UErrorCode status = U_ZERO_ERROR;
	1134	Transliterator *t1 = Transliterator::createFromRules("t1",
	1135	"a>A;b>B;'foo'+>'bar'", UTRANS_FORWARD, parseError, status);
	1136	if (U_FAILURE(status)) {
	1137	errln("FAIL: RBT constructor");
	1138	return;
	1139	}
	1140	t2 = t1->clone(); // Call copy constructor under the covers.
	1141	expect(*t1, "abcfoofoo", "ABcbar");
	1142	delete t1;
	1143	}
	1144	expect(*t2, "abcfoofoo", "ABcbar");
	1145	delete t2;
	1146	}
	1147
	1148	/**
	1149	* Test inter-Indic transliterators. These are composed.
	1150	* ICU4C Jitterbug 483.
	1151	*/
	1152	void TransliteratorTest::TestInterIndic(void) {
	1153	UnicodeString ID("Devanagari-Gujarati", "");
	1154	UErrorCode status = U_ZERO_ERROR;
	1155	UParseError parseError;
	1156	Transliterator* dg = Transliterator::createInstance(ID, UTRANS_FORWARD, parseError, status);
	1157	if (dg == 0) {
	1158	dataerrln("FAIL: createInstance(" + ID + ") returned NULL - " + u_errorName(status));
	1159	return;
	1160	}
	1161	UnicodeString id = dg->getID();
	1162	if (id != ID) {
	1163	errln("FAIL: createInstance(" + ID + ")->getID() => " + id);
	1164	}
	1165	UnicodeString dev = CharsToUnicodeString("\\u0901\\u090B\\u0925");
	1166	UnicodeString guj = CharsToUnicodeString("\\u0A81\\u0A8B\\u0AA5");
	1167	expect(*dg, dev, guj);
	1168	delete dg;
	1169	}
	1170
	1171	/**
	1172	* Test filter syntax in IDs. (J918)
	1173	*/
	1174	void TransliteratorTest::TestFilterIDs(void) {
	1175	// Array of 3n strings:
	1176	// <id>, <inverse id>, <input>, <expected output>
	1177	const char* DATA[] = {
	1178	"[aeiou]Any-Hex", // ID
	1179	"[aeiou]Hex-Any", // expected inverse ID
	1180	"quizzical", // src
	1181	"q\\u0075\\u0069zz\\u0069c\\u0061l", // expected ID.translit(src)
	1182
	1183	"[aeiou]Any-Hex;[^5]Hex-Any",
	1184	"[^5]Any-Hex;[aeiou]Hex-Any",
	1185	"quizzical",
	1186	"q\\u0075izzical",
	1187
	1188	"[abc]Null",
	1189	"[abc]Null",
	1190	"xyz",
	1191	"xyz",
	1192	};
	1193	enum { DATA_length = sizeof(DATA) / sizeof(DATA[0]) };
	1194
	1195	for (int i=0; i<DATA_length; i+=4) {
	1196	UnicodeString ID(DATA[i], "");
	1197	UnicodeString uID(DATA[i+1], "");
	1198	UnicodeString data2(DATA[i+2], "");
	1199	UnicodeString data3(DATA[i+3], "");
	1200	UParseError parseError;
	1201	UErrorCode status = U_ZERO_ERROR;
	1202	Transliterator *t = Transliterator::createInstance(ID, UTRANS_FORWARD, parseError, status);
	1203	if (t == 0) {
	1204	errln("FAIL: createInstance(" + ID + ") returned NULL");
	1205	return;
	1206	}
	1207	expect(*t, data2, data3);
	1208
	1209	// Check the ID
	1210	if (ID != t->getID()) {
	1211	errln("FAIL: createInstance(" + ID + ").getID() => " +
	1212	t->getID());
	1213	}
	1214
	1215	// Check the inverse
	1216	Transliterator *u = t->createInverse(status);
	1217	if (u == 0) {
	1218	errln("FAIL: " + ID + ".createInverse() returned NULL");
	1219	} else if (u->getID() != uID) {
	1220	errln("FAIL: " + ID + ".createInverse().getID() => " +
	1221	u->getID() + ", expected " + uID);
	1222	}
	1223
	1224	delete t;
	1225	delete u;
	1226	}
	1227	}
	1228
	1229	/**
	1230	* Test the case mapping transliterators.
	1231	*/
	1232	void TransliteratorTest::TestCaseMap(void) {
	1233	UParseError parseError;
	1234	UErrorCode status = U_ZERO_ERROR;
	1235	Transliterator* toUpper =
	1236	Transliterator::createInstance("Any-Upper[^xyzXYZ]", UTRANS_FORWARD, parseError, status);
	1237	Transliterator* toLower =
	1238	Transliterator::createInstance("Any-Lower[^xyzXYZ]", UTRANS_FORWARD, parseError, status);
	1239	Transliterator* toTitle =
	1240	Transliterator::createInstance("Any-Title[^xyzXYZ]", UTRANS_FORWARD, parseError, status);
	1241	if (toUpper==0 \|\| toLower==0 \|\| toTitle==0) {
	1242	errln("FAIL: createInstance returned NULL");
	1243	delete toUpper;
	1244	delete toLower;
	1245	delete toTitle;
	1246	return;
	1247	}
	1248
	1249	expect(*toUpper, "The quick brown fox jumped over the lazy dogs.",
	1250	"THE QUICK BROWN FOx JUMPED OVER THE LAzy DOGS.");
	1251	expect(*toLower, "The quIck brown fOX jUMPED OVER THE LAzY dogs.",
	1252	"the quick brown foX jumped over the lazY dogs.");
	1253	expect(*toTitle, "the quick brown foX can't jump over the laZy dogs.",
	1254	"The Quick Brown FoX Can't Jump Over The LaZy Dogs.");
	1255
	1256	delete toUpper;
	1257	delete toLower;
	1258	delete toTitle;
	1259	}
	1260
	1261	/**
	1262	* Test the name mapping transliterators.
	1263	*/
	1264	void TransliteratorTest::TestNameMap(void) {
	1265	UParseError parseError;
	1266	UErrorCode status = U_ZERO_ERROR;
	1267	Transliterator* uni2name =
	1268	Transliterator::createInstance("Any-Name[^abc]", UTRANS_FORWARD, parseError, status);
	1269	Transliterator* name2uni =
	1270	Transliterator::createInstance("Name-Any", UTRANS_FORWARD, parseError, status);
	1271	if (uni2name==0 \|\| name2uni==0) {
	1272	errln("FAIL: createInstance returned NULL");
	1273	delete uni2name;
	1274	delete name2uni;
	1275	return;
	1276	}
	1277
	1278	// Careful: CharsToUS will convert "\\N" => "N"; use "\\\\N" for \N
	1279	expect(*uni2name, CharsToUnicodeString("\\u00A0abc\\u4E01\\u00B5\\u0A81\\uFFFD\\u0004\\u0009\\u0081\\uFFFF"),
	1280	CharsToUnicodeString("\\\\N{NO-BREAK SPACE}abc\\\\N{CJK UNIFIED IDEOGRAPH-4E01}\\\\N{MICRO SIGN}\\\\N{GUJARATI SIGN CANDRABINDU}\\\\N{REPLACEMENT CHARACTER}\\\\N{<control-0004>}\\\\N{<control-0009>}\\\\N{<control-0081>}\\\\N{<noncharacter-FFFF>}"));
	1281	expect(*name2uni, UNICODE_STRING_SIMPLE("{\\N { NO-BREAK SPACE}abc\\N{ CJK UNIFIED IDEOGRAPH-4E01 }\\N{x\\N{MICRO SIGN}\\N{GUJARATI SIGN CANDRABINDU}\\N{REPLACEMENT CHARACTER}\\N{<control-0004>}\\N{<control-0009>}\\N{<control-0081>}\\N{<noncharacter-FFFF>}\\N{<control-0004>}\\N{"),
	1282	CharsToUnicodeString("{\\u00A0abc\\u4E01\\\\N{x\\u00B5\\u0A81\\uFFFD\\u0004\\u0009\\u0081\\uFFFF\\u0004\\\\N{"));
	1283
	1284	delete uni2name;
	1285	delete name2uni;
	1286
	1287	// round trip
	1288	Transliterator* t =
	1289	Transliterator::createInstance("Any-Name;Name-Any", UTRANS_FORWARD, parseError, status);
	1290	if (t==0) {
	1291	errln("FAIL: createInstance returned NULL");
	1292	delete t;
	1293	return;
	1294	}
	1295
	1296	// Careful: CharsToUS will convert "\\N" => "N"; use "\\\\N" for \N
	1297	UnicodeString s = CharsToUnicodeString("{\\u00A0abc\\u4E01\\\\N{x\\u00B5\\u0A81\\uFFFD\\u0004\\u0009\\u0081\\uFFFF\\u0004\\\\N{");
	1298	expect(*t, s, s);
	1299	delete t;
	1300	}
	1301
	1302	/**
	1303	* Test liberalized ID syntax. 1006c
	1304	*/
	1305	void TransliteratorTest::TestLiberalizedID(void) {
	1306	// Some test cases have an expected getID() value of NULL. This
	1307	// means I have disabled the test case for now. This stuff is
	1308	// still under development, and I haven't decided whether to make
	1309	// getID() return canonical case yet. It will all get rewritten
	1310	// with the move to Source-Target/Variant IDs anyway. [aliu]
	1311	const char* DATA[] = {
	1312	"latin-greek", NULL /"Latin-Greek"/, "case insensitivity",
	1313	" Null ", "Null", "whitespace",
	1314	" Latin[a-z]-Greek ", "[a-z]Latin-Greek", "inline filter",
	1315	" null ; latin-greek ", NULL /"Null;Latin-Greek"/, "compound whitespace",
	1316	};
	1317	const int32_t DATA_length = sizeof(DATA)/sizeof(DATA[0]);
	1318	UParseError parseError;
	1319	UErrorCode status= U_ZERO_ERROR;
	1320	for (int32_t i=0; i<DATA_length; i+=3) {
	1321	Transliterator *t = Transliterator::createInstance(DATA[i], UTRANS_FORWARD, parseError, status);
	1322	if (t == 0) {
	1323	dataerrln(UnicodeString("FAIL: ") + DATA[i+2] +
	1324	" cannot create ID \"" + DATA[i] + "\" - " + u_errorName(status));
	1325	} else {
	1326	UnicodeString exp;
	1327	if (DATA[i+1]) {
	1328	exp = UnicodeString(DATA[i+1], "");
	1329	}
	1330	// Don't worry about getID() if the expected char*
	1331	// is NULL -- see above.
	1332	if (exp.length() == 0 \|\| exp == t->getID()) {
	1333	logln(UnicodeString("Ok: ") + DATA[i+2] +
	1334	" create ID \"" + DATA[i] + "\" => \"" +
	1335	exp + "\"");
	1336	} else {
	1337	errln(UnicodeString("FAIL: ") + DATA[i+2] +
	1338	" create ID \"" + DATA[i] + "\" => \"" +
	1339	t->getID() + "\", exp \"" + exp + "\"");
	1340	}
	1341	delete t;
	1342	}
	1343	}
	1344	}
	1345
	1346	/* test for Jitterbug 912 */
	1347	void TransliteratorTest::TestCreateInstance(){
	1348	const char* FORWARD = "F";
	1349	const char* REVERSE = "R";
	1350	const char* DATA[] = {
	1351	// Column 1: id
	1352	// Column 2: direction
	1353	// Column 3: expected ID, or "" if expect failure
	1354	"Latin-Hangul", REVERSE, "Hangul-Latin", // JB#912
	1355
	1356	// JB#2689: bad compound causes crash
	1357	"InvalidSource-InvalidTarget", FORWARD, "",
	1358	"InvalidSource-InvalidTarget", REVERSE, "",
	1359	"Hex-Any;InvalidSource-InvalidTarget", FORWARD, "",
	1360	"Hex-Any;InvalidSource-InvalidTarget", REVERSE, "",
	1361	"InvalidSource-InvalidTarget;Hex-Any", FORWARD, "",
	1362	"InvalidSource-InvalidTarget;Hex-Any", REVERSE, "",
	1363
	1364	NULL
	1365	};
	1366
	1367	for (int32_t i=0; DATA[i]; i+=3) {
	1368	UParseError err;
	1369	UErrorCode ec = U_ZERO_ERROR;
	1370	UnicodeString id(DATA[i]);
	1371	UTransDirection dir = (DATA[i+1]==FORWARD)?
	1372	UTRANS_FORWARD:UTRANS_REVERSE;
	1373	UnicodeString expID(DATA[i+2]);
	1374	Transliterator* t =
	1375	Transliterator::createInstance(id,dir,err,ec);
	1376	UnicodeString newID;
	1377	if (t) {
	1378	newID = t->getID();
	1379	}
	1380	UBool ok = (newID == expID);
	1381	if (!t) {
	1382	newID = u_errorName(ec);
	1383	}
	1384	if (ok) {
	1385	logln((UnicodeString)"Ok: createInstance(" +
	1386	id + "," + DATA[i+1] + ") => " + newID);
	1387	} else {
	1388	dataerrln((UnicodeString)"FAIL: createInstance(" +
	1389	id + "," + DATA[i+1] + ") => " + newID +
	1390	", expected " + expID);
	1391	}
	1392	delete t;
	1393	}
	1394	}
	1395
	1396	/**
	1397	* Test the normalization transliterator.
	1398	*/
	1399	void TransliteratorTest::TestNormalizationTransliterator() {
	1400	// THE FOLLOWING TWO TABLES ARE COPIED FROM com.ibm.test.normalizer.BasicTest
	1401	// PLEASE KEEP THEM IN SYNC WITH BasicTest.
	1402	const char* CANON[] = {
	1403	// Input Decomposed Composed
	1404	"cat", "cat", "cat" ,
	1405	"\\u00e0ardvark", "a\\u0300ardvark", "\\u00e0ardvark" ,
	1406
	1407	"\\u1e0a", "D\\u0307", "\\u1e0a" , // D-dot_above
	1408	"D\\u0307", "D\\u0307", "\\u1e0a" , // D dot_above
	1409
	1410	"\\u1e0c\\u0307", "D\\u0323\\u0307", "\\u1e0c\\u0307" , // D-dot_below dot_above
	1411	"\\u1e0a\\u0323", "D\\u0323\\u0307", "\\u1e0c\\u0307" , // D-dot_above dot_below
	1412	"D\\u0307\\u0323", "D\\u0323\\u0307", "\\u1e0c\\u0307" , // D dot_below dot_above
	1413
	1414	"\\u1e10\\u0307\\u0323", "D\\u0327\\u0323\\u0307","\\u1e10\\u0323\\u0307", // D dot_below cedilla dot_above
	1415	"D\\u0307\\u0328\\u0323","D\\u0328\\u0323\\u0307","\\u1e0c\\u0328\\u0307", // D dot_above ogonek dot_below
	1416
	1417	"\\u1E14", "E\\u0304\\u0300", "\\u1E14" , // E-macron-grave
	1418	"\\u0112\\u0300", "E\\u0304\\u0300", "\\u1E14" , // E-macron + grave
	1419	"\\u00c8\\u0304", "E\\u0300\\u0304", "\\u00c8\\u0304" , // E-grave + macron
	1420
	1421	"\\u212b", "A\\u030a", "\\u00c5" , // angstrom_sign
	1422	"\\u00c5", "A\\u030a", "\\u00c5" , // A-ring
	1423
	1424	"\\u00fdffin", "y\\u0301ffin", "\\u00fdffin" , //updated with 3.0
	1425	"\\u00fd\\uFB03n", "y\\u0301\\uFB03n", "\\u00fd\\uFB03n" , //updated with 3.0
	1426
	1427	"Henry IV", "Henry IV", "Henry IV" ,
	1428	"Henry \\u2163", "Henry \\u2163", "Henry \\u2163" ,
	1429
	1430	"\\u30AC", "\\u30AB\\u3099", "\\u30AC" , // ga (Katakana)
	1431	"\\u30AB\\u3099", "\\u30AB\\u3099", "\\u30AC" , // ka + ten
	1432	"\\uFF76\\uFF9E", "\\uFF76\\uFF9E", "\\uFF76\\uFF9E" , // hw_ka + hw_ten
	1433	"\\u30AB\\uFF9E", "\\u30AB\\uFF9E", "\\u30AB\\uFF9E" , // ka + hw_ten
	1434	"\\uFF76\\u3099", "\\uFF76\\u3099", "\\uFF76\\u3099" , // hw_ka + ten
	1435
	1436	"A\\u0300\\u0316", "A\\u0316\\u0300", "\\u00C0\\u0316" ,
	1437	0 // end
	1438	};
	1439
	1440	const char* COMPAT[] = {
	1441	// Input Decomposed Composed
	1442	"\\uFB4f", "\\u05D0\\u05DC", "\\u05D0\\u05DC" , // Alef-Lamed vs. Alef, Lamed
	1443
	1444	"\\u00fdffin", "y\\u0301ffin", "\\u00fdffin" , //updated for 3.0
	1445	"\\u00fd\\uFB03n", "y\\u0301ffin", "\\u00fdffin" , // ffi ligature -> f + f + i
	1446
	1447	"Henry IV", "Henry IV", "Henry IV" ,
	1448	"Henry \\u2163", "Henry IV", "Henry IV" ,
	1449
	1450	"\\u30AC", "\\u30AB\\u3099", "\\u30AC" , // ga (Katakana)
	1451	"\\u30AB\\u3099", "\\u30AB\\u3099", "\\u30AC" , // ka + ten
	1452
	1453	"\\uFF76\\u3099", "\\u30AB\\u3099", "\\u30AC" , // hw_ka + ten
	1454	0 // end
	1455	};
	1456
	1457	int32_t i;
	1458	UParseError parseError;
	1459	UErrorCode status = U_ZERO_ERROR;
	1460	Transliterator* NFD = Transliterator::createInstance("NFD", UTRANS_FORWARD, parseError, status);
	1461	Transliterator* NFC = Transliterator::createInstance("NFC", UTRANS_FORWARD, parseError, status);
	1462	if (!NFD \|\| !NFC) {
	1463	dataerrln("FAIL: createInstance failed: %s", u_errorName(status));
	1464	delete NFD;
	1465	delete NFC;
	1466	return;
	1467	}
	1468	for (i=0; CANON[i]; i+=3) {
	1469	UnicodeString in = CharsToUnicodeString(CANON[i]);
	1470	UnicodeString expd = CharsToUnicodeString(CANON[i+1]);
	1471	UnicodeString expc = CharsToUnicodeString(CANON[i+2]);
	1472	expect(*NFD, in, expd);
	1473	expect(*NFC, in, expc);
	1474	}
	1475	delete NFD;
	1476	delete NFC;
	1477
	1478	Transliterator* NFKD = Transliterator::createInstance("NFKD", UTRANS_FORWARD, parseError, status);
	1479	Transliterator* NFKC = Transliterator::createInstance("NFKC", UTRANS_FORWARD, parseError, status);
	1480	if (!NFKD \|\| !NFKC) {
	1481	errln("FAIL: createInstance failed");
	1482	delete NFKD;
	1483	delete NFKC;
	1484	return;
	1485	}
	1486	for (i=0; COMPAT[i]; i+=3) {
	1487	UnicodeString in = CharsToUnicodeString(COMPAT[i]);
	1488	UnicodeString expkd = CharsToUnicodeString(COMPAT[i+1]);
	1489	UnicodeString expkc = CharsToUnicodeString(COMPAT[i+2]);
	1490	expect(*NFKD, in, expkd);
	1491	expect(*NFKC, in, expkc);
	1492	}
	1493	delete NFKD;
	1494	delete NFKC;
	1495
	1496	UParseError pe;
	1497	status = U_ZERO_ERROR;
	1498	Transliterator *t = Transliterator::createInstance("NFD; [x]Remove",
	1499	UTRANS_FORWARD,
	1500	pe, status);
	1501	if (t == 0) {
	1502	errln("FAIL: createInstance failed");
	1503	}
	1504	expect(*t, CharsToUnicodeString("\\u010dx"),
	1505	CharsToUnicodeString("c\\u030C"));
	1506	delete t;
	1507	}
	1508
	1509	/**
	1510	* Test compound RBT rules.
	1511	*/
	1512	void TransliteratorTest::TestCompoundRBT(void) {
	1513	// Careful with spacing and ';' here: Phrase this exactly
	1514	// as toRules() is going to return it. If toRules() changes
	1515	// with regard to spacing or ';', then adjust this string.
	1516	UnicodeString rule("::Hex-Any;\n"
	1517	"::Any-Lower;\n"
	1518	"a > '.A.';\n"
	1519	"b > '.B.';\n"
	1520	"::[^t]Any-Upper;", "");
	1521	UParseError parseError;
	1522	UErrorCode status = U_ZERO_ERROR;
	1523	Transliterator *t = Transliterator::createFromRules("Test", rule, UTRANS_FORWARD, parseError, status);
	1524	if (t == 0) {
	1525	errln("FAIL: createFromRules failed");
	1526	return;
	1527	}
	1528	expect(*t, UNICODE_STRING_SIMPLE("\\u0043at in the hat, bat on the mat"),
	1529	"C.A.t IN tHE H.A.t, .B..A.t ON tHE M.A.t");
	1530	UnicodeString r;
	1531	t->toRules(r, TRUE);
	1532	if (r == rule) {
	1533	logln((UnicodeString)"OK: toRules() => " + r);
	1534	} else {
	1535	errln((UnicodeString)"FAIL: toRules() => " + r +
	1536	", expected " + rule);
	1537	}
	1538	delete t;
	1539
	1540	// Now test toRules
	1541	t = Transliterator::createInstance("Greek-Latin; Latin-Cyrillic", UTRANS_FORWARD, parseError, status);
	1542	if (t == 0) {
	1543	dataerrln("FAIL: createInstance failed - %s", u_errorName(status));
	1544	return;
	1545	}
	1546	UnicodeString exp("::Greek-Latin;\n::Latin-Cyrillic;");
	1547	t->toRules(r, TRUE);
	1548	if (r != exp) {
	1549	errln((UnicodeString)"FAIL: toRules() => " + r +
	1550	", expected " + exp);
	1551	} else {
	1552	logln((UnicodeString)"OK: toRules() => " + r);
	1553	}
	1554	delete t;
	1555
	1556	// Round trip the result of toRules
	1557	t = Transliterator::createFromRules("Test", r, UTRANS_FORWARD, parseError, status);
	1558	if (t == 0) {
	1559	errln("FAIL: createFromRules #2 failed");
	1560	return;
	1561	} else {
	1562	logln((UnicodeString)"OK: createFromRules(" + r + ") succeeded");
	1563	}
	1564
	1565	// Test toRules again
	1566	t->toRules(r, TRUE);
	1567	if (r != exp) {
	1568	errln((UnicodeString)"FAIL: toRules() => " + r +
	1569	", expected " + exp);
	1570	} else {
	1571	logln((UnicodeString)"OK: toRules() => " + r);
	1572	}
	1573
	1574	delete t;
	1575
	1576	// Test Foo(Bar) IDs. Careful with spacing in id; make it conform
	1577	// to what the regenerated ID will look like.
	1578	UnicodeString id("Upper(Lower);(NFKC)", "");
	1579	t = Transliterator::createInstance(id, UTRANS_FORWARD, parseError, status);
	1580	if (t == 0) {
	1581	errln("FAIL: createInstance #2 failed");
	1582	return;
	1583	}
	1584	if (t->getID() == id) {
	1585	logln((UnicodeString)"OK: created " + id);
	1586	} else {
	1587	errln((UnicodeString)"FAIL: createInstance(" + id +
	1588	").getID() => " + t->getID());
	1589	}
	1590
	1591	Transliterator *u = t->createInverse(status);
	1592	if (u == 0) {
	1593	errln("FAIL: createInverse failed");
	1594	delete t;
	1595	return;
	1596	}
	1597	exp = "NFKC();Lower(Upper)";
	1598	if (u->getID() == exp) {
	1599	logln((UnicodeString)"OK: createInverse(" + id + ") => " +
	1600	u->getID());
	1601	} else {
	1602	errln((UnicodeString)"FAIL: createInverse(" + id + ") => " +
	1603	u->getID());
	1604	}
	1605	delete t;
	1606	delete u;
	1607	}
	1608
	1609	/**
	1610	* Compound filter semantics were originally not implemented
	1611	* correctly. Originally, each component filter f(i) was replaced by
	1612	* f'(i) = f(i) && g, where g is the filter for the compound
	1613	* transliterator.
	1614	*
	1615	* From Mark:
	1616	*
	1617	* Suppose I have a transliterator X. Internally X is
	1618	* "Greek-Latin; Latin-Cyrillic; Any-Lower". I use a filter [^A].
	1619	*
	1620	* The compound should convert all Greek characters (through Latin) to
	1621	* Cyrillic, then lowercase the result. The filter should say "don't
	1622	* touch 'A' in the original". But because an intermediate result
	1623	* happens to go through "A", the Greek Alpha gets hung up.
	1624	*/
	1625	void TransliteratorTest::TestCompoundFilter(void) {
	1626	UParseError parseError;
	1627	UErrorCode status = U_ZERO_ERROR;
	1628	Transliterator *t = Transliterator::createInstance
	1629	("Greek-Latin; Latin-Greek; Lower", UTRANS_FORWARD, parseError, status);
	1630	if (t == 0) {
	1631	dataerrln("FAIL: createInstance failed - %s", u_errorName(status));
	1632	return;
	1633	}
	1634	t->adoptFilter(new UnicodeSet("[^A]", status));
	1635	if (U_FAILURE(status)) {
	1636	errln("FAIL: UnicodeSet ct failed");
	1637	delete t;
	1638	return;
	1639	}
	1640
	1641	// Only the 'A' at index 1 should remain unchanged
	1642	expect(*t,
	1643	CharsToUnicodeString("BA\\u039A\\u0391"),
	1644	CharsToUnicodeString("\\u03b2A\\u03ba\\u03b1"));
	1645	delete t;
	1646	}
	1647
	1648	void TransliteratorTest::TestRemove(void) {
	1649	UParseError parseError;
	1650	UErrorCode status = U_ZERO_ERROR;
	1651	Transliterator *t = Transliterator::createInstance("Remove[abc]", UTRANS_FORWARD, parseError, status);
	1652	if (t == 0) {
	1653	errln("FAIL: createInstance failed");
	1654	return;
	1655	}
	1656
	1657	expect(*t, "Able bodied baker's cats", "Ale odied ker's ts");
	1658
	1659	// extra test for RemoveTransliterator::clone(), which at one point wasn't
	1660	// duplicating the filter
	1661	Transliterator* t2 = t->clone();
	1662	expect(*t2, "Able bodied baker's cats", "Ale odied ker's ts");
	1663
	1664	delete t;
	1665	delete t2;
	1666	}
	1667
	1668	void TransliteratorTest::TestToRules(void) {
	1669	const char* RBT = "rbt";
	1670	const char* SET = "set";
	1671	static const char* DATA[] = {
	1672	RBT,
	1673	"$a=\\u4E61; [$a] > A;",
	1674	"[\\u4E61] > A;",
	1675
	1676	RBT,
	1677	"$white=[[:Zs:][:Zl:]]; $white{a} > A;",
	1678	"[[:Zs:][:Zl:]]{a} > A;",
	1679
	1680	SET,
	1681	"[[:Zs:][:Zl:]]",
	1682	"[[:Zs:][:Zl:]]",
	1683
	1684	SET,
	1685	"[:Ps:]",
	1686	"[:Ps:]",
	1687
	1688	SET,
	1689	"[:L:]",
	1690	"[:L:]",
	1691
	1692	SET,
	1693	"[[:L:]-[A]]",
	1694	"[[:L:]-[A]]",
	1695
	1696	SET,
	1697	"[~[:Lu:][:Ll:]]",
	1698	"[~[:Lu:][:Ll:]]",
	1699
	1700	SET,
	1701	"[~[a-z]]",
	1702	"[~[a-z]]",
	1703
	1704	RBT,
	1705	"$white=[:Zs:]; $black=[^$white]; $black{a} > A;",
	1706	"[^[:Zs:]]{a} > A;",
	1707
	1708	RBT,
	1709	"$a=[:Zs:]; $b=[[a-z]-$a]; $b{a} > A;",
	1710	"[[a-z]-[:Zs:]]{a} > A;",
	1711
	1712	RBT,
	1713	"$a=[:Zs:]; $b=[$a&[a-z]]; $b{a} > A;",
	1714	"[[:Zs:]&[a-z]]{a} > A;",
	1715
	1716	RBT,
	1717	"$a=[:Zs:]; $b=[x$a]; $b{a} > A;",
	1718	"[x[:Zs:]]{a} > A;",
	1719
	1720	RBT,
	1721	"$accentMinus = [ [\\u0300-\\u0345] & [:M:] - [\\u0338]] ;"
	1722	"$macron = \\u0304 ;"
	1723	"$evowel = [aeiouyAEIOUY] ;"
	1724	"$iotasub = \\u0345 ;"
	1725	"($evowel $macron $accentMinus *) i > \| $1 $iotasub ;",
	1726	"([AEIOUYaeiouy]\\u0304[[\\u0300-\\u0345]&[:M:]-[\\u0338]]*)i > \| $1 \\u0345;",
	1727
	1728	RBT,
	1729	"([AEIOUYaeiouy]\\u0304[[:M:]-[\\u0304\\u0345]]*)i > \| $1 \\u0345;",
	1730	"([AEIOUYaeiouy]\\u0304[[:M:]-[\\u0304\\u0345]]*)i > \| $1 \\u0345;",
	1731	};
	1732	static const int32_t DATA_length = (int32_t)(sizeof(DATA) / sizeof(DATA[0]));
	1733
	1734	for (int32_t d=0; d < DATA_length; d+=3) {
	1735	if (DATA[d] == RBT) {
	1736	// Transliterator test
	1737	UParseError parseError;
	1738	UErrorCode status = U_ZERO_ERROR;
	1739	Transliterator *t = Transliterator::createFromRules("ID",
	1740	UnicodeString(DATA[d+1], -1, US_INV), UTRANS_FORWARD, parseError, status);
	1741	if (t == 0) {
	1742	dataerrln("FAIL: createFromRules failed - %s", u_errorName(status));
	1743	return;
	1744	}
	1745	UnicodeString rules, escapedRules;
	1746	t->toRules(rules, FALSE);
	1747	t->toRules(escapedRules, TRUE);
	1748	UnicodeString expRules = CharsToUnicodeString(DATA[d+2]);
	1749	UnicodeString expEscapedRules(DATA[d+2], -1, US_INV);
	1750	if (rules == expRules) {
	1751	logln((UnicodeString)"Ok: " + UnicodeString(DATA[d+1], -1, US_INV) +
	1752	" => " + rules);
	1753	} else {
	1754	errln((UnicodeString)"FAIL: " + UnicodeString(DATA[d+1], -1, US_INV) +
	1755	" => " + rules + ", exp " + expRules);
	1756	}
	1757	if (escapedRules == expEscapedRules) {
	1758	logln((UnicodeString)"Ok: " + UnicodeString(DATA[d+1], -1, US_INV) +
	1759	" => " + escapedRules);
	1760	} else {
	1761	errln((UnicodeString)"FAIL: " + UnicodeString(DATA[d+1], -1, US_INV) +
	1762	" => " + escapedRules + ", exp " + expEscapedRules);
	1763	}
	1764	delete t;
	1765
	1766	} else {
	1767	// UnicodeSet test
	1768	UErrorCode status = U_ZERO_ERROR;
	1769	UnicodeString pat(DATA[d+1], -1, US_INV);
	1770	UnicodeString expToPat(DATA[d+2], -1, US_INV);
	1771	UnicodeSet set(pat, status);
	1772	if (U_FAILURE(status)) {
	1773	errln("FAIL: UnicodeSet ct failed");
	1774	return;
	1775	}
	1776	// Adjust spacing etc. as necessary.
	1777	UnicodeString toPat;
	1778	set.toPattern(toPat);
	1779	if (expToPat == toPat) {
	1780	logln((UnicodeString)"Ok: " + pat +
	1781	" => " + toPat);
	1782	} else {
	1783	errln((UnicodeString)"FAIL: " + pat +
	1784	" => " + prettify(toPat, TRUE) +
	1785	", exp " + prettify(pat, TRUE));
	1786	}
	1787	}
	1788	}
	1789	}
	1790
	1791	void TransliteratorTest::TestContext() {
	1792	UTransPosition pos = {0, 2, 0, 1}; // cs cl s l
	1793	expect("de > x; {d}e > y;",
	1794	"de",
	1795	"ye",
	1796	&pos);
	1797
	1798	expect("ab{c} > z;",
	1799	"xadabdabcy",
	1800	"xadabdabzy");
	1801	}
	1802
	1803	void TransliteratorTest::TestSupplemental() {
	1804
	1805	expect(CharsToUnicodeString("$a=\\U00010300; $s=[\\U00010300-\\U00010323];"
	1806	"a > $a; $s > i;"),
	1807	CharsToUnicodeString("ab\\U0001030Fx"),
	1808	CharsToUnicodeString("\\U00010300bix"));
	1809
	1810	expect(CharsToUnicodeString("$a=[a-z\\U00010300-\\U00010323];"
	1811	"$b=[A-Z\\U00010400-\\U0001044D];"
	1812	"($a)($b) > $2 $1;"),
	1813	CharsToUnicodeString("aB\\U00010300\\U00010400c\\U00010401\\U00010301D"),
	1814	CharsToUnicodeString("Ba\\U00010400\\U00010300\\U00010401cD\\U00010301"));
	1815
	1816	// k\|ax\\U00010300xm
	1817
	1818	// k\|a\\U00010400\\U00010300xm
	1819	// ky\|\\U00010400\\U00010300xm
	1820	// ky\\U00010400\|\\U00010300xm
	1821
	1822	// ky\\U00010400\|\\U00010300\\U00010400m
	1823	// ky\\U00010400y\|\\U00010400m
	1824	expect(CharsToUnicodeString("$a=[a\\U00010300-\\U00010323];"
	1825	"$a {x} > \| @ \\U00010400;"
	1826	"{$a} [^\\u0000-\\uFFFF] > y;"),
	1827	CharsToUnicodeString("kax\\U00010300xm"),
	1828	CharsToUnicodeString("ky\\U00010400y\\U00010400m"));
	1829
	1830	expectT("Any-Name",
	1831	CharsToUnicodeString("\\U00010330\\U000E0061\\u00A0"),
	1832	UNICODE_STRING_SIMPLE("\\N{GOTHIC LETTER AHSA}\\N{TAG LATIN SMALL LETTER A}\\N{NO-BREAK SPACE}"));
	1833
	1834	expectT("Any-Hex/Unicode",
	1835	CharsToUnicodeString("\\U00010330\\U0010FF00\\U000E0061\\u00A0"),
	1836	UNICODE_STRING_SIMPLE("U+10330U+10FF00U+E0061U+00A0"));
	1837
	1838	expectT("Any-Hex/C",
	1839	CharsToUnicodeString("\\U00010330\\U0010FF00\\U000E0061\\u00A0"),
	1840	UNICODE_STRING_SIMPLE("\\U00010330\\U0010FF00\\U000E0061\\u00A0"));
	1841
	1842	expectT("Any-Hex/Perl",
	1843	CharsToUnicodeString("\\U00010330\\U0010FF00\\U000E0061\\u00A0"),
	1844	UNICODE_STRING_SIMPLE("\\x{10330}\\x{10FF00}\\x{E0061}\\x{A0}"));
	1845
	1846	expectT("Any-Hex/Java",
	1847	CharsToUnicodeString("\\U00010330\\U0010FF00\\U000E0061\\u00A0"),
	1848	UNICODE_STRING_SIMPLE("\\uD800\\uDF30\\uDBFF\\uDF00\\uDB40\\uDC61\\u00A0"));
	1849
	1850	expectT("Any-Hex/XML",
	1851	CharsToUnicodeString("\\U00010330\\U0010FF00\\U000E0061\\u00A0"),
	1852	"𐌰􏼀󠁡 ");
	1853
	1854	expectT("Any-Hex/XML10",
	1855	CharsToUnicodeString("\\U00010330\\U0010FF00\\U000E0061\\u00A0"),
	1856	"𐌰􏼀󠁡 ");
	1857
	1858	expectT(UNICODE_STRING_SIMPLE("[\\U000E0000-\\U000E0FFF] Remove"),
	1859	CharsToUnicodeString("\\U00010330\\U0010FF00\\U000E0061\\u00A0"),
	1860	CharsToUnicodeString("\\U00010330\\U0010FF00\\u00A0"));
	1861	}
	1862
	1863	void TransliteratorTest::TestQuantifier() {
	1864
	1865	// Make sure @ in a quantified anteContext works
	1866	expect("a+ {b} > \| @@ c; A > a; (a+ c) > '(' $1 ')';",
	1867	"AAAAAb",
	1868	"aaa(aac)");
	1869
	1870	// Make sure @ in a quantified postContext works
	1871	expect("{b} a+ > c @@ \|; (a+) > '(' $1 ')';",
	1872	"baaaaa",
	1873	"caa(aaa)");
	1874
	1875	// Make sure @ in a quantified postContext with seg ref works
	1876	expect("{(b)} a+ > $1 @@ \|; (a+) > '(' $1 ')';",
	1877	"baaaaa",
	1878	"baa(aaa)");
	1879
	1880	// Make sure @ past ante context doesn't enter ante context
	1881	UTransPosition pos = {0, 5, 3, 5};
	1882	expect("a+ {b} > \| @@ c; x > y; (a+ c) > '(' $1 ')';",
	1883	"xxxab",
	1884	"xxx(ac)",
	1885	&pos);
	1886
	1887	// Make sure @ past post context doesn't pass limit
	1888	UTransPosition pos2 = {0, 4, 0, 2};
	1889	expect("{b} a+ > c @@ \|; x > y; a > A;",
	1890	"baxx",
	1891	"caxx",
	1892	&pos2);
	1893
	1894	// Make sure @ past post context doesn't enter post context
	1895	expect("{b} a+ > c @@ \|; x > y; a > A;",
	1896	"baxx",
	1897	"cayy");
	1898
	1899	expect("(ab)? c > d;",
	1900	"c abc ababc",
	1901	"d d abd");
	1902
	1903	// NOTE: The (ab)+ when referenced just yields a single "ab",
	1904	// not the full sequence of them. This accords with perl behavior.
	1905	expect("(ab)+ {x} > '(' $1 ')';",
	1906	"x abx ababxy",
	1907	"x ab(ab) abab(ab)y");
	1908
	1909	expect("b+ > x;",
	1910	"ac abc abbc abbbc",
	1911	"ac axc axc axc");
	1912
	1913	expect("[abc]+ > x;",
	1914	"qac abrc abbcs abtbbc",
	1915	"qx xrx xs xtx");
	1916
	1917	expect("q{(ab)+} > x;",
	1918	"qa qab qaba qababc qaba",
	1919	"qa qx qxa qxc qxa");
	1920
	1921	expect("q(ab)* > x;",
	1922	"qa qab qaba qababc",
	1923	"xa x xa xc");
	1924
	1925	// NOTE: The (ab)+ when referenced just yields a single "ab",
	1926	// not the full sequence of them. This accords with perl behavior.
	1927	expect("q(ab)* > '(' $1 ')';",
	1928	"qa qab qaba qababc",
	1929	"()a (ab) (ab)a (ab)c");
	1930
	1931	// 'foo'+ and 'foo'* -- the quantifier should apply to the entire
	1932	// quoted string
	1933	expect("'ab'+ > x;",
	1934	"bb ab ababb",
	1935	"bb x xb");
	1936
	1937	// $foo+ and $foo* -- the quantifier should apply to the entire
	1938	// variable reference
	1939	expect("$var = ab; $var+ > x;",
	1940	"bb ab ababb",
	1941	"bb x xb");
	1942	}
	1943
	1944	class TestTrans : public Transliterator {
	1945	public:
	1946	TestTrans(const UnicodeString& id) : Transliterator(id, 0) {
	1947	}
	1948	virtual Transliterator* clone(void) const {
	1949	return new TestTrans(getID());
	1950	}
	1951	virtual void handleTransliterate(Replaceable& /text/, UTransPosition& offsets,
	1952	UBool /isIncremental/) const
	1953	{
	1954	offsets.start = offsets.limit;
	1955	}
	1956	virtual UClassID getDynamicClassID() const;
	1957	static UClassID U_EXPORT2 getStaticClassID();
	1958	};
	1959	UOBJECT_DEFINE_RTTI_IMPLEMENTATION(TestTrans)
	1960
	1961	/**
	1962	* Test Source-Target/Variant.
	1963	*/
	1964	void TransliteratorTest::TestSTV(void) {
	1965	int32_t ns = Transliterator::countAvailableSources();
	1966	if (ns < 0 \|\| ns > 255) {
	1967	errln((UnicodeString)"FAIL: Bad source count: " + ns);
	1968	return;
	1969	}
	1970	int32_t i, j;
	1971	for (i=0; i<ns; ++i) {
	1972	UnicodeString source;
	1973	Transliterator::getAvailableSource(i, source);
	1974	logln((UnicodeString)"" + i + ": " + source);
	1975	if (source.length() == 0) {
	1976	errln("FAIL: empty source");
	1977	continue;
	1978	}
	1979	int32_t nt = Transliterator::countAvailableTargets(source);
	1980	if (nt < 0 \|\| nt > 255) {
	1981	errln((UnicodeString)"FAIL: Bad target count: " + nt);
	1982	continue;
	1983	}
	1984	for (int32_t j=0; j<nt; ++j) {
	1985	UnicodeString target;
	1986	Transliterator::getAvailableTarget(j, source, target);
	1987	logln((UnicodeString)" " + j + ": " + target);
	1988	if (target.length() == 0) {
	1989	errln("FAIL: empty target");
	1990	continue;
	1991	}
	1992	int32_t nv = Transliterator::countAvailableVariants(source, target);
	1993	if (nv < 0 \|\| nv > 255) {
	1994	errln((UnicodeString)"FAIL: Bad variant count: " + nv);
	1995	continue;
	1996	}
	1997	for (int32_t k=0; k<nv; ++k) {
	1998	UnicodeString variant;
	1999	Transliterator::getAvailableVariant(k, source, target, variant);
	2000	if (variant.length() == 0) {
	2001	logln((UnicodeString)" " + k + ": <empty>");
	2002	} else {
	2003	logln((UnicodeString)" " + k + ": " + variant);
	2004	}
	2005	}
	2006	}
	2007	}
	2008
	2009	// Test registration
	2010	const char* IDS[] = { "Fieruwer", "Seoridf-Sweorie", "Oewoir-Oweri/Vsie" };
	2011	const char* FULL_IDS[] = { "Any-Fieruwer", "Seoridf-Sweorie", "Oewoir-Oweri/Vsie" };
	2012	const char* SOURCES[] = { NULL, "Seoridf", "Oewoir" };
	2013	for (i=0; i<3; ++i) {
	2014	Transliterator *t = new TestTrans(IDS[i]);
	2015	if (t == 0) {
	2016	errln("FAIL: out of memory");
	2017	return;
	2018	}
	2019	if (t->getID() != IDS[i]) {
	2020	errln((UnicodeString)"FAIL: ID mismatch for " + IDS[i]);
	2021	delete t;
	2022	return;
	2023	}
	2024	Transliterator::registerInstance(t);
	2025	UErrorCode status = U_ZERO_ERROR;
	2026	t = Transliterator::createInstance(IDS[i], UTRANS_FORWARD, status);
	2027	if (t == NULL) {
	2028	errln((UnicodeString)"FAIL: Registration/creation failed for ID " +
	2029	IDS[i]);
	2030	} else {
	2031	logln((UnicodeString)"Ok: Registration/creation succeeded for ID " +
	2032	IDS[i]);
	2033	delete t;
	2034	}
	2035	Transliterator::unregister(IDS[i]);
	2036	t = Transliterator::createInstance(IDS[i], UTRANS_FORWARD, status);
	2037	if (t != NULL) {
	2038	errln((UnicodeString)"FAIL: Unregistration failed for ID " +
	2039	IDS[i]);
	2040	delete t;
	2041	}
	2042	}
	2043
	2044	// Make sure getAvailable API reflects removal
	2045	int32_t n = Transliterator::countAvailableIDs();
	2046	for (i=0; i<n; ++i) {
	2047	UnicodeString id = Transliterator::getAvailableID(i);
	2048	for (j=0; j<3; ++j) {
	2049	if (id.caseCompare(FULL_IDS[j],0)==0) {
	2050	errln((UnicodeString)"FAIL: unregister(" + id + ") failed");
	2051	}
	2052	}
	2053	}
	2054	n = Transliterator::countAvailableTargets("Any");
	2055	for (i=0; i<n; ++i) {
	2056	UnicodeString t;
	2057	Transliterator::getAvailableTarget(i, "Any", t);
	2058	if (t.caseCompare(IDS[0],0)==0) {
	2059	errln((UnicodeString)"FAIL: unregister(Any-" + t + ") failed");
	2060	}
	2061	}
	2062	n = Transliterator::countAvailableSources();
	2063	for (i=0; i<n; ++i) {
	2064	UnicodeString s;
	2065	Transliterator::getAvailableSource(i, s);
	2066	for (j=0; j<3; ++j) {
	2067	if (SOURCES[j] == NULL) continue;
	2068	if (s.caseCompare(SOURCES[j],0)==0) {
	2069	errln((UnicodeString)"FAIL: unregister(" + s + "-*) failed");
	2070	}
	2071	}
	2072	}
	2073	}
	2074
	2075	/**
	2076	* Test inverse of Greek-Latin; Title()
	2077	*/
	2078	void TransliteratorTest::TestCompoundInverse(void) {
	2079	UParseError parseError;
	2080	UErrorCode status = U_ZERO_ERROR;
	2081	Transliterator *t = Transliterator::createInstance
	2082	("Greek-Latin; Title()", UTRANS_REVERSE,parseError, status);
	2083	if (t == 0) {
	2084	dataerrln("FAIL: createInstance - %s", u_errorName(status));
	2085	return;
	2086	}
	2087	UnicodeString exp("(Title);Latin-Greek");
	2088	if (t->getID() == exp) {
	2089	logln("Ok: inverse of \"Greek-Latin; Title()\" is \"" +
	2090	t->getID());
	2091	} else {
	2092	errln("FAIL: inverse of \"Greek-Latin; Title()\" is \"" +
	2093	t->getID() + "\", expected \"" + exp + "\"");
	2094	}
	2095	delete t;
	2096	}
	2097
	2098	/**
	2099	* Test NFD chaining with RBT
	2100	*/
	2101	void TransliteratorTest::TestNFDChainRBT() {
	2102	UParseError pe;
	2103	UErrorCode ec = U_ZERO_ERROR;
	2104	Transliterator* t = Transliterator::createFromRules(
	2105	"TEST", "::NFD; aa > Q; a > q;",
	2106	UTRANS_FORWARD, pe, ec);
	2107	if (t == NULL \|\| U_FAILURE(ec)) {
	2108	dataerrln("FAIL: Transliterator::createFromRules failed with %s", u_errorName(ec));
	2109	return;
	2110	}
	2111	expect(*t, "aa", "Q");
	2112	delete t;
	2113
	2114	// TEMPORARY TESTS -- BEING DEBUGGED
	2115	//=- UnicodeString s, s2;
	2116	//=- t = Transliterator::createInstance("Latin-Devanagari", UTRANS_FORWARD, pe, ec);
	2117	//=- s = CharsToUnicodeString("rmk\\u1E63\\u0113t");
	2118	//=- s2 = CharsToUnicodeString("\\u0930\\u094D\\u092E\\u094D\\u0915\\u094D\\u0937\\u0947\\u0924\\u094D");
	2119	//=- expect(*t, s, s2);
	2120	//=- delete t;
	2121	//=-
	2122	//=- t = Transliterator::createInstance("Devanagari-Latin", UTRANS_FORWARD, pe, ec);
	2123	//=- expect(*t, s2, s);
	2124	//=- delete t;
	2125	//=-
	2126	//=- t = Transliterator::createInstance("Latin-Devanagari;Devanagari-Latin", UTRANS_FORWARD, pe, ec);
	2127	//=- s = CharsToUnicodeString("rmk\\u1E63\\u0113t");
	2128	//=- expect(*t, s, s);
	2129	//=- delete t;
	2130
	2131	// const char* source[] = {
	2132	// /*
	2133	// "\\u015Br\\u012Bmad",
	2134	// "bhagavadg\\u012Bt\\u0101",
	2135	// "adhy\\u0101ya",
	2136	// "arjuna",
	2137	// "vi\\u1E63\\u0101da",
	2138	// "y\\u014Dga",
	2139	// "dhr\\u0325tar\\u0101\\u1E63\\u1E6Dra",
	2140	// "uv\\u0101cr\\u0325",
	2141	// */
	2142	// "rmk\\u1E63\\u0113t",
	2143	// //"dharmak\\u1E63\\u0113tr\\u0113",
	2144	// /*
	2145	// "kuruk\\u1E63\\u0113tr\\u0113",
	2146	// "samav\\u0113t\\u0101",
	2147	// "yuyutsava-\\u1E25",
	2148	// "m\\u0101mak\\u0101-\\u1E25",
	2149	// // "p\\u0101\\u1E47\\u1E0Dav\\u0101\\u015Bcaiva",
	2150	// "kimakurvata",
	2151	// "san\\u0304java",
	2152	// */
	2153	//
	2154	// 0
	2155	// };
	2156	// const char* expected[] = {
	2157	// /*
	2158	// "\\u0936\\u094d\\u0930\\u0940\\u092e\\u0926\\u094d",
	2159	// "\\u092d\\u0917\\u0935\\u0926\\u094d\\u0917\\u0940\\u0924\\u093e",
	2160	// "\\u0905\\u0927\\u094d\\u092f\\u093e\\u092f",
	2161	// "\\u0905\\u0930\\u094d\\u091c\\u0941\\u0928",
	2162	// "\\u0935\\u093f\\u0937\\u093e\\u0926",
	2163	// "\\u092f\\u094b\\u0917",
	2164	// "\\u0927\\u0943\\u0924\\u0930\\u093e\\u0937\\u094d\\u091f\\u094d\\u0930",
	2165	// "\\u0909\\u0935\\u093E\\u091A\\u0943",
	2166	// */
	2167	// "\\u0927",
	2168	// //"\\u0927\\u0930\\u094d\\u092e\\u0915\\u094d\\u0937\\u0947\\u0924\\u094d\\u0930\\u0947",
	2169	// /*
	2170	// "\\u0915\\u0941\\u0930\\u0941\\u0915\\u094d\\u0937\\u0947\\u0924\\u094d\\u0930\\u0947",
	2171	// "\\u0938\\u092e\\u0935\\u0947\\u0924\\u093e",
	2172	// "\\u092f\\u0941\\u092f\\u0941\\u0924\\u094d\\u0938\\u0935\\u0903",
	2173	// "\\u092e\\u093e\\u092e\\u0915\\u093e\\u0903",
	2174	// // "\\u092a\\u093e\\u0923\\u094d\\u0921\\u0935\\u093e\\u0936\\u094d\\u091a\\u0948\\u0935",
	2175	// "\\u0915\\u093f\\u092e\\u0915\\u0941\\u0930\\u094d\\u0935\\u0924",
	2176	// "\\u0938\\u0902\\u091c\\u0935",
	2177	// */
	2178	// 0
	2179	// };
	2180	// UErrorCode status = U_ZERO_ERROR;
	2181	// UParseError parseError;
	2182	// UnicodeString message;
	2183	// Transliterator* latinToDevToLatin=Transliterator::createInstance("Latin-Devanagari;Devanagari-Latin", UTRANS_FORWARD, parseError, status);
	2184	// Transliterator* devToLatinToDev=Transliterator::createInstance("Devanagari-Latin;Latin-Devanagari", UTRANS_FORWARD, parseError, status);
	2185	// if(U_FAILURE(status)){
	2186	// errln("FAIL: construction " + UnicodeString(" Error: ") + u_errorName(status));
	2187	// errln("PreContext: " + prettify(parseError.preContext) + "PostContext: " + prettify( parseError.postContext) );
	2188	// delete latinToDevToLatin;
	2189	// delete devToLatinToDev;
	2190	// return;
	2191	// }
	2192	// UnicodeString gotResult;
	2193	// for(int i= 0; source[i] != 0; i++){
	2194	// gotResult = source[i];
	2195	// expect(*latinToDevToLatin,CharsToUnicodeString(source[i]),CharsToUnicodeString(source[i]));
	2196	// expect(*devToLatinToDev,CharsToUnicodeString(expected[i]),CharsToUnicodeString(expected[i]));
	2197	// }
	2198	// delete latinToDevToLatin;
	2199	// delete devToLatinToDev;
	2200	}
	2201
	2202	/**
	2203	* Inverse of "Null" should be "Null". (J21)
	2204	*/
	2205	void TransliteratorTest::TestNullInverse() {
	2206	UParseError pe;
	2207	UErrorCode ec = U_ZERO_ERROR;
	2208	Transliterator *t = Transliterator::createInstance("Null", UTRANS_FORWARD, pe, ec);
	2209	if (t == 0 \|\| U_FAILURE(ec)) {
	2210	errln("FAIL: createInstance");
	2211	return;
	2212	}
	2213	Transliterator *u = t->createInverse(ec);
	2214	if (u == 0 \|\| U_FAILURE(ec)) {
	2215	errln("FAIL: createInverse");
	2216	delete t;
	2217	return;
	2218	}
	2219	if (u->getID() != "Null") {
	2220	errln("FAIL: Inverse of Null should be Null");
	2221	}
	2222	delete t;
	2223	delete u;
	2224	}
	2225
	2226	/**
	2227	* Check ID of inverse of alias. (J22)
	2228	*/
	2229	void TransliteratorTest::TestAliasInverseID() {
	2230	UnicodeString ID("Latin-Hangul", ""); // This should be any alias ID with an inverse
	2231	UParseError pe;
	2232	UErrorCode ec = U_ZERO_ERROR;
	2233	Transliterator *t = Transliterator::createInstance(ID, UTRANS_FORWARD, pe, ec);
	2234	if (t == 0 \|\| U_FAILURE(ec)) {
	2235	dataerrln("FAIL: createInstance - %s", u_errorName(ec));
	2236	return;
	2237	}
	2238	Transliterator *u = t->createInverse(ec);
	2239	if (u == 0 \|\| U_FAILURE(ec)) {
	2240	errln("FAIL: createInverse");
	2241	delete t;
	2242	return;
	2243	}
	2244	UnicodeString exp = "Hangul-Latin";
	2245	UnicodeString got = u->getID();
	2246	if (got != exp) {
	2247	errln((UnicodeString)"FAIL: Inverse of " + ID + " is " + got +
	2248	", expected " + exp);
	2249	}
	2250	delete t;
	2251	delete u;
	2252	}
	2253
	2254	/**
	2255	* Test IDs of inverses of compound transliterators. (J20)
	2256	*/
	2257	void TransliteratorTest::TestCompoundInverseID() {
	2258	UnicodeString ID = "Latin-Jamo;NFC(NFD)";
	2259	UParseError pe;
	2260	UErrorCode ec = U_ZERO_ERROR;
	2261	Transliterator *t = Transliterator::createInstance(ID, UTRANS_FORWARD, pe, ec);
	2262	if (t == 0 \|\| U_FAILURE(ec)) {
	2263	dataerrln("FAIL: createInstance - %s", u_errorName(ec));
	2264	return;
	2265	}
	2266	Transliterator *u = t->createInverse(ec);
	2267	if (u == 0 \|\| U_FAILURE(ec)) {
	2268	errln("FAIL: createInverse");
	2269	delete t;
	2270	return;
	2271	}
	2272	UnicodeString exp = "NFD(NFC);Jamo-Latin";
	2273	UnicodeString got = u->getID();
	2274	if (got != exp) {
	2275	errln((UnicodeString)"FAIL: Inverse of " + ID + " is " + got +
	2276	", expected " + exp);
	2277	}
	2278	delete t;
	2279	delete u;
	2280	}
	2281
	2282	/**
	2283	* Test undefined variable.
	2284
	2285	*/
	2286	void TransliteratorTest::TestUndefinedVariable() {
	2287	UnicodeString rule = "$initial } a <> \\u1161;";
	2288	UParseError pe;
	2289	UErrorCode ec = U_ZERO_ERROR;
	2290	Transliterator *t = Transliterator::createFromRules("<ID>", rule, UTRANS_FORWARD, pe, ec);
	2291	delete t;
	2292	if (U_FAILURE(ec)) {
	2293	logln((UnicodeString)"OK: Got exception for " + rule + ", as expected: " +
	2294	u_errorName(ec));
	2295	return;
	2296	}
	2297	errln((UnicodeString)"Fail: bogus rule " + rule + " compiled with error " +
	2298	u_errorName(ec));
	2299	}
	2300
	2301	/**
	2302	* Test empty context.
	2303	*/
	2304	void TransliteratorTest::TestEmptyContext() {
	2305	expect(" { a } > b;", "xay a ", "xby b ");
	2306	}
	2307
	2308	/**
	2309	* Test compound filter ID syntax
	2310	*/
	2311	void TransliteratorTest::TestCompoundFilterID(void) {
	2312	static const char* DATA[] = {
	2313	// Col. 1 = ID or rule set (latter must start with #)
	2314
	2315	// = columns > 1 are null if expect col. 1 to be illegal =
	2316
	2317	// Col. 2 = direction, "F..." or "R..."
	2318	// Col. 3 = source string
	2319	// Col. 4 = exp result
	2320
	2321	"[abc]; [abc]", NULL, NULL, NULL, // multiple filters
	2322	"Latin-Greek; [abc];", NULL, NULL, NULL, // misplaced filter
	2323	"[b]; Latin-Greek; Upper; ([xyz])", "F", "abc", "a\\u0392c",
	2324	"[b]; (Lower); Latin-Greek; Upper(); ([\\u0392])", "R", "\\u0391\\u0392\\u0393", "\\u0391b\\u0393",
	2325	"#\n::[b]; ::Latin-Greek; ::Upper; ::([xyz]);", "F", "abc", "a\\u0392c",
	2326	"#\n::[b]; ::(Lower); ::Latin-Greek; ::Upper(); ::([\\u0392]);", "R", "\\u0391\\u0392\\u0393", "\\u0391b\\u0393",
	2327	NULL,
	2328	};
	2329
	2330	for (int32_t i=0; DATA[i]; i+=4) {
	2331	UnicodeString id = CharsToUnicodeString(DATA[i]);
	2332	UTransDirection direction = (DATA[i+1] != NULL && DATA[i+1][0] == 'R') ?
	2333	UTRANS_REVERSE : UTRANS_FORWARD;
	2334	UnicodeString source;
	2335	UnicodeString exp;
	2336	if (DATA[i+2] != NULL) {
	2337	source = CharsToUnicodeString(DATA[i+2]);
	2338	exp = CharsToUnicodeString(DATA[i+3]);
	2339	}
	2340	UBool expOk = (DATA[i+1] != NULL);
	2341	Transliterator* t = NULL;
	2342	UParseError pe;
	2343	UErrorCode ec = U_ZERO_ERROR;
	2344	if (id.charAt(0) == 0x23/#/) {
	2345	t = Transliterator::createFromRules("ID", id, direction, pe, ec);
	2346	} else {
	2347	t = Transliterator::createInstance(id, direction, pe, ec);
	2348	}
	2349	UBool ok = (t != NULL && U_SUCCESS(ec));
	2350	UnicodeString transID;
	2351	if (t!=0) {
	2352	transID = t->getID();
	2353	}
	2354	else {
	2355	transID = UnicodeString("NULL", "");
	2356	}
	2357	if (ok == expOk) {
	2358	logln((UnicodeString)"Ok: " + id + " => " + transID + ", " +
	2359	u_errorName(ec));
	2360	if (source.length() != 0) {
	2361	expect(*t, source, exp);
	2362	}
	2363	delete t;
	2364	} else {
	2365	dataerrln((UnicodeString)"FAIL: " + id + " => " + transID + ", " +
	2366	u_errorName(ec));
	2367	}
	2368	}
	2369	}
	2370
	2371	/**
	2372	* Test new property set syntax
	2373	*/
	2374	void TransliteratorTest::TestPropertySet() {
	2375	expect(UNICODE_STRING_SIMPLE("a>A; \\p{Lu}>x; \\p{ANY}>y;"), "abcDEF", "Ayyxxx");
	2376	expect("(.+)>'[' $1 ']';", " a stitch \n in time \r saves 9",
	2377	"[ a stitch ]\n[ in time ]\r[ saves 9]");
	2378	}
	2379
	2380	/**
	2381	* Test various failure points of the new 2.0 engine.
	2382	*/
	2383	void TransliteratorTest::TestNewEngine() {
	2384	UParseError pe;
	2385	UErrorCode ec = U_ZERO_ERROR;
	2386	Transliterator *t = Transliterator::createInstance("Latin-Hiragana", UTRANS_FORWARD, pe, ec);
	2387	if (t == 0 \|\| U_FAILURE(ec)) {
	2388	dataerrln("FAIL: createInstance Latin-Hiragana - %s", u_errorName(ec));
	2389	return;
	2390	}
	2391	// Katakana should be untouched
	2392	expect(*t, CharsToUnicodeString("a\\u3042\\u30A2"),
	2393	CharsToUnicodeString("\\u3042\\u3042\\u30A2"));
	2394
	2395	delete t;
	2396
	2397	#if 1
	2398	// This test will only work if Transliterator.ROLLBACK is
	2399	// true. Otherwise, this test will fail, revealing a
	2400	// limitation of global filters in incremental mode.
	2401	Transliterator *a =
	2402	Transliterator::createFromRules("a_to_A", "a > A;", UTRANS_FORWARD, pe, ec);
	2403	Transliterator *A =
	2404	Transliterator::createFromRules("A_to_b", "A > b;", UTRANS_FORWARD, pe, ec);
	2405	if (U_FAILURE(ec)) {
	2406	delete a;
	2407	delete A;
	2408	return;
	2409	}
	2410
	2411	Transliterator* array[3];
	2412	array[0] = a;
	2413	array[1] = Transliterator::createInstance("NFD", UTRANS_FORWARD, pe, ec);
	2414	array[2] = A;
	2415	if (U_FAILURE(ec)) {
	2416	errln("FAIL: createInstance NFD");
	2417	delete a;
	2418	delete A;
	2419	delete array[1];
	2420	return;
	2421	}
	2422
	2423	t = new CompoundTransliterator(array, 3, new UnicodeSet("[:Ll:]", ec));
	2424	if (U_FAILURE(ec)) {
	2425	errln("FAIL: UnicodeSet constructor");
	2426	delete a;
	2427	delete A;
	2428	delete array[1];
	2429	delete t;
	2430	return;
	2431	}
	2432
	2433	expect(*t, "aAaA", "bAbA");
	2434
	2435	assertTrue("countElements", t->countElements() == 3);
	2436	assertEquals("getElement(0)", t->getElement(0, ec).getID(), "a_to_A");
	2437	assertEquals("getElement(1)", t->getElement(1, ec).getID(), "NFD");
	2438	assertEquals("getElement(2)", t->getElement(2, ec).getID(), "A_to_b");
	2439	assertSuccess("getElement", ec);
	2440
	2441	delete a;
	2442	delete A;
	2443	delete array[1];
	2444	delete t;
	2445	#endif
	2446
	2447	expect("$smooth = x; $macron = q; [:^L:] { ([aeiouyAEIOUY] $macron?) } [^aeiouyAEIOUY$smooth$macron] > \| $1 $smooth ;",
	2448	"a",
	2449	"ax");
	2450
	2451	UnicodeString gr = CharsToUnicodeString(
	2452	"$ddot = \\u0308 ;"
	2453	"$lcgvowel = [\\u03b1\\u03b5\\u03b7\\u03b9\\u03bf\\u03c5\\u03c9] ;"
	2454	"$rough = \\u0314 ;"
	2455	"($lcgvowel+ $ddot?) $rough > h \| $1 ;"
	2456	"\\u03b1 <> a ;"
	2457	"$rough <> h ;");
	2458
	2459	expect(gr, CharsToUnicodeString("\\u03B1\\u0314"), "ha");
	2460	}
	2461
	2462	/**
	2463	* Test quantified segment behavior. We want:
	2464	* ([abc])+ > x $1 x; applied to "cba" produces "xax"
	2465	*/
	2466	void TransliteratorTest::TestQuantifiedSegment(void) {
	2467	// The normal case
	2468	expect("([abc]+) > x $1 x;", "cba", "xcbax");
	2469
	2470	// The tricky case; the quantifier is around the segment
	2471	expect("([abc])+ > x $1 x;", "cba", "xax");
	2472
	2473	// Tricky case in reverse direction
	2474	expect("([abc])+ { q > x $1 x;", "cbaq", "cbaxax");
	2475
	2476	// Check post-context segment
	2477	expect("{q} ([a-d])+ > '(' $1 ')';", "ddqcba", "dd(a)cba");
	2478
	2479	// Test toRule/toPattern for non-quantified segment.
	2480	// Careful with spacing here.
	2481	UnicodeString r("([a-c]){q} > x $1 x;");
	2482	UParseError pe;
	2483	UErrorCode ec = U_ZERO_ERROR;
	2484	Transliterator* t = Transliterator::createFromRules("ID", r, UTRANS_FORWARD, pe, ec);
	2485	if (U_FAILURE(ec)) {
	2486	errln("FAIL: createFromRules");
	2487	delete t;
	2488	return;
	2489	}
	2490	UnicodeString rr;
	2491	t->toRules(rr, TRUE);
	2492	if (r != rr) {
	2493	errln((UnicodeString)"FAIL: \"" + r + "\" x toRules() => \"" + rr + "\"");
	2494	} else {
	2495	logln((UnicodeString)"Ok: \"" + r + "\" x toRules() => \"" + rr + "\"");
	2496	}
	2497	delete t;
	2498
	2499	// Test toRule/toPattern for quantified segment.
	2500	// Careful with spacing here.
	2501	r = "([a-c])+{q} > x $1 x;";
	2502	t = Transliterator::createFromRules("ID", r, UTRANS_FORWARD, pe, ec);
	2503	if (U_FAILURE(ec)) {
	2504	errln("FAIL: createFromRules");
	2505	delete t;
	2506	return;
	2507	}
	2508	t->toRules(rr, TRUE);
	2509	if (r != rr) {
	2510	errln((UnicodeString)"FAIL: \"" + r + "\" x toRules() => \"" + rr + "\"");
	2511	} else {
	2512	logln((UnicodeString)"Ok: \"" + r + "\" x toRules() => \"" + rr + "\"");
	2513	}
	2514	delete t;
	2515	}
	2516
	2517	//======================================================================
	2518	// Ram's tests
	2519	//======================================================================
	2520	void TransliteratorTest::TestDevanagariLatinRT(){
	2521	const int MAX_LEN= 52;
	2522	const char* const source[MAX_LEN] = {
	2523	"bh\\u0101rata",
	2524	"kra",
	2525	"k\\u1E63a",
	2526	"khra",
	2527	"gra",
	2528	"\\u1E45ra",
	2529	"cra",
	2530	"chra",
	2531	"j\\u00F1a",
	2532	"jhra",
	2533	"\\u00F1ra",
	2534	"\\u1E6Dya",
	2535	"\\u1E6Dhra",
	2536	"\\u1E0Dya",
	2537	//"r\\u0323ya", // \u095c is not valid in Devanagari
	2538	"\\u1E0Dhya",
	2539	"\\u1E5Bhra",
	2540	"\\u1E47ra",
	2541	"tta",
	2542	"thra",
	2543	"dda",
	2544	"dhra",
	2545	"nna",
	2546	"pra",
	2547	"phra",
	2548	"bra",
	2549	"bhra",
	2550	"mra",
	2551	"\\u1E49ra",
	2552	//"l\\u0331ra",
	2553	"yra",
	2554	"\\u1E8Fra",
	2555	//"l-",
	2556	"vra",
	2557	"\\u015Bra",
	2558	"\\u1E63ra",
	2559	"sra",
	2560	"hma",
	2561	"\\u1E6D\\u1E6Da",
	2562	"\\u1E6D\\u1E6Dha",
	2563	"\\u1E6Dh\\u1E6Dha",
	2564	"\\u1E0D\\u1E0Da",
	2565	"\\u1E0D\\u1E0Dha",
	2566	"\\u1E6Dya",
	2567	"\\u1E6Dhya",
	2568	"\\u1E0Dya",
	2569	"\\u1E0Dhya",
	2570	// Not roundtrippable --
	2571	// \\u0939\\u094d\\u094d\\u092E - hma
	2572	// \\u0939\\u094d\\u092E - hma
	2573	// CharsToUnicodeString("hma"),
	2574	"hya",
	2575	"\\u015Br\\u0325",
	2576	"\\u015Bca",
	2577	"\\u0115",
	2578	"san\\u0304j\\u012Bb s\\u0113nagupta",
	2579	"\\u0101nand vaddir\\u0101ju",
	2580	"\\u0101",
	2581	"a"
	2582	};
	2583	const char* const expected[MAX_LEN] = {
	2584	"\\u092D\\u093E\\u0930\\u0924", /* bha\\u0304rata */
	2585	"\\u0915\\u094D\\u0930", /* kra */
	2586	"\\u0915\\u094D\\u0937", /* ks\\u0323a */
	2587	"\\u0916\\u094D\\u0930", /* khra */
	2588	"\\u0917\\u094D\\u0930", /* gra */
	2589	"\\u0919\\u094D\\u0930", /* n\\u0307ra */
	2590	"\\u091A\\u094D\\u0930", /* cra */
	2591	"\\u091B\\u094D\\u0930", /* chra */
	2592	"\\u091C\\u094D\\u091E", /* jn\\u0303a */
	2593	"\\u091D\\u094D\\u0930", /* jhra */
	2594	"\\u091E\\u094D\\u0930", /* n\\u0303ra */
	2595	"\\u091F\\u094D\\u092F", /* t\\u0323ya */
	2596	"\\u0920\\u094D\\u0930", /* t\\u0323hra */
	2597	"\\u0921\\u094D\\u092F", /* d\\u0323ya */
	2598	//"\\u095C\\u094D\\u092F", /* r\\u0323ya */ // \u095c is not valid in Devanagari
	2599	"\\u0922\\u094D\\u092F", /* d\\u0323hya */
	2600	"\\u0922\\u093C\\u094D\\u0930", /* r\\u0323hra */
	2601	"\\u0923\\u094D\\u0930", /* n\\u0323ra */
	2602	"\\u0924\\u094D\\u0924", /* tta */
	2603	"\\u0925\\u094D\\u0930", /* thra */
	2604	"\\u0926\\u094D\\u0926", /* dda */
	2605	"\\u0927\\u094D\\u0930", /* dhra */
	2606	"\\u0928\\u094D\\u0928", /* nna */
	2607	"\\u092A\\u094D\\u0930", /* pra */
	2608	"\\u092B\\u094D\\u0930", /* phra */
	2609	"\\u092C\\u094D\\u0930", /* bra */
	2610	"\\u092D\\u094D\\u0930", /* bhra */
	2611	"\\u092E\\u094D\\u0930", /* mra */
	2612	"\\u0929\\u094D\\u0930", /* n\\u0331ra */
	2613	//"\\u0934\\u094D\\u0930", /* l\\u0331ra */
	2614	"\\u092F\\u094D\\u0930", /* yra */
	2615	"\\u092F\\u093C\\u094D\\u0930", /* y\\u0307ra */
	2616	//"l-",
	2617	"\\u0935\\u094D\\u0930", /* vra */
	2618	"\\u0936\\u094D\\u0930", /* s\\u0301ra */
	2619	"\\u0937\\u094D\\u0930", /* s\\u0323ra */
	2620	"\\u0938\\u094D\\u0930", /* sra */
	2621	"\\u0939\\u094d\\u092E", /* hma */
	2622	"\\u091F\\u094D\\u091F", /* t\\u0323t\\u0323a */
	2623	"\\u091F\\u094D\\u0920", /* t\\u0323t\\u0323ha */
	2624	"\\u0920\\u094D\\u0920", /* t\\u0323ht\\u0323ha*/
	2625	"\\u0921\\u094D\\u0921", /* d\\u0323d\\u0323a */
	2626	"\\u0921\\u094D\\u0922", /* d\\u0323d\\u0323ha */
	2627	"\\u091F\\u094D\\u092F", /* t\\u0323ya */
	2628	"\\u0920\\u094D\\u092F", /* t\\u0323hya */
	2629	"\\u0921\\u094D\\u092F", /* d\\u0323ya */
	2630	"\\u0922\\u094D\\u092F", /* d\\u0323hya */
	2631	// "hma", /* hma */
	2632	"\\u0939\\u094D\\u092F", /* hya */
	2633	"\\u0936\\u0943", /* s\\u0301r\\u0325a */
	2634	"\\u0936\\u094D\\u091A", /* s\\u0301ca */
	2635	"\\u090d", /* e\\u0306 */
	2636	"\\u0938\\u0902\\u091C\\u0940\\u092C\\u094D \\u0938\\u0947\\u0928\\u0917\\u0941\\u092A\\u094D\\u0924",
	2637	"\\u0906\\u0928\\u0902\\u0926\\u094D \\u0935\\u0926\\u094D\\u0926\\u093F\\u0930\\u093E\\u091C\\u0941",
	2638	"\\u0906",
	2639	"\\u0905",
	2640	};
	2641	UErrorCode status = U_ZERO_ERROR;
	2642	UParseError parseError;
	2643	UnicodeString message;
	2644	Transliterator* latinToDev=Transliterator::createInstance("Latin-Devanagari", UTRANS_FORWARD, parseError, status);
	2645	Transliterator* devToLatin=Transliterator::createInstance("Devanagari-Latin", UTRANS_FORWARD, parseError, status);
	2646	if(U_FAILURE(status)){
	2647	dataerrln("FAIL: construction " + UnicodeString(" Error: ") + u_errorName(status));
	2648	dataerrln("PreContext: " + prettify(parseError.preContext) + " PostContext: " + prettify( parseError.postContext) );
	2649	return;
	2650	}
	2651	UnicodeString gotResult;
	2652	for(int i= 0; i<MAX_LEN; i++){
	2653	gotResult = source[i];
	2654	expect(*latinToDev,CharsToUnicodeString(source[i]),CharsToUnicodeString(expected[i]));
	2655	expect(*devToLatin,CharsToUnicodeString(expected[i]),CharsToUnicodeString(source[i]));
	2656	}
	2657	delete latinToDev;
	2658	delete devToLatin;
	2659	}
	2660
	2661	void TransliteratorTest::TestTeluguLatinRT(){
	2662	const int MAX_LEN=10;
	2663	const char* const source[MAX_LEN] = {
	2664	"raghur\\u0101m vi\\u015Bvan\\u0101dha", /* Raghuram Viswanadha */
	2665	"\\u0101nand vaddir\\u0101ju", /* Anand Vaddiraju */
	2666	"r\\u0101j\\u012Bv ka\\u015Barab\\u0101da", /* Rajeev Kasarabada */
	2667	"san\\u0304j\\u012Bv ka\\u015Barab\\u0101da", /* sanjeev kasarabada */
	2668	"san\\u0304j\\u012Bb sen'gupta", /* sanjib sengupata */
	2669	"amar\\u0113ndra hanum\\u0101nula", /* Amarendra hanumanula */
	2670	"ravi kum\\u0101r vi\\u015Bvan\\u0101dha", /* Ravi Kumar Viswanadha */
	2671	"\\u0101ditya kandr\\u0113gula", /* Aditya Kandregula */
	2672	"\\u015Br\\u012Bdhar ka\\u1E47\\u1E6Dama\\u015Be\\u1E6D\\u1E6Di",/* Shridhar Kantamsetty */
	2673	"m\\u0101dhav de\\u015Be\\u1E6D\\u1E6Di" /* Madhav Desetty */
	2674	};
	2675
	2676	const char* const expected[MAX_LEN] = {
	2677	"\\u0c30\\u0c18\\u0c41\\u0c30\\u0c3e\\u0c2e\\u0c4d \\u0c35\\u0c3f\\u0c36\\u0c4d\\u0c35\\u0c28\\u0c3e\\u0c27",
	2678	"\\u0c06\\u0c28\\u0c02\\u0c26\\u0c4d \\u0C35\\u0C26\\u0C4D\\u0C26\\u0C3F\\u0C30\\u0C3E\\u0C1C\\u0C41",
	2679	"\\u0c30\\u0c3e\\u0c1c\\u0c40\\u0c35\\u0c4d \\u0c15\\u0c36\\u0c30\\u0c2c\\u0c3e\\u0c26",
	2680	"\\u0c38\\u0c02\\u0c1c\\u0c40\\u0c35\\u0c4d \\u0c15\\u0c36\\u0c30\\u0c2c\\u0c3e\\u0c26",
	2681	"\\u0c38\\u0c02\\u0c1c\\u0c40\\u0c2c\\u0c4d \\u0c38\\u0c46\\u0c28\\u0c4d\\u0c17\\u0c41\\u0c2a\\u0c4d\\u0c24",
	2682	"\\u0c05\\u0c2e\\u0c30\\u0c47\\u0c02\\u0c26\\u0c4d\\u0c30 \\u0c39\\u0c28\\u0c41\\u0c2e\\u0c3e\\u0c28\\u0c41\\u0c32",
	2683	"\\u0c30\\u0c35\\u0c3f \\u0c15\\u0c41\\u0c2e\\u0c3e\\u0c30\\u0c4d \\u0c35\\u0c3f\\u0c36\\u0c4d\\u0c35\\u0c28\\u0c3e\\u0c27",
	2684	"\\u0c06\\u0c26\\u0c3f\\u0c24\\u0c4d\\u0c2f \\u0C15\\u0C02\\u0C26\\u0C4D\\u0C30\\u0C47\\u0C17\\u0C41\\u0c32",
	2685	"\\u0c36\\u0c4d\\u0c30\\u0c40\\u0C27\\u0C30\\u0C4D \\u0c15\\u0c02\\u0c1f\\u0c2e\\u0c36\\u0c46\\u0c1f\\u0c4d\\u0c1f\\u0c3f",
	2686	"\\u0c2e\\u0c3e\\u0c27\\u0c35\\u0c4d \\u0c26\\u0c46\\u0c36\\u0c46\\u0c1f\\u0c4d\\u0c1f\\u0c3f",
	2687	};
	2688
	2689	UErrorCode status = U_ZERO_ERROR;
	2690	UParseError parseError;
	2691	UnicodeString message;
	2692	Transliterator* latinToDev=Transliterator::createInstance("Latin-Telugu", UTRANS_FORWARD, parseError, status);
	2693	Transliterator* devToLatin=Transliterator::createInstance("Telugu-Latin", UTRANS_FORWARD, parseError, status);
	2694	if(U_FAILURE(status)){
	2695	dataerrln("FAIL: construction " + UnicodeString(" Error: ") + u_errorName(status));
	2696	dataerrln("PreContext: " + prettify(parseError.preContext) + " PostContext: " + prettify( parseError.postContext) );
	2697	return;
	2698	}
	2699	UnicodeString gotResult;
	2700	for(int i= 0; i<MAX_LEN; i++){
	2701	gotResult = source[i];
	2702	expect(*latinToDev,CharsToUnicodeString(source[i]),CharsToUnicodeString(expected[i]));
	2703	expect(*devToLatin,CharsToUnicodeString(expected[i]),CharsToUnicodeString(source[i]));
	2704	}
	2705	delete latinToDev;
	2706	delete devToLatin;
	2707	}
	2708
	2709	void TransliteratorTest::TestSanskritLatinRT(){
	2710	const int MAX_LEN =16;
	2711	const char* const source[MAX_LEN] = {
	2712	"rmk\\u1E63\\u0113t",
	2713	"\\u015Br\\u012Bmad",
	2714	"bhagavadg\\u012Bt\\u0101",
	2715	"adhy\\u0101ya",
	2716	"arjuna",
	2717	"vi\\u1E63\\u0101da",
	2718	"y\\u014Dga",
	2719	"dhr\\u0325tar\\u0101\\u1E63\\u1E6Dra",
	2720	"uv\\u0101cr\\u0325",
	2721	"dharmak\\u1E63\\u0113tr\\u0113",
	2722	"kuruk\\u1E63\\u0113tr\\u0113",
	2723	"samav\\u0113t\\u0101",
	2724	"yuyutsava\\u1E25",
	2725	"m\\u0101mak\\u0101\\u1E25",
	2726	// "p\\u0101\\u1E47\\u1E0Dav\\u0101\\u015Bcaiva",
	2727	"kimakurvata",
	2728	"san\\u0304java",
	2729	};
	2730	const char* const expected[MAX_LEN] = {
	2731	"\\u0930\\u094D\\u092E\\u094D\\u0915\\u094D\\u0937\\u0947\\u0924\\u094D",
	2732	"\\u0936\\u094d\\u0930\\u0940\\u092e\\u0926\\u094d",
	2733	"\\u092d\\u0917\\u0935\\u0926\\u094d\\u0917\\u0940\\u0924\\u093e",
	2734	"\\u0905\\u0927\\u094d\\u092f\\u093e\\u092f",
	2735	"\\u0905\\u0930\\u094d\\u091c\\u0941\\u0928",
	2736	"\\u0935\\u093f\\u0937\\u093e\\u0926",
	2737	"\\u092f\\u094b\\u0917",
	2738	"\\u0927\\u0943\\u0924\\u0930\\u093e\\u0937\\u094d\\u091f\\u094d\\u0930",
	2739	"\\u0909\\u0935\\u093E\\u091A\\u0943",
	2740	"\\u0927\\u0930\\u094d\\u092e\\u0915\\u094d\\u0937\\u0947\\u0924\\u094d\\u0930\\u0947",
	2741	"\\u0915\\u0941\\u0930\\u0941\\u0915\\u094d\\u0937\\u0947\\u0924\\u094d\\u0930\\u0947",
	2742	"\\u0938\\u092e\\u0935\\u0947\\u0924\\u093e",
	2743	"\\u092f\\u0941\\u092f\\u0941\\u0924\\u094d\\u0938\\u0935\\u0903",
	2744	"\\u092e\\u093e\\u092e\\u0915\\u093e\\u0903",
	2745	//"\\u092a\\u093e\\u0923\\u094d\\u0921\\u0935\\u093e\\u0936\\u094d\\u091a\\u0948\\u0935",
	2746	"\\u0915\\u093f\\u092e\\u0915\\u0941\\u0930\\u094d\\u0935\\u0924",
	2747	"\\u0938\\u0902\\u091c\\u0935",
	2748	};
	2749	UErrorCode status = U_ZERO_ERROR;
	2750	UParseError parseError;
	2751	UnicodeString message;
	2752	Transliterator* latinToDev=Transliterator::createInstance("Latin-Devanagari", UTRANS_FORWARD, parseError, status);
	2753	Transliterator* devToLatin=Transliterator::createInstance("Devanagari-Latin", UTRANS_FORWARD, parseError, status);
	2754	if(U_FAILURE(status)){
	2755	dataerrln("FAIL: construction " + UnicodeString(" Error: ") + u_errorName(status));
	2756	dataerrln("PreContext: " + prettify(parseError.preContext) + " PostContext: " + prettify( parseError.postContext) );
	2757	return;
	2758	}
	2759	UnicodeString gotResult;
	2760	for(int i= 0; i<MAX_LEN; i++){
	2761	gotResult = source[i];
	2762	expect(*latinToDev,CharsToUnicodeString(source[i]),CharsToUnicodeString(expected[i]));
	2763	expect(*devToLatin,CharsToUnicodeString(expected[i]),CharsToUnicodeString(source[i]));
	2764	}
	2765	delete latinToDev;
	2766	delete devToLatin;
	2767	}
	2768
	2769
	2770	void TransliteratorTest::TestCompoundLatinRT(){
	2771	const char* const source[] = {
	2772	"rmk\\u1E63\\u0113t",
	2773	"\\u015Br\\u012Bmad",
	2774	"bhagavadg\\u012Bt\\u0101",
	2775	"adhy\\u0101ya",
	2776	"arjuna",
	2777	"vi\\u1E63\\u0101da",
	2778	"y\\u014Dga",
	2779	"dhr\\u0325tar\\u0101\\u1E63\\u1E6Dra",
	2780	"uv\\u0101cr\\u0325",
	2781	"dharmak\\u1E63\\u0113tr\\u0113",
	2782	"kuruk\\u1E63\\u0113tr\\u0113",
	2783	"samav\\u0113t\\u0101",
	2784	"yuyutsava\\u1E25",
	2785	"m\\u0101mak\\u0101\\u1E25",
	2786	// "p\\u0101\\u1E47\\u1E0Dav\\u0101\\u015Bcaiva",
	2787	"kimakurvata",
	2788	"san\\u0304java"
	2789	};
	2790	const int MAX_LEN = sizeof(source)/sizeof(source[0]);
	2791	const char* const expected[MAX_LEN] = {
	2792	"\\u0930\\u094D\\u092E\\u094D\\u0915\\u094D\\u0937\\u0947\\u0924\\u094D",
	2793	"\\u0936\\u094d\\u0930\\u0940\\u092e\\u0926\\u094d",
	2794	"\\u092d\\u0917\\u0935\\u0926\\u094d\\u0917\\u0940\\u0924\\u093e",
	2795	"\\u0905\\u0927\\u094d\\u092f\\u093e\\u092f",
	2796	"\\u0905\\u0930\\u094d\\u091c\\u0941\\u0928",
	2797	"\\u0935\\u093f\\u0937\\u093e\\u0926",
	2798	"\\u092f\\u094b\\u0917",
	2799	"\\u0927\\u0943\\u0924\\u0930\\u093e\\u0937\\u094d\\u091f\\u094d\\u0930",
	2800	"\\u0909\\u0935\\u093E\\u091A\\u0943",
	2801	"\\u0927\\u0930\\u094d\\u092e\\u0915\\u094d\\u0937\\u0947\\u0924\\u094d\\u0930\\u0947",
	2802	"\\u0915\\u0941\\u0930\\u0941\\u0915\\u094d\\u0937\\u0947\\u0924\\u094d\\u0930\\u0947",
	2803	"\\u0938\\u092e\\u0935\\u0947\\u0924\\u093e",
	2804	"\\u092f\\u0941\\u092f\\u0941\\u0924\\u094d\\u0938\\u0935\\u0903",
	2805	"\\u092e\\u093e\\u092e\\u0915\\u093e\\u0903",
	2806	// "\\u092a\\u093e\\u0923\\u094d\\u0921\\u0935\\u093e\\u0936\\u094d\\u091a\\u0948\\u0935",
	2807	"\\u0915\\u093f\\u092e\\u0915\\u0941\\u0930\\u094d\\u0935\\u0924",
	2808	"\\u0938\\u0902\\u091c\\u0935"
	2809	};
	2810	if(MAX_LEN != sizeof(expected)/sizeof(expected[0])) {
	2811	errln("error in TestCompoundLatinRT: source[] and expected[] have different lengths!");
	2812	return;
	2813	}
	2814
	2815	UErrorCode status = U_ZERO_ERROR;
	2816	UParseError parseError;
	2817	UnicodeString message;
	2818	Transliterator* devToLatinToDev =Transliterator::createInstance("Devanagari-Latin;Latin-Devanagari", UTRANS_FORWARD, parseError, status);
	2819	Transliterator* latinToDevToLatin=Transliterator::createInstance("Latin-Devanagari;Devanagari-Latin", UTRANS_FORWARD, parseError, status);
	2820	Transliterator* devToTelToDev =Transliterator::createInstance("Devanagari-Telugu;Telugu-Devanagari", UTRANS_FORWARD, parseError, status);
	2821	Transliterator* latinToTelToLatin=Transliterator::createInstance("Latin-Telugu;Telugu-Latin", UTRANS_FORWARD, parseError, status);
	2822
	2823	if(U_FAILURE(status)){
	2824	dataerrln("FAIL: construction " + UnicodeString(" Error: ") + u_errorName(status));
	2825	dataerrln("PreContext: " + prettify(parseError.preContext) + " PostContext: " + prettify( parseError.postContext) );
	2826	return;
	2827	}
	2828	UnicodeString gotResult;
	2829	for(int i= 0; i<MAX_LEN; i++){
	2830	gotResult = source[i];
	2831	expect(*devToLatinToDev,CharsToUnicodeString(expected[i]),CharsToUnicodeString(expected[i]));
	2832	expect(*latinToDevToLatin,CharsToUnicodeString(source[i]),CharsToUnicodeString(source[i]));
	2833	expect(*latinToTelToLatin,CharsToUnicodeString(source[i]),CharsToUnicodeString(source[i]));
	2834
	2835	}
	2836	delete(latinToDevToLatin);
	2837	delete(devToLatinToDev);
	2838	delete(devToTelToDev);
	2839	delete(latinToTelToLatin);
	2840	}
	2841
	2842	/**
	2843	* Test Gurmukhi-Devanagari Tippi and Bindi
	2844	*/
	2845	void TransliteratorTest::TestGurmukhiDevanagari(){
	2846	// the rule says:
	2847	// (\u0902) (when preceded by vowel) ---> (\u0A02)
	2848	// (\u0902) (when preceded by consonant) ---> (\u0A70)
	2849	UErrorCode status = U_ZERO_ERROR;
	2850	UnicodeSet vowel(UnicodeString("[\\u0905-\\u090A \\u090F\\u0910\\u0913\\u0914 \\u093e-\\u0942\\u0947\\u0948\\u094B\\u094C\\u094D]", -1, US_INV).unescape(), status);
	2851	UnicodeSet non_vowel(UnicodeString("[\\u0915-\\u0928\\u092A-\\u0930]", -1, US_INV).unescape(), status);
	2852	UParseError parseError;
	2853
	2854	UnicodeSetIterator vIter(vowel);
	2855	UnicodeSetIterator nvIter(non_vowel);
	2856	Transliterator* trans = Transliterator::createInstance("Devanagari-Gurmukhi",UTRANS_FORWARD, parseError, status);
	2857	if(U_FAILURE(status)) {
	2858	dataerrln("Error creating transliterator %s", u_errorName(status));
	2859	delete trans;
	2860	return;
	2861	}
	2862	UnicodeString src (" \\u0902", -1, US_INV);
	2863	UnicodeString expected(" \\u0A02", -1, US_INV);
	2864	src = src.unescape();
	2865	expected= expected.unescape();
	2866
	2867	while(vIter.next()){
	2868	src.setCharAt(0,(UChar) vIter.getCodepoint());
	2869	expected.setCharAt(0,(UChar) (vIter.getCodepoint()+0x0100));
	2870	expect(*trans,src,expected);
	2871	}
	2872
	2873	expected.setCharAt(1,0x0A70);
	2874	while(nvIter.next()){
	2875	//src.setCharAt(0,(char) nvIter.codepoint);
	2876	src.setCharAt(0,(UChar)nvIter.getCodepoint());
	2877	expected.setCharAt(0,(UChar) (nvIter.getCodepoint()+0x0100));
	2878	expect(*trans,src,expected);
	2879	}
	2880	delete trans;
	2881	}
	2882	/**
	2883	* Test instantiation from a locale.
	2884	*/
	2885	void TransliteratorTest::TestLocaleInstantiation(void) {
	2886	UParseError pe;
	2887	UErrorCode ec = U_ZERO_ERROR;
	2888	Transliterator *t = Transliterator::createInstance("ru_RU-Latin", UTRANS_FORWARD, pe, ec);
	2889	if (U_FAILURE(ec)) {
	2890	dataerrln("FAIL: createInstance(ru_RU-Latin) - %s", u_errorName(ec));
	2891	delete t;
	2892	return;
	2893	}
	2894	expect(*t, CharsToUnicodeString("\\u0430"), "a");
	2895	delete t;
	2896
	2897	t = Transliterator::createInstance("en-el", UTRANS_FORWARD, pe, ec);
	2898	if (U_FAILURE(ec)) {
	2899	errln("FAIL: createInstance(en-el)");
	2900	delete t;
	2901	return;
	2902	}
	2903	expect(*t, "a", CharsToUnicodeString("\\u03B1"));
	2904	delete t;
	2905	}
	2906
	2907	/**
	2908	* Test title case handling of accent (should ignore accents)
	2909	*/
	2910	void TransliteratorTest::TestTitleAccents(void) {
	2911	UParseError pe;
	2912	UErrorCode ec = U_ZERO_ERROR;
	2913	Transliterator *t = Transliterator::createInstance("Title", UTRANS_FORWARD, pe, ec);
	2914	if (U_FAILURE(ec)) {
	2915	errln("FAIL: createInstance(Title)");
	2916	delete t;
	2917	return;
	2918	}
	2919	expect(*t, CharsToUnicodeString("a\\u0300b can't abe"), CharsToUnicodeString("A\\u0300b Can't Abe"));
	2920	delete t;
	2921	}
	2922
	2923	/**
	2924	* Basic test of a locale resource based rule.
	2925	*/
	2926	void TransliteratorTest::TestLocaleResource() {
	2927	const char* DATA[] = {
	2928	// id from to
	2929	//"Latin-Greek/UNGEGN", "b", "\\u03bc\\u03c0",
	2930	"Latin-el", "b", "\\u03bc\\u03c0",
	2931	"Latin-Greek", "b", "\\u03B2",
	2932	"Greek-Latin/UNGEGN", "\\u03B2", "v",
	2933	"el-Latin", "\\u03B2", "v",
	2934	"Greek-Latin", "\\u03B2", "b",
	2935	};
	2936	const int32_t DATA_length = sizeof(DATA) / sizeof(DATA[0]);
	2937	for (int32_t i=0; i<DATA_length; i+=3) {
	2938	UParseError pe;
	2939	UErrorCode ec = U_ZERO_ERROR;
	2940	Transliterator *t = Transliterator::createInstance(DATA[i], UTRANS_FORWARD, pe, ec);
	2941	if (U_FAILURE(ec)) {
	2942	dataerrln((UnicodeString)"FAIL: createInstance(" + DATA[i] + ") - " + u_errorName(ec));
	2943	delete t;
	2944	continue;
	2945	}
	2946	expect(*t, CharsToUnicodeString(DATA[i+1]),
	2947	CharsToUnicodeString(DATA[i+2]));
	2948	delete t;
	2949	}
	2950	}
	2951
	2952	/**
	2953	* Make sure parse errors reference the right line.
	2954	*/
	2955	void TransliteratorTest::TestParseError() {
	2956	static const char* rule =
	2957	"a > b;\n"
	2958	"# more stuff\n"
	2959	"d << b;";
	2960	UErrorCode ec = U_ZERO_ERROR;
	2961	UParseError pe;
	2962	Transliterator *t = Transliterator::createFromRules("ID", rule, UTRANS_FORWARD, pe, ec);
	2963	delete t;
	2964	if (U_FAILURE(ec)) {
	2965	UnicodeString err(pe.preContext);
	2966	err.append((UChar)124/\|/).append(pe.postContext);
	2967	if (err.indexOf("d << b") >= 0) {
	2968	logln("Ok: " + err);
	2969	} else {
	2970	errln("FAIL: " + err);
	2971	}
	2972	}
	2973	else {
	2974	errln("FAIL: no syntax error");
	2975	}
	2976	static const char* maskingRule =
	2977	"a>x;\n"
	2978	"# more stuff\n"
	2979	"ab>y;";
	2980	ec = U_ZERO_ERROR;
	2981	delete Transliterator::createFromRules("ID", maskingRule, UTRANS_FORWARD, pe, ec);
	2982	if (ec != U_RULE_MASK_ERROR) {
	2983	errln("FAIL: returned %s instead of U_RULE_MASK_ERROR", u_errorName(ec));
	2984	}
	2985	else if (UnicodeString("a > x;") != UnicodeString(pe.preContext)) {
	2986	errln("FAIL: did not get expected precontext");
	2987	}
	2988	else if (UnicodeString("ab > y;") != UnicodeString(pe.postContext)) {
	2989	errln("FAIL: did not get expected postcontext");
	2990	}
	2991	}
	2992
	2993	/**
	2994	* Make sure sets on output are disallowed.
	2995	*/
	2996	void TransliteratorTest::TestOutputSet() {
	2997	UnicodeString rule = "$set = [a-cm-n]; b > $set;";
	2998	UErrorCode ec = U_ZERO_ERROR;
	2999	UParseError pe;
	3000	Transliterator *t = Transliterator::createFromRules("ID", rule, UTRANS_FORWARD, pe, ec);
	3001	delete t;
	3002	if (U_FAILURE(ec)) {
	3003	UnicodeString err(pe.preContext);
	3004	err.append((UChar)124/\|/).append(pe.postContext);
	3005	logln("Ok: " + err);
	3006	return;
	3007	}
	3008	errln("FAIL: No syntax error");
	3009	}
	3010
	3011	/**
	3012	* Test the use variable range pragma, making sure that use of
	3013	* variable range characters is detected and flagged as an error.
	3014	*/
	3015	void TransliteratorTest::TestVariableRange() {
	3016	UnicodeString rule = "use variable range 0x70 0x72; a > A; b > B; q > Q;";
	3017	UErrorCode ec = U_ZERO_ERROR;
	3018	UParseError pe;
	3019	Transliterator *t = Transliterator::createFromRules("ID", rule, UTRANS_FORWARD, pe, ec);
	3020	delete t;
	3021	if (U_FAILURE(ec)) {
	3022	UnicodeString err(pe.preContext);
	3023	err.append((UChar)124/\|/).append(pe.postContext);
	3024	logln("Ok: " + err);
	3025	return;
	3026	}
	3027	errln("FAIL: No syntax error");
	3028	}
	3029
	3030	/**
	3031	* Test invalid post context error handling
	3032	*/
	3033	void TransliteratorTest::TestInvalidPostContext() {
	3034	UnicodeString rule = "a}b{c>d;";
	3035	UErrorCode ec = U_ZERO_ERROR;
	3036	UParseError pe;
	3037	Transliterator *t = Transliterator::createFromRules("ID", rule, UTRANS_FORWARD, pe, ec);
	3038	delete t;
	3039	if (U_FAILURE(ec)) {
	3040	UnicodeString err(pe.preContext);
	3041	err.append((UChar)124/\|/).append(pe.postContext);
	3042	if (err.indexOf("a}b{c") >= 0) {
	3043	logln("Ok: " + err);
	3044	} else {
	3045	errln("FAIL: " + err);
	3046	}
	3047	return;
	3048	}
	3049	errln("FAIL: No syntax error");
	3050	}
	3051
	3052	/**
	3053	* Test ID form variants
	3054	*/
	3055	void TransliteratorTest::TestIDForms() {
	3056	const char* DATA[] = {
	3057	"NFC", NULL, "NFD",
	3058	"nfd", NULL, "NFC", // make sure case is ignored
	3059	"Any-NFKD", NULL, "Any-NFKC",
	3060	"Null", NULL, "Null",
	3061	"-nfkc", "nfkc", "NFKD",
	3062	"-nfkc/", "nfkc", "NFKD",
	3063	"Latin-Greek/UNGEGN", NULL, "Greek-Latin/UNGEGN",
	3064	"Greek/UNGEGN-Latin", "Greek-Latin/UNGEGN", "Latin-Greek/UNGEGN",
	3065	"Bengali-Devanagari/", "Bengali-Devanagari", "Devanagari-Bengali",
	3066	"Source-", NULL, NULL,
	3067	"Source/Variant-", NULL, NULL,
	3068	"Source-/Variant", NULL, NULL,
	3069	"/Variant", NULL, NULL,
	3070	"/Variant-", NULL, NULL,
	3071	"-/Variant", NULL, NULL,
	3072	"-/", NULL, NULL,
	3073	"-", NULL, NULL,
	3074	"/", NULL, NULL,
	3075	};
	3076	const int32_t DATA_length = sizeof(DATA)/sizeof(DATA[0]);
	3077
	3078	for (int32_t i=0; i<DATA_length; i+=3) {
	3079	const char* ID = DATA[i];
	3080	const char* expID = DATA[i+1];
	3081	const char* expInvID = DATA[i+2];
	3082	UBool expValid = (expInvID != NULL);
	3083	if (expID == NULL) {
	3084	expID = ID;
	3085	}
	3086	UParseError pe;
	3087	UErrorCode ec = U_ZERO_ERROR;
	3088	Transliterator *t =
	3089	Transliterator::createInstance(ID, UTRANS_FORWARD, pe, ec);
	3090	if (U_FAILURE(ec)) {
	3091	if (!expValid) {
	3092	logln((UnicodeString)"Ok: getInstance(" + ID +") => " + u_errorName(ec));
	3093	} else {
	3094	dataerrln((UnicodeString)"FAIL: Couldn't create " + ID + " - " + u_errorName(ec));
	3095	}
	3096	delete t;
	3097	continue;
	3098	}
	3099	Transliterator *u = t->createInverse(ec);
	3100	if (U_FAILURE(ec)) {
	3101	errln((UnicodeString)"FAIL: Couldn't create inverse of " + ID);
	3102	delete t;
	3103	delete u;
	3104	continue;
	3105	}
	3106	if (t->getID() == expID &&
	3107	u->getID() == expInvID) {
	3108	logln((UnicodeString)"Ok: " + ID + ".getInverse() => " + expInvID);
	3109	} else {
	3110	errln((UnicodeString)"FAIL: getInstance(" + ID + ") => " +
	3111	t->getID() + " x getInverse() => " + u->getID() +
	3112	", expected " + expInvID);
	3113	}
	3114	delete t;
	3115	delete u;
	3116	}
	3117	}
	3118
	3119	static const UChar SPACE[] = {32,0};
	3120	static const UChar NEWLINE[] = {10,0};
	3121	static const UChar RETURN[] = {13,0};
	3122	static const UChar EMPTY[] = {0};
	3123
	3124	void TransliteratorTest::checkRules(const UnicodeString& label, Transliterator& t2,
	3125	const UnicodeString& testRulesForward) {
	3126	UnicodeString rules2; t2.toRules(rules2, TRUE);
	3127	//rules2 = TestUtility.replaceAll(rules2, new UnicodeSet("[' '\n\r]"), "");
	3128	rules2.findAndReplace(SPACE, EMPTY);
	3129	rules2.findAndReplace(NEWLINE, EMPTY);
	3130	rules2.findAndReplace(RETURN, EMPTY);
	3131
	3132	UnicodeString testRules(testRulesForward); testRules.findAndReplace(SPACE, EMPTY);
	3133
	3134	if (rules2 != testRules) {
	3135	errln(label);
	3136	logln((UnicodeString)"GENERATED RULES: " + rules2);
	3137	logln((UnicodeString)"SHOULD BE: " + testRulesForward);
	3138	}
	3139	}
	3140
	3141	/**
	3142	* Mark's toRules test.
	3143	*/
	3144	void TransliteratorTest::TestToRulesMark() {
	3145	const char* testRules =
	3146	"::[[:Latin:][:Mark:]];"
	3147	"::NFKD (NFC);"
	3148	"::Lower (Lower);"
	3149	"a <> \\u03B1;" // alpha
	3150	"::NFKC (NFD);"
	3151	"::Upper (Lower);"
	3152	"::Lower ();"
	3153	"::([[:Greek:][:Mark:]]);"
	3154	;
	3155	const char* testRulesForward =
	3156	"::[[:Latin:][:Mark:]];"
	3157	"::NFKD(NFC);"
	3158	"::Lower(Lower);"
	3159	"a > \\u03B1;"
	3160	"::NFKC(NFD);"
	3161	"::Upper (Lower);"
	3162	"::Lower ();"
	3163	;
	3164	const char* testRulesBackward =
	3165	"::[[:Greek:][:Mark:]];"
	3166	"::Lower (Upper);"
	3167	"::NFD(NFKC);"
	3168	"\\u03B1 > a;"
	3169	"::Lower(Lower);"
	3170	"::NFC(NFKD);"
	3171	;
	3172	UnicodeString source = CharsToUnicodeString("\\u00E1"); // a-acute
	3173	UnicodeString target = CharsToUnicodeString("\\u03AC"); // alpha-acute
	3174
	3175	UParseError pe;
	3176	UErrorCode ec = U_ZERO_ERROR;
	3177	Transliterator *t2 = Transliterator::createFromRules("source-target", UnicodeString(testRules, -1, US_INV), UTRANS_FORWARD, pe, ec);
	3178	Transliterator *t3 = Transliterator::createFromRules("target-source", UnicodeString(testRules, -1, US_INV), UTRANS_REVERSE, pe, ec);
	3179
	3180	if (U_FAILURE(ec)) {
	3181	delete t2;
	3182	delete t3;
	3183	dataerrln((UnicodeString)"FAIL: createFromRules => " + u_errorName(ec));
	3184	return;
	3185	}
	3186
	3187	expect(*t2, source, target);
	3188	expect(*t3, target, source);
	3189
	3190	checkRules("Failed toRules FORWARD", *t2, UnicodeString(testRulesForward, -1, US_INV));
	3191	checkRules("Failed toRules BACKWARD", *t3, UnicodeString(testRulesBackward, -1, US_INV));
	3192
	3193	delete t2;
	3194	delete t3;
	3195	}
	3196
	3197	/**
	3198	* Test Escape and Unescape transliterators.
	3199	*/
	3200	void TransliteratorTest::TestEscape() {
	3201	UParseError pe;
	3202	UErrorCode ec;
	3203	Transliterator *t;
	3204
	3205	ec = U_ZERO_ERROR;
	3206	t = Transliterator::createInstance("Hex-Any", UTRANS_FORWARD, pe, ec);
	3207	if (U_FAILURE(ec)) {
	3208	errln((UnicodeString)"FAIL: createInstance");
	3209	} else {
	3210	expect(*t,
	3211	UNICODE_STRING_SIMPLE("\\x{40}\\U000000312Q"),
	3212	"@12Q");
	3213	}
	3214	delete t;
	3215
	3216	ec = U_ZERO_ERROR;
	3217	t = Transliterator::createInstance("Any-Hex/C", UTRANS_FORWARD, pe, ec);
	3218	if (U_FAILURE(ec)) {
	3219	errln((UnicodeString)"FAIL: createInstance");
	3220	} else {
	3221	expect(*t,
	3222	CharsToUnicodeString("A\\U0010BEEF\\uFEED"),
	3223	UNICODE_STRING_SIMPLE("\\u0041\\U0010BEEF\\uFEED"));
	3224	}
	3225	delete t;
	3226
	3227	ec = U_ZERO_ERROR;
	3228	t = Transliterator::createInstance("Any-Hex/Java", UTRANS_FORWARD, pe, ec);
	3229	if (U_FAILURE(ec)) {
	3230	errln((UnicodeString)"FAIL: createInstance");
	3231	} else {
	3232	expect(*t,
	3233	CharsToUnicodeString("A\\U0010BEEF\\uFEED"),
	3234	UNICODE_STRING_SIMPLE("\\u0041\\uDBEF\\uDEEF\\uFEED"));
	3235	}
	3236	delete t;
	3237
	3238	ec = U_ZERO_ERROR;
	3239	t = Transliterator::createInstance("Any-Hex/Perl", UTRANS_FORWARD, pe, ec);
	3240	if (U_FAILURE(ec)) {
	3241	errln((UnicodeString)"FAIL: createInstance");
	3242	} else {
	3243	expect(*t,
	3244	CharsToUnicodeString("A\\U0010BEEF\\uFEED"),
	3245	UNICODE_STRING_SIMPLE("\\x{41}\\x{10BEEF}\\x{FEED}"));
	3246	}
	3247	delete t;
	3248	}
	3249
	3250
	3251	void TransliteratorTest::TestAnchorMasking(){
	3252	UnicodeString rule ("^a > Q; a > q;");
	3253	UErrorCode status= U_ZERO_ERROR;
	3254	UParseError parseError;
	3255
	3256	Transliterator* t = Transliterator::createFromRules("ID", rule, UTRANS_FORWARD,parseError,status);
	3257	if(U_FAILURE(status)){
	3258	errln(UnicodeString("FAIL: ") + "ID" +
	3259	".createFromRules() => bad rules" +
	3260	/", parse error " + parseError.code +/
	3261	", line " + parseError.line +
	3262	", offset " + parseError.offset +
	3263	", context " + prettify(parseError.preContext, TRUE) +
	3264	", rules: " + prettify(rule, TRUE));
	3265	}
	3266	delete t;
	3267	}
	3268
	3269	/**
	3270	* Make sure display names of variants look reasonable.
	3271	*/
	3272	void TransliteratorTest::TestDisplayName() {
	3273	#if UCONFIG_NO_FORMATTING
	3274	logln("Skipping, UCONFIG_NO_FORMATTING is set\n");
	3275	return;
	3276	#else
	3277	static const char* DATA[] = {
	3278	// ID, forward name, reverse name
	3279	// Update the text as necessary -- the important thing is
	3280	// not the text itself, but how various cases are handled.
	3281
	3282	// Basic test
	3283	"Any-Hex", "Any to Hex Escape", "Hex Escape to Any",
	3284
	3285	// Variants
	3286	"Any-Hex/Perl", "Any to Hex Escape/Perl", "Hex Escape to Any/Perl",
	3287
	3288	// Target-only IDs
	3289	"NFC", "Any to NFC", "Any to NFD",
	3290	};
	3291
	3292	int32_t DATA_length = sizeof(DATA) / sizeof(DATA[0]);
	3293
	3294	Locale US("en", "US");
	3295
	3296	for (int32_t i=0; i<DATA_length; i+=3) {
	3297	UnicodeString name;
	3298	Transliterator::getDisplayName(DATA[i], US, name);
	3299	if (name != DATA[i+1]) {
	3300	dataerrln((UnicodeString)"FAIL: " + DATA[i] + ".getDisplayName() => " +
	3301	name + ", expected " + DATA[i+1]);
	3302	} else {
	3303	logln((UnicodeString)"Ok: " + DATA[i] + ".getDisplayName() => " + name);
	3304	}
	3305	UErrorCode ec = U_ZERO_ERROR;
	3306	UParseError pe;
	3307	Transliterator *t = Transliterator::createInstance(DATA[i], UTRANS_REVERSE, pe, ec);
	3308	if (U_FAILURE(ec)) {
	3309	delete t;
	3310	dataerrln("FAIL: createInstance failed - %s", u_errorName(ec));
	3311	continue;
	3312	}
	3313	name = Transliterator::getDisplayName(t->getID(), US, name);
	3314	if (name != DATA[i+2]) {
	3315	dataerrln((UnicodeString)"FAIL: " + t->getID() + ".getDisplayName() => " +
	3316	name + ", expected " + DATA[i+2]);
	3317	} else {
	3318	logln((UnicodeString)"Ok: " + t->getID() + ".getDisplayName() => " + name);
	3319	}
	3320	delete t;
	3321	}
	3322	#endif
	3323	}
	3324
	3325	void TransliteratorTest::TestSpecialCases(void) {
	3326	const UnicodeString registerRules[] = {
	3327	"Any-Dev1", "x > X; y > Y;",
	3328	"Any-Dev2", "XY > Z",
	3329	"Greek-Latin/FAKE",
	3330	CharsToUnicodeString
	3331	("[^[:L:][:M:]] { \\u03bc\\u03c0 > b ; \\u03bc\\u03c0 } [^[:L:][:M:]] > b ; [^[:L:][:M:]] { [\\u039c\\u03bc][\\u03a0\\u03c0] > B ; [\\u039c\\u03bc][\\u03a0\\u03c0] } [^[:L:][:M:]] > B ;"),
	3332	"" // END MARKER
	3333	};
	3334
	3335	const UnicodeString testCases[] = {
	3336	// NORMALIZATION
	3337	// should add more test cases
	3338	"NFD" , CharsToUnicodeString("a\\u0300 \\u00E0 \\u1100\\u1161 \\uFF76\\uFF9E\\u03D3"), "",
	3339	"NFC" , CharsToUnicodeString("a\\u0300 \\u00E0 \\u1100\\u1161 \\uFF76\\uFF9E\\u03D3"), "",
	3340	"NFKD", CharsToUnicodeString("a\\u0300 \\u00E0 \\u1100\\u1161 \\uFF76\\uFF9E\\u03D3"), "",
	3341	"NFKC", CharsToUnicodeString("a\\u0300 \\u00E0 \\u1100\\u1161 \\uFF76\\uFF9E\\u03D3"), "",
	3342
	3343	// mp -> b BUG
	3344	"Greek-Latin/UNGEGN", CharsToUnicodeString("(\\u03BC\\u03C0)"), "(b)",
	3345	"Greek-Latin/FAKE", CharsToUnicodeString("(\\u03BC\\u03C0)"), "(b)",
	3346
	3347	// check for devanagari bug
	3348	"nfd;Dev1;Dev2;nfc", "xy", "Z",
	3349
	3350	// ff, i, dotless-i, I, dotted-I, LJLjlj deseret deeDEE
	3351	"Title", CharsToUnicodeString("ab'cD ffi\\u0131I\\u0130 \\u01C7\\u01C8\\u01C9 ") + DESERET_dee + DESERET_DEE,
	3352	CharsToUnicodeString("Ab'cd Ffi\\u0131ii\\u0307 \\u01C8\\u01C9\\u01C9 ") + DESERET_DEE + DESERET_dee,
	3353
	3354	//TODO: enable this test once Titlecase works right
	3355	/*
	3356	"Title", CharsToUnicodeString("\\uFB00i\\u0131I\\u0130 \\u01C7\\u01C8\\u01C9 ") + DESERET_dee + DESERET_DEE,
	3357	CharsToUnicodeString("Ffi\\u0131ii \\u01C8\\u01C9\\u01C9 ") + DESERET_DEE + DESERET_dee,
	3358	*/
	3359	"Upper", CharsToUnicodeString("ab'cD \\uFB00i\\u0131I\\u0130 \\u01C7\\u01C8\\u01C9 ") + DESERET_dee + DESERET_DEE,
	3360	CharsToUnicodeString("AB'CD FFIII\\u0130 \\u01C7\\u01C7\\u01C7 ") + DESERET_DEE + DESERET_DEE,
	3361	"Lower", CharsToUnicodeString("ab'cD \\uFB00i\\u0131I\\u0130 \\u01C7\\u01C8\\u01C9 ") + DESERET_dee + DESERET_DEE,
	3362	CharsToUnicodeString("ab'cd \\uFB00i\\u0131ii\\u0307 \\u01C9\\u01C9\\u01C9 ") + DESERET_dee + DESERET_dee,
	3363
	3364	"Upper", CharsToUnicodeString("ab'cD \\uFB00i\\u0131I\\u0130 \\u01C7\\u01C8\\u01C9 ") + DESERET_dee + DESERET_DEE, "",
	3365	"Lower", CharsToUnicodeString("ab'cD \\uFB00i\\u0131I\\u0130 \\u01C7\\u01C8\\u01C9 ") + DESERET_dee + DESERET_DEE, "",
	3366
	3367	// FORMS OF S
	3368	"Greek-Latin/UNGEGN", CharsToUnicodeString("\\u03C3 \\u03C3\\u03C2 \\u03C2\\u03C3"),
	3369	CharsToUnicodeString("s ss s\\u0331s\\u0331") ,
	3370	"Latin-Greek/UNGEGN", CharsToUnicodeString("s ss s\\u0331s\\u0331"),
	3371	CharsToUnicodeString("\\u03C3 \\u03C3\\u03C2 \\u03C2\\u03C3") ,
	3372	"Greek-Latin", CharsToUnicodeString("\\u03C3 \\u03C3\\u03C2 \\u03C2\\u03C3"),
	3373	CharsToUnicodeString("s ss s\\u0331s\\u0331") ,
	3374	"Latin-Greek", CharsToUnicodeString("s ss s\\u0331s\\u0331"),
	3375	CharsToUnicodeString("\\u03C3 \\u03C3\\u03C2 \\u03C2\\u03C3"),
	3376	// Tatiana bug
	3377	// Upper: TAT\\u02B9\\u00C2NA
	3378	// Lower: tat\\u02B9\\u00E2na
	3379	// Title: Tat\\u02B9\\u00E2na
	3380	"Upper", CharsToUnicodeString("tat\\u02B9\\u00E2na"),
	3381	CharsToUnicodeString("TAT\\u02B9\\u00C2NA"),
	3382	"Lower", CharsToUnicodeString("TAT\\u02B9\\u00C2NA"),
	3383	CharsToUnicodeString("tat\\u02B9\\u00E2na"),
	3384	"Title", CharsToUnicodeString("tat\\u02B9\\u00E2na"),
	3385	CharsToUnicodeString("Tat\\u02B9\\u00E2na"),
	3386
	3387	"" // END MARKER
	3388	};
	3389
	3390	UParseError pos;
	3391	int32_t i;
	3392	for (i = 0; registerRules[i].length()!=0; i+=2) {
	3393	UErrorCode status = U_ZERO_ERROR;
	3394
	3395	Transliterator *t = Transliterator::createFromRules(registerRules[0+i],
	3396	registerRules[i+1], UTRANS_FORWARD, pos, status);
	3397	if (U_FAILURE(status)) {
	3398	dataerrln("Fails: Unable to create the transliterator from rules. - %s", u_errorName(status));
	3399	} else {
	3400	Transliterator::registerInstance(t);
	3401	}
	3402	}
	3403	for (i = 0; testCases[i].length()!=0; i+=3) {
	3404	UErrorCode ec = U_ZERO_ERROR;
	3405	UParseError pe;
	3406	const UnicodeString& name = testCases[i];
	3407	Transliterator *t = Transliterator::createInstance(name, UTRANS_FORWARD, pe, ec);
	3408	if (U_FAILURE(ec)) {
	3409	dataerrln((UnicodeString)"FAIL: Couldn't create " + name + " - " + u_errorName(ec));
	3410	delete t;
	3411	continue;
	3412	}
	3413	const UnicodeString& id = t->getID();
	3414	const UnicodeString& source = testCases[i+1];
	3415	UnicodeString target;
	3416
	3417	// Automatic generation of targets, to make it simpler to add test cases (and more fail-safe)
	3418
	3419	if (testCases[i+2].length() > 0) {
	3420	target = testCases[i+2];
	3421	} else if (0==id.caseCompare("NFD", U_FOLD_CASE_DEFAULT)) {
	3422	Normalizer::normalize(source, UNORM_NFD, 0, target, ec);
	3423	} else if (0==id.caseCompare("NFC", U_FOLD_CASE_DEFAULT)) {
	3424	Normalizer::normalize(source, UNORM_NFC, 0, target, ec);
	3425	} else if (0==id.caseCompare("NFKD", U_FOLD_CASE_DEFAULT)) {
	3426	Normalizer::normalize(source, UNORM_NFKD, 0, target, ec);
	3427	} else if (0==id.caseCompare("NFKC", U_FOLD_CASE_DEFAULT)) {
	3428	Normalizer::normalize(source, UNORM_NFKC, 0, target, ec);
	3429	} else if (0==id.caseCompare("Lower", U_FOLD_CASE_DEFAULT)) {
	3430	target = source;
	3431	target.toLower(Locale::getUS());
	3432	} else if (0==id.caseCompare("Upper", U_FOLD_CASE_DEFAULT)) {
	3433	target = source;
	3434	target.toUpper(Locale::getUS());
	3435	}
	3436	if (U_FAILURE(ec)) {
	3437	errln((UnicodeString)"FAIL: Internal error normalizing " + source);
	3438	continue;
	3439	}
	3440
	3441	expect(*t, source, target);
	3442	delete t;
	3443	}
	3444	for (i = 0; registerRules[i].length()!=0; i+=2) {
	3445	Transliterator::unregister(registerRules[i]);
	3446	}
	3447	}
	3448
	3449	char* Char32ToEscapedChars(UChar32 ch, char* buffer) {
	3450	if (ch <= 0xFFFF) {
	3451	sprintf(buffer, "\\u%04x", (int)ch);
	3452	} else {
	3453	sprintf(buffer, "\\U%08x", (int)ch);
	3454	}
	3455	return buffer;
	3456	}
	3457
	3458	void TransliteratorTest::TestSurrogateCasing (void) {
	3459	// check that casing handles surrogates
	3460	// titlecase is currently defective
	3461	char buffer[20];
	3462	UChar buffer2[20];
	3463	UChar32 dee;
	3464	U16_GET(DESERET_dee,0, 0, DESERET_dee.length(), dee);
	3465	UnicodeString DEE(u_totitle(dee));
	3466	if (DEE != DESERET_DEE) {
	3467	err("Fails titlecase of surrogates");
	3468	err(Char32ToEscapedChars(dee, buffer));
	3469	err(", ");
	3470	errln(Char32ToEscapedChars(DEE.char32At(0), buffer));
	3471	}
	3472
	3473	UnicodeString deeDEETest=DESERET_dee + DESERET_DEE;
	3474	UnicodeString deedeeTest = DESERET_dee + DESERET_dee;
	3475	UnicodeString DEEDEETest = DESERET_DEE + DESERET_DEE;
	3476	UErrorCode status= U_ZERO_ERROR;
	3477
	3478	u_strToUpper(buffer2, 20, deeDEETest.getBuffer(), deeDEETest.length(), NULL, &status);
	3479	if (U_FAILURE(status) \|\| (UnicodeString(buffer2)!= DEEDEETest)) {
	3480	errln("Fails: Can't uppercase surrogates.");
	3481	}
	3482
	3483	status= U_ZERO_ERROR;
	3484	u_strToLower(buffer2, 20, deeDEETest.getBuffer(), deeDEETest.length(), NULL, &status);
	3485	if (U_FAILURE(status) \|\| (UnicodeString(buffer2)!= deedeeTest)) {
	3486	errln("Fails: Can't lowercase surrogates.");
	3487	}
	3488	}
	3489
	3490	static void _trans(Transliterator& t, const UnicodeString& src,
	3491	UnicodeString& result) {
	3492	result = src;
	3493	t.transliterate(result);
	3494	}
	3495
	3496	static void _trans(const UnicodeString& id, const UnicodeString& src,
	3497	UnicodeString& result, UErrorCode ec) {
	3498	UParseError pe;
	3499	Transliterator *t = Transliterator::createInstance(id, UTRANS_FORWARD, pe, ec);
	3500	if (U_SUCCESS(ec)) {
	3501	_trans(*t, src, result);
	3502	}
	3503	delete t;
	3504	}
	3505
	3506	static UnicodeString _findMatch(const UnicodeString& source,
	3507	const UnicodeString* pairs) {
	3508	UnicodeString empty;
	3509	for (int32_t i=0; pairs[i].length() > 0; i+=2) {
	3510	if (0==source.caseCompare(pairs[i], U_FOLD_CASE_DEFAULT)) {
	3511	return pairs[i+1];
	3512	}
	3513	}
	3514	return empty;
	3515	}
	3516
	3517	// Check to see that incremental gets at least part way through a reasonable string.
	3518
	3519	void TransliteratorTest::TestIncrementalProgress(void) {
	3520	UErrorCode ec = U_ZERO_ERROR;
	3521	UnicodeString latinTest = "The Quick Brown Fox.";
	3522	UnicodeString devaTest;
	3523	_trans("Latin-Devanagari", latinTest, devaTest, ec);
	3524	UnicodeString kataTest;
	3525	_trans("Latin-Katakana", latinTest, kataTest, ec);
	3526	if (U_FAILURE(ec)) {
	3527	errln("FAIL: Internal error");
	3528	return;
	3529	}
	3530	const UnicodeString tests[] = {
	3531	"Any", latinTest,
	3532	"Latin", latinTest,
	3533	"Halfwidth", latinTest,
	3534	"Devanagari", devaTest,
	3535	"Katakana", kataTest,
	3536	"" // END MARKER
	3537	};
	3538
	3539	UnicodeString test("The Quick Brown Fox Jumped Over The Lazy Dog.");
	3540	int32_t i = 0, j=0, k=0;
	3541	int32_t sources = Transliterator::countAvailableSources();
	3542	for (i = 0; i < sources; i++) {
	3543	UnicodeString source;
	3544	Transliterator::getAvailableSource(i, source);
	3545	UnicodeString test = _findMatch(source, tests);
	3546	if (test.length() == 0) {
	3547	logln((UnicodeString)"Skipping " + source + "-X");
	3548	continue;
	3549	}
	3550	int32_t targets = Transliterator::countAvailableTargets(source);
	3551	for (j = 0; j < targets; j++) {
	3552	UnicodeString target;
	3553	Transliterator::getAvailableTarget(j, source, target);
	3554	int32_t variants = Transliterator::countAvailableVariants(source, target);
	3555	for (k =0; k< variants; k++) {
	3556	UnicodeString variant;
	3557	UParseError err;
	3558	UErrorCode status = U_ZERO_ERROR;
	3559
	3560	Transliterator::getAvailableVariant(k, source, target, variant);
	3561	UnicodeString id = source + "-" + target + "/" + variant;
	3562
	3563	Transliterator *t = Transliterator::createInstance(id, UTRANS_FORWARD, err, status);
	3564	if (U_FAILURE(status)) {
	3565	dataerrln((UnicodeString)"FAIL: Could not create " + id);
	3566	delete t;
	3567	continue;
	3568	}
	3569	status = U_ZERO_ERROR;
	3570	CheckIncrementalAux(t, test);
	3571
	3572	UnicodeString rev;
	3573	_trans(*t, test, rev);
	3574	Transliterator *inv = t->createInverse(status);
	3575	if (U_FAILURE(status)) {
	3576	#if UCONFIG_NO_BREAK_ITERATION
	3577	// If UCONFIG_NO_BREAK_ITERATION is on, then only Thai should fail.
	3578	if (id.compare((UnicodeString)"Latin-Thai/") != 0)
	3579	#endif
	3580	errln((UnicodeString)"FAIL: Could not create inverse of " + id);
	3581
	3582	delete t;
	3583	delete inv;
	3584	continue;
	3585	}
	3586	CheckIncrementalAux(inv, rev);
	3587	delete t;
	3588	delete inv;
	3589	}
	3590	}
	3591	}
	3592	}
	3593
	3594	void TransliteratorTest::CheckIncrementalAux(const Transliterator* t,
	3595	const UnicodeString& input) {
	3596	UErrorCode ec = U_ZERO_ERROR;
	3597	UTransPosition pos;
	3598	UnicodeString test = input;
	3599
	3600	pos.contextStart = 0;
	3601	pos.contextLimit = input.length();
	3602	pos.start = 0;
	3603	pos.limit = input.length();
	3604
	3605	t->transliterate(test, pos, ec);
	3606	if (U_FAILURE(ec)) {
	3607	errln((UnicodeString)"FAIL: transliterate() error " + u_errorName(ec));
	3608	return;
	3609	}
	3610	UBool gotError = FALSE;
	3611
	3612	// we have a few special cases. Any-Remove (pos.start = 0, but also = limit) and U+XXXXX?X?
	3613
	3614	if (pos.start == 0 && pos.limit != 0 && t->getID() != "Hex-Any/Unicode") {
	3615	errln((UnicodeString)"No Progress, " +
	3616	t->getID() + ": " + formatInput(test, input, pos));
	3617	gotError = TRUE;
	3618	} else {
	3619	logln((UnicodeString)"PASS Progress, " +
	3620	t->getID() + ": " + formatInput(test, input, pos));
	3621	}
	3622	t->finishTransliteration(test, pos);
	3623	if (pos.start != pos.limit) {
	3624	errln((UnicodeString)"Incomplete, " +
	3625	t->getID() + ": " + formatInput(test, input, pos));
	3626	gotError = TRUE;
	3627	}
	3628	}
	3629
	3630	void TransliteratorTest::TestFunction() {
	3631	// Careful with spacing and ';' here: Phrase this exactly
	3632	// as toRules() is going to return it. If toRules() changes
	3633	// with regard to spacing or ';', then adjust this string.
	3634	UnicodeString rule =
	3635	"([:Lu:]) > $1 '(' &Lower( $1 ) '=' &Hex( &Any-Lower( $1 ) ) ')';";
	3636
	3637	UParseError pe;
	3638	UErrorCode ec = U_ZERO_ERROR;
	3639	Transliterator *t = Transliterator::createFromRules("Test", rule, UTRANS_FORWARD, pe, ec);
	3640	if (t == NULL) {
	3641	dataerrln("FAIL: createFromRules failed - %s", u_errorName(ec));
	3642	return;
	3643	}
	3644
	3645	UnicodeString r;
	3646	t->toRules(r, TRUE);
	3647	if (r == rule) {
	3648	logln((UnicodeString)"OK: toRules() => " + r);
	3649	} else {
	3650	errln((UnicodeString)"FAIL: toRules() => " + r +
	3651	", expected " + rule);
	3652	}
	3653
	3654	expect(*t, "The Quick Brown Fox",
	3655	UNICODE_STRING_SIMPLE("T(t=\\u0074)he Q(q=\\u0071)uick B(b=\\u0062)rown F(f=\\u0066)ox"));
	3656
	3657	delete t;
	3658	}
	3659
	3660	void TransliteratorTest::TestInvalidBackRef(void) {
	3661	UnicodeString rule = ". > $1;";
	3662	UnicodeString rule2 =CharsToUnicodeString("(.) <> &hex/unicode($1) &name($1); . > $1; [{}] >\\u0020;");
	3663	UParseError pe;
	3664	UErrorCode ec = U_ZERO_ERROR;
	3665	Transliterator *t = Transliterator::createFromRules("Test", rule, UTRANS_FORWARD, pe, ec);
	3666	Transliterator *t2 = Transliterator::createFromRules("Test2", rule2, UTRANS_FORWARD, pe, ec);
	3667
	3668	if (t != NULL) {
	3669	errln("FAIL: createFromRules should have returned NULL");
	3670	delete t;
	3671	}
	3672
	3673	if (t2 != NULL) {
	3674	errln("FAIL: createFromRules should have returned NULL");
	3675	delete t2;
	3676	}
	3677
	3678	if (U_SUCCESS(ec)) {
	3679	errln("FAIL: Ok: . > $1; => no error");
	3680	} else {
	3681	logln((UnicodeString)"Ok: . > $1; => " + u_errorName(ec));
	3682	}
	3683	}
	3684
	3685	void TransliteratorTest::TestMulticharStringSet() {
	3686	// Basic testing
	3687	const char* rule =
	3688	" [{aa}] > x;"
	3689	" a > y;"
	3690	" [b{bc}] > z;"
	3691	"[{gd}] { e > q;"
	3692	" e } [{fg}] > r;" ;
	3693
	3694	UParseError pe;
	3695	UErrorCode ec = U_ZERO_ERROR;
	3696	Transliterator* t = Transliterator::createFromRules("Test", rule, UTRANS_FORWARD, pe, ec);
	3697	if (t == NULL \|\| U_FAILURE(ec)) {
	3698	delete t;
	3699	errln("FAIL: createFromRules failed");
	3700	return;
	3701	}
	3702
	3703	expect(*t, "a aa ab bc d gd de gde gdefg ddefg",
	3704	"y x yz z d gd de gdq gdqfg ddrfg");
	3705	delete t;
	3706
	3707	// Overlapped string test. Make sure that when multiple
	3708	// strings can match that the longest one is matched.
	3709	rule =
	3710	" [a {ab} {abc}] > x;"
	3711	" b > y;"
	3712	" c > z;"
	3713	" q [t {st} {rst}] { e > p;" ;
	3714
	3715	t = Transliterator::createFromRules("Test", rule, UTRANS_FORWARD, pe, ec);
	3716	if (t == NULL \|\| U_FAILURE(ec)) {
	3717	delete t;
	3718	errln("FAIL: createFromRules failed");
	3719	return;
	3720	}
	3721
	3722	expect(*t, "a ab abc qte qste qrste",
	3723	"x x x qtp qstp qrstp");
	3724	delete t;
	3725	}
	3726
	3727	// vvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvv
	3728	// BEGIN TestUserFunction support factory
	3729
	3730	Transliterator* _TUFF[4];
	3731	UnicodeString* _TUFID[4];
	3732
	3733	static Transliterator* U_EXPORT2 _TUFFactory(const UnicodeString& /ID/,
	3734	Transliterator::Token context) {
	3735	return _TUFF[context.integer]->clone();
	3736	}
	3737
	3738	static void _TUFReg(const UnicodeString& ID, Transliterator* t, int32_t n) {
	3739	_TUFF[n] = t;
	3740	_TUFID[n] = new UnicodeString(ID);
	3741	Transliterator::registerFactory(ID, _TUFFactory, Transliterator::integerToken(n));
	3742	}
	3743
	3744	static void _TUFUnreg(int32_t n) {
	3745	if (_TUFF[n] != NULL) {
	3746	Transliterator::unregister(*_TUFID[n]);
	3747	delete _TUFF[n];
	3748	delete _TUFID[n];
	3749	}
	3750	}
	3751
	3752	// END TestUserFunction support factory
	3753	// ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
	3754
	3755	/**
	3756	* Test that user-registered transliterators can be used under function
	3757	* syntax.
	3758	*/
	3759	void TransliteratorTest::TestUserFunction() {
	3760
	3761	Transliterator* t;
	3762	UParseError pe;
	3763	UErrorCode ec = U_ZERO_ERROR;
	3764
	3765	// Setup our factory
	3766	int32_t i;
	3767	for (i=0; i<4; ++i) {
	3768	_TUFF[i] = NULL;
	3769	}
	3770
	3771	// There's no need to register inverses if we don't use them
	3772	t = Transliterator::createFromRules("gif",
	3773	UNICODE_STRING_SIMPLE("'\\'u(..)(..) > '<img src=\"http://www.unicode.org/gifs/24/' $1 '/U' $1$2 '.gif\">';"),
	3774	UTRANS_FORWARD, pe, ec);
	3775	if (t == NULL \|\| U_FAILURE(ec)) {
	3776	dataerrln((UnicodeString)"FAIL: createFromRules gif " + u_errorName(ec));
	3777	return;
	3778	}
	3779	_TUFReg("Any-gif", t, 0);
	3780
	3781	t = Transliterator::createFromRules("RemoveCurly",
	3782	UNICODE_STRING_SIMPLE("[\\{\\}] > ; '\\N' > ;"),
	3783	UTRANS_FORWARD, pe, ec);
	3784	if (t == NULL \|\| U_FAILURE(ec)) {
	3785	errln((UnicodeString)"FAIL: createFromRules RemoveCurly " + u_errorName(ec));
	3786	goto FAIL;
	3787	}
	3788	expect(*t, UNICODE_STRING_SIMPLE("\\N{name}"), "name");
	3789	_TUFReg("Any-RemoveCurly", t, 1);
	3790
	3791	logln("Trying &hex");
	3792	t = Transliterator::createFromRules("hex2",
	3793	"(.) > &hex($1);",
	3794	UTRANS_FORWARD, pe, ec);
	3795	if (t == NULL \|\| U_FAILURE(ec)) {
	3796	errln("FAIL: createFromRules");
	3797	goto FAIL;
	3798	}
	3799	logln("Registering");
	3800	_TUFReg("Any-hex2", t, 2);
	3801	t = Transliterator::createInstance("Any-hex2", UTRANS_FORWARD, ec);
	3802	if (t == NULL \|\| U_FAILURE(ec)) {
	3803	errln((UnicodeString)"FAIL: createInstance Any-hex2 " + u_errorName(ec));
	3804	goto FAIL;
	3805	}
	3806	expect(*t, "abc", UNICODE_STRING_SIMPLE("\\u0061\\u0062\\u0063"));
	3807	delete t;
	3808
	3809	logln("Trying &gif");
	3810	t = Transliterator::createFromRules("gif2",
	3811	"(.) > &Gif(&Hex2($1));",
	3812	UTRANS_FORWARD, pe, ec);
	3813	if (t == NULL \|\| U_FAILURE(ec)) {
	3814	errln((UnicodeString)"FAIL: createFromRules gif2 " + u_errorName(ec));
	3815	goto FAIL;
	3816	}
	3817	logln("Registering");
	3818	_TUFReg("Any-gif2", t, 3);
	3819	t = Transliterator::createInstance("Any-gif2", UTRANS_FORWARD, ec);
	3820	if (t == NULL \|\| U_FAILURE(ec)) {
	3821	errln((UnicodeString)"FAIL: createInstance Any-gif2 " + u_errorName(ec));
	3822	goto FAIL;
	3823	}
	3824	expect(*t, "ab", "<img src=\"http://www.unicode.org/gifs/24/00/U0061.gif\">"
	3825	"<img src=\"http://www.unicode.org/gifs/24/00/U0062.gif\">");
	3826	delete t;
	3827
	3828	// Test that filters are allowed after &
	3829	t = Transliterator::createFromRules("test",
	3830	"(.) > &Hex($1) ' ' &RemoveCurly(&Name($1)) ' ';",
	3831	UTRANS_FORWARD, pe, ec);
	3832	if (t == NULL \|\| U_FAILURE(ec)) {
	3833	errln((UnicodeString)"FAIL: createFromRules test " + u_errorName(ec));
	3834	goto FAIL;
	3835	}
	3836	expect(*t, "abc",
	3837	UNICODE_STRING_SIMPLE("\\u0061 LATIN SMALL LETTER A \\u0062 LATIN SMALL LETTER B \\u0063 LATIN SMALL LETTER C "));
	3838	delete t;
	3839
	3840	FAIL:
	3841	for (i=0; i<4; ++i) {
	3842	_TUFUnreg(i);
	3843	}
	3844	}
	3845
	3846	/**
	3847	* Test the Any-X transliterators.
	3848	*/
	3849	void TransliteratorTest::TestAnyX(void) {
	3850	UParseError parseError;
	3851	UErrorCode status = U_ZERO_ERROR;
	3852	Transliterator* anyLatin =
	3853	Transliterator::createInstance("Any-Latin", UTRANS_FORWARD, parseError, status);
	3854	if (anyLatin==0) {
	3855	dataerrln("FAIL: createInstance returned NULL - %s", u_errorName(status));
	3856	delete anyLatin;
	3857	return;
	3858	}
	3859
	3860	expect(*anyLatin,
	3861	CharsToUnicodeString("greek:\\u03B1\\u03B2\\u03BA\\u0391\\u0392\\u039A hiragana:\\u3042\\u3076\\u304F cyrillic:\\u0430\\u0431\\u0446"),
	3862	CharsToUnicodeString("greek:abkABK hiragana:abuku cyrillic:abc"));
	3863
	3864	delete anyLatin;
	3865	}
	3866
	3867	/**
	3868	* Test Any-X transliterators with sample letters from all scripts.
	3869	*/
	3870	void TransliteratorTest::TestAny(void) {
	3871	UErrorCode status = U_ZERO_ERROR;
	3872	// Note: there is a lot of implict construction of UnicodeStrings from (char *) in
	3873	// function call parameters going on in this test.
	3874	UnicodeSet alphabetic("[:alphabetic:]", status);
	3875	if (U_FAILURE(status)) {
	3876	dataerrln("Failure: file %s, line %d, status = %s", __FILE__, __LINE__, u_errorName(status));
	3877	return;
	3878	}
	3879	alphabetic.freeze();
	3880
	3881	UnicodeString testString;
	3882	for (int32_t i = 0; i < USCRIPT_CODE_LIMIT; i++) {
	3883	const char *scriptName = uscript_getShortName((UScriptCode)i);
	3884	if (scriptName == NULL) {
	3885	errln("Failure: file %s, line %d: Script Code %d is invalid, ", __FILE__, __LINE__, i);
	3886	return;
	3887	}
	3888
	3889	UnicodeSet sample;
	3890	sample.applyPropertyAlias("script", scriptName, status);
	3891	if (U_FAILURE(status)) {
	3892	errln("Failure: file %s, line %d, status = %s", __FILE__, __LINE__, u_errorName(status));
	3893	return;
	3894	}
	3895	sample.retainAll(alphabetic);
	3896	for (int32_t count=0; count<5; count++) {
	3897	UChar32 c = sample.charAt(count);
	3898	if (c == -1) {
	3899	break;
	3900	}
	3901	testString.append(c);
	3902	}
	3903	}
	3904
	3905	UParseError parseError;
	3906	Transliterator* anyLatin =
	3907	Transliterator::createInstance("Any-Latin", UTRANS_FORWARD, parseError, status);
	3908	if (U_FAILURE(status)) {
	3909	dataerrln("Failure: file %s, line %d, status = %s", __FILE__, __LINE__, u_errorName(status));
	3910	return;
	3911	}
	3912
	3913	logln(UnicodeString("Sample set for Any-Latin: ") + testString);
	3914	anyLatin->transliterate(testString);
	3915	logln(UnicodeString("Sample result for Any-Latin: ") + testString);
	3916	delete anyLatin;
	3917	}
	3918
	3919
	3920	/**
	3921	* Test the source and target set API. These are only implemented
	3922	* for RBT and CompoundTransliterator at this time.
	3923	*/
	3924	void TransliteratorTest::TestSourceTargetSet() {
	3925	UErrorCode ec = U_ZERO_ERROR;
	3926
	3927	// Rules
	3928	const char* r =
	3929	"a > b; "
	3930	"r [x{lu}] > q;";
	3931
	3932	// Expected source
	3933	UnicodeSet expSrc("[arx{lu}]", ec);
	3934
	3935	// Expected target
	3936	UnicodeSet expTrg("[bq]", ec);
	3937
	3938	UParseError pe;
	3939	Transliterator* t = Transliterator::createFromRules("test", r, UTRANS_FORWARD, pe, ec);
	3940
	3941	if (U_FAILURE(ec)) {
	3942	delete t;
	3943	errln("FAIL: Couldn't set up test");
	3944	return;
	3945	}
	3946
	3947	UnicodeSet src; t->getSourceSet(src);
	3948	UnicodeSet trg; t->getTargetSet(trg);
	3949
	3950	if (src == expSrc && trg == expTrg) {
	3951	UnicodeString a, b;
	3952	logln((UnicodeString)"Ok: " +
	3953	r + " => source = " + src.toPattern(a, TRUE) +
	3954	", target = " + trg.toPattern(b, TRUE));
	3955	} else {
	3956	UnicodeString a, b, c, d;
	3957	errln((UnicodeString)"FAIL: " +
	3958	r + " => source = " + src.toPattern(a, TRUE) +
	3959	", expected " + expSrc.toPattern(b, TRUE) +
	3960	"; target = " + trg.toPattern(c, TRUE) +
	3961	", expected " + expTrg.toPattern(d, TRUE));
	3962	}
	3963
	3964	delete t;
	3965	}
	3966
	3967	/**
	3968	* Test handling of Pattern_White_Space, for both RBT and UnicodeSet.
	3969	*/
	3970	void TransliteratorTest::TestPatternWhiteSpace() {
	3971	// Rules
	3972	const char* r = "a > \\u200E b;";
	3973
	3974	UErrorCode ec = U_ZERO_ERROR;
	3975	UParseError pe;
	3976	Transliterator* t = Transliterator::createFromRules("test", CharsToUnicodeString(r), UTRANS_FORWARD, pe, ec);
	3977
	3978	if (U_FAILURE(ec)) {
	3979	errln("FAIL: Couldn't set up test");
	3980	} else {
	3981	expect(*t, "a", "b");
	3982	}
	3983	delete t;
	3984
	3985	// UnicodeSet
	3986	ec = U_ZERO_ERROR;
	3987	UnicodeSet set(CharsToUnicodeString("[a \\u200E]"), ec);
	3988
	3989	if (U_FAILURE(ec)) {
	3990	errln("FAIL: Couldn't set up test");
	3991	} else {
	3992	if (set.contains(0x200E)) {
	3993	errln("FAIL: U+200E not being ignored by UnicodeSet");
	3994	}
	3995	}
	3996	}
	3997	//======================================================================
	3998	// this method is in TestUScript.java
	3999	//======================================================================
	4000	void TransliteratorTest::TestAllCodepoints(){
	4001	UScriptCode code= USCRIPT_INVALID_CODE;
	4002	char id[256]={'\0'};
	4003	char abbr[256]={'\0'};
	4004	char newId[256]={'\0'};
	4005	char newAbbrId[256]={'\0'};
	4006	char oldId[256]={'\0'};
	4007	char oldAbbrId[256]={'\0'};
	4008
	4009	UErrorCode status =U_ZERO_ERROR;
	4010	UParseError pe;
	4011
	4012	for(uint32_t i = 0; i<=0x10ffff; i++){
	4013	code = uscript_getScript(i,&status);
	4014	if(code == USCRIPT_INVALID_CODE){
	4015	dataerrln("uscript_getScript for codepoint \\U%08X failed.", i);
	4016	}
	4017	const char* myId = uscript_getName(code);
	4018	if(!myId) {
	4019	dataerrln("Valid script code returned NULL name. Check your data!");
	4020	return;
	4021	}
	4022	uprv_strcpy(id,myId);
	4023	uprv_strcpy(abbr,uscript_getShortName(code));
	4024
	4025	uprv_strcpy(newId,"[:");
	4026	uprv_strcat(newId,id);
	4027	uprv_strcat(newId,":];NFD");
	4028
	4029	uprv_strcpy(newAbbrId,"[:");
	4030	uprv_strcat(newAbbrId,abbr);
	4031	uprv_strcat(newAbbrId,":];NFD");
	4032
	4033	if(uprv_strcmp(newId,oldId)!=0){
	4034	Transliterator* t = Transliterator::createInstance(newId,UTRANS_FORWARD,pe,status);
	4035	if(t==NULL \|\| U_FAILURE(status)){
	4036	dataerrln((UnicodeString)"FAIL: Could not create " + id + " - " + u_errorName(status));
	4037	}
	4038	delete t;
	4039	}
	4040	if(uprv_strcmp(newAbbrId,oldAbbrId)!=0){
	4041	Transliterator* t = Transliterator::createInstance(newAbbrId,UTRANS_FORWARD,pe,status);
	4042	if(t==NULL \|\| U_FAILURE(status)){
	4043	dataerrln((UnicodeString)"FAIL: Could not create " + id + " - " + u_errorName(status));
	4044	}
	4045	delete t;
	4046	}
	4047	uprv_strcpy(oldId,newId);
	4048	uprv_strcpy(oldAbbrId, newAbbrId);
	4049
	4050	}
	4051
	4052	}
	4053
	/*
	 * Create the transliterator registered under 'id' and check that its
	 * dynamic class ID equals the static class ID of class 'cls'.
	 * 'id' must be a C string: it is also passed to a %s format.
	 */
	#define TEST_TRANSLIT_ID(id, cls) { \
	    UErrorCode status_ = U_ZERO_ERROR; \
	    Transliterator* xlit_ = Transliterator::createInstance(id, UTRANS_FORWARD, status_); \
	    if (U_SUCCESS(status_)) { \
	        if (xlit_->getDynamicClassID() != cls::getStaticClassID()) { \
	            errln("FAIL: " #cls " dynamic and static class ID mismatch"); \
	        } \
	        /* xlit_ = xlit_; -- can't do this: coverage test for assignment op */ \
	    } else { \
	        dataerrln("FAIL: Couldn't create %s - %s", id, u_errorName(status_)); \
	    } \
	    delete xlit_; \
	}
	4067
	/*
	 * Build a transliterator from 'rule' and check that its dynamic class ID
	 * equals the static class ID of class 'cls'.
	 * 'rule' must be a string literal: it is concatenated into the failure
	 * message at compile time.
	 */
	#define TEST_TRANSLIT_RULE(rule, cls) { \
	    UErrorCode status_ = U_ZERO_ERROR; \
	    UParseError parseErr_; \
	    Transliterator* xlit_ = Transliterator::createFromRules("_", rule, UTRANS_FORWARD, parseErr_, status_); \
	    if (U_SUCCESS(status_)) { \
	        if (xlit_->getDynamicClassID() != cls ::getStaticClassID()) { \
	            errln("FAIL: " #cls " dynamic and static class ID mismatch"); \
	        } \
	        /* xlit_ = xlit_; -- can't do this: coverage test for assignment op */ \
	    } else { \
	        errln("FAIL: Couldn't create " rule); \
	    } \
	    delete xlit_; \
	}
	4082
	4083	void TransliteratorTest::TestBoilerplate() {
	4084	TEST_TRANSLIT_ID("Any-Latin", AnyTransliterator);
	4085	TEST_TRANSLIT_ID("Any-Hex", EscapeTransliterator);
	4086	TEST_TRANSLIT_ID("Hex-Any", UnescapeTransliterator);
	4087	TEST_TRANSLIT_ID("Lower", LowercaseTransliterator);
	4088	TEST_TRANSLIT_ID("Upper", UppercaseTransliterator);
	4089	TEST_TRANSLIT_ID("Title", TitlecaseTransliterator);
	4090	TEST_TRANSLIT_ID("Null", NullTransliterator);
	4091	TEST_TRANSLIT_ID("Remove", RemoveTransliterator);
	4092	TEST_TRANSLIT_ID("Any-Name", UnicodeNameTransliterator);
	4093	TEST_TRANSLIT_ID("Name-Any", NameUnicodeTransliterator);
	4094	TEST_TRANSLIT_ID("NFD", NormalizationTransliterator);
	4095	TEST_TRANSLIT_ID("Latin-Greek", CompoundTransliterator);
	4096	TEST_TRANSLIT_RULE("a>b;", RuleBasedTransliterator);
	4097	}
	4098
	4099	void TransliteratorTest::TestAlternateSyntax() {
	4100	// U+2206 == &
	4101	// U+2190 == <
	4102	// U+2192 == >
	4103	// U+2194 == <>
	4104	expect(CharsToUnicodeString("a \\u2192 x; b \\u2190 y; c \\u2194 z"),
	4105	"abc",
	4106	"xbz");
	4107	expect(CharsToUnicodeString("([:^ASCII:]) \\u2192 \\u2206Name($1);"),
	4108	CharsToUnicodeString("<=\\u2190; >=\\u2192; <>=\\u2194; &=\\u2206"),
	4109	UNICODE_STRING_SIMPLE("<=\\N{LEFTWARDS ARROW}; >=\\N{RIGHTWARDS ARROW}; <>=\\N{LEFT RIGHT ARROW}; &=\\N{INCREMENT}"));
	4110	}
	4111
	4112	static const char* BEGIN_END_RULES[] = {
	4113	// [0]
	4114	"abc > xy;"
	4115	"aba > z;",
	4116
	4117	// [1]
	4118	/*
	4119	"::BEGIN;"
	4120	"abc > xy;"
	4121	"::END;"
	4122	"::BEGIN;"
	4123	"aba > z;"
	4124	"::END;",
	4125	*/
	4126	"", // test case commented out below, this is here to keep from messing up the indexes
	4127
	4128	// [2]
	4129	/*
	4130	"abc > xy;"
	4131	"::BEGIN;"
	4132	"aba > z;"
	4133	"::END;",
	4134	*/
	4135	"", // test case commented out below, this is here to keep from messing up the indexes
	4136
	4137	// [3]
	4138	/*
	4139	"::BEGIN;"
	4140	"abc > xy;"
	4141	"::END;"
	4142	"aba > z;",
	4143	*/
	4144	"", // test case commented out below, this is here to keep from messing up the indexes
	4145
	4146	// [4]
	4147	"abc > xy;"
	4148	"::Null;"
	4149	"aba > z;",
	4150
	4151	// [5]
	4152	"::Upper;"
	4153	"ABC > xy;"
	4154	"AB > x;"
	4155	"C > z;"
	4156	"::Upper;"
	4157	"XYZ > p;"
	4158	"XY > q;"
	4159	"Z > r;"
	4160	"::Upper;",
	4161
	4162	// [6]
	4163	"$ws = [[:Separator:][\\u0009-\\u000C]$];"
	4164	"$delim = [\\-$ws];"
	4165	"$ws $delim* > ' ';"
	4166	"'-' $delim* > '-';",
	4167
	4168	// [7]
	4169	"::Null;"
	4170	"$ws = [[:Separator:][\\u0009-\\u000C]$];"
	4171	"$delim = [\\-$ws];"
	4172	"$ws $delim* > ' ';"
	4173	"'-' $delim* > '-';",
	4174
	4175	// [8]
	4176	"$ws = [[:Separator:][\\u0009-\\u000C]$];"
	4177	"$delim = [\\-$ws];"
	4178	"$ws $delim* > ' ';"
	4179	"'-' $delim* > '-';"
	4180	"::Null;",
	4181
	4182	// [9]
	4183	"$ws = [[:Separator:][\\u0009-\\u000C]$];"
	4184	"$delim = [\\-$ws];"
	4185	"::Null;"
	4186	"$ws $delim* > ' ';"
	4187	"'-' $delim* > '-';",
	4188
	4189	// [10]
	4190	/*
	4191	"::BEGIN;"
	4192	"$ws = [[:Separator:][\\u0009-\\u000C]$];"
	4193	"$delim = [\\-$ws];"
	4194	"::END;"
	4195	"$ws $delim* > ' ';"
	4196	"'-' $delim* > '-';",
	4197	*/
	4198	"", // test case commented out below, this is here to keep from messing up the indexes
	4199
	4200	// [11]
	4201	/*
	4202	"$ws = [[:Separator:][\\u0009-\\u000C]$];"
	4203	"$delim = [\\-$ws];"
	4204	"::BEGIN;"
	4205	"$ws $delim* > ' ';"
	4206	"'-' $delim* > '-';"
	4207	"::END;",
	4208	*/
	4209	"", // test case commented out below, this is here to keep from messing up the indexes
	4210
	4211	// [12]
	4212	/*
	4213	"$ws = [[:Separator:][\\u0009-\\u000C]$];"
	4214	"$delim = [\\-$ws];"
	4215	"$ab = [ab];"
	4216	"::BEGIN;"
	4217	"$ws $delim* > ' ';"
	4218	"'-' $delim* > '-';"
	4219	"::END;"
	4220	"::BEGIN;"
	4221	"$ab { ' ' } $ab > '-';"
	4222	"c { ' ' > ;"
	4223	"::END;"
	4224	"::BEGIN;"
	4225	"'a-a' > a\\%\|a;"
	4226	"::END;",
	4227	*/
	4228	"", // test case commented out below, this is here to keep from messing up the indexes
	4229
	4230	// [13]
	4231	"$ws = [[:Separator:][\\u0009-\\u000C]$];"
	4232	"$delim = [\\-$ws];"
	4233	"$ab = [ab];"
	4234	"::Null;"
	4235	"$ws $delim* > ' ';"
	4236	"'-' $delim* > '-';"
	4237	"::Null;"
	4238	"$ab { ' ' } $ab > '-';"
	4239	"c { ' ' > ;"
	4240	"::Null;"
	4241	"'a-a' > a\\%\|a;",
	4242
	4243	// [14]
	4244	/*
	4245	"::[abc];"
	4246	"::BEGIN;"
	4247	"abc > xy;"
	4248	"::END;"
	4249	"::BEGIN;"
	4250	"aba > yz;"
	4251	"::END;"
	4252	"::Upper;",
	4253	*/
	4254	"", // test case commented out below, this is here to keep from messing up the indexes
	4255
	4256	// [15]
	4257	"::[abc];"
	4258	"abc > xy;"
	4259	"::Null;"
	4260	"aba > yz;"
	4261	"::Upper;",
	4262
	4263	// [16]
	4264	/*
	4265	"::[abc];"
	4266	"::BEGIN;"
	4267	"abc <> xy;"
	4268	"::END;"
	4269	"::BEGIN;"
	4270	"aba <> yz;"
	4271	"::END;"
	4272	"::Upper(Lower);"
	4273	"::([XYZ]);"
	4274	*/
	4275	"", // test case commented out below, this is here to keep from messing up the indexes
	4276
	4277	// [17]
	4278	"::[abc];"
	4279	"abc <> xy;"
	4280	"::Null;"
	4281	"aba <> yz;"
	4282	"::Upper(Lower);"
	4283	"::([XYZ]);"
	4284	};
	4285	static const int32_t BEGIN_END_RULES_length = (int32_t)(sizeof(BEGIN_END_RULES) / sizeof(BEGIN_END_RULES[0]));
	4286
	4287	/*
	4288	(This entire test is commented out below and will need some heavy revision when we re-add
	4289	the ::BEGIN/::END stuff)
	4290	static const char* BOGUS_BEGIN_END_RULES[] = {
	4291	// [7]
	4292	"::BEGIN;"
	4293	"abc > xy;"
	4294	"::BEGIN;"
	4295	"aba > z;"
	4296	"::END;"
	4297	"::END;",
	4298
	4299	// [8]
	4300	"abc > xy;"
	4301	" aba > z;"
	4302	"::END;",
	4303
	4304	// [9]
	4305	"::BEGIN;"
	4306	"::Upper;"
	4307	"::END;"
	4308	};
	4309	static const int32_t BOGUS_BEGIN_END_RULES_length = (int32_t)(sizeof(BOGUS_BEGIN_END_RULES) / sizeof(BOGUS_BEGIN_END_RULES[0]));
	4310	*/
	4311
	4312	static const char* BEGIN_END_TEST_CASES[] = {
	4313	// rules input expected output
	4314	BEGIN_END_RULES[0], "abc ababc aba", "xy zbc z",
	4315	// BEGIN_END_RULES[1], "abc ababc aba", "xy abxy z",
	4316	// BEGIN_END_RULES[2], "abc ababc aba", "xy abxy z",
	4317	// BEGIN_END_RULES[3], "abc ababc aba", "xy abxy z",
	4318	BEGIN_END_RULES[4], "abc ababc aba", "xy abxy z",
	4319	BEGIN_END_RULES[5], "abccabaacababcbc", "PXAARXQBR",
	4320
	4321	BEGIN_END_RULES[6], "e e - e---e- e", "e e e-e-e",
	4322	BEGIN_END_RULES[7], "e e - e---e- e", "e e e-e-e",
	4323	BEGIN_END_RULES[8], "e e - e---e- e", "e e e-e-e",
	4324	BEGIN_END_RULES[9], "e e - e---e- e", "e e e-e-e",
	4325	// BEGIN_END_RULES[10], "e e - e---e- e", "e e e-e-e",
	4326	// BEGIN_END_RULES[11], "e e - e---e- e", "e e e-e-e",
	4327	// BEGIN_END_RULES[12], "e e - e---e- e", "e e e-e-e",
	4328	// BEGIN_END_RULES[12], "a a a a", "a%a%a%a",
	4329	// BEGIN_END_RULES[12], "a a-b c b a", "a%a-b cb-a",
	4330	BEGIN_END_RULES[13], "e e - e---e- e", "e e e-e-e",
	4331	BEGIN_END_RULES[13], "a a a a", "a%a%a%a",
	4332	BEGIN_END_RULES[13], "a a-b c b a", "a%a-b cb-a",
	4333
	4334	// BEGIN_END_RULES[14], "abc xy ababc xyz aba", "XY xy ABXY xyz YZ",
	4335	BEGIN_END_RULES[15], "abc xy ababc xyz aba", "XY xy ABXY xyz YZ",
	4336	// BEGIN_END_RULES[16], "abc xy ababc xyz aba", "XY xy ABXY xyz YZ",
	4337	BEGIN_END_RULES[17], "abc xy ababc xyz aba", "XY xy ABXY xyz YZ"
	4338	};
	4339	static const int32_t BEGIN_END_TEST_CASES_length = (int32_t)(sizeof(BEGIN_END_TEST_CASES) / sizeof(BEGIN_END_TEST_CASES[0]));
	4340
	4341	void TransliteratorTest::TestBeginEnd() {
	4342	// run through the list of test cases above
	4343	int32_t i = 0;
	4344	for (i = 0; i < BEGIN_END_TEST_CASES_length; i += 3) {
	4345	expect((UnicodeString)"Test case #" + (i / 3),
	4346	UnicodeString(BEGIN_END_TEST_CASES[i], -1, US_INV),
	4347	UnicodeString(BEGIN_END_TEST_CASES[i + 1], -1, US_INV),
	4348	UnicodeString(BEGIN_END_TEST_CASES[i + 2], -1, US_INV));
	4349	}
	4350
	4351	// instantiate the one reversible rule set in the reverse direction and make sure it does the right thing
	4352	UParseError parseError;
	4353	UErrorCode status = U_ZERO_ERROR;
	4354	Transliterator* reversed = Transliterator::createFromRules("Reversed", UnicodeString(BEGIN_END_RULES[17]),
	4355	UTRANS_REVERSE, parseError, status);
	4356	if (reversed == 0 \|\| U_FAILURE(status)) {
	4357	reportParseError(UnicodeString("FAIL: Couldn't create reversed transliterator"), parseError, status);
	4358	} else {
	4359	expect(*reversed, UnicodeString("xy XY XYZ yz YZ"), UnicodeString("xy abc xaba yz aba"));
	4360	}
	4361	delete reversed;
	4362
	4363	// finally, run through the list of syntactically-ill-formed rule sets above and make sure
	4364	// that all of them cause errors
	4365	/*
	4366	(commented out until we have the real ::BEGIN/::END stuff in place
	4367	for (i = 0; i < BOGUS_BEGIN_END_RULES_length; i++) {
	4368	UParseError parseError;
	4369	UErrorCode status = U_ZERO_ERROR;
	4370	Transliterator* t = Transliterator::createFromRules("foo", UnicodeString(BOGUS_BEGIN_END_RULES[i]),
	4371	UTRANS_FORWARD, parseError, status);
	4372	if (!U_FAILURE(status)) {
	4373	delete t;
	4374	errln((UnicodeString)"Should have gotten syntax error from " + BOGUS_BEGIN_END_RULES[i]);
	4375	}
	4376	}
	4377	*/
	4378	}
	4379
	4380	void TransliteratorTest::TestBeginEndToRules() {
	4381	// run through the same list of test cases we used above, but this time, instead of just
	4382	// instantiating a Transliterator from the rules and running the test against it, we instantiate
	4383	// a Transliterator from the rules, do toRules() on it, instantiate a Transliterator from
	4384	// the resulting set of rules, and make sure that the generated rule set is semantically equivalent
	4385	// to (i.e., does the same thing as) the original rule set
	4386	for (int32_t i = 0; i < BEGIN_END_TEST_CASES_length; i += 3) {
	4387	UParseError parseError;
	4388	UErrorCode status = U_ZERO_ERROR;
	4389	Transliterator* t = Transliterator::createFromRules("--", UnicodeString(BEGIN_END_TEST_CASES[i], -1, US_INV),
	4390	UTRANS_FORWARD, parseError, status);
	4391	if (U_FAILURE(status)) {
	4392	reportParseError(UnicodeString("FAIL: Couldn't create transliterator"), parseError, status);
	4393	} else {
	4394	UnicodeString rules;
	4395	t->toRules(rules, TRUE);
	4396	Transliterator* t2 = Transliterator::createFromRules((UnicodeString)"Test case #" + (i / 3), rules,
	4397	UTRANS_FORWARD, parseError, status);
	4398	if (U_FAILURE(status)) {
	4399	reportParseError(UnicodeString("FAIL: Couldn't create transliterator from generated rules"),
	4400	parseError, status);
	4401	delete t;
	4402	} else {
	4403	expect(*t2,
	4404	UnicodeString(BEGIN_END_TEST_CASES[i + 1], -1, US_INV),
	4405	UnicodeString(BEGIN_END_TEST_CASES[i + 2], -1, US_INV));
	4406	delete t;
	4407	delete t2;
	4408	}
	4409	}
	4410	}
	4411
	4412	// do the same thing for the reversible test case
	4413	UParseError parseError;
	4414	UErrorCode status = U_ZERO_ERROR;
	4415	Transliterator* reversed = Transliterator::createFromRules("Reversed", UnicodeString(BEGIN_END_RULES[17]),
	4416	UTRANS_REVERSE, parseError, status);
	4417	if (U_FAILURE(status)) {
	4418	reportParseError(UnicodeString("FAIL: Couldn't create reversed transliterator"), parseError, status);
	4419	} else {
	4420	UnicodeString rules;
	4421	reversed->toRules(rules, FALSE);
	4422	Transliterator* reversed2 = Transliterator::createFromRules("Reversed", rules, UTRANS_FORWARD,
	4423	parseError, status);
	4424	if (U_FAILURE(status)) {
	4425	reportParseError(UnicodeString("FAIL: Couldn't create reversed transliterator from generated rules"),
	4426	parseError, status);
	4427	delete reversed;
	4428	} else {
	4429	expect(*reversed2,
	4430	UnicodeString("xy XY XYZ yz YZ"),
	4431	UnicodeString("xy abc xaba yz aba"));
	4432	delete reversed;
	4433	delete reversed2;
	4434	}
	4435	}
	4436	}
	4437
	4438	void TransliteratorTest::TestRegisterAlias() {
	4439	UnicodeString longID("Lower;[aeiou]Upper");
	4440	UnicodeString shortID("Any-CapVowels");
	4441	UnicodeString reallyShortID("CapVowels");
	4442
	4443	Transliterator::registerAlias(shortID, longID);
	4444
	4445	UErrorCode err = U_ZERO_ERROR;
	4446	Transliterator* t1 = Transliterator::createInstance(longID, UTRANS_FORWARD, err);
	4447	if (U_FAILURE(err)) {
	4448	errln("Failed to instantiate transliterator with long ID");
	4449	Transliterator::unregister(shortID);
	4450	return;
	4451	}
	4452	Transliterator* t2 = Transliterator::createInstance(reallyShortID, UTRANS_FORWARD, err);
	4453	if (U_FAILURE(err)) {
	4454	errln("Failed to instantiate transliterator with short ID");
	4455	delete t1;
	4456	Transliterator::unregister(shortID);
	4457	return;
	4458	}
	4459
	4460	if (t1->getID() != longID)
	4461	errln("Transliterator instantiated with long ID doesn't have long ID");
	4462	if (t2->getID() != reallyShortID)
	4463	errln("Transliterator instantiated with short ID doesn't have short ID");
	4464
	4465	UnicodeString rules1;
	4466	UnicodeString rules2;
	4467
	4468	t1->toRules(rules1, TRUE);
	4469	t2->toRules(rules2, TRUE);
	4470	if (rules1 != rules2)
	4471	errln("Alias transliterators aren't the same");
	4472
	4473	delete t1;
	4474	delete t2;
	4475	Transliterator::unregister(shortID);
	4476
	4477	t1 = Transliterator::createInstance(shortID, UTRANS_FORWARD, err);
	4478	if (U_SUCCESS(err)) {
	4479	errln("Instantiation with short ID succeeded after short ID was unregistered");
	4480	delete t1;
	4481	}
	4482
	4483	// try the same thing again, but this time with something other than
	4484	// an instance of CompoundTransliterator
	4485	UnicodeString realID("Latin-Greek");
	4486	UnicodeString fakeID("Latin-dlgkjdflkjdl");
	4487	Transliterator::registerAlias(fakeID, realID);
	4488
	4489	err = U_ZERO_ERROR;
	4490	t1 = Transliterator::createInstance(realID, UTRANS_FORWARD, err);
	4491	if (U_FAILURE(err)) {
	4492	dataerrln("Failed to instantiate transliterator with real ID - %s", u_errorName(err));
	4493	Transliterator::unregister(realID);
	4494	return;
	4495	}
	4496	t2 = Transliterator::createInstance(fakeID, UTRANS_FORWARD, err);
	4497	if (U_FAILURE(err)) {
	4498	errln("Failed to instantiate transliterator with fake ID");
	4499	delete t1;
	4500	Transliterator::unregister(realID);
	4501	return;
	4502	}
	4503
	4504	t1->toRules(rules1, TRUE);
	4505	t2->toRules(rules2, TRUE);
	4506	if (rules1 != rules2)
	4507	errln("Alias transliterators aren't the same");
	4508
	4509	delete t1;
	4510	delete t2;
	4511	Transliterator::unregister(fakeID);
	4512	}
	4513
	4514	void TransliteratorTest::TestRuleStripping() {
	4515	/*
	4516	#
	4517	\uE001>\u0C01; # SIGN
	4518	*/
	4519	static const UChar rule[] = {
	4520	0x0023,0x0020,0x000D,0x000A,
	4521	0xE001,0x003E,0x0C01,0x003B,0x0020,0x0023,0x0020,0x0053,0x0049,0x0047,0x004E,0
	4522	};
	4523	static const UChar expectedRule[] = {
	4524	0xE001,0x003E,0x0C01,0x003B,0
	4525	};
	4526	UChar result[sizeof(rule)/sizeof(rule[0])];
	4527	UErrorCode status = U_ZERO_ERROR;
	4528	int32_t len = utrans_stripRules(rule, (int32_t)(sizeof(rule)/sizeof(rule[0])), result, &status);
	4529	if (len != u_strlen(expectedRule)) {
	4530	errln("utrans_stripRules return len = %d", len);
	4531	}
	4532	if (u_strncmp(expectedRule, result, len) != 0) {
	4533	errln("utrans_stripRules did not return expected string");
	4534	}
	4535	}
	4536
	4537	/**
	4538	* Test the Halfwidth-Fullwidth transliterator (ticket 6281).
	4539	*/
	4540	void TransliteratorTest::TestHalfwidthFullwidth(void) {
	4541	UParseError parseError;
	4542	UErrorCode status = U_ZERO_ERROR;
	4543	Transliterator* hf = Transliterator::createInstance("Halfwidth-Fullwidth", UTRANS_FORWARD, parseError, status);
	4544	Transliterator* fh = Transliterator::createInstance("Fullwidth-Halfwidth", UTRANS_FORWARD, parseError, status);
	4545	if (hf == 0 \|\| fh == 0) {
	4546	dataerrln("FAIL: createInstance failed - %s", u_errorName(status));
	4547	delete hf;
	4548	delete fh;
	4549	return;
	4550	}
	4551
	4552	// Array of 2n items
	4553	// Each item is
	4554	// "hf"\|"fh"\|"both",
	4555	// <Halfwidth>,
	4556	// <Fullwidth>
	4557	const char* DATA[] = {
	4558	"both",
	4559	"\\uFFE9\\uFFEA\\uFFEB\\uFFEC\\u0061\\uFF71\\u00AF\\u0020",
	4560	"\\u2190\\u2191\\u2192\\u2193\\uFF41\\u30A2\\uFFE3\\u3000",
	4561	};
	4562	int32_t DATA_length = (int32_t)(sizeof(DATA) / sizeof(DATA[0]));
	4563
	4564	for (int32_t i=0; i<DATA_length; i+=3) {
	4565	UnicodeString h = CharsToUnicodeString(DATA[i+1]);
	4566	UnicodeString f = CharsToUnicodeString(DATA[i+2]);
	4567	switch (*DATA[i]) {
	4568	case 0x68: //'h': // Halfwidth-Fullwidth only
	4569	expect(*hf, h, f);
	4570	break;
	4571	case 0x66: //'f': // Fullwidth-Halfwidth only
	4572	expect(*fh, f, h);
	4573	break;
	4574	case 0x62: //'b': // both directions
	4575	expect(*hf, h, f);
	4576	expect(*fh, f, h);
	4577	break;
	4578	}
	4579	}
	4580	delete hf;
	4581	delete fh;
	4582	}
	4583
	4584
	4585	/**
	4586	* Test Thai. The text is the first paragraph of "What is Unicode" from the Unicode.org web site.
	4587	* TODO: confirm that the expected results are correct.
	4588	* For now, test just confirms that C++ and Java give identical results.
	4589	*/
	4590	void TransliteratorTest::TestThai(void) {
	4591	#if !UCONFIG_NO_BREAK_ITERATION
	4592	UParseError parseError;
	4593	UErrorCode status = U_ZERO_ERROR;
	4594	Transliterator* tr = Transliterator::createInstance("Any-Latin", UTRANS_FORWARD, parseError, status);
	4595	if (tr == 0) {
	4596	dataerrln("FAIL: createInstance failed - %s", u_errorName(status));
	4597	return;
	4598	}
	4599	if (U_FAILURE(status)) {
	4600	errln("FAIL: createInstance failed with %s", u_errorName(status));
	4601	return;
	4602	}
	4603	const char *thaiText =
	4604	"\\u0e42\\u0e14\\u0e22\\u0e1e\\u0e37\\u0e49\\u0e19\\u0e10\\u0e32\\u0e19\\u0e41\\u0e25\\u0e49\\u0e27, \\u0e04\\u0e2d"
	4605	"\\u0e21\\u0e1e\\u0e34\\u0e27\\u0e40\\u0e15\\u0e2d\\u0e23\\u0e4c\\u0e08\\u0e30\\u0e40\\u0e01\\u0e35\\u0e48\\u0e22"
	4606	"\\u0e27\\u0e02\\u0e49\\u0e2d\\u0e07\\u0e01\\u0e31\\u0e1a\\u0e40\\u0e23\\u0e37\\u0e48\\u0e2d\\u0e07\\u0e02\\u0e2d"
	4607	"\\u0e07\\u0e15\\u0e31\\u0e27\\u0e40\\u0e25\\u0e02. \\u0e04\\u0e2d\\u0e21\\u0e1e\\u0e34\\u0e27\\u0e40\\u0e15\\u0e2d"
	4608	"\\u0e23\\u0e4c\\u0e08\\u0e31\\u0e14\\u0e40\\u0e01\\u0e47\\u0e1a\\u0e15\\u0e31\\u0e27\\u0e2d\\u0e31\\u0e01\\u0e29"
	4609	"\\u0e23\\u0e41\\u0e25\\u0e30\\u0e2d\\u0e31\\u0e01\\u0e02\\u0e23\\u0e30\\u0e2d\\u0e37\\u0e48\\u0e19\\u0e46 \\u0e42"
	4610	"\\u0e14\\u0e22\\u0e01\\u0e32\\u0e23\\u0e01\\u0e33\\u0e2b\\u0e19\\u0e14\\u0e2b\\u0e21\\u0e32\\u0e22\\u0e40\\u0e25"
	4611	"\\u0e02\\u0e43\\u0e2b\\u0e49\\u0e2a\\u0e33\\u0e2b\\u0e23\\u0e31\\u0e1a\\u0e41\\u0e15\\u0e48\\u0e25\\u0e30\\u0e15"
	4612	"\\u0e31\\u0e27. \\u0e01\\u0e48\\u0e2d\\u0e19\\u0e2b\\u0e19\\u0e49\\u0e32\\u0e17\\u0e35\\u0e48\\u0e4a Unicode \\u0e08"
	4613	"\\u0e30\\u0e16\\u0e39\\u0e01\\u0e2a\\u0e23\\u0e49\\u0e32\\u0e07\\u0e02\\u0e36\\u0e49\\u0e19, \\u0e44\\u0e14\\u0e49"
	4614	"\\u0e21\\u0e35\\u0e23\\u0e30\\u0e1a\\u0e1a encoding \\u0e2d\\u0e22\\u0e39\\u0e48\\u0e2b\\u0e25\\u0e32\\u0e22\\u0e23"
	4615	"\\u0e49\\u0e2d\\u0e22\\u0e23\\u0e30\\u0e1a\\u0e1a\\u0e2a\\u0e33\\u0e2b\\u0e23\\u0e31\\u0e1a\\u0e01\\u0e32\\u0e23"
	4616	"\\u0e01\\u0e33\\u0e2b\\u0e19\\u0e14\\u0e2b\\u0e21\\u0e32\\u0e22\\u0e40\\u0e25\\u0e02\\u0e40\\u0e2b\\u0e25\\u0e48"
	4617	"\\u0e32\\u0e19\\u0e35\\u0e49. \\u0e44\\u0e21\\u0e48\\u0e21\\u0e35 encoding \\u0e43\\u0e14\\u0e17\\u0e35\\u0e48"
	4618	"\\u0e21\\u0e35\\u0e08\\u0e33\\u0e19\\u0e27\\u0e19\\u0e15\\u0e31\\u0e27\\u0e2d\\u0e31\\u0e01\\u0e02\\u0e23\\u0e30"
	4619	"\\u0e21\\u0e32\\u0e01\\u0e40\\u0e1e\\u0e35\\u0e22\\u0e07\\u0e1e\\u0e2d: \\u0e22\\u0e01\\u0e15\\u0e31\\u0e27\\u0e2d"
	4620	"\\u0e22\\u0e48\\u0e32\\u0e07\\u0e40\\u0e0a\\u0e48\\u0e19, \\u0e40\\u0e09\\u0e1e\\u0e32\\u0e30\\u0e43\\u0e19\\u0e01"
	4621	"\\u0e25\\u0e38\\u0e48\\u0e21\\u0e2a\\u0e2b\\u0e20\\u0e32\\u0e1e\\u0e22\\u0e38\\u0e42\\u0e23\\u0e1b\\u0e40\\u0e1e"
	4622	"\\u0e35\\u0e22\\u0e07\\u0e41\\u0e2b\\u0e48\\u0e07\\u0e40\\u0e14\\u0e35\\u0e22\\u0e27 \\u0e01\\u0e47\\u0e15\\u0e49"
	4623	"\\u0e2d\\u0e07\\u0e01\\u0e32\\u0e23\\u0e2b\\u0e25\\u0e32\\u0e22 encoding \\u0e43\\u0e19\\u0e01\\u0e32\\u0e23\\u0e04"
	4624	"\\u0e23\\u0e2d\\u0e1a\\u0e04\\u0e25\\u0e38\\u0e21\\u0e17\\u0e38\\u0e01\\u0e20\\u0e32\\u0e29\\u0e32\\u0e43\\u0e19"
	4625	"\\u0e01\\u0e25\\u0e38\\u0e48\\u0e21. \\u0e2b\\u0e23\\u0e37\\u0e2d\\u0e41\\u0e21\\u0e49\\u0e41\\u0e15\\u0e48\\u0e43"
	4626	"\\u0e19\\u0e20\\u0e32\\u0e29\\u0e32\\u0e40\\u0e14\\u0e35\\u0e48\\u0e22\\u0e27 \\u0e40\\u0e0a\\u0e48\\u0e19 \\u0e20"
	4627	"\\u0e32\\u0e29\\u0e32\\u0e2d\\u0e31\\u0e07\\u0e01\\u0e24\\u0e29 \\u0e01\\u0e47\\u0e44\\u0e21\\u0e48\\u0e21\\u0e35"
	4628	" encoding \\u0e43\\u0e14\\u0e17\\u0e35\\u0e48\\u0e40\\u0e1e\\u0e35\\u0e22\\u0e07\\u0e1e\\u0e2d\\u0e2a\\u0e33\\u0e2b"
	4629	"\\u0e23\\u0e31\\u0e1a\\u0e17\\u0e38\\u0e01\\u0e15\\u0e31\\u0e27\\u0e2d\\u0e31\\u0e01\\u0e29\\u0e23, \\u0e40\\u0e04"
	4630	"\\u0e23\\u0e37\\u0e48\\u0e2d\\u0e07\\u0e2b\\u0e21\\u0e32\\u0e22\\u0e27\\u0e23\\u0e23\\u0e04\\u0e15\\u0e2d\\u0e19"
	4631	" \\u0e41\\u0e25\\u0e30\\u0e2a\\u0e31\\u0e0d\\u0e25\\u0e31\\u0e01\\u0e29\\u0e13\\u0e4c\\u0e17\\u0e32\\u0e07\\u0e40"
	4632	"\\u0e17\\u0e04\\u0e19\\u0e34\\u0e04\\u0e17\\u0e35\\u0e48\\u0e43\\u0e0a\\u0e49\\u0e01\\u0e31\\u0e19\\u0e2d\\u0e22"
	4633	"\\u0e39\\u0e48\\u0e17\\u0e31\\u0e48\\u0e27\\u0e44\\u0e1b.";
	4634
	4635	const char *latinText =
	4636	"doy ph\\u1ee5\\u0304\\u0302n \\u1e6d\\u0304h\\u0101n l\\u00e6\\u0302w, khxmphiwtexr\\u0312 ca ke\\u012b\\u0300"
	4637	"ywk\\u0304\\u0125xng k\\u1ea1b re\\u1ee5\\u0304\\u0300xng k\\u0304hxng t\\u1ea1wlek\\u0304h. khxmphiwtexr"
	4638	"\\u0312 c\\u1ea1d k\\u0115b t\\u1ea1w x\\u1ea1ks\\u0304\\u02b9r l\\u00e6a x\\u1ea1kk\\u0304h ra x\\u1ee5\\u0304"
	4639	"\\u0300n\\u00ab doy k\\u0101r k\\u1ea3h\\u0304nd h\\u0304m\\u0101ylek\\u0304h h\\u0304\\u0131\\u0302 s\\u0304"
	4640	"\\u1ea3h\\u0304r\\u1ea1b t\\u00e6\\u0300la t\\u1ea1w. k\\u0300xn h\\u0304n\\u0302\\u0101 th\\u012b\\u0300\\u0301"
	4641	" Unicode ca t\\u0304h\\u016bk s\\u0304r\\u0302\\u0101ng k\\u0304h\\u1ee5\\u0302n, d\\u1ecb\\u0302 m\\u012b "
	4642	"rabb encoding xy\\u016b\\u0300 h\\u0304l\\u0101y r\\u0302xy rabb s\\u0304\\u1ea3h\\u0304r\\u1ea1b k\\u0101"
	4643	"r k\\u1ea3h\\u0304nd h\\u0304m\\u0101ylek\\u0304h h\\u0304el\\u0300\\u0101 n\\u012b\\u0302. m\\u1ecb\\u0300m"
	4644	"\\u012b encoding d\\u0131 th\\u012b\\u0300 m\\u012b c\\u1ea3nwn t\\u1ea1w x\\u1ea1kk\\u0304hra m\\u0101k p"
	4645	"he\\u012byng phx: yk t\\u1ea1wx\\u1ef3\\u0101ng ch\\u00e8n, c\\u0304heph\\u0101a n\\u0131 kl\\u00f9m s\\u0304"
	4646	"h\\u0304p\\u0323h\\u0101ph yurop phe\\u012byng h\\u0304\\u00e6\\u0300ng de\\u012byw k\\u0306 t\\u0302xngk\\u0101"
	4647	"r h\\u0304l\\u0101y encoding n\\u0131 k\\u0101r khrxbkhlum thuk p\\u0323h\\u0101s\\u0304\\u02b9\\u0101 n\\u0131"
	4648	" kl\\u00f9m. h\\u0304r\\u1ee5\\u0304x m\\u00e6\\u0302t\\u00e6\\u0300 n\\u0131 p\\u0323h\\u0101s\\u0304\\u02b9"
	4649	"\\u0101 de\\u012b\\u0300yw ch\\u00e8n p\\u0323h\\u0101s\\u0304\\u02b9\\u0101 x\\u1ea1ngkvs\\u0304\\u02b9 k\\u0306"
	4650	" m\\u1ecb\\u0300m\\u012b encoding d\\u0131 th\\u012b\\u0300 phe\\u012byng phx s\\u0304\\u1ea3h\\u0304r\\u1ea1"
	4651	"b thuk t\\u1ea1w x\\u1ea1ks\\u0304\\u02b9r, kher\\u1ee5\\u0304\\u0300xngh\\u0304m\\u0101y wrrkh txn l\\u00e6"
	4652	"a s\\u0304\\u1ea1\\u1ef5l\\u1ea1ks\\u0304\\u02b9\\u1e47\\u0312 th\\u0101ng thekhnikh th\\u012b\\u0300 ch\\u0131"
	4653	"\\u0302 k\\u1ea1n xy\\u016b\\u0300 th\\u1ea1\\u0300wp\\u1ecb.";
	4654
	4655
	4656	UnicodeString xlitText(thaiText);
	4657	xlitText = xlitText.unescape();
	4658	tr->transliterate(xlitText);
	4659
	4660	UnicodeString expectedText(latinText);
	4661	expectedText = expectedText.unescape();
	4662	expect(*tr, xlitText, expectedText);
	4663
	4664	delete tr;
	4665	#endif
	4666	}
	4667
	4668
	4669	//======================================================================
	4670	// Support methods
	4671	//======================================================================
	4672	void TransliteratorTest::expectT(const UnicodeString& id,
	4673	const UnicodeString& source,
	4674	const UnicodeString& expectedResult) {
	4675	UErrorCode ec = U_ZERO_ERROR;
	4676	UParseError pe;
	4677	Transliterator *t = Transliterator::createInstance(id, UTRANS_FORWARD, pe, ec);
	4678	if (U_FAILURE(ec)) {
	4679	errln((UnicodeString)"FAIL: Could not create " + id + " - " + u_errorName(ec));
	4680	delete t;
	4681	return;
	4682	}
	4683	expect(*t, source, expectedResult);
	4684	delete t;
	4685	}
	4686
	4687	void TransliteratorTest::reportParseError(const UnicodeString& message,
	4688	const UParseError& parseError,
	4689	const UErrorCode& status) {
	4690	dataerrln(message +
	4691	/", parse error " + parseError.code +/
	4692	", line " + parseError.line +
	4693	", offset " + parseError.offset +
	4694	", pre-context " + prettify(parseError.preContext, TRUE) +
	4695	", post-context " + prettify(parseError.postContext,TRUE) +
	4696	", Error: " + u_errorName(status));
	4697	}
	4698
	4699	void TransliteratorTest::expect(const UnicodeString& rules,
	4700	const UnicodeString& source,
	4701	const UnicodeString& expectedResult,
	4702	UTransPosition *pos) {
	4703	expect("<ID>", rules, source, expectedResult, pos);
	4704	}
	4705
	4706	void TransliteratorTest::expect(const UnicodeString& id,
	4707	const UnicodeString& rules,
	4708	const UnicodeString& source,
	4709	const UnicodeString& expectedResult,
	4710	UTransPosition *pos) {
	4711	UErrorCode status = U_ZERO_ERROR;
	4712	UParseError parseError;
	4713	Transliterator* t = Transliterator::createFromRules(id, rules, UTRANS_FORWARD, parseError, status);
	4714	if (U_FAILURE(status)) {
	4715	reportParseError(UnicodeString("Couldn't create transliterator from ") + rules, parseError, status);
	4716	} else {
	4717	expect(*t, source, expectedResult, pos);
	4718	}
	4719	delete t;
	4720	}
	4721
	4722	void TransliteratorTest::expect(const Transliterator& t,
	4723	const UnicodeString& source,
	4724	const UnicodeString& expectedResult,
	4725	const Transliterator& reverseTransliterator) {
	4726	expect(t, source, expectedResult);
	4727	expect(reverseTransliterator, expectedResult, source);
	4728	}
	4729
	4730	void TransliteratorTest::expect(const Transliterator& t,
	4731	const UnicodeString& source,
	4732	const UnicodeString& expectedResult,
	4733	UTransPosition *pos) {
	4734	if (pos == 0) {
	4735	UnicodeString result(source);
	4736	t.transliterate(result);
	4737	expectAux(t.getID() + ":String", source, result, expectedResult);
	4738	}
	4739	UTransPosition index={0, 0, 0, 0};
	4740	if (pos != 0) {
	4741	index = *pos;
	4742	}
	4743
	4744	UnicodeString rsource(source);
	4745	if (pos == 0) {
	4746	t.transliterate(rsource);
	4747	} else {
	4748	// Do it all at once -- below we do it incrementally
	4749	t.finishTransliteration(rsource, *pos);
	4750	}
	4751	expectAux(t.getID() + ":Replaceable", source, rsource, expectedResult);
	4752
	4753	// Test keyboard (incremental) transliteration -- this result
	4754	// must be the same after we finalize (see below).
	4755	UnicodeString log;
	4756	rsource.remove();
	4757	if (pos != 0) {
	4758	rsource = source;
	4759	formatInput(log, rsource, index);
	4760	log.append(" -> ");
	4761	UErrorCode status = U_ZERO_ERROR;
	4762	t.transliterate(rsource, index, status);
	4763	formatInput(log, rsource, index);
	4764	} else {
	4765	for (int32_t i=0; i<source.length(); ++i) {
	4766	if (i != 0) {
	4767	log.append(" + ");
	4768	}
	4769	log.append(source.charAt(i)).append(" -> ");
	4770	UErrorCode status = U_ZERO_ERROR;
	4771	t.transliterate(rsource, index, source.charAt(i), status);
	4772	formatInput(log, rsource, index);
	4773	}
	4774	}
	4775
	4776	// As a final step in keyboard transliteration, we must call
	4777	// transliterate to finish off any pending partial matches that
	4778	// were waiting for more input.
	4779	t.finishTransliteration(rsource, index);
	4780	log.append(" => ").append(rsource);
	4781
	4782	expectAux(t.getID() + ":Keyboard", log,
	4783	rsource == expectedResult,
	4784	expectedResult);
	4785	}
	4786
	4787
	4788	/**
	4789	* @param appendTo result is appended to this param.
	4790	* @param input the string being transliterated
	4791	* @param pos the index struct
	4792	*/
	4793	UnicodeString& TransliteratorTest::formatInput(UnicodeString &appendTo,
	4794	const UnicodeString& input,
	4795	const UTransPosition& pos) {
	4796	// Output a string of the form aaa{bbb\|ccc\|ddd}eee, where
	4797	// the {} indicate the context start and limit, and the \|\|
	4798	// indicate the start and limit.
	4799	if (0 <= pos.contextStart &&
	4800	pos.contextStart <= pos.start &&
	4801	pos.start <= pos.limit &&
	4802	pos.limit <= pos.contextLimit &&
	4803	pos.contextLimit <= input.length()) {
	4804
	4805	UnicodeString a, b, c, d, e;
	4806	input.extractBetween(0, pos.contextStart, a);
	4807	input.extractBetween(pos.contextStart, pos.start, b);
	4808	input.extractBetween(pos.start, pos.limit, c);
	4809	input.extractBetween(pos.limit, pos.contextLimit, d);
	4810	input.extractBetween(pos.contextLimit, input.length(), e);
	4811	appendTo.append(a).append((UChar)123/{/).append(b).
	4812	append((UChar)PIPE).append(c).append((UChar)PIPE).append(d).
	4813	append((UChar)125/}/).append(e);
	4814	} else {
	4815	appendTo.append((UnicodeString)"INVALID UTransPosition {cs=" +
	4816	pos.contextStart + ", s=" + pos.start + ", l=" +
	4817	pos.limit + ", cl=" + pos.contextLimit + "} on " +
	4818	input);
	4819	}
	4820	return appendTo;
	4821	}
	4822
	4823	void TransliteratorTest::expectAux(const UnicodeString& tag,
	4824	const UnicodeString& source,
	4825	const UnicodeString& result,
	4826	const UnicodeString& expectedResult) {
	4827	expectAux(tag, source + " -> " + result,
	4828	result == expectedResult,
	4829	expectedResult);
	4830	}
	4831
	4832	void TransliteratorTest::expectAux(const UnicodeString& tag,
	4833	const UnicodeString& summary, UBool pass,
	4834	const UnicodeString& expectedResult) {
	4835	if (pass) {
	4836	logln(UnicodeString("(")+tag+") " + prettify(summary));
	4837	} else {
	4838	dataerrln(UnicodeString("FAIL: (")+tag+") "
	4839	+ prettify(summary)
	4840	+ ", expected " + prettify(expectedResult));
	4841	}
	4842	}
	4843
	4844	#endif /* #if !UCONFIG_NO_TRANSLITERATION */