git.saurik.com Git - apple/icu.git/blame_incremental - icuSources/test/intltest/transtst.cpp

... / ...

Commit	Line	Data
	1	/*
	2	**********************************************************************
	3	* Copyright (C) 1999-2010, International Business Machines
	4	* Corporation and others. All Rights Reserved.
	5	**********************************************************************
	6	* Date Name Description
	7	* 11/10/99 aliu Creation.
	8	**********************************************************************
	9	*/
	10
	11	#include "unicode/utypes.h"
	12
	13	#if !UCONFIG_NO_TRANSLITERATION
	14
	15	#include "transtst.h"
	16	#include "unicode/locid.h"
	17	#include "unicode/dtfmtsym.h"
	18	#include "unicode/normlzr.h"
	19	#include "unicode/translit.h"
	20	#include "unicode/uchar.h"
	21	#include "unicode/unifilt.h"
	22	#include "unicode/uniset.h"
	23	#include "unicode/ustring.h"
	24	#include "unicode/usetiter.h"
	25	#include "unicode/uscript.h"
	26	#include "cpdtrans.h"
	27	#include "nultrans.h"
	28	#include "rbt.h"
	29	#include "rbt_pars.h"
	30	#include "anytrans.h"
	31	#include "esctrn.h"
	32	#include "name2uni.h"
	33	#include "nortrans.h"
	34	#include "remtrans.h"
	35	#include "titletrn.h"
	36	#include "tolowtrn.h"
	37	#include "toupptrn.h"
	38	#include "unesctrn.h"
	39	#include "uni2name.h"
	40	#include "cstring.h"
	41	#include "cmemory.h"
	42	#include <stdio.h>
	43
	44	/***********************************************************************
	45
	46	HOW TO USE THIS TEST FILE
	47	-or-
	48	How I developed on two platforms
	49	without losing (too much of) my mind
	50
	51
	52	1. Add new tests by copying/pasting/changing existing tests. On Java,
	53	any public void method named Test...() taking no parameters becomes
	54	a test. On C++, you need to modify the header and add a line to
	55	the runIndexedTest() dispatch method.
	56
	57	2. Make liberal use of the expect() method; it is your friend.
	58
	59	3. The tests in this file exactly match those in a sister file on the
	60	other side. The two files are:
	61
	62	icu4j: src/com/ibm/test/translit/TransliteratorTest.java
	63	icu4c: source/test/intltest/transtst.cpp
	64
	65	==> THIS IS THE IMPORTANT PART <==
	66
	67	When you add a test in this file, add it in TransliteratorTest.java
	68	too. Give it the same name and put it in the same relative place.
	69	This makes maintenance a lot simpler for any poor soul who ends up
	70	trying to synchronize the tests between icu4j and icu4c.
	71
	72	4. If you MUST enter a test that is NOT paralleled in the sister file,
	73	then add it in the special non-mirrored section. These are
	74	labeled
	75
	76	"icu4j ONLY"
	77
	78	or
	79
	80	"icu4c ONLY"
	81
	82	Make sure you document the reason the test is here and not there.
	83
	84
	85	Thank you.
	86	The Management
	87	***********************************************************************/
	88
	89	// Define character constants thusly to be EBCDIC-friendly
	90	enum {
	91	LEFT_BRACE=((UChar)0x007B), /{/
	92	PIPE =((UChar)0x007C), /\|/
	93	ZERO =((UChar)0x0030), /0/
	94	UPPER_A =((UChar)0x0041) /A/
	95	};
	96
	// Constructs the test object, initializing DESERET_DEE to U+10414 and
	// DESERET_dee to U+1043C (both supplementary code points).
	TransliteratorTest::TransliteratorTest()
	:   DESERET_DEE((UChar32)0x10414),
	    DESERET_dee((UChar32)0x1043C)
	{
	}

	// Nothing to release: the destructor body is empty.
	TransliteratorTest::~TransliteratorTest() {}
	104
	105	void
	106	TransliteratorTest::runIndexedTest(int32_t index, UBool exec,
	107	const char* &name, char* /par/) {
	108	switch (index) {
	109	TESTCASE(0,TestInstantiation);
	110	TESTCASE(1,TestSimpleRules);
	111	TESTCASE(2,TestRuleBasedInverse);
	112	TESTCASE(3,TestKeyboard);
	113	TESTCASE(4,TestKeyboard2);
	114	TESTCASE(5,TestKeyboard3);
	115	TESTCASE(6,TestArabic);
	116	TESTCASE(7,TestCompoundKana);
	117	TESTCASE(8,TestCompoundHex);
	118	TESTCASE(9,TestFiltering);
	119	TESTCASE(10,TestInlineSet);
	120	TESTCASE(11,TestPatternQuoting);
	121	TESTCASE(12,TestJ277);
	122	TESTCASE(13,TestJ243);
	123	TESTCASE(14,TestJ329);
	124	TESTCASE(15,TestSegments);
	125	TESTCASE(16,TestCursorOffset);
	126	TESTCASE(17,TestArbitraryVariableValues);
	127	TESTCASE(18,TestPositionHandling);
	128	TESTCASE(19,TestHiraganaKatakana);
	129	TESTCASE(20,TestCopyJ476);
	130	TESTCASE(21,TestAnchors);
	131	TESTCASE(22,TestInterIndic);
	132	TESTCASE(23,TestFilterIDs);
	133	TESTCASE(24,TestCaseMap);
	134	TESTCASE(25,TestNameMap);
	135	TESTCASE(26,TestLiberalizedID);
	136	TESTCASE(27,TestCreateInstance);
	137	TESTCASE(28,TestNormalizationTransliterator);
	138	TESTCASE(29,TestCompoundRBT);
	139	TESTCASE(30,TestCompoundFilter);
	140	TESTCASE(31,TestRemove);
	141	TESTCASE(32,TestToRules);
	142	TESTCASE(33,TestContext);
	143	TESTCASE(34,TestSupplemental);
	144	TESTCASE(35,TestQuantifier);
	145	TESTCASE(36,TestSTV);
	146	TESTCASE(37,TestCompoundInverse);
	147	TESTCASE(38,TestNFDChainRBT);
	148	TESTCASE(39,TestNullInverse);
	149	TESTCASE(40,TestAliasInverseID);
	150	TESTCASE(41,TestCompoundInverseID);
	151	TESTCASE(42,TestUndefinedVariable);
	152	TESTCASE(43,TestEmptyContext);
	153	TESTCASE(44,TestCompoundFilterID);
	154	TESTCASE(45,TestPropertySet);
	155	TESTCASE(46,TestNewEngine);
	156	TESTCASE(47,TestQuantifiedSegment);
	157	TESTCASE(48,TestDevanagariLatinRT);
	158	TESTCASE(49,TestTeluguLatinRT);
	159	TESTCASE(50,TestCompoundLatinRT);
	160	TESTCASE(51,TestSanskritLatinRT);
	161	TESTCASE(52,TestLocaleInstantiation);
	162	TESTCASE(53,TestTitleAccents);
	163	TESTCASE(54,TestLocaleResource);
	164	TESTCASE(55,TestParseError);
	165	TESTCASE(56,TestOutputSet);
	166	TESTCASE(57,TestVariableRange);
	167	TESTCASE(58,TestInvalidPostContext);
	168	TESTCASE(59,TestIDForms);
	169	TESTCASE(60,TestToRulesMark);
	170	TESTCASE(61,TestEscape);
	171	TESTCASE(62,TestAnchorMasking);
	172	TESTCASE(63,TestDisplayName);
	173	TESTCASE(64,TestSpecialCases);
	174	#if !UCONFIG_NO_FILE_IO
	175	TESTCASE(65,TestIncrementalProgress);
	176	#endif
	177	TESTCASE(66,TestSurrogateCasing);
	178	TESTCASE(67,TestFunction);
	179	TESTCASE(68,TestInvalidBackRef);
	180	TESTCASE(69,TestMulticharStringSet);
	181	TESTCASE(70,TestUserFunction);
	182	TESTCASE(71,TestAnyX);
	183	TESTCASE(72,TestSourceTargetSet);
	184	TESTCASE(73,TestGurmukhiDevanagari);
	185	TESTCASE(74,TestRuleWhitespace);
	186	TESTCASE(75,TestAllCodepoints);
	187	TESTCASE(76,TestBoilerplate);
	188	TESTCASE(77,TestAlternateSyntax);
	189	TESTCASE(78,TestBeginEnd);
	190	TESTCASE(79,TestBeginEndToRules);
	191	TESTCASE(80,TestRegisterAlias);
	192	TESTCASE(81,TestRuleStripping);
	193	TESTCASE(82,TestHalfwidthFullwidth);
	194	TESTCASE(83,TestThai);
	195	TESTCASE(84,TestAny);
	196	default: name = ""; break;
	197	}
	198	}
	199
	// Version tuple 3.9.4.0 in UVersionInfo form.
	// NOTE(review): not referenced in the visible part of this file.
	static const UVersionInfo ICU_39 = {3,9,4,0};
	201	/**
	202	* Make sure every system transliterator can be instantiated.
	203	*
	204	* ALSO test that the result of toRules() for each rule is a valid
	205	* rule. Do this here so we don't have to have another test that
	206	* instantiates everything as well.
	207	*/
	208	void TransliteratorTest::TestInstantiation() {
	209	UErrorCode ec = U_ZERO_ERROR;
	210	StringEnumeration* avail = Transliterator::getAvailableIDs(ec);
	211	assertSuccess("getAvailableIDs()", ec);
	212	assertTrue("getAvailableIDs()!=NULL", avail!=NULL);
	213	int32_t n = Transliterator::countAvailableIDs();
	214	assertTrue("getAvailableIDs().count()==countAvailableIDs()",
	215	avail->count(ec) == n);
	216	assertSuccess("count()", ec);
	217	UnicodeString name;
	218	for (int32_t i=0; i<n; ++i) {
	219	const UnicodeString& id = *avail->snext(ec);
	220	if (!assertSuccess("snext()", ec) \|\|
	221	!assertTrue("snext()!=NULL", (&id)!=NULL, TRUE)) {
	222	break;
	223	}
	224	UnicodeString id2 = Transliterator::getAvailableID(i);
	225	if (id.length() < 1) {
	226	errln(UnicodeString("FAIL: getAvailableID(") +
	227	i + ") returned empty string");
	228	continue;
	229	}
	230	if (id != id2) {
	231	errln(UnicodeString("FAIL: getAvailableID(") +
	232	i + ") != getAvailableIDs().snext()");
	233	continue;
	234	}
	235	UParseError parseError;
	236	UErrorCode status = U_ZERO_ERROR;
	237	Transliterator* t = Transliterator::createInstance(id,
	238	UTRANS_FORWARD, parseError,status);
	239	name.truncate(0);
	240	Transliterator::getDisplayName(id, name);
	241	if (t == 0) {
	242	#if UCONFIG_NO_BREAK_ITERATION
	243	// If UCONFIG_NO_BREAK_ITERATION is on, then only Thai should fail.
	244	if (id.compare((UnicodeString)"Thai-Latin") != 0)
	245	#endif
	246	dataerrln(UnicodeString("FAIL: Couldn't create ") + id +
	247	/", parse error " + parseError.code +/
	248	", line " + parseError.line +
	249	", offset " + parseError.offset +
	250	", pre-context " + prettify(parseError.preContext, TRUE) +
	251	", post-context " +prettify(parseError.postContext,TRUE) +
	252	", Error: " + u_errorName(status));
	253	// When createInstance fails, it deletes the failing
	254	// entry from the available ID list. We detect this
	255	// here by looking for a change in countAvailableIDs.
	256	int32_t nn = Transliterator::countAvailableIDs();
	257	if (nn == (n - 1)) {
	258	n = nn;
	259	--i; // Compensate for deleted entry
	260	}
	261	} else {
	262	logln(UnicodeString("OK: ") + name + " (" + id + ")");
	263
	264	// Now test toRules
	265	UnicodeString rules;
	266	t->toRules(rules, TRUE);
	267	Transliterator *u = Transliterator::createFromRules("x",
	268	rules, UTRANS_FORWARD, parseError,status);
	269	if (u == 0) {
	270	errln(UnicodeString("FAIL: ") + id +
	271	".createFromRules() => bad rules" +
	272	/", parse error " + parseError.code +/
	273	", line " + parseError.line +
	274	", offset " + parseError.offset +
	275	", context " + prettify(parseError.preContext, TRUE) +
	276	", rules: " + prettify(rules, TRUE));
	277	} else {
	278	delete u;
	279	}
	280	delete t;
	281	}
	282	}
	283	assertTrue("snext()==NULL", avail->snext(ec)==NULL);
	284	assertSuccess("snext()", ec);
	285	delete avail;
	286
	287	// Now test the failure path
	288	UParseError parseError;
	289	UErrorCode status = U_ZERO_ERROR;
	290	UnicodeString id("<Not a valid Transliterator ID>");
	291	Transliterator* t = Transliterator::createInstance(id, UTRANS_FORWARD, parseError, status);
	292	if (t != 0) {
	293	errln("FAIL: " + id + " returned a transliterator");
	294	delete t;
	295	} else {
	296	logln("OK: Bogus ID handled properly");
	297	}
	298	}
	299
	300	void TransliteratorTest::TestSimpleRules(void) {
	301	/* Example: rules 1. ab>x\|y
	302	* 2. yc>z
	303	*
	304	* []\|eabcd start - no match, copy e to tranlated buffer
	305	* [e]\|abcd match rule 1 - copy output & adjust cursor
	306	* [ex\|y]cd match rule 2 - copy output & adjust cursor
	307	* [exz]\|d no match, copy d to transliterated buffer
	308	* [exzd]\| done
	309	*/
	310	expect(UnicodeString("ab>x\|y;", "") +
	311	"yc>z",
	312	"eabcd", "exzd");
	313
	314	/* Another set of rules:
	315	* 1. ab>x\|yzacw
	316	* 2. za>q
	317	* 3. qc>r
	318	* 4. cw>n
	319	*
	320	* []\|ab Rule 1
	321	* [x\|yzacw] No match
	322	* [xy\|zacw] Rule 2
	323	* [xyq\|cw] Rule 4
	324	* [xyqn]\| Done
	325	*/
	326	expect(UnicodeString("ab>x\|yzacw;") +
	327	"za>q;" +
	328	"qc>r;" +
	329	"cw>n",
	330	"ab", "xyqn");
	331
	332	/* Test categories
	333	*/
	334	UErrorCode status = U_ZERO_ERROR;
	335	UParseError parseError;
	336	Transliterator *t = Transliterator::createFromRules(
	337	"<ID>",
	338	UnicodeString("$dummy=").append((UChar)0xE100) +
	339	UnicodeString(";"
	340	"$vowel=[aeiouAEIOU];"
	341	"$lu=[:Lu:];"
	342	"$vowel } $lu > '!';"
	343	"$vowel > '&';"
	344	"'!' { $lu > '^';"
	345	"$lu > '*';"
	346	"a > ERROR", ""),
	347	UTRANS_FORWARD, parseError,
	348	status);
	349	if (U_FAILURE(status)) {
	350	dataerrln("FAIL: RBT constructor failed - %s", u_errorName(status));
	351	return;
	352	}
	353	expect(t, "abcdefgABCDEFGU", "&bcd&fg!^!^&");
	354	delete t;
	355	}
	356
	357	/**
	358	* Test inline set syntax and set variable syntax.
	359	*/
	360	void TransliteratorTest::TestInlineSet(void) {
	361	expect("{ [:Ll:] } x > y; [:Ll:] > z;", "aAbxq", "zAyzz");
	362	expect("a[0-9]b > qrs", "1a7b9", "1qrs9");
	363
	364	expect(UnicodeString(
	365	"$digit = [0-9];"
	366	"$alpha = [a-zA-Z];"
	367	"$alphanumeric = [$digit $alpha];" // ***
	368	"$special = [^$alphanumeric];" // ***
	369	"$alphanumeric > '-';"
	370	"$special > '*';", ""),
	371
	372	"thx-1138", "---*----");
	373	}
	374
	375	/**
	376	* Create some inverses and confirm that they work. We have to be
	377	* careful how we do this, since the inverses will not be true
	378	* inverses -- we can't throw any random string at the composition
	379	* of the transliterators and expect the identity function. F x
	380	* F' != I. However, if we are careful about the input, we will
	381	* get the expected results.
	382	*/
	383	void TransliteratorTest::TestRuleBasedInverse(void) {
	384	UnicodeString RULES =
	385	UnicodeString("abc>zyx;") +
	386	"ab>yz;" +
	387	"bc>zx;" +
	388	"ca>xy;" +
	389	"a>x;" +
	390	"b>y;" +
	391	"c>z;" +
	392
	393	"abc<zyx;" +
	394	"ab<yz;" +
	395	"bc<zx;" +
	396	"ca<xy;" +
	397	"a<x;" +
	398	"b<y;" +
	399	"c<z;" +
	400
	401	"";
	402
	403	const char* DATA[] = {
	404	// Careful here -- random strings will not work. If we keep
	405	// the left side to the domain and the right side to the range
	406	// we will be okay though (left, abc; right xyz).
	407	"a", "x",
	408	"abcacab", "zyxxxyy",
	409	"caccb", "xyzzy",
	410	};
	411
	412	int32_t DATA_length = (int32_t)(sizeof(DATA) / sizeof(DATA[0]));
	413
	414	UErrorCode status = U_ZERO_ERROR;
	415	UParseError parseError;
	416	Transliterator *fwd = Transliterator::createFromRules("<ID>", RULES,
	417	UTRANS_FORWARD, parseError, status);
	418	Transliterator *rev = Transliterator::createFromRules("<ID>", RULES,
	419	UTRANS_REVERSE, parseError, status);
	420	if (U_FAILURE(status)) {
	421	errln("FAIL: RBT constructor failed");
	422	return;
	423	}
	424	for (int32_t i=0; i<DATA_length; i+=2) {
	425	expect(*fwd, DATA[i], DATA[i+1]);
	426	expect(*rev, DATA[i+1], DATA[i]);
	427	}
	428	delete fwd;
	429	delete rev;
	430	}
	431
	432	/**
	433	* Basic test of keyboard.
	434	*/
	435	void TransliteratorTest::TestKeyboard(void) {
	436	UParseError parseError;
	437	UErrorCode status = U_ZERO_ERROR;
	438	Transliterator *t = Transliterator::createFromRules("<ID>",
	439	UnicodeString("psch>Y;")
	440	+"ps>y;"
	441	+"ch>x;"
	442	+"a>A;",
	443	UTRANS_FORWARD, parseError,
	444	status);
	445	if (U_FAILURE(status)) {
	446	errln("FAIL: RBT constructor failed");
	447	return;
	448	}
	449	const char* DATA[] = {
	450	// insertion, buffer
	451	"a", "A",
	452	"p", "Ap",
	453	"s", "Aps",
	454	"c", "Apsc",
	455	"a", "AycA",
	456	"psch", "AycAY",
	457	0, "AycAY", // null means finishKeyboardTransliteration
	458	};
	459
	460	keyboardAux(*t, DATA, (int32_t)(sizeof(DATA)/sizeof(DATA[0])));
	461	delete t;
	462	}
	463
	464	/**
	465	* Basic test of keyboard with cursor.
	466	*/
	467	void TransliteratorTest::TestKeyboard2(void) {
	468	UParseError parseError;
	469	UErrorCode status = U_ZERO_ERROR;
	470	Transliterator *t = Transliterator::createFromRules("<ID>",
	471	UnicodeString("ych>Y;")
	472	+"ps>\|y;"
	473	+"ch>x;"
	474	+"a>A;",
	475	UTRANS_FORWARD, parseError,
	476	status);
	477	if (U_FAILURE(status)) {
	478	errln("FAIL: RBT constructor failed");
	479	return;
	480	}
	481	const char* DATA[] = {
	482	// insertion, buffer
	483	"a", "A",
	484	"p", "Ap",
	485	"s", "Aps", // modified for rollback - "Ay",
	486	"c", "Apsc", // modified for rollback - "Ayc",
	487	"a", "AycA",
	488	"p", "AycAp",
	489	"s", "AycAps", // modified for rollback - "AycAy",
	490	"c", "AycApsc", // modified for rollback - "AycAyc",
	491	"h", "AycAY",
	492	0, "AycAY", // null means finishKeyboardTransliteration
	493	};
	494
	495	keyboardAux(*t, DATA, (int32_t)(sizeof(DATA)/sizeof(DATA[0])));
	496	delete t;
	497	}
	498
	499	/**
	500	* Test keyboard transliteration with back-replacement.
	501	*/
	502	void TransliteratorTest::TestKeyboard3(void) {
	503	// We want th>z but t>y. Furthermore, during keyboard
	504	// transliteration we want t>y then yh>z if t, then h are
	505	// typed.
	506	UnicodeString RULES("t>\|y;"
	507	"yh>z;");
	508
	509	const char* DATA[] = {
	510	// Column 1: characters to add to buffer (as if typed)
	511	// Column 2: expected appearance of buffer after
	512	// keyboard xliteration.
	513	"a", "a",
	514	"b", "ab",
	515	"t", "abt", // modified for rollback - "aby",
	516	"c", "abyc",
	517	"t", "abyct", // modified for rollback - "abycy",
	518	"h", "abycz",
	519	0, "abycz", // null means finishKeyboardTransliteration
	520	};
	521
	522	UParseError parseError;
	523	UErrorCode status = U_ZERO_ERROR;
	524	Transliterator *t = Transliterator::createFromRules("<ID>", RULES, UTRANS_FORWARD, parseError, status);
	525	if (U_FAILURE(status)) {
	526	errln("FAIL: RBT constructor failed");
	527	return;
	528	}
	529	keyboardAux(*t, DATA, (int32_t)(sizeof(DATA)/sizeof(DATA[0])));
	530	delete t;
	531	}
	532
	533	void TransliteratorTest::keyboardAux(const Transliterator& t,
	534	const char* DATA[], int32_t DATA_length) {
	535	UErrorCode status = U_ZERO_ERROR;
	536	UTransPosition index={0, 0, 0, 0};
	537	UnicodeString s;
	538	for (int32_t i=0; i<DATA_length; i+=2) {
	539	UnicodeString log;
	540	if (DATA[i] != 0) {
	541	log = s + " + "
	542	+ DATA[i]
	543	+ " -> ";
	544	t.transliterate(s, index, DATA[i], status);
	545	} else {
	546	log = s + " => ";
	547	t.finishTransliteration(s, index);
	548	}
	549	// Show the start index '{' and the cursor '\|'
	550	UnicodeString a, b, c;
	551	s.extractBetween(0, index.contextStart, a);
	552	s.extractBetween(index.contextStart, index.start, b);
	553	s.extractBetween(index.start, s.length(), c);
	554	log.append(a).
	555	append((UChar)LEFT_BRACE).
	556	append(b).
	557	append((UChar)PIPE).
	558	append(c);
	559	if (s == DATA[i+1] && U_SUCCESS(status)) {
	560	logln(log);
	561	} else {
	562	errln(UnicodeString("FAIL: ") + log + ", expected " + DATA[i+1]);
	563	}
	564	}
	565	}
	566
	// Placeholder for the Latin-Arabic test.  The whole body is commented
	// out, so this test currently does nothing; the old code is kept below
	// for reference.
	void TransliteratorTest::TestArabic(void) {
	// Test disabled for 2.0 until new Arabic transliterator can be written.
	//    /*
	//    const char* DATA[] = {
	//        "Arabic", "\u062a\u062a\u0645\u062a\u0639\u0020"+
	//                  "\u0627\u0644\u0644\u063a\u0629\u0020"+
	//                  "\u0627\u0644\u0639\u0631\u0628\u0628\u064a\u0629\u0020"+
	//                  "\u0628\u0628\u0646\u0638\u0645\u0020"+
	//                  "\u0643\u062a\u0627\u0628\u0628\u064a\u0629\u0020"+
	//                  "\u062c\u0645\u064a\u0644\u0629",
	//    };
	//    */
	//
	//    UChar ar_raw[] = {
	//        0x062a, 0x062a, 0x0645, 0x062a, 0x0639, 0x0020, 0x0627,
	//        0x0644, 0x0644, 0x063a, 0x0629, 0x0020, 0x0627, 0x0644,
	//        0x0639, 0x0631, 0x0628, 0x0628, 0x064a, 0x0629, 0x0020,
	//        0x0628, 0x0628, 0x0646, 0x0638, 0x0645, 0x0020, 0x0643,
	//        0x062a, 0x0627, 0x0628, 0x0628, 0x064a, 0x0629, 0x0020,
	//        0x062c, 0x0645, 0x064a, 0x0644, 0x0629, 0
	//    };
	//    UnicodeString ar(ar_raw);
	//    UErrorCode status=U_ZERO_ERROR;
	//    UParseError parseError;
	//    Transliterator *t = Transliterator::createInstance("Latin-Arabic", UTRANS_FORWARD, parseError, status);
	//    if (t == 0) {
	//        errln("FAIL: createInstance failed");
	//        return;
	//    }
	//    expect(*t, "Arabic", ar);
	//    delete t;
	}
	599
	600	/**
	601	* Compose the Kana transliterator forward and reverse and try
	602	* some strings that should come out unchanged.
	603	*/
	604	void TransliteratorTest::TestCompoundKana(void) {
	605	UParseError parseError;
	606	UErrorCode status = U_ZERO_ERROR;
	607	Transliterator* t = Transliterator::createInstance("Latin-Hiragana;Hiragana-Latin", UTRANS_FORWARD, parseError, status);
	608	if (t == 0) {
	609	dataerrln("FAIL: construction of Latin-Hiragana;Hiragana-Latin failed - %s", u_errorName(status));
	610	} else {
	611	expect(*t, "aaaaa", "aaaaa");
	612	delete t;
	613	}
	614	}
	615
	616	/**
	617	* Compose the hex transliterators forward and reverse.
	618	*/
	619	void TransliteratorTest::TestCompoundHex(void) {
	620	UParseError parseError;
	621	UErrorCode status = U_ZERO_ERROR;
	622	Transliterator* a = Transliterator::createInstance("Any-Hex", UTRANS_FORWARD, parseError, status);
	623	Transliterator* b = Transliterator::createInstance("Hex-Any", UTRANS_FORWARD, parseError, status);
	624	Transliterator* transab[] = { a, b };
	625	Transliterator* transba[] = { b, a };
	626	if (a == 0 \|\| b == 0) {
	627	errln("FAIL: construction failed");
	628	delete a;
	629	delete b;
	630	return;
	631	}
	632	// Do some basic tests of a
	633	expect(*a, "01", UnicodeString("\\u0030\\u0031", ""));
	634	// Do some basic tests of b
	635	expect(*b, UnicodeString("\\u0030\\u0031", ""), "01");
	636
	637	Transliterator* ab = new CompoundTransliterator(transab, 2);
	638	UnicodeString s("abcde", "");
	639	expect(*ab, s, s);
	640
	641	UnicodeString str(s);
	642	a->transliterate(str);
	643	Transliterator* ba = new CompoundTransliterator(transba, 2);
	644	expect(*ba, str, str);
	645
	646	delete ab;
	647	delete ba;
	648	delete a;
	649	delete b;
	650	}
	651
	652	int gTestFilterClassID = 0;
	653	/**
	654	* Used by TestFiltering().
	655	*/
	656	class TestFilter : public UnicodeFilter {
	657	virtual UnicodeFunctor* clone() const {
	658	return new TestFilter(*this);
	659	}
	660	virtual UBool contains(UChar32 c) const {
	661	return c != (UChar)0x0063 /c/;
	662	}
	663	// Stubs
	664	virtual UnicodeString& toPattern(UnicodeString& result,
	665	UBool /escapeUnprintable/) const {
	666	return result;
	667	}
	668	virtual UBool matchesIndexValue(uint8_t /v/) const {
	669	return FALSE;
	670	}
	671	virtual void addMatchSetTo(UnicodeSet& /toUnionTo/) const {}
	672	public:
	673	UClassID getDynamicClassID() const { return (UClassID)&gTestFilterClassID; }
	674	};
	675
	676	/**
	677	* Do some basic tests of filtering.
	678	*/
	679	void TransliteratorTest::TestFiltering(void) {
	680	UParseError parseError;
	681	UErrorCode status = U_ZERO_ERROR;
	682	Transliterator* hex = Transliterator::createInstance("Any-Hex", UTRANS_FORWARD, parseError, status);
	683	if (hex == 0) {
	684	errln("FAIL: createInstance(Any-Hex) failed");
	685	return;
	686	}
	687	hex->adoptFilter(new TestFilter());
	688	UnicodeString s("abcde");
	689	hex->transliterate(s);
	690	UnicodeString exp("\\u0061\\u0062c\\u0064\\u0065", "");
	691	if (s == exp) {
	692	logln(UnicodeString("Ok: \"") + exp + "\"");
	693	} else {
	694	logln(UnicodeString("FAIL: \"") + s + "\", wanted \"" + exp + "\"");
	695	}
	696
	697	// ICU4C ONLY. Do not find Transliterator.orphanFilter() in ICU4J.
	698	UnicodeFilter *f = hex->orphanFilter();
	699	if (f == NULL){
	700	errln("FAIL: orphanFilter() should get a UnicodeFilter");
	701	} else {
	702	delete f;
	703	}
	704	delete hex;
	705	}
	706
	707	/**
	708	* Test anchors
	709	*/
	710	void TransliteratorTest::TestAnchors(void) {
	711	expect(UnicodeString("^a > 0; a$ > 2 ; a > 1;", ""),
	712	"aaa",
	713	"012");
	714	expect(UnicodeString("$s=[z$]; $s{a>0; a}$s>2; a>1;", ""),
	715	"aaa",
	716	"012");
	717	expect(UnicodeString("^ab > 01 ;"
	718	" ab > \|8 ;"
	719	" b > k ;"
	720	" 8x$ > 45 ;"
	721	" 8x > 77 ;", ""),
	722
	723	"ababbabxabx",
	724	"018k7745");
	725	expect(UnicodeString("$s = [z$] ;"
	726	"$s{ab > 01 ;"
	727	" ab > \|8 ;"
	728	" b > k ;"
	729	" 8x}$s > 45 ;"
	730	" 8x > 77 ;", ""),
	731
	732	"abzababbabxzabxabx",
	733	"01z018k45z01x45");
	734	}
	735
	736	/**
	737	* Test pattern quoting and escape mechanisms.
	738	*/
	739	void TransliteratorTest::TestPatternQuoting(void) {
	740	// Array of 3n items
	741	// Each item is <rules>, <input>, <expected output>
	742	const UnicodeString DATA[] = {
	743	UnicodeString(UChar(0x4E01)) + ">'[male adult]'",
	744	UnicodeString(UChar(0x4E01)),
	745	"[male adult]"
	746	};
	747
	748	for (int32_t i=0; i<3; i+=3) {
	749	logln(UnicodeString("Pattern: ") + prettify(DATA[i]));
	750	UParseError parseError;
	751	UErrorCode status = U_ZERO_ERROR;
	752	Transliterator *t = Transliterator::createFromRules("<ID>", DATA[i], UTRANS_FORWARD, parseError, status);
	753	if (U_FAILURE(status)) {
	754	errln("RBT constructor failed");
	755	} else {
	756	expect(*t, DATA[i+1], DATA[i+2]);
	757	}
	758	delete t;
	759	}
	760	}
	761
	762	/**
	763	* Regression test for bugs found in Greek transliteration.
	764	*/
	765	void TransliteratorTest::TestJ277(void) {
	766	UErrorCode status = U_ZERO_ERROR;
	767	UParseError parseError;
	768	Transliterator *gl = Transliterator::createInstance("Greek-Latin; NFD; [:M:]Remove; NFC", UTRANS_FORWARD, parseError, status);
	769	if (gl == NULL) {
	770	dataerrln("FAIL: createInstance(Greek-Latin) returned NULL - %s", u_errorName(status));
	771	return;
	772	}
	773
	774	UChar sigma = 0x3C3;
	775	UChar upsilon = 0x3C5;
	776	UChar nu = 0x3BD;
	777	// UChar PHI = 0x3A6;
	778	UChar alpha = 0x3B1;
	779	// UChar omega = 0x3C9;
	780	// UChar omicron = 0x3BF;
	781	// UChar epsilon = 0x3B5;
	782
	783	// sigma upsilon nu -> syn
	784	UnicodeString syn;
	785	syn.append(sigma).append(upsilon).append(nu);
	786	expect(*gl, syn, "syn");
	787
	788	// sigma alpha upsilon nu -> saun
	789	UnicodeString sayn;
	790	sayn.append(sigma).append(alpha).append(upsilon).append(nu);
	791	expect(*gl, sayn, "saun");
	792
	793	// Again, using a smaller rule set
	794	UnicodeString rules(
	795	"$alpha = \\u03B1;"
	796	"$nu = \\u03BD;"
	797	"$sigma = \\u03C3;"
	798	"$ypsilon = \\u03C5;"
	799	"$vowel = [aeiouAEIOU$alpha$ypsilon];"
	800	"s <> $sigma;"
	801	"a <> $alpha;"
	802	"u <> $vowel { $ypsilon;"
	803	"y <> $ypsilon;"
	804	"n <> $nu;",
	805	"");
	806	Transliterator *mini = Transliterator::createFromRules("mini", rules, UTRANS_REVERSE, parseError, status);
	807	if (U_FAILURE(status)) { errln("FAIL: Transliterator constructor failed"); return; }
	808	expect(*mini, syn, "syn");
	809	expect(*mini, sayn, "saun");
	810	delete mini;
	811	mini = NULL;
	812
	813	#if !UCONFIG_NO_FORMATTING
	814	// Transliterate the Greek locale data
	815	Locale el("el");
	816	DateFormatSymbols syms(el, status);
	817	if (U_FAILURE(status)) { errln("FAIL: Transliterator constructor failed"); return; }
	818	int32_t i, count;
	819	const UnicodeString* data = syms.getMonths(count);
	820	for (i=0; i<count; ++i) {
	821	if (data[i].length() == 0) {
	822	continue;
	823	}
	824	UnicodeString out(data[i]);
	825	gl->transliterate(out);
	826	UBool ok = TRUE;
	827	if (data[i].length() >= 2 && out.length() >= 2 &&
	828	u_isupper(data[i].charAt(0)) && u_islower(data[i].charAt(1))) {
	829	if (!(u_isupper(out.charAt(0)) && u_islower(out.charAt(1)))) {
	830	ok = FALSE;
	831	}
	832	}
	833	if (ok) {
	834	logln(prettify(data[i] + " -> " + out));
	835	} else {
	836	errln(UnicodeString("FAIL: ") + prettify(data[i] + " -> " + out));
	837	}
	838	}
	839	#endif
	840
	841	delete gl;
	842	}
	843
	844	/**
	845	* Prefix, suffix support in hex transliterators
	846	*/
	847	void TransliteratorTest::TestJ243(void) {
	848	UErrorCode ec = U_ZERO_ERROR;
	849
	850	// Test default Hex-Any, which should handle
	851	// \u, \U, u+, and U+
	852	Transliterator *hex =
	853	Transliterator::createInstance("Hex-Any", UTRANS_FORWARD, ec);
	854	if (assertSuccess("getInstance", ec)) {
	855	expect(*hex, UnicodeString("\\u0041+\\U00000042,U+0043uU+0044z", ""), "A+B,CuDz");
	856	}
	857	delete hex;
	858
	859	// // Try a custom Hex-Unicode
	860	// // \uXXXX and &#xXXXX;
	861	// ec = U_ZERO_ERROR;
	862	// HexToUnicodeTransliterator hex2(UnicodeString("\\\\u###0;&\\#x###0\\;", ""), ec);
	863	// expect(hex2, UnicodeString("\\u61\\u062\\u0063\\u00645\\u66x0123", ""),
	864	// "abcd5fx0123");
	865	// // Try custom Any-Hex (default is tested elsewhere)
	866	// ec = U_ZERO_ERROR;
	867	// UnicodeToHexTransliterator hex3(UnicodeString("&\\#x###0;", ""), ec);
	868	// expect(hex3, "012", "012");
	869	}
	870
	871	/**
	872	* Parsers need better syntax error messages.
	873	*/
	874	void TransliteratorTest::TestJ329(void) {
	875
	876	struct { UBool containsErrors; const char* rule; } DATA[] = {
	877	{ FALSE, "a > b; c > d" },
	878	{ TRUE, "a > b; no operator; c > d" },
	879	};
	880	int32_t DATA_length = (int32_t)(sizeof(DATA) / sizeof(DATA[0]));
	881
	882	for (int32_t i=0; i<DATA_length; ++i) {
	883	UErrorCode status = U_ZERO_ERROR;
	884	UParseError parseError;
	885	Transliterator *rbt = Transliterator::createFromRules("<ID>",
	886	DATA[i].rule,
	887	UTRANS_FORWARD,
	888	parseError,
	889	status);
	890	UBool gotError = U_FAILURE(status);
	891	UnicodeString desc(DATA[i].rule);
	892	desc.append(gotError ? " -> error" : " -> no error");
	893	if (gotError) {
	894	desc = desc + ", ParseError code=" + u_errorName(status) +
	895	" line=" + parseError.line +
	896	" offset=" + parseError.offset +
	897	" context=" + parseError.preContext;
	898	}
	899	if (gotError == DATA[i].containsErrors) {
	900	logln(UnicodeString("Ok: ") + desc);
	901	} else {
	902	errln(UnicodeString("FAIL: ") + desc);
	903	}
	904	delete rbt;
	905	}
	906	}
	907
	908	/**
	909	* Test segments and segment references.
	910	*/
	911	void TransliteratorTest::TestSegments(void) {
	912	// Array of 3n items
	913	// Each item is <rules>, <input>, <expected output>
	914	UnicodeString DATA[] = {
	915	"([a-z]) '.' ([0-9]) > $2 '-' $1",
	916	"abc.123.xyz.456",
	917	"ab1-c23.xy4-z56",
	918
	919	// nested
	920	"(([a-z])([0-9])) > $1 '.' $2 '.' $3;",
	921	"a1 b2",
	922	"a1.a.1 b2.b.2",
	923	};
	924	int32_t DATA_length = (int32_t)(sizeof(DATA)/sizeof(*DATA));
	925
	926	for (int32_t i=0; i<DATA_length; i+=3) {
	927	logln("Pattern: " + prettify(DATA[i]));
	928	UParseError parseError;
	929	UErrorCode status = U_ZERO_ERROR;
	930	Transliterator *t = Transliterator::createFromRules("ID", DATA[i], UTRANS_FORWARD, parseError, status);
	931	if (U_FAILURE(status)) {
	932	errln("FAIL: RBT constructor");
	933	} else {
	934	expect(*t, DATA[i+1], DATA[i+2]);
	935	}
	936	delete t;
	937	}
	938	}
	939
	940	/**
	941	* Test cursor positioning outside of the key
	942	*/
	943	void TransliteratorTest::TestCursorOffset(void) {
	944	// Array of 3n items
	945	// Each item is <rules>, <input>, <expected output>
	946	UnicodeString DATA[] = {
	947	"pre {alpha} post > \| @ ALPHA ;"
	948	"eALPHA > beta ;"
	949	"pre {beta} post > BETA @@ \| ;"
	950	"post > xyz",
	951
	952	"prealphapost prebetapost",
	953
	954	"prbetaxyz preBETApost",
	955	};
	956	int32_t DATA_length = (int32_t)(sizeof(DATA)/sizeof(*DATA));
	957
	958	for (int32_t i=0; i<DATA_length; i+=3) {
	959	logln("Pattern: " + prettify(DATA[i]));
	960	UParseError parseError;
	961	UErrorCode status = U_ZERO_ERROR;
	962	Transliterator *t = Transliterator::createFromRules("<ID>", DATA[i], UTRANS_FORWARD, parseError, status);
	963	if (U_FAILURE(status)) {
	964	errln("FAIL: RBT constructor");
	965	} else {
	966	expect(*t, DATA[i+1], DATA[i+2]);
	967	}
	968	delete t;
	969	}
	970	}
	971
	972	/**
	973	* Test zero length and > 1 char length variable values. Test
	974	* use of variable refs in UnicodeSets.
	975	*/
	976	void TransliteratorTest::TestArbitraryVariableValues(void) {
	977	// Array of 3n items
	978	// Each item is <rules>, <input>, <expected output>
	979	UnicodeString DATA[] = {
	980	"$abe = ab;"
	981	"$pat = x[yY]z;"
	982	"$ll = 'a-z';"
	983	"$llZ = [$ll];"
	984	"$llY = [$ll$pat];"
	985	"$emp = ;"
	986
	987	"$abe > ABE;"
	988	"$pat > END;"
	989	"$llZ > 1;"
	990	"$llY > 2;"
	991	"7$emp 8 > 9;"
	992	"",
	993
	994	"ab xYzxyz stY78",
	995	"ABE ENDEND 1129",
	996	};
	997	int32_t DATA_length = (int32_t)(sizeof(DATA)/sizeof(*DATA));
	998
	999	for (int32_t i=0; i<DATA_length; i+=3) {
	1000	logln("Pattern: " + prettify(DATA[i]));
	1001	UParseError parseError;
	1002	UErrorCode status = U_ZERO_ERROR;
	1003	Transliterator *t = Transliterator::createFromRules("<ID>", DATA[i], UTRANS_FORWARD, parseError, status);
	1004	if (U_FAILURE(status)) {
	1005	errln("FAIL: RBT constructor");
	1006	} else {
	1007	expect(*t, DATA[i+1], DATA[i+2]);
	1008	}
	1009	delete t;
	1010	}
	1011	}
	1012
	1013	/**
	1014	* Confirm that the contextStart, contextLimit, start, and limit
	1015	* behave correctly. J474.
	1016	*/
	1017	void TransliteratorTest::TestPositionHandling(void) {
	1018	// Array of 3n items
	1019	// Each item is <rules>, <input>, <expected output>
	1020	const char* DATA[] = {
	1021	"a{t} > SS ; {t}b > UU ; {t} > TT ;",
	1022	"xtat txtb", // pos 0,9,0,9
	1023	"xTTaSS TTxUUb",
	1024
	1025	"a{t} > SS ; {t}b > UU ; {t} > TT ; a > A ; b > B ;",
	1026	"xtat txtb", // pos 2,9,3,8
	1027	"xtaSS TTxUUb",
	1028
	1029	"a{t} > SS ; {t}b > UU ; {t} > TT ; a > A ; b > B ;",
	1030	"xtat txtb", // pos 3,8,3,8
	1031	"xtaTT TTxTTb",
	1032	};
	1033
	1034	// Array of 4n positions -- these go with the DATA array
	1035	// They are: contextStart, contextLimit, start, limit
	1036	int32_t POS[] = {
	1037	0, 9, 0, 9,
	1038	2, 9, 3, 8,
	1039	3, 8, 3, 8,
	1040	};
	1041
	1042	int32_t n = (int32_t)(sizeof(DATA) / sizeof(DATA[0])) / 3;
	1043	for (int32_t i=0; i<n; i++) {
	1044	UErrorCode status = U_ZERO_ERROR;
	1045	UParseError parseError;
	1046	Transliterator *t = Transliterator::createFromRules("<ID>",
	1047	DATA[3*i], UTRANS_FORWARD, parseError, status);
	1048	if (U_FAILURE(status)) {
	1049	delete t;
	1050	errln("FAIL: RBT constructor");
	1051	return;
	1052	}
	1053	UTransPosition pos;
	1054	pos.contextStart= POS[4*i];
	1055	pos.contextLimit = POS[4*i+1];
	1056	pos.start = POS[4*i+2];
	1057	pos.limit = POS[4*i+3];
	1058	UnicodeString rsource(DATA[3*i+1]);
	1059	t->transliterate(rsource, pos, status);
	1060	if (U_FAILURE(status)) {
	1061	delete t;
	1062	errln("FAIL: transliterate");
	1063	return;
	1064	}
	1065	t->finishTransliteration(rsource, pos);
	1066	expectAux(DATA[3*i],
	1067	DATA[3*i+1],
	1068	rsource,
	1069	DATA[3*i+2]);
	1070	delete t;
	1071	}
	1072	}
	1073
	1074	/**
	1075	* Test the Hiragana-Katakana transliterator.
	1076	*/
	1077	void TransliteratorTest::TestHiraganaKatakana(void) {
	1078	UParseError parseError;
	1079	UErrorCode status = U_ZERO_ERROR;
	1080	Transliterator* hk = Transliterator::createInstance("Hiragana-Katakana", UTRANS_FORWARD, parseError, status);
	1081	Transliterator* kh = Transliterator::createInstance("Katakana-Hiragana", UTRANS_FORWARD, parseError, status);
	1082	if (hk == 0 \|\| kh == 0) {
	1083	dataerrln("FAIL: createInstance failed - %s", u_errorName(status));
	1084	delete hk;
	1085	delete kh;
	1086	return;
	1087	}
	1088
	1089	// Array of 3n items
	1090	// Each item is "hk"\|"kh"\|"both", <Hiragana>, <Katakana>
	1091	const char* DATA[] = {
	1092	"both",
	1093	"\\u3042\\u3090\\u3099\\u3092\\u3050",
	1094	"\\u30A2\\u30F8\\u30F2\\u30B0",
	1095
	1096	"kh",
	1097	"\\u307C\\u3051\\u3060\\u3042\\u3093\\u30FC",
	1098	"\\u30DC\\u30F6\\u30C0\\u30FC\\u30F3\\u30FC",
	1099	};
	1100	int32_t DATA_length = (int32_t)(sizeof(DATA) / sizeof(DATA[0]));
	1101
	1102	for (int32_t i=0; i<DATA_length; i+=3) {
	1103	UnicodeString h = CharsToUnicodeString(DATA[i+1]);
	1104	UnicodeString k = CharsToUnicodeString(DATA[i+2]);
	1105	switch (*DATA[i]) {
	1106	case 0x68: //'h': // Hiragana-Katakana
	1107	expect(*hk, h, k);
	1108	break;
	1109	case 0x6B: //'k': // Katakana-Hiragana
	1110	expect(*kh, k, h);
	1111	break;
	1112	case 0x62: //'b': // both
	1113	expect(*hk, h, k);
	1114	expect(*kh, k, h);
	1115	break;
	1116	}
	1117	}
	1118	delete hk;
	1119	delete kh;
	1120	}
	1121
	1122	/**
	1123	* Test cloning / copy constructor of RBT.
	1124	*/
	1125	void TransliteratorTest::TestCopyJ476(void) {
	1126	// The real test here is what happens when the destructors are
	1127	// called. So we let one object get destructed, and check to
	1128	// see that its copy still works.
	1129	Transliterator *t2 = 0;
	1130	{
	1131	UParseError parseError;
	1132	UErrorCode status = U_ZERO_ERROR;
	1133	Transliterator *t1 = Transliterator::createFromRules("t1",
	1134	"a>A;b>B;'foo'+>'bar'", UTRANS_FORWARD, parseError, status);
	1135	if (U_FAILURE(status)) {
	1136	errln("FAIL: RBT constructor");
	1137	return;
	1138	}
	1139	t2 = t1->clone(); // Call copy constructor under the covers.
	1140	expect(*t1, "abcfoofoo", "ABcbar");
	1141	delete t1;
	1142	}
	1143	expect(*t2, "abcfoofoo", "ABcbar");
	1144	delete t2;
	1145	}
	1146
	1147	/**
	1148	* Test inter-Indic transliterators. These are composed.
	1149	* ICU4C Jitterbug 483.
	1150	*/
	1151	void TransliteratorTest::TestInterIndic(void) {
	1152	UnicodeString ID("Devanagari-Gujarati", "");
	1153	UErrorCode status = U_ZERO_ERROR;
	1154	UParseError parseError;
	1155	Transliterator* dg = Transliterator::createInstance(ID, UTRANS_FORWARD, parseError, status);
	1156	if (dg == 0) {
	1157	dataerrln("FAIL: createInstance(" + ID + ") returned NULL - " + u_errorName(status));
	1158	return;
	1159	}
	1160	UnicodeString id = dg->getID();
	1161	if (id != ID) {
	1162	errln("FAIL: createInstance(" + ID + ")->getID() => " + id);
	1163	}
	1164	UnicodeString dev = CharsToUnicodeString("\\u0901\\u090B\\u0925");
	1165	UnicodeString guj = CharsToUnicodeString("\\u0A81\\u0A8B\\u0AA5");
	1166	expect(*dg, dev, guj);
	1167	delete dg;
	1168	}
	1169
	1170	/**
	1171	* Test filter syntax in IDs. (J918)
	1172	*/
	1173	void TransliteratorTest::TestFilterIDs(void) {
	1174	// Array of 3n strings:
	1175	// <id>, <inverse id>, <input>, <expected output>
	1176	const char* DATA[] = {
	1177	"[aeiou]Any-Hex", // ID
	1178	"[aeiou]Hex-Any", // expected inverse ID
	1179	"quizzical", // src
	1180	"q\\u0075\\u0069zz\\u0069c\\u0061l", // expected ID.translit(src)
	1181
	1182	"[aeiou]Any-Hex;[^5]Hex-Any",
	1183	"[^5]Any-Hex;[aeiou]Hex-Any",
	1184	"quizzical",
	1185	"q\\u0075izzical",
	1186
	1187	"[abc]Null",
	1188	"[abc]Null",
	1189	"xyz",
	1190	"xyz",
	1191	};
	1192	enum { DATA_length = sizeof(DATA) / sizeof(DATA[0]) };
	1193
	1194	for (int i=0; i<DATA_length; i+=4) {
	1195	UnicodeString ID(DATA[i], "");
	1196	UnicodeString uID(DATA[i+1], "");
	1197	UnicodeString data2(DATA[i+2], "");
	1198	UnicodeString data3(DATA[i+3], "");
	1199	UParseError parseError;
	1200	UErrorCode status = U_ZERO_ERROR;
	1201	Transliterator *t = Transliterator::createInstance(ID, UTRANS_FORWARD, parseError, status);
	1202	if (t == 0) {
	1203	errln("FAIL: createInstance(" + ID + ") returned NULL");
	1204	return;
	1205	}
	1206	expect(*t, data2, data3);
	1207
	1208	// Check the ID
	1209	if (ID != t->getID()) {
	1210	errln("FAIL: createInstance(" + ID + ").getID() => " +
	1211	t->getID());
	1212	}
	1213
	1214	// Check the inverse
	1215	Transliterator *u = t->createInverse(status);
	1216	if (u == 0) {
	1217	errln("FAIL: " + ID + ".createInverse() returned NULL");
	1218	} else if (u->getID() != uID) {
	1219	errln("FAIL: " + ID + ".createInverse().getID() => " +
	1220	u->getID() + ", expected " + uID);
	1221	}
	1222
	1223	delete t;
	1224	delete u;
	1225	}
	1226	}
	1227
	1228	/**
	1229	* Test the case mapping transliterators.
	1230	*/
	1231	void TransliteratorTest::TestCaseMap(void) {
	1232	UParseError parseError;
	1233	UErrorCode status = U_ZERO_ERROR;
	1234	Transliterator* toUpper =
	1235	Transliterator::createInstance("Any-Upper[^xyzXYZ]", UTRANS_FORWARD, parseError, status);
	1236	Transliterator* toLower =
	1237	Transliterator::createInstance("Any-Lower[^xyzXYZ]", UTRANS_FORWARD, parseError, status);
	1238	Transliterator* toTitle =
	1239	Transliterator::createInstance("Any-Title[^xyzXYZ]", UTRANS_FORWARD, parseError, status);
	1240	if (toUpper==0 \|\| toLower==0 \|\| toTitle==0) {
	1241	errln("FAIL: createInstance returned NULL");
	1242	delete toUpper;
	1243	delete toLower;
	1244	delete toTitle;
	1245	return;
	1246	}
	1247
	1248	expect(*toUpper, "The quick brown fox jumped over the lazy dogs.",
	1249	"THE QUICK BROWN FOx JUMPED OVER THE LAzy DOGS.");
	1250	expect(*toLower, "The quIck brown fOX jUMPED OVER THE LAzY dogs.",
	1251	"the quick brown foX jumped over the lazY dogs.");
	1252	expect(*toTitle, "the quick brown foX can't jump over the laZy dogs.",
	1253	"The Quick Brown FoX Can't Jump Over The LaZy Dogs.");
	1254
	1255	delete toUpper;
	1256	delete toLower;
	1257	delete toTitle;
	1258	}
	1259
	1260	/**
	1261	* Test the name mapping transliterators.
	1262	*/
	1263	void TransliteratorTest::TestNameMap(void) {
	1264	UParseError parseError;
	1265	UErrorCode status = U_ZERO_ERROR;
	1266	Transliterator* uni2name =
	1267	Transliterator::createInstance("Any-Name[^abc]", UTRANS_FORWARD, parseError, status);
	1268	Transliterator* name2uni =
	1269	Transliterator::createInstance("Name-Any", UTRANS_FORWARD, parseError, status);
	1270	if (uni2name==0 \|\| name2uni==0) {
	1271	errln("FAIL: createInstance returned NULL");
	1272	delete uni2name;
	1273	delete name2uni;
	1274	return;
	1275	}
	1276
	1277	// Careful: CharsToUS will convert "\\N" => "N"; use "\\\\N" for \N
	1278	expect(*uni2name, CharsToUnicodeString("\\u00A0abc\\u4E01\\u00B5\\u0A81\\uFFFD\\u0004\\u0009\\u0081\\uFFFF"),
	1279	CharsToUnicodeString("\\\\N{NO-BREAK SPACE}abc\\\\N{CJK UNIFIED IDEOGRAPH-4E01}\\\\N{MICRO SIGN}\\\\N{GUJARATI SIGN CANDRABINDU}\\\\N{REPLACEMENT CHARACTER}\\\\N{END OF TRANSMISSION}\\\\N{CHARACTER TABULATION}\\\\N{<control-0081>}\\\\N{<noncharacter-FFFF>}"));
	1280	expect(*name2uni, UNICODE_STRING_SIMPLE("{\\N { NO-BREAK SPACE}abc\\N{ CJK UNIFIED IDEOGRAPH-4E01 }\\N{x\\N{MICRO SIGN}\\N{GUJARATI SIGN CANDRABINDU}\\N{REPLACEMENT CHARACTER}\\N{END OF TRANSMISSION}\\N{CHARACTER TABULATION}\\N{<control-0081>}\\N{<noncharacter-FFFF>}\\N{<control-0004>}\\N{"),
	1281	CharsToUnicodeString("{\\u00A0abc\\u4E01\\\\N{x\\u00B5\\u0A81\\uFFFD\\u0004\\u0009\\u0081\\uFFFF\\u0004\\\\N{"));
	1282
	1283	delete uni2name;
	1284	delete name2uni;
	1285
	1286	// round trip
	1287	Transliterator* t =
	1288	Transliterator::createInstance("Any-Name;Name-Any", UTRANS_FORWARD, parseError, status);
	1289	if (t==0) {
	1290	errln("FAIL: createInstance returned NULL");
	1291	delete t;
	1292	return;
	1293	}
	1294
	1295	// Careful: CharsToUS will convert "\\N" => "N"; use "\\\\N" for \N
	1296	UnicodeString s = CharsToUnicodeString("{\\u00A0abc\\u4E01\\\\N{x\\u00B5\\u0A81\\uFFFD\\u0004\\u0009\\u0081\\uFFFF\\u0004\\\\N{");
	1297	expect(*t, s, s);
	1298	delete t;
	1299	}
	1300
	1301	/**
	1302	* Test liberalized ID syntax. 1006c
	1303	*/
	1304	void TransliteratorTest::TestLiberalizedID(void) {
	1305	// Some test cases have an expected getID() value of NULL. This
	1306	// means I have disabled the test case for now. This stuff is
	1307	// still under development, and I haven't decided whether to make
	1308	// getID() return canonical case yet. It will all get rewritten
	1309	// with the move to Source-Target/Variant IDs anyway. [aliu]
	1310	const char* DATA[] = {
	1311	"latin-greek", NULL /"Latin-Greek"/, "case insensitivity",
	1312	" Null ", "Null", "whitespace",
	1313	" Latin[a-z]-Greek ", "[a-z]Latin-Greek", "inline filter",
	1314	" null ; latin-greek ", NULL /"Null;Latin-Greek"/, "compound whitespace",
	1315	};
	1316	const int32_t DATA_length = sizeof(DATA)/sizeof(DATA[0]);
	1317	UParseError parseError;
	1318	UErrorCode status= U_ZERO_ERROR;
	1319	for (int32_t i=0; i<DATA_length; i+=3) {
	1320	Transliterator *t = Transliterator::createInstance(DATA[i], UTRANS_FORWARD, parseError, status);
	1321	if (t == 0) {
	1322	dataerrln(UnicodeString("FAIL: ") + DATA[i+2] +
	1323	" cannot create ID \"" + DATA[i] + "\" - " + u_errorName(status));
	1324	} else {
	1325	UnicodeString exp;
	1326	if (DATA[i+1]) {
	1327	exp = UnicodeString(DATA[i+1], "");
	1328	}
	1329	// Don't worry about getID() if the expected char*
	1330	// is NULL -- see above.
	1331	if (exp.length() == 0 \|\| exp == t->getID()) {
	1332	logln(UnicodeString("Ok: ") + DATA[i+2] +
	1333	" create ID \"" + DATA[i] + "\" => \"" +
	1334	exp + "\"");
	1335	} else {
	1336	errln(UnicodeString("FAIL: ") + DATA[i+2] +
	1337	" create ID \"" + DATA[i] + "\" => \"" +
	1338	t->getID() + "\", exp \"" + exp + "\"");
	1339	}
	1340	delete t;
	1341	}
	1342	}
	1343	}
	1344
	1345	/* test for Jitterbug 912 */
	1346	void TransliteratorTest::TestCreateInstance(){
	1347	const char* FORWARD = "F";
	1348	const char* REVERSE = "R";
	1349	const char* DATA[] = {
	1350	// Column 1: id
	1351	// Column 2: direction
	1352	// Column 3: expected ID, or "" if expect failure
	1353	"Latin-Hangul", REVERSE, "Hangul-Latin", // JB#912
	1354
	1355	// JB#2689: bad compound causes crash
	1356	"InvalidSource-InvalidTarget", FORWARD, "",
	1357	"InvalidSource-InvalidTarget", REVERSE, "",
	1358	"Hex-Any;InvalidSource-InvalidTarget", FORWARD, "",
	1359	"Hex-Any;InvalidSource-InvalidTarget", REVERSE, "",
	1360	"InvalidSource-InvalidTarget;Hex-Any", FORWARD, "",
	1361	"InvalidSource-InvalidTarget;Hex-Any", REVERSE, "",
	1362
	1363	NULL
	1364	};
	1365
	1366	for (int32_t i=0; DATA[i]; i+=3) {
	1367	UParseError err;
	1368	UErrorCode ec = U_ZERO_ERROR;
	1369	UnicodeString id(DATA[i]);
	1370	UTransDirection dir = (DATA[i+1]==FORWARD)?
	1371	UTRANS_FORWARD:UTRANS_REVERSE;
	1372	UnicodeString expID(DATA[i+2]);
	1373	Transliterator* t =
	1374	Transliterator::createInstance(id,dir,err,ec);
	1375	UnicodeString newID;
	1376	if (t) {
	1377	newID = t->getID();
	1378	}
	1379	UBool ok = (newID == expID);
	1380	if (!t) {
	1381	newID = u_errorName(ec);
	1382	}
	1383	if (ok) {
	1384	logln((UnicodeString)"Ok: createInstance(" +
	1385	id + "," + DATA[i+1] + ") => " + newID);
	1386	} else {
	1387	dataerrln((UnicodeString)"FAIL: createInstance(" +
	1388	id + "," + DATA[i+1] + ") => " + newID +
	1389	", expected " + expID);
	1390	}
	1391	delete t;
	1392	}
	1393	}
	1394
	1395	/**
	1396	* Test the normalization transliterator.
	1397	*/
	1398	void TransliteratorTest::TestNormalizationTransliterator() {
	1399	// THE FOLLOWING TWO TABLES ARE COPIED FROM com.ibm.test.normalizer.BasicTest
	1400	// PLEASE KEEP THEM IN SYNC WITH BasicTest.
	1401	const char* CANON[] = {
	1402	// Input Decomposed Composed
	1403	"cat", "cat", "cat" ,
	1404	"\\u00e0ardvark", "a\\u0300ardvark", "\\u00e0ardvark" ,
	1405
	1406	"\\u1e0a", "D\\u0307", "\\u1e0a" , // D-dot_above
	1407	"D\\u0307", "D\\u0307", "\\u1e0a" , // D dot_above
	1408
	1409	"\\u1e0c\\u0307", "D\\u0323\\u0307", "\\u1e0c\\u0307" , // D-dot_below dot_above
	1410	"\\u1e0a\\u0323", "D\\u0323\\u0307", "\\u1e0c\\u0307" , // D-dot_above dot_below
	1411	"D\\u0307\\u0323", "D\\u0323\\u0307", "\\u1e0c\\u0307" , // D dot_below dot_above
	1412
	1413	"\\u1e10\\u0307\\u0323", "D\\u0327\\u0323\\u0307","\\u1e10\\u0323\\u0307", // D dot_below cedilla dot_above
	1414	"D\\u0307\\u0328\\u0323","D\\u0328\\u0323\\u0307","\\u1e0c\\u0328\\u0307", // D dot_above ogonek dot_below
	1415
	1416	"\\u1E14", "E\\u0304\\u0300", "\\u1E14" , // E-macron-grave
	1417	"\\u0112\\u0300", "E\\u0304\\u0300", "\\u1E14" , // E-macron + grave
	1418	"\\u00c8\\u0304", "E\\u0300\\u0304", "\\u00c8\\u0304" , // E-grave + macron
	1419
	1420	"\\u212b", "A\\u030a", "\\u00c5" , // angstrom_sign
	1421	"\\u00c5", "A\\u030a", "\\u00c5" , // A-ring
	1422
	1423	"\\u00fdffin", "y\\u0301ffin", "\\u00fdffin" , //updated with 3.0
	1424	"\\u00fd\\uFB03n", "y\\u0301\\uFB03n", "\\u00fd\\uFB03n" , //updated with 3.0
	1425
	1426	"Henry IV", "Henry IV", "Henry IV" ,
	1427	"Henry \\u2163", "Henry \\u2163", "Henry \\u2163" ,
	1428
	1429	"\\u30AC", "\\u30AB\\u3099", "\\u30AC" , // ga (Katakana)
	1430	"\\u30AB\\u3099", "\\u30AB\\u3099", "\\u30AC" , // ka + ten
	1431	"\\uFF76\\uFF9E", "\\uFF76\\uFF9E", "\\uFF76\\uFF9E" , // hw_ka + hw_ten
	1432	"\\u30AB\\uFF9E", "\\u30AB\\uFF9E", "\\u30AB\\uFF9E" , // ka + hw_ten
	1433	"\\uFF76\\u3099", "\\uFF76\\u3099", "\\uFF76\\u3099" , // hw_ka + ten
	1434
	1435	"A\\u0300\\u0316", "A\\u0316\\u0300", "\\u00C0\\u0316" ,
	1436	0 // end
	1437	};
	1438
	1439	const char* COMPAT[] = {
	1440	// Input Decomposed Composed
	1441	"\\uFB4f", "\\u05D0\\u05DC", "\\u05D0\\u05DC" , // Alef-Lamed vs. Alef, Lamed
	1442
	1443	"\\u00fdffin", "y\\u0301ffin", "\\u00fdffin" , //updated for 3.0
	1444	"\\u00fd\\uFB03n", "y\\u0301ffin", "\\u00fdffin" , // ffi ligature -> f + f + i
	1445
	1446	"Henry IV", "Henry IV", "Henry IV" ,
	1447	"Henry \\u2163", "Henry IV", "Henry IV" ,
	1448
	1449	"\\u30AC", "\\u30AB\\u3099", "\\u30AC" , // ga (Katakana)
	1450	"\\u30AB\\u3099", "\\u30AB\\u3099", "\\u30AC" , // ka + ten
	1451
	1452	"\\uFF76\\u3099", "\\u30AB\\u3099", "\\u30AC" , // hw_ka + ten
	1453	0 // end
	1454	};
	1455
	1456	int32_t i;
	1457	UParseError parseError;
	1458	UErrorCode status = U_ZERO_ERROR;
	1459	Transliterator* NFD = Transliterator::createInstance("NFD", UTRANS_FORWARD, parseError, status);
	1460	Transliterator* NFC = Transliterator::createInstance("NFC", UTRANS_FORWARD, parseError, status);
	1461	if (!NFD \|\| !NFC) {
	1462	dataerrln("FAIL: createInstance failed: %s", u_errorName(status));
	1463	delete NFD;
	1464	delete NFC;
	1465	return;
	1466	}
	1467	for (i=0; CANON[i]; i+=3) {
	1468	UnicodeString in = CharsToUnicodeString(CANON[i]);
	1469	UnicodeString expd = CharsToUnicodeString(CANON[i+1]);
	1470	UnicodeString expc = CharsToUnicodeString(CANON[i+2]);
	1471	expect(*NFD, in, expd);
	1472	expect(*NFC, in, expc);
	1473	}
	1474	delete NFD;
	1475	delete NFC;
	1476
	1477	Transliterator* NFKD = Transliterator::createInstance("NFKD", UTRANS_FORWARD, parseError, status);
	1478	Transliterator* NFKC = Transliterator::createInstance("NFKC", UTRANS_FORWARD, parseError, status);
	1479	if (!NFKD \|\| !NFKC) {
	1480	errln("FAIL: createInstance failed");
	1481	delete NFKD;
	1482	delete NFKC;
	1483	return;
	1484	}
	1485	for (i=0; COMPAT[i]; i+=3) {
	1486	UnicodeString in = CharsToUnicodeString(COMPAT[i]);
	1487	UnicodeString expkd = CharsToUnicodeString(COMPAT[i+1]);
	1488	UnicodeString expkc = CharsToUnicodeString(COMPAT[i+2]);
	1489	expect(*NFKD, in, expkd);
	1490	expect(*NFKC, in, expkc);
	1491	}
	1492	delete NFKD;
	1493	delete NFKC;
	1494
	1495	UParseError pe;
	1496	status = U_ZERO_ERROR;
	1497	Transliterator *t = Transliterator::createInstance("NFD; [x]Remove",
	1498	UTRANS_FORWARD,
	1499	pe, status);
	1500	if (t == 0) {
	1501	errln("FAIL: createInstance failed");
	1502	}
	1503	expect(*t, CharsToUnicodeString("\\u010dx"),
	1504	CharsToUnicodeString("c\\u030C"));
	1505	delete t;
	1506	}
	1507
	1508	/**
	1509	* Test compound RBT rules.
	1510	*/
	1511	void TransliteratorTest::TestCompoundRBT(void) {
	1512	// Careful with spacing and ';' here: Phrase this exactly
	1513	// as toRules() is going to return it. If toRules() changes
	1514	// with regard to spacing or ';', then adjust this string.
	1515	UnicodeString rule("::Hex-Any;\n"
	1516	"::Any-Lower;\n"
	1517	"a > '.A.';\n"
	1518	"b > '.B.';\n"
	1519	"::[^t]Any-Upper;", "");
	1520	UParseError parseError;
	1521	UErrorCode status = U_ZERO_ERROR;
	1522	Transliterator *t = Transliterator::createFromRules("Test", rule, UTRANS_FORWARD, parseError, status);
	1523	if (t == 0) {
	1524	errln("FAIL: createFromRules failed");
	1525	return;
	1526	}
	1527	expect(*t, UNICODE_STRING_SIMPLE("\\u0043at in the hat, bat on the mat"),
	1528	"C.A.t IN tHE H.A.t, .B..A.t ON tHE M.A.t");
	1529	UnicodeString r;
	1530	t->toRules(r, TRUE);
	1531	if (r == rule) {
	1532	logln((UnicodeString)"OK: toRules() => " + r);
	1533	} else {
	1534	errln((UnicodeString)"FAIL: toRules() => " + r +
	1535	", expected " + rule);
	1536	}
	1537	delete t;
	1538
	1539	// Now test toRules
	1540	t = Transliterator::createInstance("Greek-Latin; Latin-Cyrillic", UTRANS_FORWARD, parseError, status);
	1541	if (t == 0) {
	1542	dataerrln("FAIL: createInstance failed - %s", u_errorName(status));
	1543	return;
	1544	}
	1545	UnicodeString exp("::Greek-Latin;\n::Latin-Cyrillic;");
	1546	t->toRules(r, TRUE);
	1547	if (r != exp) {
	1548	errln((UnicodeString)"FAIL: toRules() => " + r +
	1549	", expected " + exp);
	1550	} else {
	1551	logln((UnicodeString)"OK: toRules() => " + r);
	1552	}
	1553	delete t;
	1554
	1555	// Round trip the result of toRules
	1556	t = Transliterator::createFromRules("Test", r, UTRANS_FORWARD, parseError, status);
	1557	if (t == 0) {
	1558	errln("FAIL: createFromRules #2 failed");
	1559	return;
	1560	} else {
	1561	logln((UnicodeString)"OK: createFromRules(" + r + ") succeeded");
	1562	}
	1563
	1564	// Test toRules again
	1565	t->toRules(r, TRUE);
	1566	if (r != exp) {
	1567	errln((UnicodeString)"FAIL: toRules() => " + r +
	1568	", expected " + exp);
	1569	} else {
	1570	logln((UnicodeString)"OK: toRules() => " + r);
	1571	}
	1572
	1573	delete t;
	1574
	1575	// Test Foo(Bar) IDs. Careful with spacing in id; make it conform
	1576	// to what the regenerated ID will look like.
	1577	UnicodeString id("Upper(Lower);(NFKC)", "");
	1578	t = Transliterator::createInstance(id, UTRANS_FORWARD, parseError, status);
	1579	if (t == 0) {
	1580	errln("FAIL: createInstance #2 failed");
	1581	return;
	1582	}
	1583	if (t->getID() == id) {
	1584	logln((UnicodeString)"OK: created " + id);
	1585	} else {
	1586	errln((UnicodeString)"FAIL: createInstance(" + id +
	1587	").getID() => " + t->getID());
	1588	}
	1589
	1590	Transliterator *u = t->createInverse(status);
	1591	if (u == 0) {
	1592	errln("FAIL: createInverse failed");
	1593	delete t;
	1594	return;
	1595	}
	1596	exp = "NFKC();Lower(Upper)";
	1597	if (u->getID() == exp) {
	1598	logln((UnicodeString)"OK: createInverse(" + id + ") => " +
	1599	u->getID());
	1600	} else {
	1601	errln((UnicodeString)"FAIL: createInverse(" + id + ") => " +
	1602	u->getID());
	1603	}
	1604	delete t;
	1605	delete u;
	1606	}
	1607
	1608	/**
	1609	* Compound filter semantics were orginially not implemented
	1610	* correctly. Originally, each component filter f(i) is replaced by
	1611	* f'(i) = f(i) && g, where g is the filter for the compound
	1612	* transliterator.
	1613	*
	1614	* From Mark:
	1615	*
	1616	* Suppose and I have a transliterator X. Internally X is
	1617	* "Greek-Latin; Latin-Cyrillic; Any-Lower". I use a filter [^A].
	1618	*
	1619	* The compound should convert all greek characters (through latin) to
	1620	* cyrillic, then lowercase the result. The filter should say "don't
	1621	* touch 'A' in the original". But because an intermediate result
	1622	* happens to go through "A", the Greek Alpha gets hung up.
	1623	*/
	1624	void TransliteratorTest::TestCompoundFilter(void) {
	1625	UParseError parseError;
	1626	UErrorCode status = U_ZERO_ERROR;
	1627	Transliterator *t = Transliterator::createInstance
	1628	("Greek-Latin; Latin-Greek; Lower", UTRANS_FORWARD, parseError, status);
	1629	if (t == 0) {
	1630	dataerrln("FAIL: createInstance failed - %s", u_errorName(status));
	1631	return;
	1632	}
	1633	t->adoptFilter(new UnicodeSet("[^A]", status));
	1634	if (U_FAILURE(status)) {
	1635	errln("FAIL: UnicodeSet ct failed");
	1636	delete t;
	1637	return;
	1638	}
	1639
	1640	// Only the 'A' at index 1 should remain unchanged
	1641	expect(*t,
	1642	CharsToUnicodeString("BA\\u039A\\u0391"),
	1643	CharsToUnicodeString("\\u03b2A\\u03ba\\u03b1"));
	1644	delete t;
	1645	}
	1646
	1647	void TransliteratorTest::TestRemove(void) {
	1648	UParseError parseError;
	1649	UErrorCode status = U_ZERO_ERROR;
	1650	Transliterator *t = Transliterator::createInstance("Remove[abc]", UTRANS_FORWARD, parseError, status);
	1651	if (t == 0) {
	1652	errln("FAIL: createInstance failed");
	1653	return;
	1654	}
	1655
	1656	expect(*t, "Able bodied baker's cats", "Ale odied ker's ts");
	1657
	1658	// extra test for RemoveTransliterator::clone(), which at one point wasn't
	1659	// duplicating the filter
	1660	Transliterator* t2 = t->clone();
	1661	expect(*t2, "Able bodied baker's cats", "Ale odied ker's ts");
	1662
	1663	delete t;
	1664	delete t2;
	1665	}
	1666
	1667	void TransliteratorTest::TestToRules(void) {
	1668	const char* RBT = "rbt";
	1669	const char* SET = "set";
	1670	static const char* DATA[] = {
	1671	RBT,
	1672	"$a=\\u4E61; [$a] > A;",
	1673	"[\\u4E61] > A;",
	1674
	1675	RBT,
	1676	"$white=[[:Zs:][:Zl:]]; $white{a} > A;",
	1677	"[[:Zs:][:Zl:]]{a} > A;",
	1678
	1679	SET,
	1680	"[[:Zs:][:Zl:]]",
	1681	"[[:Zs:][:Zl:]]",
	1682
	1683	SET,
	1684	"[:Ps:]",
	1685	"[:Ps:]",
	1686
	1687	SET,
	1688	"[:L:]",
	1689	"[:L:]",
	1690
	1691	SET,
	1692	"[[:L:]-[A]]",
	1693	"[[:L:]-[A]]",
	1694
	1695	SET,
	1696	"[~[:Lu:][:Ll:]]",
	1697	"[~[:Lu:][:Ll:]]",
	1698
	1699	SET,
	1700	"[~[a-z]]",
	1701	"[~[a-z]]",
	1702
	1703	RBT,
	1704	"$white=[:Zs:]; $black=[^$white]; $black{a} > A;",
	1705	"[^[:Zs:]]{a} > A;",
	1706
	1707	RBT,
	1708	"$a=[:Zs:]; $b=[[a-z]-$a]; $b{a} > A;",
	1709	"[[a-z]-[:Zs:]]{a} > A;",
	1710
	1711	RBT,
	1712	"$a=[:Zs:]; $b=[$a&[a-z]]; $b{a} > A;",
	1713	"[[:Zs:]&[a-z]]{a} > A;",
	1714
	1715	RBT,
	1716	"$a=[:Zs:]; $b=[x$a]; $b{a} > A;",
	1717	"[x[:Zs:]]{a} > A;",
	1718
	1719	RBT,
	1720	"$accentMinus = [ [\\u0300-\\u0345] & [:M:] - [\\u0338]] ;"
	1721	"$macron = \\u0304 ;"
	1722	"$evowel = [aeiouyAEIOUY] ;"
	1723	"$iotasub = \\u0345 ;"
	1724	"($evowel $macron $accentMinus *) i > \| $1 $iotasub ;",
	1725	"([AEIOUYaeiouy]\\u0304[[\\u0300-\\u0345]&[:M:]-[\\u0338]]*)i > \| $1 \\u0345;",
	1726
	1727	RBT,
	1728	"([AEIOUYaeiouy]\\u0304[[:M:]-[\\u0304\\u0345]]*)i > \| $1 \\u0345;",
	1729	"([AEIOUYaeiouy]\\u0304[[:M:]-[\\u0304\\u0345]]*)i > \| $1 \\u0345;",
	1730	};
	1731	static const int32_t DATA_length = (int32_t)(sizeof(DATA) / sizeof(DATA[0]));
	1732
	1733	for (int32_t d=0; d < DATA_length; d+=3) {
	1734	if (DATA[d] == RBT) {
	1735	// Transliterator test
	1736	UParseError parseError;
	1737	UErrorCode status = U_ZERO_ERROR;
	1738	Transliterator *t = Transliterator::createFromRules("ID",
	1739	UnicodeString(DATA[d+1], -1, US_INV), UTRANS_FORWARD, parseError, status);
	1740	if (t == 0) {
	1741	dataerrln("FAIL: createFromRules failed - %s", u_errorName(status));
	1742	return;
	1743	}
	1744	UnicodeString rules, escapedRules;
	1745	t->toRules(rules, FALSE);
	1746	t->toRules(escapedRules, TRUE);
	1747	UnicodeString expRules = CharsToUnicodeString(DATA[d+2]);
	1748	UnicodeString expEscapedRules(DATA[d+2], -1, US_INV);
	1749	if (rules == expRules) {
	1750	logln((UnicodeString)"Ok: " + UnicodeString(DATA[d+1], -1, US_INV) +
	1751	" => " + rules);
	1752	} else {
	1753	errln((UnicodeString)"FAIL: " + UnicodeString(DATA[d+1], -1, US_INV) +
	1754	" => " + rules + ", exp " + expRules);
	1755	}
	1756	if (escapedRules == expEscapedRules) {
	1757	logln((UnicodeString)"Ok: " + UnicodeString(DATA[d+1], -1, US_INV) +
	1758	" => " + escapedRules);
	1759	} else {
	1760	errln((UnicodeString)"FAIL: " + UnicodeString(DATA[d+1], -1, US_INV) +
	1761	" => " + escapedRules + ", exp " + expEscapedRules);
	1762	}
	1763	delete t;
	1764
	1765	} else {
	1766	// UnicodeSet test
	1767	UErrorCode status = U_ZERO_ERROR;
	1768	UnicodeString pat(DATA[d+1], -1, US_INV);
	1769	UnicodeString expToPat(DATA[d+2], -1, US_INV);
	1770	UnicodeSet set(pat, status);
	1771	if (U_FAILURE(status)) {
	1772	errln("FAIL: UnicodeSet ct failed");
	1773	return;
	1774	}
	1775	// Adjust spacing etc. as necessary.
	1776	UnicodeString toPat;
	1777	set.toPattern(toPat);
	1778	if (expToPat == toPat) {
	1779	logln((UnicodeString)"Ok: " + pat +
	1780	" => " + toPat);
	1781	} else {
	1782	errln((UnicodeString)"FAIL: " + pat +
	1783	" => " + prettify(toPat, TRUE) +
	1784	", exp " + prettify(pat, TRUE));
	1785	}
	1786	}
	1787	}
	1788	}
	1789
	1790	void TransliteratorTest::TestContext() {
	1791	UTransPosition pos = {0, 2, 0, 1}; // cs cl s l
	1792	expect("de > x; {d}e > y;",
	1793	"de",
	1794	"ye",
	1795	&pos);
	1796
	1797	expect("ab{c} > z;",
	1798	"xadabdabcy",
	1799	"xadabdabzy");
	1800	}
	1801
	1802	void TransliteratorTest::TestSupplemental() {
	1803
	1804	expect(CharsToUnicodeString("$a=\\U00010300; $s=[\\U00010300-\\U00010323];"
	1805	"a > $a; $s > i;"),
	1806	CharsToUnicodeString("ab\\U0001030Fx"),
	1807	CharsToUnicodeString("\\U00010300bix"));
	1808
	1809	expect(CharsToUnicodeString("$a=[a-z\\U00010300-\\U00010323];"
	1810	"$b=[A-Z\\U00010400-\\U0001044D];"
	1811	"($a)($b) > $2 $1;"),
	1812	CharsToUnicodeString("aB\\U00010300\\U00010400c\\U00010401\\U00010301D"),
	1813	CharsToUnicodeString("Ba\\U00010400\\U00010300\\U00010401cD\\U00010301"));
	1814
	1815	// k\|ax\\U00010300xm
	1816
	1817	// k\|a\\U00010400\\U00010300xm
	1818	// ky\|\\U00010400\\U00010300xm
	1819	// ky\\U00010400\|\\U00010300xm
	1820
	1821	// ky\\U00010400\|\\U00010300\\U00010400m
	1822	// ky\\U00010400y\|\\U00010400m
	1823	expect(CharsToUnicodeString("$a=[a\\U00010300-\\U00010323];"
	1824	"$a {x} > \| @ \\U00010400;"
	1825	"{$a} [^\\u0000-\\uFFFF] > y;"),
	1826	CharsToUnicodeString("kax\\U00010300xm"),
	1827	CharsToUnicodeString("ky\\U00010400y\\U00010400m"));
	1828
	1829	expectT("Any-Name",
	1830	CharsToUnicodeString("\\U00010330\\U000E0061\\u00A0"),
	1831	UNICODE_STRING_SIMPLE("\\N{GOTHIC LETTER AHSA}\\N{TAG LATIN SMALL LETTER A}\\N{NO-BREAK SPACE}"));
	1832
	1833	expectT("Any-Hex/Unicode",
	1834	CharsToUnicodeString("\\U00010330\\U0010FF00\\U000E0061\\u00A0"),
	1835	UNICODE_STRING_SIMPLE("U+10330U+10FF00U+E0061U+00A0"));
	1836
	1837	expectT("Any-Hex/C",
	1838	CharsToUnicodeString("\\U00010330\\U0010FF00\\U000E0061\\u00A0"),
	1839	UNICODE_STRING_SIMPLE("\\U00010330\\U0010FF00\\U000E0061\\u00A0"));
	1840
	1841	expectT("Any-Hex/Perl",
	1842	CharsToUnicodeString("\\U00010330\\U0010FF00\\U000E0061\\u00A0"),
	1843	UNICODE_STRING_SIMPLE("\\x{10330}\\x{10FF00}\\x{E0061}\\x{A0}"));
	1844
	1845	expectT("Any-Hex/Java",
	1846	CharsToUnicodeString("\\U00010330\\U0010FF00\\U000E0061\\u00A0"),
	1847	UNICODE_STRING_SIMPLE("\\uD800\\uDF30\\uDBFF\\uDF00\\uDB40\\uDC61\\u00A0"));
	1848
	1849	expectT("Any-Hex/XML",
	1850	CharsToUnicodeString("\\U00010330\\U0010FF00\\U000E0061\\u00A0"),
	1851	"𐌰􏼀󠁡 ");
	1852
	1853	expectT("Any-Hex/XML10",
	1854	CharsToUnicodeString("\\U00010330\\U0010FF00\\U000E0061\\u00A0"),
	1855	"𐌰􏼀󠁡 ");
	1856
	1857	expectT(UNICODE_STRING_SIMPLE("[\\U000E0000-\\U000E0FFF] Remove"),
	1858	CharsToUnicodeString("\\U00010330\\U0010FF00\\U000E0061\\u00A0"),
	1859	CharsToUnicodeString("\\U00010330\\U0010FF00\\u00A0"));
	1860	}
	1861
	1862	void TransliteratorTest::TestQuantifier() {
	1863
	1864	// Make sure @ in a quantified anteContext works
	1865	expect("a+ {b} > \| @@ c; A > a; (a+ c) > '(' $1 ')';",
	1866	"AAAAAb",
	1867	"aaa(aac)");
	1868
	1869	// Make sure @ in a quantified postContext works
	1870	expect("{b} a+ > c @@ \|; (a+) > '(' $1 ')';",
	1871	"baaaaa",
	1872	"caa(aaa)");
	1873
	1874	// Make sure @ in a quantified postContext with seg ref works
	1875	expect("{(b)} a+ > $1 @@ \|; (a+) > '(' $1 ')';",
	1876	"baaaaa",
	1877	"baa(aaa)");
	1878
	1879	// Make sure @ past ante context doesn't enter ante context
	1880	UTransPosition pos = {0, 5, 3, 5};
	1881	expect("a+ {b} > \| @@ c; x > y; (a+ c) > '(' $1 ')';",
	1882	"xxxab",
	1883	"xxx(ac)",
	1884	&pos);
	1885
	1886	// Make sure @ past post context doesn't pass limit
	1887	UTransPosition pos2 = {0, 4, 0, 2};
	1888	expect("{b} a+ > c @@ \|; x > y; a > A;",
	1889	"baxx",
	1890	"caxx",
	1891	&pos2);
	1892
	1893	// Make sure @ past post context doesn't enter post context
	1894	expect("{b} a+ > c @@ \|; x > y; a > A;",
	1895	"baxx",
	1896	"cayy");
	1897
	1898	expect("(ab)? c > d;",
	1899	"c abc ababc",
	1900	"d d abd");
	1901
	1902	// NOTE: The (ab)+ when referenced just yields a single "ab",
	1903	// not the full sequence of them. This accords with perl behavior.
	1904	expect("(ab)+ {x} > '(' $1 ')';",
	1905	"x abx ababxy",
	1906	"x ab(ab) abab(ab)y");
	1907
	1908	expect("b+ > x;",
	1909	"ac abc abbc abbbc",
	1910	"ac axc axc axc");
	1911
	1912	expect("[abc]+ > x;",
	1913	"qac abrc abbcs abtbbc",
	1914	"qx xrx xs xtx");
	1915
	1916	expect("q{(ab)+} > x;",
	1917	"qa qab qaba qababc qaba",
	1918	"qa qx qxa qxc qxa");
	1919
	1920	expect("q(ab)* > x;",
	1921	"qa qab qaba qababc",
	1922	"xa x xa xc");
	1923
	1924	// NOTE: The (ab)+ when referenced just yields a single "ab",
	1925	// not the full sequence of them. This accords with perl behavior.
	1926	expect("q(ab)* > '(' $1 ')';",
	1927	"qa qab qaba qababc",
	1928	"()a (ab) (ab)a (ab)c");
	1929
	1930	// 'foo'+ and 'foo'* -- the quantifier should apply to the entire
	1931	// quoted string
	1932	expect("'ab'+ > x;",
	1933	"bb ab ababb",
	1934	"bb x xb");
	1935
	1936	// $foo+ and $foo* -- the quantifier should apply to the entire
	1937	// variable reference
	1938	expect("$var = ab; $var+ > x;",
	1939	"bb ab ababb",
	1940	"bb x xb");
	1941	}
	1942
	1943	class TestTrans : public Transliterator {
	1944	public:
	1945	TestTrans(const UnicodeString& id) : Transliterator(id, 0) {
	1946	}
	1947	virtual Transliterator* clone(void) const {
	1948	return new TestTrans(getID());
	1949	}
	1950	virtual void handleTransliterate(Replaceable& /text/, UTransPosition& offsets,
	1951	UBool /isIncremental/) const
	1952	{
	1953	offsets.start = offsets.limit;
	1954	}
	1955	virtual UClassID getDynamicClassID() const;
	1956	static UClassID U_EXPORT2 getStaticClassID();
	1957	};
	1958	UOBJECT_DEFINE_RTTI_IMPLEMENTATION(TestTrans)
	1959
	1960	/**
	1961	* Test Source-Target/Variant.
	1962	*/
	1963	void TransliteratorTest::TestSTV(void) {
	1964	int32_t ns = Transliterator::countAvailableSources();
	1965	if (ns < 0 \|\| ns > 255) {
	1966	errln((UnicodeString)"FAIL: Bad source count: " + ns);
	1967	return;
	1968	}
	1969	int32_t i, j;
	1970	for (i=0; i<ns; ++i) {
	1971	UnicodeString source;
	1972	Transliterator::getAvailableSource(i, source);
	1973	logln((UnicodeString)"" + i + ": " + source);
	1974	if (source.length() == 0) {
	1975	errln("FAIL: empty source");
	1976	continue;
	1977	}
	1978	int32_t nt = Transliterator::countAvailableTargets(source);
	1979	if (nt < 0 \|\| nt > 255) {
	1980	errln((UnicodeString)"FAIL: Bad target count: " + nt);
	1981	continue;
	1982	}
	1983	for (int32_t j=0; j<nt; ++j) {
	1984	UnicodeString target;
	1985	Transliterator::getAvailableTarget(j, source, target);
	1986	logln((UnicodeString)" " + j + ": " + target);
	1987	if (target.length() == 0) {
	1988	errln("FAIL: empty target");
	1989	continue;
	1990	}
	1991	int32_t nv = Transliterator::countAvailableVariants(source, target);
	1992	if (nv < 0 \|\| nv > 255) {
	1993	errln((UnicodeString)"FAIL: Bad variant count: " + nv);
	1994	continue;
	1995	}
	1996	for (int32_t k=0; k<nv; ++k) {
	1997	UnicodeString variant;
	1998	Transliterator::getAvailableVariant(k, source, target, variant);
	1999	if (variant.length() == 0) {
	2000	logln((UnicodeString)" " + k + ": <empty>");
	2001	} else {
	2002	logln((UnicodeString)" " + k + ": " + variant);
	2003	}
	2004	}
	2005	}
	2006	}
	2007
	2008	// Test registration
	2009	const char* IDS[] = { "Fieruwer", "Seoridf-Sweorie", "Oewoir-Oweri/Vsie" };
	2010	const char* FULL_IDS[] = { "Any-Fieruwer", "Seoridf-Sweorie", "Oewoir-Oweri/Vsie" };
	2011	const char* SOURCES[] = { NULL, "Seoridf", "Oewoir" };
	2012	for (i=0; i<3; ++i) {
	2013	Transliterator *t = new TestTrans(IDS[i]);
	2014	if (t == 0) {
	2015	errln("FAIL: out of memory");
	2016	return;
	2017	}
	2018	if (t->getID() != IDS[i]) {
	2019	errln((UnicodeString)"FAIL: ID mismatch for " + IDS[i]);
	2020	delete t;
	2021	return;
	2022	}
	2023	Transliterator::registerInstance(t);
	2024	UErrorCode status = U_ZERO_ERROR;
	2025	t = Transliterator::createInstance(IDS[i], UTRANS_FORWARD, status);
	2026	if (t == NULL) {
	2027	errln((UnicodeString)"FAIL: Registration/creation failed for ID " +
	2028	IDS[i]);
	2029	} else {
	2030	logln((UnicodeString)"Ok: Registration/creation succeeded for ID " +
	2031	IDS[i]);
	2032	delete t;
	2033	}
	2034	Transliterator::unregister(IDS[i]);
	2035	t = Transliterator::createInstance(IDS[i], UTRANS_FORWARD, status);
	2036	if (t != NULL) {
	2037	errln((UnicodeString)"FAIL: Unregistration failed for ID " +
	2038	IDS[i]);
	2039	delete t;
	2040	}
	2041	}
	2042
	2043	// Make sure getAvailable API reflects removal
	2044	int32_t n = Transliterator::countAvailableIDs();
	2045	for (i=0; i<n; ++i) {
	2046	UnicodeString id = Transliterator::getAvailableID(i);
	2047	for (j=0; j<3; ++j) {
	2048	if (id.caseCompare(FULL_IDS[j],0)==0) {
	2049	errln((UnicodeString)"FAIL: unregister(" + id + ") failed");
	2050	}
	2051	}
	2052	}
	2053	n = Transliterator::countAvailableTargets("Any");
	2054	for (i=0; i<n; ++i) {
	2055	UnicodeString t;
	2056	Transliterator::getAvailableTarget(i, "Any", t);
	2057	if (t.caseCompare(IDS[0],0)==0) {
	2058	errln((UnicodeString)"FAIL: unregister(Any-" + t + ") failed");
	2059	}
	2060	}
	2061	n = Transliterator::countAvailableSources();
	2062	for (i=0; i<n; ++i) {
	2063	UnicodeString s;
	2064	Transliterator::getAvailableSource(i, s);
	2065	for (j=0; j<3; ++j) {
	2066	if (SOURCES[j] == NULL) continue;
	2067	if (s.caseCompare(SOURCES[j],0)==0) {
	2068	errln((UnicodeString)"FAIL: unregister(" + s + "-*) failed");
	2069	}
	2070	}
	2071	}
	2072	}
	2073
	2074	/**
	2075	* Test inverse of Greek-Latin; Title()
	2076	*/
	2077	void TransliteratorTest::TestCompoundInverse(void) {
	2078	UParseError parseError;
	2079	UErrorCode status = U_ZERO_ERROR;
	2080	Transliterator *t = Transliterator::createInstance
	2081	("Greek-Latin; Title()", UTRANS_REVERSE,parseError, status);
	2082	if (t == 0) {
	2083	dataerrln("FAIL: createInstance - %s", u_errorName(status));
	2084	return;
	2085	}
	2086	UnicodeString exp("(Title);Latin-Greek");
	2087	if (t->getID() == exp) {
	2088	logln("Ok: inverse of \"Greek-Latin; Title()\" is \"" +
	2089	t->getID());
	2090	} else {
	2091	errln("FAIL: inverse of \"Greek-Latin; Title()\" is \"" +
	2092	t->getID() + "\", expected \"" + exp + "\"");
	2093	}
	2094	delete t;
	2095	}
	2096
	2097	/**
	2098	* Test NFD chaining with RBT
	2099	*/
	2100	void TransliteratorTest::TestNFDChainRBT() {
	2101	UParseError pe;
	2102	UErrorCode ec = U_ZERO_ERROR;
	2103	Transliterator* t = Transliterator::createFromRules(
	2104	"TEST", "::NFD; aa > Q; a > q;",
	2105	UTRANS_FORWARD, pe, ec);
	2106	if (t == NULL \|\| U_FAILURE(ec)) {
	2107	dataerrln("FAIL: Transliterator::createFromRules failed with %s", u_errorName(ec));
	2108	return;
	2109	}
	2110	expect(*t, "aa", "Q");
	2111	delete t;
	2112
	2113	// TEMPORARY TESTS -- BEING DEBUGGED
	2114	//=- UnicodeString s, s2;
	2115	//=- t = Transliterator::createInstance("Latin-Devanagari", UTRANS_FORWARD, pe, ec);
	2116	//=- s = CharsToUnicodeString("rmk\\u1E63\\u0113t");
	2117	//=- s2 = CharsToUnicodeString("\\u0930\\u094D\\u092E\\u094D\\u0915\\u094D\\u0937\\u0947\\u0924\\u094D");
	2118	//=- expect(*t, s, s2);
	2119	//=- delete t;
	2120	//=-
	2121	//=- t = Transliterator::createInstance("Devanagari-Latin", UTRANS_FORWARD, pe, ec);
	2122	//=- expect(*t, s2, s);
	2123	//=- delete t;
	2124	//=-
	2125	//=- t = Transliterator::createInstance("Latin-Devanagari;Devanagari-Latin", UTRANS_FORWARD, pe, ec);
	2126	//=- s = CharsToUnicodeString("rmk\\u1E63\\u0113t");
	2127	//=- expect(*t, s, s);
	2128	//=- delete t;
	2129
	2130	// const char* source[] = {
	2131	// /*
	2132	// "\\u015Br\\u012Bmad",
	2133	// "bhagavadg\\u012Bt\\u0101",
	2134	// "adhy\\u0101ya",
	2135	// "arjuna",
	2136	// "vi\\u1E63\\u0101da",
	2137	// "y\\u014Dga",
	2138	// "dhr\\u0325tar\\u0101\\u1E63\\u1E6Dra",
	2139	// "uv\\u0101cr\\u0325",
	2140	// */
	2141	// "rmk\\u1E63\\u0113t",
	2142	// //"dharmak\\u1E63\\u0113tr\\u0113",
	2143	// /*
	2144	// "kuruk\\u1E63\\u0113tr\\u0113",
	2145	// "samav\\u0113t\\u0101",
	2146	// "yuyutsava-\\u1E25",
	2147	// "m\\u0101mak\\u0101-\\u1E25",
	2148	// // "p\\u0101\\u1E47\\u1E0Dav\\u0101\\u015Bcaiva",
	2149	// "kimakurvata",
	2150	// "san\\u0304java",
	2151	// */
	2152	//
	2153	// 0
	2154	// };
	2155	// const char* expected[] = {
	2156	// /*
	2157	// "\\u0936\\u094d\\u0930\\u0940\\u092e\\u0926\\u094d",
	2158	// "\\u092d\\u0917\\u0935\\u0926\\u094d\\u0917\\u0940\\u0924\\u093e",
	2159	// "\\u0905\\u0927\\u094d\\u092f\\u093e\\u092f",
	2160	// "\\u0905\\u0930\\u094d\\u091c\\u0941\\u0928",
	2161	// "\\u0935\\u093f\\u0937\\u093e\\u0926",
	2162	// "\\u092f\\u094b\\u0917",
	2163	// "\\u0927\\u0943\\u0924\\u0930\\u093e\\u0937\\u094d\\u091f\\u094d\\u0930",
	2164	// "\\u0909\\u0935\\u093E\\u091A\\u0943",
	2165	// */
	2166	// "\\u0927",
	2167	// //"\\u0927\\u0930\\u094d\\u092e\\u0915\\u094d\\u0937\\u0947\\u0924\\u094d\\u0930\\u0947",
	2168	// /*
	2169	// "\\u0915\\u0941\\u0930\\u0941\\u0915\\u094d\\u0937\\u0947\\u0924\\u094d\\u0930\\u0947",
	2170	// "\\u0938\\u092e\\u0935\\u0947\\u0924\\u093e",
	2171	// "\\u092f\\u0941\\u092f\\u0941\\u0924\\u094d\\u0938\\u0935\\u0903",
	2172	// "\\u092e\\u093e\\u092e\\u0915\\u093e\\u0903",
	2173	// // "\\u092a\\u093e\\u0923\\u094d\\u0921\\u0935\\u093e\\u0936\\u094d\\u091a\\u0948\\u0935",
	2174	// "\\u0915\\u093f\\u092e\\u0915\\u0941\\u0930\\u094d\\u0935\\u0924",
	2175	// "\\u0938\\u0902\\u091c\\u0935",
	2176	// */
	2177	// 0
	2178	// };
	2179	// UErrorCode status = U_ZERO_ERROR;
	2180	// UParseError parseError;
	2181	// UnicodeString message;
	2182	// Transliterator* latinToDevToLatin=Transliterator::createInstance("Latin-Devanagari;Devanagari-Latin", UTRANS_FORWARD, parseError, status);
	2183	// Transliterator* devToLatinToDev=Transliterator::createInstance("Devanagari-Latin;Latin-Devanagari", UTRANS_FORWARD, parseError, status);
	2184	// if(U_FAILURE(status)){
	2185	// errln("FAIL: construction " + UnicodeString(" Error: ") + u_errorName(status));
	2186	// errln("PreContext: " + prettify(parseError.preContext) + "PostContext: " + prettify( parseError.postContext) );
	2187	// delete latinToDevToLatin;
	2188	// delete devToLatinToDev;
	2189	// return;
	2190	// }
	2191	// UnicodeString gotResult;
	2192	// for(int i= 0; source[i] != 0; i++){
	2193	// gotResult = source[i];
	2194	// expect(*latinToDevToLatin,CharsToUnicodeString(source[i]),CharsToUnicodeString(source[i]));
	2195	// expect(*devToLatinToDev,CharsToUnicodeString(expected[i]),CharsToUnicodeString(expected[i]));
	2196	// }
	2197	// delete latinToDevToLatin;
	2198	// delete devToLatinToDev;
	2199	}
	2200
	2201	/**
	2202	* Inverse of "Null" should be "Null". (J21)
	2203	*/
	2204	void TransliteratorTest::TestNullInverse() {
	2205	UParseError pe;
	2206	UErrorCode ec = U_ZERO_ERROR;
	2207	Transliterator *t = Transliterator::createInstance("Null", UTRANS_FORWARD, pe, ec);
	2208	if (t == 0 \|\| U_FAILURE(ec)) {
	2209	errln("FAIL: createInstance");
	2210	return;
	2211	}
	2212	Transliterator *u = t->createInverse(ec);
	2213	if (u == 0 \|\| U_FAILURE(ec)) {
	2214	errln("FAIL: createInverse");
	2215	delete t;
	2216	return;
	2217	}
	2218	if (u->getID() != "Null") {
	2219	errln("FAIL: Inverse of Null should be Null");
	2220	}
	2221	delete t;
	2222	delete u;
	2223	}
	2224
	2225	/**
	2226	* Check ID of inverse of alias. (J22)
	2227	*/
	2228	void TransliteratorTest::TestAliasInverseID() {
	2229	UnicodeString ID("Latin-Hangul", ""); // This should be any alias ID with an inverse
	2230	UParseError pe;
	2231	UErrorCode ec = U_ZERO_ERROR;
	2232	Transliterator *t = Transliterator::createInstance(ID, UTRANS_FORWARD, pe, ec);
	2233	if (t == 0 \|\| U_FAILURE(ec)) {
	2234	dataerrln("FAIL: createInstance - %s", u_errorName(ec));
	2235	return;
	2236	}
	2237	Transliterator *u = t->createInverse(ec);
	2238	if (u == 0 \|\| U_FAILURE(ec)) {
	2239	errln("FAIL: createInverse");
	2240	delete t;
	2241	return;
	2242	}
	2243	UnicodeString exp = "Hangul-Latin";
	2244	UnicodeString got = u->getID();
	2245	if (got != exp) {
	2246	errln((UnicodeString)"FAIL: Inverse of " + ID + " is " + got +
	2247	", expected " + exp);
	2248	}
	2249	delete t;
	2250	delete u;
	2251	}
	2252
	2253	/**
	2254	* Test IDs of inverses of compound transliterators. (J20)
	2255	*/
	2256	void TransliteratorTest::TestCompoundInverseID() {
	2257	UnicodeString ID = "Latin-Jamo;NFC(NFD)";
	2258	UParseError pe;
	2259	UErrorCode ec = U_ZERO_ERROR;
	2260	Transliterator *t = Transliterator::createInstance(ID, UTRANS_FORWARD, pe, ec);
	2261	if (t == 0 \|\| U_FAILURE(ec)) {
	2262	dataerrln("FAIL: createInstance - %s", u_errorName(ec));
	2263	return;
	2264	}
	2265	Transliterator *u = t->createInverse(ec);
	2266	if (u == 0 \|\| U_FAILURE(ec)) {
	2267	errln("FAIL: createInverse");
	2268	delete t;
	2269	return;
	2270	}
	2271	UnicodeString exp = "NFD(NFC);Jamo-Latin";
	2272	UnicodeString got = u->getID();
	2273	if (got != exp) {
	2274	errln((UnicodeString)"FAIL: Inverse of " + ID + " is " + got +
	2275	", expected " + exp);
	2276	}
	2277	delete t;
	2278	delete u;
	2279	}
	2280
	2281	/**
	2282	* Test undefined variable.
	2283
	2284	*/
	2285	void TransliteratorTest::TestUndefinedVariable() {
	2286	UnicodeString rule = "$initial } a <> \\u1161;";
	2287	UParseError pe;
	2288	UErrorCode ec = U_ZERO_ERROR;
	2289	Transliterator *t = Transliterator::createFromRules("<ID>", rule, UTRANS_FORWARD, pe, ec);
	2290	delete t;
	2291	if (U_FAILURE(ec)) {
	2292	logln((UnicodeString)"OK: Got exception for " + rule + ", as expected: " +
	2293	u_errorName(ec));
	2294	return;
	2295	}
	2296	errln((UnicodeString)"Fail: bogus rule " + rule + " compiled with error " +
	2297	u_errorName(ec));
	2298	}
	2299
	2300	/**
	2301	* Test empty context.
	2302	*/
	2303	void TransliteratorTest::TestEmptyContext() {
	2304	expect(" { a } > b;", "xay a ", "xby b ");
	2305	}
	2306
	2307	/**
	2308	* Test compound filter ID syntax
	2309	*/
	2310	void TransliteratorTest::TestCompoundFilterID(void) {
	2311	static const char* DATA[] = {
	2312	// Col. 1 = ID or rule set (latter must start with #)
	2313
	2314	// = columns > 1 are null if expect col. 1 to be illegal =
	2315
	2316	// Col. 2 = direction, "F..." or "R..."
	2317	// Col. 3 = source string
	2318	// Col. 4 = exp result
	2319
	2320	"[abc]; [abc]", NULL, NULL, NULL, // multiple filters
	2321	"Latin-Greek; [abc];", NULL, NULL, NULL, // misplaced filter
	2322	"[b]; Latin-Greek; Upper; ([xyz])", "F", "abc", "a\\u0392c",
	2323	"[b]; (Lower); Latin-Greek; Upper(); ([\\u0392])", "R", "\\u0391\\u0392\\u0393", "\\u0391b\\u0393",
	2324	"#\n::[b]; ::Latin-Greek; ::Upper; ::([xyz]);", "F", "abc", "a\\u0392c",
	2325	"#\n::[b]; ::(Lower); ::Latin-Greek; ::Upper(); ::([\\u0392]);", "R", "\\u0391\\u0392\\u0393", "\\u0391b\\u0393",
	2326	NULL,
	2327	};
	2328
	2329	for (int32_t i=0; DATA[i]; i+=4) {
	2330	UnicodeString id = CharsToUnicodeString(DATA[i]);
	2331	UTransDirection direction = (DATA[i+1] != NULL && DATA[i+1][0] == 'R') ?
	2332	UTRANS_REVERSE : UTRANS_FORWARD;
	2333	UnicodeString source;
	2334	UnicodeString exp;
	2335	if (DATA[i+2] != NULL) {
	2336	source = CharsToUnicodeString(DATA[i+2]);
	2337	exp = CharsToUnicodeString(DATA[i+3]);
	2338	}
	2339	UBool expOk = (DATA[i+1] != NULL);
	2340	Transliterator* t = NULL;
	2341	UParseError pe;
	2342	UErrorCode ec = U_ZERO_ERROR;
	2343	if (id.charAt(0) == 0x23/#/) {
	2344	t = Transliterator::createFromRules("ID", id, direction, pe, ec);
	2345	} else {
	2346	t = Transliterator::createInstance(id, direction, pe, ec);
	2347	}
	2348	UBool ok = (t != NULL && U_SUCCESS(ec));
	2349	UnicodeString transID;
	2350	if (t!=0) {
	2351	transID = t->getID();
	2352	}
	2353	else {
	2354	transID = UnicodeString("NULL", "");
	2355	}
	2356	if (ok == expOk) {
	2357	logln((UnicodeString)"Ok: " + id + " => " + transID + ", " +
	2358	u_errorName(ec));
	2359	if (source.length() != 0) {
	2360	expect(*t, source, exp);
	2361	}
	2362	delete t;
	2363	} else {
	2364	dataerrln((UnicodeString)"FAIL: " + id + " => " + transID + ", " +
	2365	u_errorName(ec));
	2366	}
	2367	}
	2368	}
	2369
	2370	/**
	2371	* Test new property set syntax
	2372	*/
	2373	void TransliteratorTest::TestPropertySet() {
	2374	expect(UNICODE_STRING_SIMPLE("a>A; \\p{Lu}>x; \\p{ANY}>y;"), "abcDEF", "Ayyxxx");
	2375	expect("(.+)>'[' $1 ']';", " a stitch \n in time \r saves 9",
	2376	"[ a stitch ]\n[ in time ]\r[ saves 9]");
	2377	}
	2378
	2379	/**
	2380	* Test various failure points of the new 2.0 engine.
	2381	*/
	2382	void TransliteratorTest::TestNewEngine() {
	2383	UParseError pe;
	2384	UErrorCode ec = U_ZERO_ERROR;
	2385	Transliterator *t = Transliterator::createInstance("Latin-Hiragana", UTRANS_FORWARD, pe, ec);
	2386	if (t == 0 \|\| U_FAILURE(ec)) {
	2387	dataerrln("FAIL: createInstance Latin-Hiragana - %s", u_errorName(ec));
	2388	return;
	2389	}
	2390	// Katakana should be untouched
	2391	expect(*t, CharsToUnicodeString("a\\u3042\\u30A2"),
	2392	CharsToUnicodeString("\\u3042\\u3042\\u30A2"));
	2393
	2394	delete t;
	2395
	2396	#if 1
	2397	// This test will only work if Transliterator.ROLLBACK is
	2398	// true. Otherwise, this test will fail, revealing a
	2399	// limitation of global filters in incremental mode.
	2400	Transliterator *a =
	2401	Transliterator::createFromRules("a_to_A", "a > A;", UTRANS_FORWARD, pe, ec);
	2402	Transliterator *A =
	2403	Transliterator::createFromRules("A_to_b", "A > b;", UTRANS_FORWARD, pe, ec);
	2404	if (U_FAILURE(ec)) {
	2405	delete a;
	2406	delete A;
	2407	return;
	2408	}
	2409
	2410	Transliterator* array[3];
	2411	array[0] = a;
	2412	array[1] = Transliterator::createInstance("NFD", UTRANS_FORWARD, pe, ec);
	2413	array[2] = A;
	2414	if (U_FAILURE(ec)) {
	2415	errln("FAIL: createInstance NFD");
	2416	delete a;
	2417	delete A;
	2418	delete array[1];
	2419	return;
	2420	}
	2421
	2422	t = new CompoundTransliterator(array, 3, new UnicodeSet("[:Ll:]", ec));
	2423	if (U_FAILURE(ec)) {
	2424	errln("FAIL: UnicodeSet constructor");
	2425	delete a;
	2426	delete A;
	2427	delete array[1];
	2428	delete t;
	2429	return;
	2430	}
	2431
	2432	expect(*t, "aAaA", "bAbA");
	2433
	2434	assertTrue("countElements", t->countElements() == 3);
	2435	assertEquals("getElement(0)", t->getElement(0, ec).getID(), "a_to_A");
	2436	assertEquals("getElement(1)", t->getElement(1, ec).getID(), "NFD");
	2437	assertEquals("getElement(2)", t->getElement(2, ec).getID(), "A_to_b");
	2438	assertSuccess("getElement", ec);
	2439
	2440	delete a;
	2441	delete A;
	2442	delete array[1];
	2443	delete t;
	2444	#endif
	2445
	2446	expect("$smooth = x; $macron = q; [:^L:] { ([aeiouyAEIOUY] $macron?) } [^aeiouyAEIOUY$smooth$macron] > \| $1 $smooth ;",
	2447	"a",
	2448	"ax");
	2449
	2450	UnicodeString gr = CharsToUnicodeString(
	2451	"$ddot = \\u0308 ;"
	2452	"$lcgvowel = [\\u03b1\\u03b5\\u03b7\\u03b9\\u03bf\\u03c5\\u03c9] ;"
	2453	"$rough = \\u0314 ;"
	2454	"($lcgvowel+ $ddot?) $rough > h \| $1 ;"
	2455	"\\u03b1 <> a ;"
	2456	"$rough <> h ;");
	2457
	2458	expect(gr, CharsToUnicodeString("\\u03B1\\u0314"), "ha");
	2459	}
	2460
	2461	/**
	2462	* Test quantified segment behavior. We want:
	2463	* ([abc])+ > x $1 x; applied to "cba" produces "xax"
	2464	*/
	2465	void TransliteratorTest::TestQuantifiedSegment(void) {
	2466	// The normal case
	2467	expect("([abc]+) > x $1 x;", "cba", "xcbax");
	2468
	2469	// The tricky case; the quantifier is around the segment
	2470	expect("([abc])+ > x $1 x;", "cba", "xax");
	2471
	2472	// Tricky case in reverse direction
	2473	expect("([abc])+ { q > x $1 x;", "cbaq", "cbaxax");
	2474
	2475	// Check post-context segment
	2476	expect("{q} ([a-d])+ > '(' $1 ')';", "ddqcba", "dd(a)cba");
	2477
	2478	// Test toRule/toPattern for non-quantified segment.
	2479	// Careful with spacing here.
	2480	UnicodeString r("([a-c]){q} > x $1 x;");
	2481	UParseError pe;
	2482	UErrorCode ec = U_ZERO_ERROR;
	2483	Transliterator* t = Transliterator::createFromRules("ID", r, UTRANS_FORWARD, pe, ec);
	2484	if (U_FAILURE(ec)) {
	2485	errln("FAIL: createFromRules");
	2486	delete t;
	2487	return;
	2488	}
	2489	UnicodeString rr;
	2490	t->toRules(rr, TRUE);
	2491	if (r != rr) {
	2492	errln((UnicodeString)"FAIL: \"" + r + "\" x toRules() => \"" + rr + "\"");
	2493	} else {
	2494	logln((UnicodeString)"Ok: \"" + r + "\" x toRules() => \"" + rr + "\"");
	2495	}
	2496	delete t;
	2497
	2498	// Test toRule/toPattern for quantified segment.
	2499	// Careful with spacing here.
	2500	r = "([a-c])+{q} > x $1 x;";
	2501	t = Transliterator::createFromRules("ID", r, UTRANS_FORWARD, pe, ec);
	2502	if (U_FAILURE(ec)) {
	2503	errln("FAIL: createFromRules");
	2504	delete t;
	2505	return;
	2506	}
	2507	t->toRules(rr, TRUE);
	2508	if (r != rr) {
	2509	errln((UnicodeString)"FAIL: \"" + r + "\" x toRules() => \"" + rr + "\"");
	2510	} else {
	2511	logln((UnicodeString)"Ok: \"" + r + "\" x toRules() => \"" + rr + "\"");
	2512	}
	2513	delete t;
	2514	}
	2515
	2516	//======================================================================
	2517	// Ram's tests
	2518	//======================================================================
	2519	void TransliteratorTest::TestDevanagariLatinRT(){
	2520	const int MAX_LEN= 52;
	2521	const char* const source[MAX_LEN] = {
	2522	"bh\\u0101rata",
	2523	"kra",
	2524	"k\\u1E63a",
	2525	"khra",
	2526	"gra",
	2527	"\\u1E45ra",
	2528	"cra",
	2529	"chra",
	2530	"j\\u00F1a",
	2531	"jhra",
	2532	"\\u00F1ra",
	2533	"\\u1E6Dya",
	2534	"\\u1E6Dhra",
	2535	"\\u1E0Dya",
	2536	//"r\\u0323ya", // \u095c is not valid in Devanagari
	2537	"\\u1E0Dhya",
	2538	"\\u1E5Bhra",
	2539	"\\u1E47ra",
	2540	"tta",
	2541	"thra",
	2542	"dda",
	2543	"dhra",
	2544	"nna",
	2545	"pra",
	2546	"phra",
	2547	"bra",
	2548	"bhra",
	2549	"mra",
	2550	"\\u1E49ra",
	2551	//"l\\u0331ra",
	2552	"yra",
	2553	"\\u1E8Fra",
	2554	//"l-",
	2555	"vra",
	2556	"\\u015Bra",
	2557	"\\u1E63ra",
	2558	"sra",
	2559	"hma",
	2560	"\\u1E6D\\u1E6Da",
	2561	"\\u1E6D\\u1E6Dha",
	2562	"\\u1E6Dh\\u1E6Dha",
	2563	"\\u1E0D\\u1E0Da",
	2564	"\\u1E0D\\u1E0Dha",
	2565	"\\u1E6Dya",
	2566	"\\u1E6Dhya",
	2567	"\\u1E0Dya",
	2568	"\\u1E0Dhya",
	2569	// Not roundtrippable --
	2570	// \\u0939\\u094d\\u094d\\u092E - hma
	2571	// \\u0939\\u094d\\u092E - hma
	2572	// CharsToUnicodeString("hma"),
	2573	"hya",
	2574	"\\u015Br\\u0325",
	2575	"\\u015Bca",
	2576	"\\u0115",
	2577	"san\\u0304j\\u012Bb s\\u0113nagupta",
	2578	"\\u0101nand vaddir\\u0101ju",
	2579	"\\u0101",
	2580	"a"
	2581	};
	2582	const char* const expected[MAX_LEN] = {
	2583	"\\u092D\\u093E\\u0930\\u0924", /* bha\\u0304rata */
	2584	"\\u0915\\u094D\\u0930", /* kra */
	2585	"\\u0915\\u094D\\u0937", /* ks\\u0323a */
	2586	"\\u0916\\u094D\\u0930", /* khra */
	2587	"\\u0917\\u094D\\u0930", /* gra */
	2588	"\\u0919\\u094D\\u0930", /* n\\u0307ra */
	2589	"\\u091A\\u094D\\u0930", /* cra */
	2590	"\\u091B\\u094D\\u0930", /* chra */
	2591	"\\u091C\\u094D\\u091E", /* jn\\u0303a */
	2592	"\\u091D\\u094D\\u0930", /* jhra */
	2593	"\\u091E\\u094D\\u0930", /* n\\u0303ra */
	2594	"\\u091F\\u094D\\u092F", /* t\\u0323ya */
	2595	"\\u0920\\u094D\\u0930", /* t\\u0323hra */
	2596	"\\u0921\\u094D\\u092F", /* d\\u0323ya */
	2597	//"\\u095C\\u094D\\u092F", /* r\\u0323ya */ // \u095c is not valid in Devanagari
	2598	"\\u0922\\u094D\\u092F", /* d\\u0323hya */
	2599	"\\u0922\\u093C\\u094D\\u0930", /* r\\u0323hra */
	2600	"\\u0923\\u094D\\u0930", /* n\\u0323ra */
	2601	"\\u0924\\u094D\\u0924", /* tta */
	2602	"\\u0925\\u094D\\u0930", /* thra */
	2603	"\\u0926\\u094D\\u0926", /* dda */
	2604	"\\u0927\\u094D\\u0930", /* dhra */
	2605	"\\u0928\\u094D\\u0928", /* nna */
	2606	"\\u092A\\u094D\\u0930", /* pra */
	2607	"\\u092B\\u094D\\u0930", /* phra */
	2608	"\\u092C\\u094D\\u0930", /* bra */
	2609	"\\u092D\\u094D\\u0930", /* bhra */
	2610	"\\u092E\\u094D\\u0930", /* mra */
	2611	"\\u0929\\u094D\\u0930", /* n\\u0331ra */
	2612	//"\\u0934\\u094D\\u0930", /* l\\u0331ra */
	2613	"\\u092F\\u094D\\u0930", /* yra */
	2614	"\\u092F\\u093C\\u094D\\u0930", /* y\\u0307ra */
	2615	//"l-",
	2616	"\\u0935\\u094D\\u0930", /* vra */
	2617	"\\u0936\\u094D\\u0930", /* s\\u0301ra */
	2618	"\\u0937\\u094D\\u0930", /* s\\u0323ra */
	2619	"\\u0938\\u094D\\u0930", /* sra */
	2620	"\\u0939\\u094d\\u092E", /* hma */
	2621	"\\u091F\\u094D\\u091F", /* t\\u0323t\\u0323a */
	2622	"\\u091F\\u094D\\u0920", /* t\\u0323t\\u0323ha */
	2623	"\\u0920\\u094D\\u0920", /* t\\u0323ht\\u0323ha*/
	2624	"\\u0921\\u094D\\u0921", /* d\\u0323d\\u0323a */
	2625	"\\u0921\\u094D\\u0922", /* d\\u0323d\\u0323ha */
	2626	"\\u091F\\u094D\\u092F", /* t\\u0323ya */
	2627	"\\u0920\\u094D\\u092F", /* t\\u0323hya */
	2628	"\\u0921\\u094D\\u092F", /* d\\u0323ya */
	2629	"\\u0922\\u094D\\u092F", /* d\\u0323hya */
	2630	// "hma", /* hma */
	2631	"\\u0939\\u094D\\u092F", /* hya */
	2632	"\\u0936\\u0943", /* s\\u0301r\\u0325a */
	2633	"\\u0936\\u094D\\u091A", /* s\\u0301ca */
	2634	"\\u090d", /* e\\u0306 */
	2635	"\\u0938\\u0902\\u091C\\u0940\\u092C\\u094D \\u0938\\u0947\\u0928\\u0917\\u0941\\u092A\\u094D\\u0924",
	2636	"\\u0906\\u0928\\u0902\\u0926\\u094D \\u0935\\u0926\\u094D\\u0926\\u093F\\u0930\\u093E\\u091C\\u0941",
	2637	"\\u0906",
	2638	"\\u0905",
	2639	};
	2640	UErrorCode status = U_ZERO_ERROR;
	2641	UParseError parseError;
	2642	UnicodeString message;
	2643	Transliterator* latinToDev=Transliterator::createInstance("Latin-Devanagari", UTRANS_FORWARD, parseError, status);
	2644	Transliterator* devToLatin=Transliterator::createInstance("Devanagari-Latin", UTRANS_FORWARD, parseError, status);
	2645	if(U_FAILURE(status)){
	2646	dataerrln("FAIL: construction " + UnicodeString(" Error: ") + u_errorName(status));
	2647	dataerrln("PreContext: " + prettify(parseError.preContext) + " PostContext: " + prettify( parseError.postContext) );
	2648	return;
	2649	}
	2650	UnicodeString gotResult;
	2651	for(int i= 0; i<MAX_LEN; i++){
	2652	gotResult = source[i];
	2653	expect(*latinToDev,CharsToUnicodeString(source[i]),CharsToUnicodeString(expected[i]));
	2654	expect(*devToLatin,CharsToUnicodeString(expected[i]),CharsToUnicodeString(source[i]));
	2655	}
	2656	delete latinToDev;
	2657	delete devToLatin;
	2658	}
	2659
	2660	void TransliteratorTest::TestTeluguLatinRT(){
	2661	const int MAX_LEN=10;
	2662	const char* const source[MAX_LEN] = {
	2663	"raghur\\u0101m vi\\u015Bvan\\u0101dha", /* Raghuram Viswanadha */
	2664	"\\u0101nand vaddir\\u0101ju", /* Anand Vaddiraju */
	2665	"r\\u0101j\\u012Bv ka\\u015Barab\\u0101da", /* Rajeev Kasarabada */
	2666	"san\\u0304j\\u012Bv ka\\u015Barab\\u0101da", /* sanjeev kasarabada */
	2667	"san\\u0304j\\u012Bb sen'gupta", /* sanjib sengupata */
	2668	"amar\\u0113ndra hanum\\u0101nula", /* Amarendra hanumanula */
	2669	"ravi kum\\u0101r vi\\u015Bvan\\u0101dha", /* Ravi Kumar Viswanadha */
	2670	"\\u0101ditya kandr\\u0113gula", /* Aditya Kandregula */
	2671	"\\u015Br\\u012Bdhar ka\\u1E47\\u1E6Dama\\u015Be\\u1E6D\\u1E6Di",/* Shridhar Kantamsetty */
	2672	"m\\u0101dhav de\\u015Be\\u1E6D\\u1E6Di" /* Madhav Desetty */
	2673	};
	2674
	2675	const char* const expected[MAX_LEN] = {
	2676	"\\u0c30\\u0c18\\u0c41\\u0c30\\u0c3e\\u0c2e\\u0c4d \\u0c35\\u0c3f\\u0c36\\u0c4d\\u0c35\\u0c28\\u0c3e\\u0c27",
	2677	"\\u0c06\\u0c28\\u0c02\\u0c26\\u0c4d \\u0C35\\u0C26\\u0C4D\\u0C26\\u0C3F\\u0C30\\u0C3E\\u0C1C\\u0C41",
	2678	"\\u0c30\\u0c3e\\u0c1c\\u0c40\\u0c35\\u0c4d \\u0c15\\u0c36\\u0c30\\u0c2c\\u0c3e\\u0c26",
	2679	"\\u0c38\\u0c02\\u0c1c\\u0c40\\u0c35\\u0c4d \\u0c15\\u0c36\\u0c30\\u0c2c\\u0c3e\\u0c26",
	2680	"\\u0c38\\u0c02\\u0c1c\\u0c40\\u0c2c\\u0c4d \\u0c38\\u0c46\\u0c28\\u0c4d\\u0c17\\u0c41\\u0c2a\\u0c4d\\u0c24",
	2681	"\\u0c05\\u0c2e\\u0c30\\u0c47\\u0c02\\u0c26\\u0c4d\\u0c30 \\u0c39\\u0c28\\u0c41\\u0c2e\\u0c3e\\u0c28\\u0c41\\u0c32",
	2682	"\\u0c30\\u0c35\\u0c3f \\u0c15\\u0c41\\u0c2e\\u0c3e\\u0c30\\u0c4d \\u0c35\\u0c3f\\u0c36\\u0c4d\\u0c35\\u0c28\\u0c3e\\u0c27",
	2683	"\\u0c06\\u0c26\\u0c3f\\u0c24\\u0c4d\\u0c2f \\u0C15\\u0C02\\u0C26\\u0C4D\\u0C30\\u0C47\\u0C17\\u0C41\\u0c32",
	2684	"\\u0c36\\u0c4d\\u0c30\\u0c40\\u0C27\\u0C30\\u0C4D \\u0c15\\u0c02\\u0c1f\\u0c2e\\u0c36\\u0c46\\u0c1f\\u0c4d\\u0c1f\\u0c3f",
	2685	"\\u0c2e\\u0c3e\\u0c27\\u0c35\\u0c4d \\u0c26\\u0c46\\u0c36\\u0c46\\u0c1f\\u0c4d\\u0c1f\\u0c3f",
	2686	};
	2687
	2688	UErrorCode status = U_ZERO_ERROR;
	2689	UParseError parseError;
	2690	UnicodeString message;
	2691	Transliterator* latinToDev=Transliterator::createInstance("Latin-Telugu", UTRANS_FORWARD, parseError, status);
	2692	Transliterator* devToLatin=Transliterator::createInstance("Telugu-Latin", UTRANS_FORWARD, parseError, status);
	2693	if(U_FAILURE(status)){
	2694	dataerrln("FAIL: construction " + UnicodeString(" Error: ") + u_errorName(status));
	2695	dataerrln("PreContext: " + prettify(parseError.preContext) + " PostContext: " + prettify( parseError.postContext) );
	2696	return;
	2697	}
	2698	UnicodeString gotResult;
	2699	for(int i= 0; i<MAX_LEN; i++){
	2700	gotResult = source[i];
	2701	expect(*latinToDev,CharsToUnicodeString(source[i]),CharsToUnicodeString(expected[i]));
	2702	expect(*devToLatin,CharsToUnicodeString(expected[i]),CharsToUnicodeString(source[i]));
	2703	}
	2704	delete latinToDev;
	2705	delete devToLatin;
	2706	}
	2707
	2708	void TransliteratorTest::TestSanskritLatinRT(){
	2709	const int MAX_LEN =16;
	2710	const char* const source[MAX_LEN] = {
	2711	"rmk\\u1E63\\u0113t",
	2712	"\\u015Br\\u012Bmad",
	2713	"bhagavadg\\u012Bt\\u0101",
	2714	"adhy\\u0101ya",
	2715	"arjuna",
	2716	"vi\\u1E63\\u0101da",
	2717	"y\\u014Dga",
	2718	"dhr\\u0325tar\\u0101\\u1E63\\u1E6Dra",
	2719	"uv\\u0101cr\\u0325",
	2720	"dharmak\\u1E63\\u0113tr\\u0113",
	2721	"kuruk\\u1E63\\u0113tr\\u0113",
	2722	"samav\\u0113t\\u0101",
	2723	"yuyutsava\\u1E25",
	2724	"m\\u0101mak\\u0101\\u1E25",
	2725	// "p\\u0101\\u1E47\\u1E0Dav\\u0101\\u015Bcaiva",
	2726	"kimakurvata",
	2727	"san\\u0304java",
	2728	};
	2729	const char* const expected[MAX_LEN] = {
	2730	"\\u0930\\u094D\\u092E\\u094D\\u0915\\u094D\\u0937\\u0947\\u0924\\u094D",
	2731	"\\u0936\\u094d\\u0930\\u0940\\u092e\\u0926\\u094d",
	2732	"\\u092d\\u0917\\u0935\\u0926\\u094d\\u0917\\u0940\\u0924\\u093e",
	2733	"\\u0905\\u0927\\u094d\\u092f\\u093e\\u092f",
	2734	"\\u0905\\u0930\\u094d\\u091c\\u0941\\u0928",
	2735	"\\u0935\\u093f\\u0937\\u093e\\u0926",
	2736	"\\u092f\\u094b\\u0917",
	2737	"\\u0927\\u0943\\u0924\\u0930\\u093e\\u0937\\u094d\\u091f\\u094d\\u0930",
	2738	"\\u0909\\u0935\\u093E\\u091A\\u0943",
	2739	"\\u0927\\u0930\\u094d\\u092e\\u0915\\u094d\\u0937\\u0947\\u0924\\u094d\\u0930\\u0947",
	2740	"\\u0915\\u0941\\u0930\\u0941\\u0915\\u094d\\u0937\\u0947\\u0924\\u094d\\u0930\\u0947",
	2741	"\\u0938\\u092e\\u0935\\u0947\\u0924\\u093e",
	2742	"\\u092f\\u0941\\u092f\\u0941\\u0924\\u094d\\u0938\\u0935\\u0903",
	2743	"\\u092e\\u093e\\u092e\\u0915\\u093e\\u0903",
	2744	//"\\u092a\\u093e\\u0923\\u094d\\u0921\\u0935\\u093e\\u0936\\u094d\\u091a\\u0948\\u0935",
	2745	"\\u0915\\u093f\\u092e\\u0915\\u0941\\u0930\\u094d\\u0935\\u0924",
	2746	"\\u0938\\u0902\\u091c\\u0935",
	2747	};
	2748	UErrorCode status = U_ZERO_ERROR;
	2749	UParseError parseError;
	2750	UnicodeString message;
	2751	Transliterator* latinToDev=Transliterator::createInstance("Latin-Devanagari", UTRANS_FORWARD, parseError, status);
	2752	Transliterator* devToLatin=Transliterator::createInstance("Devanagari-Latin", UTRANS_FORWARD, parseError, status);
	2753	if(U_FAILURE(status)){
	2754	dataerrln("FAIL: construction " + UnicodeString(" Error: ") + u_errorName(status));
	2755	dataerrln("PreContext: " + prettify(parseError.preContext) + " PostContext: " + prettify( parseError.postContext) );
	2756	return;
	2757	}
	2758	UnicodeString gotResult;
	2759	for(int i= 0; i<MAX_LEN; i++){
	2760	gotResult = source[i];
	2761	expect(*latinToDev,CharsToUnicodeString(source[i]),CharsToUnicodeString(expected[i]));
	2762	expect(*devToLatin,CharsToUnicodeString(expected[i]),CharsToUnicodeString(source[i]));
	2763	}
	2764	delete latinToDev;
	2765	delete devToLatin;
	2766	}
	2767
	2768
	2769	void TransliteratorTest::TestCompoundLatinRT(){
	2770	const char* const source[] = {
	2771	"rmk\\u1E63\\u0113t",
	2772	"\\u015Br\\u012Bmad",
	2773	"bhagavadg\\u012Bt\\u0101",
	2774	"adhy\\u0101ya",
	2775	"arjuna",
	2776	"vi\\u1E63\\u0101da",
	2777	"y\\u014Dga",
	2778	"dhr\\u0325tar\\u0101\\u1E63\\u1E6Dra",
	2779	"uv\\u0101cr\\u0325",
	2780	"dharmak\\u1E63\\u0113tr\\u0113",
	2781	"kuruk\\u1E63\\u0113tr\\u0113",
	2782	"samav\\u0113t\\u0101",
	2783	"yuyutsava\\u1E25",
	2784	"m\\u0101mak\\u0101\\u1E25",
	2785	// "p\\u0101\\u1E47\\u1E0Dav\\u0101\\u015Bcaiva",
	2786	"kimakurvata",
	2787	"san\\u0304java"
	2788	};
	2789	const int MAX_LEN = sizeof(source)/sizeof(source[0]);
	2790	const char* const expected[MAX_LEN] = {
	2791	"\\u0930\\u094D\\u092E\\u094D\\u0915\\u094D\\u0937\\u0947\\u0924\\u094D",
	2792	"\\u0936\\u094d\\u0930\\u0940\\u092e\\u0926\\u094d",
	2793	"\\u092d\\u0917\\u0935\\u0926\\u094d\\u0917\\u0940\\u0924\\u093e",
	2794	"\\u0905\\u0927\\u094d\\u092f\\u093e\\u092f",
	2795	"\\u0905\\u0930\\u094d\\u091c\\u0941\\u0928",
	2796	"\\u0935\\u093f\\u0937\\u093e\\u0926",
	2797	"\\u092f\\u094b\\u0917",
	2798	"\\u0927\\u0943\\u0924\\u0930\\u093e\\u0937\\u094d\\u091f\\u094d\\u0930",
	2799	"\\u0909\\u0935\\u093E\\u091A\\u0943",
	2800	"\\u0927\\u0930\\u094d\\u092e\\u0915\\u094d\\u0937\\u0947\\u0924\\u094d\\u0930\\u0947",
	2801	"\\u0915\\u0941\\u0930\\u0941\\u0915\\u094d\\u0937\\u0947\\u0924\\u094d\\u0930\\u0947",
	2802	"\\u0938\\u092e\\u0935\\u0947\\u0924\\u093e",
	2803	"\\u092f\\u0941\\u092f\\u0941\\u0924\\u094d\\u0938\\u0935\\u0903",
	2804	"\\u092e\\u093e\\u092e\\u0915\\u093e\\u0903",
	2805	// "\\u092a\\u093e\\u0923\\u094d\\u0921\\u0935\\u093e\\u0936\\u094d\\u091a\\u0948\\u0935",
	2806	"\\u0915\\u093f\\u092e\\u0915\\u0941\\u0930\\u094d\\u0935\\u0924",
	2807	"\\u0938\\u0902\\u091c\\u0935"
	2808	};
	2809	if(MAX_LEN != sizeof(expected)/sizeof(expected[0])) {
	2810	errln("error in TestCompoundLatinRT: source[] and expected[] have different lengths!");
	2811	return;
	2812	}
	2813
	2814	UErrorCode status = U_ZERO_ERROR;
	2815	UParseError parseError;
	2816	UnicodeString message;
	2817	Transliterator* devToLatinToDev =Transliterator::createInstance("Devanagari-Latin;Latin-Devanagari", UTRANS_FORWARD, parseError, status);
	2818	Transliterator* latinToDevToLatin=Transliterator::createInstance("Latin-Devanagari;Devanagari-Latin", UTRANS_FORWARD, parseError, status);
	2819	Transliterator* devToTelToDev =Transliterator::createInstance("Devanagari-Telugu;Telugu-Devanagari", UTRANS_FORWARD, parseError, status);
	2820	Transliterator* latinToTelToLatin=Transliterator::createInstance("Latin-Telugu;Telugu-Latin", UTRANS_FORWARD, parseError, status);
	2821
	2822	if(U_FAILURE(status)){
	2823	dataerrln("FAIL: construction " + UnicodeString(" Error: ") + u_errorName(status));
	2824	dataerrln("PreContext: " + prettify(parseError.preContext) + " PostContext: " + prettify( parseError.postContext) );
	2825	return;
	2826	}
	2827	UnicodeString gotResult;
	2828	for(int i= 0; i<MAX_LEN; i++){
	2829	gotResult = source[i];
	2830	expect(*devToLatinToDev,CharsToUnicodeString(expected[i]),CharsToUnicodeString(expected[i]));
	2831	expect(*latinToDevToLatin,CharsToUnicodeString(source[i]),CharsToUnicodeString(source[i]));
	2832	expect(*latinToTelToLatin,CharsToUnicodeString(source[i]),CharsToUnicodeString(source[i]));
	2833
	2834	}
	2835	delete(latinToDevToLatin);
	2836	delete(devToLatinToDev);
	2837	delete(devToTelToDev);
	2838	delete(latinToTelToLatin);
	2839	}
	2840
	2841	/**
	2842	* Test Gurmukhi-Devanagari Tippi and Bindi
	2843	*/
	2844	void TransliteratorTest::TestGurmukhiDevanagari(){
	2845	// the rule says:
	2846	// (\u0902) (when preceded by vowel) ---> (\u0A02)
	2847	// (\u0902) (when preceded by consonant) ---> (\u0A70)
	2848	UErrorCode status = U_ZERO_ERROR;
	2849	UnicodeSet vowel(UnicodeString("[\\u0905-\\u090A \\u090F\\u0910\\u0913\\u0914 \\u093e-\\u0942\\u0947\\u0948\\u094B\\u094C\\u094D]", -1, US_INV).unescape(), status);
	2850	UnicodeSet non_vowel(UnicodeString("[\\u0915-\\u0928\\u092A-\\u0930]", -1, US_INV).unescape(), status);
	2851	UParseError parseError;
	2852
	2853	UnicodeSetIterator vIter(vowel);
	2854	UnicodeSetIterator nvIter(non_vowel);
	2855	Transliterator* trans = Transliterator::createInstance("Devanagari-Gurmukhi",UTRANS_FORWARD, parseError, status);
	2856	if(U_FAILURE(status)) {
	2857	dataerrln("Error creating transliterator %s", u_errorName(status));
	2858	delete trans;
	2859	return;
	2860	}
	2861	UnicodeString src (" \\u0902", -1, US_INV);
	2862	UnicodeString expected(" \\u0A02", -1, US_INV);
	2863	src = src.unescape();
	2864	expected= expected.unescape();
	2865
	2866	while(vIter.next()){
	2867	src.setCharAt(0,(UChar) vIter.getCodepoint());
	2868	expected.setCharAt(0,(UChar) (vIter.getCodepoint()+0x0100));
	2869	expect(*trans,src,expected);
	2870	}
	2871
	2872	expected.setCharAt(1,0x0A70);
	2873	while(nvIter.next()){
	2874	//src.setCharAt(0,(char) nvIter.codepoint);
	2875	src.setCharAt(0,(UChar)nvIter.getCodepoint());
	2876	expected.setCharAt(0,(UChar) (nvIter.getCodepoint()+0x0100));
	2877	expect(*trans,src,expected);
	2878	}
	2879	delete trans;
	2880	}
	2881	/**
	2882	* Test instantiation from a locale.
	2883	*/
	2884	void TransliteratorTest::TestLocaleInstantiation(void) {
	2885	UParseError pe;
	2886	UErrorCode ec = U_ZERO_ERROR;
	2887	Transliterator *t = Transliterator::createInstance("ru_RU-Latin", UTRANS_FORWARD, pe, ec);
	2888	if (U_FAILURE(ec)) {
	2889	dataerrln("FAIL: createInstance(ru_RU-Latin) - %s", u_errorName(ec));
	2890	delete t;
	2891	return;
	2892	}
	2893	expect(*t, CharsToUnicodeString("\\u0430"), "a");
	2894	delete t;
	2895
	2896	t = Transliterator::createInstance("en-el", UTRANS_FORWARD, pe, ec);
	2897	if (U_FAILURE(ec)) {
	2898	errln("FAIL: createInstance(en-el)");
	2899	delete t;
	2900	return;
	2901	}
	2902	expect(*t, "a", CharsToUnicodeString("\\u03B1"));
	2903	delete t;
	2904	}
	2905
	2906	/**
	2907	* Test title case handling of accent (should ignore accents)
	2908	*/
	2909	void TransliteratorTest::TestTitleAccents(void) {
	2910	UParseError pe;
	2911	UErrorCode ec = U_ZERO_ERROR;
	2912	Transliterator *t = Transliterator::createInstance("Title", UTRANS_FORWARD, pe, ec);
	2913	if (U_FAILURE(ec)) {
	2914	errln("FAIL: createInstance(Title)");
	2915	delete t;
	2916	return;
	2917	}
	2918	expect(*t, CharsToUnicodeString("a\\u0300b can't abe"), CharsToUnicodeString("A\\u0300b Can't Abe"));
	2919	delete t;
	2920	}
	2921
	2922	/**
	2923	* Basic test of a locale resource based rule.
	2924	*/
	2925	void TransliteratorTest::TestLocaleResource() {
	2926	const char* DATA[] = {
	2927	// id from to
	2928	//"Latin-Greek/UNGEGN", "b", "\\u03bc\\u03c0",
	2929	"Latin-el", "b", "\\u03bc\\u03c0",
	2930	"Latin-Greek", "b", "\\u03B2",
	2931	"Greek-Latin/UNGEGN", "\\u03B2", "v",
	2932	"el-Latin", "\\u03B2", "v",
	2933	"Greek-Latin", "\\u03B2", "b",
	2934	};
	2935	const int32_t DATA_length = sizeof(DATA) / sizeof(DATA[0]);
	2936	for (int32_t i=0; i<DATA_length; i+=3) {
	2937	UParseError pe;
	2938	UErrorCode ec = U_ZERO_ERROR;
	2939	Transliterator *t = Transliterator::createInstance(DATA[i], UTRANS_FORWARD, pe, ec);
	2940	if (U_FAILURE(ec)) {
	2941	dataerrln((UnicodeString)"FAIL: createInstance(" + DATA[i] + ") - " + u_errorName(ec));
	2942	delete t;
	2943	continue;
	2944	}
	2945	expect(*t, CharsToUnicodeString(DATA[i+1]),
	2946	CharsToUnicodeString(DATA[i+2]));
	2947	delete t;
	2948	}
	2949	}
	2950
	2951	/**
	2952	* Make sure parse errors reference the right line.
	2953	*/
	2954	void TransliteratorTest::TestParseError() {
	2955	static const char* rule =
	2956	"a > b;\n"
	2957	"# more stuff\n"
	2958	"d << b;";
	2959	UErrorCode ec = U_ZERO_ERROR;
	2960	UParseError pe;
	2961	Transliterator *t = Transliterator::createFromRules("ID", rule, UTRANS_FORWARD, pe, ec);
	2962	delete t;
	2963	if (U_FAILURE(ec)) {
	2964	UnicodeString err(pe.preContext);
	2965	err.append((UChar)124/\|/).append(pe.postContext);
	2966	if (err.indexOf("d << b") >= 0) {
	2967	logln("Ok: " + err);
	2968	} else {
	2969	errln("FAIL: " + err);
	2970	}
	2971	}
	2972	else {
	2973	errln("FAIL: no syntax error");
	2974	}
	2975	static const char* maskingRule =
	2976	"a>x;\n"
	2977	"# more stuff\n"
	2978	"ab>y;";
	2979	ec = U_ZERO_ERROR;
	2980	delete Transliterator::createFromRules("ID", maskingRule, UTRANS_FORWARD, pe, ec);
	2981	if (ec != U_RULE_MASK_ERROR) {
	2982	errln("FAIL: returned %s instead of U_RULE_MASK_ERROR", u_errorName(ec));
	2983	}
	2984	else if (UnicodeString("a > x;") != UnicodeString(pe.preContext)) {
	2985	errln("FAIL: did not get expected precontext");
	2986	}
	2987	else if (UnicodeString("ab > y;") != UnicodeString(pe.postContext)) {
	2988	errln("FAIL: did not get expected postcontext");
	2989	}
	2990	}
	2991
	2992	/**
	2993	* Make sure sets on output are disallowed.
	2994	*/
	2995	void TransliteratorTest::TestOutputSet() {
	2996	UnicodeString rule = "$set = [a-cm-n]; b > $set;";
	2997	UErrorCode ec = U_ZERO_ERROR;
	2998	UParseError pe;
	2999	Transliterator *t = Transliterator::createFromRules("ID", rule, UTRANS_FORWARD, pe, ec);
	3000	delete t;
	3001	if (U_FAILURE(ec)) {
	3002	UnicodeString err(pe.preContext);
	3003	err.append((UChar)124/\|/).append(pe.postContext);
	3004	logln("Ok: " + err);
	3005	return;
	3006	}
	3007	errln("FAIL: No syntax error");
	3008	}
	3009
	3010	/**
	3011	* Test the use variable range pragma, making sure that use of
	3012	* variable range characters is detected and flagged as an error.
	3013	*/
	3014	void TransliteratorTest::TestVariableRange() {
	3015	UnicodeString rule = "use variable range 0x70 0x72; a > A; b > B; q > Q;";
	3016	UErrorCode ec = U_ZERO_ERROR;
	3017	UParseError pe;
	3018	Transliterator *t = Transliterator::createFromRules("ID", rule, UTRANS_FORWARD, pe, ec);
	3019	delete t;
	3020	if (U_FAILURE(ec)) {
	3021	UnicodeString err(pe.preContext);
	3022	err.append((UChar)124/\|/).append(pe.postContext);
	3023	logln("Ok: " + err);
	3024	return;
	3025	}
	3026	errln("FAIL: No syntax error");
	3027	}
	3028
	3029	/**
	3030	* Test invalid post context error handling
	3031	*/
	3032	void TransliteratorTest::TestInvalidPostContext() {
	3033	UnicodeString rule = "a}b{c>d;";
	3034	UErrorCode ec = U_ZERO_ERROR;
	3035	UParseError pe;
	3036	Transliterator *t = Transliterator::createFromRules("ID", rule, UTRANS_FORWARD, pe, ec);
	3037	delete t;
	3038	if (U_FAILURE(ec)) {
	3039	UnicodeString err(pe.preContext);
	3040	err.append((UChar)124/\|/).append(pe.postContext);
	3041	if (err.indexOf("a}b{c") >= 0) {
	3042	logln("Ok: " + err);
	3043	} else {
	3044	errln("FAIL: " + err);
	3045	}
	3046	return;
	3047	}
	3048	errln("FAIL: No syntax error");
	3049	}
	3050
	3051	/**
	3052	* Test ID form variants
	3053	*/
	3054	void TransliteratorTest::TestIDForms() {
	3055	const char* DATA[] = {
	3056	"NFC", NULL, "NFD",
	3057	"nfd", NULL, "NFC", // make sure case is ignored
	3058	"Any-NFKD", NULL, "Any-NFKC",
	3059	"Null", NULL, "Null",
	3060	"-nfkc", "nfkc", "NFKD",
	3061	"-nfkc/", "nfkc", "NFKD",
	3062	"Latin-Greek/UNGEGN", NULL, "Greek-Latin/UNGEGN",
	3063	"Greek/UNGEGN-Latin", "Greek-Latin/UNGEGN", "Latin-Greek/UNGEGN",
	3064	"Bengali-Devanagari/", "Bengali-Devanagari", "Devanagari-Bengali",
	3065	"Source-", NULL, NULL,
	3066	"Source/Variant-", NULL, NULL,
	3067	"Source-/Variant", NULL, NULL,
	3068	"/Variant", NULL, NULL,
	3069	"/Variant-", NULL, NULL,
	3070	"-/Variant", NULL, NULL,
	3071	"-/", NULL, NULL,
	3072	"-", NULL, NULL,
	3073	"/", NULL, NULL,
	3074	};
	3075	const int32_t DATA_length = sizeof(DATA)/sizeof(DATA[0]);
	3076
	3077	for (int32_t i=0; i<DATA_length; i+=3) {
	3078	const char* ID = DATA[i];
	3079	const char* expID = DATA[i+1];
	3080	const char* expInvID = DATA[i+2];
	3081	UBool expValid = (expInvID != NULL);
	3082	if (expID == NULL) {
	3083	expID = ID;
	3084	}
	3085	UParseError pe;
	3086	UErrorCode ec = U_ZERO_ERROR;
	3087	Transliterator *t =
	3088	Transliterator::createInstance(ID, UTRANS_FORWARD, pe, ec);
	3089	if (U_FAILURE(ec)) {
	3090	if (!expValid) {
	3091	logln((UnicodeString)"Ok: getInstance(" + ID +") => " + u_errorName(ec));
	3092	} else {
	3093	dataerrln((UnicodeString)"FAIL: Couldn't create " + ID + " - " + u_errorName(ec));
	3094	}
	3095	delete t;
	3096	continue;
	3097	}
	3098	Transliterator *u = t->createInverse(ec);
	3099	if (U_FAILURE(ec)) {
	3100	errln((UnicodeString)"FAIL: Couldn't create inverse of " + ID);
	3101	delete t;
	3102	delete u;
	3103	continue;
	3104	}
	3105	if (t->getID() == expID &&
	3106	u->getID() == expInvID) {
	3107	logln((UnicodeString)"Ok: " + ID + ".getInverse() => " + expInvID);
	3108	} else {
	3109	errln((UnicodeString)"FAIL: getInstance(" + ID + ") => " +
	3110	t->getID() + " x getInverse() => " + u->getID() +
	3111	", expected " + expInvID);
	3112	}
	3113	delete t;
	3114	delete u;
	3115	}
	3116	}
	3117
	3118	static const UChar SPACE[] = {32,0};
	3119	static const UChar NEWLINE[] = {10,0};
	3120	static const UChar RETURN[] = {13,0};
	3121	static const UChar EMPTY[] = {0};
	3122
	3123	void TransliteratorTest::checkRules(const UnicodeString& label, Transliterator& t2,
	3124	const UnicodeString& testRulesForward) {
	3125	UnicodeString rules2; t2.toRules(rules2, TRUE);
	3126	//rules2 = TestUtility.replaceAll(rules2, new UnicodeSet("[' '\n\r]"), "");
	3127	rules2.findAndReplace(SPACE, EMPTY);
	3128	rules2.findAndReplace(NEWLINE, EMPTY);
	3129	rules2.findAndReplace(RETURN, EMPTY);
	3130
	3131	UnicodeString testRules(testRulesForward); testRules.findAndReplace(SPACE, EMPTY);
	3132
	3133	if (rules2 != testRules) {
	3134	errln(label);
	3135	logln((UnicodeString)"GENERATED RULES: " + rules2);
	3136	logln((UnicodeString)"SHOULD BE: " + testRulesForward);
	3137	}
	3138	}
	3139
	3140	/**
	3141	* Mark's toRules test.
	3142	*/
	3143	void TransliteratorTest::TestToRulesMark() {
	3144	const char* testRules =
	3145	"::[[:Latin:][:Mark:]];"
	3146	"::NFKD (NFC);"
	3147	"::Lower (Lower);"
	3148	"a <> \\u03B1;" // alpha
	3149	"::NFKC (NFD);"
	3150	"::Upper (Lower);"
	3151	"::Lower ();"
	3152	"::([[:Greek:][:Mark:]]);"
	3153	;
	3154	const char* testRulesForward =
	3155	"::[[:Latin:][:Mark:]];"
	3156	"::NFKD(NFC);"
	3157	"::Lower(Lower);"
	3158	"a > \\u03B1;"
	3159	"::NFKC(NFD);"
	3160	"::Upper (Lower);"
	3161	"::Lower ();"
	3162	;
	3163	const char* testRulesBackward =
	3164	"::[[:Greek:][:Mark:]];"
	3165	"::Lower (Upper);"
	3166	"::NFD(NFKC);"
	3167	"\\u03B1 > a;"
	3168	"::Lower(Lower);"
	3169	"::NFC(NFKD);"
	3170	;
	3171	UnicodeString source = CharsToUnicodeString("\\u00E1"); // a-acute
	3172	UnicodeString target = CharsToUnicodeString("\\u03AC"); // alpha-acute
	3173
	3174	UParseError pe;
	3175	UErrorCode ec = U_ZERO_ERROR;
	3176	Transliterator *t2 = Transliterator::createFromRules("source-target", UnicodeString(testRules, -1, US_INV), UTRANS_FORWARD, pe, ec);
	3177	Transliterator *t3 = Transliterator::createFromRules("target-source", UnicodeString(testRules, -1, US_INV), UTRANS_REVERSE, pe, ec);
	3178
	3179	if (U_FAILURE(ec)) {
	3180	delete t2;
	3181	delete t3;
	3182	dataerrln((UnicodeString)"FAIL: createFromRules => " + u_errorName(ec));
	3183	return;
	3184	}
	3185
	3186	expect(*t2, source, target);
	3187	expect(*t3, target, source);
	3188
	3189	checkRules("Failed toRules FORWARD", *t2, UnicodeString(testRulesForward, -1, US_INV));
	3190	checkRules("Failed toRules BACKWARD", *t3, UnicodeString(testRulesBackward, -1, US_INV));
	3191
	3192	delete t2;
	3193	delete t3;
	3194	}
	3195
	3196	/**
	3197	* Test Escape and Unescape transliterators.
	3198	*/
	3199	void TransliteratorTest::TestEscape() {
	3200	UParseError pe;
	3201	UErrorCode ec;
	3202	Transliterator *t;
	3203
	3204	ec = U_ZERO_ERROR;
	3205	t = Transliterator::createInstance("Hex-Any", UTRANS_FORWARD, pe, ec);
	3206	if (U_FAILURE(ec)) {
	3207	errln((UnicodeString)"FAIL: createInstance");
	3208	} else {
	3209	expect(*t,
	3210	UNICODE_STRING_SIMPLE("\\x{40}\\U000000312Q"),
	3211	"@12Q");
	3212	}
	3213	delete t;
	3214
	3215	ec = U_ZERO_ERROR;
	3216	t = Transliterator::createInstance("Any-Hex/C", UTRANS_FORWARD, pe, ec);
	3217	if (U_FAILURE(ec)) {
	3218	errln((UnicodeString)"FAIL: createInstance");
	3219	} else {
	3220	expect(*t,
	3221	CharsToUnicodeString("A\\U0010BEEF\\uFEED"),
	3222	UNICODE_STRING_SIMPLE("\\u0041\\U0010BEEF\\uFEED"));
	3223	}
	3224	delete t;
	3225
	3226	ec = U_ZERO_ERROR;
	3227	t = Transliterator::createInstance("Any-Hex/Java", UTRANS_FORWARD, pe, ec);
	3228	if (U_FAILURE(ec)) {
	3229	errln((UnicodeString)"FAIL: createInstance");
	3230	} else {
	3231	expect(*t,
	3232	CharsToUnicodeString("A\\U0010BEEF\\uFEED"),
	3233	UNICODE_STRING_SIMPLE("\\u0041\\uDBEF\\uDEEF\\uFEED"));
	3234	}
	3235	delete t;
	3236
	3237	ec = U_ZERO_ERROR;
	3238	t = Transliterator::createInstance("Any-Hex/Perl", UTRANS_FORWARD, pe, ec);
	3239	if (U_FAILURE(ec)) {
	3240	errln((UnicodeString)"FAIL: createInstance");
	3241	} else {
	3242	expect(*t,
	3243	CharsToUnicodeString("A\\U0010BEEF\\uFEED"),
	3244	UNICODE_STRING_SIMPLE("\\x{41}\\x{10BEEF}\\x{FEED}"));
	3245	}
	3246	delete t;
	3247	}
	3248
	3249
	3250	void TransliteratorTest::TestAnchorMasking(){
	3251	UnicodeString rule ("^a > Q; a > q;");
	3252	UErrorCode status= U_ZERO_ERROR;
	3253	UParseError parseError;
	3254
	3255	Transliterator* t = Transliterator::createFromRules("ID", rule, UTRANS_FORWARD,parseError,status);
	3256	if(U_FAILURE(status)){
	3257	errln(UnicodeString("FAIL: ") + "ID" +
	3258	".createFromRules() => bad rules" +
	3259	/", parse error " + parseError.code +/
	3260	", line " + parseError.line +
	3261	", offset " + parseError.offset +
	3262	", context " + prettify(parseError.preContext, TRUE) +
	3263	", rules: " + prettify(rule, TRUE));
	3264	}
	3265	delete t;
	3266	}
	3267
	3268	/**
	3269	* Make sure display names of variants look reasonable.
	3270	*/
	3271	void TransliteratorTest::TestDisplayName() {
	3272	#if UCONFIG_NO_FORMATTING
	3273	logln("Skipping, UCONFIG_NO_FORMATTING is set\n");
	3274	return;
	3275	#else
	3276	static const char* DATA[] = {
	3277	// ID, forward name, reverse name
	3278	// Update the text as necessary -- the important thing is
	3279	// not the text itself, but how various cases are handled.
	3280
	3281	// Basic test
	3282	"Any-Hex", "Any to Hex Escape", "Hex Escape to Any",
	3283
	3284	// Variants
	3285	"Any-Hex/Perl", "Any to Hex Escape/Perl", "Hex Escape to Any/Perl",
	3286
	3287	// Target-only IDs
	3288	"NFC", "Any to NFC", "Any to NFD",
	3289	};
	3290
	3291	int32_t DATA_length = sizeof(DATA) / sizeof(DATA[0]);
	3292
	3293	Locale US("en", "US");
	3294
	3295	for (int32_t i=0; i<DATA_length; i+=3) {
	3296	UnicodeString name;
	3297	Transliterator::getDisplayName(DATA[i], US, name);
	3298	if (name != DATA[i+1]) {
	3299	dataerrln((UnicodeString)"FAIL: " + DATA[i] + ".getDisplayName() => " +
	3300	name + ", expected " + DATA[i+1]);
	3301	} else {
	3302	logln((UnicodeString)"Ok: " + DATA[i] + ".getDisplayName() => " + name);
	3303	}
	3304	UErrorCode ec = U_ZERO_ERROR;
	3305	UParseError pe;
	3306	Transliterator *t = Transliterator::createInstance(DATA[i], UTRANS_REVERSE, pe, ec);
	3307	if (U_FAILURE(ec)) {
	3308	delete t;
	3309	dataerrln("FAIL: createInstance failed - %s", u_errorName(ec));
	3310	continue;
	3311	}
	3312	name = Transliterator::getDisplayName(t->getID(), US, name);
	3313	if (name != DATA[i+2]) {
	3314	dataerrln((UnicodeString)"FAIL: " + t->getID() + ".getDisplayName() => " +
	3315	name + ", expected " + DATA[i+2]);
	3316	} else {
	3317	logln((UnicodeString)"Ok: " + t->getID() + ".getDisplayName() => " + name);
	3318	}
	3319	delete t;
	3320	}
	3321	#endif
	3322	}
	3323
	3324	void TransliteratorTest::TestSpecialCases(void) {
	3325	const UnicodeString registerRules[] = {
	3326	"Any-Dev1", "x > X; y > Y;",
	3327	"Any-Dev2", "XY > Z",
	3328	"Greek-Latin/FAKE",
	3329	CharsToUnicodeString
	3330	("[^[:L:][:M:]] { \\u03bc\\u03c0 > b ; \\u03bc\\u03c0 } [^[:L:][:M:]] > b ; [^[:L:][:M:]] { [\\u039c\\u03bc][\\u03a0\\u03c0] > B ; [\\u039c\\u03bc][\\u03a0\\u03c0] } [^[:L:][:M:]] > B ;"),
	3331	"" // END MARKER
	3332	};
	3333
	3334	const UnicodeString testCases[] = {
	3335	// NORMALIZATION
	3336	// should add more test cases
	3337	"NFD" , CharsToUnicodeString("a\\u0300 \\u00E0 \\u1100\\u1161 \\uFF76\\uFF9E\\u03D3"), "",
	3338	"NFC" , CharsToUnicodeString("a\\u0300 \\u00E0 \\u1100\\u1161 \\uFF76\\uFF9E\\u03D3"), "",
	3339	"NFKD", CharsToUnicodeString("a\\u0300 \\u00E0 \\u1100\\u1161 \\uFF76\\uFF9E\\u03D3"), "",
	3340	"NFKC", CharsToUnicodeString("a\\u0300 \\u00E0 \\u1100\\u1161 \\uFF76\\uFF9E\\u03D3"), "",
	3341
	3342	// mp -> b BUG
	3343	"Greek-Latin/UNGEGN", CharsToUnicodeString("(\\u03BC\\u03C0)"), "(b)",
	3344	"Greek-Latin/FAKE", CharsToUnicodeString("(\\u03BC\\u03C0)"), "(b)",
	3345
	3346	// check for devanagari bug
	3347	"nfd;Dev1;Dev2;nfc", "xy", "Z",
	3348
	3349	// ff, i, dotless-i, I, dotted-I, LJLjlj deseret deeDEE
	3350	"Title", CharsToUnicodeString("ab'cD ffi\\u0131I\\u0130 \\u01C7\\u01C8\\u01C9 ") + DESERET_dee + DESERET_DEE,
	3351	CharsToUnicodeString("Ab'cd Ffi\\u0131ii\\u0307 \\u01C8\\u01C9\\u01C9 ") + DESERET_DEE + DESERET_dee,
	3352
	3353	//TODO: enable this test once Titlecase works right
	3354	/*
	3355	"Title", CharsToUnicodeString("\\uFB00i\\u0131I\\u0130 \\u01C7\\u01C8\\u01C9 ") + DESERET_dee + DESERET_DEE,
	3356	CharsToUnicodeString("Ffi\\u0131ii \\u01C8\\u01C9\\u01C9 ") + DESERET_DEE + DESERET_dee,
	3357	*/
	3358	"Upper", CharsToUnicodeString("ab'cD \\uFB00i\\u0131I\\u0130 \\u01C7\\u01C8\\u01C9 ") + DESERET_dee + DESERET_DEE,
	3359	CharsToUnicodeString("AB'CD FFIII\\u0130 \\u01C7\\u01C7\\u01C7 ") + DESERET_DEE + DESERET_DEE,
	3360	"Lower", CharsToUnicodeString("ab'cD \\uFB00i\\u0131I\\u0130 \\u01C7\\u01C8\\u01C9 ") + DESERET_dee + DESERET_DEE,
	3361	CharsToUnicodeString("ab'cd \\uFB00i\\u0131ii\\u0307 \\u01C9\\u01C9\\u01C9 ") + DESERET_dee + DESERET_dee,
	3362
	3363	"Upper", CharsToUnicodeString("ab'cD \\uFB00i\\u0131I\\u0130 \\u01C7\\u01C8\\u01C9 ") + DESERET_dee + DESERET_DEE, "",
	3364	"Lower", CharsToUnicodeString("ab'cD \\uFB00i\\u0131I\\u0130 \\u01C7\\u01C8\\u01C9 ") + DESERET_dee + DESERET_DEE, "",
	3365
	3366	// FORMS OF S
	3367	"Greek-Latin/UNGEGN", CharsToUnicodeString("\\u03C3 \\u03C3\\u03C2 \\u03C2\\u03C3"),
	3368	CharsToUnicodeString("s ss s\\u0331s\\u0331") ,
	3369	"Latin-Greek/UNGEGN", CharsToUnicodeString("s ss s\\u0331s\\u0331"),
	3370	CharsToUnicodeString("\\u03C3 \\u03C3\\u03C2 \\u03C2\\u03C3") ,
	3371	"Greek-Latin", CharsToUnicodeString("\\u03C3 \\u03C3\\u03C2 \\u03C2\\u03C3"),
	3372	CharsToUnicodeString("s ss s\\u0331s\\u0331") ,
	3373	"Latin-Greek", CharsToUnicodeString("s ss s\\u0331s\\u0331"),
	3374	CharsToUnicodeString("\\u03C3 \\u03C3\\u03C2 \\u03C2\\u03C3"),
	3375	// Tatiana bug
	3376	// Upper: TAT\\u02B9\\u00C2NA
	3377	// Lower: tat\\u02B9\\u00E2na
	3378	// Title: Tat\\u02B9\\u00E2na
	3379	"Upper", CharsToUnicodeString("tat\\u02B9\\u00E2na"),
	3380	CharsToUnicodeString("TAT\\u02B9\\u00C2NA"),
	3381	"Lower", CharsToUnicodeString("TAT\\u02B9\\u00C2NA"),
	3382	CharsToUnicodeString("tat\\u02B9\\u00E2na"),
	3383	"Title", CharsToUnicodeString("tat\\u02B9\\u00E2na"),
	3384	CharsToUnicodeString("Tat\\u02B9\\u00E2na"),
	3385
	3386	"" // END MARKER
	3387	};
	3388
	3389	UParseError pos;
	3390	int32_t i;
	3391	for (i = 0; registerRules[i].length()!=0; i+=2) {
	3392	UErrorCode status = U_ZERO_ERROR;
	3393
	3394	Transliterator *t = Transliterator::createFromRules(registerRules[0+i],
	3395	registerRules[i+1], UTRANS_FORWARD, pos, status);
	3396	if (U_FAILURE(status)) {
	3397	dataerrln("Fails: Unable to create the transliterator from rules. - %s", u_errorName(status));
	3398	} else {
	3399	Transliterator::registerInstance(t);
	3400	}
	3401	}
	3402	for (i = 0; testCases[i].length()!=0; i+=3) {
	3403	UErrorCode ec = U_ZERO_ERROR;
	3404	UParseError pe;
	3405	const UnicodeString& name = testCases[i];
	3406	Transliterator *t = Transliterator::createInstance(name, UTRANS_FORWARD, pe, ec);
	3407	if (U_FAILURE(ec)) {
	3408	dataerrln((UnicodeString)"FAIL: Couldn't create " + name + " - " + u_errorName(ec));
	3409	delete t;
	3410	continue;
	3411	}
	3412	const UnicodeString& id = t->getID();
	3413	const UnicodeString& source = testCases[i+1];
	3414	UnicodeString target;
	3415
	3416	// Automatic generation of targets, to make it simpler to add test cases (and more fail-safe)
	3417
	3418	if (testCases[i+2].length() > 0) {
	3419	target = testCases[i+2];
	3420	} else if (0==id.caseCompare("NFD", U_FOLD_CASE_DEFAULT)) {
	3421	Normalizer::normalize(source, UNORM_NFD, 0, target, ec);
	3422	} else if (0==id.caseCompare("NFC", U_FOLD_CASE_DEFAULT)) {
	3423	Normalizer::normalize(source, UNORM_NFC, 0, target, ec);
	3424	} else if (0==id.caseCompare("NFKD", U_FOLD_CASE_DEFAULT)) {
	3425	Normalizer::normalize(source, UNORM_NFKD, 0, target, ec);
	3426	} else if (0==id.caseCompare("NFKC", U_FOLD_CASE_DEFAULT)) {
	3427	Normalizer::normalize(source, UNORM_NFKC, 0, target, ec);
	3428	} else if (0==id.caseCompare("Lower", U_FOLD_CASE_DEFAULT)) {
	3429	target = source;
	3430	target.toLower(Locale::getUS());
	3431	} else if (0==id.caseCompare("Upper", U_FOLD_CASE_DEFAULT)) {
	3432	target = source;
	3433	target.toUpper(Locale::getUS());
	3434	}
	3435	if (U_FAILURE(ec)) {
	3436	errln((UnicodeString)"FAIL: Internal error normalizing " + source);
	3437	continue;
	3438	}
	3439
	3440	expect(*t, source, target);
	3441	delete t;
	3442	}
	3443	for (i = 0; registerRules[i].length()!=0; i+=2) {
	3444	Transliterator::unregister(registerRules[i]);
	3445	}
	3446	}
	3447
	/**
	 * Formats a code point as an escape sequence into 'buffer' and returns
	 * 'buffer': "\uXXXX" (at least four lowercase hex digits) for values up
	 * to 0xFFFF, "\UXXXXXXXX" (eight digits) for larger values.
	 * The caller must supply a buffer large enough for the result plus the
	 * terminating NUL; no bounds checking is done.
	 */
	char* Char32ToEscapedChars(UChar32 ch, char* buffer) {
	    if (ch > 0xFFFF) {
	        // Supplementary code point: long form.
	        sprintf(buffer, "\\U%08x", (int)ch);
	        return buffer;
	    }
	    sprintf(buffer, "\\u%04x", (int)ch);
	    return buffer;
	}
	3456
	3457	void TransliteratorTest::TestSurrogateCasing (void) {
	3458	// check that casing handles surrogates
	3459	// titlecase is currently defective
	3460	char buffer[20];
	3461	UChar buffer2[20];
	3462	UChar32 dee;
	3463	UTF_GET_CHAR(DESERET_dee,0, 0, DESERET_dee.length(), dee);
	3464	UnicodeString DEE(u_totitle(dee));
	3465	if (DEE != DESERET_DEE) {
	3466	err("Fails titlecase of surrogates");
	3467	err(Char32ToEscapedChars(dee, buffer));
	3468	err(", ");
	3469	errln(Char32ToEscapedChars(DEE.char32At(0), buffer));
	3470	}
	3471
	3472	UnicodeString deeDEETest=DESERET_dee + DESERET_DEE;
	3473	UnicodeString deedeeTest = DESERET_dee + DESERET_dee;
	3474	UnicodeString DEEDEETest = DESERET_DEE + DESERET_DEE;
	3475	UErrorCode status= U_ZERO_ERROR;
	3476
	3477	u_strToUpper(buffer2, 20, deeDEETest.getBuffer(), deeDEETest.length(), NULL, &status);
	3478	if (U_FAILURE(status) \|\| (UnicodeString(buffer2)!= DEEDEETest)) {
	3479	errln("Fails: Can't uppercase surrogates.");
	3480	}
	3481
	3482	status= U_ZERO_ERROR;
	3483	u_strToLower(buffer2, 20, deeDEETest.getBuffer(), deeDEETest.length(), NULL, &status);
	3484	if (U_FAILURE(status) \|\| (UnicodeString(buffer2)!= deedeeTest)) {
	3485	errln("Fails: Can't lowercase surrogates.");
	3486	}
	3487	}
	3488
	3489	static void _trans(Transliterator& t, const UnicodeString& src,
	3490	UnicodeString& result) {
	3491	result = src;
	3492	t.transliterate(result);
	3493	}
	3494
	3495	static void _trans(const UnicodeString& id, const UnicodeString& src,
	3496	UnicodeString& result, UErrorCode ec) {
	3497	UParseError pe;
	3498	Transliterator *t = Transliterator::createInstance(id, UTRANS_FORWARD, pe, ec);
	3499	if (U_SUCCESS(ec)) {
	3500	_trans(*t, src, result);
	3501	}
	3502	delete t;
	3503	}
	3504
	3505	static UnicodeString _findMatch(const UnicodeString& source,
	3506	const UnicodeString* pairs) {
	3507	UnicodeString empty;
	3508	for (int32_t i=0; pairs[i].length() > 0; i+=2) {
	3509	if (0==source.caseCompare(pairs[i], U_FOLD_CASE_DEFAULT)) {
	3510	return pairs[i+1];
	3511	}
	3512	}
	3513	return empty;
	3514	}
	3515
	3516	// Check to see that incremental gets at least part way through a reasonable string.
	3517
	3518	void TransliteratorTest::TestIncrementalProgress(void) {
	3519	UErrorCode ec = U_ZERO_ERROR;
	3520	UnicodeString latinTest = "The Quick Brown Fox.";
	3521	UnicodeString devaTest;
	3522	_trans("Latin-Devanagari", latinTest, devaTest, ec);
	3523	UnicodeString kataTest;
	3524	_trans("Latin-Katakana", latinTest, kataTest, ec);
	3525	if (U_FAILURE(ec)) {
	3526	errln("FAIL: Internal error");
	3527	return;
	3528	}
	3529	const UnicodeString tests[] = {
	3530	"Any", latinTest,
	3531	"Latin", latinTest,
	3532	"Halfwidth", latinTest,
	3533	"Devanagari", devaTest,
	3534	"Katakana", kataTest,
	3535	"" // END MARKER
	3536	};
	3537
	3538	UnicodeString test("The Quick Brown Fox Jumped Over The Lazy Dog.");
	3539	int32_t i = 0, j=0, k=0;
	3540	int32_t sources = Transliterator::countAvailableSources();
	3541	for (i = 0; i < sources; i++) {
	3542	UnicodeString source;
	3543	Transliterator::getAvailableSource(i, source);
	3544	UnicodeString test = _findMatch(source, tests);
	3545	if (test.length() == 0) {
	3546	logln((UnicodeString)"Skipping " + source + "-X");
	3547	continue;
	3548	}
	3549	int32_t targets = Transliterator::countAvailableTargets(source);
	3550	for (j = 0; j < targets; j++) {
	3551	UnicodeString target;
	3552	Transliterator::getAvailableTarget(j, source, target);
	3553	int32_t variants = Transliterator::countAvailableVariants(source, target);
	3554	for (k =0; k< variants; k++) {
	3555	UnicodeString variant;
	3556	UParseError err;
	3557	UErrorCode status = U_ZERO_ERROR;
	3558
	3559	Transliterator::getAvailableVariant(k, source, target, variant);
	3560	UnicodeString id = source + "-" + target + "/" + variant;
	3561
	3562	Transliterator *t = Transliterator::createInstance(id, UTRANS_FORWARD, err, status);
	3563	if (U_FAILURE(status)) {
	3564	dataerrln((UnicodeString)"FAIL: Could not create " + id);
	3565	delete t;
	3566	continue;
	3567	}
	3568	status = U_ZERO_ERROR;
	3569	CheckIncrementalAux(t, test);
	3570
	3571	UnicodeString rev;
	3572	_trans(*t, test, rev);
	3573	Transliterator *inv = t->createInverse(status);
	3574	if (U_FAILURE(status)) {
	3575	#if UCONFIG_NO_BREAK_ITERATION
	3576	// If UCONFIG_NO_BREAK_ITERATION is on, then only Thai should fail.
	3577	if (id.compare((UnicodeString)"Latin-Thai/") != 0)
	3578	#endif
	3579	errln((UnicodeString)"FAIL: Could not create inverse of " + id);
	3580
	3581	delete t;
	3582	delete inv;
	3583	continue;
	3584	}
	3585	CheckIncrementalAux(inv, rev);
	3586	delete t;
	3587	delete inv;
	3588	}
	3589	}
	3590	}
	3591	}
	3592
	3593	void TransliteratorTest::CheckIncrementalAux(const Transliterator* t,
	3594	const UnicodeString& input) {
	3595	UErrorCode ec = U_ZERO_ERROR;
	3596	UTransPosition pos;
	3597	UnicodeString test = input;
	3598
	3599	pos.contextStart = 0;
	3600	pos.contextLimit = input.length();
	3601	pos.start = 0;
	3602	pos.limit = input.length();
	3603
	3604	t->transliterate(test, pos, ec);
	3605	if (U_FAILURE(ec)) {
	3606	errln((UnicodeString)"FAIL: transliterate() error " + u_errorName(ec));
	3607	return;
	3608	}
	3609	UBool gotError = FALSE;
	3610
	3611	// we have a few special cases. Any-Remove (pos.start = 0, but also = limit) and U+XXXXX?X?
	3612
	3613	if (pos.start == 0 && pos.limit != 0 && t->getID() != "Hex-Any/Unicode") {
	3614	errln((UnicodeString)"No Progress, " +
	3615	t->getID() + ": " + formatInput(test, input, pos));
	3616	gotError = TRUE;
	3617	} else {
	3618	logln((UnicodeString)"PASS Progress, " +
	3619	t->getID() + ": " + formatInput(test, input, pos));
	3620	}
	3621	t->finishTransliteration(test, pos);
	3622	if (pos.start != pos.limit) {
	3623	errln((UnicodeString)"Incomplete, " +
	3624	t->getID() + ": " + formatInput(test, input, pos));
	3625	gotError = TRUE;
	3626	}
	3627	}
	3628
	3629	void TransliteratorTest::TestFunction() {
	3630	// Careful with spacing and ';' here: Phrase this exactly
	3631	// as toRules() is going to return it. If toRules() changes
	3632	// with regard to spacing or ';', then adjust this string.
	3633	UnicodeString rule =
	3634	"([:Lu:]) > $1 '(' &Lower( $1 ) '=' &Hex( &Any-Lower( $1 ) ) ')';";
	3635
	3636	UParseError pe;
	3637	UErrorCode ec = U_ZERO_ERROR;
	3638	Transliterator *t = Transliterator::createFromRules("Test", rule, UTRANS_FORWARD, pe, ec);
	3639	if (t == NULL) {
	3640	dataerrln("FAIL: createFromRules failed - %s", u_errorName(ec));
	3641	return;
	3642	}
	3643
	3644	UnicodeString r;
	3645	t->toRules(r, TRUE);
	3646	if (r == rule) {
	3647	logln((UnicodeString)"OK: toRules() => " + r);
	3648	} else {
	3649	errln((UnicodeString)"FAIL: toRules() => " + r +
	3650	", expected " + rule);
	3651	}
	3652
	3653	expect(*t, "The Quick Brown Fox",
	3654	UNICODE_STRING_SIMPLE("T(t=\\u0074)he Q(q=\\u0071)uick B(b=\\u0062)rown F(f=\\u0066)ox"));
	3655
	3656	delete t;
	3657	}
	3658
	3659	void TransliteratorTest::TestInvalidBackRef(void) {
	3660	UnicodeString rule = ". > $1;";
	3661	UnicodeString rule2 =CharsToUnicodeString("(.) <> &hex/unicode($1) &name($1); . > $1; [{}] >\\u0020;");
	3662	UParseError pe;
	3663	UErrorCode ec = U_ZERO_ERROR;
	3664	Transliterator *t = Transliterator::createFromRules("Test", rule, UTRANS_FORWARD, pe, ec);
	3665	Transliterator *t2 = Transliterator::createFromRules("Test2", rule2, UTRANS_FORWARD, pe, ec);
	3666
	3667	if (t != NULL) {
	3668	errln("FAIL: createFromRules should have returned NULL");
	3669	delete t;
	3670	}
	3671
	3672	if (t2 != NULL) {
	3673	errln("FAIL: createFromRules should have returned NULL");
	3674	delete t2;
	3675	}
	3676
	3677	if (U_SUCCESS(ec)) {
	3678	errln("FAIL: Ok: . > $1; => no error");
	3679	} else {
	3680	logln((UnicodeString)"Ok: . > $1; => " + u_errorName(ec));
	3681	}
	3682	}
	3683
	3684	void TransliteratorTest::TestMulticharStringSet() {
	3685	// Basic testing
	3686	const char* rule =
	3687	" [{aa}] > x;"
	3688	" a > y;"
	3689	" [b{bc}] > z;"
	3690	"[{gd}] { e > q;"
	3691	" e } [{fg}] > r;" ;
	3692
	3693	UParseError pe;
	3694	UErrorCode ec = U_ZERO_ERROR;
	3695	Transliterator* t = Transliterator::createFromRules("Test", rule, UTRANS_FORWARD, pe, ec);
	3696	if (t == NULL \|\| U_FAILURE(ec)) {
	3697	delete t;
	3698	errln("FAIL: createFromRules failed");
	3699	return;
	3700	}
	3701
	3702	expect(*t, "a aa ab bc d gd de gde gdefg ddefg",
	3703	"y x yz z d gd de gdq gdqfg ddrfg");
	3704	delete t;
	3705
	3706	// Overlapped string test. Make sure that when multiple
	3707	// strings can match that the longest one is matched.
	3708	rule =
	3709	" [a {ab} {abc}] > x;"
	3710	" b > y;"
	3711	" c > z;"
	3712	" q [t {st} {rst}] { e > p;" ;
	3713
	3714	t = Transliterator::createFromRules("Test", rule, UTRANS_FORWARD, pe, ec);
	3715	if (t == NULL \|\| U_FAILURE(ec)) {
	3716	delete t;
	3717	errln("FAIL: createFromRules failed");
	3718	return;
	3719	}
	3720
	3721	expect(*t, "a ab abc qte qste qrste",
	3722	"x x x qtp qstp qrstp");
	3723	delete t;
	3724	}
	3725
	3726	// vvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvv
	3727	// BEGIN TestUserFunction support factory
	3728
	// Slot tables used by the TestUserFunction support helpers in this file.
	// Both arrays are indexed by the same slot number n (0..3):
	//   _TUFF[n]  - prototype transliterator stored by _TUFReg and cloned by
	//               _TUFFactory
	//   _TUFID[n] - heap-allocated copy of the ID under which slot n was
	//               registered, used by _TUFUnreg to unregister it
	3729	Transliterator* _TUFF[4];
	3730	UnicodeString* _TUFID[4];
	3731
	3732	static Transliterator* U_EXPORT2 _TUFFactory(const UnicodeString& /ID/,
	3733	Transliterator::Token context) {
	3734	return _TUFF[context.integer]->clone();
	3735	}
	3736
	3737	static void _TUFReg(const UnicodeString& ID, Transliterator* t, int32_t n) {
	3738	_TUFF[n] = t;
	3739	_TUFID[n] = new UnicodeString(ID);
	3740	Transliterator::registerFactory(ID, _TUFFactory, Transliterator::integerToken(n));
	3741	}
	3742
	3743	static void _TUFUnreg(int32_t n) {
	3744	if (_TUFF[n] != NULL) {
	3745	Transliterator::unregister(*_TUFID[n]);
	3746	delete _TUFF[n];
	3747	delete _TUFID[n];
	3748	}
	3749	}
	3750
	3751	// END TestUserFunction support factory
	3752	// ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
	3753
	3754	/**
	3755	* Test that user-registered transliterators can be used under function
	3756	* syntax.
	3757	*/
	3758	void TransliteratorTest::TestUserFunction() {
	3759
	3760	Transliterator* t;
	3761	UParseError pe;
	3762	UErrorCode ec = U_ZERO_ERROR;
	3763
	3764	// Setup our factory
	3765	int32_t i;
	3766	for (i=0; i<4; ++i) {
	3767	_TUFF[i] = NULL;
	3768	}
	3769
	3770	// There's no need to register inverses if we don't use them
	3771	t = Transliterator::createFromRules("gif",
	3772	UNICODE_STRING_SIMPLE("'\\'u(..)(..) > '<img src=\"http://www.unicode.org/gifs/24/' $1 '/U' $1$2 '.gif\">';"),
	3773	UTRANS_FORWARD, pe, ec);
	3774	if (t == NULL \|\| U_FAILURE(ec)) {
	3775	dataerrln((UnicodeString)"FAIL: createFromRules gif " + u_errorName(ec));
	3776	return;
	3777	}
	3778	_TUFReg("Any-gif", t, 0);
	3779
	3780	t = Transliterator::createFromRules("RemoveCurly",
	3781	UNICODE_STRING_SIMPLE("[\\{\\}] > ; '\\N' > ;"),
	3782	UTRANS_FORWARD, pe, ec);
	3783	if (t == NULL \|\| U_FAILURE(ec)) {
	3784	errln((UnicodeString)"FAIL: createFromRules RemoveCurly " + u_errorName(ec));
	3785	goto FAIL;
	3786	}
	3787	expect(*t, UNICODE_STRING_SIMPLE("\\N{name}"), "name");
	3788	_TUFReg("Any-RemoveCurly", t, 1);
	3789
	3790	logln("Trying &hex");
	3791	t = Transliterator::createFromRules("hex2",
	3792	"(.) > &hex($1);",
	3793	UTRANS_FORWARD, pe, ec);
	3794	if (t == NULL \|\| U_FAILURE(ec)) {
	3795	errln("FAIL: createFromRules");
	3796	goto FAIL;
	3797	}
	3798	logln("Registering");
	3799	_TUFReg("Any-hex2", t, 2);
	3800	t = Transliterator::createInstance("Any-hex2", UTRANS_FORWARD, ec);
	3801	if (t == NULL \|\| U_FAILURE(ec)) {
	3802	errln((UnicodeString)"FAIL: createInstance Any-hex2 " + u_errorName(ec));
	3803	goto FAIL;
	3804	}
	3805	expect(*t, "abc", UNICODE_STRING_SIMPLE("\\u0061\\u0062\\u0063"));
	3806	delete t;
	3807
	3808	logln("Trying &gif");
	3809	t = Transliterator::createFromRules("gif2",
	3810	"(.) > &Gif(&Hex2($1));",
	3811	UTRANS_FORWARD, pe, ec);
	3812	if (t == NULL \|\| U_FAILURE(ec)) {
	3813	errln((UnicodeString)"FAIL: createFromRules gif2 " + u_errorName(ec));
	3814	goto FAIL;
	3815	}
	3816	logln("Registering");
	3817	_TUFReg("Any-gif2", t, 3);
	3818	t = Transliterator::createInstance("Any-gif2", UTRANS_FORWARD, ec);
	3819	if (t == NULL \|\| U_FAILURE(ec)) {
	3820	errln((UnicodeString)"FAIL: createInstance Any-gif2 " + u_errorName(ec));
	3821	goto FAIL;
	3822	}
	3823	expect(*t, "ab", "<img src=\"http://www.unicode.org/gifs/24/00/U0061.gif\">"
	3824	"<img src=\"http://www.unicode.org/gifs/24/00/U0062.gif\">");
	3825	delete t;
	3826
	3827	// Test that filters are allowed after &
	3828	t = Transliterator::createFromRules("test",
	3829	"(.) > &Hex($1) ' ' &RemoveCurly(&Name($1)) ' ';",
	3830	UTRANS_FORWARD, pe, ec);
	3831	if (t == NULL \|\| U_FAILURE(ec)) {
	3832	errln((UnicodeString)"FAIL: createFromRules test " + u_errorName(ec));
	3833	goto FAIL;
	3834	}
	3835	expect(*t, "abc",
	3836	UNICODE_STRING_SIMPLE("\\u0061 LATIN SMALL LETTER A \\u0062 LATIN SMALL LETTER B \\u0063 LATIN SMALL LETTER C "));
	3837	delete t;
	3838
	3839	FAIL:
	3840	for (i=0; i<4; ++i) {
	3841	_TUFUnreg(i);
	3842	}
	3843	}
	3844
	3845	/**
	3846	* Test the Any-X transliterators.
	3847	*/
	3848	void TransliteratorTest::TestAnyX(void) {
	3849	UParseError parseError;
	3850	UErrorCode status = U_ZERO_ERROR;
	3851	Transliterator* anyLatin =
	3852	Transliterator::createInstance("Any-Latin", UTRANS_FORWARD, parseError, status);
	3853	if (anyLatin==0) {
	3854	dataerrln("FAIL: createInstance returned NULL - %s", u_errorName(status));
	3855	delete anyLatin;
	3856	return;
	3857	}
	3858
	3859	expect(*anyLatin,
	3860	CharsToUnicodeString("greek:\\u03B1\\u03B2\\u03BA\\u0391\\u0392\\u039A hiragana:\\u3042\\u3076\\u304F cyrillic:\\u0430\\u0431\\u0446"),
	3861	CharsToUnicodeString("greek:abkABK hiragana:abuku cyrillic:abc"));
	3862
	3863	delete anyLatin;
	3864	}
	3865
	3866	/**
	3867	* Test Any-X transliterators with sample letters from all scripts.
	3868	*/
	3869	void TransliteratorTest::TestAny(void) {
	3870	UErrorCode status = U_ZERO_ERROR;
	3871	// Note: there is a lot of implict construction of UnicodeStrings from (char *) in
	3872	// function call parameters going on in this test.
	3873	UnicodeSet alphabetic("[:alphabetic:]", status);
	3874	if (U_FAILURE(status)) {
	3875	dataerrln("Failure: file %s, line %d, status = %s", __FILE__, __LINE__, u_errorName(status));
	3876	return;
	3877	}
	3878	alphabetic.freeze();
	3879
	3880	UnicodeString testString;
	3881	for (int32_t i = 0; i < USCRIPT_CODE_LIMIT; i++) {
	3882	const char *scriptName = uscript_getShortName((UScriptCode)i);
	3883	if (scriptName == NULL) {
	3884	errln("Failure: file %s, line %d: Script Code %d is invalid, ", __FILE__, __LINE__, i);
	3885	return;
	3886	}
	3887
	3888	UnicodeSet sample;
	3889	sample.applyPropertyAlias("script", scriptName, status);
	3890	if (U_FAILURE(status)) {
	3891	errln("Failure: file %s, line %d, status = %s", __FILE__, __LINE__, u_errorName(status));
	3892	return;
	3893	}
	3894	sample.retainAll(alphabetic);
	3895	for (int32_t count=0; count<5; count++) {
	3896	UChar32 c = sample.charAt(count);
	3897	if (c == -1) {
	3898	break;
	3899	}
	3900	testString.append(c);
	3901	}
	3902	}
	3903
	3904	UParseError parseError;
	3905	Transliterator* anyLatin =
	3906	Transliterator::createInstance("Any-Latin", UTRANS_FORWARD, parseError, status);
	3907	if (U_FAILURE(status)) {
	3908	errln("Failure: file %s, line %d, status = %s", __FILE__, __LINE__, u_errorName(status));
	3909	return;
	3910	}
	3911
	3912	logln(UnicodeString("Sample set for Any-Latin: ") + testString);
	3913	anyLatin->transliterate(testString);
	3914	logln(UnicodeString("Sample result for Any-Latin: ") + testString);
	3915	delete anyLatin;
	3916	}
	3917
	3918
	3919	/**
	3920	* Test the source and target set API. These are only implemented
	3921	* for RBT and CompoundTransliterator at this time.
	3922	*/
	3923	void TransliteratorTest::TestSourceTargetSet() {
	3924	UErrorCode ec = U_ZERO_ERROR;
	3925
	3926	// Rules
	3927	const char* r =
	3928	"a > b; "
	3929	"r [x{lu}] > q;";
	3930
	3931	// Expected source
	3932	UnicodeSet expSrc("[arx{lu}]", ec);
	3933
	3934	// Expected target
	3935	UnicodeSet expTrg("[bq]", ec);
	3936
	3937	UParseError pe;
	3938	Transliterator* t = Transliterator::createFromRules("test", r, UTRANS_FORWARD, pe, ec);
	3939
	3940	if (U_FAILURE(ec)) {
	3941	delete t;
	3942	errln("FAIL: Couldn't set up test");
	3943	return;
	3944	}
	3945
	3946	UnicodeSet src; t->getSourceSet(src);
	3947	UnicodeSet trg; t->getTargetSet(trg);
	3948
	3949	if (src == expSrc && trg == expTrg) {
	3950	UnicodeString a, b;
	3951	logln((UnicodeString)"Ok: " +
	3952	r + " => source = " + src.toPattern(a, TRUE) +
	3953	", target = " + trg.toPattern(b, TRUE));
	3954	} else {
	3955	UnicodeString a, b, c, d;
	3956	errln((UnicodeString)"FAIL: " +
	3957	r + " => source = " + src.toPattern(a, TRUE) +
	3958	", expected " + expSrc.toPattern(b, TRUE) +
	3959	"; target = " + trg.toPattern(c, TRUE) +
	3960	", expected " + expTrg.toPattern(d, TRUE));
	3961	}
	3962
	3963	delete t;
	3964	}
	3965
	3966	/**
	3967	* Test handling of rule whitespace, for both RBT and UnicodeSet.
	3968	*/
	3969	void TransliteratorTest::TestRuleWhitespace() {
	3970	// Rules
	3971	const char* r = "a > \\u200E b;";
	3972
	3973	UErrorCode ec = U_ZERO_ERROR;
	3974	UParseError pe;
	3975	Transliterator* t = Transliterator::createFromRules("test", CharsToUnicodeString(r), UTRANS_FORWARD, pe, ec);
	3976
	3977	if (U_FAILURE(ec)) {
	3978	errln("FAIL: Couldn't set up test");
	3979	} else {
	3980	expect(*t, "a", "b");
	3981	}
	3982	delete t;
	3983
	3984	// UnicodeSet
	3985	ec = U_ZERO_ERROR;
	3986	UnicodeSet set(CharsToUnicodeString("[a \\u200E]"), ec);
	3987
	3988	if (U_FAILURE(ec)) {
	3989	errln("FAIL: Couldn't set up test");
	3990	} else {
	3991	if (set.contains(0x200E)) {
	3992	errln("FAIL: U+200E not being ignored by UnicodeSet");
	3993	}
	3994	}
	3995	}
	3996	//======================================================================
	3997	// this method is in TestUScript.java
	3998	//======================================================================
	3999	void TransliteratorTest::TestAllCodepoints(){
	4000	UScriptCode code= USCRIPT_INVALID_CODE;
	4001	char id[256]={'\0'};
	4002	char abbr[256]={'\0'};
	4003	char newId[256]={'\0'};
	4004	char newAbbrId[256]={'\0'};
	4005	char oldId[256]={'\0'};
	4006	char oldAbbrId[256]={'\0'};
	4007
	4008	UErrorCode status =U_ZERO_ERROR;
	4009	UParseError pe;
	4010
	4011	for(uint32_t i = 0; i<=0x10ffff; i++){
	4012	code = uscript_getScript(i,&status);
	4013	if(code == USCRIPT_INVALID_CODE){
	4014	errln("uscript_getScript for codepoint \\U%08X failed.\n", i);
	4015	}
	4016	const char* myId = uscript_getName(code);
	4017	if(!myId) {
	4018	dataerrln("Valid script code returned NULL name. Check your data!");
	4019	return;
	4020	}
	4021	uprv_strcpy(id,myId);
	4022	uprv_strcpy(abbr,uscript_getShortName(code));
	4023
	4024	uprv_strcpy(newId,"[:");
	4025	uprv_strcat(newId,id);
	4026	uprv_strcat(newId,":];NFD");
	4027
	4028	uprv_strcpy(newAbbrId,"[:");
	4029	uprv_strcat(newAbbrId,abbr);
	4030	uprv_strcat(newAbbrId,":];NFD");
	4031
	4032	if(uprv_strcmp(newId,oldId)!=0){
	4033	Transliterator* t = Transliterator::createInstance(newId,UTRANS_FORWARD,pe,status);
	4034	if(t==NULL \|\| U_FAILURE(status)){
	4035	errln((UnicodeString)"FAIL: Could not create " + id);
	4036	}
	4037	delete t;
	4038	}
	4039	if(uprv_strcmp(newAbbrId,oldAbbrId)!=0){
	4040	Transliterator* t = Transliterator::createInstance(newAbbrId,UTRANS_FORWARD,pe,status);
	4041	if(t==NULL \|\| U_FAILURE(status)){
	4042	errln((UnicodeString)"FAIL: Could not create " + id);
	4043	}
	4044	delete t;
	4045	}
	4046	uprv_strcpy(oldId,newId);
	4047	uprv_strcpy(oldAbbrId, newAbbrId);
	4048
	4049	}
	4050
	4051	}
	4052
	// TEST_TRANSLIT_ID(id, cls)
	// Creates a forward transliterator from the ID string `id` and checks that
	// the object's dynamic class ID equals cls::getStaticClassID().
	//   - creation failure is reported through dataerrln() with the error name
	//   - a class ID mismatch is reported through errln()
	// The instance is deleted in every case.  The expansion is a braced block
	// that calls errln()/dataerrln() unqualified, so it is presumably meant to
	// be used only inside test-class member functions.
	4053	#define TEST_TRANSLIT_ID(id, cls) { \
	4054	UErrorCode ec = U_ZERO_ERROR; \
	4055	Transliterator* t = Transliterator::createInstance(id, UTRANS_FORWARD, ec); \
	4056	if (U_FAILURE(ec)) { \
	4057	dataerrln("FAIL: Couldn't create %s - %s", id, u_errorName(ec)); \
	4058	} else { \
	4059	if (t->getDynamicClassID() != cls::getStaticClassID()) { \
	4060	errln("FAIL: " #cls " dynamic and static class ID mismatch"); \
	4061	} \
	4062	/* t = t; / /can't do this: coverage test for assignment op*/ \
	4063	} \
	4064	delete t; \
	4065	}
	4066
	// TEST_TRANSLIT_RULE(rule, cls)
	// Same check as TEST_TRANSLIT_ID, but the transliterator is built from the
	// rule text `rule` (under the ID "_") with createFromRules().
	// `rule` must be a string literal: the failure message is formed by
	// placing it directly after another literal so the two concatenate.
	// Creation failure and class ID mismatch are both reported via errln();
	// the instance is deleted in every case.
	4067	#define TEST_TRANSLIT_RULE(rule, cls) { \
	4068	UErrorCode ec = U_ZERO_ERROR; \
	4069	UParseError pe; \
	4070	Transliterator* t = Transliterator::createFromRules("_", rule, UTRANS_FORWARD, pe, ec); \
	4071	if (U_FAILURE(ec)) { \
	4072	errln("FAIL: Couldn't create " rule); \
	4073	} else { \
	4074	if (t->getDynamicClassID() != cls ::getStaticClassID()) { \
	4075	errln("FAIL: " #cls " dynamic and static class ID mismatch"); \
	4076	} \
	4077	/* t = t; / /can't do this: coverage test for assignment op*/ \
	4078	} \
	4079	delete t; \
	4080	}
	4081
	// Checks, for one representative ID (or rule) per transliterator class,
	// that the created object's dynamic class ID matches the static class ID
	// of the class named in the second macro argument.  All of the work is
	// done by the TEST_TRANSLIT_ID / TEST_TRANSLIT_RULE macros.
	4082	void TransliteratorTest::TestBoilerplate() {
	4083	TEST_TRANSLIT_ID("Any-Latin", AnyTransliterator);
	4084	TEST_TRANSLIT_ID("Any-Hex", EscapeTransliterator);
	4085	TEST_TRANSLIT_ID("Hex-Any", UnescapeTransliterator);
	4086	TEST_TRANSLIT_ID("Lower", LowercaseTransliterator);
	4087	TEST_TRANSLIT_ID("Upper", UppercaseTransliterator);
	4088	TEST_TRANSLIT_ID("Title", TitlecaseTransliterator);
	4089	TEST_TRANSLIT_ID("Null", NullTransliterator);
	4090	TEST_TRANSLIT_ID("Remove", RemoveTransliterator);
	4091	TEST_TRANSLIT_ID("Any-Name", UnicodeNameTransliterator);
	4092	TEST_TRANSLIT_ID("Name-Any", NameUnicodeTransliterator);
	4093	TEST_TRANSLIT_ID("NFD", NormalizationTransliterator);
	4094	TEST_TRANSLIT_ID("Latin-Greek", CompoundTransliterator);
	// The rule-based class is reached through a literal rule, not an ID.
	4095	TEST_TRANSLIT_RULE("a>b;", RuleBasedTransliterator);
	4096	}
	4097
	4098	void TransliteratorTest::TestAlternateSyntax() {
	4099	// U+2206 == &
	4100	// U+2190 == <
	4101	// U+2192 == >
	4102	// U+2194 == <>
	4103	expect(CharsToUnicodeString("a \\u2192 x; b \\u2190 y; c \\u2194 z"),
	4104	"abc",
	4105	"xbz");
	4106	expect(CharsToUnicodeString("([:^ASCII:]) \\u2192 \\u2206Name($1);"),
	4107	CharsToUnicodeString("<=\\u2190; >=\\u2192; <>=\\u2194; &=\\u2206"),
	4108	UNICODE_STRING_SIMPLE("<=\\N{LEFTWARDS ARROW}; >=\\N{RIGHTWARDS ARROW}; <>=\\N{LEFT RIGHT ARROW}; &=\\N{INCREMENT}"));
	4109	}
	4110
	// Rule sets used by the begin/end tests in this file.
	// Each entry is a single rule string assembled from adjacent string
	// literals; the "// [n]" markers give the entry's index in the array.
	// Entries whose rules rely on ::BEGIN/::END are commented out and replaced
	// by an empty string so that the remaining indexes keep their values
	// (BEGIN_END_TEST_CASES and the tests refer to entries by index).
	4111	static const char* BEGIN_END_RULES[] = {
	4112	// [0]
	4113	"abc > xy;"
	4114	"aba > z;",
	4115
	4116	// [1]
	4117	/*
	4118	"::BEGIN;"
	4119	"abc > xy;"
	4120	"::END;"
	4121	"::BEGIN;"
	4122	"aba > z;"
	4123	"::END;",
	4124	*/
	4125	"", // test case commented out below, this is here to keep from messing up the indexes
	4126
	4127	// [2]
	4128	/*
	4129	"abc > xy;"
	4130	"::BEGIN;"
	4131	"aba > z;"
	4132	"::END;",
	4133	*/
	4134	"", // test case commented out below, this is here to keep from messing up the indexes
	4135
	4136	// [3]
	4137	/*
	4138	"::BEGIN;"
	4139	"abc > xy;"
	4140	"::END;"
	4141	"aba > z;",
	4142	*/
	4143	"", // test case commented out below, this is here to keep from messing up the indexes
	4144
	4145	// [4]
	4146	"abc > xy;"
	4147	"::Null;"
	4148	"aba > z;",
	4149
	4150	// [5]
	4151	"::Upper;"
	4152	"ABC > xy;"
	4153	"AB > x;"
	4154	"C > z;"
	4155	"::Upper;"
	4156	"XYZ > p;"
	4157	"XY > q;"
	4158	"Z > r;"
	4159	"::Upper;",
	4160
	4161	// [6]
	4162	"$ws = [[:Separator:][\\u0009-\\u000C]$];"
	4163	"$delim = [\\-$ws];"
	4164	"$ws $delim* > ' ';"
	4165	"'-' $delim* > '-';",
	4166
	4167	// [7]
	4168	"::Null;"
	4169	"$ws = [[:Separator:][\\u0009-\\u000C]$];"
	4170	"$delim = [\\-$ws];"
	4171	"$ws $delim* > ' ';"
	4172	"'-' $delim* > '-';",
	4173
	4174	// [8]
	4175	"$ws = [[:Separator:][\\u0009-\\u000C]$];"
	4176	"$delim = [\\-$ws];"
	4177	"$ws $delim* > ' ';"
	4178	"'-' $delim* > '-';"
	4179	"::Null;",
	4180
	4181	// [9]
	4182	"$ws = [[:Separator:][\\u0009-\\u000C]$];"
	4183	"$delim = [\\-$ws];"
	4184	"::Null;"
	4185	"$ws $delim* > ' ';"
	4186	"'-' $delim* > '-';",
	4187
	4188	// [10]
	4189	/*
	4190	"::BEGIN;"
	4191	"$ws = [[:Separator:][\\u0009-\\u000C]$];"
	4192	"$delim = [\\-$ws];"
	4193	"::END;"
	4194	"$ws $delim* > ' ';"
	4195	"'-' $delim* > '-';",
	4196	*/
	4197	"", // test case commented out below, this is here to keep from messing up the indexes
	4198
	4199	// [11]
	4200	/*
	4201	"$ws = [[:Separator:][\\u0009-\\u000C]$];"
	4202	"$delim = [\\-$ws];"
	4203	"::BEGIN;"
	4204	"$ws $delim* > ' ';"
	4205	"'-' $delim* > '-';"
	4206	"::END;",
	4207	*/
	4208	"", // test case commented out below, this is here to keep from messing up the indexes
	4209
	4210	// [12]
	4211	/*
	4212	"$ws = [[:Separator:][\\u0009-\\u000C]$];"
	4213	"$delim = [\\-$ws];"
	4214	"$ab = [ab];"
	4215	"::BEGIN;"
	4216	"$ws $delim* > ' ';"
	4217	"'-' $delim* > '-';"
	4218	"::END;"
	4219	"::BEGIN;"
	4220	"$ab { ' ' } $ab > '-';"
	4221	"c { ' ' > ;"
	4222	"::END;"
	4223	"::BEGIN;"
	4224	"'a-a' > a\\%\|a;"
	4225	"::END;",
	4226	*/
	4227	"", // test case commented out below, this is here to keep from messing up the indexes
	4228
	4229	// [13]
	4230	"$ws = [[:Separator:][\\u0009-\\u000C]$];"
	4231	"$delim = [\\-$ws];"
	4232	"$ab = [ab];"
	4233	"::Null;"
	4234	"$ws $delim* > ' ';"
	4235	"'-' $delim* > '-';"
	4236	"::Null;"
	4237	"$ab { ' ' } $ab > '-';"
	4238	"c { ' ' > ;"
	4239	"::Null;"
	4240	"'a-a' > a\\%\|a;",
	4241
	4242	// [14]
	4243	/*
	4244	"::[abc];"
	4245	"::BEGIN;"
	4246	"abc > xy;"
	4247	"::END;"
	4248	"::BEGIN;"
	4249	"aba > yz;"
	4250	"::END;"
	4251	"::Upper;",
	4252	*/
	4253	"", // test case commented out below, this is here to keep from messing up the indexes
	4254
	4255	// [15]
	4256	"::[abc];"
	4257	"abc > xy;"
	4258	"::Null;"
	4259	"aba > yz;"
	4260	"::Upper;",
	4261
	4262	// [16]
	4263	/*
	4264	"::[abc];"
	4265	"::BEGIN;"
	4266	"abc <> xy;"
	4267	"::END;"
	4268	"::BEGIN;"
	4269	"aba <> yz;"
	4270	"::END;"
	4271	"::Upper(Lower);"
	4272	"::([XYZ]);"
	4273	*/
	4274	"", // test case commented out below, this is here to keep from messing up the indexes
	4275
	4276	// [17]
	4277	"::[abc];"
	4278	"abc <> xy;"
	4279	"::Null;"
	4280	"aba <> yz;"
	4281	"::Upper(Lower);"
	4282	"::([XYZ]);"
	4283	};
	// Number of entries in BEGIN_END_RULES (placeholders included).
	4284	static const int32_t BEGIN_END_RULES_length = (int32_t)(sizeof(BEGIN_END_RULES) / sizeof(BEGIN_END_RULES[0]));
	4285
	4286	/*
	4287	(This entire test is commented out below and will need some heavy revision when we re-add
	4288	the ::BEGIN/::END stuff)
	4289	static const char* BOGUS_BEGIN_END_RULES[] = {
	4290	// [7]
	4291	"::BEGIN;"
	4292	"abc > xy;"
	4293	"::BEGIN;"
	4294	"aba > z;"
	4295	"::END;"
	4296	"::END;",
	4297
	4298	// [8]
	4299	"abc > xy;"
	4300	" aba > z;"
	4301	"::END;",
	4302
	4303	// [9]
	4304	"::BEGIN;"
	4305	"::Upper;"
	4306	"::END;"
	4307	};
	4308	static const int32_t BOGUS_BEGIN_END_RULES_length = (int32_t)(sizeof(BOGUS_BEGIN_END_RULES) / sizeof(BOGUS_BEGIN_END_RULES[0]));
	4309	*/
	4310
	// Flat table of test cases for the begin/end tests: every three
	// consecutive entries form one case (rule string, input text, expected
	// output).  The tests walk the table with a stride of 3, so entries must
	// always be added or removed in whole triples.  Rows for rule sets that
	// are disabled in BEGIN_END_RULES are commented out here as well.
	4311	static const char* BEGIN_END_TEST_CASES[] = {
	4312	// rules input expected output
	4313	BEGIN_END_RULES[0], "abc ababc aba", "xy zbc z",
	4314	// BEGIN_END_RULES[1], "abc ababc aba", "xy abxy z",
	4315	// BEGIN_END_RULES[2], "abc ababc aba", "xy abxy z",
	4316	// BEGIN_END_RULES[3], "abc ababc aba", "xy abxy z",
	4317	BEGIN_END_RULES[4], "abc ababc aba", "xy abxy z",
	4318	BEGIN_END_RULES[5], "abccabaacababcbc", "PXAARXQBR",
	4319
	4320	BEGIN_END_RULES[6], "e e - e---e- e", "e e e-e-e",
	4321	BEGIN_END_RULES[7], "e e - e---e- e", "e e e-e-e",
	4322	BEGIN_END_RULES[8], "e e - e---e- e", "e e e-e-e",
	4323	BEGIN_END_RULES[9], "e e - e---e- e", "e e e-e-e",
	4324	// BEGIN_END_RULES[10], "e e - e---e- e", "e e e-e-e",
	4325	// BEGIN_END_RULES[11], "e e - e---e- e", "e e e-e-e",
	4326	// BEGIN_END_RULES[12], "e e - e---e- e", "e e e-e-e",
	4327	// BEGIN_END_RULES[12], "a a a a", "a%a%a%a",
	4328	// BEGIN_END_RULES[12], "a a-b c b a", "a%a-b cb-a",
	4329	BEGIN_END_RULES[13], "e e - e---e- e", "e e e-e-e",
	4330	BEGIN_END_RULES[13], "a a a a", "a%a%a%a",
	4331	BEGIN_END_RULES[13], "a a-b c b a", "a%a-b cb-a",
	4332
	4333	// BEGIN_END_RULES[14], "abc xy ababc xyz aba", "XY xy ABXY xyz YZ",
	4334	BEGIN_END_RULES[15], "abc xy ababc xyz aba", "XY xy ABXY xyz YZ",
	4335	// BEGIN_END_RULES[16], "abc xy ababc xyz aba", "XY xy ABXY xyz YZ",
	4336	BEGIN_END_RULES[17], "abc xy ababc xyz aba", "XY xy ABXY xyz YZ"
	4337	};
	// Total number of strings in the table (three per test case).
	4338	static const int32_t BEGIN_END_TEST_CASES_length = (int32_t)(sizeof(BEGIN_END_TEST_CASES) / sizeof(BEGIN_END_TEST_CASES[0]));
	4339
	4340	void TransliteratorTest::TestBeginEnd() {
	4341	// run through the list of test cases above
	4342	int32_t i = 0;
	4343	for (i = 0; i < BEGIN_END_TEST_CASES_length; i += 3) {
	4344	expect((UnicodeString)"Test case #" + (i / 3),
	4345	UnicodeString(BEGIN_END_TEST_CASES[i], -1, US_INV),
	4346	UnicodeString(BEGIN_END_TEST_CASES[i + 1], -1, US_INV),
	4347	UnicodeString(BEGIN_END_TEST_CASES[i + 2], -1, US_INV));
	4348	}
	4349
	4350	// instantiate the one reversible rule set in the reverse direction and make sure it does the right thing
	4351	UParseError parseError;
	4352	UErrorCode status = U_ZERO_ERROR;
	4353	Transliterator* reversed = Transliterator::createFromRules("Reversed", UnicodeString(BEGIN_END_RULES[17]),
	4354	UTRANS_REVERSE, parseError, status);
	4355	if (reversed == 0 \|\| U_FAILURE(status)) {
	4356	reportParseError(UnicodeString("FAIL: Couldn't create reversed transliterator"), parseError, status);
	4357	} else {
	4358	expect(*reversed, UnicodeString("xy XY XYZ yz YZ"), UnicodeString("xy abc xaba yz aba"));
	4359	}
	4360	delete reversed;
	4361
	4362	// finally, run through the list of syntactically-ill-formed rule sets above and make sure
	4363	// that all of them cause errors
	4364	/*
	4365	(commented out until we have the real ::BEGIN/::END stuff in place
	4366	for (i = 0; i < BOGUS_BEGIN_END_RULES_length; i++) {
	4367	UParseError parseError;
	4368	UErrorCode status = U_ZERO_ERROR;
	4369	Transliterator* t = Transliterator::createFromRules("foo", UnicodeString(BOGUS_BEGIN_END_RULES[i]),
	4370	UTRANS_FORWARD, parseError, status);
	4371	if (!U_FAILURE(status)) {
	4372	delete t;
	4373	errln((UnicodeString)"Should have gotten syntax error from " + BOGUS_BEGIN_END_RULES[i]);
	4374	}
	4375	}
	4376	*/
	4377	}
	4378
	4379	void TransliteratorTest::TestBeginEndToRules() {
	4380	// run through the same list of test cases we used above, but this time, instead of just
	4381	// instantiating a Transliterator from the rules and running the test against it, we instantiate
	4382	// a Transliterator from the rules, do toRules() on it, instantiate a Transliterator from
	4383	// the resulting set of rules, and make sure that the generated rule set is semantically equivalent
	4384	// to (i.e., does the same thing as) the original rule set
	4385	for (int32_t i = 0; i < BEGIN_END_TEST_CASES_length; i += 3) {
	4386	UParseError parseError;
	4387	UErrorCode status = U_ZERO_ERROR;
	4388	Transliterator* t = Transliterator::createFromRules("--", UnicodeString(BEGIN_END_TEST_CASES[i], -1, US_INV),
	4389	UTRANS_FORWARD, parseError, status);
	4390	if (U_FAILURE(status)) {
	4391	reportParseError(UnicodeString("FAIL: Couldn't create transliterator"), parseError, status);
	4392	} else {
	4393	UnicodeString rules;
	4394	t->toRules(rules, TRUE);
	4395	Transliterator* t2 = Transliterator::createFromRules((UnicodeString)"Test case #" + (i / 3), rules,
	4396	UTRANS_FORWARD, parseError, status);
	4397	if (U_FAILURE(status)) {
	4398	reportParseError(UnicodeString("FAIL: Couldn't create transliterator from generated rules"),
	4399	parseError, status);
	4400	delete t;
	4401	} else {
	4402	expect(*t2,
	4403	UnicodeString(BEGIN_END_TEST_CASES[i + 1], -1, US_INV),
	4404	UnicodeString(BEGIN_END_TEST_CASES[i + 2], -1, US_INV));
	4405	delete t;
	4406	delete t2;
	4407	}
	4408	}
	4409	}
	4410
	4411	// do the same thing for the reversible test case
	4412	UParseError parseError;
	4413	UErrorCode status = U_ZERO_ERROR;
	4414	Transliterator* reversed = Transliterator::createFromRules("Reversed", UnicodeString(BEGIN_END_RULES[17]),
	4415	UTRANS_REVERSE, parseError, status);
	4416	if (U_FAILURE(status)) {
	4417	reportParseError(UnicodeString("FAIL: Couldn't create reversed transliterator"), parseError, status);
	4418	} else {
	4419	UnicodeString rules;
	4420	reversed->toRules(rules, FALSE);
	4421	Transliterator* reversed2 = Transliterator::createFromRules("Reversed", rules, UTRANS_FORWARD,
	4422	parseError, status);
	4423	if (U_FAILURE(status)) {
	4424	reportParseError(UnicodeString("FAIL: Couldn't create reversed transliterator from generated rules"),
	4425	parseError, status);
	4426	delete reversed;
	4427	} else {
	4428	expect(*reversed2,
	4429	UnicodeString("xy XY XYZ yz YZ"),
	4430	UnicodeString("xy abc xaba yz aba"));
	4431	delete reversed;
	4432	delete reversed2;
	4433	}
	4434	}
	4435	}
	4436
	4437	void TransliteratorTest::TestRegisterAlias() {
	4438	UnicodeString longID("Lower;[aeiou]Upper");
	4439	UnicodeString shortID("Any-CapVowels");
	4440	UnicodeString reallyShortID("CapVowels");
	4441
	4442	Transliterator::registerAlias(shortID, longID);
	4443
	4444	UErrorCode err = U_ZERO_ERROR;
	4445	Transliterator* t1 = Transliterator::createInstance(longID, UTRANS_FORWARD, err);
	4446	if (U_FAILURE(err)) {
	4447	errln("Failed to instantiate transliterator with long ID");
	4448	Transliterator::unregister(shortID);
	4449	return;
	4450	}
	4451	Transliterator* t2 = Transliterator::createInstance(reallyShortID, UTRANS_FORWARD, err);
	4452	if (U_FAILURE(err)) {
	4453	errln("Failed to instantiate transliterator with short ID");
	4454	delete t1;
	4455	Transliterator::unregister(shortID);
	4456	return;
	4457	}
	4458
	4459	if (t1->getID() != longID)
	4460	errln("Transliterator instantiated with long ID doesn't have long ID");
	4461	if (t2->getID() != reallyShortID)
	4462	errln("Transliterator instantiated with short ID doesn't have short ID");
	4463
	4464	UnicodeString rules1;
	4465	UnicodeString rules2;
	4466
	4467	t1->toRules(rules1, TRUE);
	4468	t2->toRules(rules2, TRUE);
	4469	if (rules1 != rules2)
	4470	errln("Alias transliterators aren't the same");
	4471
	4472	delete t1;
	4473	delete t2;
	4474	Transliterator::unregister(shortID);
	4475
	4476	t1 = Transliterator::createInstance(shortID, UTRANS_FORWARD, err);
	4477	if (U_SUCCESS(err)) {
	4478	errln("Instantiation with short ID succeeded after short ID was unregistered");
	4479	delete t1;
	4480	}
	4481
	4482	// try the same thing again, but this time with something other than
	4483	// an instance of CompoundTransliterator
	4484	UnicodeString realID("Latin-Greek");
	4485	UnicodeString fakeID("Latin-dlgkjdflkjdl");
	4486	Transliterator::registerAlias(fakeID, realID);
	4487
	4488	err = U_ZERO_ERROR;
	4489	t1 = Transliterator::createInstance(realID, UTRANS_FORWARD, err);
	4490	if (U_FAILURE(err)) {
	4491	dataerrln("Failed to instantiate transliterator with real ID - %s", u_errorName(err));
	4492	Transliterator::unregister(realID);
	4493	return;
	4494	}
	4495	t2 = Transliterator::createInstance(fakeID, UTRANS_FORWARD, err);
	4496	if (U_FAILURE(err)) {
	4497	errln("Failed to instantiate transliterator with fake ID");
	4498	delete t1;
	4499	Transliterator::unregister(realID);
	4500	return;
	4501	}
	4502
	4503	t1->toRules(rules1, TRUE);
	4504	t2->toRules(rules2, TRUE);
	4505	if (rules1 != rules2)
	4506	errln("Alias transliterators aren't the same");
	4507
	4508	delete t1;
	4509	delete t2;
	4510	Transliterator::unregister(fakeID);
	4511	}
	4512
	4513	void TransliteratorTest::TestRuleStripping() {
	4514	/*
	4515	#
	4516	\uE001>\u0C01; # SIGN
	4517	*/
	4518	static const UChar rule[] = {
	4519	0x0023,0x0020,0x000D,0x000A,
	4520	0xE001,0x003E,0x0C01,0x003B,0x0020,0x0023,0x0020,0x0053,0x0049,0x0047,0x004E,0
	4521	};
	4522	static const UChar expectedRule[] = {
	4523	0xE001,0x003E,0x0C01,0x003B,0
	4524	};
	4525	UChar result[sizeof(rule)/sizeof(rule[0])];
	4526	UErrorCode status = U_ZERO_ERROR;
	4527	int32_t len = utrans_stripRules(rule, (int32_t)(sizeof(rule)/sizeof(rule[0])), result, &status);
	4528	if (len != u_strlen(expectedRule)) {
	4529	errln("utrans_stripRules return len = %d", len);
	4530	}
	4531	if (u_strncmp(expectedRule, result, len) != 0) {
	4532	errln("utrans_stripRules did not return expected string");
	4533	}
	4534	}
	4535
	4536	/**
	4537	* Test the Halfwidth-Fullwidth transliterator (ticket 6281).
	4538	*/
	4539	void TransliteratorTest::TestHalfwidthFullwidth(void) {
	4540	UParseError parseError;
	4541	UErrorCode status = U_ZERO_ERROR;
	4542	Transliterator* hf = Transliterator::createInstance("Halfwidth-Fullwidth", UTRANS_FORWARD, parseError, status);
	4543	Transliterator* fh = Transliterator::createInstance("Fullwidth-Halfwidth", UTRANS_FORWARD, parseError, status);
	4544	if (hf == 0 \|\| fh == 0) {
	4545	dataerrln("FAIL: createInstance failed - %s", u_errorName(status));
	4546	delete hf;
	4547	delete fh;
	4548	return;
	4549	}
	4550
	4551	// Array of 2n items
	4552	// Each item is
	4553	// "hf"\|"fh"\|"both",
	4554	// <Halfwidth>,
	4555	// <Fullwidth>
	4556	const char* DATA[] = {
	4557	"both",
	4558	"\\uFFE9\\uFFEA\\uFFEB\\uFFEC\\u0061\\uFF71\\u00AF\\u0020",
	4559	"\\u2190\\u2191\\u2192\\u2193\\uFF41\\u30A2\\uFFE3\\u3000",
	4560	};
	4561	int32_t DATA_length = (int32_t)(sizeof(DATA) / sizeof(DATA[0]));
	4562
	4563	for (int32_t i=0; i<DATA_length; i+=3) {
	4564	UnicodeString h = CharsToUnicodeString(DATA[i+1]);
	4565	UnicodeString f = CharsToUnicodeString(DATA[i+2]);
	4566	switch (*DATA[i]) {
	4567	case 0x68: //'h': // Halfwidth-Fullwidth only
	4568	expect(*hf, h, f);
	4569	break;
	4570	case 0x66: //'f': // Fullwidth-Halfwidth only
	4571	expect(*fh, f, h);
	4572	break;
	4573	case 0x62: //'b': // both directions
	4574	expect(*hf, h, f);
	4575	expect(*fh, f, h);
	4576	break;
	4577	}
	4578	}
	4579	delete hf;
	4580	delete fh;
	4581	}
	4582
	4583
	4584	/**
	4585	* Test Thai. The text is the first paragraph of "What is Unicode" from the Unicode.org web site.
	4586	* TODO: confirm that the expected results are correct.
	4587	* For now, test just confirms that C++ and Java give identical results.
	4588	*/
	4589	void TransliteratorTest::TestThai(void) {
	4590	#if !UCONFIG_NO_BREAK_ITERATION
	4591	UParseError parseError;
	4592	UErrorCode status = U_ZERO_ERROR;
	4593	Transliterator* tr = Transliterator::createInstance("Any-Latin", UTRANS_FORWARD, parseError, status);
	4594	if (tr == 0) {
	4595	dataerrln("FAIL: createInstance failed - %s", u_errorName(status));
	4596	return;
	4597	}
	4598	if (U_FAILURE(status)) {
	4599	errln("FAIL: createInstance failed with %s", u_errorName(status));
	4600	return;
	4601	}
	4602	const char *thaiText =
	4603	"\\u0e42\\u0e14\\u0e22\\u0e1e\\u0e37\\u0e49\\u0e19\\u0e10\\u0e32\\u0e19\\u0e41\\u0e25\\u0e49\\u0e27, \\u0e04\\u0e2d"
	4604	"\\u0e21\\u0e1e\\u0e34\\u0e27\\u0e40\\u0e15\\u0e2d\\u0e23\\u0e4c\\u0e08\\u0e30\\u0e40\\u0e01\\u0e35\\u0e48\\u0e22"
	4605	"\\u0e27\\u0e02\\u0e49\\u0e2d\\u0e07\\u0e01\\u0e31\\u0e1a\\u0e40\\u0e23\\u0e37\\u0e48\\u0e2d\\u0e07\\u0e02\\u0e2d"
	4606	"\\u0e07\\u0e15\\u0e31\\u0e27\\u0e40\\u0e25\\u0e02. \\u0e04\\u0e2d\\u0e21\\u0e1e\\u0e34\\u0e27\\u0e40\\u0e15\\u0e2d"
	4607	"\\u0e23\\u0e4c\\u0e08\\u0e31\\u0e14\\u0e40\\u0e01\\u0e47\\u0e1a\\u0e15\\u0e31\\u0e27\\u0e2d\\u0e31\\u0e01\\u0e29"
	4608	"\\u0e23\\u0e41\\u0e25\\u0e30\\u0e2d\\u0e31\\u0e01\\u0e02\\u0e23\\u0e30\\u0e2d\\u0e37\\u0e48\\u0e19\\u0e46 \\u0e42"
	4609	"\\u0e14\\u0e22\\u0e01\\u0e32\\u0e23\\u0e01\\u0e33\\u0e2b\\u0e19\\u0e14\\u0e2b\\u0e21\\u0e32\\u0e22\\u0e40\\u0e25"
	4610	"\\u0e02\\u0e43\\u0e2b\\u0e49\\u0e2a\\u0e33\\u0e2b\\u0e23\\u0e31\\u0e1a\\u0e41\\u0e15\\u0e48\\u0e25\\u0e30\\u0e15"
	4611	"\\u0e31\\u0e27. \\u0e01\\u0e48\\u0e2d\\u0e19\\u0e2b\\u0e19\\u0e49\\u0e32\\u0e17\\u0e35\\u0e48\\u0e4a Unicode \\u0e08"
	4612	"\\u0e30\\u0e16\\u0e39\\u0e01\\u0e2a\\u0e23\\u0e49\\u0e32\\u0e07\\u0e02\\u0e36\\u0e49\\u0e19, \\u0e44\\u0e14\\u0e49"
	4613	"\\u0e21\\u0e35\\u0e23\\u0e30\\u0e1a\\u0e1a encoding \\u0e2d\\u0e22\\u0e39\\u0e48\\u0e2b\\u0e25\\u0e32\\u0e22\\u0e23"
	4614	"\\u0e49\\u0e2d\\u0e22\\u0e23\\u0e30\\u0e1a\\u0e1a\\u0e2a\\u0e33\\u0e2b\\u0e23\\u0e31\\u0e1a\\u0e01\\u0e32\\u0e23"
	4615	"\\u0e01\\u0e33\\u0e2b\\u0e19\\u0e14\\u0e2b\\u0e21\\u0e32\\u0e22\\u0e40\\u0e25\\u0e02\\u0e40\\u0e2b\\u0e25\\u0e48"
	4616	"\\u0e32\\u0e19\\u0e35\\u0e49. \\u0e44\\u0e21\\u0e48\\u0e21\\u0e35 encoding \\u0e43\\u0e14\\u0e17\\u0e35\\u0e48"
	4617	"\\u0e21\\u0e35\\u0e08\\u0e33\\u0e19\\u0e27\\u0e19\\u0e15\\u0e31\\u0e27\\u0e2d\\u0e31\\u0e01\\u0e02\\u0e23\\u0e30"
	4618	"\\u0e21\\u0e32\\u0e01\\u0e40\\u0e1e\\u0e35\\u0e22\\u0e07\\u0e1e\\u0e2d: \\u0e22\\u0e01\\u0e15\\u0e31\\u0e27\\u0e2d"
	4619	"\\u0e22\\u0e48\\u0e32\\u0e07\\u0e40\\u0e0a\\u0e48\\u0e19, \\u0e40\\u0e09\\u0e1e\\u0e32\\u0e30\\u0e43\\u0e19\\u0e01"
	4620	"\\u0e25\\u0e38\\u0e48\\u0e21\\u0e2a\\u0e2b\\u0e20\\u0e32\\u0e1e\\u0e22\\u0e38\\u0e42\\u0e23\\u0e1b\\u0e40\\u0e1e"
	4621	"\\u0e35\\u0e22\\u0e07\\u0e41\\u0e2b\\u0e48\\u0e07\\u0e40\\u0e14\\u0e35\\u0e22\\u0e27 \\u0e01\\u0e47\\u0e15\\u0e49"
	4622	"\\u0e2d\\u0e07\\u0e01\\u0e32\\u0e23\\u0e2b\\u0e25\\u0e32\\u0e22 encoding \\u0e43\\u0e19\\u0e01\\u0e32\\u0e23\\u0e04"
	4623	"\\u0e23\\u0e2d\\u0e1a\\u0e04\\u0e25\\u0e38\\u0e21\\u0e17\\u0e38\\u0e01\\u0e20\\u0e32\\u0e29\\u0e32\\u0e43\\u0e19"
	4624	"\\u0e01\\u0e25\\u0e38\\u0e48\\u0e21. \\u0e2b\\u0e23\\u0e37\\u0e2d\\u0e41\\u0e21\\u0e49\\u0e41\\u0e15\\u0e48\\u0e43"
	4625	"\\u0e19\\u0e20\\u0e32\\u0e29\\u0e32\\u0e40\\u0e14\\u0e35\\u0e48\\u0e22\\u0e27 \\u0e40\\u0e0a\\u0e48\\u0e19 \\u0e20"
	4626	"\\u0e32\\u0e29\\u0e32\\u0e2d\\u0e31\\u0e07\\u0e01\\u0e24\\u0e29 \\u0e01\\u0e47\\u0e44\\u0e21\\u0e48\\u0e21\\u0e35"
	4627	" encoding \\u0e43\\u0e14\\u0e17\\u0e35\\u0e48\\u0e40\\u0e1e\\u0e35\\u0e22\\u0e07\\u0e1e\\u0e2d\\u0e2a\\u0e33\\u0e2b"
	4628	"\\u0e23\\u0e31\\u0e1a\\u0e17\\u0e38\\u0e01\\u0e15\\u0e31\\u0e27\\u0e2d\\u0e31\\u0e01\\u0e29\\u0e23, \\u0e40\\u0e04"
	4629	"\\u0e23\\u0e37\\u0e48\\u0e2d\\u0e07\\u0e2b\\u0e21\\u0e32\\u0e22\\u0e27\\u0e23\\u0e23\\u0e04\\u0e15\\u0e2d\\u0e19"
	4630	" \\u0e41\\u0e25\\u0e30\\u0e2a\\u0e31\\u0e0d\\u0e25\\u0e31\\u0e01\\u0e29\\u0e13\\u0e4c\\u0e17\\u0e32\\u0e07\\u0e40"
	4631	"\\u0e17\\u0e04\\u0e19\\u0e34\\u0e04\\u0e17\\u0e35\\u0e48\\u0e43\\u0e0a\\u0e49\\u0e01\\u0e31\\u0e19\\u0e2d\\u0e22"
	4632	"\\u0e39\\u0e48\\u0e17\\u0e31\\u0e48\\u0e27\\u0e44\\u0e1b.";
	4633
	4634	const char *latinText =
	4635	"doy ph\\u1ee5\\u0304\\u0302n \\u1e6d\\u0304h\\u0101n l\\u00e6\\u0302w, khxmphiwtexr\\u0312 ca ke\\u012b\\u0300"
	4636	"ywk\\u0304\\u0125xng k\\u1ea1b re\\u1ee5\\u0304\\u0300xng k\\u0304hxng t\\u1ea1wlek\\u0304h. khxmphiwtexr"
	4637	"\\u0312 c\\u1ea1d k\\u0115b t\\u1ea1w x\\u1ea1ks\\u0304\\u02b9r l\\u00e6a x\\u1ea1kk\\u0304h ra x\\u1ee5\\u0304"
	4638	"\\u0300n\\u00ab doy k\\u0101r k\\u1ea3h\\u0304nd h\\u0304m\\u0101ylek\\u0304h h\\u0304\\u0131\\u0302 s\\u0304"
	4639	"\\u1ea3h\\u0304r\\u1ea1b t\\u00e6\\u0300la t\\u1ea1w. k\\u0300xn h\\u0304n\\u0302\\u0101 th\\u012b\\u0300\\u0301"
	4640	" Unicode ca t\\u0304h\\u016bk s\\u0304r\\u0302\\u0101ng k\\u0304h\\u1ee5\\u0302n, d\\u1ecb\\u0302 m\\u012b "
	4641	"rabb encoding xy\\u016b\\u0300 h\\u0304l\\u0101y r\\u0302xy rabb s\\u0304\\u1ea3h\\u0304r\\u1ea1b k\\u0101"
	4642	"r k\\u1ea3h\\u0304nd h\\u0304m\\u0101ylek\\u0304h h\\u0304el\\u0300\\u0101 n\\u012b\\u0302. m\\u1ecb\\u0300m"
	4643	"\\u012b encoding d\\u0131 th\\u012b\\u0300 m\\u012b c\\u1ea3nwn t\\u1ea1w x\\u1ea1kk\\u0304hra m\\u0101k p"
	4644	"he\\u012byng phx: yk t\\u1ea1wx\\u1ef3\\u0101ng ch\\u00e8n, c\\u0304heph\\u0101a n\\u0131 kl\\u00f9m s\\u0304"
	4645	"h\\u0304p\\u0323h\\u0101ph yurop phe\\u012byng h\\u0304\\u00e6\\u0300ng de\\u012byw k\\u0306 t\\u0302xngk\\u0101"
	4646	"r h\\u0304l\\u0101y encoding n\\u0131 k\\u0101r khrxbkhlum thuk p\\u0323h\\u0101s\\u0304\\u02b9\\u0101 n\\u0131"
	4647	" kl\\u00f9m. h\\u0304r\\u1ee5\\u0304x m\\u00e6\\u0302t\\u00e6\\u0300 n\\u0131 p\\u0323h\\u0101s\\u0304\\u02b9"
	4648	"\\u0101 de\\u012b\\u0300yw ch\\u00e8n p\\u0323h\\u0101s\\u0304\\u02b9\\u0101 x\\u1ea1ngkvs\\u0304\\u02b9 k\\u0306"
	4649	" m\\u1ecb\\u0300m\\u012b encoding d\\u0131 th\\u012b\\u0300 phe\\u012byng phx s\\u0304\\u1ea3h\\u0304r\\u1ea1"
	4650	"b thuk t\\u1ea1w x\\u1ea1ks\\u0304\\u02b9r, kher\\u1ee5\\u0304\\u0300xngh\\u0304m\\u0101y wrrkh txn l\\u00e6"
	4651	"a s\\u0304\\u1ea1\\u1ef5l\\u1ea1ks\\u0304\\u02b9\\u1e47\\u0312 th\\u0101ng thekhnikh th\\u012b\\u0300 ch\\u0131"
	4652	"\\u0302 k\\u1ea1n xy\\u016b\\u0300 th\\u1ea1\\u0300wp\\u1ecb.";
	4653
	4654
	4655	UnicodeString xlitText(thaiText);
	4656	xlitText = xlitText.unescape();
	4657	tr->transliterate(xlitText);
	4658
	4659	UnicodeString expectedText(latinText);
	4660	expectedText = expectedText.unescape();
	4661	expect(*tr, xlitText, expectedText);
	4662
	4663	delete tr;
	4664	#endif
	4665	}
	4666
	4667
	4668	//======================================================================
	4669	// Support methods
	4670	//======================================================================
	4671	void TransliteratorTest::expectT(const UnicodeString& id,
	4672	const UnicodeString& source,
	4673	const UnicodeString& expectedResult) {
	4674	UErrorCode ec = U_ZERO_ERROR;
	4675	UParseError pe;
	4676	Transliterator *t = Transliterator::createInstance(id, UTRANS_FORWARD, pe, ec);
	4677	if (U_FAILURE(ec)) {
	4678	errln((UnicodeString)"FAIL: Could not create " + id + " - " + u_errorName(ec));
	4679	delete t;
	4680	return;
	4681	}
	4682	expect(*t, source, expectedResult);
	4683	delete t;
	4684	}
	4685
	4686	void TransliteratorTest::reportParseError(const UnicodeString& message,
	4687	const UParseError& parseError,
	4688	const UErrorCode& status) {
	4689	dataerrln(message +
	4690	/", parse error " + parseError.code +/
	4691	", line " + parseError.line +
	4692	", offset " + parseError.offset +
	4693	", pre-context " + prettify(parseError.preContext, TRUE) +
	4694	", post-context " + prettify(parseError.postContext,TRUE) +
	4695	", Error: " + u_errorName(status));
	4696	}
	4697
	4698	void TransliteratorTest::expect(const UnicodeString& rules,
	4699	const UnicodeString& source,
	4700	const UnicodeString& expectedResult,
	4701	UTransPosition *pos) {
	4702	expect("<ID>", rules, source, expectedResult, pos);
	4703	}
	4704
	4705	void TransliteratorTest::expect(const UnicodeString& id,
	4706	const UnicodeString& rules,
	4707	const UnicodeString& source,
	4708	const UnicodeString& expectedResult,
	4709	UTransPosition *pos) {
	4710	UErrorCode status = U_ZERO_ERROR;
	4711	UParseError parseError;
	4712	Transliterator* t = Transliterator::createFromRules(id, rules, UTRANS_FORWARD, parseError, status);
	4713	if (U_FAILURE(status)) {
	4714	reportParseError(UnicodeString("Couldn't create transliterator from ") + rules, parseError, status);
	4715	} else {
	4716	expect(*t, source, expectedResult, pos);
	4717	}
	4718	delete t;
	4719	}
	4720
	4721	void TransliteratorTest::expect(const Transliterator& t,
	4722	const UnicodeString& source,
	4723	const UnicodeString& expectedResult,
	4724	const Transliterator& reverseTransliterator) {
	4725	expect(t, source, expectedResult);
	4726	expect(reverseTransliterator, expectedResult, source);
	4727	}
	4728
	4729	void TransliteratorTest::expect(const Transliterator& t,
	4730	const UnicodeString& source,
	4731	const UnicodeString& expectedResult,
	4732	UTransPosition *pos) {
	4733	if (pos == 0) {
	4734	UnicodeString result(source);
	4735	t.transliterate(result);
	4736	expectAux(t.getID() + ":String", source, result, expectedResult);
	4737	}
	4738	UTransPosition index={0, 0, 0, 0};
	4739	if (pos != 0) {
	4740	index = *pos;
	4741	}
	4742
	4743	UnicodeString rsource(source);
	4744	if (pos == 0) {
	4745	t.transliterate(rsource);
	4746	} else {
	4747	// Do it all at once -- below we do it incrementally
	4748	t.finishTransliteration(rsource, *pos);
	4749	}
	4750	expectAux(t.getID() + ":Replaceable", source, rsource, expectedResult);
	4751
	4752	// Test keyboard (incremental) transliteration -- this result
	4753	// must be the same after we finalize (see below).
	4754	UnicodeString log;
	4755	rsource.remove();
	4756	if (pos != 0) {
	4757	rsource = source;
	4758	formatInput(log, rsource, index);
	4759	log.append(" -> ");
	4760	UErrorCode status = U_ZERO_ERROR;
	4761	t.transliterate(rsource, index, status);
	4762	formatInput(log, rsource, index);
	4763	} else {
	4764	for (int32_t i=0; i<source.length(); ++i) {
	4765	if (i != 0) {
	4766	log.append(" + ");
	4767	}
	4768	log.append(source.charAt(i)).append(" -> ");
	4769	UErrorCode status = U_ZERO_ERROR;
	4770	t.transliterate(rsource, index, source.charAt(i), status);
	4771	formatInput(log, rsource, index);
	4772	}
	4773	}
	4774
	4775	// As a final step in keyboard transliteration, we must call
	4776	// transliterate to finish off any pending partial matches that
	4777	// were waiting for more input.
	4778	t.finishTransliteration(rsource, index);
	4779	log.append(" => ").append(rsource);
	4780
	4781	expectAux(t.getID() + ":Keyboard", log,
	4782	rsource == expectedResult,
	4783	expectedResult);
	4784	}
	4785
	4786
	4787	/**
	4788	* @param appendTo result is appended to this param.
	4789	* @param input the string being transliterated
	4790	* @param pos the index struct
	4791	*/
	4792	UnicodeString& TransliteratorTest::formatInput(UnicodeString &appendTo,
	4793	const UnicodeString& input,
	4794	const UTransPosition& pos) {
	4795	// Output a string of the form aaa{bbb\|ccc\|ddd}eee, where
	4796	// the {} indicate the context start and limit, and the \|\|
	4797	// indicate the start and limit.
	4798	if (0 <= pos.contextStart &&
	4799	pos.contextStart <= pos.start &&
	4800	pos.start <= pos.limit &&
	4801	pos.limit <= pos.contextLimit &&
	4802	pos.contextLimit <= input.length()) {
	4803
	4804	UnicodeString a, b, c, d, e;
	4805	input.extractBetween(0, pos.contextStart, a);
	4806	input.extractBetween(pos.contextStart, pos.start, b);
	4807	input.extractBetween(pos.start, pos.limit, c);
	4808	input.extractBetween(pos.limit, pos.contextLimit, d);
	4809	input.extractBetween(pos.contextLimit, input.length(), e);
	4810	appendTo.append(a).append((UChar)123/{/).append(b).
	4811	append((UChar)PIPE).append(c).append((UChar)PIPE).append(d).
	4812	append((UChar)125/}/).append(e);
	4813	} else {
	4814	appendTo.append((UnicodeString)"INVALID UTransPosition {cs=" +
	4815	pos.contextStart + ", s=" + pos.start + ", l=" +
	4816	pos.limit + ", cl=" + pos.contextLimit + "} on " +
	4817	input);
	4818	}
	4819	return appendTo;
	4820	}
	4821
	4822	void TransliteratorTest::expectAux(const UnicodeString& tag,
	4823	const UnicodeString& source,
	4824	const UnicodeString& result,
	4825	const UnicodeString& expectedResult) {
	4826	expectAux(tag, source + " -> " + result,
	4827	result == expectedResult,
	4828	expectedResult);
	4829	}
	4830
	4831	void TransliteratorTest::expectAux(const UnicodeString& tag,
	4832	const UnicodeString& summary, UBool pass,
	4833	const UnicodeString& expectedResult) {
	4834	if (pass) {
	4835	logln(UnicodeString("(")+tag+") " + prettify(summary));
	4836	} else {
	4837	dataerrln(UnicodeString("FAIL: (")+tag+") "
	4838	+ prettify(summary)
	4839	+ ", expected " + prettify(expectedResult));
	4840	}
	4841	}
	4842
	4843	#endif /* #if !UCONFIG_NO_TRANSLITERATION */