git.saurik.com Git - apple/icu.git/blame_incremental - icuSources/test/intltest/transtst.cpp

0 / 4896 ( 0%)

Commit	Line	Data
	1	// © 2016 and later: Unicode, Inc. and others.
	2	// License & terms of use: http://www.unicode.org/copyright.html
	3	/*
	4	**********************************************************************
	5	* Copyright (C) 1999-2016, International Business Machines
	6	* Corporation and others. All Rights Reserved.
	7	**********************************************************************
	8	* Date Name Description
	9	* 11/10/99 aliu Creation.
	10	**********************************************************************
	11	*/
	12
	13	#include "unicode/utypes.h"
	14
	15	#if !UCONFIG_NO_TRANSLITERATION
	16
	17	#include "transtst.h"
	18	#include "unicode/locid.h"
	19	#include "unicode/dtfmtsym.h"
	20	#include "unicode/normlzr.h"
	21	#include "unicode/translit.h"
	22	#include "unicode/uchar.h"
	23	#include "unicode/unifilt.h"
	24	#include "unicode/uniset.h"
	25	#include "unicode/ustring.h"
	26	#include "unicode/usetiter.h"
	27	#include "unicode/uscript.h"
	28	#include "unicode/utf16.h"
	29	#include "cpdtrans.h"
	30	#include "nultrans.h"
	31	#include "rbt.h"
	32	#include "rbt_pars.h"
	33	#include "anytrans.h"
	34	#include "esctrn.h"
	35	#include "name2uni.h"
	36	#include "nortrans.h"
	37	#include "remtrans.h"
	38	#include "titletrn.h"
	39	#include "tolowtrn.h"
	40	#include "toupptrn.h"
	41	#include "unesctrn.h"
	42	#include "uni2name.h"
	43	#include "cstring.h"
	44	#include "cmemory.h"
	45	#include <stdio.h>
	46
	47	/***********************************************************************
	48
	49	HOW TO USE THIS TEST FILE
	50	-or-
	51	How I developed on two platforms
	52	without losing (too much of) my mind
	53
	54
	55	1. Add new tests by copying/pasting/changing existing tests. On Java,
	56	any public void method named Test...() taking no parameters becomes
	57	a test. On C++, you need to modify the header and add a line to
	58	the runIndexedTest() dispatch method.
	59
	60	2. Make liberal use of the expect() method; it is your friend.
	61
	62	3. The tests in this file exactly match those in a sister file on the
	63	other side. The two files are:
	64
	65	icu4j: src/com/ibm/test/translit/TransliteratorTest.java
	66	icu4c: source/test/intltest/transtst.cpp
	67
	68	==> THIS IS THE IMPORTANT PART <==
	69
	70	When you add a test in this file, add it in TransliteratorTest.java
	71	too. Give it the same name and put it in the same relative place.
	72	This makes maintenance a lot simpler for any poor soul who ends up
	73	trying to synchronize the tests between icu4j and icu4c.
	74
	75	4. If you MUST enter a test that is NOT paralleled in the sister file,
	76	then add it in the special non-mirrored section. These are
	77	labeled
	78
	79	"icu4j ONLY"
	80
	81	or
	82
	83	"icu4c ONLY"
	84
	85	Make sure you document the reason the test is here and not there.
	86
	87
	88	Thank you.
	89	The Management
	90	***********************************************************************/
	91
	92	// Define character constants thusly to be EBCDIC-friendly
	93	enum {
	94	LEFT_BRACE=((UChar)0x007B), /{/
	95	PIPE =((UChar)0x007C), /\|/
	96	ZERO =((UChar)0x0030), /0/
	97	UPPER_A =((UChar)0x0041) /A/
	98	};
	99
	100	TransliteratorTest::TransliteratorTest()
	101	: DESERET_DEE((UChar32)0x10414),
	102	DESERET_dee((UChar32)0x1043C)
	103	{
	104	}
	105
	106	TransliteratorTest::~TransliteratorTest() {}
	107
	108	void
	109	TransliteratorTest::runIndexedTest(int32_t index, UBool exec,
	110	const char* &name, char* /par/) {
	111	switch (index) {
	112	TESTCASE(0,TestInstantiation);
	113	TESTCASE(1,TestSimpleRules);
	114	TESTCASE(2,TestRuleBasedInverse);
	115	TESTCASE(3,TestKeyboard);
	116	TESTCASE(4,TestKeyboard2);
	117	TESTCASE(5,TestKeyboard3);
	118	TESTCASE(6,TestArabic);
	119	TESTCASE(7,TestCompoundKana);
	120	TESTCASE(8,TestCompoundHex);
	121	TESTCASE(9,TestFiltering);
	122	TESTCASE(10,TestInlineSet);
	123	TESTCASE(11,TestPatternQuoting);
	124	TESTCASE(12,TestJ277);
	125	TESTCASE(13,TestJ243);
	126	TESTCASE(14,TestJ329);
	127	TESTCASE(15,TestSegments);
	128	TESTCASE(16,TestCursorOffset);
	129	TESTCASE(17,TestArbitraryVariableValues);
	130	TESTCASE(18,TestPositionHandling);
	131	TESTCASE(19,TestHiraganaKatakana);
	132	TESTCASE(20,TestCopyJ476);
	133	TESTCASE(21,TestAnchors);
	134	TESTCASE(22,TestInterIndic);
	135	TESTCASE(23,TestFilterIDs);
	136	TESTCASE(24,TestCaseMap);
	137	TESTCASE(25,TestNameMap);
	138	TESTCASE(26,TestLiberalizedID);
	139	TESTCASE(27,TestCreateInstance);
	140	TESTCASE(28,TestNormalizationTransliterator);
	141	TESTCASE(29,TestCompoundRBT);
	142	TESTCASE(30,TestCompoundFilter);
	143	TESTCASE(31,TestRemove);
	144	TESTCASE(32,TestToRules);
	145	TESTCASE(33,TestContext);
	146	TESTCASE(34,TestSupplemental);
	147	TESTCASE(35,TestQuantifier);
	148	TESTCASE(36,TestSTV);
	149	TESTCASE(37,TestCompoundInverse);
	150	TESTCASE(38,TestNFDChainRBT);
	151	TESTCASE(39,TestNullInverse);
	152	TESTCASE(40,TestAliasInverseID);
	153	TESTCASE(41,TestCompoundInverseID);
	154	TESTCASE(42,TestUndefinedVariable);
	155	TESTCASE(43,TestEmptyContext);
	156	TESTCASE(44,TestCompoundFilterID);
	157	TESTCASE(45,TestPropertySet);
	158	TESTCASE(46,TestNewEngine);
	159	TESTCASE(47,TestQuantifiedSegment);
	160	TESTCASE(48,TestDevanagariLatinRT);
	161	TESTCASE(49,TestTeluguLatinRT);
	162	TESTCASE(50,TestCompoundLatinRT);
	163	TESTCASE(51,TestSanskritLatinRT);
	164	TESTCASE(52,TestLocaleInstantiation);
	165	TESTCASE(53,TestTitleAccents);
	166	TESTCASE(54,TestLocaleResource);
	167	TESTCASE(55,TestParseError);
	168	TESTCASE(56,TestOutputSet);
	169	TESTCASE(57,TestVariableRange);
	170	TESTCASE(58,TestInvalidPostContext);
	171	TESTCASE(59,TestIDForms);
	172	TESTCASE(60,TestToRulesMark);
	173	TESTCASE(61,TestEscape);
	174	TESTCASE(62,TestAnchorMasking);
	175	TESTCASE(63,TestDisplayName);
	176	TESTCASE(64,TestSpecialCases);
	177	#if !UCONFIG_NO_FILE_IO
	178	TESTCASE(65,TestIncrementalProgress);
	179	#endif
	180	TESTCASE(66,TestSurrogateCasing);
	181	TESTCASE(67,TestFunction);
	182	TESTCASE(68,TestInvalidBackRef);
	183	TESTCASE(69,TestMulticharStringSet);
	184	TESTCASE(70,TestUserFunction);
	185	TESTCASE(71,TestAnyX);
	186	TESTCASE(72,TestSourceTargetSet);
	187	TESTCASE(73,TestGurmukhiDevanagari);
	188	TESTCASE(74,TestPatternWhiteSpace);
	189	TESTCASE(75,TestAllCodepoints);
	190	TESTCASE(76,TestBoilerplate);
	191	TESTCASE(77,TestAlternateSyntax);
	192	TESTCASE(78,TestBeginEnd);
	193	TESTCASE(79,TestBeginEndToRules);
	194	TESTCASE(80,TestRegisterAlias);
	195	TESTCASE(81,TestRuleStripping);
	196	TESTCASE(82,TestHalfwidthFullwidth);
	197	TESTCASE(83,TestThai);
	198	TESTCASE(84,TestAny);
	199	TESTCASE(85,TestHansHant);
	200	default: name = ""; break;
	201	}
	202	}
	203
	204	/**
	205	* Make sure every system transliterator can be instantiated.
	206	*
	207	* ALSO test that the result of toRules() for each rule is a valid
	208	* rule. Do this here so we don't have to have another test that
	209	* instantiates everything as well.
	210	*/
	211	void TransliteratorTest::TestInstantiation() {
	212	UErrorCode ec = U_ZERO_ERROR;
	213	StringEnumeration* avail = Transliterator::getAvailableIDs(ec);
	214	assertSuccess("getAvailableIDs()", ec);
	215	assertTrue("getAvailableIDs()!=NULL", avail!=NULL);
	216	int32_t n = Transliterator::countAvailableIDs();
	217	assertTrue("getAvailableIDs().count()==countAvailableIDs()",
	218	avail->count(ec) == n);
	219	assertSuccess("count()", ec);
	220	UnicodeString name;
	221	for (int32_t i=0; i<n; ++i) {
	222	const UnicodeString& id = *avail->snext(ec);
	223	if (!assertSuccess("snext()", ec) \|\|
	224	!assertTrue("snext()!=NULL", (&id)!=NULL, TRUE)) {
	225	break;
	226	}
	227	UnicodeString id2 = Transliterator::getAvailableID(i);
	228	if (id.length() < 1) {
	229	errln(UnicodeString("FAIL: getAvailableID(") +
	230	i + ") returned empty string");
	231	continue;
	232	}
	233	if (id != id2) {
	234	errln(UnicodeString("FAIL: getAvailableID(") +
	235	i + ") != getAvailableIDs().snext()");
	236	continue;
	237	}
	238	UParseError parseError;
	239	UErrorCode status = U_ZERO_ERROR;
	240	Transliterator* t = Transliterator::createInstance(id,
	241	UTRANS_FORWARD, parseError,status);
	242	name.truncate(0);
	243	Transliterator::getDisplayName(id, name);
	244	if (t == 0) {
	245	#if UCONFIG_NO_BREAK_ITERATION
	246	// If UCONFIG_NO_BREAK_ITERATION is on, then only Thai should fail.
	247	if (id.compare((UnicodeString)"Thai-Latn") != 0 &&
	248	id.compare((UnicodeString)"Thai-Latin") != 0)
	249	#endif
	250	dataerrln(UnicodeString("FAIL: Couldn't create ") + id +
	251	/", parse error " + parseError.code +/
	252	", line " + parseError.line +
	253	", offset " + parseError.offset +
	254	", pre-context " + prettify(parseError.preContext, TRUE) +
	255	", post-context " +prettify(parseError.postContext,TRUE) +
	256	", Error: " + u_errorName(status));
	257	// When createInstance fails, it deletes the failing
	258	// entry from the available ID list. We detect this
	259	// here by looking for a change in countAvailableIDs.
	260	int32_t nn = Transliterator::countAvailableIDs();
	261	if (nn == (n - 1)) {
	262	n = nn;
	263	--i; // Compensate for deleted entry
	264	}
	265	} else {
	266	logln(UnicodeString("OK: ") + name + " (" + id + ")");
	267
	268	// Now test toRules
	269	UnicodeString rules;
	270	t->toRules(rules, TRUE);
	271	Transliterator *u = Transliterator::createFromRules("x",
	272	rules, UTRANS_FORWARD, parseError,status);
	273	if (u == 0) {
	274	errln(UnicodeString("FAIL: ") + id +
	275	".createFromRules() => bad rules" +
	276	/", parse error " + parseError.code +/
	277	", line " + parseError.line +
	278	", offset " + parseError.offset +
	279	", context " + prettify(parseError.preContext, TRUE) +
	280	", rules: " + prettify(rules, TRUE));
	281	} else {
	282	delete u;
	283	}
	284	delete t;
	285	}
	286	}
	287	assertTrue("snext()==NULL", avail->snext(ec)==NULL);
	288	assertSuccess("snext()", ec);
	289	delete avail;
	290
	291	// Now test the failure path
	292	UParseError parseError;
	293	UErrorCode status = U_ZERO_ERROR;
	294	UnicodeString id("<Not a valid Transliterator ID>");
	295	Transliterator* t = Transliterator::createInstance(id, UTRANS_FORWARD, parseError, status);
	296	if (t != 0) {
	297	errln("FAIL: " + id + " returned a transliterator");
	298	delete t;
	299	} else {
	300	logln("OK: Bogus ID handled properly");
	301	}
	302	}
	303
	304	void TransliteratorTest::TestSimpleRules(void) {
	305	/* Example: rules 1. ab>x\|y
	306	* 2. yc>z
	307	*
	308	* []\|eabcd start - no match, copy e to tranlated buffer
	309	* [e]\|abcd match rule 1 - copy output & adjust cursor
	310	* [ex\|y]cd match rule 2 - copy output & adjust cursor
	311	* [exz]\|d no match, copy d to transliterated buffer
	312	* [exzd]\| done
	313	*/
	314	expect(UnicodeString("ab>x\|y;", "") +
	315	"yc>z",
	316	"eabcd", "exzd");
	317
	318	/* Another set of rules:
	319	* 1. ab>x\|yzacw
	320	* 2. za>q
	321	* 3. qc>r
	322	* 4. cw>n
	323	*
	324	* []\|ab Rule 1
	325	* [x\|yzacw] No match
	326	* [xy\|zacw] Rule 2
	327	* [xyq\|cw] Rule 4
	328	* [xyqn]\| Done
	329	*/
	330	expect(UnicodeString("ab>x\|yzacw;") +
	331	"za>q;" +
	332	"qc>r;" +
	333	"cw>n",
	334	"ab", "xyqn");
	335
	336	/* Test categories
	337	*/
	338	UErrorCode status = U_ZERO_ERROR;
	339	UParseError parseError;
	340	Transliterator *t = Transliterator::createFromRules(
	341	"<ID>",
	342	UnicodeString("$dummy=").append((UChar)0xE100) +
	343	UnicodeString(";"
	344	"$vowel=[aeiouAEIOU];"
	345	"$lu=[:Lu:];"
	346	"$vowel } $lu > '!';"
	347	"$vowel > '&';"
	348	"'!' { $lu > '^';"
	349	"$lu > '*';"
	350	"a > ERROR", ""),
	351	UTRANS_FORWARD, parseError,
	352	status);
	353	if (U_FAILURE(status)) {
	354	dataerrln("FAIL: RBT constructor failed - %s", u_errorName(status));
	355	return;
	356	}
	357	expect(t, "abcdefgABCDEFGU", "&bcd&fg!^!^&");
	358	delete t;
	359	}
	360
	361	/**
	362	* Test inline set syntax and set variable syntax.
	363	*/
	364	void TransliteratorTest::TestInlineSet(void) {
	365	expect("{ [:Ll:] } x > y; [:Ll:] > z;", "aAbxq", "zAyzz");
	366	expect("a[0-9]b > qrs", "1a7b9", "1qrs9");
	367
	368	expect(UnicodeString(
	369	"$digit = [0-9];"
	370	"$alpha = [a-zA-Z];"
	371	"$alphanumeric = [$digit $alpha];" // ***
	372	"$special = [^$alphanumeric];" // ***
	373	"$alphanumeric > '-';"
	374	"$special > '*';", ""),
	375
	376	"thx-1138", "---*----");
	377	}
	378
	379	/**
	380	* Create some inverses and confirm that they work. We have to be
	381	* careful how we do this, since the inverses will not be true
	382	* inverses -- we can't throw any random string at the composition
	383	* of the transliterators and expect the identity function. F x
	384	* F' != I. However, if we are careful about the input, we will
	385	* get the expected results.
	386	*/
	387	void TransliteratorTest::TestRuleBasedInverse(void) {
	388	UnicodeString RULES =
	389	UnicodeString("abc>zyx;") +
	390	"ab>yz;" +
	391	"bc>zx;" +
	392	"ca>xy;" +
	393	"a>x;" +
	394	"b>y;" +
	395	"c>z;" +
	396
	397	"abc<zyx;" +
	398	"ab<yz;" +
	399	"bc<zx;" +
	400	"ca<xy;" +
	401	"a<x;" +
	402	"b<y;" +
	403	"c<z;" +
	404
	405	"";
	406
	407	const char* DATA[] = {
	408	// Careful here -- random strings will not work. If we keep
	409	// the left side to the domain and the right side to the range
	410	// we will be okay though (left, abc; right xyz).
	411	"a", "x",
	412	"abcacab", "zyxxxyy",
	413	"caccb", "xyzzy",
	414	};
	415
	416	int32_t DATA_length = UPRV_LENGTHOF(DATA);
	417
	418	UErrorCode status = U_ZERO_ERROR;
	419	UParseError parseError;
	420	Transliterator *fwd = Transliterator::createFromRules("<ID>", RULES,
	421	UTRANS_FORWARD, parseError, status);
	422	Transliterator *rev = Transliterator::createFromRules("<ID>", RULES,
	423	UTRANS_REVERSE, parseError, status);
	424	if (U_FAILURE(status)) {
	425	errln("FAIL: RBT constructor failed");
	426	return;
	427	}
	428	for (int32_t i=0; i<DATA_length; i+=2) {
	429	expect(*fwd, DATA[i], DATA[i+1]);
	430	expect(*rev, DATA[i+1], DATA[i]);
	431	}
	432	delete fwd;
	433	delete rev;
	434	}
	435
	436	/**
	437	* Basic test of keyboard.
	438	*/
	439	void TransliteratorTest::TestKeyboard(void) {
	440	UParseError parseError;
	441	UErrorCode status = U_ZERO_ERROR;
	442	Transliterator *t = Transliterator::createFromRules("<ID>",
	443	UnicodeString("psch>Y;")
	444	+"ps>y;"
	445	+"ch>x;"
	446	+"a>A;",
	447	UTRANS_FORWARD, parseError,
	448	status);
	449	if (U_FAILURE(status)) {
	450	errln("FAIL: RBT constructor failed");
	451	return;
	452	}
	453	const char* DATA[] = {
	454	// insertion, buffer
	455	"a", "A",
	456	"p", "Ap",
	457	"s", "Aps",
	458	"c", "Apsc",
	459	"a", "AycA",
	460	"psch", "AycAY",
	461	0, "AycAY", // null means finishKeyboardTransliteration
	462	};
	463
	464	keyboardAux(*t, DATA, UPRV_LENGTHOF(DATA));
	465	delete t;
	466	}
	467
	468	/**
	469	* Basic test of keyboard with cursor.
	470	*/
	471	void TransliteratorTest::TestKeyboard2(void) {
	472	UParseError parseError;
	473	UErrorCode status = U_ZERO_ERROR;
	474	Transliterator *t = Transliterator::createFromRules("<ID>",
	475	UnicodeString("ych>Y;")
	476	+"ps>\|y;"
	477	+"ch>x;"
	478	+"a>A;",
	479	UTRANS_FORWARD, parseError,
	480	status);
	481	if (U_FAILURE(status)) {
	482	errln("FAIL: RBT constructor failed");
	483	return;
	484	}
	485	const char* DATA[] = {
	486	// insertion, buffer
	487	"a", "A",
	488	"p", "Ap",
	489	"s", "Aps", // modified for rollback - "Ay",
	490	"c", "Apsc", // modified for rollback - "Ayc",
	491	"a", "AycA",
	492	"p", "AycAp",
	493	"s", "AycAps", // modified for rollback - "AycAy",
	494	"c", "AycApsc", // modified for rollback - "AycAyc",
	495	"h", "AycAY",
	496	0, "AycAY", // null means finishKeyboardTransliteration
	497	};
	498
	499	keyboardAux(*t, DATA, UPRV_LENGTHOF(DATA));
	500	delete t;
	501	}
	502
	503	/**
	504	* Test keyboard transliteration with back-replacement.
	505	*/
	506	void TransliteratorTest::TestKeyboard3(void) {
	507	// We want th>z but t>y. Furthermore, during keyboard
	508	// transliteration we want t>y then yh>z if t, then h are
	509	// typed.
	510	UnicodeString RULES("t>\|y;"
	511	"yh>z;");
	512
	513	const char* DATA[] = {
	514	// Column 1: characters to add to buffer (as if typed)
	515	// Column 2: expected appearance of buffer after
	516	// keyboard xliteration.
	517	"a", "a",
	518	"b", "ab",
	519	"t", "abt", // modified for rollback - "aby",
	520	"c", "abyc",
	521	"t", "abyct", // modified for rollback - "abycy",
	522	"h", "abycz",
	523	0, "abycz", // null means finishKeyboardTransliteration
	524	};
	525
	526	UParseError parseError;
	527	UErrorCode status = U_ZERO_ERROR;
	528	Transliterator *t = Transliterator::createFromRules("<ID>", RULES, UTRANS_FORWARD, parseError, status);
	529	if (U_FAILURE(status)) {
	530	errln("FAIL: RBT constructor failed");
	531	return;
	532	}
	533	keyboardAux(*t, DATA, UPRV_LENGTHOF(DATA));
	534	delete t;
	535	}
	536
	537	void TransliteratorTest::keyboardAux(const Transliterator& t,
	538	const char* DATA[], int32_t DATA_length) {
	539	UErrorCode status = U_ZERO_ERROR;
	540	UTransPosition index={0, 0, 0, 0};
	541	UnicodeString s;
	542	for (int32_t i=0; i<DATA_length; i+=2) {
	543	UnicodeString log;
	544	if (DATA[i] != 0) {
	545	log = s + " + "
	546	+ DATA[i]
	547	+ " -> ";
	548	t.transliterate(s, index, DATA[i], status);
	549	} else {
	550	log = s + " => ";
	551	t.finishTransliteration(s, index);
	552	}
	553	// Show the start index '{' and the cursor '\|'
	554	UnicodeString a, b, c;
	555	s.extractBetween(0, index.contextStart, a);
	556	s.extractBetween(index.contextStart, index.start, b);
	557	s.extractBetween(index.start, s.length(), c);
	558	log.append(a).
	559	append((UChar)LEFT_BRACE).
	560	append(b).
	561	append((UChar)PIPE).
	562	append(c);
	563	if (s == DATA[i+1] && U_SUCCESS(status)) {
	564	logln(log);
	565	} else {
	566	errln(UnicodeString("FAIL: ") + log + ", expected " + DATA[i+1]);
	567	}
	568	}
	569	}
	570
	571	void TransliteratorTest::TestArabic(void) {
	572	// Test disabled for 2.0 until new Arabic transliterator can be written.
	573	// /*
	574	// const char* DATA[] = {
	575	// "Arabic", "\u062a\u062a\u0645\u062a\u0639\u0020"+
	576	// "\u0627\u0644\u0644\u063a\u0629\u0020"+
	577	// "\u0627\u0644\u0639\u0631\u0628\u0628\u064a\u0629\u0020"+
	578	// "\u0628\u0628\u0646\u0638\u0645\u0020"+
	579	// "\u0643\u062a\u0627\u0628\u0628\u064a\u0629\u0020"+
	580	// "\u062c\u0645\u064a\u0644\u0629",
	581	// };
	582	// */
	583	//
	584	// UChar ar_raw[] = {
	585	// 0x062a, 0x062a, 0x0645, 0x062a, 0x0639, 0x0020, 0x0627,
	586	// 0x0644, 0x0644, 0x063a, 0x0629, 0x0020, 0x0627, 0x0644,
	587	// 0x0639, 0x0631, 0x0628, 0x0628, 0x064a, 0x0629, 0x0020,
	588	// 0x0628, 0x0628, 0x0646, 0x0638, 0x0645, 0x0020, 0x0643,
	589	// 0x062a, 0x0627, 0x0628, 0x0628, 0x064a, 0x0629, 0x0020,
	590	// 0x062c, 0x0645, 0x064a, 0x0644, 0x0629, 0
	591	// };
	592	// UnicodeString ar(ar_raw);
	593	// UErrorCode status=U_ZERO_ERROR;
	594	// UParseError parseError;
	595	// Transliterator *t = Transliterator::createInstance("Latin-Arabic", UTRANS_FORWARD, parseError, status);
	596	// if (t == 0) {
	597	// errln("FAIL: createInstance failed");
	598	// return;
	599	// }
	600	// expect(*t, "Arabic", ar);
	601	// delete t;
	602	}
	603
	604	/**
	605	* Compose the Kana transliterator forward and reverse and try
	606	* some strings that should come out unchanged.
	607	*/
	608	void TransliteratorTest::TestCompoundKana(void) {
	609	UParseError parseError;
	610	UErrorCode status = U_ZERO_ERROR;
	611	Transliterator* t = Transliterator::createInstance("Latin-Hiragana;Hiragana-Latin", UTRANS_FORWARD, parseError, status);
	612	if (t == 0) {
	613	dataerrln("FAIL: construction of Latin-Hiragana;Hiragana-Latin failed - %s", u_errorName(status));
	614	} else {
	615	expect(*t, "aaaaa", "aaaaa");
	616	delete t;
	617	}
	618	}
	619
	620	/**
	621	* Compose the hex transliterators forward and reverse.
	622	*/
	623	void TransliteratorTest::TestCompoundHex(void) {
	624	UParseError parseError;
	625	UErrorCode status = U_ZERO_ERROR;
	626	Transliterator* a = Transliterator::createInstance("Any-Hex", UTRANS_FORWARD, parseError, status);
	627	Transliterator* b = Transliterator::createInstance("Hex-Any", UTRANS_FORWARD, parseError, status);
	628	Transliterator* transab[] = { a, b };
	629	Transliterator* transba[] = { b, a };
	630	if (a == 0 \|\| b == 0) {
	631	errln("FAIL: construction failed");
	632	delete a;
	633	delete b;
	634	return;
	635	}
	636	// Do some basic tests of a
	637	expect(*a, "01", UnicodeString("\\u0030\\u0031", ""));
	638	// Do some basic tests of b
	639	expect(*b, UnicodeString("\\u0030\\u0031", ""), "01");
	640
	641	Transliterator* ab = new CompoundTransliterator(transab, 2);
	642	UnicodeString s("abcde", "");
	643	expect(*ab, s, s);
	644
	645	UnicodeString str(s);
	646	a->transliterate(str);
	647	Transliterator* ba = new CompoundTransliterator(transba, 2);
	648	expect(*ba, str, str);
	649
	650	delete ab;
	651	delete ba;
	652	delete a;
	653	delete b;
	654	}
	655
	656	int gTestFilterClassID = 0;
	657	/**
	658	* Used by TestFiltering().
	659	*/
	660	class TestFilter : public UnicodeFilter {
	661	virtual TestFilter* clone() const {
	662	return new TestFilter(*this);
	663	}
	664	virtual UBool contains(UChar32 c) const {
	665	return c != (UChar)0x0063 /c/;
	666	}
	667	// Stubs
	668	virtual UnicodeString& toPattern(UnicodeString& result,
	669	UBool /escapeUnprintable/) const {
	670	return result;
	671	}
	672	virtual UBool matchesIndexValue(uint8_t /v/) const {
	673	return FALSE;
	674	}
	675	virtual void addMatchSetTo(UnicodeSet& /toUnionTo/) const {}
	676	public:
	677	UClassID getDynamicClassID() const { return (UClassID)&gTestFilterClassID; }
	678	};
	679
	680	/**
	681	* Do some basic tests of filtering.
	682	*/
	683	void TransliteratorTest::TestFiltering(void) {
	684	UParseError parseError;
	685	UErrorCode status = U_ZERO_ERROR;
	686	Transliterator* hex = Transliterator::createInstance("Any-Hex", UTRANS_FORWARD, parseError, status);
	687	if (hex == 0) {
	688	errln("FAIL: createInstance(Any-Hex) failed");
	689	return;
	690	}
	691	hex->adoptFilter(new TestFilter());
	692	UnicodeString s("abcde");
	693	hex->transliterate(s);
	694	UnicodeString exp("\\u0061\\u0062c\\u0064\\u0065", "");
	695	if (s == exp) {
	696	logln(UnicodeString("Ok: \"") + exp + "\"");
	697	} else {
	698	logln(UnicodeString("FAIL: \"") + s + "\", wanted \"" + exp + "\"");
	699	}
	700
	701	// ICU4C ONLY. Do not find Transliterator.orphanFilter() in ICU4J.
	702	UnicodeFilter *f = hex->orphanFilter();
	703	if (f == NULL){
	704	errln("FAIL: orphanFilter() should get a UnicodeFilter");
	705	} else {
	706	delete f;
	707	}
	708	delete hex;
	709	}
	710
	711	/**
	712	* Test anchors
	713	*/
	714	void TransliteratorTest::TestAnchors(void) {
	715	expect(UnicodeString("^a > 0; a$ > 2 ; a > 1;", ""),
	716	"aaa",
	717	"012");
	718	expect(UnicodeString("$s=[z$]; $s{a>0; a}$s>2; a>1;", ""),
	719	"aaa",
	720	"012");
	721	expect(UnicodeString("^ab > 01 ;"
	722	" ab > \|8 ;"
	723	" b > k ;"
	724	" 8x$ > 45 ;"
	725	" 8x > 77 ;", ""),
	726
	727	"ababbabxabx",
	728	"018k7745");
	729	expect(UnicodeString("$s = [z$] ;"
	730	"$s{ab > 01 ;"
	731	" ab > \|8 ;"
	732	" b > k ;"
	733	" 8x}$s > 45 ;"
	734	" 8x > 77 ;", ""),
	735
	736	"abzababbabxzabxabx",
	737	"01z018k45z01x45");
	738	}
	739
	740	/**
	741	* Test pattern quoting and escape mechanisms.
	742	*/
	743	void TransliteratorTest::TestPatternQuoting(void) {
	744	// Array of 3n items
	745	// Each item is <rules>, <input>, <expected output>
	746	const UnicodeString DATA[] = {
	747	UnicodeString(UChar(0x4E01)) + ">'[male adult]'",
	748	UnicodeString(UChar(0x4E01)),
	749	"[male adult]"
	750	};
	751
	752	for (int32_t i=0; i<3; i+=3) {
	753	logln(UnicodeString("Pattern: ") + prettify(DATA[i]));
	754	UParseError parseError;
	755	UErrorCode status = U_ZERO_ERROR;
	756	Transliterator *t = Transliterator::createFromRules("<ID>", DATA[i], UTRANS_FORWARD, parseError, status);
	757	if (U_FAILURE(status)) {
	758	errln("RBT constructor failed");
	759	} else {
	760	expect(*t, DATA[i+1], DATA[i+2]);
	761	}
	762	delete t;
	763	}
	764	}
	765
	766	/**
	767	* Regression test for bugs found in Greek transliteration.
	768	*/
	769	void TransliteratorTest::TestJ277(void) {
	770	UErrorCode status = U_ZERO_ERROR;
	771	UParseError parseError;
	772	Transliterator *gl = Transliterator::createInstance("Greek-Latin; NFD; [:M:]Remove; NFC", UTRANS_FORWARD, parseError, status);
	773	if (gl == NULL) {
	774	dataerrln("FAIL: createInstance(Greek-Latin) returned NULL - %s", u_errorName(status));
	775	return;
	776	}
	777
	778	UChar sigma = 0x3C3;
	779	UChar upsilon = 0x3C5;
	780	UChar nu = 0x3BD;
	781	// UChar PHI = 0x3A6;
	782	UChar alpha = 0x3B1;
	783	// UChar omega = 0x3C9;
	784	// UChar omicron = 0x3BF;
	785	// UChar epsilon = 0x3B5;
	786
	787	// sigma upsilon nu -> syn
	788	UnicodeString syn;
	789	syn.append(sigma).append(upsilon).append(nu);
	790	expect(*gl, syn, "syn");
	791
	792	// sigma alpha upsilon nu -> saun
	793	UnicodeString sayn;
	794	sayn.append(sigma).append(alpha).append(upsilon).append(nu);
	795	expect(*gl, sayn, "saun");
	796
	797	// Again, using a smaller rule set
	798	UnicodeString rules(
	799	"$alpha = \\u03B1;"
	800	"$nu = \\u03BD;"
	801	"$sigma = \\u03C3;"
	802	"$ypsilon = \\u03C5;"
	803	"$vowel = [aeiouAEIOU$alpha$ypsilon];"
	804	"s <> $sigma;"
	805	"a <> $alpha;"
	806	"u <> $vowel { $ypsilon;"
	807	"y <> $ypsilon;"
	808	"n <> $nu;",
	809	"");
	810	Transliterator *mini = Transliterator::createFromRules("mini", rules, UTRANS_REVERSE, parseError, status);
	811	if (U_FAILURE(status)) { errln("FAIL: Transliterator constructor failed"); return; }
	812	expect(*mini, syn, "syn");
	813	expect(*mini, sayn, "saun");
	814	delete mini;
	815	mini = NULL;
	816
	817	#if !UCONFIG_NO_FORMATTING
	818	// Transliterate the Greek locale data
	819	Locale el("el");
	820	DateFormatSymbols syms(el, status);
	821	if (U_FAILURE(status)) { errln("FAIL: Transliterator constructor failed"); return; }
	822	int32_t i, count;
	823	const UnicodeString* data = syms.getMonths(count);
	824	for (i=0; i<count; ++i) {
	825	if (data[i].length() == 0) {
	826	continue;
	827	}
	828	UnicodeString out(data[i]);
	829	gl->transliterate(out);
	830	UBool ok = TRUE;
	831	if (data[i].length() >= 2 && out.length() >= 2 &&
	832	u_isupper(data[i].charAt(0)) && u_islower(data[i].charAt(1))) {
	833	if (!(u_isupper(out.charAt(0)) && u_islower(out.charAt(1)))) {
	834	ok = FALSE;
	835	}
	836	}
	837	if (ok) {
	838	logln(prettify(data[i] + " -> " + out));
	839	} else {
	840	errln(UnicodeString("FAIL: ") + prettify(data[i] + " -> " + out));
	841	}
	842	}
	843	#endif
	844
	845	delete gl;
	846	}
	847
	848	/**
	849	* Prefix, suffix support in hex transliterators
	850	*/
	851	void TransliteratorTest::TestJ243(void) {
	852	UErrorCode ec = U_ZERO_ERROR;
	853
	854	// Test default Hex-Any, which should handle
	855	// \u, \U, u+, and U+
	856	Transliterator *hex =
	857	Transliterator::createInstance("Hex-Any", UTRANS_FORWARD, ec);
	858	if (assertSuccess("getInstance", ec)) {
	859	expect(*hex, UnicodeString("\\u0041+\\U00000042,U+0043uU+0044z", ""), "A+B,CuDz");
	860	}
	861	delete hex;
	862
	863	// // Try a custom Hex-Unicode
	864	// // \uXXXX and &#xXXXX;
	865	// ec = U_ZERO_ERROR;
	866	// HexToUnicodeTransliterator hex2(UnicodeString("\\\\u###0;&\\#x###0\\;", ""), ec);
	867	// expect(hex2, UnicodeString("\\u61\\u062\\u0063\\u00645\\u66x0123", ""),
	868	// "abcd5fx0123");
	869	// // Try custom Any-Hex (default is tested elsewhere)
	870	// ec = U_ZERO_ERROR;
	871	// UnicodeToHexTransliterator hex3(UnicodeString("&\\#x###0;", ""), ec);
	872	// expect(hex3, "012", "012");
	873	}
	874
	875	/**
	876	* Parsers need better syntax error messages.
	877	*/
	878	void TransliteratorTest::TestJ329(void) {
	879
	880	struct { UBool containsErrors; const char* rule; } DATA[] = {
	881	{ FALSE, "a > b; c > d" },
	882	{ TRUE, "a > b; no operator; c > d" },
	883	};
	884	int32_t DATA_length = UPRV_LENGTHOF(DATA);
	885
	886	for (int32_t i=0; i<DATA_length; ++i) {
	887	UErrorCode status = U_ZERO_ERROR;
	888	UParseError parseError;
	889	Transliterator *rbt = Transliterator::createFromRules("<ID>",
	890	DATA[i].rule,
	891	UTRANS_FORWARD,
	892	parseError,
	893	status);
	894	UBool gotError = U_FAILURE(status);
	895	UnicodeString desc(DATA[i].rule);
	896	desc.append(gotError ? " -> error" : " -> no error");
	897	if (gotError) {
	898	desc = desc + ", ParseError code=" + u_errorName(status) +
	899	" line=" + parseError.line +
	900	" offset=" + parseError.offset +
	901	" context=" + parseError.preContext;
	902	}
	903	if (gotError == DATA[i].containsErrors) {
	904	logln(UnicodeString("Ok: ") + desc);
	905	} else {
	906	errln(UnicodeString("FAIL: ") + desc);
	907	}
	908	delete rbt;
	909	}
	910	}
	911
	912	/**
	913	* Test segments and segment references.
	914	*/
	915	void TransliteratorTest::TestSegments(void) {
	916	// Array of 3n items
	917	// Each item is <rules>, <input>, <expected output>
	918	UnicodeString DATA[] = {
	919	"([a-z]) '.' ([0-9]) > $2 '-' $1",
	920	"abc.123.xyz.456",
	921	"ab1-c23.xy4-z56",
	922
	923	// nested
	924	"(([a-z])([0-9])) > $1 '.' $2 '.' $3;",
	925	"a1 b2",
	926	"a1.a.1 b2.b.2",
	927	};
	928	int32_t DATA_length = UPRV_LENGTHOF(DATA);
	929
	930	for (int32_t i=0; i<DATA_length; i+=3) {
	931	logln("Pattern: " + prettify(DATA[i]));
	932	UParseError parseError;
	933	UErrorCode status = U_ZERO_ERROR;
	934	Transliterator *t = Transliterator::createFromRules("ID", DATA[i], UTRANS_FORWARD, parseError, status);
	935	if (U_FAILURE(status)) {
	936	errln("FAIL: RBT constructor");
	937	} else {
	938	expect(*t, DATA[i+1], DATA[i+2]);
	939	}
	940	delete t;
	941	}
	942	}
	943
	944	/**
	945	* Test cursor positioning outside of the key
	946	*/
	947	void TransliteratorTest::TestCursorOffset(void) {
	948	// Array of 3n items
	949	// Each item is <rules>, <input>, <expected output>
	950	UnicodeString DATA[] = {
	951	"pre {alpha} post > \| @ ALPHA ;"
	952	"eALPHA > beta ;"
	953	"pre {beta} post > BETA @@ \| ;"
	954	"post > xyz",
	955
	956	"prealphapost prebetapost",
	957
	958	"prbetaxyz preBETApost",
	959	};
	960	int32_t DATA_length = UPRV_LENGTHOF(DATA);
	961
	962	for (int32_t i=0; i<DATA_length; i+=3) {
	963	logln("Pattern: " + prettify(DATA[i]));
	964	UParseError parseError;
	965	UErrorCode status = U_ZERO_ERROR;
	966	Transliterator *t = Transliterator::createFromRules("<ID>", DATA[i], UTRANS_FORWARD, parseError, status);
	967	if (U_FAILURE(status)) {
	968	errln("FAIL: RBT constructor");
	969	} else {
	970	expect(*t, DATA[i+1], DATA[i+2]);
	971	}
	972	delete t;
	973	}
	974	}
	975
	976	/**
	977	* Test zero length and > 1 char length variable values. Test
	978	* use of variable refs in UnicodeSets.
	979	*/
	980	void TransliteratorTest::TestArbitraryVariableValues(void) {
	981	// Array of 3n items
	982	// Each item is <rules>, <input>, <expected output>
	983	UnicodeString DATA[] = {
	984	"$abe = ab;"
	985	"$pat = x[yY]z;"
	986	"$ll = 'a-z';"
	987	"$llZ = [$ll];"
	988	"$llY = [$ll$pat];"
	989	"$emp = ;"
	990
	991	"$abe > ABE;"
	992	"$pat > END;"
	993	"$llZ > 1;"
	994	"$llY > 2;"
	995	"7$emp 8 > 9;"
	996	"",
	997
	998	"ab xYzxyz stY78",
	999	"ABE ENDEND 1129",
	1000	};
	1001	int32_t DATA_length = UPRV_LENGTHOF(DATA);
	1002
	1003	for (int32_t i=0; i<DATA_length; i+=3) {
	1004	logln("Pattern: " + prettify(DATA[i]));
	1005	UParseError parseError;
	1006	UErrorCode status = U_ZERO_ERROR;
	1007	Transliterator *t = Transliterator::createFromRules("<ID>", DATA[i], UTRANS_FORWARD, parseError, status);
	1008	if (U_FAILURE(status)) {
	1009	errln("FAIL: RBT constructor");
	1010	} else {
	1011	expect(*t, DATA[i+1], DATA[i+2]);
	1012	}
	1013	delete t;
	1014	}
	1015	}
	1016
	1017	/**
	1018	* Confirm that the contextStart, contextLimit, start, and limit
	1019	* behave correctly. J474.
	1020	*/
	1021	void TransliteratorTest::TestPositionHandling(void) {
	1022	// Array of 3n items
	1023	// Each item is <rules>, <input>, <expected output>
	1024	const char* DATA[] = {
	1025	"a{t} > SS ; {t}b > UU ; {t} > TT ;",
	1026	"xtat txtb", // pos 0,9,0,9
	1027	"xTTaSS TTxUUb",
	1028
	1029	"a{t} > SS ; {t}b > UU ; {t} > TT ; a > A ; b > B ;",
	1030	"xtat txtb", // pos 2,9,3,8
	1031	"xtaSS TTxUUb",
	1032
	1033	"a{t} > SS ; {t}b > UU ; {t} > TT ; a > A ; b > B ;",
	1034	"xtat txtb", // pos 3,8,3,8
	1035	"xtaTT TTxTTb",
	1036	};
	1037
	1038	// Array of 4n positions -- these go with the DATA array
	1039	// They are: contextStart, contextLimit, start, limit
	1040	int32_t POS[] = {
	1041	0, 9, 0, 9,
	1042	2, 9, 3, 8,
	1043	3, 8, 3, 8,
	1044	};
	1045
	1046	int32_t n = UPRV_LENGTHOF(DATA) / 3;
	1047	for (int32_t i=0; i<n; i++) {
	1048	UErrorCode status = U_ZERO_ERROR;
	1049	UParseError parseError;
	1050	Transliterator *t = Transliterator::createFromRules("<ID>",
	1051	DATA[3*i], UTRANS_FORWARD, parseError, status);
	1052	if (U_FAILURE(status)) {
	1053	delete t;
	1054	errln("FAIL: RBT constructor");
	1055	return;
	1056	}
	1057	UTransPosition pos;
	1058	pos.contextStart= POS[4*i];
	1059	pos.contextLimit = POS[4*i+1];
	1060	pos.start = POS[4*i+2];
	1061	pos.limit = POS[4*i+3];
	1062	UnicodeString rsource(DATA[3*i+1]);
	1063	t->transliterate(rsource, pos, status);
	1064	if (U_FAILURE(status)) {
	1065	delete t;
	1066	errln("FAIL: transliterate");
	1067	return;
	1068	}
	1069	t->finishTransliteration(rsource, pos);
	1070	expectAux(DATA[3*i],
	1071	DATA[3*i+1],
	1072	rsource,
	1073	DATA[3*i+2]);
	1074	delete t;
	1075	}
	1076	}
	1077
	1078	/**
	1079	* Test the Hiragana-Katakana transliterator.
	1080	*/
	1081	void TransliteratorTest::TestHiraganaKatakana(void) {
	1082	UParseError parseError;
	1083	UErrorCode status = U_ZERO_ERROR;
	1084	Transliterator* hk = Transliterator::createInstance("Hiragana-Katakana", UTRANS_FORWARD, parseError, status);
	1085	Transliterator* kh = Transliterator::createInstance("Katakana-Hiragana", UTRANS_FORWARD, parseError, status);
	1086	if (hk == 0 \|\| kh == 0) {
	1087	dataerrln("FAIL: createInstance failed - %s", u_errorName(status));
	1088	delete hk;
	1089	delete kh;
	1090	return;
	1091	}
	1092
	1093	// Array of 3n items
	1094	// Each item is "hk"\|"kh"\|"both", <Hiragana>, <Katakana>
	1095	const char* DATA[] = {
	1096	"both",
	1097	"\\u3042\\u3090\\u3099\\u3092\\u3050",
	1098	"\\u30A2\\u30F8\\u30F2\\u30B0",
	1099
	1100	"kh",
	1101	"\\u307C\\u3051\\u3060\\u3042\\u3093\\u30FC",
	1102	"\\u30DC\\u30F6\\u30C0\\u30FC\\u30F3\\u30FC",
	1103	};
	1104	int32_t DATA_length = UPRV_LENGTHOF(DATA);
	1105
	1106	for (int32_t i=0; i<DATA_length; i+=3) {
	1107	UnicodeString h = CharsToUnicodeString(DATA[i+1]);
	1108	UnicodeString k = CharsToUnicodeString(DATA[i+2]);
	1109	switch (*DATA[i]) {
	1110	case 0x68: //'h': // Hiragana-Katakana
	1111	expect(*hk, h, k);
	1112	break;
	1113	case 0x6B: //'k': // Katakana-Hiragana
	1114	expect(*kh, k, h);
	1115	break;
	1116	case 0x62: //'b': // both
	1117	expect(*hk, h, k);
	1118	expect(*kh, k, h);
	1119	break;
	1120	}
	1121	}
	1122	delete hk;
	1123	delete kh;
	1124	}
	1125
	1126	/**
	1127	* Test cloning / copy constructor of RBT.
	1128	*/
	1129	void TransliteratorTest::TestCopyJ476(void) {
	1130	// The real test here is what happens when the destructors are
	1131	// called. So we let one object get destructed, and check to
	1132	// see that its copy still works.
	1133	Transliterator *t2 = 0;
	1134	{
	1135	UParseError parseError;
	1136	UErrorCode status = U_ZERO_ERROR;
	1137	Transliterator *t1 = Transliterator::createFromRules("t1",
	1138	"a>A;b>B;'foo'+>'bar'", UTRANS_FORWARD, parseError, status);
	1139	if (U_FAILURE(status)) {
	1140	errln("FAIL: RBT constructor");
	1141	return;
	1142	}
	1143	t2 = t1->clone(); // Call copy constructor under the covers.
	1144	expect(*t1, "abcfoofoo", "ABcbar");
	1145	delete t1;
	1146	}
	1147	expect(*t2, "abcfoofoo", "ABcbar");
	1148	delete t2;
	1149	}
	1150
	1151	/**
	1152	* Test inter-Indic transliterators. These are composed.
	1153	* ICU4C Jitterbug 483.
	1154	*/
	1155	void TransliteratorTest::TestInterIndic(void) {
	1156	UnicodeString ID("Devanagari-Gujarati", "");
	1157	UErrorCode status = U_ZERO_ERROR;
	1158	UParseError parseError;
	1159	Transliterator* dg = Transliterator::createInstance(ID, UTRANS_FORWARD, parseError, status);
	1160	if (dg == 0) {
	1161	dataerrln("FAIL: createInstance(" + ID + ") returned NULL - " + u_errorName(status));
	1162	return;
	1163	}
	1164	UnicodeString id = dg->getID();
	1165	if (id != ID) {
	1166	errln("FAIL: createInstance(" + ID + ")->getID() => " + id);
	1167	}
	1168	UnicodeString dev = CharsToUnicodeString("\\u0901\\u090B\\u0925");
	1169	UnicodeString guj = CharsToUnicodeString("\\u0A81\\u0A8B\\u0AA5");
	1170	expect(*dg, dev, guj);
	1171	delete dg;
	1172	}
	1173
	1174	/**
	1175	* Test filter syntax in IDs. (J918)
	1176	*/
	1177	void TransliteratorTest::TestFilterIDs(void) {
	1178	// Array of 3n strings:
	1179	// <id>, <inverse id>, <input>, <expected output>
	1180	const char* DATA[] = {
	1181	"[aeiou]Any-Hex", // ID
	1182	"[aeiou]Hex-Any", // expected inverse ID
	1183	"quizzical", // src
	1184	"q\\u0075\\u0069zz\\u0069c\\u0061l", // expected ID.translit(src)
	1185
	1186	"[aeiou]Any-Hex;[^5]Hex-Any",
	1187	"[^5]Any-Hex;[aeiou]Hex-Any",
	1188	"quizzical",
	1189	"q\\u0075izzical",
	1190
	1191	"[abc]Null",
	1192	"[abc]Null",
	1193	"xyz",
	1194	"xyz",
	1195	};
	1196	enum { DATA_length = UPRV_LENGTHOF(DATA) };
	1197
	1198	for (int i=0; i<DATA_length; i+=4) {
	1199	UnicodeString ID(DATA[i], "");
	1200	UnicodeString uID(DATA[i+1], "");
	1201	UnicodeString data2(DATA[i+2], "");
	1202	UnicodeString data3(DATA[i+3], "");
	1203	UParseError parseError;
	1204	UErrorCode status = U_ZERO_ERROR;
	1205	Transliterator *t = Transliterator::createInstance(ID, UTRANS_FORWARD, parseError, status);
	1206	if (t == 0) {
	1207	errln("FAIL: createInstance(" + ID + ") returned NULL");
	1208	return;
	1209	}
	1210	expect(*t, data2, data3);
	1211
	1212	// Check the ID
	1213	if (ID != t->getID()) {
	1214	errln("FAIL: createInstance(" + ID + ").getID() => " +
	1215	t->getID());
	1216	}
	1217
	1218	// Check the inverse
	1219	Transliterator *u = t->createInverse(status);
	1220	if (u == 0) {
	1221	errln("FAIL: " + ID + ".createInverse() returned NULL");
	1222	} else if (u->getID() != uID) {
	1223	errln("FAIL: " + ID + ".createInverse().getID() => " +
	1224	u->getID() + ", expected " + uID);
	1225	}
	1226
	1227	delete t;
	1228	delete u;
	1229	}
	1230	}
	1231
	1232	/**
	1233	* Test the case mapping transliterators.
	1234	*/
	1235	void TransliteratorTest::TestCaseMap(void) {
	1236	UParseError parseError;
	1237	UErrorCode status = U_ZERO_ERROR;
	1238	Transliterator* toUpper =
	1239	Transliterator::createInstance("Any-Upper[^xyzXYZ]", UTRANS_FORWARD, parseError, status);
	1240	Transliterator* toLower =
	1241	Transliterator::createInstance("Any-Lower[^xyzXYZ]", UTRANS_FORWARD, parseError, status);
	1242	Transliterator* toTitle =
	1243	Transliterator::createInstance("Any-Title[^xyzXYZ]", UTRANS_FORWARD, parseError, status);
	1244	if (toUpper==0 \|\| toLower==0 \|\| toTitle==0) {
	1245	errln("FAIL: createInstance returned NULL");
	1246	delete toUpper;
	1247	delete toLower;
	1248	delete toTitle;
	1249	return;
	1250	}
	1251
	1252	expect(*toUpper, "The quick brown fox jumped over the lazy dogs.",
	1253	"THE QUICK BROWN FOx JUMPED OVER THE LAzy DOGS.");
	1254	expect(*toLower, "The quIck brown fOX jUMPED OVER THE LAzY dogs.",
	1255	"the quick brown foX jumped over the lazY dogs.");
	1256	expect(*toTitle, "the quick brown foX can't jump over the laZy dogs.",
	1257	"The Quick Brown FoX Can't Jump Over The LaZy Dogs.");
	1258
	1259	delete toUpper;
	1260	delete toLower;
	1261	delete toTitle;
	1262	}
	1263
	1264	/**
	1265	* Test the name mapping transliterators.
	1266	*/
	1267	void TransliteratorTest::TestNameMap(void) {
	1268	UParseError parseError;
	1269	UErrorCode status = U_ZERO_ERROR;
	1270	Transliterator* uni2name =
	1271	Transliterator::createInstance("Any-Name[^abc]", UTRANS_FORWARD, parseError, status);
	1272	Transliterator* name2uni =
	1273	Transliterator::createInstance("Name-Any", UTRANS_FORWARD, parseError, status);
	1274	if (uni2name==0 \|\| name2uni==0) {
	1275	errln("FAIL: createInstance returned NULL");
	1276	delete uni2name;
	1277	delete name2uni;
	1278	return;
	1279	}
	1280
	1281	// Careful: CharsToUS will convert "\\N" => "N"; use "\\\\N" for \N
	1282	expect(*uni2name, CharsToUnicodeString("\\u00A0abc\\u4E01\\u00B5\\u0A81\\uFFFD\\u0004\\u0009\\u0081\\uFFFF"),
	1283	CharsToUnicodeString("\\\\N{NO-BREAK SPACE}abc\\\\N{CJK UNIFIED IDEOGRAPH-4E01}\\\\N{MICRO SIGN}\\\\N{GUJARATI SIGN CANDRABINDU}\\\\N{REPLACEMENT CHARACTER}\\\\N{<control-0004>}\\\\N{<control-0009>}\\\\N{<control-0081>}\\\\N{<noncharacter-FFFF>}"));
	1284	expect(*name2uni, UNICODE_STRING_SIMPLE("{\\N { NO-BREAK SPACE}abc\\N{ CJK UNIFIED IDEOGRAPH-4E01 }\\N{x\\N{MICRO SIGN}\\N{GUJARATI SIGN CANDRABINDU}\\N{REPLACEMENT CHARACTER}\\N{<control-0004>}\\N{<control-0009>}\\N{<control-0081>}\\N{<noncharacter-FFFF>}\\N{<control-0004>}\\N{"),
	1285	CharsToUnicodeString("{\\u00A0abc\\u4E01\\\\N{x\\u00B5\\u0A81\\uFFFD\\u0004\\u0009\\u0081\\uFFFF\\u0004\\\\N{"));
	1286
	1287	delete uni2name;
	1288	delete name2uni;
	1289
	1290	// round trip
	1291	Transliterator* t =
	1292	Transliterator::createInstance("Any-Name;Name-Any", UTRANS_FORWARD, parseError, status);
	1293	if (t==0) {
	1294	errln("FAIL: createInstance returned NULL");
	1295	delete t;
	1296	return;
	1297	}
	1298
	1299	// Careful: CharsToUS will convert "\\N" => "N"; use "\\\\N" for \N
	1300	UnicodeString s = CharsToUnicodeString("{\\u00A0abc\\u4E01\\\\N{x\\u00B5\\u0A81\\uFFFD\\u0004\\u0009\\u0081\\uFFFF\\u0004\\\\N{");
	1301	expect(*t, s, s);
	1302	delete t;
	1303	}
	1304
	1305	/**
	1306	* Test liberalized ID syntax. 1006c
	1307	*/
	1308	void TransliteratorTest::TestLiberalizedID(void) {
	1309	// Some test cases have an expected getID() value of NULL. This
	1310	// means I have disabled the test case for now. This stuff is
	1311	// still under development, and I haven't decided whether to make
	1312	// getID() return canonical case yet. It will all get rewritten
	1313	// with the move to Source-Target/Variant IDs anyway. [aliu]
	1314	const char* DATA[] = {
	1315	"latin-greek", NULL /"Latin-Greek"/, "case insensitivity",
	1316	" Null ", "Null", "whitespace",
	1317	" Latin[a-z]-Greek ", "[a-z]Latin-Greek", "inline filter",
	1318	" null ; latin-greek ", NULL /"Null;Latin-Greek"/, "compound whitespace",
	1319	};
	1320	const int32_t DATA_length = UPRV_LENGTHOF(DATA);
	1321	UParseError parseError;
	1322	UErrorCode status= U_ZERO_ERROR;
	1323	for (int32_t i=0; i<DATA_length; i+=3) {
	1324	Transliterator *t = Transliterator::createInstance(DATA[i], UTRANS_FORWARD, parseError, status);
	1325	if (t == 0) {
	1326	dataerrln(UnicodeString("FAIL: ") + DATA[i+2] +
	1327	" cannot create ID \"" + DATA[i] + "\" - " + u_errorName(status));
	1328	} else {
	1329	UnicodeString exp;
	1330	if (DATA[i+1]) {
	1331	exp = UnicodeString(DATA[i+1], "");
	1332	}
	1333	// Don't worry about getID() if the expected char*
	1334	// is NULL -- see above.
	1335	if (exp.length() == 0 \|\| exp == t->getID()) {
	1336	logln(UnicodeString("Ok: ") + DATA[i+2] +
	1337	" create ID \"" + DATA[i] + "\" => \"" +
	1338	exp + "\"");
	1339	} else {
	1340	errln(UnicodeString("FAIL: ") + DATA[i+2] +
	1341	" create ID \"" + DATA[i] + "\" => \"" +
	1342	t->getID() + "\", exp \"" + exp + "\"");
	1343	}
	1344	delete t;
	1345	}
	1346	}
	1347	}
	1348
	1349	/* test for Jitterbug 912 */
	1350	void TransliteratorTest::TestCreateInstance(){
	1351	const char* FORWARD = "F";
	1352	const char* REVERSE = "R";
	1353	const char* DATA[] = {
	1354	// Column 1: id
	1355	// Column 2: direction
	1356	// Column 3: expected ID, or "" if expect failure
	1357	"Latin-Hangul", REVERSE, "Hangul-Latin", // JB#912
	1358
	1359	// JB#2689: bad compound causes crash
	1360	"InvalidSource-InvalidTarget", FORWARD, "",
	1361	"InvalidSource-InvalidTarget", REVERSE, "",
	1362	"Hex-Any;InvalidSource-InvalidTarget", FORWARD, "",
	1363	"Hex-Any;InvalidSource-InvalidTarget", REVERSE, "",
	1364	"InvalidSource-InvalidTarget;Hex-Any", FORWARD, "",
	1365	"InvalidSource-InvalidTarget;Hex-Any", REVERSE, "",
	1366
	1367	NULL
	1368	};
	1369
	1370	for (int32_t i=0; DATA[i]; i+=3) {
	1371	UParseError err;
	1372	UErrorCode ec = U_ZERO_ERROR;
	1373	UnicodeString id(DATA[i]);
	1374	UTransDirection dir = (DATA[i+1]==FORWARD)?
	1375	UTRANS_FORWARD:UTRANS_REVERSE;
	1376	UnicodeString expID(DATA[i+2]);
	1377	Transliterator* t =
	1378	Transliterator::createInstance(id,dir,err,ec);
	1379	UnicodeString newID;
	1380	if (t) {
	1381	newID = t->getID();
	1382	}
	1383	UBool ok = (newID == expID);
	1384	if (!t) {
	1385	newID = u_errorName(ec);
	1386	}
	1387	if (ok) {
	1388	logln((UnicodeString)"Ok: createInstance(" +
	1389	id + "," + DATA[i+1] + ") => " + newID);
	1390	} else {
	1391	dataerrln((UnicodeString)"FAIL: createInstance(" +
	1392	id + "," + DATA[i+1] + ") => " + newID +
	1393	", expected " + expID);
	1394	}
	1395	delete t;
	1396	}
	1397	}
	1398
	1399	/**
	1400	* Test the normalization transliterator.
	1401	*/
	1402	void TransliteratorTest::TestNormalizationTransliterator() {
	1403	// THE FOLLOWING TWO TABLES ARE COPIED FROM com.ibm.test.normalizer.BasicTest
	1404	// PLEASE KEEP THEM IN SYNC WITH BasicTest.
	1405	const char* CANON[] = {
	1406	// Input Decomposed Composed
	1407	"cat", "cat", "cat" ,
	1408	"\\u00e0ardvark", "a\\u0300ardvark", "\\u00e0ardvark" ,
	1409
	1410	"\\u1e0a", "D\\u0307", "\\u1e0a" , // D-dot_above
	1411	"D\\u0307", "D\\u0307", "\\u1e0a" , // D dot_above
	1412
	1413	"\\u1e0c\\u0307", "D\\u0323\\u0307", "\\u1e0c\\u0307" , // D-dot_below dot_above
	1414	"\\u1e0a\\u0323", "D\\u0323\\u0307", "\\u1e0c\\u0307" , // D-dot_above dot_below
	1415	"D\\u0307\\u0323", "D\\u0323\\u0307", "\\u1e0c\\u0307" , // D dot_below dot_above
	1416
	1417	"\\u1e10\\u0307\\u0323", "D\\u0327\\u0323\\u0307","\\u1e10\\u0323\\u0307", // D dot_below cedilla dot_above
	1418	"D\\u0307\\u0328\\u0323","D\\u0328\\u0323\\u0307","\\u1e0c\\u0328\\u0307", // D dot_above ogonek dot_below
	1419
	1420	"\\u1E14", "E\\u0304\\u0300", "\\u1E14" , // E-macron-grave
	1421	"\\u0112\\u0300", "E\\u0304\\u0300", "\\u1E14" , // E-macron + grave
	1422	"\\u00c8\\u0304", "E\\u0300\\u0304", "\\u00c8\\u0304" , // E-grave + macron
	1423
	1424	"\\u212b", "A\\u030a", "\\u00c5" , // angstrom_sign
	1425	"\\u00c5", "A\\u030a", "\\u00c5" , // A-ring
	1426
	1427	"\\u00fdffin", "y\\u0301ffin", "\\u00fdffin" , //updated with 3.0
	1428	"\\u00fd\\uFB03n", "y\\u0301\\uFB03n", "\\u00fd\\uFB03n" , //updated with 3.0
	1429
	1430	"Henry IV", "Henry IV", "Henry IV" ,
	1431	"Henry \\u2163", "Henry \\u2163", "Henry \\u2163" ,
	1432
	1433	"\\u30AC", "\\u30AB\\u3099", "\\u30AC" , // ga (Katakana)
	1434	"\\u30AB\\u3099", "\\u30AB\\u3099", "\\u30AC" , // ka + ten
	1435	"\\uFF76\\uFF9E", "\\uFF76\\uFF9E", "\\uFF76\\uFF9E" , // hw_ka + hw_ten
	1436	"\\u30AB\\uFF9E", "\\u30AB\\uFF9E", "\\u30AB\\uFF9E" , // ka + hw_ten
	1437	"\\uFF76\\u3099", "\\uFF76\\u3099", "\\uFF76\\u3099" , // hw_ka + ten
	1438
	1439	"A\\u0300\\u0316", "A\\u0316\\u0300", "\\u00C0\\u0316" ,
	1440	0 // end
	1441	};
	1442
	1443	const char* COMPAT[] = {
	1444	// Input Decomposed Composed
	1445	"\\uFB4f", "\\u05D0\\u05DC", "\\u05D0\\u05DC" , // Alef-Lamed vs. Alef, Lamed
	1446
	1447	"\\u00fdffin", "y\\u0301ffin", "\\u00fdffin" , //updated for 3.0
	1448	"\\u00fd\\uFB03n", "y\\u0301ffin", "\\u00fdffin" , // ffi ligature -> f + f + i
	1449
	1450	"Henry IV", "Henry IV", "Henry IV" ,
	1451	"Henry \\u2163", "Henry IV", "Henry IV" ,
	1452
	1453	"\\u30AC", "\\u30AB\\u3099", "\\u30AC" , // ga (Katakana)
	1454	"\\u30AB\\u3099", "\\u30AB\\u3099", "\\u30AC" , // ka + ten
	1455
	1456	"\\uFF76\\u3099", "\\u30AB\\u3099", "\\u30AC" , // hw_ka + ten
	1457	0 // end
	1458	};
	1459
	1460	int32_t i;
	1461	UParseError parseError;
	1462	UErrorCode status = U_ZERO_ERROR;
	1463	Transliterator* NFD = Transliterator::createInstance("NFD", UTRANS_FORWARD, parseError, status);
	1464	Transliterator* NFC = Transliterator::createInstance("NFC", UTRANS_FORWARD, parseError, status);
	1465	if (!NFD \|\| !NFC) {
	1466	dataerrln("FAIL: createInstance failed: %s", u_errorName(status));
	1467	delete NFD;
	1468	delete NFC;
	1469	return;
	1470	}
	1471	for (i=0; CANON[i]; i+=3) {
	1472	UnicodeString in = CharsToUnicodeString(CANON[i]);
	1473	UnicodeString expd = CharsToUnicodeString(CANON[i+1]);
	1474	UnicodeString expc = CharsToUnicodeString(CANON[i+2]);
	1475	expect(*NFD, in, expd);
	1476	expect(*NFC, in, expc);
	1477	}
	1478	delete NFD;
	1479	delete NFC;
	1480
	1481	Transliterator* NFKD = Transliterator::createInstance("NFKD", UTRANS_FORWARD, parseError, status);
	1482	Transliterator* NFKC = Transliterator::createInstance("NFKC", UTRANS_FORWARD, parseError, status);
	1483	if (!NFKD \|\| !NFKC) {
	1484	dataerrln("FAIL: createInstance failed");
	1485	delete NFKD;
	1486	delete NFKC;
	1487	return;
	1488	}
	1489	for (i=0; COMPAT[i]; i+=3) {
	1490	UnicodeString in = CharsToUnicodeString(COMPAT[i]);
	1491	UnicodeString expkd = CharsToUnicodeString(COMPAT[i+1]);
	1492	UnicodeString expkc = CharsToUnicodeString(COMPAT[i+2]);
	1493	expect(*NFKD, in, expkd);
	1494	expect(*NFKC, in, expkc);
	1495	}
	1496	delete NFKD;
	1497	delete NFKC;
	1498
	1499	UParseError pe;
	1500	status = U_ZERO_ERROR;
	1501	Transliterator *t = Transliterator::createInstance("NFD; [x]Remove",
	1502	UTRANS_FORWARD,
	1503	pe, status);
	1504	if (t == 0) {
	1505	errln("FAIL: createInstance failed");
	1506	}
	1507	expect(*t, CharsToUnicodeString("\\u010dx"),
	1508	CharsToUnicodeString("c\\u030C"));
	1509	delete t;
	1510	}
	1511
	1512	/**
	1513	* Test compound RBT rules.
	1514	*/
	1515	void TransliteratorTest::TestCompoundRBT(void) {
	1516	// Careful with spacing and ';' here: Phrase this exactly
	1517	// as toRules() is going to return it. If toRules() changes
	1518	// with regard to spacing or ';', then adjust this string.
	1519	UnicodeString rule("::Hex-Any;\n"
	1520	"::Any-Lower;\n"
	1521	"a > '.A.';\n"
	1522	"b > '.B.';\n"
	1523	"::[^t]Any-Upper;", "");
	1524	UParseError parseError;
	1525	UErrorCode status = U_ZERO_ERROR;
	1526	Transliterator *t = Transliterator::createFromRules("Test", rule, UTRANS_FORWARD, parseError, status);
	1527	if (t == 0) {
	1528	errln("FAIL: createFromRules failed");
	1529	return;
	1530	}
	1531	expect(*t, UNICODE_STRING_SIMPLE("\\u0043at in the hat, bat on the mat"),
	1532	"C.A.t IN tHE H.A.t, .B..A.t ON tHE M.A.t");
	1533	UnicodeString r;
	1534	t->toRules(r, TRUE);
	1535	if (r == rule) {
	1536	logln((UnicodeString)"OK: toRules() => " + r);
	1537	} else {
	1538	errln((UnicodeString)"FAIL: toRules() => " + r +
	1539	", expected " + rule);
	1540	}
	1541	delete t;
	1542
	1543	// Now test toRules
	1544	t = Transliterator::createInstance("Greek-Latin; Latin-Cyrillic", UTRANS_FORWARD, parseError, status);
	1545	if (t == 0) {
	1546	dataerrln("FAIL: createInstance failed - %s", u_errorName(status));
	1547	return;
	1548	}
	1549	UnicodeString exp("::Greek-Latin;\n::Latin-Cyrillic;");
	1550	t->toRules(r, TRUE);
	1551	if (r != exp) {
	1552	errln((UnicodeString)"FAIL: toRules() => " + r +
	1553	", expected " + exp);
	1554	} else {
	1555	logln((UnicodeString)"OK: toRules() => " + r);
	1556	}
	1557	delete t;
	1558
	1559	// Round trip the result of toRules
	1560	t = Transliterator::createFromRules("Test", r, UTRANS_FORWARD, parseError, status);
	1561	if (t == 0) {
	1562	errln("FAIL: createFromRules #2 failed");
	1563	return;
	1564	} else {
	1565	logln((UnicodeString)"OK: createFromRules(" + r + ") succeeded");
	1566	}
	1567
	1568	// Test toRules again
	1569	t->toRules(r, TRUE);
	1570	if (r != exp) {
	1571	errln((UnicodeString)"FAIL: toRules() => " + r +
	1572	", expected " + exp);
	1573	} else {
	1574	logln((UnicodeString)"OK: toRules() => " + r);
	1575	}
	1576
	1577	delete t;
	1578
	1579	// Test Foo(Bar) IDs. Careful with spacing in id; make it conform
	1580	// to what the regenerated ID will look like.
	1581	UnicodeString id("Upper(Lower);(NFKC)", "");
	1582	t = Transliterator::createInstance(id, UTRANS_FORWARD, parseError, status);
	1583	if (t == 0) {
	1584	errln("FAIL: createInstance #2 failed");
	1585	return;
	1586	}
	1587	if (t->getID() == id) {
	1588	logln((UnicodeString)"OK: created " + id);
	1589	} else {
	1590	errln((UnicodeString)"FAIL: createInstance(" + id +
	1591	").getID() => " + t->getID());
	1592	}
	1593
	1594	Transliterator *u = t->createInverse(status);
	1595	if (u == 0) {
	1596	errln("FAIL: createInverse failed");
	1597	delete t;
	1598	return;
	1599	}
	1600	exp = "NFKC();Lower(Upper)";
	1601	if (u->getID() == exp) {
	1602	logln((UnicodeString)"OK: createInverse(" + id + ") => " +
	1603	u->getID());
	1604	} else {
	1605	errln((UnicodeString)"FAIL: createInverse(" + id + ") => " +
	1606	u->getID());
	1607	}
	1608	delete t;
	1609	delete u;
	1610	}
	1611
	1612	/**
	1613	* Compound filter semantics were orginially not implemented
	1614	* correctly. Originally, each component filter f(i) is replaced by
	1615	* f'(i) = f(i) && g, where g is the filter for the compound
	1616	* transliterator.
	1617	*
	1618	* From Mark:
	1619	*
	1620	* Suppose and I have a transliterator X. Internally X is
	1621	* "Greek-Latin; Latin-Cyrillic; Any-Lower". I use a filter [^A].
	1622	*
	1623	* The compound should convert all greek characters (through latin) to
	1624	* cyrillic, then lowercase the result. The filter should say "don't
	1625	* touch 'A' in the original". But because an intermediate result
	1626	* happens to go through "A", the Greek Alpha gets hung up.
	1627	*/
	1628	void TransliteratorTest::TestCompoundFilter(void) {
	1629	UParseError parseError;
	1630	UErrorCode status = U_ZERO_ERROR;
	1631	Transliterator *t = Transliterator::createInstance
	1632	("Greek-Latin; Latin-Greek; Lower", UTRANS_FORWARD, parseError, status);
	1633	if (t == 0) {
	1634	dataerrln("FAIL: createInstance failed - %s", u_errorName(status));
	1635	return;
	1636	}
	1637	t->adoptFilter(new UnicodeSet("[^A]", status));
	1638	if (U_FAILURE(status)) {
	1639	errln("FAIL: UnicodeSet ct failed");
	1640	delete t;
	1641	return;
	1642	}
	1643
	1644	// Only the 'A' at index 1 should remain unchanged
	1645	expect(*t,
	1646	CharsToUnicodeString("BA\\u039A\\u0391"),
	1647	CharsToUnicodeString("\\u03b2A\\u03ba\\u03b1"));
	1648	delete t;
	1649	}
	1650
	1651	void TransliteratorTest::TestRemove(void) {
	1652	UParseError parseError;
	1653	UErrorCode status = U_ZERO_ERROR;
	1654	Transliterator *t = Transliterator::createInstance("Remove[abc]", UTRANS_FORWARD, parseError, status);
	1655	if (t == 0) {
	1656	errln("FAIL: createInstance failed");
	1657	return;
	1658	}
	1659
	1660	expect(*t, "Able bodied baker's cats", "Ale odied ker's ts");
	1661
	1662	// extra test for RemoveTransliterator::clone(), which at one point wasn't
	1663	// duplicating the filter
	1664	Transliterator* t2 = t->clone();
	1665	expect(*t2, "Able bodied baker's cats", "Ale odied ker's ts");
	1666
	1667	delete t;
	1668	delete t2;
	1669	}
	1670
	1671	void TransliteratorTest::TestToRules(void) {
	1672	const char* RBT = "rbt";
	1673	const char* SET = "set";
	1674	static const char* DATA[] = {
	1675	RBT,
	1676	"$a=\\u4E61; [$a] > A;",
	1677	"[\\u4E61] > A;",
	1678
	1679	RBT,
	1680	"$white=[[:Zs:][:Zl:]]; $white{a} > A;",
	1681	"[[:Zs:][:Zl:]]{a} > A;",
	1682
	1683	SET,
	1684	"[[:Zs:][:Zl:]]",
	1685	"[[:Zs:][:Zl:]]",
	1686
	1687	SET,
	1688	"[:Ps:]",
	1689	"[:Ps:]",
	1690
	1691	SET,
	1692	"[:L:]",
	1693	"[:L:]",
	1694
	1695	SET,
	1696	"[[:L:]-[A]]",
	1697	"[[:L:]-[A]]",
	1698
	1699	SET,
	1700	"[~[:Lu:][:Ll:]]",
	1701	"[~[:Lu:][:Ll:]]",
	1702
	1703	SET,
	1704	"[~[a-z]]",
	1705	"[~[a-z]]",
	1706
	1707	RBT,
	1708	"$white=[:Zs:]; $black=[^$white]; $black{a} > A;",
	1709	"[^[:Zs:]]{a} > A;",
	1710
	1711	RBT,
	1712	"$a=[:Zs:]; $b=[[a-z]-$a]; $b{a} > A;",
	1713	"[[a-z]-[:Zs:]]{a} > A;",
	1714
	1715	RBT,
	1716	"$a=[:Zs:]; $b=[$a&[a-z]]; $b{a} > A;",
	1717	"[[:Zs:]&[a-z]]{a} > A;",
	1718
	1719	RBT,
	1720	"$a=[:Zs:]; $b=[x$a]; $b{a} > A;",
	1721	"[x[:Zs:]]{a} > A;",
	1722
	1723	RBT,
	1724	"$accentMinus = [ [\\u0300-\\u0345] & [:M:] - [\\u0338]] ;"
	1725	"$macron = \\u0304 ;"
	1726	"$evowel = [aeiouyAEIOUY] ;"
	1727	"$iotasub = \\u0345 ;"
	1728	"($evowel $macron $accentMinus *) i > \| $1 $iotasub ;",
	1729	"([AEIOUYaeiouy]\\u0304[[\\u0300-\\u0345]&[:M:]-[\\u0338]]*)i > \| $1 \\u0345;",
	1730
	1731	RBT,
	1732	"([AEIOUYaeiouy]\\u0304[[:M:]-[\\u0304\\u0345]]*)i > \| $1 \\u0345;",
	1733	"([AEIOUYaeiouy]\\u0304[[:M:]-[\\u0304\\u0345]]*)i > \| $1 \\u0345;",
	1734	};
	1735	static const int32_t DATA_length = UPRV_LENGTHOF(DATA);
	1736
	1737	for (int32_t d=0; d < DATA_length; d+=3) {
	1738	if (DATA[d] == RBT) {
	1739	// Transliterator test
	1740	UParseError parseError;
	1741	UErrorCode status = U_ZERO_ERROR;
	1742	Transliterator *t = Transliterator::createFromRules("ID",
	1743	UnicodeString(DATA[d+1], -1, US_INV), UTRANS_FORWARD, parseError, status);
	1744	if (t == 0) {
	1745	dataerrln("FAIL: createFromRules failed - %s", u_errorName(status));
	1746	return;
	1747	}
	1748	UnicodeString rules, escapedRules;
	1749	t->toRules(rules, FALSE);
	1750	t->toRules(escapedRules, TRUE);
	1751	UnicodeString expRules = CharsToUnicodeString(DATA[d+2]);
	1752	UnicodeString expEscapedRules(DATA[d+2], -1, US_INV);
	1753	if (rules == expRules) {
	1754	logln((UnicodeString)"Ok: " + UnicodeString(DATA[d+1], -1, US_INV) +
	1755	" => " + rules);
	1756	} else {
	1757	errln((UnicodeString)"FAIL: " + UnicodeString(DATA[d+1], -1, US_INV) +
	1758	" => " + rules + ", exp " + expRules);
	1759	}
	1760	if (escapedRules == expEscapedRules) {
	1761	logln((UnicodeString)"Ok: " + UnicodeString(DATA[d+1], -1, US_INV) +
	1762	" => " + escapedRules);
	1763	} else {
	1764	errln((UnicodeString)"FAIL: " + UnicodeString(DATA[d+1], -1, US_INV) +
	1765	" => " + escapedRules + ", exp " + expEscapedRules);
	1766	}
	1767	delete t;
	1768
	1769	} else {
	1770	// UnicodeSet test
	1771	UErrorCode status = U_ZERO_ERROR;
	1772	UnicodeString pat(DATA[d+1], -1, US_INV);
	1773	UnicodeString expToPat(DATA[d+2], -1, US_INV);
	1774	UnicodeSet set(pat, status);
	1775	if (U_FAILURE(status)) {
	1776	errln("FAIL: UnicodeSet ct failed");
	1777	return;
	1778	}
	1779	// Adjust spacing etc. as necessary.
	1780	UnicodeString toPat;
	1781	set.toPattern(toPat);
	1782	if (expToPat == toPat) {
	1783	logln((UnicodeString)"Ok: " + pat +
	1784	" => " + toPat);
	1785	} else {
	1786	errln((UnicodeString)"FAIL: " + pat +
	1787	" => " + prettify(toPat, TRUE) +
	1788	", exp " + prettify(pat, TRUE));
	1789	}
	1790	}
	1791	}
	1792	}
	1793
	1794	void TransliteratorTest::TestContext() {
	1795	UTransPosition pos = {0, 2, 0, 1}; // cs cl s l
	1796	expect("de > x; {d}e > y;",
	1797	"de",
	1798	"ye",
	1799	&pos);
	1800
	1801	expect("ab{c} > z;",
	1802	"xadabdabcy",
	1803	"xadabdabzy");
	1804	}
	1805
	1806	void TransliteratorTest::TestSupplemental() {
	1807
	1808	expect(CharsToUnicodeString("$a=\\U00010300; $s=[\\U00010300-\\U00010323];"
	1809	"a > $a; $s > i;"),
	1810	CharsToUnicodeString("ab\\U0001030Fx"),
	1811	CharsToUnicodeString("\\U00010300bix"));
	1812
	1813	expect(CharsToUnicodeString("$a=[a-z\\U00010300-\\U00010323];"
	1814	"$b=[A-Z\\U00010400-\\U0001044D];"
	1815	"($a)($b) > $2 $1;"),
	1816	CharsToUnicodeString("aB\\U00010300\\U00010400c\\U00010401\\U00010301D"),
	1817	CharsToUnicodeString("Ba\\U00010400\\U00010300\\U00010401cD\\U00010301"));
	1818
	1819	// k\|ax\\U00010300xm
	1820
	1821	// k\|a\\U00010400\\U00010300xm
	1822	// ky\|\\U00010400\\U00010300xm
	1823	// ky\\U00010400\|\\U00010300xm
	1824
	1825	// ky\\U00010400\|\\U00010300\\U00010400m
	1826	// ky\\U00010400y\|\\U00010400m
	1827	expect(CharsToUnicodeString("$a=[a\\U00010300-\\U00010323];"
	1828	"$a {x} > \| @ \\U00010400;"
	1829	"{$a} [^\\u0000-\\uFFFF] > y;"),
	1830	CharsToUnicodeString("kax\\U00010300xm"),
	1831	CharsToUnicodeString("ky\\U00010400y\\U00010400m"));
	1832
	1833	expectT("Any-Name",
	1834	CharsToUnicodeString("\\U00010330\\U000E0061\\u00A0"),
	1835	UNICODE_STRING_SIMPLE("\\N{GOTHIC LETTER AHSA}\\N{TAG LATIN SMALL LETTER A}\\N{NO-BREAK SPACE}"));
	1836
	1837	expectT("Any-Hex/Unicode",
	1838	CharsToUnicodeString("\\U00010330\\U0010FF00\\U000E0061\\u00A0"),
	1839	UNICODE_STRING_SIMPLE("U+10330U+10FF00U+E0061U+00A0"));
	1840
	1841	expectT("Any-Hex/C",
	1842	CharsToUnicodeString("\\U00010330\\U0010FF00\\U000E0061\\u00A0"),
	1843	UNICODE_STRING_SIMPLE("\\U00010330\\U0010FF00\\U000E0061\\u00A0"));
	1844
	1845	expectT("Any-Hex/Perl",
	1846	CharsToUnicodeString("\\U00010330\\U0010FF00\\U000E0061\\u00A0"),
	1847	UNICODE_STRING_SIMPLE("\\x{10330}\\x{10FF00}\\x{E0061}\\x{A0}"));
	1848
	1849	expectT("Any-Hex/Java",
	1850	CharsToUnicodeString("\\U00010330\\U0010FF00\\U000E0061\\u00A0"),
	1851	UNICODE_STRING_SIMPLE("\\uD800\\uDF30\\uDBFF\\uDF00\\uDB40\\uDC61\\u00A0"));
	1852
	1853	expectT("Any-Hex/XML",
	1854	CharsToUnicodeString("\\U00010330\\U0010FF00\\U000E0061\\u00A0"),
	1855	"𐌰􏼀󠁡 ");
	1856
	1857	expectT("Any-Hex/XML10",
	1858	CharsToUnicodeString("\\U00010330\\U0010FF00\\U000E0061\\u00A0"),
	1859	"𐌰􏼀󠁡 ");
	1860
	1861	expectT(UNICODE_STRING_SIMPLE("[\\U000E0000-\\U000E0FFF] Remove"),
	1862	CharsToUnicodeString("\\U00010330\\U0010FF00\\U000E0061\\u00A0"),
	1863	CharsToUnicodeString("\\U00010330\\U0010FF00\\u00A0"));
	1864	}
	1865
	1866	void TransliteratorTest::TestQuantifier() {
	1867
	1868	// Make sure @ in a quantified anteContext works
	1869	expect("a+ {b} > \| @@ c; A > a; (a+ c) > '(' $1 ')';",
	1870	"AAAAAb",
	1871	"aaa(aac)");
	1872
	1873	// Make sure @ in a quantified postContext works
	1874	expect("{b} a+ > c @@ \|; (a+) > '(' $1 ')';",
	1875	"baaaaa",
	1876	"caa(aaa)");
	1877
	1878	// Make sure @ in a quantified postContext with seg ref works
	1879	expect("{(b)} a+ > $1 @@ \|; (a+) > '(' $1 ')';",
	1880	"baaaaa",
	1881	"baa(aaa)");
	1882
	1883	// Make sure @ past ante context doesn't enter ante context
	1884	UTransPosition pos = {0, 5, 3, 5};
	1885	expect("a+ {b} > \| @@ c; x > y; (a+ c) > '(' $1 ')';",
	1886	"xxxab",
	1887	"xxx(ac)",
	1888	&pos);
	1889
	1890	// Make sure @ past post context doesn't pass limit
	1891	UTransPosition pos2 = {0, 4, 0, 2};
	1892	expect("{b} a+ > c @@ \|; x > y; a > A;",
	1893	"baxx",
	1894	"caxx",
	1895	&pos2);
	1896
	1897	// Make sure @ past post context doesn't enter post context
	1898	expect("{b} a+ > c @@ \|; x > y; a > A;",
	1899	"baxx",
	1900	"cayy");
	1901
	1902	expect("(ab)? c > d;",
	1903	"c abc ababc",
	1904	"d d abd");
	1905
	1906	// NOTE: The (ab)+ when referenced just yields a single "ab",
	1907	// not the full sequence of them. This accords with perl behavior.
	1908	expect("(ab)+ {x} > '(' $1 ')';",
	1909	"x abx ababxy",
	1910	"x ab(ab) abab(ab)y");
	1911
	1912	expect("b+ > x;",
	1913	"ac abc abbc abbbc",
	1914	"ac axc axc axc");
	1915
	1916	expect("[abc]+ > x;",
	1917	"qac abrc abbcs abtbbc",
	1918	"qx xrx xs xtx");
	1919
	1920	expect("q{(ab)+} > x;",
	1921	"qa qab qaba qababc qaba",
	1922	"qa qx qxa qxc qxa");
	1923
	1924	expect("q(ab)* > x;",
	1925	"qa qab qaba qababc",
	1926	"xa x xa xc");
	1927
	1928	// NOTE: The (ab)+ when referenced just yields a single "ab",
	1929	// not the full sequence of them. This accords with perl behavior.
	1930	expect("q(ab)* > '(' $1 ')';",
	1931	"qa qab qaba qababc",
	1932	"()a (ab) (ab)a (ab)c");
	1933
	1934	// 'foo'+ and 'foo'* -- the quantifier should apply to the entire
	1935	// quoted string
	1936	expect("'ab'+ > x;",
	1937	"bb ab ababb",
	1938	"bb x xb");
	1939
	1940	// $foo+ and $foo* -- the quantifier should apply to the entire
	1941	// variable reference
	1942	expect("$var = ab; $var+ > x;",
	1943	"bb ab ababb",
	1944	"bb x xb");
	1945	}
	1946
	1947	class TestTrans : public Transliterator {
	1948	public:
	1949	TestTrans(const UnicodeString& id) : Transliterator(id, 0) {
	1950	}
	1951	virtual TestTrans* clone(void) const {
	1952	return new TestTrans(getID());
	1953	}
	1954	virtual void handleTransliterate(Replaceable& /text/, UTransPosition& offsets,
	1955	UBool /isIncremental/) const
	1956	{
	1957	offsets.start = offsets.limit;
	1958	}
	1959	virtual UClassID getDynamicClassID() const;
	1960	static UClassID U_EXPORT2 getStaticClassID();
	1961	};
	1962	UOBJECT_DEFINE_RTTI_IMPLEMENTATION(TestTrans)
	1963
	1964	/**
	1965	* Test Source-Target/Variant.
	1966	*/
	1967	void TransliteratorTest::TestSTV(void) {
	1968	int32_t ns = Transliterator::countAvailableSources();
	1969	if (ns < 0 \|\| ns > 255) {
	1970	errln((UnicodeString)"FAIL: Bad source count: " + ns);
	1971	return;
	1972	}
	1973	int32_t i, j;
	1974	for (i=0; i<ns; ++i) {
	1975	UnicodeString source;
	1976	Transliterator::getAvailableSource(i, source);
	1977	logln((UnicodeString)"" + i + ": " + source);
	1978	if (source.length() == 0) {
	1979	errln("FAIL: empty source");
	1980	continue;
	1981	}
	1982	int32_t nt = Transliterator::countAvailableTargets(source);
	1983	if (nt < 0 \|\| nt > 255) {
	1984	errln((UnicodeString)"FAIL: Bad target count: " + nt);
	1985	continue;
	1986	}
	1987	for (int32_t j=0; j<nt; ++j) {
	1988	UnicodeString target;
	1989	Transliterator::getAvailableTarget(j, source, target);
	1990	logln((UnicodeString)" " + j + ": " + target);
	1991	if (target.length() == 0) {
	1992	errln("FAIL: empty target");
	1993	continue;
	1994	}
	1995	int32_t nv = Transliterator::countAvailableVariants(source, target);
	1996	if (nv < 0 \|\| nv > 255) {
	1997	errln((UnicodeString)"FAIL: Bad variant count: " + nv);
	1998	continue;
	1999	}
	2000	for (int32_t k=0; k<nv; ++k) {
	2001	UnicodeString variant;
	2002	Transliterator::getAvailableVariant(k, source, target, variant);
	2003	if (variant.length() == 0) {
	2004	logln((UnicodeString)" " + k + ": <empty>");
	2005	} else {
	2006	logln((UnicodeString)" " + k + ": " + variant);
	2007	}
	2008	}
	2009	}
	2010	}
	2011
	2012	// Test registration
	2013	const char* IDS[] = { "Fieruwer", "Seoridf-Sweorie", "Oewoir-Oweri/Vsie" };
	2014	const char* FULL_IDS[] = { "Any-Fieruwer", "Seoridf-Sweorie", "Oewoir-Oweri/Vsie" };
	2015	const char* SOURCES[] = { NULL, "Seoridf", "Oewoir" };
	2016	for (i=0; i<3; ++i) {
	2017	Transliterator *t = new TestTrans(IDS[i]);
	2018	if (t == 0) {
	2019	errln("FAIL: out of memory");
	2020	return;
	2021	}
	2022	if (t->getID() != IDS[i]) {
	2023	errln((UnicodeString)"FAIL: ID mismatch for " + IDS[i]);
	2024	delete t;
	2025	return;
	2026	}
	2027	Transliterator::registerInstance(t);
	2028	UErrorCode status = U_ZERO_ERROR;
	2029	t = Transliterator::createInstance(IDS[i], UTRANS_FORWARD, status);
	2030	if (t == NULL) {
	2031	errln((UnicodeString)"FAIL: Registration/creation failed for ID " +
	2032	IDS[i]);
	2033	} else {
	2034	logln((UnicodeString)"Ok: Registration/creation succeeded for ID " +
	2035	IDS[i]);
	2036	delete t;
	2037	}
	2038	Transliterator::unregister(IDS[i]);
	2039	t = Transliterator::createInstance(IDS[i], UTRANS_FORWARD, status);
	2040	if (t != NULL) {
	2041	errln((UnicodeString)"FAIL: Unregistration failed for ID " +
	2042	IDS[i]);
	2043	delete t;
	2044	}
	2045	}
	2046
	2047	// Make sure getAvailable API reflects removal
	2048	int32_t n = Transliterator::countAvailableIDs();
	2049	for (i=0; i<n; ++i) {
	2050	UnicodeString id = Transliterator::getAvailableID(i);
	2051	for (j=0; j<3; ++j) {
	2052	if (id.caseCompare(FULL_IDS[j],0)==0) {
	2053	errln((UnicodeString)"FAIL: unregister(" + id + ") failed");
	2054	}
	2055	}
	2056	}
	2057	n = Transliterator::countAvailableTargets("Any");
	2058	for (i=0; i<n; ++i) {
	2059	UnicodeString t;
	2060	Transliterator::getAvailableTarget(i, "Any", t);
	2061	if (t.caseCompare(IDS[0],0)==0) {
	2062	errln((UnicodeString)"FAIL: unregister(Any-" + t + ") failed");
	2063	}
	2064	}
	2065	n = Transliterator::countAvailableSources();
	2066	for (i=0; i<n; ++i) {
	2067	UnicodeString s;
	2068	Transliterator::getAvailableSource(i, s);
	2069	for (j=0; j<3; ++j) {
	2070	if (SOURCES[j] == NULL) continue;
	2071	if (s.caseCompare(SOURCES[j],0)==0) {
	2072	errln((UnicodeString)"FAIL: unregister(" + s + "-*) failed");
	2073	}
	2074	}
	2075	}
	2076	}
	2077
	2078	/**
	2079	* Test inverse of Greek-Latin; Title()
	2080	*/
	2081	void TransliteratorTest::TestCompoundInverse(void) {
	2082	UParseError parseError;
	2083	UErrorCode status = U_ZERO_ERROR;
	2084	Transliterator *t = Transliterator::createInstance
	2085	("Greek-Latin; Title()", UTRANS_REVERSE,parseError, status);
	2086	if (t == 0) {
	2087	dataerrln("FAIL: createInstance - %s", u_errorName(status));
	2088	return;
	2089	}
	2090	UnicodeString exp("(Title);Latin-Greek");
	2091	if (t->getID() == exp) {
	2092	logln("Ok: inverse of \"Greek-Latin; Title()\" is \"" +
	2093	t->getID());
	2094	} else {
	2095	errln("FAIL: inverse of \"Greek-Latin; Title()\" is \"" +
	2096	t->getID() + "\", expected \"" + exp + "\"");
	2097	}
	2098	delete t;
	2099	}
	2100
	2101	/**
	2102	* Test NFD chaining with RBT
	2103	*/
	2104	void TransliteratorTest::TestNFDChainRBT() {
	2105	UParseError pe;
	2106	UErrorCode ec = U_ZERO_ERROR;
	2107	Transliterator* t = Transliterator::createFromRules(
	2108	"TEST", "::NFD; aa > Q; a > q;",
	2109	UTRANS_FORWARD, pe, ec);
	2110	if (t == NULL \|\| U_FAILURE(ec)) {
	2111	dataerrln("FAIL: Transliterator::createFromRules failed with %s", u_errorName(ec));
	2112	return;
	2113	}
	2114	expect(*t, "aa", "Q");
	2115	delete t;
	2116
	2117	// TEMPORARY TESTS -- BEING DEBUGGED
	2118	//=- UnicodeString s, s2;
	2119	//=- t = Transliterator::createInstance("Latin-Devanagari", UTRANS_FORWARD, pe, ec);
	2120	//=- s = CharsToUnicodeString("rmk\\u1E63\\u0113t");
	2121	//=- s2 = CharsToUnicodeString("\\u0930\\u094D\\u092E\\u094D\\u0915\\u094D\\u0937\\u0947\\u0924\\u094D");
	2122	//=- expect(*t, s, s2);
	2123	//=- delete t;
	2124	//=-
	2125	//=- t = Transliterator::createInstance("Devanagari-Latin", UTRANS_FORWARD, pe, ec);
	2126	//=- expect(*t, s2, s);
	2127	//=- delete t;
	2128	//=-
	2129	//=- t = Transliterator::createInstance("Latin-Devanagari;Devanagari-Latin", UTRANS_FORWARD, pe, ec);
	2130	//=- s = CharsToUnicodeString("rmk\\u1E63\\u0113t");
	2131	//=- expect(*t, s, s);
	2132	//=- delete t;
	2133
	2134	// const char* source[] = {
	2135	// /*
	2136	// "\\u015Br\\u012Bmad",
	2137	// "bhagavadg\\u012Bt\\u0101",
	2138	// "adhy\\u0101ya",
	2139	// "arjuna",
	2140	// "vi\\u1E63\\u0101da",
	2141	// "y\\u014Dga",
	2142	// "dhr\\u0325tar\\u0101\\u1E63\\u1E6Dra",
	2143	// "uv\\u0101cr\\u0325",
	2144	// */
	2145	// "rmk\\u1E63\\u0113t",
	2146	// //"dharmak\\u1E63\\u0113tr\\u0113",
	2147	// /*
	2148	// "kuruk\\u1E63\\u0113tr\\u0113",
	2149	// "samav\\u0113t\\u0101",
	2150	// "yuyutsava-\\u1E25",
	2151	// "m\\u0101mak\\u0101-\\u1E25",
	2152	// // "p\\u0101\\u1E47\\u1E0Dav\\u0101\\u015Bcaiva",
	2153	// "kimakurvata",
	2154	// "san\\u0304java",
	2155	// */
	2156	//
	2157	// 0
	2158	// };
	2159	// const char* expected[] = {
	2160	// /*
	2161	// "\\u0936\\u094d\\u0930\\u0940\\u092e\\u0926\\u094d",
	2162	// "\\u092d\\u0917\\u0935\\u0926\\u094d\\u0917\\u0940\\u0924\\u093e",
	2163	// "\\u0905\\u0927\\u094d\\u092f\\u093e\\u092f",
	2164	// "\\u0905\\u0930\\u094d\\u091c\\u0941\\u0928",
	2165	// "\\u0935\\u093f\\u0937\\u093e\\u0926",
	2166	// "\\u092f\\u094b\\u0917",
	2167	// "\\u0927\\u0943\\u0924\\u0930\\u093e\\u0937\\u094d\\u091f\\u094d\\u0930",
	2168	// "\\u0909\\u0935\\u093E\\u091A\\u0943",
	2169	// */
	2170	// "\\u0927",
	2171	// //"\\u0927\\u0930\\u094d\\u092e\\u0915\\u094d\\u0937\\u0947\\u0924\\u094d\\u0930\\u0947",
	2172	// /*
	2173	// "\\u0915\\u0941\\u0930\\u0941\\u0915\\u094d\\u0937\\u0947\\u0924\\u094d\\u0930\\u0947",
	2174	// "\\u0938\\u092e\\u0935\\u0947\\u0924\\u093e",
	2175	// "\\u092f\\u0941\\u092f\\u0941\\u0924\\u094d\\u0938\\u0935\\u0903",
	2176	// "\\u092e\\u093e\\u092e\\u0915\\u093e\\u0903",
	2177	// // "\\u092a\\u093e\\u0923\\u094d\\u0921\\u0935\\u093e\\u0936\\u094d\\u091a\\u0948\\u0935",
	2178	// "\\u0915\\u093f\\u092e\\u0915\\u0941\\u0930\\u094d\\u0935\\u0924",
	2179	// "\\u0938\\u0902\\u091c\\u0935",
	2180	// */
	2181	// 0
	2182	// };
	2183	// UErrorCode status = U_ZERO_ERROR;
	2184	// UParseError parseError;
	2185	// UnicodeString message;
	2186	// Transliterator* latinToDevToLatin=Transliterator::createInstance("Latin-Devanagari;Devanagari-Latin", UTRANS_FORWARD, parseError, status);
	2187	// Transliterator* devToLatinToDev=Transliterator::createInstance("Devanagari-Latin;Latin-Devanagari", UTRANS_FORWARD, parseError, status);
	2188	// if(U_FAILURE(status)){
	2189	// errln("FAIL: construction " + UnicodeString(" Error: ") + u_errorName(status));
	2190	// errln("PreContext: " + prettify(parseError.preContext) + "PostContext: " + prettify( parseError.postContext) );
	2191	// delete latinToDevToLatin;
	2192	// delete devToLatinToDev;
	2193	// return;
	2194	// }
	2195	// UnicodeString gotResult;
	2196	// for(int i= 0; source[i] != 0; i++){
	2197	// gotResult = source[i];
	2198	// expect(*latinToDevToLatin,CharsToUnicodeString(source[i]),CharsToUnicodeString(source[i]));
	2199	// expect(*devToLatinToDev,CharsToUnicodeString(expected[i]),CharsToUnicodeString(expected[i]));
	2200	// }
	2201	// delete latinToDevToLatin;
	2202	// delete devToLatinToDev;
	2203	}
	2204
	2205	/**
	2206	* Inverse of "Null" should be "Null". (J21)
	2207	*/
	2208	void TransliteratorTest::TestNullInverse() {
	2209	UParseError pe;
	2210	UErrorCode ec = U_ZERO_ERROR;
	2211	Transliterator *t = Transliterator::createInstance("Null", UTRANS_FORWARD, pe, ec);
	2212	if (t == 0 \|\| U_FAILURE(ec)) {
	2213	errln("FAIL: createInstance");
	2214	return;
	2215	}
	2216	Transliterator *u = t->createInverse(ec);
	2217	if (u == 0 \|\| U_FAILURE(ec)) {
	2218	errln("FAIL: createInverse");
	2219	delete t;
	2220	return;
	2221	}
	2222	if (u->getID() != "Null") {
	2223	errln("FAIL: Inverse of Null should be Null");
	2224	}
	2225	delete t;
	2226	delete u;
	2227	}
	2228
	2229	/**
	2230	* Check ID of inverse of alias. (J22)
	2231	*/
	2232	void TransliteratorTest::TestAliasInverseID() {
	2233	UnicodeString ID("Latin-Hangul", ""); // This should be any alias ID with an inverse
	2234	UParseError pe;
	2235	UErrorCode ec = U_ZERO_ERROR;
	2236	Transliterator *t = Transliterator::createInstance(ID, UTRANS_FORWARD, pe, ec);
	2237	if (t == 0 \|\| U_FAILURE(ec)) {
	2238	dataerrln("FAIL: createInstance - %s", u_errorName(ec));
	2239	return;
	2240	}
	2241	Transliterator *u = t->createInverse(ec);
	2242	if (u == 0 \|\| U_FAILURE(ec)) {
	2243	errln("FAIL: createInverse");
	2244	delete t;
	2245	return;
	2246	}
	2247	UnicodeString exp = "Hangul-Latin";
	2248	UnicodeString got = u->getID();
	2249	if (got != exp) {
	2250	errln((UnicodeString)"FAIL: Inverse of " + ID + " is " + got +
	2251	", expected " + exp);
	2252	}
	2253	delete t;
	2254	delete u;
	2255	}
	2256
	2257	/**
	2258	* Test IDs of inverses of compound transliterators. (J20)
	2259	*/
	2260	void TransliteratorTest::TestCompoundInverseID() {
	2261	UnicodeString ID = "Latin-Jamo;NFC(NFD)";
	2262	UParseError pe;
	2263	UErrorCode ec = U_ZERO_ERROR;
	2264	Transliterator *t = Transliterator::createInstance(ID, UTRANS_FORWARD, pe, ec);
	2265	if (t == 0 \|\| U_FAILURE(ec)) {
	2266	dataerrln("FAIL: createInstance - %s", u_errorName(ec));
	2267	return;
	2268	}
	2269	Transliterator *u = t->createInverse(ec);
	2270	if (u == 0 \|\| U_FAILURE(ec)) {
	2271	errln("FAIL: createInverse");
	2272	delete t;
	2273	return;
	2274	}
	2275	UnicodeString exp = "NFD(NFC);Jamo-Latin";
	2276	UnicodeString got = u->getID();
	2277	if (got != exp) {
	2278	errln((UnicodeString)"FAIL: Inverse of " + ID + " is " + got +
	2279	", expected " + exp);
	2280	}
	2281	delete t;
	2282	delete u;
	2283	}
	2284
	2285	/**
	2286	* Test undefined variable.
	2287
	2288	*/
	2289	void TransliteratorTest::TestUndefinedVariable() {
	2290	UnicodeString rule = "$initial } a <> \\u1161;";
	2291	UParseError pe;
	2292	UErrorCode ec = U_ZERO_ERROR;
	2293	Transliterator *t = Transliterator::createFromRules("<ID>", rule, UTRANS_FORWARD, pe, ec);
	2294	delete t;
	2295	if (U_FAILURE(ec)) {
	2296	logln((UnicodeString)"OK: Got exception for " + rule + ", as expected: " +
	2297	u_errorName(ec));
	2298	return;
	2299	}
	2300	errln((UnicodeString)"Fail: bogus rule " + rule + " compiled with error " +
	2301	u_errorName(ec));
	2302	}
	2303
	2304	/**
	2305	* Test empty context.
	2306	*/
	2307	void TransliteratorTest::TestEmptyContext() {
	2308	expect(" { a } > b;", "xay a ", "xby b ");
	2309	}
	2310
	2311	/**
	2312	* Test compound filter ID syntax
	2313	*/
	2314	void TransliteratorTest::TestCompoundFilterID(void) {
	2315	static const char* DATA[] = {
	2316	// Col. 1 = ID or rule set (latter must start with #)
	2317
	2318	// = columns > 1 are null if expect col. 1 to be illegal =
	2319
	2320	// Col. 2 = direction, "F..." or "R..."
	2321	// Col. 3 = source string
	2322	// Col. 4 = exp result
	2323
	2324	"[abc]; [abc]", NULL, NULL, NULL, // multiple filters
	2325	"Latin-Greek; [abc];", NULL, NULL, NULL, // misplaced filter
	2326	"[b]; Latin-Greek; Upper; ([xyz])", "F", "abc", "a\\u0392c",
	2327	"[b]; (Lower); Latin-Greek; Upper(); ([\\u0392])", "R", "\\u0391\\u0392\\u0393", "\\u0391b\\u0393",
	2328	"#\n::[b]; ::Latin-Greek; ::Upper; ::([xyz]);", "F", "abc", "a\\u0392c",
	2329	"#\n::[b]; ::(Lower); ::Latin-Greek; ::Upper(); ::([\\u0392]);", "R", "\\u0391\\u0392\\u0393", "\\u0391b\\u0393",
	2330	NULL,
	2331	};
	2332
	2333	for (int32_t i=0; DATA[i]; i+=4) {
	2334	UnicodeString id = CharsToUnicodeString(DATA[i]);
	2335	UTransDirection direction = (DATA[i+1] != NULL && DATA[i+1][0] == 'R') ?
	2336	UTRANS_REVERSE : UTRANS_FORWARD;
	2337	UnicodeString source;
	2338	UnicodeString exp;
	2339	if (DATA[i+2] != NULL) {
	2340	source = CharsToUnicodeString(DATA[i+2]);
	2341	exp = CharsToUnicodeString(DATA[i+3]);
	2342	}
	2343	UBool expOk = (DATA[i+1] != NULL);
	2344	LocalPointer<Transliterator> t;
	2345	UParseError pe;
	2346	UErrorCode ec = U_ZERO_ERROR;
	2347	if (id.charAt(0) == 0x23/#/) {
	2348	t.adoptInstead(Transliterator::createFromRules("ID", id, direction, pe, ec));
	2349	} else {
	2350	t.adoptInstead(Transliterator::createInstance(id, direction, pe, ec));
	2351	}
	2352	UBool ok = (t.isValid() && U_SUCCESS(ec));
	2353	UnicodeString transID;
	2354	if (t.isValid()) {
	2355	transID = t->getID();
	2356	}
	2357	else {
	2358	transID = UnicodeString("NULL", "");
	2359	}
	2360	if (ok == expOk) {
	2361	logln((UnicodeString)"Ok: " + id + " => " + transID + ", " +
	2362	u_errorName(ec));
	2363	if (source.length() != 0) {
	2364	expect(*t, source, exp);
	2365	}
	2366	} else {
	2367	dataerrln((UnicodeString)"FAIL: " + id + " => " + transID + ", " +
	2368	u_errorName(ec));
	2369	}
	2370	}
	2371	}
	2372
	2373	/**
	2374	* Test new property set syntax
	2375	*/
	2376	void TransliteratorTest::TestPropertySet() {
	2377	expect(UNICODE_STRING_SIMPLE("a>A; \\p{Lu}>x; \\p{ANY}>y;"), "abcDEF", "Ayyxxx");
	2378	expect("(.+)>'[' $1 ']';", " a stitch \n in time \r saves 9",
	2379	"[ a stitch ]\n[ in time ]\r[ saves 9]");
	2380	}
	2381
	2382	/**
	2383	* Test various failure points of the new 2.0 engine.
	2384	*/
	2385	void TransliteratorTest::TestNewEngine() {
	2386	UParseError pe;
	2387	UErrorCode ec = U_ZERO_ERROR;
	2388	Transliterator *t = Transliterator::createInstance("Latin-Hiragana", UTRANS_FORWARD, pe, ec);
	2389	if (t == 0 \|\| U_FAILURE(ec)) {
	2390	dataerrln("FAIL: createInstance Latin-Hiragana - %s", u_errorName(ec));
	2391	return;
	2392	}
	2393	// Katakana should be untouched
	2394	expect(*t, CharsToUnicodeString("a\\u3042\\u30A2"),
	2395	CharsToUnicodeString("\\u3042\\u3042\\u30A2"));
	2396
	2397	delete t;
	2398
	2399	#if 1
	2400	// This test will only work if Transliterator.ROLLBACK is
	2401	// true. Otherwise, this test will fail, revealing a
	2402	// limitation of global filters in incremental mode.
	2403	Transliterator *a =
	2404	Transliterator::createFromRules("a_to_A", "a > A;", UTRANS_FORWARD, pe, ec);
	2405	Transliterator *A =
	2406	Transliterator::createFromRules("A_to_b", "A > b;", UTRANS_FORWARD, pe, ec);
	2407	if (U_FAILURE(ec)) {
	2408	delete a;
	2409	delete A;
	2410	return;
	2411	}
	2412
	2413	Transliterator* array[3];
	2414	array[0] = a;
	2415	array[1] = Transliterator::createInstance("NFD", UTRANS_FORWARD, pe, ec);
	2416	array[2] = A;
	2417	if (U_FAILURE(ec)) {
	2418	errln("FAIL: createInstance NFD");
	2419	delete a;
	2420	delete A;
	2421	delete array[1];
	2422	return;
	2423	}
	2424
	2425	t = new CompoundTransliterator(array, 3, new UnicodeSet("[:Ll:]", ec));
	2426	if (U_FAILURE(ec)) {
	2427	errln("FAIL: UnicodeSet constructor");
	2428	delete a;
	2429	delete A;
	2430	delete array[1];
	2431	delete t;
	2432	return;
	2433	}
	2434
	2435	expect(*t, "aAaA", "bAbA");
	2436
	2437	assertTrue("countElements", t->countElements() == 3);
	2438	assertEquals("getElement(0)", t->getElement(0, ec).getID(), "a_to_A");
	2439	assertEquals("getElement(1)", t->getElement(1, ec).getID(), "NFD");
	2440	assertEquals("getElement(2)", t->getElement(2, ec).getID(), "A_to_b");
	2441	assertSuccess("getElement", ec);
	2442
	2443	delete a;
	2444	delete A;
	2445	delete array[1];
	2446	delete t;
	2447	#endif
	2448
	2449	expect("$smooth = x; $macron = q; [:^L:] { ([aeiouyAEIOUY] $macron?) } [^aeiouyAEIOUY$smooth$macron] > \| $1 $smooth ;",
	2450	"a",
	2451	"ax");
	2452
	2453	UnicodeString gr = CharsToUnicodeString(
	2454	"$ddot = \\u0308 ;"
	2455	"$lcgvowel = [\\u03b1\\u03b5\\u03b7\\u03b9\\u03bf\\u03c5\\u03c9] ;"
	2456	"$rough = \\u0314 ;"
	2457	"($lcgvowel+ $ddot?) $rough > h \| $1 ;"
	2458	"\\u03b1 <> a ;"
	2459	"$rough <> h ;");
	2460
	2461	expect(gr, CharsToUnicodeString("\\u03B1\\u0314"), "ha");
	2462	}
	2463
	2464	/**
	2465	* Test quantified segment behavior. We want:
	2466	* ([abc])+ > x $1 x; applied to "cba" produces "xax"
	2467	*/
	2468	void TransliteratorTest::TestQuantifiedSegment(void) {
	2469	// The normal case
	2470	expect("([abc]+) > x $1 x;", "cba", "xcbax");
	2471
	2472	// The tricky case; the quantifier is around the segment
	2473	expect("([abc])+ > x $1 x;", "cba", "xax");
	2474
	2475	// Tricky case in reverse direction
	2476	expect("([abc])+ { q > x $1 x;", "cbaq", "cbaxax");
	2477
	2478	// Check post-context segment
	2479	expect("{q} ([a-d])+ > '(' $1 ')';", "ddqcba", "dd(a)cba");
	2480
	2481	// Test toRule/toPattern for non-quantified segment.
	2482	// Careful with spacing here.
	2483	UnicodeString r("([a-c]){q} > x $1 x;");
	2484	UParseError pe;
	2485	UErrorCode ec = U_ZERO_ERROR;
	2486	Transliterator* t = Transliterator::createFromRules("ID", r, UTRANS_FORWARD, pe, ec);
	2487	if (U_FAILURE(ec)) {
	2488	errln("FAIL: createFromRules");
	2489	delete t;
	2490	return;
	2491	}
	2492	UnicodeString rr;
	2493	t->toRules(rr, TRUE);
	2494	if (r != rr) {
	2495	errln((UnicodeString)"FAIL: \"" + r + "\" x toRules() => \"" + rr + "\"");
	2496	} else {
	2497	logln((UnicodeString)"Ok: \"" + r + "\" x toRules() => \"" + rr + "\"");
	2498	}
	2499	delete t;
	2500
	2501	// Test toRule/toPattern for quantified segment.
	2502	// Careful with spacing here.
	2503	r = "([a-c])+{q} > x $1 x;";
	2504	t = Transliterator::createFromRules("ID", r, UTRANS_FORWARD, pe, ec);
	2505	if (U_FAILURE(ec)) {
	2506	errln("FAIL: createFromRules");
	2507	delete t;
	2508	return;
	2509	}
	2510	t->toRules(rr, TRUE);
	2511	if (r != rr) {
	2512	errln((UnicodeString)"FAIL: \"" + r + "\" x toRules() => \"" + rr + "\"");
	2513	} else {
	2514	logln((UnicodeString)"Ok: \"" + r + "\" x toRules() => \"" + rr + "\"");
	2515	}
	2516	delete t;
	2517	}
	2518
	2519	//======================================================================
	2520	// Ram's tests
	2521	//======================================================================
	2522	void TransliteratorTest::TestDevanagariLatinRT(){
	2523	const int MAX_LEN= 52;
	2524	const char* const source[MAX_LEN] = {
	2525	"bh\\u0101rata",
	2526	"kra",
	2527	"k\\u1E63a",
	2528	"khra",
	2529	"gra",
	2530	"\\u1E45ra",
	2531	"cra",
	2532	"chra",
	2533	"j\\u00F1a",
	2534	"jhra",
	2535	"\\u00F1ra",
	2536	"\\u1E6Dya",
	2537	"\\u1E6Dhra",
	2538	"\\u1E0Dya",
	2539	//"r\\u0323ya", // \u095c is not valid in Devanagari
	2540	"\\u1E0Dhya",
	2541	"\\u1E5Bhra",
	2542	"\\u1E47ra",
	2543	"tta",
	2544	"thra",
	2545	"dda",
	2546	"dhra",
	2547	"nna",
	2548	"pra",
	2549	"phra",
	2550	"bra",
	2551	"bhra",
	2552	"mra",
	2553	"\\u1E49ra",
	2554	//"l\\u0331ra",
	2555	"yra",
	2556	"\\u1E8Fra",
	2557	//"l-",
	2558	"vra",
	2559	"\\u015Bra",
	2560	"\\u1E63ra",
	2561	"sra",
	2562	"hma",
	2563	"\\u1E6D\\u1E6Da",
	2564	"\\u1E6D\\u1E6Dha",
	2565	"\\u1E6Dh\\u1E6Dha",
	2566	"\\u1E0D\\u1E0Da",
	2567	"\\u1E0D\\u1E0Dha",
	2568	"\\u1E6Dya",
	2569	"\\u1E6Dhya",
	2570	"\\u1E0Dya",
	2571	"\\u1E0Dhya",
	2572	// Not roundtrippable --
	2573	// \\u0939\\u094d\\u094d\\u092E - hma
	2574	// \\u0939\\u094d\\u092E - hma
	2575	// CharsToUnicodeString("hma"),
	2576	"hya",
	2577	"\\u015Br\\u0325",
	2578	"\\u015Bca",
	2579	"\\u0115",
	2580	"san\\u0304j\\u012Bb s\\u0113nagupta",
	2581	"\\u0101nand vaddir\\u0101ju",
	2582	"\\u0101",
	2583	"a"
	2584	};
	2585	const char* const expected[MAX_LEN] = {
	2586	"\\u092D\\u093E\\u0930\\u0924", /* bha\\u0304rata */
	2587	"\\u0915\\u094D\\u0930", /* kra */
	2588	"\\u0915\\u094D\\u0937", /* ks\\u0323a */
	2589	"\\u0916\\u094D\\u0930", /* khra */
	2590	"\\u0917\\u094D\\u0930", /* gra */
	2591	"\\u0919\\u094D\\u0930", /* n\\u0307ra */
	2592	"\\u091A\\u094D\\u0930", /* cra */
	2593	"\\u091B\\u094D\\u0930", /* chra */
	2594	"\\u091C\\u094D\\u091E", /* jn\\u0303a */
	2595	"\\u091D\\u094D\\u0930", /* jhra */
	2596	"\\u091E\\u094D\\u0930", /* n\\u0303ra */
	2597	"\\u091F\\u094D\\u092F", /* t\\u0323ya */
	2598	"\\u0920\\u094D\\u0930", /* t\\u0323hra */
	2599	"\\u0921\\u094D\\u092F", /* d\\u0323ya */
	2600	//"\\u095C\\u094D\\u092F", /* r\\u0323ya */ // \u095c is not valid in Devanagari
	2601	"\\u0922\\u094D\\u092F", /* d\\u0323hya */
	2602	"\\u0922\\u093C\\u094D\\u0930", /* r\\u0323hra */
	2603	"\\u0923\\u094D\\u0930", /* n\\u0323ra */
	2604	"\\u0924\\u094D\\u0924", /* tta */
	2605	"\\u0925\\u094D\\u0930", /* thra */
	2606	"\\u0926\\u094D\\u0926", /* dda */
	2607	"\\u0927\\u094D\\u0930", /* dhra */
	2608	"\\u0928\\u094D\\u0928", /* nna */
	2609	"\\u092A\\u094D\\u0930", /* pra */
	2610	"\\u092B\\u094D\\u0930", /* phra */
	2611	"\\u092C\\u094D\\u0930", /* bra */
	2612	"\\u092D\\u094D\\u0930", /* bhra */
	2613	"\\u092E\\u094D\\u0930", /* mra */
	2614	"\\u0929\\u094D\\u0930", /* n\\u0331ra */
	2615	//"\\u0934\\u094D\\u0930", /* l\\u0331ra */
	2616	"\\u092F\\u094D\\u0930", /* yra */
	2617	"\\u092F\\u093C\\u094D\\u0930", /* y\\u0307ra */
	2618	//"l-",
	2619	"\\u0935\\u094D\\u0930", /* vra */
	2620	"\\u0936\\u094D\\u0930", /* s\\u0301ra */
	2621	"\\u0937\\u094D\\u0930", /* s\\u0323ra */
	2622	"\\u0938\\u094D\\u0930", /* sra */
	2623	"\\u0939\\u094d\\u092E", /* hma */
	2624	"\\u091F\\u094D\\u091F", /* t\\u0323t\\u0323a */
	2625	"\\u091F\\u094D\\u0920", /* t\\u0323t\\u0323ha */
	2626	"\\u0920\\u094D\\u0920", /* t\\u0323ht\\u0323ha*/
	2627	"\\u0921\\u094D\\u0921", /* d\\u0323d\\u0323a */
	2628	"\\u0921\\u094D\\u0922", /* d\\u0323d\\u0323ha */
	2629	"\\u091F\\u094D\\u092F", /* t\\u0323ya */
	2630	"\\u0920\\u094D\\u092F", /* t\\u0323hya */
	2631	"\\u0921\\u094D\\u092F", /* d\\u0323ya */
	2632	"\\u0922\\u094D\\u092F", /* d\\u0323hya */
	2633	// "hma", /* hma */
	2634	"\\u0939\\u094D\\u092F", /* hya */
	2635	"\\u0936\\u0943", /* s\\u0301r\\u0325a */
	2636	"\\u0936\\u094D\\u091A", /* s\\u0301ca */
	2637	"\\u090d", /* e\\u0306 */
	2638	"\\u0938\\u0902\\u091C\\u0940\\u092C\\u094D \\u0938\\u0947\\u0928\\u0917\\u0941\\u092A\\u094D\\u0924",
	2639	"\\u0906\\u0928\\u0902\\u0926\\u094D \\u0935\\u0926\\u094D\\u0926\\u093F\\u0930\\u093E\\u091C\\u0941",
	2640	"\\u0906",
	2641	"\\u0905",
	2642	};
	2643	UErrorCode status = U_ZERO_ERROR;
	2644	UParseError parseError;
	2645	UnicodeString message;
	2646	Transliterator* latinToDev=Transliterator::createInstance("Latin-Devanagari", UTRANS_FORWARD, parseError, status);
	2647	Transliterator* devToLatin=Transliterator::createInstance("Devanagari-Latin", UTRANS_FORWARD, parseError, status);
	2648	if(U_FAILURE(status)){
	2649	dataerrln("FAIL: construction " + UnicodeString(" Error: ") + u_errorName(status));
	2650	dataerrln("PreContext: " + prettify(parseError.preContext) + " PostContext: " + prettify( parseError.postContext) );
	2651	return;
	2652	}
	2653	UnicodeString gotResult;
	2654	for(int i= 0; i<MAX_LEN; i++){
	2655	gotResult = source[i];
	2656	expect(*latinToDev,CharsToUnicodeString(source[i]),CharsToUnicodeString(expected[i]));
	2657	expect(*devToLatin,CharsToUnicodeString(expected[i]),CharsToUnicodeString(source[i]));
	2658	}
	2659	delete latinToDev;
	2660	delete devToLatin;
	2661	}
	2662
	2663	void TransliteratorTest::TestTeluguLatinRT(){
	2664	const int MAX_LEN=10;
	2665	const char* const source[MAX_LEN] = {
	2666	"raghur\\u0101m vi\\u015Bvan\\u0101dha", /* Raghuram Viswanadha */
	2667	"\\u0101nand vaddir\\u0101ju", /* Anand Vaddiraju */
	2668	"r\\u0101j\\u012Bv ka\\u015Barab\\u0101da", /* Rajeev Kasarabada */
	2669	"san\\u0304j\\u012Bv ka\\u015Barab\\u0101da", /* sanjeev kasarabada */
	2670	"san\\u0304j\\u012Bb sen'gupta", /* sanjib sengupata */
	2671	"amar\\u0113ndra hanum\\u0101nula", /* Amarendra hanumanula */
	2672	"ravi kum\\u0101r vi\\u015Bvan\\u0101dha", /* Ravi Kumar Viswanadha */
	2673	"\\u0101ditya kandr\\u0113gula", /* Aditya Kandregula */
	2674	"\\u015Br\\u012Bdhar ka\\u1E47\\u1E6Dama\\u015Be\\u1E6D\\u1E6Di",/* Shridhar Kantamsetty */
	2675	"m\\u0101dhav de\\u015Be\\u1E6D\\u1E6Di" /* Madhav Desetty */
	2676	};
	2677
	2678	const char* const expected[MAX_LEN] = {
	2679	"\\u0c30\\u0c18\\u0c41\\u0c30\\u0c3e\\u0c2e\\u0c4d \\u0c35\\u0c3f\\u0c36\\u0c4d\\u0c35\\u0c28\\u0c3e\\u0c27",
	2680	"\\u0c06\\u0c28\\u0c02\\u0c26\\u0c4d \\u0C35\\u0C26\\u0C4D\\u0C26\\u0C3F\\u0C30\\u0C3E\\u0C1C\\u0C41",
	2681	"\\u0c30\\u0c3e\\u0c1c\\u0c40\\u0c35\\u0c4d \\u0c15\\u0c36\\u0c30\\u0c2c\\u0c3e\\u0c26",
	2682	"\\u0c38\\u0c02\\u0c1c\\u0c40\\u0c35\\u0c4d \\u0c15\\u0c36\\u0c30\\u0c2c\\u0c3e\\u0c26",
	2683	"\\u0c38\\u0c02\\u0c1c\\u0c40\\u0c2c\\u0c4d \\u0c38\\u0c46\\u0c28\\u0c4d\\u0c17\\u0c41\\u0c2a\\u0c4d\\u0c24",
	2684	"\\u0c05\\u0c2e\\u0c30\\u0c47\\u0c02\\u0c26\\u0c4d\\u0c30 \\u0c39\\u0c28\\u0c41\\u0c2e\\u0c3e\\u0c28\\u0c41\\u0c32",
	2685	"\\u0c30\\u0c35\\u0c3f \\u0c15\\u0c41\\u0c2e\\u0c3e\\u0c30\\u0c4d \\u0c35\\u0c3f\\u0c36\\u0c4d\\u0c35\\u0c28\\u0c3e\\u0c27",
	2686	"\\u0c06\\u0c26\\u0c3f\\u0c24\\u0c4d\\u0c2f \\u0C15\\u0C02\\u0C26\\u0C4D\\u0C30\\u0C47\\u0C17\\u0C41\\u0c32",
	2687	"\\u0c36\\u0c4d\\u0c30\\u0c40\\u0C27\\u0C30\\u0C4D \\u0c15\\u0c02\\u0c1f\\u0c2e\\u0c36\\u0c46\\u0c1f\\u0c4d\\u0c1f\\u0c3f",
	2688	"\\u0c2e\\u0c3e\\u0c27\\u0c35\\u0c4d \\u0c26\\u0c46\\u0c36\\u0c46\\u0c1f\\u0c4d\\u0c1f\\u0c3f",
	2689	};
	2690
	2691	UErrorCode status = U_ZERO_ERROR;
	2692	UParseError parseError;
	2693	UnicodeString message;
	2694	Transliterator* latinToDev=Transliterator::createInstance("Latin-Telugu", UTRANS_FORWARD, parseError, status);
	2695	Transliterator* devToLatin=Transliterator::createInstance("Telugu-Latin", UTRANS_FORWARD, parseError, status);
	2696	if(U_FAILURE(status)){
	2697	dataerrln("FAIL: construction " + UnicodeString(" Error: ") + u_errorName(status));
	2698	dataerrln("PreContext: " + prettify(parseError.preContext) + " PostContext: " + prettify( parseError.postContext) );
	2699	return;
	2700	}
	2701	UnicodeString gotResult;
	2702	for(int i= 0; i<MAX_LEN; i++){
	2703	gotResult = source[i];
	2704	expect(*latinToDev,CharsToUnicodeString(source[i]),CharsToUnicodeString(expected[i]));
	2705	expect(*devToLatin,CharsToUnicodeString(expected[i]),CharsToUnicodeString(source[i]));
	2706	}
	2707	delete latinToDev;
	2708	delete devToLatin;
	2709	}
	2710
	2711	void TransliteratorTest::TestSanskritLatinRT(){
	2712	const int MAX_LEN =16;
	2713	const char* const source[MAX_LEN] = {
	2714	"rmk\\u1E63\\u0113t",
	2715	"\\u015Br\\u012Bmad",
	2716	"bhagavadg\\u012Bt\\u0101",
	2717	"adhy\\u0101ya",
	2718	"arjuna",
	2719	"vi\\u1E63\\u0101da",
	2720	"y\\u014Dga",
	2721	"dhr\\u0325tar\\u0101\\u1E63\\u1E6Dra",
	2722	"uv\\u0101cr\\u0325",
	2723	"dharmak\\u1E63\\u0113tr\\u0113",
	2724	"kuruk\\u1E63\\u0113tr\\u0113",
	2725	"samav\\u0113t\\u0101",
	2726	"yuyutsava\\u1E25",
	2727	"m\\u0101mak\\u0101\\u1E25",
	2728	// "p\\u0101\\u1E47\\u1E0Dav\\u0101\\u015Bcaiva",
	2729	"kimakurvata",
	2730	"san\\u0304java",
	2731	};
	2732	const char* const expected[MAX_LEN] = {
	2733	"\\u0930\\u094D\\u092E\\u094D\\u0915\\u094D\\u0937\\u0947\\u0924\\u094D",
	2734	"\\u0936\\u094d\\u0930\\u0940\\u092e\\u0926\\u094d",
	2735	"\\u092d\\u0917\\u0935\\u0926\\u094d\\u0917\\u0940\\u0924\\u093e",
	2736	"\\u0905\\u0927\\u094d\\u092f\\u093e\\u092f",
	2737	"\\u0905\\u0930\\u094d\\u091c\\u0941\\u0928",
	2738	"\\u0935\\u093f\\u0937\\u093e\\u0926",
	2739	"\\u092f\\u094b\\u0917",
	2740	"\\u0927\\u0943\\u0924\\u0930\\u093e\\u0937\\u094d\\u091f\\u094d\\u0930",
	2741	"\\u0909\\u0935\\u093E\\u091A\\u0943",
	2742	"\\u0927\\u0930\\u094d\\u092e\\u0915\\u094d\\u0937\\u0947\\u0924\\u094d\\u0930\\u0947",
	2743	"\\u0915\\u0941\\u0930\\u0941\\u0915\\u094d\\u0937\\u0947\\u0924\\u094d\\u0930\\u0947",
	2744	"\\u0938\\u092e\\u0935\\u0947\\u0924\\u093e",
	2745	"\\u092f\\u0941\\u092f\\u0941\\u0924\\u094d\\u0938\\u0935\\u0903",
	2746	"\\u092e\\u093e\\u092e\\u0915\\u093e\\u0903",
	2747	//"\\u092a\\u093e\\u0923\\u094d\\u0921\\u0935\\u093e\\u0936\\u094d\\u091a\\u0948\\u0935",
	2748	"\\u0915\\u093f\\u092e\\u0915\\u0941\\u0930\\u094d\\u0935\\u0924",
	2749	"\\u0938\\u0902\\u091c\\u0935",
	2750	};
	2751	UErrorCode status = U_ZERO_ERROR;
	2752	UParseError parseError;
	2753	UnicodeString message;
	2754	Transliterator* latinToDev=Transliterator::createInstance("Latin-Devanagari", UTRANS_FORWARD, parseError, status);
	2755	Transliterator* devToLatin=Transliterator::createInstance("Devanagari-Latin", UTRANS_FORWARD, parseError, status);
	2756	if(U_FAILURE(status)){
	2757	dataerrln("FAIL: construction " + UnicodeString(" Error: ") + u_errorName(status));
	2758	dataerrln("PreContext: " + prettify(parseError.preContext) + " PostContext: " + prettify( parseError.postContext) );
	2759	return;
	2760	}
	2761	UnicodeString gotResult;
	2762	for(int i= 0; i<MAX_LEN; i++){
	2763	gotResult = source[i];
	2764	expect(*latinToDev,CharsToUnicodeString(source[i]),CharsToUnicodeString(expected[i]));
	2765	expect(*devToLatin,CharsToUnicodeString(expected[i]),CharsToUnicodeString(source[i]));
	2766	}
	2767	delete latinToDev;
	2768	delete devToLatin;
	2769	}
	2770
	2771
	2772	void TransliteratorTest::TestCompoundLatinRT(){
	2773	const char* const source[] = {
	2774	"rmk\\u1E63\\u0113t",
	2775	"\\u015Br\\u012Bmad",
	2776	"bhagavadg\\u012Bt\\u0101",
	2777	"adhy\\u0101ya",
	2778	"arjuna",
	2779	"vi\\u1E63\\u0101da",
	2780	"y\\u014Dga",
	2781	"dhr\\u0325tar\\u0101\\u1E63\\u1E6Dra",
	2782	"uv\\u0101cr\\u0325",
	2783	"dharmak\\u1E63\\u0113tr\\u0113",
	2784	"kuruk\\u1E63\\u0113tr\\u0113",
	2785	"samav\\u0113t\\u0101",
	2786	"yuyutsava\\u1E25",
	2787	"m\\u0101mak\\u0101\\u1E25",
	2788	// "p\\u0101\\u1E47\\u1E0Dav\\u0101\\u015Bcaiva",
	2789	"kimakurvata",
	2790	"san\\u0304java"
	2791	};
	2792	const int MAX_LEN = UPRV_LENGTHOF(source);
	2793	const char* const expected[MAX_LEN] = {
	2794	"\\u0930\\u094D\\u092E\\u094D\\u0915\\u094D\\u0937\\u0947\\u0924\\u094D",
	2795	"\\u0936\\u094d\\u0930\\u0940\\u092e\\u0926\\u094d",
	2796	"\\u092d\\u0917\\u0935\\u0926\\u094d\\u0917\\u0940\\u0924\\u093e",
	2797	"\\u0905\\u0927\\u094d\\u092f\\u093e\\u092f",
	2798	"\\u0905\\u0930\\u094d\\u091c\\u0941\\u0928",
	2799	"\\u0935\\u093f\\u0937\\u093e\\u0926",
	2800	"\\u092f\\u094b\\u0917",
	2801	"\\u0927\\u0943\\u0924\\u0930\\u093e\\u0937\\u094d\\u091f\\u094d\\u0930",
	2802	"\\u0909\\u0935\\u093E\\u091A\\u0943",
	2803	"\\u0927\\u0930\\u094d\\u092e\\u0915\\u094d\\u0937\\u0947\\u0924\\u094d\\u0930\\u0947",
	2804	"\\u0915\\u0941\\u0930\\u0941\\u0915\\u094d\\u0937\\u0947\\u0924\\u094d\\u0930\\u0947",
	2805	"\\u0938\\u092e\\u0935\\u0947\\u0924\\u093e",
	2806	"\\u092f\\u0941\\u092f\\u0941\\u0924\\u094d\\u0938\\u0935\\u0903",
	2807	"\\u092e\\u093e\\u092e\\u0915\\u093e\\u0903",
	2808	// "\\u092a\\u093e\\u0923\\u094d\\u0921\\u0935\\u093e\\u0936\\u094d\\u091a\\u0948\\u0935",
	2809	"\\u0915\\u093f\\u092e\\u0915\\u0941\\u0930\\u094d\\u0935\\u0924",
	2810	"\\u0938\\u0902\\u091c\\u0935"
	2811	};
	2812	if(MAX_LEN != UPRV_LENGTHOF(expected)) {
	2813	errln("error in TestCompoundLatinRT: source[] and expected[] have different lengths!");
	2814	return;
	2815	}
	2816
	2817	UErrorCode status = U_ZERO_ERROR;
	2818	UParseError parseError;
	2819	UnicodeString message;
	2820	Transliterator* devToLatinToDev =Transliterator::createInstance("Devanagari-Latin;Latin-Devanagari", UTRANS_FORWARD, parseError, status);
	2821	Transliterator* latinToDevToLatin=Transliterator::createInstance("Latin-Devanagari;Devanagari-Latin", UTRANS_FORWARD, parseError, status);
	2822	Transliterator* devToTelToDev =Transliterator::createInstance("Devanagari-Telugu;Telugu-Devanagari", UTRANS_FORWARD, parseError, status);
	2823	Transliterator* latinToTelToLatin=Transliterator::createInstance("Latin-Telugu;Telugu-Latin", UTRANS_FORWARD, parseError, status);
	2824
	2825	if(U_FAILURE(status)){
	2826	dataerrln("FAIL: construction " + UnicodeString(" Error: ") + u_errorName(status));
	2827	dataerrln("PreContext: " + prettify(parseError.preContext) + " PostContext: " + prettify( parseError.postContext) );
	2828	return;
	2829	}
	2830	UnicodeString gotResult;
	2831	for(int i= 0; i<MAX_LEN; i++){
	2832	gotResult = source[i];
	2833	expect(*devToLatinToDev,CharsToUnicodeString(expected[i]),CharsToUnicodeString(expected[i]));
	2834	expect(*latinToDevToLatin,CharsToUnicodeString(source[i]),CharsToUnicodeString(source[i]));
	2835	expect(*latinToTelToLatin,CharsToUnicodeString(source[i]),CharsToUnicodeString(source[i]));
	2836
	2837	}
	2838	delete(latinToDevToLatin);
	2839	delete(devToLatinToDev);
	2840	delete(devToTelToDev);
	2841	delete(latinToTelToLatin);
	2842	}
	2843
	2844	/**
	2845	* Test Gurmukhi-Devanagari Tippi and Bindi
	2846	*/
	2847	void TransliteratorTest::TestGurmukhiDevanagari(){
	2848	// the rule says:
	2849	// (\u0902) (when preceded by vowel) ---> (\u0A02)
	2850	// (\u0902) (when preceded by consonant) ---> (\u0A70)
	2851	UErrorCode status = U_ZERO_ERROR;
	2852	UnicodeSet vowel(UnicodeString("[\\u0905-\\u090A \\u090F\\u0910\\u0913\\u0914 \\u093e-\\u0942\\u0947\\u0948\\u094B\\u094C\\u094D]", -1, US_INV).unescape(), status);
	2853	UnicodeSet non_vowel(UnicodeString("[\\u0915-\\u0928\\u092A-\\u0930]", -1, US_INV).unescape(), status);
	2854	UParseError parseError;
	2855
	2856	UnicodeSetIterator vIter(vowel);
	2857	UnicodeSetIterator nvIter(non_vowel);
	2858	Transliterator* trans = Transliterator::createInstance("Devanagari-Gurmukhi",UTRANS_FORWARD, parseError, status);
	2859	if(U_FAILURE(status)) {
	2860	dataerrln("Error creating transliterator %s", u_errorName(status));
	2861	delete trans;
	2862	return;
	2863	}
	2864	UnicodeString src (" \\u0902", -1, US_INV);
	2865	UnicodeString expected(" \\u0A02", -1, US_INV);
	2866	src = src.unescape();
	2867	expected= expected.unescape();
	2868
	2869	while(vIter.next()){
	2870	src.setCharAt(0,(UChar) vIter.getCodepoint());
	2871	expected.setCharAt(0,(UChar) (vIter.getCodepoint()+0x0100));
	2872	expect(*trans,src,expected);
	2873	}
	2874
	2875	expected.setCharAt(1,0x0A70);
	2876	while(nvIter.next()){
	2877	//src.setCharAt(0,(char) nvIter.codepoint);
	2878	src.setCharAt(0,(UChar)nvIter.getCodepoint());
	2879	expected.setCharAt(0,(UChar) (nvIter.getCodepoint()+0x0100));
	2880	expect(*trans,src,expected);
	2881	}
	2882	delete trans;
	2883	}
	2884	/**
	2885	* Test instantiation from a locale.
	2886	*/
	2887	void TransliteratorTest::TestLocaleInstantiation(void) {
	2888	UParseError pe;
	2889	UErrorCode ec = U_ZERO_ERROR;
	2890	Transliterator *t = Transliterator::createInstance("ru_RU-Latin", UTRANS_FORWARD, pe, ec);
	2891	if (U_FAILURE(ec)) {
	2892	dataerrln("FAIL: createInstance(ru_RU-Latin) - %s", u_errorName(ec));
	2893	delete t;
	2894	return;
	2895	}
	2896	expect(*t, CharsToUnicodeString("\\u0430"), "a");
	2897	delete t;
	2898
	2899	t = Transliterator::createInstance("en-el", UTRANS_FORWARD, pe, ec);
	2900	if (U_FAILURE(ec)) {
	2901	errln("FAIL: createInstance(en-el)");
	2902	delete t;
	2903	return;
	2904	}
	2905	expect(*t, "a", CharsToUnicodeString("\\u03B1"));
	2906	delete t;
	2907	}
	2908
	2909	/**
	2910	* Test title case handling of accent (should ignore accents)
	2911	*/
	2912	void TransliteratorTest::TestTitleAccents(void) {
	2913	UParseError pe;
	2914	UErrorCode ec = U_ZERO_ERROR;
	2915	Transliterator *t = Transliterator::createInstance("Title", UTRANS_FORWARD, pe, ec);
	2916	if (U_FAILURE(ec)) {
	2917	errln("FAIL: createInstance(Title)");
	2918	delete t;
	2919	return;
	2920	}
	2921	expect(*t, CharsToUnicodeString("a\\u0300b can't abe"), CharsToUnicodeString("A\\u0300b Can't Abe"));
	2922	delete t;
	2923	}
	2924
	2925	/**
	2926	* Basic test of a locale resource based rule.
	2927	*/
	2928	void TransliteratorTest::TestLocaleResource() {
	2929	const char* DATA[] = {
	2930	// id from to
	2931	//"Latin-Greek/UNGEGN", "b", "\\u03bc\\u03c0",
	2932	"Latin-el", "b", "\\u03bc\\u03c0",
	2933	"Latin-Greek", "b", "\\u03B2",
	2934	"Greek-Latin/UNGEGN", "\\u03B2", "v",
	2935	"el-Latin", "\\u03B2", "v",
	2936	"Greek-Latin", "\\u03B2", "b",
	2937	};
	2938	const int32_t DATA_length = UPRV_LENGTHOF(DATA);
	2939	for (int32_t i=0; i<DATA_length; i+=3) {
	2940	UParseError pe;
	2941	UErrorCode ec = U_ZERO_ERROR;
	2942	Transliterator *t = Transliterator::createInstance(DATA[i], UTRANS_FORWARD, pe, ec);
	2943	if (U_FAILURE(ec)) {
	2944	dataerrln((UnicodeString)"FAIL: createInstance(" + DATA[i] + ") - " + u_errorName(ec));
	2945	delete t;
	2946	continue;
	2947	}
	2948	expect(*t, CharsToUnicodeString(DATA[i+1]),
	2949	CharsToUnicodeString(DATA[i+2]));
	2950	delete t;
	2951	}
	2952	}
	2953
	2954	/**
	2955	* Make sure parse errors reference the right line.
	2956	*/
	2957	void TransliteratorTest::TestParseError() {
	2958	static const char* rule =
	2959	"a > b;\n"
	2960	"# more stuff\n"
	2961	"d << b;";
	2962	UErrorCode ec = U_ZERO_ERROR;
	2963	UParseError pe;
	2964	Transliterator *t = Transliterator::createFromRules("ID", rule, UTRANS_FORWARD, pe, ec);
	2965	delete t;
	2966	if (U_FAILURE(ec)) {
	2967	UnicodeString err(pe.preContext);
	2968	err.append((UChar)124/\|/).append(pe.postContext);
	2969	if (err.indexOf("d << b") >= 0) {
	2970	logln("Ok: " + err);
	2971	} else {
	2972	errln("FAIL: " + err);
	2973	}
	2974	}
	2975	else {
	2976	errln("FAIL: no syntax error");
	2977	}
	2978	static const char* maskingRule =
	2979	"a>x;\n"
	2980	"# more stuff\n"
	2981	"ab>y;";
	2982	ec = U_ZERO_ERROR;
	2983	delete Transliterator::createFromRules("ID", maskingRule, UTRANS_FORWARD, pe, ec);
	2984	if (ec != U_RULE_MASK_ERROR) {
	2985	errln("FAIL: returned %s instead of U_RULE_MASK_ERROR", u_errorName(ec));
	2986	}
	2987	else if (UnicodeString("a > x;") != UnicodeString(pe.preContext)) {
	2988	errln("FAIL: did not get expected precontext");
	2989	}
	2990	else if (UnicodeString("ab > y;") != UnicodeString(pe.postContext)) {
	2991	errln("FAIL: did not get expected postcontext");
	2992	}
	2993	}
	2994
	2995	/**
	2996	* Make sure sets on output are disallowed.
	2997	*/
	2998	void TransliteratorTest::TestOutputSet() {
	2999	UnicodeString rule = "$set = [a-cm-n]; b > $set;";
	3000	UErrorCode ec = U_ZERO_ERROR;
	3001	UParseError pe;
	3002	Transliterator *t = Transliterator::createFromRules("ID", rule, UTRANS_FORWARD, pe, ec);
	3003	delete t;
	3004	if (U_FAILURE(ec)) {
	3005	UnicodeString err(pe.preContext);
	3006	err.append((UChar)124/\|/).append(pe.postContext);
	3007	logln("Ok: " + err);
	3008	return;
	3009	}
	3010	errln("FAIL: No syntax error");
	3011	}
	3012
	3013	/**
	3014	* Test the use variable range pragma, making sure that use of
	3015	* variable range characters is detected and flagged as an error.
	3016	*/
	3017	void TransliteratorTest::TestVariableRange() {
	3018	UnicodeString rule = "use variable range 0x70 0x72; a > A; b > B; q > Q;";
	3019	UErrorCode ec = U_ZERO_ERROR;
	3020	UParseError pe;
	3021	Transliterator *t = Transliterator::createFromRules("ID", rule, UTRANS_FORWARD, pe, ec);
	3022	delete t;
	3023	if (U_FAILURE(ec)) {
	3024	UnicodeString err(pe.preContext);
	3025	err.append((UChar)124/\|/).append(pe.postContext);
	3026	logln("Ok: " + err);
	3027	return;
	3028	}
	3029	errln("FAIL: No syntax error");
	3030	}
	3031
	3032	/**
	3033	* Test invalid post context error handling
	3034	*/
	3035	void TransliteratorTest::TestInvalidPostContext() {
	3036	UnicodeString rule = "a}b{c>d;";
	3037	UErrorCode ec = U_ZERO_ERROR;
	3038	UParseError pe;
	3039	Transliterator *t = Transliterator::createFromRules("ID", rule, UTRANS_FORWARD, pe, ec);
	3040	delete t;
	3041	if (U_FAILURE(ec)) {
	3042	UnicodeString err(pe.preContext);
	3043	err.append((UChar)124/\|/).append(pe.postContext);
	3044	if (err.indexOf("a}b{c") >= 0) {
	3045	logln("Ok: " + err);
	3046	} else {
	3047	errln("FAIL: " + err);
	3048	}
	3049	return;
	3050	}
	3051	errln("FAIL: No syntax error");
	3052	}
	3053
	3054	/**
	3055	* Test ID form variants
	3056	*/
	3057	void TransliteratorTest::TestIDForms() {
	3058	const char* DATA[] = {
	3059	"NFC", NULL, "NFD",
	3060	"nfd", NULL, "NFC", // make sure case is ignored
	3061	"Any-NFKD", NULL, "Any-NFKC",
	3062	"Null", NULL, "Null",
	3063	"-nfkc", "nfkc", "NFKD",
	3064	"-nfkc/", "nfkc", "NFKD",
	3065	"Latin-Greek/UNGEGN", NULL, "Greek-Latin/UNGEGN",
	3066	"Greek/UNGEGN-Latin", "Greek-Latin/UNGEGN", "Latin-Greek/UNGEGN",
	3067	"Bengali-Devanagari/", "Bengali-Devanagari", "Devanagari-Bengali",
	3068	"Source-", NULL, NULL,
	3069	"Source/Variant-", NULL, NULL,
	3070	"Source-/Variant", NULL, NULL,
	3071	"/Variant", NULL, NULL,
	3072	"/Variant-", NULL, NULL,
	3073	"-/Variant", NULL, NULL,
	3074	"-/", NULL, NULL,
	3075	"-", NULL, NULL,
	3076	"/", NULL, NULL,
	3077	};
	3078	const int32_t DATA_length = UPRV_LENGTHOF(DATA);
	3079
	3080	for (int32_t i=0; i<DATA_length; i+=3) {
	3081	const char* ID = DATA[i];
	3082	const char* expID = DATA[i+1];
	3083	const char* expInvID = DATA[i+2];
	3084	UBool expValid = (expInvID != NULL);
	3085	if (expID == NULL) {
	3086	expID = ID;
	3087	}
	3088	UParseError pe;
	3089	UErrorCode ec = U_ZERO_ERROR;
	3090	Transliterator *t =
	3091	Transliterator::createInstance(ID, UTRANS_FORWARD, pe, ec);
	3092	if (U_FAILURE(ec)) {
	3093	if (!expValid) {
	3094	logln((UnicodeString)"Ok: getInstance(" + ID +") => " + u_errorName(ec));
	3095	} else {
	3096	dataerrln((UnicodeString)"FAIL: Couldn't create " + ID + " - " + u_errorName(ec));
	3097	}
	3098	delete t;
	3099	continue;
	3100	}
	3101	Transliterator *u = t->createInverse(ec);
	3102	if (U_FAILURE(ec)) {
	3103	errln((UnicodeString)"FAIL: Couldn't create inverse of " + ID);
	3104	delete t;
	3105	delete u;
	3106	continue;
	3107	}
	3108	if (t->getID() == expID &&
	3109	u->getID() == expInvID) {
	3110	logln((UnicodeString)"Ok: " + ID + ".getInverse() => " + expInvID);
	3111	} else {
	3112	errln((UnicodeString)"FAIL: getInstance(" + ID + ") => " +
	3113	t->getID() + " x getInverse() => " + u->getID() +
	3114	", expected " + expInvID);
	3115	}
	3116	delete t;
	3117	delete u;
	3118	}
	3119	}
	3120
	3121	static const UChar SPACE[] = {32,0};
	3122	static const UChar NEWLINE[] = {10,0};
	3123	static const UChar RETURN[] = {13,0};
	3124	static const UChar EMPTY[] = {0};
	3125
	3126	void TransliteratorTest::checkRules(const UnicodeString& label, Transliterator& t2,
	3127	const UnicodeString& testRulesForward) {
	3128	UnicodeString rules2; t2.toRules(rules2, TRUE);
	3129	//rules2 = TestUtility.replaceAll(rules2, new UnicodeSet("[' '\n\r]"), "");
	3130	rules2.findAndReplace(SPACE, EMPTY);
	3131	rules2.findAndReplace(NEWLINE, EMPTY);
	3132	rules2.findAndReplace(RETURN, EMPTY);
	3133
	3134	UnicodeString testRules(testRulesForward); testRules.findAndReplace(SPACE, EMPTY);
	3135
	3136	if (rules2 != testRules) {
	3137	errln(label);
	3138	logln((UnicodeString)"GENERATED RULES: " + rules2);
	3139	logln((UnicodeString)"SHOULD BE: " + testRulesForward);
	3140	}
	3141	}
	3142
	3143	/**
	3144	* Mark's toRules test.
	3145	*/
	3146	void TransliteratorTest::TestToRulesMark() {
	3147	const char* testRules =
	3148	"::[[:Latin:][:Mark:]];"
	3149	"::NFKD (NFC);"
	3150	"::Lower (Lower);"
	3151	"a <> \\u03B1;" // alpha
	3152	"::NFKC (NFD);"
	3153	"::Upper (Lower);"
	3154	"::Lower ();"
	3155	"::([[:Greek:][:Mark:]]);"
	3156	;
	3157	const char* testRulesForward =
	3158	"::[[:Latin:][:Mark:]];"
	3159	"::NFKD(NFC);"
	3160	"::Lower(Lower);"
	3161	"a > \\u03B1;"
	3162	"::NFKC(NFD);"
	3163	"::Upper (Lower);"
	3164	"::Lower ();"
	3165	;
	3166	const char* testRulesBackward =
	3167	"::[[:Greek:][:Mark:]];"
	3168	"::Lower (Upper);"
	3169	"::NFD(NFKC);"
	3170	"\\u03B1 > a;"
	3171	"::Lower(Lower);"
	3172	"::NFC(NFKD);"
	3173	;
	3174	UnicodeString source = CharsToUnicodeString("\\u00E1"); // a-acute
	3175	UnicodeString target = CharsToUnicodeString("\\u03AC"); // alpha-acute
	3176
	3177	UParseError pe;
	3178	UErrorCode ec = U_ZERO_ERROR;
	3179	LocalPointer<Transliterator> t2(
	3180	Transliterator::createFromRules("source-target", UnicodeString(testRules, -1, US_INV), UTRANS_FORWARD, pe, ec));
	3181	LocalPointer<Transliterator> t3(
	3182	Transliterator::createFromRules("target-source", UnicodeString(testRules, -1, US_INV), UTRANS_REVERSE, pe, ec));
	3183
	3184	if (U_FAILURE(ec)) {
	3185	dataerrln((UnicodeString)"FAIL: createFromRules => " + u_errorName(ec));
	3186	return;
	3187	}
	3188
	3189	expect(*t2, source, target);
	3190	expect(*t3, target, source);
	3191
	3192	checkRules("Failed toRules FORWARD", *t2, UnicodeString(testRulesForward, -1, US_INV));
	3193	checkRules("Failed toRules BACKWARD", *t3, UnicodeString(testRulesBackward, -1, US_INV));
	3194	}
	3195
	3196	/**
	3197	* Test Escape and Unescape transliterators.
	3198	*/
	3199	void TransliteratorTest::TestEscape() {
	3200	UParseError pe;
	3201	UErrorCode ec;
	3202	Transliterator *t;
	3203
	3204	ec = U_ZERO_ERROR;
	3205	t = Transliterator::createInstance("Hex-Any", UTRANS_FORWARD, pe, ec);
	3206	if (U_FAILURE(ec)) {
	3207	errln((UnicodeString)"FAIL: createInstance");
	3208	} else {
	3209	expect(*t,
	3210	UNICODE_STRING_SIMPLE("\\x{40}\\U000000312Q"),
	3211	"@12Q");
	3212	}
	3213	delete t;
	3214
	3215	ec = U_ZERO_ERROR;
	3216	t = Transliterator::createInstance("Any-Hex/C", UTRANS_FORWARD, pe, ec);
	3217	if (U_FAILURE(ec)) {
	3218	errln((UnicodeString)"FAIL: createInstance");
	3219	} else {
	3220	expect(*t,
	3221	CharsToUnicodeString("A\\U0010BEEF\\uFEED"),
	3222	UNICODE_STRING_SIMPLE("\\u0041\\U0010BEEF\\uFEED"));
	3223	}
	3224	delete t;
	3225
	3226	ec = U_ZERO_ERROR;
	3227	t = Transliterator::createInstance("Any-Hex/Java", UTRANS_FORWARD, pe, ec);
	3228	if (U_FAILURE(ec)) {
	3229	errln((UnicodeString)"FAIL: createInstance");
	3230	} else {
	3231	expect(*t,
	3232	CharsToUnicodeString("A\\U0010BEEF\\uFEED"),
	3233	UNICODE_STRING_SIMPLE("\\u0041\\uDBEF\\uDEEF\\uFEED"));
	3234	}
	3235	delete t;
	3236
	3237	ec = U_ZERO_ERROR;
	3238	t = Transliterator::createInstance("Any-Hex/Perl", UTRANS_FORWARD, pe, ec);
	3239	if (U_FAILURE(ec)) {
	3240	errln((UnicodeString)"FAIL: createInstance");
	3241	} else {
	3242	expect(*t,
	3243	CharsToUnicodeString("A\\U0010BEEF\\uFEED"),
	3244	UNICODE_STRING_SIMPLE("\\x{41}\\x{10BEEF}\\x{FEED}"));
	3245	}
	3246	delete t;
	3247	}
	3248
	3249
	3250	void TransliteratorTest::TestAnchorMasking(){
	3251	UnicodeString rule ("^a > Q; a > q;");
	3252	UErrorCode status= U_ZERO_ERROR;
	3253	UParseError parseError;
	3254
	3255	Transliterator* t = Transliterator::createFromRules("ID", rule, UTRANS_FORWARD,parseError,status);
	3256	if(U_FAILURE(status)){
	3257	errln(UnicodeString("FAIL: ") + "ID" +
	3258	".createFromRules() => bad rules" +
	3259	/", parse error " + parseError.code +/
	3260	", line " + parseError.line +
	3261	", offset " + parseError.offset +
	3262	", context " + prettify(parseError.preContext, TRUE) +
	3263	", rules: " + prettify(rule, TRUE));
	3264	}
	3265	delete t;
	3266	}
	3267
	3268	/**
	3269	* Make sure display names of variants look reasonable.
	3270	*/
	3271	void TransliteratorTest::TestDisplayName() {
	3272	#if UCONFIG_NO_FORMATTING
	3273	logln("Skipping, UCONFIG_NO_FORMATTING is set\n");
	3274	return;
	3275	#else
	3276	static const char* DATA[] = {
	3277	// ID, forward name, reverse name
	3278	// Update the text as necessary -- the important thing is
	3279	// not the text itself, but how various cases are handled.
	3280
	3281	// Basic test
	3282	"Any-Hex", "Any to Hex Escape", "Hex Escape to Any",
	3283
	3284	// Variants
	3285	"Any-Hex/Perl", "Any to Hex Escape/Perl", "Hex Escape to Any/Perl",
	3286
	3287	// Target-only IDs
	3288	"NFC", "Any to NFC", "Any to NFD",
	3289	};
	3290
	3291	int32_t DATA_length = UPRV_LENGTHOF(DATA);
	3292
	3293	Locale US("en", "US");
	3294
	3295	for (int32_t i=0; i<DATA_length; i+=3) {
	3296	UnicodeString name;
	3297	Transliterator::getDisplayName(DATA[i], US, name);
	3298	if (name != DATA[i+1]) {
	3299	dataerrln((UnicodeString)"FAIL: " + DATA[i] + ".getDisplayName() => " +
	3300	name + ", expected " + DATA[i+1]);
	3301	} else {
	3302	logln((UnicodeString)"Ok: " + DATA[i] + ".getDisplayName() => " + name);
	3303	}
	3304	UErrorCode ec = U_ZERO_ERROR;
	3305	UParseError pe;
	3306	Transliterator *t = Transliterator::createInstance(DATA[i], UTRANS_REVERSE, pe, ec);
	3307	if (U_FAILURE(ec)) {
	3308	delete t;
	3309	dataerrln("FAIL: createInstance failed - %s", u_errorName(ec));
	3310	continue;
	3311	}
	3312	name = Transliterator::getDisplayName(t->getID(), US, name);
	3313	if (name != DATA[i+2]) {
	3314	dataerrln((UnicodeString)"FAIL: " + t->getID() + ".getDisplayName() => " +
	3315	name + ", expected " + DATA[i+2]);
	3316	} else {
	3317	logln((UnicodeString)"Ok: " + t->getID() + ".getDisplayName() => " + name);
	3318	}
	3319	delete t;
	3320	}
	3321	#endif
	3322	}
	3323
	3324	void TransliteratorTest::TestSpecialCases(void) {
	3325	const UnicodeString registerRules[] = {
	3326	"Any-Dev1", "x > X; y > Y;",
	3327	"Any-Dev2", "XY > Z",
	3328	"Greek-Latin/FAKE",
	3329	CharsToUnicodeString
	3330	("[^[:L:][:M:]] { \\u03bc\\u03c0 > b ; \\u03bc\\u03c0 } [^[:L:][:M:]] > b ; [^[:L:][:M:]] { [\\u039c\\u03bc][\\u03a0\\u03c0] > B ; [\\u039c\\u03bc][\\u03a0\\u03c0] } [^[:L:][:M:]] > B ;"),
	3331	"" // END MARKER
	3332	};
	3333
	3334	const UnicodeString testCases[] = {
	3335	// NORMALIZATION
	3336	// should add more test cases
	3337	"NFD" , CharsToUnicodeString("a\\u0300 \\u00E0 \\u1100\\u1161 \\uFF76\\uFF9E\\u03D3"), "",
	3338	"NFC" , CharsToUnicodeString("a\\u0300 \\u00E0 \\u1100\\u1161 \\uFF76\\uFF9E\\u03D3"), "",
	3339	"NFKD", CharsToUnicodeString("a\\u0300 \\u00E0 \\u1100\\u1161 \\uFF76\\uFF9E\\u03D3"), "",
	3340	"NFKC", CharsToUnicodeString("a\\u0300 \\u00E0 \\u1100\\u1161 \\uFF76\\uFF9E\\u03D3"), "",
	3341
	3342	// mp -> b BUG
	3343	"Greek-Latin/UNGEGN", CharsToUnicodeString("(\\u03BC\\u03C0)"), "(b)",
	3344	"Greek-Latin/FAKE", CharsToUnicodeString("(\\u03BC\\u03C0)"), "(b)",
	3345
	3346	// check for devanagari bug
	3347	"nfd;Dev1;Dev2;nfc", "xy", "Z",
	3348
	3349	// ff, i, dotless-i, I, dotted-I, LJLjlj deseret deeDEE
	3350	"Title", CharsToUnicodeString("ab'cD ffi\\u0131I\\u0130 \\u01C7\\u01C8\\u01C9 ") + DESERET_dee + DESERET_DEE,
	3351	CharsToUnicodeString("Ab'cd Ffi\\u0131ii\\u0307 \\u01C8\\u01C9\\u01C9 ") + DESERET_DEE + DESERET_dee,
	3352
	3353	//TODO: enable this test once Titlecase works right
	3354	/*
	3355	"Title", CharsToUnicodeString("\\uFB00i\\u0131I\\u0130 \\u01C7\\u01C8\\u01C9 ") + DESERET_dee + DESERET_DEE,
	3356	CharsToUnicodeString("Ffi\\u0131ii \\u01C8\\u01C9\\u01C9 ") + DESERET_DEE + DESERET_dee,
	3357	*/
	3358	"Upper", CharsToUnicodeString("ab'cD \\uFB00i\\u0131I\\u0130 \\u01C7\\u01C8\\u01C9 ") + DESERET_dee + DESERET_DEE,
	3359	CharsToUnicodeString("AB'CD FFIII\\u0130 \\u01C7\\u01C7\\u01C7 ") + DESERET_DEE + DESERET_DEE,
	3360	"Lower", CharsToUnicodeString("ab'cD \\uFB00i\\u0131I\\u0130 \\u01C7\\u01C8\\u01C9 ") + DESERET_dee + DESERET_DEE,
	3361	CharsToUnicodeString("ab'cd \\uFB00i\\u0131ii\\u0307 \\u01C9\\u01C9\\u01C9 ") + DESERET_dee + DESERET_dee,
	3362
	3363	"Upper", CharsToUnicodeString("ab'cD \\uFB00i\\u0131I\\u0130 \\u01C7\\u01C8\\u01C9 ") + DESERET_dee + DESERET_DEE, "",
	3364	"Lower", CharsToUnicodeString("ab'cD \\uFB00i\\u0131I\\u0130 \\u01C7\\u01C8\\u01C9 ") + DESERET_dee + DESERET_DEE, "",
	3365
	3366	// FORMS OF S
	3367	"Greek-Latin/UNGEGN", CharsToUnicodeString("\\u03C3 \\u03C3\\u03C2 \\u03C2\\u03C3"),
	3368	CharsToUnicodeString("s ss s\\u0331s\\u0331") ,
	3369	"Latin-Greek/UNGEGN", CharsToUnicodeString("s ss s\\u0331s\\u0331"),
	3370	CharsToUnicodeString("\\u03C3 \\u03C3\\u03C2 \\u03C2\\u03C3") ,
	3371	"Greek-Latin", CharsToUnicodeString("\\u03C3 \\u03C3\\u03C2 \\u03C2\\u03C3"),
	3372	CharsToUnicodeString("s ss s\\u0331s\\u0331") ,
	3373	"Latin-Greek", CharsToUnicodeString("s ss s\\u0331s\\u0331"),
	3374	CharsToUnicodeString("\\u03C3 \\u03C3\\u03C2 \\u03C2\\u03C3"),
	3375	// Tatiana bug
	3376	// Upper: TAT\\u02B9\\u00C2NA
	3377	// Lower: tat\\u02B9\\u00E2na
	3378	// Title: Tat\\u02B9\\u00E2na
	3379	"Upper", CharsToUnicodeString("tat\\u02B9\\u00E2na"),
	3380	CharsToUnicodeString("TAT\\u02B9\\u00C2NA"),
	3381	"Lower", CharsToUnicodeString("TAT\\u02B9\\u00C2NA"),
	3382	CharsToUnicodeString("tat\\u02B9\\u00E2na"),
	3383	"Title", CharsToUnicodeString("tat\\u02B9\\u00E2na"),
	3384	CharsToUnicodeString("Tat\\u02B9\\u00E2na"),
	3385
	3386	"" // END MARKER
	3387	};
	3388
	3389	UParseError pos;
	3390	int32_t i;
	3391	for (i = 0; registerRules[i].length()!=0; i+=2) {
	3392	UErrorCode status = U_ZERO_ERROR;
	3393
	3394	Transliterator *t = Transliterator::createFromRules(registerRules[0+i],
	3395	registerRules[i+1], UTRANS_FORWARD, pos, status);
	3396	if (U_FAILURE(status)) {
	3397	dataerrln("Fails: Unable to create the transliterator from rules. - %s", u_errorName(status));
	3398	} else {
	3399	Transliterator::registerInstance(t);
	3400	}
	3401	}
	3402	for (i = 0; testCases[i].length()!=0; i+=3) {
	3403	UErrorCode ec = U_ZERO_ERROR;
	3404	UParseError pe;
	3405	const UnicodeString& name = testCases[i];
	3406	Transliterator *t = Transliterator::createInstance(name, UTRANS_FORWARD, pe, ec);
	3407	if (U_FAILURE(ec)) {
	3408	dataerrln((UnicodeString)"FAIL: Couldn't create " + name + " - " + u_errorName(ec));
	3409	delete t;
	3410	continue;
	3411	}
	3412	const UnicodeString& id = t->getID();
	3413	const UnicodeString& source = testCases[i+1];
	3414	UnicodeString target;
	3415
	3416	// Automatic generation of targets, to make it simpler to add test cases (and more fail-safe)
	3417
	3418	if (testCases[i+2].length() > 0) {
	3419	target = testCases[i+2];
	3420	} else if (0==id.caseCompare("NFD", U_FOLD_CASE_DEFAULT)) {
	3421	Normalizer::normalize(source, UNORM_NFD, 0, target, ec);
	3422	} else if (0==id.caseCompare("NFC", U_FOLD_CASE_DEFAULT)) {
	3423	Normalizer::normalize(source, UNORM_NFC, 0, target, ec);
	3424	} else if (0==id.caseCompare("NFKD", U_FOLD_CASE_DEFAULT)) {
	3425	Normalizer::normalize(source, UNORM_NFKD, 0, target, ec);
	3426	} else if (0==id.caseCompare("NFKC", U_FOLD_CASE_DEFAULT)) {
	3427	Normalizer::normalize(source, UNORM_NFKC, 0, target, ec);
	3428	} else if (0==id.caseCompare("Lower", U_FOLD_CASE_DEFAULT)) {
	3429	target = source;
	3430	target.toLower(Locale::getUS());
	3431	} else if (0==id.caseCompare("Upper", U_FOLD_CASE_DEFAULT)) {
	3432	target = source;
	3433	target.toUpper(Locale::getUS());
	3434	}
	3435	if (U_FAILURE(ec)) {
	3436	errln((UnicodeString)"FAIL: Internal error normalizing " + source);
	3437	continue;
	3438	}
	3439
	3440	expect(*t, source, target);
	3441	delete t;
	3442	}
	3443	for (i = 0; registerRules[i].length()!=0; i+=2) {
	3444	Transliterator::unregister(registerRules[i]);
	3445	}
	3446	}
	3447
	3448	char* Char32ToEscapedChars(UChar32 ch, char* buffer) {
	3449	if (ch <= 0xFFFF) {
	3450	sprintf(buffer, "\\u%04x", (int)ch);
	3451	} else {
	3452	sprintf(buffer, "\\U%08x", (int)ch);
	3453	}
	3454	return buffer;
	3455	}
	3456
	3457	void TransliteratorTest::TestSurrogateCasing (void) {
	3458	// check that casing handles surrogates
	3459	// titlecase is currently defective
	3460	char buffer[20];
	3461	UChar buffer2[20];
	3462	UChar32 dee;
	3463	U16_GET(DESERET_dee,0, 0, DESERET_dee.length(), dee);
	3464	UnicodeString DEE(u_totitle(dee));
	3465	if (DEE != DESERET_DEE) {
	3466	err("Fails titlecase of surrogates");
	3467	err(Char32ToEscapedChars(dee, buffer));
	3468	err(", ");
	3469	errln(Char32ToEscapedChars(DEE.char32At(0), buffer));
	3470	}
	3471
	3472	UnicodeString deeDEETest=DESERET_dee + DESERET_DEE;
	3473	UnicodeString deedeeTest = DESERET_dee + DESERET_dee;
	3474	UnicodeString DEEDEETest = DESERET_DEE + DESERET_DEE;
	3475	UErrorCode status= U_ZERO_ERROR;
	3476
	3477	u_strToUpper(buffer2, 20, deeDEETest.getBuffer(), deeDEETest.length(), NULL, &status);
	3478	if (U_FAILURE(status) \|\| (UnicodeString(buffer2)!= DEEDEETest)) {
	3479	errln("Fails: Can't uppercase surrogates.");
	3480	}
	3481
	3482	status= U_ZERO_ERROR;
	3483	u_strToLower(buffer2, 20, deeDEETest.getBuffer(), deeDEETest.length(), NULL, &status);
	3484	if (U_FAILURE(status) \|\| (UnicodeString(buffer2)!= deedeeTest)) {
	3485	errln("Fails: Can't lowercase surrogates.");
	3486	}
	3487	}
	3488
	3489	static void _trans(Transliterator& t, const UnicodeString& src,
	3490	UnicodeString& result) {
	3491	result = src;
	3492	t.transliterate(result);
	3493	}
	3494
	3495	static void _trans(const UnicodeString& id, const UnicodeString& src,
	3496	UnicodeString& result, UErrorCode ec) {
	3497	UParseError pe;
	3498	Transliterator *t = Transliterator::createInstance(id, UTRANS_FORWARD, pe, ec);
	3499	if (U_SUCCESS(ec)) {
	3500	_trans(*t, src, result);
	3501	}
	3502	delete t;
	3503	}
	3504
	3505	static UnicodeString _findMatch(const UnicodeString& source,
	3506	const UnicodeString* pairs) {
	3507	UnicodeString empty;
	3508	for (int32_t i=0; pairs[i].length() > 0; i+=2) {
	3509	if (0==source.caseCompare(pairs[i], U_FOLD_CASE_DEFAULT)) {
	3510	return pairs[i+1];
	3511	}
	3512	}
	3513	return empty;
	3514	}
	3515
	3516	// Check to see that incremental gets at least part way through a reasonable string.
	3517
	3518	void TransliteratorTest::TestIncrementalProgress(void) {
	3519	UErrorCode ec = U_ZERO_ERROR;
	3520	UnicodeString latinTest = "The Quick Brown Fox.";
	3521	UnicodeString devaTest;
	3522	_trans("Latin-Devanagari", latinTest, devaTest, ec);
	3523	UnicodeString kataTest;
	3524	_trans("Latin-Katakana", latinTest, kataTest, ec);
	3525	if (U_FAILURE(ec)) {
	3526	errln("FAIL: Internal error");
	3527	return;
	3528	}
	3529	const UnicodeString tests[] = {
	3530	"Any", latinTest,
	3531	"Latin", latinTest,
	3532	"Halfwidth", latinTest,
	3533	"Devanagari", devaTest,
	3534	"Katakana", kataTest,
	3535	"" // END MARKER
	3536	};
	3537
	3538	UnicodeString test("The Quick Brown Fox Jumped Over The Lazy Dog.");
	3539	int32_t i = 0, j=0, k=0;
	3540	int32_t sources = Transliterator::countAvailableSources();
	3541	for (i = 0; i < sources; i++) {
	3542	UnicodeString source;
	3543	Transliterator::getAvailableSource(i, source);
	3544	UnicodeString test = _findMatch(source, tests);
	3545	if (test.length() == 0) {
	3546	logln((UnicodeString)"Skipping " + source + "-X");
	3547	continue;
	3548	}
	3549	int32_t targets = Transliterator::countAvailableTargets(source);
	3550	for (j = 0; j < targets; j++) {
	3551	UnicodeString target;
	3552	Transliterator::getAvailableTarget(j, source, target);
	3553	int32_t variants = Transliterator::countAvailableVariants(source, target);
	3554	for (k =0; k< variants; k++) {
	3555	UnicodeString variant;
	3556	UParseError err;
	3557	UErrorCode status = U_ZERO_ERROR;
	3558
	3559	Transliterator::getAvailableVariant(k, source, target, variant);
	3560	UnicodeString id = source + "-" + target + "/" + variant;
	3561
	3562	Transliterator *t = Transliterator::createInstance(id, UTRANS_FORWARD, err, status);
	3563	if (U_FAILURE(status)) {
	3564	dataerrln((UnicodeString)"FAIL: Could not create " + id);
	3565	delete t;
	3566	continue;
	3567	}
	3568	status = U_ZERO_ERROR;
	3569	CheckIncrementalAux(t, test);
	3570
	3571	UnicodeString rev;
	3572	_trans(*t, test, rev);
	3573	Transliterator *inv = t->createInverse(status);
	3574	if (U_FAILURE(status)) {
	3575	// The following are forward-only, it is OK that creating an inverse will not work:
	3576	// 1. Devanagari-Arabic
	3577	// 2. Any-*/BGN
	3578	// 2a. Any-*/BGN_1981
	3579	// 3. Any-*/UNGEGN
	3580	// 4. Any-*/MNS
	3581	// If UCONFIG_NO_BREAK_ITERATION is on, Latin-Thai is also not expected to work.
	3582	if ( id.compare((UnicodeString)"Devanagari-Arabic/") != 0
	3583	&& !(id.startsWith((UnicodeString)"Any-") &&
	3584	(id.endsWith((UnicodeString)"/BGN") \|\| id.endsWith((UnicodeString)"/BGN_1981") \|\| id.endsWith((UnicodeString)"/UNGEGN") \|\| id.endsWith((UnicodeString)"/MNS"))
	3585	)
	3586	#if UCONFIG_NO_BREAK_ITERATION
	3587	&& id.compare((UnicodeString)"Latin-Thai/") != 0
	3588	#endif
	3589	)
	3590	{
	3591	errln((UnicodeString)"FAIL: Could not create inverse of " + id);
	3592	}
	3593	delete t;
	3594	delete inv;
	3595	continue;
	3596	}
	3597	CheckIncrementalAux(inv, rev);
	3598	delete t;
	3599	delete inv;
	3600	}
	3601	}
	3602	}
	3603	}
	3604
	3605	void TransliteratorTest::CheckIncrementalAux(const Transliterator* t,
	3606	const UnicodeString& input) {
	3607	UErrorCode ec = U_ZERO_ERROR;
	3608	UTransPosition pos;
	3609	UnicodeString test = input;
	3610
	3611	pos.contextStart = 0;
	3612	pos.contextLimit = input.length();
	3613	pos.start = 0;
	3614	pos.limit = input.length();
	3615
	3616	t->transliterate(test, pos, ec);
	3617	if (U_FAILURE(ec)) {
	3618	errln((UnicodeString)"FAIL: transliterate() error " + u_errorName(ec));
	3619	return;
	3620	}
	3621	UBool gotError = FALSE;
	3622	(void)gotError; // Suppress set but not used warning.
	3623
	3624	// we have a few special cases. Any-Remove (pos.start = 0, but also = limit) and U+XXXXX?X?
	3625
	3626	if (pos.start == 0 && pos.limit != 0 && t->getID() != "Hex-Any/Unicode") {
	3627	errln((UnicodeString)"No Progress, " +
	3628	t->getID() + ": " + formatInput(test, input, pos));
	3629	gotError = TRUE;
	3630	} else {
	3631	logln((UnicodeString)"PASS Progress, " +
	3632	t->getID() + ": " + formatInput(test, input, pos));
	3633	}
	3634	t->finishTransliteration(test, pos);
	3635	if (pos.start != pos.limit) {
	3636	errln((UnicodeString)"Incomplete, " +
	3637	t->getID() + ": " + formatInput(test, input, pos));
	3638	gotError = TRUE;
	3639	}
	3640	}
	3641
	3642	void TransliteratorTest::TestFunction() {
	3643	// Careful with spacing and ';' here: Phrase this exactly
	3644	// as toRules() is going to return it. If toRules() changes
	3645	// with regard to spacing or ';', then adjust this string.
	3646	UnicodeString rule =
	3647	"([:Lu:]) > $1 '(' &Lower( $1 ) '=' &Hex( &Any-Lower( $1 ) ) ')';";
	3648
	3649	UParseError pe;
	3650	UErrorCode ec = U_ZERO_ERROR;
	3651	Transliterator *t = Transliterator::createFromRules("Test", rule, UTRANS_FORWARD, pe, ec);
	3652	if (t == NULL) {
	3653	dataerrln("FAIL: createFromRules failed - %s", u_errorName(ec));
	3654	return;
	3655	}
	3656
	3657	UnicodeString r;
	3658	t->toRules(r, TRUE);
	3659	if (r == rule) {
	3660	logln((UnicodeString)"OK: toRules() => " + r);
	3661	} else {
	3662	errln((UnicodeString)"FAIL: toRules() => " + r +
	3663	", expected " + rule);
	3664	}
	3665
	3666	expect(*t, "The Quick Brown Fox",
	3667	UNICODE_STRING_SIMPLE("T(t=\\u0074)he Q(q=\\u0071)uick B(b=\\u0062)rown F(f=\\u0066)ox"));
	3668
	3669	delete t;
	3670	}
	3671
	3672	void TransliteratorTest::TestInvalidBackRef(void) {
	3673	UnicodeString rule = ". > $1;";
	3674	UnicodeString rule2 =CharsToUnicodeString("(.) <> &hex/unicode($1) &name($1); . > $1; [{}] >\\u0020;");
	3675	UParseError pe;
	3676	UErrorCode ec = U_ZERO_ERROR;
	3677	Transliterator *t = Transliterator::createFromRules("Test", rule, UTRANS_FORWARD, pe, ec);
	3678	Transliterator *t2 = Transliterator::createFromRules("Test2", rule2, UTRANS_FORWARD, pe, ec);
	3679
	3680	if (t != NULL) {
	3681	errln("FAIL: createFromRules should have returned NULL");
	3682	delete t;
	3683	}
	3684
	3685	if (t2 != NULL) {
	3686	errln("FAIL: createFromRules should have returned NULL");
	3687	delete t2;
	3688	}
	3689
	3690	if (U_SUCCESS(ec)) {
	3691	errln("FAIL: Ok: . > $1; => no error");
	3692	} else {
	3693	logln((UnicodeString)"Ok: . > $1; => " + u_errorName(ec));
	3694	}
	3695	}
	3696
	3697	void TransliteratorTest::TestMulticharStringSet() {
	3698	// Basic testing
	3699	const char* rule =
	3700	" [{aa}] > x;"
	3701	" a > y;"
	3702	" [b{bc}] > z;"
	3703	"[{gd}] { e > q;"
	3704	" e } [{fg}] > r;" ;
	3705
	3706	UParseError pe;
	3707	UErrorCode ec = U_ZERO_ERROR;
	3708	Transliterator* t = Transliterator::createFromRules("Test", rule, UTRANS_FORWARD, pe, ec);
	3709	if (t == NULL \|\| U_FAILURE(ec)) {
	3710	delete t;
	3711	errln("FAIL: createFromRules failed");
	3712	return;
	3713	}
	3714
	3715	expect(*t, "a aa ab bc d gd de gde gdefg ddefg",
	3716	"y x yz z d gd de gdq gdqfg ddrfg");
	3717	delete t;
	3718
	3719	// Overlapped string test. Make sure that when multiple
	3720	// strings can match that the longest one is matched.
	3721	rule =
	3722	" [a {ab} {abc}] > x;"
	3723	" b > y;"
	3724	" c > z;"
	3725	" q [t {st} {rst}] { e > p;" ;
	3726
	3727	t = Transliterator::createFromRules("Test", rule, UTRANS_FORWARD, pe, ec);
	3728	if (t == NULL \|\| U_FAILURE(ec)) {
	3729	delete t;
	3730	errln("FAIL: createFromRules failed");
	3731	return;
	3732	}
	3733
	3734	expect(*t, "a ab abc qte qste qrste",
	3735	"x x x qtp qstp qrstp");
	3736	delete t;
	3737	}
	3738
	3739	// vvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvv
	3740	// BEGIN TestUserFunction support factory
	3741
	3742	Transliterator* _TUFF[4];
	3743	UnicodeString* _TUFID[4];
	3744
	3745	static Transliterator* U_EXPORT2 _TUFFactory(const UnicodeString& /ID/,
	3746	Transliterator::Token context) {
	3747	return _TUFF[context.integer]->clone();
	3748	}
	3749
	3750	static void _TUFReg(const UnicodeString& ID, Transliterator* t, int32_t n) {
	3751	_TUFF[n] = t;
	3752	_TUFID[n] = new UnicodeString(ID);
	3753	Transliterator::registerFactory(ID, _TUFFactory, Transliterator::integerToken(n));
	3754	}
	3755
	3756	static void _TUFUnreg(int32_t n) {
	3757	if (_TUFF[n] != NULL) {
	3758	Transliterator::unregister(*_TUFID[n]);
	3759	delete _TUFF[n];
	3760	delete _TUFID[n];
	3761	}
	3762	}
	3763
	3764	// END TestUserFunction support factory
	3765	// ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
	3766
	3767	/**
	3768	* Test that user-registered transliterators can be used under function
	3769	* syntax.
	3770	*/
	3771	void TransliteratorTest::TestUserFunction() {
	3772
	3773	Transliterator* t;
	3774	UParseError pe;
	3775	UErrorCode ec = U_ZERO_ERROR;
	3776
	3777	// Setup our factory
	3778	int32_t i;
	3779	for (i=0; i<4; ++i) {
	3780	_TUFF[i] = NULL;
	3781	}
	3782
	3783	// There's no need to register inverses if we don't use them
	3784	t = Transliterator::createFromRules("gif",
	3785	UNICODE_STRING_SIMPLE("'\\'u(..)(..) > '<img src=\"http://www.unicode.org/gifs/24/' $1 '/U' $1$2 '.gif\">';"),
	3786	UTRANS_FORWARD, pe, ec);
	3787	if (t == NULL \|\| U_FAILURE(ec)) {
	3788	dataerrln((UnicodeString)"FAIL: createFromRules gif " + u_errorName(ec));
	3789	return;
	3790	}
	3791	_TUFReg("Any-gif", t, 0);
	3792
	3793	t = Transliterator::createFromRules("RemoveCurly",
	3794	UNICODE_STRING_SIMPLE("[\\{\\}] > ; '\\N' > ;"),
	3795	UTRANS_FORWARD, pe, ec);
	3796	if (t == NULL \|\| U_FAILURE(ec)) {
	3797	errln((UnicodeString)"FAIL: createFromRules RemoveCurly " + u_errorName(ec));
	3798	goto FAIL;
	3799	}
	3800	expect(*t, UNICODE_STRING_SIMPLE("\\N{name}"), "name");
	3801	_TUFReg("Any-RemoveCurly", t, 1);
	3802
	3803	logln("Trying &hex");
	3804	t = Transliterator::createFromRules("hex2",
	3805	"(.) > &hex($1);",
	3806	UTRANS_FORWARD, pe, ec);
	3807	if (t == NULL \|\| U_FAILURE(ec)) {
	3808	errln("FAIL: createFromRules");
	3809	goto FAIL;
	3810	}
	3811	logln("Registering");
	3812	_TUFReg("Any-hex2", t, 2);
	3813	t = Transliterator::createInstance("Any-hex2", UTRANS_FORWARD, ec);
	3814	if (t == NULL \|\| U_FAILURE(ec)) {
	3815	errln((UnicodeString)"FAIL: createInstance Any-hex2 " + u_errorName(ec));
	3816	goto FAIL;
	3817	}
	3818	expect(*t, "abc", UNICODE_STRING_SIMPLE("\\u0061\\u0062\\u0063"));
	3819	delete t;
	3820
	3821	logln("Trying &gif");
	3822	t = Transliterator::createFromRules("gif2",
	3823	"(.) > &Gif(&Hex2($1));",
	3824	UTRANS_FORWARD, pe, ec);
	3825	if (t == NULL \|\| U_FAILURE(ec)) {
	3826	errln((UnicodeString)"FAIL: createFromRules gif2 " + u_errorName(ec));
	3827	goto FAIL;
	3828	}
	3829	logln("Registering");
	3830	_TUFReg("Any-gif2", t, 3);
	3831	t = Transliterator::createInstance("Any-gif2", UTRANS_FORWARD, ec);
	3832	if (t == NULL \|\| U_FAILURE(ec)) {
	3833	errln((UnicodeString)"FAIL: createInstance Any-gif2 " + u_errorName(ec));
	3834	goto FAIL;
	3835	}
	3836	expect(*t, "ab", "<img src=\"http://www.unicode.org/gifs/24/00/U0061.gif\">"
	3837	"<img src=\"http://www.unicode.org/gifs/24/00/U0062.gif\">");
	3838	delete t;
	3839
	3840	// Test that filters are allowed after &
	3841	t = Transliterator::createFromRules("test",
	3842	"(.) > &Hex($1) ' ' &RemoveCurly(&Name($1)) ' ';",
	3843	UTRANS_FORWARD, pe, ec);
	3844	if (t == NULL \|\| U_FAILURE(ec)) {
	3845	errln((UnicodeString)"FAIL: createFromRules test " + u_errorName(ec));
	3846	goto FAIL;
	3847	}
	3848	expect(*t, "abc",
	3849	UNICODE_STRING_SIMPLE("\\u0061 LATIN SMALL LETTER A \\u0062 LATIN SMALL LETTER B \\u0063 LATIN SMALL LETTER C "));
	3850	delete t;
	3851
	3852	FAIL:
	3853	for (i=0; i<4; ++i) {
	3854	_TUFUnreg(i);
	3855	}
	3856	}
	3857
	3858	/**
	3859	* Test the Any-X transliterators.
	3860	*/
	3861	void TransliteratorTest::TestAnyX(void) {
	3862	UParseError parseError;
	3863	UErrorCode status = U_ZERO_ERROR;
	3864	Transliterator* anyLatin =
	3865	Transliterator::createInstance("Any-Latin", UTRANS_FORWARD, parseError, status);
	3866	if (anyLatin==0) {
	3867	dataerrln("FAIL: createInstance returned NULL - %s", u_errorName(status));
	3868	delete anyLatin;
	3869	return;
	3870	}
	3871
	3872	expect(*anyLatin,
	3873	CharsToUnicodeString("greek:\\u03B1\\u03B2\\u03BA\\u0391\\u0392\\u039A hiragana:\\u3042\\u3076\\u304F cyrillic:\\u0430\\u0431\\u0446"),
	3874	CharsToUnicodeString("greek:abkABK hiragana:abuku cyrillic:abc"));
	3875
	3876	delete anyLatin;
	3877
	3878	status = U_ZERO_ERROR;
	3879	Transliterator* anyASCII =
	3880	Transliterator::createInstance("Any-Latin;Latin-ASCII", UTRANS_FORWARD, parseError, status);
	3881	if (U_FAILURE(status) \|\| anyASCII==0) {
	3882	dataerrln("FAIL: createInstance returned NULL and/or set status %s", u_errorName(status));
	3883	delete anyASCII;
	3884	return;
	3885	}
	3886
	3887	expect(*anyASCII,
	3888	CharsToUnicodeString("ArabicDigits:\\u0660\\u0661\\u0664\\u0669 PersianDigits:\\u06F0\\u06F1\\u06F4\\u06F9"),
	3889	CharsToUnicodeString("ArabicDigits:0149 PersianDigits:0149"));
	3890
	3891	delete anyASCII;
	3892	}
	3893
	3894	/**
	3895	* Test Any-X transliterators with sample letters from all scripts.
	3896	*/
	3897	void TransliteratorTest::TestAny(void) {
	3898	UErrorCode status = U_ZERO_ERROR;
	3899	// Note: there is a lot of implict construction of UnicodeStrings from (char *) in
	3900	// function call parameters going on in this test.
	3901	UnicodeSet alphabetic("[:alphabetic:]", status);
	3902	if (U_FAILURE(status)) {
	3903	dataerrln("Failure: file %s, line %d, status = %s", __FILE__, __LINE__, u_errorName(status));
	3904	return;
	3905	}
	3906	alphabetic.freeze();
	3907
	3908	UnicodeString testString;
	3909	for (int32_t i = 0; i < USCRIPT_CODE_LIMIT; i++) {
	3910	const char *scriptName = uscript_getShortName((UScriptCode)i);
	3911	if (scriptName == NULL) {
	3912	errln("Failure: file %s, line %d: Script Code %d is invalid, ", __FILE__, __LINE__, i);
	3913	return;
	3914	}
	3915
	3916	UnicodeSet sample;
	3917	sample.applyPropertyAlias("script", scriptName, status);
	3918	if (U_FAILURE(status)) {
	3919	errln("Failure: file %s, line %d, status = %s", __FILE__, __LINE__, u_errorName(status));
	3920	return;
	3921	}
	3922	sample.retainAll(alphabetic);
	3923	for (int32_t count=0; count<5; count++) {
	3924	UChar32 c = sample.charAt(count);
	3925	if (c == -1) {
	3926	break;
	3927	}
	3928	testString.append(c);
	3929	}
	3930	}
	3931
	3932	UParseError parseError;
	3933	Transliterator* anyLatin =
	3934	Transliterator::createInstance("Any-Latin", UTRANS_FORWARD, parseError, status);
	3935	if (U_FAILURE(status)) {
	3936	dataerrln("Failure: file %s, line %d, status = %s", __FILE__, __LINE__, u_errorName(status));
	3937	return;
	3938	}
	3939
	3940	logln(UnicodeString("Sample set for Any-Latin: ") + testString);
	3941	anyLatin->transliterate(testString);
	3942	logln(UnicodeString("Sample result for Any-Latin: ") + testString);
	3943	delete anyLatin;
	3944	}
	3945
	3946
	3947	/**
	3948	* Test the source and target set API. These are only implemented
	3949	* for RBT and CompoundTransliterator at this time.
	3950	*/
	3951	void TransliteratorTest::TestSourceTargetSet() {
	3952	UErrorCode ec = U_ZERO_ERROR;
	3953
	3954	// Rules
	3955	const char* r =
	3956	"a > b; "
	3957	"r [x{lu}] > q;";
	3958
	3959	// Expected source
	3960	UnicodeSet expSrc("[arx{lu}]", ec);
	3961
	3962	// Expected target
	3963	UnicodeSet expTrg("[bq]", ec);
	3964
	3965	UParseError pe;
	3966	Transliterator* t = Transliterator::createFromRules("test", r, UTRANS_FORWARD, pe, ec);
	3967
	3968	if (U_FAILURE(ec)) {
	3969	delete t;
	3970	errln("FAIL: Couldn't set up test");
	3971	return;
	3972	}
	3973
	3974	UnicodeSet src; t->getSourceSet(src);
	3975	UnicodeSet trg; t->getTargetSet(trg);
	3976
	3977	if (src == expSrc && trg == expTrg) {
	3978	UnicodeString a, b;
	3979	logln((UnicodeString)"Ok: " +
	3980	r + " => source = " + src.toPattern(a, TRUE) +
	3981	", target = " + trg.toPattern(b, TRUE));
	3982	} else {
	3983	UnicodeString a, b, c, d;
	3984	errln((UnicodeString)"FAIL: " +
	3985	r + " => source = " + src.toPattern(a, TRUE) +
	3986	", expected " + expSrc.toPattern(b, TRUE) +
	3987	"; target = " + trg.toPattern(c, TRUE) +
	3988	", expected " + expTrg.toPattern(d, TRUE));
	3989	}
	3990
	3991	delete t;
	3992	}
	3993
	3994	/**
	3995	* Test handling of Pattern_White_Space, for both RBT and UnicodeSet.
	3996	*/
	3997	void TransliteratorTest::TestPatternWhiteSpace() {
	3998	// Rules
	3999	const char* r = "a > \\u200E b;";
	4000
	4001	UErrorCode ec = U_ZERO_ERROR;
	4002	UParseError pe;
	4003	Transliterator* t = Transliterator::createFromRules("test", CharsToUnicodeString(r), UTRANS_FORWARD, pe, ec);
	4004
	4005	if (U_FAILURE(ec)) {
	4006	errln("FAIL: Couldn't set up test");
	4007	} else {
	4008	expect(*t, "a", "b");
	4009	}
	4010	delete t;
	4011
	4012	// UnicodeSet
	4013	ec = U_ZERO_ERROR;
	4014	UnicodeSet set(CharsToUnicodeString("[a \\u200E]"), ec);
	4015
	4016	if (U_FAILURE(ec)) {
	4017	errln("FAIL: Couldn't set up test");
	4018	} else {
	4019	if (set.contains(0x200E)) {
	4020	errln("FAIL: U+200E not being ignored by UnicodeSet");
	4021	}
	4022	}
	4023	}
	4024	//======================================================================
	4025	// this method is in TestUScript.java
	4026	//======================================================================
	4027	void TransliteratorTest::TestAllCodepoints(){
	4028	UScriptCode code= USCRIPT_INVALID_CODE;
	4029	char id[256]={'\0'};
	4030	char abbr[256]={'\0'};
	4031	char newId[256]={'\0'};
	4032	char newAbbrId[256]={'\0'};
	4033	char oldId[256]={'\0'};
	4034	char oldAbbrId[256]={'\0'};
	4035
	4036	UErrorCode status =U_ZERO_ERROR;
	4037	UParseError pe;
	4038
	4039	for(uint32_t i = 0; i<=0x10ffff; i++){
	4040	code = uscript_getScript(i,&status);
	4041	if(code == USCRIPT_INVALID_CODE){
	4042	dataerrln("uscript_getScript for codepoint \\U%08X failed.", i);
	4043	}
	4044	const char* myId = uscript_getName(code);
	4045	if(!myId) {
	4046	dataerrln("Valid script code returned NULL name. Check your data!");
	4047	return;
	4048	}
	4049	uprv_strcpy(id,myId);
	4050	uprv_strcpy(abbr,uscript_getShortName(code));
	4051
	4052	uprv_strcpy(newId,"[:");
	4053	uprv_strcat(newId,id);
	4054	uprv_strcat(newId,":];NFD");
	4055
	4056	uprv_strcpy(newAbbrId,"[:");
	4057	uprv_strcat(newAbbrId,abbr);
	4058	uprv_strcat(newAbbrId,":];NFD");
	4059
	4060	if(uprv_strcmp(newId,oldId)!=0){
	4061	Transliterator* t = Transliterator::createInstance(newId,UTRANS_FORWARD,pe,status);
	4062	if(t==NULL \|\| U_FAILURE(status)){
	4063	dataerrln((UnicodeString)"FAIL: Could not create " + id + " - " + u_errorName(status));
	4064	}
	4065	delete t;
	4066	}
	4067	if(uprv_strcmp(newAbbrId,oldAbbrId)!=0){
	4068	Transliterator* t = Transliterator::createInstance(newAbbrId,UTRANS_FORWARD,pe,status);
	4069	if(t==NULL \|\| U_FAILURE(status)){
	4070	dataerrln((UnicodeString)"FAIL: Could not create " + id + " - " + u_errorName(status));
	4071	}
	4072	delete t;
	4073	}
	4074	uprv_strcpy(oldId,newId);
	4075	uprv_strcpy(oldAbbrId, newAbbrId);
	4076
	4077	}
	4078
	4079	}
	4080
	4081	#define TEST_TRANSLIT_ID(id, cls) UPRV_BLOCK_MACRO_BEGIN { \
	4082	UErrorCode ec = U_ZERO_ERROR; \
	4083	Transliterator* t = Transliterator::createInstance(id, UTRANS_FORWARD, ec); \
	4084	if (U_FAILURE(ec)) { \
	4085	dataerrln("FAIL: Couldn't create %s - %s", id, u_errorName(ec)); \
	4086	} else { \
	4087	if (t->getDynamicClassID() != cls::getStaticClassID()) { \
	4088	errln("FAIL: " #cls " dynamic and static class ID mismatch"); \
	4089	} \
	4090	/* t = t; / /can't do this: coverage test for assignment op*/ \
	4091	} \
	4092	delete t; \
	4093	} UPRV_BLOCK_MACRO_END
	4094
	4095	#define TEST_TRANSLIT_RULE(rule, cls) UPRV_BLOCK_MACRO_BEGIN { \
	4096	UErrorCode ec = U_ZERO_ERROR; \
	4097	UParseError pe; \
	4098	Transliterator* t = Transliterator::createFromRules("_", rule, UTRANS_FORWARD, pe, ec); \
	4099	if (U_FAILURE(ec)) { \
	4100	errln("FAIL: Couldn't create " rule); \
	4101	} else { \
	4102	if (t->getDynamicClassID() != cls ::getStaticClassID()) { \
	4103	errln("FAIL: " #cls " dynamic and static class ID mismatch"); \
	4104	} \
	4105	/* t = t; / /can't do this: coverage test for assignment op*/ \
	4106	} \
	4107	delete t; \
	4108	} UPRV_BLOCK_MACRO_END
	4109
	4110	void TransliteratorTest::TestBoilerplate() {
	4111	TEST_TRANSLIT_ID("Any-Latin", AnyTransliterator);
	4112	TEST_TRANSLIT_ID("Any-Hex", EscapeTransliterator);
	4113	TEST_TRANSLIT_ID("Hex-Any", UnescapeTransliterator);
	4114	TEST_TRANSLIT_ID("Lower", LowercaseTransliterator);
	4115	TEST_TRANSLIT_ID("Upper", UppercaseTransliterator);
	4116	TEST_TRANSLIT_ID("Title", TitlecaseTransliterator);
	4117	TEST_TRANSLIT_ID("Null", NullTransliterator);
	4118	TEST_TRANSLIT_ID("Remove", RemoveTransliterator);
	4119	TEST_TRANSLIT_ID("Any-Name", UnicodeNameTransliterator);
	4120	TEST_TRANSLIT_ID("Name-Any", NameUnicodeTransliterator);
	4121	TEST_TRANSLIT_ID("NFD", NormalizationTransliterator);
	4122	TEST_TRANSLIT_ID("Latin-Greek", CompoundTransliterator);
	4123	TEST_TRANSLIT_RULE("a>b;", RuleBasedTransliterator);
	4124	}
	4125
	4126	void TransliteratorTest::TestAlternateSyntax() {
	4127	// U+2206 == &
	4128	// U+2190 == <
	4129	// U+2192 == >
	4130	// U+2194 == <>
	4131	expect(CharsToUnicodeString("a \\u2192 x; b \\u2190 y; c \\u2194 z"),
	4132	"abc",
	4133	"xbz");
	4134	expect(CharsToUnicodeString("([:^ASCII:]) \\u2192 \\u2206Name($1);"),
	4135	CharsToUnicodeString("<=\\u2190; >=\\u2192; <>=\\u2194; &=\\u2206"),
	4136	UNICODE_STRING_SIMPLE("<=\\N{LEFTWARDS ARROW}; >=\\N{RIGHTWARDS ARROW}; <>=\\N{LEFT RIGHT ARROW}; &=\\N{INCREMENT}"));
	4137	}
	4138
	4139	static const char* BEGIN_END_RULES[] = {
	4140	// [0]
	4141	"abc > xy;"
	4142	"aba > z;",
	4143
	4144	// [1]
	4145	/*
	4146	"::BEGIN;"
	4147	"abc > xy;"
	4148	"::END;"
	4149	"::BEGIN;"
	4150	"aba > z;"
	4151	"::END;",
	4152	*/
	4153	"", // test case commented out below, this is here to keep from messing up the indexes
	4154
	4155	// [2]
	4156	/*
	4157	"abc > xy;"
	4158	"::BEGIN;"
	4159	"aba > z;"
	4160	"::END;",
	4161	*/
	4162	"", // test case commented out below, this is here to keep from messing up the indexes
	4163
	4164	// [3]
	4165	/*
	4166	"::BEGIN;"
	4167	"abc > xy;"
	4168	"::END;"
	4169	"aba > z;",
	4170	*/
	4171	"", // test case commented out below, this is here to keep from messing up the indexes
	4172
	4173	// [4]
	4174	"abc > xy;"
	4175	"::Null;"
	4176	"aba > z;",
	4177
	4178	// [5]
	4179	"::Upper;"
	4180	"ABC > xy;"
	4181	"AB > x;"
	4182	"C > z;"
	4183	"::Upper;"
	4184	"XYZ > p;"
	4185	"XY > q;"
	4186	"Z > r;"
	4187	"::Upper;",
	4188
	4189	// [6]
	4190	"$ws = [[:Separator:][\\u0009-\\u000C]$];"
	4191	"$delim = [\\-$ws];"
	4192	"$ws $delim* > ' ';"
	4193	"'-' $delim* > '-';",
	4194
	4195	// [7]
	4196	"::Null;"
	4197	"$ws = [[:Separator:][\\u0009-\\u000C]$];"
	4198	"$delim = [\\-$ws];"
	4199	"$ws $delim* > ' ';"
	4200	"'-' $delim* > '-';",
	4201
	4202	// [8]
	4203	"$ws = [[:Separator:][\\u0009-\\u000C]$];"
	4204	"$delim = [\\-$ws];"
	4205	"$ws $delim* > ' ';"
	4206	"'-' $delim* > '-';"
	4207	"::Null;",
	4208
	4209	// [9]
	4210	"$ws = [[:Separator:][\\u0009-\\u000C]$];"
	4211	"$delim = [\\-$ws];"
	4212	"::Null;"
	4213	"$ws $delim* > ' ';"
	4214	"'-' $delim* > '-';",
	4215
	4216	// [10]
	4217	/*
	4218	"::BEGIN;"
	4219	"$ws = [[:Separator:][\\u0009-\\u000C]$];"
	4220	"$delim = [\\-$ws];"
	4221	"::END;"
	4222	"$ws $delim* > ' ';"
	4223	"'-' $delim* > '-';",
	4224	*/
	4225	"", // test case commented out below, this is here to keep from messing up the indexes
	4226
	4227	// [11]
	4228	/*
	4229	"$ws = [[:Separator:][\\u0009-\\u000C]$];"
	4230	"$delim = [\\-$ws];"
	4231	"::BEGIN;"
	4232	"$ws $delim* > ' ';"
	4233	"'-' $delim* > '-';"
	4234	"::END;",
	4235	*/
	4236	"", // test case commented out below, this is here to keep from messing up the indexes
	4237
	4238	// [12]
	4239	/*
	4240	"$ws = [[:Separator:][\\u0009-\\u000C]$];"
	4241	"$delim = [\\-$ws];"
	4242	"$ab = [ab];"
	4243	"::BEGIN;"
	4244	"$ws $delim* > ' ';"
	4245	"'-' $delim* > '-';"
	4246	"::END;"
	4247	"::BEGIN;"
	4248	"$ab { ' ' } $ab > '-';"
	4249	"c { ' ' > ;"
	4250	"::END;"
	4251	"::BEGIN;"
	4252	"'a-a' > a\\%\|a;"
	4253	"::END;",
	4254	*/
	4255	"", // test case commented out below, this is here to keep from messing up the indexes
	4256
	4257	// [13]
	4258	"$ws = [[:Separator:][\\u0009-\\u000C]$];"
	4259	"$delim = [\\-$ws];"
	4260	"$ab = [ab];"
	4261	"::Null;"
	4262	"$ws $delim* > ' ';"
	4263	"'-' $delim* > '-';"
	4264	"::Null;"
	4265	"$ab { ' ' } $ab > '-';"
	4266	"c { ' ' > ;"
	4267	"::Null;"
	4268	"'a-a' > a\\%\|a;",
	4269
	4270	// [14]
	4271	/*
	4272	"::[abc];"
	4273	"::BEGIN;"
	4274	"abc > xy;"
	4275	"::END;"
	4276	"::BEGIN;"
	4277	"aba > yz;"
	4278	"::END;"
	4279	"::Upper;",
	4280	*/
	4281	"", // test case commented out below, this is here to keep from messing up the indexes
	4282
	4283	// [15]
	4284	"::[abc];"
	4285	"abc > xy;"
	4286	"::Null;"
	4287	"aba > yz;"
	4288	"::Upper;",
	4289
	4290	// [16]
	4291	/*
	4292	"::[abc];"
	4293	"::BEGIN;"
	4294	"abc <> xy;"
	4295	"::END;"
	4296	"::BEGIN;"
	4297	"aba <> yz;"
	4298	"::END;"
	4299	"::Upper(Lower);"
	4300	"::([XYZ]);"
	4301	*/
	4302	"", // test case commented out below, this is here to keep from messing up the indexes
	4303
	4304	// [17]
	4305	"::[abc];"
	4306	"abc <> xy;"
	4307	"::Null;"
	4308	"aba <> yz;"
	4309	"::Upper(Lower);"
	4310	"::([XYZ]);"
	4311	};
	4312
	4313	/*
	4314	(This entire test is commented out below and will need some heavy revision when we re-add
	4315	the ::BEGIN/::END stuff)
	4316	static const char* BOGUS_BEGIN_END_RULES[] = {
	4317	// [7]
	4318	"::BEGIN;"
	4319	"abc > xy;"
	4320	"::BEGIN;"
	4321	"aba > z;"
	4322	"::END;"
	4323	"::END;",
	4324
	4325	// [8]
	4326	"abc > xy;"
	4327	" aba > z;"
	4328	"::END;",
	4329
	4330	// [9]
	4331	"::BEGIN;"
	4332	"::Upper;"
	4333	"::END;"
	4334	};
	4335	static const int32_t BOGUS_BEGIN_END_RULES_length = UPRV_LENGTHOF(BOGUS_BEGIN_END_RULES);
	4336	*/
	4337
	4338	static const char* BEGIN_END_TEST_CASES[] = {
	4339	// rules input expected output
	4340	BEGIN_END_RULES[0], "abc ababc aba", "xy zbc z",
	4341	// BEGIN_END_RULES[1], "abc ababc aba", "xy abxy z",
	4342	// BEGIN_END_RULES[2], "abc ababc aba", "xy abxy z",
	4343	// BEGIN_END_RULES[3], "abc ababc aba", "xy abxy z",
	4344	BEGIN_END_RULES[4], "abc ababc aba", "xy abxy z",
	4345	BEGIN_END_RULES[5], "abccabaacababcbc", "PXAARXQBR",
	4346
	4347	BEGIN_END_RULES[6], "e e - e---e- e", "e e e-e-e",
	4348	BEGIN_END_RULES[7], "e e - e---e- e", "e e e-e-e",
	4349	BEGIN_END_RULES[8], "e e - e---e- e", "e e e-e-e",
	4350	BEGIN_END_RULES[9], "e e - e---e- e", "e e e-e-e",
	4351	// BEGIN_END_RULES[10], "e e - e---e- e", "e e e-e-e",
	4352	// BEGIN_END_RULES[11], "e e - e---e- e", "e e e-e-e",
	4353	// BEGIN_END_RULES[12], "e e - e---e- e", "e e e-e-e",
	4354	// BEGIN_END_RULES[12], "a a a a", "a%a%a%a",
	4355	// BEGIN_END_RULES[12], "a a-b c b a", "a%a-b cb-a",
	4356	BEGIN_END_RULES[13], "e e - e---e- e", "e e e-e-e",
	4357	BEGIN_END_RULES[13], "a a a a", "a%a%a%a",
	4358	BEGIN_END_RULES[13], "a a-b c b a", "a%a-b cb-a",
	4359
	4360	// BEGIN_END_RULES[14], "abc xy ababc xyz aba", "XY xy ABXY xyz YZ",
	4361	BEGIN_END_RULES[15], "abc xy ababc xyz aba", "XY xy ABXY xyz YZ",
	4362	// BEGIN_END_RULES[16], "abc xy ababc xyz aba", "XY xy ABXY xyz YZ",
	4363	BEGIN_END_RULES[17], "abc xy ababc xyz aba", "XY xy ABXY xyz YZ"
	4364	};
	4365	static const int32_t BEGIN_END_TEST_CASES_length = UPRV_LENGTHOF(BEGIN_END_TEST_CASES);
	4366
	4367	void TransliteratorTest::TestBeginEnd() {
	4368	// run through the list of test cases above
	4369	int32_t i = 0;
	4370	for (i = 0; i < BEGIN_END_TEST_CASES_length; i += 3) {
	4371	expect((UnicodeString)"Test case #" + (i / 3),
	4372	UnicodeString(BEGIN_END_TEST_CASES[i], -1, US_INV),
	4373	UnicodeString(BEGIN_END_TEST_CASES[i + 1], -1, US_INV),
	4374	UnicodeString(BEGIN_END_TEST_CASES[i + 2], -1, US_INV));
	4375	}
	4376
	4377	// instantiate the one reversible rule set in the reverse direction and make sure it does the right thing
	4378	UParseError parseError;
	4379	UErrorCode status = U_ZERO_ERROR;
	4380	Transliterator* reversed = Transliterator::createFromRules("Reversed", UnicodeString(BEGIN_END_RULES[17]),
	4381	UTRANS_REVERSE, parseError, status);
	4382	if (reversed == 0 \|\| U_FAILURE(status)) {
	4383	reportParseError(UnicodeString("FAIL: Couldn't create reversed transliterator"), parseError, status);
	4384	} else {
	4385	expect(*reversed, UnicodeString("xy XY XYZ yz YZ"), UnicodeString("xy abc xaba yz aba"));
	4386	}
	4387	delete reversed;
	4388
	4389	// finally, run through the list of syntactically-ill-formed rule sets above and make sure
	4390	// that all of them cause errors
	4391	/*
	4392	(commented out until we have the real ::BEGIN/::END stuff in place
	4393	for (i = 0; i < BOGUS_BEGIN_END_RULES_length; i++) {
	4394	UParseError parseError;
	4395	UErrorCode status = U_ZERO_ERROR;
	4396	Transliterator* t = Transliterator::createFromRules("foo", UnicodeString(BOGUS_BEGIN_END_RULES[i]),
	4397	UTRANS_FORWARD, parseError, status);
	4398	if (!U_FAILURE(status)) {
	4399	delete t;
	4400	errln((UnicodeString)"Should have gotten syntax error from " + BOGUS_BEGIN_END_RULES[i]);
	4401	}
	4402	}
	4403	*/
	4404	}
	4405
	4406	void TransliteratorTest::TestBeginEndToRules() {
	4407	// run through the same list of test cases we used above, but this time, instead of just
	4408	// instantiating a Transliterator from the rules and running the test against it, we instantiate
	4409	// a Transliterator from the rules, do toRules() on it, instantiate a Transliterator from
	4410	// the resulting set of rules, and make sure that the generated rule set is semantically equivalent
	4411	// to (i.e., does the same thing as) the original rule set
	4412	for (int32_t i = 0; i < BEGIN_END_TEST_CASES_length; i += 3) {
	4413	UParseError parseError;
	4414	UErrorCode status = U_ZERO_ERROR;
	4415	Transliterator* t = Transliterator::createFromRules("--", UnicodeString(BEGIN_END_TEST_CASES[i], -1, US_INV),
	4416	UTRANS_FORWARD, parseError, status);
	4417	if (U_FAILURE(status)) {
	4418	reportParseError(UnicodeString("FAIL: Couldn't create transliterator"), parseError, status);
	4419	} else {
	4420	UnicodeString rules;
	4421	t->toRules(rules, TRUE);
	4422	Transliterator* t2 = Transliterator::createFromRules((UnicodeString)"Test case #" + (i / 3), rules,
	4423	UTRANS_FORWARD, parseError, status);
	4424	if (U_FAILURE(status)) {
	4425	reportParseError(UnicodeString("FAIL: Couldn't create transliterator from generated rules"),
	4426	parseError, status);
	4427	delete t;
	4428	} else {
	4429	expect(*t2,
	4430	UnicodeString(BEGIN_END_TEST_CASES[i + 1], -1, US_INV),
	4431	UnicodeString(BEGIN_END_TEST_CASES[i + 2], -1, US_INV));
	4432	delete t;
	4433	delete t2;
	4434	}
	4435	}
	4436	}
	4437
	4438	// do the same thing for the reversible test case
	4439	UParseError parseError;
	4440	UErrorCode status = U_ZERO_ERROR;
	4441	Transliterator* reversed = Transliterator::createFromRules("Reversed", UnicodeString(BEGIN_END_RULES[17]),
	4442	UTRANS_REVERSE, parseError, status);
	4443	if (U_FAILURE(status)) {
	4444	reportParseError(UnicodeString("FAIL: Couldn't create reversed transliterator"), parseError, status);
	4445	} else {
	4446	UnicodeString rules;
	4447	reversed->toRules(rules, FALSE);
	4448	Transliterator* reversed2 = Transliterator::createFromRules("Reversed", rules, UTRANS_FORWARD,
	4449	parseError, status);
	4450	if (U_FAILURE(status)) {
	4451	reportParseError(UnicodeString("FAIL: Couldn't create reversed transliterator from generated rules"),
	4452	parseError, status);
	4453	delete reversed;
	4454	} else {
	4455	expect(*reversed2,
	4456	UnicodeString("xy XY XYZ yz YZ"),
	4457	UnicodeString("xy abc xaba yz aba"));
	4458	delete reversed;
	4459	delete reversed2;
	4460	}
	4461	}
	4462	}
	4463
	4464	void TransliteratorTest::TestRegisterAlias() {
	4465	UnicodeString longID("Lower;[aeiou]Upper");
	4466	UnicodeString shortID("Any-CapVowels");
	4467	UnicodeString reallyShortID("CapVowels");
	4468
	4469	Transliterator::registerAlias(shortID, longID);
	4470
	4471	UErrorCode err = U_ZERO_ERROR;
	4472	Transliterator* t1 = Transliterator::createInstance(longID, UTRANS_FORWARD, err);
	4473	if (U_FAILURE(err)) {
	4474	errln("Failed to instantiate transliterator with long ID");
	4475	Transliterator::unregister(shortID);
	4476	return;
	4477	}
	4478	Transliterator* t2 = Transliterator::createInstance(reallyShortID, UTRANS_FORWARD, err);
	4479	if (U_FAILURE(err)) {
	4480	errln("Failed to instantiate transliterator with short ID");
	4481	delete t1;
	4482	Transliterator::unregister(shortID);
	4483	return;
	4484	}
	4485
	4486	if (t1->getID() != longID)
	4487	errln("Transliterator instantiated with long ID doesn't have long ID");
	4488	if (t2->getID() != reallyShortID)
	4489	errln("Transliterator instantiated with short ID doesn't have short ID");
	4490
	4491	UnicodeString rules1;
	4492	UnicodeString rules2;
	4493
	4494	t1->toRules(rules1, TRUE);
	4495	t2->toRules(rules2, TRUE);
	4496	if (rules1 != rules2)
	4497	errln("Alias transliterators aren't the same");
	4498
	4499	delete t1;
	4500	delete t2;
	4501	Transliterator::unregister(shortID);
	4502
	4503	t1 = Transliterator::createInstance(shortID, UTRANS_FORWARD, err);
	4504	if (U_SUCCESS(err)) {
	4505	errln("Instantiation with short ID succeeded after short ID was unregistered");
	4506	delete t1;
	4507	}
	4508
	4509	// try the same thing again, but this time with something other than
	4510	// an instance of CompoundTransliterator
	4511	UnicodeString realID("Latin-Greek");
	4512	UnicodeString fakeID("Latin-dlgkjdflkjdl");
	4513	Transliterator::registerAlias(fakeID, realID);
	4514
	4515	err = U_ZERO_ERROR;
	4516	t1 = Transliterator::createInstance(realID, UTRANS_FORWARD, err);
	4517	if (U_FAILURE(err)) {
	4518	dataerrln("Failed to instantiate transliterator with real ID - %s", u_errorName(err));
	4519	Transliterator::unregister(realID);
	4520	return;
	4521	}
	4522	t2 = Transliterator::createInstance(fakeID, UTRANS_FORWARD, err);
	4523	if (U_FAILURE(err)) {
	4524	errln("Failed to instantiate transliterator with fake ID");
	4525	delete t1;
	4526	Transliterator::unregister(realID);
	4527	return;
	4528	}
	4529
	4530	t1->toRules(rules1, TRUE);
	4531	t2->toRules(rules2, TRUE);
	4532	if (rules1 != rules2)
	4533	errln("Alias transliterators aren't the same");
	4534
	4535	delete t1;
	4536	delete t2;
	4537	Transliterator::unregister(fakeID);
	4538	}
	4539
	4540	void TransliteratorTest::TestRuleStripping() {
	4541	/*
	4542	#
	4543	\uE001>\u0C01; # SIGN
	4544	*/
	4545	static const UChar rule[] = {
	4546	0x0023,0x0020,0x000D,0x000A,
	4547	0xE001,0x003E,0x0C01,0x003B,0x0020,0x0023,0x0020,0x0053,0x0049,0x0047,0x004E,0
	4548	};
	4549	static const UChar expectedRule[] = {
	4550	0xE001,0x003E,0x0C01,0x003B,0
	4551	};
	4552	UChar result[UPRV_LENGTHOF(rule)];
	4553	UErrorCode status = U_ZERO_ERROR;
	4554	int32_t len = utrans_stripRules(rule, UPRV_LENGTHOF(rule), result, &status);
	4555	if (len != u_strlen(expectedRule)) {
	4556	errln("utrans_stripRules return len = %d", len);
	4557	}
	4558	if (u_strncmp(expectedRule, result, len) != 0) {
	4559	errln("utrans_stripRules did not return expected string");
	4560	}
	4561	}
	4562
	4563	/**
	4564	* Test the Halfwidth-Fullwidth transliterator (ticket 6281).
	4565	*/
	4566	void TransliteratorTest::TestHalfwidthFullwidth(void) {
	4567	UParseError parseError;
	4568	UErrorCode status = U_ZERO_ERROR;
	4569	Transliterator* hf = Transliterator::createInstance("Halfwidth-Fullwidth", UTRANS_FORWARD, parseError, status);
	4570	Transliterator* fh = Transliterator::createInstance("Fullwidth-Halfwidth", UTRANS_FORWARD, parseError, status);
	4571	if (hf == 0 \|\| fh == 0) {
	4572	dataerrln("FAIL: createInstance failed - %s", u_errorName(status));
	4573	delete hf;
	4574	delete fh;
	4575	return;
	4576	}
	4577
	4578	// Array of 2n items
	4579	// Each item is
	4580	// "hf"\|"fh"\|"both",
	4581	// <Halfwidth>,
	4582	// <Fullwidth>
	4583	const char* DATA[] = {
	4584	"both",
	4585	"\\uFFE9\\uFFEA\\uFFEB\\uFFEC\\u0061\\uFF71\\u00AF\\u0020",
	4586	"\\u2190\\u2191\\u2192\\u2193\\uFF41\\u30A2\\uFFE3\\u3000",
	4587	};
	4588	int32_t DATA_length = UPRV_LENGTHOF(DATA);
	4589
	4590	for (int32_t i=0; i<DATA_length; i+=3) {
	4591	UnicodeString h = CharsToUnicodeString(DATA[i+1]);
	4592	UnicodeString f = CharsToUnicodeString(DATA[i+2]);
	4593	switch (*DATA[i]) {
	4594	case 0x68: //'h': // Halfwidth-Fullwidth only
	4595	expect(*hf, h, f);
	4596	break;
	4597	case 0x66: //'f': // Fullwidth-Halfwidth only
	4598	expect(*fh, f, h);
	4599	break;
	4600	case 0x62: //'b': // both directions
	4601	expect(*hf, h, f);
	4602	expect(*fh, f, h);
	4603	break;
	4604	}
	4605	}
	4606	delete hf;
	4607	delete fh;
	4608	}
	4609
	4610
	4611	/**
	4612	* Test Thai. The text is the first paragraph of "What is Unicode" from the Unicode.org web site.
	4613	* TODO: confirm that the expected results are correct.
	4614	* For now, test just confirms that C++ and Java give identical results.
	4615	*/
	4616	void TransliteratorTest::TestThai(void) {
	4617	#if !UCONFIG_NO_BREAK_ITERATION
	4618	UParseError parseError;
	4619	UErrorCode status = U_ZERO_ERROR;
	4620	Transliterator* tr = Transliterator::createInstance("Any-Latin", UTRANS_FORWARD, parseError, status);
	4621	if (tr == 0) {
	4622	dataerrln("FAIL: createInstance failed - %s", u_errorName(status));
	4623	return;
	4624	}
	4625	if (U_FAILURE(status)) {
	4626	errln("FAIL: createInstance failed with %s", u_errorName(status));
	4627	return;
	4628	}
	4629	const char *thaiText =
	4630	"\\u0e42\\u0e14\\u0e22\\u0e1e\\u0e37\\u0e49\\u0e19\\u0e10\\u0e32\\u0e19\\u0e41\\u0e25\\u0e49\\u0e27, \\u0e04\\u0e2d"
	4631	"\\u0e21\\u0e1e\\u0e34\\u0e27\\u0e40\\u0e15\\u0e2d\\u0e23\\u0e4c\\u0e08\\u0e30\\u0e40\\u0e01\\u0e35\\u0e48\\u0e22"
	4632	"\\u0e27\\u0e02\\u0e49\\u0e2d\\u0e07\\u0e01\\u0e31\\u0e1a\\u0e40\\u0e23\\u0e37\\u0e48\\u0e2d\\u0e07\\u0e02\\u0e2d"
	4633	"\\u0e07\\u0e15\\u0e31\\u0e27\\u0e40\\u0e25\\u0e02. \\u0e04\\u0e2d\\u0e21\\u0e1e\\u0e34\\u0e27\\u0e40\\u0e15\\u0e2d"
	4634	"\\u0e23\\u0e4c\\u0e08\\u0e31\\u0e14\\u0e40\\u0e01\\u0e47\\u0e1a\\u0e15\\u0e31\\u0e27\\u0e2d\\u0e31\\u0e01\\u0e29"
	4635	"\\u0e23\\u0e41\\u0e25\\u0e30\\u0e2d\\u0e31\\u0e01\\u0e02\\u0e23\\u0e30\\u0e2d\\u0e37\\u0e48\\u0e19\\u0e46 \\u0e42"
	4636	"\\u0e14\\u0e22\\u0e01\\u0e32\\u0e23\\u0e01\\u0e33\\u0e2b\\u0e19\\u0e14\\u0e2b\\u0e21\\u0e32\\u0e22\\u0e40\\u0e25"
	4637	"\\u0e02\\u0e43\\u0e2b\\u0e49\\u0e2a\\u0e33\\u0e2b\\u0e23\\u0e31\\u0e1a\\u0e41\\u0e15\\u0e48\\u0e25\\u0e30\\u0e15"
	4638	"\\u0e31\\u0e27. \\u0e01\\u0e48\\u0e2d\\u0e19\\u0e2b\\u0e19\\u0e49\\u0e32\\u0e17\\u0e35\\u0e48\\u0e4a Unicode \\u0e08"
	4639	"\\u0e30\\u0e16\\u0e39\\u0e01\\u0e2a\\u0e23\\u0e49\\u0e32\\u0e07\\u0e02\\u0e36\\u0e49\\u0e19, \\u0e44\\u0e14\\u0e49"
	4640	"\\u0e21\\u0e35\\u0e23\\u0e30\\u0e1a\\u0e1a encoding \\u0e2d\\u0e22\\u0e39\\u0e48\\u0e2b\\u0e25\\u0e32\\u0e22\\u0e23"
	4641	"\\u0e49\\u0e2d\\u0e22\\u0e23\\u0e30\\u0e1a\\u0e1a\\u0e2a\\u0e33\\u0e2b\\u0e23\\u0e31\\u0e1a\\u0e01\\u0e32\\u0e23"
	4642	"\\u0e01\\u0e33\\u0e2b\\u0e19\\u0e14\\u0e2b\\u0e21\\u0e32\\u0e22\\u0e40\\u0e25\\u0e02\\u0e40\\u0e2b\\u0e25\\u0e48"
	4643	"\\u0e32\\u0e19\\u0e35\\u0e49. \\u0e44\\u0e21\\u0e48\\u0e21\\u0e35 encoding \\u0e43\\u0e14\\u0e17\\u0e35\\u0e48"
	4644	"\\u0e21\\u0e35\\u0e08\\u0e33\\u0e19\\u0e27\\u0e19\\u0e15\\u0e31\\u0e27\\u0e2d\\u0e31\\u0e01\\u0e02\\u0e23\\u0e30"
	4645	"\\u0e21\\u0e32\\u0e01\\u0e40\\u0e1e\\u0e35\\u0e22\\u0e07\\u0e1e\\u0e2d: \\u0e22\\u0e01\\u0e15\\u0e31\\u0e27\\u0e2d"
	4646	"\\u0e22\\u0e48\\u0e32\\u0e07\\u0e40\\u0e0a\\u0e48\\u0e19, \\u0e40\\u0e09\\u0e1e\\u0e32\\u0e30\\u0e43\\u0e19\\u0e01"
	4647	"\\u0e25\\u0e38\\u0e48\\u0e21\\u0e2a\\u0e2b\\u0e20\\u0e32\\u0e1e\\u0e22\\u0e38\\u0e42\\u0e23\\u0e1b\\u0e40\\u0e1e"
	4648	"\\u0e35\\u0e22\\u0e07\\u0e41\\u0e2b\\u0e48\\u0e07\\u0e40\\u0e14\\u0e35\\u0e22\\u0e27 \\u0e01\\u0e47\\u0e15\\u0e49"
	4649	"\\u0e2d\\u0e07\\u0e01\\u0e32\\u0e23\\u0e2b\\u0e25\\u0e32\\u0e22 encoding \\u0e43\\u0e19\\u0e01\\u0e32\\u0e23\\u0e04"
	4650	"\\u0e23\\u0e2d\\u0e1a\\u0e04\\u0e25\\u0e38\\u0e21\\u0e17\\u0e38\\u0e01\\u0e20\\u0e32\\u0e29\\u0e32\\u0e43\\u0e19"
	4651	"\\u0e01\\u0e25\\u0e38\\u0e48\\u0e21. \\u0e2b\\u0e23\\u0e37\\u0e2d\\u0e41\\u0e21\\u0e49\\u0e41\\u0e15\\u0e48\\u0e43"
	4652	"\\u0e19\\u0e20\\u0e32\\u0e29\\u0e32\\u0e40\\u0e14\\u0e35\\u0e48\\u0e22\\u0e27 \\u0e40\\u0e0a\\u0e48\\u0e19 \\u0e20"
	4653	"\\u0e32\\u0e29\\u0e32\\u0e2d\\u0e31\\u0e07\\u0e01\\u0e24\\u0e29 \\u0e01\\u0e47\\u0e44\\u0e21\\u0e48\\u0e21\\u0e35"
	4654	" encoding \\u0e43\\u0e14\\u0e17\\u0e35\\u0e48\\u0e40\\u0e1e\\u0e35\\u0e22\\u0e07\\u0e1e\\u0e2d\\u0e2a\\u0e33\\u0e2b"
	4655	"\\u0e23\\u0e31\\u0e1a\\u0e17\\u0e38\\u0e01\\u0e15\\u0e31\\u0e27\\u0e2d\\u0e31\\u0e01\\u0e29\\u0e23, \\u0e40\\u0e04"
	4656	"\\u0e23\\u0e37\\u0e48\\u0e2d\\u0e07\\u0e2b\\u0e21\\u0e32\\u0e22\\u0e27\\u0e23\\u0e23\\u0e04\\u0e15\\u0e2d\\u0e19"
	4657	" \\u0e41\\u0e25\\u0e30\\u0e2a\\u0e31\\u0e0d\\u0e25\\u0e31\\u0e01\\u0e29\\u0e13\\u0e4c\\u0e17\\u0e32\\u0e07\\u0e40"
	4658	"\\u0e17\\u0e04\\u0e19\\u0e34\\u0e04\\u0e17\\u0e35\\u0e48\\u0e43\\u0e0a\\u0e49\\u0e01\\u0e31\\u0e19\\u0e2d\\u0e22"
	4659	"\\u0e39\\u0e48\\u0e17\\u0e31\\u0e48\\u0e27\\u0e44\\u0e1b.";
	4660
	4661	const char *latinText =
	4662	"doy ph\\u1ee5\\u0304\\u0302n \\u1e6d\\u0304h\\u0101n l\\u00e6\\u0302w, khxmphiwtexr\\u0312 ca ke\\u012b\\u0300"
	4663	"ywk\\u0304\\u0125xng k\\u1ea1b re\\u1ee5\\u0304\\u0300xng k\\u0304hxng t\\u1ea1wlek\\u0304h. khxmphiwtexr"
	4664	"\\u0312 c\\u1ea1d k\\u0115b t\\u1ea1w x\\u1ea1ks\\u0304\\u02b9r l\\u00e6a x\\u1ea1kk\\u0304h ra x\\u1ee5\\u0304"
	4665	"\\u0300n\\u00ab doy k\\u0101r k\\u1ea3h\\u0304nd h\\u0304m\\u0101ylek\\u0304h h\\u0304\\u0131\\u0302 s\\u0304"
	4666	"\\u1ea3h\\u0304r\\u1ea1b t\\u00e6\\u0300la t\\u1ea1w. k\\u0300xn h\\u0304n\\u0302\\u0101 th\\u012b\\u0300\\u0301"
	4667	" Unicode ca t\\u0304h\\u016bk s\\u0304r\\u0302\\u0101ng k\\u0304h\\u1ee5\\u0302n, d\\u1ecb\\u0302 m\\u012b "
	4668	"rabb encoding xy\\u016b\\u0300 h\\u0304l\\u0101y r\\u0302xy rabb s\\u0304\\u1ea3h\\u0304r\\u1ea1b k\\u0101"
	4669	"r k\\u1ea3h\\u0304nd h\\u0304m\\u0101ylek\\u0304h h\\u0304el\\u0300\\u0101 n\\u012b\\u0302. m\\u1ecb\\u0300m"
	4670	"\\u012b encoding d\\u0131 th\\u012b\\u0300 m\\u012b c\\u1ea3nwn t\\u1ea1w x\\u1ea1kk\\u0304hra m\\u0101k p"
	4671	"he\\u012byng phx: yk t\\u1ea1wx\\u1ef3\\u0101ng ch\\u00e8n, c\\u0304heph\\u0101a n\\u0131 kl\\u00f9m s\\u0304"
	4672	"h\\u0304p\\u0323h\\u0101ph yurop phe\\u012byng h\\u0304\\u00e6\\u0300ng de\\u012byw k\\u0306 t\\u0302xngk\\u0101"
	4673	"r h\\u0304l\\u0101y encoding n\\u0131 k\\u0101r khrxbkhlum thuk p\\u0323h\\u0101s\\u0304\\u02b9\\u0101 n\\u0131"
	4674	" kl\\u00f9m. h\\u0304r\\u1ee5\\u0304x m\\u00e6\\u0302t\\u00e6\\u0300 n\\u0131 p\\u0323h\\u0101s\\u0304\\u02b9"
	4675	"\\u0101 de\\u012b\\u0300yw ch\\u00e8n p\\u0323h\\u0101s\\u0304\\u02b9\\u0101 x\\u1ea1ngkvs\\u0304\\u02b9 k\\u0306"
	4676	" m\\u1ecb\\u0300m\\u012b encoding d\\u0131 th\\u012b\\u0300 phe\\u012byng phx s\\u0304\\u1ea3h\\u0304r\\u1ea1"
	4677	"b thuk t\\u1ea1w x\\u1ea1ks\\u0304\\u02b9r, kher\\u1ee5\\u0304\\u0300xngh\\u0304m\\u0101y wrrkh txn l\\u00e6"
	4678	"a s\\u0304\\u1ea1\\u1ef5l\\u1ea1ks\\u0304\\u02b9\\u1e47\\u0312 th\\u0101ng thekhnikh th\\u012b\\u0300 ch\\u0131"
	4679	"\\u0302 k\\u1ea1n xy\\u016b\\u0300 th\\u1ea1\\u0300wp\\u1ecb.";
	4680
	4681
	4682	UnicodeString xlitText(thaiText);
	4683	xlitText = xlitText.unescape();
	4684	tr->transliterate(xlitText);
	4685
	4686	UnicodeString expectedText(latinText);
	4687	expectedText = expectedText.unescape();
	4688	expect(*tr, xlitText, expectedText);
	4689
	4690	delete tr;
	4691	#endif
	4692	}
	4693
	4694	/**
	4695	* Test for rdar://problem/61817095 (and maybe eventually other Hans-Hant errors)
	4696	* Apple-only
	4697	* ICU4C only
	4698	*/
	4699	void TransliteratorTest::TestHansHant(void) {
	4700	UParseError parseError;
	4701	UErrorCode status = U_ZERO_ERROR;
	4702	Transliterator* tr = Transliterator::createInstance("Hans-Hant", UTRANS_FORWARD, parseError, status);
	4703	if (U_FAILURE(status)) {
	4704	errln("FAIL: createInstance failed with %s", u_errorName(status));
	4705	return;
	4706	}
	4707
	4708	const char* _sourceText = "\\u810f \\u5185\\u810f \\u810f\\u5668 \\u4e94\\u810f \\u5fc3\\u810f \\u809d\\u810f \\u813e\\u810f \\u80c3\\u810f \\u80be\\u810f \\u80f0\\u810f \\u810f\\u8151 \\u80ba\\u810f";
	4709	const char* _expectedResult = "\\u9ad2 \\u5167\\u81df \\u81df\\u5668 \\u4e94\\u81df \\u5fc3\\u81df \\u809d\\u81df \\u813e\\u81df \\u80c3\\u81df \\u814e\\u81df \\u80f0\\u81df \\u81df\\u8151 \\u80ba\\u81df";
	4710
	4711	UnicodeString sourceText(_sourceText);
	4712	UnicodeString expectedResult(_expectedResult);
	4713	sourceText = sourceText.unescape();
	4714	expectedResult = expectedResult.unescape();
	4715
	4716	expect(*tr, sourceText, expectedResult);
	4717	delete tr;
	4718	}
	4719
	4720
	4721	//======================================================================
	4722	// Support methods
	4723	//======================================================================
	4724	void TransliteratorTest::expectT(const UnicodeString& id,
	4725	const UnicodeString& source,
	4726	const UnicodeString& expectedResult) {
	4727	UErrorCode ec = U_ZERO_ERROR;
	4728	UParseError pe;
	4729	Transliterator *t = Transliterator::createInstance(id, UTRANS_FORWARD, pe, ec);
	4730	if (U_FAILURE(ec)) {
	4731	errln((UnicodeString)"FAIL: Could not create " + id + " - " + u_errorName(ec));
	4732	delete t;
	4733	return;
	4734	}
	4735	expect(*t, source, expectedResult);
	4736	delete t;
	4737	}
	4738
	4739	void TransliteratorTest::reportParseError(const UnicodeString& message,
	4740	const UParseError& parseError,
	4741	const UErrorCode& status) {
	4742	dataerrln(message +
	4743	/", parse error " + parseError.code +/
	4744	", line " + parseError.line +
	4745	", offset " + parseError.offset +
	4746	", pre-context " + prettify(parseError.preContext, TRUE) +
	4747	", post-context " + prettify(parseError.postContext,TRUE) +
	4748	", Error: " + u_errorName(status));
	4749	}
	4750
	4751	void TransliteratorTest::expect(const UnicodeString& rules,
	4752	const UnicodeString& source,
	4753	const UnicodeString& expectedResult,
	4754	UTransPosition *pos) {
	4755	expect("<ID>", rules, source, expectedResult, pos);
	4756	}
	4757
	4758	void TransliteratorTest::expect(const UnicodeString& id,
	4759	const UnicodeString& rules,
	4760	const UnicodeString& source,
	4761	const UnicodeString& expectedResult,
	4762	UTransPosition *pos) {
	4763	UErrorCode status = U_ZERO_ERROR;
	4764	UParseError parseError;
	4765	Transliterator* t = Transliterator::createFromRules(id, rules, UTRANS_FORWARD, parseError, status);
	4766	if (U_FAILURE(status)) {
	4767	reportParseError(UnicodeString("Couldn't create transliterator from ") + rules, parseError, status);
	4768	} else {
	4769	expect(*t, source, expectedResult, pos);
	4770	}
	4771	delete t;
	4772	}
	4773
	4774	void TransliteratorTest::expect(const Transliterator& t,
	4775	const UnicodeString& source,
	4776	const UnicodeString& expectedResult,
	4777	const Transliterator& reverseTransliterator) {
	4778	expect(t, source, expectedResult);
	4779	expect(reverseTransliterator, expectedResult, source);
	4780	}
	4781
	4782	void TransliteratorTest::expect(const Transliterator& t,
	4783	const UnicodeString& source,
	4784	const UnicodeString& expectedResult,
	4785	UTransPosition *pos) {
	4786	if (pos == 0) {
	4787	UnicodeString result(source);
	4788	t.transliterate(result);
	4789	expectAux(t.getID() + ":String", source, result, expectedResult);
	4790	}
	4791	UTransPosition index={0, 0, 0, 0};
	4792	if (pos != 0) {
	4793	index = *pos;
	4794	}
	4795
	4796	UnicodeString rsource(source);
	4797	if (pos == 0) {
	4798	t.transliterate(rsource);
	4799	} else {
	4800	// Do it all at once -- below we do it incrementally
	4801	t.finishTransliteration(rsource, *pos);
	4802	}
	4803	expectAux(t.getID() + ":Replaceable", source, rsource, expectedResult);
	4804
	4805	// Test keyboard (incremental) transliteration -- this result
	4806	// must be the same after we finalize (see below).
	4807	UnicodeString log;
	4808	rsource.remove();
	4809	if (pos != 0) {
	4810	rsource = source;
	4811	formatInput(log, rsource, index);
	4812	log.append(" -> ");
	4813	UErrorCode status = U_ZERO_ERROR;
	4814	t.transliterate(rsource, index, status);
	4815	formatInput(log, rsource, index);
	4816	} else {
	4817	for (int32_t i=0; i<source.length(); ++i) {
	4818	if (i != 0) {
	4819	log.append(" + ");
	4820	}
	4821	log.append(source.charAt(i)).append(" -> ");
	4822	UErrorCode status = U_ZERO_ERROR;
	4823	t.transliterate(rsource, index, source.charAt(i), status);
	4824	formatInput(log, rsource, index);
	4825	}
	4826	}
	4827
	4828	// As a final step in keyboard transliteration, we must call
	4829	// transliterate to finish off any pending partial matches that
	4830	// were waiting for more input.
	4831	t.finishTransliteration(rsource, index);
	4832	log.append(" => ").append(rsource);
	4833
	4834	expectAux(t.getID() + ":Keyboard", log,
	4835	rsource == expectedResult,
	4836	expectedResult);
	4837	}
	4838
	4839
	4840	/**
	4841	* @param appendTo result is appended to this param.
	4842	* @param input the string being transliterated
	4843	* @param pos the index struct
	4844	*/
	4845	UnicodeString& TransliteratorTest::formatInput(UnicodeString &appendTo,
	4846	const UnicodeString& input,
	4847	const UTransPosition& pos) {
	4848	// Output a string of the form aaa{bbb\|ccc\|ddd}eee, where
	4849	// the {} indicate the context start and limit, and the \|\|
	4850	// indicate the start and limit.
	4851	if (0 <= pos.contextStart &&
	4852	pos.contextStart <= pos.start &&
	4853	pos.start <= pos.limit &&
	4854	pos.limit <= pos.contextLimit &&
	4855	pos.contextLimit <= input.length()) {
	4856
	4857	UnicodeString a, b, c, d, e;
	4858	input.extractBetween(0, pos.contextStart, a);
	4859	input.extractBetween(pos.contextStart, pos.start, b);
	4860	input.extractBetween(pos.start, pos.limit, c);
	4861	input.extractBetween(pos.limit, pos.contextLimit, d);
	4862	input.extractBetween(pos.contextLimit, input.length(), e);
	4863	appendTo.append(a).append((UChar)123/{/).append(b).
	4864	append((UChar)PIPE).append(c).append((UChar)PIPE).append(d).
	4865	append((UChar)125/}/).append(e);
	4866	} else {
	4867	appendTo.append((UnicodeString)"INVALID UTransPosition {cs=" +
	4868	pos.contextStart + ", s=" + pos.start + ", l=" +
	4869	pos.limit + ", cl=" + pos.contextLimit + "} on " +
	4870	input);
	4871	}
	4872	return appendTo;
	4873	}
	4874
	4875	void TransliteratorTest::expectAux(const UnicodeString& tag,
	4876	const UnicodeString& source,
	4877	const UnicodeString& result,
	4878	const UnicodeString& expectedResult) {
	4879	expectAux(tag, source + " -> " + result,
	4880	result == expectedResult,
	4881	expectedResult);
	4882	}
	4883
	4884	void TransliteratorTest::expectAux(const UnicodeString& tag,
	4885	const UnicodeString& summary, UBool pass,
	4886	const UnicodeString& expectedResult) {
	4887	if (pass) {
	4888	logln(UnicodeString("(")+tag+") " + prettify(summary));
	4889	} else {
	4890	dataerrln(UnicodeString("FAIL: (")+tag+") "
	4891	+ prettify(summary)
	4892	+ ", expected " + prettify(expectedResult));
	4893	}
	4894	}
	4895
	4896	#endif /* #if !UCONFIG_NO_TRANSLITERATION */