git.saurik.com Git - apple/icu.git/blame_incremental - icuSources/test/testdata/DataDrivenCollationTest.txt

... / ...

Commit	Line	Data
	1	// Copyright (c) 2001-2005 International Business Machines
	2	// Corporation and others. All Rights Reserved.
	3	DataDrivenCollationTest:table(nofallback) {
	4	Info {
	5	Headers { "sequence" }
	6	Description { "These are the data driven tests" }
	7	LongDescription { "The following entries are separate tests containing test data for various locales."
	8	"Each entry has the following fields: "
	9	"Info/Description - short description of the test"
	10	"Settings - settings for the test."
	11	"Settings/TestLocale - locale for the collator OR"
	12	"Settings/Rules - rules for the collator (can't have both)"
	13	"Settings/Arguments - arguments to be passed to the collator before testing. Use rule syntax."
	14	"Cases - set of test cases, which are sequences of strings that will be parsed"
	15	"Sequences must not change the sign of relation, i.e. we can only have < and = or"
	16	"> and = in single sequence. Cannot mix < and > in the same sequence. Whitespace is"
	17	" ignored unless quoted."
	18	}
	19	}
	20	TestData {
	21	TestMorePinyin {
	22	Info {
	23	Description { "Testing the primary strength." }
	24	}
	25	Settings {
	26	{
	27	TestLocale { "zh" }
	28	Arguments { "[strength 1]" }
	29	}
	30	}
	31	Cases { "lā = lĀ = Lā = LĀ < lān = lĀn < lē = lĒ = Lē = LĒ < lēn = lĒn" }
	32
	33	}
	34	TestLithuanian {
	35	Info {
	36	Description { "Lithuanian sort order." }
	37	}
	38	Settings {
	39	{
	40	TestLocale { "lt" }
	41	}
	42	}
	43	Cases { "cz<č<d<iz<y<j<sz<š<t<zz<ž" }
	44	}
	45	TestLatvian {
	46	Info {
	47	Description { "Latvian sort order." }
	48	}
	49	Settings {
	50	{
	51	TestLocale { "lv" }
	52	}
	53	}
	54	Cases { "cz<č<d<gz<ģ<h<iz<y<j<kz<ķ<l<lz<ļ<m<nz<ņ<o<rz<ŗ<s<sz<š<t<zz<ž" }
	55	}
	56	TestEstonian {
	57	Info {
	58	Description { "Estonian sort order." }
	59	}
	60	Settings {
	61	{
	62	TestLocale { "et" }
	63	}
	64	}
	65	Cases { "sy<š<šy<z<zy<ž<v<w<va<õ<õy<ä<äy<ö<öy<ü<üy<x" }
	66	}
	67	TestAlbanian {
	68	Info {
	69	Description { "Albanian sort order." }
	70	}
	71	Settings {
	72	{
	73	TestLocale { "sq" }
	74	}
	75	}
	76	Cases { "cz<ç<d<dz<dh<e<ez<ë<f<gz<gj<h<lz<ll<m<nz<nj<o<rz<rr<s<sz<sh<t<tz<th<u<xz<xh<y<zz<zh" }
	77	}
	78
	79	TestSimplifiedChineseOrder {
	80	Info {
	81	Description { "Sorted file has different order." }
	82	}
	83	Settings {
	84	{
	85	TestLocale { "root" }
	86	Arguments { "[normalization on]" }
	87	}
	88	}
	89
	90	Cases { "\u5F20<\u5F20\u4E00\u8E3F" }
	91	}
	92
	93	TestTibetanNormalizedIterativeCrash {
	94	Info {
	95	Description { "This pretty much crashes." }
	96	}
	97	Settings {
	98	{
	99	TestLocale { "root" }
	100	}
	101	}
	102
	103	Cases { "\u0f71\u0f72\u0f80\u0f71\u0f72"
	104	"<\u0f80"
	105	}
	106	}
	107	TestThaiPartialSortKeyProblems {
	108	Info {
	109	Description { "These are examples of strings that caused trouble in partial sort key testing." }
	110	}
	111	Settings {
	112	{
	113	TestLocale { "th_TH" }
	114	}
	115	}
	116	// TODO: the tests that are commented out should be enabled when j2720 is fixed
	117	Cases { "\u0E01\u0E01\u0E38\u0E18\u0E20\u0E31\u0E13\u0E11\u0E4C"
	118	"<\u0E01\u0E01\u0E38\u0E2A\u0E31\u0E19\u0E42\u0E18",
	119	"\u0E01\u0E07\u0E01\u0E32\u0E23"
	120	"<\u0E01\u0E07\u0E42\u0E01\u0E49",
	121	"\u0E01\u0E23\u0E19\u0E17\u0E32"
	122	"<\u0E01\u0E23\u0E19\u0E19\u0E40\u0E0A\u0E49\u0E32",
	123	"\u0E01\u0E23\u0E30\u0E40\u0E08\u0E35\u0E22\u0E27"
	124	"<\u0E01\u0E23\u0E30\u0E40\u0E08\u0E35\u0E4A\u0E22\u0E27",
	125	"\u0E01\u0E23\u0E23\u0E40\u0E0A\u0E2D"
	126	"<\u0E01\u0E23\u0E23\u0E40\u0E0A\u0E49\u0E32"
	127	}
	128	}
	129	TestJavaStyleRule {
	130	Info {
	131	Description { "java.text allows rules to start as '<<<x<<<y...' "
	132	"we emulate this by assuming a &[first tertiary ignorable] "
	133	"in this case."
	134	}
	135	}
	136	Settings {
	137	{
	138	Rules { "=equal<<<z<<x<<<w<y &[first tertiary ignorable]=a &[first primary ignorable]=b" }
	139	}
	140	}
	141	Cases { "a = equal < z < x < w < b < y" }
	142	}
	143	TestShiftedIgnorable {
	144	Info {
	145	Description { "New UCA states that primary ignorables should be completely "
	146	"ignorable when following a shifted code point."
	147	}
	148	}
	149	Settings {
	150	{
	151	TestLocale { "root" }
	152	Arguments { "[alternate shifted][strength 4]" }
	153	}
	154	}
	155	Cases {
	156	"a' 'b="
	157	"a' '\u0300b="
	158	"a' '\u0301b<"
	159	"a_b="
	160	"a_\u0300b="
	161	"a_\u0301b<"
	162	"A' 'b="
	163	"A' '\u0300b="
	164	"A' '\u0301b<"
	165	"A_b="
	166	"A_\u0300b="
	167	"A_\u0301b<"
	168	"a\u0301b<"
	169	"A\u0301b<"
	170	"a\u0300b<"
	171	"A\u0300b"
	172
	173	}
	174	}
	175
	176	TestNShiftedIgnorable {
	177	Info {
	178	Description { "New UCA states that primary ignorables should be completely "
	179	"ignorable when following a shifted code point."
	180	}
	181	}
	182	Settings {
	183	{
	184	TestLocale { "root" }
	185	Arguments { "[alternate non-ignorable][strength 3]" }
	186	}
	187	}
	188	Cases {
	189	"a' 'b<"
	190	"A' 'b<"
	191	"a' '\u0301b<"
	192	"A' '\u0301b<"
	193	"a' '\u0300b<"
	194	"A' '\u0300b<"
	195	"a_b<"
	196	"A_b<"
	197	"a_\u0301b<"
	198	"A_\u0301b<"
	199	"a_\u0300b<"
	200	"A_\u0300b<"
	201	"a\u0301b<"
	202	"A\u0301b<"
	203	"a\u0300b<"
	204	"A\u0300b<"
	205	}
	206	}
	207
	208	TestSafeSurrogates {
	209	Info {
	210	Description { "It turned out that surrogates were not skipped properly "
	211	"when iterating backwards if they were in the middle of a "
	212	"contraction. This test assures that this is fixed."
	213	}
	214	}
	215	Settings {
	216	{
	217	Rules {
	218	"&a < x\ud800\udc00b"
	219	}
	220	}
	221	}
	222	Cases {
	223	"a<x\ud800\udc00b"
	224	}
	225	}
	226	/*
	227	UCA 4.1 removes skipping of ignorable code points in contractions!
	228	TestCIgnorableContraction {
	229	Info {
	230	Description { "Checks whether completely ignorable code points are "
	231	"skipped in contractions."
	232	}
	233	}
	234	Settings {
	235	{
	236	TestLocale { "sh" }
	237	}
	238	{
	239	Rules {
	240	"& L < lj, Lj <<< LJ"
	241	"& N < nj, Nj <<< NJ "
	242	}
	243	}
	244	}
	245	Cases {
	246	"njiva=n\ud834\udd65jiva=n\uD834\uDD79jiva=n\u0000\u0000\u0000jiva=n\u0000jiva=n\ud800jiva=n\ufffejiva",
	247	"ljubav=l\u0000jubav=l\uD834\uDD79jubav=l\u0000\u0000\u0000jubav=l\ud800jubav=l\ufffejubav",
	248	"Ljubav=L\u0000jubav=L\uD834\uDD79jubav=L\u0000\u0000\u0000jubav=L\ud800jubav=L\ufffejubav",
	249	}
	250	}
	251
	252	*/
	253	/*
	254	UCA 4.1 removes skipping of ignorable code points in contractions!
	255	TestCIgnorablePrefix {
	256	Info {
	257	Description { "Checks whether completely ignorable code points are "
	258	"skipped in prefix processing."
	259	}
	260	}
	261	Settings {
	262	{
	263	TestLocale { "ja" }
	264	}
	265	}
	266	Cases {
	267	"\u30A1\u30FC"
	268	"= \u30A1\uDB40\uDC30\u30FC"
	269	"= \u30A1\uD800\u30FC"
	270	"= \u30A1\uFFFE\u30FC"
	271	"= \u30A1\uD834\uDD79\u30FC"
	272	"= \u30A1\u0000\u0000\u0000\u30FC"
	273	"= \u30A1\u0000\u30FC"
	274	"= \u30A1\u30FC"
	275	"= \u30A1\u0000\u059a\u30FC"
	276	"= \u30A1\u30FC"
	277	}
	278	}
	279	*/
	280	da_TestPrimary {
	281	Info {
	282	Description { "This test goes through primary strength cases" }
	283	}
	284	Settings {
	285	{
	286	TestLocale { "da" }
	287	Arguments { "[strength 1]" }
	288	}
	289	}
	290	Cases {
	291	"Lvi<Lwi",
	292	"L\u00e4vi<L\u00f6wi",
	293	"L\u00fcbeck=Lybeck",
	294	}
	295	}
	296	da_TestTertiary {
	297	Info {
	298	Description { "This test goes through tertiary strength cases" }
	299	}
	300	Settings {
	301	{
	302	TestLocale { "da" }
	303	Arguments { "[strength 3]" }
	304	}
	305	}
	306	Cases {
	307	"Luc<luck",
	308	"luck<L\u00fcbeck",
	309	"L\u00fcbeck>lybeck",
	310	"L\u00e4vi<L\u00f6we",
	311	"L\u00f6ww<mast",
	312	// constUCharCollationDanishTest::testBugs[][CollationDanishTest::MAX_TOKEN_LEN]="
	313	"A/S<"
	314	"ANDRE<"
	315	"ANDR\u00c9<"
	316	"ANDREAS<"
	317	"AS<"
	318	"CA<"
	319	"\u00c7A<"
	320	"CB<"
	321	"\u00c7C<"
	322	"D.S.B.<"
	323	"DA<"
	324	"\u00d0A<"
	325	"DB<"
	326	"\u00d0C<"
	327	"DSB<"
	328	"DSC<"
	329	"EKSTRA_ARBEJDE<"
	330	"EKSTRABUD0<"
	331	"H\u00d8ST<"
	332	"HAAG<"
	333	"H\u00c5NDBOG<"
	334	"HAANDV\u00c6RKSBANKEN<"
	335	"Karl<"
	336	"karl<"
	337	"'NIELS J\u00d8RGEN'<"
	338	"NIELS-J\u00d8RGEN<"
	339	"NIELSEN<"
	340	"'R\u00c9E, A'<"
	341	"'REE, B'<"
	342	"'R\u00c9E, L'<"
	343	"'REE, V'<"
	344	"'SCHYTT, B'<"
	345	"'SCHYTT, H'<"
	346	"'SCH\u00dcTT, H'<"
	347	"'SCHYTT, L'<"
	348	"'SCH\u00dcTT, M'<"
	349	"SS<"
	350	"\u00df<"
	351	"SSA<"
	352	"'STORE VILDMOSE'<"
	353	"STOREK\u00c6R0<"
	354	"'STORM PETERSEN'<"
	355	"STORMLY<"
	356	"THORVALD<"
	357	"THORVARDUR<"
	358	"\u00feORVAR\u00d0UR<"
	359	"THYGESEN<"
	360	"'VESTERG\u00c5RD, A'<"
	361	"'VESTERGAARD, A'<"
	362	"'VESTERG\u00c5RD, B'<"
	363	"\u00c6BLE<"
	364	"\u00c4BLE<"
	365	"\u00d8BERG<"
	366	"\u00d6BERG",
	367
	368	// constUCharCollationDanishTest::testNTList[][CollationDanishTest::MAX_TOKEN_LEN]="
	369	"andere<"
	370	"chaque<"
	371	"chemin<"
	372	"cote<"
	373	"cot\u00e9<"
	374	"c\u00f4te<"
	375	"c\u00f4t\u00e9<"
	376	"\u010du\u010d\u0113t<"
	377	"Czech<"
	378	"hi\u0161a<"
	379	"irdisch<"
	380	"lie<"
	381	"lire<"
	382	"llama<"
	383	"l\u00f5ug<"
	384	"l\u00f2za<"
	385	"lu\u010d<"
	386	"luck<"
	387	"L\u00fcbeck<"
	388	"lye<"
	389	"l\u00e4vi<"
	390	"L\u00f6wen<"
	391	"m\u00e0\u0161ta<"
	392	"m\u00eer<"
	393	"myndig<"
	394	"M\u00e4nner<"
	395	"m\u00f6chten<"
	396	"pi\u00f1a<"
	397	"pint<"
	398	"pylon<"
	399	"\u0161\u00e0ran<"
	400	"savoir<"
	401	"\u0160erb\u016bra<"
	402	"Sietla<"
	403	"\u015blub<"
	404	"subtle<"
	405	"symbol<"
	406	"s\u00e4mtlich<"
	407	"verkehrt<"
	408	"vox<"
	409	"v\u00e4ga<"
	410	"waffle<"
	411	"wood<"
	412	"yen<"
	413	"yuan<"
	414	"yucca<"
	415	"\u017eal<"
	416	"\u017eena<"
	417	"\u017den\u0113va<"
	418	"zoo0<"
	419	"Zviedrija<"
	420	"Z\u00fcrich<"
	421	"zysk0<"
	422	"\u00e4ndere"
	423	}
	424	}
	425	hi_TestNewRules {
	426	Info {
	427	Description { "This test goes through new rules and tests against old rules" }
	428	}
	429	Settings {
	430	{
	431	TestLocale { "hi" }
	432	}
	433	}
	434	Cases {
	435	"ॐ<।<॥<॰<०<१<२<३"
	436	"<४<५<६<७<८<९<अ<आ"
	437	"<इ<ई<उ<ऊ<ऋ<ॠ<ऌ<ॡ"
	438	"<ऍ<ऎ<ए<ऐ<ऑ<ऒ<ओ<औ"
	439	"<क<क़=क़<कँ<कं<कः<क॑<क॒"
	440	"<क॓<क॔<कऽ<क्<का<कि<की<कु"
	441	"<कू<कृ<कॄ<कॢ<कॣ<कॅ<कॆ<के"
	442	"<कै<कॉ<कॊ<को<कौ<ख<ख़ =ख़<खँ<खं<खः"
	443	"<ख॑<ख॒<ख॓<ख॔<खऽ<ख्<खा<खि"
	444	"<खी<खु<खू<खृ<खॄ<खॢ<खॣ<खॅ"
	445	"<खॆ<खे<खै<खॉ<खॊ<खो<खौ<ग"
	446	"<ग़=ग़<गँ<गं<गः<ग॑<ग॒<ग॓<ग॔"
	447	"<गऽ<ग्<गा<गि<गी<गु<गू<गृ"
	448	"<गॄ<गॢ<गॣ<गॅ<गॆ<गे<गै<गॉ"
	449	"<गॊ<गो<गौ<घ<ङ<च<छ<ज<ज़ =ज़<जँ<जं<जः"
	450	"<ज॑<ज॒<ज॓<ज॔<जऽ<ज्<जा<जि"
	451	"<जी<जु<जू<जृ<जॄ<जॢ<जॣ<जॅ"
	452	"<जॆ<जे<जै<जॉ<जॊ<जो<जौ<झ"
	453	"<ञ<ट<ठ<ड<ड़=ड़<डँ<डं<डः<ड॑<ड॒<ड॓<ड॔"
	454	"<डऽ<ड्<डा<डि<डी<डु<डू<डृ"
	455	"<डॄ<डॢ<डॣ<डॅ<डॆ<डे<डै<डॉ"
	456	"<डॊ<डो<डौ<ढ<ढ़=ढ़<ढँ<ढं<ढः"
	457	"<ढ॑<ढ॒<ढ॓<ढ॔<ढऽ<ढ्<ढा<ढि"
	458	"<ढी<ढु<ढू<ढृ<ढॄ<ढॢ<ढॣ<ढॅ"
	459	"<ढॆ<ढे<ढै<ढॉ<ढॊ<ढो<ढौ<ण"
	460	"<त<थ<द<ध<न<ऩ =ऩ< नँ<नं< नः"
	461	"<न॑<न॒<न॓<न॔<नऽ<न्<ना<नि"
	462	"<नी<नु<नू<नृ<नॄ<नॢ<नॣ<नॅ"
	463	"<नॆ<ने<नै<नॉ<नॊ<नो<नौ"
	464	"<प<फ<फ़=फ़<फँ<फं<फः<फ॑<फ॒"
	465	"<फ॓<फ॔<फऽ<फ्<फा<फि<फी<फु<फू<फृ"
	466	"<फॄ<फॢ<फॣ<फॅ<फॆ<फे<फै<फॉ"
	467	"<फॊ<फो<फौ<ब<भ<म<य<य़=य़ "
	468	"<यँ<यं<यः<य॑<य॒<य॓<य॔"
	469	"<यऽ<य्<या<यि<यी<यु<यू<यृ"
	470	"<यॄ<यॢ<यॣ<यॅ<यॆ<ये<यै<यॉ"
	471	"<यॊ<यो<यौ<र<ऱ=ऱ<रँ<रं<रः"
	472	"<र॑<र॒<र॓<र॔<रऽ<र्<रा<रि"
	473	"<री<रु<रू<रृ<रॄ<रॢ<रॣ<रॅ"
	474	"<रॆ<रे<रै<रॉ<रॊ<रो<रौ"
	475	"<ल<ळ<ऴ=ऴ<ळँ<ळं<ळः<ळ॑<ळ॒"
	476	"<ळ॓<ळ॔<ळऽ<ळ्<ळा<ळि<ळी<ळु"
	477	"<ळू<ळृ<ळॄ<ळॢ<ळॣ<ळॅ<ळॆ<ळे"
	478	"<ळै<ळॉ<ळॊ<ळो<ळौ<व<श<ष<स<ह"
	479	"<़<ँ<ं<ः<॑<॒<॓<॔<ऽ<्<ा<ि<ी"
	480	"<ु<ू<ृ<ॄ<ॢ<ॣ<ॅ<ॆ"
	481	"<े<ै<ॉ<ॊ<ो<ौ"
	482	}
	483	}
	484	fi_TestNewRules {
	485	Info {
	486	Description { "This test goes through new rules and tests against old rules" }
	487	}
	488	Settings {
	489	{
	490	TestLocale { "fi" }
	491	}
	492	}
	493	Cases {
	494	"xa<xA<Xa<XA<xá<Xá<xax<xAx<xáx<xd<Xd<xð<xÐ<Xð<XÐ<xđ<xĐ<Xđ<XĐ<"
	495	"xdx<xðx<xÐx<xđx<xĐx<xe<Xe<xex<xn<Xn<xŋ<xŊ<Xŋ<XŊ<xnx<xŋx<xŊx<"
	496	"xo<Xo<xó<Xó<xox<xóx<xs<Xs<xß<Xß<xßx<xsx<xt<Xt<xþ<xÞ<Xþ<XÞ<xþx<"
	497	"xÞx<xtx<xu<Xu<xú<Xú<xux<xúx<xv<Xv<xw<Xw<xvx<xwx<xy<Xy<xü<Xü<"
	498	"xű<Xű<xyx<xüx<xűx<xz<Xz<xzx<xå<Xå<xåx<xä<Xä<xæ<xÆ<Xæ<XÆ<xäx<"
	499	"xæx<xÆx<xö<Xö<xø<Xø<xő<Xő<xõ<Xõ<xœ<xŒ<Xœ<XŒ<xöx<xøx<xőx<xõx<xœx<xŒx"
	500	}
	501	}
	502	ro_TestNewRules {
	503	Info {
	504	Description { "This test goes through new rules and tests against old rules" }
	505	}
	506	Settings {
	507	{
	508	TestLocale { "ro" }
	509	}
	510	}
	511	Cases {
	512	"xAx<xă<xĂ<Xă<XĂ<xăx<xĂx<xâ<xÂ<Xâ<XÂ<xâx<xÂx<xb<xIx<xî<xÎ<Xî<XÎ<xîx<xÎx<"
	513	"xj<xSx<xș=xş<xȘ=xŞ<Xș=Xş<XȘ=XŞ<xșx=xşx<xȘx=xŞx<xT<xTx<xț=xţ<xȚ=xŢ<Xț=Xţ<XȚ"
	514	"=XŢ<xțx=xţx<xȚx=xŢx<xU"
	515	}
	516	}
	517	}
	518	}