]> git.saurik.com Git - apple/icu.git/blob - icuSources/test/testdata/DataDrivenCollationTest.txt
ICU-6.2.6.tar.gz
[apple/icu.git] / icuSources / test / testdata / DataDrivenCollationTest.txt
1 // Copyright (c) 2001-2004 International Business Machines
2 // Corporation and others. All Rights Reserved.
3 DataDrivenCollationTest {
4 Info {
5 Headers { "sequence" }
6 Description { "These are the data driven tests" }
7 LongDescription { "The following entries are separate tests containing test data for various locales."
8 "Each entry has the following fields: "
9 "Info/Description - short description of the test"
10 "Settings - settings for the test."
11 "Settings/TestLocale - locale for the collator OR"
12 "Settings/Rules - rules for the collator (can't have both)"
13 "Settings/Arguments - arguments to be passed to the collator before testing. Use rule syntax."
14 "Cases - set of test cases, which are sequences of strings that will be parsed"
15 "Sequences must not change the sign of relation, i.e. we can only have < and = or "
16 "> and = in a single sequence. Cannot mix < and > in the same sequence. Whitespace "
17 "is ignored unless quoted."
18 }
19 }
20 TestData {
21 TestLithuanian {
22 Info {
23 Description { "Lithuanian sort order." }
24 }
25 Settings {
26 {
27 TestLocale { "lt" }
28 }
29 }
30 Cases { "cz<č<d<iz<y<j<sz<š<t<zz<ž" }
31 }
32 TestLatvian {
33 Info {
34 Description { "Latvian sort order." }
35 }
36 Settings {
37 {
38 TestLocale { "lv" }
39 }
40 }
41 Cases { "cz<č<d<gz<ģ<h<iz<y<j<kz<ķ<l<lz<ļ<m<nz<ņ<o<rz<ŗ<s<sz<š<t<zz<ž" }
42 }
43 TestEstonian {
44 Info {
45 Description { "Estonian sort order." }
46 }
47 Settings {
48 {
49 TestLocale { "et" }
50 }
51 }
52 Cases { "sy<š<šy<z<zy<ž<v<w<va<õ<õy<ä<äy<ö<öy<ü<üy<x" }
53 }
54 TestAlbanian {
55 Info {
56 Description { "Albanian sort order." }
57 }
58 Settings {
59 {
60 TestLocale { "sq" }
61 }
62 }
63 Cases { "cz<ç<d<dz<dh<e<ez<ë<f<gz<gj<h<lz<ll<m<nz<nj<o<rz<rr<s<sz<sh<t<tz<th<u<xz<xh<y<zz<zh" }
64 }
65
66 TestSimplifiedChineseOrder {
67 Info {
68 Description { "Sorted file has different order." }
69 }
70 Settings {
71 {
72 TestLocale { "root" }
73 Arguments { "[normalization on]" }
74 }
75 }
76
77 Cases { "\u5F20<\u5F20\u4E00\u8E3F" }
78 }
79
80 TestTibetanNormalizedIterativeCrash {
81 Info {
82 Description { "This pretty much crashes." }
83 }
84 Settings {
85 {
86 TestLocale { "root" }
87 }
88 }
89
90 Cases { "\u0f71\u0f72\u0f80\u0f71\u0f72"
91 "<\u0f80"
92 }
93 }
94 TestThaiPartialSortKeyProblems {
95 Info {
96 Description { "These are examples of strings that caused trouble in partial sort key testing." }
97 }
98 Settings {
99 {
100 TestLocale { "th_TH" }
101 }
102 }
103 // TODO: the tests that are commented out should be enabled when j2720 is fixed
104 Cases { "\u0E01\u0E01\u0E38\u0E18\u0E20\u0E31\u0E13\u0E11\u0E4C"
105 "<\u0E01\u0E01\u0E38\u0E2A\u0E31\u0E19\u0E42\u0E18",
106 "\u0E01\u0E07\u0E01\u0E32\u0E23"
107 "<\u0E01\u0E07\u0E42\u0E01\u0E49",
108 "\u0E01\u0E23\u0E19\u0E17\u0E32"
109 "<\u0E01\u0E23\u0E19\u0E19\u0E40\u0E0A\u0E49\u0E32",
110 "\u0E01\u0E23\u0E30\u0E40\u0E08\u0E35\u0E22\u0E27"
111 "<\u0E01\u0E23\u0E30\u0E40\u0E08\u0E35\u0E4A\u0E22\u0E27",
112 "\u0E01\u0E23\u0E23\u0E40\u0E0A\u0E2D"
113 "<\u0E01\u0E23\u0E23\u0E40\u0E0A\u0E49\u0E32"
114 }
115 }
116 TestJavaStyleRule {
117 Info {
118 Description { "java.text allows rules to start as '<<<x<<<y...' "
119 "we emulate this by assuming a &[first tertiary ignorable] "
120 "in this case."
121 }
122 }
123 Settings {
124 {
125 Rules { "=equal<<<z<<x<<<w<y &[first tertiary ignorable]=a &[first primary ignorable]=b" }
126 }
127 }
128 Cases { "a = equal < z < x < w < b < y" }
129 }
130 TestShiftedIgnorable {
131 Info {
132 Description { "New UCA states that primary ignorables should be completely "
133 "ignorable when following a shifted code point."
134 }
135 }
136 Settings {
137 {
138 TestLocale { "root" }
139 Arguments { "[alternate shifted][strength 4]" }
140 }
141 }
142 Cases {
143 "a' 'b="
144 "a' '\u0300b="
145 "a' '\u0301b<"
146 "a_b="
147 "a_\u0300b="
148 "a_\u0301b<"
149 "A' 'b="
150 "A' '\u0300b="
151 "A' '\u0301b<"
152 "A_b="
153 "A_\u0300b="
154 "A_\u0301b<"
155 "a\u0301b<"
156 "A\u0301b<"
157 "a\u0300b<"
158 "A\u0300b"
159
160 }
161 }
162
163 TestNShiftedIgnorable {
164 Info {
165 Description { "New UCA states that primary ignorables should be completely "
166 "ignorable when following a shifted code point."
167 }
168 }
169 Settings {
170 {
171 TestLocale { "root" }
172 Arguments { "[alternate non-ignorable][strength 3]" }
173 }
174 }
175 Cases {
176 "a' 'b<"
177 "A' 'b<"
178 "a' '\u0301b<"
179 "A' '\u0301b<"
180 "a' '\u0300b<"
181 "A' '\u0300b<"
182 "a_b<"
183 "A_b<"
184 "a_\u0301b<"
185 "A_\u0301b<"
186 "a_\u0300b<"
187 "A_\u0300b<"
188 "a\u0301b<"
189 "A\u0301b<"
190 "a\u0300b<"
191 "A\u0300b<"
192 }
193 }
194
195 TestSafeSurrogates {
196 Info {
197 Description { "It turned out that surrogates were not skipped properly "
198 "when iterating backwards if they were in the middle of a "
199 "contraction. This test assures that this is fixed."
200 }
201 }
202 Settings {
203 {
204 Rules {
205 "&a < x\ud800\udc00b"
206 }
207 }
208 }
209 Cases {
210 "a<x\ud800\udc00b"
211 }
212 }
213
214 TestCIgnorableContraction {
215 Info {
216 Description { "Checks whether completely ignorable code points are "
217 "skipped in contractions."
218 }
219 }
220 Settings {
221 {
222 TestLocale { "sh" }
223 }
224 {
225 Rules {
226 "& L < lj, Lj <<< LJ"
227 "& N < nj, Nj <<< NJ "
228 }
229 }
230 }
231 Cases {
232 "njiva=n\ud834\udd65jiva=n\uD834\uDD79jiva=n\u0000\u0000\u0000jiva=n\u0000jiva=n\ud800jiva=n\ufffejiva",
233 "ljubav=l\u0000jubav=l\uD834\uDD79jubav=l\u0000\u0000\u0000jubav=l\ud800jubav=l\ufffejubav",
234 "Ljubav=L\u0000jubav=L\uD834\uDD79jubav=L\u0000\u0000\u0000jubav=L\ud800jubav=L\ufffejubav",
235 }
236 }
237
238
239 TestCIgnorablePrefix {
240 Info {
241 Description { "Checks whether completely ignorable code points are "
242 "skipped in prefix processing."
243 }
244 }
245 Settings {
246 {
247 TestLocale { "ja" }
248 }
249 }
250 Cases {
251 "\u30A1\u30FC"
252 "= \u30A1\uDB40\uDC30\u30FC"
253 "= \u30A1\uD800\u30FC"
254 "= \u30A1\uFFFE\u30FC"
255 "= \u30A1\uD834\uDD79\u30FC"
256 "= \u30A1\u0000\u0000\u0000\u30FC"
257 "= \u30A1\u0000\u30FC"
258 "= \u30A1\u30FC"
259 "= \u30A1\u0000\u059a\u30FC"
260 "= \u30A1\u30FC"
261 }
262 }
263 da_TestPrimary {
264 Info {
265 Description { "This test goes through primary strength cases" }
266 }
267 Settings {
268 {
269 TestLocale { "da" }
270 Arguments { "[strength 1]" }
271 }
272 }
273 Cases {
274 "Lvi<Lwi",
275 "L\u00e4vi<L\u00f6wi",
276 "L\u00fcbeck=Lybeck",
277 }
278 }
279 da_TestTertiary {
280 Info {
281 Description { "This test goes through tertiary strength cases" }
282 }
283 Settings {
284 {
285 TestLocale { "da" }
286 Arguments { "[strength 3]" }
287 }
288 }
289 Cases {
290 "Luc<luck",
291 "luck<L\u00fcbeck",
292 "L\u00fcbeck>lybeck",
293 "L\u00e4vi<L\u00f6we",
294 "L\u00f6ww<mast",
295 // const UChar CollationDanishTest::testBugs[][CollationDanishTest::MAX_TOKEN_LEN] =
296 "A/S<"
297 "ANDRE<"
298 "ANDR\u00c9<"
299 "ANDREAS<"
300 "AS<"
301 "CA<"
302 "\u00c7A<"
303 "CB<"
304 "\u00c7C<"
305 "D.S.B.<"
306 "DA<"
307 "\u00d0A<"
308 "DB<"
309 "\u00d0C<"
310 "DSB<"
311 "DSC<"
312 "EKSTRA_ARBEJDE<"
313 "EKSTRABUD0<"
314 "H\u00d8ST<"
315 "HAAG<"
316 "H\u00c5NDBOG<"
317 "HAANDV\u00c6RKSBANKEN<"
318 "Karl<"
319 "karl<"
320 "'NIELS J\u00d8RGEN'<"
321 "NIELS-J\u00d8RGEN<"
322 "NIELSEN<"
323 "'R\u00c9E, A'<"
324 "'REE, B'<"
325 "'R\u00c9E, L'<"
326 "'REE, V'<"
327 "'SCHYTT, B'<"
328 "'SCHYTT, H'<"
329 "'SCH\u00dcTT, H'<"
330 "'SCHYTT, L'<"
331 "'SCH\u00dcTT, M'<"
332 "SS<"
333 "\u00df<"
334 "SSA<"
335 "'STORE VILDMOSE'<"
336 "STOREK\u00c6R0<"
337 "'STORM PETERSEN'<"
338 "STORMLY<"
339 "THORVALD<"
340 "THORVARDUR<"
341 "\u00feORVAR\u00d0UR<"
342 "THYGESEN<"
343 "'VESTERG\u00c5RD, A'<"
344 "'VESTERGAARD, A'<"
345 "'VESTERG\u00c5RD, B'<"
346 "\u00c6BLE<"
347 "\u00c4BLE<"
348 "\u00d8BERG<"
349 "\u00d6BERG",
350
351 // const UChar CollationDanishTest::testNTList[][CollationDanishTest::MAX_TOKEN_LEN] =
352 "andere<"
353 "chaque<"
354 "chemin<"
355 "cote<"
356 "cot\u00e9<"
357 "c\u00f4te<"
358 "c\u00f4t\u00e9<"
359 "\u010du\u010d\u0113t<"
360 "Czech<"
361 "hi\u0161a<"
362 "irdisch<"
363 "lie<"
364 "lire<"
365 "llama<"
366 "l\u00f5ug<"
367 "l\u00f2za<"
368 "lu\u010d<"
369 "luck<"
370 "L\u00fcbeck<"
371 "lye<"
372 "l\u00e4vi<"
373 "L\u00f6wen<"
374 "m\u00e0\u0161ta<"
375 "m\u00eer<"
376 "myndig<"
377 "M\u00e4nner<"
378 "m\u00f6chten<"
379 "pi\u00f1a<"
380 "pint<"
381 "pylon<"
382 "\u0161\u00e0ran<"
383 "savoir<"
384 "\u0160erb\u016bra<"
385 "Sietla<"
386 "\u015blub<"
387 "subtle<"
388 "symbol<"
389 "s\u00e4mtlich<"
390 "verkehrt<"
391 "vox<"
392 "v\u00e4ga<"
393 "waffle<"
394 "wood<"
395 "yen<"
396 "yuan<"
397 "yucca<"
398 "\u017eal<"
399 "\u017eena<"
400 "\u017den\u0113va<"
401 "zoo0<"
402 "Zviedrija<"
403 "Z\u00fcrich<"
404 "zysk0<"
405 "\u00e4ndere"
406 }
407 }
408 hi_TestNewRules {
409 Info {
410 Description { "This test goes through new rules and tests against old rules" }
411 }
412 Settings {
413 {
414 TestLocale { "hi" }
415 }
416 }
417 Cases {
418 "ॐ<।<॥<॰<०<१<२<३"
419 "<४<५<६<७<८<९<अ<आ"
420 "<इ<ई<उ<ऊ<ऋ<ॠ<ऌ<ॡ"
421 "<ऍ<ऎ<ए<ऐ<ऑ<ऒ<ओ<औ"
422 "<क<क़=क़<कँ<कं<कः<क॑<क॒"
423 "<क॓<क॔<कऽ<क्<का<कि<की<कु"
424 "<कू<कृ<कॄ<कॢ<कॣ<कॅ<कॆ<के"
425 "<कै<कॉ<कॊ<को<कौ<ख<ख़ =ख़<खँ<खं<खः"
426 "<ख॑<ख॒<ख॓<ख॔<खऽ<ख्<खा<खि"
427 "<खी<खु<खू<खृ<खॄ<खॢ<खॣ<खॅ"
428 "<खॆ<खे<खै<खॉ<खॊ<खो<खौ<ग"
429 "<ग़=ग़<गँ<गं<गः<ग॑<ग॒<ग॓<ग॔"
430 "<गऽ<ग्<गा<गि<गी<गु<गू<गृ"
431 "<गॄ<गॢ<गॣ<गॅ<गॆ<गे<गै<गॉ"
432 "<गॊ<गो<गौ<घ<ङ<च<छ<ज<ज़ =ज़<जँ<जं<जः"
433 "<ज॑<ज॒<ज॓<ज॔<जऽ<ज्<जा<जि"
434 "<जी<जु<जू<जृ<जॄ<जॢ<जॣ<जॅ"
435 "<जॆ<जे<जै<जॉ<जॊ<जो<जौ<झ"
436 "<ञ<ट<ठ<ड<ड़=ड़<डँ<डं<डः<ड॑<ड॒<ड॓<ड॔"
437 "<डऽ<ड्<डा<डि<डी<डु<डू<डृ"
438 "<डॄ<डॢ<डॣ<डॅ<डॆ<डे<डै<डॉ"
439 "<डॊ<डो<डौ<ढ<ढ़=ढ़<ढँ<ढं<ढः"
440 "<ढ॑<ढ॒<ढ॓<ढ॔<ढऽ<ढ्<ढा<ढि"
441 "<ढी<ढु<ढू<ढृ<ढॄ<ढॢ<ढॣ<ढॅ"
442 "<ढॆ<ढे<ढै<ढॉ<ढॊ<ढो<ढौ<ण"
443 "<त<थ<द<ध<न<ऩ =ऩ< नँ<नं< नः"
444 "<न॑<न॒<न॓<न॔<नऽ<न्<ना<नि"
445 "<नी<नु<नू<नृ<नॄ<नॢ<नॣ<नॅ"
446 "<नॆ<ने<नै<नॉ<नॊ<नो<नौ"
447 "<प<फ<फ़=फ़<फँ<फं<फः<फ॑<फ॒"
448 "<फ॓<फ॔<फऽ<फ्<फा<फि<फी<फु<फू<फृ"
449 "<फॄ<फॢ<फॣ<फॅ<फॆ<फे<फै<फॉ"
450 "<फॊ<फो<फौ<ब<भ<म<य<य़=य़ "
451 "<यँ<यं<यः<य॑<य॒<य॓<य॔"
452 "<यऽ<य्<या<यि<यी<यु<यू<यृ"
453 "<यॄ<यॢ<यॣ<यॅ<यॆ<ये<यै<यॉ"
454 "<यॊ<यो<यौ<र<ऱ=ऱ<रँ<रं<रः"
455 "<र॑<र॒<र॓<र॔<रऽ<र्<रा<रि"
456 "<री<रु<रू<रृ<रॄ<रॢ<रॣ<रॅ"
457 "<रॆ<रे<रै<रॉ<रॊ<रो<रौ"
458 "<ल<ळ<ऴ=ऴ<ळँ<ळं<ळः<ळ॑<ळ॒"
459 "<ळ॓<ळ॔<ळऽ<ळ्<ळा<ळि<ळी<ळु"
460 "<ळू<ळृ<ळॄ<ळॢ<ळॣ<ळॅ<ळॆ<ळे"
461 "<ळै<ळॉ<ळॊ<ळो<ळौ<व<श<ष<स<ह"
462 "<़<ँ<ं<ः<॑<॒<॓<॔<ऽ<्<ा<ि<ी"
463 "<ु<ू<ृ<ॄ<ॢ<ॣ<ॅ<ॆ"
464 "<े<ै<ॉ<ॊ<ो<ौ"
465 }
466 }
467 fi_TestNewRules {
468 Info {
469 Description { "This test goes through new rules and tests against old rules" }
470 }
471 Settings {
472 {
473 TestLocale { "fi" }
474 }
475 }
476 Cases {
477 "xa<xA<Xa<XA<xá<Xá<xax<xAx<xáx<xd<Xd<xð<xÐ<Xð<XÐ<xđ<xĐ<Xđ<XĐ<"
478 "xdx<xðx<xÐx<xđx<xĐx<xe<Xe<xex<xn<Xn<xŋ<xŊ<Xŋ<XŊ<xnx<xŋx<xŊx<"
479 "xo<Xo<xó<Xó<xox<xóx<xs<Xs<xß<Xß<xßx<xsx<xt<Xt<xþ<xÞ<Xþ<XÞ<xþx<"
480 "xÞx<xtx<xu<Xu<xú<Xú<xux<xúx<xv<Xv<xw<Xw<xvx<xwx<xy<Xy<xü<Xü<"
481 "xű<Xű<xyx<xüx<xűx<xz<Xz<xzx<xå<Xå<xåx<xä<Xä<xæ<xÆ<Xæ<XÆ<xäx<"
482 "xæx<xÆx<xö<Xö<xø<Xø<xő<Xő<xõ<Xõ<xœ<xŒ<Xœ<XŒ<xöx<xøx<xőx<xõx<xœx<xŒx"
483 }
484 }
485 ro_TestNewRules {
486 Info {
487 Description { "This test goes through new rules and tests against old rules" }
488 }
489 Settings {
490 {
491 TestLocale { "ro" }
492 }
493 }
494 Cases {
495 "xAx<xă<xĂ<Xă<XĂ<xăx<xĂx<xâ<xÂ<Xâ<XÂ<xâx<xÂx<xb<xIx<xî<xÎ<Xî<XÎ<xîx<xÎx<"
496 "xj<xSx<xș=xş<xȘ=xŞ<Xș=Xş<XȘ=XŞ<xșx=xşx<xȘx=xŞx<xT<xTx<xț=xţ<xȚ=xŢ<Xț=Xţ<XȚ"
497 "=XŢ<xțx=xţx<xȚx=xŢx<xU"
498 }
499 }
500 }
501 }