]> git.saurik.com Git - apple/icu.git/blame - icuSources/test/testdata/DataDrivenCollationTest.txt
ICU-400.37.tar.gz
[apple/icu.git] / icuSources / test / testdata / DataDrivenCollationTest.txt
CommitLineData
46f4442e 1// Copyright (c) 2001-2008 International Business Machines
73c04bcf
A
2// Corporation and others. All Rights Reserved.
3DataDrivenCollationTest:table(nofallback) {
4 Info {
5 Headers { "sequence" }
6 Description { "These are the data driven tests" }
7 LongDescription { "The following entries are separate tests containing test data for various locales."
8 "Each entry has the following fields: "
9 "Info/Description - short description of the test"
10 "Settings - settings for the test."
11 "Settings/TestLocale - locale for the collator OR"
12 "Settings/Rules - rules for the collator (can't have both)"
13 "Settings/Arguments - arguments to be passed to the collator before testing. Use rule syntax."
14 "Cases - set of test cases, which are sequences of strings that will be parsed"
15 "Sequences must not change the sign of relation, i.e. we can only have < and = or"
16 "> and = in single sequence. Cannot mix < and > in the same sequence. Whitespace is"
17 " ignored unless quoted."
18 }
19 }
20 TestData {
21 TestMorePinyin {
22 Info {
23 Description { "Testing the primary strength." }
24 }
25 Settings {
26 {
27 TestLocale { "zh" }
28 Arguments { "[strength 1]" }
29 }
30 }
31 Cases { "lā = lĀ = Lā = LĀ < lān = lĀn < lē = lĒ = Lē = LĒ < lēn = lĒn" }
32
33 }
34 TestLithuanian {
35 Info {
36 Description { "Lithuanian sort order." }
37 }
38 Settings {
39 {
40 TestLocale { "lt" }
41 }
42 }
43 Cases { "cz<č<d<iz<y<j<sz<š<t<zz<ž" }
44 }
45 TestLatvian {
46 Info {
47 Description { "Latvian sort order." }
48 }
49 Settings {
50 {
51 TestLocale { "lv" }
52 }
53 }
54 Cases { "cz<č<d<gz<ģ<h<iz<y<j<kz<ķ<l<lz<ļ<m<nz<ņ<o<rz<ŗ<s<sz<š<t<zz<ž" }
55 }
56 TestEstonian {
57 Info {
58 Description { "Estonian sort order." }
59 }
60 Settings {
61 {
62 TestLocale { "et" }
63 }
64 }
65 Cases { "sy<š<šy<z<zy<ž<v<w<va<õ<õy<ä<äy<ö<öy<ü<üy<x" }
66 }
67 TestAlbanian {
68 Info {
69 Description { "Albanian sort order." }
70 }
71 Settings {
72 {
73 TestLocale { "sq" }
74 }
75 }
76 Cases { "cz<ç<d<dz<dh<e<ez<ë<f<gz<gj<h<lz<ll<m<nz<nj<o<rz<rr<s<sz<sh<t<tz<th<u<xz<xh<y<zz<zh" }
77 }
78
79 TestSimplifiedChineseOrder {
80 Info {
81 Description { "Sorted file has different order." }
82 }
83 Settings {
84 {
85 TestLocale { "root" }
86 Arguments { "[normalization on]" }
87 }
88 }
89
90 Cases { "\u5F20<\u5F20\u4E00\u8E3F" }
91 }
92
93 TestTibetanNormalizedIterativeCrash {
94 Info {
95 Description { "This pretty much crashes." }
96 }
97 Settings {
98 {
99 TestLocale { "root" }
100 }
101 }
102
103 Cases { "\u0f71\u0f72\u0f80\u0f71\u0f72"
104 "<\u0f80"
105 }
106 }
107 TestThaiPartialSortKeyProblems {
108 Info {
109 Description { "These are examples of strings that caused trouble in partial sort key testing." }
110 }
111 Settings {
112 {
113 TestLocale { "th_TH" }
114 }
115 }
116 // TODO: the tests that are commented out should be enabled when j2720 is fixed
117 Cases { "\u0E01\u0E01\u0E38\u0E18\u0E20\u0E31\u0E13\u0E11\u0E4C"
118 "<\u0E01\u0E01\u0E38\u0E2A\u0E31\u0E19\u0E42\u0E18",
119 "\u0E01\u0E07\u0E01\u0E32\u0E23"
120 "<\u0E01\u0E07\u0E42\u0E01\u0E49",
121 "\u0E01\u0E23\u0E19\u0E17\u0E32"
122 "<\u0E01\u0E23\u0E19\u0E19\u0E40\u0E0A\u0E49\u0E32",
123 "\u0E01\u0E23\u0E30\u0E40\u0E08\u0E35\u0E22\u0E27"
124 "<\u0E01\u0E23\u0E30\u0E40\u0E08\u0E35\u0E4A\u0E22\u0E27",
125 "\u0E01\u0E23\u0E23\u0E40\u0E0A\u0E2D"
126 "<\u0E01\u0E23\u0E23\u0E40\u0E0A\u0E49\u0E32"
127 }
128 }
129 TestJavaStyleRule {
130 Info {
131 Description { "java.text allows rules to start as '<<<x<<<y...' "
132 "we emulate this by assuming a &[first tertiary ignorable] "
133 "in this case."
134 }
135 }
136 Settings {
137 {
138 Rules { "=equal<<<z<<x<<<w<y &[first tertiary ignorable]=a &[first primary ignorable]=b" }
139 }
140 }
141 Cases { "a = equal < z < x < w < b < y" }
142 }
143 TestShiftedIgnorable {
144 Info {
145 Description { "New UCA states that primary ignorables should be completely "
146 "ignorable when following a shifted code point."
147 }
148 }
149 Settings {
150 {
151 TestLocale { "root" }
152 Arguments { "[alternate shifted][strength 4]" }
153 }
154 }
155 Cases {
156 "a' 'b="
157 "a' '\u0300b="
158 "a' '\u0301b<"
159 "a_b="
160 "a_\u0300b="
161 "a_\u0301b<"
162 "A' 'b="
163 "A' '\u0300b="
164 "A' '\u0301b<"
165 "A_b="
166 "A_\u0300b="
167 "A_\u0301b<"
168 "a\u0301b<"
169 "A\u0301b<"
170 "a\u0300b<"
171 "A\u0300b"
172
173 }
174 }
175
176 TestNShiftedIgnorable {
177 Info {
178 Description { "New UCA states that primary ignorables should be completely "
179 "ignorable when following a shifted code point."
180 }
181 }
182 Settings {
183 {
184 TestLocale { "root" }
185 Arguments { "[alternate non-ignorable][strength 3]" }
186 }
187 }
188 Cases {
189 "a' 'b<"
190 "A' 'b<"
191 "a' '\u0301b<"
192 "A' '\u0301b<"
193 "a' '\u0300b<"
194 "A' '\u0300b<"
195 "a_b<"
196 "A_b<"
197 "a_\u0301b<"
198 "A_\u0301b<"
199 "a_\u0300b<"
200 "A_\u0300b<"
201 "a\u0301b<"
202 "A\u0301b<"
203 "a\u0300b<"
204 "A\u0300b<"
205 }
206 }
207
208 TestSafeSurrogates {
209 Info {
210 Description { "It turned out that surrogates were not skipped properly "
211 "when iterating backwards if they were in the middle of a "
212 "contraction. This test assures that this is fixed."
213 }
214 }
215 Settings {
216 {
217 Rules {
218 "&a < x\ud800\udc00b"
219 }
220 }
221 }
222 Cases {
223 "a<x\ud800\udc00b"
224 }
225 }
226/*
227 UCA 4.1 removes skipping of ignorable code points in contractions!
228 TestCIgnorableContraction {
229 Info {
230 Description { "Checks whether completely ignorable code points are "
231 "skipped in contractions."
232 }
233 }
234 Settings {
235 {
236 TestLocale { "sh" }
237 }
238 {
239 Rules {
240 "& L < lj, Lj <<< LJ"
241 "& N < nj, Nj <<< NJ "
242 }
243 }
244 }
245 Cases {
246 "njiva=n\ud834\udd65jiva=n\uD834\uDD79jiva=n\u0000\u0000\u0000jiva=n\u0000jiva=n\ud800jiva=n\ufffejiva",
247 "ljubav=l\u0000jubav=l\uD834\uDD79jubav=l\u0000\u0000\u0000jubav=l\ud800jubav=l\ufffejubav",
248 "Ljubav=L\u0000jubav=L\uD834\uDD79jubav=L\u0000\u0000\u0000jubav=L\ud800jubav=L\ufffejubav",
249 }
250 }
251
252*/
253/*
254 UCA 4.1 removes skipping of ignorable code points in contractions!
255 TestCIgnorablePrefix {
256 Info {
257 Description { "Checks whether completely ignorable code points are "
258 "skipped in prefix processing."
259 }
260 }
261 Settings {
262 {
263 TestLocale { "ja" }
264 }
265 }
266 Cases {
267 "\u30A1\u30FC"
268 "= \u30A1\uDB40\uDC30\u30FC"
269 "= \u30A1\uD800\u30FC"
270 "= \u30A1\uFFFE\u30FC"
271 "= \u30A1\uD834\uDD79\u30FC"
272 "= \u30A1\u0000\u0000\u0000\u30FC"
273 "= \u30A1\u0000\u30FC"
274 "= \u30A1\u30FC"
275 "= \u30A1\u0000\u059a\u30FC"
276 "= \u30A1\u30FC"
277 }
278 }
279*/
280 da_TestPrimary {
281 Info {
282 Description { "This test goes through primary strength cases" }
283 }
284 Settings {
285 {
286 TestLocale { "da" }
287 Arguments { "[strength 1]" }
288 }
289 }
290 Cases {
291 "Lvi<Lwi",
292 "L\u00e4vi<L\u00f6wi",
293 "L\u00fcbeck=Lybeck",
294 }
295 }
296 da_TestTertiary {
297 Info {
298 Description { "This test goes through tertiary strength cases" }
299 }
300 Settings {
301 {
302 TestLocale { "da" }
303 Arguments { "[strength 3]" }
304 }
305 }
306 Cases {
307 "Luc<luck",
308 "luck<L\u00fcbeck",
309 "L\u00fcbeck>lybeck",
310 "L\u00e4vi<L\u00f6we",
311 "L\u00f6ww<mast",
312 // const UChar CollationDanishTest::testBugs[][CollationDanishTest::MAX_TOKEN_LEN] =
313 "A/S<"
314 "ANDRE<"
315 "ANDR\u00c9<"
316 "ANDREAS<"
317 "AS<"
318 "CA<"
319 "\u00c7A<"
320 "CB<"
321 "\u00c7C<"
322 "D.S.B.<"
323 "DA<"
324 "\u00d0A<"
325 "DB<"
326 "\u00d0C<"
327 "DSB<"
328 "DSC<"
329 "EKSTRA_ARBEJDE<"
330 "EKSTRABUD0<"
331 "H\u00d8ST<"
332 "HAAG<"
333 "H\u00c5NDBOG<"
334 "HAANDV\u00c6RKSBANKEN<"
335 "Karl<"
336 "karl<"
337 "'NIELS J\u00d8RGEN'<"
338 "NIELS-J\u00d8RGEN<"
339 "NIELSEN<"
340 "'R\u00c9E, A'<"
341 "'REE, B'<"
342 "'R\u00c9E, L'<"
343 "'REE, V'<"
344 "'SCHYTT, B'<"
345 "'SCHYTT, H'<"
346 "'SCH\u00dcTT, H'<"
347 "'SCHYTT, L'<"
348 "'SCH\u00dcTT, M'<"
349 "SS<"
350 "\u00df<"
351 "SSA<"
352 "'STORE VILDMOSE'<"
353 "STOREK\u00c6R0<"
354 "'STORM PETERSEN'<"
355 "STORMLY<"
356 "THORVALD<"
357 "THORVARDUR<"
358 "\u00feORVAR\u00d0UR<"
359 "THYGESEN<"
360 "'VESTERG\u00c5RD, A'<"
361 "'VESTERGAARD, A'<"
362 "'VESTERG\u00c5RD, B'<"
363 "\u00c6BLE<"
364 "\u00c4BLE<"
365 "\u00d8BERG<"
366 "\u00d6BERG",
367
368 // const UChar CollationDanishTest::testNTList[][CollationDanishTest::MAX_TOKEN_LEN] =
369 "andere<"
370 "chaque<"
371 "chemin<"
372 "cote<"
373 "cot\u00e9<"
374 "c\u00f4te<"
375 "c\u00f4t\u00e9<"
376 "\u010du\u010d\u0113t<"
377 "Czech<"
378 "hi\u0161a<"
379 "irdisch<"
380 "lie<"
381 "lire<"
382 "llama<"
383 "l\u00f5ug<"
384 "l\u00f2za<"
385 "lu\u010d<"
386 "luck<"
387 "L\u00fcbeck<"
388 "lye<"
389 "l\u00e4vi<"
390 "L\u00f6wen<"
391 "m\u00e0\u0161ta<"
392 "m\u00eer<"
393 "myndig<"
394 "M\u00e4nner<"
395 "m\u00f6chten<"
396 "pi\u00f1a<"
397 "pint<"
398 "pylon<"
399 "\u0161\u00e0ran<"
400 "savoir<"
401 "\u0160erb\u016bra<"
402 "Sietla<"
403 "\u015blub<"
404 "subtle<"
405 "symbol<"
406 "s\u00e4mtlich<"
407 "verkehrt<"
408 "vox<"
409 "v\u00e4ga<"
410 "waffle<"
411 "wood<"
412 "yen<"
413 "yuan<"
414 "yucca<"
415 "\u017eal<"
416 "\u017eena<"
417 "\u017den\u0113va<"
418 "zoo0<"
419 "Zviedrija<"
420 "Z\u00fcrich<"
421 "zysk0<"
422 "\u00e4ndere"
423 }
424 }
425 hi_TestNewRules {
426 Info {
427 Description { "This test goes through new rules and tests against old rules" }
428 }
429 Settings {
430 {
431 TestLocale { "hi" }
432 }
433 }
434 Cases {
435 "ॐ<।<॥<॰<०<१<२<३"
436 "<४<५<६<७<८<९<अ<आ"
437 "<इ<ई<उ<ऊ<ऋ<ॠ<ऌ<ॡ"
438 "<ऍ<ऎ<ए<ऐ<ऑ<ऒ<ओ<औ"
439 "<क<क़=क़<कँ<कं<कः<क॑<क॒"
440 "<क॓<क॔<कऽ<क्<का<कि<की<कु"
441 "<कू<कृ<कॄ<कॢ<कॣ<कॅ<कॆ<के"
442 "<कै<कॉ<कॊ<को<कौ<ख<ख़ =ख़<खँ<खं<खः"
443 "<ख॑<ख॒<ख॓<ख॔<खऽ<ख्<खा<खि"
444 "<खी<खु<खू<खृ<खॄ<खॢ<खॣ<खॅ"
445 "<खॆ<खे<खै<खॉ<खॊ<खो<खौ<ग"
446 "<ग़=ग़<गँ<गं<गः<ग॑<ग॒<ग॓<ग॔"
447 "<गऽ<ग्<गा<गि<गी<गु<गू<गृ"
448 "<गॄ<गॢ<गॣ<गॅ<गॆ<गे<गै<गॉ"
449 "<गॊ<गो<गौ<घ<ङ<च<छ<ज<ज़ =ज़<जँ<जं<जः"
450 "<ज॑<ज॒<ज॓<ज॔<जऽ<ज्<जा<जि"
451 "<जी<जु<जू<जृ<जॄ<जॢ<जॣ<जॅ"
452 "<जॆ<जे<जै<जॉ<जॊ<जो<जौ<झ"
453 "<ञ<ट<ठ<ड<ड़=ड़<डँ<डं<डः<ड॑<ड॒<ड॓<ड॔"
454 "<डऽ<ड्<डा<डि<डी<डु<डू<डृ"
455 "<डॄ<डॢ<डॣ<डॅ<डॆ<डे<डै<डॉ"
456 "<डॊ<डो<डौ<ढ<ढ़=ढ़<ढँ<ढं<ढः"
457 "<ढ॑<ढ॒<ढ॓<ढ॔<ढऽ<ढ्<ढा<ढि"
458 "<ढी<ढु<ढू<ढृ<ढॄ<ढॢ<ढॣ<ढॅ"
459 "<ढॆ<ढे<ढै<ढॉ<ढॊ<ढो<ढौ<ण"
460 "<त<थ<द<ध<न<ऩ =ऩ< नँ<नं< नः"
461 "<न॑<न॒<न॓<न॔<नऽ<न्<ना<नि"
462 "<नी<नु<नू<नृ<नॄ<नॢ<नॣ<नॅ"
463 "<नॆ<ने<नै<नॉ<नॊ<नो<नौ"
464 "<प<फ<फ़=फ़<फँ<फं<फः<फ॑<फ॒"
465 "<फ॓<फ॔<फऽ<फ्<फा<फि<फी<फु<फू<फृ"
466 "<फॄ<फॢ<फॣ<फॅ<फॆ<फे<फै<फॉ"
467 "<फॊ<फो<फौ<ब<भ<म<य<य़=य़ "
468 "<यँ<यं<यः<य॑<य॒<य॓<य॔"
469 "<यऽ<य्<या<यि<यी<यु<यू<यृ"
470 "<यॄ<यॢ<यॣ<यॅ<यॆ<ये<यै<यॉ"
471 "<यॊ<यो<यौ<र<ऱ=ऱ<रँ<रं<रः"
472 "<र॑<र॒<र॓<र॔<रऽ<र्<रा<रि"
473 "<री<रु<रू<रृ<रॄ<रॢ<रॣ<रॅ"
474 "<रॆ<रे<रै<रॉ<रॊ<रो<रौ"
475 "<ल<ळ<ऴ=ऴ<ळँ<ळं<ळः<ळ॑<ळ॒"
476 "<ळ॓<ळ॔<ळऽ<ळ्<ळा<ळि<ळी<ळु"
477 "<ळू<ळृ<ळॄ<ळॢ<ळॣ<ळॅ<ळॆ<ळे"
478 "<ळै<ळॉ<ळॊ<ळो<ळौ<व<श<ष<स<ह"
479 "<़<ँ<ं<ः<॑<॒<॓<॔<ऽ<्<ा<ि<ी"
480 "<ु<ू<ृ<ॄ<ॢ<ॣ<ॅ<ॆ"
481 "<े<ै<ॉ<ॊ<ो<ौ"
482 }
483 }
46f4442e
A
484// fi_TestNewRules {
485// Info {
486// Description { "This test goes through new rules and tests against old rules" }
487// }
488// Settings {
489// {
490// TestLocale { "fi" }
491// }
492// }
493// Cases {
494// "xa<xA<Xa<XA<xá<Xá<xax<xAx<xáx<xd<Xd<xð<xÐ<Xð<XÐ<xđ<xĐ<Xđ<XĐ<"
495// "xdx<xðx<xÐx<xđx<xĐx<xe<Xe<xex<xn<Xn<xŋ<xŊ<Xŋ<XŊ<xnx<xŋx<xŊx<"
496// "xo<Xo<xó<Xó<xox<xóx<xs<Xs<xß<Xß<xßx<xsx<xt<Xt<xþ<xÞ<Xþ<XÞ<xþx<"
497// "xÞx<xtx<xu<Xu<xú<Xú<xux<xúx<xv<Xv<xw<Xw<xvx<xwx<xy<Xy<xü<Xü<"
498// "xű<Xű<xyx<xüx<xűx<xz<Xz<xzx<xå<Xå<xåx<xä<Xä<xæ<xÆ<Xæ<XÆ<xäx<"
499// "xæx<xÆx<xö<Xö<xø<Xø<xő<Xő<xõ<Xõ<xœ<xŒ<Xœ<XŒ<xöx<xøx<xőx<xõx<xœx<xŒx"
500//}
501//}
502 ro_TestNewRules {
73c04bcf
A
503 Info {
504 Description { "This test goes through new rules and tests against old rules" }
505 }
506 Settings {
507 {
46f4442e 508 TestLocale { "ro" }
73c04bcf
A
509 }
510 }
511 Cases {
46f4442e
A
512 "xAx<xă<xĂ<Xă<XĂ<xăx<xĂx<xâ<xÂ<Xâ<XÂ<xâx<xÂx<xb<xIx<xî<xÎ<Xî<XÎ<xîx<xÎx<"
513 "xj<xSx<xș=xş<xȘ=xŞ<Xș=Xş<XȘ=XŞ<xșx=xşx<xȘx=xŞx<xT<xTx<xț=xţ<xȚ=xŢ<Xț=Xţ<XȚ"
514 "=XŢ<xțx=xţx<xȚx=xŢx<xU"
73c04bcf
A
515 }
516 }
46f4442e
A
517
518 testOffsets {
73c04bcf 519 Info {
46f4442e 520 Description { "This tests cases where forwards and backwards iteration get different offsets" }
73c04bcf 521 }
46f4442e 522
73c04bcf
A
523 Settings {
524 {
46f4442e
A
525 TestLocale { "en" }
526 Arguments { "[strength 3]" }
73c04bcf
A
527 }
528 }
46f4442e
A
529
530 Cases {
531 "a\uD800\uDC00\uDC00<b\uD800\uDC00\uDC00",
532 "\u0301A\u0301\u0301<\u0301B\u0301\u0301",
533 "abcd\r\u0301<abce\r\u0301"
73c04bcf 534 }
46f4442e 535 }
73c04bcf 536 }
46f4442e 537}