]> git.saurik.com Git - apple/icu.git/blame - icuSources/test/testdata/DataDrivenCollationTest.txt
ICU-6.2.22.tar.gz
[apple/icu.git] / icuSources / test / testdata / DataDrivenCollationTest.txt
CommitLineData
374ca955
A
1// Copyright (c) 2001-2004 International Business Machines\r
2// Corporation and others. All Rights Reserved.\r
3DataDrivenCollationTest {\r
4 Info {\r
5 Headers { "sequence" }\r
6 Description { "These are the data driven tests" }\r
7 LongDescription { "The following entries are separate tests containing test data for various locales."\r
8 "Each entry has the following fields: "\r
9 "Info/Description - short description of the test"\r
10 "Settings - settings for the test."\r
11 "Settings/TestLocale - locale for the collator OR"\r
12 "Settings/Rules - rules for the collator (can't have both)"\r
13 "Settings/Arguments - arguments to be passed to the collator before testing. Use rule syntax."\r
14 "Cases - set of test cases, which are sequences of strings that will be parsed"\r
15 "Sequences must not change the sign of relation, i.e. we can only have < and = or"\r
16 "> and = in single sequence. Cannot mix < and > in the same sequence. Whitespace is"\r
17 " ignored unless quoted."\r
18 }\r
19 }\r
20 TestData {\r
21 TestLithuanian {\r
22 Info {\r
23 Description { "Lithuanian sort order." }\r
24 }\r
25 Settings {\r
26 {\r
27 TestLocale { "lt" }\r
28 }\r
29 }\r
30 Cases { "cz<č<d<iz<y<j<sz<š<t<zz<ž" } \r
31 }\r
32 TestLatvian {\r
33 Info {\r
34 Description { "Latvian sort order." }\r
35 }\r
36 Settings {\r
37 {\r
38 TestLocale { "lv" }\r
39 }\r
40 }\r
41 Cases { "cz<č<d<gz<ģ<h<iz<y<j<kz<ķ<l<lz<ļ<m<nz<ņ<o<rz<ŗ<s<sz<š<t<zz<ž" }\r
42 }\r
43 TestEstonian {\r
44 Info {\r
45 Description { "Estonian sort order." }\r
46 }\r
47 Settings {\r
48 {\r
49 TestLocale { "et" }\r
50 }\r
51 }\r
52 Cases { "sy<š<šy<z<zy<ž<v<w<va<õ<õy<ä<äy<ö<öy<ü<üy<x" }\r
53 }\r
54 TestAlbanian {\r
55 Info {\r
56 Description { "Albanian sort order." }\r
57 }\r
58 Settings {\r
59 {\r
60 TestLocale { "sq" }\r
61 }\r
62 }\r
63 Cases { "cz<ç<d<dz<dh<e<ez<ë<f<gz<gj<h<lz<ll<m<nz<nj<o<rz<rr<s<sz<sh<t<tz<th<u<xz<xh<y<zz<zh" }\r
64 }\r
65 \r
66 TestSimplifiedChineseOrder {\r
67 Info {\r
68 Description { "Sorted file has different order." }\r
69 }\r
70 Settings {\r
71 {\r
72 TestLocale { "root" }\r
73 Arguments { "[normalization on]" }\r
74 }\r
75 }\r
76\r
77 Cases { "\u5F20<\u5F20\u4E00\u8E3F" }\r
78 }\r
79 \r
80 TestTibetanNormalizedIterativeCrash {\r
81 Info {\r
82 Description { "This pretty much crashes." }\r
83 }\r
84 Settings {\r
85 {\r
86 TestLocale { "root" }\r
87 }\r
88 }\r
89\r
90 Cases { "\u0f71\u0f72\u0f80\u0f71\u0f72"\r
91 "<\u0f80"\r
92 }\r
93 }\r
94 TestThaiPartialSortKeyProblems {\r
95 Info {\r
96 Description { "These are examples of strings that caused trouble in partial sort key testing." }\r
97 }\r
98 Settings {\r
99 {\r
100 TestLocale { "th_TH" }\r
101 }\r
102 }\r
103 // TODO: the tests that are commented out should be enabled when j2720 is fixed\r
104 Cases { "\u0E01\u0E01\u0E38\u0E18\u0E20\u0E31\u0E13\u0E11\u0E4C"\r
105 "<\u0E01\u0E01\u0E38\u0E2A\u0E31\u0E19\u0E42\u0E18",\r
106 "\u0E01\u0E07\u0E01\u0E32\u0E23"\r
107 "<\u0E01\u0E07\u0E42\u0E01\u0E49",\r
108 "\u0E01\u0E23\u0E19\u0E17\u0E32"\r
109 "<\u0E01\u0E23\u0E19\u0E19\u0E40\u0E0A\u0E49\u0E32",\r
110 "\u0E01\u0E23\u0E30\u0E40\u0E08\u0E35\u0E22\u0E27"\r
111 "<\u0E01\u0E23\u0E30\u0E40\u0E08\u0E35\u0E4A\u0E22\u0E27",\r
112 "\u0E01\u0E23\u0E23\u0E40\u0E0A\u0E2D"\r
113 "<\u0E01\u0E23\u0E23\u0E40\u0E0A\u0E49\u0E32"\r
114 }\r
115 }\r
116 TestJavaStyleRule {\r
117 Info {\r
118 Description { "java.text allows rules to start as '<<<x<<<y...' "\r
119 "we emulate this by assuming a &[first tertiary ignorable] "\r
120 "in this case."\r
121 }\r
122 }\r
123 Settings {\r
124 {\r
125 Rules { "=equal<<<z<<x<<<w<y &[first tertiary ignorable]=a &[first primary ignorable]=b" }\r
126 }\r
127 }\r
128 Cases { "a = equal < z < x < w < b < y" }\r
129 }\r
130 TestShiftedIgnorable {\r
131 Info {\r
132 Description { "New UCA states that primary ignorables should be completely "\r
133 "ignorable when following a shifted code point."\r
134 }\r
135 }\r
136 Settings {\r
137 {\r
138 TestLocale { "root" }\r
139 Arguments { "[alternate shifted][strength 4]" }\r
140 }\r
141 }\r
142 Cases {\r
143 "a' 'b="\r
144 "a' '\u0300b="\r
145 "a' '\u0301b<"\r
146 "a_b="\r
147 "a_\u0300b="\r
148 "a_\u0301b<"\r
149 "A' 'b="\r
150 "A' '\u0300b="\r
151 "A' '\u0301b<"\r
152 "A_b="\r
153 "A_\u0300b="\r
154 "A_\u0301b<"\r
155 "a\u0301b<"\r
156 "A\u0301b<"\r
157 "a\u0300b<"\r
158 "A\u0300b"\r
159\r
160 }\r
161 }\r
162\r
163 TestNShiftedIgnorable {\r
164 Info {\r
165 Description { "New UCA states that primary ignorables should be completely "\r
166 "ignorable when following a shifted code point."\r
167 }\r
168 }\r
169 Settings {\r
170 {\r
171 TestLocale { "root" }\r
172 Arguments { "[alternate non-ignorable][strength 3]" }\r
173 }\r
174 }\r
175 Cases {\r
176 "a' 'b<"\r
177 "A' 'b<"\r
178 "a' '\u0301b<"\r
179 "A' '\u0301b<"\r
180 "a' '\u0300b<"\r
181 "A' '\u0300b<"\r
182 "a_b<"\r
183 "A_b<"\r
184 "a_\u0301b<"\r
185 "A_\u0301b<"\r
186 "a_\u0300b<"\r
187 "A_\u0300b<"\r
188 "a\u0301b<"\r
189 "A\u0301b<"\r
190 "a\u0300b<"\r
191 "A\u0300b<"\r
192 }\r
193 }\r
194\r
195 TestSafeSurrogates {\r
196 Info {\r
197 Description { "It turned out that surrogates were not skipped properly "\r
198 "when iterating backwards if they were in the middle of a "\r
199 "contraction. This test assures that this is fixed."\r
200 }\r
201 }\r
202 Settings {\r
203 {\r
204 Rules {\r
205 "&a < x\ud800\udc00b"\r
206 }\r
207 }\r
208 }\r
209 Cases {\r
210 "a<x\ud800\udc00b"\r
211 }\r
212 }\r
213 \r
214 TestCIgnorableContraction {\r
215 Info {\r
216 Description { "Checks whether completely ignorable code points are "\r
217 "skipped in contractions."\r
218 }\r
219 }\r
220 Settings {\r
221 {\r
222 TestLocale { "sh" }\r
223 }\r
224 {\r
225 Rules {\r
226 "& L < lj, Lj <<< LJ"\r
227 "& N < nj, Nj <<< NJ "\r
228 }\r
229 }\r
230 }\r
231 Cases {\r
232 "njiva=n\ud834\udd65jiva=n\uD834\uDD79jiva=n\u0000\u0000\u0000jiva=n\u0000jiva=n\ud800jiva=n\ufffejiva",\r
233 "ljubav=l\u0000jubav=l\uD834\uDD79jubav=l\u0000\u0000\u0000jubav=l\ud800jubav=l\ufffejubav",\r
234 "Ljubav=L\u0000jubav=L\uD834\uDD79jubav=L\u0000\u0000\u0000jubav=L\ud800jubav=L\ufffejubav",\r
235 }\r
236 }\r
237 \r
238\r
239 TestCIgnorablePrefix {\r
240 Info {\r
241 Description { "Checks whether completely ignorable code points are "\r
242 "skipped in prefix processing."\r
243 }\r
244 }\r
245 Settings {\r
246 {\r
247 TestLocale { "ja" }\r
248 }\r
249 }\r
250 Cases {\r
251 "\u30A1\u30FC"\r
252 "= \u30A1\uDB40\uDC30\u30FC"\r
253 "= \u30A1\uD800\u30FC"\r
254 "= \u30A1\uFFFE\u30FC"\r
255 "= \u30A1\uD834\uDD79\u30FC"\r
256 "= \u30A1\u0000\u0000\u0000\u30FC"\r
257 "= \u30A1\u0000\u30FC"\r
258 "= \u30A1\u30FC"\r
259 "= \u30A1\u0000\u059a\u30FC"\r
260 "= \u30A1\u30FC"\r
261 }\r
262 } \r
263 da_TestPrimary {\r
264 Info {\r
265 Description { "This test goes through primary strength cases" }\r
266 }\r
267 Settings {\r
268 {\r
269 TestLocale { "da" }\r
270 Arguments { "[strength 1]" }\r
271 }\r
272 }\r
273 Cases {\r
274 "Lvi<Lwi",\r
275 "L\u00e4vi<L\u00f6wi",\r
276 "L\u00fcbeck=Lybeck",\r
277 }\r
278 }\r
279 da_TestTertiary {\r
280 Info {\r
281 Description { "This test goes through tertiary strength cases" }\r
282 }\r
283 Settings {\r
284 {\r
285 TestLocale { "da" }\r
286 Arguments { "[strength 3]" }\r
287 }\r
288 }\r
289 Cases {\r
290 "Luc<luck",\r
291 "luck<L\u00fcbeck",\r
292 "L\u00fcbeck>lybeck",\r
293 "L\u00e4vi<L\u00f6we",\r
294 "L\u00f6ww<mast",\r
295 // const UChar CollationDanishTest::testBugs[][CollationDanishTest::MAX_TOKEN_LEN]="\r
296 "A/S<"\r
297 "ANDRE<"\r
298 "ANDR\u00c9<"\r
299 "ANDREAS<"\r
300 "AS<"\r
301 "CA<"\r
302 "\u00c7A<"\r
303 "CB<"\r
304 "\u00c7C<"\r
305 "D.S.B.<"\r
306 "DA<"\r
307 "\u00d0A<"\r
308 "DB<"\r
309 "\u00d0C<"\r
310 "DSB<"\r
311 "DSC<"\r
312 "EKSTRA_ARBEJDE<"\r
313 "EKSTRABUD0<"\r
314 "H\u00d8ST<"\r
315 "HAAG<"\r
316 "H\u00c5NDBOG<"\r
317 "HAANDV\u00c6RKSBANKEN<"\r
318 "Karl<"\r
319 "karl<"\r
320 "'NIELS J\u00d8RGEN'<"\r
321 "NIELS-J\u00d8RGEN<"\r
322 "NIELSEN<"\r
323 "'R\u00c9E, A'<"\r
324 "'REE, B'<"\r
325 "'R\u00c9E, L'<"\r
326 "'REE, V'<"\r
327 "'SCHYTT, B'<"\r
328 "'SCHYTT, H'<"\r
329 "'SCH\u00dcTT, H'<"\r
330 "'SCHYTT, L'<"\r
331 "'SCH\u00dcTT, M'<"\r
332 "SS<"\r
333 "\u00df<"\r
334 "SSA<"\r
335 "'STORE VILDMOSE'<"\r
336 "STOREK\u00c6R0<"\r
337 "'STORM PETERSEN'<"\r
338 "STORMLY<"\r
339 "THORVALD<"\r
340 "THORVARDUR<"\r
341 "\u00feORVAR\u00d0UR<"\r
342 "THYGESEN<"\r
343 "'VESTERG\u00c5RD, A'<"\r
344 "'VESTERGAARD, A'<"\r
345 "'VESTERG\u00c5RD, B'<"\r
346 "\u00c6BLE<"\r
347 "\u00c4BLE<"\r
348 "\u00d8BERG<"\r
349 "\u00d6BERG",\r
350\r
351 // const UChar CollationDanishTest::testNTList[][CollationDanishTest::MAX_TOKEN_LEN]="\r
352 "andere<"\r
353 "chaque<"\r
354 "chemin<"\r
355 "cote<"\r
356 "cot\u00e9<"\r
357 "c\u00f4te<"\r
358 "c\u00f4t\u00e9<"\r
359 "\u010du\u010d\u0113t<"\r
360 "Czech<"\r
361 "hi\u0161a<"\r
362 "irdisch<"\r
363 "lie<"\r
364 "lire<"\r
365 "llama<"\r
366 "l\u00f5ug<"\r
367 "l\u00f2za<"\r
368 "lu\u010d<"\r
369 "luck<"\r
370 "L\u00fcbeck<"\r
371 "lye<"\r
372 "l\u00e4vi<"\r
373 "L\u00f6wen<"\r
374 "m\u00e0\u0161ta<"\r
375 "m\u00eer<"\r
376 "myndig<"\r
377 "M\u00e4nner<"\r
378 "m\u00f6chten<"\r
379 "pi\u00f1a<"\r
380 "pint<"\r
381 "pylon<"\r
382 "\u0161\u00e0ran<"\r
383 "savoir<"\r
384 "\u0160erb\u016bra<"\r
385 "Sietla<"\r
386 "\u015blub<"\r
387 "subtle<"\r
388 "symbol<"\r
389 "s\u00e4mtlich<"\r
390 "verkehrt<"\r
391 "vox<"\r
392 "v\u00e4ga<"\r
393 "waffle<"\r
394 "wood<"\r
395 "yen<"\r
396 "yuan<"\r
397 "yucca<"\r
398 "\u017eal<"\r
399 "\u017eena<"\r
400 "\u017den\u0113va<"\r
401 "zoo0<"\r
402 "Zviedrija<"\r
403 "Z\u00fcrich<"\r
404 "zysk0<"\r
405 "\u00e4ndere"\r
406 }\r
407 }\r
408 hi_TestNewRules {\r
409 Info {\r
410 Description { "This test goes through new rules and tests against old rules" }\r
411 }\r
412 Settings {\r
413 {\r
414 TestLocale { "hi" }\r
415 }\r
416 }\r
417 Cases {\r
418 "ॐ<।<॥<॰<०<१<२<३"\r
419 "<४<५<६<७<८<९<अ<आ"\r
420 "<इ<ई<उ<ऊ<ऋ<ॠ<ऌ<ॡ"\r
421 "<ऍ<ऎ<ए<ऐ<ऑ<ऒ<ओ<औ"\r
422 "<क<क़=क़<कँ<कं<कः<क॑<क॒"\r
423 "<क॓<क॔<कऽ<क्<का<कि<की<कु"\r
424 "<कू<कृ<कॄ<कॢ<कॣ<कॅ<कॆ<के"\r
425 "<कै<कॉ<कॊ<को<कौ<ख<ख़ =ख़<खँ<खं<खः"\r
426 "<ख॑<ख॒<ख॓<ख॔<खऽ<ख्<खा<खि"\r
427 "<खी<खु<खू<खृ<खॄ<खॢ<खॣ<खॅ"\r
428 "<खॆ<खे<खै<खॉ<खॊ<खो<खौ<ग"\r
429 "<ग़=ग़<गँ<गं<गः<ग॑<ग॒<ग॓<ग॔"\r
430 "<गऽ<ग्<गा<गि<गी<गु<गू<गृ"\r
431 "<गॄ<गॢ<गॣ<गॅ<गॆ<गे<गै<गॉ"\r
432 "<गॊ<गो<गौ<घ<ङ<च<छ<ज<ज़ =ज़<जँ<जं<जः"\r
433 "<ज॑<ज॒<ज॓<ज॔<जऽ<ज्<जा<जि"\r
434 "<जी<जु<जू<जृ<जॄ<जॢ<जॣ<जॅ"\r
435 "<जॆ<जे<जै<जॉ<जॊ<जो<जौ<झ"\r
436 "<ञ<ट<ठ<ड<ड़=ड़<डँ<डं<डः<ड॑<ड॒<ड॓<ड॔"\r
437 "<डऽ<ड्<डा<डि<डी<डु<डू<डृ"\r
438 "<डॄ<डॢ<डॣ<डॅ<डॆ<डे<डै<डॉ"\r
439 "<डॊ<डो<डौ<ढ<ढ़=ढ़<ढँ<ढं<ढः"\r
440 "<ढ॑<ढ॒<ढ॓<ढ॔<ढऽ<ढ्<ढा<ढि"\r
441 "<ढी<ढु<ढू<ढृ<ढॄ<ढॢ<ढॣ<ढॅ"\r
442 "<ढॆ<ढे<ढै<ढॉ<ढॊ<ढो<ढौ<ण"\r
443 "<त<थ<द<ध<न<ऩ =ऩ< नँ<नं< नः"\r
444 "<न॑<न॒<न॓<न॔<नऽ<न्<ना<नि"\r
445 "<नी<नु<नू<नृ<नॄ<नॢ<नॣ<नॅ"\r
446 "<नॆ<ने<नै<नॉ<नॊ<नो<नौ"\r
447 "<प<फ<फ़=फ़<फँ<फं<फः<फ॑<फ॒"\r
448 "<फ॓<फ॔<फऽ<फ्<फा<फि<फी<फु<फू<फृ"\r
449 "<फॄ<फॢ<फॣ<फॅ<फॆ<फे<फै<फॉ"\r
450 "<फॊ<फो<फौ<ब<भ<म<य<य़=य़ "\r
451 "<यँ<यं<यः<य॑<य॒<य॓<य॔"\r
452 "<यऽ<य्<या<यि<यी<यु<यू<यृ"\r
453 "<यॄ<यॢ<यॣ<यॅ<यॆ<ये<यै<यॉ"\r
454 "<यॊ<यो<यौ<र<ऱ=ऱ<रँ<रं<रः"\r
455 "<र॑<र॒<र॓<र॔<रऽ<र्<रा<रि"\r
456 "<री<रु<रू<रृ<रॄ<रॢ<रॣ<रॅ"\r
457 "<रॆ<रे<रै<रॉ<रॊ<रो<रौ"\r
458 "<ल<ळ<ऴ=ऴ<ळँ<ळं<ळः<ळ॑<ळ॒"\r
459 "<ळ॓<ळ॔<ळऽ<ळ्<ळा<ळि<ळी<ळु"\r
460 "<ळू<ळृ<ळॄ<ळॢ<ळॣ<ळॅ<ळॆ<ळे"\r
461 "<ळै<ळॉ<ळॊ<ळो<ळौ<व<श<ष<स<ह"\r
462 "<़<ँ<ं<ः<॑<॒<॓<॔<ऽ<्<ा<ि<ी"\r
463 "<ु<ू<ृ<ॄ<ॢ<ॣ<ॅ<ॆ"\r
464 "<े<ै<ॉ<ॊ<ो<ौ"\r
465 }\r
466 }\r
467 fi_TestNewRules {\r
468 Info {\r
469 Description { "This test goes through new rules and tests against old rules" }\r
470 }\r
471 Settings {\r
472 {\r
473 TestLocale { "fi" }\r
474 }\r
475 }\r
476 Cases { \r
477 "xa<xA<Xa<XA<xá<Xá<xax<xAx<xáx<xd<Xd<xð<xÐ<Xð<XÐ<xđ<xĐ<Xđ<XĐ<"\r
478 "xdx<xðx<xÐx<xđx<xĐx<xe<Xe<xex<xn<Xn<xŋ<xŊ<Xŋ<XŊ<xnx<xŋx<xŊx<"\r
479 "xo<Xo<xó<Xó<xox<xóx<xs<Xs<xß<Xß<xßx<xsx<xt<Xt<xþ<xÞ<Xþ<XÞ<xþx<"\r
480 "xÞx<xtx<xu<Xu<xú<Xú<xux<xúx<xv<Xv<xw<Xw<xvx<xwx<xy<Xy<xü<Xü<"\r
481 "xű<Xű<xyx<xüx<xűx<xz<Xz<xzx<xå<Xå<xåx<xä<Xä<xæ<xÆ<Xæ<XÆ<xäx<"\r
482 "xæx<xÆx<xö<Xö<xø<Xø<xő<Xő<xõ<Xõ<xœ<xŒ<Xœ<XŒ<xöx<xøx<xőx<xõx<xœx<xŒx"\r
483 }\r
484 }\r
485 ro_TestNewRules {\r
486 Info {\r
487 Description { "This test goes through new rules and tests against old rules" }\r
488 }\r
489 Settings {\r
490 {\r
491 TestLocale { "ro" }\r
492 }\r
493 }\r
494 Cases { \r
495 "xAx<xă<xĂ<Xă<XĂ<xăx<xĂx<xâ<xÂ<Xâ<XÂ<xâx<xÂx<xb<xIx<xî<xÎ<Xî<XÎ<xîx<xÎx<"\r
496 "xj<xSx<xș=xş<xȘ=xŞ<Xș=Xş<XȘ=XŞ<xșx=xşx<xȘx=xŞx<xT<xTx<xț=xţ<xȚ=xŢ<Xț=Xţ<XȚ"\r
497 "=XŢ<xțx=xţx<xȚx=xŢx<xU"\r
498 }\r
499 } \r
500 }\r
b75a7d8f 501}