]>
Commit | Line | Data |
---|---|---|
46f4442e A |
1 | <?xml version="1.0" encoding="UTF-8"?> |
2 | ||
f3c0d7a5 | 3 | <!-- Copyright (C) 2016 and later: Unicode, Inc. and others. License & terms of use: http://www.unicode.org/copyright.html --> |
729e4ab9 | 4 | <!-- Copyright (c) 2007-2009 IBM Corporation and others. All rights reserved --> |
46f4442e A |
5 | |
6 | <!-- Test data file for string search --> | |
7 | <!DOCTYPE stringsearch-tests [ | |
8 | <!ELEMENT stringsearch-tests (test-case+)> | |
9 | <!ATTLIST stringsearch-tests debug IDREF #IMPLIED > | |
10 | <!ELEMENT test-case (pattern, pre?, m?, post?)> | |
11 | <!ATTLIST test-case | |
12 | id ID #REQUIRED | |
13 | locale CDATA "en" | |
14 | strength (PRIMARY | SECONDARY | TERTIARY | QUATERNARY | IDENTICAL) "TERTIARY" | |
15 | norm (ON | OFF) "OFF" | |
729e4ab9 | 16 | alternate_handling (NON_IGNORABLE | SHIFTED) "NON_IGNORABLE" |
46f4442e A |
17 | > |
18 | ||
19 | <!ELEMENT pattern (#PCDATA)> | |
20 | <!ELEMENT pre (#PCDATA)> | |
21 | <!ELEMENT m (#PCDATA)> | |
22 | <!ELEMENT post (#PCDATA)> | |
23 | ]> | |
24 | ||
729e4ab9 | 25 | <stringsearch-tests> |
46f4442e A |
26 | <!-- debug="test11" (for copying into the above element) --> |
27 | ||
28 | <!-- Very simple match --> | |
29 | <test-case id="test01" > | |
30 | <pattern>abc</pattern> | |
31 | <pre>xxx</pre><m>abc</m><post>yyy</post> | |
32 | </test-case> | |
33 | ||
34 | <!-- Very simple no-match --> | |
35 | <test-case id="test02" > | |
36 | <pattern>abc</pattern> | |
37 | <pre>xxx</pre><post>yyy</post> | |
38 | </test-case> | |
39 | ||
40 | <!-- Match after several near-misses. --> | |
41 | <test-case id="test03" > | |
42 | <pattern>string</pattern> | |
43 | <pre>silly spring stling strxng strilg strinx stri</pre><m>string</m><post> fling</post> | |
44 | </test-case> | |
45 | ||
46 | <test-case id="test04" strength="PRIMARY" > | |
47 | <pattern>FUSS</pattern> | |
48 | <pre>abc</pre><m>fuss</m><post>sss</post> | |
49 | </test-case> | |
50 | ||
51 | <test-case id="test05" strength="PRIMARY" > | |
52 | <pattern>FUSS</pattern> | |
53 | <pre>abc</pre><m>fuß</m><post>sss</post> | |
54 | </test-case> | |
55 | ||
56 | <test-case id="test05.5" strength="PRIMARY" > | |
57 | <pattern>fuss</pattern> | |
58 | <pre>a </pre> | |
59 | <m>fuß</m> | |
60 | <post>ball table</post> | |
61 | </test-case> | |
62 | ||
63 | <test-case id="test06" strength="PRIMARY" > | |
64 | <pattern>fuß</pattern> | |
65 | <pre>abc</pre><m>fuss</m><post>xyz</post> | |
66 | </test-case> | |
67 | ||
68 | <test-case id="test07" strength="SECONDARY" > | |
69 | <pattern>fuß</pattern> | |
70 | <pre>abcfussxyz</pre> | |
71 | </test-case> | |
72 | ||
73 | <test-case id="test08" strength="PRIMARY" > | |
74 | <pattern>fus</pattern> | |
75 | <pre>abcfuß</pre><post>xyz</post> | |
76 | </test-case> | |
77 | ||
78 | <!-- A good match following an initial match that failed because | |
79 | of not ending on a character boundary --> | |
80 | <test-case id="test09" strength="PRIMARY"> | |
81 | <pattern>fus</pattern> | |
82 | <pre>fuß </pre><m>fus</m><post>sss</post> | |
83 | </test-case> | |
84 | ||
85 | ||
86 | <!-- Test cases from usrchdat.c BREAKITERATOREXACT --> | |
87 | ||
88 | <test-case id="test10" strength="TERTIARY"> | |
89 | <pattern>fox</pattern> | |
90 | <m>fox</m><post>y fox</post> | |
91 | </test-case> | |
92 | ||
93 | <test-case id="test11" strength="PRIMARY" locale="de_DE@collation=phonebook"> | |
94 | <pattern>toe</pattern> | |
95 | <pre>This is a </pre><m>Tö</m><post>ne</post> | |
96 | </test-case> | |
97 | ||
98 | <test-case id="test11a" strength="SECONDARY" locale="de_DE@collation=phonebook"> | |
99 | <pattern>toe</pattern> | |
100 | <pre>This is a </pre><post>Töne</post> | |
101 | </test-case> | |
102 | ||
103 | <test-case id="test12" strength="TERTIARY"> | |
104 | <pattern>e</pattern> | |
105 | <pre>tésting that é doés not match </pre><m>e</m><post></post> | |
106 | </test-case> | |
107 | ||
108 | <test-case id="test13" strength="PRIMARY" locale="fr"> | |
109 | <pattern>e</pattern> | |
110 | <pre></pre><m>É</m><post>É</post> | |
111 | </test-case> | |
112 | ||
113 | <test-case id="test14" strength="PRIMARY" locale="fr"> | |
114 | <pattern>O</pattern> | |
115 | <pre>C</pre><m>O\u0302</m><post>TÉ</post> | |
116 | </test-case> | |
117 | ||
118 | ||
119 | <!-- Test cases from usrchdat.c STRENGTH --> | |
120 | ||
121 | ||
122 | <test-case id="test15" strength="PRIMARY" locale="en"> | |
123 | <pattern>fox</pattern> | |
124 | <pre>The quick brown </pre><m>fox</m><post> jumps over the lazy foxes</post> | |
125 | </test-case> | |
126 | ||
127 | <test-case id="test16" strength="PRIMARY" locale="fr"> | |
128 | <pattern>peche</pattern> | |
129 | <pre>blackbirds pat </pre><m>p\u00E9ch\u00E9</m><post> </post> | |
130 | </test-case> | |
131 | ||
132 | <test-case id="test17" strength="PRIMARY" locale="fr"> | |
133 | <pattern>peche</pattern> | |
134 | <pre>blackbirds pat </pre><m>p\u00EAche</m><post> </post> | |
135 | </test-case> | |
136 | ||
137 | <test-case id="test18" strength="PRIMARY" locale="fr"> | |
138 | <pattern>peche</pattern> | |
139 | <pre>blackbirds pat </pre><m>p\u00E9che</m><post>r </post> | |
140 | </test-case> | |
141 | ||
142 | <test-case id="test19" strength="PRIMARY" locale="fr"> | |
143 | <pattern>peche</pattern> | |
144 | <pre>blackbirds pat </pre><m>p\u00EAche</m><post>r </post> | |
145 | </test-case> | |
146 | ||
147 | <test-case id="test20" strength="PRIMARY" locale="es"> | |
148 | <pattern>channel</pattern> | |
149 | <pre>A </pre><m>channel</m><post>, </post> | |
150 | </test-case> | |
151 | ||
152 | <test-case id="test21" strength="PRIMARY" locale="es"> | |
153 | <pattern>channel</pattern> | |
154 | <pre>A </pre><m>CHANNEL</m><post>, </post> | |
155 | </test-case> | |
156 | ||
157 | <test-case id="test22" strength="PRIMARY" locale="es"> | |
158 | <pattern>channel</pattern> | |
159 | <pre>A </pre><m>Channel</m><post>s, </post> | |
160 | </test-case> | |
161 | ||
162 | <test-case id="test23" strength="PRIMARY" locale="es"> | |
163 | <pattern>channel</pattern> | |
164 | <pre>A </pre><m>channel</m><post>... </post> | |
165 | </test-case> | |
166 | ||
167 | <test-case id="test24" strength="TERTIARY" locale="en"> | |
168 | <pattern>A\u0300</pattern> | |
169 | <pre>A miss, and then </pre><m>\u00c0</m><post> should match but not A"</post> | |
170 | </test-case> | |
171 | ||
172 | <!-- TODO: In the original test data, this test matched at IDENTICAL strength. | |
173 | Doesn't seem right. The characters are different. | |
174 | --> | |
175 | <test-case id="test24a" strength="IDENTICAL" locale="en"> | |
176 | <pattern>A\u0300</pattern> | |
177 | <pre>At IDENTICAL, shoud this match? </pre><m>\u00c0</m><post></post> | |
178 | </test-case> | |
729e4ab9 A |
179 | |
180 | <test-case id="test24b" strength="IDENTICAL" alternate_handling="SHIFTED" locale="en"> | |
181 | <pattern>A\u0300</pattern> | |
182 | <pre>At IDENTICAL, shoud this match? </pre> | |
183 | <m>\u00c0</m> | |
184 | <post></post> | |
185 | </test-case> | |
186 | ||
187 | <test-case id="test25" strength="SECONDARY" locale="en"> | |
46f4442e A |
188 | <pattern>Ű</pattern> |
189 | <pre>12</pre><m>ű</m><post> Ű</post> | |
190 | </test-case> | |
191 | ||
192 | <test-case id="test26" strength="SECONDARY" locale="en"> | |
193 | <pattern>A</pattern> | |
194 | <pre>12</pre><m>a</m><post>...</post> | |
195 | </test-case> | |
196 | ||
197 | ||
198 | <!-- Test Cases from usrchdat.c, VARIABLE --> | |
199 | <test-case id="test27" strength="TERTIARY" locale="en"> | |
200 | <pattern>blackbird</pattern> | |
201 | <pre>black-bird </pre><m>blackbird</m><post>...</post> | |
202 | </test-case> | |
203 | ||
204 | <test-case id="test28" strength="TERTIARY" locale="en"> | |
205 | <pattern>go</pattern> | |
206 | <pre> on</pre> | |
207 | </test-case> | |
208 | ||
209 | <!-- TODO: this gives a U_ILLEGAL_ARGUMENT error when opening | |
210 | the UStringSearch. How did the original test run? --> | |
211 | <!-- | |
212 | <test-case id="test29" strength="PRIMARY" locale="en"> | |
213 | <pattern> </pattern> | |
214 | <pre></pre><m></m><post>abc</post> | |
215 | </test-case> | |
216 | --> | |
217 | ||
218 | <test-case id="test30" strength="SECONDARY" locale="en"> | |
219 | <pattern>abc</pattern> | |
220 | <pre> a bc ab c a bc ab c"</pre> | |
221 | </test-case> | |
222 | ||
223 | <test-case id="test31" strength="SECONDARY" locale="en"> | |
224 | <pattern>abc</pattern> | |
225 | <pre> ---------------</pre> | |
226 | </test-case> | |
227 | ||
228 | ||
229 | <!-- Normalization test cases from usrchdat.c --> | |
230 | <test-case id="test32" strength="TERTIARY" norm="ON"> | |
231 | <pattern>a\u0325\u0300</pattern> | |
232 | <pre></pre><m>a\u0300\u0325</m> | |
233 | </test-case> | |
234 | ||
235 | ||
236 | <test-case id="test32a" strength="TERTIARY" norm="OFF"> | |
237 | <pattern>a\u0325\u0300</pattern> | |
238 | <pre>a\u0300\u0325</pre> | |
239 | </test-case> | |
240 | ||
241 | ||
242 | <!-- COMPOSITEBOUNDARIES from usrchdat.c | |
243 | Boundaries are not identical to original test data because | |
244 | of matching only full combining sequences | |
245 | --> | |
246 | <test-case id="test40" strength="TERTIARY"> | |
247 | <pattern>A</pattern> | |
248 | <pre>À</pre> <!-- \u00C0 --> | |
249 | </test-case> | |
250 | ||
251 | <test-case id="test41" strength="TERTIARY"> | |
252 | <pattern>A</pattern> | |
253 | <pre>À</pre><m>A</m><post>C</post> | |
254 | </test-case> | |
255 | ||
256 | <test-case id="test42" strength="TERTIARY"> | |
257 | <pattern>A\u030A</pattern> | |
258 | <pre>À\u01FA</pre> | |
259 | </test-case> | |
260 | ||
261 | ||
262 | ||
263 | <!-- SUPPLEMENTARYCANONICAL from usrchdat.c --> | |
264 | <test-case id="test50" strength="TERTIARY"> | |
265 | <pattern>\uD800\uDC00</pattern> | |
266 | <pre>abc \uD802\uDC00 \uD800\uDC01 \uD801\uDC00 </pre><m>\uD800\uDC00</m> | |
267 | <post>abc abc\uD800\uDC00 \uD800\uD800\uDC00 \uD800\uDC00\uDC00</post> | |
268 | </test-case> | |
269 | ||
270 | <test-case id="test51" strength="TERTIARY"> | |
271 | <pattern>\\uD834\\uDDB9</pattern> | |
272 | <pre>and</pre><m>\\uD834\\uDDB9</m><post>this sentence</post> | |
273 | </test-case> | |
274 | ||
275 | <test-case id="test52" strength="TERTIARY"> | |
276 | <pattern> \\uD834\\uDDB9 </pattern> | |
277 | <pre>and</pre><m> \\uD834\\uDDB9 </m><post>this sentence</post> | |
278 | </test-case> | |
279 | ||
280 | <test-case id="test53" strength="TERTIARY"> | |
281 | <pattern>-\\uD834\\uDDB9-</pattern> | |
282 | <pre>and</pre><m>-\\uD834\\uDDB9-</m><post>this sentence</post> | |
283 | </test-case> | |
284 | ||
285 | <test-case id="test54" strength="TERTIARY"> | |
286 | <pattern>,\\uD834\\uDDB9,</pattern> | |
287 | <pre>and</pre><m>,\\uD834\\uDDB9,</m><post>this sentence</post> | |
288 | </test-case> | |
289 | ||
290 | <test-case id="test55" strength="TERTIARY"> | |
291 | <pattern>?\\uD834\\uDDB9?</pattern> | |
292 | <pre>and</pre><m>?\\uD834\\uDDB9?</m><post>this sentence</post> | |
293 | </test-case> | |
294 | ||
295 | ||
296 | <!-- Long combining sequences --> | |
729e4ab9 | 297 | <!-- Backwards search fails because pattern ends with ignorables |
46f4442e A |
298 | <test-case id="test60" strength="PRIMARY"> |
299 | <pattern>A\u0301\u0301\u0301\u0301</pattern> | |
300 | <m>A\u0301\u0301\u0301\u0301\u0301</m> | |
301 | </test-case> | |
729e4ab9 A |
302 | --> |
303 | ||
46f4442e A |
304 | <test-case id="test61" strength="TERTIARY"> |
305 | <pattern>A\u0301\u0301\u0301\u0301</pattern> | |
306 | <pre>A\u0301\u0301\u0301\u0301\u0301</pre> | |
307 | </test-case> | |
308 | ||
309 | <test-case id="test62" strength="TERTIARY"> | |
310 | <pattern>A\u0301\u0301\u0301\u0301</pattern> | |
311 | <m>A\u0301\u0301\u0301\u0301</m> | |
312 | </test-case> | |
313 | ||
314 | <!-- stand-alone combining marks don't match attached marks --> | |
315 | <test-case id="test63" strength="TERTIARY"> | |
316 | <pattern>\u0301</pattern> | |
317 | <pre>A\u0301\u0301\u0301\u0301</pre> | |
318 | </test-case> | |
319 | ||
320 | <test-case id="test64" strength="TERTIARY"> | |
321 | <pattern>\u0301</pattern> | |
322 | <post>\u0301\u0301\u0301\u0301</post> | |
323 | </test-case> | |
324 | ||
325 | <!-- stand-alone combining mark does match an un-attached combining mark --> | |
326 | <test-case id="test65" strength="TERTIARY"> | |
327 | <pattern>\u0301</pattern> | |
328 | <m>\u0301</m><post>A\u0301\u0301</post> | |
329 | </test-case> | |
330 | ||
331 | <test-case id="test66" strength="TERTIARY"> | |
332 | <pattern>\u0301</pattern> | |
333 | <m>\u0301</m> | |
334 | </test-case> | |
335 | ||
336 | <!-- stand-alone combining marks at end of the target text --> | |
337 | <test-case id="test67" strength="TERTIARY"> | |
338 | <pattern>\u0301</pattern> | |
339 | <pre>abcd\r</pre><m>\u0301</m> | |
340 | </test-case> | |
341 | ||
342 | <!-- attached combining marks at end of the target text, no match --> | |
343 | <test-case id="test68" strength="TERTIARY"> | |
344 | <pattern>\u0301</pattern> | |
345 | <pre>abcd\u0301</pre> | |
346 | </test-case> | |
347 | ||
348 | ||
349 | ||
350 | <!-- no match within expansions at the start --> | |
351 | <test-case id="test70" strength="PRIMARY"> | |
352 | <pattern>Eligature</pattern> | |
353 | <pre>Æligature</pre> | |
354 | </test-case> | |
355 | ||
356 | <test-case id="test71" strength="PRIMARY"> | |
357 | <pattern>AEligature</pattern> | |
358 | <m>Æligature</m> | |
359 | </test-case> | |
360 | ||
361 | <test-case id="test72" strength="PRIMARY"> | |
362 | <pattern>AEligature</pattern> | |
363 | <m>Æligature</m> | |
364 | </test-case> | |
365 | ||
366 | <!-- unattached combining Tilde will not match a Tilde that is | |
367 | part of a composed Ñ (\u00D1) --> | |
368 | <test-case id="test73" strength="SECONDARY"> | |
369 | <pattern>\u0303</pattern> <!-- combining tilde --> | |
370 | <pre>Ñ
</pre><m>\u0303</m> | |
371 | </test-case> | |
372 | ||
373 | <test-case id="test74" strength="SECONDARY"> | |
374 | <pattern>\u0303</pattern> <!-- combining tilde --> | |
375 | <pre>Ñ 
</pre><m>\u0303</m><post>a</post> | |
376 | </test-case> | |
377 | ||
378 | <test-case id="test75" strength="TERTIARY" locale="fr"> | |
379 | <pattern>\u00EA</pattern> | |
380 | <pre>p</pre><m>\u00EA</m><post>che</post> | |
381 | </test-case> | |
382 | ||
383 | <test-case id="test76" strength="TERTIARY" locale="fr"> | |
384 | <pattern>\u00EA</pattern> | |
385 | <pre>p</pre><m>e\u0302</m><post>che</post> | |
386 | </test-case> | |
387 | ||
388 | <test-case id="test77" strength="TERTIARY" locale="fr"> | |
389 | <pattern>e\u0302</pattern> | |
390 | <pre>p</pre><m>\u00EA</m><post>che</post> | |
391 | </test-case> | |
392 | ||
393 | <!-- Test cases from ticket:5382 --> | |
394 | <test-case id="test78" strength="SECONDARY" locale="hu_HU"> | |
395 | <pattern>\u0170</pattern> | |
396 | <m>\u0171</m> | |
397 | <post>12</post> | |
398 | </test-case> | |
399 | ||
400 | <test-case id="test79" strength="SECONDARY" locale="hu_HU"> | |
401 | <pattern>\u0170</pattern> | |
402 | <pre>1</pre> | |
403 | <m>\u0171</m> | |
404 | <post>2</post> | |
405 | </test-case> | |
406 | ||
407 | <test-case id="test80" strength="SECONDARY" locale="hu_HU"> | |
408 | <pattern>\u0170</pattern> | |
409 | <pre>12</pre> | |
410 | <m>\u0171</m> | |
411 | </test-case> | |
412 | ||
413 | <!-- Test cases from ticket:5959 --> | |
414 | <test-case id="test81" strength="SECONDARY"> | |
415 | <pattern>\u2166</pattern> | |
416 | <m>VII</m> | |
417 | </test-case> | |
418 | ||
419 | <test-case id="test82" strength="SECONDARY"> | |
420 | <pattern>VII</pattern> | |
421 | <m>\u2166</m> | |
422 | </test-case> | |
729e4ab9 A |
423 | |
424 | <test-case id="test83" strength="IDENTICAL" alternate_handling="SHIFTED" locale="en"> | |
425 | <pattern>Universal Declaration of Human Rights</pattern> | |
426 | <pre>Proclaims this </pre><m>Universal Declaration of Human Rights</m><post> as a common standard of achievement for all peoples and all nations</post> | |
427 | </test-case> | |
428 | ||
429 | <test-case id="test83b" strength="TERTIARY" alternate_handling="SHIFTED" locale="en"> | |
430 | <pattern>Universal Declaration of Human Rights</pattern> | |
431 | <pre>Proclaims this </pre> | |
432 | <m>Universal-Declaration-of-Human-Rights</m> | |
433 | <post> as a common standard of achievement for all peoples and all nations</post> | |
434 | </test-case> | |
435 | ||
436 | <test-case id="test84" strength="TERTIARY" locale="en"> | |
437 | <pattern>\u05E9\u0591\u05E9</pattern> | |
438 | <m>\u05E9\u0592\u05E9</m> | |
439 | </test-case> | |
440 | ||
441 | <test-case id="test84b" strength="IDENTICAL" locale="en"> | |
442 | <pattern>\u05E9\u0591\u05E9</pattern> | |
443 | <pre>\u05E9\u0592\u05E9</pre> | |
444 | </test-case> | |
46f4442e A |
445 | </stringsearch-tests> |
446 |