]> git.saurik.com Git - apple/icu.git/blame - icuSources/test/testdata/ssearch.xml
ICU-400.42.tar.gz
[apple/icu.git] / icuSources / test / testdata / ssearch.xml
CommitLineData
46f4442e
A
1<?xml version="1.0" encoding="UTF-8"?>
2
3<!-- Copyright (c) 2007-2008 IBM Corporation and others. All rights reserved -->
4
5<!-- Test data file for string search -->
6<!DOCTYPE stringsearch-tests [
<!-- Root element: a non-empty list of test cases. -->
7<!ELEMENT stringsearch-tests (test-case+)>
<!-- debug: optional reference to the id of one test-case in this file.
     NOTE(review): presumably singles that case out for debugging; confirm
     against the test driver. -->
8<!ATTLIST stringsearch-tests debug IDREF #IMPLIED >
<!-- One search scenario: a required pattern followed by the target text,
     given as up to three optional pieces in order: pre, m, post. -->
9<!ELEMENT test-case (pattern, pre?, m?, post?)>
<!-- id is required and unique; locale, strength and norm are optional and
     fall back to the defaults declared here ("en", "TERTIARY", "OFF"). -->
10<!ATTLIST test-case
11 id ID #REQUIRED
12 locale CDATA "en"
13 strength (PRIMARY | SECONDARY | TERTIARY | QUATERNARY | IDENTICAL) "TERTIARY"
14 norm (ON | OFF) "OFF"
15 >
16
<!-- pattern: the string to search for.
     pre:     target text that precedes the expected match.
     m:       target text that is expected to match; a test-case with no m
              element is a no-match case.
     post:    target text that follows the expected match.
     NOTE(review): \uXXXX sequences in these elements are presumably unescaped
     by the test driver before searching; confirm against the driver. -->
17<!ELEMENT pattern (#PCDATA)>
18<!ELEMENT pre (#PCDATA)>
19<!ELEMENT m (#PCDATA)>
20<!ELEMENT post (#PCDATA)>
21]>
22
23<stringsearch-tests debug="test32">
24 <!-- debug="test11" (for copying into the above element) -->
25
26 <!-- Very simple match -->
27 <test-case id="test01" >
28 <pattern>abc</pattern>
29 <pre>xxx</pre><m>abc</m><post>yyy</post>
30 </test-case>
31
32 <!-- Very simple no-match -->
33 <test-case id="test02" >
34 <pattern>abc</pattern>
35 <pre>xxx</pre><post>yyy</post>
36 </test-case>
37
38 <!-- Match after several near-misses. -->
39 <test-case id="test03" >
40 <pattern>string</pattern>
41 <pre>silly spring stling strxng strilg strinx stri</pre><m>string</m><post> fling</post>
42 </test-case>
43
44 <test-case id="test04" strength="PRIMARY" >
45 <pattern>FUSS</pattern>
46 <pre>abc</pre><m>fuss</m><post>sss</post>
47 </test-case>
48
49 <test-case id="test05" strength="PRIMARY" >
50 <pattern>FUSS</pattern>
51 <pre>abc</pre><m>fuß</m><post>sss</post>
52 </test-case>
53
54 <test-case id="test05.5" strength="PRIMARY" >
55 <pattern>fuss</pattern>
56 <pre>a </pre>
57 <m>fuß</m>
58 <post>ball table</post>
59 </test-case>
60
61 <test-case id="test06" strength="PRIMARY" >
62 <pattern>fuß</pattern>
63 <pre>abc</pre><m>fuss</m><post>xyz</post>
64 </test-case>
65
66 <test-case id="test07" strength="SECONDARY" >
67 <pattern>fuß</pattern>
68 <pre>abcfussxyz</pre>
69 </test-case>
70
71 <test-case id="test08" strength="PRIMARY" >
72 <pattern>fus</pattern>
73 <pre>abcfuß</pre><post>xyz</post>
74 </test-case>
75
76 <!-- A good match following an initial candidate match that failed because
77 it did not end on a character boundary -->
78 <test-case id="test09" strength="PRIMARY">
79 <pattern>fus</pattern>
80 <pre>fuß </pre><m>fus</m><post>sss</post>
81 </test-case>
82
83
84 <!-- Test cases from usrchdat.c BREAKITERATOREXACT -->
85
86 <test-case id="test10" strength="TERTIARY">
87 <pattern>fox</pattern>
88 <m>fox</m><post>y fox</post>
89 </test-case>
90
91 <test-case id="test11" strength="PRIMARY" locale="de_DE@collation=phonebook">
92 <pattern>toe</pattern>
93 <pre>This is a </pre><m>Tö</m><post>ne</post>
94 </test-case>
95
96 <test-case id="test11a" strength="SECONDARY" locale="de_DE@collation=phonebook">
97 <pattern>toe</pattern>
98 <pre>This is a </pre><post>Töne</post>
99 </test-case>
100
101 <test-case id="test12" strength="TERTIARY">
102 <pattern>e</pattern>
103 <pre>tésting that é doés not match </pre><m>e</m><post></post>
104 </test-case>
105
106 <test-case id="test13" strength="PRIMARY" locale="fr">
107 <pattern>e</pattern>
108 <pre></pre><m>É</m><post>É</post>
109 </test-case>
110
111 <test-case id="test14" strength="PRIMARY" locale="fr">
112 <pattern>O</pattern>
113 <pre>C</pre><m>O\u0302</m><post>TÉ</post>
114 </test-case>
115
116
117 <!-- Test cases from usrchdat.c STRENGTH -->
118
119
120 <test-case id="test15" strength="PRIMARY" locale="en">
121 <pattern>fox</pattern>
122 <pre>The quick brown </pre><m>fox</m><post> jumps over the lazy foxes</post>
123 </test-case>
124
125 <test-case id="test16" strength="PRIMARY" locale="fr">
126 <pattern>peche</pattern>
127 <pre>blackbirds pat </pre><m>p\u00E9ch\u00E9</m><post> </post>
128 </test-case>
129
130 <test-case id="test17" strength="PRIMARY" locale="fr">
131 <pattern>peche</pattern>
132 <pre>blackbirds pat </pre><m>p\u00EAche</m><post> </post>
133 </test-case>
134
135 <test-case id="test18" strength="PRIMARY" locale="fr">
136 <pattern>peche</pattern>
137 <pre>blackbirds pat </pre><m>p\u00E9che</m><post>r </post>
138 </test-case>
139
140 <test-case id="test19" strength="PRIMARY" locale="fr">
141 <pattern>peche</pattern>
142 <pre>blackbirds pat </pre><m>p\u00EAche</m><post>r </post>
143 </test-case>
144
145 <test-case id="test20" strength="PRIMARY" locale="es">
146 <pattern>channel</pattern>
147 <pre>A </pre><m>channel</m><post>, </post>
148 </test-case>
149
150 <test-case id="test21" strength="PRIMARY" locale="es">
151 <pattern>channel</pattern>
152 <pre>A </pre><m>CHANNEL</m><post>, </post>
153 </test-case>
154
155 <test-case id="test22" strength="PRIMARY" locale="es">
156 <pattern>channel</pattern>
157 <pre>A </pre><m>Channel</m><post>s, </post>
158 </test-case>
159
160 <test-case id="test23" strength="PRIMARY" locale="es">
161 <pattern>channel</pattern>
162 <pre>A </pre><m>channel</m><post>... </post>
163 </test-case>
164
165 <test-case id="test24" strength="TERTIARY" locale="en">
166 <pattern>A\u0300</pattern>
167 <pre>A miss, and then </pre><m>\u00c0</m><post> should match but not A"</post>
168 </test-case>
169
170 <!-- TODO: In the original test data, this test matched at IDENTICAL strength.
171 Doesn't seem right. The characters are different.
172 -->
173 <test-case id="test24a" strength="IDENTICAL" locale="en">
174 <pattern>A\u0300</pattern>
175 <pre>At IDENTICAL, shoud this match? </pre><m>\u00c0</m><post></post>
176 </test-case>
177
178 <test-case id="test25" strength="SECONDARY" locale="en">
179 <pattern>Ű</pattern>
180 <pre>12</pre><m>ű</m><post> Ű</post>
181 </test-case>
182
183 <test-case id="test26" strength="SECONDARY" locale="en">
184 <pattern>A</pattern>
185 <pre>12</pre><m>a</m><post>...</post>
186 </test-case>
187
188
189 <!-- Test Cases from usrchdat.c, VARIABLE -->
190 <test-case id="test27" strength="TERTIARY" locale="en">
191 <pattern>blackbird</pattern>
192 <pre>black-bird </pre><m>blackbird</m><post>...</post>
193 </test-case>
194
195 <test-case id="test28" strength="TERTIARY" locale="en">
196 <pattern>go</pattern>
197 <pre> on</pre>
198 </test-case>
199
200 <!-- TODO: this gives a U_ILLEGAL_ARGUMENT_ERROR when opening
201 the UStringSearch. How did the original test run? -->
202 <!--
203 <test-case id="test29" strength="PRIMARY" locale="en">
204 <pattern> </pattern>
205 <pre></pre><m></m><post>abc</post>
206 </test-case>
207 -->
208
209 <test-case id="test30" strength="SECONDARY" locale="en">
210 <pattern>abc</pattern>
211 <pre> a bc ab c a bc ab c"</pre>
212 </test-case>
213
214 <test-case id="test31" strength="SECONDARY" locale="en">
215 <pattern>abc</pattern>
216 <pre> ---------------</pre>
217 </test-case>
218
219
220 <!-- Normalization test cases from usrchdat.c -->
221 <test-case id="test32" strength="TERTIARY" norm="ON">
222 <pattern>a\u0325\u0300</pattern>
223 <pre></pre><m>a\u0300\u0325</m>
224 </test-case>
225
226
227 <test-case id="test32a" strength="TERTIARY" norm="OFF">
228 <pattern>a\u0325\u0300</pattern>
229 <pre>a\u0300\u0325</pre>
230 </test-case>
231
232
233 <!-- COMPOSITEBOUNDARIES from usrchdat.c
234 Boundaries are not identical to the original test data because
235 only full combining sequences are matched
236 -->
237 <test-case id="test40" strength="TERTIARY">
238 <pattern>A</pattern>
239 <pre>À</pre> <!-- \u00C0 -->
240 </test-case>
241
242 <test-case id="test41" strength="TERTIARY">
243 <pattern>A</pattern>
244 <pre>À</pre><m>A</m><post>C</post>
245 </test-case>
246
247 <test-case id="test42" strength="TERTIARY">
248 <pattern>A\u030A</pattern>
249 <pre>À\u01FA</pre>
250 </test-case>
251
252
253
254 <!-- SUPPLEMENTARYCANONICAL from usrchdat.c -->
255 <test-case id="test50" strength="TERTIARY">
256 <pattern>\uD800\uDC00</pattern>
257 <pre>abc \uD802\uDC00 \uD800\uDC01 \uD801\uDC00 </pre><m>\uD800\uDC00</m>
258 <post>abc abc\uD800\uDC00 \uD800\uD800\uDC00 \uD800\uDC00\uDC00</post>
259 </test-case>
260
<!-- NOTE(review): test51 through test55 write the surrogate pair with doubled
     backslashes (\\uD834\\uDDB9), whereas test50 and the other cases use a
     single backslash (\uXXXX). If the test driver unescapes this text, these
     cases may be matching the literal characters backslash-u-D-8-3-4... rather
     than U+1D1B9. Confirm the intent before relying on them for
     supplementary-character coverage. -->
261 <test-case id="test51" strength="TERTIARY">
262 <pattern>\\uD834\\uDDB9</pattern>
263 <pre>and</pre><m>\\uD834\\uDDB9</m><post>this sentence</post>
264 </test-case>
265
266 <test-case id="test52" strength="TERTIARY">
267 <pattern> \\uD834\\uDDB9 </pattern>
268 <pre>and</pre><m> \\uD834\\uDDB9 </m><post>this sentence</post>
269 </test-case>
270
271 <test-case id="test53" strength="TERTIARY">
272 <pattern>-\\uD834\\uDDB9-</pattern>
273 <pre>and</pre><m>-\\uD834\\uDDB9-</m><post>this sentence</post>
274 </test-case>
275
276 <test-case id="test54" strength="TERTIARY">
277 <pattern>,\\uD834\\uDDB9,</pattern>
278 <pre>and</pre><m>,\\uD834\\uDDB9,</m><post>this sentence</post>
279 </test-case>
280
281 <test-case id="test55" strength="TERTIARY">
282 <pattern>?\\uD834\\uDDB9?</pattern>
283 <pre>and</pre><m>?\\uD834\\uDDB9?</m><post>this sentence</post>
284 </test-case>
285
286
287 <!-- Long combining sequences -->
288 <test-case id="test60" strength="PRIMARY">
289 <pattern>A\u0301\u0301\u0301\u0301</pattern>
290 <m>A\u0301\u0301\u0301\u0301\u0301</m>
291 </test-case>
292
293 <test-case id="test61" strength="TERTIARY">
294 <pattern>A\u0301\u0301\u0301\u0301</pattern>
295 <pre>A\u0301\u0301\u0301\u0301\u0301</pre>
296 </test-case>
297
298 <test-case id="test62" strength="TERTIARY">
299 <pattern>A\u0301\u0301\u0301\u0301</pattern>
300 <m>A\u0301\u0301\u0301\u0301</m>
301 </test-case>
302
303 <!-- stand-alone combining marks don't match attached marks -->
304 <test-case id="test63" strength="TERTIARY">
305 <pattern>\u0301</pattern>
306 <pre>A\u0301\u0301\u0301\u0301</pre>
307 </test-case>
308
309 <test-case id="test64" strength="TERTIARY">
310 <pattern>\u0301</pattern>
311 <post>\u0301\u0301\u0301\u0301</post>
312 </test-case>
313
314 <!-- stand-alone combining mark does match an un-attached combining mark -->
315 <test-case id="test65" strength="TERTIARY">
316 <pattern>\u0301</pattern>
317 <m>\u0301</m><post>A\u0301\u0301</post>
318 </test-case>
319
320 <test-case id="test66" strength="TERTIARY">
321 <pattern>\u0301</pattern>
322 <m>\u0301</m>
323 </test-case>
324
325 <!-- stand-alone combining marks at end of the target text -->
326 <test-case id="test67" strength="TERTIARY">
327 <pattern>\u0301</pattern>
328 <pre>abcd\r</pre><m>\u0301</m>
329 </test-case>
330
331 <!-- attached combining marks at end of the target text, no match -->
332 <test-case id="test68" strength="TERTIARY">
333 <pattern>\u0301</pattern>
334 <pre>abcd\u0301</pre>
335 </test-case>
336
337
338
339 <!-- no match within expansions at the start -->
340 <test-case id="test70" strength="PRIMARY">
341 <pattern>Eligature</pattern>
342 <pre>Æligature</pre>
343 </test-case>
344
345 <test-case id="test71" strength="PRIMARY">
346 <pattern>AEligature</pattern>
347 <m>Æligature</m>
348 </test-case>
349
<!-- NOTE(review): this case is identical to test71 (same strength, pattern
     and expected match), so it adds no coverage. Presumably a different
     variant was intended; confirm against the usrchdat.c data it was
     derived from. -->
350 <test-case id="test72" strength="PRIMARY">
351 <pattern>AEligature</pattern>
352 <m>Æligature</m>
353 </test-case>
354
355 <!-- unattached combining Tilde will not match a Tilde that is
356 part of a composed Ñ (\u00D1) -->
357 <test-case id="test73" strength="SECONDARY">
358 <pattern>\u0303</pattern> <!-- combining tilde -->
359 <pre>Ñ&#x0d;</pre><m>\u0303</m>
360 </test-case>
361
362 <test-case id="test74" strength="SECONDARY">
363 <pattern>\u0303</pattern> <!-- combining tilde -->
364 <pre>Ñ &#x0d;</pre><m>\u0303</m><post>a</post>
365 </test-case>
366
367 <test-case id="test75" strength="TERTIARY" locale="fr">
368 <pattern>\u00EA</pattern>
369 <pre>p</pre><m>\u00EA</m><post>che</post>
370 </test-case>
371
372 <test-case id="test76" strength="TERTIARY" locale="fr">
373 <pattern>\u00EA</pattern>
374 <pre>p</pre><m>e\u0302</m><post>che</post>
375 </test-case>
376
377 <test-case id="test77" strength="TERTIARY" locale="fr">
378 <pattern>e\u0302</pattern>
379 <pre>p</pre><m>\u00EA</m><post>che</post>
380 </test-case>
381
382 <!-- Test cases from ticket:5382 -->
383 <test-case id="test78" strength="SECONDARY" locale="hu_HU">
384 <pattern>\u0170</pattern>
385 <m>\u0171</m>
386 <post>12</post>
387 </test-case>
388
389 <test-case id="test79" strength="SECONDARY" locale="hu_HU">
390 <pattern>\u0170</pattern>
391 <pre>1</pre>
392 <m>\u0171</m>
393 <post>2</post>
394 </test-case>
395
396 <test-case id="test80" strength="SECONDARY" locale="hu_HU">
397 <pattern>\u0170</pattern>
398 <pre>12</pre>
399 <m>\u0171</m>
400 </test-case>
401
402 <!-- Test cases from ticket:5959 -->
403 <test-case id="test81" strength="SECONDARY">
404 <pattern>\u2166</pattern>
405 <m>VII</m>
406 </test-case>
407
408 <test-case id="test82" strength="SECONDARY">
409 <pattern>VII</pattern>
410 <m>\u2166</m>
411 </test-case>
412</stringsearch-tests>
413