]> git.saurik.com Git - apple/icu.git/blob - icuSources/test/cintltst/usrchdat.c
ICU-6.2.22.tar.gz
[apple/icu.git] / icuSources / test / cintltst / usrchdat.c
1 /********************************************************************
2 * Copyright (c) 2001-2003 International Business Machines
3 * Corporation and others. All Rights Reserved.
4 ********************************************************************
5 * File USRCHDAT.C
6 * Modification History:
7 * Name date Description
8 * synwee July 31 2001 creation
9 ********************************************************************/
10
11 #ifndef USRCHDAT_C
12 #define USRCHDAT_C
13
14 #include "unicode/utypes.h"
15
16 #if !UCONFIG_NO_COLLATION
17
18 #include "unicode/ucol.h"
19
/*
 * One string-search test case: a text, a pattern to look for in it, the
 * collation settings to use, and the list of expected matches.
 * Every table in this file ends with an entry whose text is NULL.
 * Strings carry Unicode characters as literal backslash-u escapes
 * (presumably unescaped by the test driver before use -- confirm there);
 * the offsets and sizes in the tables count each such escape as one unit.
 */
20 struct SearchData {
21 const char *text; /* text to be searched; NULL in the terminating entry */
22 const char *pattern; /* pattern to search for */
23 const char *collator; /* locale name such as "de" or "fr"; NULL in most entries */
24 UCollationStrength strength; /* collation strength for this case */
25 const char *breaker; /* "characterbreaker", "wordbreaker", or NULL */
26 int32_t offset[32]; /* expected match start offsets, terminated by -1 */
27 uint32_t size[32]; /* expected match lengths, parallel to offset[] */
28 };
29
30 typedef struct SearchData SearchData;
31
/*
 * Collation rule string. In ICU rule syntax ',' introduces a tertiary and
 * ';' a secondary difference, so p/P differ from o/O only below the primary
 * level. Presumably the rule behind the "tailored" rows of the COLLATOR
 * tables, where "fpx" is expected to match "fox" at primary strength --
 * confirm in the test driver.
 */
32 static const char *TESTCOLLATORRULE = "& o,O ; p,P";
33
/*
 * Collation rule string that places each of U+00E4, U+00C4, U+00F6, U+00D6,
 * U+00FC and U+00DC at a secondary difference from the two-letter sequence
 * ae, AE, oe, OE, ue and UE respectively.
 * NOTE(review): not referenced by any table in this file; usage is in the
 * test driver -- confirm there.
 */
34 static const char *EXTRACOLLATIONRULE = " & ae ; \\u00e4 & AE ; \\u00c4 & oe ; \\u00f6 & OE ; \\u00d6 & ue ; \\u00fc & UE ; \\u00dc";
35
/*
 * Basic exact-match cases: no locale (collator == NULL) and no break
 * iterator. Covers a pattern that never occurs, single and repeated
 * occurrences, one-character patterns, and combining marks: a lone U+0300
 * is expected NOT to match when it is followed by U+0325 in the text, but
 * does match in "a U+0300 b". The last data row expects "e" to match U+00C9
 * at primary strength.
 */
36 static const SearchData BASIC[] = {
37 {"xxxxxxxxxxxxxxxxxxxx", "fisher", NULL, UCOL_TERTIARY, NULL, {-1}, {0}},
38 {"silly spring string", "string", NULL, UCOL_TERTIARY, NULL, {13, -1},
39 {6}},
40 {"silly spring string string", "string", NULL, UCOL_TERTIARY, NULL,
41 {13, 20, -1}, {6, 6}},
42 {"silly string spring string", "string", NULL, UCOL_TERTIARY, NULL,
43 {6, 20, -1}, {6, 6}},
44 {"string spring string", "string", NULL, UCOL_TERTIARY, NULL, {0, 14, -1},
45 {6, 6}},
46 {"Scott Ganyo", "c", NULL, UCOL_TERTIARY, NULL, {1, -1}, {1}},
47 {"Scott Ganyo", " ", NULL, UCOL_TERTIARY, NULL, {5, -1}, {1}},
48 {"\\u0300\\u0325", "\\u0300", NULL, UCOL_TERTIARY, NULL, {-1}, {0}},
49 {"a\\u0300\\u0325", "\\u0300", NULL, UCOL_TERTIARY, NULL, {-1}, {0}},
50 {"a\\u0300\\u0325", "\\u0300\\u0325", NULL, UCOL_TERTIARY, NULL, {1, -1},
51 {2}},
52 {"a\\u0300b", "\\u0300", NULL, UCOL_TERTIARY, NULL, {1, -1}, {1}},
53 {"\\u00c9", "e", NULL, UCOL_PRIMARY, NULL, {0, -1}, {1}},
54 {NULL, NULL, NULL, UCOL_TERTIARY, NULL, {-1}, {0}}
55 };
56
/*
 * Cases that set the breaker field. The same text/pattern pair is listed
 * with "characterbreaker" and with "wordbreaker"; the word-breaker rows
 * expect only occurrences that form a whole word (e.g. "fox" in "foxy fox"
 * only at offset 5).
 */
57 static const SearchData BREAKITERATOREXACT[] = {
58 {"foxy fox", "fox", NULL, UCOL_TERTIARY, "characterbreaker", {0, 5, -1},
59 {3, 3}},
60 {"foxy fox", "fox", NULL, UCOL_TERTIARY, "wordbreaker", {5, -1}, {3}},
61 {"This is a toe T\\u00F6ne", "toe", "de", UCOL_PRIMARY,
62 "characterbreaker", {10, 14, -1}, {3, 2}},
63 {"This is a toe T\\u00F6ne", "toe", "de", UCOL_PRIMARY, "wordbreaker",
64 {10, -1}, {3}},
65 {"Channel, another channel, more channels, and one last Channel",
66 "Channel", "es", UCOL_TERTIARY, "wordbreaker", {0, 54, -1}, {7, 7}},
67 /* jitterbug 1745: at tertiary strength U+00E9 must not match a plain "e" */
68 {"testing that \\u00e9 does not match e", "e", NULL, UCOL_TERTIARY,
69 "characterbreaker", {1, 17, 30, -1}, {1, 1, 1}},
70 {"testing that string ab\\u00e9cd does not match e", "e", NULL,
71 UCOL_TERTIARY, "characterbreaker", {1, 28, 41, -1}, {1, 1, 1}},
72 {"\\u00c9", "e", "fr", UCOL_PRIMARY, "characterbreaker", {0, -1}, {1}},
73 {NULL, NULL, NULL, UCOL_TERTIARY, NULL, {-1}, {0}}
74 };
75
/*
 * Cases that vary collation strength and locale. The UCOL_PRIMARY rows
 * expect matches that differ from the pattern in case or accents (e.g.
 * "peche" against accented forms, "channel" against "CHANNEL"). The
 * UCOL_IDENTICAL row expects the pattern A + U+0300 to match the single
 * character U+00C0 at offset 0 and nothing else.
 */
76 static const SearchData STRENGTH[] = {
77 /*012345678901234567890123456789012345678901234567890123456789*/
78 {"The quick brown fox jumps over the lazy foxes", "fox", "en",
79 UCOL_PRIMARY, NULL, {16, 40, -1}, {3, 3}},
80 {"The quick brown fox jumps over the lazy foxes", "fox", "en",
81 UCOL_PRIMARY, "wordbreaker", {16, -1}, {3}},
82 {"blackbirds Pat p\\u00E9ch\\u00E9 p\\u00EAche p\\u00E9cher p\\u00EAcher Tod T\\u00F6ne black Tofu blackbirds Ton PAT toehold blackbird black-bird pat toe big Toe",
83 "peche", "fr", UCOL_PRIMARY, NULL, {15, 21, 27, 34, -1}, {5, 5, 5, 5}},
84 {"This is a toe T\\u00F6ne", "toe", "de", UCOL_PRIMARY, NULL,
85 {10, 14, -1}, {3, 2}},
86 {"A channel, another CHANNEL, more Channels, and one last channel...",
87 "channel", "es", UCOL_PRIMARY, NULL, {2, 19, 33, 56, -1},
88 {7, 7, 7, 7}},
89 {"\\u00c0 should match but not A", "A\\u0300", "en", UCOL_IDENTICAL,
90 NULL, {0, -1}, {1, 0}},
91 {NULL, NULL, NULL, UCOL_TERTIARY, NULL, {-1}, {0}}
92 };
93
/*
 * Cases in which spaces and hyphens inside the text are expected to be
 * skipped while matching (e.g. "blackbird" matches "black-bird" with
 * length 10, and " " matches with length 0 at every offset of the
 * alphabet). Presumably run with alternate handling set to shifted by the
 * test driver -- confirm there.
 *
 * The "abc" text below has its runs of spaces written out in full: the
 * expected offsets {1, 6, 13, 21, 31} and sizes {3, 4, 4, 5, 5} are only
 * consistent with gaps of 2, 3, 4 and 5 spaces between the groups and two
 * spaces inside each of the last two groups.
 * NOTE(review): the leading whitespace of the " on" and " ------" texts
 * may also have been shortened; the expected results do not depend on it.
 */
94 static const SearchData VARIABLE[] = {
95 /*012345678901234567890123456789012345678901234567890123456789*/
96 {"blackbirds black blackbirds blackbird black-bird",
97 "blackbird", NULL, UCOL_TERTIARY, NULL, {0, 17, 28, 38, -1},
98 {9, 9, 9, 10}},
99 /* to see that it doesn't go into an infinite loop if the start of text
100 is an ignorable character */
101 {" on", "go", NULL, UCOL_TERTIARY, NULL, {-1}, {0}},
102 {"abcdefghijklmnopqrstuvwxyz", " ", NULL, UCOL_PRIMARY, NULL,
103 {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19,
104 20, 21, 22, 23, 24, 25, -1}, {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
105 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}},
106 /* testing tightest match */
107 {" abc  a bc   ab c    a  bc     ab  c", "abc", NULL, UCOL_QUATERNARY,
108 NULL, {1, -1}, {3}},
109 /*012345678901234567890123456789012345678901234567890123456789 */
110 {" abc  a bc   ab c    a  bc     ab  c", "abc", NULL, UCOL_SECONDARY,
111 NULL, {1, 6, 13, 21, 31, -1}, {3, 4, 4, 5, 5}},
112 /* totally ignorable text */
113 {" ---------------", "abc", NULL, UCOL_SECONDARY,
114 NULL, {-1}, {0}},
115 {NULL, NULL, NULL, UCOL_TERTIARY, NULL, {-1}, {0}}
116 };
117
/*
 * The pattern lists the two combining marks in the opposite order from the
 * text (U+0325 U+0300 vs. U+0300 U+0325); a match at offset 1, length 2 is
 * expected. Presumably run with normalization switched on -- confirm in the
 * test driver.
 */
118 static const SearchData NORMEXACT[] = {
119 {"a\\u0300\\u0325", "\\u0325\\u0300", NULL, UCOL_TERTIARY, NULL, {1, -1},
120 {2}},
121 {NULL, NULL, NULL, UCOL_TERTIARY, NULL, {-1}, {0}}
122 };
123
/*
 * Same text and pattern as NORMEXACT (combining marks in swapped order),
 * but here no match is expected. Presumably run with normalization
 * switched off -- confirm in the test driver.
 */
124 static const SearchData NONNORMEXACT[] = {
125 {"a\\u0300\\u0325", "\\u0325\\u0300", NULL, UCOL_TERTIARY, NULL, {-1},
126 {0}},
127 {NULL, NULL, NULL, UCOL_TERTIARY, NULL, {-1}, {0}}
128 };
129
/*
 * Expected results when matches may overlap: "abab" is found in "abababab"
 * at offsets 0, 2 and 4.
 */
130 static const SearchData OVERLAP[] = {
131 {"abababab", "abab", NULL, UCOL_TERTIARY, NULL, {0, 2, 4, -1},
132 {4, 4, 4}},
133 {NULL, NULL, NULL, UCOL_TERTIARY, NULL, {-1}, {0}}
134 };
135
/*
 * Expected results when matches must not overlap: "abab" is found in
 * "abababab" only at offsets 0 and 4.
 */
136 static const SearchData NONOVERLAP[] = {
137 {"abababab", "abab", NULL, UCOL_TERTIARY, NULL, {0, 4, -1}, {4, 4}},
138 {NULL, NULL, NULL, UCOL_TERTIARY, NULL, {-1}, {0}}
139 };
140
/*
 * The same text and pattern under two collators. The first row expects only
 * the literal "fox"; the second ("tailored") row also expects "fpx" at
 * offset 4. Presumably the second row is run with a collator built from
 * TESTCOLLATORRULE -- confirm in the test driver.
 */
141 static const SearchData COLLATOR[] = {
142 /* english */
143 {"fox fpx", "fox", NULL, UCOL_TERTIARY, NULL, {0, -1}, {3}},
144 /* tailored */
145 {"fox fpx", "fox", NULL, UCOL_PRIMARY, NULL, {0, 4, -1}, {3, 3}},
146 {NULL, NULL, NULL, UCOL_TERTIARY, NULL, {-1}, {0}}
147 };
148
/*
 * Two different patterns ("the", "fox") searched in the same text at
 * primary strength, so "the" also matches the capitalized "The" at offset 0.
 * Presumably used to test replacing the pattern of an existing search
 * object -- confirm in the test driver.
 */
149 static const SearchData PATTERN[] = {
150 {"The quick brown fox jumps over the lazy foxes", "the", NULL,
151 UCOL_PRIMARY, NULL, {0, 31, -1}, {3, 3}},
152 {"The quick brown fox jumps over the lazy foxes", "fox", NULL,
153 UCOL_PRIMARY, NULL, {16, 40, -1}, {3, 3}},
154 {NULL, NULL, NULL, UCOL_TERTIARY, NULL, {-1}, {0}}
155 };
156
/*
 * The same pattern ("fox") searched in two different texts. Presumably used
 * to test replacing the text of an existing search object -- confirm in the
 * test driver.
 */
157 static const SearchData TEXT[] = {
158 {"the foxy brown fox", "fox", NULL, UCOL_TERTIARY, NULL, {4, 15, -1},
159 {3, 3}},
160 {"the quick brown fox", "fox", NULL, UCOL_TERTIARY, NULL, {16, -1},
161 {3}},
162 {NULL, NULL, NULL, UCOL_TERTIARY, NULL, {-1}, {0}}
163 };
164
/*
 * Cases where the text contains a precomposed character (U+00C0, U+01FA,
 * U+0F73) and the pattern is one of its parts or its full decomposition.
 * For U+01FA and U+0F73 a match is expected only for the character itself
 * or for its complete sequence of combining parts; a partial sequence
 * (base plus one mark, or a single mark) expects no match.
 */
165 static const SearchData COMPOSITEBOUNDARIES[] = {
166 {"\\u00C0", "A", NULL, UCOL_TERTIARY, NULL, {0, -1}, {1}},
167 {"A\\u00C0C", "A", NULL, UCOL_TERTIARY, NULL, {0, 1, -1}, {1, 1}},
168 {"\\u00C0A", "A", NULL, UCOL_TERTIARY, NULL, {0, 1, -1}, {1, 1}},
169 {"B\\u00C0", "A", NULL, UCOL_TERTIARY, NULL, {1, -1}, {1}},
170 {"\\u00C0B", "A", NULL, UCOL_TERTIARY, NULL, {0, -1}, {1}},
171 {"\\u00C0", "\\u0300", NULL, UCOL_TERTIARY, NULL, {0, -1}, {1}},
172 {"\\u0300\\u00C0", "\\u0300", NULL, UCOL_TERTIARY, NULL, {0, 1, -1},
173 {1, 1}},
174 {"\\u00C0\\u0300", "\\u0300", NULL, UCOL_TERTIARY, NULL, {-1}, {0}},
175 /* U+01FA decomposes to A + 030A + 0301 */
176 {"\\u01FA", "\\u01FA", NULL, UCOL_TERTIARY, NULL, {0, -1}, {1}},
177 {"\\u01FA", "\\u030A", NULL, UCOL_TERTIARY, NULL, {-1}, {0}},
178 {"\\u01FA", "A\\u030A", NULL, UCOL_TERTIARY, NULL, {-1}, {0}},
179 {"\\u01FA", "\\u030AA", NULL, UCOL_TERTIARY, NULL, {-1}, {0}},
180 {"\\u01FA", "\\u0301", NULL, UCOL_TERTIARY, NULL, {-1}, {0}},
181 {"\\u01FA", "A\\u0301", NULL, UCOL_TERTIARY, NULL, {-1}, {0}},
182 {"\\u01FA", "\\u0301A", NULL, UCOL_TERTIARY, NULL, {-1}, {0}},
183 {"\\u01FA", "\\u030A\\u0301", NULL, UCOL_TERTIARY, NULL, {0, -1}, {1}},
184 {"A\\u01FA", "A\\u030A", NULL, UCOL_TERTIARY, NULL, {-1}, {0}},
185 {"\\u01FAA", "\\u0301A", NULL, UCOL_TERTIARY, NULL, {-1}, {0}},
186 {"\\u0F73", "\\u0F73", NULL, UCOL_TERTIARY, NULL, {0, -1}, {1}},
187 {"\\u0F73", "\\u0F71", NULL, UCOL_TERTIARY, NULL, {-1}, {0}},
188 {"\\u0F73", "\\u0F72", NULL, UCOL_TERTIARY, NULL, {-1}, {0}},
189 {"\\u0F73", "\\u0F71\\u0F72", NULL, UCOL_TERTIARY, NULL, {0, -1}, {1}},
190 {"A\\u0F73", "A\\u0F71", NULL, UCOL_TERTIARY, NULL, {-1}, {0}},
191 {"\\u0F73A", "\\u0F72A", NULL, UCOL_TERTIARY, NULL, {-1}, {0}},
192 {NULL, NULL, NULL, UCOL_TERTIARY, NULL, {-1}, {0}}
193 };
194
/*
 * Plain repeated-match cases: "bee" in two sentences, where the run
 * "beeee" is expected to yield a single match at its start (offset 26).
 */
195 static const SearchData MATCH[] = {
196 {"a busy bee is a very busy beeee", "bee", NULL, UCOL_TERTIARY, NULL,
197 {7, 26, -1}, {3, 3}},
198 /* 012345678901234567890123456789012345678901234567890 */
199 {"a busy bee is a very busy beeee with no bee life", "bee", NULL,
200 UCOL_TERTIARY, NULL, {7, 26, 40, -1}, {3, 3, 3}},
201 {NULL, NULL, NULL, UCOL_TERTIARY, NULL, {-1}, {0}}
202 };
203
/*
 * Cases with supplementary characters written as surrogate pairs. Each
 * surrogate escape counts as one unit in the expected offsets and sizes, so
 * a matched pair has size 2. The first text also contains unpaired
 * surrogates next to a valid pair (D800 D800 DC00 and D800 DC00 DC00); the
 * valid pair inside them is still expected to match (offsets 26 and 29).
 */
204 static const SearchData SUPPLEMENTARY[] = {
205 /* 012345678901234567890123456789012345678901234567890012345678901234567890123456789012345678901234567890012345678901234567890123456789 */
206 {"abc \\uD800\\uDC00 \\uD800\\uDC01 \\uD801\\uDC00 \\uD800\\uDC00abc abc\\uD800\\uDC00 \\uD800\\uD800\\uDC00 \\uD800\\uDC00\\uDC00",
207 "\\uD800\\uDC00", NULL, UCOL_TERTIARY, NULL, {4, 13, 22, 26, 29, -1},
208 {2, 2, 2, 2, 2}},
209 {"and\\uD834\\uDDB9this sentence", "\\uD834\\uDDB9", NULL,
210 UCOL_TERTIARY, NULL, {3, -1}, {2}},
211 {"and \\uD834\\uDDB9 this sentence", " \\uD834\\uDDB9 ", NULL,
212 UCOL_TERTIARY, NULL, {3, -1}, {4}},
213 {"and-\\uD834\\uDDB9-this sentence", "-\\uD834\\uDDB9-", NULL,
214 UCOL_TERTIARY, NULL, {3, -1}, {4}},
215 {"and,\\uD834\\uDDB9,this sentence", ",\\uD834\\uDDB9,", NULL,
216 UCOL_TERTIARY, NULL, {3, -1}, {4}},
217 {"and?\\uD834\\uDDB9?this sentence", "?\\uD834\\uDDB9?", NULL,
218 UCOL_TERTIARY, NULL, {3, -1}, {4}},
219 {NULL, NULL, NULL, UCOL_TERTIARY, NULL, {-1}, {0}}
220 };
221
/*
 * Collation rule string that introduces multi-character collation elements
 * (AB, X + U+0300, ABC, X + U+0300 + U+0315) and ties "ab" to z with an
 * expansion. Presumably the collator for the CONTRACTION tables, whose last
 * data row expects the pattern "z" to match the text "ab" -- confirm in the
 * test driver.
 */
222 static const char *CONTRACTIONRULE =
223 "&z = ab/c < AB < X\\u0300 < ABC < X\\u0300\\u0315";
224
/*
 * Exact-match cases built around the sequences named in CONTRACTIONRULE
 * (AB, X + U+0300, X + U+0300 + U+0315, ab): patterns that are only a
 * prefix or a part of such a sequence are expected not to match, while the
 * whole sequence does.
 */
225 static const SearchData CONTRACTION[] = {
226 /* common discontiguous */
227 {"A\\u0300\\u0315", "\\u0300", NULL, UCOL_TERTIARY, NULL, {-1}, {0}},
228 {"A\\u0300\\u0315", "\\u0300\\u0315", NULL, UCOL_TERTIARY, NULL, {1, -1},
229 {2}},
230 /* contraction prefix */
231 {"AB\\u0315C", "A", NULL, UCOL_TERTIARY, NULL, {-1}, {0}},
232 {"AB\\u0315C", "AB", NULL, UCOL_TERTIARY, NULL, {0, -1}, {2}},
233 {"AB\\u0315C", "\\u0315", NULL, UCOL_TERTIARY, NULL, {2, -1}, {1}},
234 /* discontiguous problem here for backwards iteration.
235 accents not found because discontiguous stores all information */
236 {"X\\u0300\\u0319\\u0315", "\\u0319", NULL, UCOL_TERTIARY, NULL, {-1},
237 {0}},
238 /* ends not with a contraction character */
239 {"X\\u0315\\u0300D", "\\u0300\\u0315", NULL, UCOL_TERTIARY, NULL, {-1},
240 {0}},
241 {"X\\u0315\\u0300D", "X\\u0300\\u0315", NULL, UCOL_TERTIARY, NULL,
242 {0, -1}, {3}},
243 {"X\\u0300\\u031A\\u0315D", "X\\u0300", NULL, UCOL_TERTIARY, NULL, {-1},
244 {0}},
245 /* blocked discontiguous */
246 {"X\\u0300\\u031A\\u0315D", "\\u031A\\u0315D", NULL, UCOL_TERTIARY, NULL,
247 {-1}, {0}},
248 {"ab", "z", NULL, UCOL_TERTIARY, NULL, {0, -1}, {2}},
249 {NULL, NULL, NULL, UCOL_TERTIARY, NULL, {-1}, {0}}
250 };
251
/*
 * Collation rule string that makes U+0300 collate as equal to the letter a.
 * Presumably the collator for the IGNORABLE table -- confirm in the test
 * driver.
 */
252 static const char *IGNORABLERULE = "&a = \\u0300";
253
/*
 * Searches for U+0300 at primary strength in a text where it is surrounded
 * by U+0315 marks. The expected matches (sizes 2 and 3) span the adjacent
 * U+0315 marks as well as the U+0300 itself.
 */
254 static const SearchData IGNORABLE[] = {
255 {"\\u0315\\u0300 \\u0315\\u0300\\u0315 ", "\\u0300", NULL, UCOL_PRIMARY, NULL,
256 {0, 3, -1}, {2, 3}},
257 {NULL, NULL, NULL, UCOL_TERTIARY, NULL, {-1}, {0}}
258 };
259
/*
 * Counterpart of BASIC with different expectations for combining marks:
 * here a lone U+0300 IS expected to match inside U+0300 U+0325 (the match
 * covers both marks, size 2), and patterns match texts whose marks are in a
 * different order or interleaved with other marks. Presumably run with
 * canonical-equivalence matching enabled -- confirm in the test driver.
 */
260 static const SearchData BASICCANONICAL[] = {
261 {"xxxxxxxxxxxxxxxxxxxx", "fisher", NULL, UCOL_TERTIARY, NULL, {-1}, {0}},
262 {"silly spring string", "string", NULL, UCOL_TERTIARY, NULL, {13, -1},
263 {6}},
264 {"silly spring string string", "string", NULL, UCOL_TERTIARY, NULL,
265 {13, 20, -1}, {6, 6}},
266 {"silly string spring string", "string", NULL, UCOL_TERTIARY, NULL,
267 {6, 20, -1}, {6, 6}},
268 {"string spring string", "string", NULL, UCOL_TERTIARY, NULL, {0, 14, -1},
269 {6, 6}},
270 {"Scott Ganyo", "c", NULL, UCOL_TERTIARY, NULL, {1, -1}, {1}},
271 {"Scott Ganyo", " ", NULL, UCOL_TERTIARY, NULL, {5, -1}, {1}},
272 {"\\u0300\\u0325", "\\u0300", NULL, UCOL_TERTIARY, NULL, {0, -1}, {2}},
273 {"a\\u0300\\u0325", "\\u0300", NULL, UCOL_TERTIARY, NULL, {1, -1}, {2}},
274 {"a\\u0300\\u0325", "\\u0300\\u0325", NULL, UCOL_TERTIARY, NULL, {1, -1},
275 {2}},
276 {"a\\u0300b", "\\u0300", NULL, UCOL_TERTIARY, NULL, {1, -1}, {1}},
277 {"a\\u0300\\u0325b", "\\u0300b", NULL, UCOL_TERTIARY, NULL, {1, -1}, {3}},
278 {"\\u0325\\u0300A\\u0325\\u0300", "\\u0300A\\u0300", NULL, UCOL_TERTIARY,
279 NULL, {0, -1}, {5}},
280 {"\\u0325\\u0300A\\u0325\\u0300", "\\u0325A\\u0325", NULL, UCOL_TERTIARY,
281 NULL, {0, -1}, {5}},
282 {"a\\u0300\\u0325b\\u0300\\u0325c \\u0325b\\u0300 \\u0300b\\u0325",
283 "\\u0300b\\u0325", NULL, UCOL_TERTIARY, NULL, {1, 12, -1}, {5, 3}},
284 {"\\u00c4\\u0323", "A\\u0323\\u0308", NULL, UCOL_TERTIARY, NULL, {0, -1}, {2}},
285 {"\\u0308\\u0323", "\\u0323\\u0308", NULL, UCOL_TERTIARY, NULL, {0, -1}, {2}},
286 {NULL, NULL, NULL, UCOL_TERTIARY, NULL, {-1}, {0}}
287 };
288
/*
 * Combining-mark cases in which either mark alone, or both marks in either
 * order, is expected to match the sequence U+0300 U+0325 in the text; every
 * match covers both marks (size 2). Presumably run with canonical matching
 * and normalization enabled -- confirm in the test driver.
 */
289 static const SearchData NORMCANONICAL[] = {
290 {"\\u0300\\u0325", "\\u0300", NULL, UCOL_TERTIARY, NULL, {0, -1}, {2}},
291 {"\\u0300\\u0325", "\\u0325", NULL, UCOL_TERTIARY, NULL, {0, -1}, {2}},
292 {"a\\u0300\\u0325", "\\u0325\\u0300", NULL, UCOL_TERTIARY, NULL, {1, -1},
293 {2}},
294 {"a\\u0300\\u0325", "\\u0300\\u0325", NULL, UCOL_TERTIARY, NULL, {1, -1},
295 {2}},
296 {"a\\u0300\\u0325", "\\u0325", NULL, UCOL_TERTIARY, NULL, {1, -1}, {2}},
297 {"a\\u0300\\u0325", "\\u0300", NULL, UCOL_TERTIARY, NULL, {1, -1}, {2}},
298 {NULL, NULL, NULL, UCOL_TERTIARY, NULL, {-1}, {0}}
299 };
300
/*
 * Same rows and expected results as BREAKITERATOREXACT: the word-breaker
 * rows expect only whole-word occurrences. Presumably run with canonical
 * matching enabled -- confirm in the test driver.
 */
301 static const SearchData BREAKITERATORCANONICAL[] = {
302 {"foxy fox", "fox", NULL, UCOL_TERTIARY, "characterbreaker", {0, 5, -1},
303 {3, 3}},
304 {"foxy fox", "fox", NULL, UCOL_TERTIARY, "wordbreaker", {5, -1}, {3}},
305 {"This is a toe T\\u00F6ne", "toe", "de", UCOL_PRIMARY,
306 "characterbreaker", {10, 14, -1}, {3, 2}},
307 {"This is a toe T\\u00F6ne", "toe", "de", UCOL_PRIMARY, "wordbreaker",
308 {10, -1}, {3}},
309 {"Channel, another channel, more channels, and one last Channel",
310 "Channel", "es", UCOL_TERTIARY, "wordbreaker", {0, 54, -1}, {7, 7}},
311 /* jitterbug 1745: at tertiary strength U+00E9 must not match a plain "e" */
312 {"testing that \\u00e9 does not match e", "e", NULL, UCOL_TERTIARY,
313 "characterbreaker", {1, 17, 30, -1}, {1, 1, 1}},
314 {"testing that string ab\\u00e9cd does not match e", "e", NULL,
315 UCOL_TERTIARY, "characterbreaker", {1, 28, 41, -1}, {1, 1, 1}},
316 {"\\u00c9", "e", "fr", UCOL_PRIMARY, "characterbreaker", {0, -1}, {1}},
317 {NULL, NULL, NULL, UCOL_TERTIARY, NULL, {-1}, {0}}
318 };
319
/*
 * Same rows as STRENGTH except that the UCOL_IDENTICAL row is absent: all
 * cases here use UCOL_PRIMARY and expect matches that differ from the
 * pattern in case or accents. Presumably run with canonical matching
 * enabled -- confirm in the test driver.
 */
320 static const SearchData STRENGTHCANONICAL[] = {
321 /*012345678901234567890123456789012345678901234567890123456789 */
322 {"The quick brown fox jumps over the lazy foxes", "fox", "en",
323 UCOL_PRIMARY, NULL, {16, 40, -1}, {3, 3}},
324 {"The quick brown fox jumps over the lazy foxes", "fox", "en",
325 UCOL_PRIMARY, "wordbreaker", {16, -1}, {3}},
326 {"blackbirds Pat p\\u00E9ch\\u00E9 p\\u00EAche p\\u00E9cher p\\u00EAcher Tod T\\u00F6ne black Tofu blackbirds Ton PAT toehold blackbird black-bird pat toe big Toe",
327 "peche", "fr", UCOL_PRIMARY, NULL, {15, 21, 27, 34, -1}, {5, 5, 5, 5}},
328 {"This is a toe T\\u00F6ne", "toe", "de", UCOL_PRIMARY, NULL,
329 {10, 14, -1}, {3, 2}},
330 {"A channel, another CHANNEL, more Channels, and one last channel...",
331 "channel", "es", UCOL_PRIMARY, NULL, {2, 19, 33, 56, -1},
332 {7, 7, 7, 7}},
333 {NULL, NULL, NULL, UCOL_TERTIARY, NULL, {-1}, {0}}
334 };
335
/*
 * Cases in which spaces and hyphens inside the text are expected to be
 * skipped while matching (e.g. "blackbird" matches "black-bird" with
 * length 10, and " " matches with length 0 at every offset of the
 * alphabet). Presumably run with alternate handling set to shifted and
 * canonical matching enabled by the test driver -- confirm there.
 *
 * The "abc" text below has its runs of spaces written out in full: the
 * expected offsets {1, 6, 13, 21, 31} and sizes {3, 4, 4, 5, 5} are only
 * consistent with gaps of 2, 3, 4 and 5 spaces between the groups and two
 * spaces inside each of the last two groups.
 * NOTE(review): the leading whitespace of the " on" and " ------" texts
 * may also have been shortened; the expected results do not depend on it.
 */
336 static const SearchData VARIABLECANONICAL[] = {
337 /*012345678901234567890123456789012345678901234567890123456789 */
338 {"blackbirds black blackbirds blackbird black-bird",
339 "blackbird", NULL, UCOL_TERTIARY, NULL, {0, 17, 28, 38, -1},
340 {9, 9, 9, 10}},
341 /* to see that it doesn't go into an infinite loop if the start of text
342 is an ignorable character */
343 {" on", "go", NULL, UCOL_TERTIARY, NULL, {-1}, {0}},
344 {"abcdefghijklmnopqrstuvwxyz", " ", NULL, UCOL_PRIMARY, NULL,
345 {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19,
346 20, 21, 22, 23, 24, 25, -1}, {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
347 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}},
348 /* testing tightest match */
349 {" abc  a bc   ab c    a  bc     ab  c", "abc", NULL, UCOL_QUATERNARY,
350 NULL, {1, -1}, {3}},
351 /*012345678901234567890123456789012345678901234567890123456789 */
352 {" abc  a bc   ab c    a  bc     ab  c", "abc", NULL, UCOL_SECONDARY,
353 NULL, {1, 6, 13, 21, 31, -1}, {3, 4, 4, 5, 5}},
354 /* totally ignorable text */
355 {" ---------------", "abc", NULL, UCOL_SECONDARY,
356 NULL, {-1}, {0}},
357 {NULL, NULL, NULL, UCOL_TERTIARY, NULL, {-1}, {0}}
358 };
359
/*
 * Expected results when matches may overlap: "abab" is found in "abababab"
 * at offsets 0, 2 and 4. Same data as OVERLAP; presumably run with
 * canonical matching enabled -- confirm in the test driver.
 */
360 static const SearchData OVERLAPCANONICAL[] = {
361 {"abababab", "abab", NULL, UCOL_TERTIARY, NULL, {0, 2, 4, -1},
362 {4, 4, 4}},
363 {NULL, NULL, NULL, UCOL_TERTIARY, NULL, {-1}, {0}}
364 };
365
/*
 * Expected results when matches must not overlap: "abab" is found in
 * "abababab" only at offsets 0 and 4. Same data as NONOVERLAP; presumably
 * run with canonical matching enabled -- confirm in the test driver.
 */
366 static const SearchData NONOVERLAPCANONICAL[] = {
367 {"abababab", "abab", NULL, UCOL_TERTIARY, NULL, {0, 4, -1}, {4, 4}},
368 {NULL, NULL, NULL, UCOL_TERTIARY, NULL, {-1}, {0}}
369 };
370
/*
 * The same text and pattern under two collators. The first row expects only
 * the literal "fox"; the second ("tailored") row also expects "fpx" at
 * offset 4. Same data as COLLATOR; presumably the second row uses a
 * collator built from TESTCOLLATORRULE and both run with canonical matching
 * enabled -- confirm in the test driver.
 */
371 static const SearchData COLLATORCANONICAL[] = {
372 /* english */
373 {"fox fpx", "fox", NULL, UCOL_TERTIARY, NULL, {0, -1}, {3}},
374 /* tailored */
375 {"fox fpx", "fox", NULL, UCOL_PRIMARY, NULL, {0, 4, -1}, {3, 3}},
376 {NULL, NULL, NULL, UCOL_TERTIARY, NULL, {-1}, {0}}
377 };
378
/*
 * Two different patterns ("the", "fox") searched in the same text at
 * primary strength, so "the" also matches the capitalized "The" at offset 0.
 * Same data as PATTERN; presumably run with canonical matching enabled --
 * confirm in the test driver.
 */
379 static const SearchData PATTERNCANONICAL[] = {
380 {"The quick brown fox jumps over the lazy foxes", "the", NULL,
381 UCOL_PRIMARY, NULL, {0, 31, -1}, {3, 3}},
382 {"The quick brown fox jumps over the lazy foxes", "fox", NULL,
383 UCOL_PRIMARY, NULL, {16, 40, -1}, {3, 3}},
384 {NULL, NULL, NULL, UCOL_TERTIARY, NULL, {-1}, {0}}
385 };
386
/*
 * The same pattern ("fox") searched in two different texts. Same data as
 * TEXT; presumably run with canonical matching enabled -- confirm in the
 * test driver.
 */
387 static const SearchData TEXTCANONICAL[] = {
388 {"the foxy brown fox", "fox", NULL, UCOL_TERTIARY, NULL, {4, 15, -1},
389 {3, 3}},
390 {"the quick brown fox", "fox", NULL, UCOL_TERTIARY, NULL, {16, -1},
391 {3}},
392 {NULL, NULL, NULL, UCOL_TERTIARY, NULL, {-1}, {0}}
393 };
394
/*
 * Counterpart of COMPOSITEBOUNDARIES with different expectations: here many
 * patterns that are only a part of a precomposed character (a single
 * combining mark, or the base plus its first mark) ARE expected to match
 * the precomposed character in the text, while the rows marked "blocked"
 * and those with the parts in the wrong order still expect no match.
 * Presumably run with canonical matching enabled -- confirm in the test
 * driver.
 */
395 static const SearchData COMPOSITEBOUNDARIESCANONICAL[] = {
396 {"\\u00C0", "A", NULL, UCOL_TERTIARY, NULL, {0, -1}, {1}},
397 {"A\\u00C0C", "A", NULL, UCOL_TERTIARY, NULL, {0, 1, -1}, {1, 1}},
398 {"\\u00C0A", "A", NULL, UCOL_TERTIARY, NULL, {0, 1, -1}, {1, 1}},
399 {"B\\u00C0", "A", NULL, UCOL_TERTIARY, NULL, {1, -1}, {1}},
400 {"\\u00C0B", "A", NULL, UCOL_TERTIARY, NULL, {0, -1}, {1}},
401 {"\\u00C0", "\\u0300", NULL, UCOL_TERTIARY, NULL, {0, -1}, {1}},
402 {"\\u0300\\u00C0", "\\u0300", NULL, UCOL_TERTIARY, NULL, {0, 1, -1},
403 {1, 1}},
404 /* \\u0300 blocked by \\u0300 */
405 {"\\u00C0\\u0300", "\\u0300", NULL, UCOL_TERTIARY, NULL, {0, -1}, {2}},
406 /* U+01FA decomposes to A + 030A + 0301 */
407 {"\\u01FA", "\\u01FA", NULL, UCOL_TERTIARY, NULL, {0, -1}, {1}},
408 {"\\u01FA", "\\u030A", NULL, UCOL_TERTIARY, NULL, {0, -1}, {1}},
409 {"\\u01FA", "A\\u030A", NULL, UCOL_TERTIARY, NULL, {0, -1}, {1}},
410 {"\\u01FA", "\\u030AA", NULL, UCOL_TERTIARY, NULL, {-1}, {0}},
411 {"\\u01FA", "\\u0301", NULL, UCOL_TERTIARY, NULL, {0, -1}, {1}},
412 /* blocked accent */
413 {"\\u01FA", "A\\u0301", NULL, UCOL_TERTIARY, NULL, {-1}, {0}},
414 {"\\u01FA", "\\u0301A", NULL, UCOL_TERTIARY, NULL, {-1}, {0}},
415 {"\\u01FA", "\\u030A\\u0301", NULL, UCOL_TERTIARY, NULL, {0, -1}, {1}},
416 {"A\\u01FA", "A\\u030A", NULL, UCOL_TERTIARY, NULL, {1, -1}, {1}},
417 {"\\u01FAA", "\\u0301A", NULL, UCOL_TERTIARY, NULL, {0, -1}, {2}},
418 {"\\u0F73", "\\u0F73", NULL, UCOL_TERTIARY, NULL, {0, -1}, {1}},
419 {"\\u0F73", "\\u0F71", NULL, UCOL_TERTIARY, NULL, {0, -1}, {1}},
420 {"\\u0F73", "\\u0F72", NULL, UCOL_TERTIARY, NULL, {0, -1}, {1}},
421 {"\\u0F73", "\\u0F71\\u0F72", NULL, UCOL_TERTIARY, NULL, {0, -1}, {1}},
422 {"A\\u0F73", "A\\u0F71", NULL, UCOL_TERTIARY, NULL, {0, -1}, {2}},
423 {"\\u0F73A", "\\u0F72A", NULL, UCOL_TERTIARY, NULL, {0, -1}, {2}},
424 {"\\u01FA A\\u0301\\u030A A\\u030A\\u0301 A\\u030A \\u01FA", "A\\u030A",
425 NULL, UCOL_TERTIARY, NULL, {0, 6, 10, 13, -1}, {1, 3, 2, 1}},
426 {NULL, NULL, NULL, UCOL_TERTIARY, NULL, {-1}, {0}}
427 };
428
/*
 * Plain repeated-match cases: "bee" in two sentences, where the run
 * "beeee" is expected to yield a single match at its start (offset 26).
 * Same data as MATCH; presumably run with canonical matching enabled --
 * confirm in the test driver.
 */
429 static const SearchData MATCHCANONICAL[] = {
430 {"a busy bee is a very busy beeee", "bee", NULL, UCOL_TERTIARY, NULL,
431 {7, 26, -1}, {3, 3}},
432 /*012345678901234567890123456789012345678901234567890 */
433 {"a busy bee is a very busy beeee with no bee life", "bee", NULL,
434 UCOL_TERTIARY, NULL, {7, 26, 40, -1}, {3, 3, 3}},
435 {NULL, NULL, NULL, UCOL_TERTIARY, NULL, {-1}, {0}}
436 };
437
/*
 * Cases with supplementary characters written as surrogate pairs. Each
 * surrogate escape counts as one unit in the expected offsets and sizes, so
 * a matched pair has size 2. Same data as SUPPLEMENTARY; presumably run
 * with canonical matching enabled -- confirm in the test driver.
 */
438 static const SearchData SUPPLEMENTARYCANONICAL[] = {
439 /*012345678901234567890123456789012345678901234567890012345678901234567890123456789012345678901234567890012345678901234567890123456789 */
440 {"abc \\uD800\\uDC00 \\uD800\\uDC01 \\uD801\\uDC00 \\uD800\\uDC00abc abc\\uD800\\uDC00 \\uD800\\uD800\\uDC00 \\uD800\\uDC00\\uDC00",
441 "\\uD800\\uDC00", NULL, UCOL_TERTIARY, NULL, {4, 13, 22, 26, 29, -1},
442 {2, 2, 2, 2, 2}},
443 {"and\\uD834\\uDDB9this sentence", "\\uD834\\uDDB9", NULL,
444 UCOL_TERTIARY, NULL, {3, -1}, {2}},
445 {"and \\uD834\\uDDB9 this sentence", " \\uD834\\uDDB9 ", NULL,
446 UCOL_TERTIARY, NULL, {3, -1}, {4}},
447 {"and-\\uD834\\uDDB9-this sentence", "-\\uD834\\uDDB9-", NULL,
448 UCOL_TERTIARY, NULL, {3, -1}, {4}},
449 {"and,\\uD834\\uDDB9,this sentence", ",\\uD834\\uDDB9,", NULL,
450 UCOL_TERTIARY, NULL, {3, -1}, {4}},
451 {"and?\\uD834\\uDDB9?this sentence", "?\\uD834\\uDDB9?", NULL,
452 UCOL_TERTIARY, NULL, {3, -1}, {4}},
453 {NULL, NULL, NULL, UCOL_TERTIARY, NULL, {-1}, {0}}
454 };
455
/*
 * Counterpart of CONTRACTION with different expectations: a lone U+0300 is
 * expected to match inside A + U+0300 + U+0315 (size 2), and the patterns
 * X + U+0300 and U+031A U+0315 D are expected to match the text
 * X U+0300 U+031A U+0315 D. One case is disabled (commented out) because
 * forward and backward iteration give different results for it.
 * Presumably run with a collator built from CONTRACTIONRULE and canonical
 * matching enabled -- confirm in the test driver.
 */
456 static const SearchData CONTRACTIONCANONICAL[] = {
457 /* common discontiguous */
458 {"A\\u0300\\u0315", "\\u0300", NULL, UCOL_TERTIARY, NULL, {1, -1}, {2}},
459 {"A\\u0300\\u0315", "\\u0300\\u0315", NULL, UCOL_TERTIARY, NULL, {1, -1},
460 {2}},
461 /* contraction prefix */
462 {"AB\\u0315C", "A", NULL, UCOL_TERTIARY, NULL, {-1}, {0}},
463 {"AB\\u0315C", "AB", NULL, UCOL_TERTIARY, NULL, {0, -1}, {2}},
464 {"AB\\u0315C", "\\u0315", NULL, UCOL_TERTIARY, NULL, {2, -1}, {1}},
465 /* discontiguous problem here for backwards iteration.
466 forwards gives 0, 4 but backwards give 1, 3 */
467 /* {"X\\u0300\\u0319\\u0315", "\\u0319", NULL, UCOL_TERTIARY, NULL, {0, -1},
468 {4}}, */
469
470 /* ends not with a contraction character */
471 {"X\\u0315\\u0300D", "\\u0300\\u0315", NULL, UCOL_TERTIARY, NULL, {-1},
472 {0}},
473 {"X\\u0315\\u0300D", "X\\u0300\\u0315", NULL, UCOL_TERTIARY, NULL,
474 {0, -1}, {3}},
475 {"X\\u0300\\u031A\\u0315D", "X\\u0300", NULL, UCOL_TERTIARY, NULL,
476 {0, -1}, {4}},
477 /* blocked discontiguous */
478 {"X\\u0300\\u031A\\u0315D", "\\u031A\\u0315D", NULL, UCOL_TERTIARY, NULL,
479 {1, -1}, {4}},
480 {"ab", "z", NULL, UCOL_TERTIARY, NULL, {0, -1}, {2}},
481 {NULL, NULL, NULL, UCOL_TERTIARY, NULL, {-1}, {0}}
482 };
483
484 #endif /* #if !UCONFIG_NO_COLLATION */
485
486 #endif