]>
Commit | Line | Data |
---|---|---|
b75a7d8f A |
1 | /******************************************************************** |
2 | * Copyright (c) 2001-2003 International Business Machines | |
3 | * Corporation and others. All Rights Reserved. | |
4 | ******************************************************************** | |
5 | * File USRCHDAT.H | |
6 | * Modification History: | |
7 | * Name date Description | |
8 | * synwee July 31 2001 creation | |
9 | ********************************************************************/ | |
10 | ||
11 | #ifndef USRCHDAT_C | |
12 | #define USRCHDAT_C | |
13 | ||
14 | #include "unicode/utypes.h" | |
15 | ||
16 | #if !UCONFIG_NO_COLLATION | |
17 | ||
18 | #include "unicode/ucol.h" | |
19 | ||
20 | struct SearchData { /* one string-search test case: inputs plus the matches listed for them */ | |
21 | const char *text; /* string that is searched; NULL in the last row of every table in this file */ | |
22 | const char *pattern; /* string that is looked for in text */ | |
23 | const char *collator; /* NULL or a short name ("en", "de", "fr", "es"); presumably a collator locale - TODO confirm in the test driver */ | |
24 | UCollationStrength strength; /* collation strength given for the case */ | |
25 | const char *breaker; /* NULL, "characterbreaker" or "wordbreaker" in the tables here */ | |
26 | int32_t offset[32]; /* listed match start offsets, closed by -1 */ | |
27 | uint32_t size[32]; /* size[i] pairs with offset[i]; appears to be the match length */ | |
28 | }; | |
29 | ||
30 | typedef struct SearchData SearchData; /* lets the tables below omit the struct keyword */ | |
31 | ||
32 | static const char *TESTCOLLATORRULE = "& o,O ; p,P"; /* rule text relating o/O and p/P; presumably the tailoring behind the "tailored" COLLATOR row - consumer not shown here */ | |
33 | ||
34 | static const char *EXTRACOLLATIONRULE = " & ae ; \\u00e4 & AE ; \\u00c4 & oe ; \\u00f6 & OE ; \\u00d6 & ue ; \\u00fc & UE ; \\u00dc"; /* rule text pairing ae/oe/ue (and upper case) with \u00e4/\u00f6/\u00fc style letters; where it is applied is not visible in this file */ | |
35 | ||
36 | static const SearchData BASIC[] = { /* rows with collator and breaker both NULL; all UCOL_TERTIARY except the "\u00c9" row */ | |
37 | {"xxxxxxxxxxxxxxxxxxxx", "fisher", NULL, UCOL_TERTIARY, NULL, {-1}, {0}}, | |
38 | {"silly spring string", "string", NULL, UCOL_TERTIARY, NULL, {13, -1}, | |
39 | {6}}, | |
40 | {"silly spring string string", "string", NULL, UCOL_TERTIARY, NULL, | |
41 | {13, 20, -1}, {6, 6}}, | |
42 | {"silly string spring string", "string", NULL, UCOL_TERTIARY, NULL, | |
43 | {6, 20, -1}, {6, 6}}, | |
44 | {"string spring string", "string", NULL, UCOL_TERTIARY, NULL, {0, 14, -1}, | |
45 | {6, 6}}, | |
46 | {"Scott Ganyo", "c", NULL, UCOL_TERTIARY, NULL, {1, -1}, {1}}, | |
47 | {"Scott Ganyo", " ", NULL, UCOL_TERTIARY, NULL, {5, -1}, {1}}, | |
48 | {"\\u0300\\u0325", "\\u0300", NULL, UCOL_TERTIARY, NULL, {-1}, {0}}, | |
49 | {"a\\u0300\\u0325", "\\u0300", NULL, UCOL_TERTIARY, NULL, {-1}, {0}}, | |
50 | {"a\\u0300\\u0325", "\\u0300\\u0325", NULL, UCOL_TERTIARY, NULL, {1, -1}, | |
51 | {2}}, | |
52 | {"a\\u0300b", "\\u0300", NULL, UCOL_TERTIARY, NULL, {1, -1}, {1}}, | |
53 | {"\\u00c9", "e", NULL, UCOL_PRIMARY, NULL, {0, -1}, {1}}, | |
54 | {NULL, NULL, NULL, UCOL_TERTIARY, NULL, {-1}, {0}} /* closing row: text is NULL, presumably the end-of-table sentinel */ | |
55 | }; | |
56 | ||
57 | static const SearchData BREAKITERATOREXACT[] = { /* every data row names a breaker; the paired rows list fewer offsets with "wordbreaker" than with "characterbreaker" */ | |
58 | {"foxy fox", "fox", NULL, UCOL_TERTIARY, "characterbreaker", {0, 5, -1}, | |
59 | {3, 3}}, | |
60 | {"foxy fox", "fox", NULL, UCOL_TERTIARY, "wordbreaker", {5, -1}, {3}}, | |
61 | {"This is a toe T\\u00F6ne", "toe", "de", UCOL_PRIMARY, | |
62 | "characterbreaker", {10, 14, -1}, {3, 2}}, | |
63 | {"This is a toe T\\u00F6ne", "toe", "de", UCOL_PRIMARY, "wordbreaker", | |
64 | {10, -1}, {3}}, | |
65 | {"Channel, another channel, more channels, and one last Channel", | |
66 | "Channel", "es", UCOL_TERTIARY, "wordbreaker", {0, 54, -1}, {7, 7}}, | |
67 | /* jitterbug 1745 */ | |
68 | {"testing that \\u00e9 does not match e", "e", NULL, UCOL_TERTIARY, | |
69 | "characterbreaker", {1, 17, 30, -1}, {1, 1, 1}}, | |
70 | {"testing that string ab\\u00e9cd does not match e", "e", NULL, | |
71 | UCOL_TERTIARY, "characterbreaker", {1, 28, 41, -1}, {1, 1, 1}}, | |
72 | {"\\u00c9", "e", "fr", UCOL_PRIMARY, "characterbreaker", {0, -1}, {1}}, | |
73 | {NULL, NULL, NULL, UCOL_TERTIARY, NULL, {-1}, {0}} /* closing row: text is NULL */ | |
74 | }; | |
75 | ||
76 | static const SearchData STRENGTH[] = { /* named-collator rows ("en", "fr", "de", "es") at UCOL_PRIMARY, plus one UCOL_IDENTICAL row at the end */ | |
77 | /*012345678901234567890123456789012345678901234567890123456789*/ | |
78 | {"The quick brown fox jumps over the lazy foxes", "fox", "en", | |
79 | UCOL_PRIMARY, NULL, {16, 40, -1}, {3, 3}}, | |
80 | {"The quick brown fox jumps over the lazy foxes", "fox", "en", | |
81 | UCOL_PRIMARY, "wordbreaker", {16, -1}, {3}}, | |
82 | {"blackbirds Pat p\\u00E9ch\\u00E9 p\\u00EAche p\\u00E9cher p\\u00EAcher Tod T\\u00F6ne black Tofu blackbirds Ton PAT toehold blackbird black-bird pat toe big Toe", | |
83 | "peche", "fr", UCOL_PRIMARY, NULL, {15, 21, 27, 34, -1}, {5, 5, 5, 5}}, | |
84 | {"This is a toe T\\u00F6ne", "toe", "de", UCOL_PRIMARY, NULL, | |
85 | {10, 14, -1}, {3, 2}}, | |
86 | {"A channel, another CHANNEL, more Channels, and one last channel...", | |
87 | "channel", "es", UCOL_PRIMARY, NULL, {2, 19, 33, 56, -1}, | |
88 | {7, 7, 7, 7}}, | |
89 | {"\\u00c0 should match but not A", "A\\u0300", "en", UCOL_IDENTICAL, | |
90 | NULL, {0, -1}, {1, 0}}, | |
91 | {NULL, NULL, NULL, UCOL_TERTIARY, NULL, {-1}, {0}} /* closing row: text is NULL */ | |
92 | }; | |
93 | ||
94 | static const SearchData VARIABLE[] = { /* texts with spaces and hyphens, listed at TERTIARY, PRIMARY, QUATERNARY and SECONDARY strength */ | |
95 | /*012345678901234567890123456789012345678901234567890123456789*/ | |
96 | {"blackbirds black blackbirds blackbird black-bird", | |
97 | "blackbird", NULL, UCOL_TERTIARY, NULL, {0, 17, 28, 38, -1}, | |
98 | {9, 9, 9, 10}}, | |
99 | /* to see that it doesn't go into an infinite loop if the start of text | |
100 | is an ignorable character */ | |
101 | {" on", "go", NULL, UCOL_TERTIARY, NULL, {-1}, {0}}, | |
102 | {"abcdefghijklmnopqrstuvwxyz", " ", NULL, UCOL_PRIMARY, NULL, | |
103 | {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, | |
104 | 20, 21, 22, 23, 24, 25, -1}, {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, | |
105 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}}, | |
106 | /* testing tightest match */ | |
107 | {" abc a bc ab c a bc ab c", "abc", NULL, UCOL_QUATERNARY, | |
108 | NULL, {1, -1}, {3}}, | |
109 | /*012345678901234567890123456789012345678901234567890123456789 */ | |
110 | {" abc a bc ab c a bc ab c", "abc", NULL, UCOL_SECONDARY, | |
111 | NULL, {1, 6, 13, 21, 31, -1}, {3, 4, 4, 5, 5}}, | |
112 | /* totally ignorable text */ | |
113 | {" ---------------", "abc", NULL, UCOL_SECONDARY, | |
114 | NULL, {-1}, {0}}, | |
115 | {NULL, NULL, NULL, UCOL_TERTIARY, NULL, {-1}, {0}} /* closing row: text is NULL */ | |
116 | }; | |
117 | ||
118 | static const SearchData NORMEXACT[] = { /* one case: the pattern has the two combining marks in the opposite order from the text, and a match at offset 1 of size 2 is listed */ | |
119 | {"a\\u0300\\u0325", "\\u0325\\u0300", NULL, UCOL_TERTIARY, NULL, {1, -1}, | |
120 | {2}}, | |
121 | {NULL, NULL, NULL, UCOL_TERTIARY, NULL, {-1}, {0}} /* closing row: text is NULL */ | |
122 | }; | |
123 | ||
124 | static const SearchData NONNORMEXACT[] = { /* same text and pattern as the NORMEXACT data row, but no match is listed ({-1}) */ | |
125 | {"a\\u0300\\u0325", "\\u0325\\u0300", NULL, UCOL_TERTIARY, NULL, {-1}, | |
126 | {0}}, | |
127 | {NULL, NULL, NULL, UCOL_TERTIARY, NULL, {-1}, {0}} /* closing row: text is NULL */ | |
128 | }; | |
129 | ||
130 | static const SearchData OVERLAP[] = { /* "abab" in "abababab": listed matches at 0, 2 and 4 overlap one another */ | |
131 | {"abababab", "abab", NULL, UCOL_TERTIARY, NULL, {0, 2, 4, -1}, | |
132 | {4, 4, 4}}, | |
133 | {NULL, NULL, NULL, UCOL_TERTIARY, NULL, {-1}, {0}} /* closing row: text is NULL */ | |
134 | }; | |
135 | ||
136 | static const SearchData NONOVERLAP[] = { /* same text and pattern as OVERLAP, but only the disjoint matches at 0 and 4 are listed */ | |
137 | {"abababab", "abab", NULL, UCOL_TERTIARY, NULL, {0, 4, -1}, {4, 4}}, | |
138 | {NULL, NULL, NULL, UCOL_TERTIARY, NULL, {-1}, {0}} /* closing row: text is NULL */ | |
139 | }; | |
140 | ||
141 | static const SearchData COLLATOR[] = { /* same text and pattern twice: the first row lists only "fox", the second also lists "fpx" at offset 4 */ | |
142 | /* english */ | |
143 | {"fox fpx", "fox", NULL, UCOL_TERTIARY, NULL, {0, -1}, {3}}, | |
144 | /* tailored */ | |
145 | {"fox fpx", "fox", NULL, UCOL_PRIMARY, NULL, {0, 4, -1}, {3, 3}}, | |
146 | {NULL, NULL, NULL, UCOL_TERTIARY, NULL, {-1}, {0}} /* closing row: text is NULL */ | |
147 | }; | |
148 | ||
149 | static const SearchData PATTERN[] = { /* one sentence searched for two different patterns ("the", "fox") at UCOL_PRIMARY */ | |
150 | {"The quick brown fox jumps over the lazy foxes", "the", NULL, | |
151 | UCOL_PRIMARY, NULL, {0, 31, -1}, {3, 3}}, | |
152 | {"The quick brown fox jumps over the lazy foxes", "fox", NULL, | |
153 | UCOL_PRIMARY, NULL, {16, 40, -1}, {3, 3}}, | |
154 | {NULL, NULL, NULL, UCOL_TERTIARY, NULL, {-1}, {0}} /* closing row: text is NULL */ | |
155 | }; | |
156 | ||
157 | static const SearchData TEXT[] = { /* one pattern ("fox") searched in two different texts */ | |
158 | {"the foxy brown fox", "fox", NULL, UCOL_TERTIARY, NULL, {4, 15, -1}, | |
159 | {3, 3}}, | |
160 | {"the quick brown fox", "fox", NULL, UCOL_TERTIARY, NULL, {16, -1}, | |
161 | {3}}, | |
162 | {NULL, NULL, NULL, UCOL_TERTIARY, NULL, {-1}, {0}} /* closing row: text is NULL */ | |
163 | }; | |
164 | ||
165 | static const SearchData COMPOSITEBOUNDARIES[] = { /* texts built around \u00C0, \u01FA and \u0F73, searched for those characters or for parts of them */ | |
166 | {"\\u00C0", "A", NULL, UCOL_TERTIARY, NULL, {0, -1}, {1}}, | |
167 | {"A\\u00C0C", "A", NULL, UCOL_TERTIARY, NULL, {0, 1, -1}, {1, 1}}, | |
168 | {"\\u00C0A", "A", NULL, UCOL_TERTIARY, NULL, {0, 1, -1}, {1, 1}}, | |
169 | {"B\\u00C0", "A", NULL, UCOL_TERTIARY, NULL, {1, -1}, {1}}, | |
170 | {"\\u00C0B", "A", NULL, UCOL_TERTIARY, NULL, {0, -1}, {1}}, | |
171 | {"\\u00C0", "\\u0300", NULL, UCOL_TERTIARY, NULL, {0, -1}, {1}}, | |
172 | {"\\u0300\\u00C0", "\\u0300", NULL, UCOL_TERTIARY, NULL, {0, 1, -1}, | |
173 | {1, 1}}, | |
174 | {"\\u00C0\\u0300", "\\u0300", NULL, UCOL_TERTIARY, NULL, {-1}, {0}}, | |
175 | /* A + 030A + 0301 */ | |
176 | {"\\u01FA", "\\u01FA", NULL, UCOL_TERTIARY, NULL, {0, -1}, {1}}, | |
177 | {"\\u01FA", "\\u030A", NULL, UCOL_TERTIARY, NULL, {-1}, {0}}, | |
178 | {"\\u01FA", "A\\u030A", NULL, UCOL_TERTIARY, NULL, {-1}, {0}}, | |
179 | {"\\u01FA", "\\u030AA", NULL, UCOL_TERTIARY, NULL, {-1}, {0}}, | |
180 | {"\\u01FA", "\\u0301", NULL, UCOL_TERTIARY, NULL, {-1}, {0}}, | |
181 | {"\\u01FA", "A\\u0301", NULL, UCOL_TERTIARY, NULL, {-1}, {0}}, | |
182 | {"\\u01FA", "\\u0301A", NULL, UCOL_TERTIARY, NULL, {-1}, {0}}, | |
183 | {"\\u01FA", "\\u030A\\u0301", NULL, UCOL_TERTIARY, NULL, {0, -1}, {1}}, | |
184 | {"A\\u01FA", "A\\u030A", NULL, UCOL_TERTIARY, NULL, {-1}, {0}}, | |
185 | {"\\u01FAA", "\\u0301A", NULL, UCOL_TERTIARY, NULL, {-1}, {0}}, | |
186 | {"\\u0F73", "\\u0F73", NULL, UCOL_TERTIARY, NULL, {0, -1}, {1}}, | |
187 | {"\\u0F73", "\\u0F71", NULL, UCOL_TERTIARY, NULL, {-1}, {0}}, | |
188 | {"\\u0F73", "\\u0F72", NULL, UCOL_TERTIARY, NULL, {-1}, {0}}, | |
189 | {"\\u0F73", "\\u0F71\\u0F72", NULL, UCOL_TERTIARY, NULL, {0, -1}, {1}}, | |
190 | {"A\\u0F73", "A\\u0F71", NULL, UCOL_TERTIARY, NULL, {-1}, {0}}, | |
191 | {"\\u0F73A", "\\u0F72A", NULL, UCOL_TERTIARY, NULL, {-1}, {0}}, | |
192 | {NULL, NULL, NULL, UCOL_TERTIARY, NULL, {-1}, {0}} /* closing row: text is NULL */ | |
193 | }; | |
194 | ||
195 | static const SearchData MATCH[] = { /* "bee" searches; the second text is the first one extended, which adds a third listed match at 40 */ | |
196 | {"a busy bee is a very busy beeee", "bee", NULL, UCOL_TERTIARY, NULL, | |
197 | {7, 26, -1}, {3, 3}}, | |
198 | /* 012345678901234567890123456789012345678901234567890 */ | |
199 | {"a busy bee is a very busy beeee with no bee life", "bee", NULL, | |
200 | UCOL_TERTIARY, NULL, {7, 26, 40, -1}, {3, 3, 3}}, | |
201 | {NULL, NULL, NULL, UCOL_TERTIARY, NULL, {-1}, {0}} /* closing row: text is NULL */ | |
202 | }; | |
203 | ||
204 | static const SearchData SUPPLEMENTARY[] = { /* texts containing surrogate pairs; offsets and sizes count each \uXXXX escape as one unit, so a matched pair has size 2 */ | |
205 | /* 012345678901234567890123456789012345678901234567890012345678901234567890123456789012345678901234567890012345678901234567890123456789 */ | |
206 | {"abc \\uD800\\uDC00 \\uD800\\uDC01 \\uD801\\uDC00 \\uD800\\uDC00abc abc\\uD800\\uDC00 \\uD800\\uD800\\uDC00 \\uD800\\uDC00\\uDC00", | |
207 | "\\uD800\\uDC00", NULL, UCOL_TERTIARY, NULL, {4, 13, 22, 26, 29, -1}, | |
208 | {2, 2, 2, 2, 2}}, | |
374ca955 A |
209 | {"and\\uD834\\uDDB9this sentence", "\\uD834\\uDDB9", NULL, |
210 | UCOL_TERTIARY, NULL, {3, -1}, {2}}, | |
211 | {"and \\uD834\\uDDB9 this sentence", " \\uD834\\uDDB9 ", NULL, | |
212 | UCOL_TERTIARY, NULL, {3, -1}, {4}}, | |
213 | {"and-\\uD834\\uDDB9-this sentence", "-\\uD834\\uDDB9-", NULL, | |
214 | UCOL_TERTIARY, NULL, {3, -1}, {4}}, | |
215 | {"and,\\uD834\\uDDB9,this sentence", ",\\uD834\\uDDB9,", NULL, | |
216 | UCOL_TERTIARY, NULL, {3, -1}, {4}}, | |
217 | {"and?\\uD834\\uDDB9?this sentence", "?\\uD834\\uDDB9?", NULL, | |
218 | UCOL_TERTIARY, NULL, {3, -1}, {4}}, | |
b75a7d8f A |
219 | {NULL, NULL, NULL, UCOL_TERTIARY, NULL, {-1}, {0}} /* closing row: text is NULL */ |
220 | }; | |
221 | ||
222 | static const char *CONTRACTIONRULE = /* rule text naming the multi-character sequences ab, AB, ABC, X\u0300 and X\u0300\u0315; presumably the tailoring for the CONTRACTION tables - consumer not shown here */ | |
223 | "&z = ab/c < AB < X\\u0300 < ABC < X\\u0300\\u0315"; | |
224 | ||
225 | static const SearchData CONTRACTION[] = { /* cases over the sequences named in CONTRACTIONRULE; presumably run with that rule applied - TODO confirm in the test driver */ | |
226 | /* common discontiguous */ | |
227 | {"A\\u0300\\u0315", "\\u0300", NULL, UCOL_TERTIARY, NULL, {-1}, {0}}, | |
228 | {"A\\u0300\\u0315", "\\u0300\\u0315", NULL, UCOL_TERTIARY, NULL, {1, -1}, | |
229 | {2}}, | |
230 | /* contraction prefix */ | |
231 | {"AB\\u0315C", "A", NULL, UCOL_TERTIARY, NULL, {-1}, {0}}, | |
232 | {"AB\\u0315C", "AB", NULL, UCOL_TERTIARY, NULL, {0, -1}, {2}}, | |
233 | {"AB\\u0315C", "\\u0315", NULL, UCOL_TERTIARY, NULL, {2, -1}, {1}}, | |
234 | /* discontiguous problem here for backwards iteration. | |
235 | accents not found because discontiguous stores all information */ | |
236 | {"X\\u0300\\u0319\\u0315", "\\u0319", NULL, UCOL_TERTIARY, NULL, {-1}, | |
237 | {0}}, | |
238 | /* ends not with a contraction character */ | |
239 | {"X\\u0315\\u0300D", "\\u0300\\u0315", NULL, UCOL_TERTIARY, NULL, {-1}, | |
240 | {0}}, | |
241 | {"X\\u0315\\u0300D", "X\\u0300\\u0315", NULL, UCOL_TERTIARY, NULL, | |
242 | {0, -1}, {3}}, | |
243 | {"X\\u0300\\u031A\\u0315D", "X\\u0300", NULL, UCOL_TERTIARY, NULL, {-1}, | |
244 | {0}}, | |
245 | /* blocked discontiguous */ | |
246 | {"X\\u0300\\u031A\\u0315D", "\\u031A\\u0315D", NULL, UCOL_TERTIARY, NULL, | |
247 | {-1}, {0}}, | |
248 | {"ab", "z", NULL, UCOL_TERTIARY, NULL, {0, -1}, {2}}, | |
249 | {NULL, NULL, NULL, UCOL_TERTIARY, NULL, {-1}, {0}} /* closing row: text is NULL */ | |
250 | }; | |
251 | ||
252 | static const char *IGNORABLERULE = "&a = \\u0300"; /* rule text equating \u0300 with 'a'; presumably paired with the IGNORABLE table - confirm in the test driver */ | |
253 | ||
254 | static const SearchData IGNORABLE[] = { /* single UCOL_PRIMARY case; the listed sizes (2 and 3) are wider than the one-character pattern */ | |
255 | {"\\u0315\\u0300 \\u0315\\u0300\\u0315 ", "\\u0300", NULL, UCOL_PRIMARY, NULL, | |
256 | {0, 3, -1}, {2, 3}}, | |
257 | {NULL, NULL, NULL, UCOL_TERTIARY, NULL, {-1}, {0}} /* closing row: text is NULL */ | |
258 | }; | |
259 | ||
260 | static const SearchData BASICCANONICAL[] = { /* first seven rows equal BASIC; the combining-mark rows differ, e.g. "\u0300" in "\u0300\u0325" lists a match of size 2 here */ | |
261 | {"xxxxxxxxxxxxxxxxxxxx", "fisher", NULL, UCOL_TERTIARY, NULL, {-1}, {0}}, | |
262 | {"silly spring string", "string", NULL, UCOL_TERTIARY, NULL, {13, -1}, | |
263 | {6}}, | |
264 | {"silly spring string string", "string", NULL, UCOL_TERTIARY, NULL, | |
265 | {13, 20, -1}, {6, 6}}, | |
266 | {"silly string spring string", "string", NULL, UCOL_TERTIARY, NULL, | |
267 | {6, 20, -1}, {6, 6}}, | |
268 | {"string spring string", "string", NULL, UCOL_TERTIARY, NULL, {0, 14, -1}, | |
269 | {6, 6}}, | |
270 | {"Scott Ganyo", "c", NULL, UCOL_TERTIARY, NULL, {1, -1}, {1}}, | |
271 | {"Scott Ganyo", " ", NULL, UCOL_TERTIARY, NULL, {5, -1}, {1}}, | |
272 | {"\\u0300\\u0325", "\\u0300", NULL, UCOL_TERTIARY, NULL, {0, -1}, {2}}, | |
273 | {"a\\u0300\\u0325", "\\u0300", NULL, UCOL_TERTIARY, NULL, {1, -1}, {2}}, | |
274 | {"a\\u0300\\u0325", "\\u0300\\u0325", NULL, UCOL_TERTIARY, NULL, {1, -1}, | |
275 | {2}}, | |
276 | {"a\\u0300b", "\\u0300", NULL, UCOL_TERTIARY, NULL, {1, -1}, {1}}, | |
277 | {"a\\u0300\\u0325b", "\\u0300b", NULL, UCOL_TERTIARY, NULL, {1, -1}, {3}}, | |
278 | {"\\u0325\\u0300A\\u0325\\u0300", "\\u0300A\\u0300", NULL, UCOL_TERTIARY, | |
279 | NULL, {0, -1}, {5}}, | |
280 | {"\\u0325\\u0300A\\u0325\\u0300", "\\u0325A\\u0325", NULL, UCOL_TERTIARY, | |
281 | NULL, {0, -1}, {5}}, | |
282 | {"a\\u0300\\u0325b\\u0300\\u0325c \\u0325b\\u0300 \\u0300b\\u0325", | |
283 | "\\u0300b\\u0325", NULL, UCOL_TERTIARY, NULL, {1, 12, -1}, {5, 3}}, | |
374ca955 A |
284 | {"\\u00c4\\u0323", "A\\u0323\\u0308", NULL, UCOL_TERTIARY, NULL, {0, -1}, {2}}, |
285 | {"\\u0308\\u0323", "\\u0323\\u0308", NULL, UCOL_TERTIARY, NULL, {0, -1}, {2}}, | |
b75a7d8f A |
286 | {NULL, NULL, NULL, UCOL_TERTIARY, NULL, {-1}, {0}} /* closing row: text is NULL */ |
287 | }; | |
288 | ||
289 | static const SearchData NORMCANONICAL[] = { /* six rows over "\u0300\u0325" and "a\u0300\u0325"; each lists one match of size 2 whichever mark or mark order the pattern uses */ | |
290 | {"\\u0300\\u0325", "\\u0300", NULL, UCOL_TERTIARY, NULL, {0, -1}, {2}}, | |
291 | {"\\u0300\\u0325", "\\u0325", NULL, UCOL_TERTIARY, NULL, {0, -1}, {2}}, | |
292 | {"a\\u0300\\u0325", "\\u0325\\u0300", NULL, UCOL_TERTIARY, NULL, {1, -1}, | |
293 | {2}}, | |
294 | {"a\\u0300\\u0325", "\\u0300\\u0325", NULL, UCOL_TERTIARY, NULL, {1, -1}, | |
295 | {2}}, | |
296 | {"a\\u0300\\u0325", "\\u0325", NULL, UCOL_TERTIARY, NULL, {1, -1}, {2}}, | |
297 | {"a\\u0300\\u0325", "\\u0300", NULL, UCOL_TERTIARY, NULL, {1, -1}, {2}}, | |
298 | {NULL, NULL, NULL, UCOL_TERTIARY, NULL, {-1}, {0}} /* closing row: text is NULL */ | |
299 | }; | |
300 | ||
301 | static const SearchData BREAKITERATORCANONICAL[] = { /* row-for-row the same data as BREAKITERATOREXACT */ | |
302 | {"foxy fox", "fox", NULL, UCOL_TERTIARY, "characterbreaker", {0, 5, -1}, | |
303 | {3, 3}}, | |
304 | {"foxy fox", "fox", NULL, UCOL_TERTIARY, "wordbreaker", {5, -1}, {3}}, | |
305 | {"This is a toe T\\u00F6ne", "toe", "de", UCOL_PRIMARY, | |
306 | "characterbreaker", {10, 14, -1}, {3, 2}}, | |
307 | {"This is a toe T\\u00F6ne", "toe", "de", UCOL_PRIMARY, "wordbreaker", | |
308 | {10, -1}, {3}}, | |
309 | {"Channel, another channel, more channels, and one last Channel", | |
310 | "Channel", "es", UCOL_TERTIARY, "wordbreaker", {0, 54, -1}, {7, 7}}, | |
311 | /* jitterbug 1745 */ | |
312 | {"testing that \\u00e9 does not match e", "e", NULL, UCOL_TERTIARY, | |
313 | "characterbreaker", {1, 17, 30, -1}, {1, 1, 1}}, | |
314 | {"testing that string ab\\u00e9cd does not match e", "e", NULL, | |
315 | UCOL_TERTIARY, "characterbreaker", {1, 28, 41, -1}, {1, 1, 1}}, | |
316 | {"\\u00c9", "e", "fr", UCOL_PRIMARY, "characterbreaker", {0, -1}, {1}}, | |
317 | {NULL, NULL, NULL, UCOL_TERTIARY, NULL, {-1}, {0}} /* closing row: text is NULL */ | |
318 | }; | |
319 | ||
320 | static const SearchData STRENGTHCANONICAL[] = { /* same rows as STRENGTH, except that the UCOL_IDENTICAL row is absent */ | |
321 | /*012345678901234567890123456789012345678901234567890123456789 */ | |
322 | {"The quick brown fox jumps over the lazy foxes", "fox", "en", | |
323 | UCOL_PRIMARY, NULL, {16, 40, -1}, {3, 3}}, | |
324 | {"The quick brown fox jumps over the lazy foxes", "fox", "en", | |
325 | UCOL_PRIMARY, "wordbreaker", {16, -1}, {3}}, | |
326 | {"blackbirds Pat p\\u00E9ch\\u00E9 p\\u00EAche p\\u00E9cher p\\u00EAcher Tod T\\u00F6ne black Tofu blackbirds Ton PAT toehold blackbird black-bird pat toe big Toe", | |
327 | "peche", "fr", UCOL_PRIMARY, NULL, {15, 21, 27, 34, -1}, {5, 5, 5, 5}}, | |
328 | {"This is a toe T\\u00F6ne", "toe", "de", UCOL_PRIMARY, NULL, | |
329 | {10, 14, -1}, {3, 2}}, | |
330 | {"A channel, another CHANNEL, more Channels, and one last channel...", | |
331 | "channel", "es", UCOL_PRIMARY, NULL, {2, 19, 33, 56, -1}, | |
332 | {7, 7, 7, 7}}, | |
333 | {NULL, NULL, NULL, UCOL_TERTIARY, NULL, {-1}, {0}} /* closing row: text is NULL */ | |
334 | }; | |
335 | ||
336 | static const SearchData VARIABLECANONICAL[] = { /* row-for-row the same data as VARIABLE */ | |
337 | /*012345678901234567890123456789012345678901234567890123456789 */ | |
338 | {"blackbirds black blackbirds blackbird black-bird", | |
339 | "blackbird", NULL, UCOL_TERTIARY, NULL, {0, 17, 28, 38, -1}, | |
340 | {9, 9, 9, 10}}, | |
341 | /* to see that it doesn't go into an infinite loop if the start of text | |
342 | is an ignorable character */ | |
343 | {" on", "go", NULL, UCOL_TERTIARY, NULL, {-1}, {0}}, | |
344 | {"abcdefghijklmnopqrstuvwxyz", " ", NULL, UCOL_PRIMARY, NULL, | |
345 | {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, | |
346 | 20, 21, 22, 23, 24, 25, -1}, {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, | |
347 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}}, | |
348 | /* testing tightest match */ | |
349 | {" abc a bc ab c a bc ab c", "abc", NULL, UCOL_QUATERNARY, | |
350 | NULL, {1, -1}, {3}}, | |
351 | /*012345678901234567890123456789012345678901234567890123456789 */ | |
352 | {" abc a bc ab c a bc ab c", "abc", NULL, UCOL_SECONDARY, | |
353 | NULL, {1, 6, 13, 21, 31, -1}, {3, 4, 4, 5, 5}}, | |
354 | /* totally ignorable text */ | |
355 | {" ---------------", "abc", NULL, UCOL_SECONDARY, | |
356 | NULL, {-1}, {0}}, | |
357 | {NULL, NULL, NULL, UCOL_TERTIARY, NULL, {-1}, {0}} /* closing row: text is NULL */ | |
358 | }; | |
359 | ||
360 | static const SearchData OVERLAPCANONICAL[] = { /* same data as OVERLAP: listed matches at 0, 2 and 4 overlap one another */ | |
361 | {"abababab", "abab", NULL, UCOL_TERTIARY, NULL, {0, 2, 4, -1}, | |
362 | {4, 4, 4}}, | |
363 | {NULL, NULL, NULL, UCOL_TERTIARY, NULL, {-1}, {0}} /* closing row: text is NULL */ | |
364 | }; | |
365 | ||
366 | static const SearchData NONOVERLAPCANONICAL[] = { /* same data as NONOVERLAP: only the disjoint matches at 0 and 4 are listed */ | |
367 | {"abababab", "abab", NULL, UCOL_TERTIARY, NULL, {0, 4, -1}, {4, 4}}, | |
368 | {NULL, NULL, NULL, UCOL_TERTIARY, NULL, {-1}, {0}} /* closing row: text is NULL */ | |
369 | }; | |
370 | ||
371 | static const SearchData COLLATORCANONICAL[] = { /* same data as COLLATOR: the second row additionally lists "fpx" at offset 4 */ | |
372 | /* english */ | |
373 | {"fox fpx", "fox", NULL, UCOL_TERTIARY, NULL, {0, -1}, {3}}, | |
374 | /* tailored */ | |
375 | {"fox fpx", "fox", NULL, UCOL_PRIMARY, NULL, {0, 4, -1}, {3, 3}}, | |
376 | {NULL, NULL, NULL, UCOL_TERTIARY, NULL, {-1}, {0}} /* closing row: text is NULL */ | |
377 | }; | |
378 | ||
379 | static const SearchData PATTERNCANONICAL[] = { /* same data as PATTERN: one sentence, two patterns ("the", "fox") */ | |
380 | {"The quick brown fox jumps over the lazy foxes", "the", NULL, | |
381 | UCOL_PRIMARY, NULL, {0, 31, -1}, {3, 3}}, | |
382 | {"The quick brown fox jumps over the lazy foxes", "fox", NULL, | |
383 | UCOL_PRIMARY, NULL, {16, 40, -1}, {3, 3}}, | |
384 | {NULL, NULL, NULL, UCOL_TERTIARY, NULL, {-1}, {0}} /* closing row: text is NULL */ | |
385 | }; | |
386 | ||
387 | static const SearchData TEXTCANONICAL[] = { /* same data as TEXT: one pattern ("fox"), two texts */ | |
388 | {"the foxy brown fox", "fox", NULL, UCOL_TERTIARY, NULL, {4, 15, -1}, | |
389 | {3, 3}}, | |
390 | {"the quick brown fox", "fox", NULL, UCOL_TERTIARY, NULL, {16, -1}, | |
391 | {3}}, | |
392 | {NULL, NULL, NULL, UCOL_TERTIARY, NULL, {-1}, {0}} /* closing row: text is NULL */ | |
393 | }; | |
394 | ||
395 | static const SearchData COMPOSITEBOUNDARIESCANONICAL[] = { /* same text/pattern pairs as COMPOSITEBOUNDARIES plus one extra multi-match row; several pairs with no match there list one here */ | |
396 | {"\\u00C0", "A", NULL, UCOL_TERTIARY, NULL, {0, -1}, {1}}, | |
397 | {"A\\u00C0C", "A", NULL, UCOL_TERTIARY, NULL, {0, 1, -1}, {1, 1}}, | |
398 | {"\\u00C0A", "A", NULL, UCOL_TERTIARY, NULL, {0, 1, -1}, {1, 1}}, | |
399 | {"B\\u00C0", "A", NULL, UCOL_TERTIARY, NULL, {1, -1}, {1}}, | |
400 | {"\\u00C0B", "A", NULL, UCOL_TERTIARY, NULL, {0, -1}, {1}}, | |
401 | {"\\u00C0", "\\u0300", NULL, UCOL_TERTIARY, NULL, {0, -1}, {1}}, | |
402 | {"\\u0300\\u00C0", "\\u0300", NULL, UCOL_TERTIARY, NULL, {0, 1, -1}, | |
403 | {1, 1}}, | |
404 | /* \\u0300 blocked by \\u0300 */ | |
405 | {"\\u00C0\\u0300", "\\u0300", NULL, UCOL_TERTIARY, NULL, {0, -1}, {2}}, | |
406 | /* A + 030A + 0301 */ | |
407 | {"\\u01FA", "\\u01FA", NULL, UCOL_TERTIARY, NULL, {0, -1}, {1}}, | |
408 | {"\\u01FA", "\\u030A", NULL, UCOL_TERTIARY, NULL, {0, -1}, {1}}, | |
409 | {"\\u01FA", "A\\u030A", NULL, UCOL_TERTIARY, NULL, {0, -1}, {1}}, | |
410 | {"\\u01FA", "\\u030AA", NULL, UCOL_TERTIARY, NULL, {-1}, {0}}, | |
411 | {"\\u01FA", "\\u0301", NULL, UCOL_TERTIARY, NULL, {0, -1}, {1}}, | |
412 | /* blocked accent */ | |
413 | {"\\u01FA", "A\\u0301", NULL, UCOL_TERTIARY, NULL, {-1}, {0}}, | |
414 | {"\\u01FA", "\\u0301A", NULL, UCOL_TERTIARY, NULL, {-1}, {0}}, | |
415 | {"\\u01FA", "\\u030A\\u0301", NULL, UCOL_TERTIARY, NULL, {0, -1}, {1}}, | |
416 | {"A\\u01FA", "A\\u030A", NULL, UCOL_TERTIARY, NULL, {1, -1}, {1}}, | |
417 | {"\\u01FAA", "\\u0301A", NULL, UCOL_TERTIARY, NULL, {0, -1}, {2}}, | |
418 | {"\\u0F73", "\\u0F73", NULL, UCOL_TERTIARY, NULL, {0, -1}, {1}}, | |
419 | {"\\u0F73", "\\u0F71", NULL, UCOL_TERTIARY, NULL, {0, -1}, {1}}, | |
420 | {"\\u0F73", "\\u0F72", NULL, UCOL_TERTIARY, NULL, {0, -1}, {1}}, | |
421 | {"\\u0F73", "\\u0F71\\u0F72", NULL, UCOL_TERTIARY, NULL, {0, -1}, {1}}, | |
422 | {"A\\u0F73", "A\\u0F71", NULL, UCOL_TERTIARY, NULL, {0, -1}, {2}}, | |
423 | {"\\u0F73A", "\\u0F72A", NULL, UCOL_TERTIARY, NULL, {0, -1}, {2}}, | |
424 | {"\\u01FA A\\u0301\\u030A A\\u030A\\u0301 A\\u030A \\u01FA", "A\\u030A", | |
425 | NULL, UCOL_TERTIARY, NULL, {0, 6, 10, 13, -1}, {1, 3, 2, 1}}, | |
426 | {NULL, NULL, NULL, UCOL_TERTIARY, NULL, {-1}, {0}} /* closing row: text is NULL */ | |
427 | }; | |
428 | ||
429 | static const SearchData MATCHCANONICAL[] = { /* same data as MATCH: "bee" searches, the longer text adding a third listed match at 40 */ | |
430 | {"a busy bee is a very busy beeee", "bee", NULL, UCOL_TERTIARY, NULL, | |
431 | {7, 26, -1}, {3, 3}}, | |
432 | /*012345678901234567890123456789012345678901234567890 */ | |
433 | {"a busy bee is a very busy beeee with no bee life", "bee", NULL, | |
434 | UCOL_TERTIARY, NULL, {7, 26, 40, -1}, {3, 3, 3}}, | |
435 | {NULL, NULL, NULL, UCOL_TERTIARY, NULL, {-1}, {0}} /* closing row: text is NULL */ | |
436 | }; | |
437 | ||
438 | static const SearchData SUPPLEMENTARYCANONICAL[] = { /* same data as SUPPLEMENTARY: surrogate-pair texts, each \uXXXX escape counted as one offset unit */ | |
439 | /*012345678901234567890123456789012345678901234567890012345678901234567890123456789012345678901234567890012345678901234567890123456789 */ | |
440 | {"abc \\uD800\\uDC00 \\uD800\\uDC01 \\uD801\\uDC00 \\uD800\\uDC00abc abc\\uD800\\uDC00 \\uD800\\uD800\\uDC00 \\uD800\\uDC00\\uDC00", | |
441 | "\\uD800\\uDC00", NULL, UCOL_TERTIARY, NULL, {4, 13, 22, 26, 29, -1}, | |
442 | {2, 2, 2, 2, 2}}, | |
374ca955 A |
443 | {"and\\uD834\\uDDB9this sentence", "\\uD834\\uDDB9", NULL, |
444 | UCOL_TERTIARY, NULL, {3, -1}, {2}}, | |
445 | {"and \\uD834\\uDDB9 this sentence", " \\uD834\\uDDB9 ", NULL, | |
446 | UCOL_TERTIARY, NULL, {3, -1}, {4}}, | |
447 | {"and-\\uD834\\uDDB9-this sentence", "-\\uD834\\uDDB9-", NULL, | |
448 | UCOL_TERTIARY, NULL, {3, -1}, {4}}, | |
449 | {"and,\\uD834\\uDDB9,this sentence", ",\\uD834\\uDDB9,", NULL, | |
450 | UCOL_TERTIARY, NULL, {3, -1}, {4}}, | |
451 | {"and?\\uD834\\uDDB9?this sentence", "?\\uD834\\uDDB9?", NULL, | |
452 | UCOL_TERTIARY, NULL, {3, -1}, {4}}, | |
b75a7d8f A |
453 | {NULL, NULL, NULL, UCOL_TERTIARY, NULL, {-1}, {0}} /* closing row: text is NULL */ |
454 | }; | |
455 | ||
456 | static const SearchData CONTRACTIONCANONICAL[] = { /* same pairs as CONTRACTION with the "\u0319" row commented out; several listed results differ (e.g. the first row lists a match here) */ | |
457 | /* common discontiguous */ | |
458 | {"A\\u0300\\u0315", "\\u0300", NULL, UCOL_TERTIARY, NULL, {1, -1}, {2}}, | |
459 | {"A\\u0300\\u0315", "\\u0300\\u0315", NULL, UCOL_TERTIARY, NULL, {1, -1}, | |
460 | {2}}, | |
461 | /* contraction prefix */ | |
462 | {"AB\\u0315C", "A", NULL, UCOL_TERTIARY, NULL, {-1}, {0}}, | |
463 | {"AB\\u0315C", "AB", NULL, UCOL_TERTIARY, NULL, {0, -1}, {2}}, | |
464 | {"AB\\u0315C", "\\u0315", NULL, UCOL_TERTIARY, NULL, {2, -1}, {1}}, | |
465 | /* discontiguous problem here for backwards iteration. | |
466 | forwards gives 0, 4 but backwards gives 1, 3 */ | |
467 | /* {"X\\u0300\\u0319\\u0315", "\\u0319", NULL, UCOL_TERTIARY, NULL, {0, -1}, | |
468 | {4}}, */ | |
469 | ||
470 | /* ends not with a contraction character */ | |
471 | {"X\\u0315\\u0300D", "\\u0300\\u0315", NULL, UCOL_TERTIARY, NULL, {-1}, | |
472 | {0}}, | |
473 | {"X\\u0315\\u0300D", "X\\u0300\\u0315", NULL, UCOL_TERTIARY, NULL, | |
474 | {0, -1}, {3}}, | |
475 | {"X\\u0300\\u031A\\u0315D", "X\\u0300", NULL, UCOL_TERTIARY, NULL, | |
476 | {0, -1}, {4}}, | |
477 | /* blocked discontiguous */ | |
478 | {"X\\u0300\\u031A\\u0315D", "\\u031A\\u0315D", NULL, UCOL_TERTIARY, NULL, | |
479 | {1, -1}, {4}}, | |
480 | {"ab", "z", NULL, UCOL_TERTIARY, NULL, {0, -1}, {2}}, | |
481 | {NULL, NULL, NULL, UCOL_TERTIARY, NULL, {-1}, {0}} /* closing row: text is NULL */ | |
482 | }; | |
483 | ||
484 | #endif /* #if !UCONFIG_NO_COLLATION */ | |
485 | ||
486 | #endif |