1 /********************************************************************
2 * Copyright (c) 2001-2003 International Business Machines
3 * Corporation and others. All Rights Reserved.
4 ********************************************************************
6 * Modification History:
7 * Name date Description
8 * synwee July 31 2001 creation
9 ********************************************************************/
14 #include "unicode/utypes.h"
16 #if !UCONFIG_NO_COLLATION
18 #include "unicode/ucol.h"
/* A field named strength of type UCollationStrength, followed by the
   SearchData typedef.
   NOTE(review): the struct body that would normally enclose the strength
   field is not visible here -- compare with the complete file. */
24 UCollationStrength strength
;
/* Lets the tables in this file be declared as "SearchData NAME[]". */
30 typedef struct SearchData SearchData
;
/* Collation rule text: a reset at 'o' followed by O, p and P.
   Presumably ICU rule syntax, where ',' is a tertiary and ';' a secondary
   difference -- TODO confirm where this string is consumed (not visible
   here). */
32 static const char *TESTCOLLATORRULE
= "& o,O ; p,P";
/* Collation rule text that places U+00E4, U+00C4, U+00F6, U+00D6, U+00FC and
   U+00DC after ae, AE, oe, OE, ue and UE respectively, each with ';'.
   The code points are spelled as escape text with a doubled backslash, so
   the C string holds the escape sequence itself; presumably the consumer
   unescapes it -- TODO confirm. */
34 static const char *EXTRACOLLATIONRULE
= " & ae ; \\u00e4 & AE ; \\u00c4 & oe ; \\u00f6 & OE ; \\u00d6 & ue ; \\u00fc & UE ; \\u00dc";
/* BASIC: search rows with NULL third and fifth fields, all at UCOL_TERTIARY
   except one UCOL_PRIMARY row.
   Rows hold: two strings (presumably text and pattern), a string or NULL, a
   UCOL_* strength, a string or NULL, and two integer lists (presumably match
   offsets ending in -1, and match lengths) -- TODO confirm against struct
   SearchData, not visible here. The last row has NULL strings, presumably
   the end-of-table marker.
   NOTE(review): some rows stop after the first list and the closing of the
   array is not visible here; compare with the complete file. */
36 static const SearchData BASIC
[] = {
37 {"xxxxxxxxxxxxxxxxxxxx", "fisher", NULL
, UCOL_TERTIARY
, NULL
, {-1}, {0}},
38 {"silly spring string", "string", NULL
, UCOL_TERTIARY
, NULL
, {13, -1},
40 {"silly spring string string", "string", NULL
, UCOL_TERTIARY
, NULL
,
41 {13, 20, -1}, {6, 6}},
42 {"silly string spring string", "string", NULL
, UCOL_TERTIARY
, NULL
,
44 {"string spring string", "string", NULL
, UCOL_TERTIARY
, NULL
, {0, 14, -1},
46 {"Scott Ganyo", "c", NULL
, UCOL_TERTIARY
, NULL
, {1, -1}, {1}},
47 {"Scott Ganyo", " ", NULL
, UCOL_TERTIARY
, NULL
, {5, -1}, {1}},
48 {"\\u0300\\u0325", "\\u0300", NULL
, UCOL_TERTIARY
, NULL
, {-1}, {0}},
49 {"a\\u0300\\u0325", "\\u0300", NULL
, UCOL_TERTIARY
, NULL
, {-1}, {0}},
50 {"a\\u0300\\u0325", "\\u0300\\u0325", NULL
, UCOL_TERTIARY
, NULL
, {1, -1},
52 {"a\\u0300b", "\\u0300", NULL
, UCOL_TERTIARY
, NULL
, {1, -1}, {1}},
53 {"\\u00c9", "e", NULL
, UCOL_PRIMARY
, NULL
, {0, -1}, {1}},
54 {NULL
, NULL
, NULL
, UCOL_TERTIARY
, NULL
, {-1}, {0}}
/* BREAKITERATOREXACT: search rows whose fifth field is "characterbreaker" or
   "wordbreaker"; some rows also give a locale-like third string ("de", "es",
   "fr").
   Rows hold: two strings (presumably text and pattern), a string or NULL, a
   UCOL_* strength, a string or NULL, and two integer lists (presumably match
   offsets ending in -1, and match lengths) -- TODO confirm against struct
   SearchData, not visible here. The last row has NULL strings, presumably
   the end-of-table marker.
   NOTE(review): some rows stop early and the closing of the array is not
   visible here; compare with the complete file. */
57 static const SearchData BREAKITERATOREXACT
[] = {
58 {"foxy fox", "fox", NULL
, UCOL_TERTIARY
, "characterbreaker", {0, 5, -1},
60 {"foxy fox", "fox", NULL
, UCOL_TERTIARY
, "wordbreaker", {5, -1}, {3}},
61 {"This is a toe T\\u00F6ne", "toe", "de", UCOL_PRIMARY
,
62 "characterbreaker", {10, 14, -1}, {3, 2}},
63 {"This is a toe T\\u00F6ne", "toe", "de", UCOL_PRIMARY
, "wordbreaker",
65 {"Channel, another channel, more channels, and one last Channel",
66 "Channel", "es", UCOL_TERTIARY
, "wordbreaker", {0, 54, -1}, {7, 7}},
68 {"testing that \\u00e9 does not match e", "e", NULL
, UCOL_TERTIARY
,
69 "characterbreaker", {1, 17, 30, -1}, {1, 1, 1}},
70 {"testing that string ab\\u00e9cd does not match e", "e", NULL
,
71 UCOL_TERTIARY
, "characterbreaker", {1, 28, 41, -1}, {1, 1, 1}},
72 {"\\u00c9", "e", "fr", UCOL_PRIMARY
, "characterbreaker", {0, -1}, {1}},
73 {NULL
, NULL
, NULL
, UCOL_TERTIARY
, NULL
, {-1}, {0}}
/* STRENGTH: search rows with third strings "en", "fr", "de" and "es", at
   UCOL_PRIMARY plus one UCOL_IDENTICAL row; one row names "wordbreaker".
   Rows hold: two strings (presumably text and pattern), a string or NULL, a
   UCOL_* strength, a string or NULL, and two integer lists (presumably match
   offsets ending in -1, and match lengths) -- TODO confirm against struct
   SearchData, not visible here. The last row has NULL strings, presumably
   the end-of-table marker.
   NOTE(review): one row stops after the first list and the closing of the
   array is not visible here; compare with the complete file. */
76 static const SearchData STRENGTH
[] = {
77 /*012345678901234567890123456789012345678901234567890123456789*/
78 {"The quick brown fox jumps over the lazy foxes", "fox", "en",
79 UCOL_PRIMARY
, NULL
, {16, 40, -1}, {3, 3}},
80 {"The quick brown fox jumps over the lazy foxes", "fox", "en",
81 UCOL_PRIMARY
, "wordbreaker", {16, -1}, {3}},
82 {"blackbirds Pat p\\u00E9ch\\u00E9 p\\u00EAche p\\u00E9cher p\\u00EAcher Tod T\\u00F6ne black Tofu blackbirds Ton PAT toehold blackbird black-bird pat toe big Toe",
83 "peche", "fr", UCOL_PRIMARY
, NULL
, {15, 21, 27, 34, -1}, {5, 5, 5, 5}},
84 {"This is a toe T\\u00F6ne", "toe", "de", UCOL_PRIMARY
, NULL
,
85 {10, 14, -1}, {3, 2}},
86 {"A channel, another CHANNEL, more Channels, and one last channel...",
87 "channel", "es", UCOL_PRIMARY
, NULL
, {2, 19, 33, 56, -1},
89 {"\\u00c0 should match but not A", "A\\u0300", "en", UCOL_IDENTICAL
,
90 NULL
, {0, -1}, {1, 0}},
91 {NULL
, NULL
, NULL
, UCOL_TERTIARY
, NULL
, {-1}, {0}}
/* VARIABLE: search rows whose texts contain spaces and hyphens, at
   UCOL_TERTIARY, UCOL_PRIMARY, UCOL_QUATERNARY and UCOL_SECONDARY.
   Rows hold: two strings (presumably text and pattern), a string or NULL, a
   UCOL_* strength, a string or NULL, and two integer lists (presumably match
   offsets ending in -1, and match lengths) -- TODO confirm against struct
   SearchData, not visible here. The last row has NULL strings, presumably
   the end-of-table marker.
   NOTE(review): some rows stop early and the closing of the array is not
   visible here; compare with the complete file. */
94 static const SearchData VARIABLE
[] = {
95 /*012345678901234567890123456789012345678901234567890123456789*/
96 {"blackbirds black blackbirds blackbird black-bird",
97 "blackbird", NULL
, UCOL_TERTIARY
, NULL
, {0, 17, 28, 38, -1},
99 /* to see that it doesn't go into an infinite loop if the start of text
100 is an ignorable character */
101 {" on", "go", NULL
, UCOL_TERTIARY
, NULL
, {-1}, {0}},
102 {"abcdefghijklmnopqrstuvwxyz", " ", NULL
, UCOL_PRIMARY
, NULL
,
103 {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19,
104 20, 21, 22, 23, 24, 25, -1}, {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
105 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}},
106 /* testing tightest match */
107 {" abc a bc ab c a bc ab c", "abc", NULL
, UCOL_QUATERNARY
,
109 /*012345678901234567890123456789012345678901234567890123456789 */
110 {" abc a bc ab c a bc ab c", "abc", NULL
, UCOL_SECONDARY
,
111 NULL
, {1, 6, 13, 21, 31, -1}, {3, 4, 4, 5, 5}},
112 /* totally ignorable text */
113 {" ---------------", "abc", NULL
, UCOL_SECONDARY
,
115 {NULL
, NULL
, NULL
, UCOL_TERTIARY
, NULL
, {-1}, {0}}
/* NORMEXACT: one data row whose second string lists the same two combining
   marks as the first string but in the opposite order; its first integer
   list is {1, -1} (presumably one expected match at offset 1 -- TODO confirm
   against struct SearchData, not visible here). The last row has NULL
   strings, presumably the end-of-table marker.
   NOTE(review): the data row stops after the first list and the closing of
   the array is not visible here; compare with the complete file. */
118 static const SearchData NORMEXACT
[] = {
119 {"a\\u0300\\u0325", "\\u0325\\u0300", NULL
, UCOL_TERTIARY
, NULL
, {1, -1},
121 {NULL
, NULL
, NULL
, UCOL_TERTIARY
, NULL
, {-1}, {0}}
/* NONNORMEXACT: one data row whose second string lists the same two combining
   marks as the first string but in the opposite order; its first integer
   list is {-1} (presumably meaning no expected match -- TODO confirm against
   struct SearchData, not visible here). The last row has NULL strings,
   presumably the end-of-table marker.
   NOTE(review): the data row stops after the first list and the closing of
   the array is not visible here; compare with the complete file. */
124 static const SearchData NONNORMEXACT
[] = {
125 {"a\\u0300\\u0325", "\\u0325\\u0300", NULL
, UCOL_TERTIARY
, NULL
, {-1},
127 {NULL
, NULL
, NULL
, UCOL_TERTIARY
, NULL
, {-1}, {0}}
/* OVERLAP: one data row for "abab" in "abababab" with first integer list
   {0, 2, 4, -1} (presumably overlapping match offsets -- TODO confirm against
   struct SearchData, not visible here). The last row has NULL strings,
   presumably the end-of-table marker.
   NOTE(review): the data row stops after the first list and the closing of
   the array is not visible here; compare with the complete file. */
130 static const SearchData OVERLAP
[] = {
131 {"abababab", "abab", NULL
, UCOL_TERTIARY
, NULL
, {0, 2, 4, -1},
133 {NULL
, NULL
, NULL
, UCOL_TERTIARY
, NULL
, {-1}, {0}}
/* NONOVERLAP: one data row for "abab" in "abababab" with integer lists
   {0, 4, -1} and {4, 4} (presumably non-overlapping match offsets and match
   lengths -- TODO confirm against struct SearchData, not visible here). The
   last row has NULL strings, presumably the end-of-table marker.
   NOTE(review): the closing of the array is not visible here; compare with
   the complete file. */
136 static const SearchData NONOVERLAP
[] = {
137 {"abababab", "abab", NULL
, UCOL_TERTIARY
, NULL
, {0, 4, -1}, {4, 4}},
138 {NULL
, NULL
, NULL
, UCOL_TERTIARY
, NULL
, {-1}, {0}}
/* COLLATOR: "fox" in "fox fpx" at two strengths. The UCOL_TERTIARY row lists
   {0, -1}; the UCOL_PRIMARY row lists {0, 4, -1}, i.e. "fpx" is also listed
   there. Presumably these rows are run with a custom collator -- TODO
   confirm in the test driver (not visible here).
   Integer lists are presumably match offsets ending in -1, and match lengths
   -- TODO confirm against struct SearchData, not visible here. The last row
   has NULL strings, presumably the end-of-table marker.
   NOTE(review): the closing of the array is not visible here; compare with
   the complete file. */
141 static const SearchData COLLATOR
[] = {
143 {"fox fpx", "fox", NULL
, UCOL_TERTIARY
, NULL
, {0, -1}, {3}},
145 {"fox fpx", "fox", NULL
, UCOL_PRIMARY
, NULL
, {0, 4, -1}, {3, 3}},
146 {NULL
, NULL
, NULL
, UCOL_TERTIARY
, NULL
, {-1}, {0}}
/* PATTERN: two UCOL_PRIMARY rows over the same first string, with second
   strings "the" and "fox".
   Integer lists are presumably match offsets ending in -1, and match lengths
   -- TODO confirm against struct SearchData, not visible here. The last row
   has NULL strings, presumably the end-of-table marker.
   NOTE(review): the closing of the array is not visible here; compare with
   the complete file. */
149 static const SearchData PATTERN
[] = {
150 {"The quick brown fox jumps over the lazy foxes", "the", NULL
,
151 UCOL_PRIMARY
, NULL
, {0, 31, -1}, {3, 3}},
152 {"The quick brown fox jumps over the lazy foxes", "fox", NULL
,
153 UCOL_PRIMARY
, NULL
, {16, 40, -1}, {3, 3}},
154 {NULL
, NULL
, NULL
, UCOL_TERTIARY
, NULL
, {-1}, {0}}
/* TEXT: two UCOL_TERTIARY rows with the same second string "fox" and two
   different first strings.
   The first integer list is presumably match offsets ending in -1 -- TODO
   confirm against struct SearchData, not visible here. The last row has NULL
   strings, presumably the end-of-table marker.
   NOTE(review): both data rows stop after the first list and the closing of
   the array is not visible here; compare with the complete file. */
157 static const SearchData TEXT
[] = {
158 {"the foxy brown fox", "fox", NULL
, UCOL_TERTIARY
, NULL
, {4, 15, -1},
160 {"the quick brown fox", "fox", NULL
, UCOL_TERTIARY
, NULL
, {16, -1},
162 {NULL
, NULL
, NULL
, UCOL_TERTIARY
, NULL
, {-1}, {0}}
/* COMPOSITEBOUNDARIES: UCOL_TERTIARY rows built from the escaped code points
   U+00C0, U+01FA and U+0F73 and from combining marks such as U+0300, U+030A,
   U+0301, U+0F71 and U+0F72.
   Rows hold: two strings (presumably text and pattern), a string or NULL, a
   UCOL_* strength, a string or NULL, and two integer lists (presumably match
   offsets ending in -1, and match lengths) -- TODO confirm against struct
   SearchData, not visible here. The last row has NULL strings, presumably
   the end-of-table marker.
   NOTE(review): one row stops after the first list and the closing of the
   array is not visible here; compare with the complete file. */
165 static const SearchData COMPOSITEBOUNDARIES
[] = {
166 {"\\u00C0", "A", NULL
, UCOL_TERTIARY
, NULL
, {0, -1}, {1}},
167 {"A\\u00C0C", "A", NULL
, UCOL_TERTIARY
, NULL
, {0, 1, -1}, {1, 1}},
168 {"\\u00C0A", "A", NULL
, UCOL_TERTIARY
, NULL
, {0, 1, -1}, {1, 1}},
169 {"B\\u00C0", "A", NULL
, UCOL_TERTIARY
, NULL
, {1, -1}, {1}},
170 {"\\u00C0B", "A", NULL
, UCOL_TERTIARY
, NULL
, {0, -1}, {1}},
171 {"\\u00C0", "\\u0300", NULL
, UCOL_TERTIARY
, NULL
, {0, -1}, {1}},
172 {"\\u0300\\u00C0", "\\u0300", NULL
, UCOL_TERTIARY
, NULL
, {0, 1, -1},
174 {"\\u00C0\\u0300", "\\u0300", NULL
, UCOL_TERTIARY
, NULL
, {-1}, {0}},
175 /* A + 030A + 0301 */
176 {"\\u01FA", "\\u01FA", NULL
, UCOL_TERTIARY
, NULL
, {0, -1}, {1}},
177 {"\\u01FA", "\\u030A", NULL
, UCOL_TERTIARY
, NULL
, {-1}, {0}},
178 {"\\u01FA", "A\\u030A", NULL
, UCOL_TERTIARY
, NULL
, {-1}, {0}},
179 {"\\u01FA", "\\u030AA", NULL
, UCOL_TERTIARY
, NULL
, {-1}, {0}},
180 {"\\u01FA", "\\u0301", NULL
, UCOL_TERTIARY
, NULL
, {-1}, {0}},
181 {"\\u01FA", "A\\u0301", NULL
, UCOL_TERTIARY
, NULL
, {-1}, {0}},
182 {"\\u01FA", "\\u0301A", NULL
, UCOL_TERTIARY
, NULL
, {-1}, {0}},
183 {"\\u01FA", "\\u030A\\u0301", NULL
, UCOL_TERTIARY
, NULL
, {0, -1}, {1}},
184 {"A\\u01FA", "A\\u030A", NULL
, UCOL_TERTIARY
, NULL
, {-1}, {0}},
185 {"\\u01FAA", "\\u0301A", NULL
, UCOL_TERTIARY
, NULL
, {-1}, {0}},
186 {"\\u0F73", "\\u0F73", NULL
, UCOL_TERTIARY
, NULL
, {0, -1}, {1}},
187 {"\\u0F73", "\\u0F71", NULL
, UCOL_TERTIARY
, NULL
, {-1}, {0}},
188 {"\\u0F73", "\\u0F72", NULL
, UCOL_TERTIARY
, NULL
, {-1}, {0}},
189 {"\\u0F73", "\\u0F71\\u0F72", NULL
, UCOL_TERTIARY
, NULL
, {0, -1}, {1}},
190 {"A\\u0F73", "A\\u0F71", NULL
, UCOL_TERTIARY
, NULL
, {-1}, {0}},
191 {"\\u0F73A", "\\u0F72A", NULL
, UCOL_TERTIARY
, NULL
, {-1}, {0}},
192 {NULL
, NULL
, NULL
, UCOL_TERTIARY
, NULL
, {-1}, {0}}
/* MATCH: two UCOL_TERTIARY rows with second string "bee"; the second row's
   first string extends the first row's, and its lists have one more entry.
   Integer lists are presumably match offsets ending in -1, and match lengths
   -- TODO confirm against struct SearchData, not visible here. The last row
   has NULL strings, presumably the end-of-table marker.
   NOTE(review): the closing of the array is not visible here; compare with
   the complete file. */
195 static const SearchData MATCH
[] = {
196 {"a busy bee is a very busy beeee", "bee", NULL
, UCOL_TERTIARY
, NULL
,
197 {7, 26, -1}, {3, 3}},
198 /* 012345678901234567890123456789012345678901234567890 */
199 {"a busy bee is a very busy beeee with no bee life", "bee", NULL
,
200 UCOL_TERTIARY
, NULL
, {7, 26, 40, -1}, {3, 3, 3}},
201 {NULL
, NULL
, NULL
, UCOL_TERTIARY
, NULL
, {-1}, {0}}
/* SUPPLEMENTARY: UCOL_TERTIARY rows whose strings contain escaped surrogate
   code units (D800/DC00-range and D834 DDB9), alone and next to spaces or
   punctuation.
   Rows hold: two strings (presumably text and pattern), a string or NULL, a
   UCOL_* strength, a string or NULL, and two integer lists (presumably match
   offsets ending in -1, and match lengths) -- TODO confirm against struct
   SearchData, not visible here. The last row has NULL strings, presumably
   the end-of-table marker.
   NOTE(review): the first row stops after the first list and the closing of
   the array is not visible here; compare with the complete file. */
204 static const SearchData SUPPLEMENTARY
[] = {
205 /* 012345678901234567890123456789012345678901234567890012345678901234567890123456789012345678901234567890012345678901234567890123456789 */
206 {"abc \\uD800\\uDC00 \\uD800\\uDC01 \\uD801\\uDC00 \\uD800\\uDC00abc abc\\uD800\\uDC00 \\uD800\\uD800\\uDC00 \\uD800\\uDC00\\uDC00",
207 "\\uD800\\uDC00", NULL
, UCOL_TERTIARY
, NULL
, {4, 13, 22, 26, 29, -1},
209 {"and\\uD834\\uDDB9this sentence", "\\uD834\\uDDB9", NULL
,
210 UCOL_TERTIARY
, NULL
, {3, -1}, {2}},
211 {"and \\uD834\\uDDB9 this sentence", " \\uD834\\uDDB9 ", NULL
,
212 UCOL_TERTIARY
, NULL
, {3, -1}, {4}},
213 {"and-\\uD834\\uDDB9-this sentence", "-\\uD834\\uDDB9-", NULL
,
214 UCOL_TERTIARY
, NULL
, {3, -1}, {4}},
215 {"and,\\uD834\\uDDB9,this sentence", ",\\uD834\\uDDB9,", NULL
,
216 UCOL_TERTIARY
, NULL
, {3, -1}, {4}},
217 {"and?\\uD834\\uDDB9?this sentence", "?\\uD834\\uDDB9?", NULL
,
218 UCOL_TERTIARY
, NULL
, {3, -1}, {4}},
219 {NULL
, NULL
, NULL
, UCOL_TERTIARY
, NULL
, {-1}, {0}}
/* Collation rule text: a reset at 'z' that sets "ab/c" equal to it, then
   lists AB, X + U+0300, ABC and X + U+0300 + U+0315 with '<'.
   Presumably ICU rule syntax in which the multi-character items are
   contractions and '/' introduces an expansion -- TODO confirm against the
   ICU rule documentation and the consumer of this string (not visible
   here). */
222 static const char *CONTRACTIONRULE
=
223 "&z = ab/c < AB < X\\u0300 < ABC < X\\u0300\\u0315";
/* CONTRACTION: UCOL_TERTIARY rows grouped by the inline comments below
   (common discontiguous, contraction prefix, ends not with a contraction
   character, blocked discontiguous), plus a final "ab" / "z" row.
   Rows hold: two strings (presumably text and pattern), a string or NULL, a
   UCOL_* strength, a string or NULL, and two integer lists (presumably match
   offsets ending in -1, and match lengths) -- TODO confirm against struct
   SearchData, not visible here. The last row has NULL strings, presumably
   the end-of-table marker.
   NOTE(review): several rows stop early and the closing of the array is not
   visible here; compare with the complete file. */
225 static const SearchData CONTRACTION
[] = {
226 /* common discontiguous */
227 {"A\\u0300\\u0315", "\\u0300", NULL
, UCOL_TERTIARY
, NULL
, {-1}, {0}},
228 {"A\\u0300\\u0315", "\\u0300\\u0315", NULL
, UCOL_TERTIARY
, NULL
, {1, -1},
230 /* contraction prefix */
231 {"AB\\u0315C", "A", NULL
, UCOL_TERTIARY
, NULL
, {-1}, {0}},
232 {"AB\\u0315C", "AB", NULL
, UCOL_TERTIARY
, NULL
, {0, -1}, {2}},
233 {"AB\\u0315C", "\\u0315", NULL
, UCOL_TERTIARY
, NULL
, {2, -1}, {1}},
234 /* discontiguous problem here for backwards iteration.
235 accents not found because discontiguous stores all information */
236 {"X\\u0300\\u0319\\u0315", "\\u0319", NULL
, UCOL_TERTIARY
, NULL
, {-1},
238 /* ends not with a contraction character */
239 {"X\\u0315\\u0300D", "\\u0300\\u0315", NULL
, UCOL_TERTIARY
, NULL
, {-1},
241 {"X\\u0315\\u0300D", "X\\u0300\\u0315", NULL
, UCOL_TERTIARY
, NULL
,
243 {"X\\u0300\\u031A\\u0315D", "X\\u0300", NULL
, UCOL_TERTIARY
, NULL
, {-1},
245 /* blocked discontiguous */
246 {"X\\u0300\\u031A\\u0315D", "\\u031A\\u0315D", NULL
, UCOL_TERTIARY
, NULL
,
248 {"ab", "z", NULL
, UCOL_TERTIARY
, NULL
, {0, -1}, {2}},
249 {NULL
, NULL
, NULL
, UCOL_TERTIARY
, NULL
, {-1}, {0}}
/* Collation rule text: a reset at 'a' that sets the escaped code point U+0300
   equal to it. Presumably ICU rule syntax -- TODO confirm where this string
   is consumed (not visible here). */
252 static const char *IGNORABLERULE
= "&a = \\u0300";
/* IGNORABLE: one UCOL_PRIMARY data row whose first string mixes U+0315 and
   U+0300 escapes with spaces and whose second string is the U+0300 escape.
   The last row has NULL strings, presumably the end-of-table marker.
   NOTE(review): the data row's integer lists and the closing of the array
   are not visible here; compare with the complete file. */
254 static const SearchData IGNORABLE
[] = {
255 {"\\u0315\\u0300 \\u0315\\u0300\\u0315 ", "\\u0300", NULL
, UCOL_PRIMARY
, NULL
,
257 {NULL
, NULL
, NULL
, UCOL_TERTIARY
, NULL
, {-1}, {0}}
/* BASICCANONICAL: UCOL_TERTIARY search rows with NULL third strings, covering
   plain words and strings with combining-mark escapes (U+0300, U+0325,
   U+0323, U+0308). Presumably used with canonical matching, as the name
   suggests -- TODO confirm in the test driver (not visible here).
   Rows hold: two strings (presumably text and pattern), a string or NULL, a
   UCOL_* strength, a string or NULL, and two integer lists (presumably match
   offsets ending in -1, and match lengths) -- TODO confirm against struct
   SearchData, not visible here. The last row has NULL strings, presumably
   the end-of-table marker.
   NOTE(review): some rows stop early and the closing of the array is not
   visible here; compare with the complete file. */
260 static const SearchData BASICCANONICAL
[] = {
261 {"xxxxxxxxxxxxxxxxxxxx", "fisher", NULL
, UCOL_TERTIARY
, NULL
, {-1}, {0}},
262 {"silly spring string", "string", NULL
, UCOL_TERTIARY
, NULL
, {13, -1},
264 {"silly spring string string", "string", NULL
, UCOL_TERTIARY
, NULL
,
265 {13, 20, -1}, {6, 6}},
266 {"silly string spring string", "string", NULL
, UCOL_TERTIARY
, NULL
,
267 {6, 20, -1}, {6, 6}},
268 {"string spring string", "string", NULL
, UCOL_TERTIARY
, NULL
, {0, 14, -1},
270 {"Scott Ganyo", "c", NULL
, UCOL_TERTIARY
, NULL
, {1, -1}, {1}},
271 {"Scott Ganyo", " ", NULL
, UCOL_TERTIARY
, NULL
, {5, -1}, {1}},
272 {"\\u0300\\u0325", "\\u0300", NULL
, UCOL_TERTIARY
, NULL
, {0, -1}, {2}},
273 {"a\\u0300\\u0325", "\\u0300", NULL
, UCOL_TERTIARY
, NULL
, {1, -1}, {2}},
274 {"a\\u0300\\u0325", "\\u0300\\u0325", NULL
, UCOL_TERTIARY
, NULL
, {1, -1},
276 {"a\\u0300b", "\\u0300", NULL
, UCOL_TERTIARY
, NULL
, {1, -1}, {1}},
277 {"a\\u0300\\u0325b", "\\u0300b", NULL
, UCOL_TERTIARY
, NULL
, {1, -1}, {3}},
278 {"\\u0325\\u0300A\\u0325\\u0300", "\\u0300A\\u0300", NULL
, UCOL_TERTIARY
,
280 {"\\u0325\\u0300A\\u0325\\u0300", "\\u0325A\\u0325", NULL
, UCOL_TERTIARY
,
282 {"a\\u0300\\u0325b\\u0300\\u0325c \\u0325b\\u0300 \\u0300b\\u0325",
283 "\\u0300b\\u0325", NULL
, UCOL_TERTIARY
, NULL
, {1, 12, -1}, {5, 3}},
284 {"\\u00c4\\u0323", "A\\u0323\\u0308", NULL
, UCOL_TERTIARY
, NULL
, {0, -1}, {2}},
285 {"\\u0308\\u0323", "\\u0323\\u0308", NULL
, UCOL_TERTIARY
, NULL
, {0, -1}, {2}},
286 {NULL
, NULL
, NULL
, UCOL_TERTIARY
, NULL
, {-1}, {0}}
/* NORMCANONICAL: UCOL_TERTIARY rows searching for the U+0300 and U+0325
   escapes, singly and in both orders, in strings that contain both marks.
   Presumably used with canonical matching, as the name suggests -- TODO
   confirm in the test driver (not visible here).
   Integer lists are presumably match offsets ending in -1, and match lengths
   -- TODO confirm against struct SearchData, not visible here. The last row
   has NULL strings, presumably the end-of-table marker.
   NOTE(review): two rows stop after the first list and the closing of the
   array is not visible here; compare with the complete file. */
289 static const SearchData NORMCANONICAL
[] = {
290 {"\\u0300\\u0325", "\\u0300", NULL
, UCOL_TERTIARY
, NULL
, {0, -1}, {2}},
291 {"\\u0300\\u0325", "\\u0325", NULL
, UCOL_TERTIARY
, NULL
, {0, -1}, {2}},
292 {"a\\u0300\\u0325", "\\u0325\\u0300", NULL
, UCOL_TERTIARY
, NULL
, {1, -1},
294 {"a\\u0300\\u0325", "\\u0300\\u0325", NULL
, UCOL_TERTIARY
, NULL
, {1, -1},
296 {"a\\u0300\\u0325", "\\u0325", NULL
, UCOL_TERTIARY
, NULL
, {1, -1}, {2}},
297 {"a\\u0300\\u0325", "\\u0300", NULL
, UCOL_TERTIARY
, NULL
, {1, -1}, {2}},
298 {NULL
, NULL
, NULL
, UCOL_TERTIARY
, NULL
, {-1}, {0}}
/* BREAKITERATORCANONICAL: search rows whose fifth field is "characterbreaker"
   or "wordbreaker"; some rows also give a locale-like third string ("de",
   "es", "fr"). Presumably used with canonical matching, as the name suggests
   -- TODO confirm in the test driver (not visible here).
   Rows hold: two strings (presumably text and pattern), a string or NULL, a
   UCOL_* strength, a string or NULL, and two integer lists (presumably match
   offsets ending in -1, and match lengths) -- TODO confirm against struct
   SearchData, not visible here. The last row has NULL strings, presumably
   the end-of-table marker.
   NOTE(review): some rows stop early and the closing of the array is not
   visible here; compare with the complete file. */
301 static const SearchData BREAKITERATORCANONICAL
[] = {
302 {"foxy fox", "fox", NULL
, UCOL_TERTIARY
, "characterbreaker", {0, 5, -1},
304 {"foxy fox", "fox", NULL
, UCOL_TERTIARY
, "wordbreaker", {5, -1}, {3}},
305 {"This is a toe T\\u00F6ne", "toe", "de", UCOL_PRIMARY
,
306 "characterbreaker", {10, 14, -1}, {3, 2}},
307 {"This is a toe T\\u00F6ne", "toe", "de", UCOL_PRIMARY
, "wordbreaker",
309 {"Channel, another channel, more channels, and one last Channel",
310 "Channel", "es", UCOL_TERTIARY
, "wordbreaker", {0, 54, -1}, {7, 7}},
312 {"testing that \\u00e9 does not match e", "e", NULL
, UCOL_TERTIARY
,
313 "characterbreaker", {1, 17, 30, -1}, {1, 1, 1}},
314 {"testing that string ab\\u00e9cd does not match e", "e", NULL
,
315 UCOL_TERTIARY
, "characterbreaker", {1, 28, 41, -1}, {1, 1, 1}},
316 {"\\u00c9", "e", "fr", UCOL_PRIMARY
, "characterbreaker", {0, -1}, {1}},
317 {NULL
, NULL
, NULL
, UCOL_TERTIARY
, NULL
, {-1}, {0}}
/* STRENGTHCANONICAL: UCOL_PRIMARY search rows with third strings "en", "fr",
   "de" and "es"; one row names "wordbreaker". Presumably used with canonical
   matching, as the name suggests -- TODO confirm in the test driver (not
   visible here).
   Rows hold: two strings (presumably text and pattern), a string or NULL, a
   UCOL_* strength, a string or NULL, and two integer lists (presumably match
   offsets ending in -1, and match lengths) -- TODO confirm against struct
   SearchData, not visible here. The last row has NULL strings, presumably
   the end-of-table marker.
   NOTE(review): one row stops after the first list and the closing of the
   array is not visible here; compare with the complete file. */
320 static const SearchData STRENGTHCANONICAL
[] = {
321 /*012345678901234567890123456789012345678901234567890123456789 */
322 {"The quick brown fox jumps over the lazy foxes", "fox", "en",
323 UCOL_PRIMARY
, NULL
, {16, 40, -1}, {3, 3}},
324 {"The quick brown fox jumps over the lazy foxes", "fox", "en",
325 UCOL_PRIMARY
, "wordbreaker", {16, -1}, {3}},
326 {"blackbirds Pat p\\u00E9ch\\u00E9 p\\u00EAche p\\u00E9cher p\\u00EAcher Tod T\\u00F6ne black Tofu blackbirds Ton PAT toehold blackbird black-bird pat toe big Toe",
327 "peche", "fr", UCOL_PRIMARY
, NULL
, {15, 21, 27, 34, -1}, {5, 5, 5, 5}},
328 {"This is a toe T\\u00F6ne", "toe", "de", UCOL_PRIMARY
, NULL
,
329 {10, 14, -1}, {3, 2}},
330 {"A channel, another CHANNEL, more Channels, and one last channel...",
331 "channel", "es", UCOL_PRIMARY
, NULL
, {2, 19, 33, 56, -1},
333 {NULL
, NULL
, NULL
, UCOL_TERTIARY
, NULL
, {-1}, {0}}
/* VARIABLECANONICAL: search rows whose texts contain spaces and hyphens, at
   UCOL_TERTIARY, UCOL_PRIMARY, UCOL_QUATERNARY and UCOL_SECONDARY.
   Presumably used with canonical matching, as the name suggests -- TODO
   confirm in the test driver (not visible here).
   Rows hold: two strings (presumably text and pattern), a string or NULL, a
   UCOL_* strength, a string or NULL, and two integer lists (presumably match
   offsets ending in -1, and match lengths) -- TODO confirm against struct
   SearchData, not visible here. The last row has NULL strings, presumably
   the end-of-table marker.
   NOTE(review): some rows stop early and the closing of the array is not
   visible here; compare with the complete file. */
336 static const SearchData VARIABLECANONICAL
[] = {
337 /*012345678901234567890123456789012345678901234567890123456789 */
338 {"blackbirds black blackbirds blackbird black-bird",
339 "blackbird", NULL
, UCOL_TERTIARY
, NULL
, {0, 17, 28, 38, -1},
341 /* to see that it doesn't go into an infinite loop if the start of text
342 is an ignorable character */
343 {" on", "go", NULL
, UCOL_TERTIARY
, NULL
, {-1}, {0}},
344 {"abcdefghijklmnopqrstuvwxyz", " ", NULL
, UCOL_PRIMARY
, NULL
,
345 {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19,
346 20, 21, 22, 23, 24, 25, -1}, {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
347 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}},
348 /* testing tightest match */
349 {" abc a bc ab c a bc ab c", "abc", NULL
, UCOL_QUATERNARY
,
351 /*012345678901234567890123456789012345678901234567890123456789 */
352 {" abc a bc ab c a bc ab c", "abc", NULL
, UCOL_SECONDARY
,
353 NULL
, {1, 6, 13, 21, 31, -1}, {3, 4, 4, 5, 5}},
354 /* totally ignorable text */
355 {" ---------------", "abc", NULL
, UCOL_SECONDARY
,
357 {NULL
, NULL
, NULL
, UCOL_TERTIARY
, NULL
, {-1}, {0}}
/* OVERLAPCANONICAL: one data row for "abab" in "abababab" with first integer
   list {0, 2, 4, -1} (presumably overlapping match offsets -- TODO confirm
   against struct SearchData, not visible here). Presumably used with
   canonical matching, as the name suggests -- TODO confirm in the test
   driver. The last row has NULL strings, presumably the end-of-table marker.
   NOTE(review): the data row stops after the first list and the closing of
   the array is not visible here; compare with the complete file. */
360 static const SearchData OVERLAPCANONICAL
[] = {
361 {"abababab", "abab", NULL
, UCOL_TERTIARY
, NULL
, {0, 2, 4, -1},
363 {NULL
, NULL
, NULL
, UCOL_TERTIARY
, NULL
, {-1}, {0}}
/* NONOVERLAPCANONICAL: one data row for "abab" in "abababab" with integer
   lists {0, 4, -1} and {4, 4} (presumably non-overlapping match offsets and
   match lengths -- TODO confirm against struct SearchData, not visible
   here). Presumably used with canonical matching, as the name suggests --
   TODO confirm in the test driver. The last row has NULL strings, presumably
   the end-of-table marker.
   NOTE(review): the closing of the array is not visible here; compare with
   the complete file. */
366 static const SearchData NONOVERLAPCANONICAL
[] = {
367 {"abababab", "abab", NULL
, UCOL_TERTIARY
, NULL
, {0, 4, -1}, {4, 4}},
368 {NULL
, NULL
, NULL
, UCOL_TERTIARY
, NULL
, {-1}, {0}}
/* COLLATORCANONICAL: "fox" in "fox fpx" at two strengths. The UCOL_TERTIARY
   row lists {0, -1}; the UCOL_PRIMARY row lists {0, 4, -1}, i.e. "fpx" is
   also listed there. Presumably these rows are run with a custom collator
   and canonical matching -- TODO confirm in the test driver (not visible
   here).
   Integer lists are presumably match offsets ending in -1, and match lengths
   -- TODO confirm against struct SearchData, not visible here. The last row
   has NULL strings, presumably the end-of-table marker.
   NOTE(review): the closing of the array is not visible here; compare with
   the complete file. */
371 static const SearchData COLLATORCANONICAL
[] = {
373 {"fox fpx", "fox", NULL
, UCOL_TERTIARY
, NULL
, {0, -1}, {3}},
375 {"fox fpx", "fox", NULL
, UCOL_PRIMARY
, NULL
, {0, 4, -1}, {3, 3}},
376 {NULL
, NULL
, NULL
, UCOL_TERTIARY
, NULL
, {-1}, {0}}
/* PATTERNCANONICAL: two UCOL_PRIMARY rows over the same first string, with
   second strings "the" and "fox". Presumably used with canonical matching,
   as the name suggests -- TODO confirm in the test driver (not visible
   here).
   Integer lists are presumably match offsets ending in -1, and match lengths
   -- TODO confirm against struct SearchData, not visible here. The last row
   has NULL strings, presumably the end-of-table marker.
   NOTE(review): the closing of the array is not visible here; compare with
   the complete file. */
379 static const SearchData PATTERNCANONICAL
[] = {
380 {"The quick brown fox jumps over the lazy foxes", "the", NULL
,
381 UCOL_PRIMARY
, NULL
, {0, 31, -1}, {3, 3}},
382 {"The quick brown fox jumps over the lazy foxes", "fox", NULL
,
383 UCOL_PRIMARY
, NULL
, {16, 40, -1}, {3, 3}},
384 {NULL
, NULL
, NULL
, UCOL_TERTIARY
, NULL
, {-1}, {0}}
/* TEXTCANONICAL: two UCOL_TERTIARY rows with the same second string "fox" and
   two different first strings. Presumably used with canonical matching, as
   the name suggests -- TODO confirm in the test driver (not visible here).
   The first integer list is presumably match offsets ending in -1 -- TODO
   confirm against struct SearchData, not visible here. The last row has NULL
   strings, presumably the end-of-table marker.
   NOTE(review): both data rows stop after the first list and the closing of
   the array is not visible here; compare with the complete file. */
387 static const SearchData TEXTCANONICAL
[] = {
388 {"the foxy brown fox", "fox", NULL
, UCOL_TERTIARY
, NULL
, {4, 15, -1},
390 {"the quick brown fox", "fox", NULL
, UCOL_TERTIARY
, NULL
, {16, -1},
392 {NULL
, NULL
, NULL
, UCOL_TERTIARY
, NULL
, {-1}, {0}}
/* COMPOSITEBOUNDARIESCANONICAL: UCOL_TERTIARY rows built from the escaped
   code points U+00C0, U+01FA and U+0F73 and from combining marks such as
   U+0300, U+030A, U+0301, U+0F71 and U+0F72. Presumably used with canonical
   matching, as the name suggests -- TODO confirm in the test driver (not
   visible here).
   Rows hold: two strings (presumably text and pattern), a string or NULL, a
   UCOL_* strength, a string or NULL, and two integer lists (presumably match
   offsets ending in -1, and match lengths) -- TODO confirm against struct
   SearchData, not visible here. The last row has NULL strings, presumably
   the end-of-table marker.
   NOTE(review): one row stops after the first list and the closing of the
   array is not visible here; compare with the complete file. */
395 static const SearchData COMPOSITEBOUNDARIESCANONICAL
[] = {
396 {"\\u00C0", "A", NULL
, UCOL_TERTIARY
, NULL
, {0, -1}, {1}},
397 {"A\\u00C0C", "A", NULL
, UCOL_TERTIARY
, NULL
, {0, 1, -1}, {1, 1}},
398 {"\\u00C0A", "A", NULL
, UCOL_TERTIARY
, NULL
, {0, 1, -1}, {1, 1}},
399 {"B\\u00C0", "A", NULL
, UCOL_TERTIARY
, NULL
, {1, -1}, {1}},
400 {"\\u00C0B", "A", NULL
, UCOL_TERTIARY
, NULL
, {0, -1}, {1}},
401 {"\\u00C0", "\\u0300", NULL
, UCOL_TERTIARY
, NULL
, {0, -1}, {1}},
402 {"\\u0300\\u00C0", "\\u0300", NULL
, UCOL_TERTIARY
, NULL
, {0, 1, -1},
404 /* \\u0300 blocked by \\u0300 */
405 {"\\u00C0\\u0300", "\\u0300", NULL
, UCOL_TERTIARY
, NULL
, {0, -1}, {2}},
406 /* A + 030A + 0301 */
407 {"\\u01FA", "\\u01FA", NULL
, UCOL_TERTIARY
, NULL
, {0, -1}, {1}},
408 {"\\u01FA", "\\u030A", NULL
, UCOL_TERTIARY
, NULL
, {0, -1}, {1}},
409 {"\\u01FA", "A\\u030A", NULL
, UCOL_TERTIARY
, NULL
, {0, -1}, {1}},
410 {"\\u01FA", "\\u030AA", NULL
, UCOL_TERTIARY
, NULL
, {-1}, {0}},
411 {"\\u01FA", "\\u0301", NULL
, UCOL_TERTIARY
, NULL
, {0, -1}, {1}},
413 {"\\u01FA", "A\\u0301", NULL
, UCOL_TERTIARY
, NULL
, {-1}, {0}},
414 {"\\u01FA", "\\u0301A", NULL
, UCOL_TERTIARY
, NULL
, {-1}, {0}},
415 {"\\u01FA", "\\u030A\\u0301", NULL
, UCOL_TERTIARY
, NULL
, {0, -1}, {1}},
416 {"A\\u01FA", "A\\u030A", NULL
, UCOL_TERTIARY
, NULL
, {1, -1}, {1}},
417 {"\\u01FAA", "\\u0301A", NULL
, UCOL_TERTIARY
, NULL
, {0, -1}, {2}},
418 {"\\u0F73", "\\u0F73", NULL
, UCOL_TERTIARY
, NULL
, {0, -1}, {1}},
419 {"\\u0F73", "\\u0F71", NULL
, UCOL_TERTIARY
, NULL
, {0, -1}, {1}},
420 {"\\u0F73", "\\u0F72", NULL
, UCOL_TERTIARY
, NULL
, {0, -1}, {1}},
421 {"\\u0F73", "\\u0F71\\u0F72", NULL
, UCOL_TERTIARY
, NULL
, {0, -1}, {1}},
422 {"A\\u0F73", "A\\u0F71", NULL
, UCOL_TERTIARY
, NULL
, {0, -1}, {2}},
423 {"\\u0F73A", "\\u0F72A", NULL
, UCOL_TERTIARY
, NULL
, {0, -1}, {2}},
424 {"\\u01FA A\\u0301\\u030A A\\u030A\\u0301 A\\u030A \\u01FA", "A\\u030A",
425 NULL
, UCOL_TERTIARY
, NULL
, {0, 6, 10, 13, -1}, {1, 3, 2, 1}},
426 {NULL
, NULL
, NULL
, UCOL_TERTIARY
, NULL
, {-1}, {0}}
/* MATCHCANONICAL: two UCOL_TERTIARY rows with second string "bee"; the second
   row's first string extends the first row's, and its lists have one more
   entry. Presumably used with canonical matching, as the name suggests --
   TODO confirm in the test driver (not visible here).
   Integer lists are presumably match offsets ending in -1, and match lengths
   -- TODO confirm against struct SearchData, not visible here. The last row
   has NULL strings, presumably the end-of-table marker.
   NOTE(review): the closing of the array is not visible here; compare with
   the complete file. */
429 static const SearchData MATCHCANONICAL
[] = {
430 {"a busy bee is a very busy beeee", "bee", NULL
, UCOL_TERTIARY
, NULL
,
431 {7, 26, -1}, {3, 3}},
432 /*012345678901234567890123456789012345678901234567890 */
433 {"a busy bee is a very busy beeee with no bee life", "bee", NULL
,
434 UCOL_TERTIARY
, NULL
, {7, 26, 40, -1}, {3, 3, 3}},
435 {NULL
, NULL
, NULL
, UCOL_TERTIARY
, NULL
, {-1}, {0}}
/* SUPPLEMENTARYCANONICAL: UCOL_TERTIARY rows whose strings contain escaped
   surrogate code units (D800/DC00-range and D834 DDB9), alone and next to
   spaces or punctuation. Presumably used with canonical matching, as the
   name suggests -- TODO confirm in the test driver (not visible here).
   Rows hold: two strings (presumably text and pattern), a string or NULL, a
   UCOL_* strength, a string or NULL, and two integer lists (presumably match
   offsets ending in -1, and match lengths) -- TODO confirm against struct
   SearchData, not visible here. The last row has NULL strings, presumably
   the end-of-table marker.
   NOTE(review): the first row stops after the first list and the closing of
   the array is not visible here; compare with the complete file. */
438 static const SearchData SUPPLEMENTARYCANONICAL
[] = {
439 /*012345678901234567890123456789012345678901234567890012345678901234567890123456789012345678901234567890012345678901234567890123456789 */
440 {"abc \\uD800\\uDC00 \\uD800\\uDC01 \\uD801\\uDC00 \\uD800\\uDC00abc abc\\uD800\\uDC00 \\uD800\\uD800\\uDC00 \\uD800\\uDC00\\uDC00",
441 "\\uD800\\uDC00", NULL
, UCOL_TERTIARY
, NULL
, {4, 13, 22, 26, 29, -1},
443 {"and\\uD834\\uDDB9this sentence", "\\uD834\\uDDB9", NULL
,
444 UCOL_TERTIARY
, NULL
, {3, -1}, {2}},
445 {"and \\uD834\\uDDB9 this sentence", " \\uD834\\uDDB9 ", NULL
,
446 UCOL_TERTIARY
, NULL
, {3, -1}, {4}},
447 {"and-\\uD834\\uDDB9-this sentence", "-\\uD834\\uDDB9-", NULL
,
448 UCOL_TERTIARY
, NULL
, {3, -1}, {4}},
449 {"and,\\uD834\\uDDB9,this sentence", ",\\uD834\\uDDB9,", NULL
,
450 UCOL_TERTIARY
, NULL
, {3, -1}, {4}},
451 {"and?\\uD834\\uDDB9?this sentence", "?\\uD834\\uDDB9?", NULL
,
452 UCOL_TERTIARY
, NULL
, {3, -1}, {4}},
453 {NULL
, NULL
, NULL
, UCOL_TERTIARY
, NULL
, {-1}, {0}}
/* CONTRACTIONCANONICAL: UCOL_TERTIARY rows grouped by the inline comments
   below (common discontiguous, contraction prefix, ends not with a
   contraction character, blocked discontiguous), plus a final "ab" / "z"
   row. Presumably used with canonical matching, as the name suggests -- TODO
   confirm in the test driver (not visible here).
   Rows hold: two strings (presumably text and pattern), a string or NULL, a
   UCOL_* strength, a string or NULL, and two integer lists (presumably match
   offsets ending in -1, and match lengths) -- TODO confirm against struct
   SearchData, not visible here. The last row has NULL strings, presumably
   the end-of-table marker.
   NOTE(review): several rows stop early, one commented-out row is cut off,
   and the closing of the array is not visible here; compare with the
   complete file. */
456 static const SearchData CONTRACTIONCANONICAL
[] = {
457 /* common discontiguous */
458 {"A\\u0300\\u0315", "\\u0300", NULL
, UCOL_TERTIARY
, NULL
, {1, -1}, {2}},
459 {"A\\u0300\\u0315", "\\u0300\\u0315", NULL
, UCOL_TERTIARY
, NULL
, {1, -1},
461 /* contraction prefix */
462 {"AB\\u0315C", "A", NULL
, UCOL_TERTIARY
, NULL
, {-1}, {0}},
463 {"AB\\u0315C", "AB", NULL
, UCOL_TERTIARY
, NULL
, {0, -1}, {2}},
464 {"AB\\u0315C", "\\u0315", NULL
, UCOL_TERTIARY
, NULL
, {2, -1}, {1}},
465 /* discontiguous problem here for backwards iteration.
466 forwards gives 0, 4 but backwards gives 1, 3 */
467 /* {"X\\u0300\\u0319\\u0315", "\\u0319", NULL, UCOL_TERTIARY, NULL, {0, -1},
470 /* ends not with a contraction character */
471 {"X\\u0315\\u0300D", "\\u0300\\u0315", NULL
, UCOL_TERTIARY
, NULL
, {-1},
473 {"X\\u0315\\u0300D", "X\\u0300\\u0315", NULL
, UCOL_TERTIARY
, NULL
,
475 {"X\\u0300\\u031A\\u0315D", "X\\u0300", NULL
, UCOL_TERTIARY
, NULL
,
477 /* blocked discontiguous */
478 {"X\\u0300\\u031A\\u0315D", "\\u031A\\u0315D", NULL
, UCOL_TERTIARY
, NULL
,
480 {"ab", "z", NULL
, UCOL_TERTIARY
, NULL
, {0, -1}, {2}},
481 {NULL
, NULL
, NULL
, UCOL_TERTIARY
, NULL
, {-1}, {0}}
484 #endif /* #if !UCONFIG_NO_COLLATION */