]> git.saurik.com Git - apple/icu.git/blame - icuSources/test/cintltst/usrchdat.c
ICU-531.48.tar.gz
[apple/icu.git] / icuSources / test / cintltst / usrchdat.c
CommitLineData
b75a7d8f 1/********************************************************************
4388f060 2 * Copyright (c) 2001-2011 International Business Machines
b75a7d8f
A
3 * Corporation and others. All Rights Reserved.
4 ********************************************************************
5 * File USRCHDAT.H
6 * Modification History:
7 * Name date Description
8 * synwee July 31 2001 creation
9 ********************************************************************/
10
46f4442e
A
11
12/*
13Note: This file is included by other C and C++ files. This file should not be directly compiled.
14*/
b75a7d8f
A
15#ifndef USRCHDAT_C
16#define USRCHDAT_C
17
46f4442e 18#include "unicode/ucol.h"
b75a7d8f
A
19
20#if !UCONFIG_NO_COLLATION
21
46f4442e
A
22/* Set to 1 if matches must be on grapheme boundaries */
23#define GRAPHEME_BOUNDARIES 1
b75a7d8f 24
46f4442e 25U_CDECL_BEGIN
b75a7d8f
A
26struct SearchData {
27 const char *text;
28 const char *pattern;
729e4ab9 29 const char *collator; /* currently supported "fr" "es" "de", plus NULL/other => "en" */
b75a7d8f 30 UCollationStrength strength;
729e4ab9
A
31 USearchAttributeValue elemCompare; /* value for the USEARCH_ELEMENT_COMPARISON attribute */
32 const char *breaker; /* currently supported "wordbreaker" for EN_WORDBREAKER_, plus NULL/other => EN_CHARACTERBREAKER_ */
46f4442e
A
33 int8_t offset[32];
34 uint8_t size[32];
b75a7d8f 35};
46f4442e 36U_CDECL_END
b75a7d8f
A
37
38typedef struct SearchData SearchData;
39
40static const char *TESTCOLLATORRULE = "& o,O ; p,P";
41
42static const char *EXTRACOLLATIONRULE = " & ae ; \\u00e4 & AE ; \\u00c4 & oe ; \\u00f6 & OE ; \\u00d6 & ue ; \\u00fc & UE ; \\u00dc";
43
44static const SearchData BASIC[] = {
729e4ab9
A
45 {"xxxxxxxxxxxxxxxxxxxx", "fisher", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {-1}, {0}},
46 {"silly spring string", "string", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {13, -1},
b75a7d8f 47 {6}},
729e4ab9 48 {"silly spring string string", "string", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL,
b75a7d8f 49 {13, 20, -1}, {6, 6}},
729e4ab9 50 {"silly string spring string", "string", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL,
b75a7d8f 51 {6, 20, -1}, {6, 6}},
729e4ab9 52 {"string spring string", "string", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {0, 14, -1},
b75a7d8f 53 {6, 6}},
729e4ab9
A
54 {"Scott Ganyo", "c", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {1, -1}, {1}},
55 {"Scott Ganyo", " ", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {5, -1}, {1}},
56 {"\\u0300\\u0325", "\\u0300", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {-1}, {0}},
57 {"a\\u0300\\u0325", "\\u0300", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {-1}, {0}},
46f4442e
A
58
59#if GRAPHEME_BOUNDARIES
729e4ab9
A
60 {"a\\u0300\\u0325", "\\u0300\\u0325", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {-1}, {0}},
61 {"a\\u0300b", "\\u0300", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {-1}, {0}},
46f4442e 62#else
729e4ab9
A
63 {"a\\u0300\\u0325", "\\u0300\\u0325", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {1, -1}, {2}},
64 {"a\\u0300b", "\\u0300", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {1, -1}, {1}},
46f4442e
A
65#endif
66
729e4ab9 67 {"\\u00c9", "e", NULL, UCOL_PRIMARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {0, -1}, {1}},
4388f060 68 {"x a\\u0301", "a\\u0301", NULL, UCOL_PRIMARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {2, -1}, {2}},
729e4ab9 69 {NULL, NULL, NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {-1}, {0}}
b75a7d8f
A
70};
71
72static const SearchData BREAKITERATOREXACT[] = {
729e4ab9 73 {"foxy fox", "fox", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, "characterbreaker", {0, 5, -1},
b75a7d8f 74 {3, 3}},
729e4ab9
A
75 {"foxy fox", "fox", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, "wordbreaker", {5, -1}, {3}},
76 {"This is a toe T\\u00F6ne", "toe", "de", UCOL_PRIMARY, USEARCH_STANDARD_ELEMENT_COMPARISON,
b75a7d8f 77 "characterbreaker", {10, 14, -1}, {3, 2}},
729e4ab9 78 {"This is a toe T\\u00F6ne", "toe", "de", UCOL_PRIMARY, USEARCH_STANDARD_ELEMENT_COMPARISON, "wordbreaker",
b75a7d8f
A
79 {10, -1}, {3}},
80 {"Channel, another channel, more channels, and one last Channel",
729e4ab9 81 "Channel", "es", UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, "wordbreaker", {0, 54, -1}, {7, 7}},
b75a7d8f 82 /* jitterbug 1745 */
729e4ab9 83 {"testing that \\u00e9 does not match e", "e", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON,
b75a7d8f
A
84 "characterbreaker", {1, 17, 30, -1}, {1, 1, 1}},
85 {"testing that string ab\\u00e9cd does not match e", "e", NULL,
729e4ab9
A
86 UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, "characterbreaker", {1, 28, 41, -1}, {1, 1, 1}},
87 {"\\u00c9", "e", "fr", UCOL_PRIMARY, USEARCH_STANDARD_ELEMENT_COMPARISON, "characterbreaker", {0, -1}, {1}},
46f4442e
A
88#if 0
89 /* Problem reported by Dave Bertoni, same as ticket 4279? */
729e4ab9 90 {"\\u0043\\u004F\\u0302\\u0054\\u00C9", "\\u004F", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, "characterbreaker", {1, -1}, {2}},
46f4442e 91#endif
729e4ab9
A
92 {NULL, NULL, NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {-1}, {0}}
93};
94
95#define PECHE_WITH_ACCENTS "un p\\u00E9ch\\u00E9, " \
96 "\\u00E7a p\\u00E8che par, " \
97 "p\\u00E9cher, " \
98 "une p\\u00EAche, " \
99 "un p\\u00EAcher, " \
100 "j\\u2019ai p\\u00EAch\\u00E9, " \
101 "un p\\u00E9cheur, " \
102 "\\u201Cp\\u00E9che\\u201D, " \
103 "decomp peche\\u0301, " \
104 "base peche"
105/* in the above, the interesting words and their offsets are:
106 3 pe<301>che<301>
107 13 pe<300>che
108 24 pe<301>cher
109 36 pe<302>che
110 46 pe<302>cher
111 59 pe<302>che<301>
112 69 pe<301>cheur
113 79 pe<301>che
114 94 peche<+301>
115107 peche
116*/
b75a7d8f
A
117
118static const SearchData STRENGTH[] = {
729e4ab9
A
119 /*012345678901234567890123456789012345678901234567890123456789*/
120 /*00*/{"The quick brown fox jumps over the lazy foxes", "fox", "en",
121 UCOL_PRIMARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {16, 40, -1}, {3, 3}},
122 /*01*/{"The quick brown fox jumps over the lazy foxes", "fox", "en",
123 UCOL_PRIMARY, USEARCH_STANDARD_ELEMENT_COMPARISON, "wordbreaker", {16, -1}, {3}},
124 /*02*/{"blackbirds Pat p\\u00E9ch\\u00E9 p\\u00EAche p\\u00E9cher p\\u00EAcher Tod T\\u00F6ne black Tofu blackbirds Ton PAT toehold blackbird black-bird pat toe big Toe",
125 "peche", "fr", UCOL_PRIMARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {15, 21, 27, 34, -1}, {5, 5, 5, 5}},
126 /*03*/{"This is a toe T\\u00F6ne", "toe", "de", UCOL_PRIMARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL,
127 {10, 14, -1}, {3, 2}},
128 /*04*/{"A channel, another CHANNEL, more Channels, and one last channel...",
129 "channel", "es", UCOL_PRIMARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {2, 19, 33, 56, -1}, {7, 7, 7, 7}},
130 /*05*/{"\\u00c0 should match but not A", "A\\u0300", "en", UCOL_IDENTICAL, USEARCH_STANDARD_ELEMENT_COMPARISON,
131 NULL, {0, -1}, {1, 0}},
132 /* some tests for modified element comparison, ticket #7093 */
133 /*06*/{PECHE_WITH_ACCENTS, "peche", "en", UCOL_PRIMARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {3, 13, 24, 36, 46, 59, 69, 79, 94, 107, -1}, {5, 5, 5, 5, 5, 5, 5, 5, 6, 5}},
134 /*07*/{PECHE_WITH_ACCENTS, "peche", "en", UCOL_PRIMARY, USEARCH_STANDARD_ELEMENT_COMPARISON, "wordbreaker", {3, 13, 36, 59, 79, 94, 107, -1}, {5, 5, 5, 5, 5, 6, 5}},
135 /*08*/{PECHE_WITH_ACCENTS, "peche", "en", UCOL_SECONDARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {107, -1}, {5}},
136 /*09*/{PECHE_WITH_ACCENTS, "peche", "en", UCOL_SECONDARY, USEARCH_PATTERN_BASE_WEIGHT_IS_WILDCARD, NULL, {3, 13, 24, 36, 46, 59, 69, 79, 94, 107, -1}, {5, 5, 5, 5, 5, 5, 5, 5, 6, 5}},
137 /*10*/{PECHE_WITH_ACCENTS, "peche", "en", UCOL_SECONDARY, USEARCH_PATTERN_BASE_WEIGHT_IS_WILDCARD, "wordbreaker", {3, 13, 36, 59, 79, 94, 107, -1}, {5, 5, 5, 5, 5, 6, 5}},
138 /*11*/{PECHE_WITH_ACCENTS, "p\\u00E9che", "en", UCOL_SECONDARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {24, 69, 79, -1}, {5, 5, 5}},
139 /*12*/{PECHE_WITH_ACCENTS, "p\\u00E9che", "en", UCOL_SECONDARY, USEARCH_STANDARD_ELEMENT_COMPARISON, "wordbreaker", {79, -1}, {5}},
140 /*13*/{PECHE_WITH_ACCENTS, "p\\u00E9che", "en", UCOL_SECONDARY, USEARCH_PATTERN_BASE_WEIGHT_IS_WILDCARD, NULL, {3, 24, 69, 79, -1}, {5, 5, 5, 5}},
141 /*14*/{PECHE_WITH_ACCENTS, "p\\u00E9che", "en", UCOL_SECONDARY, USEARCH_PATTERN_BASE_WEIGHT_IS_WILDCARD, "wordbreaker", {3, 79, -1}, {5, 5}},
142 /*15*/{PECHE_WITH_ACCENTS, "p\\u00E9che", "en", UCOL_SECONDARY, USEARCH_ANY_BASE_WEIGHT_IS_WILDCARD, NULL, {3, 24, 69, 79, 94, 107, -1}, {5, 5, 5, 5, 6, 5}},
143 /*16*/{PECHE_WITH_ACCENTS, "p\\u00E9che", "en", UCOL_SECONDARY, USEARCH_ANY_BASE_WEIGHT_IS_WILDCARD, "wordbreaker", {3, 79, 94, 107, -1}, {5, 5, 6, 5}},
144 /*17*/{PECHE_WITH_ACCENTS, "pech\\u00E9", "en", UCOL_SECONDARY, USEARCH_PATTERN_BASE_WEIGHT_IS_WILDCARD, NULL, {3, 59, 94, -1}, {5, 5, 6}},
145 /*18*/{PECHE_WITH_ACCENTS, "pech\\u00E9", "en", UCOL_SECONDARY, USEARCH_PATTERN_BASE_WEIGHT_IS_WILDCARD, "wordbreaker", {3, 59, 94, -1}, {5, 5, 6}},
146 /*19*/{PECHE_WITH_ACCENTS, "pech\\u00E9", "en", UCOL_SECONDARY, USEARCH_ANY_BASE_WEIGHT_IS_WILDCARD, NULL, {3, 13, 24, 36, 46, 59, 69, 79, 94, 107, -1}, {5, 5, 5, 5, 5, 5, 5, 5, 6, 5}},
147 /*20*/{PECHE_WITH_ACCENTS, "pech\\u00E9", "en", UCOL_SECONDARY, USEARCH_ANY_BASE_WEIGHT_IS_WILDCARD, "wordbreaker", {3, 13, 36, 59, 79, 94, 107, -1}, {5, 5, 5, 5, 5, 6, 5}},
148 /*21*/{PECHE_WITH_ACCENTS, "peche\\u0301", "en", UCOL_SECONDARY, USEARCH_PATTERN_BASE_WEIGHT_IS_WILDCARD, NULL, {3, 59, 94, -1}, {5, 5, 6}},
149 /*22*/{PECHE_WITH_ACCENTS, "peche\\u0301", "en", UCOL_SECONDARY, USEARCH_PATTERN_BASE_WEIGHT_IS_WILDCARD, "wordbreaker", {3, 59, 94, -1}, {5, 5, 6}},
150 /*23*/{PECHE_WITH_ACCENTS, "peche\\u0301", "en", UCOL_SECONDARY, USEARCH_ANY_BASE_WEIGHT_IS_WILDCARD, NULL, {3, 13, 24, 36, 46, 59, 69, 79, 94, 107, -1}, {5, 5, 5, 5, 5, 5, 5, 5, 6, 5}},
151 /*24*/{PECHE_WITH_ACCENTS, "peche\\u0301", "en", UCOL_SECONDARY, USEARCH_ANY_BASE_WEIGHT_IS_WILDCARD, "wordbreaker", {3, 13, 36, 59, 79, 94, 107, -1}, {5, 5, 5, 5, 5, 6, 5}},
152 /* more tests for modified element comparison (with fr), ticket #7093 */
153 /*25*/{PECHE_WITH_ACCENTS, "peche", "fr", UCOL_PRIMARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {3, 13, 24, 36, 46, 59, 69, 79, 94, 107, -1}, {5, 5, 5, 5, 5, 5, 5, 5, 6, 5}},
154 /*26*/{PECHE_WITH_ACCENTS, "peche", "fr", UCOL_PRIMARY, USEARCH_STANDARD_ELEMENT_COMPARISON, "wordbreaker", {3, 13, 36, 59, 79, 94, 107, -1}, {5, 5, 5, 5, 5, 6, 5}},
155 /*27*/{PECHE_WITH_ACCENTS, "peche", "fr", UCOL_SECONDARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {107, -1}, {5}},
156 /*28*/{PECHE_WITH_ACCENTS, "peche", "fr", UCOL_SECONDARY, USEARCH_PATTERN_BASE_WEIGHT_IS_WILDCARD, NULL, {3, 13, 24, 36, 46, 59, 69, 79, 94, 107, -1}, {5, 5, 5, 5, 5, 5, 5, 5, 6, 5}},
157 /*29*/{PECHE_WITH_ACCENTS, "peche", "fr", UCOL_SECONDARY, USEARCH_PATTERN_BASE_WEIGHT_IS_WILDCARD, "wordbreaker", {3, 13, 36, 59, 79, 94, 107, -1}, {5, 5, 5, 5, 5, 6, 5}},
158 /*30*/{PECHE_WITH_ACCENTS, "p\\u00E9che", "fr", UCOL_SECONDARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {24, 69, 79, -1}, {5, 5, 5}},
159 /*31*/{PECHE_WITH_ACCENTS, "p\\u00E9che", "fr", UCOL_SECONDARY, USEARCH_STANDARD_ELEMENT_COMPARISON, "wordbreaker", {79, -1}, {5}},
160 /*32*/{PECHE_WITH_ACCENTS, "p\\u00E9che", "fr", UCOL_SECONDARY, USEARCH_PATTERN_BASE_WEIGHT_IS_WILDCARD, NULL, {3, 24, 69, 79, -1}, {5, 5, 5, 5}},
161 /*33*/{PECHE_WITH_ACCENTS, "p\\u00E9che", "fr", UCOL_SECONDARY, USEARCH_PATTERN_BASE_WEIGHT_IS_WILDCARD, "wordbreaker", {3, 79, -1}, {5, 5}},
162 /*34*/{PECHE_WITH_ACCENTS, "p\\u00E9che", "fr", UCOL_SECONDARY, USEARCH_ANY_BASE_WEIGHT_IS_WILDCARD, NULL, {3, 24, 69, 79, 94, 107, -1}, {5, 5, 5, 5, 6, 5}},
163 /*35*/{PECHE_WITH_ACCENTS, "p\\u00E9che", "fr", UCOL_SECONDARY, USEARCH_ANY_BASE_WEIGHT_IS_WILDCARD, "wordbreaker", {3, 79, 94, 107, -1}, {5, 5, 6, 5}},
164 /*36*/{PECHE_WITH_ACCENTS, "pech\\u00E9", "fr", UCOL_SECONDARY, USEARCH_PATTERN_BASE_WEIGHT_IS_WILDCARD, NULL, {3, 59, 94, -1}, {5, 5, 6}},
165 /*37*/{PECHE_WITH_ACCENTS, "pech\\u00E9", "fr", UCOL_SECONDARY, USEARCH_PATTERN_BASE_WEIGHT_IS_WILDCARD, "wordbreaker", {3, 59, 94, -1}, {5, 5, 6}},
166 /*38*/{PECHE_WITH_ACCENTS, "pech\\u00E9", "fr", UCOL_SECONDARY, USEARCH_ANY_BASE_WEIGHT_IS_WILDCARD, NULL, {3, 13, 24, 36, 46, 59, 69, 79, 94, 107, -1}, {5, 5, 5, 5, 5, 5, 5, 5, 6, 5}},
167 /*39*/{PECHE_WITH_ACCENTS, "pech\\u00E9", "fr", UCOL_SECONDARY, USEARCH_ANY_BASE_WEIGHT_IS_WILDCARD, "wordbreaker", {3, 13, 36, 59, 79, 94, 107, -1}, {5, 5, 5, 5, 5, 6, 5}},
168 /*40*/{PECHE_WITH_ACCENTS, "peche\\u0301", "fr", UCOL_SECONDARY, USEARCH_PATTERN_BASE_WEIGHT_IS_WILDCARD, NULL, {3, 59, 94, -1}, {5, 5, 6}},
169 /*41*/{PECHE_WITH_ACCENTS, "peche\\u0301", "fr", UCOL_SECONDARY, USEARCH_PATTERN_BASE_WEIGHT_IS_WILDCARD, "wordbreaker", {3, 59, 94, -1}, {5, 5, 6}},
170 /*42*/{PECHE_WITH_ACCENTS, "peche\\u0301", "fr", UCOL_SECONDARY, USEARCH_ANY_BASE_WEIGHT_IS_WILDCARD, NULL, {3, 13, 24, 36, 46, 59, 69, 79, 94, 107, -1}, {5, 5, 5, 5, 5, 5, 5, 5, 6, 5}},
171 /*43*/{PECHE_WITH_ACCENTS, "peche\\u0301", "fr", UCOL_SECONDARY, USEARCH_ANY_BASE_WEIGHT_IS_WILDCARD, "wordbreaker", {3, 13, 36, 59, 79, 94, 107, -1}, {5, 5, 5, 5, 5, 6, 5}},
46f4442e
A
172
173#if 0
174 /* Ticket 5382 */
729e4ab9 175 {"12\\u0171", "\\u0170", NULL, UCOL_SECONDARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {2, -1}, {2}},
46f4442e
A
176#endif
177
729e4ab9 178 {NULL, NULL, NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {-1}, {0}}
b75a7d8f
A
179};
180
181static const SearchData VARIABLE[] = {
182 /*012345678901234567890123456789012345678901234567890123456789*/
183 {"blackbirds black blackbirds blackbird black-bird",
729e4ab9 184 "blackbird", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {0, 17, 28, 38, -1},
b75a7d8f
A
185 {9, 9, 9, 10}},
186 /* to see that it doesn't go into an infinite loop if the start of text
187 is a ignorable character */
729e4ab9
A
188 {" on", "go", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {-1}, {0}},
189 {"abcdefghijklmnopqrstuvwxyz", " ", NULL, UCOL_PRIMARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL,
b75a7d8f
A
190 {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19,
191 20, 21, 22, 23, 24, 25, -1}, {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
192 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}},
193 /* testing tightest match */
729e4ab9 194 {" abc a bc ab c a bc ab c", "abc", NULL, UCOL_QUATERNARY, USEARCH_STANDARD_ELEMENT_COMPARISON,
b75a7d8f
A
195 NULL, {1, -1}, {3}},
196 /*012345678901234567890123456789012345678901234567890123456789 */
729e4ab9 197 {" abc a bc ab c a bc ab c", "abc", NULL, UCOL_SECONDARY, USEARCH_STANDARD_ELEMENT_COMPARISON,
b75a7d8f
A
198 NULL, {1, 6, 13, 21, 31, -1}, {3, 4, 4, 5, 5}},
199 /* totally ignorable text */
729e4ab9 200 {" ---------------", "abc", NULL, UCOL_SECONDARY, USEARCH_STANDARD_ELEMENT_COMPARISON,
b75a7d8f 201 NULL, {-1}, {0}},
729e4ab9 202 {NULL, NULL, NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {-1}, {0}}
b75a7d8f
A
203};
204
205static const SearchData NORMEXACT[] = {
729e4ab9 206 {"a\\u0300\\u0325", "a\\u0325\\u0300", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {0, -1}, {3}},
46f4442e
A
207
208#if GRAPHEME_BOUNDARIES
729e4ab9 209 {"a\\u0300\\u0325", "\\u0325\\u0300", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {-1}, {0}},
46f4442e 210#else
729e4ab9 211 {"a\\u0300\\u0325", "\\u0325\\u0300", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {1, -1}, {2}},
46f4442e
A
212#endif
213
729e4ab9 214 {NULL, NULL, NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {-1}, {0}}
b75a7d8f
A
215};
216
217static const SearchData NONNORMEXACT[] = {
729e4ab9
A
218 {"a\\u0300\\u0325", "\\u0325\\u0300", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {-1}, {0}},
219 {NULL, NULL, NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {-1}, {0}}
b75a7d8f
A
220};
221
222static const SearchData OVERLAP[] = {
729e4ab9 223 {"abababab", "abab", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {0, 2, 4, -1},
b75a7d8f 224 {4, 4, 4}},
729e4ab9 225 {NULL, NULL, NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {-1}, {0}}
b75a7d8f
A
226};
227
228static const SearchData NONOVERLAP[] = {
729e4ab9
A
229 {"abababab", "abab", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {0, 4, -1}, {4, 4}},
230 {NULL, NULL, NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {-1}, {0}}
b75a7d8f
A
231};
232
233static const SearchData COLLATOR[] = {
234 /* english */
729e4ab9 235 {"fox fpx", "fox", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {0, -1}, {3}},
b75a7d8f 236 /* tailored */
729e4ab9
A
237 {"fox fpx", "fox", NULL, UCOL_PRIMARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {0, 4, -1}, {3, 3}},
238 {NULL, NULL, NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {-1}, {0}}
b75a7d8f
A
239};
240
241static const SearchData PATTERN[] = {
242 {"The quick brown fox jumps over the lazy foxes", "the", NULL,
729e4ab9 243 UCOL_PRIMARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {0, 31, -1}, {3, 3}},
b75a7d8f 244 {"The quick brown fox jumps over the lazy foxes", "fox", NULL,
729e4ab9
A
245 UCOL_PRIMARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {16, 40, -1}, {3, 3}},
246 {NULL, NULL, NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {-1}, {0}}
b75a7d8f
A
247};
248
249static const SearchData TEXT[] = {
729e4ab9 250 {"the foxy brown fox", "fox", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {4, 15, -1},
b75a7d8f 251 {3, 3}},
729e4ab9 252 {"the quick brown fox", "fox", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {16, -1},
b75a7d8f 253 {3}},
729e4ab9 254 {NULL, NULL, NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {-1}, {0}}
b75a7d8f
A
255};
256
257static const SearchData COMPOSITEBOUNDARIES[] = {
46f4442e 258#if GRAPHEME_BOUNDARIES
729e4ab9
A
259 {"\\u00C0", "A", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {-1}, {0}},
260 {"A\\u00C0C", "A", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {0, -1}, {1}},
261 {"\\u00C0A", "A", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {1, -1}, {1}},
262 {"B\\u00C0", "A", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {-1}, {0}},
263 {"\\u00C0B", "A", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {-1}, {0}},
264 {"\\u00C0", "\\u0300", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {-1}, {0}},
265 {"\\u0300\\u00C0", "\\u0300", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {0, -1}, {1}},
46f4442e 266#else
729e4ab9
A
267 {"\\u00C0", "A", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {0, -1}, {1}},
268 {"A\\u00C0C", "A", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {0, 1, -1}, {1, 1}},
269 {"\\u00C0A", "A", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {0, 1, -1}, {1, 1}},
270 {"B\\u00C0", "A", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {1, -1}, {1}},
271 {"\\u00C0B", "A", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {0, -1}, {1}},
272 {"\\u00C0", "\\u0300", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {0, -1}, {1}},
273 {"\\u0300\\u00C0", "\\u0300", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {0, 1, -1},
b75a7d8f 274 {1, 1}},
46f4442e
A
275#endif
276
729e4ab9 277 {"\\u00C0\\u0300", "\\u0300", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {-1}, {0}},
b75a7d8f 278 /* A + 030A + 0301 */
729e4ab9
A
279 {"\\u01FA", "\\u01FA", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {0, -1}, {1}},
280 {"\\u01FA", "A\\u030A\\u0301", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {0, -1}, {1}},
281 {"\\u01FA", "\\u030A", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {-1}, {0}},
282 {"\\u01FA", "A\\u030A", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {-1}, {0}},
283 {"\\u01FA", "\\u030AA", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {-1}, {0}},
284 {"\\u01FA", "\\u0301", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {-1}, {0}},
285 {"\\u01FA", "A\\u0301", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {-1}, {0}},
286 {"\\u01FA", "\\u0301A", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {-1}, {0}},
46f4442e
A
287
288#if GRAPHEME_BOUNDARIES
729e4ab9 289 {"\\u01FA", "\\u030A\\u0301", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {-1}, {0}},
46f4442e 290#else
729e4ab9 291 {"\\u01FA", "\\u030A\\u0301", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {0, -1}, {1}},
46f4442e
A
292#endif
293
729e4ab9
A
294 {"A\\u01FA", "A\\u030A", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {-1}, {0}},
295 {"\\u01FAA", "\\u0301A", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {-1}, {0}},
296 {"\\u0F73", "\\u0F73", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {0, -1}, {1}},
297 {"\\u0F73", "\\u0F71", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {-1}, {0}},
298 {"\\u0F73", "\\u0F72", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {-1}, {0}},
299 {"\\u0F73", "\\u0F71\\u0F72", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {0, -1}, {1}},
300 {"A\\u0F73", "A\\u0F71", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {-1}, {0}},
301 {"\\u0F73A", "\\u0F72A", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {-1}, {0}},
46f4442e
A
302
303 /* Ticket 5024 */
729e4ab9 304 {"a\\u00e1", "a\\u00e1", NULL, UCOL_SECONDARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {0, -1}, {2}},
46f4442e
A
305
306 /* Ticket 5420 */
729e4ab9
A
307 {"fu\\u00dfball", "fu\\u00df", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {0, -1}, {3}},
308 {"fu\\u00dfball", "fuss", NULL, UCOL_PRIMARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {0, -1}, {3}},
309 {"fu\\u00dfball", "uss", NULL, UCOL_PRIMARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {1, -1}, {2}},
46f4442e 310
729e4ab9 311 {NULL, NULL, NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {-1}, {0}}
b75a7d8f
A
312};
313
314static const SearchData MATCH[] = {
729e4ab9 315 {"a busy bee is a very busy beeee", "bee", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL,
b75a7d8f
A
316 {7, 26, -1}, {3, 3}},
317 /* 012345678901234567890123456789012345678901234567890 */
318 {"a busy bee is a very busy beeee with no bee life", "bee", NULL,
729e4ab9
A
319 UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {7, 26, 40, -1}, {3, 3, 3}},
320 {NULL, NULL, NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {-1}, {0}}
b75a7d8f
A
321};
322
323static const SearchData SUPPLEMENTARY[] = {
324 /* 012345678901234567890123456789012345678901234567890012345678901234567890123456789012345678901234567890012345678901234567890123456789 */
325 {"abc \\uD800\\uDC00 \\uD800\\uDC01 \\uD801\\uDC00 \\uD800\\uDC00abc abc\\uD800\\uDC00 \\uD800\\uD800\\uDC00 \\uD800\\uDC00\\uDC00",
729e4ab9 326 "\\uD800\\uDC00", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {4, 13, 22, 26, 29, -1},
b75a7d8f 327 {2, 2, 2, 2, 2}},
374ca955 328 {"and\\uD834\\uDDB9this sentence", "\\uD834\\uDDB9", NULL,
729e4ab9 329 UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {3, -1}, {2}},
374ca955 330 {"and \\uD834\\uDDB9 this sentence", " \\uD834\\uDDB9 ", NULL,
729e4ab9 331 UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {3, -1}, {4}},
374ca955 332 {"and-\\uD834\\uDDB9-this sentence", "-\\uD834\\uDDB9-", NULL,
729e4ab9 333 UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {3, -1}, {4}},
374ca955 334 {"and,\\uD834\\uDDB9,this sentence", ",\\uD834\\uDDB9,", NULL,
729e4ab9 335 UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {3, -1}, {4}},
374ca955 336 {"and?\\uD834\\uDDB9?this sentence", "?\\uD834\\uDDB9?", NULL,
729e4ab9
A
337 UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {3, -1}, {4}},
338 {NULL, NULL, NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {-1}, {0}}
b75a7d8f
A
339};
340
341static const char *CONTRACTIONRULE =
342 "&z = ab/c < AB < X\\u0300 < ABC < X\\u0300\\u0315";
343
344static const SearchData CONTRACTION[] = {
345 /* common discontiguous */
729e4ab9 346 {"A\\u0300\\u0315", "\\u0300", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {-1}, {0}},
46f4442e
A
347
348#if GRAPHEME_BOUNDARIES
729e4ab9 349 {"A\\u0300\\u0315", "\\u0300\\u0315", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {-1}, {0}},
46f4442e 350#else
729e4ab9 351 {"A\\u0300\\u0315", "\\u0300\\u0315", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {1, -1}, {2}},
46f4442e
A
352#endif
353
b75a7d8f 354 /* contraction prefix */
729e4ab9 355 {"AB\\u0315C", "A", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {-1}, {0}},
46f4442e
A
356
357#if GRAPHEME_BOUNDARIES
729e4ab9
A
358 {"AB\\u0315C", "AB", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {-1}, {0}},
359 {"AB\\u0315C", "\\u0315", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {-1}, {0}},
46f4442e 360#else
729e4ab9
A
361 {"AB\\u0315C", "AB", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {0, -1}, {2}},
362 {"AB\\u0315C", "\\u0315", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {2, -1}, {1}},
46f4442e
A
363#endif
364
b75a7d8f
A
365 /* discontiguous problem here for backwards iteration.
366 accents not found because discontiguous stores all information */
729e4ab9 367 {"X\\u0300\\u0319\\u0315", "\\u0319", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {-1},
b75a7d8f
A
368 {0}},
369 /* ends not with a contraction character */
729e4ab9 370 {"X\\u0315\\u0300D", "\\u0300\\u0315", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {-1},
b75a7d8f 371 {0}},
729e4ab9 372 {"X\\u0315\\u0300D", "X\\u0300\\u0315", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL,
b75a7d8f 373 {0, -1}, {3}},
729e4ab9 374 {"X\\u0300\\u031A\\u0315D", "X\\u0300", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {-1},
b75a7d8f
A
375 {0}},
376 /* blocked discontiguous */
729e4ab9 377 {"X\\u0300\\u031A\\u0315D", "\\u031A\\u0315D", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL,
b75a7d8f 378 {-1}, {0}},
46f4442e
A
379
380#if GRAPHEME_BOUNDARIES
381 /*
382 * "ab" generates a contraction that's an expansion. The "z" matches the
383 * first CE of the expansion but the match fails because it ends in the
384 * middle of an expansion...
385 */
729e4ab9 386 {"ab", "z", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {-1}, {0}},
46f4442e 387#else
729e4ab9 388 {"ab", "z", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {0, -1}, {2}},
46f4442e
A
389#endif
390
729e4ab9 391 {NULL, NULL, NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {-1}, {0}}
b75a7d8f
A
392};
393
394static const char *IGNORABLERULE = "&a = \\u0300";
395
396static const SearchData IGNORABLE[] = {
46f4442e
A
397#if GRAPHEME_BOUNDARIES
398 /*
399 * This isn't much of a test when matches have to be on
400 * grapheme boundiaries. The match at 0 only works because
401 * it's at the start of the text.
402 */
729e4ab9 403 {"\\u0300\\u0315 \\u0300\\u0315 ", "\\u0300", NULL, UCOL_PRIMARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL,
46f4442e
A
404 {0, -1}, {2}},
405#else
729e4ab9 406 {"\\u0300\\u0315 \\u0300\\u0315 ", "\\u0300", NULL, UCOL_PRIMARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL,
46f4442e
A
407 {0, 3, -1}, {2, 2}},
408#endif
409
729e4ab9 410 {NULL, NULL, NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {-1}, {0}}
b75a7d8f
A
411};
412
413static const SearchData BASICCANONICAL[] = {
729e4ab9
A
414 {"xxxxxxxxxxxxxxxxxxxx", "fisher", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {-1}, {0}},
415 {"silly spring string", "string", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {13, -1},
b75a7d8f 416 {6}},
729e4ab9 417 {"silly spring string string", "string", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL,
b75a7d8f 418 {13, 20, -1}, {6, 6}},
729e4ab9 419 {"silly string spring string", "string", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL,
b75a7d8f 420 {6, 20, -1}, {6, 6}},
729e4ab9 421 {"string spring string", "string", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {0, 14, -1},
b75a7d8f 422 {6, 6}},
729e4ab9
A
423 {"Scott Ganyo", "c", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {1, -1}, {1}},
424 {"Scott Ganyo", " ", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {5, -1}, {1}},
46f4442e
A
425
426#if GRAPHEME_BOUNDARIES
729e4ab9
A
427 {"\\u0300\\u0325", "\\u0300", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {-1}, {0}},
428 {"a\\u0300\\u0325", "\\u0300", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {-1}, {0}},
429 {"a\\u0300\\u0325", "\\u0300\\u0325", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {-1}, {0}},
430 {"a\\u0300b", "\\u0300", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {-1}, {0}},
431 {"a\\u0300\\u0325b", "\\u0300b", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {-1}, {0}},
432 {"\\u0325\\u0300A\\u0325\\u0300", "\\u0300A\\u0300", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON,
46f4442e 433 NULL, {-1}, {0}},
729e4ab9 434 {"\\u0325\\u0300A\\u0325\\u0300", "\\u0325A\\u0325", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON,
46f4442e
A
435 NULL, {-1}, {0}},
436 {"a\\u0300\\u0325b\\u0300\\u0325c \\u0325b\\u0300 \\u0300b\\u0325",
729e4ab9 437 "\\u0300b\\u0325", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {-1}, {0}},
46f4442e 438#else
729e4ab9
A
439 {"\\u0300\\u0325", "\\u0300", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {0, -1}, {2}},
440 {"a\\u0300\\u0325", "\\u0300", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {1, -1}, {2}},
441 {"a\\u0300\\u0325", "\\u0300\\u0325", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {1, -1},
b75a7d8f 442 {2}},
729e4ab9
A
443 {"a\\u0300b", "\\u0300", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {1, -1}, {1}},
444 {"a\\u0300\\u0325b", "\\u0300b", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {1, -1}, {3}},
445 {"\\u0325\\u0300A\\u0325\\u0300", "\\u0300A\\u0300", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON,
b75a7d8f 446 NULL, {0, -1}, {5}},
729e4ab9 447 {"\\u0325\\u0300A\\u0325\\u0300", "\\u0325A\\u0325", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON,
b75a7d8f
A
448 NULL, {0, -1}, {5}},
449 {"a\\u0300\\u0325b\\u0300\\u0325c \\u0325b\\u0300 \\u0300b\\u0325",
729e4ab9 450 "\\u0300b\\u0325", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {1, 12, -1}, {5, 3}},
46f4442e
A
451#endif
452
729e4ab9
A
453 {"\\u00c4\\u0323", "A\\u0323\\u0308", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {0, -1}, {2}},
454 {"\\u0308\\u0323", "\\u0323\\u0308", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {0, -1}, {2}},
455 {NULL, NULL, NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {-1}, {0}}
b75a7d8f
A
456};
457
46f4442e 458
b75a7d8f 459static const SearchData NORMCANONICAL[] = {
46f4442e
A
460#if GRAPHEME_BOUNDARIES
461 /*
462 * These tests don't really mean anything. With matches restricted to grapheme
463 * boundaries, isCanonicalMatch doesn't mean anything unless normalization is
464 * also turned on...
465 */
729e4ab9
A
466 {"\\u0300\\u0325", "\\u0300", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {-1}, {0}},
467 {"\\u0300\\u0325", "\\u0325", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {-1}, {0}},
468 {"a\\u0300\\u0325", "\\u0325\\u0300", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {-1}, {0}},
469 {"a\\u0300\\u0325", "\\u0300\\u0325", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {-1}, {0}},
470 {"a\\u0300\\u0325", "\\u0325", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {-1}, {0}},
471 {"a\\u0300\\u0325", "\\u0300", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {-1}, {0}},
46f4442e 472#else
729e4ab9
A
473 {"\\u0300\\u0325", "\\u0300", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {0, -1}, {2}},
474 {"\\u0300\\u0325", "\\u0325", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {0, -1}, {2}},
475 {"a\\u0300\\u0325", "\\u0325\\u0300", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {1, -1},
b75a7d8f 476 {2}},
729e4ab9 477 {"a\\u0300\\u0325", "\\u0300\\u0325", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {1, -1},
b75a7d8f 478 {2}},
729e4ab9
A
479 {"a\\u0300\\u0325", "\\u0325", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {1, -1}, {2}},
480 {"a\\u0300\\u0325", "\\u0300", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {1, -1}, {2}},
46f4442e
A
481#endif
482
729e4ab9 483 {NULL, NULL, NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {-1}, {0}}
b75a7d8f
A
484};
485
486static const SearchData BREAKITERATORCANONICAL[] = {
729e4ab9 487 {"foxy fox", "fox", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, "characterbreaker", {0, 5, -1},
b75a7d8f 488 {3, 3}},
729e4ab9
A
489 {"foxy fox", "fox", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, "wordbreaker", {5, -1}, {3}},
490 {"This is a toe T\\u00F6ne", "toe", "de", UCOL_PRIMARY, USEARCH_STANDARD_ELEMENT_COMPARISON,
b75a7d8f 491 "characterbreaker", {10, 14, -1}, {3, 2}},
729e4ab9 492 {"This is a toe T\\u00F6ne", "toe", "de", UCOL_PRIMARY, USEARCH_STANDARD_ELEMENT_COMPARISON, "wordbreaker",
b75a7d8f
A
493 {10, -1}, {3}},
494 {"Channel, another channel, more channels, and one last Channel",
729e4ab9 495 "Channel", "es", UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, "wordbreaker", {0, 54, -1}, {7, 7}},
b75a7d8f 496 /* jitterbug 1745 */
729e4ab9 497 {"testing that \\u00e9 does not match e", "e", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON,
b75a7d8f
A
498 "characterbreaker", {1, 17, 30, -1}, {1, 1, 1}},
499 {"testing that string ab\\u00e9cd does not match e", "e", NULL,
729e4ab9
A
500 UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, "characterbreaker", {1, 28, 41, -1}, {1, 1, 1}},
501 {"\\u00c9", "e", "fr", UCOL_PRIMARY, USEARCH_STANDARD_ELEMENT_COMPARISON, "characterbreaker", {0, -1}, {1}},
502 {NULL, NULL, NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {-1}, {0}}
b75a7d8f
A
503};
504
505static const SearchData STRENGTHCANONICAL[] = {
506 /*012345678901234567890123456789012345678901234567890123456789 */
507 {"The quick brown fox jumps over the lazy foxes", "fox", "en",
729e4ab9 508 UCOL_PRIMARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {16, 40, -1}, {3, 3}},
b75a7d8f 509 {"The quick brown fox jumps over the lazy foxes", "fox", "en",
729e4ab9 510 UCOL_PRIMARY, USEARCH_STANDARD_ELEMENT_COMPARISON, "wordbreaker", {16, -1}, {3}},
b75a7d8f 511 {"blackbirds Pat p\\u00E9ch\\u00E9 p\\u00EAche p\\u00E9cher p\\u00EAcher Tod T\\u00F6ne black Tofu blackbirds Ton PAT toehold blackbird black-bird pat toe big Toe",
729e4ab9
A
512 "peche", "fr", UCOL_PRIMARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {15, 21, 27, 34, -1}, {5, 5, 5, 5}},
513 {"This is a toe T\\u00F6ne", "toe", "de", UCOL_PRIMARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL,
b75a7d8f
A
514 {10, 14, -1}, {3, 2}},
515 {"A channel, another CHANNEL, more Channels, and one last channel...",
729e4ab9 516 "channel", "es", UCOL_PRIMARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {2, 19, 33, 56, -1},
b75a7d8f 517 {7, 7, 7, 7}},
729e4ab9 518 {NULL, NULL, NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {-1}, {0}}
b75a7d8f
A
519};
520
521static const SearchData VARIABLECANONICAL[] = {
522 /*012345678901234567890123456789012345678901234567890123456789 */
523 {"blackbirds black blackbirds blackbird black-bird",
729e4ab9 524 "blackbird", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {0, 17, 28, 38, -1},
b75a7d8f
A
525 {9, 9, 9, 10}},
526 /* to see that it doesn't go into an infinite loop if the start of text
527 is a ignorable character */
729e4ab9
A
528 {" on", "go", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {-1}, {0}},
529 {"abcdefghijklmnopqrstuvwxyz", " ", NULL, UCOL_PRIMARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL,
b75a7d8f
A
530 {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19,
531 20, 21, 22, 23, 24, 25, -1}, {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
532 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}},
533 /* testing tightest match */
729e4ab9 534 {" abc a bc ab c a bc ab c", "abc", NULL, UCOL_QUATERNARY, USEARCH_STANDARD_ELEMENT_COMPARISON,
b75a7d8f
A
535 NULL, {1, -1}, {3}},
536 /*012345678901234567890123456789012345678901234567890123456789 */
729e4ab9 537 {" abc a bc ab c a bc ab c", "abc", NULL, UCOL_SECONDARY, USEARCH_STANDARD_ELEMENT_COMPARISON,
b75a7d8f
A
538 NULL, {1, 6, 13, 21, 31, -1}, {3, 4, 4, 5, 5}},
539 /* totally ignorable text */
729e4ab9 540 {" ---------------", "abc", NULL, UCOL_SECONDARY, USEARCH_STANDARD_ELEMENT_COMPARISON,
b75a7d8f 541 NULL, {-1}, {0}},
729e4ab9 542 {NULL, NULL, NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {-1}, {0}}
b75a7d8f
A
543};
544
545static const SearchData OVERLAPCANONICAL[] = {
729e4ab9 546 {"abababab", "abab", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {0, 2, 4, -1},
b75a7d8f 547 {4, 4, 4}},
729e4ab9 548 {NULL, NULL, NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {-1}, {0}}
b75a7d8f
A
549};
550
551static const SearchData NONOVERLAPCANONICAL[] = {
729e4ab9
A
552 {"abababab", "abab", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {0, 4, -1}, {4, 4}},
553 {NULL, NULL, NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {-1}, {0}}
b75a7d8f
A
554};
555
556static const SearchData COLLATORCANONICAL[] = {
557 /* english */
729e4ab9 558 {"fox fpx", "fox", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {0, -1}, {3}},
b75a7d8f 559 /* tailored */
729e4ab9
A
560 {"fox fpx", "fox", NULL, UCOL_PRIMARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {0, 4, -1}, {3, 3}},
561 {NULL, NULL, NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {-1}, {0}}
b75a7d8f
A
562};
563
564static const SearchData PATTERNCANONICAL[] = {
565 {"The quick brown fox jumps over the lazy foxes", "the", NULL,
729e4ab9 566 UCOL_PRIMARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {0, 31, -1}, {3, 3}},
b75a7d8f 567 {"The quick brown fox jumps over the lazy foxes", "fox", NULL,
729e4ab9
A
568 UCOL_PRIMARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {16, 40, -1}, {3, 3}},
569 {NULL, NULL, NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {-1}, {0}}
b75a7d8f
A
570};
571
572static const SearchData TEXTCANONICAL[] = {
729e4ab9 573 {"the foxy brown fox", "fox", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {4, 15, -1},
b75a7d8f 574 {3, 3}},
729e4ab9 575 {"the quick brown fox", "fox", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {16, -1},
b75a7d8f 576 {3}},
729e4ab9 577 {NULL, NULL, NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {-1}, {0}}
b75a7d8f
A
578};
579
580static const SearchData COMPOSITEBOUNDARIESCANONICAL[] = {
46f4442e 581#if GRAPHEME_BOUNDARIES
729e4ab9
A
582 {"\\u00C0", "A", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {-1}, {0}},
583 {"A\\u00C0C", "A", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {0, -1}, {1}},
584 {"\\u00C0A", "A", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {1, -1}, {1}},
585 {"B\\u00C0", "A", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {-1}, {0}},
586 {"\\u00C0B", "A", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {-1}, {0}},
587 {"\\u00C0", "\\u0300", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {-1}, {0}},
46f4442e
A
588
589 /* first one matches only because it's at the start of the text */
729e4ab9 590 {"\\u0300\\u00C0", "\\u0300", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {0, -1}, {1}},
46f4442e
A
591
592 /* \\u0300 blocked by \\u0300 */
729e4ab9 593 {"\\u00C0\\u0300", "\\u0300", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {-1}, {0}},
46f4442e 594#else
729e4ab9
A
595 {"\\u00C0", "A", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {0, -1}, {1}},
596 {"A\\u00C0C", "A", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {0, 1, -1}, {1, 1}},
597 {"\\u00C0A", "A", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {0, 1, -1}, {1, 1}},
598 {"B\\u00C0", "A", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {1, -1}, {1}},
599 {"\\u00C0B", "A", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {0, -1}, {1}},
600 {"\\u00C0", "\\u0300", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {0, -1}, {1}},
601 {"\\u0300\\u00C0", "\\u0300", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {0, 1, -1},
b75a7d8f
A
602 {1, 1}},
603 /* \\u0300 blocked by \\u0300 */
729e4ab9 604 {"\\u00C0\\u0300", "\\u0300", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {0, -1}, {2}},
46f4442e
A
605#endif
606
b75a7d8f 607 /* A + 030A + 0301 */
729e4ab9
A
608 {"\\u01FA", "\\u01FA", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {0, -1}, {1}},
609 {"\\u01FA", "A\\u030A\\u0301", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {0, -1}, {1}},
46f4442e
A
610
611#if GRAPHEME_BOUNDARIES
729e4ab9
A
612 {"\\u01FA", "\\u030A", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {-1}, {0}},
613 {"\\u01FA", "A\\u030A", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {-1}, {0}},
46f4442e 614#else
729e4ab9
A
615 {"\\u01FA", "\\u030A", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {0, -1}, {1}},
616 {"\\u01FA", "A\\u030A", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {0, -1}, {1}},
46f4442e
A
617#endif
618
729e4ab9 619 {"\\u01FA", "\\u030AA", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {-1}, {0}},
46f4442e
A
620
621#if GRAPHEME_BOUNDARIES
729e4ab9 622 {"\\u01FA", "\\u0301", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {-1}, {0}},
46f4442e 623#else
729e4ab9 624 {"\\u01FA", "\\u0301", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {0, -1}, {1}},
46f4442e
A
625#endif
626
b75a7d8f 627 /* blocked accent */
729e4ab9
A
628 {"\\u01FA", "A\\u0301", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {-1}, {0}},
629 {"\\u01FA", "\\u0301A", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {-1}, {0}},
46f4442e
A
630
631#if GRAPHEME_BOUNDARIES
729e4ab9
A
632 {"\\u01FA", "\\u030A\\u0301", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {-1}, {0}},
633 {"A\\u01FA", "A\\u030A", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {-1}, {0}},
634 {"\\u01FAA", "\\u0301A", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {-1}, {0}},
46f4442e 635#else
729e4ab9
A
636 {"\\u01FA", "\\u030A\\u0301", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {0, -1}, {1}},
637 {"A\\u01FA", "A\\u030A", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {1, -1}, {1}},
638 {"\\u01FAA", "\\u0301A", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {0, -1}, {2}},
46f4442e
A
639#endif
640
729e4ab9 641 {"\\u0F73", "\\u0F73", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {0, -1}, {1}},
46f4442e
A
642
643#if GRAPHEME_BOUNDARIES
729e4ab9
A
644 {"\\u0F73", "\\u0F71", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {-1}, {0}},
645 {"\\u0F73", "\\u0F72", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {-1}, {0}},
46f4442e 646#else
729e4ab9
A
647 {"\\u0F73", "\\u0F71", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {0, -1}, {1}},
648 {"\\u0F73", "\\u0F72", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {0, -1}, {1}},
46f4442e
A
649#endif
650
729e4ab9 651 {"\\u0F73", "\\u0F71\\u0F72", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {0, -1}, {1}},
46f4442e
A
652
653#if GRAPHEME_BOUNDARIES
729e4ab9
A
654 {"A\\u0F73", "A\\u0F71", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {-1}, {0}},
655 {"\\u0F73A", "\\u0F72A", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {-1}, {0}},
46f4442e 656 {"\\u01FA A\\u0301\\u030A A\\u030A\\u0301 A\\u030A \\u01FA", "A\\u030A",
729e4ab9 657 NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {10, -1}, {2}},
46f4442e 658#else
729e4ab9
A
659 {"A\\u0F73", "A\\u0F71", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {0, -1}, {2}},
660 {"\\u0F73A", "\\u0F72A", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {0, -1}, {2}},
b75a7d8f 661 {"\\u01FA A\\u0301\\u030A A\\u030A\\u0301 A\\u030A \\u01FA", "A\\u030A",
729e4ab9 662 NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {0, 6, 10, 13, -1}, {1, 3, 2, 1}},
46f4442e
A
663#endif
664
729e4ab9 665 {NULL, NULL, NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {-1}, {0}}
b75a7d8f
A
666};
667
668static const SearchData MATCHCANONICAL[] = {
729e4ab9 669 {"a busy bee is a very busy beeee", "bee", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL,
b75a7d8f
A
670 {7, 26, -1}, {3, 3}},
671 /*012345678901234567890123456789012345678901234567890 */
672 {"a busy bee is a very busy beeee with no bee life", "bee", NULL,
729e4ab9
A
673 UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {7, 26, 40, -1}, {3, 3, 3}},
674 {NULL, NULL, NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {-1}, {0}}
b75a7d8f
A
675};
676
677static const SearchData SUPPLEMENTARYCANONICAL[] = {
678 /*012345678901234567890123456789012345678901234567890012345678901234567890123456789012345678901234567890012345678901234567890123456789 */
679 {"abc \\uD800\\uDC00 \\uD800\\uDC01 \\uD801\\uDC00 \\uD800\\uDC00abc abc\\uD800\\uDC00 \\uD800\\uD800\\uDC00 \\uD800\\uDC00\\uDC00",
729e4ab9 680 "\\uD800\\uDC00", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {4, 13, 22, 26, 29, -1},
b75a7d8f 681 {2, 2, 2, 2, 2}},
374ca955 682 {"and\\uD834\\uDDB9this sentence", "\\uD834\\uDDB9", NULL,
729e4ab9 683 UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {3, -1}, {2}},
374ca955 684 {"and \\uD834\\uDDB9 this sentence", " \\uD834\\uDDB9 ", NULL,
729e4ab9 685 UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {3, -1}, {4}},
374ca955 686 {"and-\\uD834\\uDDB9-this sentence", "-\\uD834\\uDDB9-", NULL,
729e4ab9 687 UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {3, -1}, {4}},
374ca955 688 {"and,\\uD834\\uDDB9,this sentence", ",\\uD834\\uDDB9,", NULL,
729e4ab9 689 UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {3, -1}, {4}},
374ca955 690 {"and?\\uD834\\uDDB9?this sentence", "?\\uD834\\uDDB9?", NULL,
729e4ab9
A
691 UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {3, -1}, {4}},
692 {NULL, NULL, NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {-1}, {0}}
b75a7d8f
A
693};
694
695static const SearchData CONTRACTIONCANONICAL[] = {
696 /* common discontiguous */
46f4442e 697#if GRAPHEME_BOUNDARIES
729e4ab9
A
698 {"A\\u0300\\u0315", "\\u0300", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {-1}, {0}},
699 {"A\\u0300\\u0315", "\\u0300\\u0315", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {-1}, {0}},
46f4442e 700#else
729e4ab9
A
701 {"A\\u0300\\u0315", "\\u0300", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {1, -1}, {2}},
702 {"A\\u0300\\u0315", "\\u0300\\u0315", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {1, -1}, {2}},
46f4442e
A
703#endif
704
b75a7d8f 705 /* contraction prefix */
729e4ab9 706 {"AB\\u0315C", "A", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {-1}, {0}},
46f4442e
A
707
708#if GRAPHEME_BOUNDARIES
729e4ab9
A
709 {"AB\\u0315C", "AB", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {-1}, {0}},
710 {"AB\\u0315C", "\\u0315", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {-1}, {0}},
46f4442e 711#else
729e4ab9
A
712 {"AB\\u0315C", "AB", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {0, -1}, {2}},
713 {"AB\\u0315C", "\\u0315", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {2, -1}, {1}},
46f4442e
A
714#endif
715
b75a7d8f
A
716 /* discontiguous problem here for backwards iteration.
717 forwards gives 0, 4 but backwards give 1, 3 */
729e4ab9 718 /* {"X\\u0300\\u0319\\u0315", "\\u0319", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {0, -1},
b75a7d8f
A
719 {4}}, */
720
721 /* ends not with a contraction character */
729e4ab9
A
722 {"X\\u0315\\u0300D", "\\u0300\\u0315", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {-1}, {0}},
723 {"X\\u0315\\u0300D", "X\\u0300\\u0315", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {0, -1}, {3}},
46f4442e
A
724
725#if GRAPHEME_BOUNDARIES
729e4ab9 726 {"X\\u0300\\u031A\\u0315D", "X\\u0300", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {-1}, {0}},
46f4442e 727
b75a7d8f 728 /* blocked discontiguous */
729e4ab9 729 {"X\\u0300\\u031A\\u0315D", "\\u031A\\u0315D", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {-1}, {0}},
46f4442e
A
730
731 /*
732 * "ab" generates a contraction that's an expansion. The "z" matches the
733 * first CE of the expansion but the match fails because it ends in the
734 * middle of an expansion...
735 */
729e4ab9 736 {"ab", "z", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {-1}, {2}},
46f4442e 737#else
729e4ab9 738 {"X\\u0300\\u031A\\u0315D", "X\\u0300", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {0, -1}, {4}},
46f4442e
A
739
740 /* blocked discontiguous */
729e4ab9 741 {"X\\u0300\\u031A\\u0315D", "\\u031A\\u0315D", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {1, -1}, {4}},
46f4442e 742
729e4ab9 743 {"ab", "z", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {0, -1}, {2}},
46f4442e
A
744#endif
745
729e4ab9 746 {NULL, NULL, NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {-1}, {0}}
b75a7d8f
A
747};
748
46f4442e 749static const SearchData DIACRITICMATCH[] = {
4388f060
A
750 {"\\u03BA\\u03B1\\u03B9\\u0300\\u0020\\u03BA\\u03B1\\u1F76", "\\u03BA\\u03B1\\u03B9", NULL, UCOL_PRIMARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {0, 5,-1}, {4, 3}},
751 {"\\u0061\\u0061\\u00E1", "\\u0061\\u00E1", NULL, UCOL_SECONDARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {1, -1}, {2}},
752 {"\\u0020\\u00C2\\u0303\\u0020\\u0041\\u0061\\u1EAA\\u0041\\u0302\\u0303\\u00C2\\u0303\\u1EAB\\u0061\\u0302\\u0303\\u00E2\\u0303\\uD806\\uDC01\\u0300\\u0020",
753 "\\u00C2\\u0303", "LDE_AN_CX_EX_FX_HX_NX_S1", UCOL_PRIMARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {1, 4, 5, 6, 7, 10, 12, 13, 16,-1}, {2, 1, 1, 1, 3, 2, 1, 3, 2}},
754 {NULL, NULL, NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {-1}, {0}}
46f4442e
A
755};
756
b75a7d8f
A
757#endif /* #if !UCONFIG_NO_COLLATION */
758
759#endif