]>
Commit | Line | Data |
---|---|---|
b75a7d8f | 1 | /******************************************************************** |
4388f060 | 2 | * Copyright (c) 2001-2011 International Business Machines |
b75a7d8f A |
3 | * Corporation and others. All Rights Reserved. |
4 | ******************************************************************** | |
5 | * File USRCHDAT.H | |
6 | * Modification History: | |
7 | * Name date Description | |
8 | * synwee July 31 2001 creation | |
9 | ********************************************************************/ | |
10 | ||
46f4442e A |
11 | |
12 | /* | |
13 | Note: This file is included by other C and C++ files. This file should not be directly compiled. | |
14 | */ | |
b75a7d8f A |
15 | #ifndef USRCHDAT_C |
16 | #define USRCHDAT_C | |
17 | ||
46f4442e | 18 | #include "unicode/ucol.h" |
b75a7d8f A |
19 | |
20 | #if !UCONFIG_NO_COLLATION | |
21 | ||
46f4442e A |
/*
 * Set to 1 if matches must be on grapheme boundaries.
 * The tables in this file use this switch to select between two sets of
 * expected results for the same text/pattern pairs.
 */
#define GRAPHEME_BOUNDARIES 1
b75a7d8f | 24 | |
46f4442e | 25 | U_CDECL_BEGIN |
b75a7d8f A |
26 | struct SearchData { |
27 | const char *text; | |
28 | const char *pattern; | |
729e4ab9 | 29 | const char *collator; /* currently supported "fr" "es" "de", plus NULL/other => "en" */ |
b75a7d8f | 30 | UCollationStrength strength; |
729e4ab9 A |
31 | USearchAttributeValue elemCompare; /* value for the USEARCH_ELEMENT_COMPARISON attribute */ |
32 | const char *breaker; /* currently supported "wordbreaker" for EN_WORDBREAKER_, plus NULL/other => EN_CHARACTERBREAKER_ */ | |
46f4442e A |
33 | int8_t offset[32]; |
34 | uint8_t size[32]; | |
b75a7d8f | 35 | }; |
46f4442e | 36 | U_CDECL_END |
b75a7d8f A |
37 | |
38 | typedef struct SearchData SearchData; | |
39 | ||
/* Collation rule text; the name suggests it is used to build a test collator (consumer not visible here). */
static const char *TESTCOLLATORRULE = "& o,O ; p,P";

/*
 * Additional rule text relating ae/oe/ue (both cases) to the corresponding
 * umlauted vowels. The "\\uXXXX" escapes are kept as literal backslash
 * sequences in the C string.
 */
static const char *EXTRACOLLATIONRULE = " & ae ; \\u00e4 & AE ; \\u00c4 & oe ; \\u00f6 & OE ; \\u00d6 & ue ; \\u00fc & UE ; \\u00dc";
43 | ||
44 | static const SearchData BASIC[] = { | |
729e4ab9 A |
45 | {"xxxxxxxxxxxxxxxxxxxx", "fisher", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {-1}, {0}}, |
46 | {"silly spring string", "string", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {13, -1}, | |
b75a7d8f | 47 | {6}}, |
729e4ab9 | 48 | {"silly spring string string", "string", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, |
b75a7d8f | 49 | {13, 20, -1}, {6, 6}}, |
729e4ab9 | 50 | {"silly string spring string", "string", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, |
b75a7d8f | 51 | {6, 20, -1}, {6, 6}}, |
729e4ab9 | 52 | {"string spring string", "string", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {0, 14, -1}, |
b75a7d8f | 53 | {6, 6}}, |
729e4ab9 A |
54 | {"Scott Ganyo", "c", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {1, -1}, {1}}, |
55 | {"Scott Ganyo", " ", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {5, -1}, {1}}, | |
56 | {"\\u0300\\u0325", "\\u0300", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {-1}, {0}}, | |
57 | {"a\\u0300\\u0325", "\\u0300", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {-1}, {0}}, | |
46f4442e A |
58 | |
59 | #if GRAPHEME_BOUNDARIES | |
729e4ab9 A |
60 | {"a\\u0300\\u0325", "\\u0300\\u0325", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {-1}, {0}}, |
61 | {"a\\u0300b", "\\u0300", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {-1}, {0}}, | |
46f4442e | 62 | #else |
729e4ab9 A |
63 | {"a\\u0300\\u0325", "\\u0300\\u0325", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {1, -1}, {2}}, |
64 | {"a\\u0300b", "\\u0300", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {1, -1}, {1}}, | |
46f4442e A |
65 | #endif |
66 | ||
729e4ab9 | 67 | {"\\u00c9", "e", NULL, UCOL_PRIMARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {0, -1}, {1}}, |
4388f060 | 68 | {"x a\\u0301", "a\\u0301", NULL, UCOL_PRIMARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {2, -1}, {2}}, |
729e4ab9 | 69 | {NULL, NULL, NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {-1}, {0}} |
b75a7d8f A |
70 | }; |
71 | ||
72 | static const SearchData BREAKITERATOREXACT[] = { | |
729e4ab9 | 73 | {"foxy fox", "fox", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, "characterbreaker", {0, 5, -1}, |
b75a7d8f | 74 | {3, 3}}, |
729e4ab9 A |
75 | {"foxy fox", "fox", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, "wordbreaker", {5, -1}, {3}}, |
76 | {"This is a toe T\\u00F6ne", "toe", "de", UCOL_PRIMARY, USEARCH_STANDARD_ELEMENT_COMPARISON, | |
b75a7d8f | 77 | "characterbreaker", {10, 14, -1}, {3, 2}}, |
729e4ab9 | 78 | {"This is a toe T\\u00F6ne", "toe", "de", UCOL_PRIMARY, USEARCH_STANDARD_ELEMENT_COMPARISON, "wordbreaker", |
b75a7d8f A |
79 | {10, -1}, {3}}, |
80 | {"Channel, another channel, more channels, and one last Channel", | |
729e4ab9 | 81 | "Channel", "es", UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, "wordbreaker", {0, 54, -1}, {7, 7}}, |
b75a7d8f | 82 | /* jitterbug 1745 */ |
729e4ab9 | 83 | {"testing that \\u00e9 does not match e", "e", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, |
b75a7d8f A |
84 | "characterbreaker", {1, 17, 30, -1}, {1, 1, 1}}, |
85 | {"testing that string ab\\u00e9cd does not match e", "e", NULL, | |
729e4ab9 A |
86 | UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, "characterbreaker", {1, 28, 41, -1}, {1, 1, 1}}, |
87 | {"\\u00c9", "e", "fr", UCOL_PRIMARY, USEARCH_STANDARD_ELEMENT_COMPARISON, "characterbreaker", {0, -1}, {1}}, | |
46f4442e A |
88 | #if 0 |
89 | /* Problem reported by Dave Bertoni, same as ticket 4279? */ | |
729e4ab9 | 90 | {"\\u0043\\u004F\\u0302\\u0054\\u00C9", "\\u004F", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, "characterbreaker", {1, -1}, {2}}, |
46f4442e | 91 | #endif |
729e4ab9 A |
92 | {NULL, NULL, NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {-1}, {0}} |
93 | }; | |
94 | ||
/*
 * Shared text for the STRENGTH entries that vary strength, element
 * comparison mode and breaker over accented variants of "peche".
 * Built by adjacent string-literal concatenation; the "\\uXXXX" escapes
 * stay as literal backslash sequences in the C string.
 */
#define PECHE_WITH_ACCENTS  "un p\\u00E9ch\\u00E9, " \
                            "\\u00E7a p\\u00E8che par, " \
                            "p\\u00E9cher, " \
                            "une p\\u00EAche, " \
                            "un p\\u00EAcher, " \
                            "j\\u2019ai p\\u00EAch\\u00E9, " \
                            "un p\\u00E9cheur, " \
                            "\\u201Cp\\u00E9che\\u201D, " \
                            "decomp peche\\u0301, " \
                            "base peche"
/* in the above, the interesting words and their offsets
   (counting each \\uXXXX escape as one unit) are:
    3 pe<301>che<301>
   13 pe<300>che
   24 pe<301>cher
   36 pe<302>che
   46 pe<302>cher
   59 pe<302>che<301>
   69 pe<301>cheur
   79 pe<301>che
   94 peche<+301>
  107 peche
*/
b75a7d8f A |
117 | |
118 | static const SearchData STRENGTH[] = { | |
729e4ab9 A |
119 | /*012345678901234567890123456789012345678901234567890123456789*/ |
120 | /*00*/{"The quick brown fox jumps over the lazy foxes", "fox", "en", | |
121 | UCOL_PRIMARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {16, 40, -1}, {3, 3}}, | |
122 | /*01*/{"The quick brown fox jumps over the lazy foxes", "fox", "en", | |
123 | UCOL_PRIMARY, USEARCH_STANDARD_ELEMENT_COMPARISON, "wordbreaker", {16, -1}, {3}}, | |
124 | /*02*/{"blackbirds Pat p\\u00E9ch\\u00E9 p\\u00EAche p\\u00E9cher p\\u00EAcher Tod T\\u00F6ne black Tofu blackbirds Ton PAT toehold blackbird black-bird pat toe big Toe", | |
125 | "peche", "fr", UCOL_PRIMARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {15, 21, 27, 34, -1}, {5, 5, 5, 5}}, | |
126 | /*03*/{"This is a toe T\\u00F6ne", "toe", "de", UCOL_PRIMARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, | |
127 | {10, 14, -1}, {3, 2}}, | |
128 | /*04*/{"A channel, another CHANNEL, more Channels, and one last channel...", | |
129 | "channel", "es", UCOL_PRIMARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {2, 19, 33, 56, -1}, {7, 7, 7, 7}}, | |
130 | /*05*/{"\\u00c0 should match but not A", "A\\u0300", "en", UCOL_IDENTICAL, USEARCH_STANDARD_ELEMENT_COMPARISON, | |
131 | NULL, {0, -1}, {1, 0}}, | |
132 | /* some tests for modified element comparison, ticket #7093 */ | |
133 | /*06*/{PECHE_WITH_ACCENTS, "peche", "en", UCOL_PRIMARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {3, 13, 24, 36, 46, 59, 69, 79, 94, 107, -1}, {5, 5, 5, 5, 5, 5, 5, 5, 6, 5}}, | |
134 | /*07*/{PECHE_WITH_ACCENTS, "peche", "en", UCOL_PRIMARY, USEARCH_STANDARD_ELEMENT_COMPARISON, "wordbreaker", {3, 13, 36, 59, 79, 94, 107, -1}, {5, 5, 5, 5, 5, 6, 5}}, | |
135 | /*08*/{PECHE_WITH_ACCENTS, "peche", "en", UCOL_SECONDARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {107, -1}, {5}}, | |
136 | /*09*/{PECHE_WITH_ACCENTS, "peche", "en", UCOL_SECONDARY, USEARCH_PATTERN_BASE_WEIGHT_IS_WILDCARD, NULL, {3, 13, 24, 36, 46, 59, 69, 79, 94, 107, -1}, {5, 5, 5, 5, 5, 5, 5, 5, 6, 5}}, | |
137 | /*10*/{PECHE_WITH_ACCENTS, "peche", "en", UCOL_SECONDARY, USEARCH_PATTERN_BASE_WEIGHT_IS_WILDCARD, "wordbreaker", {3, 13, 36, 59, 79, 94, 107, -1}, {5, 5, 5, 5, 5, 6, 5}}, | |
138 | /*11*/{PECHE_WITH_ACCENTS, "p\\u00E9che", "en", UCOL_SECONDARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {24, 69, 79, -1}, {5, 5, 5}}, | |
139 | /*12*/{PECHE_WITH_ACCENTS, "p\\u00E9che", "en", UCOL_SECONDARY, USEARCH_STANDARD_ELEMENT_COMPARISON, "wordbreaker", {79, -1}, {5}}, | |
140 | /*13*/{PECHE_WITH_ACCENTS, "p\\u00E9che", "en", UCOL_SECONDARY, USEARCH_PATTERN_BASE_WEIGHT_IS_WILDCARD, NULL, {3, 24, 69, 79, -1}, {5, 5, 5, 5}}, | |
141 | /*14*/{PECHE_WITH_ACCENTS, "p\\u00E9che", "en", UCOL_SECONDARY, USEARCH_PATTERN_BASE_WEIGHT_IS_WILDCARD, "wordbreaker", {3, 79, -1}, {5, 5}}, | |
142 | /*15*/{PECHE_WITH_ACCENTS, "p\\u00E9che", "en", UCOL_SECONDARY, USEARCH_ANY_BASE_WEIGHT_IS_WILDCARD, NULL, {3, 24, 69, 79, 94, 107, -1}, {5, 5, 5, 5, 6, 5}}, | |
143 | /*16*/{PECHE_WITH_ACCENTS, "p\\u00E9che", "en", UCOL_SECONDARY, USEARCH_ANY_BASE_WEIGHT_IS_WILDCARD, "wordbreaker", {3, 79, 94, 107, -1}, {5, 5, 6, 5}}, | |
144 | /*17*/{PECHE_WITH_ACCENTS, "pech\\u00E9", "en", UCOL_SECONDARY, USEARCH_PATTERN_BASE_WEIGHT_IS_WILDCARD, NULL, {3, 59, 94, -1}, {5, 5, 6}}, | |
145 | /*18*/{PECHE_WITH_ACCENTS, "pech\\u00E9", "en", UCOL_SECONDARY, USEARCH_PATTERN_BASE_WEIGHT_IS_WILDCARD, "wordbreaker", {3, 59, 94, -1}, {5, 5, 6}}, | |
146 | /*19*/{PECHE_WITH_ACCENTS, "pech\\u00E9", "en", UCOL_SECONDARY, USEARCH_ANY_BASE_WEIGHT_IS_WILDCARD, NULL, {3, 13, 24, 36, 46, 59, 69, 79, 94, 107, -1}, {5, 5, 5, 5, 5, 5, 5, 5, 6, 5}}, | |
147 | /*20*/{PECHE_WITH_ACCENTS, "pech\\u00E9", "en", UCOL_SECONDARY, USEARCH_ANY_BASE_WEIGHT_IS_WILDCARD, "wordbreaker", {3, 13, 36, 59, 79, 94, 107, -1}, {5, 5, 5, 5, 5, 6, 5}}, | |
148 | /*21*/{PECHE_WITH_ACCENTS, "peche\\u0301", "en", UCOL_SECONDARY, USEARCH_PATTERN_BASE_WEIGHT_IS_WILDCARD, NULL, {3, 59, 94, -1}, {5, 5, 6}}, | |
149 | /*22*/{PECHE_WITH_ACCENTS, "peche\\u0301", "en", UCOL_SECONDARY, USEARCH_PATTERN_BASE_WEIGHT_IS_WILDCARD, "wordbreaker", {3, 59, 94, -1}, {5, 5, 6}}, | |
150 | /*23*/{PECHE_WITH_ACCENTS, "peche\\u0301", "en", UCOL_SECONDARY, USEARCH_ANY_BASE_WEIGHT_IS_WILDCARD, NULL, {3, 13, 24, 36, 46, 59, 69, 79, 94, 107, -1}, {5, 5, 5, 5, 5, 5, 5, 5, 6, 5}}, | |
151 | /*24*/{PECHE_WITH_ACCENTS, "peche\\u0301", "en", UCOL_SECONDARY, USEARCH_ANY_BASE_WEIGHT_IS_WILDCARD, "wordbreaker", {3, 13, 36, 59, 79, 94, 107, -1}, {5, 5, 5, 5, 5, 6, 5}}, | |
152 | /* more tests for modified element comparison (with fr), ticket #7093 */ | |
153 | /*25*/{PECHE_WITH_ACCENTS, "peche", "fr", UCOL_PRIMARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {3, 13, 24, 36, 46, 59, 69, 79, 94, 107, -1}, {5, 5, 5, 5, 5, 5, 5, 5, 6, 5}}, | |
154 | /*26*/{PECHE_WITH_ACCENTS, "peche", "fr", UCOL_PRIMARY, USEARCH_STANDARD_ELEMENT_COMPARISON, "wordbreaker", {3, 13, 36, 59, 79, 94, 107, -1}, {5, 5, 5, 5, 5, 6, 5}}, | |
155 | /*27*/{PECHE_WITH_ACCENTS, "peche", "fr", UCOL_SECONDARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {107, -1}, {5}}, | |
156 | /*28*/{PECHE_WITH_ACCENTS, "peche", "fr", UCOL_SECONDARY, USEARCH_PATTERN_BASE_WEIGHT_IS_WILDCARD, NULL, {3, 13, 24, 36, 46, 59, 69, 79, 94, 107, -1}, {5, 5, 5, 5, 5, 5, 5, 5, 6, 5}}, | |
157 | /*29*/{PECHE_WITH_ACCENTS, "peche", "fr", UCOL_SECONDARY, USEARCH_PATTERN_BASE_WEIGHT_IS_WILDCARD, "wordbreaker", {3, 13, 36, 59, 79, 94, 107, -1}, {5, 5, 5, 5, 5, 6, 5}}, | |
158 | /*30*/{PECHE_WITH_ACCENTS, "p\\u00E9che", "fr", UCOL_SECONDARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {24, 69, 79, -1}, {5, 5, 5}}, | |
159 | /*31*/{PECHE_WITH_ACCENTS, "p\\u00E9che", "fr", UCOL_SECONDARY, USEARCH_STANDARD_ELEMENT_COMPARISON, "wordbreaker", {79, -1}, {5}}, | |
160 | /*32*/{PECHE_WITH_ACCENTS, "p\\u00E9che", "fr", UCOL_SECONDARY, USEARCH_PATTERN_BASE_WEIGHT_IS_WILDCARD, NULL, {3, 24, 69, 79, -1}, {5, 5, 5, 5}}, | |
161 | /*33*/{PECHE_WITH_ACCENTS, "p\\u00E9che", "fr", UCOL_SECONDARY, USEARCH_PATTERN_BASE_WEIGHT_IS_WILDCARD, "wordbreaker", {3, 79, -1}, {5, 5}}, | |
162 | /*34*/{PECHE_WITH_ACCENTS, "p\\u00E9che", "fr", UCOL_SECONDARY, USEARCH_ANY_BASE_WEIGHT_IS_WILDCARD, NULL, {3, 24, 69, 79, 94, 107, -1}, {5, 5, 5, 5, 6, 5}}, | |
163 | /*35*/{PECHE_WITH_ACCENTS, "p\\u00E9che", "fr", UCOL_SECONDARY, USEARCH_ANY_BASE_WEIGHT_IS_WILDCARD, "wordbreaker", {3, 79, 94, 107, -1}, {5, 5, 6, 5}}, | |
164 | /*36*/{PECHE_WITH_ACCENTS, "pech\\u00E9", "fr", UCOL_SECONDARY, USEARCH_PATTERN_BASE_WEIGHT_IS_WILDCARD, NULL, {3, 59, 94, -1}, {5, 5, 6}}, | |
165 | /*37*/{PECHE_WITH_ACCENTS, "pech\\u00E9", "fr", UCOL_SECONDARY, USEARCH_PATTERN_BASE_WEIGHT_IS_WILDCARD, "wordbreaker", {3, 59, 94, -1}, {5, 5, 6}}, | |
166 | /*38*/{PECHE_WITH_ACCENTS, "pech\\u00E9", "fr", UCOL_SECONDARY, USEARCH_ANY_BASE_WEIGHT_IS_WILDCARD, NULL, {3, 13, 24, 36, 46, 59, 69, 79, 94, 107, -1}, {5, 5, 5, 5, 5, 5, 5, 5, 6, 5}}, | |
167 | /*39*/{PECHE_WITH_ACCENTS, "pech\\u00E9", "fr", UCOL_SECONDARY, USEARCH_ANY_BASE_WEIGHT_IS_WILDCARD, "wordbreaker", {3, 13, 36, 59, 79, 94, 107, -1}, {5, 5, 5, 5, 5, 6, 5}}, | |
168 | /*40*/{PECHE_WITH_ACCENTS, "peche\\u0301", "fr", UCOL_SECONDARY, USEARCH_PATTERN_BASE_WEIGHT_IS_WILDCARD, NULL, {3, 59, 94, -1}, {5, 5, 6}}, | |
169 | /*41*/{PECHE_WITH_ACCENTS, "peche\\u0301", "fr", UCOL_SECONDARY, USEARCH_PATTERN_BASE_WEIGHT_IS_WILDCARD, "wordbreaker", {3, 59, 94, -1}, {5, 5, 6}}, | |
170 | /*42*/{PECHE_WITH_ACCENTS, "peche\\u0301", "fr", UCOL_SECONDARY, USEARCH_ANY_BASE_WEIGHT_IS_WILDCARD, NULL, {3, 13, 24, 36, 46, 59, 69, 79, 94, 107, -1}, {5, 5, 5, 5, 5, 5, 5, 5, 6, 5}}, | |
171 | /*43*/{PECHE_WITH_ACCENTS, "peche\\u0301", "fr", UCOL_SECONDARY, USEARCH_ANY_BASE_WEIGHT_IS_WILDCARD, "wordbreaker", {3, 13, 36, 59, 79, 94, 107, -1}, {5, 5, 5, 5, 5, 6, 5}}, | |
46f4442e A |
172 | |
173 | #if 0 | |
174 | /* Ticket 5382 */ | |
729e4ab9 | 175 | {"12\\u0171", "\\u0170", NULL, UCOL_SECONDARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {2, -1}, {2}}, |
46f4442e A |
176 | #endif |
177 | ||
729e4ab9 | 178 | {NULL, NULL, NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {-1}, {0}} |
b75a7d8f A |
179 | }; |
180 | ||
181 | static const SearchData VARIABLE[] = { | |
182 | /*012345678901234567890123456789012345678901234567890123456789*/ | |
183 | {"blackbirds black blackbirds blackbird black-bird", | |
729e4ab9 | 184 | "blackbird", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {0, 17, 28, 38, -1}, |
b75a7d8f A |
185 | {9, 9, 9, 10}}, |
186 | /* to see that it doesn't go into an infinite loop if the start of text | |
187 | is a ignorable character */ | |
729e4ab9 A |
188 | {" on", "go", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {-1}, {0}}, |
189 | {"abcdefghijklmnopqrstuvwxyz", " ", NULL, UCOL_PRIMARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, | |
b75a7d8f A |
190 | {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, |
191 | 20, 21, 22, 23, 24, 25, -1}, {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, | |
192 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}}, | |
193 | /* testing tightest match */ | |
729e4ab9 | 194 | {" abc a bc ab c a bc ab c", "abc", NULL, UCOL_QUATERNARY, USEARCH_STANDARD_ELEMENT_COMPARISON, |
b75a7d8f A |
195 | NULL, {1, -1}, {3}}, |
196 | /*012345678901234567890123456789012345678901234567890123456789 */ | |
729e4ab9 | 197 | {" abc a bc ab c a bc ab c", "abc", NULL, UCOL_SECONDARY, USEARCH_STANDARD_ELEMENT_COMPARISON, |
b75a7d8f A |
198 | NULL, {1, 6, 13, 21, 31, -1}, {3, 4, 4, 5, 5}}, |
199 | /* totally ignorable text */ | |
729e4ab9 | 200 | {" ---------------", "abc", NULL, UCOL_SECONDARY, USEARCH_STANDARD_ELEMENT_COMPARISON, |
b75a7d8f | 201 | NULL, {-1}, {0}}, |
729e4ab9 | 202 | {NULL, NULL, NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {-1}, {0}} |
b75a7d8f A |
203 | }; |
204 | ||
205 | static const SearchData NORMEXACT[] = { | |
729e4ab9 | 206 | {"a\\u0300\\u0325", "a\\u0325\\u0300", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {0, -1}, {3}}, |
46f4442e A |
207 | |
208 | #if GRAPHEME_BOUNDARIES | |
729e4ab9 | 209 | {"a\\u0300\\u0325", "\\u0325\\u0300", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {-1}, {0}}, |
46f4442e | 210 | #else |
729e4ab9 | 211 | {"a\\u0300\\u0325", "\\u0325\\u0300", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {1, -1}, {2}}, |
46f4442e A |
212 | #endif |
213 | ||
729e4ab9 | 214 | {NULL, NULL, NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {-1}, {0}} |
b75a7d8f A |
215 | }; |
216 | ||
217 | static const SearchData NONNORMEXACT[] = { | |
729e4ab9 A |
218 | {"a\\u0300\\u0325", "\\u0325\\u0300", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {-1}, {0}}, |
219 | {NULL, NULL, NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {-1}, {0}} | |
b75a7d8f A |
220 | }; |
221 | ||
222 | static const SearchData OVERLAP[] = { | |
729e4ab9 | 223 | {"abababab", "abab", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {0, 2, 4, -1}, |
b75a7d8f | 224 | {4, 4, 4}}, |
729e4ab9 | 225 | {NULL, NULL, NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {-1}, {0}} |
b75a7d8f A |
226 | }; |
227 | ||
228 | static const SearchData NONOVERLAP[] = { | |
729e4ab9 A |
229 | {"abababab", "abab", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {0, 4, -1}, {4, 4}}, |
230 | {NULL, NULL, NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {-1}, {0}} | |
b75a7d8f A |
231 | }; |
232 | ||
233 | static const SearchData COLLATOR[] = { | |
234 | /* english */ | |
729e4ab9 | 235 | {"fox fpx", "fox", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {0, -1}, {3}}, |
b75a7d8f | 236 | /* tailored */ |
729e4ab9 A |
237 | {"fox fpx", "fox", NULL, UCOL_PRIMARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {0, 4, -1}, {3, 3}}, |
238 | {NULL, NULL, NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {-1}, {0}} | |
b75a7d8f A |
239 | }; |
240 | ||
241 | static const SearchData PATTERN[] = { | |
242 | {"The quick brown fox jumps over the lazy foxes", "the", NULL, | |
729e4ab9 | 243 | UCOL_PRIMARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {0, 31, -1}, {3, 3}}, |
b75a7d8f | 244 | {"The quick brown fox jumps over the lazy foxes", "fox", NULL, |
729e4ab9 A |
245 | UCOL_PRIMARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {16, 40, -1}, {3, 3}}, |
246 | {NULL, NULL, NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {-1}, {0}} | |
b75a7d8f A |
247 | }; |
248 | ||
249 | static const SearchData TEXT[] = { | |
729e4ab9 | 250 | {"the foxy brown fox", "fox", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {4, 15, -1}, |
b75a7d8f | 251 | {3, 3}}, |
729e4ab9 | 252 | {"the quick brown fox", "fox", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {16, -1}, |
b75a7d8f | 253 | {3}}, |
729e4ab9 | 254 | {NULL, NULL, NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {-1}, {0}} |
b75a7d8f A |
255 | }; |
256 | ||
257 | static const SearchData COMPOSITEBOUNDARIES[] = { | |
46f4442e | 258 | #if GRAPHEME_BOUNDARIES |
729e4ab9 A |
259 | {"\\u00C0", "A", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {-1}, {0}}, |
260 | {"A\\u00C0C", "A", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {0, -1}, {1}}, | |
261 | {"\\u00C0A", "A", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {1, -1}, {1}}, | |
262 | {"B\\u00C0", "A", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {-1}, {0}}, | |
263 | {"\\u00C0B", "A", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {-1}, {0}}, | |
264 | {"\\u00C0", "\\u0300", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {-1}, {0}}, | |
265 | {"\\u0300\\u00C0", "\\u0300", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {0, -1}, {1}}, | |
46f4442e | 266 | #else |
729e4ab9 A |
267 | {"\\u00C0", "A", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {0, -1}, {1}}, |
268 | {"A\\u00C0C", "A", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {0, 1, -1}, {1, 1}}, | |
269 | {"\\u00C0A", "A", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {0, 1, -1}, {1, 1}}, | |
270 | {"B\\u00C0", "A", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {1, -1}, {1}}, | |
271 | {"\\u00C0B", "A", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {0, -1}, {1}}, | |
272 | {"\\u00C0", "\\u0300", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {0, -1}, {1}}, | |
273 | {"\\u0300\\u00C0", "\\u0300", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {0, 1, -1}, | |
b75a7d8f | 274 | {1, 1}}, |
46f4442e A |
275 | #endif |
276 | ||
729e4ab9 | 277 | {"\\u00C0\\u0300", "\\u0300", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {-1}, {0}}, |
b75a7d8f | 278 | /* A + 030A + 0301 */ |
729e4ab9 A |
279 | {"\\u01FA", "\\u01FA", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {0, -1}, {1}}, |
280 | {"\\u01FA", "A\\u030A\\u0301", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {0, -1}, {1}}, | |
281 | {"\\u01FA", "\\u030A", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {-1}, {0}}, | |
282 | {"\\u01FA", "A\\u030A", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {-1}, {0}}, | |
283 | {"\\u01FA", "\\u030AA", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {-1}, {0}}, | |
284 | {"\\u01FA", "\\u0301", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {-1}, {0}}, | |
285 | {"\\u01FA", "A\\u0301", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {-1}, {0}}, | |
286 | {"\\u01FA", "\\u0301A", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {-1}, {0}}, | |
46f4442e A |
287 | |
288 | #if GRAPHEME_BOUNDARIES | |
729e4ab9 | 289 | {"\\u01FA", "\\u030A\\u0301", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {-1}, {0}}, |
46f4442e | 290 | #else |
729e4ab9 | 291 | {"\\u01FA", "\\u030A\\u0301", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {0, -1}, {1}}, |
46f4442e A |
292 | #endif |
293 | ||
729e4ab9 A |
294 | {"A\\u01FA", "A\\u030A", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {-1}, {0}}, |
295 | {"\\u01FAA", "\\u0301A", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {-1}, {0}}, | |
296 | {"\\u0F73", "\\u0F73", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {0, -1}, {1}}, | |
297 | {"\\u0F73", "\\u0F71", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {-1}, {0}}, | |
298 | {"\\u0F73", "\\u0F72", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {-1}, {0}}, | |
299 | {"\\u0F73", "\\u0F71\\u0F72", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {0, -1}, {1}}, | |
300 | {"A\\u0F73", "A\\u0F71", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {-1}, {0}}, | |
301 | {"\\u0F73A", "\\u0F72A", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {-1}, {0}}, | |
46f4442e A |
302 | |
303 | /* Ticket 5024 */ | |
729e4ab9 | 304 | {"a\\u00e1", "a\\u00e1", NULL, UCOL_SECONDARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {0, -1}, {2}}, |
46f4442e A |
305 | |
306 | /* Ticket 5420 */ | |
729e4ab9 A |
307 | {"fu\\u00dfball", "fu\\u00df", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {0, -1}, {3}}, |
308 | {"fu\\u00dfball", "fuss", NULL, UCOL_PRIMARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {0, -1}, {3}}, | |
309 | {"fu\\u00dfball", "uss", NULL, UCOL_PRIMARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {1, -1}, {2}}, | |
46f4442e | 310 | |
729e4ab9 | 311 | {NULL, NULL, NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {-1}, {0}} |
b75a7d8f A |
312 | }; |
313 | ||
314 | static const SearchData MATCH[] = { | |
729e4ab9 | 315 | {"a busy bee is a very busy beeee", "bee", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, |
b75a7d8f A |
316 | {7, 26, -1}, {3, 3}}, |
317 | /* 012345678901234567890123456789012345678901234567890 */ | |
318 | {"a busy bee is a very busy beeee with no bee life", "bee", NULL, | |
729e4ab9 A |
319 | UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {7, 26, 40, -1}, {3, 3, 3}}, |
320 | {NULL, NULL, NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {-1}, {0}} | |
b75a7d8f A |
321 | }; |
322 | ||
323 | static const SearchData SUPPLEMENTARY[] = { | |
324 | /* 012345678901234567890123456789012345678901234567890012345678901234567890123456789012345678901234567890012345678901234567890123456789 */ | |
325 | {"abc \\uD800\\uDC00 \\uD800\\uDC01 \\uD801\\uDC00 \\uD800\\uDC00abc abc\\uD800\\uDC00 \\uD800\\uD800\\uDC00 \\uD800\\uDC00\\uDC00", | |
729e4ab9 | 326 | "\\uD800\\uDC00", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {4, 13, 22, 26, 29, -1}, |
b75a7d8f | 327 | {2, 2, 2, 2, 2}}, |
374ca955 | 328 | {"and\\uD834\\uDDB9this sentence", "\\uD834\\uDDB9", NULL, |
729e4ab9 | 329 | UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {3, -1}, {2}}, |
374ca955 | 330 | {"and \\uD834\\uDDB9 this sentence", " \\uD834\\uDDB9 ", NULL, |
729e4ab9 | 331 | UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {3, -1}, {4}}, |
374ca955 | 332 | {"and-\\uD834\\uDDB9-this sentence", "-\\uD834\\uDDB9-", NULL, |
729e4ab9 | 333 | UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {3, -1}, {4}}, |
374ca955 | 334 | {"and,\\uD834\\uDDB9,this sentence", ",\\uD834\\uDDB9,", NULL, |
729e4ab9 | 335 | UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {3, -1}, {4}}, |
374ca955 | 336 | {"and?\\uD834\\uDDB9?this sentence", "?\\uD834\\uDDB9?", NULL, |
729e4ab9 A |
337 | UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {3, -1}, {4}}, |
338 | {NULL, NULL, NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {-1}, {0}} | |
b75a7d8f A |
339 | }; |
340 | ||
/*
 * Rule text defining the contractions and the expansion ("ab/c") referred
 * to by the comments in the CONTRACTION table. The "\\uXXXX" escapes stay
 * as literal backslash sequences in the C string.
 */
static const char *CONTRACTIONRULE =
    "&z = ab/c < AB < X\\u0300 < ABC < X\\u0300\\u0315";
343 | ||
344 | static const SearchData CONTRACTION[] = { | |
345 | /* common discontiguous */ | |
729e4ab9 | 346 | {"A\\u0300\\u0315", "\\u0300", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {-1}, {0}}, |
46f4442e A |
347 | |
348 | #if GRAPHEME_BOUNDARIES | |
729e4ab9 | 349 | {"A\\u0300\\u0315", "\\u0300\\u0315", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {-1}, {0}}, |
46f4442e | 350 | #else |
729e4ab9 | 351 | {"A\\u0300\\u0315", "\\u0300\\u0315", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {1, -1}, {2}}, |
46f4442e A |
352 | #endif |
353 | ||
b75a7d8f | 354 | /* contraction prefix */ |
729e4ab9 | 355 | {"AB\\u0315C", "A", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {-1}, {0}}, |
46f4442e A |
356 | |
357 | #if GRAPHEME_BOUNDARIES | |
729e4ab9 A |
358 | {"AB\\u0315C", "AB", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {-1}, {0}}, |
359 | {"AB\\u0315C", "\\u0315", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {-1}, {0}}, | |
46f4442e | 360 | #else |
729e4ab9 A |
361 | {"AB\\u0315C", "AB", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {0, -1}, {2}}, |
362 | {"AB\\u0315C", "\\u0315", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {2, -1}, {1}}, | |
46f4442e A |
363 | #endif |
364 | ||
b75a7d8f A |
365 | /* discontiguous problem here for backwards iteration. |
366 | accents not found because discontiguous stores all information */ | |
729e4ab9 | 367 | {"X\\u0300\\u0319\\u0315", "\\u0319", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {-1}, |
b75a7d8f A |
368 | {0}}, |
369 | /* ends not with a contraction character */ | |
729e4ab9 | 370 | {"X\\u0315\\u0300D", "\\u0300\\u0315", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {-1}, |
b75a7d8f | 371 | {0}}, |
729e4ab9 | 372 | {"X\\u0315\\u0300D", "X\\u0300\\u0315", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, |
b75a7d8f | 373 | {0, -1}, {3}}, |
729e4ab9 | 374 | {"X\\u0300\\u031A\\u0315D", "X\\u0300", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {-1}, |
b75a7d8f A |
375 | {0}}, |
376 | /* blocked discontiguous */ | |
729e4ab9 | 377 | {"X\\u0300\\u031A\\u0315D", "\\u031A\\u0315D", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, |
b75a7d8f | 378 | {-1}, {0}}, |
46f4442e A |
379 | |
380 | #if GRAPHEME_BOUNDARIES | |
381 | /* | |
382 | * "ab" generates a contraction that's an expansion. The "z" matches the | |
383 | * first CE of the expansion but the match fails because it ends in the | |
384 | * middle of an expansion... | |
385 | */ | |
729e4ab9 | 386 | {"ab", "z", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {-1}, {0}}, |
46f4442e | 387 | #else |
729e4ab9 | 388 | {"ab", "z", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {0, -1}, {2}}, |
46f4442e A |
389 | #endif |
390 | ||
729e4ab9 | 391 | {NULL, NULL, NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {-1}, {0}} |
b75a7d8f A |
392 | }; |
393 | ||
/*
 * Rule text that makes U+0300 equal to "a". The "\\u0300" escape stays as
 * a literal backslash sequence in the C string.
 */
static const char *IGNORABLERULE = "&a = \\u0300";
395 | ||
396 | static const SearchData IGNORABLE[] = { | |
46f4442e A |
397 | #if GRAPHEME_BOUNDARIES |
398 | /* | |
399 | * This isn't much of a test when matches have to be on | |
400 | * grapheme boundiaries. The match at 0 only works because | |
401 | * it's at the start of the text. | |
402 | */ | |
729e4ab9 | 403 | {"\\u0300\\u0315 \\u0300\\u0315 ", "\\u0300", NULL, UCOL_PRIMARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, |
46f4442e A |
404 | {0, -1}, {2}}, |
405 | #else | |
729e4ab9 | 406 | {"\\u0300\\u0315 \\u0300\\u0315 ", "\\u0300", NULL, UCOL_PRIMARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, |
46f4442e A |
407 | {0, 3, -1}, {2, 2}}, |
408 | #endif | |
409 | ||
729e4ab9 | 410 | {NULL, NULL, NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {-1}, {0}} |
b75a7d8f A |
411 | }; |
412 | ||
413 | static const SearchData BASICCANONICAL[] = { | |
729e4ab9 A |
414 | {"xxxxxxxxxxxxxxxxxxxx", "fisher", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {-1}, {0}}, |
415 | {"silly spring string", "string", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {13, -1}, | |
b75a7d8f | 416 | {6}}, |
729e4ab9 | 417 | {"silly spring string string", "string", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, |
b75a7d8f | 418 | {13, 20, -1}, {6, 6}}, |
729e4ab9 | 419 | {"silly string spring string", "string", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, |
b75a7d8f | 420 | {6, 20, -1}, {6, 6}}, |
729e4ab9 | 421 | {"string spring string", "string", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {0, 14, -1}, |
b75a7d8f | 422 | {6, 6}}, |
729e4ab9 A |
423 | {"Scott Ganyo", "c", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {1, -1}, {1}}, |
424 | {"Scott Ganyo", " ", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {5, -1}, {1}}, | |
46f4442e A |
425 | |
426 | #if GRAPHEME_BOUNDARIES | |
729e4ab9 A |
427 | {"\\u0300\\u0325", "\\u0300", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {-1}, {0}}, |
428 | {"a\\u0300\\u0325", "\\u0300", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {-1}, {0}}, | |
429 | {"a\\u0300\\u0325", "\\u0300\\u0325", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {-1}, {0}}, | |
430 | {"a\\u0300b", "\\u0300", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {-1}, {0}}, | |
431 | {"a\\u0300\\u0325b", "\\u0300b", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {-1}, {0}}, | |
432 | {"\\u0325\\u0300A\\u0325\\u0300", "\\u0300A\\u0300", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, | |
46f4442e | 433 | NULL, {-1}, {0}}, |
729e4ab9 | 434 | {"\\u0325\\u0300A\\u0325\\u0300", "\\u0325A\\u0325", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, |
46f4442e A |
435 | NULL, {-1}, {0}}, |
436 | {"a\\u0300\\u0325b\\u0300\\u0325c \\u0325b\\u0300 \\u0300b\\u0325", | |
729e4ab9 | 437 | "\\u0300b\\u0325", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {-1}, {0}}, |
46f4442e | 438 | #else |
729e4ab9 A |
439 | {"\\u0300\\u0325", "\\u0300", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {0, -1}, {2}}, |
440 | {"a\\u0300\\u0325", "\\u0300", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {1, -1}, {2}}, | |
441 | {"a\\u0300\\u0325", "\\u0300\\u0325", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {1, -1}, | |
b75a7d8f | 442 | {2}}, |
729e4ab9 A |
443 | {"a\\u0300b", "\\u0300", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {1, -1}, {1}}, |
444 | {"a\\u0300\\u0325b", "\\u0300b", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {1, -1}, {3}}, | |
445 | {"\\u0325\\u0300A\\u0325\\u0300", "\\u0300A\\u0300", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, | |
b75a7d8f | 446 | NULL, {0, -1}, {5}}, |
729e4ab9 | 447 | {"\\u0325\\u0300A\\u0325\\u0300", "\\u0325A\\u0325", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, |
b75a7d8f A |
448 | NULL, {0, -1}, {5}}, |
449 | {"a\\u0300\\u0325b\\u0300\\u0325c \\u0325b\\u0300 \\u0300b\\u0325", | |
729e4ab9 | 450 | "\\u0300b\\u0325", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {1, 12, -1}, {5, 3}}, |
46f4442e A |
451 | #endif |
452 | ||
729e4ab9 A |
453 | {"\\u00c4\\u0323", "A\\u0323\\u0308", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {0, -1}, {2}}, |
454 | {"\\u0308\\u0323", "\\u0323\\u0308", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {0, -1}, {2}}, | |
455 | {NULL, NULL, NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {-1}, {0}} | |
b75a7d8f A |
456 | }; |
457 | ||
46f4442e | 458 | |
b75a7d8f | 459 | static const SearchData NORMCANONICAL[] = { |
46f4442e A |
460 | #if GRAPHEME_BOUNDARIES |
461 | /* | |
462 | * These tests don't really mean anything. With matches restricted to grapheme | |
463 | * boundaries, isCanonicalMatch doesn't mean anything unless normalization is | |
464 | * also turned on... | |
465 | */ | |
729e4ab9 A |
466 | {"\\u0300\\u0325", "\\u0300", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {-1}, {0}}, |
467 | {"\\u0300\\u0325", "\\u0325", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {-1}, {0}}, | |
468 | {"a\\u0300\\u0325", "\\u0325\\u0300", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {-1}, {0}}, | |
469 | {"a\\u0300\\u0325", "\\u0300\\u0325", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {-1}, {0}}, | |
470 | {"a\\u0300\\u0325", "\\u0325", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {-1}, {0}}, | |
471 | {"a\\u0300\\u0325", "\\u0300", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {-1}, {0}}, | |
46f4442e | 472 | #else |
729e4ab9 A |
473 | {"\\u0300\\u0325", "\\u0300", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {0, -1}, {2}}, |
474 | {"\\u0300\\u0325", "\\u0325", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {0, -1}, {2}}, | |
475 | {"a\\u0300\\u0325", "\\u0325\\u0300", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {1, -1}, | |
b75a7d8f | 476 | {2}}, |
729e4ab9 | 477 | {"a\\u0300\\u0325", "\\u0300\\u0325", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {1, -1}, |
b75a7d8f | 478 | {2}}, |
729e4ab9 A |
479 | {"a\\u0300\\u0325", "\\u0325", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {1, -1}, {2}}, |
480 | {"a\\u0300\\u0325", "\\u0300", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {1, -1}, {2}}, | |
46f4442e A |
481 | #endif |
482 | ||
729e4ab9 | 483 | {NULL, NULL, NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {-1}, {0}} |
b75a7d8f A |
484 | }; |
485 | ||
486 | static const SearchData BREAKITERATORCANONICAL[] = { | |
729e4ab9 | 487 | {"foxy fox", "fox", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, "characterbreaker", {0, 5, -1}, |
b75a7d8f | 488 | {3, 3}}, |
729e4ab9 A |
489 | {"foxy fox", "fox", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, "wordbreaker", {5, -1}, {3}}, |
490 | {"This is a toe T\\u00F6ne", "toe", "de", UCOL_PRIMARY, USEARCH_STANDARD_ELEMENT_COMPARISON, | |
b75a7d8f | 491 | "characterbreaker", {10, 14, -1}, {3, 2}}, |
729e4ab9 | 492 | {"This is a toe T\\u00F6ne", "toe", "de", UCOL_PRIMARY, USEARCH_STANDARD_ELEMENT_COMPARISON, "wordbreaker", |
b75a7d8f A |
493 | {10, -1}, {3}}, |
494 | {"Channel, another channel, more channels, and one last Channel", | |
729e4ab9 | 495 | "Channel", "es", UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, "wordbreaker", {0, 54, -1}, {7, 7}}, |
b75a7d8f | 496 | /* jitterbug 1745 */ |
729e4ab9 | 497 | {"testing that \\u00e9 does not match e", "e", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, |
b75a7d8f A |
498 | "characterbreaker", {1, 17, 30, -1}, {1, 1, 1}}, |
499 | {"testing that string ab\\u00e9cd does not match e", "e", NULL, | |
729e4ab9 A |
500 | UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, "characterbreaker", {1, 28, 41, -1}, {1, 1, 1}}, |
501 | {"\\u00c9", "e", "fr", UCOL_PRIMARY, USEARCH_STANDARD_ELEMENT_COMPARISON, "characterbreaker", {0, -1}, {1}}, | |
502 | {NULL, NULL, NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {-1}, {0}} | |
b75a7d8f A |
503 | }; |
504 | ||
505 | static const SearchData STRENGTHCANONICAL[] = { | |
506 | /*012345678901234567890123456789012345678901234567890123456789 */ | |
507 | {"The quick brown fox jumps over the lazy foxes", "fox", "en", | |
729e4ab9 | 508 | UCOL_PRIMARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {16, 40, -1}, {3, 3}}, |
b75a7d8f | 509 | {"The quick brown fox jumps over the lazy foxes", "fox", "en", |
729e4ab9 | 510 | UCOL_PRIMARY, USEARCH_STANDARD_ELEMENT_COMPARISON, "wordbreaker", {16, -1}, {3}}, |
b75a7d8f | 511 | {"blackbirds Pat p\\u00E9ch\\u00E9 p\\u00EAche p\\u00E9cher p\\u00EAcher Tod T\\u00F6ne black Tofu blackbirds Ton PAT toehold blackbird black-bird pat toe big Toe", |
729e4ab9 A |
512 | "peche", "fr", UCOL_PRIMARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {15, 21, 27, 34, -1}, {5, 5, 5, 5}}, |
513 | {"This is a toe T\\u00F6ne", "toe", "de", UCOL_PRIMARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, | |
b75a7d8f A |
514 | {10, 14, -1}, {3, 2}}, |
515 | {"A channel, another CHANNEL, more Channels, and one last channel...", | |
729e4ab9 | 516 | "channel", "es", UCOL_PRIMARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {2, 19, 33, 56, -1}, |
b75a7d8f | 517 | {7, 7, 7, 7}}, |
729e4ab9 | 518 | {NULL, NULL, NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {-1}, {0}} |
b75a7d8f A |
519 | }; |
520 | ||
521 | static const SearchData VARIABLECANONICAL[] = { | |
522 | /*012345678901234567890123456789012345678901234567890123456789 */ | |
523 | {"blackbirds black blackbirds blackbird black-bird", | |
729e4ab9 | 524 | "blackbird", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {0, 17, 28, 38, -1}, |
b75a7d8f A |
525 | {9, 9, 9, 10}}, |
526 | /* to see that it doesn't go into an infinite loop if the start of text | |
527 | is an ignorable character */ | |
729e4ab9 A |
528 | {" on", "go", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {-1}, {0}}, |
529 | {"abcdefghijklmnopqrstuvwxyz", " ", NULL, UCOL_PRIMARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, | |
b75a7d8f A |
530 | {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, |
531 | 20, 21, 22, 23, 24, 25, -1}, {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, | |
532 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}}, | |
533 | /* testing tightest match */ | |
729e4ab9 | 534 | {" abc a bc ab c a bc ab c", "abc", NULL, UCOL_QUATERNARY, USEARCH_STANDARD_ELEMENT_COMPARISON, |
b75a7d8f A |
535 | NULL, {1, -1}, {3}}, |
536 | /*012345678901234567890123456789012345678901234567890123456789 */ | |
729e4ab9 | 537 | {" abc a bc ab c a bc ab c", "abc", NULL, UCOL_SECONDARY, USEARCH_STANDARD_ELEMENT_COMPARISON, |
b75a7d8f A |
538 | NULL, {1, 6, 13, 21, 31, -1}, {3, 4, 4, 5, 5}}, |
539 | /* totally ignorable text */ | |
729e4ab9 | 540 | {" ---------------", "abc", NULL, UCOL_SECONDARY, USEARCH_STANDARD_ELEMENT_COMPARISON, |
b75a7d8f | 541 | NULL, {-1}, {0}}, |
729e4ab9 | 542 | {NULL, NULL, NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {-1}, {0}} |
b75a7d8f A |
543 | }; |
544 | ||
545 | static const SearchData OVERLAPCANONICAL[] = { | |
729e4ab9 | 546 | {"abababab", "abab", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {0, 2, 4, -1}, |
b75a7d8f | 547 | {4, 4, 4}}, |
729e4ab9 | 548 | {NULL, NULL, NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {-1}, {0}} |
b75a7d8f A |
549 | }; |
550 | ||
551 | static const SearchData NONOVERLAPCANONICAL[] = { | |
729e4ab9 A |
552 | {"abababab", "abab", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {0, 4, -1}, {4, 4}}, |
553 | {NULL, NULL, NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {-1}, {0}} | |
b75a7d8f A |
554 | }; |
555 | ||
556 | static const SearchData COLLATORCANONICAL[] = { | |
557 | /* english */ | |
729e4ab9 | 558 | {"fox fpx", "fox", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {0, -1}, {3}}, |
b75a7d8f | 559 | /* tailored */ |
729e4ab9 A |
560 | {"fox fpx", "fox", NULL, UCOL_PRIMARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {0, 4, -1}, {3, 3}}, |
561 | {NULL, NULL, NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {-1}, {0}} | |
b75a7d8f A |
562 | }; |
563 | ||
564 | static const SearchData PATTERNCANONICAL[] = { | |
565 | {"The quick brown fox jumps over the lazy foxes", "the", NULL, | |
729e4ab9 | 566 | UCOL_PRIMARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {0, 31, -1}, {3, 3}}, |
b75a7d8f | 567 | {"The quick brown fox jumps over the lazy foxes", "fox", NULL, |
729e4ab9 A |
568 | UCOL_PRIMARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {16, 40, -1}, {3, 3}}, |
569 | {NULL, NULL, NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {-1}, {0}} | |
b75a7d8f A |
570 | }; |
571 | ||
572 | static const SearchData TEXTCANONICAL[] = { | |
729e4ab9 | 573 | {"the foxy brown fox", "fox", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {4, 15, -1}, |
b75a7d8f | 574 | {3, 3}}, |
729e4ab9 | 575 | {"the quick brown fox", "fox", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {16, -1}, |
b75a7d8f | 576 | {3}}, |
729e4ab9 | 577 | {NULL, NULL, NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {-1}, {0}} |
b75a7d8f A |
578 | }; |
579 | ||
580 | static const SearchData COMPOSITEBOUNDARIESCANONICAL[] = { | |
46f4442e | 581 | #if GRAPHEME_BOUNDARIES |
729e4ab9 A |
582 | {"\\u00C0", "A", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {-1}, {0}}, |
583 | {"A\\u00C0C", "A", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {0, -1}, {1}}, | |
584 | {"\\u00C0A", "A", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {1, -1}, {1}}, | |
585 | {"B\\u00C0", "A", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {-1}, {0}}, | |
586 | {"\\u00C0B", "A", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {-1}, {0}}, | |
587 | {"\\u00C0", "\\u0300", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {-1}, {0}}, | |
46f4442e A |
588 | |
589 | /* first one matches only because it's at the start of the text */ | |
729e4ab9 | 590 | {"\\u0300\\u00C0", "\\u0300", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {0, -1}, {1}}, |
46f4442e A |
591 | |
592 | /* \\u0300 blocked by \\u0300 */ | |
729e4ab9 | 593 | {"\\u00C0\\u0300", "\\u0300", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {-1}, {0}}, |
46f4442e | 594 | #else |
729e4ab9 A |
595 | {"\\u00C0", "A", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {0, -1}, {1}}, |
596 | {"A\\u00C0C", "A", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {0, 1, -1}, {1, 1}}, | |
597 | {"\\u00C0A", "A", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {0, 1, -1}, {1, 1}}, | |
598 | {"B\\u00C0", "A", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {1, -1}, {1}}, | |
599 | {"\\u00C0B", "A", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {0, -1}, {1}}, | |
600 | {"\\u00C0", "\\u0300", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {0, -1}, {1}}, | |
601 | {"\\u0300\\u00C0", "\\u0300", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {0, 1, -1}, | |
b75a7d8f A |
602 | {1, 1}}, |
603 | /* \\u0300 blocked by \\u0300 */ | |
729e4ab9 | 604 | {"\\u00C0\\u0300", "\\u0300", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {0, -1}, {2}}, |
46f4442e A |
605 | #endif |
606 | ||
b75a7d8f | 607 | /* A + 030A + 0301 */ |
729e4ab9 A |
608 | {"\\u01FA", "\\u01FA", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {0, -1}, {1}}, |
609 | {"\\u01FA", "A\\u030A\\u0301", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {0, -1}, {1}}, | |
46f4442e A |
610 | |
611 | #if GRAPHEME_BOUNDARIES | |
729e4ab9 A |
612 | {"\\u01FA", "\\u030A", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {-1}, {0}}, |
613 | {"\\u01FA", "A\\u030A", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {-1}, {0}}, | |
46f4442e | 614 | #else |
729e4ab9 A |
615 | {"\\u01FA", "\\u030A", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {0, -1}, {1}}, |
616 | {"\\u01FA", "A\\u030A", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {0, -1}, {1}}, | |
46f4442e A |
617 | #endif |
618 | ||
729e4ab9 | 619 | {"\\u01FA", "\\u030AA", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {-1}, {0}}, |
46f4442e A |
620 | |
621 | #if GRAPHEME_BOUNDARIES | |
729e4ab9 | 622 | {"\\u01FA", "\\u0301", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {-1}, {0}}, |
46f4442e | 623 | #else |
729e4ab9 | 624 | {"\\u01FA", "\\u0301", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {0, -1}, {1}}, |
46f4442e A |
625 | #endif |
626 | ||
b75a7d8f | 627 | /* blocked accent */ |
729e4ab9 A |
628 | {"\\u01FA", "A\\u0301", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {-1}, {0}}, |
629 | {"\\u01FA", "\\u0301A", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {-1}, {0}}, | |
46f4442e A |
630 | |
631 | #if GRAPHEME_BOUNDARIES | |
729e4ab9 A |
632 | {"\\u01FA", "\\u030A\\u0301", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {-1}, {0}}, |
633 | {"A\\u01FA", "A\\u030A", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {-1}, {0}}, | |
634 | {"\\u01FAA", "\\u0301A", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {-1}, {0}}, | |
46f4442e | 635 | #else |
729e4ab9 A |
636 | {"\\u01FA", "\\u030A\\u0301", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {0, -1}, {1}}, |
637 | {"A\\u01FA", "A\\u030A", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {1, -1}, {1}}, | |
638 | {"\\u01FAA", "\\u0301A", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {0, -1}, {2}}, | |
46f4442e A |
639 | #endif |
640 | ||
729e4ab9 | 641 | {"\\u0F73", "\\u0F73", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {0, -1}, {1}}, |
46f4442e A |
642 | |
643 | #if GRAPHEME_BOUNDARIES | |
729e4ab9 A |
644 | {"\\u0F73", "\\u0F71", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {-1}, {0}}, |
645 | {"\\u0F73", "\\u0F72", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {-1}, {0}}, | |
46f4442e | 646 | #else |
729e4ab9 A |
647 | {"\\u0F73", "\\u0F71", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {0, -1}, {1}}, |
648 | {"\\u0F73", "\\u0F72", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {0, -1}, {1}}, | |
46f4442e A |
649 | #endif |
650 | ||
729e4ab9 | 651 | {"\\u0F73", "\\u0F71\\u0F72", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {0, -1}, {1}}, |
46f4442e A |
652 | |
653 | #if GRAPHEME_BOUNDARIES | |
729e4ab9 A |
654 | {"A\\u0F73", "A\\u0F71", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {-1}, {0}}, |
655 | {"\\u0F73A", "\\u0F72A", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {-1}, {0}}, | |
46f4442e | 656 | {"\\u01FA A\\u0301\\u030A A\\u030A\\u0301 A\\u030A \\u01FA", "A\\u030A", |
729e4ab9 | 657 | NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {10, -1}, {2}}, |
46f4442e | 658 | #else |
729e4ab9 A |
659 | {"A\\u0F73", "A\\u0F71", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {0, -1}, {2}}, |
660 | {"\\u0F73A", "\\u0F72A", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {0, -1}, {2}}, | |
b75a7d8f | 661 | {"\\u01FA A\\u0301\\u030A A\\u030A\\u0301 A\\u030A \\u01FA", "A\\u030A", |
729e4ab9 | 662 | NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {0, 6, 10, 13, -1}, {1, 3, 2, 1}}, |
46f4442e A |
663 | #endif |
664 | ||
729e4ab9 | 665 | {NULL, NULL, NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {-1}, {0}} |
b75a7d8f A |
666 | }; |
667 | ||
668 | static const SearchData MATCHCANONICAL[] = { | |
729e4ab9 | 669 | {"a busy bee is a very busy beeee", "bee", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, |
b75a7d8f A |
670 | {7, 26, -1}, {3, 3}}, |
671 | /*012345678901234567890123456789012345678901234567890 */ | |
672 | {"a busy bee is a very busy beeee with no bee life", "bee", NULL, | |
729e4ab9 A |
673 | UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {7, 26, 40, -1}, {3, 3, 3}}, |
674 | {NULL, NULL, NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {-1}, {0}} | |
b75a7d8f A |
675 | }; |
676 | ||
677 | static const SearchData SUPPLEMENTARYCANONICAL[] = { | |
678 | /*012345678901234567890123456789012345678901234567890012345678901234567890123456789012345678901234567890012345678901234567890123456789 */ | |
679 | {"abc \\uD800\\uDC00 \\uD800\\uDC01 \\uD801\\uDC00 \\uD800\\uDC00abc abc\\uD800\\uDC00 \\uD800\\uD800\\uDC00 \\uD800\\uDC00\\uDC00", | |
729e4ab9 | 680 | "\\uD800\\uDC00", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {4, 13, 22, 26, 29, -1}, |
b75a7d8f | 681 | {2, 2, 2, 2, 2}}, |
374ca955 | 682 | {"and\\uD834\\uDDB9this sentence", "\\uD834\\uDDB9", NULL, |
729e4ab9 | 683 | UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {3, -1}, {2}}, |
374ca955 | 684 | {"and \\uD834\\uDDB9 this sentence", " \\uD834\\uDDB9 ", NULL, |
729e4ab9 | 685 | UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {3, -1}, {4}}, |
374ca955 | 686 | {"and-\\uD834\\uDDB9-this sentence", "-\\uD834\\uDDB9-", NULL, |
729e4ab9 | 687 | UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {3, -1}, {4}}, |
374ca955 | 688 | {"and,\\uD834\\uDDB9,this sentence", ",\\uD834\\uDDB9,", NULL, |
729e4ab9 | 689 | UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {3, -1}, {4}}, |
374ca955 | 690 | {"and?\\uD834\\uDDB9?this sentence", "?\\uD834\\uDDB9?", NULL, |
729e4ab9 A |
691 | UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {3, -1}, {4}}, |
692 | {NULL, NULL, NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {-1}, {0}} | |
b75a7d8f A |
693 | }; |
694 | ||
695 | static const SearchData CONTRACTIONCANONICAL[] = { | |
696 | /* common discontiguous */ | |
46f4442e | 697 | #if GRAPHEME_BOUNDARIES |
729e4ab9 A |
698 | {"A\\u0300\\u0315", "\\u0300", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {-1}, {0}}, |
699 | {"A\\u0300\\u0315", "\\u0300\\u0315", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {-1}, {0}}, | |
46f4442e | 700 | #else |
729e4ab9 A |
701 | {"A\\u0300\\u0315", "\\u0300", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {1, -1}, {2}}, |
702 | {"A\\u0300\\u0315", "\\u0300\\u0315", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {1, -1}, {2}}, | |
46f4442e A |
703 | #endif |
704 | ||
b75a7d8f | 705 | /* contraction prefix */ |
729e4ab9 | 706 | {"AB\\u0315C", "A", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {-1}, {0}}, |
46f4442e A |
707 | |
708 | #if GRAPHEME_BOUNDARIES | |
729e4ab9 A |
709 | {"AB\\u0315C", "AB", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {-1}, {0}}, |
710 | {"AB\\u0315C", "\\u0315", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {-1}, {0}}, | |
46f4442e | 711 | #else |
729e4ab9 A |
712 | {"AB\\u0315C", "AB", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {0, -1}, {2}}, |
713 | {"AB\\u0315C", "\\u0315", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {2, -1}, {1}}, | |
46f4442e A |
714 | #endif |
715 | ||
b75a7d8f A |
716 | /* discontiguous problem here for backwards iteration. |
717 | forwards gives 0, 4 but backwards gives 1, 3 */ | |
729e4ab9 | 718 | /* {"X\\u0300\\u0319\\u0315", "\\u0319", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {0, -1}, |
b75a7d8f A |
719 | {4}}, */ |
720 | ||
721 | /* does not end with a contraction character */ | |
729e4ab9 A |
722 | {"X\\u0315\\u0300D", "\\u0300\\u0315", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {-1}, {0}}, |
723 | {"X\\u0315\\u0300D", "X\\u0300\\u0315", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {0, -1}, {3}}, | |
46f4442e A |
724 | |
725 | #if GRAPHEME_BOUNDARIES | |
729e4ab9 | 726 | {"X\\u0300\\u031A\\u0315D", "X\\u0300", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {-1}, {0}}, |
46f4442e | 727 | |
b75a7d8f | 728 | /* blocked discontiguous */ |
729e4ab9 | 729 | {"X\\u0300\\u031A\\u0315D", "\\u031A\\u0315D", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {-1}, {0}}, |
46f4442e A |
730 | |
731 | /* | |
732 | * "ab" generates a contraction that's an expansion. The "z" matches the | |
733 | * first CE of the expansion but the match fails because it ends in the | |
734 | * middle of an expansion... | |
735 | */ | |
729e4ab9 | 736 | {"ab", "z", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {-1}, {2}}, |
46f4442e | 737 | #else |
729e4ab9 | 738 | {"X\\u0300\\u031A\\u0315D", "X\\u0300", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {0, -1}, {4}}, |
46f4442e A |
739 | |
740 | /* blocked discontiguous */ | |
729e4ab9 | 741 | {"X\\u0300\\u031A\\u0315D", "\\u031A\\u0315D", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {1, -1}, {4}}, |
46f4442e | 742 | |
729e4ab9 | 743 | {"ab", "z", NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {0, -1}, {2}}, |
46f4442e A |
744 | #endif |
745 | ||
729e4ab9 | 746 | {NULL, NULL, NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {-1}, {0}} |
b75a7d8f A |
747 | }; |
748 | ||
46f4442e | 749 | static const SearchData DIACRITICMATCH[] = { |
4388f060 A |
750 | {"\\u03BA\\u03B1\\u03B9\\u0300\\u0020\\u03BA\\u03B1\\u1F76", "\\u03BA\\u03B1\\u03B9", NULL, UCOL_PRIMARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {0, 5,-1}, {4, 3}}, |
751 | {"\\u0061\\u0061\\u00E1", "\\u0061\\u00E1", NULL, UCOL_SECONDARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {1, -1}, {2}}, | |
752 | {"\\u0020\\u00C2\\u0303\\u0020\\u0041\\u0061\\u1EAA\\u0041\\u0302\\u0303\\u00C2\\u0303\\u1EAB\\u0061\\u0302\\u0303\\u00E2\\u0303\\uD806\\uDC01\\u0300\\u0020", | |
753 | "\\u00C2\\u0303", "LDE_AN_CX_EX_FX_HX_NX_S1", UCOL_PRIMARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {1, 4, 5, 6, 7, 10, 12, 13, 16,-1}, {2, 1, 1, 1, 3, 2, 1, 3, 2}}, | |
754 | {NULL, NULL, NULL, UCOL_TERTIARY, USEARCH_STANDARD_ELEMENT_COMPARISON, NULL, {-1}, {0}} | |
46f4442e A |
755 | }; |
756 | ||
b75a7d8f A |
757 | #endif /* #if !UCONFIG_NO_COLLATION */ |
758 | ||
759 | #endif |