]>
Commit | Line | Data |
---|---|---|
f3c0d7a5 A |
1 | /************************************************************************* |
2 | * © 2016 and later: Unicode, Inc. and others. | |
3 | * License & terms of use: http://www.unicode.org/copyright.html#License | |
4 | * | |
5 | ************************************************************************* | |
6 | ************************************************************************* | |
b75a7d8f | 7 | * COPYRIGHT: |
73c04bcf | 8 | * Copyright (C) 2002-2006 IBM, Inc. All Rights Reserved. |
b75a7d8f | 9 | * |
f3c0d7a5 | 10 | *************************************************************************/ |
b75a7d8f A |
11 | |
12 | /** | |
13 | * This program demos collation-based string searching | |
14 | */ | |
15 | ||
/*
 * Usage text for the program.  Every option line ends with '\n' so that the
 * trailing example and note start on their own lines.
 */
const char gHelpString[] =
    "usage: strsrch [options*] -source source_string -pattern pattern_string\n"
    "-help Display this message.\n"
    "-locale name ICU locale to use. Default is en_US\n"
    "-rules rule Collation rules file (overrides locale)\n"
    "-french French accent ordering\n"
    "-norm Normalizing mode on\n"
    "-shifted Shifted mode\n"
    "-lower Lower case first\n"
    "-upper Upper case first\n"
    "-case Enable separate case level\n"
    "-level n Sort level, 1 to 5, for Primary, Secondary, Tertiary, Quaternary, Identical\n"
    "-source string Source string\n"
    "-pattern string Pattern string to look for in source\n"
    "-overlap Enable searching to be done on overlapping patterns\n"
    "-canonical Enable searching to be done matching canonical equivalent patterns\n"
    "Example strsrch -rules \\u0026b\\u003ca -source a\\u0020b\\u0020bc -pattern b\n"
    "The format \\uXXXX is supported for the rules and comparison strings\n"
    ;
35 | ||
36 | #include <stdio.h> | |
37 | #include <string.h> | |
38 | #include <stdlib.h> | |
39 | ||
40 | #include <unicode/utypes.h> | |
41 | #include <unicode/ucol.h> | |
42 | #include <unicode/usearch.h> | |
43 | #include <unicode/ustring.h> | |
44 | ||
45 | /** | |
46 | * Command line option variables | |
47 | * These global variables are set according to the options specified | |
48 | * on the command line by the user. | |
49 | */ | |
50 | char * opt_locale = "en_US"; | |
51 | char * opt_rules = 0; | |
52 | UBool opt_help = FALSE; | |
53 | UBool opt_norm = FALSE; | |
54 | UBool opt_french = FALSE; | |
55 | UBool opt_shifted = FALSE; | |
56 | UBool opt_lower = FALSE; | |
57 | UBool opt_upper = FALSE; | |
58 | UBool opt_case = FALSE; | |
59 | UBool opt_overlap = FALSE; | |
60 | UBool opt_canonical = FALSE; | |
61 | int opt_level = 0; | |
62 | char * opt_source = "International Components for Unicode"; | |
63 | char * opt_pattern = "Unicode"; | |
64 | UCollator * collator = 0; | |
65 | UStringSearch * search = 0; | |
66 | UChar rules[100]; | |
67 | UChar source[100]; | |
68 | UChar pattern[100]; | |
69 | ||
/**
 * One entry of the command line option table.
 */
struct OptSpec {
    /* Option text exactly as typed on the command line, e.g. "-level". */
    const char *name;

    /* Kind of value the option carries. */
    enum {
        FLAG,       /* no argument; the target is a UBool set to TRUE */
        NUM,        /* next argument is an integer; the target is an int */
        STRING      /* next argument is stored as-is; the target is a string pointer */
    } type;

    /* Address of the variable that receives the option's value. */
    void *pVar;
};
78 | ||
79 | OptSpec opts[] = { | |
80 | {"-locale", OptSpec::STRING, &opt_locale}, | |
81 | {"-rules", OptSpec::STRING, &opt_rules}, | |
82 | {"-source", OptSpec::STRING, &opt_source}, | |
83 | {"-pattern", OptSpec::STRING, &opt_pattern}, | |
84 | {"-norm", OptSpec::FLAG, &opt_norm}, | |
85 | {"-french", OptSpec::FLAG, &opt_french}, | |
86 | {"-shifted", OptSpec::FLAG, &opt_shifted}, | |
87 | {"-lower", OptSpec::FLAG, &opt_lower}, | |
88 | {"-upper", OptSpec::FLAG, &opt_upper}, | |
89 | {"-case", OptSpec::FLAG, &opt_case}, | |
90 | {"-level", OptSpec::NUM, &opt_level}, | |
91 | {"-overlap", OptSpec::FLAG, &opt_overlap}, | |
92 | {"-canonical", OptSpec::FLAG, &opt_canonical}, | |
93 | {"-help", OptSpec::FLAG, &opt_help}, | |
94 | {"-?", OptSpec::FLAG, &opt_help}, | |
95 | {0, OptSpec::FLAG, 0} | |
96 | }; | |
97 | ||
98 | /** | |
99 | * processOptions() Function to read the command line options. | |
100 | */ | |
101 | UBool processOptions(int argc, const char **argv, OptSpec opts[]) | |
102 | { | |
103 | for (int argNum = 1; argNum < argc; argNum ++) { | |
104 | const char *pArgName = argv[argNum]; | |
73c04bcf A |
105 | OptSpec *pOpt; |
106 | for (pOpt = opts; pOpt->name != 0; pOpt ++) { | |
b75a7d8f A |
107 | if (strcmp(pOpt->name, pArgName) == 0) { |
108 | switch (pOpt->type) { | |
109 | case OptSpec::FLAG: | |
110 | *(UBool *)(pOpt->pVar) = TRUE; | |
111 | break; | |
112 | case OptSpec::STRING: | |
113 | argNum ++; | |
114 | if (argNum >= argc) { | |
115 | fprintf(stderr, "value expected for \"%s\" option.\n", | |
116 | pOpt->name); | |
117 | return FALSE; | |
118 | } | |
119 | *(const char **)(pOpt->pVar) = argv[argNum]; | |
120 | break; | |
121 | case OptSpec::NUM: | |
122 | argNum ++; | |
123 | if (argNum >= argc) { | |
124 | fprintf(stderr, "value expected for \"%s\" option.\n", | |
125 | pOpt->name); | |
126 | return FALSE; | |
127 | } | |
128 | char *endp; | |
129 | int i = strtol(argv[argNum], &endp, 0); | |
130 | if (endp == argv[argNum]) { | |
131 | fprintf(stderr, | |
132 | "integer value expected for \"%s\" option.\n", | |
133 | pOpt->name); | |
134 | return FALSE; | |
135 | } | |
136 | *(int *)(pOpt->pVar) = i; | |
137 | } | |
138 | break; | |
139 | } | |
140 | } | |
141 | if (pOpt->name == 0) | |
142 | { | |
143 | fprintf(stderr, "Unrecognized option \"%s\"\n", pArgName); | |
144 | return FALSE; | |
145 | } | |
146 | } | |
147 | return TRUE; | |
148 | } | |
149 | ||
150 | /** | |
151 | * Creates a collator | |
152 | */ | |
153 | UBool processCollator() | |
154 | { | |
155 | // Set up an ICU collator | |
156 | UErrorCode status = U_ZERO_ERROR; | |
157 | ||
158 | if (opt_rules != 0) { | |
159 | u_unescape(opt_rules, rules, 100); | |
160 | collator = ucol_openRules(rules, -1, UCOL_OFF, UCOL_TERTIARY, | |
161 | NULL, &status); | |
162 | } | |
163 | else { | |
164 | collator = ucol_open(opt_locale, &status); | |
165 | } | |
166 | if (U_FAILURE(status)) { | |
167 | fprintf(stderr, "Collator creation failed.: %d\n", status); | |
168 | return FALSE; | |
169 | } | |
170 | if (status == U_USING_DEFAULT_WARNING) { | |
171 | fprintf(stderr, "Warning, U_USING_DEFAULT_WARNING for %s\n", | |
172 | opt_locale); | |
173 | } | |
174 | if (status == U_USING_FALLBACK_WARNING) { | |
175 | fprintf(stderr, "Warning, U_USING_FALLBACK_ERROR for %s\n", | |
176 | opt_locale); | |
177 | } | |
178 | if (opt_norm) { | |
179 | ucol_setAttribute(collator, UCOL_NORMALIZATION_MODE, UCOL_ON, &status); | |
180 | } | |
181 | if (opt_french) { | |
182 | ucol_setAttribute(collator, UCOL_FRENCH_COLLATION, UCOL_ON, &status); | |
183 | } | |
184 | if (opt_lower) { | |
185 | ucol_setAttribute(collator, UCOL_CASE_FIRST, UCOL_LOWER_FIRST, | |
186 | &status); | |
187 | } | |
188 | if (opt_upper) { | |
189 | ucol_setAttribute(collator, UCOL_CASE_FIRST, UCOL_UPPER_FIRST, | |
190 | &status); | |
191 | } | |
192 | if (opt_case) { | |
193 | ucol_setAttribute(collator, UCOL_CASE_LEVEL, UCOL_ON, &status); | |
194 | } | |
195 | if (opt_shifted) { | |
196 | ucol_setAttribute(collator, UCOL_ALTERNATE_HANDLING, UCOL_SHIFTED, | |
197 | &status); | |
198 | } | |
199 | if (opt_level != 0) { | |
200 | switch (opt_level) { | |
201 | case 1: | |
202 | ucol_setAttribute(collator, UCOL_STRENGTH, UCOL_PRIMARY, &status); | |
203 | break; | |
204 | case 2: | |
205 | ucol_setAttribute(collator, UCOL_STRENGTH, UCOL_SECONDARY, | |
206 | &status); | |
207 | break; | |
208 | case 3: | |
209 | ucol_setAttribute(collator, UCOL_STRENGTH, UCOL_TERTIARY, &status); | |
210 | break; | |
211 | case 4: | |
212 | ucol_setAttribute(collator, UCOL_STRENGTH, UCOL_QUATERNARY, | |
213 | &status); | |
214 | break; | |
215 | case 5: | |
216 | ucol_setAttribute(collator, UCOL_STRENGTH, UCOL_IDENTICAL, | |
217 | &status); | |
218 | break; | |
219 | default: | |
220 | fprintf(stderr, "-level param must be between 1 and 5\n"); | |
221 | return FALSE; | |
222 | } | |
223 | } | |
224 | if (U_FAILURE(status)) { | |
225 | fprintf(stderr, "Collator attribute setting failed.: %d\n", status); | |
226 | return FALSE; | |
227 | } | |
228 | return TRUE; | |
229 | } | |
230 | ||
231 | /** | |
232 | * Creates a string search | |
233 | */ | |
234 | UBool processStringSearch() | |
235 | { | |
236 | u_unescape(opt_source, source, 100); | |
237 | u_unescape(opt_pattern, pattern, 100); | |
238 | UErrorCode status = U_ZERO_ERROR; | |
239 | search = usearch_openFromCollator(pattern, -1, source, -1, collator, NULL, | |
240 | &status); | |
241 | if (U_FAILURE(status)) { | |
242 | return FALSE; | |
243 | } | |
244 | if (opt_overlap == TRUE) { | |
245 | usearch_setAttribute(search, USEARCH_OVERLAP, USEARCH_ON, &status); | |
246 | } | |
247 | if (opt_canonical == TRUE) { | |
248 | usearch_setAttribute(search, USEARCH_CANONICAL_MATCH, USEARCH_ON, | |
249 | &status); | |
250 | } | |
251 | if (U_FAILURE(status)) { | |
252 | fprintf(stderr, "Error setting search attributes\n"); | |
253 | return FALSE; | |
254 | } | |
255 | return TRUE; | |
256 | } | |
257 | ||
258 | UBool findPattern() | |
259 | { | |
260 | UErrorCode status = U_ZERO_ERROR; | |
261 | int32_t offset = usearch_next(search, &status); | |
262 | if (offset == USEARCH_DONE) { | |
263 | fprintf(stdout, "Pattern not found in source\n"); | |
264 | } | |
265 | while (offset != USEARCH_DONE) { | |
266 | fprintf(stdout, "Pattern found at offset %d size %d\n", offset, | |
267 | usearch_getMatchedLength(search)); | |
268 | offset = usearch_next(search, &status); | |
269 | } | |
270 | if (U_FAILURE(status)) { | |
271 | fprintf(stderr, "Error in searching for pattern %d\n", status); | |
272 | return FALSE; | |
273 | } | |
274 | fprintf(stdout, "End of search\n"); | |
275 | return TRUE; | |
276 | } | |
277 | ||
278 | /** | |
279 | * Main -- process command line, read in and pre-process the test file, | |
280 | * call other functions to do the actual tests. | |
281 | */ | |
282 | int main(int argc, const char** argv) | |
283 | { | |
284 | if (processOptions(argc, argv, opts) != TRUE || opt_help) { | |
285 | printf(gHelpString); | |
286 | return -1; | |
287 | } | |
288 | ||
289 | if (processCollator() != TRUE) { | |
290 | fprintf(stderr, "Error creating collator\n"); | |
291 | return -1; | |
292 | } | |
293 | ||
294 | if (processStringSearch() != TRUE) { | |
295 | fprintf(stderr, "Error creating string search\n"); | |
296 | return -1; | |
297 | } | |
298 | ||
299 | fprintf(stdout, "Finding pattern %s in source %s\n", opt_pattern, | |
300 | opt_source); | |
301 | ||
302 | findPattern(); | |
303 | ucol_close(collator); | |
304 | usearch_close(search); | |
305 | return 0; | |
306 | } |