]> git.saurik.com Git - apple/icu.git/blob - icuSources/samples/strsrch/strsrch.cpp
ICU-3.13.tar.gz
[apple/icu.git] / icuSources / samples / strsrch / strsrch.cpp
1 /********************************************************************
2 * COPYRIGHT:
3 * Copyright (C) 2002-2003 IBM, Inc. All Rights Reserved.
4 *
5 ********************************************************************/
6
/**
 * This program demonstrates collation-based string searching
 * using the ICU string search (usearch) API.
 */
10
/**
 * Usage text printed by main() when -help / -? is given or when the
 * command line cannot be parsed.
 *
 * Every option line ends with its own "\n" so that adjacent string
 * literals do not run together when the compiler concatenates them.
 */
const char gHelpString[] =
    "usage: strsrch [options*] -source source_string -pattern pattern_string\n"
    "-help Display this message.\n"
    "-locale name ICU locale to use. Default is en_US\n"
    "-rules rule Collation rules string (overrides locale)\n"
    "-french French accent ordering\n"
    "-norm Normalizing mode on\n"
    "-shifted Shifted mode\n"
    "-lower Lower case first\n"
    "-upper Upper case first\n"
    "-case Enable separate case level\n"
    "-level n Sort level, 1 to 5, for Primary, Secondary, Tertiary, Quaternary, Identical\n"
    "-source string Source string\n"
    "-pattern string Pattern string to look for in source\n"
    "-overlap Enable searching to be done on overlapping patterns\n"
    "-canonical Enable searching to be done matching canonical equivalent patterns\n"
    "Example strsrch -rules \\u0026b\\u003ca -source a\\u0020b\\u0020bc -pattern b\n"
    "The format \\uXXXX is supported for the rules and comparison strings\n"
    ;
30
31 #include <stdio.h>
32 #include <string.h>
33 #include <stdlib.h>
34
35 #include <unicode/utypes.h>
36 #include <unicode/ucol.h>
37 #include <unicode/usearch.h>
38 #include <unicode/ustring.h>
39
40 /**
41 * Command line option variables
42 * These global variables are set according to the options specified
43 * on the command line by the user.
44 */
45 char * opt_locale = "en_US";
46 char * opt_rules = 0;
47 UBool opt_help = FALSE;
48 UBool opt_norm = FALSE;
49 UBool opt_french = FALSE;
50 UBool opt_shifted = FALSE;
51 UBool opt_lower = FALSE;
52 UBool opt_upper = FALSE;
53 UBool opt_case = FALSE;
54 UBool opt_overlap = FALSE;
55 UBool opt_canonical = FALSE;
56 int opt_level = 0;
57 char * opt_source = "International Components for Unicode";
58 char * opt_pattern = "Unicode";
59 UCollator * collator = 0;
60 UStringSearch * search = 0;
61 UChar rules[100];
62 UChar source[100];
63 UChar pattern[100];
64
65 /**
66 * Definitions for the command line options
67 */
68 struct OptSpec {
69 const char *name;
70 enum {FLAG, NUM, STRING} type;
71 void *pVar;
72 };
73
74 OptSpec opts[] = {
75 {"-locale", OptSpec::STRING, &opt_locale},
76 {"-rules", OptSpec::STRING, &opt_rules},
77 {"-source", OptSpec::STRING, &opt_source},
78 {"-pattern", OptSpec::STRING, &opt_pattern},
79 {"-norm", OptSpec::FLAG, &opt_norm},
80 {"-french", OptSpec::FLAG, &opt_french},
81 {"-shifted", OptSpec::FLAG, &opt_shifted},
82 {"-lower", OptSpec::FLAG, &opt_lower},
83 {"-upper", OptSpec::FLAG, &opt_upper},
84 {"-case", OptSpec::FLAG, &opt_case},
85 {"-level", OptSpec::NUM, &opt_level},
86 {"-overlap", OptSpec::FLAG, &opt_overlap},
87 {"-canonical", OptSpec::FLAG, &opt_canonical},
88 {"-help", OptSpec::FLAG, &opt_help},
89 {"-?", OptSpec::FLAG, &opt_help},
90 {0, OptSpec::FLAG, 0}
91 };
92
93 /**
94 * processOptions() Function to read the command line options.
95 */
96 UBool processOptions(int argc, const char **argv, OptSpec opts[])
97 {
98 for (int argNum = 1; argNum < argc; argNum ++) {
99 const char *pArgName = argv[argNum];
100 for (OptSpec *pOpt = opts; pOpt->name != 0; pOpt ++) {
101 if (strcmp(pOpt->name, pArgName) == 0) {
102 switch (pOpt->type) {
103 case OptSpec::FLAG:
104 *(UBool *)(pOpt->pVar) = TRUE;
105 break;
106 case OptSpec::STRING:
107 argNum ++;
108 if (argNum >= argc) {
109 fprintf(stderr, "value expected for \"%s\" option.\n",
110 pOpt->name);
111 return FALSE;
112 }
113 *(const char **)(pOpt->pVar) = argv[argNum];
114 break;
115 case OptSpec::NUM:
116 argNum ++;
117 if (argNum >= argc) {
118 fprintf(stderr, "value expected for \"%s\" option.\n",
119 pOpt->name);
120 return FALSE;
121 }
122 char *endp;
123 int i = strtol(argv[argNum], &endp, 0);
124 if (endp == argv[argNum]) {
125 fprintf(stderr,
126 "integer value expected for \"%s\" option.\n",
127 pOpt->name);
128 return FALSE;
129 }
130 *(int *)(pOpt->pVar) = i;
131 }
132 break;
133 }
134 }
135 if (pOpt->name == 0)
136 {
137 fprintf(stderr, "Unrecognized option \"%s\"\n", pArgName);
138 return FALSE;
139 }
140 }
141 return TRUE;
142 }
143
144 /**
145 * Creates a collator
146 */
147 UBool processCollator()
148 {
149 // Set up an ICU collator
150 UErrorCode status = U_ZERO_ERROR;
151
152 if (opt_rules != 0) {
153 u_unescape(opt_rules, rules, 100);
154 collator = ucol_openRules(rules, -1, UCOL_OFF, UCOL_TERTIARY,
155 NULL, &status);
156 }
157 else {
158 collator = ucol_open(opt_locale, &status);
159 }
160 if (U_FAILURE(status)) {
161 fprintf(stderr, "Collator creation failed.: %d\n", status);
162 return FALSE;
163 }
164 if (status == U_USING_DEFAULT_WARNING) {
165 fprintf(stderr, "Warning, U_USING_DEFAULT_WARNING for %s\n",
166 opt_locale);
167 }
168 if (status == U_USING_FALLBACK_WARNING) {
169 fprintf(stderr, "Warning, U_USING_FALLBACK_ERROR for %s\n",
170 opt_locale);
171 }
172 if (opt_norm) {
173 ucol_setAttribute(collator, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);
174 }
175 if (opt_french) {
176 ucol_setAttribute(collator, UCOL_FRENCH_COLLATION, UCOL_ON, &status);
177 }
178 if (opt_lower) {
179 ucol_setAttribute(collator, UCOL_CASE_FIRST, UCOL_LOWER_FIRST,
180 &status);
181 }
182 if (opt_upper) {
183 ucol_setAttribute(collator, UCOL_CASE_FIRST, UCOL_UPPER_FIRST,
184 &status);
185 }
186 if (opt_case) {
187 ucol_setAttribute(collator, UCOL_CASE_LEVEL, UCOL_ON, &status);
188 }
189 if (opt_shifted) {
190 ucol_setAttribute(collator, UCOL_ALTERNATE_HANDLING, UCOL_SHIFTED,
191 &status);
192 }
193 if (opt_level != 0) {
194 switch (opt_level) {
195 case 1:
196 ucol_setAttribute(collator, UCOL_STRENGTH, UCOL_PRIMARY, &status);
197 break;
198 case 2:
199 ucol_setAttribute(collator, UCOL_STRENGTH, UCOL_SECONDARY,
200 &status);
201 break;
202 case 3:
203 ucol_setAttribute(collator, UCOL_STRENGTH, UCOL_TERTIARY, &status);
204 break;
205 case 4:
206 ucol_setAttribute(collator, UCOL_STRENGTH, UCOL_QUATERNARY,
207 &status);
208 break;
209 case 5:
210 ucol_setAttribute(collator, UCOL_STRENGTH, UCOL_IDENTICAL,
211 &status);
212 break;
213 default:
214 fprintf(stderr, "-level param must be between 1 and 5\n");
215 return FALSE;
216 }
217 }
218 if (U_FAILURE(status)) {
219 fprintf(stderr, "Collator attribute setting failed.: %d\n", status);
220 return FALSE;
221 }
222 return TRUE;
223 }
224
225 /**
226 * Creates a string search
227 */
228 UBool processStringSearch()
229 {
230 u_unescape(opt_source, source, 100);
231 u_unescape(opt_pattern, pattern, 100);
232 UErrorCode status = U_ZERO_ERROR;
233 search = usearch_openFromCollator(pattern, -1, source, -1, collator, NULL,
234 &status);
235 if (U_FAILURE(status)) {
236 return FALSE;
237 }
238 if (opt_overlap == TRUE) {
239 usearch_setAttribute(search, USEARCH_OVERLAP, USEARCH_ON, &status);
240 }
241 if (opt_canonical == TRUE) {
242 usearch_setAttribute(search, USEARCH_CANONICAL_MATCH, USEARCH_ON,
243 &status);
244 }
245 if (U_FAILURE(status)) {
246 fprintf(stderr, "Error setting search attributes\n");
247 return FALSE;
248 }
249 return TRUE;
250 }
251
252 UBool findPattern()
253 {
254 UErrorCode status = U_ZERO_ERROR;
255 int32_t offset = usearch_next(search, &status);
256 if (offset == USEARCH_DONE) {
257 fprintf(stdout, "Pattern not found in source\n");
258 }
259 while (offset != USEARCH_DONE) {
260 fprintf(stdout, "Pattern found at offset %d size %d\n", offset,
261 usearch_getMatchedLength(search));
262 offset = usearch_next(search, &status);
263 }
264 if (U_FAILURE(status)) {
265 fprintf(stderr, "Error in searching for pattern %d\n", status);
266 return FALSE;
267 }
268 fprintf(stdout, "End of search\n");
269 return TRUE;
270 }
271
272 /**
273 * Main -- process command line, read in and pre-process the test file,
274 * call other functions to do the actual tests.
275 */
276 int main(int argc, const char** argv)
277 {
278 if (processOptions(argc, argv, opts) != TRUE || opt_help) {
279 printf(gHelpString);
280 return -1;
281 }
282
283 if (processCollator() != TRUE) {
284 fprintf(stderr, "Error creating collator\n");
285 return -1;
286 }
287
288 if (processStringSearch() != TRUE) {
289 fprintf(stderr, "Error creating string search\n");
290 return -1;
291 }
292
293 fprintf(stdout, "Finding pattern %s in source %s\n", opt_pattern,
294 opt_source);
295
296 findPattern();
297 ucol_close(collator);
298 usearch_close(search);
299 return 0;
300 }