]>
git.saurik.com Git - apple/icu.git/blob - icuSources/samples/strsrch/strsrch.cpp
1 /*************************************************************************
2 * © 2016 and later: Unicode, Inc. and others.
3 * License & terms of use: http://www.unicode.org/copyright.html#License
5 *************************************************************************
6 *************************************************************************
8 * Copyright (C) 2002-2006 IBM, Inc. All Rights Reserved.
10 *************************************************************************/
13 * This program demos string searching (collation-based pattern matching)
/**
 * Usage text for the strsrch sample: one line per command-line option,
 * followed by an example invocation and a note on escape syntax.
 * Every line ends in a newline so that adjacent entries do not run together
 * when the text is printed.
 */
const char gHelpString[] =
    "usage: strsrch [options*] -source source_string -pattern pattern_string\n"
    "-help Display this message.\n"
    "-locale name ICU locale to use. Default is en_US\n"
    "-rules rule Collation rules file (overrides locale)\n"
    "-french French accent ordering\n"
    "-norm Normalizing mode on\n"
    "-shifted Shifted mode\n"
    "-lower Lower case first\n"
    "-upper Upper case first\n"
    "-case Enable separate case level\n"
    "-level n Sort level, 1 to 5, for Primary, Secondary, Tertiary, Quaternary, Identical\n"
    "-source string Source string\n"
    "-pattern string Pattern string to look for in source\n"
    "-overlap Enable searching to be done on overlapping patterns\n"
    "-canonical Enable searching to be done matching canonical equivalent patterns\n"
    "Example strsrch -rules \\u0026b\\u003ca -source a\\u0020b\\u0020bc -pattern b\n"
    "The format \\uXXXX is supported for the rules and comparison strings\n";
40 #include <unicode/utypes.h>
41 #include <unicode/ucol.h>
42 #include <unicode/usearch.h>
43 #include <unicode/ustring.h>
46 * Command line option variables
47 * These global variables are set according to the options specified
48 * on the command line by the user.
50 char * opt_locale
= "en_US";
52 UBool opt_help
= FALSE
;
53 UBool opt_norm
= FALSE
;
54 UBool opt_french
= FALSE
;
55 UBool opt_shifted
= FALSE
;
56 UBool opt_lower
= FALSE
;
57 UBool opt_upper
= FALSE
;
58 UBool opt_case
= FALSE
;
59 UBool opt_overlap
= FALSE
;
60 UBool opt_canonical
= FALSE
;
62 char * opt_source
= "International Components for Unicode";
63 char * opt_pattern
= "Unicode";
64 UCollator
* collator
= 0;
65 UStringSearch
* search
= 0;
71 * Definitions for the command line options
// Kind of value an option carries. In the option table, the on/off switches
// use FLAG, "-level" uses NUM, and the options that take text use STRING.
75 enum {FLAG
, NUM
, STRING
} type
;
// Option table entries: {option name, value kind, address of the variable
// that receives the value}. processOptions() scans entries until it reaches
// one whose name is 0.
// NOTE(review): the array declaration and its terminating entry are not
// visible in this excerpt.
80 {"-locale", OptSpec::STRING
, &opt_locale
},
81 {"-rules", OptSpec::STRING
, &opt_rules
},
82 {"-source", OptSpec::STRING
, &opt_source
},
83 {"-pattern", OptSpec::STRING
, &opt_pattern
},
84 {"-norm", OptSpec::FLAG
, &opt_norm
},
85 {"-french", OptSpec::FLAG
, &opt_french
},
86 {"-shifted", OptSpec::FLAG
, &opt_shifted
},
87 {"-lower", OptSpec::FLAG
, &opt_lower
},
88 {"-upper", OptSpec::FLAG
, &opt_upper
},
89 {"-case", OptSpec::FLAG
, &opt_case
},
90 {"-level", OptSpec::NUM
, &opt_level
},
91 {"-overlap", OptSpec::FLAG
, &opt_overlap
},
92 {"-canonical", OptSpec::FLAG
, &opt_canonical
},
93 {"-help", OptSpec::FLAG
, &opt_help
},
// "-?" is an alias for "-help": both entries target opt_help.
94 {"-?", OptSpec::FLAG
, &opt_help
},
99 * processOptions() Function to read the command line options.
// Parses argv[1] .. argv[argc-1] against the option table `opts` and stores
// each value through the matching entry's pVar pointer.
//   argc, argv - the command line as received by main()
//   opts       - option table; scanned until an entry whose name is 0
// Returns a UBool; main() treats any result other than TRUE as a failure.
// NOTE(review): several lines of this function (case labels, argument index
// increments, return statements, closing braces) are not visible in this
// excerpt, so the comments below describe only the statements that are shown.
101 UBool
processOptions(int argc
, const char **argv
, OptSpec opts
[])
// argv[0] is skipped; the scan starts at index 1.
103 for (int argNum
= 1; argNum
< argc
; argNum
++) {
104 const char *pArgName
= argv
[argNum
];
// Linear scan of the table for an entry whose name equals this argument.
106 for (pOpt
= opts
; pOpt
->name
!= 0; pOpt
++) {
107 if (strcmp(pOpt
->name
, pArgName
) == 0) {
108 switch (pOpt
->type
) {
// Store TRUE into the UBool the entry points at.
110 *(UBool
*)(pOpt
->pVar
) = TRUE
;
112 case OptSpec::STRING
:
// No argument left on the command line: the option's value is missing.
114 if (argNum
>= argc
) {
115 fprintf(stderr
, "value expected for \"%s\" option.\n",
// Keep a pointer into argv; the string itself is not copied.
119 *(const char **)(pOpt
->pVar
) = argv
[argNum
];
// Same missing-value check as for the string option above.
123 if (argNum
>= argc
) {
124 fprintf(stderr
, "value expected for \"%s\" option.\n",
// Base 0 lets strtol accept decimal, octal (leading 0) and hex (leading 0x).
129 int i
= strtol(argv
[argNum
], &endp
, 0);
// endp still at the start of the argument means strtol consumed nothing,
// i.e. the argument is not a number.
130 if (endp
== argv
[argNum
]) {
132 "integer value expected for \"%s\" option.\n",
136 *(int *)(pOpt
->pVar
) = i
;
// Diagnostic for an argument that is not a known option name.
143 fprintf(stderr
, "Unrecognized option \"%s\"\n", pArgName
);
// Opens the global `collator` and applies the attributes selected on the
// command line. When opt_rules is set the collator is built from those rules
// via ucol_openRules(); the ucol_open(opt_locale) call is presumably the
// alternative branch (the branch keyword is not visible in this excerpt).
// Returns a UBool; main() prints "Error creating collator" unless it is TRUE.
// NOTE(review): the conditions guarding the individual ucol_setAttribute()
// calls and the return statements are not visible in this excerpt.
153 UBool
processCollator()
155 // Set up an ICU collator
156 UErrorCode status
= U_ZERO_ERROR
;
158 if (opt_rules
!= 0) {
// Convert the escaped command-line rules into the UChar buffer `rules`,
// passing 100 as its capacity.
// NOTE(review): the u_unescape() result is not checked, and the buffer is
// then passed with length -1 (NUL-terminated). Confirm the behaviour when the
// unescaped rules do not fit in 100 units.
159 u_unescape(opt_rules
, rules
, 100);
160 collator
= ucol_openRules(rules
, -1, UCOL_OFF
, UCOL_TERTIARY
,
164 collator
= ucol_open(opt_locale
, &status
);
166 if (U_FAILURE(status
)) {
167 fprintf(stderr
, "Collator creation failed.: %d\n", status
);
// The two warning statuses below are reported on stderr; they are not
// failures as far as U_FAILURE() above is concerned.
170 if (status
== U_USING_DEFAULT_WARNING
) {
171 fprintf(stderr
, "Warning, U_USING_DEFAULT_WARNING for %s\n",
// NOTE(review): the message below names U_USING_FALLBACK_ERROR, but the
// status being tested is U_USING_FALLBACK_WARNING.
174 if (status
== U_USING_FALLBACK_WARNING
) {
175 fprintf(stderr
, "Warning, U_USING_FALLBACK_ERROR for %s\n",
// The attribute calls below share `status`, which is examined once after
// the last of them.
179 ucol_setAttribute(collator
, UCOL_NORMALIZATION_MODE
, UCOL_ON
, &status
);
182 ucol_setAttribute(collator
, UCOL_FRENCH_COLLATION
, UCOL_ON
, &status
);
185 ucol_setAttribute(collator
, UCOL_CASE_FIRST
, UCOL_LOWER_FIRST
,
189 ucol_setAttribute(collator
, UCOL_CASE_FIRST
, UCOL_UPPER_FIRST
,
193 ucol_setAttribute(collator
, UCOL_CASE_LEVEL
, UCOL_ON
, &status
);
196 ucol_setAttribute(collator
, UCOL_ALTERNATE_HANDLING
, UCOL_SHIFTED
,
// Strength is only touched when opt_level is non-zero; presumably 0 means
// "-level" was not given (the default is declared outside this excerpt).
199 if (opt_level
!= 0) {
202 ucol_setAttribute(collator
, UCOL_STRENGTH
, UCOL_PRIMARY
, &status
);
205 ucol_setAttribute(collator
, UCOL_STRENGTH
, UCOL_SECONDARY
,
209 ucol_setAttribute(collator
, UCOL_STRENGTH
, UCOL_TERTIARY
, &status
);
212 ucol_setAttribute(collator
, UCOL_STRENGTH
, UCOL_QUATERNARY
,
216 ucol_setAttribute(collator
, UCOL_STRENGTH
, UCOL_IDENTICAL
,
// Diagnostic for a level outside the five strengths handled above.
220 fprintf(stderr
, "-level param must be between 1 and 5\n");
224 if (U_FAILURE(status
)) {
225 fprintf(stderr
, "Collator attribute setting failed.: %d\n", status
);
232 * Creates a string search
// Unescapes opt_source and opt_pattern into the UChar buffers `source` and
// `pattern` (capacity argument 100 each), opens the global `search` over
// them using the global `collator`, and enables the overlap / canonical-match
// attributes when the corresponding options are set.
// Returns a UBool; main() prints "Error creating string search" unless it is
// TRUE.
// NOTE(review): the return statements and the declarations of `source` and
// `pattern` are not visible in this excerpt.
234 UBool
processStringSearch()
236 u_unescape(opt_source
, source
, 100);
237 u_unescape(opt_pattern
, pattern
, 100);
238 UErrorCode status
= U_ZERO_ERROR
;
// Both lengths are passed as -1 and NULL is passed after the collator;
// presumably "NUL-terminated" and "no break iterator" respectively.
// TODO confirm against the usearch_openFromCollator() documentation.
239 search
= usearch_openFromCollator(pattern
, -1, source
, -1, collator
, NULL
,
241 if (U_FAILURE(status
)) {
244 if (opt_overlap
== TRUE
) {
245 usearch_setAttribute(search
, USEARCH_OVERLAP
, USEARCH_ON
, &status
);
247 if (opt_canonical
== TRUE
) {
248 usearch_setAttribute(search
, USEARCH_CANONICAL_MATCH
, USEARCH_ON
,
// One check covers the attribute calls above.
251 if (U_FAILURE(status
)) {
252 fprintf(stderr
, "Error setting search attributes\n");
// NOTE(review): the signature of the function that owns these statements is
// not visible in this excerpt. The statements iterate over the matches of the
// global `search` and print each one to stdout.
260 UErrorCode status
= U_ZERO_ERROR
;
// Fetch the first match; USEARCH_DONE here means there is no match at all.
261 int32_t offset
= usearch_next(search
, &status
);
262 if (offset
== USEARCH_DONE
) {
263 fprintf(stdout
, "Pattern not found in source\n");
// Print the offset and matched length of the current match, then advance,
// until usearch_next() reports USEARCH_DONE.
265 while (offset
!= USEARCH_DONE
) {
266 fprintf(stdout
, "Pattern found at offset %d size %d\n", offset
,
267 usearch_getMatchedLength(search
));
268 offset
= usearch_next(search
, &status
);
// `status` is examined only here, not after each usearch_next() call above.
270 if (U_FAILURE(status
)) {
271 fprintf(stderr
, "Error in searching for pattern %d\n", status
);
274 fprintf(stdout
, "End of search\n");
279 * Main -- process the command line, set up the collator and the string search,
280 * report the pattern and source being searched, then close both objects.
// Program entry point: parses the options, creates the collator and the
// string search, prints which pattern is being looked for in which source,
// and finally closes the collator and the search object.
// NOTE(review): the statements inside the failure branches, the call that
// performs the search, and the return statements are not visible in this
// excerpt.
282 int main(int argc
, const char** argv
)
// A parse failure and an explicit help request take the same branch.
284 if (processOptions(argc
, argv
, opts
) != TRUE
|| opt_help
) {
289 if (processCollator() != TRUE
) {
290 fprintf(stderr
, "Error creating collator\n");
294 if (processStringSearch() != TRUE
) {
295 fprintf(stderr
, "Error creating string search\n");
299 fprintf(stdout
, "Finding pattern %s in source %s\n", opt_pattern
,
// NOTE(review): the collator is closed before the search object that was
// opened from it in processStringSearch(). Closing the search first looks
// like the safer order; confirm against the usearch_close() documentation.
303 ucol_close(collator
);
304 usearch_close(search
);