]> git.saurik.com Git - wxWidgets.git/blob - src/regex/re_main.c
create masks even (slightly) faster
[wxWidgets.git] / src / regex / re_main.c
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <sys/types.h>
#include <regex.h>
#include <assert.h>
6
7 #include "main.ih"
8
/* Program-wide state shared by main(), regress() and try(). */
char *progname;			/* argv[0]; printed in the usage message */
int debug = 0;			/* incremented once for each -x option */
int line = 0;			/* input line counter maintained by regress() */
int status = 0;			/* exit status; set to 1 when a check fails */

/* Option defaults; options() starts from these values. */
int copts = REG_EXTENDED;	/* flags handed to regcomp() */
int eopts = 0;			/* flags handed to regexec() */
regoff_t startoff = 0;		/* -S: used as subs[0].rm_so under REG_STARTEND */
regoff_t endoff = 0;		/* -E: subtracted from the string length for rm_eo */

/* Helpers defined outside this file. */
extern int split();
extern void regprint();
22
23 /*
24 - main - do the simple case, hand off to regress() for regression
25 */
26 main(argc, argv)
27 int argc;
28 char *argv[];
29 {
30 regex_t re;
31 # define NS 10
32 regmatch_t subs[NS];
33 char erbuf[100];
34 int err;
35 size_t len;
36 int c;
37 int errflg = 0;
38 register int i;
39 extern int optind;
40 extern char *optarg;
41
42 progname = argv[0];
43
44 while ((c = getopt(argc, argv, "c:e:S:E:x")) != EOF)
45 switch (c) {
46 case 'c': /* compile options */
47 copts = options('c', optarg);
48 break;
49 case 'e': /* execute options */
50 eopts = options('e', optarg);
51 break;
52 case 'S': /* start offset */
53 startoff = (regoff_t)atoi(optarg);
54 break;
55 case 'E': /* end offset */
56 endoff = (regoff_t)atoi(optarg);
57 break;
58 case 'x': /* Debugging. */
59 debug++;
60 break;
61 case '?':
62 default:
63 errflg++;
64 break;
65 }
66 if (errflg) {
67 fprintf(stderr, "usage: %s ", progname);
68 fprintf(stderr, "[-c copt][-C][-d] [re]\n");
69 exit(2);
70 }
71
72 if (optind >= argc) {
73 regress(stdin);
74 exit(status);
75 }
76
77 err = regcomp(&re, argv[optind++], copts);
78 if (err) {
79 len = regerror(err, &re, erbuf, sizeof(erbuf));
80 fprintf(stderr, "error %s, %d/%d `%s'\n",
81 eprint(err), len, sizeof(erbuf), erbuf);
82 exit(status);
83 }
84 regprint(&re, stdout);
85
86 if (optind >= argc) {
87 regfree(&re);
88 exit(status);
89 }
90
91 if (eopts&REG_STARTEND) {
92 subs[0].rm_so = startoff;
93 subs[0].rm_eo = strlen(argv[optind]) - endoff;
94 }
95 err = regexec(&re, argv[optind], (size_t)NS, subs, eopts);
96 if (err) {
97 len = regerror(err, &re, erbuf, sizeof(erbuf));
98 fprintf(stderr, "error %s, %d/%d `%s'\n",
99 eprint(err), len, sizeof(erbuf), erbuf);
100 exit(status);
101 }
102 if (!(copts&REG_NOSUB)) {
103 len = (int)(subs[0].rm_eo - subs[0].rm_so);
104 if (subs[0].rm_so != -1) {
105 if (len != 0)
106 printf("match `%.*s'\n", len,
107 argv[optind] + subs[0].rm_so);
108 else
109 printf("match `'@%.1s\n",
110 argv[optind] + subs[0].rm_so);
111 }
112 for (i = 1; i < NS; i++)
113 if (subs[i].rm_so != -1)
114 printf("(%d) `%.*s'\n", i,
115 (int)(subs[i].rm_eo - subs[i].rm_so),
116 argv[optind] + subs[i].rm_so);
117 }
118 exit(status);
119 }
120
121 /*
122 - regress - main loop of regression test
123 == void regress(FILE *in);
124 */
125 void
126 regress(in)
127 FILE *in;
128 {
129 char inbuf[1000];
130 # define MAXF 10
131 char *f[MAXF];
132 int nf;
133 int i;
134 char erbuf[100];
135 size_t ne;
136 char *badpat = "invalid regular expression";
137 # define SHORT 10
138 char *bpname = "REG_BADPAT";
139 regex_t re;
140
141 while (fgets(inbuf, sizeof(inbuf), in) != NULL) {
142 line++;
143 if (inbuf[0] == '#' || inbuf[0] == '\n')
144 continue; /* NOTE CONTINUE */
145 inbuf[strlen(inbuf)-1] = '\0'; /* get rid of stupid \n */
146 if (debug)
147 fprintf(stdout, "%d:\n", line);
148 nf = split(inbuf, f, MAXF, "\t\t");
149 if (nf < 3) {
150 fprintf(stderr, "bad input, line %d\n", line);
151 exit(1);
152 }
153 for (i = 0; i < nf; i++)
154 if (strcmp(f[i], "\"\"") == 0)
155 f[i] = "";
156 if (nf <= 3)
157 f[3] = NULL;
158 if (nf <= 4)
159 f[4] = NULL;
160 try(f[0], f[1], f[2], f[3], f[4], options('c', f[1]));
161 if (opt('&', f[1])) /* try with either type of RE */
162 try(f[0], f[1], f[2], f[3], f[4],
163 options('c', f[1]) &~ REG_EXTENDED);
164 }
165
166 ne = regerror(REG_BADPAT, (regex_t *)NULL, erbuf, sizeof(erbuf));
167 if (strcmp(erbuf, badpat) != 0 || ne != strlen(badpat)+1) {
168 fprintf(stderr, "end: regerror() test gave `%s' not `%s'\n",
169 erbuf, badpat);
170 status = 1;
171 }
172 ne = regerror(REG_BADPAT, (regex_t *)NULL, erbuf, (size_t)SHORT);
173 if (strncmp(erbuf, badpat, SHORT-1) != 0 || erbuf[SHORT-1] != '\0' ||
174 ne != strlen(badpat)+1) {
175 fprintf(stderr, "end: regerror() short test gave `%s' not `%.*s'\n",
176 erbuf, SHORT-1, badpat);
177 status = 1;
178 }
179 ne = regerror(REG_ITOA|REG_BADPAT, (regex_t *)NULL, erbuf, sizeof(erbuf));
180 if (strcmp(erbuf, bpname) != 0 || ne != strlen(bpname)+1) {
181 fprintf(stderr, "end: regerror() ITOA test gave `%s' not `%s'\n",
182 erbuf, bpname);
183 status = 1;
184 }
185 re.re_endp = bpname;
186 ne = regerror(REG_ATOI, &re, erbuf, sizeof(erbuf));
187 if (atoi(erbuf) != (int)REG_BADPAT) {
188 fprintf(stderr, "end: regerror() ATOI test gave `%s' not `%ld'\n",
189 erbuf, (long)REG_BADPAT);
190 status = 1;
191 } else if (ne != strlen(erbuf)+1) {
192 fprintf(stderr, "end: regerror() ATOI test len(`%s') = %ld\n",
193 erbuf, (long)REG_BADPAT);
194 status = 1;
195 }
196 }
197
198 /*
199 - try - try it, and report on problems
200 == void try(char *f0, char *f1, char *f2, char *f3, char *f4, int opts);
201 */
202 void
203 try(f0, f1, f2, f3, f4, opts)
204 char *f0;
205 char *f1;
206 char *f2;
207 char *f3;
208 char *f4;
209 int opts; /* may not match f1 */
210 {
211 regex_t re;
212 # define NSUBS 10
213 regmatch_t subs[NSUBS];
214 # define NSHOULD 15
215 char *should[NSHOULD];
216 int nshould;
217 char erbuf[100];
218 int err;
219 int len;
220 char *type = (opts & REG_EXTENDED) ? "ERE" : "BRE";
221 register int i;
222 char *grump;
223 char f0copy[1000];
224 char f2copy[1000];
225
226 strcpy(f0copy, f0);
227 re.re_endp = (opts&REG_PEND) ? f0copy + strlen(f0copy) : NULL;
228 fixstr(f0copy);
229 err = regcomp(&re, f0copy, opts);
230 if (err != 0 && (!opt('C', f1) || err != efind(f2))) {
231 /* unexpected error or wrong error */
232 len = regerror(err, &re, erbuf, sizeof(erbuf));
233 fprintf(stderr, "%d: %s error %s, %d/%d `%s'\n",
234 line, type, eprint(err), len,
235 sizeof(erbuf), erbuf);
236 status = 1;
237 } else if (err == 0 && opt('C', f1)) {
238 /* unexpected success */
239 fprintf(stderr, "%d: %s should have given REG_%s\n",
240 line, type, f2);
241 status = 1;
242 err = 1; /* so we won't try regexec */
243 }
244
245 if (err != 0) {
246 regfree(&re);
247 return;
248 }
249
250 strcpy(f2copy, f2);
251 fixstr(f2copy);
252
253 if (options('e', f1)&REG_STARTEND) {
254 if (strchr(f2, '(') == NULL || strchr(f2, ')') == NULL)
255 fprintf(stderr, "%d: bad STARTEND syntax\n", line);
256 subs[0].rm_so = strchr(f2, '(') - f2 + 1;
257 subs[0].rm_eo = strchr(f2, ')') - f2;
258 }
259 err = regexec(&re, f2copy, NSUBS, subs, options('e', f1));
260
261 if (err != 0 && (f3 != NULL || err != REG_NOMATCH)) {
262 /* unexpected error or wrong error */
263 len = regerror(err, &re, erbuf, sizeof(erbuf));
264 fprintf(stderr, "%d: %s exec error %s, %d/%d `%s'\n",
265 line, type, eprint(err), len,
266 sizeof(erbuf), erbuf);
267 status = 1;
268 } else if (err != 0) {
269 /* nothing more to check */
270 } else if (f3 == NULL) {
271 /* unexpected success */
272 fprintf(stderr, "%d: %s exec should have failed\n",
273 line, type);
274 status = 1;
275 err = 1; /* just on principle */
276 } else if (opts&REG_NOSUB) {
277 /* nothing more to check */
278 } else if ((grump = check(f2, subs[0], f3)) != NULL) {
279 fprintf(stderr, "%d: %s %s\n", line, type, grump);
280 status = 1;
281 err = 1;
282 }
283
284 if (err != 0 || f4 == NULL) {
285 regfree(&re);
286 return;
287 }
288
289 for (i = 1; i < NSHOULD; i++)
290 should[i] = NULL;
291 nshould = split(f4, should+1, NSHOULD-1, ",");
292 if (nshould == 0) {
293 nshould = 1;
294 should[1] = "";
295 }
296 for (i = 1; i < NSUBS; i++) {
297 grump = check(f2, subs[i], should[i]);
298 if (grump != NULL) {
299 fprintf(stderr, "%d: %s $%d %s\n", line,
300 type, i, grump);
301 status = 1;
302 err = 1;
303 }
304 }
305
306 regfree(&re);
307 }
308
309 /*
310 - options - pick options out of a regression-test string
311 == int options(int type, char *s);
312 */
313 int
314 options(type, s)
315 int type; /* 'c' compile, 'e' exec */
316 char *s;
317 {
318 register char *p;
319 register int o = (type == 'c') ? copts : eopts;
320 register char *legal = (type == 'c') ? "bisnmp" : "^$#tl";
321
322 for (p = s; *p != '\0'; p++)
323 if (strchr(legal, *p) != NULL)
324 switch (*p) {
325 case 'b':
326 o &= ~REG_EXTENDED;
327 break;
328 case 'i':
329 o |= REG_ICASE;
330 break;
331 case 's':
332 o |= REG_NOSUB;
333 break;
334 case 'n':
335 o |= REG_NEWLINE;
336 break;
337 case 'm':
338 o &= ~REG_EXTENDED;
339 o |= REG_NOSPEC;
340 break;
341 case 'p':
342 o |= REG_PEND;
343 break;
344 case '^':
345 o |= REG_NOTBOL;
346 break;
347 case '$':
348 o |= REG_NOTEOL;
349 break;
350 case '#':
351 o |= REG_STARTEND;
352 break;
353 case 't': /* trace */
354 o |= REG_TRACE;
355 break;
356 case 'l': /* force long representation */
357 o |= REG_LARGE;
358 break;
359 case 'r': /* force backref use */
360 o |= REG_BACKR;
361 break;
362 }
363 return(o);
364 }
365
/*
 - opt - is a particular option in a regression string?
 == int opt(int c, char *s);
 *
 * Returns 1 if character `c' occurs in `s', else 0.  The terminating NUL
 * counts as part of the string, so asking for '\0' always yields 1.
 */
int				/* predicate */
opt(c, s)
int c;
char *s;
{
	register char want = (char)c;

	for (;; s++) {
		if (*s == want)
			return(1);
		if (*s == '\0')
			return(0);
	}
}
377
/*
 - fixstr - transform magic characters in strings
 == void fixstr(register char *p);
 *
 * Rewrites the string in place: 'N' becomes newline, 'T' tab, 'S' space
 * and 'Z' a NUL byte.  The scan stops at the first NUL that was in the
 * string on entry; a NUL produced from 'Z' does not stop it.  A NULL
 * argument is accepted and ignored.
 */
void
fixstr(p)
register char *p;
{
	if (p == NULL)
		return;

	while (*p != '\0') {
		switch (*p) {
		case 'N':
			*p = '\n';
			break;
		case 'T':
			*p = '\t';
			break;
		case 'S':
			*p = ' ';
			break;
		case 'Z':
			*p = '\0';
			break;
		default:
			break;
		}
		p++;		/* step first, so a fresh NUL is skipped over */
	}
}
399
/*
 - check - check a substring match
 == char *check(char *str, regmatch_t sub, char *should);
 *
 * Compares the match described by `sub' (offsets into `str') with the
 * expectation `should':
 *	NULL or "-"	no match is expected
 *	"@text"		an empty match is expected, positioned so that the
 *			rest of str begins with text; "@" alone requires
 *			the empty match to be at the end of str
 *	anything else	the exact text that should have matched
 * Returns NULL if the match is as expected, otherwise a complaint.  The
 * complaint may live in a static buffer that the next call overwrites.
 */
char *				/* NULL or complaint */
check(str, sub, should)
char *str;
regmatch_t sub;
char *should;
{
	register int len;
	register int shlen;
	register int show;
	register char *p;
	static char grump[500];
	register char *at = NULL;
	/* most matched text we quote, leaving room for the fixed wording */
#	define	GRUMPSHOW	((int)sizeof(grump) - 40)

	if (should != NULL && strcmp(should, "-") == 0)
		should = NULL;
	if (should != NULL && should[0] == '@') {
		at = should + 1;
		should = "";
	}

	/* check rm_so and rm_eo for consistency */
	if (sub.rm_so > sub.rm_eo || (sub.rm_so == -1 && sub.rm_eo != -1) ||
				(sub.rm_so != -1 && sub.rm_eo == -1) ||
				(sub.rm_so != -1 && sub.rm_so < 0) ||
				(sub.rm_eo != -1 && sub.rm_eo < 0) ) {
		sprintf(grump, "start %ld end %ld", (long)sub.rm_so,
							(long)sub.rm_eo);
		return(grump);
	}

	/* check for no match */
	if (sub.rm_so == -1 && should == NULL)
		return(NULL);
	if (sub.rm_so == -1)
		return("did not match");

	/* check for in range (rm_eo is known to be non-negative here) */
	if ((size_t)sub.rm_eo > strlen(str)) {
		sprintf(grump, "start %ld end %ld, past end of string",
					(long)sub.rm_so, (long)sub.rm_eo);
		return(grump);
	}

	len = (int)(sub.rm_eo - sub.rm_so);
	p = str + sub.rm_so;
	show = (len > GRUMPSHOW) ? GRUMPSHOW : len;

	/*
	 * check for not supposed to match; this must come before anything
	 * looks at the contents of `should', which is NULL here
	 */
	if (should == NULL) {
		sprintf(grump, "matched `%.*s'", show, p);
		return(grump);
	}

	/* check for wrong match */
	shlen = (int)strlen(should);
	if (len != shlen || strncmp(p, should, (size_t)shlen) != 0) {
		sprintf(grump, "matched `%.*s' instead", show, p);
		return(grump);
	}
	if (shlen > 0)
		return(NULL);

	/* check null match in right place */
	if (at == NULL)
		return(NULL);
	shlen = (int)strlen(at);
	if (shlen == 0)
		shlen = 1;		/* force check for end-of-string */
	if (strncmp(p, at, (size_t)shlen) != 0) {
		sprintf(grump, "matched null at `%.20s'", p);
		return(grump);
	}
	return(NULL);
}
476
477 /*
478 - eprint - convert error number to name
479 == static char *eprint(int err);
480 */
481 static char *
482 eprint(err)
483 int err;
484 {
485 static char epbuf[100];
486 size_t len;
487
488 len = regerror(REG_ITOA|err, (regex_t *)NULL, epbuf, sizeof(epbuf));
489 assert(len <= sizeof(epbuf));
490 return(epbuf);
491 }
492
493 /*
494 - efind - convert error name to number
495 == static int efind(char *name);
496 */
497 static int
498 efind(name)
499 char *name;
500 {
501 static char efbuf[100];
502 size_t n;
503 regex_t re;
504
505 sprintf(efbuf, "REG_%s", name);
506 assert(strlen(efbuf) < sizeof(efbuf));
507 re.re_endp = efbuf;
508 (void) regerror(REG_ATOI, &re, efbuf, sizeof(efbuf));
509 return(atoi(efbuf));
510 }