]>
Commit | Line | Data |
---|---|---|
1 | #include <stdio.h> | |
2 | #include <string.h> | |
3 | #include <sys/types.h> | |
4 | #include <regex.h> | |
5 | #include <assert.h> | |
6 | ||
7 | #include "main.ih" | |
8 | ||
/* Program-wide state shared by main(), regress() and try(). */
char *progname;			/* argv[0], recorded at the top of main() */
int debug = 0;			/* bumped once for every -x option */
int line = 0;			/* regression input line currently being processed */
int status = 0;			/* exit status; set to 1 when a check fails */

/* Starting flag sets that options() adds to or strips from. */
int copts = REG_EXTENDED;	/* regcomp() flags; replaced by -c */
int eopts = 0;			/* regexec() flags; replaced by -e */
regoff_t startoff = 0;		/* -S: rm_so handed to regexec() under REG_STARTEND */
regoff_t endoff = 0;		/* -E: subtracted from the string length to get rm_eo */


/* Helpers that are not defined in the visible source; declared unprototyped. */
extern int split();
extern void regprint();
22 | ||
23 | /* | |
24 | - main - do the simple case, hand off to regress() for regression | |
25 | */ | |
26 | main(argc, argv) | |
27 | int argc; | |
28 | char *argv[]; | |
29 | { | |
30 | regex_t re; | |
31 | # define NS 10 | |
32 | regmatch_t subs[NS]; | |
33 | char erbuf[100]; | |
34 | int err; | |
35 | size_t len; | |
36 | int c; | |
37 | int errflg = 0; | |
38 | register int i; | |
39 | extern int optind; | |
40 | extern char *optarg; | |
41 | ||
42 | progname = argv[0]; | |
43 | ||
44 | while ((c = getopt(argc, argv, "c:e:S:E:x")) != EOF) | |
45 | switch (c) { | |
46 | case 'c': /* compile options */ | |
47 | copts = options('c', optarg); | |
48 | break; | |
49 | case 'e': /* execute options */ | |
50 | eopts = options('e', optarg); | |
51 | break; | |
52 | case 'S': /* start offset */ | |
53 | startoff = (regoff_t)atoi(optarg); | |
54 | break; | |
55 | case 'E': /* end offset */ | |
56 | endoff = (regoff_t)atoi(optarg); | |
57 | break; | |
58 | case 'x': /* Debugging. */ | |
59 | debug++; | |
60 | break; | |
61 | case '?': | |
62 | default: | |
63 | errflg++; | |
64 | break; | |
65 | } | |
66 | if (errflg) { | |
67 | fprintf(stderr, "usage: %s ", progname); | |
68 | fprintf(stderr, "[-c copt][-C][-d] [re]\n"); | |
69 | exit(2); | |
70 | } | |
71 | ||
72 | if (optind >= argc) { | |
73 | regress(stdin); | |
74 | exit(status); | |
75 | } | |
76 | ||
77 | err = regcomp(&re, argv[optind++], copts); | |
78 | if (err) { | |
79 | len = regerror(err, &re, erbuf, sizeof(erbuf)); | |
80 | fprintf(stderr, "error %s, %d/%d `%s'\n", | |
81 | eprint(err), len, sizeof(erbuf), erbuf); | |
82 | exit(status); | |
83 | } | |
84 | regprint(&re, stdout); | |
85 | ||
86 | if (optind >= argc) { | |
87 | regfree(&re); | |
88 | exit(status); | |
89 | } | |
90 | ||
91 | if (eopts®_STARTEND) { | |
92 | subs[0].rm_so = startoff; | |
93 | subs[0].rm_eo = strlen(argv[optind]) - endoff; | |
94 | } | |
95 | err = regexec(&re, argv[optind], (size_t)NS, subs, eopts); | |
96 | if (err) { | |
97 | len = regerror(err, &re, erbuf, sizeof(erbuf)); | |
98 | fprintf(stderr, "error %s, %d/%d `%s'\n", | |
99 | eprint(err), len, sizeof(erbuf), erbuf); | |
100 | exit(status); | |
101 | } | |
102 | if (!(copts®_NOSUB)) { | |
103 | len = (int)(subs[0].rm_eo - subs[0].rm_so); | |
104 | if (subs[0].rm_so != -1) { | |
105 | if (len != 0) | |
106 | printf("match `%.*s'\n", len, | |
107 | argv[optind] + subs[0].rm_so); | |
108 | else | |
109 | printf("match `'@%.1s\n", | |
110 | argv[optind] + subs[0].rm_so); | |
111 | } | |
112 | for (i = 1; i < NS; i++) | |
113 | if (subs[i].rm_so != -1) | |
114 | printf("(%d) `%.*s'\n", i, | |
115 | (int)(subs[i].rm_eo - subs[i].rm_so), | |
116 | argv[optind] + subs[i].rm_so); | |
117 | } | |
118 | exit(status); | |
119 | } | |
120 | ||
121 | /* | |
122 | - regress - main loop of regression test | |
123 | == void regress(FILE *in); | |
124 | */ | |
125 | void | |
126 | regress(in) | |
127 | FILE *in; | |
128 | { | |
129 | char inbuf[1000]; | |
130 | # define MAXF 10 | |
131 | char *f[MAXF]; | |
132 | int nf; | |
133 | int i; | |
134 | char erbuf[100]; | |
135 | size_t ne; | |
136 | char *badpat = "invalid regular expression"; | |
137 | # define SHORT 10 | |
138 | char *bpname = "REG_BADPAT"; | |
139 | regex_t re; | |
140 | ||
141 | while (fgets(inbuf, sizeof(inbuf), in) != NULL) { | |
142 | line++; | |
143 | if (inbuf[0] == '#' || inbuf[0] == '\n') | |
144 | continue; /* NOTE CONTINUE */ | |
145 | inbuf[strlen(inbuf)-1] = '\0'; /* get rid of stupid \n */ | |
146 | if (debug) | |
147 | fprintf(stdout, "%d:\n", line); | |
148 | nf = split(inbuf, f, MAXF, "\t\t"); | |
149 | if (nf < 3) { | |
150 | fprintf(stderr, "bad input, line %d\n", line); | |
151 | exit(1); | |
152 | } | |
153 | for (i = 0; i < nf; i++) | |
154 | if (strcmp(f[i], "\"\"") == 0) | |
155 | f[i] = ""; | |
156 | if (nf <= 3) | |
157 | f[3] = NULL; | |
158 | if (nf <= 4) | |
159 | f[4] = NULL; | |
160 | try(f[0], f[1], f[2], f[3], f[4], options('c', f[1])); | |
161 | if (opt('&', f[1])) /* try with either type of RE */ | |
162 | try(f[0], f[1], f[2], f[3], f[4], | |
163 | options('c', f[1]) &~ REG_EXTENDED); | |
164 | } | |
165 | ||
166 | ne = regerror(REG_BADPAT, (regex_t *)NULL, erbuf, sizeof(erbuf)); | |
167 | if (strcmp(erbuf, badpat) != 0 || ne != strlen(badpat)+1) { | |
168 | fprintf(stderr, "end: regerror() test gave `%s' not `%s'\n", | |
169 | erbuf, badpat); | |
170 | status = 1; | |
171 | } | |
172 | ne = regerror(REG_BADPAT, (regex_t *)NULL, erbuf, (size_t)SHORT); | |
173 | if (strncmp(erbuf, badpat, SHORT-1) != 0 || erbuf[SHORT-1] != '\0' || | |
174 | ne != strlen(badpat)+1) { | |
175 | fprintf(stderr, "end: regerror() short test gave `%s' not `%.*s'\n", | |
176 | erbuf, SHORT-1, badpat); | |
177 | status = 1; | |
178 | } | |
179 | ne = regerror(REG_ITOA|REG_BADPAT, (regex_t *)NULL, erbuf, sizeof(erbuf)); | |
180 | if (strcmp(erbuf, bpname) != 0 || ne != strlen(bpname)+1) { | |
181 | fprintf(stderr, "end: regerror() ITOA test gave `%s' not `%s'\n", | |
182 | erbuf, bpname); | |
183 | status = 1; | |
184 | } | |
185 | re.re_endp = bpname; | |
186 | ne = regerror(REG_ATOI, &re, erbuf, sizeof(erbuf)); | |
187 | if (atoi(erbuf) != (int)REG_BADPAT) { | |
188 | fprintf(stderr, "end: regerror() ATOI test gave `%s' not `%ld'\n", | |
189 | erbuf, (long)REG_BADPAT); | |
190 | status = 1; | |
191 | } else if (ne != strlen(erbuf)+1) { | |
192 | fprintf(stderr, "end: regerror() ATOI test len(`%s') = %ld\n", | |
193 | erbuf, (long)REG_BADPAT); | |
194 | status = 1; | |
195 | } | |
196 | } | |
197 | ||
198 | /* | |
199 | - try - try it, and report on problems | |
200 | == void try(char *f0, char *f1, char *f2, char *f3, char *f4, int opts); | |
201 | */ | |
202 | void | |
203 | try(f0, f1, f2, f3, f4, opts) | |
204 | char *f0; | |
205 | char *f1; | |
206 | char *f2; | |
207 | char *f3; | |
208 | char *f4; | |
209 | int opts; /* may not match f1 */ | |
210 | { | |
211 | regex_t re; | |
212 | # define NSUBS 10 | |
213 | regmatch_t subs[NSUBS]; | |
214 | # define NSHOULD 15 | |
215 | char *should[NSHOULD]; | |
216 | int nshould; | |
217 | char erbuf[100]; | |
218 | int err; | |
219 | int len; | |
220 | char *type = (opts & REG_EXTENDED) ? "ERE" : "BRE"; | |
221 | register int i; | |
222 | char *grump; | |
223 | char f0copy[1000]; | |
224 | char f2copy[1000]; | |
225 | ||
226 | strcpy(f0copy, f0); | |
227 | re.re_endp = (opts®_PEND) ? f0copy + strlen(f0copy) : NULL; | |
228 | fixstr(f0copy); | |
229 | err = regcomp(&re, f0copy, opts); | |
230 | if (err != 0 && (!opt('C', f1) || err != efind(f2))) { | |
231 | /* unexpected error or wrong error */ | |
232 | len = regerror(err, &re, erbuf, sizeof(erbuf)); | |
233 | fprintf(stderr, "%d: %s error %s, %d/%d `%s'\n", | |
234 | line, type, eprint(err), len, | |
235 | sizeof(erbuf), erbuf); | |
236 | status = 1; | |
237 | } else if (err == 0 && opt('C', f1)) { | |
238 | /* unexpected success */ | |
239 | fprintf(stderr, "%d: %s should have given REG_%s\n", | |
240 | line, type, f2); | |
241 | status = 1; | |
242 | err = 1; /* so we won't try regexec */ | |
243 | } | |
244 | ||
245 | if (err != 0) { | |
246 | regfree(&re); | |
247 | return; | |
248 | } | |
249 | ||
250 | strcpy(f2copy, f2); | |
251 | fixstr(f2copy); | |
252 | ||
253 | if (options('e', f1)®_STARTEND) { | |
254 | if (strchr(f2, '(') == NULL || strchr(f2, ')') == NULL) | |
255 | fprintf(stderr, "%d: bad STARTEND syntax\n", line); | |
256 | subs[0].rm_so = strchr(f2, '(') - f2 + 1; | |
257 | subs[0].rm_eo = strchr(f2, ')') - f2; | |
258 | } | |
259 | err = regexec(&re, f2copy, NSUBS, subs, options('e', f1)); | |
260 | ||
261 | if (err != 0 && (f3 != NULL || err != REG_NOMATCH)) { | |
262 | /* unexpected error or wrong error */ | |
263 | len = regerror(err, &re, erbuf, sizeof(erbuf)); | |
264 | fprintf(stderr, "%d: %s exec error %s, %d/%d `%s'\n", | |
265 | line, type, eprint(err), len, | |
266 | sizeof(erbuf), erbuf); | |
267 | status = 1; | |
268 | } else if (err != 0) { | |
269 | /* nothing more to check */ | |
270 | } else if (f3 == NULL) { | |
271 | /* unexpected success */ | |
272 | fprintf(stderr, "%d: %s exec should have failed\n", | |
273 | line, type); | |
274 | status = 1; | |
275 | err = 1; /* just on principle */ | |
276 | } else if (opts®_NOSUB) { | |
277 | /* nothing more to check */ | |
278 | } else if ((grump = check(f2, subs[0], f3)) != NULL) { | |
279 | fprintf(stderr, "%d: %s %s\n", line, type, grump); | |
280 | status = 1; | |
281 | err = 1; | |
282 | } | |
283 | ||
284 | if (err != 0 || f4 == NULL) { | |
285 | regfree(&re); | |
286 | return; | |
287 | } | |
288 | ||
289 | for (i = 1; i < NSHOULD; i++) | |
290 | should[i] = NULL; | |
291 | nshould = split(f4, should+1, NSHOULD-1, ","); | |
292 | if (nshould == 0) { | |
293 | nshould = 1; | |
294 | should[1] = ""; | |
295 | } | |
296 | for (i = 1; i < NSUBS; i++) { | |
297 | grump = check(f2, subs[i], should[i]); | |
298 | if (grump != NULL) { | |
299 | fprintf(stderr, "%d: %s $%d %s\n", line, | |
300 | type, i, grump); | |
301 | status = 1; | |
302 | err = 1; | |
303 | } | |
304 | } | |
305 | ||
306 | regfree(&re); | |
307 | } | |
308 | ||
309 | /* | |
310 | - options - pick options out of a regression-test string | |
311 | == int options(int type, char *s); | |
312 | */ | |
313 | int | |
314 | options(type, s) | |
315 | int type; /* 'c' compile, 'e' exec */ | |
316 | char *s; | |
317 | { | |
318 | register char *p; | |
319 | register int o = (type == 'c') ? copts : eopts; | |
320 | register char *legal = (type == 'c') ? "bisnmp" : "^$#tl"; | |
321 | ||
322 | for (p = s; *p != '\0'; p++) | |
323 | if (strchr(legal, *p) != NULL) | |
324 | switch (*p) { | |
325 | case 'b': | |
326 | o &= ~REG_EXTENDED; | |
327 | break; | |
328 | case 'i': | |
329 | o |= REG_ICASE; | |
330 | break; | |
331 | case 's': | |
332 | o |= REG_NOSUB; | |
333 | break; | |
334 | case 'n': | |
335 | o |= REG_NEWLINE; | |
336 | break; | |
337 | case 'm': | |
338 | o &= ~REG_EXTENDED; | |
339 | o |= REG_NOSPEC; | |
340 | break; | |
341 | case 'p': | |
342 | o |= REG_PEND; | |
343 | break; | |
344 | case '^': | |
345 | o |= REG_NOTBOL; | |
346 | break; | |
347 | case '$': | |
348 | o |= REG_NOTEOL; | |
349 | break; | |
350 | case '#': | |
351 | o |= REG_STARTEND; | |
352 | break; | |
353 | case 't': /* trace */ | |
354 | o |= REG_TRACE; | |
355 | break; | |
356 | case 'l': /* force long representation */ | |
357 | o |= REG_LARGE; | |
358 | break; | |
359 | case 'r': /* force backref use */ | |
360 | o |= REG_BACKR; | |
361 | break; | |
362 | } | |
363 | return(o); | |
364 | } | |
365 | ||
/*
 - opt - is a particular option in a regression string?
 == int opt(int c, char *s);
 *
 * Returns 1 when the character `c' occurs in `s', 0 otherwise.
 */
int				/* predicate */
opt(c, s)
int c;
char *s;
{
	char *hit = strchr(s, c);

	if (hit == NULL)
		return(0);
	return(1);
}
377 | ||
/*
 - fixstr - transform magic characters in strings
 == void fixstr(register char *p);
 *
 * Rewrites the string in place: 'N' becomes newline, 'T' becomes tab,
 * 'S' becomes space and 'Z' becomes NUL.  A NULL pointer is ignored.
 * The scan runs to the string's original terminator, so characters that
 * follow a 'Z' are still translated even though the 'Z' itself is now
 * a NUL.
 */
void
fixstr(p)
register char *p;
{
	if (p == NULL)
		return;

	while (*p != '\0') {
		switch (*p) {
		case 'N':
			*p = '\n';
			break;
		case 'T':
			*p = '\t';
			break;
		case 'S':
			*p = ' ';
			break;
		case 'Z':
			*p = '\0';
			break;
		default:
			break;
		}
		p++;		/* step past it even if it was just made NUL */
	}
}
399 | ||
/*
 - check - check a substring match
 == char *check(char *str, regmatch_t sub, char *should);
 *
 * str		the string that was matched against
 * sub		offsets reported for the match (or one submatch)
 * should	expected text; NULL or "-" means "must not have matched";
 *		"@text" means "must be a null match located just before
 *		text" (a bare "@" means at the end of the string)
 *
 * Returns NULL when the offsets agree with the expectation, otherwise a
 * complaint.  The complaint is either a string literal or a static
 * buffer that the next call overwrites.
 */
char *				/* NULL or complaint */
check(str, sub, should)
char *str;
regmatch_t sub;
char *should;
{
	register int len;
	register int shlen;
	register int plen;	/* len, clamped for printing */
	register char *p;
	static char grump[500];
#	define	GRUMPMAX	400	/* most matched bytes copied into grump */
	register char *at = NULL;

	if (should != NULL && strcmp(should, "-") == 0)
		should = NULL;
	if (should != NULL && should[0] == '@') {
		at = should + 1;
		should = "";
	}

	/* check rm_so and rm_eo for consistency */
	if (sub.rm_so > sub.rm_eo || (sub.rm_so == -1 && sub.rm_eo != -1) ||
				(sub.rm_so != -1 && sub.rm_eo == -1) ||
				(sub.rm_so != -1 && sub.rm_so < 0) ||
				(sub.rm_eo != -1 && sub.rm_eo < 0) ) {
		sprintf(grump, "start %ld end %ld", (long)sub.rm_so,
							(long)sub.rm_eo);
		return(grump);
	}

	/* check for no match */
	if (sub.rm_so == -1 && should == NULL)
		return(NULL);
	if (sub.rm_so == -1)
		return("did not match");

	/* check for in range; rm_eo is known to be non-negative here */
	if ((size_t)sub.rm_eo > strlen(str)) {
		sprintf(grump, "start %ld end %ld, past end of string",
					(long)sub.rm_so, (long)sub.rm_eo);
		return(grump);
	}

	len = (int)(sub.rm_eo - sub.rm_so);
	plen = (len > GRUMPMAX) ? GRUMPMAX : len;	/* grump is only 500 bytes */
	p = str + sub.rm_so;

	/* check for not supposed to match (must precede any use of should) */
	if (should == NULL) {
		sprintf(grump, "matched `%.*s'", plen, p);
		return(grump);
	}
	shlen = (int)strlen(should);

	/* check for wrong match */
	if (len != shlen || strncmp(p, should, (size_t)shlen) != 0) {
		sprintf(grump, "matched `%.*s' instead", plen, p);
		return(grump);
	}
	if (shlen > 0)
		return(NULL);

	/* check null match in right place */
	if (at == NULL)
		return(NULL);
	shlen = (int)strlen(at);
	if (shlen == 0)
		shlen = 1;	/* force check for end-of-string */
	if (strncmp(p, at, (size_t)shlen) != 0) {
		sprintf(grump, "matched null at `%.20s'", p);
		return(grump);
	}
	return(NULL);
}
476 | ||
477 | /* | |
478 | - eprint - convert error number to name | |
479 | == static char *eprint(int err); | |
480 | */ | |
481 | static char * | |
482 | eprint(err) | |
483 | int err; | |
484 | { | |
485 | static char epbuf[100]; | |
486 | size_t len; | |
487 | ||
488 | len = regerror(REG_ITOA|err, (regex_t *)NULL, epbuf, sizeof(epbuf)); | |
489 | assert(len <= sizeof(epbuf)); | |
490 | return(epbuf); | |
491 | } | |
492 | ||
493 | /* | |
494 | - efind - convert error name to number | |
495 | == static int efind(char *name); | |
496 | */ | |
497 | static int | |
498 | efind(name) | |
499 | char *name; | |
500 | { | |
501 | static char efbuf[100]; | |
502 | size_t n; | |
503 | regex_t re; | |
504 | ||
505 | sprintf(efbuf, "REG_%s", name); | |
506 | assert(strlen(efbuf) < sizeof(efbuf)); | |
507 | re.re_endp = efbuf; | |
508 | (void) regerror(REG_ATOI, &re, efbuf, sizeof(efbuf)); | |
509 | return(atoi(efbuf)); | |
510 | } |