]> git.saurik.com Git - wxWidgets.git/blob - src/regex/regcomp.c
revert my last change
[wxWidgets.git] / src / regex / regcomp.c
1 #if defined(__MWERKS__) && !defined(__MACH__)
2 typedef long off_t ;
3 #else
4 #include <sys/types.h>
5 #endif
6 #include <stdio.h>
7 #include <string.h>
8 #include <ctype.h>
9 #include <limits.h>
10 #include <stdlib.h>
11 #include "regex.h"
12
13 #include "utils.h"
14 #include "regex2.h"
15
16 #include "cclass.h"
17 #include "cname.h"
18
19 /*
20 * parse structure, passed up and down to avoid global variables and
21 * other clumsinesses
22 */
23 struct parse {
24 char *next; /* next character in RE */
25 char *end; /* end of string (-> NUL normally) */
26 int error; /* has an error been seen? */
27 sop *strip; /* malloced strip */
28 sopno ssize; /* malloced strip size (allocated) */
29 sopno slen; /* malloced strip length (used) */
30 int ncsalloc; /* number of csets allocated */
31 struct re_guts *g;
32 # define NPAREN 10 /* we need to remember () 1-9 for back refs */
33 sopno pbegin[NPAREN]; /* -> ( ([0] unused) */
34 sopno pend[NPAREN]; /* -> ) ([0] unused) */
35 };
36
37 #include "regcomp.ih"
38
39 static char nuls[10]; /* place to point scanner in event of error */
40
41 /*
42 * macros for use with parse structure
43 * BEWARE: these know that the parse structure is named `p' !!!
44 */
45 #define PEEK() (*p->next)
46 #define PEEK2() (*(p->next+1))
47 #define MORE() (p->next < p->end)
48 #define MORE2() (p->next+1 < p->end)
49 #define SEE(c) (MORE() && PEEK() == (c))
50 #define SEETWO(a, b) (MORE() && MORE2() && PEEK() == (a) && PEEK2() == (b))
51 #define EAT(c) ((SEE(c)) ? (NEXT(), 1) : 0)
52 #define EATTWO(a, b) ((SEETWO(a, b)) ? (NEXT2(), 1) : 0)
53 #define NEXT() (p->next++)
54 #define NEXT2() (p->next += 2)
55 #define NEXTn(n) (p->next += (n))
56 #define GETNEXT() (*p->next++)
57 #define SETERROR(e) seterr(p, (e))
58 #define REQUIRE(co, e) ((void)((co) || SETERROR(e)))
59 #define MUSTSEE(c, e) (REQUIRE(MORE() && PEEK() == (c), e))
60 #define MUSTEAT(c, e) (REQUIRE(MORE() && GETNEXT() == (c), e))
61 #define MUSTNOTSEE(c, e) (REQUIRE(!MORE() || PEEK() != (c), e))
62 #define EMIT(op, sopnd) doemit(p, (sop)(op), (size_t)(sopnd))
63 #define INSERT(op, pos) doinsert(p, (sop)(op), HERE()-(pos)+1, pos)
64 #define AHEAD(pos) dofwd(p, pos, HERE()-(pos))
65 #define ASTERN(sop, pos) EMIT(sop, HERE()-pos)
66 #define HERE() (p->slen)
67 #define THERE() (p->slen - 1)
68 #define THERETHERE() (p->slen - 2)
69 #define DROP(n) (p->slen -= (n))
70
71 #ifndef NDEBUG
72 static int never = 0; /* for use in asserts; shuts lint up */
73 #else
74 #define never 0 /* some <assert.h>s have bugs too */
75 #endif
76
77 /*
78 - regcomp - interface for parser and compilation
79 = extern int regcomp(regex_t *, const char *, int);
80 = #define REG_BASIC 0000
81 = #define REG_EXTENDED 0001
82 = #define REG_ICASE 0002
83 = #define REG_NOSUB 0004
84 = #define REG_NEWLINE 0010
85 = #define REG_NOSPEC 0020
86 = #define REG_PEND 0040
87 = #define REG_DUMP 0200
88 */
89 int /* 0 success, otherwise REG_something */
90 regcomp(preg, pattern, cflags)
91 regex_t *preg;
92 const char *pattern;
93 int cflags;
94 {
95 struct parse pa;
96 register struct re_guts *g;
97 register struct parse *p = &pa;
98 register int i;
99 register size_t len;
100 #ifdef REDEBUG
101 # define GOODFLAGS(f) (f)
102 #else
103 # define GOODFLAGS(f) ((f)&~REG_DUMP)
104 #endif
105
106 cflags = GOODFLAGS(cflags);
107 if ((cflags&REG_EXTENDED) && (cflags&REG_NOSPEC))
108 return(REG_INVARG);
109
110 if (cflags&REG_PEND) {
111 if (preg->re_endp < pattern)
112 return(REG_INVARG);
113 len = preg->re_endp - pattern;
114 } else
115 len = strlen((char *)pattern);
116
117 /* do the mallocs early so failure handling is easy */
118 g = (struct re_guts *)malloc(sizeof(struct re_guts) +
119 (NC-1)*sizeof(cat_t));
120 if (g == NULL)
121 return(REG_ESPACE);
122 p->ssize = len/(size_t)2*(size_t)3 + (size_t)1; /* ugh */
123 p->strip = (sop *)malloc(p->ssize * sizeof(sop));
124 p->slen = 0;
125 if (p->strip == NULL) {
126 free((char *)g);
127 return(REG_ESPACE);
128 }
129
130 /* set things up */
131 p->g = g;
132 p->next = (char *)pattern; /* convenience; we do not modify it */
133 p->end = p->next + len;
134 p->error = 0;
135 p->ncsalloc = 0;
136 for (i = 0; i < NPAREN; i++) {
137 p->pbegin[i] = 0;
138 p->pend[i] = 0;
139 }
140 g->csetsize = NC;
141 g->sets = NULL;
142 g->setbits = NULL;
143 g->ncsets = 0;
144 g->cflags = cflags;
145 g->iflags = 0;
146 g->nbol = 0;
147 g->neol = 0;
148 g->must = NULL;
149 g->mlen = 0;
150 g->nsub = 0;
151 g->ncategories = 1; /* category 0 is "everything else" */
152 g->categories = &g->catspace[-(CHAR_MIN)];
153 (void) memset((char *)g->catspace, 0, NC*sizeof(cat_t));
154 g->backrefs = 0;
155
156 /* do it */
157 EMIT(OEND, 0);
158 g->firststate = THERE();
159 if (cflags&REG_EXTENDED)
160 p_ere(p, OUT);
161 else if (cflags&REG_NOSPEC)
162 p_str(p);
163 else
164 p_bre(p, OUT, OUT);
165 EMIT(OEND, 0);
166 g->laststate = THERE();
167
168 /* tidy up loose ends and fill things in */
169 categorize(p, g);
170 stripsnug(p, g);
171 findmust(p, g);
172 g->nplus = pluscount(p, g);
173 g->magic = MAGIC2;
174 preg->re_nsub = g->nsub;
175 preg->re_g = g;
176 preg->re_magic = MAGIC1;
177 #ifndef REDEBUG
178 /* not debugging, so can't rely on the assert() in regexec() */
179 if (g->iflags&BAD)
180 SETERROR(REG_ASSERT);
181 #endif
182
183 /* win or lose, we're done */
184 if (p->error != 0) /* lose */
185 regfree(preg);
186 return(p->error);
187 }
188
189 /*
190 - p_ere - ERE parser top level, concatenation and alternation
191 == static void p_ere(register struct parse *p, int stop);
192 */
193 static void
194 p_ere(p, stop)
195 register struct parse *p;
196 int stop; /* character this ERE should end at */
197 {
198 register char c;
199 register sopno prevback;
200 register sopno prevfwd;
201 register sopno conc;
202 register int first = 1; /* is this the first alternative? */
203
204 for (;;) {
205 /* do a bunch of concatenated expressions */
206 conc = HERE();
207 while (MORE() && (c = PEEK()) != '|' && c != stop)
208 p_ere_exp(p);
209 REQUIRE(HERE() != conc, REG_EMPTY); /* require nonempty */
210
211 if (!EAT('|'))
212 break; /* NOTE BREAK OUT */
213
214 if (first) {
215 INSERT(OCH_, conc); /* offset is wrong */
216 prevfwd = conc;
217 prevback = conc;
218 first = 0;
219 }
220 ASTERN(OOR1, prevback);
221 prevback = THERE();
222 AHEAD(prevfwd); /* fix previous offset */
223 prevfwd = HERE();
224 EMIT(OOR2, 0); /* offset is very wrong */
225 }
226
227 if (!first) { /* tail-end fixups */
228 AHEAD(prevfwd);
229 ASTERN(O_CH, prevback);
230 }
231
232 assert(!MORE() || SEE(stop));
233 }
234
235 /*
236 - p_ere_exp - parse one subERE, an atom possibly followed by a repetition op
237 == static void p_ere_exp(register struct parse *p);
238 */
239 static void
240 p_ere_exp(p)
241 register struct parse *p;
242 {
243 register char c;
244 register sopno pos;
245 register int count;
246 register int count2;
247 register sopno subno;
248 int wascaret = 0;
249
250 assert(MORE()); /* caller should have ensured this */
251 c = GETNEXT();
252
253 pos = HERE();
254 switch (c) {
255 case '(':
256 REQUIRE(MORE(), REG_EPAREN);
257 p->g->nsub++;
258 subno = p->g->nsub;
259 if (subno < NPAREN)
260 p->pbegin[subno] = HERE();
261 EMIT(OLPAREN, subno);
262 if (!SEE(')'))
263 p_ere(p, ')');
264 if (subno < NPAREN) {
265 p->pend[subno] = HERE();
266 assert(p->pend[subno] != 0);
267 }
268 EMIT(ORPAREN, subno);
269 MUSTEAT(')', REG_EPAREN);
270 break;
271 #ifndef POSIX_MISTAKE
272 case ')': /* happens only if no current unmatched ( */
273 /*
274 * You may ask, why the ifndef? Because I didn't notice
275 * this until slightly too late for 1003.2, and none of the
276 * other 1003.2 regular-expression reviewers noticed it at
277 * all. So an unmatched ) is legal POSIX, at least until
278 * we can get it fixed.
279 */
280 SETERROR(REG_EPAREN);
281 break;
282 #endif
283 case '^':
284 EMIT(OBOL, 0);
285 p->g->iflags |= USEBOL;
286 p->g->nbol++;
287 wascaret = 1;
288 break;
289 case '$':
290 EMIT(OEOL, 0);
291 p->g->iflags |= USEEOL;
292 p->g->neol++;
293 break;
294 case '|':
295 SETERROR(REG_EMPTY);
296 break;
297 case '*':
298 case '+':
299 case '?':
300 SETERROR(REG_BADRPT);
301 break;
302 case '.':
303 if (p->g->cflags&REG_NEWLINE)
304 nonnewline(p);
305 else
306 EMIT(OANY, 0);
307 break;
308 case '[':
309 p_bracket(p);
310 break;
311 case '\\':
312 REQUIRE(MORE(), REG_EESCAPE);
313 c = GETNEXT();
314 ordinary(p, c);
315 break;
316 case '{': /* okay as ordinary except if digit follows */
317 REQUIRE(!MORE() || !isdigit(PEEK()), REG_BADRPT);
318 /* FALLTHROUGH */
319 default:
320 ordinary(p, c);
321 break;
322 }
323
324 if (!MORE())
325 return;
326 c = PEEK();
327 /* we call { a repetition if followed by a digit */
328 if (!( c == '*' || c == '+' || c == '?' ||
329 (c == '{' && MORE2() && isdigit(PEEK2())) ))
330 return; /* no repetition, we're done */
331 NEXT();
332
333 REQUIRE(!wascaret, REG_BADRPT);
334 switch (c) {
335 case '*': /* implemented as +? */
336 /* this case does not require the (y|) trick, noKLUDGE */
337 INSERT(OPLUS_, pos);
338 ASTERN(O_PLUS, pos);
339 INSERT(OQUEST_, pos);
340 ASTERN(O_QUEST, pos);
341 break;
342 case '+':
343 INSERT(OPLUS_, pos);
344 ASTERN(O_PLUS, pos);
345 break;
346 case '?':
347 /* KLUDGE: emit y? as (y|) until subtle bug gets fixed */
348 INSERT(OCH_, pos); /* offset slightly wrong */
349 ASTERN(OOR1, pos); /* this one's right */
350 AHEAD(pos); /* fix the OCH_ */
351 EMIT(OOR2, 0); /* offset very wrong... */
352 AHEAD(THERE()); /* ...so fix it */
353 ASTERN(O_CH, THERETHERE());
354 break;
355 case '{':
356 count = p_count(p);
357 if (EAT(',')) {
358 if (isdigit(PEEK())) {
359 count2 = p_count(p);
360 REQUIRE(count <= count2, REG_BADBR);
361 } else /* single number with comma */
362 count2 = INFINITY;
363 } else /* just a single number */
364 count2 = count;
365 repeat(p, pos, count, count2);
366 if (!EAT('}')) { /* error heuristics */
367 while (MORE() && PEEK() != '}')
368 NEXT();
369 REQUIRE(MORE(), REG_EBRACE);
370 SETERROR(REG_BADBR);
371 }
372 break;
373 }
374
375 if (!MORE())
376 return;
377 c = PEEK();
378 if (!( c == '*' || c == '+' || c == '?' ||
379 (c == '{' && MORE2() && isdigit(PEEK2())) ) )
380 return;
381 SETERROR(REG_BADRPT);
382 }
383
384 /*
385 - p_str - string (no metacharacters) "parser"
386 == static void p_str(register struct parse *p);
387 */
388 static void
389 p_str(p)
390 register struct parse *p;
391 {
392 REQUIRE(MORE(), REG_EMPTY);
393 while (MORE())
394 ordinary(p, GETNEXT());
395 }
396
397 /*
398 - p_bre - BRE parser top level, anchoring and concatenation
399 == static void p_bre(register struct parse *p, register int end1, \
400 == register int end2);
401 * Giving end1 as OUT essentially eliminates the end1/end2 check.
402 *
403 * This implementation is a bit of a kludge, in that a trailing $ is first
404 * taken as an ordinary character and then revised to be an anchor. The
405 * only undesirable side effect is that '$' gets included as a character
406 * category in such cases. This is fairly harmless; not worth fixing.
407 * The amount of lookahead needed to avoid this kludge is excessive.
408 */
409 static void
410 p_bre(p, end1, end2)
411 register struct parse *p;
412 register int end1; /* first terminating character */
413 register int end2; /* second terminating character */
414 {
415 register sopno start = HERE();
416 register int first = 1; /* first subexpression? */
417 register int wasdollar = 0;
418
419 if (EAT('^')) {
420 EMIT(OBOL, 0);
421 p->g->iflags |= USEBOL;
422 p->g->nbol++;
423 }
424 while (MORE() && !SEETWO(end1, end2)) {
425 wasdollar = p_simp_re(p, first);
426 first = 0;
427 }
428 if (wasdollar) { /* oops, that was a trailing anchor */
429 DROP(1);
430 EMIT(OEOL, 0);
431 p->g->iflags |= USEEOL;
432 p->g->neol++;
433 }
434
435 REQUIRE(HERE() != start, REG_EMPTY); /* require nonempty */
436 }
437
438 /*
439 - p_simp_re - parse a simple RE, an atom possibly followed by a repetition
440 == static int p_simp_re(register struct parse *p, int starordinary);
441 */
442 static int /* was the simple RE an unbackslashed $? */
443 p_simp_re(p, starordinary)
444 register struct parse *p;
445 int starordinary; /* is a leading * an ordinary character? */
446 {
447 register int c;
448 register int count;
449 register int count2;
450 register sopno pos;
451 register int i;
452 register sopno subno;
453 # define BACKSL (1<<CHAR_BIT)
454
455 pos = HERE(); /* repetion op, if any, covers from here */
456
457 assert(MORE()); /* caller should have ensured this */
458 c = GETNEXT();
459 if (c == '\\') {
460 REQUIRE(MORE(), REG_EESCAPE);
461 c = BACKSL | (unsigned char)GETNEXT();
462 }
463 switch (c) {
464 case '.':
465 if (p->g->cflags&REG_NEWLINE)
466 nonnewline(p);
467 else
468 EMIT(OANY, 0);
469 break;
470 case '[':
471 p_bracket(p);
472 break;
473 case BACKSL|'{':
474 SETERROR(REG_BADRPT);
475 break;
476 case BACKSL|'(':
477 p->g->nsub++;
478 subno = p->g->nsub;
479 if (subno < NPAREN)
480 p->pbegin[subno] = HERE();
481 EMIT(OLPAREN, subno);
482 /* the MORE here is an error heuristic */
483 if (MORE() && !SEETWO('\\', ')'))
484 p_bre(p, '\\', ')');
485 if (subno < NPAREN) {
486 p->pend[subno] = HERE();
487 assert(p->pend[subno] != 0);
488 }
489 EMIT(ORPAREN, subno);
490 REQUIRE(EATTWO('\\', ')'), REG_EPAREN);
491 break;
492 case BACKSL|')': /* should not get here -- must be user */
493 case BACKSL|'}':
494 SETERROR(REG_EPAREN);
495 break;
496 case BACKSL|'1':
497 case BACKSL|'2':
498 case BACKSL|'3':
499 case BACKSL|'4':
500 case BACKSL|'5':
501 case BACKSL|'6':
502 case BACKSL|'7':
503 case BACKSL|'8':
504 case BACKSL|'9':
505 i = (c&~BACKSL) - '0';
506 assert(i < NPAREN);
507 if (p->pend[i] != 0) {
508 assert(i <= p->g->nsub);
509 EMIT(OBACK_, i);
510 assert(p->pbegin[i] != 0);
511 assert(OP(p->strip[p->pbegin[i]]) == OLPAREN);
512 assert(OP(p->strip[p->pend[i]]) == ORPAREN);
513 (void) dupl(p, p->pbegin[i]+1, p->pend[i]);
514 EMIT(O_BACK, i);
515 } else
516 SETERROR(REG_ESUBREG);
517 p->g->backrefs = 1;
518 break;
519 case '*':
520 REQUIRE(starordinary, REG_BADRPT);
521 /* FALLTHROUGH */
522 default:
523 ordinary(p, (char)c); /* takes off BACKSL, if any */
524 break;
525 }
526
527 if (EAT('*')) { /* implemented as +? */
528 /* this case does not require the (y|) trick, noKLUDGE */
529 INSERT(OPLUS_, pos);
530 ASTERN(O_PLUS, pos);
531 INSERT(OQUEST_, pos);
532 ASTERN(O_QUEST, pos);
533 } else if (EATTWO('\\', '{')) {
534 count = p_count(p);
535 if (EAT(',')) {
536 if (MORE() && isdigit(PEEK())) {
537 count2 = p_count(p);
538 REQUIRE(count <= count2, REG_BADBR);
539 } else /* single number with comma */
540 count2 = INFINITY;
541 } else /* just a single number */
542 count2 = count;
543 repeat(p, pos, count, count2);
544 if (!EATTWO('\\', '}')) { /* error heuristics */
545 while (MORE() && !SEETWO('\\', '}'))
546 NEXT();
547 REQUIRE(MORE(), REG_EBRACE);
548 SETERROR(REG_BADBR);
549 }
550 } else if (c == (unsigned char)'$') /* $ (but not \$) ends it */
551 return(1);
552
553 return(0);
554 }
555
556 /*
557 - p_count - parse a repetition count
558 == static int p_count(register struct parse *p);
559 */
560 static int /* the value */
561 p_count(p)
562 register struct parse *p;
563 {
564 register int count = 0;
565 register int ndigits = 0;
566
567 while (MORE() && isdigit(PEEK()) && count <= DUPMAX) {
568 count = count*10 + (GETNEXT() - '0');
569 ndigits++;
570 }
571
572 REQUIRE(ndigits > 0 && count <= DUPMAX, REG_BADBR);
573 return(count);
574 }
575
576 /*
577 - p_bracket - parse a bracketed character list
578 == static void p_bracket(register struct parse *p);
579 *
580 * Note a significant property of this code: if the allocset() did SETERROR,
581 * no set operations are done.
582 */
583 static void
584 p_bracket(p)
585 register struct parse *p;
586 {
587 register cset *cs = allocset(p);
588 register int invert = 0;
589
590 /* Dept of Truly Sickening Special-Case Kludges */
591 if (p->next + 5 < p->end && strncmp(p->next, "[:<:]]", 6) == 0) {
592 EMIT(OBOW, 0);
593 NEXTn(6);
594 return;
595 }
596 if (p->next + 5 < p->end && strncmp(p->next, "[:>:]]", 6) == 0) {
597 EMIT(OEOW, 0);
598 NEXTn(6);
599 return;
600 }
601
602 if (EAT('^'))
603 invert++; /* make note to invert set at end */
604 if (EAT(']'))
605 CHadd(cs, ']');
606 else if (EAT('-'))
607 CHadd(cs, '-');
608 while (MORE() && PEEK() != ']' && !SEETWO('-', ']'))
609 p_b_term(p, cs);
610 if (EAT('-'))
611 CHadd(cs, '-');
612 MUSTEAT(']', REG_EBRACK);
613
614 if (p->error != 0) /* don't mess things up further */
615 return;
616
617 if (p->g->cflags&REG_ICASE) {
618 register int i;
619 register int ci;
620
621 for (i = p->g->csetsize - 1; i >= 0; i--)
622 if (CHIN(cs, i) && isalpha(i)) {
623 ci = othercase(i);
624 if (ci != i)
625 CHadd(cs, ci);
626 }
627 if (cs->multis != NULL)
628 mccase(p, cs);
629 }
630 if (invert) {
631 register int i;
632
633 for (i = p->g->csetsize - 1; i >= 0; i--)
634 if (CHIN(cs, i))
635 CHsub(cs, i);
636 else
637 CHadd(cs, i);
638 if (p->g->cflags&REG_NEWLINE)
639 CHsub(cs, '\n');
640 if (cs->multis != NULL)
641 mcinvert(p, cs);
642 }
643
644 assert(cs->multis == NULL); /* xxx */
645
646 if (nch(p, cs) == 1) { /* optimize singleton sets */
647 ordinary(p, firstch(p, cs));
648 freeset(p, cs);
649 } else
650 EMIT(OANYOF, freezeset(p, cs));
651 }
652
653 /*
654 - p_b_term - parse one term of a bracketed character list
655 == static void p_b_term(register struct parse *p, register cset *cs);
656 */
657 static void
658 p_b_term(p, cs)
659 register struct parse *p;
660 register cset *cs;
661 {
662 register char c;
663 register char start, finish;
664 register int i;
665
666 /* classify what we've got */
667 switch ((MORE()) ? PEEK() : '\0') {
668 case '[':
669 c = (MORE2()) ? PEEK2() : '\0';
670 break;
671 case '-':
672 SETERROR(REG_ERANGE);
673 return; /* NOTE RETURN */
674 break;
675 default:
676 c = '\0';
677 break;
678 }
679
680 switch (c) {
681 case ':': /* character class */
682 NEXT2();
683 REQUIRE(MORE(), REG_EBRACK);
684 c = PEEK();
685 REQUIRE(c != '-' && c != ']', REG_ECTYPE);
686 p_b_cclass(p, cs);
687 REQUIRE(MORE(), REG_EBRACK);
688 REQUIRE(EATTWO(':', ']'), REG_ECTYPE);
689 break;
690 case '=': /* equivalence class */
691 NEXT2();
692 REQUIRE(MORE(), REG_EBRACK);
693 c = PEEK();
694 REQUIRE(c != '-' && c != ']', REG_ECOLLATE);
695 p_b_eclass(p, cs);
696 REQUIRE(MORE(), REG_EBRACK);
697 REQUIRE(EATTWO('=', ']'), REG_ECOLLATE);
698 break;
699 default: /* symbol, ordinary character, or range */
700 /* xxx revision needed for multichar stuff */
701 start = p_b_symbol(p);
702 if (SEE('-') && MORE2() && PEEK2() != ']') {
703 /* range */
704 NEXT();
705 if (EAT('-'))
706 finish = '-';
707 else
708 finish = p_b_symbol(p);
709 } else
710 finish = start;
711 /* xxx what about signed chars here... */
712 REQUIRE(start <= finish, REG_ERANGE);
713 for (i = start; i <= finish; i++)
714 CHadd(cs, i);
715 break;
716 }
717 }
718
719 /*
720 - p_b_cclass - parse a character-class name and deal with it
721 == static void p_b_cclass(register struct parse *p, register cset *cs);
722 */
723 static void
724 p_b_cclass(p, cs)
725 register struct parse *p;
726 register cset *cs;
727 {
728 register char *sp = p->next;
729 register struct cclass *cp;
730 register size_t len;
731 register char *u;
732 register char c;
733
734 while (MORE() && isalpha(PEEK()))
735 NEXT();
736 len = p->next - sp;
737 for (cp = cclasses; cp->name != NULL; cp++)
738 if (strncmp(cp->name, sp, len) == 0 && cp->name[len] == '\0')
739 break;
740 if (cp->name == NULL) {
741 /* oops, didn't find it */
742 SETERROR(REG_ECTYPE);
743 return;
744 }
745
746 u = cp->chars;
747 while ((c = *u++) != '\0')
748 CHadd(cs, c);
749 for (u = cp->multis; *u != '\0'; u += strlen(u) + 1)
750 MCadd(p, cs, u);
751 }
752
753 /*
754 - p_b_eclass - parse an equivalence-class name and deal with it
755 == static void p_b_eclass(register struct parse *p, register cset *cs);
756 *
757 * This implementation is incomplete. xxx
758 */
759 static void
760 p_b_eclass(p, cs)
761 register struct parse *p;
762 register cset *cs;
763 {
764 register char c;
765
766 c = p_b_coll_elem(p, '=');
767 CHadd(cs, c);
768 }
769
770 /*
771 - p_b_symbol - parse a character or [..]ed multicharacter collating symbol
772 == static char p_b_symbol(register struct parse *p);
773 */
774 static char /* value of symbol */
775 p_b_symbol(p)
776 register struct parse *p;
777 {
778 register char value;
779
780 REQUIRE(MORE(), REG_EBRACK);
781 if (!EATTWO('[', '.'))
782 return(GETNEXT());
783
784 /* collating symbol */
785 value = p_b_coll_elem(p, '.');
786 REQUIRE(EATTWO('.', ']'), REG_ECOLLATE);
787 return(value);
788 }
789
790 /*
791 - p_b_coll_elem - parse a collating-element name and look it up
792 == static char p_b_coll_elem(register struct parse *p, int endc);
793 */
794 static char /* value of collating element */
795 p_b_coll_elem(p, endc)
796 register struct parse *p;
797 int endc; /* name ended by endc,']' */
798 {
799 register char *sp = p->next;
800 register struct cname *cp;
801 register int len;
802
803 while (MORE() && !SEETWO(endc, ']'))
804 NEXT();
805 if (!MORE()) {
806 SETERROR(REG_EBRACK);
807 return(0);
808 }
809 len = p->next - sp;
810 for (cp = cnames; cp->name != NULL; cp++)
811 if (strncmp(cp->name, sp, len) == 0 && cp->name[len] == '\0')
812 return(cp->code); /* known name */
813 if (len == 1)
814 return(*sp); /* single character */
815 SETERROR(REG_ECOLLATE); /* neither */
816 return(0);
817 }
818
819 /*
820 - othercase - return the case counterpart of an alphabetic
821 == static char othercase(int ch);
822 */
823 static char /* if no counterpart, return ch */
824 othercase(ch)
825 int ch;
826 {
827 assert(isalpha(ch));
828 if (isupper(ch))
829 return(tolower(ch));
830 else if (islower(ch))
831 return(toupper(ch));
832 else /* peculiar, but could happen */
833 return(ch);
834 }
835
836 /*
837 - bothcases - emit a dualcase version of a two-case character
838 == static void bothcases(register struct parse *p, int ch);
839 *
840 * Boy, is this implementation ever a kludge...
841 */
842 static void
843 bothcases(p, ch)
844 register struct parse *p;
845 int ch;
846 {
847 register char *oldnext = p->next;
848 register char *oldend = p->end;
849 char bracket[3];
850
851 assert(othercase(ch) != ch); /* p_bracket() would recurse */
852 p->next = bracket;
853 p->end = bracket+2;
854 bracket[0] = ch;
855 bracket[1] = ']';
856 bracket[2] = '\0';
857 p_bracket(p);
858 assert(p->next == bracket+2);
859 p->next = oldnext;
860 p->end = oldend;
861 }
862
863 /*
864 - ordinary - emit an ordinary character
865 == static void ordinary(register struct parse *p, register int ch);
866 */
867 static void
868 ordinary(p, ch)
869 register struct parse *p;
870 register int ch;
871 {
872 register cat_t *cap = p->g->categories;
873
874 if ((p->g->cflags&REG_ICASE) && isalpha(ch) && othercase(ch) != ch)
875 bothcases(p, ch);
876 else {
877 EMIT(OCHAR, (unsigned char)ch);
878 if (cap[ch] == 0)
879 cap[ch] = p->g->ncategories++;
880 }
881 }
882
883 /*
884 - nonnewline - emit REG_NEWLINE version of OANY
885 == static void nonnewline(register struct parse *p);
886 *
887 * Boy, is this implementation ever a kludge...
888 */
889 static void
890 nonnewline(p)
891 register struct parse *p;
892 {
893 register char *oldnext = p->next;
894 register char *oldend = p->end;
895 char bracket[4];
896
897 p->next = bracket;
898 p->end = bracket+3;
899 bracket[0] = '^';
900 bracket[1] = '\n';
901 bracket[2] = ']';
902 bracket[3] = '\0';
903 p_bracket(p);
904 assert(p->next == bracket+3);
905 p->next = oldnext;
906 p->end = oldend;
907 }
908
909 /*
910 - repeat - generate code for a bounded repetition, recursively if needed
911 == static void repeat(register struct parse *p, sopno start, int from, int to);
912 */
913 static void
914 repeat(p, start, from, to)
915 register struct parse *p;
916 sopno start; /* operand from here to end of strip */
917 int from; /* repeated from this number */
918 int to; /* to this number of times (maybe INFINITY) */
919 {
920 register sopno finish = HERE();
921 # define N 2
922 # define INF 3
923 # define REP(f, t) ((f)*8 + (t))
924 # define MAP(n) (((n) <= 1) ? (n) : ((n) == INFINITY) ? INF : N)
925 register sopno copy;
926
927 if (p->error != 0) /* head off possible runaway recursion */
928 return;
929
930 assert(from <= to);
931
932 switch (REP(MAP(from), MAP(to))) {
933 case REP(0, 0): /* must be user doing this */
934 DROP(finish-start); /* drop the operand */
935 break;
936 case REP(0, 1): /* as x{1,1}? */
937 case REP(0, N): /* as x{1,n}? */
938 case REP(0, INF): /* as x{1,}? */
939 /* KLUDGE: emit y? as (y|) until subtle bug gets fixed */
940 INSERT(OCH_, start); /* offset is wrong... */
941 repeat(p, start+1, 1, to);
942 ASTERN(OOR1, start);
943 AHEAD(start); /* ... fix it */
944 EMIT(OOR2, 0);
945 AHEAD(THERE());
946 ASTERN(O_CH, THERETHERE());
947 break;
948 case REP(1, 1): /* trivial case */
949 /* done */
950 break;
951 case REP(1, N): /* as x?x{1,n-1} */
952 /* KLUDGE: emit y? as (y|) until subtle bug gets fixed */
953 INSERT(OCH_, start);
954 ASTERN(OOR1, start);
955 AHEAD(start);
956 EMIT(OOR2, 0); /* offset very wrong... */
957 AHEAD(THERE()); /* ...so fix it */
958 ASTERN(O_CH, THERETHERE());
959 copy = dupl(p, start+1, finish+1);
960 assert(copy == finish+4);
961 repeat(p, copy, 1, to-1);
962 break;
963 case REP(1, INF): /* as x+ */
964 INSERT(OPLUS_, start);
965 ASTERN(O_PLUS, start);
966 break;
967 case REP(N, N): /* as xx{m-1,n-1} */
968 copy = dupl(p, start, finish);
969 repeat(p, copy, from-1, to-1);
970 break;
971 case REP(N, INF): /* as xx{n-1,INF} */
972 copy = dupl(p, start, finish);
973 repeat(p, copy, from-1, to);
974 break;
975 default: /* "can't happen" */
976 SETERROR(REG_ASSERT); /* just in case */
977 break;
978 }
979 }
980
981 /*
982 - seterr - set an error condition
983 == static int seterr(register struct parse *p, int e);
984 */
985 static int /* useless but makes type checking happy */
986 seterr(p, e)
987 register struct parse *p;
988 int e;
989 {
990 if (p->error == 0) /* keep earliest error condition */
991 p->error = e;
992 p->next = nuls; /* try to bring things to a halt */
993 p->end = nuls;
994 return(0); /* make the return value well-defined */
995 }
996
997 /*
998 - allocset - allocate a set of characters for []
999 == static cset *allocset(register struct parse *p);
1000 */
1001 static cset *
1002 allocset(p)
1003 register struct parse *p;
1004 {
1005 register int no = p->g->ncsets++;
1006 register size_t nc;
1007 register size_t nbytes;
1008 register cset *cs;
1009 register size_t css = (size_t)p->g->csetsize;
1010 register int i;
1011
1012 if (no >= p->ncsalloc) { /* need another column of space */
1013 p->ncsalloc += CHAR_BIT;
1014 nc = p->ncsalloc;
1015 assert(nc % CHAR_BIT == 0);
1016 nbytes = nc / CHAR_BIT * css;
1017 if (p->g->sets == NULL)
1018 p->g->sets = (cset *)malloc(nc * sizeof(cset));
1019 else
1020 p->g->sets = (cset *)realloc((char *)p->g->sets,
1021 nc * sizeof(cset));
1022 if (p->g->setbits == NULL)
1023 p->g->setbits = (uch *)malloc(nbytes);
1024 else {
1025 p->g->setbits = (uch *)realloc((char *)p->g->setbits,
1026 nbytes);
1027 /* xxx this isn't right if setbits is now NULL */
1028 for (i = 0; i < no; i++)
1029 p->g->sets[i].ptr = p->g->setbits + css*(i/CHAR_BIT);
1030 }
1031 if (p->g->sets != NULL && p->g->setbits != NULL)
1032 (void) memset((char *)p->g->setbits + (nbytes - css),
1033 0, css);
1034 else {
1035 no = 0;
1036 SETERROR(REG_ESPACE);
1037 /* caller's responsibility not to do set ops */
1038 }
1039 }
1040
1041 assert(p->g->sets != NULL); /* xxx */
1042 cs = &p->g->sets[no];
1043 cs->ptr = p->g->setbits + css*((no)/CHAR_BIT);
1044 cs->mask = 1 << ((no) % CHAR_BIT);
1045 cs->hash = 0;
1046 cs->smultis = 0;
1047 cs->multis = NULL;
1048
1049 return(cs);
1050 }
1051
1052 /*
1053 - freeset - free a now-unused set
1054 == static void freeset(register struct parse *p, register cset *cs);
1055 */
1056 static void
1057 freeset(p, cs)
1058 register struct parse *p;
1059 register cset *cs;
1060 {
1061 register size_t i;
1062 register cset *top = &p->g->sets[p->g->ncsets];
1063 register size_t css = (size_t)p->g->csetsize;
1064
1065 for (i = 0; i < css; i++)
1066 CHsub(cs, i);
1067 if (cs == top-1) /* recover only the easy case */
1068 p->g->ncsets--;
1069 }
1070
1071 /*
1072 - freezeset - final processing on a set of characters
1073 == static int freezeset(register struct parse *p, register cset *cs);
1074 *
1075 * The main task here is merging identical sets. This is usually a waste
1076 * of time (although the hash code minimizes the overhead), but can win
1077 * big if REG_ICASE is being used. REG_ICASE, by the way, is why the hash
1078 * is done using addition rather than xor -- all ASCII [aA] sets xor to
1079 * the same value!
1080 */
1081 static int /* set number */
1082 freezeset(p, cs)
1083 register struct parse *p;
1084 register cset *cs;
1085 {
1086 register uch h = cs->hash;
1087 register size_t i;
1088 register cset *top = &p->g->sets[p->g->ncsets];
1089 register cset *cs2;
1090 register size_t css = (size_t)p->g->csetsize;
1091
1092 /* look for an earlier one which is the same */
1093 for (cs2 = &p->g->sets[0]; cs2 < top; cs2++)
1094 if (cs2->hash == h && cs2 != cs) {
1095 /* maybe */
1096 for (i = 0; i < css; i++)
1097 if (!!CHIN(cs2, i) != !!CHIN(cs, i))
1098 break; /* no */
1099 if (i == css)
1100 break; /* yes */
1101 }
1102
1103 if (cs2 < top) { /* found one */
1104 freeset(p, cs);
1105 cs = cs2;
1106 }
1107
1108 return((int)(cs - p->g->sets));
1109 }
1110
1111 /*
1112 - firstch - return first character in a set (which must have at least one)
1113 == static int firstch(register struct parse *p, register cset *cs);
1114 */
1115 static int /* character; there is no "none" value */
1116 firstch(p, cs)
1117 register struct parse *p;
1118 register cset *cs;
1119 {
1120 register size_t i;
1121 register size_t css = (size_t)p->g->csetsize;
1122
1123 for (i = 0; i < css; i++)
1124 if (CHIN(cs, i))
1125 return((char)i);
1126 assert(never);
1127 return(0); /* arbitrary */
1128 }
1129
1130 /*
1131 - nch - number of characters in a set
1132 == static int nch(register struct parse *p, register cset *cs);
1133 */
1134 static int
1135 nch(p, cs)
1136 register struct parse *p;
1137 register cset *cs;
1138 {
1139 register size_t i;
1140 register size_t css = (size_t)p->g->csetsize;
1141 register int n = 0;
1142
1143 for (i = 0; i < css; i++)
1144 if (CHIN(cs, i))
1145 n++;
1146 return(n);
1147 }
1148
1149 /*
1150 - mcadd - add a collating element to a cset
1151 == static void mcadd(register struct parse *p, register cset *cs, \
1152 == register char *cp);
1153 */
1154 static void
1155 mcadd(p, cs, cp)
1156 register struct parse *p;
1157 register cset *cs;
1158 register char *cp;
1159 {
1160 register size_t oldend = cs->smultis;
1161
1162 cs->smultis += strlen(cp) + 1;
1163 if (cs->multis == NULL)
1164 cs->multis = malloc(cs->smultis);
1165 else
1166 cs->multis = realloc(cs->multis, cs->smultis);
1167 if (cs->multis == NULL) {
1168 SETERROR(REG_ESPACE);
1169 return;
1170 }
1171
1172 (void) strcpy(cs->multis + oldend - 1, cp);
1173 cs->multis[cs->smultis - 1] = '\0';
1174 }
1175
1176 /* these functions don't seem to be used (yet?), suppress warnings */
1177 #if 0
1178 /*
1179 - mcsub - subtract a collating element from a cset
1180 == static void mcsub(register cset *cs, register char *cp);
1181 */
1182 static void
1183 mcsub(cs, cp)
1184 register cset *cs;
1185 register char *cp;
1186 {
1187 register char *fp = mcfind(cs, cp);
1188 register size_t len = strlen(fp);
1189
1190 assert(fp != NULL);
1191 (void) memmove(fp, fp + len + 1,
1192 cs->smultis - (fp + len + 1 - cs->multis));
1193 cs->smultis -= len;
1194
1195 if (cs->smultis == 0) {
1196 free(cs->multis);
1197 cs->multis = NULL;
1198 return;
1199 }
1200
1201 cs->multis = realloc(cs->multis, cs->smultis);
1202 assert(cs->multis != NULL);
1203 }
1204
1205 /*
1206 - mcin - is a collating element in a cset?
1207 == static int mcin(register cset *cs, register char *cp);
1208 */
1209 static int
1210 mcin(cs, cp)
1211 register cset *cs;
1212 register char *cp;
1213 {
1214 return(mcfind(cs, cp) != NULL);
1215 }
1216
1217 /*
1218 - mcfind - find a collating element in a cset
1219 == static char *mcfind(register cset *cs, register char *cp);
1220 */
1221 static char *
1222 mcfind(cs, cp)
1223 register cset *cs;
1224 register char *cp;
1225 {
1226 register char *p;
1227
1228 if (cs->multis == NULL)
1229 return(NULL);
1230 for (p = cs->multis; *p != '\0'; p += strlen(p) + 1)
1231 if (strcmp(cp, p) == 0)
1232 return(p);
1233 return(NULL);
1234 }
1235 #endif /* 0 */
1236
1237 /*
1238 - mcinvert - invert the list of collating elements in a cset
1239 == static void mcinvert(register struct parse *p, register cset *cs);
1240 *
1241 * This would have to know the set of possibilities. Implementation
1242 * is deferred.
1243 */
1244 static void
1245 mcinvert(p, cs)
1246 register struct parse *p;
1247 register cset *cs;
1248 {
1249 assert(cs->multis == NULL); /* xxx */
1250 }
1251
1252 /*
1253 - mccase - add case counterparts of the list of collating elements in a cset
1254 == static void mccase(register struct parse *p, register cset *cs);
1255 *
1256 * This would have to know the set of possibilities. Implementation
1257 * is deferred.
1258 */
1259 static void
1260 mccase(p, cs)
1261 register struct parse *p;
1262 register cset *cs;
1263 {
1264 assert(cs->multis == NULL); /* xxx */
1265 }
1266
1267 /*
1268 - isinsets - is this character in any sets?
1269 == static int isinsets(register struct re_guts *g, int c);
1270 */
1271 static int /* predicate */
1272 isinsets(g, c)
1273 register struct re_guts *g;
1274 int c;
1275 {
1276 register uch *col;
1277 register int i;
1278 register int ncols = (g->ncsets+(CHAR_BIT-1)) / CHAR_BIT;
1279 register unsigned uc = (unsigned char)c;
1280
1281 for (i = 0, col = g->setbits; i < ncols; i++, col += g->csetsize)
1282 if (col[uc] != 0)
1283 return(1);
1284 return(0);
1285 }
1286
1287 /*
1288 - samesets - are these two characters in exactly the same sets?
1289 == static int samesets(register struct re_guts *g, int c1, int c2);
1290 */
1291 static int /* predicate */
1292 samesets(g, c1, c2)
1293 register struct re_guts *g;
1294 int c1;
1295 int c2;
1296 {
1297 register uch *col;
1298 register int i;
1299 register int ncols = (g->ncsets+(CHAR_BIT-1)) / CHAR_BIT;
1300 register unsigned uc1 = (unsigned char)c1;
1301 register unsigned uc2 = (unsigned char)c2;
1302
1303 for (i = 0, col = g->setbits; i < ncols; i++, col += g->csetsize)
1304 if (col[uc1] != col[uc2])
1305 return(0);
1306 return(1);
1307 }
1308
1309 /*
1310 - categorize - sort out character categories
1311 == static void categorize(struct parse *p, register struct re_guts *g);
1312 */
1313 static void
1314 categorize(p, g)
1315 struct parse *p;
1316 register struct re_guts *g;
1317 {
1318 register cat_t *cats = g->categories;
1319 register int c;
1320 register int c2;
1321 register cat_t cat;
1322
1323 /* avoid making error situations worse */
1324 if (p->error != 0)
1325 return;
1326
1327 for (c = CHAR_MIN; c <= CHAR_MAX; c++)
1328 if (cats[c] == 0 && isinsets(g, c)) {
1329 cat = g->ncategories++;
1330 cats[c] = cat;
1331 for (c2 = c+1; c2 <= CHAR_MAX; c2++)
1332 if (cats[c2] == 0 && samesets(g, c, c2))
1333 cats[c2] = cat;
1334 }
1335 }
1336
1337 /*
1338 - dupl - emit a duplicate of a bunch of sops
1339 == static sopno dupl(register struct parse *p, sopno start, sopno finish);
1340 */
1341 static sopno /* start of duplicate */
1342 dupl(p, start, finish)
1343 register struct parse *p;
1344 sopno start; /* from here */
1345 sopno finish; /* to this less one */
1346 {
1347 register sopno ret = HERE();
1348 register sopno len = finish - start;
1349
1350 assert(finish >= start);
1351 if (len == 0)
1352 return(ret);
1353 enlarge(p, p->ssize + len); /* this many unexpected additions */
1354 assert(p->ssize >= p->slen + len);
1355 (void) memcpy((char *)(p->strip + p->slen),
1356 (char *)(p->strip + start), (size_t)len*sizeof(sop));
1357 p->slen += len;
1358 return(ret);
1359 }
1360
1361 /*
1362 - doemit - emit a strip operator
1363 == static void doemit(register struct parse *p, sop op, size_t opnd);
1364 *
1365 * It might seem better to implement this as a macro with a function as
1366 * hard-case backup, but it's just too big and messy unless there are
1367 * some changes to the data structures. Maybe later.
1368 */
1369 static void
1370 doemit(p, op, opnd)
1371 register struct parse *p;
1372 sop op;
1373 size_t opnd;
1374 {
1375 /* avoid making error situations worse */
1376 if (p->error != 0)
1377 return;
1378
1379 /* deal with oversize operands ("can't happen", more or less) */
1380 assert(opnd < 1<<OPSHIFT);
1381
1382 /* deal with undersized strip */
1383 if (p->slen >= p->ssize)
1384 enlarge(p, (p->ssize+1) / 2 * 3); /* +50% */
1385 assert(p->slen < p->ssize);
1386
1387 /* finally, it's all reduced to the easy case */
1388 p->strip[p->slen++] = SOP(op, opnd);
1389 }
1390
1391 /*
1392 - doinsert - insert a sop into the strip
1393 == static void doinsert(register struct parse *p, sop op, size_t opnd, sopno pos);
1394 */
1395 static void
1396 doinsert(p, op, opnd, pos)
1397 register struct parse *p;
1398 sop op;
1399 size_t opnd;
1400 sopno pos;
1401 {
1402 register sopno sn;
1403 register sop s;
1404 register int i;
1405
1406 /* avoid making error situations worse */
1407 if (p->error != 0)
1408 return;
1409
1410 sn = HERE();
1411 EMIT(op, opnd); /* do checks, ensure space */
1412 assert(HERE() == sn+1);
1413 s = p->strip[sn];
1414
1415 /* adjust paren pointers */
1416 assert(pos > 0);
1417 for (i = 1; i < NPAREN; i++) {
1418 if (p->pbegin[i] >= pos) {
1419 p->pbegin[i]++;
1420 }
1421 if (p->pend[i] >= pos) {
1422 p->pend[i]++;
1423 }
1424 }
1425
1426 memmove((char *)&p->strip[pos+1], (char *)&p->strip[pos],
1427 (HERE()-pos-1)*sizeof(sop));
1428 p->strip[pos] = s;
1429 }
1430
1431 /*
1432 - dofwd - complete a forward reference
1433 == static void dofwd(register struct parse *p, sopno pos, sop value);
1434 */
1435 static void
1436 dofwd(p, pos, value)
1437 register struct parse *p;
1438 register sopno pos;
1439 sop value;
1440 {
1441 /* avoid making error situations worse */
1442 if (p->error != 0)
1443 return;
1444
1445 assert(value < 1<<OPSHIFT);
1446 p->strip[pos] = OP(p->strip[pos]) | value;
1447 }
1448
1449 /*
1450 - enlarge - enlarge the strip
1451 == static void enlarge(register struct parse *p, sopno size);
1452 */
1453 static void
1454 enlarge(p, size)
1455 register struct parse *p;
1456 register sopno size;
1457 {
1458 register sop *sp;
1459
1460 if (p->ssize >= size)
1461 return;
1462
1463 sp = (sop *)realloc(p->strip, size*sizeof(sop));
1464 if (sp == NULL) {
1465 SETERROR(REG_ESPACE);
1466 return;
1467 }
1468 p->strip = sp;
1469 p->ssize = size;
1470 }
1471
1472 /*
1473 - stripsnug - compact the strip
1474 == static void stripsnug(register struct parse *p, register struct re_guts *g);
1475 */
1476 static void
1477 stripsnug(p, g)
1478 register struct parse *p;
1479 register struct re_guts *g;
1480 {
1481 g->nstates = p->slen;
1482 g->strip = (sop *)realloc((char *)p->strip, p->slen * sizeof(sop));
1483 if (g->strip == NULL) {
1484 SETERROR(REG_ESPACE);
1485 g->strip = p->strip;
1486 }
1487 }
1488
1489 /*
1490 - findmust - fill in must and mlen with longest mandatory literal string
1491 == static void findmust(register struct parse *p, register struct re_guts *g);
1492 *
1493 * This algorithm could do fancy things like analyzing the operands of |
1494 * for common subsequences. Someday. This code is simple and finds most
1495 * of the interesting cases.
1496 *
1497 * Note that must and mlen got initialized during setup.
1498 */
1499 static void
1500 findmust(p, g)
1501 struct parse *p;
1502 register struct re_guts *g;
1503 {
1504 register sop *scan;
1505 sop *start;
1506 register sop *newstart;
1507 register sopno newlen;
1508 register sop s;
1509 register char *cp;
1510 register sopno i;
1511
1512 /* avoid making error situations worse */
1513 if (p->error != 0)
1514 return;
1515
1516 /* find the longest OCHAR sequence in strip */
1517 newlen = 0;
1518 scan = g->strip + 1;
1519 do {
1520 s = *scan++;
1521 switch (OP(s)) {
1522 case OCHAR: /* sequence member */
1523 if (newlen == 0) /* new sequence */
1524 newstart = scan - 1;
1525 newlen++;
1526 break;
1527 case OPLUS_: /* things that don't break one */
1528 case OLPAREN:
1529 case ORPAREN:
1530 break;
1531 case OQUEST_: /* things that must be skipped */
1532 case OCH_:
1533 scan--;
1534 do {
1535 scan += OPND(s);
1536 s = *scan;
1537 /* assert() interferes w debug printouts */
1538 if (OP(s) != O_QUEST && OP(s) != O_CH &&
1539 OP(s) != OOR2) {
1540 g->iflags |= BAD;
1541 return;
1542 }
1543 } while (OP(s) != O_QUEST && OP(s) != O_CH);
1544 /* fallthrough */
1545 default: /* things that break a sequence */
1546 if (newlen > g->mlen) { /* ends one */
1547 start = newstart;
1548 g->mlen = newlen;
1549 }
1550 newlen = 0;
1551 break;
1552 }
1553 } while (OP(s) != OEND);
1554
1555 if (g->mlen == 0) /* there isn't one */
1556 return;
1557
1558 /* turn it into a character string */
1559 g->must = malloc((size_t)g->mlen + 1);
1560 if (g->must == NULL) { /* argh; just forget it */
1561 g->mlen = 0;
1562 return;
1563 }
1564 cp = g->must;
1565 scan = start;
1566 for (i = g->mlen; i > 0; i--) {
1567 while (OP(s = *scan++) != OCHAR)
1568 continue;
1569 assert(cp < g->must + g->mlen);
1570 *cp++ = (char)OPND(s);
1571 }
1572 assert(cp == g->must + g->mlen);
1573 *cp++ = '\0'; /* just on general principles */
1574 }
1575
1576 /*
1577 - pluscount - count + nesting
1578 == static sopno pluscount(register struct parse *p, register struct re_guts *g);
1579 */
1580 static sopno /* nesting depth */
1581 pluscount(p, g)
1582 struct parse *p;
1583 register struct re_guts *g;
1584 {
1585 register sop *scan;
1586 register sop s;
1587 register sopno plusnest = 0;
1588 register sopno maxnest = 0;
1589
1590 if (p->error != 0)
1591 return(0); /* there may not be an OEND */
1592
1593 scan = g->strip + 1;
1594 do {
1595 s = *scan++;
1596 switch (OP(s)) {
1597 case OPLUS_:
1598 plusnest++;
1599 break;
1600 case O_PLUS:
1601 if (plusnest > maxnest)
1602 maxnest = plusnest;
1603 plusnest--;
1604 break;
1605 }
1606 } while (OP(s) != OEND);
1607 if (plusnest != 0)
1608 g->iflags |= BAD;
1609 return(maxnest);
1610 }