| 1 | /* |
| 2 | * Utility functions for handling cvecs |
| 3 | * This file is #included by regcomp.c. |
| 4 | * |
| 5 | * Copyright (c) 1998, 1999 Henry Spencer. All rights reserved. |
| 6 | * |
| 7 | * Development of this software was funded, in part, by Cray Research Inc., |
| 8 | * UUNET Communications Services Inc., Sun Microsystems Inc., and Scriptics |
| 9 | * Corporation, none of whom are responsible for the results. The author |
| 10 | * thanks all of them. |
| 11 | * |
| 12 | * Redistribution and use in source and binary forms -- with or without |
| 13 | * modification -- are permitted for any purpose, provided that |
| 14 | * redistributions in source form retain this entire copyright notice and |
| 15 | * indicate the origin and nature of any modifications. |
| 16 | * |
| 17 | * I'd appreciate being given credit for this package in the documentation |
| 18 | * of software which uses it, but that is not a requirement. |
| 19 | * |
| 20 | * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, |
| 21 | * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY |
| 22 | * AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL |
| 23 | * HENRY SPENCER BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, |
| 24 | * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, |
| 25 | * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; |
| 26 | * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, |
| 27 | * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR |
| 28 | * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF |
| 29 | * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
| 30 | * |
| 31 | */ |
| 32 | |
| 33 | /* |
| 34 | - newcvec - allocate a new cvec |
| 35 | ^ static struct cvec *newcvec(int, int, int); |
| 36 | */ |
| 37 | static struct cvec * |
| 38 | newcvec(nchrs, nranges, nmcces) |
| 39 | int nchrs; /* to hold this many chrs... */ |
| 40 | int nranges; /* ... and this many ranges... */ |
| 41 | int nmcces; /* ... and this many MCCEs */ |
| 42 | { |
| 43 | size_t n; |
| 44 | size_t nc; |
| 45 | struct cvec *cv; |
| 46 | |
| 47 | nc = (size_t)nchrs + (size_t)nmcces*(MAXMCCE+1) + (size_t)nranges*2; |
| 48 | n = sizeof(struct cvec) + (size_t)(nmcces-1)*sizeof(chr *) |
| 49 | + nc*sizeof(chr); |
| 50 | cv = (struct cvec *)MALLOC(n); |
| 51 | if (cv == NULL) { |
| 52 | return NULL; |
| 53 | } |
| 54 | cv->chrspace = nchrs; |
| 55 | cv->chrs = (chr *)&cv->mcces[nmcces]; /* chrs just after MCCE ptrs */ |
| 56 | cv->mccespace = nmcces; |
| 57 | cv->ranges = cv->chrs + nchrs + nmcces*(MAXMCCE+1); |
| 58 | cv->rangespace = nranges; |
| 59 | return clearcvec(cv); |
| 60 | } |
| 61 | |
| 62 | /* |
| 63 | - clearcvec - clear a possibly-new cvec |
| 64 | * Returns pointer as convenience. |
| 65 | ^ static struct cvec *clearcvec(struct cvec *); |
| 66 | */ |
| 67 | static struct cvec * |
| 68 | clearcvec(cv) |
| 69 | struct cvec *cv; /* character vector */ |
| 70 | { |
| 71 | int i; |
| 72 | |
| 73 | assert(cv != NULL); |
| 74 | cv->nchrs = 0; |
| 75 | assert(cv->chrs == (chr *)&cv->mcces[cv->mccespace]); |
| 76 | cv->nmcces = 0; |
| 77 | cv->nmccechrs = 0; |
| 78 | cv->nranges = 0; |
| 79 | for (i = 0; i < cv->mccespace; i++) { |
| 80 | cv->mcces[i] = NULL; |
| 81 | } |
| 82 | |
| 83 | return cv; |
| 84 | } |
| 85 | |
| 86 | /* |
| 87 | - addchr - add a chr to a cvec |
| 88 | ^ static VOID addchr(struct cvec *, pchr); |
| 89 | */ |
| 90 | static VOID |
| 91 | addchr(cv, c) |
| 92 | struct cvec *cv; /* character vector */ |
| 93 | pchr c; /* character to add */ |
| 94 | { |
| 95 | assert(cv->nchrs < cv->chrspace - cv->nmccechrs); |
| 96 | cv->chrs[cv->nchrs++] = (chr)c; |
| 97 | } |
| 98 | |
| 99 | /* |
| 100 | - addrange - add a range to a cvec |
| 101 | ^ static VOID addrange(struct cvec *, pchr, pchr); |
| 102 | */ |
| 103 | static VOID |
| 104 | addrange(cv, from, to) |
| 105 | struct cvec *cv; /* character vector */ |
| 106 | pchr from; /* first character of range */ |
| 107 | pchr to; /* last character of range */ |
| 108 | { |
| 109 | assert(cv->nranges < cv->rangespace); |
| 110 | cv->ranges[cv->nranges*2] = (chr)from; |
| 111 | cv->ranges[cv->nranges*2 + 1] = (chr)to; |
| 112 | cv->nranges++; |
| 113 | } |
| 114 | |
| 115 | /* |
| 116 | - addmcce - add an MCCE to a cvec |
| 117 | ^ static VOID addmcce(struct cvec *, chr *, chr *); |
| 118 | */ |
| 119 | static VOID |
| 120 | addmcce(cv, startp, endp) |
| 121 | struct cvec *cv; /* character vector */ |
| 122 | chr *startp; /* beginning of text */ |
| 123 | chr *endp; /* just past end of text */ |
| 124 | { |
| 125 | int len; |
| 126 | int i; |
| 127 | chr *s; |
| 128 | chr *d; |
| 129 | |
| 130 | if (startp == NULL && endp == NULL) { |
| 131 | return; |
| 132 | } |
| 133 | len = endp - startp; |
| 134 | assert(len > 0); |
| 135 | assert(cv->nchrs + len < cv->chrspace - cv->nmccechrs); |
| 136 | assert(cv->nmcces < cv->mccespace); |
| 137 | d = &cv->chrs[cv->chrspace - cv->nmccechrs - len - 1]; |
| 138 | cv->mcces[cv->nmcces++] = d; |
| 139 | for (s = startp, i = len; i > 0; s++, i--) { |
| 140 | *d++ = *s; |
| 141 | } |
| 142 | *d++ = 0; /* endmarker */ |
| 143 | assert(d == &cv->chrs[cv->chrspace - cv->nmccechrs]); |
| 144 | cv->nmccechrs += len + 1; |
| 145 | } |
| 146 | |
| 147 | /* |
| 148 | - haschr - does a cvec contain this chr? |
| 149 | ^ static int haschr(struct cvec *, pchr); |
| 150 | */ |
| 151 | static int /* predicate */ |
| 152 | haschr(cv, c) |
| 153 | struct cvec *cv; /* character vector */ |
| 154 | pchr c; /* character to test for */ |
| 155 | { |
| 156 | int i; |
| 157 | chr *p; |
| 158 | |
| 159 | for (p = cv->chrs, i = cv->nchrs; i > 0; p++, i--) { |
| 160 | if (*p == c) { |
| 161 | return 1; |
| 162 | } |
| 163 | } |
| 164 | for (p = cv->ranges, i = cv->nranges; i > 0; p += 2, i--) { |
| 165 | if ((*p <= c) && (c <= *(p+1))) { |
| 166 | return 1; |
| 167 | } |
| 168 | } |
| 169 | return 0; |
| 170 | } |
| 171 | |
| 172 | /* |
| 173 | - getcvec - get a cvec, remembering it as v->cv |
| 174 | ^ static struct cvec *getcvec(struct vars *, int, int, int); |
| 175 | */ |
| 176 | static struct cvec * |
| 177 | getcvec(v, nchrs, nranges, nmcces) |
| 178 | struct vars *v; /* context */ |
| 179 | int nchrs; /* to hold this many chrs... */ |
| 180 | int nranges; /* ... and this many ranges... */ |
| 181 | int nmcces; /* ... and this many MCCEs */ |
| 182 | { |
| 183 | if (v->cv != NULL && nchrs <= v->cv->chrspace && |
| 184 | nranges <= v->cv->rangespace && nmcces <= v->cv->mccespace) { |
| 185 | return clearcvec(v->cv); |
| 186 | } |
| 187 | |
| 188 | if (v->cv != NULL) { |
| 189 | freecvec(v->cv); |
| 190 | } |
| 191 | v->cv = newcvec(nchrs, nranges, nmcces); |
| 192 | if (v->cv == NULL) { |
| 193 | ERR(REG_ESPACE); |
| 194 | } |
| 195 | |
| 196 | return v->cv; |
| 197 | } |
| 198 | |
| 199 | /* |
| 200 | - freecvec - free a cvec |
| 201 | ^ static VOID freecvec(struct cvec *); |
| 202 | */ |
| 203 | static VOID |
| 204 | freecvec(cv) |
| 205 | struct cvec *cv; /* character vector */ |
| 206 | { |
| 207 | FREE(cv); |
| 208 | } |