]> git.saurik.com Git - apple/shell_cmds.git/blob - window/scanner.c
ea0ae55ccc99fc6a98a2ad44ed9132dda31d1a96
[apple/shell_cmds.git] / window / scanner.c
1 /* $NetBSD: scanner.c,v 1.5 1998/08/25 20:59:43 ross Exp $ */
2
3 /*
4 * Copyright (c) 1983, 1993
5 * The Regents of the University of California. All rights reserved.
6 *
7 * This code is derived from software contributed to Berkeley by
8 * Edward Wang at The University of California, Berkeley.
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 * 1. Redistributions of source code must retain the above copyright
14 * notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in the
17 * documentation and/or other materials provided with the distribution.
18 * 3. All advertising materials mentioning features or use of this software
19 * must display the following acknowledgement:
20 * This product includes software developed by the University of
21 * California, Berkeley and its contributors.
22 * 4. Neither the name of the University nor the names of its contributors
23 * may be used to endorse or promote products derived from this software
24 * without specific prior written permission.
25 *
26 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
27 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
29 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
30 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
31 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
32 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
33 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
34 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
35 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
36 * SUCH DAMAGE.
37 */
38
39 #include <sys/cdefs.h>
40 #ifndef lint
41 #if 0
42 static char sccsid[] = "@(#)scanner.c 8.1 (Berkeley) 6/6/93";
43 #else
44 __RCSID("$NetBSD: scanner.c,v 1.5 1998/08/25 20:59:43 ross Exp $");
45 #endif
46 #endif /* not lint */
47
48 #include "defs.h"
49 #include "token.h"
50 #include "context.h"
51 #include "window_string.h"
52
53 int s_getc __P((void));
54 int s_gettok1 __P((void));
55 int s_ungetc __P((int));
56
57 int
58 s_getc()
59 {
60 int c;
61
62 switch (cx.x_type) {
63 case X_FILE:
64 c = getc(cx.x_fp);
65 if (cx.x_bol && c != EOF) {
66 cx.x_bol = 0;
67 cx.x_lineno++;
68 }
69 if (c == '\n')
70 cx.x_bol = 1;
71 return c;
72 case X_BUF:
73 if (*cx.x_bufp != 0)
74 return *cx.x_bufp++ & 0xff;
75 else
76 return EOF;
77 }
78 /*NOTREACHED*/
79 return(0); /* XXX: placate gcc */
80 }
81
82 int
83 s_ungetc(c)
84 int c;
85 {
86 if (c == EOF)
87 return EOF;
88 switch (cx.x_type) {
89 case X_FILE:
90 cx.x_bol = 0;
91 return ungetc(c, cx.x_fp);
92 case X_BUF:
93 if (cx.x_bufp > cx.x_buf)
94 return *--cx.x_bufp = c;
95 else
96 return EOF;
97 }
98 /*NOTREACHED*/
99 return(0); /* XXX: placate gcc */
100 }
101
102 int
103 s_gettok()
104 {
105 char buf[100];
106 char *p = buf;
107 int c;
108 int state = 0;
109
110 loop:
111 c = s_getc();
112 switch (state) {
113 case 0:
114 switch (c) {
115 case ' ':
116 case '\t':
117 break;
118 case '\n':
119 case ';':
120 cx.x_token = T_EOL;
121 state = -1;
122 break;
123 case '#':
124 state = 1;
125 break;
126 case EOF:
127 cx.x_token = T_EOF;
128 state = -1;
129 break;
130 case 'a': case 'b': case 'c': case 'd': case 'e':
131 case 'f': case 'g': case 'h': case 'i': case 'j':
132 case 'k': case 'l': case 'm': case 'n': case 'o':
133 case 'p': case 'q': case 'r': case 's': case 't':
134 case 'u': case 'v': case 'w': case 'x': case 'y':
135 case 'z':
136 case 'A': case 'B': case 'C': case 'D': case 'E':
137 case 'F': case 'G': case 'H': case 'I': case 'J':
138 case 'K': case 'L': case 'M': case 'N': case 'O':
139 case 'P': case 'Q': case 'R': case 'S': case 'T':
140 case 'U': case 'V': case 'W': case 'X': case 'Y':
141 case 'Z':
142 case '_': case '.':
143 *p++ = c;
144 state = 2;
145 break;
146 case '"':
147 state = 3;
148 break;
149 case '\'':
150 state = 4;
151 break;
152 case '\\':
153 switch (c = s_gettok1()) {
154 case -1:
155 break;
156 case -2:
157 state = 0;
158 break;
159 default:
160 *p++ = c;
161 state = 2;
162 }
163 break;
164 case '0':
165 cx.x_val.v_num = 0;
166 state = 10;
167 break;
168 case '1': case '2': case '3': case '4':
169 case '5': case '6': case '7': case '8': case '9':
170 cx.x_val.v_num = c - '0';
171 state = 11;
172 break;
173 case '>':
174 state = 20;
175 break;
176 case '<':
177 state = 21;
178 break;
179 case '=':
180 state = 22;
181 break;
182 case '!':
183 state = 23;
184 break;
185 case '&':
186 state = 24;
187 break;
188 case '|':
189 state = 25;
190 break;
191 case '$':
192 state = 26;
193 break;
194 case '~':
195 cx.x_token = T_COMP;
196 state = -1;
197 break;
198 case '+':
199 cx.x_token = T_PLUS;
200 state = -1;
201 break;
202 case '-':
203 cx.x_token = T_MINUS;
204 state = -1;
205 break;
206 case '*':
207 cx.x_token = T_MUL;
208 state = -1;
209 break;
210 case '/':
211 cx.x_token = T_DIV;
212 state = -1;
213 break;
214 case '%':
215 cx.x_token = T_MOD;
216 state = -1;
217 break;
218 case '^':
219 cx.x_token = T_XOR;
220 state = -1;
221 break;
222 case '(':
223 cx.x_token = T_LP;
224 state = -1;
225 break;
226 case ')':
227 cx.x_token = T_RP;
228 state = -1;
229 break;
230 case ',':
231 cx.x_token = T_COMMA;
232 state = -1;
233 break;
234 case '?':
235 cx.x_token = T_QUEST;
236 state = -1;
237 break;
238 case ':':
239 cx.x_token = T_COLON;
240 state = -1;
241 break;
242 case '[':
243 cx.x_token = T_LB;
244 state = -1;
245 break;
246 case ']':
247 cx.x_token = T_RB;
248 state = -1;
249 break;
250 default:
251 cx.x_val.v_num = c;
252 cx.x_token = T_CHAR;
253 state = -1;
254 break;
255 }
256 break;
257 case 1: /* got # */
258 if (c == '\n' || c == EOF) {
259 (void) s_ungetc(c);
260 state = 0;
261 }
262 break;
263 case 2: /* unquoted string */
264 switch (c) {
265 case 'a': case 'b': case 'c': case 'd': case 'e':
266 case 'f': case 'g': case 'h': case 'i': case 'j':
267 case 'k': case 'l': case 'm': case 'n': case 'o':
268 case 'p': case 'q': case 'r': case 's': case 't':
269 case 'u': case 'v': case 'w': case 'x': case 'y':
270 case 'z':
271 case 'A': case 'B': case 'C': case 'D': case 'E':
272 case 'F': case 'G': case 'H': case 'I': case 'J':
273 case 'K': case 'L': case 'M': case 'N': case 'O':
274 case 'P': case 'Q': case 'R': case 'S': case 'T':
275 case 'U': case 'V': case 'W': case 'X': case 'Y':
276 case 'Z':
277 case '_': case '.':
278 case '0': case '1': case '2': case '3': case '4':
279 case '5': case '6': case '7': case '8': case '9':
280 if (p < buf + sizeof buf - 1)
281 *p++ = c;
282 break;
283 case '"':
284 state = 3;
285 break;
286 case '\'':
287 state = 4;
288 break;
289 case '\\':
290 switch (c = s_gettok1()) {
291 case -2:
292 (void) s_ungetc(' ');
293 case -1:
294 break;
295 default:
296 if (p < buf + sizeof buf - 1)
297 *p++ = c;
298 }
299 break;
300 default:
301 (void) s_ungetc(c);
302 case EOF:
303 *p = 0;
304 cx.x_token = T_STR;
305 switch (*buf) {
306 case 'i':
307 if (buf[1] == 'f' && buf[2] == 0)
308 cx.x_token = T_IF;
309 break;
310 case 't':
311 if (buf[1] == 'h' && buf[2] == 'e'
312 && buf[3] == 'n' && buf[4] == 0)
313 cx.x_token = T_THEN;
314 break;
315 case 'e':
316 if (buf[1] == 'n' && buf[2] == 'd'
317 && buf[3] == 'i' && buf[4] == 'f'
318 && buf[5] == 0)
319 cx.x_token = T_ENDIF;
320 else {
321 if (buf[1] == 'l' && buf[2] == 's') {
322 if (buf[3] == 'i'
323 && buf[4] == 'f'
324 && buf[5] == 0)
325 cx.x_token = T_ELSIF;
326 else {
327 if (buf[3] == 'e'
328 && buf[4] == 0)
329 cx.x_token =
330 T_ELSE;
331 }
332 }
333 }
334 break;
335 }
336 if (cx.x_token == T_STR
337 && (cx.x_val.v_str = str_cpy(buf)) == 0) {
338 p_memerror();
339 cx.x_token = T_EOF;
340 }
341 state = -1;
342 break;
343 }
344 break;
345 case 3: /* " quoted string */
346 switch (c) {
347 case '\n':
348 (void) s_ungetc(c);
349 case EOF:
350 case '"':
351 state = 2;
352 break;
353 case '\\':
354 switch (c = s_gettok1()) {
355 case -1:
356 case -2: /* newlines are invisible */
357 break;
358 default:
359 if (p < buf + sizeof buf - 1)
360 *p++ = c;
361 }
362 break;
363 default:
364 if (p < buf + sizeof buf - 1)
365 *p++ = c;
366 break;
367 }
368 break;
369 case 4: /* ' quoted string */
370 switch (c) {
371 case '\n':
372 (void) s_ungetc(c);
373 case EOF:
374 case '\'':
375 state = 2;
376 break;
377 case '\\':
378 switch (c = s_gettok1()) {
379 case -1:
380 case -2: /* newlines are invisible */
381 break;
382 default:
383 if (p < buf + sizeof buf - 1)
384 *p++ = c;
385 }
386 break;
387 default:
388 if (p < buf + sizeof buf - 1)
389 *p++ = c;
390 break;
391 }
392 break;
393 case 10: /* got 0 */
394 switch (c) {
395 case 'x':
396 case 'X':
397 cx.x_val.v_num = 0;
398 state = 12;
399 break;
400 case '0': case '1': case '2': case '3': case '4':
401 case '5': case '6': case '7':
402 cx.x_val.v_num = c - '0';
403 state = 13;
404 break;
405 case '8': case '9':
406 cx.x_val.v_num = c - '0';
407 state = 11;
408 break;
409 default:
410 (void) s_ungetc(c);
411 state = -1;
412 cx.x_token = T_NUM;
413 }
414 break;
415 case 11: /* decimal number */
416 switch (c) {
417 case '0': case '1': case '2': case '3': case '4':
418 case '5': case '6': case '7': case '8': case '9':
419 cx.x_val.v_num = cx.x_val.v_num * 10 + c - '0';
420 break;
421 default:
422 (void) s_ungetc(c);
423 state = -1;
424 cx.x_token = T_NUM;
425 }
426 break;
427 case 12: /* hex number */
428 switch (c) {
429 case '0': case '1': case '2': case '3': case '4':
430 case '5': case '6': case '7': case '8': case '9':
431 cx.x_val.v_num = cx.x_val.v_num * 16 + c - '0';
432 break;
433 case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
434 cx.x_val.v_num = cx.x_val.v_num * 16 + c - 'a' + 10;
435 break;
436 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
437 cx.x_val.v_num = cx.x_val.v_num * 16 + c - 'A' + 10;
438 break;
439 default:
440 (void) s_ungetc(c);
441 state = -1;
442 cx.x_token = T_NUM;
443 }
444 break;
445 case 13: /* octal number */
446 switch (c) {
447 case '0': case '1': case '2': case '3': case '4':
448 case '5': case '6': case '7':
449 cx.x_val.v_num = cx.x_val.v_num * 8 + c - '0';
450 break;
451 default:
452 (void) s_ungetc(c);
453 state = -1;
454 cx.x_token = T_NUM;
455 }
456 break;
457 case 20: /* got > */
458 switch (c) {
459 case '=':
460 cx.x_token = T_GE;
461 state = -1;
462 break;
463 case '>':
464 cx.x_token = T_RS;
465 state = -1;
466 break;
467 default:
468 (void) s_ungetc(c);
469 cx.x_token = T_GT;
470 state = -1;
471 }
472 break;
473 case 21: /* got < */
474 switch (c) {
475 case '=':
476 cx.x_token = T_LE;
477 state = -1;
478 break;
479 case '<':
480 cx.x_token = T_LS;
481 state = -1;
482 break;
483 default:
484 (void) s_ungetc(c);
485 cx.x_token = T_LT;
486 state = -1;
487 }
488 break;
489 case 22: /* got = */
490 switch (c) {
491 case '=':
492 cx.x_token = T_EQ;
493 state = -1;
494 break;
495 default:
496 (void) s_ungetc(c);
497 cx.x_token = T_ASSIGN;
498 state = -1;
499 }
500 break;
501 case 23: /* got ! */
502 switch (c) {
503 case '=':
504 cx.x_token = T_NE;
505 state = -1;
506 break;
507 default:
508 (void) s_ungetc(c);
509 cx.x_token = T_NOT;
510 state = -1;
511 }
512 break;
513 case 24: /* got & */
514 switch (c) {
515 case '&':
516 cx.x_token = T_ANDAND;
517 state = -1;
518 break;
519 default:
520 (void) s_ungetc(c);
521 cx.x_token = T_AND;
522 state = -1;
523 }
524 break;
525 case 25: /* got | */
526 switch (c) {
527 case '|':
528 cx.x_token = T_OROR;
529 state = -1;
530 break;
531 default:
532 (void) s_ungetc(c);
533 cx.x_token = T_OR;
534 state = -1;
535 }
536 break;
537 case 26: /* got $ */
538 switch (c) {
539 case '?':
540 cx.x_token = T_DQ;
541 state = -1;
542 break;
543 default:
544 (void) s_ungetc(c);
545 cx.x_token = T_DOLLAR;
546 state = -1;
547 }
548 break;
549 default:
550 abort();
551 }
552 if (state >= 0)
553 goto loop;
554 return cx.x_token;
555 }
556
/*
 * Decode the rest of a backslash escape; the caller has already consumed
 * the backslash itself.
 *
 * Returns:
 *   -1                  end of input follows the backslash
 *   -2                  a newline follows the backslash
 *   '\b' '\f' '\n' '\r' '\t'   for the letters b, f, n, r, t
 *   an octal value      for one to three octal digits
 *   the character       for anything else
 *
 * A character that ends an octal escape early is pushed back with
 * s_ungetc().
 */
int
s_gettok1()
{
	int ch;
	int value;
	int more;

	ch = s_getc();			/* character after the backslash */
	if (ch == EOF)
		return -1;
	if (ch == '\n')
		return -2;
	if (ch == 'b')
		return '\b';
	if (ch == 'f')
		return '\f';
	if (ch == 'n')
		return '\n';
	if (ch == 'r')
		return '\r';
	if (ch == 't')
		return '\t';
	if (ch < '0' || ch > '7')
		return ch;

	/* Octal escape: up to two more digits after the first. */
	value = ch - '0';
	for (more = 2; more > 0; more--) {
		ch = s_getc();
		if (ch < '0' || ch > '7') {
			(void) s_ungetc(ch);
			break;
		}
		value = value * 8 + ch - '0';
	}
	return value;
}