]> git.saurik.com Git - bison.git/blame - src/reader.c
Added <unistd.h> for unlink().
[bison.git] / src / reader.c
CommitLineData
1ff442ca 1/* Input parser for bison
74a53b4b 2 Copyright (C) 1984, 1986, 1989, 1992, 1998 Free Software Foundation, Inc.
1ff442ca
NF
3
4This file is part of Bison, the GNU Compiler Compiler.
5
6Bison is free software; you can redistribute it and/or modify
7it under the terms of the GNU General Public License as published by
8the Free Software Foundation; either version 2, or (at your option)
9any later version.
10
11Bison is distributed in the hope that it will be useful,
12but WITHOUT ANY WARRANTY; without even the implied warranty of
13MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14GNU General Public License for more details.
15
16You should have received a copy of the GNU General Public License
17along with Bison; see the file COPYING. If not, write to
c49a8e71
JT
18the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
19Boston, MA 02111-1307, USA. */
1ff442ca
NF
20
21
22/* read in the grammar specification and record it in the format described in gram.h.
23 All guards are copied into the fguard file and all actions into faction,
24 in each case forming the body of a C function (yyguard or yyaction)
25 which contains a switch statement to decide which guard or action to execute.
26
27The entry point is reader(). */
28
29#include <stdio.h>
1ff442ca
NF
30#include "system.h"
31#include "files.h"
7612000c 32#include "alloc.h"
1ff442ca
NF
33#include "symtab.h"
34#include "lex.h"
35#include "gram.h"
36#include "machine.h"
37
38#define LTYPESTR "\n#ifndef YYLTYPE\ntypedef\n struct yyltype\n\
39 {\n int timestamp;\n int first_line;\n int first_column;\
40\n int last_line;\n int last_column;\n char *text;\n }\n\
41 yyltype;\n\n#define YYLTYPE yyltype\n#endif\n\n"
42
43/* Number of slots allocated (but not necessarily used yet) in `rline' */
44int rline_allocated;
45
46extern char *program_name;
47extern int definesflag;
48extern int nolinesflag;
943819bf
RS
49extern int noparserflag;
50extern int rawtoknumflag;
1ff442ca
NF
51extern bucket *symval;
52extern int numval;
1ff442ca
NF
53extern int expected_conflicts;
54extern char *token_buffer;
118fb205
JT
55extern int maxtoken;
56
57extern void init_lex PARAMS((void));
58extern char *grow_token_buffer PARAMS((char *));
59extern void tabinit PARAMS((void));
60extern void output_headers PARAMS((void));
61extern void output_trailers PARAMS((void));
62extern void free_symtab PARAMS((void));
63extern void open_extra_files PARAMS((void));
64extern char *int_to_string PARAMS((int));
65extern char *printable_version PARAMS((int));
66extern void fatal PARAMS((char *));
67extern void fatals PARAMS((char *, char *));
68extern void warn PARAMS((char *));
69extern void warni PARAMS((char *, int));
70extern void warns PARAMS((char *, char *));
71extern void warnss PARAMS((char *, char *, char *));
72extern void warnsss PARAMS((char *, char *, char *, char *));
73extern void unlex PARAMS((int));
74extern void done PARAMS((int));
75
76extern int skip_white_space PARAMS((void));
77extern int parse_percent_token PARAMS((void));
78extern int lex PARAMS((void));
1ff442ca
NF
79
80typedef
81 struct symbol_list
82 {
83 struct symbol_list *next;
84 bucket *sym;
85 bucket *ruleprec;
86 }
87 symbol_list;
88
89
118fb205
JT
90void reader PARAMS((void));
91void reader_output_yylsp PARAMS((FILE *));
92void read_declarations PARAMS((void));
93void copy_definition PARAMS((void));
94void parse_token_decl PARAMS((int, int));
95void parse_start_decl PARAMS((void));
96void parse_type_decl PARAMS((void));
97void parse_assoc_decl PARAMS((int));
98void parse_union_decl PARAMS((void));
99void parse_expect_decl PARAMS((void));
100char *get_type_name PARAMS((int, symbol_list *));
101void copy_guard PARAMS((symbol_list *, int));
102void parse_thong_decl PARAMS((void));
103void copy_action PARAMS((symbol_list *, int));
104bucket *gensym PARAMS((void));
105void readgram PARAMS((void));
106void record_rule_line PARAMS((void));
107void packsymbols PARAMS((void));
108void output_token_defines PARAMS((FILE *));
109void packgram PARAMS((void));
110int read_signed_integer PARAMS((FILE *));
118fb205 111
2686a6e7
JT
112#if 0
113static int get_type PARAMS((void));
114#endif
1ff442ca
NF
115
116int lineno;
117symbol_list *grammar;
118int start_flag;
119bucket *startval;
120char **tags;
943819bf 121int *user_toknums;
1ff442ca
NF
122
123/* Nonzero if components of semantic values are used, implying
124 they must be unions. */
125static int value_components_used;
126
127static int typed; /* nonzero if %union has been seen. */
128
129static int lastprec; /* incremented for each %left, %right or %nonassoc seen */
130
131static int gensym_count; /* incremented for each generated symbol */
132
133static bucket *errtoken;
5b2e3c89 134static bucket *undeftoken;
1ff442ca
NF
135
136/* Nonzero if any action or guard uses the @n construct. */
137static int yylsp_needed;
138
139extern char *version_string;
140
943819bf
RS
141
142static void
118fb205 143skip_to_char (int target)
943819bf
RS
144{
145 int c;
146 if (target == '\n')
a083fbbf 147 warn(_(" Skipping to next \\n"));
943819bf 148 else
a083fbbf 149 warni(_(" Skipping to next %c"), target);
943819bf
RS
150
151 do
152 c = skip_white_space();
153 while (c != target && c != EOF);
a083fbbf 154 if (c != EOF)
943819bf
RS
155 ungetc(c, finput);
156}
157
158
1ff442ca 159void
118fb205 160reader (void)
1ff442ca
NF
161{
162 start_flag = 0;
163 startval = NULL; /* start symbol not specified yet. */
164
165#if 0
166 translations = 0; /* initially assume token number translation not needed. */
167#endif
168 /* Nowadays translations is always set to 1,
169 since we give `error' a user-token-number
170 to satisfy the Posix demand for YYERRCODE==256. */
171 translations = 1;
172
173 nsyms = 1;
174 nvars = 0;
175 nrules = 0;
176 nitems = 0;
177 rline_allocated = 10;
178 rline = NEW2(rline_allocated, short);
179
180 typed = 0;
181 lastprec = 0;
182
183 gensym_count = 0;
184
185 semantic_parser = 0;
186 pure_parser = 0;
187 yylsp_needed = 0;
188
189 grammar = NULL;
190
191 init_lex();
192 lineno = 1;
193
194 /* initialize the symbol table. */
195 tabinit();
196 /* construct the error token */
197 errtoken = getsym("error");
198 errtoken->class = STOKEN;
199 errtoken->user_token_number = 256; /* Value specified by posix. */
200 /* construct a token that represents all undefined literal tokens. */
201 /* it is always token number 2. */
5b2e3c89
JT
202 undeftoken = getsym("$undefined.");
203 undeftoken->class = STOKEN;
204 undeftoken->user_token_number = 2;
1ff442ca
NF
205 /* Read the declaration section. Copy %{ ... %} groups to ftable and fdefines file.
206 Also notice any %token, %left, etc. found there. */
a083fbbf 207 if (noparserflag)
943819bf
RS
208 fprintf(ftable, "\n/* Bison-generated parse tables, made from %s\n",
209 infile);
210 else
211 fprintf(ftable, "\n/* A Bison parser, made from %s\n", infile);
212 fprintf(ftable, " by %s */\n\n", version_string);
1ff442ca
NF
213 fprintf(ftable, "#define YYBISON 1 /* Identify Bison output. */\n\n");
214 read_declarations();
1ff442ca
NF
215 /* start writing the guard and action files, if they are needed. */
216 output_headers();
217 /* read in the grammar, build grammar in list form. write out guards and actions. */
218 readgram();
219 /* Now we know whether we need the line-number stack.
220 If we do, write its type into the .tab.h file. */
943819bf
RS
221 if (fdefines)
222 reader_output_yylsp(fdefines);
1ff442ca
NF
223 /* write closing delimiters for actions and guards. */
224 output_trailers();
225 if (yylsp_needed)
226 fprintf(ftable, "#define YYLSP_NEEDED\n\n");
227 /* assign the symbols their symbol numbers.
228 Write #defines for the token symbols into fdefines if requested. */
229 packsymbols();
230 /* convert the grammar into the format described in gram.h. */
231 packgram();
232 /* free the symbol table data structure
233 since symbols are now all referred to by symbol number. */
234 free_symtab();
235}
236
943819bf 237void
118fb205 238reader_output_yylsp (FILE *f)
943819bf
RS
239{
240 if (yylsp_needed)
241 fprintf(f, LTYPESTR);
242}
1ff442ca
NF
243
244/* read from finput until %% is seen. Discard the %%.
245Handle any % declarations,
246and copy the contents of any %{ ... %} groups to fattrs. */
247
248void
118fb205 249read_declarations (void)
1ff442ca
NF
250{
251 register int c;
252 register int tok;
253
254 for (;;)
255 {
256 c = skip_white_space();
257
258 if (c == '%')
259 {
260 tok = parse_percent_token();
261
262 switch (tok)
263 {
264 case TWO_PERCENTS:
265 return;
266
267 case PERCENT_LEFT_CURLY:
268 copy_definition();
269 break;
270
271 case TOKEN:
272 parse_token_decl (STOKEN, SNTERM);
273 break;
a083fbbf 274
1ff442ca
NF
275 case NTERM:
276 parse_token_decl (SNTERM, STOKEN);
277 break;
a083fbbf 278
1ff442ca
NF
279 case TYPE:
280 parse_type_decl();
281 break;
a083fbbf 282
1ff442ca
NF
283 case START:
284 parse_start_decl();
285 break;
a083fbbf 286
1ff442ca
NF
287 case UNION:
288 parse_union_decl();
289 break;
a083fbbf 290
1ff442ca
NF
291 case EXPECT:
292 parse_expect_decl();
293 break;
943819bf
RS
294 case THONG:
295 parse_thong_decl();
296 break;
1ff442ca
NF
297 case LEFT:
298 parse_assoc_decl(LEFT_ASSOC);
299 break;
300
301 case RIGHT:
302 parse_assoc_decl(RIGHT_ASSOC);
303 break;
304
305 case NONASSOC:
306 parse_assoc_decl(NON_ASSOC);
307 break;
308
309 case SEMANTIC_PARSER:
310 if (semantic_parser == 0)
311 {
312 semantic_parser = 1;
313 open_extra_files();
314 }
315 break;
316
317 case PURE_PARSER:
318 pure_parser = 1;
319 break;
320
943819bf
RS
321 case NOOP:
322 break;
323
1ff442ca 324 default:
a083fbbf 325 warns(_("unrecognized: %s"), token_buffer);
943819bf
RS
326 skip_to_char('%');
327 }
1ff442ca
NF
328 }
329 else if (c == EOF)
a083fbbf 330 fatal(_("no input grammar"));
1ff442ca 331 else
943819bf
RS
332 {
333 char buff[100];
a083fbbf 334 sprintf(buff, _("unknown character: %s"), printable_version(c));
943819bf
RS
335 warn(buff);
336 skip_to_char('%');
337 }
1ff442ca
NF
338 }
339}
340
341
342/* copy the contents of a %{ ... %} into the definitions file.
343The %{ has already been read. Return after reading the %}. */
344
345void
118fb205 346copy_definition (void)
1ff442ca
NF
347{
348 register int c;
349 register int match;
350 register int ended;
351 register int after_percent; /* -1 while reading a character if prev char was % */
352 int cplus_comment;
353
354 if (!nolinesflag)
355 fprintf(fattrs, "#line %d \"%s\"\n", lineno, infile);
356
357 after_percent = 0;
358
359 c = getc(finput);
360
361 for (;;)
362 {
363 switch (c)
364 {
365 case '\n':
366 putc(c, fattrs);
367 lineno++;
368 break;
369
370 case '%':
371 after_percent = -1;
372 break;
a083fbbf 373
1ff442ca
NF
374 case '\'':
375 case '"':
376 match = c;
377 putc(c, fattrs);
378 c = getc(finput);
379
380 while (c != match)
381 {
943819bf 382 if (c == EOF)
a083fbbf 383 fatal(_("unterminated string at end of file"));
943819bf
RS
384 if (c == '\n')
385 {
a083fbbf 386 warn(_("unterminated string"));
943819bf
RS
387 ungetc(c, finput);
388 c = match;
389 continue;
390 }
1ff442ca
NF
391
392 putc(c, fattrs);
a083fbbf 393
1ff442ca
NF
394 if (c == '\\')
395 {
396 c = getc(finput);
397 if (c == EOF)
a083fbbf 398 fatal(_("unterminated string at end of file"));
1ff442ca
NF
399 putc(c, fattrs);
400 if (c == '\n')
401 lineno++;
402 }
403
404 c = getc(finput);
405 }
406
407 putc(c, fattrs);
408 break;
409
410 case '/':
411 putc(c, fattrs);
412 c = getc(finput);
413 if (c != '*' && c != '/')
414 continue;
415
416 cplus_comment = (c == '/');
417 putc(c, fattrs);
418 c = getc(finput);
419
420 ended = 0;
421 while (!ended)
422 {
423 if (!cplus_comment && c == '*')
424 {
425 while (c == '*')
426 {
427 putc(c, fattrs);
428 c = getc(finput);
429 }
430
431 if (c == '/')
432 {
433 putc(c, fattrs);
434 ended = 1;
435 }
436 }
437 else if (c == '\n')
438 {
439 lineno++;
440 putc(c, fattrs);
441 if (cplus_comment)
442 ended = 1;
443 else
444 c = getc(finput);
445 }
446 else if (c == EOF)
a083fbbf 447 fatal(_("unterminated comment in `%{' definition"));
1ff442ca
NF
448 else
449 {
450 putc(c, fattrs);
451 c = getc(finput);
452 }
453 }
454
455 break;
456
457 case EOF:
a083fbbf 458 fatal(_("unterminated `%{' definition"));
1ff442ca
NF
459
460 default:
461 putc(c, fattrs);
462 }
463
464 c = getc(finput);
465
466 if (after_percent)
467 {
468 if (c == '}')
469 return;
470 putc('%', fattrs);
471 }
472 after_percent = 0;
473
474 }
475
476}
477
478
479
480/* parse what comes after %token or %nterm.
481For %token, what_is is STOKEN and what_is_not is SNTERM.
482For %nterm, the arguments are reversed. */
483
484void
118fb205 485parse_token_decl (int what_is, int what_is_not)
1ff442ca 486{
1ff442ca 487 register int token = 0;
1ff442ca 488 register char *typename = 0;
943819bf 489 register struct bucket *symbol = NULL; /* pts to symbol being defined */
1ff442ca
NF
490 int k;
491
1ff442ca
NF
492 for (;;)
493 {
e6011337
JT
494 int tmp_char = ungetc (skip_white_space (), finput);
495
496 if (tmp_char == '%')
1ff442ca 497 return;
e6011337
JT
498 if (tmp_char == EOF)
499 fatals ("Premature EOF after %s", token_buffer);
500
1ff442ca
NF
501 token = lex();
502 if (token == COMMA)
943819bf
RS
503 {
504 symbol = NULL;
505 continue;
506 }
1ff442ca
NF
507 if (token == TYPENAME)
508 {
509 k = strlen(token_buffer);
510 typename = NEW2(k + 1, char);
511 strcpy(typename, token_buffer);
512 value_components_used = 1;
943819bf
RS
513 symbol = NULL;
514 }
515 else if (token == IDENTIFIER && *symval->tag == '\"'
a083fbbf 516 && symbol)
943819bf
RS
517 {
518 translations = 1;
519 symval->class = STOKEN;
520 symval->type_name = typename;
521 symval->user_token_number = symbol->user_token_number;
522 symbol->user_token_number = SALIAS;
523
a083fbbf
RS
524 symval->alias = symbol;
525 symbol->alias = symval;
943819bf
RS
526 symbol = NULL;
527
528 nsyms--; /* symbol and symval combined are only one symbol */
1ff442ca
NF
529 }
530 else if (token == IDENTIFIER)
531 {
532 int oldclass = symval->class;
943819bf 533 symbol = symval;
1ff442ca 534
943819bf 535 if (symbol->class == what_is_not)
a083fbbf 536 warns(_("symbol %s redefined"), symbol->tag);
943819bf 537 symbol->class = what_is;
1ff442ca 538 if (what_is == SNTERM && oldclass != SNTERM)
943819bf 539 symbol->value = nvars++;
1ff442ca
NF
540
541 if (typename)
542 {
943819bf
RS
543 if (symbol->type_name == NULL)
544 symbol->type_name = typename;
545 else if (strcmp(typename, symbol->type_name) != 0)
a083fbbf 546 warns(_("type redeclaration for %s"), symbol->tag);
1ff442ca
NF
547 }
548 }
943819bf 549 else if (symbol && token == NUMBER)
1ff442ca 550 {
943819bf 551 symbol->user_token_number = numval;
1ff442ca
NF
552 translations = 1;
553 }
554 else
943819bf 555 {
a083fbbf
RS
556 warnss(_("`%s' is invalid in %s"),
557 token_buffer,
943819bf
RS
558 (what_is == STOKEN) ? "%token" : "%nterm");
559 skip_to_char('%');
560 }
1ff442ca
NF
561 }
562
563}
564
a083fbbf 565/* parse what comes after %thong
943819bf
RS
566 the full syntax is
567 %thong <type> token number literal
568 the <type> or number may be omitted. The number specifies the
569 user_token_number.
570
571 Two symbols are entered in the table, one for the token symbol and
572 one for the literal. Both are given the <type>, if any, from the declaration.
573 The ->user_token_number of the first is SALIAS and the ->user_token_number
574 of the second is set to the number, if any, from the declaration.
575 The two symbols are linked via pointers in their ->alias fields.
a083fbbf 576
943819bf
RS
577 during output_defines_table, the symbol is reported
578 thereafter, only the literal string is retained
579 it is the literal string that is output to yytname
580*/
581
582void
118fb205 583parse_thong_decl (void)
943819bf
RS
584{
585 register int token;
586 register struct bucket *symbol;
587 register char *typename = 0;
588 int k, usrtoknum;
589
590 translations = 1;
591 token = lex(); /* fetch typename or first token */
592 if (token == TYPENAME) {
593 k = strlen(token_buffer);
594 typename = NEW2(k + 1, char);
595 strcpy(typename, token_buffer);
596 value_components_used = 1;
597 token = lex(); /* fetch first token */
598 }
599
600 /* process first token */
601
a083fbbf 602 if (token != IDENTIFIER)
943819bf 603 {
a083fbbf 604 warns(_("unrecognized item %s, expected an identifier"),
943819bf
RS
605 token_buffer);
606 skip_to_char('%');
607 return;
608 }
609 symval->class = STOKEN;
610 symval->type_name = typename;
611 symval->user_token_number = SALIAS;
612 symbol = symval;
613
614 token = lex(); /* get number or literal string */
a083fbbf 615
943819bf
RS
616 if (token == NUMBER) {
617 usrtoknum = numval;
618 token = lex(); /* okay, did number, now get literal */
619 }
620 else usrtoknum = 0;
621
622 /* process literal string token */
623
a083fbbf 624 if (token != IDENTIFIER || *symval->tag != '\"')
943819bf 625 {
a083fbbf 626 warns(_("expected string constant instead of %s"),
943819bf
RS
627 token_buffer);
628 skip_to_char('%');
629 return;
630 }
631 symval->class = STOKEN;
632 symval->type_name = typename;
633 symval->user_token_number = usrtoknum;
634
a083fbbf
RS
635 symval->alias = symbol;
636 symbol->alias = symval;
943819bf
RS
637
638 nsyms--; /* symbol and symval combined are only one symbol */
639}
1ff442ca
NF
640
641
642/* parse what comes after %start */
643
644void
118fb205 645parse_start_decl (void)
1ff442ca
NF
646{
647 if (start_flag)
a083fbbf 648 warn(_("multiple %start declarations"));
1ff442ca 649 if (lex() != IDENTIFIER)
a083fbbf 650 warn(_("invalid %start declaration"));
943819bf
RS
651 else
652 {
653 start_flag = 1;
654 startval = symval;
655 }
1ff442ca
NF
656}
657
658
659
660/* read in a %type declaration and record its information for get_type_name to access */
661
662void
118fb205 663parse_type_decl (void)
1ff442ca
NF
664{
665 register int k;
666 register char *name;
1ff442ca
NF
667
668 if (lex() != TYPENAME)
943819bf 669 {
a083fbbf 670 warn(_("%type declaration has no <typename>"));
943819bf
RS
671 skip_to_char('%');
672 return;
673 }
1ff442ca
NF
674
675 k = strlen(token_buffer);
676 name = NEW2(k + 1, char);
677 strcpy(name, token_buffer);
678
1ff442ca
NF
679 for (;;)
680 {
681 register int t;
e6011337 682 int tmp_char = ungetc (skip_white_space (), finput);
1ff442ca 683
e6011337 684 if (tmp_char == '%')
1ff442ca 685 return;
e6011337
JT
686 if (tmp_char == EOF)
687 fatals ("Premature EOF after %s", token_buffer);
1ff442ca 688
1ff442ca
NF
689 t = lex();
690
691 switch (t)
692 {
693
694 case COMMA:
695 case SEMICOLON:
696 break;
697
698 case IDENTIFIER:
699 if (symval->type_name == NULL)
700 symval->type_name = name;
943819bf 701 else if (strcmp(name, symval->type_name) != 0)
a083fbbf 702 warns(_("type redeclaration for %s"), symval->tag);
1ff442ca
NF
703
704 break;
705
706 default:
a083fbbf 707 warns(_("invalid %%type declaration due to item: `%s'"), token_buffer);
943819bf 708 skip_to_char('%');
1ff442ca
NF
709 }
710 }
711}
712
713
714
715/* read in a %left, %right or %nonassoc declaration and record its information. */
716/* assoc is either LEFT_ASSOC, RIGHT_ASSOC or NON_ASSOC. */
717
718void
118fb205 719parse_assoc_decl (int assoc)
1ff442ca
NF
720{
721 register int k;
722 register char *name = NULL;
943819bf 723 register int prev = 0;
1ff442ca
NF
724
725 lastprec++; /* Assign a new precedence level, never 0. */
726
1ff442ca
NF
727 for (;;)
728 {
729 register int t;
e6011337 730 int tmp_char = ungetc (skip_white_space (), finput);
1ff442ca 731
e6011337 732 if (tmp_char == '%')
1ff442ca 733 return;
e6011337
JT
734 if (tmp_char == EOF)
735 fatals ("Premature EOF after %s", token_buffer);
1ff442ca 736
1ff442ca
NF
737 t = lex();
738
739 switch (t)
740 {
741
742 case TYPENAME:
743 k = strlen(token_buffer);
744 name = NEW2(k + 1, char);
745 strcpy(name, token_buffer);
746 break;
747
748 case COMMA:
749 break;
750
751 case IDENTIFIER:
752 if (symval->prec != 0)
a083fbbf 753 warns(_("redefining precedence of %s"), symval->tag);
1ff442ca
NF
754 symval->prec = lastprec;
755 symval->assoc = assoc;
756 if (symval->class == SNTERM)
a083fbbf 757 warns(_("symbol %s redefined"), symval->tag);
1ff442ca
NF
758 symval->class = STOKEN;
759 if (name)
760 { /* record the type, if one is specified */
761 if (symval->type_name == NULL)
762 symval->type_name = name;
943819bf 763 else if (strcmp(name, symval->type_name) != 0)
a083fbbf 764 warns(_("type redeclaration for %s"), symval->tag);
1ff442ca
NF
765 }
766 break;
767
768 case NUMBER:
769 if (prev == IDENTIFIER)
770 {
771 symval->user_token_number = numval;
772 translations = 1;
773 }
a083fbbf 774 else
943819bf 775 {
a083fbbf 776 warns(_("invalid text (%s) - number should be after identifier"),
943819bf
RS
777 token_buffer);
778 skip_to_char('%');
779 }
1ff442ca
NF
780 break;
781
782 case SEMICOLON:
783 return;
784
785 default:
a083fbbf 786 warns(_("unexpected item: %s"), token_buffer);
943819bf 787 skip_to_char('%');
1ff442ca
NF
788 }
789
790 prev = t;
791
792 }
793}
794
795
796
797/* copy the union declaration into fattrs (and fdefines),
798 where it is made into the
799 definition of YYSTYPE, the type of elements of the parser value stack. */
800
801void
118fb205 802parse_union_decl (void)
1ff442ca
NF
803{
804 register int c;
805 register int count;
806 register int in_comment;
807 int cplus_comment;
808
809 if (typed)
a083fbbf 810 warn(_("multiple %union declarations"));
1ff442ca
NF
811
812 typed = 1;
813
814 if (!nolinesflag)
815 fprintf(fattrs, "\n#line %d \"%s\"\n", lineno, infile);
816 else
817 fprintf(fattrs, "\n");
818
819 fprintf(fattrs, "typedef union");
820 if (fdefines)
821 fprintf(fdefines, "typedef union");
822
823 count = 0;
824 in_comment = 0;
825
826 c = getc(finput);
827
828 while (c != EOF)
829 {
830 putc(c, fattrs);
831 if (fdefines)
832 putc(c, fdefines);
833
834 switch (c)
835 {
836 case '\n':
837 lineno++;
838 break;
839
840 case '/':
841 c = getc(finput);
842 if (c != '*' && c != '/')
843 ungetc(c, finput);
844 else
845 {
846 putc(c, fattrs);
847 if (fdefines)
848 putc(c, fdefines);
849 cplus_comment = (c == '/');
850 in_comment = 1;
851 c = getc(finput);
852 while (in_comment)
853 {
854 putc(c, fattrs);
855 if (fdefines)
856 putc(c, fdefines);
857
858 if (c == '\n')
859 {
860 lineno++;
861 if (cplus_comment)
862 {
863 in_comment = 0;
864 break;
865 }
866 }
867 if (c == EOF)
a083fbbf 868 fatal(_("unterminated comment at end of file"));
1ff442ca
NF
869
870 if (!cplus_comment && c == '*')
871 {
872 c = getc(finput);
873 if (c == '/')
874 {
875 putc('/', fattrs);
876 if (fdefines)
877 putc('/', fdefines);
878 in_comment = 0;
879 }
880 }
881 else
882 c = getc(finput);
883 }
884 }
885 break;
886
887
888 case '{':
889 count++;
890 break;
891
892 case '}':
893 if (count == 0)
a083fbbf 894 warn (_("unmatched close-brace (`}')"));
1ff442ca 895 count--;
943819bf 896 if (count <= 0)
1ff442ca
NF
897 {
898 fprintf(fattrs, " YYSTYPE;\n");
899 if (fdefines)
900 fprintf(fdefines, " YYSTYPE;\n");
901 /* JF don't choke on trailing semi */
902 c=skip_white_space();
903 if(c!=';') ungetc(c,finput);
904 return;
905 }
906 }
907
908 c = getc(finput);
909 }
910}
911
912/* parse the declaration %expect N which says to expect N
913 shift-reduce conflicts. */
914
915void
118fb205 916parse_expect_decl (void)
1ff442ca
NF
917{
918 register int c;
919 register int count;
920 char buffer[20];
921
922 c = getc(finput);
923 while (c == ' ' || c == '\t')
924 c = getc(finput);
925
926 count = 0;
927 while (c >= '0' && c <= '9')
928 {
929 if (count < 20)
930 buffer[count++] = c;
931 c = getc(finput);
932 }
933 buffer[count] = 0;
934
935 ungetc (c, finput);
936
943819bf 937 if (count <= 0 || count > 10)
a083fbbf 938 warn(_("argument of %expect is not an integer"));
1ff442ca
NF
939 expected_conflicts = atoi (buffer);
940}
941
942/* that's all of parsing the declaration section */
943\f
944/* Get the data type (alternative in the union) of the value for symbol n in rule rule. */
945
946char *
118fb205 947get_type_name (int n, symbol_list *rule)
1ff442ca 948{
a083fbbf 949 static char *msg = N_("invalid $ value");
1ff442ca
NF
950
951 register int i;
952 register symbol_list *rp;
953
954 if (n < 0)
943819bf 955 {
a083fbbf 956 warn(_(msg));
943819bf
RS
957 return NULL;
958 }
1ff442ca
NF
959
960 rp = rule;
961 i = 0;
962
963 while (i < n)
964 {
965 rp = rp->next;
966 if (rp == NULL || rp->sym == NULL)
943819bf 967 {
a083fbbf 968 warn(_(msg));
943819bf
RS
969 return NULL;
970 }
1ff442ca
NF
971 i++;
972 }
973
974 return (rp->sym->type_name);
975}
976
977
1ff442ca
NF
978/* after %guard is seen in the input file,
979copy the actual guard into the guards file.
980If the guard is followed by an action, copy that into the actions file.
981stack_offset is the number of values in the current rule so far,
982which says where to find $0 with respect to the top of the stack,
983for the simple parser in which the stack is not popped until after the guard is run. */
984
985void
118fb205 986copy_guard (symbol_list *rule, int stack_offset)
1ff442ca
NF
987{
988 register int c;
989 register int n;
990 register int count;
991 register int match;
992 register int ended;
993 register char *type_name;
994 int brace_flag = 0;
995 int cplus_comment;
996
997 /* offset is always 0 if parser has already popped the stack pointer */
998 if (semantic_parser) stack_offset = 0;
999
1000 fprintf(fguard, "\ncase %d:\n", nrules);
1001 if (!nolinesflag)
1002 fprintf(fguard, "#line %d \"%s\"\n", lineno, infile);
1003 putc('{', fguard);
1004
1005 count = 0;
1006 c = getc(finput);
1007
1008 while (brace_flag ? (count > 0) : (c != ';'))
1009 {
1010 switch (c)
1011 {
1012 case '\n':
1013 putc(c, fguard);
1014 lineno++;
1015 break;
1016
1017 case '{':
1018 putc(c, fguard);
1019 brace_flag = 1;
1020 count++;
1021 break;
1022
1023 case '}':
1024 putc(c, fguard);
1025 if (count > 0)
1026 count--;
a083fbbf 1027 else
943819bf 1028 {
a083fbbf 1029 warn(_("unmatched right brace (`}')"));
943819bf
RS
1030 c = getc(finput); /* skip it */
1031 }
1ff442ca
NF
1032 break;
1033
1034 case '\'':
1035 case '"':
1036 match = c;
1037 putc(c, fguard);
1038 c = getc(finput);
1039
1040 while (c != match)
1041 {
943819bf 1042 if (c == EOF)
a083fbbf
RS
1043 fatal(_("unterminated string at end of file"));
1044 if (c == '\n')
943819bf 1045 {
a083fbbf 1046 warn(_("unterminated string"));
943819bf
RS
1047 ungetc(c, finput);
1048 c = match; /* invent terminator */
1049 continue;
1050 }
1ff442ca
NF
1051
1052 putc(c, fguard);
a083fbbf 1053
1ff442ca
NF
1054 if (c == '\\')
1055 {
1056 c = getc(finput);
1057 if (c == EOF)
a083fbbf 1058 fatal(_("unterminated string"));
1ff442ca
NF
1059 putc(c, fguard);
1060 if (c == '\n')
1061 lineno++;
1062 }
1063
1064 c = getc(finput);
1065 }
1066
1067 putc(c, fguard);
1068 break;
1069
1070 case '/':
1071 putc(c, fguard);
1072 c = getc(finput);
1073 if (c != '*' && c != '/')
1074 continue;
1075
1076 cplus_comment = (c == '/');
1077 putc(c, fguard);
1078 c = getc(finput);
1079
1080 ended = 0;
1081 while (!ended)
1082 {
1083 if (!cplus_comment && c == '*')
1084 {
1085 while (c == '*')
1086 {
1087 putc(c, fguard);
1088 c = getc(finput);
1089 }
1090
1091 if (c == '/')
1092 {
1093 putc(c, fguard);
1094 ended = 1;
1095 }
1096 }
1097 else if (c == '\n')
1098 {
1099 lineno++;
1100 putc(c, fguard);
1101 if (cplus_comment)
1102 ended = 1;
1103 else
1104 c = getc(finput);
1105 }
1106 else if (c == EOF)
a083fbbf 1107 fatal(_("unterminated comment"));
1ff442ca
NF
1108 else
1109 {
1110 putc(c, fguard);
1111 c = getc(finput);
1112 }
1113 }
1114
1115 break;
1116
1117 case '$':
1118 c = getc(finput);
1119 type_name = NULL;
1120
1121 if (c == '<')
1122 {
1123 register char *cp = token_buffer;
1124
1125 while ((c = getc(finput)) != '>' && c > 0)
118fb205
JT
1126 {
1127 if (cp == token_buffer + maxtoken)
1128 cp = grow_token_buffer(cp);
1129
1130 *cp++ = c;
1131 }
1ff442ca
NF
1132 *cp = 0;
1133 type_name = token_buffer;
1134
1135 c = getc(finput);
1136 }
1137
1138 if (c == '$')
1139 {
1140 fprintf(fguard, "yyval");
1141 if (!type_name) type_name = rule->sym->type_name;
1142 if (type_name)
1143 fprintf(fguard, ".%s", type_name);
943819bf 1144 if(!type_name && typed)
a083fbbf 1145 warns(_("$$ of `%s' has no declared type"), rule->sym->tag);
1ff442ca
NF
1146 }
1147
1148 else if (isdigit(c) || c == '-')
1149 {
1150 ungetc (c, finput);
1151 n = read_signed_integer(finput);
1152 c = getc(finput);
1153
1154 if (!type_name && n > 0)
1155 type_name = get_type_name(n, rule);
1156
1157 fprintf(fguard, "yyvsp[%d]", n - stack_offset);
1158 if (type_name)
1159 fprintf(fguard, ".%s", type_name);
943819bf 1160 if(!type_name && typed)
a083fbbf 1161 warnss(_("$%s of `%s' has no declared type"), int_to_string(n), rule->sym->tag);
1ff442ca
NF
1162 continue;
1163 }
1164 else
aba5ca6d 1165 warns(_("$%s is invalid"), printable_version(c));
1ff442ca
NF
1166
1167 break;
1168
1169 case '@':
1170 c = getc(finput);
1171 if (isdigit(c) || c == '-')
1172 {
1173 ungetc (c, finput);
1174 n = read_signed_integer(finput);
1175 c = getc(finput);
1176 }
1177 else
943819bf 1178 {
aba5ca6d 1179 warns(_("@%s is invalid"), printable_version(c));
943819bf
RS
1180 n = 1;
1181 }
1ff442ca
NF
1182
1183 fprintf(fguard, "yylsp[%d]", n - stack_offset);
1184 yylsp_needed = 1;
1185
1186 continue;
1187
1188 case EOF:
a083fbbf 1189 fatal(_("unterminated %%guard clause"));
1ff442ca
NF
1190
1191 default:
1192 putc(c, fguard);
1193 }
1194
1195 if (c != '}' || count != 0)
1196 c = getc(finput);
1197 }
1198
1199 c = skip_white_space();
1200
1201 fprintf(fguard, ";\n break;}");
1202 if (c == '{')
1203 copy_action(rule, stack_offset);
1204 else if (c == '=')
1205 {
943819bf 1206 c = getc(finput); /* why not skip_white_space -wjh */
1ff442ca
NF
1207 if (c == '{')
1208 copy_action(rule, stack_offset);
1209 }
1210 else
1211 ungetc(c, finput);
1212}
1213
1214
1215
1216/* Assuming that a { has just been seen, copy everything up to the matching }
1217into the actions file.
1218stack_offset is the number of values in the current rule so far,
1219which says where to find $0 with respect to the top of the stack. */
1220
1221void
118fb205 1222copy_action (symbol_list *rule, int stack_offset)
1ff442ca
NF
1223{
1224 register int c;
1225 register int n;
1226 register int count;
1227 register int match;
1228 register int ended;
1229 register char *type_name;
1230 int cplus_comment;
1231
1232 /* offset is always 0 if parser has already popped the stack pointer */
1233 if (semantic_parser) stack_offset = 0;
1234
1235 fprintf(faction, "\ncase %d:\n", nrules);
1236 if (!nolinesflag)
1237 fprintf(faction, "#line %d \"%s\"\n", lineno, infile);
1238 putc('{', faction);
1239
1240 count = 1;
1241 c = getc(finput);
1242
1243 while (count > 0)
1244 {
1245 while (c != '}')
1246 {
1247 switch (c)
1248 {
1249 case '\n':
1250 putc(c, faction);
1251 lineno++;
1252 break;
1253
1254 case '{':
1255 putc(c, faction);
1256 count++;
1257 break;
1258
1259 case '\'':
1260 case '"':
1261 match = c;
1262 putc(c, faction);
1263 c = getc(finput);
1264
1265 while (c != match)
1266 {
943819bf
RS
1267 if (c == '\n')
1268 {
a083fbbf 1269 warn(_("unterminated string"));
943819bf
RS
1270 ungetc(c, finput);
1271 c = match;
1272 continue;
1273 }
1274 else if (c == EOF)
a083fbbf 1275 fatal(_("unterminated string at end of file"));
1ff442ca
NF
1276
1277 putc(c, faction);
1278
1279 if (c == '\\')
1280 {
1281 c = getc(finput);
1282 if (c == EOF)
a083fbbf 1283 fatal(_("unterminated string"));
1ff442ca
NF
1284 putc(c, faction);
1285 if (c == '\n')
1286 lineno++;
1287 }
1288
1289 c = getc(finput);
1290 }
1291
1292 putc(c, faction);
1293 break;
1294
1295 case '/':
1296 putc(c, faction);
1297 c = getc(finput);
1298 if (c != '*' && c != '/')
1299 continue;
1300
1301 cplus_comment = (c == '/');
1302 putc(c, faction);
1303 c = getc(finput);
1304
1305 ended = 0;
1306 while (!ended)
1307 {
1308 if (!cplus_comment && c == '*')
1309 {
1310 while (c == '*')
1311 {
1312 putc(c, faction);
1313 c = getc(finput);
1314 }
1315
1316 if (c == '/')
1317 {
1318 putc(c, faction);
1319 ended = 1;
1320 }
1321 }
1322 else if (c == '\n')
1323 {
1324 lineno++;
1325 putc(c, faction);
1326 if (cplus_comment)
1327 ended = 1;
1328 else
1329 c = getc(finput);
1330 }
1331 else if (c == EOF)
a083fbbf 1332 fatal(_("unterminated comment"));
1ff442ca
NF
1333 else
1334 {
1335 putc(c, faction);
1336 c = getc(finput);
1337 }
1338 }
1339
1340 break;
1341
1342 case '$':
1343 c = getc(finput);
1344 type_name = NULL;
1345
1346 if (c == '<')
1347 {
1348 register char *cp = token_buffer;
1349
1350 while ((c = getc(finput)) != '>' && c > 0)
118fb205
JT
1351 {
1352 if (cp == token_buffer + maxtoken)
1353 cp = grow_token_buffer(cp);
1354
1355 *cp++ = c;
1356 }
1ff442ca
NF
1357 *cp = 0;
1358 type_name = token_buffer;
1359 value_components_used = 1;
1360
1361 c = getc(finput);
1362 }
1363 if (c == '$')
1364 {
1365 fprintf(faction, "yyval");
1366 if (!type_name) type_name = get_type_name(0, rule);
1367 if (type_name)
1368 fprintf(faction, ".%s", type_name);
a083fbbf
RS
1369 if(!type_name && typed)
1370 warns(_("$$ of `%s' has no declared type"), rule->sym->tag);
1ff442ca
NF
1371 }
1372 else if (isdigit(c) || c == '-')
1373 {
1374 ungetc (c, finput);
1375 n = read_signed_integer(finput);
1376 c = getc(finput);
1377
1378 if (!type_name && n > 0)
1379 type_name = get_type_name(n, rule);
1380
1381 fprintf(faction, "yyvsp[%d]", n - stack_offset);
1382 if (type_name)
1383 fprintf(faction, ".%s", type_name);
a083fbbf
RS
1384 if(!type_name && typed)
1385 warnss(_("$%s of `%s' has no declared type"),
943819bf 1386 int_to_string(n), rule->sym->tag);
1ff442ca
NF
1387 continue;
1388 }
1389 else
aba5ca6d 1390 warns(_("$%s is invalid"), printable_version(c));
1ff442ca
NF
1391
1392 break;
1393
1394 case '@':
1395 c = getc(finput);
1396 if (isdigit(c) || c == '-')
1397 {
1398 ungetc (c, finput);
1399 n = read_signed_integer(finput);
1400 c = getc(finput);
1401 }
1402 else
943819bf 1403 {
a083fbbf 1404 warn(_("invalid @-construct"));
943819bf
RS
1405 n = 1;
1406 }
1ff442ca
NF
1407
1408 fprintf(faction, "yylsp[%d]", n - stack_offset);
1409 yylsp_needed = 1;
1410
1411 continue;
1412
1413 case EOF:
a083fbbf 1414 fatal(_("unmatched `{'"));
1ff442ca
NF
1415
1416 default:
1417 putc(c, faction);
1418 }
1419
1420 c = getc(finput);
1421 }
1422
1423 /* above loop exits when c is '}' */
1424
1425 if (--count)
1426 {
1427 putc(c, faction);
1428 c = getc(finput);
1429 }
1430 }
1431
1432 fprintf(faction, ";\n break;}");
1433}
1434
1435
1436
1437/* generate a dummy symbol, a nonterminal,
1438whose name cannot conflict with the user's names. */
1439
1440bucket *
118fb205 1441gensym (void)
1ff442ca
NF
1442{
1443 register bucket *sym;
1444
1445 sprintf (token_buffer, "@%d", ++gensym_count);
1446 sym = getsym(token_buffer);
1447 sym->class = SNTERM;
1448 sym->value = nvars++;
1449 return (sym);
1450}
1451
1452/* Parse the input grammar into a one symbol_list structure.
1453Each rule is represented by a sequence of symbols: the left hand side
1454followed by the contents of the right hand side, followed by a null pointer
1455instead of a symbol to terminate the rule.
1456The next symbol is the lhs of the following rule.
1457
1458All guards and actions are copied out to the appropriate files,
1459labelled by the rule number they apply to. */
1460
1461void
118fb205 1462readgram (void)
1ff442ca
NF
1463{
1464 register int t;
2686a6e7 1465 register bucket *lhs = NULL;
1ff442ca
NF
1466 register symbol_list *p;
1467 register symbol_list *p1;
1468 register bucket *bp;
1469
1470 symbol_list *crule; /* points to first symbol_list of current rule. */
1471 /* its symbol is the lhs of the rule. */
1472 symbol_list *crule1; /* points to the symbol_list preceding crule. */
1473
1474 p1 = NULL;
1475
1476 t = lex();
1477
1478 while (t != TWO_PERCENTS && t != ENDFILE)
1479 {
1480 if (t == IDENTIFIER || t == BAR)
1481 {
1482 register int actionflag = 0;
1483 int rulelength = 0; /* number of symbols in rhs of this rule so far */
1484 int xactions = 0; /* JF for error checking */
1485 bucket *first_rhs = 0;
1486
1487 if (t == IDENTIFIER)
1488 {
1489 lhs = symval;
943819bf
RS
1490
1491 if (!start_flag)
1492 {
1493 startval = lhs;
1494 start_flag = 1;
1495 }
a083fbbf 1496
1ff442ca
NF
1497 t = lex();
1498 if (t != COLON)
943819bf 1499 {
a083fbbf 1500 warn(_("ill-formed rule: initial symbol not followed by colon"));
943819bf
RS
1501 unlex(t);
1502 }
1ff442ca
NF
1503 }
1504
943819bf 1505 if (nrules == 0 && t == BAR)
1ff442ca 1506 {
a083fbbf 1507 warn(_("grammar starts with vertical bar"));
943819bf 1508 lhs = symval; /* BOGUS: use a random symval */
1ff442ca 1509 }
1ff442ca
NF
1510 /* start a new rule and record its lhs. */
1511
1512 nrules++;
1513 nitems++;
1514
1515 record_rule_line ();
1516
1517 p = NEW(symbol_list);
1518 p->sym = lhs;
1519
1520 crule1 = p1;
1521 if (p1)
1522 p1->next = p;
1523 else
1524 grammar = p;
1525
1526 p1 = p;
1527 crule = p;
1528
1529 /* mark the rule's lhs as a nonterminal if not already so. */
1530
1531 if (lhs->class == SUNKNOWN)
1532 {
1533 lhs->class = SNTERM;
1534 lhs->value = nvars;
1535 nvars++;
1536 }
1537 else if (lhs->class == STOKEN)
a083fbbf 1538 warns(_("rule given for %s, which is a token"), lhs->tag);
1ff442ca
NF
1539
1540 /* read the rhs of the rule. */
1541
1542 for (;;)
1543 {
1544 t = lex();
943819bf
RS
1545 if (t == PREC)
1546 {
1547 t = lex();
1548 crule->ruleprec = symval;
1549 t = lex();
1550 }
1ff442ca
NF
1551
1552 if (! (t == IDENTIFIER || t == LEFT_CURLY)) break;
1553
1554 /* If next token is an identifier, see if a colon follows it.
1555 If one does, exit this rule now. */
1556 if (t == IDENTIFIER)
1557 {
1558 register bucket *ssave;
1559 register int t1;
1560
1561 ssave = symval;
1562 t1 = lex();
1563 unlex(t1);
1564 symval = ssave;
1565 if (t1 == COLON) break;
1566
1567 if(!first_rhs) /* JF */
1568 first_rhs = symval;
1569 /* Not followed by colon =>
1570 process as part of this rule's rhs. */
1571 }
1572
1573 /* If we just passed an action, that action was in the middle
1574 of a rule, so make a dummy rule to reduce it to a
1575 non-terminal. */
1576 if (actionflag)
1577 {
1578 register bucket *sdummy;
1579
1580 /* Since the action was written out with this rule's */
943819bf 1581 /* number, we must give the new rule this number */
1ff442ca
NF
1582 /* by inserting the new rule before it. */
1583
1584 /* Make a dummy nonterminal, a gensym. */
1585 sdummy = gensym();
1586
1587 /* Make a new rule, whose body is empty,
1588 before the current one, so that the action
1589 just read can belong to it. */
1590 nrules++;
1591 nitems++;
1592 record_rule_line ();
1593 p = NEW(symbol_list);
1594 if (crule1)
1595 crule1->next = p;
1596 else grammar = p;
1597 p->sym = sdummy;
1598 crule1 = NEW(symbol_list);
1599 p->next = crule1;
1600 crule1->next = crule;
1601
1602 /* insert the dummy generated by that rule into this rule. */
1603 nitems++;
1604 p = NEW(symbol_list);
1605 p->sym = sdummy;
1606 p1->next = p;
1607 p1 = p;
1608
1609 actionflag = 0;
1610 }
1611
1612 if (t == IDENTIFIER)
1613 {
1614 nitems++;
1615 p = NEW(symbol_list);
1616 p->sym = symval;
1617 p1->next = p;
1618 p1 = p;
1619 }
1620 else /* handle an action. */
1621 {
1622 copy_action(crule, rulelength);
1623 actionflag = 1;
1624 xactions++; /* JF */
1625 }
1626 rulelength++;
943819bf 1627 } /* end of read rhs of rule */
1ff442ca
NF
1628
1629 /* Put an empty link in the list to mark the end of this rule */
1630 p = NEW(symbol_list);
1631 p1->next = p;
1632 p1 = p;
1633
1634 if (t == PREC)
1635 {
a083fbbf 1636 warn(_("two @prec's in a row"));
1ff442ca
NF
1637 t = lex();
1638 crule->ruleprec = symval;
1639 t = lex();
1640 }
1641 if (t == GUARD)
1642 {
1643 if (! semantic_parser)
a083fbbf 1644 warn(_("%%guard present but %%semantic_parser not specified"));
1ff442ca
NF
1645
1646 copy_guard(crule, rulelength);
1647 t = lex();
1648 }
1649 else if (t == LEFT_CURLY)
1650 {
943819bf 1651 /* This case never occurs -wjh */
a083fbbf 1652 if (actionflag) warn(_("two actions at end of one rule"));
1ff442ca 1653 copy_action(crule, rulelength);
943819bf
RS
1654 actionflag = 1;
1655 xactions++; /* -wjh */
1ff442ca
NF
1656 t = lex();
1657 }
1658 /* If $$ is being set in default way,
1659 warn if any type mismatch. */
1660 else if (!xactions && first_rhs && lhs->type_name != first_rhs->type_name)
1661 {
1662 if (lhs->type_name == 0 || first_rhs->type_name == 0
1663 || strcmp(lhs->type_name,first_rhs->type_name))
a083fbbf 1664 warnss(_("type clash (`%s' `%s') on default action"),
1ff442ca
NF
1665 lhs->type_name ? lhs->type_name : "",
1666 first_rhs->type_name ? first_rhs->type_name : "");
1667 }
1668 /* Warn if there is no default for $$ but we need one. */
1669 else if (!xactions && !first_rhs && lhs->type_name != 0)
a083fbbf 1670 warn(_("empty rule for typed nonterminal, and no action"));
1ff442ca
NF
1671 if (t == SEMICOLON)
1672 t = lex();
a083fbbf 1673 }
943819bf
RS
1674#if 0
1675 /* these things can appear as alternatives to rules. */
1676/* NO, they cannot.
1677 a) none of the documentation allows them
1678 b) most of them scan forward until finding a next %
1679 thus they may swallow lots of intervening rules
1680*/
1ff442ca
NF
1681 else if (t == TOKEN)
1682 {
1683 parse_token_decl(STOKEN, SNTERM);
1684 t = lex();
1685 }
1686 else if (t == NTERM)
1687 {
1688 parse_token_decl(SNTERM, STOKEN);
1689 t = lex();
1690 }
1691 else if (t == TYPE)
1692 {
1693 t = get_type();
1694 }
1695 else if (t == UNION)
1696 {
1697 parse_union_decl();
1698 t = lex();
1699 }
1700 else if (t == EXPECT)
1701 {
1702 parse_expect_decl();
1703 t = lex();
1704 }
1705 else if (t == START)
1706 {
1707 parse_start_decl();
1708 t = lex();
1709 }
943819bf
RS
1710#endif
1711
1ff442ca 1712 else
943819bf 1713 {
a083fbbf 1714 warns(_("invalid input: %s"), token_buffer);
943819bf
RS
1715 t = lex();
1716 }
1ff442ca
NF
1717 }
1718
943819bf
RS
1719 /* grammar has been read. Do some checking */
1720
1ff442ca 1721 if (nsyms > MAXSHORT)
a083fbbf 1722 fatals(_("too many symbols (tokens plus nonterminals); maximum %s"),
943819bf 1723 int_to_string(MAXSHORT));
1ff442ca 1724 if (nrules == 0)
a083fbbf 1725 fatal(_("no rules in the input grammar"));
1ff442ca
NF
1726
1727 if (typed == 0 /* JF put out same default YYSTYPE as YACC does */
1728 && !value_components_used)
1729 {
1730 /* We used to use `unsigned long' as YYSTYPE on MSDOS,
1731 but it seems better to be consistent.
1732 Most programs should declare their own type anyway. */
1733 fprintf(fattrs, "#ifndef YYSTYPE\n#define YYSTYPE int\n#endif\n");
1734 if (fdefines)
1735 fprintf(fdefines, "#ifndef YYSTYPE\n#define YYSTYPE int\n#endif\n");
1736 }
1737
1738 /* Report any undefined symbols and consider them nonterminals. */
1739
1740 for (bp = firstsymbol; bp; bp = bp->next)
1741 if (bp->class == SUNKNOWN)
1742 {
a083fbbf 1743 warns(_("symbol %s is used, but is not defined as a token and has no rules"),
1ff442ca 1744 bp->tag);
1ff442ca
NF
1745 bp->class = SNTERM;
1746 bp->value = nvars++;
1747 }
1748
1749 ntokens = nsyms - nvars;
1750}
1751
1752
1753void
118fb205 1754record_rule_line (void)
1ff442ca
NF
1755{
1756 /* Record each rule's source line number in rline table. */
1757
1758 if (nrules >= rline_allocated)
1759 {
1760 rline_allocated = nrules * 2;
118fb205
JT
1761 rline = (short *) xrealloc ((char *) rline,
1762 rline_allocated * sizeof (short));
1ff442ca
NF
1763 }
1764 rline[nrules] = lineno;
1765}
1766
1767
#if 0
/* Read in a %type declaration and record its information for
   get_type_name to access.

   The type name is read first, then every identifier up to the closing
   semicolon is given that type (commas are skipped).  Returns the token
   following the semicolon, or the first unexpected token.

   This is unused.  It is only called from the #if 0 part of readgram.  */
static int
get_type (void)
{
  int tok;
  char *type;

  tok = lex ();
  if (tok != TYPENAME)
    {
      /* NOTE(review): other warn() messages in this file escape the
         percent sign (`%%guard'); confirm whether `%type' needs the
         same treatment before re-enabling this code.  */
      warn (_("ill-formed %type declaration"));
      return tok;
    }

  /* Keep a private copy of the type name: token_buffer is overwritten
     by the following calls to lex.  */
  type = NEW2 (strlen (token_buffer) + 1, char);
  strcpy (type, token_buffer);

  for (;;)
    {
      tok = lex ();

      if (tok == SEMICOLON)
        return lex ();

      if (tok == COMMA)
        continue;

      if (tok != IDENTIFIER)
        return tok;

      if (symval->type_name == NULL)
        symval->type_name = type;
      else if (strcmp (type, symval->type_name) != 0)
        warns (_("type redeclaration for %s"), symval->tag);
    }
}
#endif
1ff442ca
NF
1816
1817
1818/* assign symbol numbers, and write definition of token names into fdefines.
1819Set up vectors tags and sprec of names and precedences of symbols. */
1820
1821void
118fb205 1822packsymbols (void)
1ff442ca
NF
1823{
1824 register bucket *bp;
1825 register int tokno = 1;
1826 register int i;
1827 register int last_user_token_number;
1828
1829 /* int lossage = 0; JF set but not used */
1830
1831 tags = NEW2(nsyms + 1, char *);
1832 tags[0] = "$";
943819bf
RS
1833 user_toknums = NEW2(nsyms + 1, int);
1834 user_toknums[0] = 0;
1ff442ca
NF
1835
1836 sprec = NEW2(nsyms, short);
1837 sassoc = NEW2(nsyms, short);
1838
1839 max_user_token_number = 256;
1840 last_user_token_number = 256;
1841
1842 for (bp = firstsymbol; bp; bp = bp->next)
1843 {
1844 if (bp->class == SNTERM)
1845 {
1846 bp->value += ntokens;
1847 }
943819bf
RS
1848 else if (bp->alias)
1849 {
1850 /* this symbol and its alias are a single token defn.
1851 allocate a tokno, and assign to both
a083fbbf 1852 check agreement of ->prec and ->assoc fields
943819bf
RS
1853 and make both the same
1854 */
1855 if (bp->value == 0)
1856 bp->value = bp->alias->value = tokno++;
1857
1858 if (bp->prec != bp->alias->prec) {
1859 if (bp->prec != 0 && bp->alias->prec != 0
1860 && bp->user_token_number == SALIAS)
a083fbbf 1861 warnss(_("conflicting precedences for %s and %s"),
943819bf
RS
1862 bp->tag, bp->alias->tag);
1863 if (bp->prec != 0) bp->alias->prec = bp->prec;
1864 else bp->prec = bp->alias->prec;
1865 }
1866
1867 if (bp->assoc != bp->alias->assoc) {
1868 if (bp->assoc != 0 && bp->alias->assoc != 0
1869 && bp->user_token_number == SALIAS)
a083fbbf 1870 warnss(_("conflicting assoc values for %s and %s"),
943819bf
RS
1871 bp->tag, bp->alias->tag);
1872 if (bp->assoc != 0) bp->alias->assoc = bp->assoc;
1873 else bp->assoc = bp->alias->assoc;
1874 }
1875
1876 if (bp->user_token_number == SALIAS)
1877 continue; /* do not do processing below for SALIASs */
1878
1879 }
1880 else /* bp->class == STOKEN */
1881 {
1882 bp->value = tokno++;
1883 }
1884
1885 if (bp->class == STOKEN)
1ff442ca
NF
1886 {
1887 if (translations && !(bp->user_token_number))
1888 bp->user_token_number = ++last_user_token_number;
1889 if (bp->user_token_number > max_user_token_number)
1890 max_user_token_number = bp->user_token_number;
1ff442ca
NF
1891 }
1892
1893 tags[bp->value] = bp->tag;
943819bf 1894 user_toknums[bp->value] = bp->user_token_number;
1ff442ca
NF
1895 sprec[bp->value] = bp->prec;
1896 sassoc[bp->value] = bp->assoc;
1897
1898 }
1899
1900 if (translations)
1901 {
1902 register int i;
1903
1904 token_translations = NEW2(max_user_token_number+1, short);
1905
1906 /* initialize all entries for literal tokens to 2,
572909b5
RS
1907 the internal token number for $undefined.,
1908 which represents all invalid inputs. */
1ff442ca 1909 for (i = 0; i <= max_user_token_number; i++)
a083fbbf 1910 token_translations[i] = 2;
1ff442ca 1911
943819bf
RS
1912 for (bp = firstsymbol; bp; bp = bp->next)
1913 {
1914 if (bp->value >= ntokens) continue; /* non-terminal */
a083fbbf 1915 if (bp->user_token_number == SALIAS) continue;
943819bf 1916 if (token_translations[bp->user_token_number] != 2)
a083fbbf 1917 warnsss(_("tokens %s and %s both assigned number %s"),
1ff442ca
NF
1918 tags[token_translations[bp->user_token_number]],
1919 bp->tag,
943819bf
RS
1920 int_to_string(bp->user_token_number));
1921 token_translations[bp->user_token_number] = bp->value;
1922 }
1ff442ca
NF
1923 }
1924
1925 error_token_number = errtoken->value;
1926
943819bf
RS
1927 if (! noparserflag)
1928 output_token_defines(ftable);
1ff442ca
NF
1929
1930 if (startval->class == SUNKNOWN)
a083fbbf 1931 fatals(_("the start symbol %s is undefined"), startval->tag);
1ff442ca 1932 else if (startval->class == STOKEN)
a083fbbf 1933 fatals(_("the start symbol %s is a token"), startval->tag);
1ff442ca
NF
1934
1935 start_symbol = startval->value;
1936
1937 if (definesflag)
1938 {
1939 output_token_defines(fdefines);
1940
1941 if (!pure_parser)
1942 {
1943 if (spec_name_prefix)
1944 fprintf(fdefines, "\nextern YYSTYPE %slval;\n", spec_name_prefix);
1945 else
1946 fprintf(fdefines, "\nextern YYSTYPE yylval;\n");
1947 }
1948
1949 if (semantic_parser)
1950 for (i = ntokens; i < nsyms; i++)
1951 {
1952 /* don't make these for dummy nonterminals made by gensym. */
1953 if (*tags[i] != '@')
1954 fprintf(fdefines, "#define\tNT%s\t%d\n", tags[i], i);
1955 }
1956#if 0
1957 /* `fdefines' is now a temporary file, so we need to copy its
1958 contents in `done', so we can't close it here. */
1959 fclose(fdefines);
1960 fdefines = NULL;
1961#endif
1962 }
1963}
a083fbbf
RS
1964
1965/* For named tokens, but not literal ones, define the name.
1966 The value is the user token number.
943819bf 1967*/
1ff442ca 1968void
118fb205 1969output_token_defines (FILE *file)
1ff442ca
NF
1970{
1971 bucket *bp;
943819bf
RS
1972 register char *cp, *symbol;
1973 register char c;
1ff442ca
NF
1974
1975 for (bp = firstsymbol; bp; bp = bp->next)
1976 {
943819bf 1977 symbol = bp->tag; /* get symbol */
1ff442ca 1978
943819bf
RS
1979 if (bp->value >= ntokens) continue;
1980 if (bp->user_token_number == SALIAS) continue;
1981 if ('\'' == *symbol) continue; /* skip literal character */
1982 if (bp == errtoken) continue; /* skip error token */
a083fbbf 1983 if ('\"' == *symbol)
1ff442ca 1984 {
943819bf
RS
1985 /* use literal string only if given a symbol with an alias */
1986 if (bp->alias)
1987 symbol = bp->alias->tag;
1988 else
1989 continue;
1990 }
1ff442ca 1991
943819bf
RS
1992 /* Don't #define nonliteral tokens whose names contain periods. */
1993 cp = symbol;
1994 while ((c = *cp++) && c != '.');
1995 if (c != '\0') continue;
1ff442ca 1996
943819bf 1997 fprintf(file, "#define\t%s\t%d\n", symbol,
a083fbbf
RS
1998 ((translations && ! rawtoknumflag)
1999 ? bp->user_token_number
943819bf
RS
2000 : bp->value));
2001 if (semantic_parser)
2002 fprintf(file, "#define\tT%s\t%d\n", symbol, bp->value);
1ff442ca
NF
2003 }
2004
2005 putc('\n', file);
2006}
2007
2008
2009
2010/* convert the rules into the representation using rrhs, rlhs and ritems. */
2011
2012void
118fb205 2013packgram (void)
1ff442ca
NF
2014{
2015 register int itemno;
2016 register int ruleno;
2017 register symbol_list *p;
2018/* register bucket *bp; JF unused */
2019
2020 bucket *ruleprec;
2021
2022 ritem = NEW2(nitems + 1, short);
2023 rlhs = NEW2(nrules, short) - 1;
2024 rrhs = NEW2(nrules, short) - 1;
2025 rprec = NEW2(nrules, short) - 1;
2026 rprecsym = NEW2(nrules, short) - 1;
2027 rassoc = NEW2(nrules, short) - 1;
2028
2029 itemno = 0;
2030 ruleno = 1;
2031
2032 p = grammar;
2033 while (p)
2034 {
2035 rlhs[ruleno] = p->sym->value;
2036 rrhs[ruleno] = itemno;
2037 ruleprec = p->ruleprec;
2038
2039 p = p->next;
2040 while (p && p->sym)
2041 {
2042 ritem[itemno++] = p->sym->value;
2043 /* A rule gets by default the precedence and associativity
2044 of the last token in it. */
2045 if (p->sym->class == STOKEN)
2046 {
2047 rprec[ruleno] = p->sym->prec;
2048 rassoc[ruleno] = p->sym->assoc;
2049 }
2050 if (p) p = p->next;
2051 }
2052
2053 /* If this rule has a %prec,
2054 the specified symbol's precedence replaces the default. */
2055 if (ruleprec)
2056 {
2057 rprec[ruleno] = ruleprec->prec;
2058 rassoc[ruleno] = ruleprec->assoc;
2059 rprecsym[ruleno] = ruleprec->value;
2060 }
2061
2062 ritem[itemno++] = -ruleno;
2063 ruleno++;
2064
2065 if (p) p = p->next;
2066 }
2067
2068 ritem[itemno] = 0;
2069}
2070\f
/* Read a signed integer from STREAM and return its value.

   The accepted syntax is an optional `-' followed by decimal digits.
   The first character that is not part of the number is pushed back onto
   STREAM.  If no digit is present the result is 0.  A value too large
   for an int saturates at the largest int magnitude instead of
   overflowing.  */

int
read_signed_integer (FILE *stream)
{
  /* Largest int, computed locally so that no extra header is needed.
     Assumes int has exactly one value bit fewer than unsigned int.  */
  const int int_max = (int) (~0u >> 1);
  int c = getc (stream);
  int sign = 1;
  int n = 0;

  if (c == '-')
    {
      c = getc (stream);
      sign = -1;
    }

  while (c != EOF && isdigit (c))
    {
      int digit = c - '0';

      /* Signed overflow is undefined behavior, so test before
         multiplying; once saturated the value stays at int_max.  */
      if (n > (int_max - digit) / 10)
        n = int_max;
      else
        n = 10 * n + digit;

      c = getc (stream);
    }

  /* There is nothing to push back at end of file.  */
  if (c != EOF)
    ungetc (c, stream);

  return n * sign;
}