]> git.saurik.com Git - bison.git/blame - src/reader.c
Don't define PACKAGE here, since config.h defines it.
[bison.git] / src / reader.c
CommitLineData
1ff442ca 1/* Input parser for bison
74a53b4b 2 Copyright (C) 1984, 1986, 1989, 1992, 1998 Free Software Foundation, Inc.
1ff442ca
NF
3
4This file is part of Bison, the GNU Compiler Compiler.
5
6Bison is free software; you can redistribute it and/or modify
7it under the terms of the GNU General Public License as published by
8the Free Software Foundation; either version 2, or (at your option)
9any later version.
10
11Bison is distributed in the hope that it will be useful,
12but WITHOUT ANY WARRANTY; without even the implied warranty of
13MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14GNU General Public License for more details.
15
16You should have received a copy of the GNU General Public License
17along with Bison; see the file COPYING. If not, write to
18the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. */
19
20
21/* read in the grammar specification and record it in the format described in gram.h.
22 All guards are copied into the fguard file and all actions into faction,
23 in each case forming the body of a C function (yyguard or yyaction)
24 which contains a switch statement to decide which guard or action to execute.
25
26The entry point is reader(). */
27
28#include <stdio.h>
1ff442ca
NF
29#include "system.h"
30#include "files.h"
7612000c 31#include "alloc.h"
1ff442ca
NF
32#include "symtab.h"
33#include "lex.h"
34#include "gram.h"
35#include "machine.h"
36
37#define LTYPESTR "\n#ifndef YYLTYPE\ntypedef\n struct yyltype\n\
38 {\n int timestamp;\n int first_line;\n int first_column;\
39\n int last_line;\n int last_column;\n char *text;\n }\n\
40 yyltype;\n\n#define YYLTYPE yyltype\n#endif\n\n"
41
42/* Number of slots allocated (but not necessarily used yet) in `rline' */
43int rline_allocated;
44
45extern char *program_name;
46extern int definesflag;
47extern int nolinesflag;
943819bf
RS
48extern int noparserflag;
49extern int rawtoknumflag;
1ff442ca
NF
50extern bucket *symval;
51extern int numval;
1ff442ca
NF
52extern int expected_conflicts;
53extern char *token_buffer;
118fb205
JT
54extern int maxtoken;
55
56extern void init_lex PARAMS((void));
57extern char *grow_token_buffer PARAMS((char *));
58extern void tabinit PARAMS((void));
59extern void output_headers PARAMS((void));
60extern void output_trailers PARAMS((void));
61extern void free_symtab PARAMS((void));
62extern void open_extra_files PARAMS((void));
63extern char *int_to_string PARAMS((int));
64extern char *printable_version PARAMS((int));
65extern void fatal PARAMS((char *));
66extern void fatals PARAMS((char *, char *));
67extern void warn PARAMS((char *));
68extern void warni PARAMS((char *, int));
69extern void warns PARAMS((char *, char *));
70extern void warnss PARAMS((char *, char *, char *));
71extern void warnsss PARAMS((char *, char *, char *, char *));
72extern void unlex PARAMS((int));
73extern void done PARAMS((int));
74
75extern int skip_white_space PARAMS((void));
76extern int parse_percent_token PARAMS((void));
77extern int lex PARAMS((void));
1ff442ca
NF
78
79typedef
80 struct symbol_list
81 {
82 struct symbol_list *next;
83 bucket *sym;
84 bucket *ruleprec;
85 }
86 symbol_list;
87
88
118fb205
JT
89void reader PARAMS((void));
90void reader_output_yylsp PARAMS((FILE *));
91void read_declarations PARAMS((void));
92void copy_definition PARAMS((void));
93void parse_token_decl PARAMS((int, int));
94void parse_start_decl PARAMS((void));
95void parse_type_decl PARAMS((void));
96void parse_assoc_decl PARAMS((int));
97void parse_union_decl PARAMS((void));
98void parse_expect_decl PARAMS((void));
99char *get_type_name PARAMS((int, symbol_list *));
100void copy_guard PARAMS((symbol_list *, int));
101void parse_thong_decl PARAMS((void));
102void copy_action PARAMS((symbol_list *, int));
103bucket *gensym PARAMS((void));
104void readgram PARAMS((void));
105void record_rule_line PARAMS((void));
106void packsymbols PARAMS((void));
107void output_token_defines PARAMS((FILE *));
108void packgram PARAMS((void));
109int read_signed_integer PARAMS((FILE *));
110static int get_type PARAMS((void));
111
1ff442ca
NF
112
113int lineno;
114symbol_list *grammar;
115int start_flag;
116bucket *startval;
117char **tags;
943819bf 118int *user_toknums;
1ff442ca
NF
119
120/* Nonzero if components of semantic values are used, implying
121 they must be unions. */
122static int value_components_used;
123
124static int typed; /* nonzero if %union has been seen. */
125
126static int lastprec; /* incremented for each %left, %right or %nonassoc seen */
127
128static int gensym_count; /* incremented for each generated symbol */
129
130static bucket *errtoken;
5b2e3c89 131static bucket *undeftoken;
1ff442ca
NF
132
133/* Nonzero if any action or guard uses the @n construct. */
134static int yylsp_needed;
135
136extern char *version_string;
137
943819bf
RS
138
139static void
118fb205 140skip_to_char (int target)
943819bf
RS
141{
142 int c;
143 if (target == '\n')
a083fbbf 144 warn(_(" Skipping to next \\n"));
943819bf 145 else
a083fbbf 146 warni(_(" Skipping to next %c"), target);
943819bf
RS
147
148 do
149 c = skip_white_space();
150 while (c != target && c != EOF);
a083fbbf 151 if (c != EOF)
943819bf
RS
152 ungetc(c, finput);
153}
154
155
1ff442ca 156void
118fb205 157reader (void)
1ff442ca
NF
158{
159 start_flag = 0;
160 startval = NULL; /* start symbol not specified yet. */
161
162#if 0
163 translations = 0; /* initially assume token number translation not needed. */
164#endif
165 /* Nowadays translations is always set to 1,
166 since we give `error' a user-token-number
167 to satisfy the Posix demand for YYERRCODE==256. */
168 translations = 1;
169
170 nsyms = 1;
171 nvars = 0;
172 nrules = 0;
173 nitems = 0;
174 rline_allocated = 10;
175 rline = NEW2(rline_allocated, short);
176
177 typed = 0;
178 lastprec = 0;
179
180 gensym_count = 0;
181
182 semantic_parser = 0;
183 pure_parser = 0;
184 yylsp_needed = 0;
185
186 grammar = NULL;
187
188 init_lex();
189 lineno = 1;
190
191 /* initialize the symbol table. */
192 tabinit();
193 /* construct the error token */
194 errtoken = getsym("error");
195 errtoken->class = STOKEN;
196 errtoken->user_token_number = 256; /* Value specified by posix. */
197 /* construct a token that represents all undefined literal tokens. */
198 /* it is always token number 2. */
5b2e3c89
JT
199 undeftoken = getsym("$undefined.");
200 undeftoken->class = STOKEN;
201 undeftoken->user_token_number = 2;
1ff442ca
NF
202 /* Read the declaration section. Copy %{ ... %} groups to ftable and fdefines file.
203 Also notice any %token, %left, etc. found there. */
a083fbbf 204 if (noparserflag)
943819bf
RS
205 fprintf(ftable, "\n/* Bison-generated parse tables, made from %s\n",
206 infile);
207 else
208 fprintf(ftable, "\n/* A Bison parser, made from %s\n", infile);
209 fprintf(ftable, " by %s */\n\n", version_string);
1ff442ca
NF
210 fprintf(ftable, "#define YYBISON 1 /* Identify Bison output. */\n\n");
211 read_declarations();
1ff442ca
NF
212 /* start writing the guard and action files, if they are needed. */
213 output_headers();
214 /* read in the grammar, build grammar in list form. write out guards and actions. */
215 readgram();
216 /* Now we know whether we need the line-number stack.
217 If we do, write its type into the .tab.h file. */
943819bf
RS
218 if (fdefines)
219 reader_output_yylsp(fdefines);
1ff442ca
NF
220 /* write closing delimiters for actions and guards. */
221 output_trailers();
222 if (yylsp_needed)
223 fprintf(ftable, "#define YYLSP_NEEDED\n\n");
224 /* assign the symbols their symbol numbers.
225 Write #defines for the token symbols into fdefines if requested. */
226 packsymbols();
227 /* convert the grammar into the format described in gram.h. */
228 packgram();
229 /* free the symbol table data structure
230 since symbols are now all referred to by symbol number. */
231 free_symtab();
232}
233
943819bf 234void
118fb205 235reader_output_yylsp (FILE *f)
943819bf
RS
236{
237 if (yylsp_needed)
238 fprintf(f, LTYPESTR);
239}
1ff442ca
NF
240
241/* read from finput until %% is seen. Discard the %%.
242Handle any % declarations,
243and copy the contents of any %{ ... %} groups to fattrs. */
244
245void
118fb205 246read_declarations (void)
1ff442ca
NF
247{
248 register int c;
249 register int tok;
250
251 for (;;)
252 {
253 c = skip_white_space();
254
255 if (c == '%')
256 {
257 tok = parse_percent_token();
258
259 switch (tok)
260 {
261 case TWO_PERCENTS:
262 return;
263
264 case PERCENT_LEFT_CURLY:
265 copy_definition();
266 break;
267
268 case TOKEN:
269 parse_token_decl (STOKEN, SNTERM);
270 break;
a083fbbf 271
1ff442ca
NF
272 case NTERM:
273 parse_token_decl (SNTERM, STOKEN);
274 break;
a083fbbf 275
1ff442ca
NF
276 case TYPE:
277 parse_type_decl();
278 break;
a083fbbf 279
1ff442ca
NF
280 case START:
281 parse_start_decl();
282 break;
a083fbbf 283
1ff442ca
NF
284 case UNION:
285 parse_union_decl();
286 break;
a083fbbf 287
1ff442ca
NF
288 case EXPECT:
289 parse_expect_decl();
290 break;
943819bf
RS
291 case THONG:
292 parse_thong_decl();
293 break;
1ff442ca
NF
294 case LEFT:
295 parse_assoc_decl(LEFT_ASSOC);
296 break;
297
298 case RIGHT:
299 parse_assoc_decl(RIGHT_ASSOC);
300 break;
301
302 case NONASSOC:
303 parse_assoc_decl(NON_ASSOC);
304 break;
305
306 case SEMANTIC_PARSER:
307 if (semantic_parser == 0)
308 {
309 semantic_parser = 1;
310 open_extra_files();
311 }
312 break;
313
314 case PURE_PARSER:
315 pure_parser = 1;
316 break;
317
943819bf
RS
318 case NOOP:
319 break;
320
1ff442ca 321 default:
a083fbbf 322 warns(_("unrecognized: %s"), token_buffer);
943819bf
RS
323 skip_to_char('%');
324 }
1ff442ca
NF
325 }
326 else if (c == EOF)
a083fbbf 327 fatal(_("no input grammar"));
1ff442ca 328 else
943819bf
RS
329 {
330 char buff[100];
a083fbbf 331 sprintf(buff, _("unknown character: %s"), printable_version(c));
943819bf
RS
332 warn(buff);
333 skip_to_char('%');
334 }
1ff442ca
NF
335 }
336}
337
338
339/* copy the contents of a %{ ... %} into the definitions file.
340The %{ has already been read. Return after reading the %}. */
341
342void
118fb205 343copy_definition (void)
1ff442ca
NF
344{
345 register int c;
346 register int match;
347 register int ended;
348 register int after_percent; /* -1 while reading a character if prev char was % */
349 int cplus_comment;
350
351 if (!nolinesflag)
352 fprintf(fattrs, "#line %d \"%s\"\n", lineno, infile);
353
354 after_percent = 0;
355
356 c = getc(finput);
357
358 for (;;)
359 {
360 switch (c)
361 {
362 case '\n':
363 putc(c, fattrs);
364 lineno++;
365 break;
366
367 case '%':
368 after_percent = -1;
369 break;
a083fbbf 370
1ff442ca
NF
371 case '\'':
372 case '"':
373 match = c;
374 putc(c, fattrs);
375 c = getc(finput);
376
377 while (c != match)
378 {
943819bf 379 if (c == EOF)
a083fbbf 380 fatal(_("unterminated string at end of file"));
943819bf
RS
381 if (c == '\n')
382 {
a083fbbf 383 warn(_("unterminated string"));
943819bf
RS
384 ungetc(c, finput);
385 c = match;
386 continue;
387 }
1ff442ca
NF
388
389 putc(c, fattrs);
a083fbbf 390
1ff442ca
NF
391 if (c == '\\')
392 {
393 c = getc(finput);
394 if (c == EOF)
a083fbbf 395 fatal(_("unterminated string at end of file"));
1ff442ca
NF
396 putc(c, fattrs);
397 if (c == '\n')
398 lineno++;
399 }
400
401 c = getc(finput);
402 }
403
404 putc(c, fattrs);
405 break;
406
407 case '/':
408 putc(c, fattrs);
409 c = getc(finput);
410 if (c != '*' && c != '/')
411 continue;
412
413 cplus_comment = (c == '/');
414 putc(c, fattrs);
415 c = getc(finput);
416
417 ended = 0;
418 while (!ended)
419 {
420 if (!cplus_comment && c == '*')
421 {
422 while (c == '*')
423 {
424 putc(c, fattrs);
425 c = getc(finput);
426 }
427
428 if (c == '/')
429 {
430 putc(c, fattrs);
431 ended = 1;
432 }
433 }
434 else if (c == '\n')
435 {
436 lineno++;
437 putc(c, fattrs);
438 if (cplus_comment)
439 ended = 1;
440 else
441 c = getc(finput);
442 }
443 else if (c == EOF)
a083fbbf 444 fatal(_("unterminated comment in `%{' definition"));
1ff442ca
NF
445 else
446 {
447 putc(c, fattrs);
448 c = getc(finput);
449 }
450 }
451
452 break;
453
454 case EOF:
a083fbbf 455 fatal(_("unterminated `%{' definition"));
1ff442ca
NF
456
457 default:
458 putc(c, fattrs);
459 }
460
461 c = getc(finput);
462
463 if (after_percent)
464 {
465 if (c == '}')
466 return;
467 putc('%', fattrs);
468 }
469 after_percent = 0;
470
471 }
472
473}
474
475
476
477/* parse what comes after %token or %nterm.
478For %token, what_is is STOKEN and what_is_not is SNTERM.
479For %nterm, the arguments are reversed. */
480
481void
118fb205 482parse_token_decl (int what_is, int what_is_not)
1ff442ca 483{
1ff442ca 484 register int token = 0;
1ff442ca 485 register char *typename = 0;
943819bf 486 register struct bucket *symbol = NULL; /* pts to symbol being defined */
1ff442ca
NF
487 int k;
488
1ff442ca
NF
489 for (;;)
490 {
e6011337
JT
491 int tmp_char = ungetc (skip_white_space (), finput);
492
493 if (tmp_char == '%')
1ff442ca 494 return;
e6011337
JT
495 if (tmp_char == EOF)
496 fatals ("Premature EOF after %s", token_buffer);
497
1ff442ca
NF
498 token = lex();
499 if (token == COMMA)
943819bf
RS
500 {
501 symbol = NULL;
502 continue;
503 }
1ff442ca
NF
504 if (token == TYPENAME)
505 {
506 k = strlen(token_buffer);
507 typename = NEW2(k + 1, char);
508 strcpy(typename, token_buffer);
509 value_components_used = 1;
943819bf
RS
510 symbol = NULL;
511 }
512 else if (token == IDENTIFIER && *symval->tag == '\"'
a083fbbf 513 && symbol)
943819bf
RS
514 {
515 translations = 1;
516 symval->class = STOKEN;
517 symval->type_name = typename;
518 symval->user_token_number = symbol->user_token_number;
519 symbol->user_token_number = SALIAS;
520
a083fbbf
RS
521 symval->alias = symbol;
522 symbol->alias = symval;
943819bf
RS
523 symbol = NULL;
524
525 nsyms--; /* symbol and symval combined are only one symbol */
1ff442ca
NF
526 }
527 else if (token == IDENTIFIER)
528 {
529 int oldclass = symval->class;
943819bf 530 symbol = symval;
1ff442ca 531
943819bf 532 if (symbol->class == what_is_not)
a083fbbf 533 warns(_("symbol %s redefined"), symbol->tag);
943819bf 534 symbol->class = what_is;
1ff442ca 535 if (what_is == SNTERM && oldclass != SNTERM)
943819bf 536 symbol->value = nvars++;
1ff442ca
NF
537
538 if (typename)
539 {
943819bf
RS
540 if (symbol->type_name == NULL)
541 symbol->type_name = typename;
542 else if (strcmp(typename, symbol->type_name) != 0)
a083fbbf 543 warns(_("type redeclaration for %s"), symbol->tag);
1ff442ca
NF
544 }
545 }
943819bf 546 else if (symbol && token == NUMBER)
1ff442ca 547 {
943819bf 548 symbol->user_token_number = numval;
1ff442ca
NF
549 translations = 1;
550 }
551 else
943819bf 552 {
a083fbbf
RS
553 warnss(_("`%s' is invalid in %s"),
554 token_buffer,
943819bf
RS
555 (what_is == STOKEN) ? "%token" : "%nterm");
556 skip_to_char('%');
557 }
1ff442ca
NF
558 }
559
560}
561
a083fbbf 562/* parse what comes after %thong
943819bf
RS
563 the full syntax is
564 %thong <type> token number literal
565 the <type> or number may be omitted. The number specifies the
566 user_token_number.
567
568 Two symbols are entered in the table, one for the token symbol and
569 one for the literal. Both are given the <type>, if any, from the declaration.
570 The ->user_token_number of the first is SALIAS and the ->user_token_number
571 of the second is set to the number, if any, from the declaration.
572 The two symbols are linked via pointers in their ->alias fields.
a083fbbf 573
943819bf
RS
574 during output_defines_table, the symbol is reported
575 thereafter, only the literal string is retained
576 it is the literal string that is output to yytname
577*/
578
579void
118fb205 580parse_thong_decl (void)
943819bf
RS
581{
582 register int token;
583 register struct bucket *symbol;
584 register char *typename = 0;
585 int k, usrtoknum;
586
587 translations = 1;
588 token = lex(); /* fetch typename or first token */
589 if (token == TYPENAME) {
590 k = strlen(token_buffer);
591 typename = NEW2(k + 1, char);
592 strcpy(typename, token_buffer);
593 value_components_used = 1;
594 token = lex(); /* fetch first token */
595 }
596
597 /* process first token */
598
a083fbbf 599 if (token != IDENTIFIER)
943819bf 600 {
a083fbbf 601 warns(_("unrecognized item %s, expected an identifier"),
943819bf
RS
602 token_buffer);
603 skip_to_char('%');
604 return;
605 }
606 symval->class = STOKEN;
607 symval->type_name = typename;
608 symval->user_token_number = SALIAS;
609 symbol = symval;
610
611 token = lex(); /* get number or literal string */
a083fbbf 612
943819bf
RS
613 if (token == NUMBER) {
614 usrtoknum = numval;
615 token = lex(); /* okay, did number, now get literal */
616 }
617 else usrtoknum = 0;
618
619 /* process literal string token */
620
a083fbbf 621 if (token != IDENTIFIER || *symval->tag != '\"')
943819bf 622 {
a083fbbf 623 warns(_("expected string constant instead of %s"),
943819bf
RS
624 token_buffer);
625 skip_to_char('%');
626 return;
627 }
628 symval->class = STOKEN;
629 symval->type_name = typename;
630 symval->user_token_number = usrtoknum;
631
a083fbbf
RS
632 symval->alias = symbol;
633 symbol->alias = symval;
943819bf
RS
634
635 nsyms--; /* symbol and symval combined are only one symbol */
636}
1ff442ca
NF
637
638
639/* parse what comes after %start */
640
641void
118fb205 642parse_start_decl (void)
1ff442ca
NF
643{
644 if (start_flag)
a083fbbf 645 warn(_("multiple %start declarations"));
1ff442ca 646 if (lex() != IDENTIFIER)
a083fbbf 647 warn(_("invalid %start declaration"));
943819bf
RS
648 else
649 {
650 start_flag = 1;
651 startval = symval;
652 }
1ff442ca
NF
653}
654
655
656
657/* read in a %type declaration and record its information for get_type_name to access */
658
659void
118fb205 660parse_type_decl (void)
1ff442ca
NF
661{
662 register int k;
663 register char *name;
1ff442ca
NF
664
665 if (lex() != TYPENAME)
943819bf 666 {
a083fbbf 667 warn(_("%type declaration has no <typename>"));
943819bf
RS
668 skip_to_char('%');
669 return;
670 }
1ff442ca
NF
671
672 k = strlen(token_buffer);
673 name = NEW2(k + 1, char);
674 strcpy(name, token_buffer);
675
1ff442ca
NF
676 for (;;)
677 {
678 register int t;
e6011337 679 int tmp_char = ungetc (skip_white_space (), finput);
1ff442ca 680
e6011337 681 if (tmp_char == '%')
1ff442ca 682 return;
e6011337
JT
683 if (tmp_char == EOF)
684 fatals ("Premature EOF after %s", token_buffer);
1ff442ca 685
1ff442ca
NF
686 t = lex();
687
688 switch (t)
689 {
690
691 case COMMA:
692 case SEMICOLON:
693 break;
694
695 case IDENTIFIER:
696 if (symval->type_name == NULL)
697 symval->type_name = name;
943819bf 698 else if (strcmp(name, symval->type_name) != 0)
a083fbbf 699 warns(_("type redeclaration for %s"), symval->tag);
1ff442ca
NF
700
701 break;
702
703 default:
a083fbbf 704 warns(_("invalid %%type declaration due to item: `%s'"), token_buffer);
943819bf 705 skip_to_char('%');
1ff442ca
NF
706 }
707 }
708}
709
710
711
712/* read in a %left, %right or %nonassoc declaration and record its information. */
713/* assoc is either LEFT_ASSOC, RIGHT_ASSOC or NON_ASSOC. */
714
715void
118fb205 716parse_assoc_decl (int assoc)
1ff442ca
NF
717{
718 register int k;
719 register char *name = NULL;
943819bf 720 register int prev = 0;
1ff442ca
NF
721
722 lastprec++; /* Assign a new precedence level, never 0. */
723
1ff442ca
NF
724 for (;;)
725 {
726 register int t;
e6011337 727 int tmp_char = ungetc (skip_white_space (), finput);
1ff442ca 728
e6011337 729 if (tmp_char == '%')
1ff442ca 730 return;
e6011337
JT
731 if (tmp_char == EOF)
732 fatals ("Premature EOF after %s", token_buffer);
1ff442ca 733
1ff442ca
NF
734 t = lex();
735
736 switch (t)
737 {
738
739 case TYPENAME:
740 k = strlen(token_buffer);
741 name = NEW2(k + 1, char);
742 strcpy(name, token_buffer);
743 break;
744
745 case COMMA:
746 break;
747
748 case IDENTIFIER:
749 if (symval->prec != 0)
a083fbbf 750 warns(_("redefining precedence of %s"), symval->tag);
1ff442ca
NF
751 symval->prec = lastprec;
752 symval->assoc = assoc;
753 if (symval->class == SNTERM)
a083fbbf 754 warns(_("symbol %s redefined"), symval->tag);
1ff442ca
NF
755 symval->class = STOKEN;
756 if (name)
757 { /* record the type, if one is specified */
758 if (symval->type_name == NULL)
759 symval->type_name = name;
943819bf 760 else if (strcmp(name, symval->type_name) != 0)
a083fbbf 761 warns(_("type redeclaration for %s"), symval->tag);
1ff442ca
NF
762 }
763 break;
764
765 case NUMBER:
766 if (prev == IDENTIFIER)
767 {
768 symval->user_token_number = numval;
769 translations = 1;
770 }
a083fbbf 771 else
943819bf 772 {
a083fbbf 773 warns(_("invalid text (%s) - number should be after identifier"),
943819bf
RS
774 token_buffer);
775 skip_to_char('%');
776 }
1ff442ca
NF
777 break;
778
779 case SEMICOLON:
780 return;
781
782 default:
a083fbbf 783 warns(_("unexpected item: %s"), token_buffer);
943819bf 784 skip_to_char('%');
1ff442ca
NF
785 }
786
787 prev = t;
788
789 }
790}
791
792
793
794/* copy the union declaration into fattrs (and fdefines),
795 where it is made into the
796 definition of YYSTYPE, the type of elements of the parser value stack. */
797
798void
118fb205 799parse_union_decl (void)
1ff442ca
NF
800{
801 register int c;
802 register int count;
803 register int in_comment;
804 int cplus_comment;
805
806 if (typed)
a083fbbf 807 warn(_("multiple %union declarations"));
1ff442ca
NF
808
809 typed = 1;
810
811 if (!nolinesflag)
812 fprintf(fattrs, "\n#line %d \"%s\"\n", lineno, infile);
813 else
814 fprintf(fattrs, "\n");
815
816 fprintf(fattrs, "typedef union");
817 if (fdefines)
818 fprintf(fdefines, "typedef union");
819
820 count = 0;
821 in_comment = 0;
822
823 c = getc(finput);
824
825 while (c != EOF)
826 {
827 putc(c, fattrs);
828 if (fdefines)
829 putc(c, fdefines);
830
831 switch (c)
832 {
833 case '\n':
834 lineno++;
835 break;
836
837 case '/':
838 c = getc(finput);
839 if (c != '*' && c != '/')
840 ungetc(c, finput);
841 else
842 {
843 putc(c, fattrs);
844 if (fdefines)
845 putc(c, fdefines);
846 cplus_comment = (c == '/');
847 in_comment = 1;
848 c = getc(finput);
849 while (in_comment)
850 {
851 putc(c, fattrs);
852 if (fdefines)
853 putc(c, fdefines);
854
855 if (c == '\n')
856 {
857 lineno++;
858 if (cplus_comment)
859 {
860 in_comment = 0;
861 break;
862 }
863 }
864 if (c == EOF)
a083fbbf 865 fatal(_("unterminated comment at end of file"));
1ff442ca
NF
866
867 if (!cplus_comment && c == '*')
868 {
869 c = getc(finput);
870 if (c == '/')
871 {
872 putc('/', fattrs);
873 if (fdefines)
874 putc('/', fdefines);
875 in_comment = 0;
876 }
877 }
878 else
879 c = getc(finput);
880 }
881 }
882 break;
883
884
885 case '{':
886 count++;
887 break;
888
889 case '}':
890 if (count == 0)
a083fbbf 891 warn (_("unmatched close-brace (`}')"));
1ff442ca 892 count--;
943819bf 893 if (count <= 0)
1ff442ca
NF
894 {
895 fprintf(fattrs, " YYSTYPE;\n");
896 if (fdefines)
897 fprintf(fdefines, " YYSTYPE;\n");
898 /* JF don't choke on trailing semi */
899 c=skip_white_space();
900 if(c!=';') ungetc(c,finput);
901 return;
902 }
903 }
904
905 c = getc(finput);
906 }
907}
908
909/* parse the declaration %expect N which says to expect N
910 shift-reduce conflicts. */
911
912void
118fb205 913parse_expect_decl (void)
1ff442ca
NF
914{
915 register int c;
916 register int count;
917 char buffer[20];
918
919 c = getc(finput);
920 while (c == ' ' || c == '\t')
921 c = getc(finput);
922
923 count = 0;
924 while (c >= '0' && c <= '9')
925 {
926 if (count < 20)
927 buffer[count++] = c;
928 c = getc(finput);
929 }
930 buffer[count] = 0;
931
932 ungetc (c, finput);
933
943819bf 934 if (count <= 0 || count > 10)
a083fbbf 935 warn(_("argument of %expect is not an integer"));
1ff442ca
NF
936 expected_conflicts = atoi (buffer);
937}
938
939/* that's all of parsing the declaration section */
940\f
941/* Get the data type (alternative in the union) of the value for symbol n in rule rule. */
942
943char *
118fb205 944get_type_name (int n, symbol_list *rule)
1ff442ca 945{
a083fbbf 946 static char *msg = N_("invalid $ value");
1ff442ca
NF
947
948 register int i;
949 register symbol_list *rp;
950
951 if (n < 0)
943819bf 952 {
a083fbbf 953 warn(_(msg));
943819bf
RS
954 return NULL;
955 }
1ff442ca
NF
956
957 rp = rule;
958 i = 0;
959
960 while (i < n)
961 {
962 rp = rp->next;
963 if (rp == NULL || rp->sym == NULL)
943819bf 964 {
a083fbbf 965 warn(_(msg));
943819bf
RS
966 return NULL;
967 }
1ff442ca
NF
968 i++;
969 }
970
971 return (rp->sym->type_name);
972}
973
974
1ff442ca
NF
975/* after %guard is seen in the input file,
976copy the actual guard into the guards file.
977If the guard is followed by an action, copy that into the actions file.
978stack_offset is the number of values in the current rule so far,
979which says where to find $0 with respect to the top of the stack,
980for the simple parser in which the stack is not popped until after the guard is run. */
981
982void
118fb205 983copy_guard (symbol_list *rule, int stack_offset)
1ff442ca
NF
984{
985 register int c;
986 register int n;
987 register int count;
988 register int match;
989 register int ended;
990 register char *type_name;
991 int brace_flag = 0;
992 int cplus_comment;
993
994 /* offset is always 0 if parser has already popped the stack pointer */
995 if (semantic_parser) stack_offset = 0;
996
997 fprintf(fguard, "\ncase %d:\n", nrules);
998 if (!nolinesflag)
999 fprintf(fguard, "#line %d \"%s\"\n", lineno, infile);
1000 putc('{', fguard);
1001
1002 count = 0;
1003 c = getc(finput);
1004
1005 while (brace_flag ? (count > 0) : (c != ';'))
1006 {
1007 switch (c)
1008 {
1009 case '\n':
1010 putc(c, fguard);
1011 lineno++;
1012 break;
1013
1014 case '{':
1015 putc(c, fguard);
1016 brace_flag = 1;
1017 count++;
1018 break;
1019
1020 case '}':
1021 putc(c, fguard);
1022 if (count > 0)
1023 count--;
a083fbbf 1024 else
943819bf 1025 {
a083fbbf 1026 warn(_("unmatched right brace (`}')"));
943819bf
RS
1027 c = getc(finput); /* skip it */
1028 }
1ff442ca
NF
1029 break;
1030
1031 case '\'':
1032 case '"':
1033 match = c;
1034 putc(c, fguard);
1035 c = getc(finput);
1036
1037 while (c != match)
1038 {
943819bf 1039 if (c == EOF)
a083fbbf
RS
1040 fatal(_("unterminated string at end of file"));
1041 if (c == '\n')
943819bf 1042 {
a083fbbf 1043 warn(_("unterminated string"));
943819bf
RS
1044 ungetc(c, finput);
1045 c = match; /* invent terminator */
1046 continue;
1047 }
1ff442ca
NF
1048
1049 putc(c, fguard);
a083fbbf 1050
1ff442ca
NF
1051 if (c == '\\')
1052 {
1053 c = getc(finput);
1054 if (c == EOF)
a083fbbf 1055 fatal(_("unterminated string"));
1ff442ca
NF
1056 putc(c, fguard);
1057 if (c == '\n')
1058 lineno++;
1059 }
1060
1061 c = getc(finput);
1062 }
1063
1064 putc(c, fguard);
1065 break;
1066
1067 case '/':
1068 putc(c, fguard);
1069 c = getc(finput);
1070 if (c != '*' && c != '/')
1071 continue;
1072
1073 cplus_comment = (c == '/');
1074 putc(c, fguard);
1075 c = getc(finput);
1076
1077 ended = 0;
1078 while (!ended)
1079 {
1080 if (!cplus_comment && c == '*')
1081 {
1082 while (c == '*')
1083 {
1084 putc(c, fguard);
1085 c = getc(finput);
1086 }
1087
1088 if (c == '/')
1089 {
1090 putc(c, fguard);
1091 ended = 1;
1092 }
1093 }
1094 else if (c == '\n')
1095 {
1096 lineno++;
1097 putc(c, fguard);
1098 if (cplus_comment)
1099 ended = 1;
1100 else
1101 c = getc(finput);
1102 }
1103 else if (c == EOF)
a083fbbf 1104 fatal(_("unterminated comment"));
1ff442ca
NF
1105 else
1106 {
1107 putc(c, fguard);
1108 c = getc(finput);
1109 }
1110 }
1111
1112 break;
1113
1114 case '$':
1115 c = getc(finput);
1116 type_name = NULL;
1117
1118 if (c == '<')
1119 {
1120 register char *cp = token_buffer;
1121
1122 while ((c = getc(finput)) != '>' && c > 0)
118fb205
JT
1123 {
1124 if (cp == token_buffer + maxtoken)
1125 cp = grow_token_buffer(cp);
1126
1127 *cp++ = c;
1128 }
1ff442ca
NF
1129 *cp = 0;
1130 type_name = token_buffer;
1131
1132 c = getc(finput);
1133 }
1134
1135 if (c == '$')
1136 {
1137 fprintf(fguard, "yyval");
1138 if (!type_name) type_name = rule->sym->type_name;
1139 if (type_name)
1140 fprintf(fguard, ".%s", type_name);
943819bf 1141 if(!type_name && typed)
a083fbbf 1142 warns(_("$$ of `%s' has no declared type"), rule->sym->tag);
1ff442ca
NF
1143 }
1144
1145 else if (isdigit(c) || c == '-')
1146 {
1147 ungetc (c, finput);
1148 n = read_signed_integer(finput);
1149 c = getc(finput);
1150
1151 if (!type_name && n > 0)
1152 type_name = get_type_name(n, rule);
1153
1154 fprintf(fguard, "yyvsp[%d]", n - stack_offset);
1155 if (type_name)
1156 fprintf(fguard, ".%s", type_name);
943819bf 1157 if(!type_name && typed)
a083fbbf 1158 warnss(_("$%s of `%s' has no declared type"), int_to_string(n), rule->sym->tag);
1ff442ca
NF
1159 continue;
1160 }
1161 else
aba5ca6d 1162 warns(_("$%s is invalid"), printable_version(c));
1ff442ca
NF
1163
1164 break;
1165
1166 case '@':
1167 c = getc(finput);
1168 if (isdigit(c) || c == '-')
1169 {
1170 ungetc (c, finput);
1171 n = read_signed_integer(finput);
1172 c = getc(finput);
1173 }
1174 else
943819bf 1175 {
aba5ca6d 1176 warns(_("@%s is invalid"), printable_version(c));
943819bf
RS
1177 n = 1;
1178 }
1ff442ca
NF
1179
1180 fprintf(fguard, "yylsp[%d]", n - stack_offset);
1181 yylsp_needed = 1;
1182
1183 continue;
1184
1185 case EOF:
a083fbbf 1186 fatal(_("unterminated %%guard clause"));
1ff442ca
NF
1187
1188 default:
1189 putc(c, fguard);
1190 }
1191
1192 if (c != '}' || count != 0)
1193 c = getc(finput);
1194 }
1195
1196 c = skip_white_space();
1197
1198 fprintf(fguard, ";\n break;}");
1199 if (c == '{')
1200 copy_action(rule, stack_offset);
1201 else if (c == '=')
1202 {
943819bf 1203 c = getc(finput); /* why not skip_white_space -wjh */
1ff442ca
NF
1204 if (c == '{')
1205 copy_action(rule, stack_offset);
1206 }
1207 else
1208 ungetc(c, finput);
1209}
1210
1211
1212
1213/* Assuming that a { has just been seen, copy everything up to the matching }
1214into the actions file.
1215stack_offset is the number of values in the current rule so far,
1216which says where to find $0 with respect to the top of the stack. */
1217
/* Copy the text of one action from finput to faction, as the body of
   `case <nrules>:' in the generated switch.

   RULE is the rule the action belongs to; it is used to look up the
   declared types of `$$' and `$N' and to name the lhs in warnings.
   STACK_OFFSET is subtracted from N when `$N' and `@N' are turned into
   yyvsp / yylsp subscripts; it is forced to 0 for a semantic parser.

   The brace depth starts at 1 and a `{' is written by this function, so
   the action's own opening brace has presumably been consumed by the
   caller already -- TODO confirm against the lexer.  */
1218void
118fb205 1219copy_action (symbol_list *rule, int stack_offset)
1ff442ca
NF
1220{
1221 register int c;
1222 register int n;
1223 register int count;
1224 register int match;
1225 register int ended;
1226 register char *type_name;
1227 int cplus_comment;
1228
1229 /* offset is always 0 if parser has already popped the stack pointer */
1230 if (semantic_parser) stack_offset = 0;
1231
/* Open the switch arm, optionally preceded by a #line directive that
   points back at the grammar file.  */
1232 fprintf(faction, "\ncase %d:\n", nrules);
1233 if (!nolinesflag)
1234 fprintf(faction, "#line %d \"%s\"\n", lineno, infile);
1235 putc('{', faction);
1236
/* COUNT is the current brace nesting depth; the action is finished when
   it drops to 0.  */
1237 count = 1;
1238 c = getc(finput);
1239
1240 while (count > 0)
1241 {
1242 while (c != '}')
1243 {
1244 switch (c)
1245 {
1246 case '\n':
1247 putc(c, faction);
1248 lineno++;
1249 break;
1250
1251 case '{':
1252 putc(c, faction);
1253 count++;
1254 break;
1255
/* String or character literal: copy up to the matching quote so that
   `$', `@' and braces inside it are not interpreted.  */
1256 case '\'':
1257 case '"':
1258 match = c;
1259 putc(c, faction);
1260 c = getc(finput);
1261
1262 while (c != match)
1263 {
943819bf
RS
/* Newline inside the literal: warn, push the newline back so the outer
   loop copies and counts it, and act as if the closing quote was seen.  */
1264 if (c == '\n')
1265 {
a083fbbf 1266 warn(_("unterminated string"));
943819bf
RS
1267 ungetc(c, finput);
1268 c = match;
1269 continue;
1270 }
1271 else if (c == EOF)
a083fbbf 1272 fatal(_("unterminated string at end of file"));
1ff442ca
NF
1273
1274 putc(c, faction);
1275
/* After a backslash, copy the next character unconditionally so that an
   escaped quote does not terminate the literal.  */
1276 if (c == '\\')
1277 {
1278 c = getc(finput);
1279 if (c == EOF)
a083fbbf 1280 fatal(_("unterminated string"));
1ff442ca
NF
1281 putc(c, faction);
1282 if (c == '\n')
1283 lineno++;
1284 }
1285
1286 c = getc(finput);
1287 }
1288
/* Emit the closing quote (or the substitute set above).  */
1289 putc(c, faction);
1290 break;
1291
/* Possible comment.  A `/' not followed by `*' or `/' is ordinary text:
   `continue' re-dispatches on the character that followed it.  */
1292 case '/':
1293 putc(c, faction);
1294 c = getc(finput);
1295 if (c != '*' && c != '/')
1296 continue;
1297
1298 cplus_comment = (c == '/');
1299 putc(c, faction);
1300 c = getc(finput);
1301
/* Copy the comment body.  A C comment ends at star-slash; a C++ comment
   ends at the first newline.  End of file inside either is fatal.  */
1302 ended = 0;
1303 while (!ended)
1304 {
1305 if (!cplus_comment && c == '*')
1306 {
1307 while (c == '*')
1308 {
1309 putc(c, faction);
1310 c = getc(finput);
1311 }
1312
1313 if (c == '/')
1314 {
1315 putc(c, faction);
1316 ended = 1;
1317 }
1318 }
1319 else if (c == '\n')
1320 {
1321 lineno++;
1322 putc(c, faction);
1323 if (cplus_comment)
1324 ended = 1;
1325 else
1326 c = getc(finput);
1327 }
1328 else if (c == EOF)
a083fbbf 1329 fatal(_("unterminated comment"));
1ff442ca
NF
1330 else
1331 {
1332 putc(c, faction);
1333 c = getc(finput);
1334 }
1335 }
1336
1337 break;
1338
/* `$$', `$N', `$<type>$' or `$<type>N': a semantic value reference.  */
1339 case '$':
1340 c = getc(finput);
1341 type_name = NULL;
1342
/* Explicit type: collect the name between the angle brackets into
   token_buffer, growing the buffer when it fills up.  */
1343 if (c == '<')
1344 {
1345 register char *cp = token_buffer;
1346
1347 while ((c = getc(finput)) != '>' && c > 0)
118fb205
JT
1348 {
1349 if (cp == token_buffer + maxtoken)
1350 cp = grow_token_buffer(cp);
1351
1352 *cp++ = c;
1353 }
1ff442ca
NF
1354 *cp = 0;
1355 type_name = token_buffer;
1356 value_components_used = 1;
1357
1358 c = getc(finput);
1359 }
/* `$$' becomes yyval, with `.type' appended when a type is known.  */
1360 if (c == '$')
1361 {
1362 fprintf(faction, "yyval");
1363 if (!type_name) type_name = get_type_name(0, rule);
1364 if (type_name)
1365 fprintf(faction, ".%s", type_name);
a083fbbf
RS
1366 if(!type_name && typed)
1367 warns(_("$$ of `%s' has no declared type"), rule->sym->tag);
1ff442ca
NF
1368 }
/* `$N' becomes yyvsp[N - stack_offset].  A declared type is looked up
   only for N > 0.  */
1369 else if (isdigit(c) || c == '-')
1370 {
1371 ungetc (c, finput);
1372 n = read_signed_integer(finput);
1373 c = getc(finput);
1374
1375 if (!type_name && n > 0)
1376 type_name = get_type_name(n, rule);
1377
1378 fprintf(faction, "yyvsp[%d]", n - stack_offset);
1379 if (type_name)
1380 fprintf(faction, ".%s", type_name);
a083fbbf
RS
1381 if(!type_name && typed)
1382 warnss(_("$%s of `%s' has no declared type"),
943819bf 1383 int_to_string(n), rule->sym->tag);
1ff442ca
NF
/* C already holds the character after the number, so skip the getc at
   the bottom of the loop.  */
1384 continue;
1385 }
1386 else
aba5ca6d 1387 warns(_("$%s is invalid"), printable_version(c));
1ff442ca
NF
1388
1389 break;
1390
/* `@N' becomes yylsp[N - stack_offset] and records that the location
   stack is needed.  An invalid construct is treated as `@1'; in that
   case C has not been consumed and is re-dispatched by `continue'.  */
1391 case '@':
1392 c = getc(finput);
1393 if (isdigit(c) || c == '-')
1394 {
1395 ungetc (c, finput);
1396 n = read_signed_integer(finput);
1397 c = getc(finput);
1398 }
1399 else
943819bf 1400 {
a083fbbf 1401 warn(_("invalid @-construct"));
943819bf
RS
1402 n = 1;
1403 }
1ff442ca
NF
1404
1405 fprintf(faction, "yylsp[%d]", n - stack_offset);
1406 yylsp_needed = 1;
1407
1408 continue;
1409
/* NOTE(review): no `break' after fatal(); presumably fatal() does not
   return -- confirm.  */
1410 case EOF:
a083fbbf 1411 fatal(_("unmatched `{'"));
1ff442ca
NF
1412
1413 default:
1414 putc(c, faction);
1415 }
1416
1417 c = getc(finput);
1418 }
1419
1420 /* above loop exits when c is '}' */
1421
/* A `}' that closes a nested block is copied and scanning goes on.  The
   `}' that brings the depth to 0 is not copied here; the trailer written
   below supplies the closing brace.  */
1422 if (--count)
1423 {
1424 putc(c, faction);
1425 c = getc(finput);
1426 }
1427 }
1428
1429 fprintf(faction, ";\n break;}");
1430}
1431
1432
1433
1434/* generate a dummy symbol, a nonterminal,
1435whose name cannot conflict with the user's names. */
1436
1437bucket *
118fb205 1438gensym (void)
1ff442ca
NF
1439{
1440 register bucket *sym;
1441
1442 sprintf (token_buffer, "@%d", ++gensym_count);
1443 sym = getsym(token_buffer);
1444 sym->class = SNTERM;
1445 sym->value = nvars++;
1446 return (sym);
1447}
1448
1449/* Parse the input grammar into a single symbol_list structure.
1450Each rule is represented by a sequence of symbols: the left hand side
1451followed by the contents of the right hand side, followed by a null pointer
1452instead of a symbol to terminate the rule.
1453The next symbol is the lhs of the following rule.
1454
1455All guards and actions are copied out to the appropriate files,
1456labelled by the rule number they apply to. */
1457
1458void
118fb205 1459readgram (void)
1ff442ca
NF
1460{
1461 register int t;
1462 register bucket *lhs;
1463 register symbol_list *p;
1464 register symbol_list *p1;
1465 register bucket *bp;
1466
1467 symbol_list *crule; /* points to first symbol_list of current rule. */
1468 /* its symbol is the lhs of the rule. */
1469 symbol_list *crule1; /* points to the symbol_list preceding crule. */
1470
/* P1 always points at the last node appended to the list so far.  */
1471 p1 = NULL;
1472
1473 t = lex();
1474
/* Each iteration consumes one rule (or one invalid token) until the
   second `%%' or end of file.  */
1475 while (t != TWO_PERCENTS && t != ENDFILE)
1476 {
1477 if (t == IDENTIFIER || t == BAR)
1478 {
1479 register int actionflag = 0;
1480 int rulelength = 0; /* number of symbols in rhs of this rule so far */
1481 int xactions = 0; /* JF for error checking */
1482 bucket *first_rhs = 0;
1483
/* An identifier starts a rule with a new lhs; a bar starts another
   alternative that reuses the lhs left over from the previous rule.  */
1484 if (t == IDENTIFIER)
1485 {
1486 lhs = symval;
943819bf
RS
1487
/* The first lhs seen becomes the start symbol unless start_flag is
   already set.  */
1488 if (!start_flag)
1489 {
1490 startval = lhs;
1491 start_flag = 1;
1492 }
a083fbbf 1493
1ff442ca
NF
1494 t = lex();
1495 if (t != COLON)
943819bf 1496 {
a083fbbf 1497 warn(_("ill-formed rule: initial symbol not followed by colon"));
943819bf
RS
1498 unlex(t);
1499 }
1ff442ca
NF
1500 }
1501
/* NOTE(review): with a leading bar there is no previous lhs, so whatever
   symval currently holds is used; it is dereferenced below through
   lhs->class.  */
943819bf 1502 if (nrules == 0 && t == BAR)
1ff442ca 1503 {
a083fbbf 1504 warn(_("grammar starts with vertical bar"));
943819bf 1505 lhs = symval; /* BOGUS: use a random symval */
1ff442ca 1506 }
1ff442ca
NF
1507 /* start a new rule and record its lhs. */
1508
1509 nrules++;
1510 nitems++;
1511
1512 record_rule_line ();
1513
1514 p = NEW(symbol_list);
1515 p->sym = lhs;
1516
/* Append the lhs node to the list, remembering the node before it in
   CRULE1 so that a dummy rule can later be spliced in ahead of this one.  */
1517 crule1 = p1;
1518 if (p1)
1519 p1->next = p;
1520 else
1521 grammar = p;
1522
1523 p1 = p;
1524 crule = p;
1525
1526 /* mark the rule's lhs as a nonterminal if not already so. */
1527
1528 if (lhs->class == SUNKNOWN)
1529 {
1530 lhs->class = SNTERM;
1531 lhs->value = nvars;
1532 nvars++;
1533 }
1534 else if (lhs->class == STOKEN)
a083fbbf 1535 warns(_("rule given for %s, which is a token"), lhs->tag);
1ff442ca
NF
1536
1537 /* read the rhs of the rule. */
1538
1539 for (;;)
1540 {
1541 t = lex();
943819bf
RS
/* After a PREC token, the next token's symval is recorded as the rule's
   precedence symbol, and scanning resumes with the token after that.  */
1542 if (t == PREC)
1543 {
1544 t = lex();
1545 crule->ruleprec = symval;
1546 t = lex();
1547 }
1ff442ca
NF
1548
/* Only identifiers and actions belong to the rhs; anything else ends it.  */
1549 if (! (t == IDENTIFIER || t == LEFT_CURLY)) break;
1550
1551 /* If next token is an identifier, see if a colon follows it.
1552 If one does, exit this rule now. */
1553 if (t == IDENTIFIER)
1554 {
1555 register bucket *ssave;
1556 register int t1;
1557
/* Peek one token ahead and push it back; symval is saved and restored
   around the peek.  */
1558 ssave = symval;
1559 t1 = lex();
1560 unlex(t1);
1561 symval = ssave;
1562 if (t1 == COLON) break;
1563
1564 if(!first_rhs) /* JF */
1565 first_rhs = symval;
1566 /* Not followed by colon =>
1567 process as part of this rule's rhs. */
1568 }
1569
1570 /* If we just passed an action, that action was in the middle
1571 of a rule, so make a dummy rule to reduce it to a
1572 non-terminal. */
1573 if (actionflag)
1574 {
1575 register bucket *sdummy;
1576
1577 /* Since the action was written out with this rule's */
943819bf 1578 /* number, we must give the new rule this number */
1ff442ca
NF
1579 /* by inserting the new rule before it. */
1580
1581 /* Make a dummy nonterminal, a gensym. */
1582 sdummy = gensym();
1583
1584 /* Make a new rule, whose body is empty,
1585 before the current one, so that the action
1586 just read can belong to it. */
1587 nrules++;
1588 nitems++;
1589 record_rule_line ();
/* Splice two nodes in front of CRULE: the dummy's lhs node and a node
   with no symbol that terminates the dummy's empty rhs.  CRULE1 is moved
   to that terminator so it again precedes CRULE.  */
1590 p = NEW(symbol_list);
1591 if (crule1)
1592 crule1->next = p;
1593 else grammar = p;
1594 p->sym = sdummy;
1595 crule1 = NEW(symbol_list);
1596 p->next = crule1;
1597 crule1->next = crule;
1598
1599 /* insert the dummy generated by that rule into this rule. */
1600 nitems++;
1601 p = NEW(symbol_list);
1602 p->sym = sdummy;
1603 p1->next = p;
1604 p1 = p;
1605
1606 actionflag = 0;
1607 }
1608
1609 if (t == IDENTIFIER)
1610 {
1611 nitems++;
1612 p = NEW(symbol_list);
1613 p->sym = symval;
1614 p1->next = p;
1615 p1 = p;
1616 }
1617 else /* handle an action. */
1618 {
/* RULELENGTH, the number of rhs elements read so far, is passed as the
   action's stack offset.  */
1619 copy_action(crule, rulelength);
1620 actionflag = 1;
1621 xactions++; /* JF */
1622 }
1623 rulelength++;
943819bf 1624 } /* end of read rhs of rule */
1ff442ca
NF
1625
1626 /* Put an empty link in the list to mark the end of this rule */
1627 p = NEW(symbol_list);
1628 p1->next = p;
1629 p1 = p;
1630
/* A PREC token here means the rhs loop already handled one immediately
   before it.  The second one is still applied, after a warning.  */
1631 if (t == PREC)
1632 {
a083fbbf 1633 warn(_("two @prec's in a row"));
1ff442ca
NF
1634 t = lex();
1635 crule->ruleprec = symval;
1636 t = lex();
1637 }
1638 if (t == GUARD)
1639 {
1640 if (! semantic_parser)
a083fbbf 1641 warn(_("%%guard present but %%semantic_parser not specified"));
1ff442ca
NF
1642
1643 copy_guard(crule, rulelength);
1644 t = lex();
1645 }
1646 else if (t == LEFT_CURLY)
1647 {
943819bf 1648 /* This case never occurs -wjh */
a083fbbf 1649 if (actionflag) warn(_("two actions at end of one rule"));
1ff442ca 1650 copy_action(crule, rulelength);
943819bf
RS
1651 actionflag = 1;
1652 xactions++; /* -wjh */
1ff442ca
NF
1653 t = lex();
1654 }
1655 /* If $$ is being set in default way,
1656 warn if any type mismatch. */
1657 else if (!xactions && first_rhs && lhs->type_name != first_rhs->type_name)
1658 {
/* The pointers differ; warn unless both names are set and compare equal
   as strings.  */
1659 if (lhs->type_name == 0 || first_rhs->type_name == 0
1660 || strcmp(lhs->type_name,first_rhs->type_name))
a083fbbf 1661 warnss(_("type clash (`%s' `%s') on default action"),
1ff442ca
NF
1662 lhs->type_name ? lhs->type_name : "",
1663 first_rhs->type_name ? first_rhs->type_name : "");
1664 }
1665 /* Warn if there is no default for $$ but we need one. */
1666 else if (!xactions && !first_rhs && lhs->type_name != 0)
a083fbbf 1667 warn(_("empty rule for typed nonterminal, and no action"));
1ff442ca
NF
/* The semicolon after a rule is optional.  */
1668 if (t == SEMICOLON)
1669 t = lex();
a083fbbf 1670 }
943819bf
RS
1671#if 0
1672 /* these things can appear as alternatives to rules. */
1673/* NO, they cannot.
1674 a) none of the documentation allows them
1675 b) most of them scan forward until finding a next %
1676 thus they may swallow lots of intervening rules
1677*/
1ff442ca
NF
1678 else if (t == TOKEN)
1679 {
1680 parse_token_decl(STOKEN, SNTERM);
1681 t = lex();
1682 }
1683 else if (t == NTERM)
1684 {
1685 parse_token_decl(SNTERM, STOKEN);
1686 t = lex();
1687 }
1688 else if (t == TYPE)
1689 {
1690 t = get_type();
1691 }
1692 else if (t == UNION)
1693 {
1694 parse_union_decl();
1695 t = lex();
1696 }
1697 else if (t == EXPECT)
1698 {
1699 parse_expect_decl();
1700 t = lex();
1701 }
1702 else if (t == START)
1703 {
1704 parse_start_decl();
1705 t = lex();
1706 }
943819bf
RS
1707#endif
1708
/* Anything that cannot start a rule is reported and skipped.  */
1ff442ca 1709 else
943819bf 1710 {
a083fbbf 1711 warns(_("invalid input: %s"), token_buffer);
943819bf
RS
1712 t = lex();
1713 }
1ff442ca
NF
1714 }
1715
943819bf
RS
1716 /* grammar has been read. Do some checking */
1717
1ff442ca 1718 if (nsyms > MAXSHORT)
a083fbbf 1719 fatals(_("too many symbols (tokens plus nonterminals); maximum %s"),
943819bf 1720 int_to_string(MAXSHORT));
1ff442ca 1721 if (nrules == 0)
a083fbbf 1722 fatal(_("no rules in the input grammar"));
1ff442ca
NF
1723
/* With no declared value types and no `$<type>' references, emit a
   default definition of YYSTYPE.  */
1724 if (typed == 0 /* JF put out same default YYSTYPE as YACC does */
1725 && !value_components_used)
1726 {
1727 /* We used to use `unsigned long' as YYSTYPE on MSDOS,
1728 but it seems better to be consistent.
1729 Most programs should declare their own type anyway. */
1730 fprintf(fattrs, "#ifndef YYSTYPE\n#define YYSTYPE int\n#endif\n");
1731 if (fdefines)
1732 fprintf(fdefines, "#ifndef YYSTYPE\n#define YYSTYPE int\n#endif\n");
1733 }
1734
1735 /* Report any undefined symbols and consider them nonterminals. */
1736
1737 for (bp = firstsymbol; bp; bp = bp->next)
1738 if (bp->class == SUNKNOWN)
1739 {
a083fbbf 1740 warns(_("symbol %s is used, but is not defined as a token and has no rules"),
1ff442ca 1741 bp->tag);
1ff442ca
NF
1742 bp->class = SNTERM;
1743 bp->value = nvars++;
1744 }
1745
/* Every symbol that is not a nonterminal counts as a token.  */
1746 ntokens = nsyms - nvars;
1747}
1748
1749
1750void
118fb205 1751record_rule_line (void)
1ff442ca
NF
1752{
1753 /* Record each rule's source line number in rline table. */
1754
1755 if (nrules >= rline_allocated)
1756 {
1757 rline_allocated = nrules * 2;
118fb205
JT
1758 rline = (short *) xrealloc ((char *) rline,
1759 rline_allocated * sizeof (short));
1ff442ca
NF
1760 }
1761 rline[nrules] = lineno;
1762}
1763
1764
1765/* read in a %type declaration and record its information for get_type_name to access */
943819bf
RS
1766/* this is unused. it is only called from the #if 0 part of readgram */
1767static int
118fb205 1768get_type (void)
1ff442ca
NF
1769{
1770 register int k;
1771 register int t;
1772 register char *name;
1773
1774 t = lex();
1775
a083fbbf 1776 if (t != TYPENAME)
943819bf 1777 {
a083fbbf 1778 warn(_("ill-formed %type declaration"));
943819bf
RS
1779 return t;
1780 }
1ff442ca
NF
1781
1782 k = strlen(token_buffer);
1783 name = NEW2(k + 1, char);
1784 strcpy(name, token_buffer);
1785
1786 for (;;)
1787 {
1788 t = lex();
1789
1790 switch (t)
1791 {
1792 case SEMICOLON:
1793 return (lex());
1794
1795 case COMMA:
1796 break;
1797
1798 case IDENTIFIER:
1799 if (symval->type_name == NULL)
1800 symval->type_name = name;
943819bf 1801 else if (strcmp(name, symval->type_name) != 0)
a083fbbf 1802 warns(_("type redeclaration for %s"), symval->tag);
1ff442ca
NF
1803
1804 break;
1805
1806 default:
1807 return (t);
1808 }
1809 }
1810}
1811
1812
1813
1814/* assign symbol numbers, and write definition of token names into fdefines.
1815Set up vectors tags and sprec of names and precedences of symbols. */
1816
/* Also fills user_toknums and sassoc, builds token_translations when
   translations is set, records error_token_number and start_symbol, and
   aborts through fatals() if the start symbol is undefined or is a
   token.  */
1817void
118fb205 1818packsymbols (void)
1ff442ca
NF
1819{
1820 register bucket *bp;
1821 register int tokno = 1;
1822 register int i;
1823 register int last_user_token_number;
1824
1825 /* int lossage = 0; JF set but not used */
1826
/* Slot 0 of tags and user_toknums is filled in directly here; tokens
   found in the symbol list are numbered from 1 (see tokno).  */
1827 tags = NEW2(nsyms + 1, char *);
1828 tags[0] = "$";
943819bf
RS
1829 user_toknums = NEW2(nsyms + 1, int);
1830 user_toknums[0] = 0;
1ff442ca
NF
1831
1832 sprec = NEW2(nsyms, short);
1833 sassoc = NEW2(nsyms, short);
1834
/* User token numbers handed out automatically below start after 256.  */
1835 max_user_token_number = 256;
1836 last_user_token_number = 256;
1837
1838 for (bp = firstsymbol; bp; bp = bp->next)
1839 {
/* Nonterminal values so far count from the first nonterminal; shift
   them so they follow the tokens.  */
1840 if (bp->class == SNTERM)
1841 {
1842 bp->value += ntokens;
1843 }
943819bf
RS
1844 else if (bp->alias)
1845 {
1846 /* this symbol and its alias are a single token defn.
1847 allocate a tokno, and assign to both
a083fbbf 1848 check agreement of ->prec and ->assoc fields
943819bf
RS
1849 and make both the same
1850 */
/* A value of 0 means neither half of the pair has been numbered yet.  */
1851 if (bp->value == 0)
1852 bp->value = bp->alias->value = tokno++;
1853
/* Reconcile precedence: a nonzero value wins over zero; two different
   nonzero values draw a warning (issued only from the SALIAS half) and
   this symbol's value is kept.  */
1854 if (bp->prec != bp->alias->prec) {
1855 if (bp->prec != 0 && bp->alias->prec != 0
1856 && bp->user_token_number == SALIAS)
a083fbbf 1857 warnss(_("conflicting precedences for %s and %s"),
943819bf
RS
1858 bp->tag, bp->alias->tag);
1859 if (bp->prec != 0) bp->alias->prec = bp->prec;
1860 else bp->prec = bp->alias->prec;
1861 }
1862
/* Same reconciliation for associativity.  */
1863 if (bp->assoc != bp->alias->assoc) {
1864 if (bp->assoc != 0 && bp->alias->assoc != 0
1865 && bp->user_token_number == SALIAS)
a083fbbf 1866 warnss(_("conflicting assoc values for %s and %s"),
943819bf
RS
1867 bp->tag, bp->alias->tag);
1868 if (bp->assoc != 0) bp->alias->assoc = bp->assoc;
1869 else bp->assoc = bp->alias->assoc;
1870 }
1871
1872 if (bp->user_token_number == SALIAS)
1873 continue; /* do not do processing below for SALIASs */
1874
1875 }
1876 else /* bp->class == STOKEN */
1877 {
1878 bp->value = tokno++;
1879 }
1880
/* When translations are in use, give a token without a user number the
   next free one, and track the largest user number seen.  */
1881 if (bp->class == STOKEN)
1ff442ca
NF
1882 {
1883 if (translations && !(bp->user_token_number))
1884 bp->user_token_number = ++last_user_token_number;
1885 if (bp->user_token_number > max_user_token_number)
1886 max_user_token_number = bp->user_token_number;
1ff442ca
NF
1887 }
1888
/* Record the per-symbol tables, indexed by internal symbol number.  */
1889 tags[bp->value] = bp->tag;
943819bf 1890 user_toknums[bp->value] = bp->user_token_number;
1ff442ca
NF
1891 sprec[bp->value] = bp->prec;
1892 sassoc[bp->value] = bp->assoc;
1893
1894 }
1895
1896 if (translations)
1897 {
/* NOTE(review): this `i' shadows the function-level `i' declared above.  */
1898 register int i;
1899
1900 token_translations = NEW2(max_user_token_number+1, short);
1901
1902 /* initialize all entries for literal tokens to 2,
572909b5
RS
1903 the internal token number for $undefined.,
1904 which represents all invalid inputs. */
1ff442ca 1905 for (i = 0; i <= max_user_token_number; i++)
a083fbbf 1906 token_translations[i] = 2;
1ff442ca 1907
943819bf
RS
/* Map each user token number to its internal number.  A slot that no
   longer holds 2 was already claimed by another token, so warn.  */
1908 for (bp = firstsymbol; bp; bp = bp->next)
1909 {
1910 if (bp->value >= ntokens) continue; /* non-terminal */
a083fbbf 1911 if (bp->user_token_number == SALIAS) continue;
943819bf 1912 if (token_translations[bp->user_token_number] != 2)
a083fbbf 1913 warnsss(_("tokens %s and %s both assigned number %s"),
1ff442ca
NF
1914 tags[token_translations[bp->user_token_number]],
1915 bp->tag,
943819bf
RS
1916 int_to_string(bp->user_token_number));
1917 token_translations[bp->user_token_number] = bp->value;
1918 }
1ff442ca
NF
1919 }
1920
1921 error_token_number = errtoken->value;
1922
943819bf
RS
1923 if (! noparserflag)
1924 output_token_defines(ftable);
1ff442ca
NF
1925
1926 if (startval->class == SUNKNOWN)
a083fbbf 1927 fatals(_("the start symbol %s is undefined"), startval->tag);
1ff442ca 1928 else if (startval->class == STOKEN)
a083fbbf 1929 fatals(_("the start symbol %s is a token"), startval->tag);
1ff442ca
NF
1930
1931 start_symbol = startval->value;
1932
/* With definesflag set, write the token definitions to fdefines too,
   plus an extern declaration of the value variable unless the parser is
   pure.  */
1933 if (definesflag)
1934 {
1935 output_token_defines(fdefines);
1936
1937 if (!pure_parser)
1938 {
1939 if (spec_name_prefix)
1940 fprintf(fdefines, "\nextern YYSTYPE %slval;\n", spec_name_prefix);
1941 else
1942 fprintf(fdefines, "\nextern YYSTYPE yylval;\n");
1943 }
1944
/* A semantic parser also gets an NT<name> define for every nonterminal.  */
1945 if (semantic_parser)
1946 for (i = ntokens; i < nsyms; i++)
1947 {
1948 /* don't make these for dummy nonterminals made by gensym. */
1949 if (*tags[i] != '@')
1950 fprintf(fdefines, "#define\tNT%s\t%d\n", tags[i], i);
1951 }
1952#if 0
1953 /* `fdefines' is now a temporary file, so we need to copy its
1954 contents in `done', so we can't close it here. */
1955 fclose(fdefines);
1956 fdefines = NULL;
1957#endif
1958 }
1959}
a083fbbf
RS
1960
1961/* For named tokens, but not literal ones, define the name.
1962 The value is the user token number.
943819bf 1963*/
1ff442ca 1964void
118fb205 1965output_token_defines (FILE *file)
1ff442ca
NF
1966{
1967 bucket *bp;
943819bf
RS
1968 register char *cp, *symbol;
1969 register char c;
1ff442ca
NF
1970
1971 for (bp = firstsymbol; bp; bp = bp->next)
1972 {
943819bf 1973 symbol = bp->tag; /* get symbol */
1ff442ca 1974
943819bf
RS
1975 if (bp->value >= ntokens) continue;
1976 if (bp->user_token_number == SALIAS) continue;
1977 if ('\'' == *symbol) continue; /* skip literal character */
1978 if (bp == errtoken) continue; /* skip error token */
a083fbbf 1979 if ('\"' == *symbol)
1ff442ca 1980 {
943819bf
RS
1981 /* use literal string only if given a symbol with an alias */
1982 if (bp->alias)
1983 symbol = bp->alias->tag;
1984 else
1985 continue;
1986 }
1ff442ca 1987
943819bf
RS
1988 /* Don't #define nonliteral tokens whose names contain periods. */
1989 cp = symbol;
1990 while ((c = *cp++) && c != '.');
1991 if (c != '\0') continue;
1ff442ca 1992
943819bf 1993 fprintf(file, "#define\t%s\t%d\n", symbol,
a083fbbf
RS
1994 ((translations && ! rawtoknumflag)
1995 ? bp->user_token_number
943819bf
RS
1996 : bp->value));
1997 if (semantic_parser)
1998 fprintf(file, "#define\tT%s\t%d\n", symbol, bp->value);
1ff442ca
NF
1999 }
2000
2001 putc('\n', file);
2002}
2003
2004
2005
2006/* convert the rules into the representation using rrhs, rlhs and ritems. */
2007
2008void
118fb205 2009packgram (void)
1ff442ca
NF
2010{
2011 register int itemno;
2012 register int ruleno;
2013 register symbol_list *p;
2014/* register bucket *bp; JF unused */
2015
2016 bucket *ruleprec;
2017
2018 ritem = NEW2(nitems + 1, short);
2019 rlhs = NEW2(nrules, short) - 1;
2020 rrhs = NEW2(nrules, short) - 1;
2021 rprec = NEW2(nrules, short) - 1;
2022 rprecsym = NEW2(nrules, short) - 1;
2023 rassoc = NEW2(nrules, short) - 1;
2024
2025 itemno = 0;
2026 ruleno = 1;
2027
2028 p = grammar;
2029 while (p)
2030 {
2031 rlhs[ruleno] = p->sym->value;
2032 rrhs[ruleno] = itemno;
2033 ruleprec = p->ruleprec;
2034
2035 p = p->next;
2036 while (p && p->sym)
2037 {
2038 ritem[itemno++] = p->sym->value;
2039 /* A rule gets by default the precedence and associativity
2040 of the last token in it. */
2041 if (p->sym->class == STOKEN)
2042 {
2043 rprec[ruleno] = p->sym->prec;
2044 rassoc[ruleno] = p->sym->assoc;
2045 }
2046 if (p) p = p->next;
2047 }
2048
2049 /* If this rule has a %prec,
2050 the specified symbol's precedence replaces the default. */
2051 if (ruleprec)
2052 {
2053 rprec[ruleno] = ruleprec->prec;
2054 rassoc[ruleno] = ruleprec->assoc;
2055 rprecsym[ruleno] = ruleprec->value;
2056 }
2057
2058 ritem[itemno++] = -ruleno;
2059 ruleno++;
2060
2061 if (p) p = p->next;
2062 }
2063
2064 ritem[itemno] = 0;
2065}
2066\f
/* Read a signed integer from STREAM and return its value.

   Accepts an optional leading `-' followed by decimal digits.  The first
   character that is not part of the number is pushed back onto STREAM.
   If no digits are present the result is 0 (a lone `-' is still
   consumed).

   A digit string too large for an int saturates at the largest int
   value (negated if a `-' was read) instead of overflowing, which is
   undefined behaviour for signed arithmetic.  */

int
read_signed_integer (FILE *stream)
{
  /* Largest int, computed locally because <limits.h> is not known to be
     included by this file.  */
  const int int_max = (int) (~0U >> 1);
  int c = getc (stream);
  int sign = 1;
  int n = 0;

  if (c == '-')
    {
      c = getc (stream);
      sign = -1;
    }

  while (isdigit (c))
    {
      int digit = c - '0';

      /* True exactly when 10 * n + digit would exceed int_max.  */
      if (n > (int_max - digit) / 10)
        n = int_max;
      else
        n = 10 * n + digit;

      c = getc (stream);
    }

  /* Pushing back EOF is a harmless no-op.  */
  ungetc (c, stream);

  return n * sign;
}