/* Input parser for bison
   Copyright (C) 1984, 86, 89, 92, 98, 2000 Free Software Foundation, Inc.

   This file is part of Bison, the GNU Compiler Compiler.

   Bison is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 2, or (at your option)
   any later version.

   Bison is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with Bison; see the file COPYING.  If not, write to
   the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
   Boston, MA 02111-1307, USA.  */

/* Read in the grammar specification and record it in the format
   described in gram.h.  All guards are copied into the fguard file
   and all actions into faction, in each case forming the body of a C
   function (yyguard or yyaction) which contains a switch statement to
   decide which guard or action to execute.

   The entry point is reader ().  */

#include <stdio.h>
#include <limits.h>
#include "system.h"
#include "files.h"
#include "alloc.h"
#include "symtab.h"
#include "lex.h"
#include "gram.h"
#include "machine.h"
#include "complain.h"

/* Text of the default location-type declaration.  It is written to
   the output by reader_output_yylsp when the grammar needs the
   location stack.  The text is spelled as adjacent string literals
   (rather than one literal with backslash-newline continuations) so
   that the indentation of this source file can never become part of
   the emitted text.  */
#define LTYPESTR \
  "\n" \
  "#ifndef YYLTYPE\n" \
  "typedef\n" \
  " struct yyltype\n" \
  " {\n" \
  " int timestamp;\n" \
  " int first_line;\n" \
  " int first_column;\n" \
  " int last_line;\n" \
  " int last_column;\n" \
  " char *text;\n" \
  " }\n" \
  " yyltype;\n" \
  "\n" \
  "#define YYLTYPE yyltype\n" \
  "#endif\n" \
  "\n"

62/* Number of slots allocated (but not necessarily used yet) in `rline' */
63int rline_allocated;
64
1ff442ca
NF
65extern int definesflag;
66extern int nolinesflag;
943819bf
RS
67extern int noparserflag;
68extern int rawtoknumflag;
1ff442ca
NF
69extern bucket *symval;
70extern int numval;
1ff442ca
NF
71extern int expected_conflicts;
72extern char *token_buffer;
118fb205
JT
73extern int maxtoken;
74
75extern void init_lex PARAMS((void));
76extern char *grow_token_buffer PARAMS((char *));
77extern void tabinit PARAMS((void));
78extern void output_headers PARAMS((void));
79extern void output_trailers PARAMS((void));
80extern void free_symtab PARAMS((void));
81extern void open_extra_files PARAMS((void));
118fb205 82extern char *printable_version PARAMS((int));
118fb205 83extern void unlex PARAMS((int));
118fb205
JT
84
85extern int skip_white_space PARAMS((void));
86extern int parse_percent_token PARAMS((void));
87extern int lex PARAMS((void));
1ff442ca
NF
88
89typedef
90 struct symbol_list
91 {
92 struct symbol_list *next;
93 bucket *sym;
94 bucket *ruleprec;
95 }
96 symbol_list;
97
98
118fb205
JT
99void reader PARAMS((void));
100void reader_output_yylsp PARAMS((FILE *));
101void read_declarations PARAMS((void));
102void copy_definition PARAMS((void));
103void parse_token_decl PARAMS((int, int));
104void parse_start_decl PARAMS((void));
105void parse_type_decl PARAMS((void));
106void parse_assoc_decl PARAMS((int));
107void parse_union_decl PARAMS((void));
108void parse_expect_decl PARAMS((void));
109char *get_type_name PARAMS((int, symbol_list *));
110void copy_guard PARAMS((symbol_list *, int));
111void parse_thong_decl PARAMS((void));
112void copy_action PARAMS((symbol_list *, int));
113bucket *gensym PARAMS((void));
114void readgram PARAMS((void));
115void record_rule_line PARAMS((void));
116void packsymbols PARAMS((void));
117void output_token_defines PARAMS((FILE *));
118void packgram PARAMS((void));
119int read_signed_integer PARAMS((FILE *));
118fb205 120
2686a6e7
JT
121#if 0
122static int get_type PARAMS((void));
123#endif
1ff442ca
NF
124
125int lineno;
126symbol_list *grammar;
127int start_flag;
128bucket *startval;
129char **tags;
943819bf 130int *user_toknums;
1ff442ca
NF
131
132/* Nonzero if components of semantic values are used, implying
133 they must be unions. */
134static int value_components_used;
135
136static int typed; /* nonzero if %union has been seen. */
137
138static int lastprec; /* incremented for each %left, %right or %nonassoc seen */
139
140static int gensym_count; /* incremented for each generated symbol */
141
142static bucket *errtoken;
5b2e3c89 143static bucket *undeftoken;
1ff442ca
NF
144
145/* Nonzero if any action or guard uses the @n construct. */
146static int yylsp_needed;
147
943819bf
RS
148
149static void
118fb205 150skip_to_char (int target)
943819bf
RS
151{
152 int c;
153 if (target == '\n')
a0f6b076 154 complain (_(" Skipping to next \\n"));
943819bf 155 else
a0f6b076 156 complain (_(" Skipping to next %c"), target);
943819bf
RS
157
158 do
159 c = skip_white_space();
160 while (c != target && c != EOF);
a083fbbf 161 if (c != EOF)
943819bf
RS
162 ungetc(c, finput);
163}
164
165
ae3c3164
AD
166/* Dump the string from FINPUT to FOUTPUT. MATCH is the delimiter of
167 the string (either ' or "). */
168
169static inline void
170copy_string (FILE *finput, FILE *foutput, int match)
171{
172 int c;
173
174 putc (match, foutput);
175 c = getc (finput);
176
177 while (c != match)
178 {
179 if (c == EOF)
180 fatal (_("unterminated string at end of file"));
181 if (c == '\n')
182 {
a0f6b076 183 complain (_("unterminated string"));
ae3c3164
AD
184 ungetc (c, finput);
185 c = match; /* invent terminator */
186 continue;
187 }
188
189 putc(c, foutput);
190
191 if (c == '\\')
192 {
193 c = getc (finput);
194 if (c == EOF)
195 fatal (_("unterminated string at end of file"));
196 putc (c, foutput);
197 if (c == '\n')
198 lineno++;
199 }
200
201 c = getc(finput);
202 }
203
204 putc(c, foutput);
205}
206
207
208/* Dump the comment from FINPUT to FOUTPUT. C is either `*' or `/',
209 depending upon the type of comments used. */
210
211static inline void
212copy_comment (FILE *finput, FILE *foutput, int c)
213{
214 int cplus_comment;
ae3c3164
AD
215 register int ended;
216
217 cplus_comment = (c == '/');
218 putc (c, foutput);
219 c = getc (finput);
220
221 ended = 0;
222 while (!ended)
223 {
224 if (!cplus_comment && c == '*')
225 {
226 while (c == '*')
227 {
228 putc(c, foutput);
229 c = getc(finput);
230 }
231
232 if (c == '/')
233 {
234 putc(c, foutput);
235 ended = 1;
236 }
237 }
238 else if (c == '\n')
239 {
240 lineno++;
241 putc (c, foutput);
242 if (cplus_comment)
243 ended = 1;
244 else
245 c = getc(finput);
246 }
247 else if (c == EOF)
248 fatal (_("unterminated comment"));
249 else
250 {
251 putc (c, foutput);
252 c = getc (finput);
253 }
254 }
255}
256
257
1ff442ca 258void
118fb205 259reader (void)
1ff442ca
NF
260{
261 start_flag = 0;
262 startval = NULL; /* start symbol not specified yet. */
263
264#if 0
265 translations = 0; /* initially assume token number translation not needed. */
266#endif
267 /* Nowadays translations is always set to 1,
268 since we give `error' a user-token-number
269 to satisfy the Posix demand for YYERRCODE==256. */
270 translations = 1;
271
272 nsyms = 1;
273 nvars = 0;
274 nrules = 0;
275 nitems = 0;
276 rline_allocated = 10;
277 rline = NEW2(rline_allocated, short);
278
279 typed = 0;
280 lastprec = 0;
281
282 gensym_count = 0;
283
284 semantic_parser = 0;
285 pure_parser = 0;
286 yylsp_needed = 0;
287
288 grammar = NULL;
289
290 init_lex();
291 lineno = 1;
292
293 /* initialize the symbol table. */
294 tabinit();
295 /* construct the error token */
296 errtoken = getsym("error");
297 errtoken->class = STOKEN;
298 errtoken->user_token_number = 256; /* Value specified by posix. */
299 /* construct a token that represents all undefined literal tokens. */
300 /* it is always token number 2. */
5b2e3c89
JT
301 undeftoken = getsym("$undefined.");
302 undeftoken->class = STOKEN;
303 undeftoken->user_token_number = 2;
1ff442ca
NF
304 /* Read the declaration section. Copy %{ ... %} groups to ftable and fdefines file.
305 Also notice any %token, %left, etc. found there. */
a083fbbf 306 if (noparserflag)
943819bf
RS
307 fprintf(ftable, "\n/* Bison-generated parse tables, made from %s\n",
308 infile);
309 else
310 fprintf(ftable, "\n/* A Bison parser, made from %s\n", infile);
6ed61226 311 fprintf(ftable, " by %s */\n\n", VERSION_STRING);
1ff442ca
NF
312 fprintf(ftable, "#define YYBISON 1 /* Identify Bison output. */\n\n");
313 read_declarations();
1ff442ca
NF
314 /* start writing the guard and action files, if they are needed. */
315 output_headers();
316 /* read in the grammar, build grammar in list form. write out guards and actions. */
317 readgram();
318 /* Now we know whether we need the line-number stack.
319 If we do, write its type into the .tab.h file. */
943819bf
RS
320 if (fdefines)
321 reader_output_yylsp(fdefines);
1ff442ca
NF
322 /* write closing delimiters for actions and guards. */
323 output_trailers();
324 if (yylsp_needed)
325 fprintf(ftable, "#define YYLSP_NEEDED\n\n");
326 /* assign the symbols their symbol numbers.
327 Write #defines for the token symbols into fdefines if requested. */
328 packsymbols();
329 /* convert the grammar into the format described in gram.h. */
330 packgram();
331 /* free the symbol table data structure
332 since symbols are now all referred to by symbol number. */
333 free_symtab();
334}
335
943819bf 336void
118fb205 337reader_output_yylsp (FILE *f)
943819bf
RS
338{
339 if (yylsp_needed)
340 fprintf(f, LTYPESTR);
341}
1ff442ca 342
41aca2e0
AD
343/* Read from finput until `%%' is seen. Discard the `%%'. Handle any
344 `%' declarations, and copy the contents of any `%{ ... %}' groups
345 to fattrs. */
1ff442ca
NF
346
347void
118fb205 348read_declarations (void)
1ff442ca
NF
349{
350 register int c;
351 register int tok;
352
353 for (;;)
354 {
355 c = skip_white_space();
356
357 if (c == '%')
358 {
359 tok = parse_percent_token();
360
361 switch (tok)
362 {
363 case TWO_PERCENTS:
364 return;
365
366 case PERCENT_LEFT_CURLY:
367 copy_definition();
368 break;
369
370 case TOKEN:
371 parse_token_decl (STOKEN, SNTERM);
372 break;
a083fbbf 373
1ff442ca
NF
374 case NTERM:
375 parse_token_decl (SNTERM, STOKEN);
376 break;
a083fbbf 377
1ff442ca
NF
378 case TYPE:
379 parse_type_decl();
380 break;
a083fbbf 381
1ff442ca
NF
382 case START:
383 parse_start_decl();
384 break;
a083fbbf 385
1ff442ca
NF
386 case UNION:
387 parse_union_decl();
388 break;
a083fbbf 389
1ff442ca
NF
390 case EXPECT:
391 parse_expect_decl();
392 break;
943819bf
RS
393 case THONG:
394 parse_thong_decl();
395 break;
1ff442ca
NF
396 case LEFT:
397 parse_assoc_decl(LEFT_ASSOC);
398 break;
399
400 case RIGHT:
401 parse_assoc_decl(RIGHT_ASSOC);
402 break;
403
404 case NONASSOC:
405 parse_assoc_decl(NON_ASSOC);
406 break;
407
408 case SEMANTIC_PARSER:
409 if (semantic_parser == 0)
410 {
411 semantic_parser = 1;
412 open_extra_files();
413 }
414 break;
415
416 case PURE_PARSER:
417 pure_parser = 1;
418 break;
419
943819bf
RS
420 case NOOP:
421 break;
422
1ff442ca 423 default:
a0f6b076 424 complain (_("unrecognized: %s"), token_buffer);
943819bf
RS
425 skip_to_char('%');
426 }
1ff442ca
NF
427 }
428 else if (c == EOF)
a0f6b076 429 fatal (_("no input grammar"));
1ff442ca 430 else
943819bf 431 {
a0f6b076 432 complain (_("unknown character: %s"), printable_version(c));
6666f98f 433 skip_to_char('%');
943819bf 434 }
1ff442ca
NF
435 }
436}
437
438
ae3c3164
AD
439/* Copy the contents of a `%{ ... %}' into the definitions file. The
440 `%{' has already been read. Return after reading the `%}'. */
1ff442ca
NF
441
442void
118fb205 443copy_definition (void)
1ff442ca
NF
444{
445 register int c;
ae3c3164
AD
446 /* -1 while reading a character if prev char was %. */
447 register int after_percent;
1ff442ca
NF
448
449 if (!nolinesflag)
450 fprintf(fattrs, "#line %d \"%s\"\n", lineno, infile);
451
452 after_percent = 0;
453
ae3c3164 454 c = getc (finput);
1ff442ca
NF
455
456 for (;;)
457 {
458 switch (c)
459 {
460 case '\n':
461 putc(c, fattrs);
462 lineno++;
463 break;
464
465 case '%':
466 after_percent = -1;
467 break;
a083fbbf 468
1ff442ca
NF
469 case '\'':
470 case '"':
ae3c3164 471 copy_string (finput, fattrs, c);
1ff442ca
NF
472 break;
473
474 case '/':
ae3c3164
AD
475 putc (c, fattrs);
476 c = getc (finput);
1ff442ca
NF
477 if (c != '*' && c != '/')
478 continue;
ae3c3164 479 copy_comment (finput, fattrs, c);
1ff442ca
NF
480 break;
481
482 case EOF:
a0f6b076
AD
483 fatal ("%s",
484 _("unterminated `%{' definition"));
1ff442ca
NF
485
486 default:
487 putc(c, fattrs);
488 }
489
490 c = getc(finput);
491
492 if (after_percent)
493 {
494 if (c == '}')
495 return;
496 putc('%', fattrs);
497 }
498 after_percent = 0;
499
500 }
501
502}
503
504
505
506/* parse what comes after %token or %nterm.
507For %token, what_is is STOKEN and what_is_not is SNTERM.
508For %nterm, the arguments are reversed. */
509
510void
118fb205 511parse_token_decl (int what_is, int what_is_not)
1ff442ca 512{
1ff442ca 513 register int token = 0;
1ff442ca 514 register char *typename = 0;
943819bf 515 register struct bucket *symbol = NULL; /* pts to symbol being defined */
1ff442ca
NF
516 int k;
517
1ff442ca
NF
518 for (;;)
519 {
e6011337
JT
520 int tmp_char = ungetc (skip_white_space (), finput);
521
522 if (tmp_char == '%')
1ff442ca 523 return;
e6011337 524 if (tmp_char == EOF)
a0f6b076 525 fatal (_("Premature EOF after %s"), token_buffer);
e6011337 526
1ff442ca
NF
527 token = lex();
528 if (token == COMMA)
943819bf
RS
529 {
530 symbol = NULL;
531 continue;
532 }
1ff442ca
NF
533 if (token == TYPENAME)
534 {
535 k = strlen(token_buffer);
536 typename = NEW2(k + 1, char);
537 strcpy(typename, token_buffer);
538 value_components_used = 1;
943819bf
RS
539 symbol = NULL;
540 }
541 else if (token == IDENTIFIER && *symval->tag == '\"'
a083fbbf 542 && symbol)
943819bf
RS
543 {
544 translations = 1;
545 symval->class = STOKEN;
546 symval->type_name = typename;
547 symval->user_token_number = symbol->user_token_number;
548 symbol->user_token_number = SALIAS;
549
a083fbbf
RS
550 symval->alias = symbol;
551 symbol->alias = symval;
943819bf
RS
552 symbol = NULL;
553
554 nsyms--; /* symbol and symval combined are only one symbol */
1ff442ca
NF
555 }
556 else if (token == IDENTIFIER)
557 {
558 int oldclass = symval->class;
943819bf 559 symbol = symval;
1ff442ca 560
943819bf 561 if (symbol->class == what_is_not)
a0f6b076 562 complain (_("symbol %s redefined"), symbol->tag);
943819bf 563 symbol->class = what_is;
1ff442ca 564 if (what_is == SNTERM && oldclass != SNTERM)
943819bf 565 symbol->value = nvars++;
1ff442ca
NF
566
567 if (typename)
568 {
943819bf
RS
569 if (symbol->type_name == NULL)
570 symbol->type_name = typename;
571 else if (strcmp(typename, symbol->type_name) != 0)
a0f6b076 572 complain (_("type redeclaration for %s"), symbol->tag);
1ff442ca
NF
573 }
574 }
943819bf 575 else if (symbol && token == NUMBER)
1ff442ca 576 {
943819bf 577 symbol->user_token_number = numval;
1ff442ca
NF
578 translations = 1;
579 }
580 else
943819bf 581 {
a0f6b076
AD
582 complain (_("`%s' is invalid in %s"),
583 token_buffer,
943819bf
RS
584 (what_is == STOKEN) ? "%token" : "%nterm");
585 skip_to_char('%');
586 }
1ff442ca
NF
587 }
588
589}
590
a083fbbf 591/* parse what comes after %thong
943819bf
RS
592 the full syntax is
593 %thong <type> token number literal
594 the <type> or number may be omitted. The number specifies the
595 user_token_number.
596
597 Two symbols are entered in the table, one for the token symbol and
598 one for the literal. Both are given the <type>, if any, from the declaration.
599 The ->user_token_number of the first is SALIAS and the ->user_token_number
600 of the second is set to the number, if any, from the declaration.
601 The two symbols are linked via pointers in their ->alias fields.
a083fbbf 602
943819bf
RS
603 during output_defines_table, the symbol is reported
604 thereafter, only the literal string is retained
605 it is the literal string that is output to yytname
606*/
607
608void
118fb205 609parse_thong_decl (void)
943819bf
RS
610{
611 register int token;
612 register struct bucket *symbol;
613 register char *typename = 0;
614 int k, usrtoknum;
615
616 translations = 1;
617 token = lex(); /* fetch typename or first token */
618 if (token == TYPENAME) {
619 k = strlen(token_buffer);
620 typename = NEW2(k + 1, char);
621 strcpy(typename, token_buffer);
622 value_components_used = 1;
623 token = lex(); /* fetch first token */
624 }
625
626 /* process first token */
627
a083fbbf 628 if (token != IDENTIFIER)
943819bf 629 {
a0f6b076
AD
630 complain (_("unrecognized item %s, expected an identifier"),
631 token_buffer);
943819bf
RS
632 skip_to_char('%');
633 return;
634 }
635 symval->class = STOKEN;
636 symval->type_name = typename;
637 symval->user_token_number = SALIAS;
638 symbol = symval;
639
640 token = lex(); /* get number or literal string */
a083fbbf 641
943819bf
RS
642 if (token == NUMBER) {
643 usrtoknum = numval;
644 token = lex(); /* okay, did number, now get literal */
645 }
646 else usrtoknum = 0;
647
648 /* process literal string token */
649
a083fbbf 650 if (token != IDENTIFIER || *symval->tag != '\"')
943819bf 651 {
a0f6b076
AD
652 complain (_("expected string constant instead of %s"),
653 token_buffer);
943819bf
RS
654 skip_to_char('%');
655 return;
656 }
657 symval->class = STOKEN;
658 symval->type_name = typename;
659 symval->user_token_number = usrtoknum;
660
a083fbbf
RS
661 symval->alias = symbol;
662 symbol->alias = symval;
943819bf
RS
663
664 nsyms--; /* symbol and symval combined are only one symbol */
665}
1ff442ca
NF
666
667
a0f6b076 668/* Parse what comes after %start */
1ff442ca
NF
669
670void
118fb205 671parse_start_decl (void)
1ff442ca
NF
672{
673 if (start_flag)
a0f6b076 674 complain ("%s", _("multiple %start declarations"));
1ff442ca 675 if (lex() != IDENTIFIER)
a0f6b076 676 complain ("%s", _("invalid %start declaration"));
943819bf
RS
677 else
678 {
679 start_flag = 1;
680 startval = symval;
681 }
1ff442ca
NF
682}
683
684
685
686/* read in a %type declaration and record its information for get_type_name to access */
687
688void
118fb205 689parse_type_decl (void)
1ff442ca
NF
690{
691 register int k;
692 register char *name;
1ff442ca
NF
693
694 if (lex() != TYPENAME)
943819bf 695 {
a0f6b076 696 complain ("%s", _("%type declaration has no <typename>"));
943819bf
RS
697 skip_to_char('%');
698 return;
699 }
1ff442ca
NF
700
701 k = strlen(token_buffer);
702 name = NEW2(k + 1, char);
703 strcpy(name, token_buffer);
704
1ff442ca
NF
705 for (;;)
706 {
707 register int t;
e6011337 708 int tmp_char = ungetc (skip_white_space (), finput);
1ff442ca 709
e6011337 710 if (tmp_char == '%')
1ff442ca 711 return;
e6011337 712 if (tmp_char == EOF)
a0f6b076 713 fatal (_("Premature EOF after %s"), token_buffer);
1ff442ca 714
1ff442ca
NF
715 t = lex();
716
717 switch (t)
718 {
719
720 case COMMA:
721 case SEMICOLON:
722 break;
723
724 case IDENTIFIER:
725 if (symval->type_name == NULL)
726 symval->type_name = name;
943819bf 727 else if (strcmp(name, symval->type_name) != 0)
a0f6b076 728 complain (_("type redeclaration for %s"), symval->tag);
1ff442ca
NF
729
730 break;
731
732 default:
a0f6b076
AD
733 complain (_("invalid %%type declaration due to item: %s"),
734 token_buffer);
943819bf 735 skip_to_char('%');
1ff442ca
NF
736 }
737 }
738}
739
740
741
742/* read in a %left, %right or %nonassoc declaration and record its information. */
743/* assoc is either LEFT_ASSOC, RIGHT_ASSOC or NON_ASSOC. */
744
745void
118fb205 746parse_assoc_decl (int assoc)
1ff442ca
NF
747{
748 register int k;
749 register char *name = NULL;
943819bf 750 register int prev = 0;
1ff442ca
NF
751
752 lastprec++; /* Assign a new precedence level, never 0. */
753
1ff442ca
NF
754 for (;;)
755 {
756 register int t;
e6011337 757 int tmp_char = ungetc (skip_white_space (), finput);
1ff442ca 758
e6011337 759 if (tmp_char == '%')
1ff442ca 760 return;
e6011337 761 if (tmp_char == EOF)
a0f6b076 762 fatal (_("Premature EOF after %s"), token_buffer);
1ff442ca 763
1ff442ca
NF
764 t = lex();
765
766 switch (t)
767 {
768
769 case TYPENAME:
770 k = strlen(token_buffer);
771 name = NEW2(k + 1, char);
772 strcpy(name, token_buffer);
773 break;
774
775 case COMMA:
776 break;
777
778 case IDENTIFIER:
779 if (symval->prec != 0)
a0f6b076 780 complain (_("redefining precedence of %s"), symval->tag);
1ff442ca
NF
781 symval->prec = lastprec;
782 symval->assoc = assoc;
783 if (symval->class == SNTERM)
a0f6b076 784 complain (_("symbol %s redefined"), symval->tag);
1ff442ca
NF
785 symval->class = STOKEN;
786 if (name)
787 { /* record the type, if one is specified */
788 if (symval->type_name == NULL)
789 symval->type_name = name;
943819bf 790 else if (strcmp(name, symval->type_name) != 0)
a0f6b076 791 complain (_("type redeclaration for %s"), symval->tag);
1ff442ca
NF
792 }
793 break;
794
795 case NUMBER:
796 if (prev == IDENTIFIER)
797 {
798 symval->user_token_number = numval;
799 translations = 1;
800 }
a083fbbf 801 else
943819bf 802 {
a0f6b076 803 complain (_("invalid text (%s) - number should be after identifier"),
943819bf
RS
804 token_buffer);
805 skip_to_char('%');
806 }
1ff442ca
NF
807 break;
808
809 case SEMICOLON:
810 return;
811
812 default:
a0f6b076 813 complain (_("unexpected item: %s"), token_buffer);
943819bf 814 skip_to_char('%');
1ff442ca
NF
815 }
816
817 prev = t;
818
819 }
820}
821
822
823
824/* copy the union declaration into fattrs (and fdefines),
825 where it is made into the
826 definition of YYSTYPE, the type of elements of the parser value stack. */
827
828void
118fb205 829parse_union_decl (void)
1ff442ca
NF
830{
831 register int c;
832 register int count;
833 register int in_comment;
834 int cplus_comment;
835
836 if (typed)
a0f6b076 837 complain ("%s", _("multiple %union declarations"));
1ff442ca
NF
838
839 typed = 1;
840
841 if (!nolinesflag)
842 fprintf(fattrs, "\n#line %d \"%s\"\n", lineno, infile);
843 else
844 fprintf(fattrs, "\n");
845
846 fprintf(fattrs, "typedef union");
847 if (fdefines)
848 fprintf(fdefines, "typedef union");
849
850 count = 0;
851 in_comment = 0;
852
853 c = getc(finput);
854
855 while (c != EOF)
856 {
857 putc(c, fattrs);
858 if (fdefines)
859 putc(c, fdefines);
860
861 switch (c)
862 {
863 case '\n':
864 lineno++;
865 break;
866
867 case '/':
868 c = getc(finput);
869 if (c != '*' && c != '/')
870 ungetc(c, finput);
871 else
872 {
873 putc(c, fattrs);
874 if (fdefines)
875 putc(c, fdefines);
876 cplus_comment = (c == '/');
877 in_comment = 1;
878 c = getc(finput);
879 while (in_comment)
880 {
881 putc(c, fattrs);
882 if (fdefines)
883 putc(c, fdefines);
884
885 if (c == '\n')
886 {
887 lineno++;
888 if (cplus_comment)
889 {
890 in_comment = 0;
891 break;
892 }
893 }
894 if (c == EOF)
a0f6b076 895 fatal (_("unterminated comment at end of file"));
1ff442ca
NF
896
897 if (!cplus_comment && c == '*')
898 {
899 c = getc(finput);
900 if (c == '/')
901 {
902 putc('/', fattrs);
903 if (fdefines)
904 putc('/', fdefines);
905 in_comment = 0;
906 }
907 }
908 else
909 c = getc(finput);
910 }
911 }
912 break;
913
914
915 case '{':
916 count++;
917 break;
918
919 case '}':
920 if (count == 0)
a0f6b076 921 complain (_("unmatched close-brace (`}')"));
1ff442ca 922 count--;
943819bf 923 if (count <= 0)
1ff442ca
NF
924 {
925 fprintf(fattrs, " YYSTYPE;\n");
926 if (fdefines)
927 fprintf(fdefines, " YYSTYPE;\n");
928 /* JF don't choke on trailing semi */
929 c=skip_white_space();
930 if(c!=';') ungetc(c,finput);
931 return;
932 }
933 }
934
935 c = getc(finput);
936 }
937}
938
939/* parse the declaration %expect N which says to expect N
940 shift-reduce conflicts. */
941
942void
118fb205 943parse_expect_decl (void)
1ff442ca
NF
944{
945 register int c;
946 register int count;
947 char buffer[20];
948
949 c = getc(finput);
950 while (c == ' ' || c == '\t')
951 c = getc(finput);
952
953 count = 0;
954 while (c >= '0' && c <= '9')
955 {
956 if (count < 20)
957 buffer[count++] = c;
958 c = getc(finput);
959 }
960 buffer[count] = 0;
961
962 ungetc (c, finput);
963
943819bf 964 if (count <= 0 || count > 10)
a0f6b076 965 complain ("%s", _("argument of %expect is not an integer"));
1ff442ca
NF
966 expected_conflicts = atoi (buffer);
967}
968
969/* that's all of parsing the declaration section */
970\f
7b306f52
AD
971/* FINPUT is pointing to a location (i.e., a `@'). Output to FOUTPUT
972 a reference to this location. STACK_OFFSET is the number of values
973 in the current rule so far, which says where to find `$0' with
974 respect to the top of the stack. */
975static inline void
976copy_at (FILE *finput, FILE *foutput, int stack_offset)
977{
978 int c;
979
980 c = getc (finput);
981 if (c == '$')
982 {
983 fprintf (foutput, "yyloc");
984 yylsp_needed = 1;
985 }
986 else if (isdigit(c) || c == '-')
987 {
988 int n;
989
990 ungetc (c, finput);
991 n = read_signed_integer (finput);
992
993 fprintf (foutput, "yylsp[%d]", n - stack_offset);
994 yylsp_needed = 1;
995 }
996 else
a0f6b076 997 complain (_("@%s is invalid"), printable_version (c));
7b306f52
AD
998}
999
1000
a0f6b076
AD
1001/* Get the data type (alternative in the union) of the value for
1002 symbol n in rule rule. */
1ff442ca
NF
1003
1004char *
118fb205 1005get_type_name (int n, symbol_list *rule)
1ff442ca 1006{
1ff442ca
NF
1007 register int i;
1008 register symbol_list *rp;
1009
1010 if (n < 0)
943819bf 1011 {
a0f6b076 1012 complain (_("invalid $ value"));
943819bf
RS
1013 return NULL;
1014 }
1ff442ca
NF
1015
1016 rp = rule;
1017 i = 0;
1018
1019 while (i < n)
1020 {
1021 rp = rp->next;
1022 if (rp == NULL || rp->sym == NULL)
943819bf 1023 {
a0f6b076 1024 complain (_("invalid $ value"));
943819bf
RS
1025 return NULL;
1026 }
1ff442ca
NF
1027 i++;
1028 }
1029
36281465 1030 return rp->sym->type_name;
1ff442ca
NF
1031}
1032
1033
3cef001a 1034
41aca2e0
AD
1035/* After `%guard' is seen in the input file, copy the actual guard
1036 into the guards file. If the guard is followed by an action, copy
1037 that into the actions file. STACK_OFFSET is the number of values
1038 in the current rule so far, which says where to find `$0' with
1039 respect to the top of the stack, for the simple parser in which the
1040 stack is not popped until after the guard is run. */
1ff442ca
NF
1041
1042void
118fb205 1043copy_guard (symbol_list *rule, int stack_offset)
1ff442ca
NF
1044{
1045 register int c;
1046 register int n;
1047 register int count;
1ff442ca
NF
1048 register char *type_name;
1049 int brace_flag = 0;
1ff442ca
NF
1050
1051 /* offset is always 0 if parser has already popped the stack pointer */
1052 if (semantic_parser) stack_offset = 0;
1053
1054 fprintf(fguard, "\ncase %d:\n", nrules);
1055 if (!nolinesflag)
41aca2e0 1056 fprintf (fguard, "#line %d \"%s\"\n", lineno, infile);
1ff442ca
NF
1057 putc('{', fguard);
1058
1059 count = 0;
1060 c = getc(finput);
1061
1062 while (brace_flag ? (count > 0) : (c != ';'))
1063 {
1064 switch (c)
1065 {
1066 case '\n':
1067 putc(c, fguard);
1068 lineno++;
1069 break;
1070
1071 case '{':
1072 putc(c, fguard);
1073 brace_flag = 1;
1074 count++;
1075 break;
1076
1077 case '}':
1078 putc(c, fguard);
1079 if (count > 0)
1080 count--;
a083fbbf 1081 else
943819bf 1082 {
a0f6b076 1083 complain (_("unmatched right brace (`}')"));
943819bf
RS
1084 c = getc(finput); /* skip it */
1085 }
1ff442ca
NF
1086 break;
1087
1088 case '\'':
1089 case '"':
ca36d2ef 1090 copy_string (finput, fguard, c);
1ff442ca
NF
1091 break;
1092
1093 case '/':
3cef001a
AD
1094 putc (c, fguard);
1095 c = getc (finput);
1ff442ca
NF
1096 if (c != '*' && c != '/')
1097 continue;
3cef001a 1098 copy_comment (finput, fguard, c);
1ff442ca
NF
1099 break;
1100
1101 case '$':
1102 c = getc(finput);
1103 type_name = NULL;
1104
1105 if (c == '<')
1106 {
1107 register char *cp = token_buffer;
1108
1109 while ((c = getc(finput)) != '>' && c > 0)
118fb205
JT
1110 {
1111 if (cp == token_buffer + maxtoken)
1112 cp = grow_token_buffer(cp);
1113
1114 *cp++ = c;
1115 }
1ff442ca
NF
1116 *cp = 0;
1117 type_name = token_buffer;
1118
1119 c = getc(finput);
1120 }
1121
1122 if (c == '$')
1123 {
1124 fprintf(fguard, "yyval");
41aca2e0
AD
1125 if (!type_name)
1126 type_name = rule->sym->type_name;
1ff442ca
NF
1127 if (type_name)
1128 fprintf(fguard, ".%s", type_name);
943819bf 1129 if(!type_name && typed)
a0f6b076
AD
1130 complain (_("$$ of `%s' has no declared type"),
1131 rule->sym->tag);
1ff442ca 1132 }
1ff442ca
NF
1133 else if (isdigit(c) || c == '-')
1134 {
1135 ungetc (c, finput);
41aca2e0
AD
1136 n = read_signed_integer (finput);
1137 c = getc (finput);
1ff442ca
NF
1138
1139 if (!type_name && n > 0)
1140 type_name = get_type_name(n, rule);
1141
1142 fprintf(fguard, "yyvsp[%d]", n - stack_offset);
1143 if (type_name)
1144 fprintf(fguard, ".%s", type_name);
6666f98f 1145 if (!type_name && typed)
a0f6b076
AD
1146 complain (_("$%d of `%s' has no declared type"),
1147 n, rule->sym->tag);
1ff442ca
NF
1148 continue;
1149 }
1150 else
a0f6b076 1151 complain (_("$%s is invalid"), printable_version (c));
1ff442ca
NF
1152 break;
1153
1154 case '@':
7b306f52 1155 copy_at (finput, fguard, stack_offset);
6666f98f 1156 break;
1ff442ca
NF
1157
1158 case EOF:
a0f6b076
AD
1159 fatal ("%s",
1160 _("unterminated %guard clause"));
1ff442ca
NF
1161
1162 default:
6666f98f 1163 putc (c, fguard);
1ff442ca
NF
1164 }
1165
1166 if (c != '}' || count != 0)
1167 c = getc(finput);
1168 }
1169
1170 c = skip_white_space();
1171
1172 fprintf(fguard, ";\n break;}");
1173 if (c == '{')
7b306f52 1174 copy_action (rule, stack_offset);
1ff442ca
NF
1175 else if (c == '=')
1176 {
943819bf 1177 c = getc(finput); /* why not skip_white_space -wjh */
1ff442ca 1178 if (c == '{')
7b306f52 1179 copy_action (rule, stack_offset);
1ff442ca
NF
1180 }
1181 else
1182 ungetc(c, finput);
1183}
1184
1185
1186
41aca2e0
AD
1187/* Assuming that a `{' has just been seen, copy everything up to the
1188 matching `}' into the actions file. STACK_OFFSET is the number of
1189 values in the current rule so far, which says where to find `$0'
1190 with respect to the top of the stack. */
1ff442ca
NF
1191
1192void
118fb205 1193copy_action (symbol_list *rule, int stack_offset)
1ff442ca
NF
1194{
1195 register int c;
1196 register int n;
1197 register int count;
1ff442ca 1198 register char *type_name;
1ff442ca
NF
1199
1200 /* offset is always 0 if parser has already popped the stack pointer */
41aca2e0
AD
1201 if (semantic_parser)
1202 stack_offset = 0;
1ff442ca 1203
41aca2e0 1204 fprintf (faction, "\ncase %d:\n", nrules);
1ff442ca 1205 if (!nolinesflag)
41aca2e0
AD
1206 fprintf (faction, "#line %d \"%s\"\n", lineno, infile);
1207 putc ('{', faction);
1ff442ca
NF
1208
1209 count = 1;
1210 c = getc(finput);
1211
1212 while (count > 0)
1213 {
1214 while (c != '}')
1215 {
1216 switch (c)
1217 {
1218 case '\n':
1219 putc(c, faction);
1220 lineno++;
1221 break;
1222
1223 case '{':
1224 putc(c, faction);
1225 count++;
1226 break;
1227
1228 case '\'':
1229 case '"':
ca36d2ef 1230 copy_string (finput, faction, c);
1ff442ca
NF
1231 break;
1232
1233 case '/':
1234 putc(c, faction);
1235 c = getc(finput);
1236 if (c != '*' && c != '/')
1237 continue;
3cef001a 1238 copy_comment (finput, faction, c);
1ff442ca
NF
1239 break;
1240
1241 case '$':
1242 c = getc(finput);
1243 type_name = NULL;
1244
1245 if (c == '<')
1246 {
1247 register char *cp = token_buffer;
1248
1249 while ((c = getc(finput)) != '>' && c > 0)
118fb205
JT
1250 {
1251 if (cp == token_buffer + maxtoken)
1252 cp = grow_token_buffer(cp);
1253
1254 *cp++ = c;
1255 }
1ff442ca
NF
1256 *cp = 0;
1257 type_name = token_buffer;
1258 value_components_used = 1;
1259
1260 c = getc(finput);
1261 }
1262 if (c == '$')
1263 {
1264 fprintf(faction, "yyval");
41aca2e0
AD
1265 if (!type_name)
1266 type_name = get_type_name(0, rule);
1ff442ca
NF
1267 if (type_name)
1268 fprintf(faction, ".%s", type_name);
a083fbbf 1269 if(!type_name && typed)
a0f6b076
AD
1270 complain (_("$$ of `%s' has no declared type"),
1271 rule->sym->tag);
1ff442ca
NF
1272 }
1273 else if (isdigit(c) || c == '-')
1274 {
1275 ungetc (c, finput);
1276 n = read_signed_integer(finput);
1277 c = getc(finput);
1278
1279 if (!type_name && n > 0)
1280 type_name = get_type_name(n, rule);
1281
1282 fprintf(faction, "yyvsp[%d]", n - stack_offset);
1283 if (type_name)
1284 fprintf(faction, ".%s", type_name);
a083fbbf 1285 if(!type_name && typed)
a0f6b076
AD
1286 complain (_("$%d of `%s' has no declared type"),
1287 n, rule->sym->tag);
1ff442ca
NF
1288 continue;
1289 }
1290 else
a0f6b076 1291 complain (_("$%s is invalid"), printable_version (c));
1ff442ca
NF
1292
1293 break;
1294
1295 case '@':
7b306f52 1296 copy_at (finput, faction, stack_offset);
6666f98f 1297 break;
1ff442ca
NF
1298
1299 case EOF:
a0f6b076 1300 fatal (_("unmatched `{'"));
1ff442ca
NF
1301
1302 default:
1303 putc(c, faction);
1304 }
1305
1306 c = getc(finput);
1307 }
1308
1309 /* above loop exits when c is '}' */
1310
1311 if (--count)
1312 {
1313 putc(c, faction);
1314 c = getc(finput);
1315 }
1316 }
1317
1318 fprintf(faction, ";\n break;}");
1319}
1320
1321
1322
1323/* generate a dummy symbol, a nonterminal,
1324whose name cannot conflict with the user's names. */
1325
1326bucket *
118fb205 1327gensym (void)
1ff442ca
NF
1328{
1329 register bucket *sym;
1330
1331 sprintf (token_buffer, "@%d", ++gensym_count);
1332 sym = getsym(token_buffer);
1333 sym->class = SNTERM;
1334 sym->value = nvars++;
36281465 1335 return sym;
1ff442ca
NF
1336}
1337
1338/* Parse the input grammar into a one symbol_list structure.
1339Each rule is represented by a sequence of symbols: the left hand side
1340followed by the contents of the right hand side, followed by a null pointer
1341instead of a symbol to terminate the rule.
1342The next symbol is the lhs of the following rule.
1343
1344All guards and actions are copied out to the appropriate files,
1345labelled by the rule number they apply to. */
1346
1347void
118fb205 1348readgram (void)
1ff442ca
NF
1349{
1350 register int t;
2686a6e7 1351 register bucket *lhs = NULL;
1ff442ca
NF
1352 register symbol_list *p;
1353 register symbol_list *p1;
1354 register bucket *bp;
1355
1356 symbol_list *crule; /* points to first symbol_list of current rule. */
1357 /* its symbol is the lhs of the rule. */
1358 symbol_list *crule1; /* points to the symbol_list preceding crule. */
1359
1360 p1 = NULL;
1361
1362 t = lex();
1363
1364 while (t != TWO_PERCENTS && t != ENDFILE)
1365 {
1366 if (t == IDENTIFIER || t == BAR)
1367 {
1368 register int actionflag = 0;
1369 int rulelength = 0; /* number of symbols in rhs of this rule so far */
1370 int xactions = 0; /* JF for error checking */
1371 bucket *first_rhs = 0;
1372
1373 if (t == IDENTIFIER)
1374 {
1375 lhs = symval;
943819bf
RS
1376
1377 if (!start_flag)
1378 {
1379 startval = lhs;
1380 start_flag = 1;
1381 }
a083fbbf 1382
1ff442ca
NF
1383 t = lex();
1384 if (t != COLON)
943819bf 1385 {
a0f6b076 1386 complain (_("ill-formed rule: initial symbol not followed by colon"));
943819bf
RS
1387 unlex(t);
1388 }
1ff442ca
NF
1389 }
1390
943819bf 1391 if (nrules == 0 && t == BAR)
1ff442ca 1392 {
a0f6b076 1393 complain (_("grammar starts with vertical bar"));
943819bf 1394 lhs = symval; /* BOGUS: use a random symval */
1ff442ca 1395 }
1ff442ca
NF
1396 /* start a new rule and record its lhs. */
1397
1398 nrules++;
1399 nitems++;
1400
1401 record_rule_line ();
1402
1403 p = NEW(symbol_list);
1404 p->sym = lhs;
1405
1406 crule1 = p1;
1407 if (p1)
1408 p1->next = p;
1409 else
1410 grammar = p;
1411
1412 p1 = p;
1413 crule = p;
1414
1415 /* mark the rule's lhs as a nonterminal if not already so. */
1416
1417 if (lhs->class == SUNKNOWN)
1418 {
1419 lhs->class = SNTERM;
1420 lhs->value = nvars;
1421 nvars++;
1422 }
1423 else if (lhs->class == STOKEN)
a0f6b076 1424 complain (_("rule given for %s, which is a token"), lhs->tag);
1ff442ca
NF
1425
1426 /* read the rhs of the rule. */
1427
1428 for (;;)
1429 {
1430 t = lex();
943819bf
RS
1431 if (t == PREC)
1432 {
1433 t = lex();
1434 crule->ruleprec = symval;
1435 t = lex();
1436 }
1ff442ca
NF
1437
1438 if (! (t == IDENTIFIER || t == LEFT_CURLY)) break;
1439
1440 /* If next token is an identifier, see if a colon follows it.
1441 If one does, exit this rule now. */
1442 if (t == IDENTIFIER)
1443 {
1444 register bucket *ssave;
1445 register int t1;
1446
1447 ssave = symval;
1448 t1 = lex();
1449 unlex(t1);
1450 symval = ssave;
1451 if (t1 == COLON) break;
1452
1453 if(!first_rhs) /* JF */
1454 first_rhs = symval;
1455 /* Not followed by colon =>
1456 process as part of this rule's rhs. */
1457 }
1458
1459 /* If we just passed an action, that action was in the middle
1460 of a rule, so make a dummy rule to reduce it to a
1461 non-terminal. */
1462 if (actionflag)
1463 {
1464 register bucket *sdummy;
1465
1466 /* Since the action was written out with this rule's */
943819bf 1467 /* number, we must give the new rule this number */
1ff442ca
NF
1468 /* by inserting the new rule before it. */
1469
1470 /* Make a dummy nonterminal, a gensym. */
1471 sdummy = gensym();
1472
1473 /* Make a new rule, whose body is empty,
1474 before the current one, so that the action
1475 just read can belong to it. */
1476 nrules++;
1477 nitems++;
1478 record_rule_line ();
1479 p = NEW(symbol_list);
1480 if (crule1)
1481 crule1->next = p;
1482 else grammar = p;
1483 p->sym = sdummy;
1484 crule1 = NEW(symbol_list);
1485 p->next = crule1;
1486 crule1->next = crule;
1487
1488 /* insert the dummy generated by that rule into this rule. */
1489 nitems++;
1490 p = NEW(symbol_list);
1491 p->sym = sdummy;
1492 p1->next = p;
1493 p1 = p;
1494
1495 actionflag = 0;
1496 }
1497
1498 if (t == IDENTIFIER)
1499 {
1500 nitems++;
1501 p = NEW(symbol_list);
1502 p->sym = symval;
1503 p1->next = p;
1504 p1 = p;
1505 }
1506 else /* handle an action. */
1507 {
1508 copy_action(crule, rulelength);
1509 actionflag = 1;
1510 xactions++; /* JF */
1511 }
1512 rulelength++;
943819bf 1513 } /* end of read rhs of rule */
1ff442ca
NF
1514
1515 /* Put an empty link in the list to mark the end of this rule */
1516 p = NEW(symbol_list);
1517 p1->next = p;
1518 p1 = p;
1519
1520 if (t == PREC)
1521 {
a0f6b076 1522 complain (_("two @prec's in a row"));
1ff442ca
NF
1523 t = lex();
1524 crule->ruleprec = symval;
1525 t = lex();
1526 }
1527 if (t == GUARD)
1528 {
1529 if (! semantic_parser)
a0f6b076
AD
1530 complain ("%s",
1531 _("%guard present but %semantic_parser not specified"));
1ff442ca
NF
1532
1533 copy_guard(crule, rulelength);
1534 t = lex();
1535 }
1536 else if (t == LEFT_CURLY)
1537 {
943819bf 1538 /* This case never occurs -wjh */
6666f98f 1539 if (actionflag)
a0f6b076 1540 complain (_("two actions at end of one rule"));
1ff442ca 1541 copy_action(crule, rulelength);
943819bf
RS
1542 actionflag = 1;
1543 xactions++; /* -wjh */
1ff442ca
NF
1544 t = lex();
1545 }
a0f6b076 1546 /* If $$ is being set in default way, report if any type
6666f98f
AD
1547 mismatch. */
1548 else if (!xactions
1549 && first_rhs
1550 && lhs->type_name != first_rhs->type_name)
1ff442ca 1551 {
6666f98f
AD
1552 if (lhs->type_name == 0
1553 || first_rhs->type_name == 0
1ff442ca 1554 || strcmp(lhs->type_name,first_rhs->type_name))
a0f6b076
AD
1555 complain (_("type clash (`%s' `%s') on default action"),
1556 lhs->type_name ? lhs->type_name : "",
1557 first_rhs->type_name ? first_rhs->type_name : "");
1ff442ca
NF
1558 }
1559 /* Warn if there is no default for $$ but we need one. */
1560 else if (!xactions && !first_rhs && lhs->type_name != 0)
a0f6b076 1561 complain (_("empty rule for typed nonterminal, and no action"));
1ff442ca
NF
1562 if (t == SEMICOLON)
1563 t = lex();
a083fbbf 1564 }
943819bf
RS
1565#if 0
1566 /* these things can appear as alternatives to rules. */
1567/* NO, they cannot.
1568 a) none of the documentation allows them
1569 b) most of them scan forward until finding a next %
1570 thus they may swallow lots of intervening rules
1571*/
1ff442ca
NF
1572 else if (t == TOKEN)
1573 {
1574 parse_token_decl(STOKEN, SNTERM);
1575 t = lex();
1576 }
1577 else if (t == NTERM)
1578 {
1579 parse_token_decl(SNTERM, STOKEN);
1580 t = lex();
1581 }
1582 else if (t == TYPE)
1583 {
1584 t = get_type();
1585 }
1586 else if (t == UNION)
1587 {
1588 parse_union_decl();
1589 t = lex();
1590 }
1591 else if (t == EXPECT)
1592 {
1593 parse_expect_decl();
1594 t = lex();
1595 }
1596 else if (t == START)
1597 {
1598 parse_start_decl();
1599 t = lex();
1600 }
943819bf
RS
1601#endif
1602
1ff442ca 1603 else
943819bf 1604 {
a0f6b076 1605 complain (_("invalid input: %s"), token_buffer);
943819bf
RS
1606 t = lex();
1607 }
1ff442ca
NF
1608 }
1609
943819bf
RS
1610 /* grammar has been read. Do some checking */
1611
1ff442ca 1612 if (nsyms > MAXSHORT)
a0f6b076
AD
1613 fatal (_("too many symbols (tokens plus nonterminals); maximum %d"),
1614 MAXSHORT);
1ff442ca 1615 if (nrules == 0)
a0f6b076 1616 fatal (_("no rules in the input grammar"));
1ff442ca
NF
1617
1618 if (typed == 0 /* JF put out same default YYSTYPE as YACC does */
1619 && !value_components_used)
1620 {
1621 /* We used to use `unsigned long' as YYSTYPE on MSDOS,
1622 but it seems better to be consistent.
1623 Most programs should declare their own type anyway. */
1624 fprintf(fattrs, "#ifndef YYSTYPE\n#define YYSTYPE int\n#endif\n");
1625 if (fdefines)
1626 fprintf(fdefines, "#ifndef YYSTYPE\n#define YYSTYPE int\n#endif\n");
1627 }
1628
1629 /* Report any undefined symbols and consider them nonterminals. */
1630
1631 for (bp = firstsymbol; bp; bp = bp->next)
1632 if (bp->class == SUNKNOWN)
1633 {
a0f6b076
AD
1634 complain (_("symbol %s is used, but is not defined as a token and has no rules"),
1635 bp->tag);
1ff442ca
NF
1636 bp->class = SNTERM;
1637 bp->value = nvars++;
1638 }
1639
1640 ntokens = nsyms - nvars;
1641}
1642
1643
1644void
118fb205 1645record_rule_line (void)
1ff442ca
NF
1646{
1647 /* Record each rule's source line number in rline table. */
1648
1649 if (nrules >= rline_allocated)
1650 {
1651 rline_allocated = nrules * 2;
118fb205
JT
1652 rline = (short *) xrealloc ((char *) rline,
1653 rline_allocated * sizeof (short));
1ff442ca
NF
1654 }
1655 rline[nrules] = lineno;
1656}
1657
1658
#if 0
/* Read a %type declaration and record its information for
   get_type_name to access.  The type name is read first; every
   identifier that follows is given that type, with commas skipped.
   Returns the first token that does not belong to the declaration
   (the token after a terminating semicolon, if there is one).

   This is unused: it is only called from the #if 0 part of readgram.  */
static int
get_type (void)
{
  register int len;
  register int t;
  register char *name;

  t = lex ();
  if (t != TYPENAME)
    {
      complain (_("ill-formed %type declaration"));
      return t;
    }

  /* Keep a private copy of the type name; token_buffer gets reused.  */
  len = strlen (token_buffer);
  name = NEW2 (len + 1, char);
  strcpy (name, token_buffer);

  for (;;)
    {
      t = lex ();

      if (t == SEMICOLON)
        return lex ();
      if (t == COMMA)
        continue;
      if (t != IDENTIFIER)
        return t;

      if (symval->type_name == NULL)
        symval->type_name = name;
      else if (strcmp (name, symval->type_name) != 0)
        complain (_("type redeclaration for %s"), symval->tag);
    }
}
#endif
1ff442ca
NF
1707
1708
0a6384c4
AD
1709/* Assign symbol numbers, and write definition of token names into
1710 fdefines. Set up vectors tags and sprec of names and precedences
1711 of symbols. */
1ff442ca
NF
1712
1713void
118fb205 1714packsymbols (void)
1ff442ca
NF
1715{
1716 register bucket *bp;
1717 register int tokno = 1;
1718 register int i;
1719 register int last_user_token_number;
1720
1721 /* int lossage = 0; JF set but not used */
1722
1723 tags = NEW2(nsyms + 1, char *);
1724 tags[0] = "$";
943819bf
RS
1725 user_toknums = NEW2(nsyms + 1, int);
1726 user_toknums[0] = 0;
1ff442ca
NF
1727
1728 sprec = NEW2(nsyms, short);
1729 sassoc = NEW2(nsyms, short);
1730
1731 max_user_token_number = 256;
1732 last_user_token_number = 256;
1733
1734 for (bp = firstsymbol; bp; bp = bp->next)
1735 {
1736 if (bp->class == SNTERM)
1737 {
1738 bp->value += ntokens;
1739 }
943819bf
RS
1740 else if (bp->alias)
1741 {
0a6384c4
AD
1742 /* this symbol and its alias are a single token defn.
1743 allocate a tokno, and assign to both check agreement of
1744 ->prec and ->assoc fields and make both the same */
1745 if (bp->value == 0)
1746 bp->value = bp->alias->value = tokno++;
943819bf 1747
0a6384c4
AD
1748 if (bp->prec != bp->alias->prec)
1749 {
1750 if (bp->prec != 0 && bp->alias->prec != 0
1751 && bp->user_token_number == SALIAS)
a0f6b076
AD
1752 complain (_("conflicting precedences for %s and %s"),
1753 bp->tag, bp->alias->tag);
0a6384c4
AD
1754 if (bp->prec != 0)
1755 bp->alias->prec = bp->prec;
1756 else
1757 bp->prec = bp->alias->prec;
1758 }
943819bf 1759
0a6384c4
AD
1760 if (bp->assoc != bp->alias->assoc)
1761 {
a0f6b076
AD
1762 if (bp->assoc != 0 && bp->alias->assoc != 0
1763 && bp->user_token_number == SALIAS)
1764 complain (_("conflicting assoc values for %s and %s"),
1765 bp->tag, bp->alias->tag);
1766 if (bp->assoc != 0)
1767 bp->alias->assoc = bp->assoc;
1768 else
1769 bp->assoc = bp->alias->assoc;
1770 }
0a6384c4
AD
1771
1772 if (bp->user_token_number == SALIAS)
1773 continue; /* do not do processing below for SALIASs */
943819bf
RS
1774
1775 }
1776 else /* bp->class == STOKEN */
1777 {
1778 bp->value = tokno++;
1779 }
1780
1781 if (bp->class == STOKEN)
1ff442ca
NF
1782 {
1783 if (translations && !(bp->user_token_number))
1784 bp->user_token_number = ++last_user_token_number;
1785 if (bp->user_token_number > max_user_token_number)
1786 max_user_token_number = bp->user_token_number;
1ff442ca
NF
1787 }
1788
1789 tags[bp->value] = bp->tag;
943819bf 1790 user_toknums[bp->value] = bp->user_token_number;
1ff442ca
NF
1791 sprec[bp->value] = bp->prec;
1792 sassoc[bp->value] = bp->assoc;
1793
1794 }
1795
1796 if (translations)
1797 {
1798 register int i;
1799
1800 token_translations = NEW2(max_user_token_number+1, short);
1801
0a6384c4
AD
1802 /* initialize all entries for literal tokens to 2, the internal
1803 token number for $undefined., which represents all invalid
1804 inputs. */
1ff442ca 1805 for (i = 0; i <= max_user_token_number; i++)
a083fbbf 1806 token_translations[i] = 2;
1ff442ca 1807
943819bf
RS
1808 for (bp = firstsymbol; bp; bp = bp->next)
1809 {
0a6384c4
AD
1810 if (bp->value >= ntokens)
1811 continue; /* non-terminal */
1812 if (bp->user_token_number == SALIAS)
1813 continue;
943819bf 1814 if (token_translations[bp->user_token_number] != 2)
a0f6b076
AD
1815 complain (_("tokens %s and %s both assigned number %d"),
1816 tags[token_translations[bp->user_token_number]],
1817 bp->tag,
1818 bp->user_token_number);
943819bf
RS
1819 token_translations[bp->user_token_number] = bp->value;
1820 }
1ff442ca
NF
1821 }
1822
1823 error_token_number = errtoken->value;
1824
943819bf
RS
1825 if (! noparserflag)
1826 output_token_defines(ftable);
1ff442ca
NF
1827
1828 if (startval->class == SUNKNOWN)
a0f6b076 1829 fatal (_("the start symbol %s is undefined"), startval->tag);
1ff442ca 1830 else if (startval->class == STOKEN)
a0f6b076 1831 fatal (_("the start symbol %s is a token"), startval->tag);
1ff442ca
NF
1832
1833 start_symbol = startval->value;
1834
1835 if (definesflag)
1836 {
1837 output_token_defines(fdefines);
1838
1839 if (!pure_parser)
1840 {
1841 if (spec_name_prefix)
1842 fprintf(fdefines, "\nextern YYSTYPE %slval;\n", spec_name_prefix);
1843 else
1844 fprintf(fdefines, "\nextern YYSTYPE yylval;\n");
1845 }
1846
1847 if (semantic_parser)
1848 for (i = ntokens; i < nsyms; i++)
1849 {
1850 /* don't make these for dummy nonterminals made by gensym. */
1851 if (*tags[i] != '@')
1852 fprintf(fdefines, "#define\tNT%s\t%d\n", tags[i], i);
1853 }
1854#if 0
1855 /* `fdefines' is now a temporary file, so we need to copy its
1856 contents in `done', so we can't close it here. */
1857 fclose(fdefines);
1858 fdefines = NULL;
1859#endif
1860 }
1861}
a083fbbf 1862
0a6384c4
AD
1863/* For named tokens, but not literal ones, define the name. The value
1864 is the user token number. */
1865
1ff442ca 1866void
118fb205 1867output_token_defines (FILE *file)
1ff442ca
NF
1868{
1869 bucket *bp;
943819bf
RS
1870 register char *cp, *symbol;
1871 register char c;
1ff442ca
NF
1872
1873 for (bp = firstsymbol; bp; bp = bp->next)
1874 {
943819bf 1875 symbol = bp->tag; /* get symbol */
1ff442ca 1876
943819bf
RS
1877 if (bp->value >= ntokens) continue;
1878 if (bp->user_token_number == SALIAS) continue;
1879 if ('\'' == *symbol) continue; /* skip literal character */
1880 if (bp == errtoken) continue; /* skip error token */
a083fbbf 1881 if ('\"' == *symbol)
1ff442ca 1882 {
943819bf
RS
1883 /* use literal string only if given a symbol with an alias */
1884 if (bp->alias)
1885 symbol = bp->alias->tag;
1886 else
1887 continue;
1888 }
1ff442ca 1889
943819bf
RS
1890 /* Don't #define nonliteral tokens whose names contain periods. */
1891 cp = symbol;
1892 while ((c = *cp++) && c != '.');
1893 if (c != '\0') continue;
1ff442ca 1894
a0f6b076
AD
1895 fprintf (file, "#define\t%s\t%d\n", symbol,
1896 ((translations && ! rawtoknumflag)
1897 ? bp->user_token_number
1898 : bp->value));
943819bf 1899 if (semantic_parser)
a0f6b076 1900 fprintf (file, "#define\tT%s\t%d\n", symbol, bp->value);
1ff442ca
NF
1901 }
1902
1903 putc('\n', file);
1904}
1905
1906
1907
1908/* convert the rules into the representation using rrhs, rlhs and ritems. */
1909
1910void
118fb205 1911packgram (void)
1ff442ca
NF
1912{
1913 register int itemno;
1914 register int ruleno;
1915 register symbol_list *p;
1916/* register bucket *bp; JF unused */
1917
1918 bucket *ruleprec;
1919
1920 ritem = NEW2(nitems + 1, short);
1921 rlhs = NEW2(nrules, short) - 1;
1922 rrhs = NEW2(nrules, short) - 1;
1923 rprec = NEW2(nrules, short) - 1;
1924 rprecsym = NEW2(nrules, short) - 1;
1925 rassoc = NEW2(nrules, short) - 1;
1926
1927 itemno = 0;
1928 ruleno = 1;
1929
1930 p = grammar;
1931 while (p)
1932 {
1933 rlhs[ruleno] = p->sym->value;
1934 rrhs[ruleno] = itemno;
1935 ruleprec = p->ruleprec;
1936
1937 p = p->next;
1938 while (p && p->sym)
1939 {
1940 ritem[itemno++] = p->sym->value;
1941 /* A rule gets by default the precedence and associativity
1942 of the last token in it. */
1943 if (p->sym->class == STOKEN)
1944 {
1945 rprec[ruleno] = p->sym->prec;
1946 rassoc[ruleno] = p->sym->assoc;
1947 }
1948 if (p) p = p->next;
1949 }
1950
1951 /* If this rule has a %prec,
1952 the specified symbol's precedence replaces the default. */
1953 if (ruleprec)
1954 {
1955 rprec[ruleno] = ruleprec->prec;
1956 rassoc[ruleno] = ruleprec->assoc;
1957 rprecsym[ruleno] = ruleprec->value;
1958 }
1959
1960 ritem[itemno++] = -ruleno;
1961 ruleno++;
1962
1963 if (p) p = p->next;
1964 }
1965
1966 ritem[itemno] = 0;
1967}
1968\f
/* Read a signed integer from STREAM and return its value.  An optional
   leading `-' is followed by decimal digits; the first character that
   is not a digit is pushed back onto STREAM.  With no digits at all the
   result is 0 (and a leading `-' has still been consumed).  */

int
read_signed_integer (FILE *stream)
{
  int negative = 0;
  int value = 0;
  int ch = getc (stream);

  if (ch == '-')
    {
      negative = 1;
      ch = getc (stream);
    }

  for (; isdigit (ch); ch = getc (stream))
    value = value * 10 + (ch - '0');

  ungetc (ch, stream);

  return negative ? -value : value;
}