]> git.saurik.com Git - bison.git/blame - src/reader.c
* src/output.c (output): Topological sort of the functions, in
[bison.git] / src / reader.c
CommitLineData
1ff442ca 1/* Input parser for bison
41aca2e0 2 Copyright (C) 1984, 86, 89, 92, 98, 2000 Free Software Foundation, Inc.
1ff442ca 3
41aca2e0 4 This file is part of Bison, the GNU Compiler Compiler.
1ff442ca 5
41aca2e0
AD
6 Bison is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 2, or (at your option)
9 any later version.
1ff442ca 10
41aca2e0
AD
11 Bison is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
1ff442ca 15
41aca2e0
AD
16 You should have received a copy of the GNU General Public License
17 along with Bison; see the file COPYING. If not, write to
18 the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
19 Boston, MA 02111-1307, USA. */
1ff442ca
NF
20
21
41aca2e0
AD
22/* Read in the grammar specification and record it in the format
23 described in gram.h. All guards are copied into the fguard file
24 and all actions into faction, in each case forming the body of a C
25 function (yyguard or yyaction) which contains a switch statement to
26 decide which guard or action to execute.
1ff442ca 27
41aca2e0 28 The entry point is reader (). */
1ff442ca 29
1ff442ca 30#include "system.h"
ceed8467 31#include "getargs.h"
1ff442ca 32#include "files.h"
7612000c 33#include "alloc.h"
1ff442ca
NF
34#include "symtab.h"
35#include "lex.h"
36#include "gram.h"
a0f6b076 37#include "complain.h"
6c89f1c1 38#include "output.h"
1ff442ca 39
6666f98f
AD
40#define LTYPESTR "\
41\n\
42#ifndef YYLTYPE\n\
43typedef\n\
44 struct yyltype\n\
45\
46 {\n\
47 int timestamp;\n\
48 int first_line;\n\
49 int first_column;\
50\n\
51 int last_line;\n\
52 int last_column;\n\
53 char *text;\n\
54 }\n\
55\
56 yyltype;\n\
57\n\
58#define YYLTYPE yyltype\n\
59#endif\n\
60\n"
1ff442ca
NF
61
62/* Number of slots allocated (but not necessarily used yet) in `rline' */
4a120d45 63static int rline_allocated;
1ff442ca 64
1ff442ca
NF
65extern bucket *symval;
66extern int numval;
1ff442ca
NF
67extern int expected_conflicts;
68extern char *token_buffer;
118fb205
JT
69extern int maxtoken;
70
71extern void init_lex PARAMS((void));
72extern char *grow_token_buffer PARAMS((char *));
73extern void tabinit PARAMS((void));
118fb205
JT
74extern void free_symtab PARAMS((void));
75extern void open_extra_files PARAMS((void));
118fb205 76extern char *printable_version PARAMS((int));
118fb205 77extern void unlex PARAMS((int));
118fb205
JT
78
79extern int skip_white_space PARAMS((void));
80extern int parse_percent_token PARAMS((void));
81extern int lex PARAMS((void));
1ff442ca
NF
82
83typedef
84 struct symbol_list
85 {
86 struct symbol_list *next;
87 bucket *sym;
88 bucket *ruleprec;
89 }
90 symbol_list;
91
92
4a120d45
JT
93extern void reader PARAMS((void));
94extern void reader_output_yylsp PARAMS((FILE *));
95
96static void read_declarations PARAMS((void));
97static void copy_definition PARAMS((void));
98static void parse_token_decl PARAMS((int, int));
99static void parse_start_decl PARAMS((void));
100static void parse_type_decl PARAMS((void));
101static void parse_assoc_decl PARAMS((int));
102static void parse_union_decl PARAMS((void));
103static void parse_expect_decl PARAMS((void));
104static char *get_type_name PARAMS((int, symbol_list *));
105static void copy_guard PARAMS((symbol_list *, int));
106static void parse_thong_decl PARAMS((void));
107static void copy_action PARAMS((symbol_list *, int));
108static bucket *gensym PARAMS((void));
109static void readgram PARAMS((void));
110static void record_rule_line PARAMS((void));
111static void packsymbols PARAMS((void));
112static void output_token_defines PARAMS((FILE *));
113static void packgram PARAMS((void));
118fb205 114
2686a6e7
JT
115#if 0
116static int get_type PARAMS((void));
117#endif
1ff442ca
NF
118
119int lineno;
1ff442ca 120char **tags;
943819bf 121int *user_toknums;
4a120d45
JT
122static symbol_list *grammar;
123static int start_flag;
124static bucket *startval;
1ff442ca
NF
125
126/* Nonzero if components of semantic values are used, implying
127 they must be unions. */
128static int value_components_used;
129
130static int typed; /* nonzero if %union has been seen. */
131
132static int lastprec; /* incremented for each %left, %right or %nonassoc seen */
133
134static int gensym_count; /* incremented for each generated symbol */
135
136static bucket *errtoken;
5b2e3c89 137static bucket *undeftoken;
1ff442ca
NF
138
139/* Nonzero if any action or guard uses the @n construct. */
140static int yylsp_needed;
141
0d533154
AD
142\f
143/*===================\
144| Low level lexing. |
145\===================*/
943819bf
RS
146
147static void
118fb205 148skip_to_char (int target)
943819bf
RS
149{
150 int c;
151 if (target == '\n')
a0f6b076 152 complain (_(" Skipping to next \\n"));
943819bf 153 else
a0f6b076 154 complain (_(" Skipping to next %c"), target);
943819bf
RS
155
156 do
0d533154 157 c = skip_white_space ();
943819bf 158 while (c != target && c != EOF);
a083fbbf 159 if (c != EOF)
0d533154 160 ungetc (c, finput);
943819bf
RS
161}
162
163
0d533154
AD
164/*---------------------------------------------------------.
165| Read a signed integer from STREAM and return its value. |
166`---------------------------------------------------------*/
167
static inline int
read_signed_integer (FILE *stream)
{
  int ch = getc (stream);
  /* An optional leading `-' negates the result.  */
  int negative = (ch == '-');
  int magnitude = 0;

  if (negative)
    ch = getc (stream);

  /* Accumulate decimal digits; zero digits yields 0.  */
  for (; isdigit (ch); ch = getc (stream))
    magnitude = 10 * magnitude + (ch - '0');

  /* Give back the first character that is not part of the number.  */
  ungetc (ch, stream);

  return negative ? -magnitude : magnitude;
}
191\f
192/*-------------------------------------------------------------------.
193| Dump the string from FINPUT to FOUTPUT. MATCH is the delimiter of |
194| the string (either ' or "). |
195`-------------------------------------------------------------------*/
ae3c3164
AD
196
197static inline void
4a120d45 198copy_string (FILE *fin, FILE *fout, int match)
ae3c3164
AD
199{
200 int c;
201
4a120d45
JT
202 putc (match, fout);
203 c = getc (fin);
ae3c3164
AD
204
205 while (c != match)
206 {
207 if (c == EOF)
208 fatal (_("unterminated string at end of file"));
209 if (c == '\n')
210 {
a0f6b076 211 complain (_("unterminated string"));
4a120d45 212 ungetc (c, fin);
ae3c3164
AD
213 c = match; /* invent terminator */
214 continue;
215 }
216
4a120d45 217 putc(c, fout);
ae3c3164
AD
218
219 if (c == '\\')
220 {
4a120d45 221 c = getc (fin);
ae3c3164
AD
222 if (c == EOF)
223 fatal (_("unterminated string at end of file"));
4a120d45 224 putc (c, fout);
ae3c3164
AD
225 if (c == '\n')
226 lineno++;
227 }
228
4a120d45 229 c = getc(fin);
ae3c3164
AD
230 }
231
4a120d45 232 putc(c, fout);
ae3c3164
AD
233}
234
235
6c89f1c1
AD
236/*---------------------------------------------------------------.
237| Dump the comment from IN to OUT1 and OUT2. C is either `*' or |
238| `/', depending upon the type of comments used. OUT2 might be |
239| NULL. |
240`---------------------------------------------------------------*/
ae3c3164
AD
241
242static inline void
27821bff 243copy_comment2 (FILE *in, FILE *out1, FILE* out2, int c)
ae3c3164
AD
244{
245 int cplus_comment;
ae3c3164
AD
246 register int ended;
247
248 cplus_comment = (c == '/');
27821bff
AD
249 putc (c, out1);
250 if (out2)
251 putc (c, out2);
252 c = getc (in);
ae3c3164
AD
253
254 ended = 0;
255 while (!ended)
256 {
257 if (!cplus_comment && c == '*')
258 {
259 while (c == '*')
260 {
27821bff
AD
261 putc (c, out1);
262 if (out2)
263 putc (c, out2);
264 c = getc (in);
ae3c3164
AD
265 }
266
267 if (c == '/')
268 {
27821bff
AD
269 putc(c, out1);
270 if (out2)
271 putc(c, out2);
ae3c3164
AD
272 ended = 1;
273 }
274 }
275 else if (c == '\n')
276 {
277 lineno++;
27821bff
AD
278 putc (c, out1);
279 if (out2)
280 putc (c, out2);
ae3c3164
AD
281 if (cplus_comment)
282 ended = 1;
283 else
27821bff 284 c = getc (in);
ae3c3164
AD
285 }
286 else if (c == EOF)
287 fatal (_("unterminated comment"));
288 else
289 {
27821bff
AD
290 putc (c, out1);
291 if (out2)
292 putc (c, out2);
293 c = getc (in);
ae3c3164
AD
294 }
295 }
296}
297
298
4a120d45 299/* Dump the comment from FIN to FOUT. C is either `*' or `/',
27821bff
AD
300 depending upon the type of comments used. */
301
static inline void
copy_comment (FILE *fin, FILE *fout, int c)
{
  /* Single-destination form of copy_comment2: no second stream.  */
  FILE *const no_second_stream = NULL;

  copy_comment2 (fin, fout, no_second_stream, c);
}
307
308
1ff442ca 309void
118fb205 310reader (void)
1ff442ca
NF
311{
312 start_flag = 0;
313 startval = NULL; /* start symbol not specified yet. */
314
315#if 0
e79137ac
AD
316 /* initially assume token number translation not needed. */
317 translations = 0;
1ff442ca 318#endif
e79137ac
AD
319 /* Nowadays translations is always set to 1, since we give `error' a
320 user-token-number to satisfy the Posix demand for YYERRCODE==256.
321 */
1ff442ca
NF
322 translations = 1;
323
324 nsyms = 1;
325 nvars = 0;
326 nrules = 0;
327 nitems = 0;
328 rline_allocated = 10;
e79137ac 329 rline = NEW2 (rline_allocated, short);
1ff442ca
NF
330
331 typed = 0;
332 lastprec = 0;
333
334 gensym_count = 0;
335
336 semantic_parser = 0;
337 pure_parser = 0;
338 yylsp_needed = 0;
339
340 grammar = NULL;
341
e79137ac 342 init_lex ();
1ff442ca
NF
343 lineno = 1;
344
e79137ac
AD
345 /* Initialize the symbol table. */
346 tabinit ();
347 /* Construct the error token */
348 errtoken = getsym ("error");
1ff442ca 349 errtoken->class = STOKEN;
e79137ac
AD
350 errtoken->user_token_number = 256; /* Value specified by POSIX. */
351 /* Construct a token that represents all undefined literal tokens.
352 It is always token number 2. */
353 undeftoken = getsym ("$undefined.");
5b2e3c89
JT
354 undeftoken->class = STOKEN;
355 undeftoken->user_token_number = 2;
e79137ac
AD
356
357 /* Read the declaration section. Copy %{ ... %} groups to FTABLE
358 and FDEFINES file. Also notice any %token, %left, etc. found
359 there. */
360 putc ('\n', ftable);
361 fprintf (ftable, "\
362/* %s, made from %s\n\
363 by GNU bison %s. */\n\
364\n",
365 noparserflag ? "Bison-generated parse tables" : "A Bison parser",
366 infile,
367 VERSION);
368
369 fputs ("#define YYBISON 1 /* Identify Bison output. */\n\n", ftable);
370 read_declarations ();
371 /* Start writing the guard and action files, if they are needed. */
372 output_headers ();
373 /* Read in the grammar, build grammar in list form. Write out
374 guards and actions. */
375 readgram ();
376 /* Now we know whether we need the line-number stack. If we do,
377 write its type into the .tab.h file. */
943819bf 378 if (fdefines)
e79137ac
AD
379 reader_output_yylsp (fdefines);
380 /* Write closing delimiters for actions and guards. */
381 output_trailers ();
1ff442ca 382 if (yylsp_needed)
0d533154 383 fputs ("#define YYLSP_NEEDED\n\n", ftable);
e79137ac
AD
384 /* Assign the symbols their symbol numbers. Write #defines for the
385 token symbols into FDEFINES if requested. */
386 packsymbols ();
387 /* Convert the grammar into the format described in gram.h. */
388 packgram ();
389 /* Free the symbol table data structure since symbols are now all
390 referred to by symbol number. */
391 free_symtab ();
1ff442ca
NF
392}
393
943819bf 394void
118fb205 395reader_output_yylsp (FILE *f)
943819bf
RS
396{
397 if (yylsp_needed)
398 fprintf(f, LTYPESTR);
399}
1ff442ca 400
41aca2e0
AD
401/* Read from finput until `%%' is seen. Discard the `%%'. Handle any
402 `%' declarations, and copy the contents of any `%{ ... %}' groups
403 to fattrs. */
1ff442ca 404
4a120d45 405static void
118fb205 406read_declarations (void)
1ff442ca
NF
407{
408 register int c;
409 register int tok;
410
411 for (;;)
412 {
413 c = skip_white_space();
414
415 if (c == '%')
416 {
417 tok = parse_percent_token();
418
419 switch (tok)
420 {
421 case TWO_PERCENTS:
422 return;
423
424 case PERCENT_LEFT_CURLY:
425 copy_definition();
426 break;
427
428 case TOKEN:
429 parse_token_decl (STOKEN, SNTERM);
430 break;
a083fbbf 431
1ff442ca
NF
432 case NTERM:
433 parse_token_decl (SNTERM, STOKEN);
434 break;
a083fbbf 435
1ff442ca
NF
436 case TYPE:
437 parse_type_decl();
438 break;
a083fbbf 439
1ff442ca
NF
440 case START:
441 parse_start_decl();
442 break;
a083fbbf 443
1ff442ca 444 case UNION:
27821bff 445 parse_union_decl ();
1ff442ca 446 break;
a083fbbf 447
1ff442ca
NF
448 case EXPECT:
449 parse_expect_decl();
450 break;
943819bf
RS
451 case THONG:
452 parse_thong_decl();
453 break;
1ff442ca
NF
454 case LEFT:
455 parse_assoc_decl(LEFT_ASSOC);
456 break;
457
458 case RIGHT:
459 parse_assoc_decl(RIGHT_ASSOC);
460 break;
461
462 case NONASSOC:
463 parse_assoc_decl(NON_ASSOC);
464 break;
465
466 case SEMANTIC_PARSER:
467 if (semantic_parser == 0)
468 {
469 semantic_parser = 1;
470 open_extra_files();
471 }
472 break;
473
474 case PURE_PARSER:
475 pure_parser = 1;
476 break;
477
943819bf
RS
478 case NOOP:
479 break;
480
1ff442ca 481 default:
a0f6b076 482 complain (_("unrecognized: %s"), token_buffer);
943819bf
RS
483 skip_to_char('%');
484 }
1ff442ca
NF
485 }
486 else if (c == EOF)
a0f6b076 487 fatal (_("no input grammar"));
1ff442ca 488 else
943819bf 489 {
a0f6b076 490 complain (_("unknown character: %s"), printable_version(c));
6666f98f 491 skip_to_char('%');
943819bf 492 }
1ff442ca
NF
493 }
494}
495
496
ae3c3164
AD
497/* Copy the contents of a `%{ ... %}' into the definitions file. The
498 `%{' has already been read. Return after reading the `%}'. */
1ff442ca 499
4a120d45 500static void
118fb205 501copy_definition (void)
1ff442ca
NF
502{
503 register int c;
ae3c3164
AD
504 /* -1 while reading a character if prev char was %. */
505 register int after_percent;
1ff442ca
NF
506
507 if (!nolinesflag)
508 fprintf(fattrs, "#line %d \"%s\"\n", lineno, infile);
509
510 after_percent = 0;
511
ae3c3164 512 c = getc (finput);
1ff442ca
NF
513
514 for (;;)
515 {
516 switch (c)
517 {
518 case '\n':
519 putc(c, fattrs);
520 lineno++;
521 break;
522
523 case '%':
524 after_percent = -1;
525 break;
a083fbbf 526
1ff442ca
NF
527 case '\'':
528 case '"':
ae3c3164 529 copy_string (finput, fattrs, c);
1ff442ca
NF
530 break;
531
532 case '/':
ae3c3164
AD
533 putc (c, fattrs);
534 c = getc (finput);
1ff442ca
NF
535 if (c != '*' && c != '/')
536 continue;
ae3c3164 537 copy_comment (finput, fattrs, c);
1ff442ca
NF
538 break;
539
540 case EOF:
a0f6b076
AD
541 fatal ("%s",
542 _("unterminated `%{' definition"));
1ff442ca
NF
543
544 default:
545 putc(c, fattrs);
546 }
547
548 c = getc(finput);
549
550 if (after_percent)
551 {
552 if (c == '}')
553 return;
554 putc('%', fattrs);
555 }
556 after_percent = 0;
557
558 }
559
560}
561
562
563
564/* parse what comes after %token or %nterm.
565For %token, what_is is STOKEN and what_is_not is SNTERM.
566For %nterm, the arguments are reversed. */
567
4a120d45 568static void
118fb205 569parse_token_decl (int what_is, int what_is_not)
1ff442ca 570{
1ff442ca 571 register int token = 0;
1ff442ca 572 register char *typename = 0;
943819bf 573 register struct bucket *symbol = NULL; /* pts to symbol being defined */
1ff442ca
NF
574 int k;
575
1ff442ca
NF
576 for (;;)
577 {
e6011337
JT
578 int tmp_char = ungetc (skip_white_space (), finput);
579
580 if (tmp_char == '%')
1ff442ca 581 return;
e6011337 582 if (tmp_char == EOF)
a0f6b076 583 fatal (_("Premature EOF after %s"), token_buffer);
e6011337 584
1ff442ca
NF
585 token = lex();
586 if (token == COMMA)
943819bf
RS
587 {
588 symbol = NULL;
589 continue;
590 }
1ff442ca
NF
591 if (token == TYPENAME)
592 {
593 k = strlen(token_buffer);
594 typename = NEW2(k + 1, char);
595 strcpy(typename, token_buffer);
596 value_components_used = 1;
943819bf
RS
597 symbol = NULL;
598 }
599 else if (token == IDENTIFIER && *symval->tag == '\"'
a083fbbf 600 && symbol)
943819bf
RS
601 {
602 translations = 1;
603 symval->class = STOKEN;
604 symval->type_name = typename;
605 symval->user_token_number = symbol->user_token_number;
606 symbol->user_token_number = SALIAS;
607
a083fbbf
RS
608 symval->alias = symbol;
609 symbol->alias = symval;
943819bf
RS
610 symbol = NULL;
611
612 nsyms--; /* symbol and symval combined are only one symbol */
1ff442ca
NF
613 }
614 else if (token == IDENTIFIER)
615 {
616 int oldclass = symval->class;
943819bf 617 symbol = symval;
1ff442ca 618
943819bf 619 if (symbol->class == what_is_not)
a0f6b076 620 complain (_("symbol %s redefined"), symbol->tag);
943819bf 621 symbol->class = what_is;
1ff442ca 622 if (what_is == SNTERM && oldclass != SNTERM)
943819bf 623 symbol->value = nvars++;
1ff442ca
NF
624
625 if (typename)
626 {
943819bf
RS
627 if (symbol->type_name == NULL)
628 symbol->type_name = typename;
629 else if (strcmp(typename, symbol->type_name) != 0)
a0f6b076 630 complain (_("type redeclaration for %s"), symbol->tag);
1ff442ca
NF
631 }
632 }
943819bf 633 else if (symbol && token == NUMBER)
1ff442ca 634 {
943819bf 635 symbol->user_token_number = numval;
1ff442ca
NF
636 translations = 1;
637 }
638 else
943819bf 639 {
a0f6b076
AD
640 complain (_("`%s' is invalid in %s"),
641 token_buffer,
943819bf
RS
642 (what_is == STOKEN) ? "%token" : "%nterm");
643 skip_to_char('%');
644 }
1ff442ca
NF
645 }
646
647}
648
a083fbbf 649/* parse what comes after %thong
943819bf
RS
650 the full syntax is
651 %thong <type> token number literal
652 the <type> or number may be omitted. The number specifies the
653 user_token_number.
654
655 Two symbols are entered in the table, one for the token symbol and
656 one for the literal. Both are given the <type>, if any, from the declaration.
657 The ->user_token_number of the first is SALIAS and the ->user_token_number
658 of the second is set to the number, if any, from the declaration.
659 The two symbols are linked via pointers in their ->alias fields.
a083fbbf 660
943819bf
RS
661 during output_defines_table, the symbol is reported
662 thereafter, only the literal string is retained
663 it is the literal string that is output to yytname
664*/
665
4a120d45 666static void
118fb205 667parse_thong_decl (void)
943819bf
RS
668{
669 register int token;
670 register struct bucket *symbol;
671 register char *typename = 0;
672 int k, usrtoknum;
673
674 translations = 1;
675 token = lex(); /* fetch typename or first token */
676 if (token == TYPENAME) {
677 k = strlen(token_buffer);
678 typename = NEW2(k + 1, char);
679 strcpy(typename, token_buffer);
680 value_components_used = 1;
681 token = lex(); /* fetch first token */
682 }
683
684 /* process first token */
685
a083fbbf 686 if (token != IDENTIFIER)
943819bf 687 {
a0f6b076
AD
688 complain (_("unrecognized item %s, expected an identifier"),
689 token_buffer);
943819bf
RS
690 skip_to_char('%');
691 return;
692 }
693 symval->class = STOKEN;
694 symval->type_name = typename;
695 symval->user_token_number = SALIAS;
696 symbol = symval;
697
698 token = lex(); /* get number or literal string */
a083fbbf 699
943819bf
RS
700 if (token == NUMBER) {
701 usrtoknum = numval;
702 token = lex(); /* okay, did number, now get literal */
703 }
704 else usrtoknum = 0;
705
706 /* process literal string token */
707
a083fbbf 708 if (token != IDENTIFIER || *symval->tag != '\"')
943819bf 709 {
a0f6b076
AD
710 complain (_("expected string constant instead of %s"),
711 token_buffer);
943819bf
RS
712 skip_to_char('%');
713 return;
714 }
715 symval->class = STOKEN;
716 symval->type_name = typename;
717 symval->user_token_number = usrtoknum;
718
a083fbbf
RS
719 symval->alias = symbol;
720 symbol->alias = symval;
943819bf
RS
721
722 nsyms--; /* symbol and symval combined are only one symbol */
723}
1ff442ca
NF
724
725
a0f6b076 726/* Parse what comes after %start */
1ff442ca 727
4a120d45 728static void
118fb205 729parse_start_decl (void)
1ff442ca
NF
730{
731 if (start_flag)
27821bff
AD
732 complain (_("multiple %s declarations"), "%start");
733 if (lex () != IDENTIFIER)
734 complain (_("invalid %s declaration"), "%start");
943819bf
RS
735 else
736 {
737 start_flag = 1;
738 startval = symval;
739 }
1ff442ca
NF
740}
741
742
743
744/* read in a %type declaration and record its information for get_type_name to access */
745
4a120d45 746static void
118fb205 747parse_type_decl (void)
1ff442ca
NF
748{
749 register int k;
750 register char *name;
1ff442ca
NF
751
752 if (lex() != TYPENAME)
943819bf 753 {
a0f6b076 754 complain ("%s", _("%type declaration has no <typename>"));
943819bf
RS
755 skip_to_char('%');
756 return;
757 }
1ff442ca
NF
758
759 k = strlen(token_buffer);
760 name = NEW2(k + 1, char);
761 strcpy(name, token_buffer);
762
1ff442ca
NF
763 for (;;)
764 {
765 register int t;
e6011337 766 int tmp_char = ungetc (skip_white_space (), finput);
1ff442ca 767
e6011337 768 if (tmp_char == '%')
1ff442ca 769 return;
e6011337 770 if (tmp_char == EOF)
a0f6b076 771 fatal (_("Premature EOF after %s"), token_buffer);
1ff442ca 772
1ff442ca
NF
773 t = lex();
774
775 switch (t)
776 {
777
778 case COMMA:
779 case SEMICOLON:
780 break;
781
782 case IDENTIFIER:
783 if (symval->type_name == NULL)
784 symval->type_name = name;
943819bf 785 else if (strcmp(name, symval->type_name) != 0)
a0f6b076 786 complain (_("type redeclaration for %s"), symval->tag);
1ff442ca
NF
787
788 break;
789
790 default:
a0f6b076
AD
791 complain (_("invalid %%type declaration due to item: %s"),
792 token_buffer);
943819bf 793 skip_to_char('%');
1ff442ca
NF
794 }
795 }
796}
797
798
799
800/* read in a %left, %right or %nonassoc declaration and record its information. */
801/* assoc is either LEFT_ASSOC, RIGHT_ASSOC or NON_ASSOC. */
802
4a120d45 803static void
118fb205 804parse_assoc_decl (int assoc)
1ff442ca
NF
805{
806 register int k;
807 register char *name = NULL;
943819bf 808 register int prev = 0;
1ff442ca
NF
809
810 lastprec++; /* Assign a new precedence level, never 0. */
811
1ff442ca
NF
812 for (;;)
813 {
814 register int t;
e6011337 815 int tmp_char = ungetc (skip_white_space (), finput);
1ff442ca 816
e6011337 817 if (tmp_char == '%')
1ff442ca 818 return;
e6011337 819 if (tmp_char == EOF)
a0f6b076 820 fatal (_("Premature EOF after %s"), token_buffer);
1ff442ca 821
1ff442ca
NF
822 t = lex();
823
824 switch (t)
825 {
826
827 case TYPENAME:
828 k = strlen(token_buffer);
829 name = NEW2(k + 1, char);
830 strcpy(name, token_buffer);
831 break;
832
833 case COMMA:
834 break;
835
836 case IDENTIFIER:
837 if (symval->prec != 0)
a0f6b076 838 complain (_("redefining precedence of %s"), symval->tag);
1ff442ca
NF
839 symval->prec = lastprec;
840 symval->assoc = assoc;
841 if (symval->class == SNTERM)
a0f6b076 842 complain (_("symbol %s redefined"), symval->tag);
1ff442ca
NF
843 symval->class = STOKEN;
844 if (name)
845 { /* record the type, if one is specified */
846 if (symval->type_name == NULL)
847 symval->type_name = name;
943819bf 848 else if (strcmp(name, symval->type_name) != 0)
a0f6b076 849 complain (_("type redeclaration for %s"), symval->tag);
1ff442ca
NF
850 }
851 break;
852
853 case NUMBER:
854 if (prev == IDENTIFIER)
855 {
856 symval->user_token_number = numval;
857 translations = 1;
858 }
a083fbbf 859 else
943819bf 860 {
a0f6b076 861 complain (_("invalid text (%s) - number should be after identifier"),
943819bf
RS
862 token_buffer);
863 skip_to_char('%');
864 }
1ff442ca
NF
865 break;
866
867 case SEMICOLON:
868 return;
869
870 default:
a0f6b076 871 complain (_("unexpected item: %s"), token_buffer);
943819bf 872 skip_to_char('%');
1ff442ca
NF
873 }
874
875 prev = t;
876
877 }
878}
879
880
881
882/* copy the union declaration into fattrs (and fdefines),
883 where it is made into the
884 definition of YYSTYPE, the type of elements of the parser value stack. */
885
4a120d45 886static void
118fb205 887parse_union_decl (void)
1ff442ca
NF
888{
889 register int c;
27821bff 890 register int count = 0;
1ff442ca
NF
891
892 if (typed)
27821bff 893 complain (_("multiple %s declarations"), "%union");
1ff442ca
NF
894
895 typed = 1;
896
897 if (!nolinesflag)
27821bff 898 fprintf (fattrs, "\n#line %d \"%s\"\n", lineno, infile);
1ff442ca 899 else
27821bff 900 fprintf (fattrs, "\n");
1ff442ca 901
27821bff 902 fprintf (fattrs, "typedef union");
1ff442ca 903 if (fdefines)
27821bff 904 fprintf (fdefines, "typedef union");
1ff442ca 905
27821bff 906 c = getc (finput);
1ff442ca
NF
907
908 while (c != EOF)
909 {
27821bff 910 putc (c, fattrs);
1ff442ca 911 if (fdefines)
27821bff 912 putc (c, fdefines);
1ff442ca
NF
913
914 switch (c)
915 {
916 case '\n':
917 lineno++;
918 break;
919
920 case '/':
27821bff 921 c = getc (finput);
1ff442ca 922 if (c != '*' && c != '/')
27821bff
AD
923 continue;
924 copy_comment2 (finput, fattrs, fdefines, c);
1ff442ca
NF
925 break;
926
927
928 case '{':
929 count++;
930 break;
931
932 case '}':
933 if (count == 0)
27821bff 934 complain (_("unmatched %s"), "`}'");
1ff442ca 935 count--;
943819bf 936 if (count <= 0)
1ff442ca 937 {
27821bff 938 fprintf (fattrs, " YYSTYPE;\n");
1ff442ca 939 if (fdefines)
27821bff 940 fprintf (fdefines, " YYSTYPE;\n");
1ff442ca 941 /* JF don't choke on trailing semi */
27821bff
AD
942 c = skip_white_space ();
943 if (c != ';')
944 ungetc (c,finput);
1ff442ca
NF
945 return;
946 }
947 }
948
27821bff 949 c = getc (finput);
1ff442ca
NF
950 }
951}
952
953/* parse the declaration %expect N which says to expect N
954 shift-reduce conflicts. */
955
4a120d45 956static void
118fb205 957parse_expect_decl (void)
1ff442ca
NF
958{
959 register int c;
960 register int count;
961 char buffer[20];
962
963 c = getc(finput);
964 while (c == ' ' || c == '\t')
965 c = getc(finput);
966
967 count = 0;
968 while (c >= '0' && c <= '9')
969 {
970 if (count < 20)
971 buffer[count++] = c;
972 c = getc(finput);
973 }
974 buffer[count] = 0;
975
976 ungetc (c, finput);
977
943819bf 978 if (count <= 0 || count > 10)
a0f6b076 979 complain ("%s", _("argument of %expect is not an integer"));
1ff442ca
NF
980 expected_conflicts = atoi (buffer);
981}
982
983/* that's all of parsing the declaration section */
984\f
4a120d45 985/* FIN is pointing to a location (i.e., a `@'). Output to FOUT
7b306f52
AD
986 a reference to this location. STACK_OFFSET is the number of values
987 in the current rule so far, which says where to find `$0' with
988 respect to the top of the stack. */
989static inline void
4a120d45 990copy_at (FILE *fin, FILE *fout, int stack_offset)
7b306f52
AD
991{
992 int c;
993
4a120d45 994 c = getc (fin);
7b306f52
AD
995 if (c == '$')
996 {
4a120d45 997 fprintf (fout, "yyloc");
7b306f52
AD
998 yylsp_needed = 1;
999 }
1000 else if (isdigit(c) || c == '-')
1001 {
1002 int n;
1003
4a120d45
JT
1004 ungetc (c, fin);
1005 n = read_signed_integer (fin);
7b306f52 1006
4a120d45 1007 fprintf (fout, "yylsp[%d]", n - stack_offset);
7b306f52
AD
1008 yylsp_needed = 1;
1009 }
1010 else
a0f6b076 1011 complain (_("@%s is invalid"), printable_version (c));
7b306f52
AD
1012}
1013
1014
a0f6b076
AD
1015/* Get the data type (alternative in the union) of the value for
1016 symbol n in rule rule. */
1ff442ca 1017
4a120d45 1018static char *
118fb205 1019get_type_name (int n, symbol_list *rule)
1ff442ca 1020{
1ff442ca
NF
1021 register int i;
1022 register symbol_list *rp;
1023
1024 if (n < 0)
943819bf 1025 {
a0f6b076 1026 complain (_("invalid $ value"));
943819bf
RS
1027 return NULL;
1028 }
1ff442ca
NF
1029
1030 rp = rule;
1031 i = 0;
1032
1033 while (i < n)
1034 {
1035 rp = rp->next;
1036 if (rp == NULL || rp->sym == NULL)
943819bf 1037 {
a0f6b076 1038 complain (_("invalid $ value"));
943819bf
RS
1039 return NULL;
1040 }
1ff442ca
NF
1041 i++;
1042 }
1043
36281465 1044 return rp->sym->type_name;
1ff442ca
NF
1045}
1046
1047
3cef001a 1048
41aca2e0
AD
1049/* After `%guard' is seen in the input file, copy the actual guard
1050 into the guards file. If the guard is followed by an action, copy
1051 that into the actions file. STACK_OFFSET is the number of values
1052 in the current rule so far, which says where to find `$0' with
1053 respect to the top of the stack, for the simple parser in which the
1054 stack is not popped until after the guard is run. */
1ff442ca 1055
4a120d45 1056static void
118fb205 1057copy_guard (symbol_list *rule, int stack_offset)
1ff442ca
NF
1058{
1059 register int c;
1060 register int n;
1061 register int count;
1ff442ca
NF
1062 register char *type_name;
1063 int brace_flag = 0;
1ff442ca
NF
1064
1065 /* offset is always 0 if parser has already popped the stack pointer */
1066 if (semantic_parser) stack_offset = 0;
1067
1068 fprintf(fguard, "\ncase %d:\n", nrules);
1069 if (!nolinesflag)
41aca2e0 1070 fprintf (fguard, "#line %d \"%s\"\n", lineno, infile);
1ff442ca
NF
1071 putc('{', fguard);
1072
1073 count = 0;
1074 c = getc(finput);
1075
1076 while (brace_flag ? (count > 0) : (c != ';'))
1077 {
1078 switch (c)
1079 {
1080 case '\n':
1081 putc(c, fguard);
1082 lineno++;
1083 break;
1084
1085 case '{':
1086 putc(c, fguard);
1087 brace_flag = 1;
1088 count++;
1089 break;
1090
1091 case '}':
1092 putc(c, fguard);
1093 if (count > 0)
1094 count--;
a083fbbf 1095 else
943819bf 1096 {
27821bff 1097 complain (_("unmatched %s"), "`}'");
943819bf
RS
1098 c = getc(finput); /* skip it */
1099 }
1ff442ca
NF
1100 break;
1101
1102 case '\'':
1103 case '"':
ca36d2ef 1104 copy_string (finput, fguard, c);
1ff442ca
NF
1105 break;
1106
1107 case '/':
3cef001a
AD
1108 putc (c, fguard);
1109 c = getc (finput);
1ff442ca
NF
1110 if (c != '*' && c != '/')
1111 continue;
3cef001a 1112 copy_comment (finput, fguard, c);
1ff442ca
NF
1113 break;
1114
1115 case '$':
1116 c = getc(finput);
1117 type_name = NULL;
1118
1119 if (c == '<')
1120 {
1121 register char *cp = token_buffer;
1122
1123 while ((c = getc(finput)) != '>' && c > 0)
118fb205
JT
1124 {
1125 if (cp == token_buffer + maxtoken)
1126 cp = grow_token_buffer(cp);
1127
1128 *cp++ = c;
1129 }
1ff442ca
NF
1130 *cp = 0;
1131 type_name = token_buffer;
1132
1133 c = getc(finput);
1134 }
1135
1136 if (c == '$')
1137 {
1138 fprintf(fguard, "yyval");
41aca2e0
AD
1139 if (!type_name)
1140 type_name = rule->sym->type_name;
1ff442ca
NF
1141 if (type_name)
1142 fprintf(fguard, ".%s", type_name);
943819bf 1143 if(!type_name && typed)
a0f6b076
AD
1144 complain (_("$$ of `%s' has no declared type"),
1145 rule->sym->tag);
1ff442ca 1146 }
1ff442ca
NF
1147 else if (isdigit(c) || c == '-')
1148 {
1149 ungetc (c, finput);
41aca2e0
AD
1150 n = read_signed_integer (finput);
1151 c = getc (finput);
1ff442ca
NF
1152
1153 if (!type_name && n > 0)
1154 type_name = get_type_name(n, rule);
1155
1156 fprintf(fguard, "yyvsp[%d]", n - stack_offset);
1157 if (type_name)
1158 fprintf(fguard, ".%s", type_name);
6666f98f 1159 if (!type_name && typed)
a0f6b076
AD
1160 complain (_("$%d of `%s' has no declared type"),
1161 n, rule->sym->tag);
1ff442ca
NF
1162 continue;
1163 }
1164 else
a0f6b076 1165 complain (_("$%s is invalid"), printable_version (c));
1ff442ca
NF
1166 break;
1167
1168 case '@':
7b306f52 1169 copy_at (finput, fguard, stack_offset);
6666f98f 1170 break;
1ff442ca
NF
1171
1172 case EOF:
a0f6b076
AD
1173 fatal ("%s",
1174 _("unterminated %guard clause"));
1ff442ca
NF
1175
1176 default:
6666f98f 1177 putc (c, fguard);
1ff442ca
NF
1178 }
1179
1180 if (c != '}' || count != 0)
1181 c = getc(finput);
1182 }
1183
1184 c = skip_white_space();
1185
1186 fprintf(fguard, ";\n break;}");
1187 if (c == '{')
7b306f52 1188 copy_action (rule, stack_offset);
1ff442ca
NF
1189 else if (c == '=')
1190 {
943819bf 1191 c = getc(finput); /* why not skip_white_space -wjh */
1ff442ca 1192 if (c == '{')
7b306f52 1193 copy_action (rule, stack_offset);
1ff442ca
NF
1194 }
1195 else
1196 ungetc(c, finput);
1197}
1198
1199
1200
41aca2e0
AD
1201/* Assuming that a `{' has just been seen, copy everything up to the
1202 matching `}' into the actions file. STACK_OFFSET is the number of
1203 values in the current rule so far, which says where to find `$0'
1204 with respect to the top of the stack. */
1ff442ca 1205
4a120d45 1206static void
118fb205 1207copy_action (symbol_list *rule, int stack_offset)
1ff442ca
NF
1208{
1209 register int c;
1210 register int n;
1211 register int count;
1ff442ca 1212 register char *type_name;
1ff442ca
NF
1213
1214 /* offset is always 0 if parser has already popped the stack pointer */
41aca2e0
AD
1215 if (semantic_parser)
1216 stack_offset = 0;
1ff442ca 1217
41aca2e0 1218 fprintf (faction, "\ncase %d:\n", nrules);
1ff442ca 1219 if (!nolinesflag)
41aca2e0
AD
1220 fprintf (faction, "#line %d \"%s\"\n", lineno, infile);
1221 putc ('{', faction);
1ff442ca
NF
1222
1223 count = 1;
1224 c = getc(finput);
1225
1226 while (count > 0)
1227 {
1228 while (c != '}')
1229 {
1230 switch (c)
1231 {
1232 case '\n':
1233 putc(c, faction);
1234 lineno++;
1235 break;
1236
1237 case '{':
1238 putc(c, faction);
1239 count++;
1240 break;
1241
1242 case '\'':
1243 case '"':
ca36d2ef 1244 copy_string (finput, faction, c);
1ff442ca
NF
1245 break;
1246
1247 case '/':
27821bff
AD
1248 putc (c, faction);
1249 c = getc (finput);
1ff442ca
NF
1250 if (c != '*' && c != '/')
1251 continue;
3cef001a 1252 copy_comment (finput, faction, c);
1ff442ca
NF
1253 break;
1254
1255 case '$':
1256 c = getc(finput);
1257 type_name = NULL;
1258
1259 if (c == '<')
1260 {
1261 register char *cp = token_buffer;
1262
1263 while ((c = getc(finput)) != '>' && c > 0)
118fb205
JT
1264 {
1265 if (cp == token_buffer + maxtoken)
1266 cp = grow_token_buffer(cp);
1267
1268 *cp++ = c;
1269 }
1ff442ca
NF
1270 *cp = 0;
1271 type_name = token_buffer;
1272 value_components_used = 1;
1273
1274 c = getc(finput);
1275 }
1276 if (c == '$')
1277 {
1278 fprintf(faction, "yyval");
41aca2e0
AD
1279 if (!type_name)
1280 type_name = get_type_name(0, rule);
1ff442ca
NF
1281 if (type_name)
1282 fprintf(faction, ".%s", type_name);
a083fbbf 1283 if(!type_name && typed)
a0f6b076
AD
1284 complain (_("$$ of `%s' has no declared type"),
1285 rule->sym->tag);
1ff442ca
NF
1286 }
1287 else if (isdigit(c) || c == '-')
1288 {
1289 ungetc (c, finput);
1290 n = read_signed_integer(finput);
1291 c = getc(finput);
1292
1293 if (!type_name && n > 0)
1294 type_name = get_type_name(n, rule);
1295
1296 fprintf(faction, "yyvsp[%d]", n - stack_offset);
1297 if (type_name)
1298 fprintf(faction, ".%s", type_name);
a083fbbf 1299 if(!type_name && typed)
a0f6b076
AD
1300 complain (_("$%d of `%s' has no declared type"),
1301 n, rule->sym->tag);
1ff442ca
NF
1302 continue;
1303 }
1304 else
a0f6b076 1305 complain (_("$%s is invalid"), printable_version (c));
1ff442ca
NF
1306
1307 break;
1308
1309 case '@':
7b306f52 1310 copy_at (finput, faction, stack_offset);
6666f98f 1311 break;
1ff442ca
NF
1312
1313 case EOF:
27821bff 1314 fatal (_("unmatched %s"), "`{'");
1ff442ca
NF
1315
1316 default:
1317 putc(c, faction);
1318 }
1319
1320 c = getc(finput);
1321 }
1322
1323 /* above loop exits when c is '}' */
1324
1325 if (--count)
1326 {
1327 putc(c, faction);
1328 c = getc(finput);
1329 }
1330 }
1331
1332 fprintf(faction, ";\n break;}");
1333}
1334
1335
1336
1337/* generate a dummy symbol, a nonterminal,
1338whose name cannot conflict with the user's names. */
1339
4a120d45 1340static bucket *
118fb205 1341gensym (void)
1ff442ca
NF
1342{
1343 register bucket *sym;
1344
1345 sprintf (token_buffer, "@%d", ++gensym_count);
1346 sym = getsym(token_buffer);
1347 sym->class = SNTERM;
1348 sym->value = nvars++;
36281465 1349 return sym;
1ff442ca
NF
1350}
1351
1352/* Parse the input grammar into a one symbol_list structure.
1353Each rule is represented by a sequence of symbols: the left hand side
1354followed by the contents of the right hand side, followed by a null pointer
1355instead of a symbol to terminate the rule.
1356The next symbol is the lhs of the following rule.
1357
1358All guards and actions are copied out to the appropriate files,
1359labelled by the rule number they apply to. */
1360
4a120d45 1361static void
118fb205 1362readgram (void)
1ff442ca
NF
1363{
1364 register int t;
2686a6e7 1365 register bucket *lhs = NULL;
1ff442ca
NF
1366 register symbol_list *p;
1367 register symbol_list *p1;
1368 register bucket *bp;
1369
1370 symbol_list *crule; /* points to first symbol_list of current rule. */
1371 /* its symbol is the lhs of the rule. */
1372 symbol_list *crule1; /* points to the symbol_list preceding crule. */
1373
1374 p1 = NULL;
1375
1376 t = lex();
1377
1378 while (t != TWO_PERCENTS && t != ENDFILE)
1379 {
1380 if (t == IDENTIFIER || t == BAR)
1381 {
1382 register int actionflag = 0;
1383 int rulelength = 0; /* number of symbols in rhs of this rule so far */
1384 int xactions = 0; /* JF for error checking */
1385 bucket *first_rhs = 0;
1386
1387 if (t == IDENTIFIER)
1388 {
1389 lhs = symval;
943819bf
RS
1390
1391 if (!start_flag)
1392 {
1393 startval = lhs;
1394 start_flag = 1;
1395 }
a083fbbf 1396
1ff442ca
NF
1397 t = lex();
1398 if (t != COLON)
943819bf 1399 {
a0f6b076 1400 complain (_("ill-formed rule: initial symbol not followed by colon"));
943819bf
RS
1401 unlex(t);
1402 }
1ff442ca
NF
1403 }
1404
943819bf 1405 if (nrules == 0 && t == BAR)
1ff442ca 1406 {
a0f6b076 1407 complain (_("grammar starts with vertical bar"));
943819bf 1408 lhs = symval; /* BOGUS: use a random symval */
1ff442ca 1409 }
1ff442ca
NF
1410 /* start a new rule and record its lhs. */
1411
1412 nrules++;
1413 nitems++;
1414
1415 record_rule_line ();
1416
1417 p = NEW(symbol_list);
1418 p->sym = lhs;
1419
1420 crule1 = p1;
1421 if (p1)
1422 p1->next = p;
1423 else
1424 grammar = p;
1425
1426 p1 = p;
1427 crule = p;
1428
1429 /* mark the rule's lhs as a nonterminal if not already so. */
1430
1431 if (lhs->class == SUNKNOWN)
1432 {
1433 lhs->class = SNTERM;
1434 lhs->value = nvars;
1435 nvars++;
1436 }
1437 else if (lhs->class == STOKEN)
a0f6b076 1438 complain (_("rule given for %s, which is a token"), lhs->tag);
1ff442ca
NF
1439
1440 /* read the rhs of the rule. */
1441
1442 for (;;)
1443 {
1444 t = lex();
943819bf
RS
1445 if (t == PREC)
1446 {
1447 t = lex();
1448 crule->ruleprec = symval;
1449 t = lex();
1450 }
1ff442ca
NF
1451
1452 if (! (t == IDENTIFIER || t == LEFT_CURLY)) break;
1453
1454 /* If next token is an identifier, see if a colon follows it.
1455 If one does, exit this rule now. */
1456 if (t == IDENTIFIER)
1457 {
1458 register bucket *ssave;
1459 register int t1;
1460
1461 ssave = symval;
1462 t1 = lex();
1463 unlex(t1);
1464 symval = ssave;
1465 if (t1 == COLON) break;
1466
1467 if(!first_rhs) /* JF */
1468 first_rhs = symval;
1469 /* Not followed by colon =>
1470 process as part of this rule's rhs. */
1471 }
1472
1473 /* If we just passed an action, that action was in the middle
1474 of a rule, so make a dummy rule to reduce it to a
1475 non-terminal. */
1476 if (actionflag)
1477 {
1478 register bucket *sdummy;
1479
1480 /* Since the action was written out with this rule's */
943819bf 1481 /* number, we must give the new rule this number */
1ff442ca
NF
1482 /* by inserting the new rule before it. */
1483
1484 /* Make a dummy nonterminal, a gensym. */
1485 sdummy = gensym();
1486
1487 /* Make a new rule, whose body is empty,
1488 before the current one, so that the action
1489 just read can belong to it. */
1490 nrules++;
1491 nitems++;
1492 record_rule_line ();
1493 p = NEW(symbol_list);
1494 if (crule1)
1495 crule1->next = p;
1496 else grammar = p;
1497 p->sym = sdummy;
1498 crule1 = NEW(symbol_list);
1499 p->next = crule1;
1500 crule1->next = crule;
1501
1502 /* insert the dummy generated by that rule into this rule. */
1503 nitems++;
1504 p = NEW(symbol_list);
1505 p->sym = sdummy;
1506 p1->next = p;
1507 p1 = p;
1508
1509 actionflag = 0;
1510 }
1511
1512 if (t == IDENTIFIER)
1513 {
1514 nitems++;
1515 p = NEW(symbol_list);
1516 p->sym = symval;
1517 p1->next = p;
1518 p1 = p;
1519 }
1520 else /* handle an action. */
1521 {
1522 copy_action(crule, rulelength);
1523 actionflag = 1;
1524 xactions++; /* JF */
1525 }
1526 rulelength++;
943819bf 1527 } /* end of read rhs of rule */
1ff442ca
NF
1528
1529 /* Put an empty link in the list to mark the end of this rule */
1530 p = NEW(symbol_list);
1531 p1->next = p;
1532 p1 = p;
1533
1534 if (t == PREC)
1535 {
a0f6b076 1536 complain (_("two @prec's in a row"));
1ff442ca
NF
1537 t = lex();
1538 crule->ruleprec = symval;
1539 t = lex();
1540 }
1541 if (t == GUARD)
1542 {
1543 if (! semantic_parser)
a0f6b076
AD
1544 complain ("%s",
1545 _("%guard present but %semantic_parser not specified"));
1ff442ca
NF
1546
1547 copy_guard(crule, rulelength);
1548 t = lex();
1549 }
1550 else if (t == LEFT_CURLY)
1551 {
943819bf 1552 /* This case never occurs -wjh */
6666f98f 1553 if (actionflag)
a0f6b076 1554 complain (_("two actions at end of one rule"));
1ff442ca 1555 copy_action(crule, rulelength);
943819bf
RS
1556 actionflag = 1;
1557 xactions++; /* -wjh */
1ff442ca
NF
1558 t = lex();
1559 }
a0f6b076 1560 /* If $$ is being set in default way, report if any type
6666f98f
AD
1561 mismatch. */
1562 else if (!xactions
1563 && first_rhs
1564 && lhs->type_name != first_rhs->type_name)
1ff442ca 1565 {
6666f98f
AD
1566 if (lhs->type_name == 0
1567 || first_rhs->type_name == 0
1ff442ca 1568 || strcmp(lhs->type_name,first_rhs->type_name))
a0f6b076
AD
1569 complain (_("type clash (`%s' `%s') on default action"),
1570 lhs->type_name ? lhs->type_name : "",
1571 first_rhs->type_name ? first_rhs->type_name : "");
1ff442ca
NF
1572 }
1573 /* Warn if there is no default for $$ but we need one. */
1574 else if (!xactions && !first_rhs && lhs->type_name != 0)
a0f6b076 1575 complain (_("empty rule for typed nonterminal, and no action"));
1ff442ca
NF
1576 if (t == SEMICOLON)
1577 t = lex();
a083fbbf 1578 }
943819bf
RS
1579#if 0
1580 /* these things can appear as alternatives to rules. */
1581/* NO, they cannot.
1582 a) none of the documentation allows them
1583 b) most of them scan forward until finding a next %
1584 thus they may swallow lots of intervening rules
1585*/
1ff442ca
NF
1586 else if (t == TOKEN)
1587 {
1588 parse_token_decl(STOKEN, SNTERM);
1589 t = lex();
1590 }
1591 else if (t == NTERM)
1592 {
1593 parse_token_decl(SNTERM, STOKEN);
1594 t = lex();
1595 }
1596 else if (t == TYPE)
1597 {
1598 t = get_type();
1599 }
1600 else if (t == UNION)
1601 {
1602 parse_union_decl();
1603 t = lex();
1604 }
1605 else if (t == EXPECT)
1606 {
1607 parse_expect_decl();
1608 t = lex();
1609 }
1610 else if (t == START)
1611 {
1612 parse_start_decl();
1613 t = lex();
1614 }
943819bf
RS
1615#endif
1616
1ff442ca 1617 else
943819bf 1618 {
a0f6b076 1619 complain (_("invalid input: %s"), token_buffer);
943819bf
RS
1620 t = lex();
1621 }
1ff442ca
NF
1622 }
1623
943819bf
RS
1624 /* grammar has been read. Do some checking */
1625
1ff442ca 1626 if (nsyms > MAXSHORT)
a0f6b076
AD
1627 fatal (_("too many symbols (tokens plus nonterminals); maximum %d"),
1628 MAXSHORT);
1ff442ca 1629 if (nrules == 0)
a0f6b076 1630 fatal (_("no rules in the input grammar"));
1ff442ca
NF
1631
1632 if (typed == 0 /* JF put out same default YYSTYPE as YACC does */
1633 && !value_components_used)
1634 {
1635 /* We used to use `unsigned long' as YYSTYPE on MSDOS,
1636 but it seems better to be consistent.
1637 Most programs should declare their own type anyway. */
1638 fprintf(fattrs, "#ifndef YYSTYPE\n#define YYSTYPE int\n#endif\n");
1639 if (fdefines)
1640 fprintf(fdefines, "#ifndef YYSTYPE\n#define YYSTYPE int\n#endif\n");
1641 }
1642
1643 /* Report any undefined symbols and consider them nonterminals. */
1644
1645 for (bp = firstsymbol; bp; bp = bp->next)
1646 if (bp->class == SUNKNOWN)
1647 {
a0f6b076
AD
1648 complain (_("symbol %s is used, but is not defined as a token and has no rules"),
1649 bp->tag);
1ff442ca
NF
1650 bp->class = SNTERM;
1651 bp->value = nvars++;
1652 }
1653
1654 ntokens = nsyms - nvars;
1655}
1656
1657
4a120d45 1658static void
118fb205 1659record_rule_line (void)
1ff442ca
NF
1660{
1661 /* Record each rule's source line number in rline table. */
1662
1663 if (nrules >= rline_allocated)
1664 {
1665 rline_allocated = nrules * 2;
118fb205
JT
1666 rline = (short *) xrealloc ((char *) rline,
1667 rline_allocated * sizeof (short));
1ff442ca
NF
1668 }
1669 rline[nrules] = lineno;
1670}
1671
1672
#if 0
/* Read in a %type declaration and record its information for
   get_type_name to access.  Returns the token at which reading
   stopped: the one after a terminating semicolon, or the first token
   that cannot be part of the declaration.

   This is unused.  It is only called from the #if 0 part of
   readgram.  */
static int
get_type (void)
{
  int tok;
  char *type;

  tok = lex ();
  if (tok != TYPENAME)
    {
      complain (_("invalid %s declaration"), "%type");
      return tok;
    }

  /* Keep a private copy of the type name; token_buffer is reused by
     the lexer.  */
  type = NEW2 (strlen (token_buffer) + 1, char);
  strcpy (type, token_buffer);

  for (;;)
    {
      tok = lex ();

      if (tok == SEMICOLON)
        return lex ();

      if (tok == COMMA)
        continue;

      if (tok != IDENTIFIER)
        return tok;

      /* Attach the type to the symbol, or check it against the type
         the symbol already has.  */
      if (symval->type_name == NULL)
        symval->type_name = type;
      else if (strcmp (type, symval->type_name) != 0)
        complain (_("type redeclaration for %s"), symval->tag);
    }
}
#endif
1ff442ca
NF
1721
1722
0a6384c4
AD
1723/* Assign symbol numbers, and write definition of token names into
1724 fdefines. Set up vectors tags and sprec of names and precedences
1725 of symbols. */
1ff442ca 1726
4a120d45 1727static void
118fb205 1728packsymbols (void)
1ff442ca
NF
1729{
1730 register bucket *bp;
1731 register int tokno = 1;
1732 register int i;
1733 register int last_user_token_number;
4a120d45 1734 static char DOLLAR[] = "$";
1ff442ca
NF
1735
1736 /* int lossage = 0; JF set but not used */
1737
1738 tags = NEW2(nsyms + 1, char *);
4a120d45 1739 tags[0] = DOLLAR;
943819bf
RS
1740 user_toknums = NEW2(nsyms + 1, int);
1741 user_toknums[0] = 0;
1ff442ca
NF
1742
1743 sprec = NEW2(nsyms, short);
1744 sassoc = NEW2(nsyms, short);
1745
1746 max_user_token_number = 256;
1747 last_user_token_number = 256;
1748
1749 for (bp = firstsymbol; bp; bp = bp->next)
1750 {
1751 if (bp->class == SNTERM)
1752 {
1753 bp->value += ntokens;
1754 }
943819bf
RS
1755 else if (bp->alias)
1756 {
0a6384c4
AD
1757 /* this symbol and its alias are a single token defn.
1758 allocate a tokno, and assign to both check agreement of
1759 ->prec and ->assoc fields and make both the same */
1760 if (bp->value == 0)
1761 bp->value = bp->alias->value = tokno++;
943819bf 1762
0a6384c4
AD
1763 if (bp->prec != bp->alias->prec)
1764 {
1765 if (bp->prec != 0 && bp->alias->prec != 0
1766 && bp->user_token_number == SALIAS)
a0f6b076
AD
1767 complain (_("conflicting precedences for %s and %s"),
1768 bp->tag, bp->alias->tag);
0a6384c4
AD
1769 if (bp->prec != 0)
1770 bp->alias->prec = bp->prec;
1771 else
1772 bp->prec = bp->alias->prec;
1773 }
943819bf 1774
0a6384c4
AD
1775 if (bp->assoc != bp->alias->assoc)
1776 {
a0f6b076
AD
1777 if (bp->assoc != 0 && bp->alias->assoc != 0
1778 && bp->user_token_number == SALIAS)
1779 complain (_("conflicting assoc values for %s and %s"),
1780 bp->tag, bp->alias->tag);
1781 if (bp->assoc != 0)
1782 bp->alias->assoc = bp->assoc;
1783 else
1784 bp->assoc = bp->alias->assoc;
1785 }
0a6384c4
AD
1786
1787 if (bp->user_token_number == SALIAS)
1788 continue; /* do not do processing below for SALIASs */
943819bf
RS
1789
1790 }
1791 else /* bp->class == STOKEN */
1792 {
1793 bp->value = tokno++;
1794 }
1795
1796 if (bp->class == STOKEN)
1ff442ca
NF
1797 {
1798 if (translations && !(bp->user_token_number))
1799 bp->user_token_number = ++last_user_token_number;
1800 if (bp->user_token_number > max_user_token_number)
1801 max_user_token_number = bp->user_token_number;
1ff442ca
NF
1802 }
1803
1804 tags[bp->value] = bp->tag;
943819bf 1805 user_toknums[bp->value] = bp->user_token_number;
1ff442ca
NF
1806 sprec[bp->value] = bp->prec;
1807 sassoc[bp->value] = bp->assoc;
1808
1809 }
1810
1811 if (translations)
1812 {
4a120d45 1813 register int j;
1ff442ca
NF
1814
1815 token_translations = NEW2(max_user_token_number+1, short);
1816
0a6384c4
AD
1817 /* initialize all entries for literal tokens to 2, the internal
1818 token number for $undefined., which represents all invalid
1819 inputs. */
4a120d45
JT
1820 for (j = 0; j <= max_user_token_number; j++)
1821 token_translations[j] = 2;
1ff442ca 1822
943819bf
RS
1823 for (bp = firstsymbol; bp; bp = bp->next)
1824 {
0a6384c4
AD
1825 if (bp->value >= ntokens)
1826 continue; /* non-terminal */
1827 if (bp->user_token_number == SALIAS)
1828 continue;
943819bf 1829 if (token_translations[bp->user_token_number] != 2)
a0f6b076
AD
1830 complain (_("tokens %s and %s both assigned number %d"),
1831 tags[token_translations[bp->user_token_number]],
1832 bp->tag,
1833 bp->user_token_number);
943819bf
RS
1834 token_translations[bp->user_token_number] = bp->value;
1835 }
1ff442ca
NF
1836 }
1837
1838 error_token_number = errtoken->value;
1839
943819bf
RS
1840 if (! noparserflag)
1841 output_token_defines(ftable);
1ff442ca
NF
1842
1843 if (startval->class == SUNKNOWN)
a0f6b076 1844 fatal (_("the start symbol %s is undefined"), startval->tag);
1ff442ca 1845 else if (startval->class == STOKEN)
a0f6b076 1846 fatal (_("the start symbol %s is a token"), startval->tag);
1ff442ca
NF
1847
1848 start_symbol = startval->value;
1849
1850 if (definesflag)
1851 {
1852 output_token_defines(fdefines);
1853
1854 if (!pure_parser)
1855 {
1856 if (spec_name_prefix)
1857 fprintf(fdefines, "\nextern YYSTYPE %slval;\n", spec_name_prefix);
1858 else
1859 fprintf(fdefines, "\nextern YYSTYPE yylval;\n");
1860 }
1861
1862 if (semantic_parser)
1863 for (i = ntokens; i < nsyms; i++)
1864 {
1865 /* don't make these for dummy nonterminals made by gensym. */
1866 if (*tags[i] != '@')
1867 fprintf(fdefines, "#define\tNT%s\t%d\n", tags[i], i);
1868 }
1869#if 0
1870 /* `fdefines' is now a temporary file, so we need to copy its
1871 contents in `done', so we can't close it here. */
1872 fclose(fdefines);
1873 fdefines = NULL;
1874#endif
1875 }
1876}
a083fbbf 1877
0a6384c4
AD
1878/* For named tokens, but not literal ones, define the name. The value
1879 is the user token number. */
1880
4a120d45 1881static void
118fb205 1882output_token_defines (FILE *file)
1ff442ca
NF
1883{
1884 bucket *bp;
943819bf
RS
1885 register char *cp, *symbol;
1886 register char c;
1ff442ca
NF
1887
1888 for (bp = firstsymbol; bp; bp = bp->next)
1889 {
943819bf 1890 symbol = bp->tag; /* get symbol */
1ff442ca 1891
943819bf
RS
1892 if (bp->value >= ntokens) continue;
1893 if (bp->user_token_number == SALIAS) continue;
1894 if ('\'' == *symbol) continue; /* skip literal character */
1895 if (bp == errtoken) continue; /* skip error token */
a083fbbf 1896 if ('\"' == *symbol)
1ff442ca 1897 {
943819bf
RS
1898 /* use literal string only if given a symbol with an alias */
1899 if (bp->alias)
1900 symbol = bp->alias->tag;
1901 else
1902 continue;
1903 }
1ff442ca 1904
943819bf
RS
1905 /* Don't #define nonliteral tokens whose names contain periods. */
1906 cp = symbol;
1907 while ((c = *cp++) && c != '.');
1908 if (c != '\0') continue;
1ff442ca 1909
a0f6b076
AD
1910 fprintf (file, "#define\t%s\t%d\n", symbol,
1911 ((translations && ! rawtoknumflag)
1912 ? bp->user_token_number
1913 : bp->value));
943819bf 1914 if (semantic_parser)
a0f6b076 1915 fprintf (file, "#define\tT%s\t%d\n", symbol, bp->value);
1ff442ca
NF
1916 }
1917
1918 putc('\n', file);
1919}
1920
1921
1922
1923/* convert the rules into the representation using rrhs, rlhs and ritems. */
1924
4a120d45 1925static void
118fb205 1926packgram (void)
1ff442ca
NF
1927{
1928 register int itemno;
1929 register int ruleno;
1930 register symbol_list *p;
1931/* register bucket *bp; JF unused */
1932
1933 bucket *ruleprec;
1934
1935 ritem = NEW2(nitems + 1, short);
1936 rlhs = NEW2(nrules, short) - 1;
1937 rrhs = NEW2(nrules, short) - 1;
1938 rprec = NEW2(nrules, short) - 1;
1939 rprecsym = NEW2(nrules, short) - 1;
1940 rassoc = NEW2(nrules, short) - 1;
1941
1942 itemno = 0;
1943 ruleno = 1;
1944
1945 p = grammar;
1946 while (p)
1947 {
1948 rlhs[ruleno] = p->sym->value;
1949 rrhs[ruleno] = itemno;
1950 ruleprec = p->ruleprec;
1951
1952 p = p->next;
1953 while (p && p->sym)
1954 {
1955 ritem[itemno++] = p->sym->value;
1956 /* A rule gets by default the precedence and associativity
1957 of the last token in it. */
1958 if (p->sym->class == STOKEN)
1959 {
1960 rprec[ruleno] = p->sym->prec;
1961 rassoc[ruleno] = p->sym->assoc;
1962 }
1963 if (p) p = p->next;
1964 }
1965
1966 /* If this rule has a %prec,
1967 the specified symbol's precedence replaces the default. */
1968 if (ruleprec)
1969 {
1970 rprec[ruleno] = ruleprec->prec;
1971 rassoc[ruleno] = ruleprec->assoc;
1972 rprecsym[ruleno] = ruleprec->value;
1973 }
1974
1975 ritem[itemno++] = -ruleno;
1976 ruleno++;
1977
1978 if (p) p = p->next;
1979 }
1980
1981 ritem[itemno] = 0;
1982}