]> git.saurik.com Git - bison.git/blame - src/reader.c
Change the handling of @s so that they behave exactly like $s.
[bison.git] / src / reader.c
CommitLineData
1ff442ca 1/* Input parser for bison
41aca2e0 2 Copyright (C) 1984, 86, 89, 92, 98, 2000 Free Software Foundation, Inc.
1ff442ca 3
41aca2e0 4 This file is part of Bison, the GNU Compiler Compiler.
1ff442ca 5
41aca2e0
AD
6 Bison is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 2, or (at your option)
9 any later version.
1ff442ca 10
41aca2e0
AD
11 Bison is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
1ff442ca 15
41aca2e0
AD
16 You should have received a copy of the GNU General Public License
17 along with Bison; see the file COPYING. If not, write to
18 the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
19 Boston, MA 02111-1307, USA. */
1ff442ca
NF
20
21
41aca2e0
AD
22/* Read in the grammar specification and record it in the format
23 described in gram.h. All guards are copied into the fguard file
24 and all actions into faction, in each case forming the body of a C
25 function (yyguard or yyaction) which contains a switch statement to
26 decide which guard or action to execute.
1ff442ca 27
41aca2e0 28 The entry point is reader (). */
1ff442ca
NF
29
30#include <stdio.h>
1ff442ca
NF
31#include "system.h"
32#include "files.h"
7612000c 33#include "alloc.h"
1ff442ca
NF
34#include "symtab.h"
35#include "lex.h"
36#include "gram.h"
37#include "machine.h"
38
6666f98f
AD
39#define LTYPESTR "\
40\n\
41#ifndef YYLTYPE\n\
42typedef\n\
43 struct yyltype\n\
44\
45 {\n\
46 int timestamp;\n\
47 int first_line;\n\
48 int first_column;\
49\n\
50 int last_line;\n\
51 int last_column;\n\
52 char *text;\n\
53 }\n\
54\
55 yyltype;\n\
56\n\
57#define YYLTYPE yyltype\n\
58#endif\n\
59\n"
1ff442ca
NF
60
61/* Number of slots allocated (but not necessarily used yet) in `rline' */
62int rline_allocated;
63
64extern char *program_name;
65extern int definesflag;
66extern int nolinesflag;
943819bf
RS
67extern int noparserflag;
68extern int rawtoknumflag;
1ff442ca
NF
69extern bucket *symval;
70extern int numval;
1ff442ca
NF
71extern int expected_conflicts;
72extern char *token_buffer;
118fb205
JT
73extern int maxtoken;
74
75extern void init_lex PARAMS((void));
76extern char *grow_token_buffer PARAMS((char *));
77extern void tabinit PARAMS((void));
78extern void output_headers PARAMS((void));
79extern void output_trailers PARAMS((void));
80extern void free_symtab PARAMS((void));
81extern void open_extra_files PARAMS((void));
82extern char *int_to_string PARAMS((int));
83extern char *printable_version PARAMS((int));
84extern void fatal PARAMS((char *));
85extern void fatals PARAMS((char *, char *));
86extern void warn PARAMS((char *));
87extern void warni PARAMS((char *, int));
88extern void warns PARAMS((char *, char *));
89extern void warnss PARAMS((char *, char *, char *));
90extern void warnsss PARAMS((char *, char *, char *, char *));
91extern void unlex PARAMS((int));
92extern void done PARAMS((int));
93
94extern int skip_white_space PARAMS((void));
95extern int parse_percent_token PARAMS((void));
96extern int lex PARAMS((void));
1ff442ca
NF
97
98typedef
99 struct symbol_list
100 {
101 struct symbol_list *next;
102 bucket *sym;
103 bucket *ruleprec;
104 }
105 symbol_list;
106
107
118fb205
JT
108void reader PARAMS((void));
109void reader_output_yylsp PARAMS((FILE *));
110void read_declarations PARAMS((void));
111void copy_definition PARAMS((void));
112void parse_token_decl PARAMS((int, int));
113void parse_start_decl PARAMS((void));
114void parse_type_decl PARAMS((void));
115void parse_assoc_decl PARAMS((int));
116void parse_union_decl PARAMS((void));
117void parse_expect_decl PARAMS((void));
118char *get_type_name PARAMS((int, symbol_list *));
119void copy_guard PARAMS((symbol_list *, int));
120void parse_thong_decl PARAMS((void));
121void copy_action PARAMS((symbol_list *, int));
122bucket *gensym PARAMS((void));
123void readgram PARAMS((void));
124void record_rule_line PARAMS((void));
125void packsymbols PARAMS((void));
126void output_token_defines PARAMS((FILE *));
127void packgram PARAMS((void));
128int read_signed_integer PARAMS((FILE *));
118fb205 129
2686a6e7
JT
130#if 0
131static int get_type PARAMS((void));
132#endif
1ff442ca
NF
133
134int lineno;
135symbol_list *grammar;
136int start_flag;
137bucket *startval;
138char **tags;
943819bf 139int *user_toknums;
1ff442ca
NF
140
141/* Nonzero if components of semantic values are used, implying
142 they must be unions. */
143static int value_components_used;
144
145static int typed; /* nonzero if %union has been seen. */
146
147static int lastprec; /* incremented for each %left, %right or %nonassoc seen */
148
149static int gensym_count; /* incremented for each generated symbol */
150
151static bucket *errtoken;
5b2e3c89 152static bucket *undeftoken;
1ff442ca
NF
153
154/* Nonzero if any action or guard uses the @n construct. */
155static int yylsp_needed;
156
943819bf
RS
157
158static void
118fb205 159skip_to_char (int target)
943819bf
RS
160{
161 int c;
162 if (target == '\n')
a083fbbf 163 warn(_(" Skipping to next \\n"));
943819bf 164 else
a083fbbf 165 warni(_(" Skipping to next %c"), target);
943819bf
RS
166
167 do
168 c = skip_white_space();
169 while (c != target && c != EOF);
a083fbbf 170 if (c != EOF)
943819bf
RS
171 ungetc(c, finput);
172}
173
174
1ff442ca 175void
118fb205 176reader (void)
1ff442ca
NF
177{
178 start_flag = 0;
179 startval = NULL; /* start symbol not specified yet. */
180
181#if 0
182 translations = 0; /* initially assume token number translation not needed. */
183#endif
184 /* Nowadays translations is always set to 1,
185 since we give `error' a user-token-number
186 to satisfy the Posix demand for YYERRCODE==256. */
187 translations = 1;
188
189 nsyms = 1;
190 nvars = 0;
191 nrules = 0;
192 nitems = 0;
193 rline_allocated = 10;
194 rline = NEW2(rline_allocated, short);
195
196 typed = 0;
197 lastprec = 0;
198
199 gensym_count = 0;
200
201 semantic_parser = 0;
202 pure_parser = 0;
203 yylsp_needed = 0;
204
205 grammar = NULL;
206
207 init_lex();
208 lineno = 1;
209
210 /* initialize the symbol table. */
211 tabinit();
212 /* construct the error token */
213 errtoken = getsym("error");
214 errtoken->class = STOKEN;
215 errtoken->user_token_number = 256; /* Value specified by posix. */
216 /* construct a token that represents all undefined literal tokens. */
217 /* it is always token number 2. */
5b2e3c89
JT
218 undeftoken = getsym("$undefined.");
219 undeftoken->class = STOKEN;
220 undeftoken->user_token_number = 2;
1ff442ca
NF
221 /* Read the declaration section. Copy %{ ... %} groups to ftable and fdefines file.
222 Also notice any %token, %left, etc. found there. */
a083fbbf 223 if (noparserflag)
943819bf
RS
224 fprintf(ftable, "\n/* Bison-generated parse tables, made from %s\n",
225 infile);
226 else
227 fprintf(ftable, "\n/* A Bison parser, made from %s\n", infile);
6ed61226 228 fprintf(ftable, " by %s */\n\n", VERSION_STRING);
1ff442ca
NF
229 fprintf(ftable, "#define YYBISON 1 /* Identify Bison output. */\n\n");
230 read_declarations();
1ff442ca
NF
231 /* start writing the guard and action files, if they are needed. */
232 output_headers();
233 /* read in the grammar, build grammar in list form. write out guards and actions. */
234 readgram();
235 /* Now we know whether we need the line-number stack.
236 If we do, write its type into the .tab.h file. */
943819bf
RS
237 if (fdefines)
238 reader_output_yylsp(fdefines);
1ff442ca
NF
239 /* write closing delimiters for actions and guards. */
240 output_trailers();
241 if (yylsp_needed)
242 fprintf(ftable, "#define YYLSP_NEEDED\n\n");
243 /* assign the symbols their symbol numbers.
244 Write #defines for the token symbols into fdefines if requested. */
245 packsymbols();
246 /* convert the grammar into the format described in gram.h. */
247 packgram();
248 /* free the symbol table data structure
249 since symbols are now all referred to by symbol number. */
250 free_symtab();
251}
252
943819bf 253void
118fb205 254reader_output_yylsp (FILE *f)
943819bf
RS
255{
256 if (yylsp_needed)
257 fprintf(f, LTYPESTR);
258}
1ff442ca 259
41aca2e0
AD
260/* Read from finput until `%%' is seen. Discard the `%%'. Handle any
261 `%' declarations, and copy the contents of any `%{ ... %}' groups
262 to fattrs. */
1ff442ca
NF
263
264void
118fb205 265read_declarations (void)
1ff442ca
NF
266{
267 register int c;
268 register int tok;
269
270 for (;;)
271 {
272 c = skip_white_space();
273
274 if (c == '%')
275 {
276 tok = parse_percent_token();
277
278 switch (tok)
279 {
280 case TWO_PERCENTS:
281 return;
282
283 case PERCENT_LEFT_CURLY:
284 copy_definition();
285 break;
286
287 case TOKEN:
288 parse_token_decl (STOKEN, SNTERM);
289 break;
a083fbbf 290
1ff442ca
NF
291 case NTERM:
292 parse_token_decl (SNTERM, STOKEN);
293 break;
a083fbbf 294
1ff442ca
NF
295 case TYPE:
296 parse_type_decl();
297 break;
a083fbbf 298
1ff442ca
NF
299 case START:
300 parse_start_decl();
301 break;
a083fbbf 302
1ff442ca
NF
303 case UNION:
304 parse_union_decl();
305 break;
a083fbbf 306
1ff442ca
NF
307 case EXPECT:
308 parse_expect_decl();
309 break;
943819bf
RS
310 case THONG:
311 parse_thong_decl();
312 break;
1ff442ca
NF
313 case LEFT:
314 parse_assoc_decl(LEFT_ASSOC);
315 break;
316
317 case RIGHT:
318 parse_assoc_decl(RIGHT_ASSOC);
319 break;
320
321 case NONASSOC:
322 parse_assoc_decl(NON_ASSOC);
323 break;
324
325 case SEMANTIC_PARSER:
326 if (semantic_parser == 0)
327 {
328 semantic_parser = 1;
329 open_extra_files();
330 }
331 break;
332
333 case PURE_PARSER:
334 pure_parser = 1;
335 break;
336
943819bf
RS
337 case NOOP:
338 break;
339
1ff442ca 340 default:
a083fbbf 341 warns(_("unrecognized: %s"), token_buffer);
943819bf
RS
342 skip_to_char('%');
343 }
1ff442ca
NF
344 }
345 else if (c == EOF)
a083fbbf 346 fatal(_("no input grammar"));
1ff442ca 347 else
943819bf 348 {
6666f98f
AD
349 warns (_("unknown character: %s"), printable_version(c));
350 skip_to_char('%');
943819bf 351 }
1ff442ca
NF
352 }
353}
354
355
356/* copy the contents of a %{ ... %} into the definitions file.
357The %{ has already been read. Return after reading the %}. */
358
359void
118fb205 360copy_definition (void)
1ff442ca
NF
361{
362 register int c;
363 register int match;
364 register int ended;
365 register int after_percent; /* -1 while reading a character if prev char was % */
366 int cplus_comment;
367
368 if (!nolinesflag)
369 fprintf(fattrs, "#line %d \"%s\"\n", lineno, infile);
370
371 after_percent = 0;
372
373 c = getc(finput);
374
375 for (;;)
376 {
377 switch (c)
378 {
379 case '\n':
380 putc(c, fattrs);
381 lineno++;
382 break;
383
384 case '%':
385 after_percent = -1;
386 break;
a083fbbf 387
1ff442ca
NF
388 case '\'':
389 case '"':
390 match = c;
391 putc(c, fattrs);
392 c = getc(finput);
393
394 while (c != match)
395 {
943819bf 396 if (c == EOF)
a083fbbf 397 fatal(_("unterminated string at end of file"));
943819bf
RS
398 if (c == '\n')
399 {
a083fbbf 400 warn(_("unterminated string"));
943819bf
RS
401 ungetc(c, finput);
402 c = match;
403 continue;
404 }
1ff442ca
NF
405
406 putc(c, fattrs);
a083fbbf 407
1ff442ca
NF
408 if (c == '\\')
409 {
410 c = getc(finput);
411 if (c == EOF)
a083fbbf 412 fatal(_("unterminated string at end of file"));
1ff442ca
NF
413 putc(c, fattrs);
414 if (c == '\n')
415 lineno++;
416 }
417
418 c = getc(finput);
419 }
420
421 putc(c, fattrs);
422 break;
423
424 case '/':
425 putc(c, fattrs);
426 c = getc(finput);
427 if (c != '*' && c != '/')
428 continue;
429
430 cplus_comment = (c == '/');
431 putc(c, fattrs);
432 c = getc(finput);
433
434 ended = 0;
435 while (!ended)
436 {
437 if (!cplus_comment && c == '*')
438 {
439 while (c == '*')
440 {
441 putc(c, fattrs);
442 c = getc(finput);
443 }
444
445 if (c == '/')
446 {
447 putc(c, fattrs);
448 ended = 1;
449 }
450 }
451 else if (c == '\n')
452 {
453 lineno++;
454 putc(c, fattrs);
455 if (cplus_comment)
456 ended = 1;
457 else
458 c = getc(finput);
459 }
460 else if (c == EOF)
a083fbbf 461 fatal(_("unterminated comment in `%{' definition"));
1ff442ca
NF
462 else
463 {
464 putc(c, fattrs);
465 c = getc(finput);
466 }
467 }
468
469 break;
470
471 case EOF:
a083fbbf 472 fatal(_("unterminated `%{' definition"));
1ff442ca
NF
473
474 default:
475 putc(c, fattrs);
476 }
477
478 c = getc(finput);
479
480 if (after_percent)
481 {
482 if (c == '}')
483 return;
484 putc('%', fattrs);
485 }
486 after_percent = 0;
487
488 }
489
490}
491
492
493
494/* parse what comes after %token or %nterm.
495For %token, what_is is STOKEN and what_is_not is SNTERM.
496For %nterm, the arguments are reversed. */
497
498void
118fb205 499parse_token_decl (int what_is, int what_is_not)
1ff442ca 500{
1ff442ca 501 register int token = 0;
1ff442ca 502 register char *typename = 0;
943819bf 503 register struct bucket *symbol = NULL; /* pts to symbol being defined */
1ff442ca
NF
504 int k;
505
1ff442ca
NF
506 for (;;)
507 {
e6011337
JT
508 int tmp_char = ungetc (skip_white_space (), finput);
509
510 if (tmp_char == '%')
1ff442ca 511 return;
e6011337
JT
512 if (tmp_char == EOF)
513 fatals ("Premature EOF after %s", token_buffer);
514
1ff442ca
NF
515 token = lex();
516 if (token == COMMA)
943819bf
RS
517 {
518 symbol = NULL;
519 continue;
520 }
1ff442ca
NF
521 if (token == TYPENAME)
522 {
523 k = strlen(token_buffer);
524 typename = NEW2(k + 1, char);
525 strcpy(typename, token_buffer);
526 value_components_used = 1;
943819bf
RS
527 symbol = NULL;
528 }
529 else if (token == IDENTIFIER && *symval->tag == '\"'
a083fbbf 530 && symbol)
943819bf
RS
531 {
532 translations = 1;
533 symval->class = STOKEN;
534 symval->type_name = typename;
535 symval->user_token_number = symbol->user_token_number;
536 symbol->user_token_number = SALIAS;
537
a083fbbf
RS
538 symval->alias = symbol;
539 symbol->alias = symval;
943819bf
RS
540 symbol = NULL;
541
542 nsyms--; /* symbol and symval combined are only one symbol */
1ff442ca
NF
543 }
544 else if (token == IDENTIFIER)
545 {
546 int oldclass = symval->class;
943819bf 547 symbol = symval;
1ff442ca 548
943819bf 549 if (symbol->class == what_is_not)
a083fbbf 550 warns(_("symbol %s redefined"), symbol->tag);
943819bf 551 symbol->class = what_is;
1ff442ca 552 if (what_is == SNTERM && oldclass != SNTERM)
943819bf 553 symbol->value = nvars++;
1ff442ca
NF
554
555 if (typename)
556 {
943819bf
RS
557 if (symbol->type_name == NULL)
558 symbol->type_name = typename;
559 else if (strcmp(typename, symbol->type_name) != 0)
a083fbbf 560 warns(_("type redeclaration for %s"), symbol->tag);
1ff442ca
NF
561 }
562 }
943819bf 563 else if (symbol && token == NUMBER)
1ff442ca 564 {
943819bf 565 symbol->user_token_number = numval;
1ff442ca
NF
566 translations = 1;
567 }
568 else
943819bf 569 {
a083fbbf
RS
570 warnss(_("`%s' is invalid in %s"),
571 token_buffer,
943819bf
RS
572 (what_is == STOKEN) ? "%token" : "%nterm");
573 skip_to_char('%');
574 }
1ff442ca
NF
575 }
576
577}
578
a083fbbf 579/* parse what comes after %thong
943819bf
RS
580 the full syntax is
581 %thong <type> token number literal
582 the <type> or number may be omitted. The number specifies the
583 user_token_number.
584
585 Two symbols are entered in the table, one for the token symbol and
586 one for the literal. Both are given the <type>, if any, from the declaration.
587 The ->user_token_number of the first is SALIAS and the ->user_token_number
588 of the second is set to the number, if any, from the declaration.
589 The two symbols are linked via pointers in their ->alias fields.
a083fbbf 590
943819bf
RS
591 during output_defines_table, the symbol is reported
592 thereafter, only the literal string is retained
593 it is the literal string that is output to yytname
594*/
595
596void
118fb205 597parse_thong_decl (void)
943819bf
RS
598{
599 register int token;
600 register struct bucket *symbol;
601 register char *typename = 0;
602 int k, usrtoknum;
603
604 translations = 1;
605 token = lex(); /* fetch typename or first token */
606 if (token == TYPENAME) {
607 k = strlen(token_buffer);
608 typename = NEW2(k + 1, char);
609 strcpy(typename, token_buffer);
610 value_components_used = 1;
611 token = lex(); /* fetch first token */
612 }
613
614 /* process first token */
615
a083fbbf 616 if (token != IDENTIFIER)
943819bf 617 {
a083fbbf 618 warns(_("unrecognized item %s, expected an identifier"),
943819bf
RS
619 token_buffer);
620 skip_to_char('%');
621 return;
622 }
623 symval->class = STOKEN;
624 symval->type_name = typename;
625 symval->user_token_number = SALIAS;
626 symbol = symval;
627
628 token = lex(); /* get number or literal string */
a083fbbf 629
943819bf
RS
630 if (token == NUMBER) {
631 usrtoknum = numval;
632 token = lex(); /* okay, did number, now get literal */
633 }
634 else usrtoknum = 0;
635
636 /* process literal string token */
637
a083fbbf 638 if (token != IDENTIFIER || *symval->tag != '\"')
943819bf 639 {
a083fbbf 640 warns(_("expected string constant instead of %s"),
943819bf
RS
641 token_buffer);
642 skip_to_char('%');
643 return;
644 }
645 symval->class = STOKEN;
646 symval->type_name = typename;
647 symval->user_token_number = usrtoknum;
648
a083fbbf
RS
649 symval->alias = symbol;
650 symbol->alias = symval;
943819bf
RS
651
652 nsyms--; /* symbol and symval combined are only one symbol */
653}
1ff442ca
NF
654
655
656/* parse what comes after %start */
657
658void
118fb205 659parse_start_decl (void)
1ff442ca
NF
660{
661 if (start_flag)
a083fbbf 662 warn(_("multiple %start declarations"));
1ff442ca 663 if (lex() != IDENTIFIER)
a083fbbf 664 warn(_("invalid %start declaration"));
943819bf
RS
665 else
666 {
667 start_flag = 1;
668 startval = symval;
669 }
1ff442ca
NF
670}
671
672
673
674/* read in a %type declaration and record its information for get_type_name to access */
675
676void
118fb205 677parse_type_decl (void)
1ff442ca
NF
678{
679 register int k;
680 register char *name;
1ff442ca
NF
681
682 if (lex() != TYPENAME)
943819bf 683 {
a083fbbf 684 warn(_("%type declaration has no <typename>"));
943819bf
RS
685 skip_to_char('%');
686 return;
687 }
1ff442ca
NF
688
689 k = strlen(token_buffer);
690 name = NEW2(k + 1, char);
691 strcpy(name, token_buffer);
692
1ff442ca
NF
693 for (;;)
694 {
695 register int t;
e6011337 696 int tmp_char = ungetc (skip_white_space (), finput);
1ff442ca 697
e6011337 698 if (tmp_char == '%')
1ff442ca 699 return;
e6011337
JT
700 if (tmp_char == EOF)
701 fatals ("Premature EOF after %s", token_buffer);
1ff442ca 702
1ff442ca
NF
703 t = lex();
704
705 switch (t)
706 {
707
708 case COMMA:
709 case SEMICOLON:
710 break;
711
712 case IDENTIFIER:
713 if (symval->type_name == NULL)
714 symval->type_name = name;
943819bf 715 else if (strcmp(name, symval->type_name) != 0)
a083fbbf 716 warns(_("type redeclaration for %s"), symval->tag);
1ff442ca
NF
717
718 break;
719
720 default:
a083fbbf 721 warns(_("invalid %%type declaration due to item: `%s'"), token_buffer);
943819bf 722 skip_to_char('%');
1ff442ca
NF
723 }
724 }
725}
726
727
728
729/* read in a %left, %right or %nonassoc declaration and record its information. */
730/* assoc is either LEFT_ASSOC, RIGHT_ASSOC or NON_ASSOC. */
731
732void
118fb205 733parse_assoc_decl (int assoc)
1ff442ca
NF
734{
735 register int k;
736 register char *name = NULL;
943819bf 737 register int prev = 0;
1ff442ca
NF
738
739 lastprec++; /* Assign a new precedence level, never 0. */
740
1ff442ca
NF
741 for (;;)
742 {
743 register int t;
e6011337 744 int tmp_char = ungetc (skip_white_space (), finput);
1ff442ca 745
e6011337 746 if (tmp_char == '%')
1ff442ca 747 return;
e6011337
JT
748 if (tmp_char == EOF)
749 fatals ("Premature EOF after %s", token_buffer);
1ff442ca 750
1ff442ca
NF
751 t = lex();
752
753 switch (t)
754 {
755
756 case TYPENAME:
757 k = strlen(token_buffer);
758 name = NEW2(k + 1, char);
759 strcpy(name, token_buffer);
760 break;
761
762 case COMMA:
763 break;
764
765 case IDENTIFIER:
766 if (symval->prec != 0)
a083fbbf 767 warns(_("redefining precedence of %s"), symval->tag);
1ff442ca
NF
768 symval->prec = lastprec;
769 symval->assoc = assoc;
770 if (symval->class == SNTERM)
a083fbbf 771 warns(_("symbol %s redefined"), symval->tag);
1ff442ca
NF
772 symval->class = STOKEN;
773 if (name)
774 { /* record the type, if one is specified */
775 if (symval->type_name == NULL)
776 symval->type_name = name;
943819bf 777 else if (strcmp(name, symval->type_name) != 0)
a083fbbf 778 warns(_("type redeclaration for %s"), symval->tag);
1ff442ca
NF
779 }
780 break;
781
782 case NUMBER:
783 if (prev == IDENTIFIER)
784 {
785 symval->user_token_number = numval;
786 translations = 1;
787 }
a083fbbf 788 else
943819bf 789 {
a083fbbf 790 warns(_("invalid text (%s) - number should be after identifier"),
943819bf
RS
791 token_buffer);
792 skip_to_char('%');
793 }
1ff442ca
NF
794 break;
795
796 case SEMICOLON:
797 return;
798
799 default:
a083fbbf 800 warns(_("unexpected item: %s"), token_buffer);
943819bf 801 skip_to_char('%');
1ff442ca
NF
802 }
803
804 prev = t;
805
806 }
807}
808
809
810
811/* copy the union declaration into fattrs (and fdefines),
812 where it is made into the
813 definition of YYSTYPE, the type of elements of the parser value stack. */
814
815void
118fb205 816parse_union_decl (void)
1ff442ca
NF
817{
818 register int c;
819 register int count;
820 register int in_comment;
821 int cplus_comment;
822
823 if (typed)
a083fbbf 824 warn(_("multiple %union declarations"));
1ff442ca
NF
825
826 typed = 1;
827
828 if (!nolinesflag)
829 fprintf(fattrs, "\n#line %d \"%s\"\n", lineno, infile);
830 else
831 fprintf(fattrs, "\n");
832
833 fprintf(fattrs, "typedef union");
834 if (fdefines)
835 fprintf(fdefines, "typedef union");
836
837 count = 0;
838 in_comment = 0;
839
840 c = getc(finput);
841
842 while (c != EOF)
843 {
844 putc(c, fattrs);
845 if (fdefines)
846 putc(c, fdefines);
847
848 switch (c)
849 {
850 case '\n':
851 lineno++;
852 break;
853
854 case '/':
855 c = getc(finput);
856 if (c != '*' && c != '/')
857 ungetc(c, finput);
858 else
859 {
860 putc(c, fattrs);
861 if (fdefines)
862 putc(c, fdefines);
863 cplus_comment = (c == '/');
864 in_comment = 1;
865 c = getc(finput);
866 while (in_comment)
867 {
868 putc(c, fattrs);
869 if (fdefines)
870 putc(c, fdefines);
871
872 if (c == '\n')
873 {
874 lineno++;
875 if (cplus_comment)
876 {
877 in_comment = 0;
878 break;
879 }
880 }
881 if (c == EOF)
a083fbbf 882 fatal(_("unterminated comment at end of file"));
1ff442ca
NF
883
884 if (!cplus_comment && c == '*')
885 {
886 c = getc(finput);
887 if (c == '/')
888 {
889 putc('/', fattrs);
890 if (fdefines)
891 putc('/', fdefines);
892 in_comment = 0;
893 }
894 }
895 else
896 c = getc(finput);
897 }
898 }
899 break;
900
901
902 case '{':
903 count++;
904 break;
905
906 case '}':
907 if (count == 0)
a083fbbf 908 warn (_("unmatched close-brace (`}')"));
1ff442ca 909 count--;
943819bf 910 if (count <= 0)
1ff442ca
NF
911 {
912 fprintf(fattrs, " YYSTYPE;\n");
913 if (fdefines)
914 fprintf(fdefines, " YYSTYPE;\n");
915 /* JF don't choke on trailing semi */
916 c=skip_white_space();
917 if(c!=';') ungetc(c,finput);
918 return;
919 }
920 }
921
922 c = getc(finput);
923 }
924}
925
926/* parse the declaration %expect N which says to expect N
927 shift-reduce conflicts. */
928
929void
118fb205 930parse_expect_decl (void)
1ff442ca
NF
931{
932 register int c;
933 register int count;
934 char buffer[20];
935
936 c = getc(finput);
937 while (c == ' ' || c == '\t')
938 c = getc(finput);
939
940 count = 0;
941 while (c >= '0' && c <= '9')
942 {
943 if (count < 20)
944 buffer[count++] = c;
945 c = getc(finput);
946 }
947 buffer[count] = 0;
948
949 ungetc (c, finput);
950
943819bf 951 if (count <= 0 || count > 10)
a083fbbf 952 warn(_("argument of %expect is not an integer"));
1ff442ca
NF
953 expected_conflicts = atoi (buffer);
954}
955
956/* that's all of parsing the declaration section */
957\f
958/* Get the data type (alternative in the union) of the value for symbol n in rule rule. */
959
960char *
118fb205 961get_type_name (int n, symbol_list *rule)
1ff442ca 962{
a083fbbf 963 static char *msg = N_("invalid $ value");
1ff442ca
NF
964
965 register int i;
966 register symbol_list *rp;
967
968 if (n < 0)
943819bf 969 {
a083fbbf 970 warn(_(msg));
943819bf
RS
971 return NULL;
972 }
1ff442ca
NF
973
974 rp = rule;
975 i = 0;
976
977 while (i < n)
978 {
979 rp = rp->next;
980 if (rp == NULL || rp->sym == NULL)
943819bf 981 {
a083fbbf 982 warn(_(msg));
943819bf
RS
983 return NULL;
984 }
1ff442ca
NF
985 i++;
986 }
987
988 return (rp->sym->type_name);
989}
990
991
41aca2e0
AD
992/* After `%guard' is seen in the input file, copy the actual guard
993 into the guards file. If the guard is followed by an action, copy
994 that into the actions file. STACK_OFFSET is the number of values
995 in the current rule so far, which says where to find `$0' with
996 respect to the top of the stack, for the simple parser in which the
997 stack is not popped until after the guard is run. */
1ff442ca
NF
998
999void
118fb205 1000copy_guard (symbol_list *rule, int stack_offset)
1ff442ca
NF
1001{
1002 register int c;
1003 register int n;
1004 register int count;
1005 register int match;
1006 register int ended;
1007 register char *type_name;
1008 int brace_flag = 0;
1009 int cplus_comment;
1010
1011 /* offset is always 0 if parser has already popped the stack pointer */
1012 if (semantic_parser) stack_offset = 0;
1013
1014 fprintf(fguard, "\ncase %d:\n", nrules);
1015 if (!nolinesflag)
41aca2e0 1016 fprintf (fguard, "#line %d \"%s\"\n", lineno, infile);
1ff442ca
NF
1017 putc('{', fguard);
1018
1019 count = 0;
1020 c = getc(finput);
1021
1022 while (brace_flag ? (count > 0) : (c != ';'))
1023 {
1024 switch (c)
1025 {
1026 case '\n':
1027 putc(c, fguard);
1028 lineno++;
1029 break;
1030
1031 case '{':
1032 putc(c, fguard);
1033 brace_flag = 1;
1034 count++;
1035 break;
1036
1037 case '}':
1038 putc(c, fguard);
1039 if (count > 0)
1040 count--;
a083fbbf 1041 else
943819bf 1042 {
a083fbbf 1043 warn(_("unmatched right brace (`}')"));
943819bf
RS
1044 c = getc(finput); /* skip it */
1045 }
1ff442ca
NF
1046 break;
1047
1048 case '\'':
1049 case '"':
1050 match = c;
1051 putc(c, fguard);
1052 c = getc(finput);
1053
1054 while (c != match)
1055 {
943819bf 1056 if (c == EOF)
a083fbbf
RS
1057 fatal(_("unterminated string at end of file"));
1058 if (c == '\n')
943819bf 1059 {
a083fbbf 1060 warn(_("unterminated string"));
943819bf
RS
1061 ungetc(c, finput);
1062 c = match; /* invent terminator */
1063 continue;
1064 }
1ff442ca
NF
1065
1066 putc(c, fguard);
a083fbbf 1067
1ff442ca
NF
1068 if (c == '\\')
1069 {
1070 c = getc(finput);
1071 if (c == EOF)
a083fbbf 1072 fatal(_("unterminated string"));
1ff442ca
NF
1073 putc(c, fguard);
1074 if (c == '\n')
1075 lineno++;
1076 }
1077
1078 c = getc(finput);
1079 }
1080
1081 putc(c, fguard);
1082 break;
1083
1084 case '/':
1085 putc(c, fguard);
1086 c = getc(finput);
1087 if (c != '*' && c != '/')
1088 continue;
1089
1090 cplus_comment = (c == '/');
1091 putc(c, fguard);
1092 c = getc(finput);
1093
1094 ended = 0;
1095 while (!ended)
1096 {
1097 if (!cplus_comment && c == '*')
1098 {
1099 while (c == '*')
1100 {
1101 putc(c, fguard);
1102 c = getc(finput);
1103 }
1104
1105 if (c == '/')
1106 {
1107 putc(c, fguard);
1108 ended = 1;
1109 }
1110 }
1111 else if (c == '\n')
1112 {
1113 lineno++;
1114 putc(c, fguard);
1115 if (cplus_comment)
1116 ended = 1;
1117 else
1118 c = getc(finput);
1119 }
1120 else if (c == EOF)
a083fbbf 1121 fatal(_("unterminated comment"));
1ff442ca
NF
1122 else
1123 {
1124 putc(c, fguard);
1125 c = getc(finput);
1126 }
1127 }
1128
1129 break;
1130
1131 case '$':
1132 c = getc(finput);
1133 type_name = NULL;
1134
1135 if (c == '<')
1136 {
1137 register char *cp = token_buffer;
1138
1139 while ((c = getc(finput)) != '>' && c > 0)
118fb205
JT
1140 {
1141 if (cp == token_buffer + maxtoken)
1142 cp = grow_token_buffer(cp);
1143
1144 *cp++ = c;
1145 }
1ff442ca
NF
1146 *cp = 0;
1147 type_name = token_buffer;
1148
1149 c = getc(finput);
1150 }
1151
1152 if (c == '$')
1153 {
1154 fprintf(fguard, "yyval");
41aca2e0
AD
1155 if (!type_name)
1156 type_name = rule->sym->type_name;
1ff442ca
NF
1157 if (type_name)
1158 fprintf(fguard, ".%s", type_name);
943819bf 1159 if(!type_name && typed)
a083fbbf 1160 warns(_("$$ of `%s' has no declared type"), rule->sym->tag);
1ff442ca 1161 }
1ff442ca
NF
1162 else if (isdigit(c) || c == '-')
1163 {
1164 ungetc (c, finput);
41aca2e0
AD
1165 n = read_signed_integer (finput);
1166 c = getc (finput);
1ff442ca
NF
1167
1168 if (!type_name && n > 0)
1169 type_name = get_type_name(n, rule);
1170
1171 fprintf(fguard, "yyvsp[%d]", n - stack_offset);
1172 if (type_name)
1173 fprintf(fguard, ".%s", type_name);
6666f98f
AD
1174 if (!type_name && typed)
1175 warnss (_("$%s of `%s' has no declared type"),
1176 int_to_string(n), rule->sym->tag);
1ff442ca
NF
1177 continue;
1178 }
1179 else
aba5ca6d 1180 warns(_("$%s is invalid"), printable_version(c));
1ff442ca
NF
1181 break;
1182
1183 case '@':
6666f98f
AD
1184 c = getc (finput);
1185 if (c == '$')
1186 {
1187 fprintf (fguard, "yyloc");
1188 yylsp_needed = 1;
1189 }
1190 else if (isdigit(c) || c == '-')
1ff442ca
NF
1191 {
1192 ungetc (c, finput);
6666f98f
AD
1193 n = read_signed_integer (finput);
1194 c = getc (finput);
1195 fprintf (fguard, "yylsp[%d]", n - stack_offset);
1196 yylsp_needed = 1;
1197 continue;
1ff442ca
NF
1198 }
1199 else
943819bf 1200 {
6666f98f 1201 warns (_("@%s is invalid"), printable_version (c));
943819bf
RS
1202 n = 1;
1203 }
6666f98f 1204 break;
1ff442ca
NF
1205
1206 case EOF:
6666f98f 1207 fatal (_("unterminated %%guard clause"));
1ff442ca
NF
1208
1209 default:
6666f98f 1210 putc (c, fguard);
1ff442ca
NF
1211 }
1212
1213 if (c != '}' || count != 0)
1214 c = getc(finput);
1215 }
1216
1217 c = skip_white_space();
1218
1219 fprintf(fguard, ";\n break;}");
1220 if (c == '{')
1221 copy_action(rule, stack_offset);
1222 else if (c == '=')
1223 {
943819bf 1224 c = getc(finput); /* why not skip_white_space -wjh */
1ff442ca
NF
1225 if (c == '{')
1226 copy_action(rule, stack_offset);
1227 }
1228 else
1229 ungetc(c, finput);
1230}
1231
1232
1233
41aca2e0
AD
1234/* Assuming that a `{' has just been seen, copy everything up to the
1235 matching `}' into the actions file. STACK_OFFSET is the number of
1236 values in the current rule so far, which says where to find `$0'
1237 with respect to the top of the stack. */
1ff442ca
NF
1238
1239void
118fb205 1240copy_action (symbol_list *rule, int stack_offset)
1ff442ca
NF
1241{
1242 register int c;
1243 register int n;
1244 register int count;
1245 register int match;
1246 register int ended;
1247 register char *type_name;
1248 int cplus_comment;
1249
1250 /* offset is always 0 if parser has already popped the stack pointer */
41aca2e0
AD
1251 if (semantic_parser)
1252 stack_offset = 0;
1ff442ca 1253
41aca2e0 1254 fprintf (faction, "\ncase %d:\n", nrules);
1ff442ca 1255 if (!nolinesflag)
41aca2e0
AD
1256 fprintf (faction, "#line %d \"%s\"\n", lineno, infile);
1257 putc ('{', faction);
1ff442ca
NF
1258
1259 count = 1;
1260 c = getc(finput);
1261
1262 while (count > 0)
1263 {
1264 while (c != '}')
1265 {
1266 switch (c)
1267 {
1268 case '\n':
1269 putc(c, faction);
1270 lineno++;
1271 break;
1272
1273 case '{':
1274 putc(c, faction);
1275 count++;
1276 break;
1277
1278 case '\'':
1279 case '"':
1280 match = c;
1281 putc(c, faction);
1282 c = getc(finput);
1283
1284 while (c != match)
1285 {
943819bf
RS
1286 if (c == '\n')
1287 {
a083fbbf 1288 warn(_("unterminated string"));
943819bf
RS
1289 ungetc(c, finput);
1290 c = match;
1291 continue;
1292 }
1293 else if (c == EOF)
a083fbbf 1294 fatal(_("unterminated string at end of file"));
1ff442ca
NF
1295
1296 putc(c, faction);
1297
1298 if (c == '\\')
1299 {
1300 c = getc(finput);
1301 if (c == EOF)
a083fbbf 1302 fatal(_("unterminated string"));
1ff442ca
NF
1303 putc(c, faction);
1304 if (c == '\n')
1305 lineno++;
1306 }
1307
1308 c = getc(finput);
1309 }
1310
1311 putc(c, faction);
1312 break;
1313
1314 case '/':
1315 putc(c, faction);
1316 c = getc(finput);
1317 if (c != '*' && c != '/')
1318 continue;
1319
1320 cplus_comment = (c == '/');
1321 putc(c, faction);
1322 c = getc(finput);
1323
1324 ended = 0;
1325 while (!ended)
1326 {
1327 if (!cplus_comment && c == '*')
1328 {
1329 while (c == '*')
1330 {
1331 putc(c, faction);
1332 c = getc(finput);
1333 }
1334
1335 if (c == '/')
1336 {
1337 putc(c, faction);
1338 ended = 1;
1339 }
1340 }
1341 else if (c == '\n')
1342 {
1343 lineno++;
1344 putc(c, faction);
1345 if (cplus_comment)
1346 ended = 1;
1347 else
1348 c = getc(finput);
1349 }
1350 else if (c == EOF)
a083fbbf 1351 fatal(_("unterminated comment"));
1ff442ca
NF
1352 else
1353 {
1354 putc(c, faction);
1355 c = getc(finput);
1356 }
1357 }
1358
1359 break;
1360
1361 case '$':
1362 c = getc(finput);
1363 type_name = NULL;
1364
1365 if (c == '<')
1366 {
1367 register char *cp = token_buffer;
1368
1369 while ((c = getc(finput)) != '>' && c > 0)
118fb205
JT
1370 {
1371 if (cp == token_buffer + maxtoken)
1372 cp = grow_token_buffer(cp);
1373
1374 *cp++ = c;
1375 }
1ff442ca
NF
1376 *cp = 0;
1377 type_name = token_buffer;
1378 value_components_used = 1;
1379
1380 c = getc(finput);
1381 }
1382 if (c == '$')
1383 {
1384 fprintf(faction, "yyval");
41aca2e0
AD
1385 if (!type_name)
1386 type_name = get_type_name(0, rule);
1ff442ca
NF
1387 if (type_name)
1388 fprintf(faction, ".%s", type_name);
a083fbbf 1389 if(!type_name && typed)
41aca2e0
AD
1390 warns(_("$$ of `%s' has no declared type"),
1391 rule->sym->tag);
1ff442ca
NF
1392 }
1393 else if (isdigit(c) || c == '-')
1394 {
1395 ungetc (c, finput);
1396 n = read_signed_integer(finput);
1397 c = getc(finput);
1398
1399 if (!type_name && n > 0)
1400 type_name = get_type_name(n, rule);
1401
1402 fprintf(faction, "yyvsp[%d]", n - stack_offset);
1403 if (type_name)
1404 fprintf(faction, ".%s", type_name);
a083fbbf
RS
1405 if(!type_name && typed)
1406 warnss(_("$%s of `%s' has no declared type"),
943819bf 1407 int_to_string(n), rule->sym->tag);
1ff442ca
NF
1408 continue;
1409 }
1410 else
aba5ca6d 1411 warns(_("$%s is invalid"), printable_version(c));
1ff442ca
NF
1412
1413 break;
1414
1415 case '@':
6666f98f
AD
1416 c = getc (finput);
1417 if (c == '$')
1418 {
1419 fprintf (faction, "yyloc");
1420 yylsp_needed = 1;
1421 }
1422 else if (isdigit(c) || c == '-')
1ff442ca
NF
1423 {
1424 ungetc (c, finput);
6666f98f
AD
1425 n = read_signed_integer (finput);
1426 c = getc (finput);
1427 fprintf (faction, "yylsp[%d]", n - stack_offset);
1428 yylsp_needed = 1;
1429 continue;
1ff442ca
NF
1430 }
1431 else
943819bf 1432 {
6666f98f 1433 warns (_("@%s is invalid"), printable_version (c));
943819bf
RS
1434 n = 1;
1435 }
6666f98f 1436 break;
1ff442ca
NF
1437
1438 case EOF:
a083fbbf 1439 fatal(_("unmatched `{'"));
1ff442ca
NF
1440
1441 default:
1442 putc(c, faction);
1443 }
1444
1445 c = getc(finput);
1446 }
1447
1448 /* above loop exits when c is '}' */
1449
1450 if (--count)
1451 {
1452 putc(c, faction);
1453 c = getc(finput);
1454 }
1455 }
1456
1457 fprintf(faction, ";\n break;}");
1458}
1459
1460
1461
1462/* generate a dummy symbol, a nonterminal,
1463whose name cannot conflict with the user's names. */
1464
1465bucket *
118fb205 1466gensym (void)
1ff442ca
NF
1467{
1468 register bucket *sym;
1469
1470 sprintf (token_buffer, "@%d", ++gensym_count);
1471 sym = getsym(token_buffer);
1472 sym->class = SNTERM;
1473 sym->value = nvars++;
1474 return (sym);
1475}
1476
1477/* Parse the input grammar into a one symbol_list structure.
1478Each rule is represented by a sequence of symbols: the left hand side
1479followed by the contents of the right hand side, followed by a null pointer
1480instead of a symbol to terminate the rule.
1481The next symbol is the lhs of the following rule.
1482
1483All guards and actions are copied out to the appropriate files,
1484labelled by the rule number they apply to. */
1485
1486void
118fb205 1487readgram (void)
1ff442ca
NF
1488{
1489 register int t;
2686a6e7 1490 register bucket *lhs = NULL;
1ff442ca
NF
1491 register symbol_list *p;
1492 register symbol_list *p1;
1493 register bucket *bp;
1494
1495 symbol_list *crule; /* points to first symbol_list of current rule. */
1496 /* its symbol is the lhs of the rule. */
1497 symbol_list *crule1; /* points to the symbol_list preceding crule. */
1498
1499 p1 = NULL;
1500
1501 t = lex();
1502
1503 while (t != TWO_PERCENTS && t != ENDFILE)
1504 {
1505 if (t == IDENTIFIER || t == BAR)
1506 {
1507 register int actionflag = 0;
1508 int rulelength = 0; /* number of symbols in rhs of this rule so far */
1509 int xactions = 0; /* JF for error checking */
1510 bucket *first_rhs = 0;
1511
1512 if (t == IDENTIFIER)
1513 {
1514 lhs = symval;
943819bf
RS
1515
1516 if (!start_flag)
1517 {
1518 startval = lhs;
1519 start_flag = 1;
1520 }
a083fbbf 1521
1ff442ca
NF
1522 t = lex();
1523 if (t != COLON)
943819bf 1524 {
a083fbbf 1525 warn(_("ill-formed rule: initial symbol not followed by colon"));
943819bf
RS
1526 unlex(t);
1527 }
1ff442ca
NF
1528 }
1529
943819bf 1530 if (nrules == 0 && t == BAR)
1ff442ca 1531 {
a083fbbf 1532 warn(_("grammar starts with vertical bar"));
943819bf 1533 lhs = symval; /* BOGUS: use a random symval */
1ff442ca 1534 }
1ff442ca
NF
1535 /* start a new rule and record its lhs. */
1536
1537 nrules++;
1538 nitems++;
1539
1540 record_rule_line ();
1541
1542 p = NEW(symbol_list);
1543 p->sym = lhs;
1544
1545 crule1 = p1;
1546 if (p1)
1547 p1->next = p;
1548 else
1549 grammar = p;
1550
1551 p1 = p;
1552 crule = p;
1553
1554 /* mark the rule's lhs as a nonterminal if not already so. */
1555
1556 if (lhs->class == SUNKNOWN)
1557 {
1558 lhs->class = SNTERM;
1559 lhs->value = nvars;
1560 nvars++;
1561 }
1562 else if (lhs->class == STOKEN)
a083fbbf 1563 warns(_("rule given for %s, which is a token"), lhs->tag);
1ff442ca
NF
1564
1565 /* read the rhs of the rule. */
1566
1567 for (;;)
1568 {
1569 t = lex();
943819bf
RS
1570 if (t == PREC)
1571 {
1572 t = lex();
1573 crule->ruleprec = symval;
1574 t = lex();
1575 }
1ff442ca
NF
1576
1577 if (! (t == IDENTIFIER || t == LEFT_CURLY)) break;
1578
1579 /* If next token is an identifier, see if a colon follows it.
1580 If one does, exit this rule now. */
1581 if (t == IDENTIFIER)
1582 {
1583 register bucket *ssave;
1584 register int t1;
1585
1586 ssave = symval;
1587 t1 = lex();
1588 unlex(t1);
1589 symval = ssave;
1590 if (t1 == COLON) break;
1591
1592 if(!first_rhs) /* JF */
1593 first_rhs = symval;
1594 /* Not followed by colon =>
1595 process as part of this rule's rhs. */
1596 }
1597
1598 /* If we just passed an action, that action was in the middle
1599 of a rule, so make a dummy rule to reduce it to a
1600 non-terminal. */
1601 if (actionflag)
1602 {
1603 register bucket *sdummy;
1604
1605 /* Since the action was written out with this rule's */
943819bf 1606 /* number, we must give the new rule this number */
1ff442ca
NF
1607 /* by inserting the new rule before it. */
1608
1609 /* Make a dummy nonterminal, a gensym. */
1610 sdummy = gensym();
1611
1612 /* Make a new rule, whose body is empty,
1613 before the current one, so that the action
1614 just read can belong to it. */
1615 nrules++;
1616 nitems++;
1617 record_rule_line ();
1618 p = NEW(symbol_list);
1619 if (crule1)
1620 crule1->next = p;
1621 else grammar = p;
1622 p->sym = sdummy;
1623 crule1 = NEW(symbol_list);
1624 p->next = crule1;
1625 crule1->next = crule;
1626
1627 /* insert the dummy generated by that rule into this rule. */
1628 nitems++;
1629 p = NEW(symbol_list);
1630 p->sym = sdummy;
1631 p1->next = p;
1632 p1 = p;
1633
1634 actionflag = 0;
1635 }
1636
1637 if (t == IDENTIFIER)
1638 {
1639 nitems++;
1640 p = NEW(symbol_list);
1641 p->sym = symval;
1642 p1->next = p;
1643 p1 = p;
1644 }
1645 else /* handle an action. */
1646 {
1647 copy_action(crule, rulelength);
1648 actionflag = 1;
1649 xactions++; /* JF */
1650 }
1651 rulelength++;
943819bf 1652 } /* end of read rhs of rule */
1ff442ca
NF
1653
1654 /* Put an empty link in the list to mark the end of this rule */
1655 p = NEW(symbol_list);
1656 p1->next = p;
1657 p1 = p;
1658
1659 if (t == PREC)
1660 {
a083fbbf 1661 warn(_("two @prec's in a row"));
1ff442ca
NF
1662 t = lex();
1663 crule->ruleprec = symval;
1664 t = lex();
1665 }
1666 if (t == GUARD)
1667 {
1668 if (! semantic_parser)
a083fbbf 1669 warn(_("%%guard present but %%semantic_parser not specified"));
1ff442ca
NF
1670
1671 copy_guard(crule, rulelength);
1672 t = lex();
1673 }
1674 else if (t == LEFT_CURLY)
1675 {
943819bf 1676 /* This case never occurs -wjh */
6666f98f
AD
1677 if (actionflag)
1678 warn(_("two actions at end of one rule"));
1ff442ca 1679 copy_action(crule, rulelength);
943819bf
RS
1680 actionflag = 1;
1681 xactions++; /* -wjh */
1ff442ca
NF
1682 t = lex();
1683 }
6666f98f
AD
1684 /* If $$ is being set in default way, warn if any type
1685 mismatch. */
1686 else if (!xactions
1687 && first_rhs
1688 && lhs->type_name != first_rhs->type_name)
1ff442ca 1689 {
6666f98f
AD
1690 if (lhs->type_name == 0
1691 || first_rhs->type_name == 0
1ff442ca 1692 || strcmp(lhs->type_name,first_rhs->type_name))
a083fbbf 1693 warnss(_("type clash (`%s' `%s') on default action"),
6666f98f
AD
1694 lhs->type_name ? lhs->type_name : "",
1695 first_rhs->type_name ? first_rhs->type_name : "");
1ff442ca
NF
1696 }
1697 /* Warn if there is no default for $$ but we need one. */
1698 else if (!xactions && !first_rhs && lhs->type_name != 0)
a083fbbf 1699 warn(_("empty rule for typed nonterminal, and no action"));
1ff442ca
NF
1700 if (t == SEMICOLON)
1701 t = lex();
a083fbbf 1702 }
943819bf
RS
1703#if 0
1704 /* these things can appear as alternatives to rules. */
1705/* NO, they cannot.
1706 a) none of the documentation allows them
1707 b) most of them scan forward until finding a next %
1708 thus they may swallow lots of intervening rules
1709*/
1ff442ca
NF
1710 else if (t == TOKEN)
1711 {
1712 parse_token_decl(STOKEN, SNTERM);
1713 t = lex();
1714 }
1715 else if (t == NTERM)
1716 {
1717 parse_token_decl(SNTERM, STOKEN);
1718 t = lex();
1719 }
1720 else if (t == TYPE)
1721 {
1722 t = get_type();
1723 }
1724 else if (t == UNION)
1725 {
1726 parse_union_decl();
1727 t = lex();
1728 }
1729 else if (t == EXPECT)
1730 {
1731 parse_expect_decl();
1732 t = lex();
1733 }
1734 else if (t == START)
1735 {
1736 parse_start_decl();
1737 t = lex();
1738 }
943819bf
RS
1739#endif
1740
1ff442ca 1741 else
943819bf 1742 {
a083fbbf 1743 warns(_("invalid input: %s"), token_buffer);
943819bf
RS
1744 t = lex();
1745 }
1ff442ca
NF
1746 }
1747
943819bf
RS
1748 /* grammar has been read. Do some checking */
1749
1ff442ca 1750 if (nsyms > MAXSHORT)
a083fbbf 1751 fatals(_("too many symbols (tokens plus nonterminals); maximum %s"),
943819bf 1752 int_to_string(MAXSHORT));
1ff442ca 1753 if (nrules == 0)
a083fbbf 1754 fatal(_("no rules in the input grammar"));
1ff442ca
NF
1755
1756 if (typed == 0 /* JF put out same default YYSTYPE as YACC does */
1757 && !value_components_used)
1758 {
1759 /* We used to use `unsigned long' as YYSTYPE on MSDOS,
1760 but it seems better to be consistent.
1761 Most programs should declare their own type anyway. */
1762 fprintf(fattrs, "#ifndef YYSTYPE\n#define YYSTYPE int\n#endif\n");
1763 if (fdefines)
1764 fprintf(fdefines, "#ifndef YYSTYPE\n#define YYSTYPE int\n#endif\n");
1765 }
1766
1767 /* Report any undefined symbols and consider them nonterminals. */
1768
1769 for (bp = firstsymbol; bp; bp = bp->next)
1770 if (bp->class == SUNKNOWN)
1771 {
a083fbbf 1772 warns(_("symbol %s is used, but is not defined as a token and has no rules"),
1ff442ca 1773 bp->tag);
1ff442ca
NF
1774 bp->class = SNTERM;
1775 bp->value = nvars++;
1776 }
1777
1778 ntokens = nsyms - nvars;
1779}
1780
1781
1782void
118fb205 1783record_rule_line (void)
1ff442ca
NF
1784{
1785 /* Record each rule's source line number in rline table. */
1786
1787 if (nrules >= rline_allocated)
1788 {
1789 rline_allocated = nrules * 2;
118fb205
JT
1790 rline = (short *) xrealloc ((char *) rline,
1791 rline_allocated * sizeof (short));
1ff442ca
NF
1792 }
1793 rline[nrules] = lineno;
1794}
1795
1796
#if 0
/* Read in a %type declaration and record its information for
   get_type_name to access.

   This is unused.  It is only called from the #if 0 part of readgram.

   The type name is copied from token_buffer and stored in the
   type_name field of every identifier that follows, up to a semicolon
   or an unexpected token.  Returns the token that follows the
   declaration.  */
static int
get_type (void)
{
  int tok;
  int len;
  char *name;

  tok = lex ();

  if (tok != TYPENAME)
    {
      warn (_("ill-formed %type declaration"));
      return tok;
    }

  len = strlen (token_buffer);
  name = NEW2 (len + 1, char);
  strcpy (name, token_buffer);

  for (;;)
    {
      tok = lex ();

      if (tok == SEMICOLON)
	return (lex ());

      if (tok == COMMA)
	continue;

      /* Anything but an identifier ends the declaration.  */
      if (tok != IDENTIFIER)
	return (tok);

      if (symval->type_name == NULL)
	symval->type_name = name;
      else if (strcmp (name, symval->type_name) != 0)
	warns (_("type redeclaration for %s"), symval->tag);
    }
}
#endif
1ff442ca
NF
1845
1846
1847/* assign symbol numbers, and write definition of token names into fdefines.
1848Set up vectors tags and sprec of names and precedences of symbols. */
1849
1850void
118fb205 1851packsymbols (void)
1ff442ca
NF
1852{
1853 register bucket *bp;
1854 register int tokno = 1;
1855 register int i;
1856 register int last_user_token_number;
1857
1858 /* int lossage = 0; JF set but not used */
1859
1860 tags = NEW2(nsyms + 1, char *);
1861 tags[0] = "$";
943819bf
RS
1862 user_toknums = NEW2(nsyms + 1, int);
1863 user_toknums[0] = 0;
1ff442ca
NF
1864
1865 sprec = NEW2(nsyms, short);
1866 sassoc = NEW2(nsyms, short);
1867
1868 max_user_token_number = 256;
1869 last_user_token_number = 256;
1870
1871 for (bp = firstsymbol; bp; bp = bp->next)
1872 {
1873 if (bp->class == SNTERM)
1874 {
1875 bp->value += ntokens;
1876 }
943819bf
RS
1877 else if (bp->alias)
1878 {
1879 /* this symbol and its alias are a single token defn.
1880 allocate a tokno, and assign to both
a083fbbf 1881 check agreement of ->prec and ->assoc fields
943819bf
RS
1882 and make both the same
1883 */
1884 if (bp->value == 0)
1885 bp->value = bp->alias->value = tokno++;
1886
1887 if (bp->prec != bp->alias->prec) {
1888 if (bp->prec != 0 && bp->alias->prec != 0
1889 && bp->user_token_number == SALIAS)
a083fbbf 1890 warnss(_("conflicting precedences for %s and %s"),
943819bf
RS
1891 bp->tag, bp->alias->tag);
1892 if (bp->prec != 0) bp->alias->prec = bp->prec;
1893 else bp->prec = bp->alias->prec;
1894 }
1895
1896 if (bp->assoc != bp->alias->assoc) {
1897 if (bp->assoc != 0 && bp->alias->assoc != 0
1898 && bp->user_token_number == SALIAS)
a083fbbf 1899 warnss(_("conflicting assoc values for %s and %s"),
943819bf
RS
1900 bp->tag, bp->alias->tag);
1901 if (bp->assoc != 0) bp->alias->assoc = bp->assoc;
1902 else bp->assoc = bp->alias->assoc;
1903 }
1904
1905 if (bp->user_token_number == SALIAS)
1906 continue; /* do not do processing below for SALIASs */
1907
1908 }
1909 else /* bp->class == STOKEN */
1910 {
1911 bp->value = tokno++;
1912 }
1913
1914 if (bp->class == STOKEN)
1ff442ca
NF
1915 {
1916 if (translations && !(bp->user_token_number))
1917 bp->user_token_number = ++last_user_token_number;
1918 if (bp->user_token_number > max_user_token_number)
1919 max_user_token_number = bp->user_token_number;
1ff442ca
NF
1920 }
1921
1922 tags[bp->value] = bp->tag;
943819bf 1923 user_toknums[bp->value] = bp->user_token_number;
1ff442ca
NF
1924 sprec[bp->value] = bp->prec;
1925 sassoc[bp->value] = bp->assoc;
1926
1927 }
1928
1929 if (translations)
1930 {
1931 register int i;
1932
1933 token_translations = NEW2(max_user_token_number+1, short);
1934
1935 /* initialize all entries for literal tokens to 2,
572909b5
RS
1936 the internal token number for $undefined.,
1937 which represents all invalid inputs. */
1ff442ca 1938 for (i = 0; i <= max_user_token_number; i++)
a083fbbf 1939 token_translations[i] = 2;
1ff442ca 1940
943819bf
RS
1941 for (bp = firstsymbol; bp; bp = bp->next)
1942 {
1943 if (bp->value >= ntokens) continue; /* non-terminal */
a083fbbf 1944 if (bp->user_token_number == SALIAS) continue;
943819bf 1945 if (token_translations[bp->user_token_number] != 2)
a083fbbf 1946 warnsss(_("tokens %s and %s both assigned number %s"),
1ff442ca
NF
1947 tags[token_translations[bp->user_token_number]],
1948 bp->tag,
943819bf
RS
1949 int_to_string(bp->user_token_number));
1950 token_translations[bp->user_token_number] = bp->value;
1951 }
1ff442ca
NF
1952 }
1953
1954 error_token_number = errtoken->value;
1955
943819bf
RS
1956 if (! noparserflag)
1957 output_token_defines(ftable);
1ff442ca
NF
1958
1959 if (startval->class == SUNKNOWN)
a083fbbf 1960 fatals(_("the start symbol %s is undefined"), startval->tag);
1ff442ca 1961 else if (startval->class == STOKEN)
a083fbbf 1962 fatals(_("the start symbol %s is a token"), startval->tag);
1ff442ca
NF
1963
1964 start_symbol = startval->value;
1965
1966 if (definesflag)
1967 {
1968 output_token_defines(fdefines);
1969
1970 if (!pure_parser)
1971 {
1972 if (spec_name_prefix)
1973 fprintf(fdefines, "\nextern YYSTYPE %slval;\n", spec_name_prefix);
1974 else
1975 fprintf(fdefines, "\nextern YYSTYPE yylval;\n");
1976 }
1977
1978 if (semantic_parser)
1979 for (i = ntokens; i < nsyms; i++)
1980 {
1981 /* don't make these for dummy nonterminals made by gensym. */
1982 if (*tags[i] != '@')
1983 fprintf(fdefines, "#define\tNT%s\t%d\n", tags[i], i);
1984 }
1985#if 0
1986 /* `fdefines' is now a temporary file, so we need to copy its
1987 contents in `done', so we can't close it here. */
1988 fclose(fdefines);
1989 fdefines = NULL;
1990#endif
1991 }
1992}
a083fbbf
RS
1993
1994/* For named tokens, but not literal ones, define the name.
1995 The value is the user token number.
943819bf 1996*/
1ff442ca 1997void
118fb205 1998output_token_defines (FILE *file)
1ff442ca
NF
1999{
2000 bucket *bp;
943819bf
RS
2001 register char *cp, *symbol;
2002 register char c;
1ff442ca
NF
2003
2004 for (bp = firstsymbol; bp; bp = bp->next)
2005 {
943819bf 2006 symbol = bp->tag; /* get symbol */
1ff442ca 2007
943819bf
RS
2008 if (bp->value >= ntokens) continue;
2009 if (bp->user_token_number == SALIAS) continue;
2010 if ('\'' == *symbol) continue; /* skip literal character */
2011 if (bp == errtoken) continue; /* skip error token */
a083fbbf 2012 if ('\"' == *symbol)
1ff442ca 2013 {
943819bf
RS
2014 /* use literal string only if given a symbol with an alias */
2015 if (bp->alias)
2016 symbol = bp->alias->tag;
2017 else
2018 continue;
2019 }
1ff442ca 2020
943819bf
RS
2021 /* Don't #define nonliteral tokens whose names contain periods. */
2022 cp = symbol;
2023 while ((c = *cp++) && c != '.');
2024 if (c != '\0') continue;
1ff442ca 2025
943819bf 2026 fprintf(file, "#define\t%s\t%d\n", symbol,
a083fbbf
RS
2027 ((translations && ! rawtoknumflag)
2028 ? bp->user_token_number
943819bf
RS
2029 : bp->value));
2030 if (semantic_parser)
2031 fprintf(file, "#define\tT%s\t%d\n", symbol, bp->value);
1ff442ca
NF
2032 }
2033
2034 putc('\n', file);
2035}
2036
2037
2038
2039/* convert the rules into the representation using rrhs, rlhs and ritems. */
2040
2041void
118fb205 2042packgram (void)
1ff442ca
NF
2043{
2044 register int itemno;
2045 register int ruleno;
2046 register symbol_list *p;
2047/* register bucket *bp; JF unused */
2048
2049 bucket *ruleprec;
2050
2051 ritem = NEW2(nitems + 1, short);
2052 rlhs = NEW2(nrules, short) - 1;
2053 rrhs = NEW2(nrules, short) - 1;
2054 rprec = NEW2(nrules, short) - 1;
2055 rprecsym = NEW2(nrules, short) - 1;
2056 rassoc = NEW2(nrules, short) - 1;
2057
2058 itemno = 0;
2059 ruleno = 1;
2060
2061 p = grammar;
2062 while (p)
2063 {
2064 rlhs[ruleno] = p->sym->value;
2065 rrhs[ruleno] = itemno;
2066 ruleprec = p->ruleprec;
2067
2068 p = p->next;
2069 while (p && p->sym)
2070 {
2071 ritem[itemno++] = p->sym->value;
2072 /* A rule gets by default the precedence and associativity
2073 of the last token in it. */
2074 if (p->sym->class == STOKEN)
2075 {
2076 rprec[ruleno] = p->sym->prec;
2077 rassoc[ruleno] = p->sym->assoc;
2078 }
2079 if (p) p = p->next;
2080 }
2081
2082 /* If this rule has a %prec,
2083 the specified symbol's precedence replaces the default. */
2084 if (ruleprec)
2085 {
2086 rprec[ruleno] = ruleprec->prec;
2087 rassoc[ruleno] = ruleprec->assoc;
2088 rprecsym[ruleno] = ruleprec->value;
2089 }
2090
2091 ritem[itemno++] = -ruleno;
2092 ruleno++;
2093
2094 if (p) p = p->next;
2095 }
2096
2097 ritem[itemno] = 0;
2098}
2099\f
/* Read a signed decimal integer from STREAM and return its value.

   An optional leading `-' is accepted, followed by zero or more
   digits; with no digits the result is 0.  The first character that
   is not part of the number is pushed back onto STREAM.

   The number comes straight from the grammar file, so a very long run
   of digits must not overflow `int'.  Every digit is still consumed,
   but the magnitude stops growing at LIMIT, and the result is LIMIT or
   -LIMIT.  */

int
read_signed_integer (FILE *stream)
{
  /* Largest magnitude returned.  This is INT_MAX on usual platforms;
     it is computed here so that no extra header is needed.  */
  const int limit = (int) (~0U >> 1);
  int c = getc (stream);
  int negative = 0;
  int n = 0;

  if (c == '-')
    {
      c = getc (stream);
      negative = 1;
    }

  while (isdigit (c))
    {
      int digit = c - '0';

      /* 10 * n + digit fits exactly when n <= (limit - digit) / 10.  */
      if (n > (limit - digit) / 10)
	n = limit;
      else
	n = 10 * n + digit;

      c = getc (stream);
    }

  ungetc (c, stream);

  return negative ? -n : n;
}