]> git.saurik.com Git - bison.git/blame - src/reader.c
* src/output.c (output_rule_data): Output the documentation of
[bison.git] / src / reader.c
CommitLineData
1ff442ca 1/* Input parser for bison
74a53b4b 2 Copyright (C) 1984, 1986, 1989, 1992, 1998 Free Software Foundation, Inc.
1ff442ca
NF
3
4This file is part of Bison, the GNU Compiler Compiler.
5
6Bison is free software; you can redistribute it and/or modify
7it under the terms of the GNU General Public License as published by
8the Free Software Foundation; either version 2, or (at your option)
9any later version.
10
11Bison is distributed in the hope that it will be useful,
12but WITHOUT ANY WARRANTY; without even the implied warranty of
13MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14GNU General Public License for more details.
15
16You should have received a copy of the GNU General Public License
17along with Bison; see the file COPYING. If not, write to
c49a8e71
JT
18the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
19Boston, MA 02111-1307, USA. */
1ff442ca
NF
20
21
22/* read in the grammar specification and record it in the format described in gram.h.
23 All guards are copied into the fguard file and all actions into faction,
24 in each case forming the body of a C function (yyguard or yyaction)
25 which contains a switch statement to decide which guard or action to execute.
26
27The entry point is reader(). */
28
29#include <stdio.h>
1ff442ca
NF
30#include "system.h"
31#include "files.h"
7612000c 32#include "alloc.h"
1ff442ca
NF
33#include "symtab.h"
34#include "lex.h"
35#include "gram.h"
36#include "machine.h"
37
38#define LTYPESTR "\n#ifndef YYLTYPE\ntypedef\n struct yyltype\n\
39 {\n int timestamp;\n int first_line;\n int first_column;\
40\n int last_line;\n int last_column;\n char *text;\n }\n\
41 yyltype;\n\n#define YYLTYPE yyltype\n#endif\n\n"
42
43/* Number of slots allocated (but not necessarily used yet) in `rline' */
44int rline_allocated;
45
46extern char *program_name;
47extern int definesflag;
48extern int nolinesflag;
943819bf
RS
49extern int noparserflag;
50extern int rawtoknumflag;
1ff442ca
NF
51extern bucket *symval;
52extern int numval;
1ff442ca
NF
53extern int expected_conflicts;
54extern char *token_buffer;
118fb205
JT
55extern int maxtoken;
56
57extern void init_lex PARAMS((void));
58extern char *grow_token_buffer PARAMS((char *));
59extern void tabinit PARAMS((void));
60extern void output_headers PARAMS((void));
61extern void output_trailers PARAMS((void));
62extern void free_symtab PARAMS((void));
63extern void open_extra_files PARAMS((void));
64extern char *int_to_string PARAMS((int));
65extern char *printable_version PARAMS((int));
66extern void fatal PARAMS((char *));
67extern void fatals PARAMS((char *, char *));
68extern void warn PARAMS((char *));
69extern void warni PARAMS((char *, int));
70extern void warns PARAMS((char *, char *));
71extern void warnss PARAMS((char *, char *, char *));
72extern void warnsss PARAMS((char *, char *, char *, char *));
73extern void unlex PARAMS((int));
74extern void done PARAMS((int));
75
76extern int skip_white_space PARAMS((void));
77extern int parse_percent_token PARAMS((void));
78extern int lex PARAMS((void));
1ff442ca
NF
79
80typedef
81 struct symbol_list
82 {
83 struct symbol_list *next;
84 bucket *sym;
85 bucket *ruleprec;
86 }
87 symbol_list;
88
89
118fb205
JT
90void reader PARAMS((void));
91void reader_output_yylsp PARAMS((FILE *));
92void read_declarations PARAMS((void));
93void copy_definition PARAMS((void));
94void parse_token_decl PARAMS((int, int));
95void parse_start_decl PARAMS((void));
96void parse_type_decl PARAMS((void));
97void parse_assoc_decl PARAMS((int));
98void parse_union_decl PARAMS((void));
99void parse_expect_decl PARAMS((void));
100char *get_type_name PARAMS((int, symbol_list *));
101void copy_guard PARAMS((symbol_list *, int));
102void parse_thong_decl PARAMS((void));
103void copy_action PARAMS((symbol_list *, int));
104bucket *gensym PARAMS((void));
105void readgram PARAMS((void));
106void record_rule_line PARAMS((void));
107void packsymbols PARAMS((void));
108void output_token_defines PARAMS((FILE *));
109void packgram PARAMS((void));
110int read_signed_integer PARAMS((FILE *));
118fb205 111
2686a6e7
JT
112#if 0
113static int get_type PARAMS((void));
114#endif
1ff442ca
NF
115
116int lineno;
117symbol_list *grammar;
118int start_flag;
119bucket *startval;
120char **tags;
943819bf 121int *user_toknums;
1ff442ca
NF
122
123/* Nonzero if components of semantic values are used, implying
124 they must be unions. */
125static int value_components_used;
126
127static int typed; /* nonzero if %union has been seen. */
128
129static int lastprec; /* incremented for each %left, %right or %nonassoc seen */
130
131static int gensym_count; /* incremented for each generated symbol */
132
133static bucket *errtoken;
5b2e3c89 134static bucket *undeftoken;
1ff442ca
NF
135
136/* Nonzero if any action or guard uses the @n construct. */
137static int yylsp_needed;
138
943819bf
RS
139
140static void
118fb205 141skip_to_char (int target)
943819bf
RS
142{
143 int c;
144 if (target == '\n')
a083fbbf 145 warn(_(" Skipping to next \\n"));
943819bf 146 else
a083fbbf 147 warni(_(" Skipping to next %c"), target);
943819bf
RS
148
149 do
150 c = skip_white_space();
151 while (c != target && c != EOF);
a083fbbf 152 if (c != EOF)
943819bf
RS
153 ungetc(c, finput);
154}
155
156
1ff442ca 157void
118fb205 158reader (void)
1ff442ca
NF
159{
160 start_flag = 0;
161 startval = NULL; /* start symbol not specified yet. */
162
163#if 0
164 translations = 0; /* initially assume token number translation not needed. */
165#endif
166 /* Nowadays translations is always set to 1,
167 since we give `error' a user-token-number
168 to satisfy the Posix demand for YYERRCODE==256. */
169 translations = 1;
170
171 nsyms = 1;
172 nvars = 0;
173 nrules = 0;
174 nitems = 0;
175 rline_allocated = 10;
176 rline = NEW2(rline_allocated, short);
177
178 typed = 0;
179 lastprec = 0;
180
181 gensym_count = 0;
182
183 semantic_parser = 0;
184 pure_parser = 0;
185 yylsp_needed = 0;
186
187 grammar = NULL;
188
189 init_lex();
190 lineno = 1;
191
192 /* initialize the symbol table. */
193 tabinit();
194 /* construct the error token */
195 errtoken = getsym("error");
196 errtoken->class = STOKEN;
197 errtoken->user_token_number = 256; /* Value specified by posix. */
198 /* construct a token that represents all undefined literal tokens. */
199 /* it is always token number 2. */
5b2e3c89
JT
200 undeftoken = getsym("$undefined.");
201 undeftoken->class = STOKEN;
202 undeftoken->user_token_number = 2;
1ff442ca
NF
203 /* Read the declaration section. Copy %{ ... %} groups to ftable and fdefines file.
204 Also notice any %token, %left, etc. found there. */
a083fbbf 205 if (noparserflag)
943819bf
RS
206 fprintf(ftable, "\n/* Bison-generated parse tables, made from %s\n",
207 infile);
208 else
209 fprintf(ftable, "\n/* A Bison parser, made from %s\n", infile);
6ed61226 210 fprintf(ftable, " by %s */\n\n", VERSION_STRING);
1ff442ca
NF
211 fprintf(ftable, "#define YYBISON 1 /* Identify Bison output. */\n\n");
212 read_declarations();
1ff442ca
NF
213 /* start writing the guard and action files, if they are needed. */
214 output_headers();
215 /* read in the grammar, build grammar in list form. write out guards and actions. */
216 readgram();
217 /* Now we know whether we need the line-number stack.
218 If we do, write its type into the .tab.h file. */
943819bf
RS
219 if (fdefines)
220 reader_output_yylsp(fdefines);
1ff442ca
NF
221 /* write closing delimiters for actions and guards. */
222 output_trailers();
223 if (yylsp_needed)
224 fprintf(ftable, "#define YYLSP_NEEDED\n\n");
225 /* assign the symbols their symbol numbers.
226 Write #defines for the token symbols into fdefines if requested. */
227 packsymbols();
228 /* convert the grammar into the format described in gram.h. */
229 packgram();
230 /* free the symbol table data structure
231 since symbols are now all referred to by symbol number. */
232 free_symtab();
233}
234
943819bf 235void
118fb205 236reader_output_yylsp (FILE *f)
943819bf
RS
237{
238 if (yylsp_needed)
239 fprintf(f, LTYPESTR);
240}
1ff442ca
NF
241
242/* read from finput until %% is seen. Discard the %%.
243Handle any % declarations,
244and copy the contents of any %{ ... %} groups to fattrs. */
245
246void
118fb205 247read_declarations (void)
1ff442ca
NF
248{
249 register int c;
250 register int tok;
251
252 for (;;)
253 {
254 c = skip_white_space();
255
256 if (c == '%')
257 {
258 tok = parse_percent_token();
259
260 switch (tok)
261 {
262 case TWO_PERCENTS:
263 return;
264
265 case PERCENT_LEFT_CURLY:
266 copy_definition();
267 break;
268
269 case TOKEN:
270 parse_token_decl (STOKEN, SNTERM);
271 break;
a083fbbf 272
1ff442ca
NF
273 case NTERM:
274 parse_token_decl (SNTERM, STOKEN);
275 break;
a083fbbf 276
1ff442ca
NF
277 case TYPE:
278 parse_type_decl();
279 break;
a083fbbf 280
1ff442ca
NF
281 case START:
282 parse_start_decl();
283 break;
a083fbbf 284
1ff442ca
NF
285 case UNION:
286 parse_union_decl();
287 break;
a083fbbf 288
1ff442ca
NF
289 case EXPECT:
290 parse_expect_decl();
291 break;
943819bf
RS
292 case THONG:
293 parse_thong_decl();
294 break;
1ff442ca
NF
295 case LEFT:
296 parse_assoc_decl(LEFT_ASSOC);
297 break;
298
299 case RIGHT:
300 parse_assoc_decl(RIGHT_ASSOC);
301 break;
302
303 case NONASSOC:
304 parse_assoc_decl(NON_ASSOC);
305 break;
306
307 case SEMANTIC_PARSER:
308 if (semantic_parser == 0)
309 {
310 semantic_parser = 1;
311 open_extra_files();
312 }
313 break;
314
315 case PURE_PARSER:
316 pure_parser = 1;
317 break;
318
943819bf
RS
319 case NOOP:
320 break;
321
1ff442ca 322 default:
a083fbbf 323 warns(_("unrecognized: %s"), token_buffer);
943819bf
RS
324 skip_to_char('%');
325 }
1ff442ca
NF
326 }
327 else if (c == EOF)
a083fbbf 328 fatal(_("no input grammar"));
1ff442ca 329 else
943819bf
RS
330 {
331 char buff[100];
a083fbbf 332 sprintf(buff, _("unknown character: %s"), printable_version(c));
943819bf
RS
333 warn(buff);
334 skip_to_char('%');
335 }
1ff442ca
NF
336 }
337}
338
339
340/* copy the contents of a %{ ... %} into the definitions file.
341The %{ has already been read. Return after reading the %}. */
342
343void
118fb205 344copy_definition (void)
1ff442ca
NF
345{
346 register int c;
347 register int match;
348 register int ended;
349 register int after_percent; /* -1 while reading a character if prev char was % */
350 int cplus_comment;
351
352 if (!nolinesflag)
353 fprintf(fattrs, "#line %d \"%s\"\n", lineno, infile);
354
355 after_percent = 0;
356
357 c = getc(finput);
358
359 for (;;)
360 {
361 switch (c)
362 {
363 case '\n':
364 putc(c, fattrs);
365 lineno++;
366 break;
367
368 case '%':
369 after_percent = -1;
370 break;
a083fbbf 371
1ff442ca
NF
372 case '\'':
373 case '"':
374 match = c;
375 putc(c, fattrs);
376 c = getc(finput);
377
378 while (c != match)
379 {
943819bf 380 if (c == EOF)
a083fbbf 381 fatal(_("unterminated string at end of file"));
943819bf
RS
382 if (c == '\n')
383 {
a083fbbf 384 warn(_("unterminated string"));
943819bf
RS
385 ungetc(c, finput);
386 c = match;
387 continue;
388 }
1ff442ca
NF
389
390 putc(c, fattrs);
a083fbbf 391
1ff442ca
NF
392 if (c == '\\')
393 {
394 c = getc(finput);
395 if (c == EOF)
a083fbbf 396 fatal(_("unterminated string at end of file"));
1ff442ca
NF
397 putc(c, fattrs);
398 if (c == '\n')
399 lineno++;
400 }
401
402 c = getc(finput);
403 }
404
405 putc(c, fattrs);
406 break;
407
408 case '/':
409 putc(c, fattrs);
410 c = getc(finput);
411 if (c != '*' && c != '/')
412 continue;
413
414 cplus_comment = (c == '/');
415 putc(c, fattrs);
416 c = getc(finput);
417
418 ended = 0;
419 while (!ended)
420 {
421 if (!cplus_comment && c == '*')
422 {
423 while (c == '*')
424 {
425 putc(c, fattrs);
426 c = getc(finput);
427 }
428
429 if (c == '/')
430 {
431 putc(c, fattrs);
432 ended = 1;
433 }
434 }
435 else if (c == '\n')
436 {
437 lineno++;
438 putc(c, fattrs);
439 if (cplus_comment)
440 ended = 1;
441 else
442 c = getc(finput);
443 }
444 else if (c == EOF)
a083fbbf 445 fatal(_("unterminated comment in `%{' definition"));
1ff442ca
NF
446 else
447 {
448 putc(c, fattrs);
449 c = getc(finput);
450 }
451 }
452
453 break;
454
455 case EOF:
a083fbbf 456 fatal(_("unterminated `%{' definition"));
1ff442ca
NF
457
458 default:
459 putc(c, fattrs);
460 }
461
462 c = getc(finput);
463
464 if (after_percent)
465 {
466 if (c == '}')
467 return;
468 putc('%', fattrs);
469 }
470 after_percent = 0;
471
472 }
473
474}
475
476
477
478/* parse what comes after %token or %nterm.
479For %token, what_is is STOKEN and what_is_not is SNTERM.
480For %nterm, the arguments are reversed. */
481
482void
118fb205 483parse_token_decl (int what_is, int what_is_not)
1ff442ca 484{
1ff442ca 485 register int token = 0;
1ff442ca 486 register char *typename = 0;
943819bf 487 register struct bucket *symbol = NULL; /* pts to symbol being defined */
1ff442ca
NF
488 int k;
489
1ff442ca
NF
490 for (;;)
491 {
e6011337
JT
492 int tmp_char = ungetc (skip_white_space (), finput);
493
494 if (tmp_char == '%')
1ff442ca 495 return;
e6011337
JT
496 if (tmp_char == EOF)
497 fatals ("Premature EOF after %s", token_buffer);
498
1ff442ca
NF
499 token = lex();
500 if (token == COMMA)
943819bf
RS
501 {
502 symbol = NULL;
503 continue;
504 }
1ff442ca
NF
505 if (token == TYPENAME)
506 {
507 k = strlen(token_buffer);
508 typename = NEW2(k + 1, char);
509 strcpy(typename, token_buffer);
510 value_components_used = 1;
943819bf
RS
511 symbol = NULL;
512 }
513 else if (token == IDENTIFIER && *symval->tag == '\"'
a083fbbf 514 && symbol)
943819bf
RS
515 {
516 translations = 1;
517 symval->class = STOKEN;
518 symval->type_name = typename;
519 symval->user_token_number = symbol->user_token_number;
520 symbol->user_token_number = SALIAS;
521
a083fbbf
RS
522 symval->alias = symbol;
523 symbol->alias = symval;
943819bf
RS
524 symbol = NULL;
525
526 nsyms--; /* symbol and symval combined are only one symbol */
1ff442ca
NF
527 }
528 else if (token == IDENTIFIER)
529 {
530 int oldclass = symval->class;
943819bf 531 symbol = symval;
1ff442ca 532
943819bf 533 if (symbol->class == what_is_not)
a083fbbf 534 warns(_("symbol %s redefined"), symbol->tag);
943819bf 535 symbol->class = what_is;
1ff442ca 536 if (what_is == SNTERM && oldclass != SNTERM)
943819bf 537 symbol->value = nvars++;
1ff442ca
NF
538
539 if (typename)
540 {
943819bf
RS
541 if (symbol->type_name == NULL)
542 symbol->type_name = typename;
543 else if (strcmp(typename, symbol->type_name) != 0)
a083fbbf 544 warns(_("type redeclaration for %s"), symbol->tag);
1ff442ca
NF
545 }
546 }
943819bf 547 else if (symbol && token == NUMBER)
1ff442ca 548 {
943819bf 549 symbol->user_token_number = numval;
1ff442ca
NF
550 translations = 1;
551 }
552 else
943819bf 553 {
a083fbbf
RS
554 warnss(_("`%s' is invalid in %s"),
555 token_buffer,
943819bf
RS
556 (what_is == STOKEN) ? "%token" : "%nterm");
557 skip_to_char('%');
558 }
1ff442ca
NF
559 }
560
561}
562
a083fbbf 563/* parse what comes after %thong
943819bf
RS
564 the full syntax is
565 %thong <type> token number literal
566 the <type> or number may be omitted. The number specifies the
567 user_token_number.
568
569 Two symbols are entered in the table, one for the token symbol and
570 one for the literal. Both are given the <type>, if any, from the declaration.
571 The ->user_token_number of the first is SALIAS and the ->user_token_number
572 of the second is set to the number, if any, from the declaration.
573 The two symbols are linked via pointers in their ->alias fields.
a083fbbf 574
943819bf
RS
575 during output_defines_table, the symbol is reported
576 thereafter, only the literal string is retained
577 it is the literal string that is output to yytname
578*/
579
580void
118fb205 581parse_thong_decl (void)
943819bf
RS
582{
583 register int token;
584 register struct bucket *symbol;
585 register char *typename = 0;
586 int k, usrtoknum;
587
588 translations = 1;
589 token = lex(); /* fetch typename or first token */
590 if (token == TYPENAME) {
591 k = strlen(token_buffer);
592 typename = NEW2(k + 1, char);
593 strcpy(typename, token_buffer);
594 value_components_used = 1;
595 token = lex(); /* fetch first token */
596 }
597
598 /* process first token */
599
a083fbbf 600 if (token != IDENTIFIER)
943819bf 601 {
a083fbbf 602 warns(_("unrecognized item %s, expected an identifier"),
943819bf
RS
603 token_buffer);
604 skip_to_char('%');
605 return;
606 }
607 symval->class = STOKEN;
608 symval->type_name = typename;
609 symval->user_token_number = SALIAS;
610 symbol = symval;
611
612 token = lex(); /* get number or literal string */
a083fbbf 613
943819bf
RS
614 if (token == NUMBER) {
615 usrtoknum = numval;
616 token = lex(); /* okay, did number, now get literal */
617 }
618 else usrtoknum = 0;
619
620 /* process literal string token */
621
a083fbbf 622 if (token != IDENTIFIER || *symval->tag != '\"')
943819bf 623 {
a083fbbf 624 warns(_("expected string constant instead of %s"),
943819bf
RS
625 token_buffer);
626 skip_to_char('%');
627 return;
628 }
629 symval->class = STOKEN;
630 symval->type_name = typename;
631 symval->user_token_number = usrtoknum;
632
a083fbbf
RS
633 symval->alias = symbol;
634 symbol->alias = symval;
943819bf
RS
635
636 nsyms--; /* symbol and symval combined are only one symbol */
637}
1ff442ca
NF
638
639
640/* parse what comes after %start */
641
642void
118fb205 643parse_start_decl (void)
1ff442ca
NF
644{
645 if (start_flag)
a083fbbf 646 warn(_("multiple %start declarations"));
1ff442ca 647 if (lex() != IDENTIFIER)
a083fbbf 648 warn(_("invalid %start declaration"));
943819bf
RS
649 else
650 {
651 start_flag = 1;
652 startval = symval;
653 }
1ff442ca
NF
654}
655
656
657
658/* read in a %type declaration and record its information for get_type_name to access */
659
660void
118fb205 661parse_type_decl (void)
1ff442ca
NF
662{
663 register int k;
664 register char *name;
1ff442ca
NF
665
666 if (lex() != TYPENAME)
943819bf 667 {
a083fbbf 668 warn(_("%type declaration has no <typename>"));
943819bf
RS
669 skip_to_char('%');
670 return;
671 }
1ff442ca
NF
672
673 k = strlen(token_buffer);
674 name = NEW2(k + 1, char);
675 strcpy(name, token_buffer);
676
1ff442ca
NF
677 for (;;)
678 {
679 register int t;
e6011337 680 int tmp_char = ungetc (skip_white_space (), finput);
1ff442ca 681
e6011337 682 if (tmp_char == '%')
1ff442ca 683 return;
e6011337
JT
684 if (tmp_char == EOF)
685 fatals ("Premature EOF after %s", token_buffer);
1ff442ca 686
1ff442ca
NF
687 t = lex();
688
689 switch (t)
690 {
691
692 case COMMA:
693 case SEMICOLON:
694 break;
695
696 case IDENTIFIER:
697 if (symval->type_name == NULL)
698 symval->type_name = name;
943819bf 699 else if (strcmp(name, symval->type_name) != 0)
a083fbbf 700 warns(_("type redeclaration for %s"), symval->tag);
1ff442ca
NF
701
702 break;
703
704 default:
a083fbbf 705 warns(_("invalid %%type declaration due to item: `%s'"), token_buffer);
943819bf 706 skip_to_char('%');
1ff442ca
NF
707 }
708 }
709}
710
711
712
713/* read in a %left, %right or %nonassoc declaration and record its information. */
714/* assoc is either LEFT_ASSOC, RIGHT_ASSOC or NON_ASSOC. */
715
716void
118fb205 717parse_assoc_decl (int assoc)
1ff442ca
NF
718{
719 register int k;
720 register char *name = NULL;
943819bf 721 register int prev = 0;
1ff442ca
NF
722
723 lastprec++; /* Assign a new precedence level, never 0. */
724
1ff442ca
NF
725 for (;;)
726 {
727 register int t;
e6011337 728 int tmp_char = ungetc (skip_white_space (), finput);
1ff442ca 729
e6011337 730 if (tmp_char == '%')
1ff442ca 731 return;
e6011337
JT
732 if (tmp_char == EOF)
733 fatals ("Premature EOF after %s", token_buffer);
1ff442ca 734
1ff442ca
NF
735 t = lex();
736
737 switch (t)
738 {
739
740 case TYPENAME:
741 k = strlen(token_buffer);
742 name = NEW2(k + 1, char);
743 strcpy(name, token_buffer);
744 break;
745
746 case COMMA:
747 break;
748
749 case IDENTIFIER:
750 if (symval->prec != 0)
a083fbbf 751 warns(_("redefining precedence of %s"), symval->tag);
1ff442ca
NF
752 symval->prec = lastprec;
753 symval->assoc = assoc;
754 if (symval->class == SNTERM)
a083fbbf 755 warns(_("symbol %s redefined"), symval->tag);
1ff442ca
NF
756 symval->class = STOKEN;
757 if (name)
758 { /* record the type, if one is specified */
759 if (symval->type_name == NULL)
760 symval->type_name = name;
943819bf 761 else if (strcmp(name, symval->type_name) != 0)
a083fbbf 762 warns(_("type redeclaration for %s"), symval->tag);
1ff442ca
NF
763 }
764 break;
765
766 case NUMBER:
767 if (prev == IDENTIFIER)
768 {
769 symval->user_token_number = numval;
770 translations = 1;
771 }
a083fbbf 772 else
943819bf 773 {
a083fbbf 774 warns(_("invalid text (%s) - number should be after identifier"),
943819bf
RS
775 token_buffer);
776 skip_to_char('%');
777 }
1ff442ca
NF
778 break;
779
780 case SEMICOLON:
781 return;
782
783 default:
a083fbbf 784 warns(_("unexpected item: %s"), token_buffer);
943819bf 785 skip_to_char('%');
1ff442ca
NF
786 }
787
788 prev = t;
789
790 }
791}
792
793
794
795/* copy the union declaration into fattrs (and fdefines),
796 where it is made into the
797 definition of YYSTYPE, the type of elements of the parser value stack. */
798
799void
118fb205 800parse_union_decl (void)
1ff442ca
NF
801{
802 register int c;
803 register int count;
804 register int in_comment;
805 int cplus_comment;
806
807 if (typed)
a083fbbf 808 warn(_("multiple %union declarations"));
1ff442ca
NF
809
810 typed = 1;
811
812 if (!nolinesflag)
813 fprintf(fattrs, "\n#line %d \"%s\"\n", lineno, infile);
814 else
815 fprintf(fattrs, "\n");
816
817 fprintf(fattrs, "typedef union");
818 if (fdefines)
819 fprintf(fdefines, "typedef union");
820
821 count = 0;
822 in_comment = 0;
823
824 c = getc(finput);
825
826 while (c != EOF)
827 {
828 putc(c, fattrs);
829 if (fdefines)
830 putc(c, fdefines);
831
832 switch (c)
833 {
834 case '\n':
835 lineno++;
836 break;
837
838 case '/':
839 c = getc(finput);
840 if (c != '*' && c != '/')
841 ungetc(c, finput);
842 else
843 {
844 putc(c, fattrs);
845 if (fdefines)
846 putc(c, fdefines);
847 cplus_comment = (c == '/');
848 in_comment = 1;
849 c = getc(finput);
850 while (in_comment)
851 {
852 putc(c, fattrs);
853 if (fdefines)
854 putc(c, fdefines);
855
856 if (c == '\n')
857 {
858 lineno++;
859 if (cplus_comment)
860 {
861 in_comment = 0;
862 break;
863 }
864 }
865 if (c == EOF)
a083fbbf 866 fatal(_("unterminated comment at end of file"));
1ff442ca
NF
867
868 if (!cplus_comment && c == '*')
869 {
870 c = getc(finput);
871 if (c == '/')
872 {
873 putc('/', fattrs);
874 if (fdefines)
875 putc('/', fdefines);
876 in_comment = 0;
877 }
878 }
879 else
880 c = getc(finput);
881 }
882 }
883 break;
884
885
886 case '{':
887 count++;
888 break;
889
890 case '}':
891 if (count == 0)
a083fbbf 892 warn (_("unmatched close-brace (`}')"));
1ff442ca 893 count--;
943819bf 894 if (count <= 0)
1ff442ca
NF
895 {
896 fprintf(fattrs, " YYSTYPE;\n");
897 if (fdefines)
898 fprintf(fdefines, " YYSTYPE;\n");
899 /* JF don't choke on trailing semi */
900 c=skip_white_space();
901 if(c!=';') ungetc(c,finput);
902 return;
903 }
904 }
905
906 c = getc(finput);
907 }
908}
909
910/* parse the declaration %expect N which says to expect N
911 shift-reduce conflicts. */
912
913void
118fb205 914parse_expect_decl (void)
1ff442ca
NF
915{
916 register int c;
917 register int count;
918 char buffer[20];
919
920 c = getc(finput);
921 while (c == ' ' || c == '\t')
922 c = getc(finput);
923
924 count = 0;
925 while (c >= '0' && c <= '9')
926 {
927 if (count < 20)
928 buffer[count++] = c;
929 c = getc(finput);
930 }
931 buffer[count] = 0;
932
933 ungetc (c, finput);
934
943819bf 935 if (count <= 0 || count > 10)
a083fbbf 936 warn(_("argument of %expect is not an integer"));
1ff442ca
NF
937 expected_conflicts = atoi (buffer);
938}
939
940/* that's all of parsing the declaration section */
941\f
942/* Get the data type (alternative in the union) of the value for symbol n in rule rule. */
943
944char *
118fb205 945get_type_name (int n, symbol_list *rule)
1ff442ca 946{
a083fbbf 947 static char *msg = N_("invalid $ value");
1ff442ca
NF
948
949 register int i;
950 register symbol_list *rp;
951
952 if (n < 0)
943819bf 953 {
a083fbbf 954 warn(_(msg));
943819bf
RS
955 return NULL;
956 }
1ff442ca
NF
957
958 rp = rule;
959 i = 0;
960
961 while (i < n)
962 {
963 rp = rp->next;
964 if (rp == NULL || rp->sym == NULL)
943819bf 965 {
a083fbbf 966 warn(_(msg));
943819bf
RS
967 return NULL;
968 }
1ff442ca
NF
969 i++;
970 }
971
972 return (rp->sym->type_name);
973}
974
975
1ff442ca
NF
976/* after %guard is seen in the input file,
977copy the actual guard into the guards file.
978If the guard is followed by an action, copy that into the actions file.
979stack_offset is the number of values in the current rule so far,
980which says where to find $0 with respect to the top of the stack,
981for the simple parser in which the stack is not popped until after the guard is run. */
982
983void
118fb205 984copy_guard (symbol_list *rule, int stack_offset)
1ff442ca
NF
985{
986 register int c;
987 register int n;
988 register int count;
989 register int match;
990 register int ended;
991 register char *type_name;
992 int brace_flag = 0;
993 int cplus_comment;
994
995 /* offset is always 0 if parser has already popped the stack pointer */
996 if (semantic_parser) stack_offset = 0;
997
998 fprintf(fguard, "\ncase %d:\n", nrules);
999 if (!nolinesflag)
1000 fprintf(fguard, "#line %d \"%s\"\n", lineno, infile);
1001 putc('{', fguard);
1002
1003 count = 0;
1004 c = getc(finput);
1005
1006 while (brace_flag ? (count > 0) : (c != ';'))
1007 {
1008 switch (c)
1009 {
1010 case '\n':
1011 putc(c, fguard);
1012 lineno++;
1013 break;
1014
1015 case '{':
1016 putc(c, fguard);
1017 brace_flag = 1;
1018 count++;
1019 break;
1020
1021 case '}':
1022 putc(c, fguard);
1023 if (count > 0)
1024 count--;
a083fbbf 1025 else
943819bf 1026 {
a083fbbf 1027 warn(_("unmatched right brace (`}')"));
943819bf
RS
1028 c = getc(finput); /* skip it */
1029 }
1ff442ca
NF
1030 break;
1031
1032 case '\'':
1033 case '"':
1034 match = c;
1035 putc(c, fguard);
1036 c = getc(finput);
1037
1038 while (c != match)
1039 {
943819bf 1040 if (c == EOF)
a083fbbf
RS
1041 fatal(_("unterminated string at end of file"));
1042 if (c == '\n')
943819bf 1043 {
a083fbbf 1044 warn(_("unterminated string"));
943819bf
RS
1045 ungetc(c, finput);
1046 c = match; /* invent terminator */
1047 continue;
1048 }
1ff442ca
NF
1049
1050 putc(c, fguard);
a083fbbf 1051
1ff442ca
NF
1052 if (c == '\\')
1053 {
1054 c = getc(finput);
1055 if (c == EOF)
a083fbbf 1056 fatal(_("unterminated string"));
1ff442ca
NF
1057 putc(c, fguard);
1058 if (c == '\n')
1059 lineno++;
1060 }
1061
1062 c = getc(finput);
1063 }
1064
1065 putc(c, fguard);
1066 break;
1067
1068 case '/':
1069 putc(c, fguard);
1070 c = getc(finput);
1071 if (c != '*' && c != '/')
1072 continue;
1073
1074 cplus_comment = (c == '/');
1075 putc(c, fguard);
1076 c = getc(finput);
1077
1078 ended = 0;
1079 while (!ended)
1080 {
1081 if (!cplus_comment && c == '*')
1082 {
1083 while (c == '*')
1084 {
1085 putc(c, fguard);
1086 c = getc(finput);
1087 }
1088
1089 if (c == '/')
1090 {
1091 putc(c, fguard);
1092 ended = 1;
1093 }
1094 }
1095 else if (c == '\n')
1096 {
1097 lineno++;
1098 putc(c, fguard);
1099 if (cplus_comment)
1100 ended = 1;
1101 else
1102 c = getc(finput);
1103 }
1104 else if (c == EOF)
a083fbbf 1105 fatal(_("unterminated comment"));
1ff442ca
NF
1106 else
1107 {
1108 putc(c, fguard);
1109 c = getc(finput);
1110 }
1111 }
1112
1113 break;
1114
1115 case '$':
1116 c = getc(finput);
1117 type_name = NULL;
1118
1119 if (c == '<')
1120 {
1121 register char *cp = token_buffer;
1122
1123 while ((c = getc(finput)) != '>' && c > 0)
118fb205
JT
1124 {
1125 if (cp == token_buffer + maxtoken)
1126 cp = grow_token_buffer(cp);
1127
1128 *cp++ = c;
1129 }
1ff442ca
NF
1130 *cp = 0;
1131 type_name = token_buffer;
1132
1133 c = getc(finput);
1134 }
1135
1136 if (c == '$')
1137 {
1138 fprintf(fguard, "yyval");
1139 if (!type_name) type_name = rule->sym->type_name;
1140 if (type_name)
1141 fprintf(fguard, ".%s", type_name);
943819bf 1142 if(!type_name && typed)
a083fbbf 1143 warns(_("$$ of `%s' has no declared type"), rule->sym->tag);
1ff442ca
NF
1144 }
1145
1146 else if (isdigit(c) || c == '-')
1147 {
1148 ungetc (c, finput);
1149 n = read_signed_integer(finput);
1150 c = getc(finput);
1151
1152 if (!type_name && n > 0)
1153 type_name = get_type_name(n, rule);
1154
1155 fprintf(fguard, "yyvsp[%d]", n - stack_offset);
1156 if (type_name)
1157 fprintf(fguard, ".%s", type_name);
943819bf 1158 if(!type_name && typed)
a083fbbf 1159 warnss(_("$%s of `%s' has no declared type"), int_to_string(n), rule->sym->tag);
1ff442ca
NF
1160 continue;
1161 }
1162 else
aba5ca6d 1163 warns(_("$%s is invalid"), printable_version(c));
1ff442ca
NF
1164
1165 break;
1166
1167 case '@':
1168 c = getc(finput);
1169 if (isdigit(c) || c == '-')
1170 {
1171 ungetc (c, finput);
1172 n = read_signed_integer(finput);
1173 c = getc(finput);
1174 }
1175 else
943819bf 1176 {
aba5ca6d 1177 warns(_("@%s is invalid"), printable_version(c));
943819bf
RS
1178 n = 1;
1179 }
1ff442ca
NF
1180
1181 fprintf(fguard, "yylsp[%d]", n - stack_offset);
1182 yylsp_needed = 1;
1183
1184 continue;
1185
1186 case EOF:
a083fbbf 1187 fatal(_("unterminated %%guard clause"));
1ff442ca
NF
1188
1189 default:
1190 putc(c, fguard);
1191 }
1192
1193 if (c != '}' || count != 0)
1194 c = getc(finput);
1195 }
1196
1197 c = skip_white_space();
1198
1199 fprintf(fguard, ";\n break;}");
1200 if (c == '{')
1201 copy_action(rule, stack_offset);
1202 else if (c == '=')
1203 {
943819bf 1204 c = getc(finput); /* why not skip_white_space -wjh */
1ff442ca
NF
1205 if (c == '{')
1206 copy_action(rule, stack_offset);
1207 }
1208 else
1209 ungetc(c, finput);
1210}
1211
1212
1213
1214/* Assuming that a { has just been seen, copy everything up to the matching }
1215into the actions file.
1216stack_offset is the number of values in the current rule so far,
1217which says where to find $0 with respect to the top of the stack. */
1218
1219void
118fb205 1220copy_action (symbol_list *rule, int stack_offset)
1ff442ca
NF
1221{
1222 register int c;
1223 register int n;
1224 register int count;
1225 register int match;
1226 register int ended;
1227 register char *type_name;
1228 int cplus_comment;
1229
1230 /* offset is always 0 if parser has already popped the stack pointer */
1231 if (semantic_parser) stack_offset = 0;
1232
1233 fprintf(faction, "\ncase %d:\n", nrules);
1234 if (!nolinesflag)
1235 fprintf(faction, "#line %d \"%s\"\n", lineno, infile);
1236 putc('{', faction);
1237
1238 count = 1;
1239 c = getc(finput);
1240
1241 while (count > 0)
1242 {
1243 while (c != '}')
1244 {
1245 switch (c)
1246 {
1247 case '\n':
1248 putc(c, faction);
1249 lineno++;
1250 break;
1251
1252 case '{':
1253 putc(c, faction);
1254 count++;
1255 break;
1256
1257 case '\'':
1258 case '"':
1259 match = c;
1260 putc(c, faction);
1261 c = getc(finput);
1262
1263 while (c != match)
1264 {
943819bf
RS
1265 if (c == '\n')
1266 {
a083fbbf 1267 warn(_("unterminated string"));
943819bf
RS
1268 ungetc(c, finput);
1269 c = match;
1270 continue;
1271 }
1272 else if (c == EOF)
a083fbbf 1273 fatal(_("unterminated string at end of file"));
1ff442ca
NF
1274
1275 putc(c, faction);
1276
1277 if (c == '\\')
1278 {
1279 c = getc(finput);
1280 if (c == EOF)
a083fbbf 1281 fatal(_("unterminated string"));
1ff442ca
NF
1282 putc(c, faction);
1283 if (c == '\n')
1284 lineno++;
1285 }
1286
1287 c = getc(finput);
1288 }
1289
1290 putc(c, faction);
1291 break;
1292
1293 case '/':
1294 putc(c, faction);
1295 c = getc(finput);
1296 if (c != '*' && c != '/')
1297 continue;
1298
1299 cplus_comment = (c == '/');
1300 putc(c, faction);
1301 c = getc(finput);
1302
1303 ended = 0;
1304 while (!ended)
1305 {
1306 if (!cplus_comment && c == '*')
1307 {
1308 while (c == '*')
1309 {
1310 putc(c, faction);
1311 c = getc(finput);
1312 }
1313
1314 if (c == '/')
1315 {
1316 putc(c, faction);
1317 ended = 1;
1318 }
1319 }
1320 else if (c == '\n')
1321 {
1322 lineno++;
1323 putc(c, faction);
1324 if (cplus_comment)
1325 ended = 1;
1326 else
1327 c = getc(finput);
1328 }
1329 else if (c == EOF)
a083fbbf 1330 fatal(_("unterminated comment"));
1ff442ca
NF
1331 else
1332 {
1333 putc(c, faction);
1334 c = getc(finput);
1335 }
1336 }
1337
1338 break;
1339
1340 case '$':
1341 c = getc(finput);
1342 type_name = NULL;
1343
1344 if (c == '<')
1345 {
1346 register char *cp = token_buffer;
1347
1348 while ((c = getc(finput)) != '>' && c > 0)
118fb205
JT
1349 {
1350 if (cp == token_buffer + maxtoken)
1351 cp = grow_token_buffer(cp);
1352
1353 *cp++ = c;
1354 }
1ff442ca
NF
1355 *cp = 0;
1356 type_name = token_buffer;
1357 value_components_used = 1;
1358
1359 c = getc(finput);
1360 }
1361 if (c == '$')
1362 {
1363 fprintf(faction, "yyval");
1364 if (!type_name) type_name = get_type_name(0, rule);
1365 if (type_name)
1366 fprintf(faction, ".%s", type_name);
a083fbbf
RS
1367 if(!type_name && typed)
1368 warns(_("$$ of `%s' has no declared type"), rule->sym->tag);
1ff442ca
NF
1369 }
1370 else if (isdigit(c) || c == '-')
1371 {
1372 ungetc (c, finput);
1373 n = read_signed_integer(finput);
1374 c = getc(finput);
1375
1376 if (!type_name && n > 0)
1377 type_name = get_type_name(n, rule);
1378
1379 fprintf(faction, "yyvsp[%d]", n - stack_offset);
1380 if (type_name)
1381 fprintf(faction, ".%s", type_name);
a083fbbf
RS
1382 if(!type_name && typed)
1383 warnss(_("$%s of `%s' has no declared type"),
943819bf 1384 int_to_string(n), rule->sym->tag);
1ff442ca
NF
1385 continue;
1386 }
1387 else
aba5ca6d 1388 warns(_("$%s is invalid"), printable_version(c));
1ff442ca
NF
1389
1390 break;
1391
1392 case '@':
1393 c = getc(finput);
1394 if (isdigit(c) || c == '-')
1395 {
1396 ungetc (c, finput);
1397 n = read_signed_integer(finput);
1398 c = getc(finput);
1399 }
1400 else
943819bf 1401 {
a083fbbf 1402 warn(_("invalid @-construct"));
943819bf
RS
1403 n = 1;
1404 }
1ff442ca
NF
1405
1406 fprintf(faction, "yylsp[%d]", n - stack_offset);
1407 yylsp_needed = 1;
1408
1409 continue;
1410
1411 case EOF:
a083fbbf 1412 fatal(_("unmatched `{'"));
1ff442ca
NF
1413
1414 default:
1415 putc(c, faction);
1416 }
1417
1418 c = getc(finput);
1419 }
1420
1421 /* above loop exits when c is '}' */
1422
1423 if (--count)
1424 {
1425 putc(c, faction);
1426 c = getc(finput);
1427 }
1428 }
1429
1430 fprintf(faction, ";\n break;}");
1431}
1432
1433
1434
1435/* generate a dummy symbol, a nonterminal,
1436whose name cannot conflict with the user's names. */
1437
1438bucket *
118fb205 1439gensym (void)
1ff442ca
NF
1440{
1441 register bucket *sym;
1442
1443 sprintf (token_buffer, "@%d", ++gensym_count);
1444 sym = getsym(token_buffer);
1445 sym->class = SNTERM;
1446 sym->value = nvars++;
1447 return (sym);
1448}
1449
1450/* Parse the input grammar into a one symbol_list structure.
1451Each rule is represented by a sequence of symbols: the left hand side
1452followed by the contents of the right hand side, followed by a null pointer
1453instead of a symbol to terminate the rule.
1454The next symbol is the lhs of the following rule.
1455
1456All guards and actions are copied out to the appropriate files,
1457labelled by the rule number they apply to. */
1458
1459void
118fb205 1460readgram (void)
1ff442ca
NF
1461{
1462 register int t;
2686a6e7 1463 register bucket *lhs = NULL;
1ff442ca
NF
1464 register symbol_list *p;
1465 register symbol_list *p1;
1466 register bucket *bp;
1467
1468 symbol_list *crule; /* points to first symbol_list of current rule. */
1469 /* its symbol is the lhs of the rule. */
1470 symbol_list *crule1; /* points to the symbol_list preceding crule. */
1471
1472 p1 = NULL;
1473
1474 t = lex();
1475
1476 while (t != TWO_PERCENTS && t != ENDFILE)
1477 {
1478 if (t == IDENTIFIER || t == BAR)
1479 {
1480 register int actionflag = 0;
1481 int rulelength = 0; /* number of symbols in rhs of this rule so far */
1482 int xactions = 0; /* JF for error checking */
1483 bucket *first_rhs = 0;
1484
1485 if (t == IDENTIFIER)
1486 {
1487 lhs = symval;
943819bf
RS
1488
1489 if (!start_flag)
1490 {
1491 startval = lhs;
1492 start_flag = 1;
1493 }
a083fbbf 1494
1ff442ca
NF
1495 t = lex();
1496 if (t != COLON)
943819bf 1497 {
a083fbbf 1498 warn(_("ill-formed rule: initial symbol not followed by colon"));
943819bf
RS
1499 unlex(t);
1500 }
1ff442ca
NF
1501 }
1502
943819bf 1503 if (nrules == 0 && t == BAR)
1ff442ca 1504 {
a083fbbf 1505 warn(_("grammar starts with vertical bar"));
943819bf 1506 lhs = symval; /* BOGUS: use a random symval */
1ff442ca 1507 }
1ff442ca
NF
1508 /* start a new rule and record its lhs. */
1509
1510 nrules++;
1511 nitems++;
1512
1513 record_rule_line ();
1514
1515 p = NEW(symbol_list);
1516 p->sym = lhs;
1517
1518 crule1 = p1;
1519 if (p1)
1520 p1->next = p;
1521 else
1522 grammar = p;
1523
1524 p1 = p;
1525 crule = p;
1526
1527 /* mark the rule's lhs as a nonterminal if not already so. */
1528
1529 if (lhs->class == SUNKNOWN)
1530 {
1531 lhs->class = SNTERM;
1532 lhs->value = nvars;
1533 nvars++;
1534 }
1535 else if (lhs->class == STOKEN)
a083fbbf 1536 warns(_("rule given for %s, which is a token"), lhs->tag);
1ff442ca
NF
1537
1538 /* read the rhs of the rule. */
1539
1540 for (;;)
1541 {
1542 t = lex();
943819bf
RS
1543 if (t == PREC)
1544 {
1545 t = lex();
1546 crule->ruleprec = symval;
1547 t = lex();
1548 }
1ff442ca
NF
1549
1550 if (! (t == IDENTIFIER || t == LEFT_CURLY)) break;
1551
1552 /* If next token is an identifier, see if a colon follows it.
1553 If one does, exit this rule now. */
1554 if (t == IDENTIFIER)
1555 {
1556 register bucket *ssave;
1557 register int t1;
1558
1559 ssave = symval;
1560 t1 = lex();
1561 unlex(t1);
1562 symval = ssave;
1563 if (t1 == COLON) break;
1564
1565 if(!first_rhs) /* JF */
1566 first_rhs = symval;
1567 /* Not followed by colon =>
1568 process as part of this rule's rhs. */
1569 }
1570
1571 /* If we just passed an action, that action was in the middle
1572 of a rule, so make a dummy rule to reduce it to a
1573 non-terminal. */
1574 if (actionflag)
1575 {
1576 register bucket *sdummy;
1577
1578 /* Since the action was written out with this rule's */
943819bf 1579 /* number, we must give the new rule this number */
1ff442ca
NF
1580 /* by inserting the new rule before it. */
1581
1582 /* Make a dummy nonterminal, a gensym. */
1583 sdummy = gensym();
1584
1585 /* Make a new rule, whose body is empty,
1586 before the current one, so that the action
1587 just read can belong to it. */
1588 nrules++;
1589 nitems++;
1590 record_rule_line ();
1591 p = NEW(symbol_list);
1592 if (crule1)
1593 crule1->next = p;
1594 else grammar = p;
1595 p->sym = sdummy;
1596 crule1 = NEW(symbol_list);
1597 p->next = crule1;
1598 crule1->next = crule;
1599
1600 /* insert the dummy generated by that rule into this rule. */
1601 nitems++;
1602 p = NEW(symbol_list);
1603 p->sym = sdummy;
1604 p1->next = p;
1605 p1 = p;
1606
1607 actionflag = 0;
1608 }
1609
1610 if (t == IDENTIFIER)
1611 {
1612 nitems++;
1613 p = NEW(symbol_list);
1614 p->sym = symval;
1615 p1->next = p;
1616 p1 = p;
1617 }
1618 else /* handle an action. */
1619 {
1620 copy_action(crule, rulelength);
1621 actionflag = 1;
1622 xactions++; /* JF */
1623 }
1624 rulelength++;
943819bf 1625 } /* end of read rhs of rule */
1ff442ca
NF
1626
1627 /* Put an empty link in the list to mark the end of this rule */
1628 p = NEW(symbol_list);
1629 p1->next = p;
1630 p1 = p;
1631
1632 if (t == PREC)
1633 {
a083fbbf 1634 warn(_("two @prec's in a row"));
1ff442ca
NF
1635 t = lex();
1636 crule->ruleprec = symval;
1637 t = lex();
1638 }
1639 if (t == GUARD)
1640 {
1641 if (! semantic_parser)
a083fbbf 1642 warn(_("%%guard present but %%semantic_parser not specified"));
1ff442ca
NF
1643
1644 copy_guard(crule, rulelength);
1645 t = lex();
1646 }
1647 else if (t == LEFT_CURLY)
1648 {
943819bf 1649 /* This case never occurs -wjh */
a083fbbf 1650 if (actionflag) warn(_("two actions at end of one rule"));
1ff442ca 1651 copy_action(crule, rulelength);
943819bf
RS
1652 actionflag = 1;
1653 xactions++; /* -wjh */
1ff442ca
NF
1654 t = lex();
1655 }
1656 /* If $$ is being set in default way,
1657 warn if any type mismatch. */
1658 else if (!xactions && first_rhs && lhs->type_name != first_rhs->type_name)
1659 {
1660 if (lhs->type_name == 0 || first_rhs->type_name == 0
1661 || strcmp(lhs->type_name,first_rhs->type_name))
a083fbbf 1662 warnss(_("type clash (`%s' `%s') on default action"),
1ff442ca
NF
1663 lhs->type_name ? lhs->type_name : "",
1664 first_rhs->type_name ? first_rhs->type_name : "");
1665 }
1666 /* Warn if there is no default for $$ but we need one. */
1667 else if (!xactions && !first_rhs && lhs->type_name != 0)
a083fbbf 1668 warn(_("empty rule for typed nonterminal, and no action"));
1ff442ca
NF
1669 if (t == SEMICOLON)
1670 t = lex();
a083fbbf 1671 }
943819bf
RS
1672#if 0
1673 /* these things can appear as alternatives to rules. */
1674/* NO, they cannot.
1675 a) none of the documentation allows them
1676 b) most of them scan forward until finding a next %
1677 thus they may swallow lots of intervening rules
1678*/
1ff442ca
NF
1679 else if (t == TOKEN)
1680 {
1681 parse_token_decl(STOKEN, SNTERM);
1682 t = lex();
1683 }
1684 else if (t == NTERM)
1685 {
1686 parse_token_decl(SNTERM, STOKEN);
1687 t = lex();
1688 }
1689 else if (t == TYPE)
1690 {
1691 t = get_type();
1692 }
1693 else if (t == UNION)
1694 {
1695 parse_union_decl();
1696 t = lex();
1697 }
1698 else if (t == EXPECT)
1699 {
1700 parse_expect_decl();
1701 t = lex();
1702 }
1703 else if (t == START)
1704 {
1705 parse_start_decl();
1706 t = lex();
1707 }
943819bf
RS
1708#endif
1709
1ff442ca 1710 else
943819bf 1711 {
a083fbbf 1712 warns(_("invalid input: %s"), token_buffer);
943819bf
RS
1713 t = lex();
1714 }
1ff442ca
NF
1715 }
1716
943819bf
RS
1717 /* grammar has been read. Do some checking */
1718
1ff442ca 1719 if (nsyms > MAXSHORT)
a083fbbf 1720 fatals(_("too many symbols (tokens plus nonterminals); maximum %s"),
943819bf 1721 int_to_string(MAXSHORT));
1ff442ca 1722 if (nrules == 0)
a083fbbf 1723 fatal(_("no rules in the input grammar"));
1ff442ca
NF
1724
1725 if (typed == 0 /* JF put out same default YYSTYPE as YACC does */
1726 && !value_components_used)
1727 {
1728 /* We used to use `unsigned long' as YYSTYPE on MSDOS,
1729 but it seems better to be consistent.
1730 Most programs should declare their own type anyway. */
1731 fprintf(fattrs, "#ifndef YYSTYPE\n#define YYSTYPE int\n#endif\n");
1732 if (fdefines)
1733 fprintf(fdefines, "#ifndef YYSTYPE\n#define YYSTYPE int\n#endif\n");
1734 }
1735
1736 /* Report any undefined symbols and consider them nonterminals. */
1737
1738 for (bp = firstsymbol; bp; bp = bp->next)
1739 if (bp->class == SUNKNOWN)
1740 {
a083fbbf 1741 warns(_("symbol %s is used, but is not defined as a token and has no rules"),
1ff442ca 1742 bp->tag);
1ff442ca
NF
1743 bp->class = SNTERM;
1744 bp->value = nvars++;
1745 }
1746
1747 ntokens = nsyms - nvars;
1748}
1749
1750
1751void
118fb205 1752record_rule_line (void)
1ff442ca
NF
1753{
1754 /* Record each rule's source line number in rline table. */
1755
1756 if (nrules >= rline_allocated)
1757 {
1758 rline_allocated = nrules * 2;
118fb205
JT
1759 rline = (short *) xrealloc ((char *) rline,
1760 rline_allocated * sizeof (short));
1ff442ca
NF
1761 }
1762 rline[nrules] = lineno;
1763}
1764
1765
#if 0
/* Read a %type declaration and attach its type name to every identifier
   listed, for get_type_name to look up later.  Returns the first token
   that is not part of the declaration.
   This is unused: it is only called from the #if 0 part of readgram.  */
static int
get_type (void)
{
  int tok;
  int len;
  char *name;

  tok = lex ();
  if (tok != TYPENAME)
    {
      warn (_("ill-formed %type declaration"));
      return tok;
    }

  /* Keep a private copy of the type name; token_buffer gets reused.  */
  len = strlen (token_buffer);
  name = NEW2 (len + 1, char);
  strcpy (name, token_buffer);

  for (;;)
    {
      tok = lex ();

      if (tok == SEMICOLON)
        return lex ();

      if (tok == COMMA)
        continue;

      if (tok != IDENTIFIER)
        return tok;

      if (symval->type_name == NULL)
        symval->type_name = name;
      else if (strcmp (name, symval->type_name) != 0)
        warns (_("type redeclaration for %s"), symval->tag);
    }
}
#endif
1ff442ca
NF
1814
1815
1816/* assign symbol numbers, and write definition of token names into fdefines.
1817Set up vectors tags and sprec of names and precedences of symbols. */
1818
1819void
118fb205 1820packsymbols (void)
1ff442ca
NF
1821{
1822 register bucket *bp;
1823 register int tokno = 1;
1824 register int i;
1825 register int last_user_token_number;
1826
1827 /* int lossage = 0; JF set but not used */
1828
1829 tags = NEW2(nsyms + 1, char *);
1830 tags[0] = "$";
943819bf
RS
1831 user_toknums = NEW2(nsyms + 1, int);
1832 user_toknums[0] = 0;
1ff442ca
NF
1833
1834 sprec = NEW2(nsyms, short);
1835 sassoc = NEW2(nsyms, short);
1836
1837 max_user_token_number = 256;
1838 last_user_token_number = 256;
1839
1840 for (bp = firstsymbol; bp; bp = bp->next)
1841 {
1842 if (bp->class == SNTERM)
1843 {
1844 bp->value += ntokens;
1845 }
943819bf
RS
1846 else if (bp->alias)
1847 {
1848 /* this symbol and its alias are a single token defn.
1849 allocate a tokno, and assign to both
a083fbbf 1850 check agreement of ->prec and ->assoc fields
943819bf
RS
1851 and make both the same
1852 */
1853 if (bp->value == 0)
1854 bp->value = bp->alias->value = tokno++;
1855
1856 if (bp->prec != bp->alias->prec) {
1857 if (bp->prec != 0 && bp->alias->prec != 0
1858 && bp->user_token_number == SALIAS)
a083fbbf 1859 warnss(_("conflicting precedences for %s and %s"),
943819bf
RS
1860 bp->tag, bp->alias->tag);
1861 if (bp->prec != 0) bp->alias->prec = bp->prec;
1862 else bp->prec = bp->alias->prec;
1863 }
1864
1865 if (bp->assoc != bp->alias->assoc) {
1866 if (bp->assoc != 0 && bp->alias->assoc != 0
1867 && bp->user_token_number == SALIAS)
a083fbbf 1868 warnss(_("conflicting assoc values for %s and %s"),
943819bf
RS
1869 bp->tag, bp->alias->tag);
1870 if (bp->assoc != 0) bp->alias->assoc = bp->assoc;
1871 else bp->assoc = bp->alias->assoc;
1872 }
1873
1874 if (bp->user_token_number == SALIAS)
1875 continue; /* do not do processing below for SALIASs */
1876
1877 }
1878 else /* bp->class == STOKEN */
1879 {
1880 bp->value = tokno++;
1881 }
1882
1883 if (bp->class == STOKEN)
1ff442ca
NF
1884 {
1885 if (translations && !(bp->user_token_number))
1886 bp->user_token_number = ++last_user_token_number;
1887 if (bp->user_token_number > max_user_token_number)
1888 max_user_token_number = bp->user_token_number;
1ff442ca
NF
1889 }
1890
1891 tags[bp->value] = bp->tag;
943819bf 1892 user_toknums[bp->value] = bp->user_token_number;
1ff442ca
NF
1893 sprec[bp->value] = bp->prec;
1894 sassoc[bp->value] = bp->assoc;
1895
1896 }
1897
1898 if (translations)
1899 {
1900 register int i;
1901
1902 token_translations = NEW2(max_user_token_number+1, short);
1903
1904 /* initialize all entries for literal tokens to 2,
572909b5
RS
1905 the internal token number for $undefined.,
1906 which represents all invalid inputs. */
1ff442ca 1907 for (i = 0; i <= max_user_token_number; i++)
a083fbbf 1908 token_translations[i] = 2;
1ff442ca 1909
943819bf
RS
1910 for (bp = firstsymbol; bp; bp = bp->next)
1911 {
1912 if (bp->value >= ntokens) continue; /* non-terminal */
a083fbbf 1913 if (bp->user_token_number == SALIAS) continue;
943819bf 1914 if (token_translations[bp->user_token_number] != 2)
a083fbbf 1915 warnsss(_("tokens %s and %s both assigned number %s"),
1ff442ca
NF
1916 tags[token_translations[bp->user_token_number]],
1917 bp->tag,
943819bf
RS
1918 int_to_string(bp->user_token_number));
1919 token_translations[bp->user_token_number] = bp->value;
1920 }
1ff442ca
NF
1921 }
1922
1923 error_token_number = errtoken->value;
1924
943819bf
RS
1925 if (! noparserflag)
1926 output_token_defines(ftable);
1ff442ca
NF
1927
1928 if (startval->class == SUNKNOWN)
a083fbbf 1929 fatals(_("the start symbol %s is undefined"), startval->tag);
1ff442ca 1930 else if (startval->class == STOKEN)
a083fbbf 1931 fatals(_("the start symbol %s is a token"), startval->tag);
1ff442ca
NF
1932
1933 start_symbol = startval->value;
1934
1935 if (definesflag)
1936 {
1937 output_token_defines(fdefines);
1938
1939 if (!pure_parser)
1940 {
1941 if (spec_name_prefix)
1942 fprintf(fdefines, "\nextern YYSTYPE %slval;\n", spec_name_prefix);
1943 else
1944 fprintf(fdefines, "\nextern YYSTYPE yylval;\n");
1945 }
1946
1947 if (semantic_parser)
1948 for (i = ntokens; i < nsyms; i++)
1949 {
1950 /* don't make these for dummy nonterminals made by gensym. */
1951 if (*tags[i] != '@')
1952 fprintf(fdefines, "#define\tNT%s\t%d\n", tags[i], i);
1953 }
1954#if 0
1955 /* `fdefines' is now a temporary file, so we need to copy its
1956 contents in `done', so we can't close it here. */
1957 fclose(fdefines);
1958 fdefines = NULL;
1959#endif
1960 }
1961}
a083fbbf
RS
1962
1963/* For named tokens, but not literal ones, define the name.
1964 The value is the user token number.
943819bf 1965*/
1ff442ca 1966void
118fb205 1967output_token_defines (FILE *file)
1ff442ca
NF
1968{
1969 bucket *bp;
943819bf
RS
1970 register char *cp, *symbol;
1971 register char c;
1ff442ca
NF
1972
1973 for (bp = firstsymbol; bp; bp = bp->next)
1974 {
943819bf 1975 symbol = bp->tag; /* get symbol */
1ff442ca 1976
943819bf
RS
1977 if (bp->value >= ntokens) continue;
1978 if (bp->user_token_number == SALIAS) continue;
1979 if ('\'' == *symbol) continue; /* skip literal character */
1980 if (bp == errtoken) continue; /* skip error token */
a083fbbf 1981 if ('\"' == *symbol)
1ff442ca 1982 {
943819bf
RS
1983 /* use literal string only if given a symbol with an alias */
1984 if (bp->alias)
1985 symbol = bp->alias->tag;
1986 else
1987 continue;
1988 }
1ff442ca 1989
943819bf
RS
1990 /* Don't #define nonliteral tokens whose names contain periods. */
1991 cp = symbol;
1992 while ((c = *cp++) && c != '.');
1993 if (c != '\0') continue;
1ff442ca 1994
943819bf 1995 fprintf(file, "#define\t%s\t%d\n", symbol,
a083fbbf
RS
1996 ((translations && ! rawtoknumflag)
1997 ? bp->user_token_number
943819bf
RS
1998 : bp->value));
1999 if (semantic_parser)
2000 fprintf(file, "#define\tT%s\t%d\n", symbol, bp->value);
1ff442ca
NF
2001 }
2002
2003 putc('\n', file);
2004}
2005
2006
2007
2008/* convert the rules into the representation using rrhs, rlhs and ritems. */
2009
2010void
118fb205 2011packgram (void)
1ff442ca
NF
2012{
2013 register int itemno;
2014 register int ruleno;
2015 register symbol_list *p;
2016/* register bucket *bp; JF unused */
2017
2018 bucket *ruleprec;
2019
2020 ritem = NEW2(nitems + 1, short);
2021 rlhs = NEW2(nrules, short) - 1;
2022 rrhs = NEW2(nrules, short) - 1;
2023 rprec = NEW2(nrules, short) - 1;
2024 rprecsym = NEW2(nrules, short) - 1;
2025 rassoc = NEW2(nrules, short) - 1;
2026
2027 itemno = 0;
2028 ruleno = 1;
2029
2030 p = grammar;
2031 while (p)
2032 {
2033 rlhs[ruleno] = p->sym->value;
2034 rrhs[ruleno] = itemno;
2035 ruleprec = p->ruleprec;
2036
2037 p = p->next;
2038 while (p && p->sym)
2039 {
2040 ritem[itemno++] = p->sym->value;
2041 /* A rule gets by default the precedence and associativity
2042 of the last token in it. */
2043 if (p->sym->class == STOKEN)
2044 {
2045 rprec[ruleno] = p->sym->prec;
2046 rassoc[ruleno] = p->sym->assoc;
2047 }
2048 if (p) p = p->next;
2049 }
2050
2051 /* If this rule has a %prec,
2052 the specified symbol's precedence replaces the default. */
2053 if (ruleprec)
2054 {
2055 rprec[ruleno] = ruleprec->prec;
2056 rassoc[ruleno] = ruleprec->assoc;
2057 rprecsym[ruleno] = ruleprec->value;
2058 }
2059
2060 ritem[itemno++] = -ruleno;
2061 ruleno++;
2062
2063 if (p) p = p->next;
2064 }
2065
2066 ritem[itemno] = 0;
2067}
2068\f
/* Read an optionally `-'-prefixed run of decimal digits from STREAM and
   return its value; with no digits the result is 0.  The first character
   that is not part of the number is pushed back onto STREAM.  */

int
read_signed_integer (FILE *stream)
{
  int negative = 0;
  int magnitude = 0;
  int ch = getc (stream);

  if (ch == '-')
    {
      negative = 1;
      ch = getc (stream);
    }

  for (; isdigit (ch); ch = getc (stream))
    magnitude = magnitude * 10 + (ch - '0');

  ungetc (ch, stream);

  return negative ? -magnitude : magnitude;
}