]> git.saurik.com Git - bison.git/blame - src/reader.c
Be kind to translators, save some useless translations.
[bison.git] / src / reader.c
CommitLineData
1ff442ca 1/* Input parser for bison
41aca2e0 2 Copyright (C) 1984, 86, 89, 92, 98, 2000 Free Software Foundation, Inc.
1ff442ca 3
41aca2e0 4 This file is part of Bison, the GNU Compiler Compiler.
1ff442ca 5
41aca2e0
AD
6 Bison is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 2, or (at your option)
9 any later version.
1ff442ca 10
41aca2e0
AD
11 Bison is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
1ff442ca 15
41aca2e0
AD
16 You should have received a copy of the GNU General Public License
17 along with Bison; see the file COPYING. If not, write to
18 the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
19 Boston, MA 02111-1307, USA. */
1ff442ca
NF
20
21
41aca2e0
AD
22/* Read in the grammar specification and record it in the format
23 described in gram.h. All guards are copied into the fguard file
24 and all actions into faction, in each case forming the body of a C
25 function (yyguard or yyaction) which contains a switch statement to
26 decide which guard or action to execute.
1ff442ca 27
41aca2e0 28 The entry point is reader (). */
1ff442ca
NF
29
30#include <stdio.h>
1ff442ca
NF
31#include "system.h"
32#include "files.h"
7612000c 33#include "alloc.h"
1ff442ca
NF
34#include "symtab.h"
35#include "lex.h"
36#include "gram.h"
37#include "machine.h"
38
6666f98f
AD
39#define LTYPESTR "\
40\n\
41#ifndef YYLTYPE\n\
42typedef\n\
43 struct yyltype\n\
44\
45 {\n\
46 int timestamp;\n\
47 int first_line;\n\
48 int first_column;\
49\n\
50 int last_line;\n\
51 int last_column;\n\
52 char *text;\n\
53 }\n\
54\
55 yyltype;\n\
56\n\
57#define YYLTYPE yyltype\n\
58#endif\n\
59\n"
1ff442ca
NF
60
61/* Number of slots allocated (but not necessarily used yet) in `rline' */
62int rline_allocated;
63
64extern char *program_name;
65extern int definesflag;
66extern int nolinesflag;
943819bf
RS
67extern int noparserflag;
68extern int rawtoknumflag;
1ff442ca
NF
69extern bucket *symval;
70extern int numval;
1ff442ca
NF
71extern int expected_conflicts;
72extern char *token_buffer;
118fb205
JT
73extern int maxtoken;
74
75extern void init_lex PARAMS((void));
76extern char *grow_token_buffer PARAMS((char *));
77extern void tabinit PARAMS((void));
78extern void output_headers PARAMS((void));
79extern void output_trailers PARAMS((void));
80extern void free_symtab PARAMS((void));
81extern void open_extra_files PARAMS((void));
82extern char *int_to_string PARAMS((int));
83extern char *printable_version PARAMS((int));
84extern void fatal PARAMS((char *));
85extern void fatals PARAMS((char *, char *));
86extern void warn PARAMS((char *));
87extern void warni PARAMS((char *, int));
88extern void warns PARAMS((char *, char *));
89extern void warnss PARAMS((char *, char *, char *));
90extern void warnsss PARAMS((char *, char *, char *, char *));
91extern void unlex PARAMS((int));
92extern void done PARAMS((int));
93
94extern int skip_white_space PARAMS((void));
95extern int parse_percent_token PARAMS((void));
96extern int lex PARAMS((void));
1ff442ca
NF
97
98typedef
99 struct symbol_list
100 {
101 struct symbol_list *next;
102 bucket *sym;
103 bucket *ruleprec;
104 }
105 symbol_list;
106
107
118fb205
JT
108void reader PARAMS((void));
109void reader_output_yylsp PARAMS((FILE *));
110void read_declarations PARAMS((void));
111void copy_definition PARAMS((void));
112void parse_token_decl PARAMS((int, int));
113void parse_start_decl PARAMS((void));
114void parse_type_decl PARAMS((void));
115void parse_assoc_decl PARAMS((int));
116void parse_union_decl PARAMS((void));
117void parse_expect_decl PARAMS((void));
118char *get_type_name PARAMS((int, symbol_list *));
119void copy_guard PARAMS((symbol_list *, int));
120void parse_thong_decl PARAMS((void));
121void copy_action PARAMS((symbol_list *, int));
122bucket *gensym PARAMS((void));
123void readgram PARAMS((void));
124void record_rule_line PARAMS((void));
125void packsymbols PARAMS((void));
126void output_token_defines PARAMS((FILE *));
127void packgram PARAMS((void));
128int read_signed_integer PARAMS((FILE *));
118fb205 129
2686a6e7
JT
130#if 0
131static int get_type PARAMS((void));
132#endif
1ff442ca
NF
133
134int lineno;
135symbol_list *grammar;
136int start_flag;
137bucket *startval;
138char **tags;
943819bf 139int *user_toknums;
1ff442ca
NF
140
141/* Nonzero if components of semantic values are used, implying
142 they must be unions. */
143static int value_components_used;
144
145static int typed; /* nonzero if %union has been seen. */
146
147static int lastprec; /* incremented for each %left, %right or %nonassoc seen */
148
149static int gensym_count; /* incremented for each generated symbol */
150
151static bucket *errtoken;
5b2e3c89 152static bucket *undeftoken;
1ff442ca
NF
153
154/* Nonzero if any action or guard uses the @n construct. */
155static int yylsp_needed;
156
943819bf
RS
157
158static void
118fb205 159skip_to_char (int target)
943819bf
RS
160{
161 int c;
162 if (target == '\n')
a083fbbf 163 warn(_(" Skipping to next \\n"));
943819bf 164 else
a083fbbf 165 warni(_(" Skipping to next %c"), target);
943819bf
RS
166
167 do
168 c = skip_white_space();
169 while (c != target && c != EOF);
a083fbbf 170 if (c != EOF)
943819bf
RS
171 ungetc(c, finput);
172}
173
174
ae3c3164
AD
175/* Dump the string from FINPUT to FOUTPUT. MATCH is the delimiter of
176 the string (either ' or "). */
177
178static inline void
179copy_string (FILE *finput, FILE *foutput, int match)
180{
181 int c;
182
183 putc (match, foutput);
184 c = getc (finput);
185
186 while (c != match)
187 {
188 if (c == EOF)
189 fatal (_("unterminated string at end of file"));
190 if (c == '\n')
191 {
192 warn (_("unterminated string"));
193 ungetc (c, finput);
194 c = match; /* invent terminator */
195 continue;
196 }
197
198 putc(c, foutput);
199
200 if (c == '\\')
201 {
202 c = getc (finput);
203 if (c == EOF)
204 fatal (_("unterminated string at end of file"));
205 putc (c, foutput);
206 if (c == '\n')
207 lineno++;
208 }
209
210 c = getc(finput);
211 }
212
213 putc(c, foutput);
214}
215
216
217/* Dump the comment from FINPUT to FOUTPUT. C is either `*' or `/',
218 depending upon the type of comments used. */
219
220static inline void
221copy_comment (FILE *finput, FILE *foutput, int c)
222{
223 int cplus_comment;
224 register int match;
225 register int ended;
226
227 cplus_comment = (c == '/');
228 putc (c, foutput);
229 c = getc (finput);
230
231 ended = 0;
232 while (!ended)
233 {
234 if (!cplus_comment && c == '*')
235 {
236 while (c == '*')
237 {
238 putc(c, foutput);
239 c = getc(finput);
240 }
241
242 if (c == '/')
243 {
244 putc(c, foutput);
245 ended = 1;
246 }
247 }
248 else if (c == '\n')
249 {
250 lineno++;
251 putc (c, foutput);
252 if (cplus_comment)
253 ended = 1;
254 else
255 c = getc(finput);
256 }
257 else if (c == EOF)
258 fatal (_("unterminated comment"));
259 else
260 {
261 putc (c, foutput);
262 c = getc (finput);
263 }
264 }
265}
266
267
1ff442ca 268void
118fb205 269reader (void)
1ff442ca
NF
270{
271 start_flag = 0;
272 startval = NULL; /* start symbol not specified yet. */
273
274#if 0
275 translations = 0; /* initially assume token number translation not needed. */
276#endif
277 /* Nowadays translations is always set to 1,
278 since we give `error' a user-token-number
279 to satisfy the Posix demand for YYERRCODE==256. */
280 translations = 1;
281
282 nsyms = 1;
283 nvars = 0;
284 nrules = 0;
285 nitems = 0;
286 rline_allocated = 10;
287 rline = NEW2(rline_allocated, short);
288
289 typed = 0;
290 lastprec = 0;
291
292 gensym_count = 0;
293
294 semantic_parser = 0;
295 pure_parser = 0;
296 yylsp_needed = 0;
297
298 grammar = NULL;
299
300 init_lex();
301 lineno = 1;
302
303 /* initialize the symbol table. */
304 tabinit();
305 /* construct the error token */
306 errtoken = getsym("error");
307 errtoken->class = STOKEN;
308 errtoken->user_token_number = 256; /* Value specified by posix. */
309 /* construct a token that represents all undefined literal tokens. */
310 /* it is always token number 2. */
5b2e3c89
JT
311 undeftoken = getsym("$undefined.");
312 undeftoken->class = STOKEN;
313 undeftoken->user_token_number = 2;
1ff442ca
NF
314 /* Read the declaration section. Copy %{ ... %} groups to ftable and fdefines file.
315 Also notice any %token, %left, etc. found there. */
a083fbbf 316 if (noparserflag)
943819bf
RS
317 fprintf(ftable, "\n/* Bison-generated parse tables, made from %s\n",
318 infile);
319 else
320 fprintf(ftable, "\n/* A Bison parser, made from %s\n", infile);
6ed61226 321 fprintf(ftable, " by %s */\n\n", VERSION_STRING);
1ff442ca
NF
322 fprintf(ftable, "#define YYBISON 1 /* Identify Bison output. */\n\n");
323 read_declarations();
1ff442ca
NF
324 /* start writing the guard and action files, if they are needed. */
325 output_headers();
326 /* read in the grammar, build grammar in list form. write out guards and actions. */
327 readgram();
328 /* Now we know whether we need the line-number stack.
329 If we do, write its type into the .tab.h file. */
943819bf
RS
330 if (fdefines)
331 reader_output_yylsp(fdefines);
1ff442ca
NF
332 /* write closing delimiters for actions and guards. */
333 output_trailers();
334 if (yylsp_needed)
335 fprintf(ftable, "#define YYLSP_NEEDED\n\n");
336 /* assign the symbols their symbol numbers.
337 Write #defines for the token symbols into fdefines if requested. */
338 packsymbols();
339 /* convert the grammar into the format described in gram.h. */
340 packgram();
341 /* free the symbol table data structure
342 since symbols are now all referred to by symbol number. */
343 free_symtab();
344}
345
943819bf 346void
118fb205 347reader_output_yylsp (FILE *f)
943819bf
RS
348{
349 if (yylsp_needed)
350 fprintf(f, LTYPESTR);
351}
1ff442ca 352
41aca2e0
AD
353/* Read from finput until `%%' is seen. Discard the `%%'. Handle any
354 `%' declarations, and copy the contents of any `%{ ... %}' groups
355 to fattrs. */
1ff442ca
NF
356
357void
118fb205 358read_declarations (void)
1ff442ca
NF
359{
360 register int c;
361 register int tok;
362
363 for (;;)
364 {
365 c = skip_white_space();
366
367 if (c == '%')
368 {
369 tok = parse_percent_token();
370
371 switch (tok)
372 {
373 case TWO_PERCENTS:
374 return;
375
376 case PERCENT_LEFT_CURLY:
377 copy_definition();
378 break;
379
380 case TOKEN:
381 parse_token_decl (STOKEN, SNTERM);
382 break;
a083fbbf 383
1ff442ca
NF
384 case NTERM:
385 parse_token_decl (SNTERM, STOKEN);
386 break;
a083fbbf 387
1ff442ca
NF
388 case TYPE:
389 parse_type_decl();
390 break;
a083fbbf 391
1ff442ca
NF
392 case START:
393 parse_start_decl();
394 break;
a083fbbf 395
1ff442ca
NF
396 case UNION:
397 parse_union_decl();
398 break;
a083fbbf 399
1ff442ca
NF
400 case EXPECT:
401 parse_expect_decl();
402 break;
943819bf
RS
403 case THONG:
404 parse_thong_decl();
405 break;
1ff442ca
NF
406 case LEFT:
407 parse_assoc_decl(LEFT_ASSOC);
408 break;
409
410 case RIGHT:
411 parse_assoc_decl(RIGHT_ASSOC);
412 break;
413
414 case NONASSOC:
415 parse_assoc_decl(NON_ASSOC);
416 break;
417
418 case SEMANTIC_PARSER:
419 if (semantic_parser == 0)
420 {
421 semantic_parser = 1;
422 open_extra_files();
423 }
424 break;
425
426 case PURE_PARSER:
427 pure_parser = 1;
428 break;
429
943819bf
RS
430 case NOOP:
431 break;
432
1ff442ca 433 default:
a083fbbf 434 warns(_("unrecognized: %s"), token_buffer);
943819bf
RS
435 skip_to_char('%');
436 }
1ff442ca
NF
437 }
438 else if (c == EOF)
a083fbbf 439 fatal(_("no input grammar"));
1ff442ca 440 else
943819bf 441 {
6666f98f
AD
442 warns (_("unknown character: %s"), printable_version(c));
443 skip_to_char('%');
943819bf 444 }
1ff442ca
NF
445 }
446}
447
448
ae3c3164
AD
449/* Copy the contents of a `%{ ... %}' into the definitions file. The
450 `%{' has already been read. Return after reading the `%}'. */
1ff442ca
NF
451
452void
118fb205 453copy_definition (void)
1ff442ca
NF
454{
455 register int c;
ae3c3164
AD
456 /* -1 while reading a character if prev char was %. */
457 register int after_percent;
1ff442ca
NF
458
459 if (!nolinesflag)
460 fprintf(fattrs, "#line %d \"%s\"\n", lineno, infile);
461
462 after_percent = 0;
463
ae3c3164 464 c = getc (finput);
1ff442ca
NF
465
466 for (;;)
467 {
468 switch (c)
469 {
470 case '\n':
471 putc(c, fattrs);
472 lineno++;
473 break;
474
475 case '%':
476 after_percent = -1;
477 break;
a083fbbf 478
1ff442ca
NF
479 case '\'':
480 case '"':
ae3c3164 481 copy_string (finput, fattrs, c);
1ff442ca
NF
482 break;
483
484 case '/':
ae3c3164
AD
485 putc (c, fattrs);
486 c = getc (finput);
1ff442ca
NF
487 if (c != '*' && c != '/')
488 continue;
ae3c3164 489 copy_comment (finput, fattrs, c);
1ff442ca
NF
490 break;
491
492 case EOF:
a083fbbf 493 fatal(_("unterminated `%{' definition"));
1ff442ca
NF
494
495 default:
496 putc(c, fattrs);
497 }
498
499 c = getc(finput);
500
501 if (after_percent)
502 {
503 if (c == '}')
504 return;
505 putc('%', fattrs);
506 }
507 after_percent = 0;
508
509 }
510
511}
512
513
514
515/* parse what comes after %token or %nterm.
516For %token, what_is is STOKEN and what_is_not is SNTERM.
517For %nterm, the arguments are reversed. */
518
519void
118fb205 520parse_token_decl (int what_is, int what_is_not)
1ff442ca 521{
1ff442ca 522 register int token = 0;
1ff442ca 523 register char *typename = 0;
943819bf 524 register struct bucket *symbol = NULL; /* pts to symbol being defined */
1ff442ca
NF
525 int k;
526
1ff442ca
NF
527 for (;;)
528 {
e6011337
JT
529 int tmp_char = ungetc (skip_white_space (), finput);
530
531 if (tmp_char == '%')
1ff442ca 532 return;
e6011337
JT
533 if (tmp_char == EOF)
534 fatals ("Premature EOF after %s", token_buffer);
535
1ff442ca
NF
536 token = lex();
537 if (token == COMMA)
943819bf
RS
538 {
539 symbol = NULL;
540 continue;
541 }
1ff442ca
NF
542 if (token == TYPENAME)
543 {
544 k = strlen(token_buffer);
545 typename = NEW2(k + 1, char);
546 strcpy(typename, token_buffer);
547 value_components_used = 1;
943819bf
RS
548 symbol = NULL;
549 }
550 else if (token == IDENTIFIER && *symval->tag == '\"'
a083fbbf 551 && symbol)
943819bf
RS
552 {
553 translations = 1;
554 symval->class = STOKEN;
555 symval->type_name = typename;
556 symval->user_token_number = symbol->user_token_number;
557 symbol->user_token_number = SALIAS;
558
a083fbbf
RS
559 symval->alias = symbol;
560 symbol->alias = symval;
943819bf
RS
561 symbol = NULL;
562
563 nsyms--; /* symbol and symval combined are only one symbol */
1ff442ca
NF
564 }
565 else if (token == IDENTIFIER)
566 {
567 int oldclass = symval->class;
943819bf 568 symbol = symval;
1ff442ca 569
943819bf 570 if (symbol->class == what_is_not)
a083fbbf 571 warns(_("symbol %s redefined"), symbol->tag);
943819bf 572 symbol->class = what_is;
1ff442ca 573 if (what_is == SNTERM && oldclass != SNTERM)
943819bf 574 symbol->value = nvars++;
1ff442ca
NF
575
576 if (typename)
577 {
943819bf
RS
578 if (symbol->type_name == NULL)
579 symbol->type_name = typename;
580 else if (strcmp(typename, symbol->type_name) != 0)
a083fbbf 581 warns(_("type redeclaration for %s"), symbol->tag);
1ff442ca
NF
582 }
583 }
943819bf 584 else if (symbol && token == NUMBER)
1ff442ca 585 {
943819bf 586 symbol->user_token_number = numval;
1ff442ca
NF
587 translations = 1;
588 }
589 else
943819bf 590 {
a083fbbf
RS
591 warnss(_("`%s' is invalid in %s"),
592 token_buffer,
943819bf
RS
593 (what_is == STOKEN) ? "%token" : "%nterm");
594 skip_to_char('%');
595 }
1ff442ca
NF
596 }
597
598}
599
a083fbbf 600/* parse what comes after %thong
943819bf
RS
601 the full syntax is
602 %thong <type> token number literal
603 the <type> or number may be omitted. The number specifies the
604 user_token_number.
605
606 Two symbols are entered in the table, one for the token symbol and
607 one for the literal. Both are given the <type>, if any, from the declaration.
608 The ->user_token_number of the first is SALIAS and the ->user_token_number
609 of the second is set to the number, if any, from the declaration.
610 The two symbols are linked via pointers in their ->alias fields.
a083fbbf 611
943819bf
RS
612 during output_defines_table, the symbol is reported
613 thereafter, only the literal string is retained
614 it is the literal string that is output to yytname
615*/
616
617void
118fb205 618parse_thong_decl (void)
943819bf
RS
619{
620 register int token;
621 register struct bucket *symbol;
622 register char *typename = 0;
623 int k, usrtoknum;
624
625 translations = 1;
626 token = lex(); /* fetch typename or first token */
627 if (token == TYPENAME) {
628 k = strlen(token_buffer);
629 typename = NEW2(k + 1, char);
630 strcpy(typename, token_buffer);
631 value_components_used = 1;
632 token = lex(); /* fetch first token */
633 }
634
635 /* process first token */
636
a083fbbf 637 if (token != IDENTIFIER)
943819bf 638 {
a083fbbf 639 warns(_("unrecognized item %s, expected an identifier"),
943819bf
RS
640 token_buffer);
641 skip_to_char('%');
642 return;
643 }
644 symval->class = STOKEN;
645 symval->type_name = typename;
646 symval->user_token_number = SALIAS;
647 symbol = symval;
648
649 token = lex(); /* get number or literal string */
a083fbbf 650
943819bf
RS
651 if (token == NUMBER) {
652 usrtoknum = numval;
653 token = lex(); /* okay, did number, now get literal */
654 }
655 else usrtoknum = 0;
656
657 /* process literal string token */
658
a083fbbf 659 if (token != IDENTIFIER || *symval->tag != '\"')
943819bf 660 {
a083fbbf 661 warns(_("expected string constant instead of %s"),
943819bf
RS
662 token_buffer);
663 skip_to_char('%');
664 return;
665 }
666 symval->class = STOKEN;
667 symval->type_name = typename;
668 symval->user_token_number = usrtoknum;
669
a083fbbf
RS
670 symval->alias = symbol;
671 symbol->alias = symval;
943819bf
RS
672
673 nsyms--; /* symbol and symval combined are only one symbol */
674}
1ff442ca
NF
675
676
677/* parse what comes after %start */
678
679void
118fb205 680parse_start_decl (void)
1ff442ca
NF
681{
682 if (start_flag)
a083fbbf 683 warn(_("multiple %start declarations"));
1ff442ca 684 if (lex() != IDENTIFIER)
a083fbbf 685 warn(_("invalid %start declaration"));
943819bf
RS
686 else
687 {
688 start_flag = 1;
689 startval = symval;
690 }
1ff442ca
NF
691}
692
693
694
695/* read in a %type declaration and record its information for get_type_name to access */
696
697void
118fb205 698parse_type_decl (void)
1ff442ca
NF
699{
700 register int k;
701 register char *name;
1ff442ca
NF
702
703 if (lex() != TYPENAME)
943819bf 704 {
a083fbbf 705 warn(_("%type declaration has no <typename>"));
943819bf
RS
706 skip_to_char('%');
707 return;
708 }
1ff442ca
NF
709
710 k = strlen(token_buffer);
711 name = NEW2(k + 1, char);
712 strcpy(name, token_buffer);
713
1ff442ca
NF
714 for (;;)
715 {
716 register int t;
e6011337 717 int tmp_char = ungetc (skip_white_space (), finput);
1ff442ca 718
e6011337 719 if (tmp_char == '%')
1ff442ca 720 return;
e6011337
JT
721 if (tmp_char == EOF)
722 fatals ("Premature EOF after %s", token_buffer);
1ff442ca 723
1ff442ca
NF
724 t = lex();
725
726 switch (t)
727 {
728
729 case COMMA:
730 case SEMICOLON:
731 break;
732
733 case IDENTIFIER:
734 if (symval->type_name == NULL)
735 symval->type_name = name;
943819bf 736 else if (strcmp(name, symval->type_name) != 0)
a083fbbf 737 warns(_("type redeclaration for %s"), symval->tag);
1ff442ca
NF
738
739 break;
740
741 default:
a083fbbf 742 warns(_("invalid %%type declaration due to item: `%s'"), token_buffer);
943819bf 743 skip_to_char('%');
1ff442ca
NF
744 }
745 }
746}
747
748
749
750/* read in a %left, %right or %nonassoc declaration and record its information. */
751/* assoc is either LEFT_ASSOC, RIGHT_ASSOC or NON_ASSOC. */
752
753void
118fb205 754parse_assoc_decl (int assoc)
1ff442ca
NF
755{
756 register int k;
757 register char *name = NULL;
943819bf 758 register int prev = 0;
1ff442ca
NF
759
760 lastprec++; /* Assign a new precedence level, never 0. */
761
1ff442ca
NF
762 for (;;)
763 {
764 register int t;
e6011337 765 int tmp_char = ungetc (skip_white_space (), finput);
1ff442ca 766
e6011337 767 if (tmp_char == '%')
1ff442ca 768 return;
e6011337
JT
769 if (tmp_char == EOF)
770 fatals ("Premature EOF after %s", token_buffer);
1ff442ca 771
1ff442ca
NF
772 t = lex();
773
774 switch (t)
775 {
776
777 case TYPENAME:
778 k = strlen(token_buffer);
779 name = NEW2(k + 1, char);
780 strcpy(name, token_buffer);
781 break;
782
783 case COMMA:
784 break;
785
786 case IDENTIFIER:
787 if (symval->prec != 0)
a083fbbf 788 warns(_("redefining precedence of %s"), symval->tag);
1ff442ca
NF
789 symval->prec = lastprec;
790 symval->assoc = assoc;
791 if (symval->class == SNTERM)
a083fbbf 792 warns(_("symbol %s redefined"), symval->tag);
1ff442ca
NF
793 symval->class = STOKEN;
794 if (name)
795 { /* record the type, if one is specified */
796 if (symval->type_name == NULL)
797 symval->type_name = name;
943819bf 798 else if (strcmp(name, symval->type_name) != 0)
a083fbbf 799 warns(_("type redeclaration for %s"), symval->tag);
1ff442ca
NF
800 }
801 break;
802
803 case NUMBER:
804 if (prev == IDENTIFIER)
805 {
806 symval->user_token_number = numval;
807 translations = 1;
808 }
a083fbbf 809 else
943819bf 810 {
a083fbbf 811 warns(_("invalid text (%s) - number should be after identifier"),
943819bf
RS
812 token_buffer);
813 skip_to_char('%');
814 }
1ff442ca
NF
815 break;
816
817 case SEMICOLON:
818 return;
819
820 default:
a083fbbf 821 warns(_("unexpected item: %s"), token_buffer);
943819bf 822 skip_to_char('%');
1ff442ca
NF
823 }
824
825 prev = t;
826
827 }
828}
829
830
831
832/* copy the union declaration into fattrs (and fdefines),
833 where it is made into the
834 definition of YYSTYPE, the type of elements of the parser value stack. */
835
836void
118fb205 837parse_union_decl (void)
1ff442ca
NF
838{
839 register int c;
840 register int count;
841 register int in_comment;
842 int cplus_comment;
843
844 if (typed)
a083fbbf 845 warn(_("multiple %union declarations"));
1ff442ca
NF
846
847 typed = 1;
848
849 if (!nolinesflag)
850 fprintf(fattrs, "\n#line %d \"%s\"\n", lineno, infile);
851 else
852 fprintf(fattrs, "\n");
853
854 fprintf(fattrs, "typedef union");
855 if (fdefines)
856 fprintf(fdefines, "typedef union");
857
858 count = 0;
859 in_comment = 0;
860
861 c = getc(finput);
862
863 while (c != EOF)
864 {
865 putc(c, fattrs);
866 if (fdefines)
867 putc(c, fdefines);
868
869 switch (c)
870 {
871 case '\n':
872 lineno++;
873 break;
874
875 case '/':
876 c = getc(finput);
877 if (c != '*' && c != '/')
878 ungetc(c, finput);
879 else
880 {
881 putc(c, fattrs);
882 if (fdefines)
883 putc(c, fdefines);
884 cplus_comment = (c == '/');
885 in_comment = 1;
886 c = getc(finput);
887 while (in_comment)
888 {
889 putc(c, fattrs);
890 if (fdefines)
891 putc(c, fdefines);
892
893 if (c == '\n')
894 {
895 lineno++;
896 if (cplus_comment)
897 {
898 in_comment = 0;
899 break;
900 }
901 }
902 if (c == EOF)
a083fbbf 903 fatal(_("unterminated comment at end of file"));
1ff442ca
NF
904
905 if (!cplus_comment && c == '*')
906 {
907 c = getc(finput);
908 if (c == '/')
909 {
910 putc('/', fattrs);
911 if (fdefines)
912 putc('/', fdefines);
913 in_comment = 0;
914 }
915 }
916 else
917 c = getc(finput);
918 }
919 }
920 break;
921
922
923 case '{':
924 count++;
925 break;
926
927 case '}':
928 if (count == 0)
a083fbbf 929 warn (_("unmatched close-brace (`}')"));
1ff442ca 930 count--;
943819bf 931 if (count <= 0)
1ff442ca
NF
932 {
933 fprintf(fattrs, " YYSTYPE;\n");
934 if (fdefines)
935 fprintf(fdefines, " YYSTYPE;\n");
936 /* JF don't choke on trailing semi */
937 c=skip_white_space();
938 if(c!=';') ungetc(c,finput);
939 return;
940 }
941 }
942
943 c = getc(finput);
944 }
945}
946
947/* parse the declaration %expect N which says to expect N
948 shift-reduce conflicts. */
949
950void
118fb205 951parse_expect_decl (void)
1ff442ca
NF
952{
953 register int c;
954 register int count;
955 char buffer[20];
956
957 c = getc(finput);
958 while (c == ' ' || c == '\t')
959 c = getc(finput);
960
961 count = 0;
962 while (c >= '0' && c <= '9')
963 {
964 if (count < 20)
965 buffer[count++] = c;
966 c = getc(finput);
967 }
968 buffer[count] = 0;
969
970 ungetc (c, finput);
971
943819bf 972 if (count <= 0 || count > 10)
a083fbbf 973 warn(_("argument of %expect is not an integer"));
1ff442ca
NF
974 expected_conflicts = atoi (buffer);
975}
976
977/* that's all of parsing the declaration section */
978\f
979/* Get the data type (alternative in the union) of the value for symbol n in rule rule. */
980
981char *
118fb205 982get_type_name (int n, symbol_list *rule)
1ff442ca 983{
a083fbbf 984 static char *msg = N_("invalid $ value");
1ff442ca
NF
985
986 register int i;
987 register symbol_list *rp;
988
989 if (n < 0)
943819bf 990 {
a083fbbf 991 warn(_(msg));
943819bf
RS
992 return NULL;
993 }
1ff442ca
NF
994
995 rp = rule;
996 i = 0;
997
998 while (i < n)
999 {
1000 rp = rp->next;
1001 if (rp == NULL || rp->sym == NULL)
943819bf 1002 {
a083fbbf 1003 warn(_(msg));
943819bf
RS
1004 return NULL;
1005 }
1ff442ca
NF
1006 i++;
1007 }
1008
1009 return (rp->sym->type_name);
1010}
1011
1012
3cef001a 1013
41aca2e0
AD
1014/* After `%guard' is seen in the input file, copy the actual guard
1015 into the guards file. If the guard is followed by an action, copy
1016 that into the actions file. STACK_OFFSET is the number of values
1017 in the current rule so far, which says where to find `$0' with
1018 respect to the top of the stack, for the simple parser in which the
1019 stack is not popped until after the guard is run. */
1ff442ca
NF
1020
1021void
118fb205 1022copy_guard (symbol_list *rule, int stack_offset)
1ff442ca
NF
1023{
1024 register int c;
1025 register int n;
1026 register int count;
1ff442ca
NF
1027 register char *type_name;
1028 int brace_flag = 0;
1ff442ca
NF
1029
1030 /* offset is always 0 if parser has already popped the stack pointer */
1031 if (semantic_parser) stack_offset = 0;
1032
1033 fprintf(fguard, "\ncase %d:\n", nrules);
1034 if (!nolinesflag)
41aca2e0 1035 fprintf (fguard, "#line %d \"%s\"\n", lineno, infile);
1ff442ca
NF
1036 putc('{', fguard);
1037
1038 count = 0;
1039 c = getc(finput);
1040
1041 while (brace_flag ? (count > 0) : (c != ';'))
1042 {
1043 switch (c)
1044 {
1045 case '\n':
1046 putc(c, fguard);
1047 lineno++;
1048 break;
1049
1050 case '{':
1051 putc(c, fguard);
1052 brace_flag = 1;
1053 count++;
1054 break;
1055
1056 case '}':
1057 putc(c, fguard);
1058 if (count > 0)
1059 count--;
a083fbbf 1060 else
943819bf 1061 {
a083fbbf 1062 warn(_("unmatched right brace (`}')"));
943819bf
RS
1063 c = getc(finput); /* skip it */
1064 }
1ff442ca
NF
1065 break;
1066
1067 case '\'':
1068 case '"':
ca36d2ef 1069 copy_string (finput, fguard, c);
1ff442ca
NF
1070 break;
1071
1072 case '/':
3cef001a
AD
1073 putc (c, fguard);
1074 c = getc (finput);
1ff442ca
NF
1075 if (c != '*' && c != '/')
1076 continue;
3cef001a 1077 copy_comment (finput, fguard, c);
1ff442ca
NF
1078 break;
1079
1080 case '$':
1081 c = getc(finput);
1082 type_name = NULL;
1083
1084 if (c == '<')
1085 {
1086 register char *cp = token_buffer;
1087
1088 while ((c = getc(finput)) != '>' && c > 0)
118fb205
JT
1089 {
1090 if (cp == token_buffer + maxtoken)
1091 cp = grow_token_buffer(cp);
1092
1093 *cp++ = c;
1094 }
1ff442ca
NF
1095 *cp = 0;
1096 type_name = token_buffer;
1097
1098 c = getc(finput);
1099 }
1100
1101 if (c == '$')
1102 {
1103 fprintf(fguard, "yyval");
41aca2e0
AD
1104 if (!type_name)
1105 type_name = rule->sym->type_name;
1ff442ca
NF
1106 if (type_name)
1107 fprintf(fguard, ".%s", type_name);
943819bf 1108 if(!type_name && typed)
a083fbbf 1109 warns(_("$$ of `%s' has no declared type"), rule->sym->tag);
1ff442ca 1110 }
1ff442ca
NF
1111 else if (isdigit(c) || c == '-')
1112 {
1113 ungetc (c, finput);
41aca2e0
AD
1114 n = read_signed_integer (finput);
1115 c = getc (finput);
1ff442ca
NF
1116
1117 if (!type_name && n > 0)
1118 type_name = get_type_name(n, rule);
1119
1120 fprintf(fguard, "yyvsp[%d]", n - stack_offset);
1121 if (type_name)
1122 fprintf(fguard, ".%s", type_name);
6666f98f
AD
1123 if (!type_name && typed)
1124 warnss (_("$%s of `%s' has no declared type"),
1125 int_to_string(n), rule->sym->tag);
1ff442ca
NF
1126 continue;
1127 }
1128 else
aba5ca6d 1129 warns(_("$%s is invalid"), printable_version(c));
1ff442ca
NF
1130 break;
1131
1132 case '@':
6666f98f
AD
1133 c = getc (finput);
1134 if (c == '$')
1135 {
1136 fprintf (fguard, "yyloc");
1137 yylsp_needed = 1;
1138 }
1139 else if (isdigit(c) || c == '-')
1ff442ca
NF
1140 {
1141 ungetc (c, finput);
6666f98f
AD
1142 n = read_signed_integer (finput);
1143 c = getc (finput);
1144 fprintf (fguard, "yylsp[%d]", n - stack_offset);
1145 yylsp_needed = 1;
1146 continue;
1ff442ca
NF
1147 }
1148 else
943819bf 1149 {
6666f98f 1150 warns (_("@%s is invalid"), printable_version (c));
943819bf
RS
1151 n = 1;
1152 }
6666f98f 1153 break;
1ff442ca
NF
1154
1155 case EOF:
6666f98f 1156 fatal (_("unterminated %%guard clause"));
1ff442ca
NF
1157
1158 default:
6666f98f 1159 putc (c, fguard);
1ff442ca
NF
1160 }
1161
1162 if (c != '}' || count != 0)
1163 c = getc(finput);
1164 }
1165
1166 c = skip_white_space();
1167
1168 fprintf(fguard, ";\n break;}");
1169 if (c == '{')
1170 copy_action(rule, stack_offset);
1171 else if (c == '=')
1172 {
943819bf 1173 c = getc(finput); /* why not skip_white_space -wjh */
1ff442ca
NF
1174 if (c == '{')
1175 copy_action(rule, stack_offset);
1176 }
1177 else
1178 ungetc(c, finput);
1179}
1180
1181
1182
41aca2e0
AD
1183/* Assuming that a `{' has just been seen, copy everything up to the
1184 matching `}' into the actions file. STACK_OFFSET is the number of
1185 values in the current rule so far, which says where to find `$0'
1186 with respect to the top of the stack. */
1ff442ca
NF
1187
1188void
118fb205 1189copy_action (symbol_list *rule, int stack_offset)
1ff442ca
NF
1190{
1191 register int c;
1192 register int n;
1193 register int count;
1ff442ca 1194 register char *type_name;
1ff442ca
NF
1195
1196 /* offset is always 0 if parser has already popped the stack pointer */
41aca2e0
AD
1197 if (semantic_parser)
1198 stack_offset = 0;
1ff442ca 1199
41aca2e0 1200 fprintf (faction, "\ncase %d:\n", nrules);
1ff442ca 1201 if (!nolinesflag)
41aca2e0
AD
1202 fprintf (faction, "#line %d \"%s\"\n", lineno, infile);
1203 putc ('{', faction);
1ff442ca
NF
1204
1205 count = 1;
1206 c = getc(finput);
1207
1208 while (count > 0)
1209 {
1210 while (c != '}')
1211 {
1212 switch (c)
1213 {
1214 case '\n':
1215 putc(c, faction);
1216 lineno++;
1217 break;
1218
1219 case '{':
1220 putc(c, faction);
1221 count++;
1222 break;
1223
1224 case '\'':
1225 case '"':
ca36d2ef 1226 copy_string (finput, faction, c);
1ff442ca
NF
1227 break;
1228
1229 case '/':
1230 putc(c, faction);
1231 c = getc(finput);
1232 if (c != '*' && c != '/')
1233 continue;
3cef001a 1234 copy_comment (finput, faction, c);
1ff442ca
NF
1235 break;
1236
1237 case '$':
1238 c = getc(finput);
1239 type_name = NULL;
1240
1241 if (c == '<')
1242 {
1243 register char *cp = token_buffer;
1244
1245 while ((c = getc(finput)) != '>' && c > 0)
118fb205
JT
1246 {
1247 if (cp == token_buffer + maxtoken)
1248 cp = grow_token_buffer(cp);
1249
1250 *cp++ = c;
1251 }
1ff442ca
NF
1252 *cp = 0;
1253 type_name = token_buffer;
1254 value_components_used = 1;
1255
1256 c = getc(finput);
1257 }
1258 if (c == '$')
1259 {
1260 fprintf(faction, "yyval");
41aca2e0
AD
1261 if (!type_name)
1262 type_name = get_type_name(0, rule);
1ff442ca
NF
1263 if (type_name)
1264 fprintf(faction, ".%s", type_name);
a083fbbf 1265 if(!type_name && typed)
41aca2e0
AD
1266 warns(_("$$ of `%s' has no declared type"),
1267 rule->sym->tag);
1ff442ca
NF
1268 }
1269 else if (isdigit(c) || c == '-')
1270 {
1271 ungetc (c, finput);
1272 n = read_signed_integer(finput);
1273 c = getc(finput);
1274
1275 if (!type_name && n > 0)
1276 type_name = get_type_name(n, rule);
1277
1278 fprintf(faction, "yyvsp[%d]", n - stack_offset);
1279 if (type_name)
1280 fprintf(faction, ".%s", type_name);
a083fbbf
RS
1281 if(!type_name && typed)
1282 warnss(_("$%s of `%s' has no declared type"),
943819bf 1283 int_to_string(n), rule->sym->tag);
1ff442ca
NF
1284 continue;
1285 }
1286 else
aba5ca6d 1287 warns(_("$%s is invalid"), printable_version(c));
1ff442ca
NF
1288
1289 break;
1290
1291 case '@':
6666f98f
AD
1292 c = getc (finput);
1293 if (c == '$')
1294 {
1295 fprintf (faction, "yyloc");
1296 yylsp_needed = 1;
1297 }
1298 else if (isdigit(c) || c == '-')
1ff442ca
NF
1299 {
1300 ungetc (c, finput);
6666f98f
AD
1301 n = read_signed_integer (finput);
1302 c = getc (finput);
1303 fprintf (faction, "yylsp[%d]", n - stack_offset);
1304 yylsp_needed = 1;
1305 continue;
1ff442ca
NF
1306 }
1307 else
943819bf 1308 {
6666f98f 1309 warns (_("@%s is invalid"), printable_version (c));
943819bf
RS
1310 n = 1;
1311 }
6666f98f 1312 break;
1ff442ca
NF
1313
1314 case EOF:
a083fbbf 1315 fatal(_("unmatched `{'"));
1ff442ca
NF
1316
1317 default:
1318 putc(c, faction);
1319 }
1320
1321 c = getc(finput);
1322 }
1323
1324 /* above loop exits when c is '}' */
1325
1326 if (--count)
1327 {
1328 putc(c, faction);
1329 c = getc(finput);
1330 }
1331 }
1332
1333 fprintf(faction, ";\n break;}");
1334}
1335
1336
1337
1338/* generate a dummy symbol, a nonterminal,
1339whose name cannot conflict with the user's names. */
1340
1341bucket *
118fb205 1342gensym (void)
1ff442ca
NF
1343{
1344 register bucket *sym;
1345
1346 sprintf (token_buffer, "@%d", ++gensym_count);
1347 sym = getsym(token_buffer);
1348 sym->class = SNTERM;
1349 sym->value = nvars++;
1350 return (sym);
1351}
1352
1353/* Parse the input grammar into a one symbol_list structure.
1354Each rule is represented by a sequence of symbols: the left hand side
1355followed by the contents of the right hand side, followed by a null pointer
1356instead of a symbol to terminate the rule.
1357The next symbol is the lhs of the following rule.
1358
1359All guards and actions are copied out to the appropriate files,
1360labelled by the rule number they apply to. */
1361
1362void
118fb205 1363readgram (void)
1ff442ca
NF
1364{
1365 register int t;
2686a6e7 1366 register bucket *lhs = NULL;
1ff442ca
NF
1367 register symbol_list *p;
1368 register symbol_list *p1;
1369 register bucket *bp;
1370
1371 symbol_list *crule; /* points to first symbol_list of current rule. */
1372 /* its symbol is the lhs of the rule. */
1373 symbol_list *crule1; /* points to the symbol_list preceding crule. */
1374
1375 p1 = NULL;
1376
1377 t = lex();
1378
1379 while (t != TWO_PERCENTS && t != ENDFILE)
1380 {
1381 if (t == IDENTIFIER || t == BAR)
1382 {
1383 register int actionflag = 0;
1384 int rulelength = 0; /* number of symbols in rhs of this rule so far */
1385 int xactions = 0; /* JF for error checking */
1386 bucket *first_rhs = 0;
1387
1388 if (t == IDENTIFIER)
1389 {
1390 lhs = symval;
943819bf
RS
1391
1392 if (!start_flag)
1393 {
1394 startval = lhs;
1395 start_flag = 1;
1396 }
a083fbbf 1397
1ff442ca
NF
1398 t = lex();
1399 if (t != COLON)
943819bf 1400 {
a083fbbf 1401 warn(_("ill-formed rule: initial symbol not followed by colon"));
943819bf
RS
1402 unlex(t);
1403 }
1ff442ca
NF
1404 }
1405
943819bf 1406 if (nrules == 0 && t == BAR)
1ff442ca 1407 {
a083fbbf 1408 warn(_("grammar starts with vertical bar"));
943819bf 1409 lhs = symval; /* BOGUS: use a random symval */
1ff442ca 1410 }
1ff442ca
NF
1411 /* start a new rule and record its lhs. */
1412
1413 nrules++;
1414 nitems++;
1415
1416 record_rule_line ();
1417
1418 p = NEW(symbol_list);
1419 p->sym = lhs;
1420
1421 crule1 = p1;
1422 if (p1)
1423 p1->next = p;
1424 else
1425 grammar = p;
1426
1427 p1 = p;
1428 crule = p;
1429
1430 /* mark the rule's lhs as a nonterminal if not already so. */
1431
1432 if (lhs->class == SUNKNOWN)
1433 {
1434 lhs->class = SNTERM;
1435 lhs->value = nvars;
1436 nvars++;
1437 }
1438 else if (lhs->class == STOKEN)
a083fbbf 1439 warns(_("rule given for %s, which is a token"), lhs->tag);
1ff442ca
NF
1440
1441 /* read the rhs of the rule. */
1442
1443 for (;;)
1444 {
1445 t = lex();
943819bf
RS
1446 if (t == PREC)
1447 {
1448 t = lex();
1449 crule->ruleprec = symval;
1450 t = lex();
1451 }
1ff442ca
NF
1452
1453 if (! (t == IDENTIFIER || t == LEFT_CURLY)) break;
1454
1455 /* If next token is an identifier, see if a colon follows it.
1456 If one does, exit this rule now. */
1457 if (t == IDENTIFIER)
1458 {
1459 register bucket *ssave;
1460 register int t1;
1461
1462 ssave = symval;
1463 t1 = lex();
1464 unlex(t1);
1465 symval = ssave;
1466 if (t1 == COLON) break;
1467
1468 if(!first_rhs) /* JF */
1469 first_rhs = symval;
1470 /* Not followed by colon =>
1471 process as part of this rule's rhs. */
1472 }
1473
1474 /* If we just passed an action, that action was in the middle
1475 of a rule, so make a dummy rule to reduce it to a
1476 non-terminal. */
1477 if (actionflag)
1478 {
1479 register bucket *sdummy;
1480
1481 /* Since the action was written out with this rule's */
943819bf 1482 /* number, we must give the new rule this number */
1ff442ca
NF
1483 /* by inserting the new rule before it. */
1484
1485 /* Make a dummy nonterminal, a gensym. */
1486 sdummy = gensym();
1487
1488 /* Make a new rule, whose body is empty,
1489 before the current one, so that the action
1490 just read can belong to it. */
1491 nrules++;
1492 nitems++;
1493 record_rule_line ();
1494 p = NEW(symbol_list);
1495 if (crule1)
1496 crule1->next = p;
1497 else grammar = p;
1498 p->sym = sdummy;
1499 crule1 = NEW(symbol_list);
1500 p->next = crule1;
1501 crule1->next = crule;
1502
1503 /* insert the dummy generated by that rule into this rule. */
1504 nitems++;
1505 p = NEW(symbol_list);
1506 p->sym = sdummy;
1507 p1->next = p;
1508 p1 = p;
1509
1510 actionflag = 0;
1511 }
1512
1513 if (t == IDENTIFIER)
1514 {
1515 nitems++;
1516 p = NEW(symbol_list);
1517 p->sym = symval;
1518 p1->next = p;
1519 p1 = p;
1520 }
1521 else /* handle an action. */
1522 {
1523 copy_action(crule, rulelength);
1524 actionflag = 1;
1525 xactions++; /* JF */
1526 }
1527 rulelength++;
943819bf 1528 } /* end of read rhs of rule */
1ff442ca
NF
1529
1530 /* Put an empty link in the list to mark the end of this rule */
1531 p = NEW(symbol_list);
1532 p1->next = p;
1533 p1 = p;
1534
1535 if (t == PREC)
1536 {
a083fbbf 1537 warn(_("two @prec's in a row"));
1ff442ca
NF
1538 t = lex();
1539 crule->ruleprec = symval;
1540 t = lex();
1541 }
1542 if (t == GUARD)
1543 {
1544 if (! semantic_parser)
a083fbbf 1545 warn(_("%%guard present but %%semantic_parser not specified"));
1ff442ca
NF
1546
1547 copy_guard(crule, rulelength);
1548 t = lex();
1549 }
1550 else if (t == LEFT_CURLY)
1551 {
943819bf 1552 /* This case never occurs -wjh */
6666f98f
AD
1553 if (actionflag)
1554 warn(_("two actions at end of one rule"));
1ff442ca 1555 copy_action(crule, rulelength);
943819bf
RS
1556 actionflag = 1;
1557 xactions++; /* -wjh */
1ff442ca
NF
1558 t = lex();
1559 }
6666f98f
AD
1560 /* If $$ is being set in default way, warn if any type
1561 mismatch. */
1562 else if (!xactions
1563 && first_rhs
1564 && lhs->type_name != first_rhs->type_name)
1ff442ca 1565 {
6666f98f
AD
1566 if (lhs->type_name == 0
1567 || first_rhs->type_name == 0
1ff442ca 1568 || strcmp(lhs->type_name,first_rhs->type_name))
a083fbbf 1569 warnss(_("type clash (`%s' `%s') on default action"),
6666f98f
AD
1570 lhs->type_name ? lhs->type_name : "",
1571 first_rhs->type_name ? first_rhs->type_name : "");
1ff442ca
NF
1572 }
1573 /* Warn if there is no default for $$ but we need one. */
1574 else if (!xactions && !first_rhs && lhs->type_name != 0)
a083fbbf 1575 warn(_("empty rule for typed nonterminal, and no action"));
1ff442ca
NF
1576 if (t == SEMICOLON)
1577 t = lex();
a083fbbf 1578 }
943819bf
RS
1579#if 0
1580 /* these things can appear as alternatives to rules. */
1581/* NO, they cannot.
1582 a) none of the documentation allows them
1583 b) most of them scan forward until finding a next %
1584 thus they may swallow lots of intervening rules
1585*/
1ff442ca
NF
1586 else if (t == TOKEN)
1587 {
1588 parse_token_decl(STOKEN, SNTERM);
1589 t = lex();
1590 }
1591 else if (t == NTERM)
1592 {
1593 parse_token_decl(SNTERM, STOKEN);
1594 t = lex();
1595 }
1596 else if (t == TYPE)
1597 {
1598 t = get_type();
1599 }
1600 else if (t == UNION)
1601 {
1602 parse_union_decl();
1603 t = lex();
1604 }
1605 else if (t == EXPECT)
1606 {
1607 parse_expect_decl();
1608 t = lex();
1609 }
1610 else if (t == START)
1611 {
1612 parse_start_decl();
1613 t = lex();
1614 }
943819bf
RS
1615#endif
1616
1ff442ca 1617 else
943819bf 1618 {
a083fbbf 1619 warns(_("invalid input: %s"), token_buffer);
943819bf
RS
1620 t = lex();
1621 }
1ff442ca
NF
1622 }
1623
943819bf
RS
1624 /* grammar has been read. Do some checking */
1625
1ff442ca 1626 if (nsyms > MAXSHORT)
a083fbbf 1627 fatals(_("too many symbols (tokens plus nonterminals); maximum %s"),
943819bf 1628 int_to_string(MAXSHORT));
1ff442ca 1629 if (nrules == 0)
a083fbbf 1630 fatal(_("no rules in the input grammar"));
1ff442ca
NF
1631
1632 if (typed == 0 /* JF put out same default YYSTYPE as YACC does */
1633 && !value_components_used)
1634 {
1635 /* We used to use `unsigned long' as YYSTYPE on MSDOS,
1636 but it seems better to be consistent.
1637 Most programs should declare their own type anyway. */
1638 fprintf(fattrs, "#ifndef YYSTYPE\n#define YYSTYPE int\n#endif\n");
1639 if (fdefines)
1640 fprintf(fdefines, "#ifndef YYSTYPE\n#define YYSTYPE int\n#endif\n");
1641 }
1642
1643 /* Report any undefined symbols and consider them nonterminals. */
1644
1645 for (bp = firstsymbol; bp; bp = bp->next)
1646 if (bp->class == SUNKNOWN)
1647 {
a083fbbf 1648 warns(_("symbol %s is used, but is not defined as a token and has no rules"),
1ff442ca 1649 bp->tag);
1ff442ca
NF
1650 bp->class = SNTERM;
1651 bp->value = nvars++;
1652 }
1653
1654 ntokens = nsyms - nvars;
1655}
1656
1657
1658void
118fb205 1659record_rule_line (void)
1ff442ca
NF
1660{
1661 /* Record each rule's source line number in rline table. */
1662
1663 if (nrules >= rline_allocated)
1664 {
1665 rline_allocated = nrules * 2;
118fb205
JT
1666 rline = (short *) xrealloc ((char *) rline,
1667 rline_allocated * sizeof (short));
1ff442ca
NF
1668 }
1669 rline[nrules] = lineno;
1670}
1671
1672
#if 0
/* Read in a %type declaration and record its type name on each symbol
   it lists, for get_type_name to access.  Returns the first token that
   does not belong to the declaration.

   This is unused: it is only called from the `#if 0' part of
   readgram.  */
static int
get_type (void)
{
  int token;
  int len;
  char *type;

  token = lex ();
  if (token != TYPENAME)
    {
      warn (_("ill-formed %type declaration"));
      return token;
    }

  /* Take a private copy of the type name now held in token_buffer.  */
  len = strlen (token_buffer);
  type = NEW2 (len + 1, char);
  strcpy (type, token_buffer);

  for (;;)
    {
      token = lex ();

      /* A semicolon ends the declaration; hand back what follows it.  */
      if (token == SEMICOLON)
        return lex ();

      /* Commas between the symbols are simply skipped.  */
      if (token == COMMA)
        continue;

      /* Anything other than a symbol ends the declaration.  */
      if (token != IDENTIFIER)
        return token;

      if (symval->type_name == NULL)
        symval->type_name = type;
      else if (strcmp (type, symval->type_name) != 0)
        warns (_("type redeclaration for %s"), symval->tag);
    }
}
#endif
1ff442ca
NF
1721
1722
0a6384c4
AD
1723/* Assign symbol numbers, and write definition of token names into
1724 fdefines. Set up vectors tags and sprec of names and precedences
1725 of symbols. */
1ff442ca
NF
1726
1727void
118fb205 1728packsymbols (void)
1ff442ca
NF
1729{
1730 register bucket *bp;
1731 register int tokno = 1;
1732 register int i;
1733 register int last_user_token_number;
1734
1735 /* int lossage = 0; JF set but not used */
1736
1737 tags = NEW2(nsyms + 1, char *);
1738 tags[0] = "$";
943819bf
RS
1739 user_toknums = NEW2(nsyms + 1, int);
1740 user_toknums[0] = 0;
1ff442ca
NF
1741
1742 sprec = NEW2(nsyms, short);
1743 sassoc = NEW2(nsyms, short);
1744
1745 max_user_token_number = 256;
1746 last_user_token_number = 256;
1747
1748 for (bp = firstsymbol; bp; bp = bp->next)
1749 {
1750 if (bp->class == SNTERM)
1751 {
1752 bp->value += ntokens;
1753 }
943819bf
RS
1754 else if (bp->alias)
1755 {
0a6384c4
AD
1756 /* this symbol and its alias are a single token defn.
1757 allocate a tokno, and assign to both check agreement of
1758 ->prec and ->assoc fields and make both the same */
1759 if (bp->value == 0)
1760 bp->value = bp->alias->value = tokno++;
943819bf 1761
0a6384c4
AD
1762 if (bp->prec != bp->alias->prec)
1763 {
1764 if (bp->prec != 0 && bp->alias->prec != 0
1765 && bp->user_token_number == SALIAS)
1766 warnss(_("conflicting precedences for %s and %s"),
1767 bp->tag, bp->alias->tag);
1768 if (bp->prec != 0)
1769 bp->alias->prec = bp->prec;
1770 else
1771 bp->prec = bp->alias->prec;
1772 }
943819bf 1773
0a6384c4
AD
1774 if (bp->assoc != bp->alias->assoc)
1775 {
1776 if (bp->assoc != 0 && bp->alias->assoc != 0
1777 && bp->user_token_number == SALIAS)
1778 warnss(_("conflicting assoc values for %s and %s"),
1779 bp->tag, bp->alias->tag);
1780 if (bp->assoc != 0)
1781 bp->alias->assoc = bp->assoc;
1782 else
1783 bp->assoc = bp->alias->assoc;
1784 }
1785
1786 if (bp->user_token_number == SALIAS)
1787 continue; /* do not do processing below for SALIASs */
943819bf
RS
1788
1789 }
1790 else /* bp->class == STOKEN */
1791 {
1792 bp->value = tokno++;
1793 }
1794
1795 if (bp->class == STOKEN)
1ff442ca
NF
1796 {
1797 if (translations && !(bp->user_token_number))
1798 bp->user_token_number = ++last_user_token_number;
1799 if (bp->user_token_number > max_user_token_number)
1800 max_user_token_number = bp->user_token_number;
1ff442ca
NF
1801 }
1802
1803 tags[bp->value] = bp->tag;
943819bf 1804 user_toknums[bp->value] = bp->user_token_number;
1ff442ca
NF
1805 sprec[bp->value] = bp->prec;
1806 sassoc[bp->value] = bp->assoc;
1807
1808 }
1809
1810 if (translations)
1811 {
1812 register int i;
1813
1814 token_translations = NEW2(max_user_token_number+1, short);
1815
0a6384c4
AD
1816 /* initialize all entries for literal tokens to 2, the internal
1817 token number for $undefined., which represents all invalid
1818 inputs. */
1ff442ca 1819 for (i = 0; i <= max_user_token_number; i++)
a083fbbf 1820 token_translations[i] = 2;
1ff442ca 1821
943819bf
RS
1822 for (bp = firstsymbol; bp; bp = bp->next)
1823 {
0a6384c4
AD
1824 if (bp->value >= ntokens)
1825 continue; /* non-terminal */
1826 if (bp->user_token_number == SALIAS)
1827 continue;
943819bf 1828 if (token_translations[bp->user_token_number] != 2)
a083fbbf 1829 warnsss(_("tokens %s and %s both assigned number %s"),
0a6384c4
AD
1830 tags[token_translations[bp->user_token_number]],
1831 bp->tag,
1832 int_to_string(bp->user_token_number));
943819bf
RS
1833 token_translations[bp->user_token_number] = bp->value;
1834 }
1ff442ca
NF
1835 }
1836
1837 error_token_number = errtoken->value;
1838
943819bf
RS
1839 if (! noparserflag)
1840 output_token_defines(ftable);
1ff442ca
NF
1841
1842 if (startval->class == SUNKNOWN)
a083fbbf 1843 fatals(_("the start symbol %s is undefined"), startval->tag);
1ff442ca 1844 else if (startval->class == STOKEN)
a083fbbf 1845 fatals(_("the start symbol %s is a token"), startval->tag);
1ff442ca
NF
1846
1847 start_symbol = startval->value;
1848
1849 if (definesflag)
1850 {
1851 output_token_defines(fdefines);
1852
1853 if (!pure_parser)
1854 {
1855 if (spec_name_prefix)
1856 fprintf(fdefines, "\nextern YYSTYPE %slval;\n", spec_name_prefix);
1857 else
1858 fprintf(fdefines, "\nextern YYSTYPE yylval;\n");
1859 }
1860
1861 if (semantic_parser)
1862 for (i = ntokens; i < nsyms; i++)
1863 {
1864 /* don't make these for dummy nonterminals made by gensym. */
1865 if (*tags[i] != '@')
1866 fprintf(fdefines, "#define\tNT%s\t%d\n", tags[i], i);
1867 }
1868#if 0
1869 /* `fdefines' is now a temporary file, so we need to copy its
1870 contents in `done', so we can't close it here. */
1871 fclose(fdefines);
1872 fdefines = NULL;
1873#endif
1874 }
1875}
a083fbbf 1876
0a6384c4
AD
1877/* For named tokens, but not literal ones, define the name. The value
1878 is the user token number. */
1879
1ff442ca 1880void
118fb205 1881output_token_defines (FILE *file)
1ff442ca
NF
1882{
1883 bucket *bp;
943819bf
RS
1884 register char *cp, *symbol;
1885 register char c;
1ff442ca
NF
1886
1887 for (bp = firstsymbol; bp; bp = bp->next)
1888 {
943819bf 1889 symbol = bp->tag; /* get symbol */
1ff442ca 1890
943819bf
RS
1891 if (bp->value >= ntokens) continue;
1892 if (bp->user_token_number == SALIAS) continue;
1893 if ('\'' == *symbol) continue; /* skip literal character */
1894 if (bp == errtoken) continue; /* skip error token */
a083fbbf 1895 if ('\"' == *symbol)
1ff442ca 1896 {
943819bf
RS
1897 /* use literal string only if given a symbol with an alias */
1898 if (bp->alias)
1899 symbol = bp->alias->tag;
1900 else
1901 continue;
1902 }
1ff442ca 1903
943819bf
RS
1904 /* Don't #define nonliteral tokens whose names contain periods. */
1905 cp = symbol;
1906 while ((c = *cp++) && c != '.');
1907 if (c != '\0') continue;
1ff442ca 1908
943819bf 1909 fprintf(file, "#define\t%s\t%d\n", symbol,
a083fbbf
RS
1910 ((translations && ! rawtoknumflag)
1911 ? bp->user_token_number
943819bf
RS
1912 : bp->value));
1913 if (semantic_parser)
1914 fprintf(file, "#define\tT%s\t%d\n", symbol, bp->value);
1ff442ca
NF
1915 }
1916
1917 putc('\n', file);
1918}
1919
1920
1921
1922/* convert the rules into the representation using rrhs, rlhs and ritems. */
1923
1924void
118fb205 1925packgram (void)
1ff442ca
NF
1926{
1927 register int itemno;
1928 register int ruleno;
1929 register symbol_list *p;
1930/* register bucket *bp; JF unused */
1931
1932 bucket *ruleprec;
1933
1934 ritem = NEW2(nitems + 1, short);
1935 rlhs = NEW2(nrules, short) - 1;
1936 rrhs = NEW2(nrules, short) - 1;
1937 rprec = NEW2(nrules, short) - 1;
1938 rprecsym = NEW2(nrules, short) - 1;
1939 rassoc = NEW2(nrules, short) - 1;
1940
1941 itemno = 0;
1942 ruleno = 1;
1943
1944 p = grammar;
1945 while (p)
1946 {
1947 rlhs[ruleno] = p->sym->value;
1948 rrhs[ruleno] = itemno;
1949 ruleprec = p->ruleprec;
1950
1951 p = p->next;
1952 while (p && p->sym)
1953 {
1954 ritem[itemno++] = p->sym->value;
1955 /* A rule gets by default the precedence and associativity
1956 of the last token in it. */
1957 if (p->sym->class == STOKEN)
1958 {
1959 rprec[ruleno] = p->sym->prec;
1960 rassoc[ruleno] = p->sym->assoc;
1961 }
1962 if (p) p = p->next;
1963 }
1964
1965 /* If this rule has a %prec,
1966 the specified symbol's precedence replaces the default. */
1967 if (ruleprec)
1968 {
1969 rprec[ruleno] = ruleprec->prec;
1970 rassoc[ruleno] = ruleprec->assoc;
1971 rprecsym[ruleno] = ruleprec->value;
1972 }
1973
1974 ritem[itemno++] = -ruleno;
1975 ruleno++;
1976
1977 if (p) p = p->next;
1978 }
1979
1980 ritem[itemno] = 0;
1981}
1982\f
/* Read a signed integer from STREAM and return its value.

   An optional leading `-' is accepted, followed by decimal digits.
   The first character that is not part of the number is pushed back
   onto STREAM.  If no digit is present the result is 0.

   A number too large for an `int' is clamped to the largest
   representable magnitude instead of overflowing, since signed
   overflow is undefined behavior; all of its digits are still
   consumed.  */

int
read_signed_integer (FILE *stream)
{
  /* Largest `int' value, computed here so that no extra header is
     needed; assumes `unsigned int' has exactly one more value bit
     than `int'.  */
  const int int_max = (int) (~0U >> 1);
  int c = getc (stream);
  int sign = 1;
  int n = 0;

  if (c == '-')
    {
      sign = -1;
      c = getc (stream);
    }

  while (isdigit (c))
    {
      int digit = c - '0';

      /* Saturate when 10 * n + digit would exceed int_max.  Once
         saturated, this test stays true for every further digit.  */
      if (n > (int_max - digit) / 10)
        n = int_max;
      else
        n = 10 * n + digit;

      c = getc (stream);
    }

  /* Give back the character that ended the number.  */
  ungetc (c, stream);

  return n * sign;
}
2007}