]> git.saurik.com Git - bison.git/blame - src/reader.c
Fix a longstanding bug uncovered by bro-0.9a9/src/parse.y, which I
[bison.git] / src / reader.c
CommitLineData
35dcf428 1/* Input parser for Bison
9c4637fa 2
05ac60f3 3 Copyright (C) 1984, 1986, 1989, 1992, 1998, 2000, 2001, 2002, 2003,
378f4bd8 4 2005, 2006 Free Software Foundation, Inc.
1ff442ca 5
41aca2e0 6 This file is part of Bison, the GNU Compiler Compiler.
1ff442ca 7
41aca2e0
AD
8 Bison is free software; you can redistribute it and/or modify
9 it under the terms of the GNU General Public License as published by
10 the Free Software Foundation; either version 2, or (at your option)
11 any later version.
1ff442ca 12
41aca2e0
AD
13 Bison is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU General Public License for more details.
1ff442ca 17
41aca2e0
AD
18 You should have received a copy of the GNU General Public License
19 along with Bison; see the file COPYING. If not, write to
0fb669f9
PE
20 the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
21 Boston, MA 02110-1301, USA. */
1ff442ca 22
2cec9080 23#include <config.h>
1ff442ca 24#include "system.h"
17ee7397
PE
25
26#include <quotearg.h>
27
28#include "complain.h"
29#include "conflicts.h"
1ff442ca 30#include "files.h"
17ee7397 31#include "getargs.h"
1ff442ca 32#include "gram.h"
17ee7397 33#include "muscle_tab.h"
6c89f1c1 34#include "output.h"
b2ca4022 35#include "reader.h"
17ee7397
PE
36#include "symlist.h"
37#include "symtab.h"
1ff442ca 38
02d12d0d
PE
39static void check_and_convert_grammar (void);
40
17ee7397 41static symbol_list *grammar = NULL;
d0829076 42static bool start_flag = false;
676385e2 43merger_list *merge_functions;
1ff442ca 44
affac613 45/* Was %union seen? */
d0829076 46bool typed = false;
39a06c25
PE
47
48/* Should rules have a default precedence? */
49bool default_prec = true;
0d533154 50\f
e9955c83
AD
51/*-----------------------.
52| Set the start symbol. |
53`-----------------------*/
1ff442ca 54
e9955c83 55void
a737b216 56grammar_start_symbol_set (symbol *sym, location loc)
1ff442ca
NF
57{
58 if (start_flag)
17ee7397 59 complain_at (loc, _("multiple %s declarations"), "%start");
943819bf
RS
60 else
61 {
d0829076 62 start_flag = true;
a737b216 63 startsymbol = sym;
17ee7397 64 startsymbol_location = loc;
943819bf 65 }
1ff442ca
NF
66}
67
1ff442ca 68
d7020c20 69/*----------------------------------------------------------------.
e9955c83
AD
70| There are two prologues: one before %union, one after. Augment |
71| the current one. |
d7020c20 72`----------------------------------------------------------------*/
1ff442ca 73
e9955c83 74void
17ee7397 75prologue_augment (const char *prologue, location loc)
b6610515 76{
e9955c83
AD
77 struct obstack *oout =
78 !typed ? &pre_prologue_obstack : &post_prologue_obstack;
b6610515 79
05ac60f3 80 obstack_fgrow1 (oout, "]b4_syncline(%d, [[", loc.start.line);
17ee7397
PE
81 MUSCLE_OBSTACK_SGROW (oout,
82 quotearg_style (c_quoting_style, loc.start.file));
6c239755 83 obstack_sgrow (oout, "]])[\n");
e9955c83 84 obstack_sgrow (oout, prologue);
b6610515
RA
85}
86
a70083a3
AD
87\f
88
3e6656f9 89/*-------------------------------------------------------------------.
676385e2
PH
90| Return the merger index for a merging function named NAME, whose |
91| arguments have type TYPE. Records the function, if new, in |
95612cfa 92| MERGER_LIST. |
676385e2
PH
93`-------------------------------------------------------------------*/
94
95static int
17ee7397 96get_merge_function (uniqstr name, uniqstr type, location loc)
676385e2
PH
97{
98 merger_list *syms;
99 merger_list head;
100 int n;
101
102 if (! glr_parser)
103 return 0;
104
105 if (type == NULL)
17ee7397 106 type = uniqstr_new ("");
676385e2
PH
107
108 head.next = merge_functions;
affac613 109 for (syms = &head, n = 1; syms->next; syms = syms->next, n += 1)
17ee7397 110 if (UNIQSTR_EQ (name, syms->next->name))
676385e2 111 break;
a5d50994
AD
112 if (syms->next == NULL)
113 {
da2a7671 114 syms->next = xmalloc (sizeof syms->next[0]);
17ee7397
PE
115 syms->next->name = uniqstr_new (name);
116 syms->next->type = uniqstr_new (type);
a5d50994
AD
117 syms->next->next = NULL;
118 merge_functions = head.next;
119 }
17ee7397 120 else if (!UNIQSTR_EQ (type, syms->next->type))
45a8a65d
PE
121 warn_at (loc, _("result type clash on merge function %s: <%s> != <%s>"),
122 name, type, syms->next->type);
676385e2
PH
123 return n;
124}
125
126/*--------------------------------------.
127| Free all merge-function definitions. |
128`--------------------------------------*/
129
130void
131free_merger_functions (void)
132{
affac613
AD
133 merger_list *L0 = merge_functions;
134 while (L0)
676385e2
PH
135 {
136 merger_list *L1 = L0->next;
137 free (L0);
138 L0 = L1;
139 }
140}
141
a70083a3 142\f
107f7dfb 143/*-------------------------------------------------------------------.
17ee7397 144| Parse the input grammar into a one symbol_list structure. Each |
107f7dfb
AD
145| rule is represented by a sequence of symbols: the left hand side |
146| followed by the contents of the right hand side, followed by a |
147| null pointer instead of a symbol to terminate the rule. The next |
148| symbol is the lhs of the following rule. |
149| |
fdbcd8e2
AD
150| All actions are copied out, labelled by the rule number they apply |
151| to. |
107f7dfb 152`-------------------------------------------------------------------*/
1ff442ca 153
f6d0f937 154/* The (currently) last symbol of GRAMMAR. */
04098407 155static symbol_list *grammar_end = NULL;
f6d0f937 156
52328c6e 157/* Append SYM to the grammar. */
8f3596a6 158static void
17ee7397 159grammar_symbol_append (symbol *sym, location loc)
f6d0f937 160{
17ee7397 161 symbol_list *p = symbol_list_new (sym, loc);
f6d0f937
AD
162
163 if (grammar_end)
164 grammar_end->next = p;
165 else
166 grammar = p;
167
168 grammar_end = p;
8f3596a6 169
e3233bf6 170 /* A null SYM stands for an end of rule; it is not an actual
8f3596a6
AD
171 part of it. */
172 if (sym)
173 ++nritems;
f6d0f937
AD
174}
175
8efe435c
AD
176/* The rule currently being defined, and the previous rule.
177 CURRENT_RULE points to the first LHS of the current rule, while
178 PREVIOUS_RULE_END points to the *end* of the previous rule (NULL). */
17ee7397 179symbol_list *current_rule = NULL;
04098407 180static symbol_list *previous_rule_end = NULL;
da4160c3
AD
181
182
8efe435c
AD
183/*----------------------------------------------.
184| Create a new rule for LHS in to the GRAMMAR. |
185`----------------------------------------------*/
da4160c3 186
e9955c83 187void
8f3596a6 188grammar_current_rule_begin (symbol *lhs, location loc)
da4160c3
AD
189{
190 if (!start_flag)
191 {
192 startsymbol = lhs;
17ee7397 193 startsymbol_location = loc;
d0829076 194 start_flag = true;
da4160c3
AD
195 }
196
197 /* Start a new rule and record its lhs. */
198 ++nrules;
8efe435c 199 previous_rule_end = grammar_end;
17ee7397 200 grammar_symbol_append (lhs, loc);
da4160c3
AD
201 current_rule = grammar_end;
202
203 /* Mark the rule's lhs as a nonterminal if not already so. */
da4160c3
AD
204 if (lhs->class == unknown_sym)
205 {
206 lhs->class = nterm_sym;
207 lhs->number = nvars;
208 ++nvars;
209 }
210 else if (lhs->class == token_sym)
17ee7397 211 complain_at (loc, _("rule given for %s, which is a token"), lhs->tag);
da4160c3
AD
212}
213
affac613 214
d40ba6c2
PE
215/*----------------------------------------------------------------------.
216| A symbol should be used if it has a destructor, or if it is a |
217| mid-rule symbol (i.e., the generated LHS replacing a mid-rule |
218| action) that was assigned to, as in "exp: { $$ = 1; } { $$ = $1; }". |
219`----------------------------------------------------------------------*/
84866159
AD
220
221static bool
d40ba6c2 222symbol_should_be_used (symbol_list const *s)
84866159 223{
d40ba6c2 224 return (s->sym->destructor
c66dfadd 225 || (s->midrule && s->midrule->used));
84866159
AD
226}
227
8f3596a6
AD
228/*----------------------------------------------------------------.
229| Check that the rule R is properly defined. For instance, there |
230| should be no type clash on the default action. |
231`----------------------------------------------------------------*/
e9955c83
AD
232
233static void
8f3596a6 234grammar_rule_check (const symbol_list *r)
e9955c83 235{
affac613 236 /* Type check.
e9955c83 237
affac613
AD
238 If there is an action, then there is nothing we can do: the user
239 is allowed to shoot herself in the foot.
3f4c0f80 240
affac613
AD
241 Don't worry about the default action if $$ is untyped, since $$'s
242 value can't be used. */
8f3596a6 243 if (!r->action && r->sym->type_name)
e9955c83 244 {
8f3596a6 245 symbol *first_rhs = r->next->sym;
affac613
AD
246 /* If $$ is being set in default way, report if any type mismatch. */
247 if (first_rhs)
248 {
8f3596a6 249 char const *lhs_type = r->sym->type_name;
affac613
AD
250 const char *rhs_type =
251 first_rhs->type_name ? first_rhs->type_name : "";
252 if (!UNIQSTR_EQ (lhs_type, rhs_type))
8f3596a6 253 warn_at (r->location,
affac613
AD
254 _("type clash on default action: <%s> != <%s>"),
255 lhs_type, rhs_type);
256 }
257 /* Warn if there is no default for $$ but we need one. */
258 else
8f3596a6 259 warn_at (r->location,
affac613
AD
260 _("empty rule for typed nonterminal, and no action"));
261 }
e3233bf6 262
d40ba6c2 263 /* Check that symbol values that should be used are in fact used. */
8f3596a6 264 {
668c5d19 265 symbol_list const *l = r;
8f3596a6
AD
266 int n = 0;
267 for (; l && l->sym; l = l->next, ++n)
268 if (! (l->used
d40ba6c2 269 || !symbol_should_be_used (l)
8f3596a6 270 /* The default action, $$ = $1, `uses' both. */
668c5d19
PE
271 || (!r->action && (n == 0 || n == 1))))
272 {
273 if (n)
274 warn_at (r->location, _("unused value: $%d"), n);
275 else
276 warn_at (r->location, _("unset value: $$"));
277 }
8f3596a6 278 }
e9955c83
AD
279}
280
281
8efe435c
AD
282/*-------------------------------------.
283| End the currently being grown rule. |
284`-------------------------------------*/
e9955c83
AD
285
286void
8f3596a6 287grammar_current_rule_end (location loc)
e9955c83
AD
288{
289 /* Put an empty link in the list to mark the end of this rule */
8efe435c 290 grammar_symbol_append (NULL, grammar_end->location);
17ee7397 291 current_rule->location = loc;
8f3596a6 292 grammar_rule_check (current_rule);
e9955c83
AD
293}
294
295
8efe435c
AD
296/*-------------------------------------------------------------------.
297| The previous action turns out the be a mid-rule action. Attach it |
298| to the current rule, i.e., create a dummy symbol, attach it this |
299| mid-rule action, and append this dummy nonterminal to the current |
300| rule. |
301`-------------------------------------------------------------------*/
1485e106 302
6b702268 303void
1485e106
AD
304grammar_midrule_action (void)
305{
306 /* Since the action was written out with this rule's number, we must
307 give the new rule this number by inserting the new rule before
308 it. */
309
8efe435c
AD
310 /* Make a DUMMY nonterminal, whose location is that of the midrule
311 action. Create the MIDRULE. */
17ee7397
PE
312 location dummy_location = current_rule->action_location;
313 symbol *dummy = dummy_symbol_get (dummy_location);
314 symbol_list *midrule = symbol_list_new (dummy, dummy_location);
1485e106
AD
315
316 /* Make a new rule, whose body is empty, before the current one, so
317 that the action just read can belong to it. */
318 ++nrules;
319 ++nritems;
8efe435c
AD
320 /* Attach its location and actions to that of the DUMMY. */
321 midrule->location = dummy_location;
322 midrule->action = current_rule->action;
323 midrule->action_location = dummy_location;
1485e106 324 current_rule->action = NULL;
378f4bd8
AD
325 /* If $$ was used in the action, the LHS of the enclosing rule was
326 incorrectly flagged as used. */
327 midrule->used = current_rule->used;
328 current_rule->used = false;
1485e106 329
8efe435c
AD
330 if (previous_rule_end)
331 previous_rule_end->next = midrule;
1485e106 332 else
8efe435c 333 grammar = midrule;
1485e106 334
8efe435c 335 /* End the dummy's rule. */
84866159
AD
336 midrule->next = symbol_list_new (NULL, dummy_location);
337 grammar_rule_check (midrule);
338 midrule->next->next = current_rule;
1485e106 339
84866159 340 previous_rule_end = midrule->next;
1485e106 341
8efe435c 342 /* Insert the dummy nonterminal replacing the midrule action into
84866159 343 the current rule. Bind it to its dedicated rule. */
8efe435c 344 grammar_current_rule_symbol_append (dummy, dummy_location);
6ec2c0f2 345 grammar_end->midrule = midrule;
1485e106
AD
346}
347
9af3fbce
AD
348/* Set the precedence symbol of the current rule to PRECSYM. */
349
e9955c83 350void
17ee7397 351grammar_current_rule_prec_set (symbol *precsym, location loc)
9af3fbce
AD
352{
353 if (current_rule->ruleprec)
17ee7397 354 complain_at (loc, _("only one %s allowed per rule"), "%prec");
9af3fbce
AD
355 current_rule->ruleprec = precsym;
356}
357
676385e2
PH
358/* Attach dynamic precedence DPREC to the current rule. */
359
360void
17ee7397 361grammar_current_rule_dprec_set (int dprec, location loc)
676385e2
PH
362{
363 if (! glr_parser)
17ee7397 364 warn_at (loc, _("%s affects only GLR parsers"), "%dprec");
676385e2 365 if (dprec <= 0)
17ee7397 366 complain_at (loc, _("%s must be followed by positive number"), "%dprec");
39f41916 367 else if (current_rule->dprec != 0)
17ee7397 368 complain_at (loc, _("only one %s allowed per rule"), "%dprec");
676385e2
PH
369 current_rule->dprec = dprec;
370}
371
372/* Attach a merge function NAME with argument type TYPE to current
373 rule. */
374
375void
17ee7397 376grammar_current_rule_merge_set (uniqstr name, location loc)
676385e2
PH
377{
378 if (! glr_parser)
17ee7397 379 warn_at (loc, _("%s affects only GLR parsers"), "%merge");
39f41916 380 if (current_rule->merger != 0)
17ee7397 381 complain_at (loc, _("only one %s allowed per rule"), "%merge");
39f41916 382 current_rule->merger =
17ee7397 383 get_merge_function (name, current_rule->sym->type_name, loc);
676385e2
PH
384}
385
17ee7397 386/* Attach SYM to the current rule. If needed, move the previous
2e047461
AD
387 action as a mid-rule action. */
388
e9955c83 389void
17ee7397 390grammar_current_rule_symbol_append (symbol *sym, location loc)
2e047461
AD
391{
392 if (current_rule->action)
393 grammar_midrule_action ();
17ee7397 394 grammar_symbol_append (sym, loc);
2e047461
AD
395}
396
6b702268 397/* Attach an ACTION to the current rule. */
2e047461 398
e9955c83 399void
17ee7397 400grammar_current_rule_action_append (const char *action, location loc)
2e047461 401{
6b702268
PE
402 /* There's no need to invoke grammar_midrule_action here, since the
403 scanner already did it if necessary. */
2e047461 404 current_rule->action = action;
17ee7397 405 current_rule->action_location = loc;
2e047461
AD
406}
407
a70083a3 408\f
a70083a3
AD
409/*---------------------------------------------------------------.
410| Convert the rules into the representation using RRHS, RLHS and |
d9b739c3 411| RITEM. |
a70083a3 412`---------------------------------------------------------------*/
1ff442ca 413
4a120d45 414static void
118fb205 415packgram (void)
1ff442ca 416{
9222837b 417 unsigned int itemno = 0;
17ee7397
PE
418 rule_number ruleno = 0;
419 symbol_list *p = grammar;
1ff442ca 420
e9ad4aec
PE
421 ritem = xnmalloc (nritems + 1, sizeof *ritem);
422
423 /* This sentinel is used by build_relations in gram.c. */
424 *ritem++ = 0;
425
da2a7671 426 rules = xnmalloc (nrules, sizeof *rules);
1ff442ca 427
1ff442ca
NF
428 while (p)
429 {
17ee7397 430 symbol *ruleprec = p->ruleprec;
d7e1f00c 431 rules[ruleno].user_number = ruleno;
c3b407f4 432 rules[ruleno].number = ruleno;
bba97eb2 433 rules[ruleno].lhs = p->sym;
99013900 434 rules[ruleno].rhs = ritem + itemno;
da2a7671
PE
435 rules[ruleno].prec = NULL;
436 rules[ruleno].dprec = p->dprec;
437 rules[ruleno].merger = p->merger;
438 rules[ruleno].precsym = NULL;
8efe435c 439 rules[ruleno].location = p->location;
b4afb6bb 440 rules[ruleno].useful = true;
1a2b5d37 441 rules[ruleno].action = p->action;
8efe435c 442 rules[ruleno].action_location = p->action_location;
1ff442ca
NF
443
444 p = p->next;
445 while (p && p->sym)
446 {
17ee7397 447 /* item_number = symbol_number.
5fbb0954 448 But the former needs to contain more: negative rule numbers. */
a49aecd5 449 ritem[itemno++] = symbol_number_as_item_number (p->sym->number);
1ff442ca
NF
450 /* A rule gets by default the precedence and associativity
451 of the last token in it. */
39a06c25 452 if (p->sym->class == token_sym && default_prec)
03b31c0c 453 rules[ruleno].prec = p->sym;
a70083a3
AD
454 if (p)
455 p = p->next;
1ff442ca
NF
456 }
457
458 /* If this rule has a %prec,
a70083a3 459 the specified symbol's precedence replaces the default. */
1ff442ca
NF
460 if (ruleprec)
461 {
03b31c0c
AD
462 rules[ruleno].precsym = ruleprec;
463 rules[ruleno].prec = ruleprec;
1ff442ca 464 }
4b3d3a8e 465 ritem[itemno++] = rule_number_as_item_number (ruleno);
f3849179 466 ++ruleno;
1ff442ca 467
a70083a3
AD
468 if (p)
469 p = p->next;
1ff442ca
NF
470 }
471
68cae94e 472 assert (itemno == nritems);
3067fbef 473
273a74fa 474 if (trace_flag & trace_sets)
3067fbef 475 ritem_print (stderr);
1ff442ca 476}
a70083a3 477\f
fdbcd8e2
AD
478/*------------------------------------------------------------------.
479| Read in the grammar specification and record it in the format |
480| described in gram.h. All actions are copied into ACTION_OBSTACK, |
481| in each case forming the body of a C function (YYACTION) which |
482| contains a switch statement to decide which action to execute. |
483`------------------------------------------------------------------*/
a70083a3
AD
484
485void
486reader (void)
487{
a70083a3 488 /* Initialize the symbol table. */
db8837cb 489 symbols_new ();
b6610515 490
88bce5a2
AD
491 /* Construct the accept symbol. */
492 accept = symbol_get ("$accept", empty_location);
493 accept->class = nterm_sym;
494 accept->number = nvars++;
30171f79 495
a70083a3 496 /* Construct the error token */
39f41916 497 errtoken = symbol_get ("error", empty_location);
d7020c20 498 errtoken->class = token_sym;
72a23c97 499 errtoken->number = ntokens++;
b6610515 500
a70083a3
AD
501 /* Construct a token that represents all undefined literal tokens.
502 It is always token number 2. */
88bce5a2 503 undeftoken = symbol_get ("$undefined", empty_location);
d7020c20 504 undeftoken->class = token_sym;
72a23c97 505 undeftoken->number = ntokens++;
a70083a3 506
331dbc1b 507 /* Initialize the obstacks. */
0dd1580a
RA
508 obstack_init (&pre_prologue_obstack);
509 obstack_init (&post_prologue_obstack);
331dbc1b 510
2b81e969 511 gram_in = xfopen (grammar_file, "r");
e9955c83 512
473d0a75
AD
513 gram__flex_debug = trace_flag & trace_scan;
514 gram_debug = trace_flag & trace_parse;
1d6412ad 515 scanner_initialize ();
78c3da9e 516 gram_parse ();
331dbc1b 517
02d12d0d
PE
518 if (! complaint_issued)
519 check_and_convert_grammar ();
520
521 xfclose (gram_in);
522}
523
b275314e 524
02d12d0d
PE
525/*-------------------------------------------------------------.
526| Check the grammar that has just been read, and convert it to |
527| internal form. |
528`-------------------------------------------------------------*/
529
530static void
531check_and_convert_grammar (void)
532{
533 /* Grammar has been read. Do some checking. */
e9955c83
AD
534 if (nrules == 0)
535 fatal (_("no rules in the input grammar"));
536
537 /* Report any undefined symbols and consider them nonterminals. */
538 symbols_check_defined ();
b7c49edf 539
88bce5a2
AD
540 /* If the user did not define her ENDTOKEN, do it now. */
541 if (!endtoken)
b7c49edf 542 {
88bce5a2
AD
543 endtoken = symbol_get ("$end", empty_location);
544 endtoken->class = token_sym;
545 endtoken->number = 0;
b7c49edf 546 /* Value specified by POSIX. */
88bce5a2 547 endtoken->user_token_number = 0;
b7c49edf
AD
548 }
549
02d12d0d 550 /* Insert the initial rule, whose line is that of the first rule
e9955c83
AD
551 (not that of the start symbol):
552
88bce5a2 553 accept: %start EOF. */
e9955c83 554 {
17ee7397 555 symbol_list *p = symbol_list_new (accept, empty_location);
8efe435c
AD
556 p->location = grammar->location;
557 p->next = symbol_list_new (startsymbol, empty_location);
88bce5a2 558 p->next->next = symbol_list_new (endtoken, empty_location);
8efe435c 559 p->next->next->next = symbol_list_new (NULL, empty_location);
e9955c83
AD
560 p->next->next->next->next = grammar;
561 nrules += 1;
562 nritems += 3;
563 grammar = p;
564 }
565
68cae94e 566 assert (nsyms <= SYMBOL_NUMBER_MAXIMUM && nsyms == ntokens + nvars);
b0c4483e 567
a70083a3
AD
568 /* Assign the symbols their symbol numbers. Write #defines for the
569 token symbols into FDEFINES if requested. */
2f1afb73 570 symbols_pack ();
93ede233 571
a70083a3
AD
572 /* Convert the grammar into the format described in gram.h. */
573 packgram ();
8419d367 574
17ee7397
PE
575 /* The grammar as a symbol_list is no longer needed. */
576 LIST_FREE (symbol_list, grammar);
a70083a3 577}