]> git.saurik.com Git - bison.git/blame - src/reader.c
Fix bug that mistakes braced code in a declaration in the rules section
[bison.git] / src / reader.c
CommitLineData
35dcf428 1/* Input parser for Bison
9c4637fa 2
05ac60f3 3 Copyright (C) 1984, 1986, 1989, 1992, 1998, 2000, 2001, 2002, 2003,
378f4bd8 4 2005, 2006 Free Software Foundation, Inc.
1ff442ca 5
41aca2e0 6 This file is part of Bison, the GNU Compiler Compiler.
1ff442ca 7
41aca2e0
AD
8 Bison is free software; you can redistribute it and/or modify
9 it under the terms of the GNU General Public License as published by
10 the Free Software Foundation; either version 2, or (at your option)
11 any later version.
1ff442ca 12
41aca2e0
AD
13 Bison is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU General Public License for more details.
1ff442ca 17
41aca2e0
AD
18 You should have received a copy of the GNU General Public License
19 along with Bison; see the file COPYING. If not, write to
0fb669f9
PE
20 the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
21 Boston, MA 02110-1301, USA. */
1ff442ca 22
2cec9080 23#include <config.h>
1ff442ca 24#include "system.h"
e9071366 25#include <assert.h>
17ee7397
PE
26
27#include <quotearg.h>
28
29#include "complain.h"
30#include "conflicts.h"
1ff442ca 31#include "files.h"
17ee7397 32#include "getargs.h"
1ff442ca 33#include "gram.h"
17ee7397 34#include "muscle_tab.h"
b2ca4022 35#include "reader.h"
17ee7397
PE
36#include "symlist.h"
37#include "symtab.h"
e9071366
AD
38#include "scan-gram.h"
39#include "scan-code.h"
1ff442ca 40
02d12d0d
PE
41static void check_and_convert_grammar (void);
42
17ee7397 43static symbol_list *grammar = NULL;
d0829076 44static bool start_flag = false;
676385e2 45merger_list *merge_functions;
1ff442ca 46
34f98f46 47/* Was %union seen? */
d0829076 48bool typed = false;
39a06c25
PE
49
50/* Should rules have a default precedence? */
51bool default_prec = true;
0d533154 52\f
e9955c83
AD
53/*-----------------------.
54| Set the start symbol. |
55`-----------------------*/
1ff442ca 56
e9955c83 57void
a737b216 58grammar_start_symbol_set (symbol *sym, location loc)
1ff442ca
NF
59{
60 if (start_flag)
17ee7397 61 complain_at (loc, _("multiple %s declarations"), "%start");
943819bf
RS
62 else
63 {
d0829076 64 start_flag = true;
a737b216 65 startsymbol = sym;
17ee7397 66 startsymbol_location = loc;
943819bf 67 }
1ff442ca
NF
68}
69
1ff442ca 70
34f98f46
JD
71/*---------------------------------------------------------------------.
72| There are two prologues: one before the first %union and one after. |
73| Augment the one specified by POST. |
74`---------------------------------------------------------------------*/
1ff442ca 75
e9955c83 76void
34f98f46 77prologue_augment (const char *prologue, location loc, bool post)
b6610515 78{
e9955c83 79 struct obstack *oout =
34f98f46 80 !post ? &pre_prologue_obstack : &post_prologue_obstack;
b6610515 81
05ac60f3 82 obstack_fgrow1 (oout, "]b4_syncline(%d, [[", loc.start.line);
e9071366
AD
83 /* FIXME: Protection of M4 characters missing here. See
84 output.c:escaped_output. */
17ee7397
PE
85 MUSCLE_OBSTACK_SGROW (oout,
86 quotearg_style (c_quoting_style, loc.start.file));
6c239755 87 obstack_sgrow (oout, "]])[\n");
e9955c83 88 obstack_sgrow (oout, prologue);
b6610515
RA
89}
90
a70083a3
AD
91\f
92
3e6656f9 93/*-------------------------------------------------------------------.
676385e2
PH
94| Return the merger index for a merging function named NAME, whose |
95| arguments have type TYPE. Records the function, if new, in |
95612cfa 96| MERGER_LIST. |
676385e2
PH
97`-------------------------------------------------------------------*/
98
99static int
17ee7397 100get_merge_function (uniqstr name, uniqstr type, location loc)
676385e2
PH
101{
102 merger_list *syms;
103 merger_list head;
104 int n;
105
106 if (! glr_parser)
107 return 0;
108
109 if (type == NULL)
17ee7397 110 type = uniqstr_new ("");
676385e2
PH
111
112 head.next = merge_functions;
affac613 113 for (syms = &head, n = 1; syms->next; syms = syms->next, n += 1)
17ee7397 114 if (UNIQSTR_EQ (name, syms->next->name))
676385e2 115 break;
a5d50994
AD
116 if (syms->next == NULL)
117 {
da2a7671 118 syms->next = xmalloc (sizeof syms->next[0]);
17ee7397
PE
119 syms->next->name = uniqstr_new (name);
120 syms->next->type = uniqstr_new (type);
a5d50994
AD
121 syms->next->next = NULL;
122 merge_functions = head.next;
123 }
17ee7397 124 else if (!UNIQSTR_EQ (type, syms->next->type))
45a8a65d
PE
125 warn_at (loc, _("result type clash on merge function %s: <%s> != <%s>"),
126 name, type, syms->next->type);
676385e2
PH
127 return n;
128}
129
130/*--------------------------------------.
131| Free all merge-function definitions. |
132`--------------------------------------*/
133
134void
135free_merger_functions (void)
136{
affac613
AD
137 merger_list *L0 = merge_functions;
138 while (L0)
676385e2
PH
139 {
140 merger_list *L1 = L0->next;
141 free (L0);
142 L0 = L1;
143 }
144}
145
a70083a3 146\f
107f7dfb 147/*-------------------------------------------------------------------.
17ee7397 148| Parse the input grammar into one symbol_list structure. Each |
107f7dfb
AD
149| rule is represented by a sequence of symbols: the left hand side |
150| followed by the contents of the right hand side, followed by a |
151| null pointer instead of a symbol to terminate the rule. The next |
152| symbol is the lhs of the following rule. |
153| |
fdbcd8e2
AD
154| All actions are copied out, labelled by the rule number they apply |
155| to. |
107f7dfb 156`-------------------------------------------------------------------*/
1ff442ca 157
f6d0f937 158/* The (currently) last symbol of GRAMMAR. */
04098407 159static symbol_list *grammar_end = NULL;
f6d0f937 160
52328c6e 161/* Append SYM to the grammar. */
8f3596a6 162static void
17ee7397 163grammar_symbol_append (symbol *sym, location loc)
f6d0f937 164{
17ee7397 165 symbol_list *p = symbol_list_new (sym, loc);
f6d0f937
AD
166
167 if (grammar_end)
168 grammar_end->next = p;
169 else
170 grammar = p;
171
172 grammar_end = p;
8f3596a6 173
e3233bf6 174 /* A null SYM stands for an end of rule; it is not an actual
8f3596a6
AD
175 part of it. */
176 if (sym)
177 ++nritems;
f6d0f937
AD
178}
179
8efe435c
AD
180/* The rule currently being defined, and the previous rule.
181 CURRENT_RULE points to the first LHS of the current rule, while
182 PREVIOUS_RULE_END points to the *end* of the previous rule (NULL). */
17ee7397 183symbol_list *current_rule = NULL;
04098407 184static symbol_list *previous_rule_end = NULL;
da4160c3
AD
185
186
8efe435c
AD
187/*----------------------------------------------.
188| Create a new rule for LHS in to the GRAMMAR. |
189`----------------------------------------------*/
da4160c3 190
e9955c83 191void
8f3596a6 192grammar_current_rule_begin (symbol *lhs, location loc)
da4160c3
AD
193{
194 if (!start_flag)
195 {
196 startsymbol = lhs;
17ee7397 197 startsymbol_location = loc;
d0829076 198 start_flag = true;
da4160c3
AD
199 }
200
201 /* Start a new rule and record its lhs. */
202 ++nrules;
8efe435c 203 previous_rule_end = grammar_end;
17ee7397 204 grammar_symbol_append (lhs, loc);
da4160c3
AD
205 current_rule = grammar_end;
206
207 /* Mark the rule's lhs as a nonterminal if not already so. */
da4160c3
AD
208 if (lhs->class == unknown_sym)
209 {
210 lhs->class = nterm_sym;
211 lhs->number = nvars;
212 ++nvars;
213 }
214 else if (lhs->class == token_sym)
17ee7397 215 complain_at (loc, _("rule given for %s, which is a token"), lhs->tag);
da4160c3
AD
216}
217
affac613 218
d40ba6c2
PE
219/*----------------------------------------------------------------------.
220| A symbol should be used if it has a destructor, or if it is a |
221| mid-rule symbol (i.e., the generated LHS replacing a mid-rule |
222| action) that was assigned to, as in "exp: { $$ = 1; } { $$ = $1; }". |
223`----------------------------------------------------------------------*/
84866159
AD
224
225static bool
d40ba6c2 226symbol_should_be_used (symbol_list const *s)
84866159 227{
d40ba6c2 228 return (s->sym->destructor
c66dfadd 229 || (s->midrule && s->midrule->used));
84866159
AD
230}
231
8f3596a6
AD
232/*----------------------------------------------------------------.
233| Check that the rule R is properly defined. For instance, there |
234| should be no type clash on the default action. |
235`----------------------------------------------------------------*/
e9955c83
AD
236
237static void
8f3596a6 238grammar_rule_check (const symbol_list *r)
e9955c83 239{
affac613 240 /* Type check.
e9955c83 241
affac613
AD
242 If there is an action, then there is nothing we can do: the user
243 is allowed to shoot herself in the foot.
3f4c0f80 244
affac613
AD
245 Don't worry about the default action if $$ is untyped, since $$'s
246 value can't be used. */
8f3596a6 247 if (!r->action && r->sym->type_name)
e9955c83 248 {
8f3596a6 249 symbol *first_rhs = r->next->sym;
affac613
AD
250 /* If $$ is being set in default way, report if any type mismatch. */
251 if (first_rhs)
252 {
8f3596a6 253 char const *lhs_type = r->sym->type_name;
affac613
AD
254 const char *rhs_type =
255 first_rhs->type_name ? first_rhs->type_name : "";
256 if (!UNIQSTR_EQ (lhs_type, rhs_type))
8f3596a6 257 warn_at (r->location,
affac613
AD
258 _("type clash on default action: <%s> != <%s>"),
259 lhs_type, rhs_type);
260 }
261 /* Warn if there is no default for $$ but we need one. */
262 else
8f3596a6 263 warn_at (r->location,
affac613
AD
264 _("empty rule for typed nonterminal, and no action"));
265 }
e3233bf6 266
d40ba6c2 267 /* Check that symbol values that should be used are in fact used. */
8f3596a6 268 {
668c5d19 269 symbol_list const *l = r;
8f3596a6
AD
270 int n = 0;
271 for (; l && l->sym; l = l->next, ++n)
272 if (! (l->used
d40ba6c2 273 || !symbol_should_be_used (l)
8f3596a6 274 /* The default action, $$ = $1, `uses' both. */
668c5d19
PE
275 || (!r->action && (n == 0 || n == 1))))
276 {
277 if (n)
278 warn_at (r->location, _("unused value: $%d"), n);
279 else
280 warn_at (r->location, _("unset value: $$"));
281 }
8f3596a6 282 }
e9955c83
AD
283}
284
285
8efe435c
AD
286/*-------------------------------------.
287| End the currently being grown rule. |
288`-------------------------------------*/
e9955c83
AD
289
290void
8f3596a6 291grammar_current_rule_end (location loc)
e9955c83
AD
292{
293 /* Put an empty link in the list to mark the end of this rule */
8efe435c 294 grammar_symbol_append (NULL, grammar_end->location);
17ee7397 295 current_rule->location = loc;
8f3596a6 296 grammar_rule_check (current_rule);
e9955c83
AD
297}
298
299
8efe435c
AD
300/*-------------------------------------------------------------------.
301| The previous action turns out the be a mid-rule action. Attach it |
302| to the current rule, i.e., create a dummy symbol, attach it this |
303| mid-rule action, and append this dummy nonterminal to the current |
304| rule. |
305`-------------------------------------------------------------------*/
1485e106 306
6b702268 307void
1485e106
AD
308grammar_midrule_action (void)
309{
310 /* Since the action was written out with this rule's number, we must
311 give the new rule this number by inserting the new rule before
312 it. */
313
8efe435c
AD
314 /* Make a DUMMY nonterminal, whose location is that of the midrule
315 action. Create the MIDRULE. */
17ee7397
PE
316 location dummy_location = current_rule->action_location;
317 symbol *dummy = dummy_symbol_get (dummy_location);
318 symbol_list *midrule = symbol_list_new (dummy, dummy_location);
1485e106
AD
319
320 /* Make a new rule, whose body is empty, before the current one, so
321 that the action just read can belong to it. */
322 ++nrules;
323 ++nritems;
8efe435c
AD
324 /* Attach its location and actions to that of the DUMMY. */
325 midrule->location = dummy_location;
326 midrule->action = current_rule->action;
327 midrule->action_location = dummy_location;
1485e106 328 current_rule->action = NULL;
378f4bd8
AD
329 /* If $$ was used in the action, the LHS of the enclosing rule was
330 incorrectly flagged as used. */
331 midrule->used = current_rule->used;
332 current_rule->used = false;
1485e106 333
8efe435c
AD
334 if (previous_rule_end)
335 previous_rule_end->next = midrule;
1485e106 336 else
8efe435c 337 grammar = midrule;
1485e106 338
8efe435c 339 /* End the dummy's rule. */
84866159
AD
340 midrule->next = symbol_list_new (NULL, dummy_location);
341 grammar_rule_check (midrule);
342 midrule->next->next = current_rule;
1485e106 343
84866159 344 previous_rule_end = midrule->next;
1485e106 345
8efe435c 346 /* Insert the dummy nonterminal replacing the midrule action into
84866159 347 the current rule. Bind it to its dedicated rule. */
8efe435c 348 grammar_current_rule_symbol_append (dummy, dummy_location);
6ec2c0f2 349 grammar_end->midrule = midrule;
1485e106
AD
350}
351
9af3fbce
AD
352/* Set the precedence symbol of the current rule to PRECSYM. */
353
e9955c83 354void
17ee7397 355grammar_current_rule_prec_set (symbol *precsym, location loc)
9af3fbce
AD
356{
357 if (current_rule->ruleprec)
17ee7397 358 complain_at (loc, _("only one %s allowed per rule"), "%prec");
9af3fbce
AD
359 current_rule->ruleprec = precsym;
360}
361
676385e2
PH
362/* Attach dynamic precedence DPREC to the current rule. */
363
364void
17ee7397 365grammar_current_rule_dprec_set (int dprec, location loc)
676385e2
PH
366{
367 if (! glr_parser)
17ee7397 368 warn_at (loc, _("%s affects only GLR parsers"), "%dprec");
676385e2 369 if (dprec <= 0)
17ee7397 370 complain_at (loc, _("%s must be followed by positive number"), "%dprec");
39f41916 371 else if (current_rule->dprec != 0)
17ee7397 372 complain_at (loc, _("only one %s allowed per rule"), "%dprec");
676385e2
PH
373 current_rule->dprec = dprec;
374}
375
376/* Attach a merge function NAME with argument type TYPE to current
377 rule. */
378
379void
17ee7397 380grammar_current_rule_merge_set (uniqstr name, location loc)
676385e2
PH
381{
382 if (! glr_parser)
17ee7397 383 warn_at (loc, _("%s affects only GLR parsers"), "%merge");
39f41916 384 if (current_rule->merger != 0)
17ee7397 385 complain_at (loc, _("only one %s allowed per rule"), "%merge");
39f41916 386 current_rule->merger =
17ee7397 387 get_merge_function (name, current_rule->sym->type_name, loc);
676385e2
PH
388}
389
17ee7397 390/* Attach SYM to the current rule. If needed, move the previous
2e047461
AD
391 action as a mid-rule action. */
392
e9955c83 393void
17ee7397 394grammar_current_rule_symbol_append (symbol *sym, location loc)
2e047461
AD
395{
396 if (current_rule->action)
397 grammar_midrule_action ();
17ee7397 398 grammar_symbol_append (sym, loc);
2e047461
AD
399}
400
6b702268 401/* Attach an ACTION to the current rule. */
2e047461 402
e9955c83 403void
17ee7397 404grammar_current_rule_action_append (const char *action, location loc)
2e047461 405{
e9071366 406 current_rule->action = translate_rule_action (current_rule, action, loc);
17ee7397 407 current_rule->action_location = loc;
2e047461
AD
408}
409
a70083a3 410\f
a70083a3
AD
411/*---------------------------------------------------------------.
412| Convert the rules into the representation using RRHS, RLHS and |
d9b739c3 413| RITEM. |
a70083a3 414`---------------------------------------------------------------*/
1ff442ca 415
4a120d45 416static void
118fb205 417packgram (void)
1ff442ca 418{
9222837b 419 unsigned int itemno = 0;
17ee7397
PE
420 rule_number ruleno = 0;
421 symbol_list *p = grammar;
1ff442ca 422
e9ad4aec
PE
423 ritem = xnmalloc (nritems + 1, sizeof *ritem);
424
425 /* This sentinel is used by build_relations in gram.c. */
426 *ritem++ = 0;
427
da2a7671 428 rules = xnmalloc (nrules, sizeof *rules);
1ff442ca 429
1ff442ca
NF
430 while (p)
431 {
e9071366 432 int rule_length = 0;
17ee7397 433 symbol *ruleprec = p->ruleprec;
d7e1f00c 434 rules[ruleno].user_number = ruleno;
c3b407f4 435 rules[ruleno].number = ruleno;
bba97eb2 436 rules[ruleno].lhs = p->sym;
99013900 437 rules[ruleno].rhs = ritem + itemno;
da2a7671
PE
438 rules[ruleno].prec = NULL;
439 rules[ruleno].dprec = p->dprec;
440 rules[ruleno].merger = p->merger;
441 rules[ruleno].precsym = NULL;
8efe435c 442 rules[ruleno].location = p->location;
b4afb6bb 443 rules[ruleno].useful = true;
1a2b5d37 444 rules[ruleno].action = p->action;
8efe435c 445 rules[ruleno].action_location = p->action_location;
1ff442ca 446
e9071366 447 for (p = p->next; p && p->sym; p = p->next)
1ff442ca 448 {
e9071366
AD
449 ++rule_length;
450
451 /* Don't allow rule_length == INT_MAX, since that might
452 cause confusion with strtol if INT_MAX == LONG_MAX. */
453 if (rule_length == INT_MAX)
454 fatal_at (rules[ruleno].location, _("rule is too long"));
455
17ee7397 456 /* item_number = symbol_number.
5fbb0954 457 But the former needs to contain more: negative rule numbers. */
a49aecd5 458 ritem[itemno++] = symbol_number_as_item_number (p->sym->number);
1ff442ca 459 /* A rule gets by default the precedence and associativity
e9071366 460 of its last token. */
39a06c25 461 if (p->sym->class == token_sym && default_prec)
03b31c0c 462 rules[ruleno].prec = p->sym;
1ff442ca
NF
463 }
464
465 /* If this rule has a %prec,
a70083a3 466 the specified symbol's precedence replaces the default. */
1ff442ca
NF
467 if (ruleprec)
468 {
03b31c0c
AD
469 rules[ruleno].precsym = ruleprec;
470 rules[ruleno].prec = ruleprec;
1ff442ca 471 }
e9071366 472 /* An item ends by the rule number (negated). */
4b3d3a8e 473 ritem[itemno++] = rule_number_as_item_number (ruleno);
e9071366 474 assert (itemno < ITEM_NUMBER_MAX);
f3849179 475 ++ruleno;
e9071366 476 assert (ruleno < RULE_NUMBER_MAX);
1ff442ca 477
a70083a3
AD
478 if (p)
479 p = p->next;
1ff442ca
NF
480 }
481
68cae94e 482 assert (itemno == nritems);
3067fbef 483
273a74fa 484 if (trace_flag & trace_sets)
3067fbef 485 ritem_print (stderr);
1ff442ca 486}
a70083a3 487\f
fdbcd8e2
AD
488/*------------------------------------------------------------------.
489| Read in the grammar specification and record it in the format |
490| described in gram.h. All actions are copied into ACTION_OBSTACK, |
491| in each case forming the body of a C function (YYACTION) which |
492| contains a switch statement to decide which action to execute. |
493`------------------------------------------------------------------*/
a70083a3
AD
494
495void
496reader (void)
497{
a70083a3 498 /* Initialize the symbol table. */
db8837cb 499 symbols_new ();
b6610515 500
88bce5a2
AD
501 /* Construct the accept symbol. */
502 accept = symbol_get ("$accept", empty_location);
503 accept->class = nterm_sym;
504 accept->number = nvars++;
30171f79 505
a70083a3 506 /* Construct the error token */
39f41916 507 errtoken = symbol_get ("error", empty_location);
d7020c20 508 errtoken->class = token_sym;
72a23c97 509 errtoken->number = ntokens++;
b6610515 510
a70083a3
AD
511 /* Construct a token that represents all undefined literal tokens.
512 It is always token number 2. */
88bce5a2 513 undeftoken = symbol_get ("$undefined", empty_location);
d7020c20 514 undeftoken->class = token_sym;
72a23c97 515 undeftoken->number = ntokens++;
a70083a3 516
331dbc1b 517 /* Initialize the obstacks. */
0dd1580a
RA
518 obstack_init (&pre_prologue_obstack);
519 obstack_init (&post_prologue_obstack);
331dbc1b 520
2b81e969 521 gram_in = xfopen (grammar_file, "r");
e9955c83 522
473d0a75
AD
523 gram__flex_debug = trace_flag & trace_scan;
524 gram_debug = trace_flag & trace_parse;
e9071366 525 gram_scanner_initialize ();
78c3da9e 526 gram_parse ();
331dbc1b 527
02d12d0d
PE
528 if (! complaint_issued)
529 check_and_convert_grammar ();
530
531 xfclose (gram_in);
532}
533
b275314e 534
02d12d0d
PE
535/*-------------------------------------------------------------.
536| Check the grammar that has just been read, and convert it to |
537| internal form. |
538`-------------------------------------------------------------*/
539
540static void
541check_and_convert_grammar (void)
542{
543 /* Grammar has been read. Do some checking. */
e9955c83
AD
544 if (nrules == 0)
545 fatal (_("no rules in the input grammar"));
546
547 /* Report any undefined symbols and consider them nonterminals. */
548 symbols_check_defined ();
b7c49edf 549
88bce5a2
AD
550 /* If the user did not define her ENDTOKEN, do it now. */
551 if (!endtoken)
b7c49edf 552 {
88bce5a2
AD
553 endtoken = symbol_get ("$end", empty_location);
554 endtoken->class = token_sym;
555 endtoken->number = 0;
b7c49edf 556 /* Value specified by POSIX. */
88bce5a2 557 endtoken->user_token_number = 0;
b7c49edf
AD
558 }
559
02d12d0d 560 /* Insert the initial rule, whose line is that of the first rule
e9955c83
AD
561 (not that of the start symbol):
562
88bce5a2 563 accept: %start EOF. */
e9955c83 564 {
17ee7397 565 symbol_list *p = symbol_list_new (accept, empty_location);
8efe435c
AD
566 p->location = grammar->location;
567 p->next = symbol_list_new (startsymbol, empty_location);
88bce5a2 568 p->next->next = symbol_list_new (endtoken, empty_location);
8efe435c 569 p->next->next->next = symbol_list_new (NULL, empty_location);
e9955c83
AD
570 p->next->next->next->next = grammar;
571 nrules += 1;
572 nritems += 3;
573 grammar = p;
574 }
575
68cae94e 576 assert (nsyms <= SYMBOL_NUMBER_MAXIMUM && nsyms == ntokens + nvars);
b0c4483e 577
a70083a3
AD
578 /* Assign the symbols their symbol numbers. Write #defines for the
579 token symbols into FDEFINES if requested. */
2f1afb73 580 symbols_pack ();
93ede233 581
a70083a3
AD
582 /* Convert the grammar into the format described in gram.h. */
583 packgram ();
8419d367 584
17ee7397
PE
585 /* The grammar as a symbol_list is no longer needed. */
586 LIST_FREE (symbol_list, grammar);
a70083a3 587}