]> git.saurik.com Git - bison.git/blame - src/reader.c
When reducing initial empty rules, Bison parser read an initial
[bison.git] / src / reader.c
CommitLineData
35dcf428 1/* Input parser for Bison
9c4637fa 2
a737b216 3 Copyright (C) 1984, 1986, 1989, 1992, 1998, 2000, 2001, 2002, 2003
a70083a3 4 Free Software Foundation, Inc.
1ff442ca 5
41aca2e0 6 This file is part of Bison, the GNU Compiler Compiler.
1ff442ca 7
41aca2e0
AD
8 Bison is free software; you can redistribute it and/or modify
9 it under the terms of the GNU General Public License as published by
10 the Free Software Foundation; either version 2, or (at your option)
11 any later version.
1ff442ca 12
41aca2e0
AD
13 Bison is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU General Public License for more details.
1ff442ca 17
41aca2e0
AD
18 You should have received a copy of the GNU General Public License
19 along with Bison; see the file COPYING. If not, write to
20 the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
21 Boston, MA 02111-1307, USA. */
1ff442ca 22
1ff442ca 23#include "system.h"
17ee7397
PE
24
25#include <quotearg.h>
26
27#include "complain.h"
28#include "conflicts.h"
1ff442ca 29#include "files.h"
17ee7397 30#include "getargs.h"
1ff442ca 31#include "gram.h"
17ee7397 32#include "muscle_tab.h"
6c89f1c1 33#include "output.h"
b2ca4022 34#include "reader.h"
17ee7397
PE
35#include "symlist.h"
36#include "symtab.h"
1ff442ca 37
17ee7397 38static symbol_list *grammar = NULL;
d0829076 39static bool start_flag = false;
676385e2 40merger_list *merge_functions;
1ff442ca 41
d0829076
PE
42/* Has %union been seen? */
43bool typed = false;
0d533154 44\f
e9955c83
AD
45/*-----------------------.
46| Set the start symbol. |
47`-----------------------*/
1ff442ca 48
e9955c83 49void
a737b216 50grammar_start_symbol_set (symbol *sym, location loc)
1ff442ca
NF
51{
52 if (start_flag)
17ee7397 53 complain_at (loc, _("multiple %s declarations"), "%start");
943819bf
RS
54 else
55 {
d0829076 56 start_flag = true;
a737b216 57 startsymbol = sym;
17ee7397 58 startsymbol_location = loc;
943819bf 59 }
1ff442ca
NF
60}
61
1ff442ca 62
d7020c20 63/*----------------------------------------------------------------.
e9955c83
AD
64| There are two prologues: one before %union, one after. Augment |
65| the current one. |
d7020c20 66`----------------------------------------------------------------*/
1ff442ca 67
e9955c83 68void
17ee7397 69prologue_augment (const char *prologue, location loc)
b6610515 70{
e9955c83
AD
71 struct obstack *oout =
72 !typed ? &pre_prologue_obstack : &post_prologue_obstack;
b6610515 73
17ee7397
PE
74 obstack_fgrow1 (oout, "]b4_syncline([[%d]], [[", loc.start.line);
75 MUSCLE_OBSTACK_SGROW (oout,
76 quotearg_style (c_quoting_style, loc.start.file));
6c239755 77 obstack_sgrow (oout, "]])[\n");
e9955c83 78 obstack_sgrow (oout, prologue);
b6610515
RA
79}
80
a70083a3
AD
81\f
82
3e6656f9 83/*-------------------------------------------------------------------.
676385e2
PH
84| Return the merger index for a merging function named NAME, whose |
85| arguments have type TYPE. Records the function, if new, in |
95612cfa 86| MERGER_LIST. |
676385e2
PH
87`-------------------------------------------------------------------*/
88
89static int
17ee7397 90get_merge_function (uniqstr name, uniqstr type, location loc)
676385e2
PH
91{
92 merger_list *syms;
93 merger_list head;
94 int n;
95
96 if (! glr_parser)
97 return 0;
98
99 if (type == NULL)
17ee7397 100 type = uniqstr_new ("");
676385e2
PH
101
102 head.next = merge_functions;
39f41916 103 for (syms = &head, n = 1; syms->next != NULL; syms = syms->next, n += 1)
17ee7397 104 if (UNIQSTR_EQ (name, syms->next->name))
676385e2 105 break;
a5d50994
AD
106 if (syms->next == NULL)
107 {
9c4637fa 108 MALLOC (syms->next, 1);
17ee7397
PE
109 syms->next->name = uniqstr_new (name);
110 syms->next->type = uniqstr_new (type);
a5d50994
AD
111 syms->next->next = NULL;
112 merge_functions = head.next;
113 }
17ee7397 114 else if (!UNIQSTR_EQ (type, syms->next->type))
45a8a65d
PE
115 warn_at (loc, _("result type clash on merge function %s: <%s> != <%s>"),
116 name, type, syms->next->type);
676385e2
PH
117 return n;
118}
119
120/*--------------------------------------.
121| Free all merge-function definitions. |
122`--------------------------------------*/
123
124void
125free_merger_functions (void)
126{
127 merger_list *L0;
128 if (! glr_parser)
129 return;
130 L0 = merge_functions;
131 while (L0 != NULL)
132 {
133 merger_list *L1 = L0->next;
134 free (L0);
135 L0 = L1;
136 }
137}
138
a70083a3 139\f
107f7dfb 140/*-------------------------------------------------------------------.
17ee7397 141| Parse the input grammar into a one symbol_list structure. Each |
107f7dfb
AD
142| rule is represented by a sequence of symbols: the left hand side |
143| followed by the contents of the right hand side, followed by a |
144| null pointer instead of a symbol to terminate the rule. The next |
145| symbol is the lhs of the following rule. |
146| |
fdbcd8e2
AD
147| All actions are copied out, labelled by the rule number they apply |
148| to. |
107f7dfb
AD
149| |
150| Bison used to allow some %directives in the rules sections, but |
151| this is no longer consider appropriate: (i) the documented grammar |
152| doesn't claim it, (ii), it would promote bad style, (iii), error |
153| recovery for %directives consists in skipping the junk until a `%' |
154| is seen and helrp synchronizing. This scheme is definitely wrong |
155| in the rules section. |
156`-------------------------------------------------------------------*/
1ff442ca 157
f6d0f937 158/* The (currently) last symbol of GRAMMAR. */
17ee7397 159symbol_list *grammar_end = NULL;
f6d0f937 160
52328c6e 161/* Append SYM to the grammar. */
e9955c83 162void
17ee7397 163grammar_symbol_append (symbol *sym, location loc)
f6d0f937 164{
17ee7397 165 symbol_list *p = symbol_list_new (sym, loc);
f6d0f937
AD
166
167 if (grammar_end)
168 grammar_end->next = p;
169 else
170 grammar = p;
171
172 grammar_end = p;
173}
174
8efe435c
AD
175/* The rule currently being defined, and the previous rule.
176 CURRENT_RULE points to the first LHS of the current rule, while
177 PREVIOUS_RULE_END points to the *end* of the previous rule (NULL). */
17ee7397
PE
178symbol_list *current_rule = NULL;
179symbol_list *previous_rule_end = NULL;
da4160c3
AD
180
181
8efe435c
AD
182/*----------------------------------------------.
183| Create a new rule for LHS in to the GRAMMAR. |
184`----------------------------------------------*/
da4160c3 185
e9955c83 186void
17ee7397 187grammar_rule_begin (symbol *lhs, location loc)
da4160c3
AD
188{
189 if (!start_flag)
190 {
191 startsymbol = lhs;
17ee7397 192 startsymbol_location = loc;
d0829076 193 start_flag = true;
da4160c3
AD
194 }
195
196 /* Start a new rule and record its lhs. */
197 ++nrules;
198 ++nritems;
199
8efe435c 200 previous_rule_end = grammar_end;
17ee7397 201 grammar_symbol_append (lhs, loc);
da4160c3
AD
202 current_rule = grammar_end;
203
204 /* Mark the rule's lhs as a nonterminal if not already so. */
205
206 if (lhs->class == unknown_sym)
207 {
208 lhs->class = nterm_sym;
209 lhs->number = nvars;
210 ++nvars;
211 }
212 else if (lhs->class == token_sym)
17ee7397 213 complain_at (loc, _("rule given for %s, which is a token"), lhs->tag);
da4160c3
AD
214}
215
e9955c83
AD
216/* Check that the last rule (CURRENT_RULE) is properly defined. For
217 instance, there should be no type clash on the default action. */
218
219static void
220grammar_current_rule_check (void)
221{
17ee7397 222 symbol *lhs = current_rule->sym;
3f4c0f80 223 char const *lhs_type = lhs->type_name;
17ee7397 224 symbol *first_rhs = current_rule->next->sym;
e9955c83
AD
225
226 /* If there is an action, then there is nothing we can do: the user
3f4c0f80 227 is allowed to shoot herself in the foot. */
e9955c83
AD
228 if (current_rule->action)
229 return;
230
3f4c0f80
PE
231 /* Don't worry about the default action if $$ is untyped, since $$'s
232 value can't be used. */
233 if (! lhs_type)
234 return;
235
236 /* If $$ is being set in default way, report if any type mismatch. */
e9955c83
AD
237 if (first_rhs)
238 {
e9955c83 239 const char *rhs_type = first_rhs->type_name ? first_rhs->type_name : "";
17ee7397 240 if (!UNIQSTR_EQ (lhs_type, rhs_type))
e9273511
PE
241 warn_at (current_rule->location,
242 _("type clash on default action: <%s> != <%s>"),
243 lhs_type, rhs_type);
e9955c83
AD
244 }
245 /* Warn if there is no default for $$ but we need one. */
246 else
e9273511
PE
247 warn_at (current_rule->location,
248 _("empty rule for typed nonterminal, and no action"));
e9955c83
AD
249}
250
251
8efe435c
AD
252/*-------------------------------------.
253| End the currently being grown rule. |
254`-------------------------------------*/
e9955c83
AD
255
256void
17ee7397 257grammar_rule_end (location loc)
e9955c83
AD
258{
259 /* Put an empty link in the list to mark the end of this rule */
8efe435c 260 grammar_symbol_append (NULL, grammar_end->location);
17ee7397 261 current_rule->location = loc;
e9955c83
AD
262 grammar_current_rule_check ();
263}
264
265
8efe435c
AD
266/*-------------------------------------------------------------------.
267| The previous action turns out the be a mid-rule action. Attach it |
268| to the current rule, i.e., create a dummy symbol, attach it this |
269| mid-rule action, and append this dummy nonterminal to the current |
270| rule. |
271`-------------------------------------------------------------------*/
1485e106 272
e9955c83 273void
1485e106
AD
274grammar_midrule_action (void)
275{
276 /* Since the action was written out with this rule's number, we must
277 give the new rule this number by inserting the new rule before
278 it. */
279
8efe435c
AD
280 /* Make a DUMMY nonterminal, whose location is that of the midrule
281 action. Create the MIDRULE. */
17ee7397
PE
282 location dummy_location = current_rule->action_location;
283 symbol *dummy = dummy_symbol_get (dummy_location);
284 symbol_list *midrule = symbol_list_new (dummy, dummy_location);
1485e106
AD
285
286 /* Make a new rule, whose body is empty, before the current one, so
287 that the action just read can belong to it. */
288 ++nrules;
289 ++nritems;
8efe435c
AD
290 /* Attach its location and actions to that of the DUMMY. */
291 midrule->location = dummy_location;
292 midrule->action = current_rule->action;
293 midrule->action_location = dummy_location;
1485e106
AD
294 current_rule->action = NULL;
295
8efe435c
AD
296 if (previous_rule_end)
297 previous_rule_end->next = midrule;
1485e106 298 else
8efe435c 299 grammar = midrule;
1485e106 300
8efe435c
AD
301 /* End the dummy's rule. */
302 previous_rule_end = symbol_list_new (NULL, dummy_location);
303 previous_rule_end->next = current_rule;
1485e106 304
8efe435c 305 midrule->next = previous_rule_end;
1485e106 306
8efe435c
AD
307 /* Insert the dummy nonterminal replacing the midrule action into
308 the current rule. */
309 grammar_current_rule_symbol_append (dummy, dummy_location);
1485e106
AD
310}
311
9af3fbce
AD
312/* Set the precedence symbol of the current rule to PRECSYM. */
313
e9955c83 314void
17ee7397 315grammar_current_rule_prec_set (symbol *precsym, location loc)
9af3fbce
AD
316{
317 if (current_rule->ruleprec)
17ee7397 318 complain_at (loc, _("only one %s allowed per rule"), "%prec");
9af3fbce
AD
319 current_rule->ruleprec = precsym;
320}
321
676385e2
PH
322/* Attach dynamic precedence DPREC to the current rule. */
323
324void
17ee7397 325grammar_current_rule_dprec_set (int dprec, location loc)
676385e2
PH
326{
327 if (! glr_parser)
17ee7397 328 warn_at (loc, _("%s affects only GLR parsers"), "%dprec");
676385e2 329 if (dprec <= 0)
17ee7397 330 complain_at (loc, _("%s must be followed by positive number"), "%dprec");
39f41916 331 else if (current_rule->dprec != 0)
17ee7397 332 complain_at (loc, _("only one %s allowed per rule"), "%dprec");
676385e2
PH
333 current_rule->dprec = dprec;
334}
335
336/* Attach a merge function NAME with argument type TYPE to current
337 rule. */
338
339void
17ee7397 340grammar_current_rule_merge_set (uniqstr name, location loc)
676385e2
PH
341{
342 if (! glr_parser)
17ee7397 343 warn_at (loc, _("%s affects only GLR parsers"), "%merge");
39f41916 344 if (current_rule->merger != 0)
17ee7397 345 complain_at (loc, _("only one %s allowed per rule"), "%merge");
39f41916 346 current_rule->merger =
17ee7397 347 get_merge_function (name, current_rule->sym->type_name, loc);
676385e2
PH
348}
349
17ee7397 350/* Attach SYM to the current rule. If needed, move the previous
2e047461
AD
351 action as a mid-rule action. */
352
e9955c83 353void
17ee7397 354grammar_current_rule_symbol_append (symbol *sym, location loc)
2e047461
AD
355{
356 if (current_rule->action)
357 grammar_midrule_action ();
358 ++nritems;
17ee7397 359 grammar_symbol_append (sym, loc);
2e047461
AD
360}
361
2e047461
AD
362/* Attach an ACTION to the current rule. If needed, move the previous
363 action as a mid-rule action. */
364
e9955c83 365void
17ee7397 366grammar_current_rule_action_append (const char *action, location loc)
2e047461
AD
367{
368 if (current_rule->action)
369 grammar_midrule_action ();
370 current_rule->action = action;
17ee7397 371 current_rule->action_location = loc;
2e047461
AD
372}
373
a70083a3 374\f
a70083a3
AD
375/*---------------------------------------------------------------.
376| Convert the rules into the representation using RRHS, RLHS and |
d9b739c3 377| RITEM. |
a70083a3 378`---------------------------------------------------------------*/
1ff442ca 379
4a120d45 380static void
118fb205 381packgram (void)
1ff442ca 382{
9222837b 383 unsigned int itemno = 0;
17ee7397
PE
384 rule_number ruleno = 0;
385 symbol_list *p = grammar;
1ff442ca 386
9c4637fa
PE
387 CALLOC (ritem, nritems);
388 CALLOC (rules, nrules);
1ff442ca 389
1ff442ca
NF
390 while (p)
391 {
17ee7397 392 symbol *ruleprec = p->ruleprec;
d7e1f00c 393 rules[ruleno].user_number = ruleno;
c3b407f4 394 rules[ruleno].number = ruleno;
bba97eb2 395 rules[ruleno].lhs = p->sym;
99013900 396 rules[ruleno].rhs = ritem + itemno;
8efe435c 397 rules[ruleno].location = p->location;
b4afb6bb 398 rules[ruleno].useful = true;
1a2b5d37 399 rules[ruleno].action = p->action;
8efe435c 400 rules[ruleno].action_location = p->action_location;
676385e2
PH
401 rules[ruleno].dprec = p->dprec;
402 rules[ruleno].merger = p->merger;
1ff442ca
NF
403
404 p = p->next;
405 while (p && p->sym)
406 {
17ee7397 407 /* item_number = symbol_number.
5fbb0954 408 But the former needs to contain more: negative rule numbers. */
a49aecd5 409 ritem[itemno++] = symbol_number_as_item_number (p->sym->number);
1ff442ca
NF
410 /* A rule gets by default the precedence and associativity
411 of the last token in it. */
d7020c20 412 if (p->sym->class == token_sym)
03b31c0c 413 rules[ruleno].prec = p->sym;
a70083a3
AD
414 if (p)
415 p = p->next;
1ff442ca
NF
416 }
417
418 /* If this rule has a %prec,
a70083a3 419 the specified symbol's precedence replaces the default. */
1ff442ca
NF
420 if (ruleprec)
421 {
03b31c0c
AD
422 rules[ruleno].precsym = ruleprec;
423 rules[ruleno].prec = ruleprec;
1ff442ca 424 }
4b3d3a8e 425 ritem[itemno++] = rule_number_as_item_number (ruleno);
f3849179 426 ++ruleno;
1ff442ca 427
a70083a3
AD
428 if (p)
429 p = p->next;
1ff442ca
NF
430 }
431
35dcf428
PE
432 if (itemno != nritems)
433 abort ();
3067fbef 434
273a74fa 435 if (trace_flag & trace_sets)
3067fbef 436 ritem_print (stderr);
1ff442ca 437}
a70083a3 438\f
fdbcd8e2
AD
439/*------------------------------------------------------------------.
440| Read in the grammar specification and record it in the format |
441| described in gram.h. All actions are copied into ACTION_OBSTACK, |
442| in each case forming the body of a C function (YYACTION) which |
443| contains a switch statement to decide which action to execute. |
444`------------------------------------------------------------------*/
a70083a3
AD
445
446void
447reader (void)
448{
a70083a3 449 /* Initialize the symbol table. */
db8837cb 450 symbols_new ();
b6610515 451
88bce5a2
AD
452 /* Construct the accept symbol. */
453 accept = symbol_get ("$accept", empty_location);
454 accept->class = nterm_sym;
455 accept->number = nvars++;
30171f79 456
a70083a3 457 /* Construct the error token */
39f41916 458 errtoken = symbol_get ("error", empty_location);
d7020c20 459 errtoken->class = token_sym;
72a23c97 460 errtoken->number = ntokens++;
b6610515 461
a70083a3
AD
462 /* Construct a token that represents all undefined literal tokens.
463 It is always token number 2. */
88bce5a2 464 undeftoken = symbol_get ("$undefined", empty_location);
d7020c20 465 undeftoken->class = token_sym;
72a23c97 466 undeftoken->number = ntokens++;
a70083a3 467
331dbc1b 468 /* Initialize the obstacks. */
0dd1580a
RA
469 obstack_init (&pre_prologue_obstack);
470 obstack_init (&post_prologue_obstack);
331dbc1b 471
95612cfa 472 finput = xfopen (grammar_file, "r");
e9955c83
AD
473 gram_in = finput;
474
473d0a75
AD
475 gram__flex_debug = trace_flag & trace_scan;
476 gram_debug = trace_flag & trace_parse;
1d6412ad 477 scanner_initialize ();
78c3da9e 478 gram_parse ();
331dbc1b 479
b275314e
AD
480 /* If something went wrong during the parsing, don't try to
481 continue. */
b4afb6bb 482 if (complaint_issued)
f956c304 483 return;
b275314e 484
e9955c83
AD
485 /* Grammar has been read. Do some checking */
486 if (nrules == 0)
487 fatal (_("no rules in the input grammar"));
488
489 /* Report any undefined symbols and consider them nonterminals. */
490 symbols_check_defined ();
b7c49edf 491
88bce5a2
AD
492 /* If the user did not define her ENDTOKEN, do it now. */
493 if (!endtoken)
b7c49edf 494 {
88bce5a2
AD
495 endtoken = symbol_get ("$end", empty_location);
496 endtoken->class = token_sym;
497 endtoken->number = 0;
b7c49edf 498 /* Value specified by POSIX. */
88bce5a2 499 endtoken->user_token_number = 0;
b7c49edf
AD
500 }
501
e9955c83
AD
502 /* Insert the initial rule, which line is that of the first rule
503 (not that of the start symbol):
504
88bce5a2 505 accept: %start EOF. */
e9955c83 506 {
17ee7397 507 symbol_list *p = symbol_list_new (accept, empty_location);
8efe435c
AD
508 p->location = grammar->location;
509 p->next = symbol_list_new (startsymbol, empty_location);
88bce5a2 510 p->next->next = symbol_list_new (endtoken, empty_location);
8efe435c 511 p->next->next->next = symbol_list_new (NULL, empty_location);
e9955c83
AD
512 p->next->next->next->next = grammar;
513 nrules += 1;
514 nritems += 3;
515 grammar = p;
516 }
517
17ee7397 518 if (! (nsyms <= SYMBOL_NUMBER_MAXIMUM && nsyms == ntokens + nvars))
35dcf428 519 abort ();
b0c4483e 520
331dbc1b
AD
521 xfclose (finput);
522
a70083a3
AD
523 /* Assign the symbols their symbol numbers. Write #defines for the
524 token symbols into FDEFINES if requested. */
2f1afb73 525 symbols_pack ();
93ede233 526
a70083a3
AD
527 /* Convert the grammar into the format described in gram.h. */
528 packgram ();
8419d367 529
17ee7397
PE
530 /* The grammar as a symbol_list is no longer needed. */
531 LIST_FREE (symbol_list, grammar);
a70083a3 532}