]> git.saurik.com Git - bison.git/blame - src/reader.c
* src/output.c (output_skeleton): Don't disable M4sugar comments
[bison.git] / src / reader.c
CommitLineData
1ff442ca 1/* Input parser for bison
76514394 2 Copyright (C) 1984, 1986, 1989, 1992, 1998, 2000, 2001, 2002
a70083a3 3 Free Software Foundation, Inc.
1ff442ca 4
41aca2e0 5 This file is part of Bison, the GNU Compiler Compiler.
1ff442ca 6
41aca2e0
AD
7 Bison is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 2, or (at your option)
10 any later version.
1ff442ca 11
41aca2e0
AD
12 Bison is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
1ff442ca 16
41aca2e0
AD
17 You should have received a copy of the GNU General Public License
18 along with Bison; see the file COPYING. If not, write to
19 the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
20 Boston, MA 02111-1307, USA. */
1ff442ca
NF
21
22
1ff442ca 23#include "system.h"
2a91a95e
AD
24#include "quotearg.h"
25#include "quote.h"
ceed8467 26#include "getargs.h"
1ff442ca 27#include "files.h"
1ff442ca 28#include "symtab.h"
56c47203 29#include "symlist.h"
1ff442ca 30#include "gram.h"
a0f6b076 31#include "complain.h"
6c89f1c1 32#include "output.h"
b2ca4022 33#include "reader.h"
340ef489 34#include "conflicts.h"
11d82f03 35#include "muscle_tab.h"
1ff442ca 36
/* Head of the symbol list holding the grammar being read.  It is set
   by grammar_symbol_append when the first symbol is appended. */
56c47203 37static symbol_list_t *grammar = NULL;
/* Nonzero once a start symbol has been recorded, either by
   grammar_start_symbol_set or by the first call to grammar_rule_begin. */
280a38c3 38static int start_flag = 0;
/* List of the merge functions recorded by get_merge_function. */
676385e2 39merger_list *merge_functions;
1ff442ca 40
d7020c20 41/* Nonzero if %union has been seen. */
e9955c83 42int typed = 0;
0d533154 43\f
e9955c83
AD
44/*-----------------------.
45| Set the start symbol. |
46`-----------------------*/
1ff442ca 47
e9955c83 48void
8efe435c 49grammar_start_symbol_set (symbol_t *s, location_t l)
1ff442ca
NF
50{
51 if (start_flag)
e776192e 52 complain_at (l, _("multiple %s declarations"), "%start");
943819bf
RS
53 else
54 {
55 start_flag = 1;
e9955c83 56 startsymbol = s;
8efe435c 57 startsymbol_location = l;
943819bf 58 }
1ff442ca
NF
59}
60
1ff442ca 61
d7020c20 62/*----------------------------------------------------------------.
e9955c83
AD
63| There are two prologues: one before %union, one after. Augment |
64| the current one. |
d7020c20 65`----------------------------------------------------------------*/
1ff442ca 66
e9955c83 67void
0c15323d 68prologue_augment (const char *prologue, location_t location)
b6610515 69{
e9955c83
AD
70 struct obstack *oout =
71 !typed ? &pre_prologue_obstack : &post_prologue_obstack;
b6610515 72
e9955c83 73 if (!no_lines_flag)
b6610515 74 {
e9955c83 75 obstack_fgrow2 (oout, muscle_find ("linef"),
0c15323d
AD
76 location.first_line,
77 quotearg_style (c_quoting_style,
78 muscle_find ("filename")));
b6610515 79 }
e9955c83 80 obstack_sgrow (oout, prologue);
b6610515
RA
81}
82
2ba3b73c 83
426cf563 84
a870c567 85
e9955c83
AD
86/*----------------------.
87| Handle the epilogue. |
88`----------------------*/
426cf563 89
e9955c83 90void
0c15323d 91epilogue_set (const char *epilogue, location_t location)
2ba3b73c 92{
e9955c83 93 if (!no_lines_flag)
1ff442ca 94 {
592e8d4d 95 obstack_fgrow2 (&muscle_obstack, muscle_find ("linef"),
0c15323d
AD
96 location.first_line,
97 quotearg_style (c_quoting_style,
98 muscle_find ("filename")));
1ff442ca 99 }
592e8d4d
AD
100 obstack_sgrow (&muscle_obstack, epilogue);
101 obstack_1grow (&muscle_obstack, 0);
102 muscle_insert ("epilogue", obstack_finish (&muscle_obstack));
1ff442ca 103}
1ff442ca 104
a70083a3 105
a70083a3
AD
106\f
107
676385e2
PH
108 /*-------------------------------------------------------------------.
109| Return the merger index for a merging function named NAME, whose |
110| arguments have type TYPE. Records the function, if new, in |
111| merger_list. |
112`-------------------------------------------------------------------*/
113
114static int
a5d50994
AD
115get_merge_function (const char* name, const char* type,
116 location_t loc)
676385e2
PH
117{
118 merger_list *syms;
119 merger_list head;
120 int n;
121
122 if (! glr_parser)
123 return 0;
124
125 if (type == NULL)
126 type = "";
127
128 head.next = merge_functions;
39f41916 129 for (syms = &head, n = 1; syms->next != NULL; syms = syms->next, n += 1)
676385e2
PH
130 if (strcmp (name, syms->next->name) == 0)
131 break;
a5d50994
AD
132 if (syms->next == NULL)
133 {
134 syms->next = XMALLOC (merger_list, 1);
b906441c
AD
135 syms->next->name = xstrdup (name);
136 syms->next->type = xstrdup (type);
a5d50994
AD
137 syms->next->next = NULL;
138 merge_functions = head.next;
139 }
140 else if (strcmp (type, syms->next->type) != 0)
141 warn_at (loc, _("result type clash on merge function %s: `%s' vs. `%s'"),
142 name, type, syms->next->type);
676385e2
PH
143 return n;
144}
145
146/*--------------------------------------.
147| Free all merge-function definitions. |
148`--------------------------------------*/
149
150void
151free_merger_functions (void)
152{
153 merger_list *L0;
154 if (! glr_parser)
155 return;
156 L0 = merge_functions;
157 while (L0 != NULL)
158 {
159 merger_list *L1 = L0->next;
160 free (L0);
161 L0 = L1;
162 }
163}
164
a70083a3 165\f
107f7dfb 166/*-------------------------------------------------------------------.
32e1e0a4 167| Parse the input grammar into one symbol_list_t structure. Each |
107f7dfb
AD
168| rule is represented by a sequence of symbols: the left hand side |
169| followed by the contents of the right hand side, followed by a |
170| null pointer instead of a symbol to terminate the rule. The next |
171| symbol is the lhs of the following rule. |
172| |
fdbcd8e2
AD
173| All actions are copied out, labelled by the rule number they apply |
174| to. |
107f7dfb
AD
175| |
176| Bison used to allow some %directives in the rules sections, but |
177| this is no longer considered appropriate: (i) the documented grammar |
178| doesn't claim it, (ii), it would promote bad style, (iii), error |
179| recovery for %directives consists in skipping the junk until a `%' |
180| is seen and helps synchronizing. This scheme is definitely wrong |
181| in the rules section. |
182`-------------------------------------------------------------------*/
1ff442ca 183
f6d0f937 184/* The (currently) last symbol of GRAMMAR. */
56c47203 185symbol_list_t *grammar_end = NULL;
f6d0f937
AD
186
187/* Append S to the GRAMMAR. */
e9955c83 188void
8efe435c 189grammar_symbol_append (symbol_t *symbol, location_t location)
f6d0f937 190{
56c47203 191 symbol_list_t *p = symbol_list_new (symbol, location);
f6d0f937
AD
192
193 if (grammar_end)
194 grammar_end->next = p;
195 else
196 grammar = p;
197
198 grammar_end = p;
199}
200
8efe435c
AD
201/* The rule currently being defined, and the previous rule.
202 CURRENT_RULE points to the first LHS of the current rule, while
203 PREVIOUS_RULE_END points to the *end* of the previous rule (NULL). */
56c47203
AD
204symbol_list_t *current_rule = NULL;
205symbol_list_t *previous_rule_end = NULL;
da4160c3
AD
206
207
8efe435c
AD
208/*----------------------------------------------.
209| Create a new rule for LHS in to the GRAMMAR. |
210`----------------------------------------------*/
da4160c3 211
e9955c83 212void
8efe435c 213grammar_rule_begin (symbol_t *lhs, location_t location)
da4160c3
AD
214{
215 if (!start_flag)
216 {
217 startsymbol = lhs;
8efe435c 218 startsymbol_location = location;
da4160c3
AD
219 start_flag = 1;
220 }
221
222 /* Start a new rule and record its lhs. */
223 ++nrules;
224 ++nritems;
225
8efe435c
AD
226 previous_rule_end = grammar_end;
227 grammar_symbol_append (lhs, location);
da4160c3
AD
228 current_rule = grammar_end;
229
230 /* Mark the rule's lhs as a nonterminal if not already so. */
231
232 if (lhs->class == unknown_sym)
233 {
234 lhs->class = nterm_sym;
235 lhs->number = nvars;
236 ++nvars;
237 }
238 else if (lhs->class == token_sym)
e776192e 239 complain_at (location, _("rule given for %s, which is a token"), lhs->tag);
da4160c3
AD
240}
241
e9955c83
AD
242/* Check that the last rule (CURRENT_RULE) is properly defined. For
243 instance, there should be no type clash on the default action. */
244
245static void
246grammar_current_rule_check (void)
247{
248 symbol_t *lhs = current_rule->sym;
3f4c0f80 249 char const *lhs_type = lhs->type_name;
e9955c83
AD
250 symbol_t *first_rhs = current_rule->next->sym;
251
252 /* If there is an action, then there is nothing we can do: the user
3f4c0f80 253 is allowed to shoot herself in the foot. */
e9955c83
AD
254 if (current_rule->action)
255 return;
256
3f4c0f80
PE
257 /* Don't worry about the default action if $$ is untyped, since $$'s
258 value can't be used. */
259 if (! lhs_type)
260 return;
261
262 /* If $$ is being set in default way, report if any type mismatch. */
e9955c83
AD
263 if (first_rhs)
264 {
e9955c83
AD
265 const char *rhs_type = first_rhs->type_name ? first_rhs->type_name : "";
266 if (strcmp (lhs_type, rhs_type))
e776192e
AD
267 complain_at (current_rule->location,
268 _("type clash (`%s' `%s') on default action"),
269 lhs_type, rhs_type);
e9955c83
AD
270 }
271 /* Warn if there is no default for $$ but we need one. */
272 else
3f4c0f80
PE
273 complain_at (current_rule->location,
274 _("empty rule for typed nonterminal, and no action"));
e9955c83
AD
275}
276
277
8efe435c
AD
278/*-------------------------------------.
279| End the currently being grown rule. |
280`-------------------------------------*/
e9955c83
AD
281
282void
8efe435c 283grammar_rule_end (location_t location)
e9955c83
AD
284{
285 /* Put an empty link in the list to mark the end of this rule */
8efe435c
AD
286 grammar_symbol_append (NULL, grammar_end->location);
287 current_rule->location = location;
e9955c83
AD
288 grammar_current_rule_check ();
289}
290
291
8efe435c
AD
292/*-------------------------------------------------------------------.
293| The previous action turns out the be a mid-rule action. Attach it |
294| to the current rule, i.e., create a dummy symbol, attach it this |
295| mid-rule action, and append this dummy nonterminal to the current |
296| rule. |
297`-------------------------------------------------------------------*/
1485e106 298
e9955c83 299void
1485e106
AD
300grammar_midrule_action (void)
301{
302 /* Since the action was written out with this rule's number, we must
303 give the new rule this number by inserting the new rule before
304 it. */
305
8efe435c
AD
306 /* Make a DUMMY nonterminal, whose location is that of the midrule
307 action. Create the MIDRULE. */
8efe435c 308 location_t dummy_location = current_rule->action_location;
39f41916 309 symbol_t *dummy = dummy_symbol_get (dummy_location);
56c47203 310 symbol_list_t *midrule = symbol_list_new (dummy, dummy_location);
1485e106
AD
311
312 /* Make a new rule, whose body is empty, before the current one, so
313 that the action just read can belong to it. */
314 ++nrules;
315 ++nritems;
8efe435c
AD
316 /* Attach its location and actions to that of the DUMMY. */
317 midrule->location = dummy_location;
318 midrule->action = current_rule->action;
319 midrule->action_location = dummy_location;
1485e106
AD
320 current_rule->action = NULL;
321
8efe435c
AD
322 if (previous_rule_end)
323 previous_rule_end->next = midrule;
1485e106 324 else
8efe435c 325 grammar = midrule;
1485e106 326
8efe435c
AD
327 /* End the dummy's rule. */
328 previous_rule_end = symbol_list_new (NULL, dummy_location);
329 previous_rule_end->next = current_rule;
1485e106 330
8efe435c 331 midrule->next = previous_rule_end;
1485e106 332
8efe435c
AD
333 /* Insert the dummy nonterminal replacing the midrule action into
334 the current rule. */
335 grammar_current_rule_symbol_append (dummy, dummy_location);
1485e106
AD
336}
337
9af3fbce
AD
338/* Set the precedence symbol of the current rule to PRECSYM. */
339
e9955c83 340void
e776192e 341grammar_current_rule_prec_set (symbol_t *precsym, location_t location)
9af3fbce
AD
342{
343 if (current_rule->ruleprec)
473d0a75 344 complain_at (location, _("only one %s allowed per rule"), "%prec");
9af3fbce
AD
345 current_rule->ruleprec = precsym;
346}
347
676385e2
PH
348/* Attach dynamic precedence DPREC to the current rule. */
349
350void
351grammar_current_rule_dprec_set (int dprec, location_t location)
352{
353 if (! glr_parser)
473d0a75 354 warn_at (location, _("%s affects only GLR parsers"), "%dprec");
676385e2 355 if (dprec <= 0)
473d0a75
AD
356 complain_at (location,
357 _("%s must be followed by positive number"), "%dprec");
39f41916 358 else if (current_rule->dprec != 0)
473d0a75 359 complain_at (location, _("only one %s allowed per rule"), "%dprec");
676385e2
PH
360 current_rule->dprec = dprec;
361}
362
363/* Attach a merge function NAME with argument type TYPE to current
364 rule. */
365
366void
367grammar_current_rule_merge_set (const char* name, location_t location)
368{
369 if (! glr_parser)
473d0a75 370 warn_at (location, _("%s affects only GLR parsers"), "%merge");
39f41916 371 if (current_rule->merger != 0)
473d0a75 372 complain_at (location, _("only one %s allowed per rule"), "%merge");
39f41916 373 current_rule->merger =
a5d50994 374 get_merge_function (name, current_rule->sym->type_name, location);
676385e2
PH
375}
376
2e047461
AD
377/* Attach a SYMBOL to the current rule. If needed, move the previous
378 action as a mid-rule action. */
379
e9955c83 380void
8efe435c 381grammar_current_rule_symbol_append (symbol_t *symbol, location_t location)
2e047461
AD
382{
383 if (current_rule->action)
384 grammar_midrule_action ();
385 ++nritems;
8efe435c 386 grammar_symbol_append (symbol, location);
2e047461
AD
387}
388
2e047461
AD
389/* Attach an ACTION to the current rule. If needed, move the previous
390 action as a mid-rule action. */
391
e9955c83 392void
8efe435c 393grammar_current_rule_action_append (const char *action, location_t location)
2e047461
AD
394{
395 if (current_rule->action)
396 grammar_midrule_action ();
397 current_rule->action = action;
8efe435c 398 current_rule->action_location = location;
2e047461
AD
399}
400
a70083a3 401\f
a70083a3
AD
402/*---------------------------------------------------------------.
403| Convert the rules into the representation using RRHS, RLHS and |
d9b739c3 404| RITEM. |
a70083a3 405`---------------------------------------------------------------*/
1ff442ca 406
4a120d45 407static void
118fb205 408packgram (void)
1ff442ca 409{
9222837b 410 unsigned int itemno = 0;
4b3d3a8e 411 rule_number_t ruleno = 0;
9222837b 412 symbol_list_t *p = grammar;
1ff442ca 413
a900a624 414 ritem = XCALLOC (item_number_t, nritems);
4b3d3a8e 415 rules = XCALLOC (rule_t, nrules);
1ff442ca 416
1ff442ca
NF
417 while (p)
418 {
db8837cb 419 symbol_t *ruleprec = p->ruleprec;
d7e1f00c 420 rules[ruleno].user_number = ruleno;
c3b407f4 421 rules[ruleno].number = ruleno;
bba97eb2 422 rules[ruleno].lhs = p->sym;
99013900 423 rules[ruleno].rhs = ritem + itemno;
8efe435c 424 rules[ruleno].location = p->location;
b4afb6bb 425 rules[ruleno].useful = true;
1a2b5d37 426 rules[ruleno].action = p->action;
8efe435c 427 rules[ruleno].action_location = p->action_location;
676385e2
PH
428 rules[ruleno].dprec = p->dprec;
429 rules[ruleno].merger = p->merger;
1ff442ca
NF
430
431 p = p->next;
432 while (p && p->sym)
433 {
a49aecd5 434 /* item_number_t = symbol_number_t.
5fbb0954 435 But the former needs to contain more: negative rule numbers. */
a49aecd5 436 ritem[itemno++] = symbol_number_as_item_number (p->sym->number);
1ff442ca
NF
437 /* A rule gets by default the precedence and associativity
438 of the last token in it. */
d7020c20 439 if (p->sym->class == token_sym)
03b31c0c 440 rules[ruleno].prec = p->sym;
a70083a3
AD
441 if (p)
442 p = p->next;
1ff442ca
NF
443 }
444
445 /* If this rule has a %prec,
a70083a3 446 the specified symbol's precedence replaces the default. */
1ff442ca
NF
447 if (ruleprec)
448 {
03b31c0c
AD
449 rules[ruleno].precsym = ruleprec;
450 rules[ruleno].prec = ruleprec;
1ff442ca 451 }
4b3d3a8e 452 ritem[itemno++] = rule_number_as_item_number (ruleno);
f3849179 453 ++ruleno;
1ff442ca 454
a70083a3
AD
455 if (p)
456 p = p->next;
1ff442ca
NF
457 }
458
5123689b 459 assert (itemno == nritems);
3067fbef 460
273a74fa 461 if (trace_flag & trace_sets)
3067fbef 462 ritem_print (stderr);
1ff442ca 463}
a70083a3 464\f
fdbcd8e2
AD
465/*------------------------------------------------------------------.
466| Read in the grammar specification and record it in the format |
467| described in gram.h. All actions are copied into ACTION_OBSTACK, |
468| in each case forming the body of a C function (YYACTION) which |
469| contains a switch statement to decide which action to execute. |
470`------------------------------------------------------------------*/
a70083a3
AD
471
472void
473reader (void)
474{
e9955c83 475 gram_control_t gram_control;
a70083a3
AD
476
477 /* Initialize the symbol table. */
db8837cb 478 symbols_new ();
b6610515 479
88bce5a2
AD
480 /* Construct the accept symbol. */
481 accept = symbol_get ("$accept", empty_location);
482 accept->class = nterm_sym;
483 accept->number = nvars++;
30171f79 484
a70083a3 485 /* Construct the error token */
39f41916 486 errtoken = symbol_get ("error", empty_location);
d7020c20 487 errtoken->class = token_sym;
72a23c97 488 errtoken->number = ntokens++;
b6610515 489
a70083a3
AD
490 /* Construct a token that represents all undefined literal tokens.
491 It is always token number 2. */
88bce5a2 492 undeftoken = symbol_get ("$undefined", empty_location);
d7020c20 493 undeftoken->class = token_sym;
72a23c97 494 undeftoken->number = ntokens++;
a70083a3 495
331dbc1b 496 /* Initialize the obstacks. */
0dd1580a
RA
497 obstack_init (&pre_prologue_obstack);
498 obstack_init (&post_prologue_obstack);
331dbc1b
AD
499
500 finput = xfopen (infile, "r");
e9955c83
AD
501 gram_in = finput;
502
473d0a75
AD
503 gram__flex_debug = trace_flag & trace_scan;
504 gram_debug = trace_flag & trace_parse;
1d6412ad 505 scanner_initialize ();
e9955c83 506 gram_parse (&gram_control);
331dbc1b 507
b275314e
AD
508 /* If something went wrong during the parsing, don't try to
509 continue. */
b4afb6bb 510 if (complaint_issued)
f956c304 511 return;
b275314e 512
e9955c83
AD
513 /* Grammar has been read. Do some checking */
514 if (nrules == 0)
515 fatal (_("no rules in the input grammar"));
516
517 /* Report any undefined symbols and consider them nonterminals. */
518 symbols_check_defined ();
b7c49edf 519
88bce5a2
AD
520 /* If the user did not define her ENDTOKEN, do it now. */
521 if (!endtoken)
b7c49edf 522 {
88bce5a2
AD
523 endtoken = symbol_get ("$end", empty_location);
524 endtoken->class = token_sym;
525 endtoken->number = 0;
b7c49edf 526 /* Value specified by POSIX. */
88bce5a2 527 endtoken->user_token_number = 0;
b7c49edf
AD
528 }
529
e9955c83
AD
530 /* Insert the initial rule, which line is that of the first rule
531 (not that of the start symbol):
532
88bce5a2 533 accept: %start EOF. */
e9955c83 534 {
88bce5a2 535 symbol_list_t *p = symbol_list_new (accept, empty_location);
8efe435c
AD
536 p->location = grammar->location;
537 p->next = symbol_list_new (startsymbol, empty_location);
88bce5a2 538 p->next->next = symbol_list_new (endtoken, empty_location);
8efe435c 539 p->next->next->next = symbol_list_new (NULL, empty_location);
e9955c83
AD
540 p->next->next->next->next = grammar;
541 nrules += 1;
542 nritems += 3;
543 grammar = p;
544 }
545
242a6e48 546 if (SYMBOL_NUMBER_MAX < nsyms)
e9955c83 547 fatal (_("too many symbols (tokens plus nonterminals); maximum %d"),
242a6e48 548 SYMBOL_NUMBER_MAX);
e9955c83
AD
549
550 assert (nsyms == ntokens + nvars);
b0c4483e 551
331dbc1b
AD
552 xfclose (finput);
553
a70083a3
AD
554 /* Assign the symbols their symbol numbers. Write #defines for the
555 token symbols into FDEFINES if requested. */
2f1afb73 556 symbols_pack ();
93ede233 557
a70083a3
AD
558 /* Convert the grammar into the format described in gram.h. */
559 packgram ();
8419d367 560
56c47203
AD
561 /* The grammar as a symbol_list_t is no longer needed. */
562 LIST_FREE (symbol_list_t, grammar);
a70083a3 563}