]> git.saurik.com Git - bison.git/blame - src/reader.c
* src/print.c (state_default_rule_compute): New, extracted from...
[bison.git] / src / reader.c
CommitLineData
1ff442ca 1/* Input parser for bison
76514394 2 Copyright (C) 1984, 1986, 1989, 1992, 1998, 2000, 2001, 2002
a70083a3 3 Free Software Foundation, Inc.
1ff442ca 4
41aca2e0 5 This file is part of Bison, the GNU Compiler Compiler.
1ff442ca 6
41aca2e0
AD
7 Bison is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 2, or (at your option)
10 any later version.
1ff442ca 11
41aca2e0
AD
12 Bison is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
1ff442ca 16
41aca2e0
AD
17 You should have received a copy of the GNU General Public License
18 along with Bison; see the file COPYING. If not, write to
19 the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
20 Boston, MA 02111-1307, USA. */
1ff442ca
NF
21
22
1ff442ca 23#include "system.h"
2a91a95e
AD
24#include "quotearg.h"
25#include "quote.h"
ceed8467 26#include "getargs.h"
1ff442ca 27#include "files.h"
1ff442ca 28#include "symtab.h"
56c47203 29#include "symlist.h"
1ff442ca 30#include "gram.h"
a0f6b076 31#include "complain.h"
6c89f1c1 32#include "output.h"
b2ca4022 33#include "reader.h"
340ef489 34#include "conflicts.h"
11d82f03 35#include "muscle_tab.h"
1ff442ca 36
1ff442ca 37int lineno;
56c47203 38static symbol_list_t *grammar = NULL;
280a38c3 39static int start_flag = 0;
676385e2 40merger_list *merge_functions;
1ff442ca 41
d7020c20 42/* Nonzero if %union has been seen. */
e9955c83 43int typed = 0;
0d533154 44\f
e9955c83
AD
45/*-----------------------.
46| Set the start symbol. |
47`-----------------------*/
1ff442ca 48
e9955c83 49void
8efe435c 50grammar_start_symbol_set (symbol_t *s, location_t l)
1ff442ca
NF
51{
52 if (start_flag)
e776192e 53 complain_at (l, _("multiple %s declarations"), "%start");
943819bf
RS
54 else
55 {
56 start_flag = 1;
e9955c83 57 startsymbol = s;
8efe435c 58 startsymbol_location = l;
943819bf 59 }
1ff442ca
NF
60}
61
1ff442ca 62
d7020c20 63/*----------------------------------------------------------------.
e9955c83
AD
64| There are two prologues: one before %union, one after. Augment |
65| the current one. |
d7020c20 66`----------------------------------------------------------------*/
1ff442ca 67
e9955c83 68void
0c15323d 69prologue_augment (const char *prologue, location_t location)
b6610515 70{
e9955c83
AD
71 struct obstack *oout =
72 !typed ? &pre_prologue_obstack : &post_prologue_obstack;
b6610515 73
e9955c83 74 if (!no_lines_flag)
b6610515 75 {
e9955c83 76 obstack_fgrow2 (oout, muscle_find ("linef"),
0c15323d
AD
77 location.first_line,
78 quotearg_style (c_quoting_style,
79 muscle_find ("filename")));
b6610515 80 }
e9955c83 81 obstack_sgrow (oout, prologue);
b6610515
RA
82}
83
2ba3b73c 84
426cf563 85
a870c567 86
e9955c83
AD
87/*----------------------.
88| Handle the epilogue. |
89`----------------------*/
426cf563 90
e9955c83 91void
0c15323d 92epilogue_set (const char *epilogue, location_t location)
2ba3b73c 93{
e9955c83 94 if (!no_lines_flag)
1ff442ca 95 {
592e8d4d 96 obstack_fgrow2 (&muscle_obstack, muscle_find ("linef"),
0c15323d
AD
97 location.first_line,
98 quotearg_style (c_quoting_style,
99 muscle_find ("filename")));
1ff442ca 100 }
592e8d4d
AD
101 obstack_sgrow (&muscle_obstack, epilogue);
102 obstack_1grow (&muscle_obstack, 0);
103 muscle_insert ("epilogue", obstack_finish (&muscle_obstack));
1ff442ca 104}
1ff442ca 105
a70083a3 106
a70083a3
AD
107\f
108
676385e2
PH
109 /*-------------------------------------------------------------------.
110| Return the merger index for a merging function named NAME, whose |
111| arguments have type TYPE. Records the function, if new, in |
112| merger_list. |
113`-------------------------------------------------------------------*/
114
115static int
116get_merge_function (const char* name, const char* type)
117{
118 merger_list *syms;
119 merger_list head;
120 int n;
121
122 if (! glr_parser)
123 return 0;
124
125 if (type == NULL)
126 type = "";
127
128 head.next = merge_functions;
39f41916 129 for (syms = &head, n = 1; syms->next != NULL; syms = syms->next, n += 1)
676385e2
PH
130 if (strcmp (name, syms->next->name) == 0)
131 break;
132 if (syms->next == NULL) {
133 syms->next = XMALLOC (merger_list, 1);
134 syms->next->name = strdup (name);
135 syms->next->type = strdup (type);
136 syms->next->next = NULL;
137 merge_functions = head.next;
138 } else if (strcmp (type, syms->next->type) != 0)
39f41916 139 warn (_("result type clash on merge function %s: `%s' vs. `%s'"),
676385e2
PH
140 name, type, syms->next->type);
141 return n;
142}
143
144/*--------------------------------------.
145| Free all merge-function definitions. |
146`--------------------------------------*/
147
148void
149free_merger_functions (void)
150{
151 merger_list *L0;
152 if (! glr_parser)
153 return;
154 L0 = merge_functions;
155 while (L0 != NULL)
156 {
157 merger_list *L1 = L0->next;
158 free (L0);
159 L0 = L1;
160 }
161}
162
a70083a3 163\f
107f7dfb 164/*-------------------------------------------------------------------.
32e1e0a4 165| Parse the input grammar into a one symbol_list_t structure. Each |
107f7dfb
AD
166| rule is represented by a sequence of symbols: the left hand side |
167| followed by the contents of the right hand side, followed by a |
168| null pointer instead of a symbol to terminate the rule. The next |
169| symbol is the lhs of the following rule. |
170| |
fdbcd8e2
AD
171| All actions are copied out, labelled by the rule number they apply |
172| to. |
107f7dfb
AD
173| |
174| Bison used to allow some %directives in the rules sections, but |
175| this is no longer consider appropriate: (i) the documented grammar |
176| doesn't claim it, (ii), it would promote bad style, (iii), error |
177| recovery for %directives consists in skipping the junk until a `%' |
178| is seen and helrp synchronizing. This scheme is definitely wrong |
179| in the rules section. |
180`-------------------------------------------------------------------*/
1ff442ca 181
f6d0f937 182/* The (currently) last symbol of GRAMMAR. */
56c47203 183symbol_list_t *grammar_end = NULL;
f6d0f937
AD
184
185/* Append S to the GRAMMAR. */
e9955c83 186void
8efe435c 187grammar_symbol_append (symbol_t *symbol, location_t location)
f6d0f937 188{
56c47203 189 symbol_list_t *p = symbol_list_new (symbol, location);
f6d0f937
AD
190
191 if (grammar_end)
192 grammar_end->next = p;
193 else
194 grammar = p;
195
196 grammar_end = p;
197}
198
8efe435c
AD
199/* The rule currently being defined, and the previous rule.
200 CURRENT_RULE points to the first LHS of the current rule, while
201 PREVIOUS_RULE_END points to the *end* of the previous rule (NULL). */
56c47203
AD
202symbol_list_t *current_rule = NULL;
203symbol_list_t *previous_rule_end = NULL;
da4160c3
AD
204
205
8efe435c
AD
206/*----------------------------------------------.
207| Create a new rule for LHS in to the GRAMMAR. |
208`----------------------------------------------*/
da4160c3 209
e9955c83 210void
8efe435c 211grammar_rule_begin (symbol_t *lhs, location_t location)
da4160c3
AD
212{
213 if (!start_flag)
214 {
215 startsymbol = lhs;
8efe435c 216 startsymbol_location = location;
da4160c3
AD
217 start_flag = 1;
218 }
219
220 /* Start a new rule and record its lhs. */
221 ++nrules;
222 ++nritems;
223
8efe435c
AD
224 previous_rule_end = grammar_end;
225 grammar_symbol_append (lhs, location);
da4160c3
AD
226 current_rule = grammar_end;
227
228 /* Mark the rule's lhs as a nonterminal if not already so. */
229
230 if (lhs->class == unknown_sym)
231 {
232 lhs->class = nterm_sym;
233 lhs->number = nvars;
234 ++nvars;
235 }
236 else if (lhs->class == token_sym)
e776192e 237 complain_at (location, _("rule given for %s, which is a token"), lhs->tag);
da4160c3
AD
238}
239
e9955c83
AD
240/* Check that the last rule (CURRENT_RULE) is properly defined. For
241 instance, there should be no type clash on the default action. */
242
243static void
244grammar_current_rule_check (void)
245{
246 symbol_t *lhs = current_rule->sym;
247 symbol_t *first_rhs = current_rule->next->sym;
248
249 /* If there is an action, then there is nothing we can do: the user
250 is allowed to shoot in her foot. */
251 if (current_rule->action)
252 return;
253
254 /* If $$ is being set in default way, report if any type mismatch.
255 */
256 if (first_rhs)
257 {
258 const char *lhs_type = lhs->type_name ? lhs->type_name : "";
259 const char *rhs_type = first_rhs->type_name ? first_rhs->type_name : "";
260 if (strcmp (lhs_type, rhs_type))
e776192e
AD
261 complain_at (current_rule->location,
262 _("type clash (`%s' `%s') on default action"),
263 lhs_type, rhs_type);
e9955c83
AD
264 }
265 /* Warn if there is no default for $$ but we need one. */
266 else
267 {
268 if (lhs->type_name)
e776192e
AD
269 complain_at (current_rule->location,
270 _("empty rule for typed nonterminal, and no action"));
e9955c83
AD
271 }
272}
273
274
8efe435c
AD
275/*-------------------------------------.
276| End the currently being grown rule. |
277`-------------------------------------*/
e9955c83
AD
278
279void
8efe435c 280grammar_rule_end (location_t location)
e9955c83
AD
281{
282 /* Put an empty link in the list to mark the end of this rule */
8efe435c
AD
283 grammar_symbol_append (NULL, grammar_end->location);
284 current_rule->location = location;
e9955c83
AD
285 grammar_current_rule_check ();
286}
287
288
8efe435c
AD
289/*-------------------------------------------------------------------.
290| The previous action turns out the be a mid-rule action. Attach it |
291| to the current rule, i.e., create a dummy symbol, attach it this |
292| mid-rule action, and append this dummy nonterminal to the current |
293| rule. |
294`-------------------------------------------------------------------*/
1485e106 295
e9955c83 296void
1485e106
AD
297grammar_midrule_action (void)
298{
299 /* Since the action was written out with this rule's number, we must
300 give the new rule this number by inserting the new rule before
301 it. */
302
8efe435c
AD
303 /* Make a DUMMY nonterminal, whose location is that of the midrule
304 action. Create the MIDRULE. */
8efe435c 305 location_t dummy_location = current_rule->action_location;
39f41916 306 symbol_t *dummy = dummy_symbol_get (dummy_location);
56c47203 307 symbol_list_t *midrule = symbol_list_new (dummy, dummy_location);
1485e106
AD
308
309 /* Make a new rule, whose body is empty, before the current one, so
310 that the action just read can belong to it. */
311 ++nrules;
312 ++nritems;
8efe435c
AD
313 /* Attach its location and actions to that of the DUMMY. */
314 midrule->location = dummy_location;
315 midrule->action = current_rule->action;
316 midrule->action_location = dummy_location;
1485e106
AD
317 current_rule->action = NULL;
318
8efe435c
AD
319 if (previous_rule_end)
320 previous_rule_end->next = midrule;
1485e106 321 else
8efe435c 322 grammar = midrule;
1485e106 323
8efe435c
AD
324 /* End the dummy's rule. */
325 previous_rule_end = symbol_list_new (NULL, dummy_location);
326 previous_rule_end->next = current_rule;
1485e106 327
8efe435c 328 midrule->next = previous_rule_end;
1485e106 329
8efe435c
AD
330 /* Insert the dummy nonterminal replacing the midrule action into
331 the current rule. */
332 grammar_current_rule_symbol_append (dummy, dummy_location);
1485e106
AD
333}
334
9af3fbce
AD
335/* Set the precedence symbol of the current rule to PRECSYM. */
336
e9955c83 337void
e776192e 338grammar_current_rule_prec_set (symbol_t *precsym, location_t location)
9af3fbce
AD
339{
340 if (current_rule->ruleprec)
e776192e 341 complain_at (location, _("two @prec's in a row"));
9af3fbce
AD
342 current_rule->ruleprec = precsym;
343}
344
676385e2
PH
345/* Attach dynamic precedence DPREC to the current rule. */
346
347void
348grammar_current_rule_dprec_set (int dprec, location_t location)
349{
350 if (! glr_parser)
351 warn_at (location, _("%%dprec affects only GLR parsers"));
352 if (dprec <= 0)
353 complain_at (location, _("%%dprec must be followed by positive number"));
39f41916 354 else if (current_rule->dprec != 0)
676385e2
PH
355 complain_at (location, _("only one %%dprec allowed per rule"));
356 current_rule->dprec = dprec;
357}
358
359/* Attach a merge function NAME with argument type TYPE to current
360 rule. */
361
362void
363grammar_current_rule_merge_set (const char* name, location_t location)
364{
365 if (! glr_parser)
366 warn_at (location, _("%%merge affects only GLR parsers"));
39f41916 367 if (current_rule->merger != 0)
676385e2 368 complain_at (location, _("only one %%merge allowed per rule"));
39f41916 369 current_rule->merger =
676385e2
PH
370 get_merge_function (name, current_rule->sym->type_name);
371}
372
2e047461
AD
373/* Attach a SYMBOL to the current rule. If needed, move the previous
374 action as a mid-rule action. */
375
e9955c83 376void
8efe435c 377grammar_current_rule_symbol_append (symbol_t *symbol, location_t location)
2e047461
AD
378{
379 if (current_rule->action)
380 grammar_midrule_action ();
381 ++nritems;
8efe435c 382 grammar_symbol_append (symbol, location);
2e047461
AD
383}
384
2e047461
AD
385/* Attach an ACTION to the current rule. If needed, move the previous
386 action as a mid-rule action. */
387
e9955c83 388void
8efe435c 389grammar_current_rule_action_append (const char *action, location_t location)
2e047461
AD
390{
391 if (current_rule->action)
392 grammar_midrule_action ();
393 current_rule->action = action;
8efe435c 394 current_rule->action_location = location;
2e047461
AD
395}
396
a70083a3 397\f
a70083a3
AD
398/*---------------------------------------------------------------.
399| Convert the rules into the representation using RRHS, RLHS and |
d9b739c3 400| RITEM. |
a70083a3 401`---------------------------------------------------------------*/
1ff442ca 402
4a120d45 403static void
118fb205 404packgram (void)
1ff442ca 405{
9222837b
AD
406 unsigned int itemno = 0;
407 rule_number_t ruleno = 1;
408 symbol_list_t *p = grammar;
1ff442ca 409
a900a624 410 ritem = XCALLOC (item_number_t, nritems);
1a2b5d37 411 rules = XCALLOC (rule_t, nrules) - 1;
1ff442ca 412
1ff442ca
NF
413 while (p)
414 {
db8837cb 415 symbol_t *ruleprec = p->ruleprec;
d7e1f00c 416 rules[ruleno].user_number = ruleno;
c3b407f4 417 rules[ruleno].number = ruleno;
bba97eb2 418 rules[ruleno].lhs = p->sym;
99013900 419 rules[ruleno].rhs = ritem + itemno;
8efe435c 420 rules[ruleno].location = p->location;
1a2b5d37
AD
421 rules[ruleno].useful = TRUE;
422 rules[ruleno].action = p->action;
8efe435c 423 rules[ruleno].action_location = p->action_location;
676385e2
PH
424 rules[ruleno].dprec = p->dprec;
425 rules[ruleno].merger = p->merger;
1ff442ca
NF
426
427 p = p->next;
428 while (p && p->sym)
429 {
a49aecd5 430 /* item_number_t = symbol_number_t.
5fbb0954 431 But the former needs to contain more: negative rule numbers. */
a49aecd5 432 ritem[itemno++] = symbol_number_as_item_number (p->sym->number);
1ff442ca
NF
433 /* A rule gets by default the precedence and associativity
434 of the last token in it. */
d7020c20 435 if (p->sym->class == token_sym)
03b31c0c 436 rules[ruleno].prec = p->sym;
a70083a3
AD
437 if (p)
438 p = p->next;
1ff442ca
NF
439 }
440
441 /* If this rule has a %prec,
a70083a3 442 the specified symbol's precedence replaces the default. */
1ff442ca
NF
443 if (ruleprec)
444 {
03b31c0c
AD
445 rules[ruleno].precsym = ruleprec;
446 rules[ruleno].prec = ruleprec;
1ff442ca 447 }
1ff442ca 448 ritem[itemno++] = -ruleno;
f3849179 449 ++ruleno;
1ff442ca 450
a70083a3
AD
451 if (p)
452 p = p->next;
1ff442ca
NF
453 }
454
5123689b 455 assert (itemno == nritems);
3067fbef
AD
456
457 if (trace_flag)
458 ritem_print (stderr);
1ff442ca 459}
a70083a3 460\f
fdbcd8e2
AD
461/*------------------------------------------------------------------.
462| Read in the grammar specification and record it in the format |
463| described in gram.h. All actions are copied into ACTION_OBSTACK, |
464| in each case forming the body of a C function (YYACTION) which |
465| contains a switch statement to decide which action to execute. |
466`------------------------------------------------------------------*/
a70083a3
AD
467
468void
469reader (void)
470{
e9955c83 471 gram_control_t gram_control;
a70083a3
AD
472 lineno = 1;
473
474 /* Initialize the symbol table. */
db8837cb 475 symbols_new ();
b6610515 476
30171f79 477 /* Construct the axiom symbol. */
39f41916 478 axiom = symbol_get ("$axiom", empty_location);
30171f79 479 axiom->class = nterm_sym;
d9b739c3 480 axiom->number = nvars++;
30171f79 481
a70083a3 482 /* Construct the error token */
39f41916 483 errtoken = symbol_get ("error", empty_location);
d7020c20 484 errtoken->class = token_sym;
72a23c97 485 errtoken->number = ntokens++;
b6610515 486
a70083a3
AD
487 /* Construct a token that represents all undefined literal tokens.
488 It is always token number 2. */
39f41916 489 undeftoken = symbol_get ("$undefined.", empty_location);
d7020c20 490 undeftoken->class = token_sym;
72a23c97 491 undeftoken->number = ntokens++;
a70083a3 492
331dbc1b 493 /* Initialize the obstacks. */
0dd1580a
RA
494 obstack_init (&pre_prologue_obstack);
495 obstack_init (&post_prologue_obstack);
331dbc1b
AD
496
497 finput = xfopen (infile, "r");
e9955c83
AD
498 gram_in = finput;
499
500 gram_debug = !!getenv ("parse");
501 gram__flex_debug = !!getenv ("scan");
1d6412ad 502 scanner_initialize ();
e9955c83 503 gram_parse (&gram_control);
331dbc1b 504
e9955c83
AD
505 /* Grammar has been read. Do some checking */
506 if (nrules == 0)
507 fatal (_("no rules in the input grammar"));
508
509 /* Report any undefined symbols and consider them nonterminals. */
510 symbols_check_defined ();
b7c49edf
AD
511
512 /* If the user did not define her EOFTOKEN, do it now. */
513 if (!eoftoken)
514 {
39f41916 515 eoftoken = symbol_get ("$", empty_location);
b7c49edf 516 eoftoken->class = token_sym;
72a23c97 517 eoftoken->number = 0;
b7c49edf
AD
518 /* Value specified by POSIX. */
519 eoftoken->user_token_number = 0;
520 }
521
e9955c83
AD
522 /* Insert the initial rule, which line is that of the first rule
523 (not that of the start symbol):
524
525 axiom: %start EOF. */
526 {
56c47203 527 symbol_list_t *p = symbol_list_new (axiom, empty_location);
8efe435c
AD
528 p->location = grammar->location;
529 p->next = symbol_list_new (startsymbol, empty_location);
530 p->next->next = symbol_list_new (eoftoken, empty_location);
531 p->next->next->next = symbol_list_new (NULL, empty_location);
e9955c83
AD
532 p->next->next->next->next = grammar;
533 nrules += 1;
534 nritems += 3;
535 grammar = p;
536 }
537
538 if (nsyms > SHRT_MAX)
539 fatal (_("too many symbols (tokens plus nonterminals); maximum %d"),
540 SHRT_MAX);
541
542 assert (nsyms == ntokens + nvars);
b0c4483e 543
331dbc1b
AD
544 xfclose (finput);
545
a70083a3
AD
546 /* Assign the symbols their symbol numbers. Write #defines for the
547 token symbols into FDEFINES if requested. */
2f1afb73 548 symbols_pack ();
93ede233 549
a70083a3
AD
550 /* Convert the grammar into the format described in gram.h. */
551 packgram ();
8419d367 552
56c47203
AD
553 /* The grammar as a symbol_list_t is no longer needed. */
554 LIST_FREE (symbol_list_t, grammar);
a70083a3 555}