]> git.saurik.com Git - bison.git/blame - src/reader.c
* data/yacc.c, data/glr.c, data/lal1.cc: Use similar code to
[bison.git] / src / reader.c
CommitLineData
35dcf428 1/* Input parser for Bison
9c4637fa 2
a737b216 3 Copyright (C) 1984, 1986, 1989, 1992, 1998, 2000, 2001, 2002, 2003
a70083a3 4 Free Software Foundation, Inc.
1ff442ca 5
41aca2e0 6 This file is part of Bison, the GNU Compiler Compiler.
1ff442ca 7
41aca2e0
AD
8 Bison is free software; you can redistribute it and/or modify
9 it under the terms of the GNU General Public License as published by
10 the Free Software Foundation; either version 2, or (at your option)
11 any later version.
1ff442ca 12
41aca2e0
AD
13 Bison is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU General Public License for more details.
1ff442ca 17
41aca2e0
AD
18 You should have received a copy of the GNU General Public License
19 along with Bison; see the file COPYING. If not, write to
20 the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
21 Boston, MA 02111-1307, USA. */
1ff442ca 22
1ff442ca 23#include "system.h"
17ee7397
PE
24
25#include <quotearg.h>
26
27#include "complain.h"
28#include "conflicts.h"
1ff442ca 29#include "files.h"
17ee7397 30#include "getargs.h"
1ff442ca 31#include "gram.h"
17ee7397 32#include "muscle_tab.h"
6c89f1c1 33#include "output.h"
b2ca4022 34#include "reader.h"
17ee7397
PE
35#include "symlist.h"
36#include "symtab.h"
1ff442ca 37
17ee7397 38static symbol_list *grammar = NULL;
280a38c3 39static int start_flag = 0;
676385e2 40merger_list *merge_functions;
1ff442ca 41
d7020c20 42/* Nonzero if %union has been seen. */
e9955c83 43int typed = 0;
0d533154 44\f
e9955c83
AD
45/*-----------------------.
46| Set the start symbol. |
47`-----------------------*/
1ff442ca 48
e9955c83 49void
a737b216 50grammar_start_symbol_set (symbol *sym, location loc)
1ff442ca
NF
51{
52 if (start_flag)
17ee7397 53 complain_at (loc, _("multiple %s declarations"), "%start");
943819bf
RS
54 else
55 {
56 start_flag = 1;
a737b216 57 startsymbol = sym;
17ee7397 58 startsymbol_location = loc;
943819bf 59 }
1ff442ca
NF
60}
61
1ff442ca 62
d7020c20 63/*----------------------------------------------------------------.
e9955c83
AD
64| There are two prologues: one before %union, one after. Augment |
65| the current one. |
d7020c20 66`----------------------------------------------------------------*/
1ff442ca 67
e9955c83 68void
17ee7397 69prologue_augment (const char *prologue, location loc)
b6610515 70{
e9955c83
AD
71 struct obstack *oout =
72 !typed ? &pre_prologue_obstack : &post_prologue_obstack;
b6610515 73
17ee7397
PE
74 obstack_fgrow1 (oout, "]b4_syncline([[%d]], [[", loc.start.line);
75 MUSCLE_OBSTACK_SGROW (oout,
76 quotearg_style (c_quoting_style, loc.start.file));
6c239755 77 obstack_sgrow (oout, "]])[\n");
e9955c83 78 obstack_sgrow (oout, prologue);
b6610515
RA
79}
80
2ba3b73c 81
426cf563 82
a870c567 83
e9955c83
AD
84/*----------------------.
85| Handle the epilogue. |
86`----------------------*/
426cf563 87
e9955c83 88void
17ee7397 89epilogue_augment (const char *epilogue, location loc)
2ba3b73c 90{
7ec2d4cd 91 char *extension = NULL;
17ee7397 92 obstack_fgrow1 (&muscle_obstack, "]b4_syncline([[%d]], [[", loc.start.line);
6c239755 93 MUSCLE_OBSTACK_SGROW (&muscle_obstack,
17ee7397 94 quotearg_style (c_quoting_style, loc.start.file));
6c239755 95 obstack_sgrow (&muscle_obstack, "]])[\n");
592e8d4d
AD
96 obstack_sgrow (&muscle_obstack, epilogue);
97 obstack_1grow (&muscle_obstack, 0);
7ec2d4cd
AD
98 extension = obstack_finish (&muscle_obstack);
99 muscle_grow ("epilogue", extension, "");
100 obstack_free (&muscle_obstack, extension);
1ff442ca 101}
1ff442ca 102
a70083a3 103
a70083a3
AD
104\f
105
3e6656f9 106/*-------------------------------------------------------------------.
676385e2
PH
107| Return the merger index for a merging function named NAME, whose |
108| arguments have type TYPE. Records the function, if new, in |
95612cfa 109| MERGER_LIST. |
676385e2
PH
110`-------------------------------------------------------------------*/
111
112static int
17ee7397 113get_merge_function (uniqstr name, uniqstr type, location loc)
676385e2
PH
114{
115 merger_list *syms;
116 merger_list head;
117 int n;
118
119 if (! glr_parser)
120 return 0;
121
122 if (type == NULL)
17ee7397 123 type = uniqstr_new ("");
676385e2
PH
124
125 head.next = merge_functions;
39f41916 126 for (syms = &head, n = 1; syms->next != NULL; syms = syms->next, n += 1)
17ee7397 127 if (UNIQSTR_EQ (name, syms->next->name))
676385e2 128 break;
a5d50994
AD
129 if (syms->next == NULL)
130 {
9c4637fa 131 MALLOC (syms->next, 1);
17ee7397
PE
132 syms->next->name = uniqstr_new (name);
133 syms->next->type = uniqstr_new (type);
a5d50994
AD
134 syms->next->next = NULL;
135 merge_functions = head.next;
136 }
17ee7397 137 else if (!UNIQSTR_EQ (type, syms->next->type))
45a8a65d
PE
138 warn_at (loc, _("result type clash on merge function %s: <%s> != <%s>"),
139 name, type, syms->next->type);
676385e2
PH
140 return n;
141}
142
143/*--------------------------------------.
144| Free all merge-function definitions. |
145`--------------------------------------*/
146
147void
148free_merger_functions (void)
149{
150 merger_list *L0;
151 if (! glr_parser)
152 return;
153 L0 = merge_functions;
154 while (L0 != NULL)
155 {
156 merger_list *L1 = L0->next;
157 free (L0);
158 L0 = L1;
159 }
160}
161
a70083a3 162\f
107f7dfb 163/*-------------------------------------------------------------------.
17ee7397 164| Parse the input grammar into a one symbol_list structure. Each |
107f7dfb
AD
165| rule is represented by a sequence of symbols: the left hand side |
166| followed by the contents of the right hand side, followed by a |
167| null pointer instead of a symbol to terminate the rule. The next |
168| symbol is the lhs of the following rule. |
169| |
fdbcd8e2
AD
170| All actions are copied out, labelled by the rule number they apply |
171| to. |
107f7dfb
AD
172| |
173| Bison used to allow some %directives in the rules sections, but |
174| this is no longer consider appropriate: (i) the documented grammar |
175| doesn't claim it, (ii), it would promote bad style, (iii), error |
176| recovery for %directives consists in skipping the junk until a `%' |
177| is seen and helrp synchronizing. This scheme is definitely wrong |
178| in the rules section. |
179`-------------------------------------------------------------------*/
1ff442ca 180
f6d0f937 181/* The (currently) last symbol of GRAMMAR. */
17ee7397 182symbol_list *grammar_end = NULL;
f6d0f937 183
52328c6e 184/* Append SYM to the grammar. */
e9955c83 185void
17ee7397 186grammar_symbol_append (symbol *sym, location loc)
f6d0f937 187{
17ee7397 188 symbol_list *p = symbol_list_new (sym, loc);
f6d0f937
AD
189
190 if (grammar_end)
191 grammar_end->next = p;
192 else
193 grammar = p;
194
195 grammar_end = p;
196}
197
8efe435c
AD
198/* The rule currently being defined, and the previous rule.
199 CURRENT_RULE points to the first LHS of the current rule, while
200 PREVIOUS_RULE_END points to the *end* of the previous rule (NULL). */
17ee7397
PE
201symbol_list *current_rule = NULL;
202symbol_list *previous_rule_end = NULL;
da4160c3
AD
203
204
8efe435c
AD
205/*----------------------------------------------.
206| Create a new rule for LHS in to the GRAMMAR. |
207`----------------------------------------------*/
da4160c3 208
e9955c83 209void
17ee7397 210grammar_rule_begin (symbol *lhs, location loc)
da4160c3
AD
211{
212 if (!start_flag)
213 {
214 startsymbol = lhs;
17ee7397 215 startsymbol_location = loc;
da4160c3
AD
216 start_flag = 1;
217 }
218
219 /* Start a new rule and record its lhs. */
220 ++nrules;
221 ++nritems;
222
8efe435c 223 previous_rule_end = grammar_end;
17ee7397 224 grammar_symbol_append (lhs, loc);
da4160c3
AD
225 current_rule = grammar_end;
226
227 /* Mark the rule's lhs as a nonterminal if not already so. */
228
229 if (lhs->class == unknown_sym)
230 {
231 lhs->class = nterm_sym;
232 lhs->number = nvars;
233 ++nvars;
234 }
235 else if (lhs->class == token_sym)
17ee7397 236 complain_at (loc, _("rule given for %s, which is a token"), lhs->tag);
da4160c3
AD
237}
238
e9955c83
AD
239/* Check that the last rule (CURRENT_RULE) is properly defined. For
240 instance, there should be no type clash on the default action. */
241
242static void
243grammar_current_rule_check (void)
244{
17ee7397 245 symbol *lhs = current_rule->sym;
3f4c0f80 246 char const *lhs_type = lhs->type_name;
17ee7397 247 symbol *first_rhs = current_rule->next->sym;
e9955c83
AD
248
249 /* If there is an action, then there is nothing we can do: the user
3f4c0f80 250 is allowed to shoot herself in the foot. */
e9955c83
AD
251 if (current_rule->action)
252 return;
253
3f4c0f80
PE
254 /* Don't worry about the default action if $$ is untyped, since $$'s
255 value can't be used. */
256 if (! lhs_type)
257 return;
258
259 /* If $$ is being set in default way, report if any type mismatch. */
e9955c83
AD
260 if (first_rhs)
261 {
e9955c83 262 const char *rhs_type = first_rhs->type_name ? first_rhs->type_name : "";
17ee7397 263 if (!UNIQSTR_EQ (lhs_type, rhs_type))
e9273511
PE
264 warn_at (current_rule->location,
265 _("type clash on default action: <%s> != <%s>"),
266 lhs_type, rhs_type);
e9955c83
AD
267 }
268 /* Warn if there is no default for $$ but we need one. */
269 else
e9273511
PE
270 warn_at (current_rule->location,
271 _("empty rule for typed nonterminal, and no action"));
e9955c83
AD
272}
273
274
8efe435c
AD
275/*-------------------------------------.
276| End the currently being grown rule. |
277`-------------------------------------*/
e9955c83
AD
278
279void
17ee7397 280grammar_rule_end (location loc)
e9955c83
AD
281{
282 /* Put an empty link in the list to mark the end of this rule */
8efe435c 283 grammar_symbol_append (NULL, grammar_end->location);
17ee7397 284 current_rule->location = loc;
e9955c83
AD
285 grammar_current_rule_check ();
286}
287
288
8efe435c
AD
289/*-------------------------------------------------------------------.
290| The previous action turns out the be a mid-rule action. Attach it |
291| to the current rule, i.e., create a dummy symbol, attach it this |
292| mid-rule action, and append this dummy nonterminal to the current |
293| rule. |
294`-------------------------------------------------------------------*/
1485e106 295
e9955c83 296void
1485e106
AD
297grammar_midrule_action (void)
298{
299 /* Since the action was written out with this rule's number, we must
300 give the new rule this number by inserting the new rule before
301 it. */
302
8efe435c
AD
303 /* Make a DUMMY nonterminal, whose location is that of the midrule
304 action. Create the MIDRULE. */
17ee7397
PE
305 location dummy_location = current_rule->action_location;
306 symbol *dummy = dummy_symbol_get (dummy_location);
307 symbol_list *midrule = symbol_list_new (dummy, dummy_location);
1485e106
AD
308
309 /* Make a new rule, whose body is empty, before the current one, so
310 that the action just read can belong to it. */
311 ++nrules;
312 ++nritems;
8efe435c
AD
313 /* Attach its location and actions to that of the DUMMY. */
314 midrule->location = dummy_location;
315 midrule->action = current_rule->action;
316 midrule->action_location = dummy_location;
1485e106
AD
317 current_rule->action = NULL;
318
8efe435c
AD
319 if (previous_rule_end)
320 previous_rule_end->next = midrule;
1485e106 321 else
8efe435c 322 grammar = midrule;
1485e106 323
8efe435c
AD
324 /* End the dummy's rule. */
325 previous_rule_end = symbol_list_new (NULL, dummy_location);
326 previous_rule_end->next = current_rule;
1485e106 327
8efe435c 328 midrule->next = previous_rule_end;
1485e106 329
8efe435c
AD
330 /* Insert the dummy nonterminal replacing the midrule action into
331 the current rule. */
332 grammar_current_rule_symbol_append (dummy, dummy_location);
1485e106
AD
333}
334
9af3fbce
AD
335/* Set the precedence symbol of the current rule to PRECSYM. */
336
e9955c83 337void
17ee7397 338grammar_current_rule_prec_set (symbol *precsym, location loc)
9af3fbce
AD
339{
340 if (current_rule->ruleprec)
17ee7397 341 complain_at (loc, _("only one %s allowed per rule"), "%prec");
9af3fbce
AD
342 current_rule->ruleprec = precsym;
343}
344
676385e2
PH
345/* Attach dynamic precedence DPREC to the current rule. */
346
347void
17ee7397 348grammar_current_rule_dprec_set (int dprec, location loc)
676385e2
PH
349{
350 if (! glr_parser)
17ee7397 351 warn_at (loc, _("%s affects only GLR parsers"), "%dprec");
676385e2 352 if (dprec <= 0)
17ee7397 353 complain_at (loc, _("%s must be followed by positive number"), "%dprec");
39f41916 354 else if (current_rule->dprec != 0)
17ee7397 355 complain_at (loc, _("only one %s allowed per rule"), "%dprec");
676385e2
PH
356 current_rule->dprec = dprec;
357}
358
359/* Attach a merge function NAME with argument type TYPE to current
360 rule. */
361
362void
17ee7397 363grammar_current_rule_merge_set (uniqstr name, location loc)
676385e2
PH
364{
365 if (! glr_parser)
17ee7397 366 warn_at (loc, _("%s affects only GLR parsers"), "%merge");
39f41916 367 if (current_rule->merger != 0)
17ee7397 368 complain_at (loc, _("only one %s allowed per rule"), "%merge");
39f41916 369 current_rule->merger =
17ee7397 370 get_merge_function (name, current_rule->sym->type_name, loc);
676385e2
PH
371}
372
17ee7397 373/* Attach SYM to the current rule. If needed, move the previous
2e047461
AD
374 action as a mid-rule action. */
375
e9955c83 376void
17ee7397 377grammar_current_rule_symbol_append (symbol *sym, location loc)
2e047461
AD
378{
379 if (current_rule->action)
380 grammar_midrule_action ();
381 ++nritems;
17ee7397 382 grammar_symbol_append (sym, loc);
2e047461
AD
383}
384
2e047461
AD
385/* Attach an ACTION to the current rule. If needed, move the previous
386 action as a mid-rule action. */
387
e9955c83 388void
17ee7397 389grammar_current_rule_action_append (const char *action, location loc)
2e047461
AD
390{
391 if (current_rule->action)
392 grammar_midrule_action ();
393 current_rule->action = action;
17ee7397 394 current_rule->action_location = loc;
2e047461
AD
395}
396
a70083a3 397\f
a70083a3
AD
398/*---------------------------------------------------------------.
399| Convert the rules into the representation using RRHS, RLHS and |
d9b739c3 400| RITEM. |
a70083a3 401`---------------------------------------------------------------*/
1ff442ca 402
4a120d45 403static void
118fb205 404packgram (void)
1ff442ca 405{
9222837b 406 unsigned int itemno = 0;
17ee7397
PE
407 rule_number ruleno = 0;
408 symbol_list *p = grammar;
1ff442ca 409
9c4637fa
PE
410 CALLOC (ritem, nritems);
411 CALLOC (rules, nrules);
1ff442ca 412
1ff442ca
NF
413 while (p)
414 {
17ee7397 415 symbol *ruleprec = p->ruleprec;
d7e1f00c 416 rules[ruleno].user_number = ruleno;
c3b407f4 417 rules[ruleno].number = ruleno;
bba97eb2 418 rules[ruleno].lhs = p->sym;
99013900 419 rules[ruleno].rhs = ritem + itemno;
8efe435c 420 rules[ruleno].location = p->location;
b4afb6bb 421 rules[ruleno].useful = true;
1a2b5d37 422 rules[ruleno].action = p->action;
8efe435c 423 rules[ruleno].action_location = p->action_location;
676385e2
PH
424 rules[ruleno].dprec = p->dprec;
425 rules[ruleno].merger = p->merger;
1ff442ca
NF
426
427 p = p->next;
428 while (p && p->sym)
429 {
17ee7397 430 /* item_number = symbol_number.
5fbb0954 431 But the former needs to contain more: negative rule numbers. */
a49aecd5 432 ritem[itemno++] = symbol_number_as_item_number (p->sym->number);
1ff442ca
NF
433 /* A rule gets by default the precedence and associativity
434 of the last token in it. */
d7020c20 435 if (p->sym->class == token_sym)
03b31c0c 436 rules[ruleno].prec = p->sym;
a70083a3
AD
437 if (p)
438 p = p->next;
1ff442ca
NF
439 }
440
441 /* If this rule has a %prec,
a70083a3 442 the specified symbol's precedence replaces the default. */
1ff442ca
NF
443 if (ruleprec)
444 {
03b31c0c
AD
445 rules[ruleno].precsym = ruleprec;
446 rules[ruleno].prec = ruleprec;
1ff442ca 447 }
4b3d3a8e 448 ritem[itemno++] = rule_number_as_item_number (ruleno);
f3849179 449 ++ruleno;
1ff442ca 450
a70083a3
AD
451 if (p)
452 p = p->next;
1ff442ca
NF
453 }
454
35dcf428
PE
455 if (itemno != nritems)
456 abort ();
3067fbef 457
273a74fa 458 if (trace_flag & trace_sets)
3067fbef 459 ritem_print (stderr);
1ff442ca 460}
a70083a3 461\f
fdbcd8e2
AD
462/*------------------------------------------------------------------.
463| Read in the grammar specification and record it in the format |
464| described in gram.h. All actions are copied into ACTION_OBSTACK, |
465| in each case forming the body of a C function (YYACTION) which |
466| contains a switch statement to decide which action to execute. |
467`------------------------------------------------------------------*/
a70083a3
AD
468
469void
470reader (void)
471{
a70083a3 472 /* Initialize the symbol table. */
db8837cb 473 symbols_new ();
b6610515 474
88bce5a2
AD
475 /* Construct the accept symbol. */
476 accept = symbol_get ("$accept", empty_location);
477 accept->class = nterm_sym;
478 accept->number = nvars++;
30171f79 479
a70083a3 480 /* Construct the error token */
39f41916 481 errtoken = symbol_get ("error", empty_location);
d7020c20 482 errtoken->class = token_sym;
72a23c97 483 errtoken->number = ntokens++;
b6610515 484
a70083a3
AD
485 /* Construct a token that represents all undefined literal tokens.
486 It is always token number 2. */
88bce5a2 487 undeftoken = symbol_get ("$undefined", empty_location);
d7020c20 488 undeftoken->class = token_sym;
72a23c97 489 undeftoken->number = ntokens++;
a70083a3 490
331dbc1b 491 /* Initialize the obstacks. */
0dd1580a
RA
492 obstack_init (&pre_prologue_obstack);
493 obstack_init (&post_prologue_obstack);
331dbc1b 494
95612cfa 495 finput = xfopen (grammar_file, "r");
e9955c83
AD
496 gram_in = finput;
497
473d0a75
AD
498 gram__flex_debug = trace_flag & trace_scan;
499 gram_debug = trace_flag & trace_parse;
1d6412ad 500 scanner_initialize ();
78c3da9e 501 gram_parse ();
331dbc1b 502
b275314e
AD
503 /* If something went wrong during the parsing, don't try to
504 continue. */
b4afb6bb 505 if (complaint_issued)
f956c304 506 return;
b275314e 507
e9955c83
AD
508 /* Grammar has been read. Do some checking */
509 if (nrules == 0)
510 fatal (_("no rules in the input grammar"));
511
512 /* Report any undefined symbols and consider them nonterminals. */
513 symbols_check_defined ();
b7c49edf 514
88bce5a2
AD
515 /* If the user did not define her ENDTOKEN, do it now. */
516 if (!endtoken)
b7c49edf 517 {
88bce5a2
AD
518 endtoken = symbol_get ("$end", empty_location);
519 endtoken->class = token_sym;
520 endtoken->number = 0;
b7c49edf 521 /* Value specified by POSIX. */
88bce5a2 522 endtoken->user_token_number = 0;
b7c49edf
AD
523 }
524
e9955c83
AD
525 /* Insert the initial rule, which line is that of the first rule
526 (not that of the start symbol):
527
88bce5a2 528 accept: %start EOF. */
e9955c83 529 {
17ee7397 530 symbol_list *p = symbol_list_new (accept, empty_location);
8efe435c
AD
531 p->location = grammar->location;
532 p->next = symbol_list_new (startsymbol, empty_location);
88bce5a2 533 p->next->next = symbol_list_new (endtoken, empty_location);
8efe435c 534 p->next->next->next = symbol_list_new (NULL, empty_location);
e9955c83
AD
535 p->next->next->next->next = grammar;
536 nrules += 1;
537 nritems += 3;
538 grammar = p;
539 }
540
17ee7397 541 if (! (nsyms <= SYMBOL_NUMBER_MAXIMUM && nsyms == ntokens + nvars))
35dcf428 542 abort ();
b0c4483e 543
331dbc1b
AD
544 xfclose (finput);
545
a70083a3
AD
546 /* Assign the symbols their symbol numbers. Write #defines for the
547 token symbols into FDEFINES if requested. */
2f1afb73 548 symbols_pack ();
93ede233 549
a70083a3
AD
550 /* Convert the grammar into the format described in gram.h. */
551 packgram ();
8419d367 552
17ee7397
PE
553 /* The grammar as a symbol_list is no longer needed. */
554 LIST_FREE (symbol_list, grammar);
a70083a3 555}