]> git.saurik.com Git - bison.git/blame - src/reader.c
* tests/calc.at (_AT_DATA_CALC_Y): Initialize the whole initial
[bison.git] / src / reader.c
CommitLineData
35dcf428 1/* Input parser for Bison
9c4637fa 2
05ac60f3 3 Copyright (C) 1984, 1986, 1989, 1992, 1998, 2000, 2001, 2002, 2003,
378f4bd8 4 2005, 2006 Free Software Foundation, Inc.
1ff442ca 5
41aca2e0 6 This file is part of Bison, the GNU Compiler Compiler.
1ff442ca 7
41aca2e0
AD
8 Bison is free software; you can redistribute it and/or modify
9 it under the terms of the GNU General Public License as published by
10 the Free Software Foundation; either version 2, or (at your option)
11 any later version.
1ff442ca 12
41aca2e0
AD
13 Bison is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU General Public License for more details.
1ff442ca 17
41aca2e0
AD
18 You should have received a copy of the GNU General Public License
19 along with Bison; see the file COPYING. If not, write to
0fb669f9
PE
20 the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
21 Boston, MA 02110-1301, USA. */
1ff442ca 22
2cec9080 23#include <config.h>
1ff442ca 24#include "system.h"
17ee7397
PE
25
26#include <quotearg.h>
27
28#include "complain.h"
29#include "conflicts.h"
1ff442ca 30#include "files.h"
17ee7397 31#include "getargs.h"
1ff442ca 32#include "gram.h"
17ee7397 33#include "muscle_tab.h"
6c89f1c1 34#include "output.h"
b2ca4022 35#include "reader.h"
17ee7397
PE
36#include "symlist.h"
37#include "symtab.h"
1ff442ca 38
17ee7397 39static symbol_list *grammar = NULL;
d0829076 40static bool start_flag = false;
676385e2 41merger_list *merge_functions;
1ff442ca 42
affac613 43/* Was %union seen? */
d0829076 44bool typed = false;
39a06c25
PE
45
46/* Should rules have a default precedence? */
47bool default_prec = true;
0d533154 48\f
e9955c83
AD
49/*-----------------------.
50| Set the start symbol. |
51`-----------------------*/
1ff442ca 52
e9955c83 53void
a737b216 54grammar_start_symbol_set (symbol *sym, location loc)
1ff442ca
NF
55{
56 if (start_flag)
17ee7397 57 complain_at (loc, _("multiple %s declarations"), "%start");
943819bf
RS
58 else
59 {
d0829076 60 start_flag = true;
a737b216 61 startsymbol = sym;
17ee7397 62 startsymbol_location = loc;
943819bf 63 }
1ff442ca
NF
64}
65
1ff442ca 66
d7020c20 67/*----------------------------------------------------------------.
e9955c83
AD
68| There are two prologues: one before %union, one after. Augment |
69| the current one. |
d7020c20 70`----------------------------------------------------------------*/
1ff442ca 71
e9955c83 72void
17ee7397 73prologue_augment (const char *prologue, location loc)
b6610515 74{
e9955c83
AD
75 struct obstack *oout =
76 !typed ? &pre_prologue_obstack : &post_prologue_obstack;
b6610515 77
05ac60f3 78 obstack_fgrow1 (oout, "]b4_syncline(%d, [[", loc.start.line);
17ee7397
PE
79 MUSCLE_OBSTACK_SGROW (oout,
80 quotearg_style (c_quoting_style, loc.start.file));
6c239755 81 obstack_sgrow (oout, "]])[\n");
e9955c83 82 obstack_sgrow (oout, prologue);
b6610515
RA
83}
84
a70083a3
AD
85\f
86
3e6656f9 87/*-------------------------------------------------------------------.
676385e2
PH
88| Return the merger index for a merging function named NAME, whose |
89| arguments have type TYPE. Records the function, if new, in |
95612cfa 90| MERGER_LIST. |
676385e2
PH
91`-------------------------------------------------------------------*/
92
93static int
17ee7397 94get_merge_function (uniqstr name, uniqstr type, location loc)
676385e2
PH
95{
96 merger_list *syms;
97 merger_list head;
98 int n;
99
100 if (! glr_parser)
101 return 0;
102
103 if (type == NULL)
17ee7397 104 type = uniqstr_new ("");
676385e2
PH
105
106 head.next = merge_functions;
affac613 107 for (syms = &head, n = 1; syms->next; syms = syms->next, n += 1)
17ee7397 108 if (UNIQSTR_EQ (name, syms->next->name))
676385e2 109 break;
a5d50994
AD
110 if (syms->next == NULL)
111 {
da2a7671 112 syms->next = xmalloc (sizeof syms->next[0]);
17ee7397
PE
113 syms->next->name = uniqstr_new (name);
114 syms->next->type = uniqstr_new (type);
a5d50994
AD
115 syms->next->next = NULL;
116 merge_functions = head.next;
117 }
17ee7397 118 else if (!UNIQSTR_EQ (type, syms->next->type))
45a8a65d
PE
119 warn_at (loc, _("result type clash on merge function %s: <%s> != <%s>"),
120 name, type, syms->next->type);
676385e2
PH
121 return n;
122}
123
124/*--------------------------------------.
125| Free all merge-function definitions. |
126`--------------------------------------*/
127
128void
129free_merger_functions (void)
130{
affac613
AD
131 merger_list *L0 = merge_functions;
132 while (L0)
676385e2
PH
133 {
134 merger_list *L1 = L0->next;
135 free (L0);
136 L0 = L1;
137 }
138}
139
a70083a3 140\f
107f7dfb 141/*-------------------------------------------------------------------.
17ee7397 142| Parse the input grammar into a one symbol_list structure. Each |
107f7dfb
AD
143| rule is represented by a sequence of symbols: the left hand side |
144| followed by the contents of the right hand side, followed by a |
145| null pointer instead of a symbol to terminate the rule. The next |
146| symbol is the lhs of the following rule. |
147| |
fdbcd8e2
AD
148| All actions are copied out, labelled by the rule number they apply |
149| to. |
107f7dfb 150`-------------------------------------------------------------------*/
1ff442ca 151
f6d0f937 152/* The (currently) last symbol of GRAMMAR. */
04098407 153static symbol_list *grammar_end = NULL;
f6d0f937 154
52328c6e 155/* Append SYM to the grammar. */
8f3596a6 156static void
17ee7397 157grammar_symbol_append (symbol *sym, location loc)
f6d0f937 158{
17ee7397 159 symbol_list *p = symbol_list_new (sym, loc);
f6d0f937
AD
160
161 if (grammar_end)
162 grammar_end->next = p;
163 else
164 grammar = p;
165
166 grammar_end = p;
8f3596a6 167
e3233bf6 168 /* A null SYM stands for an end of rule; it is not an actual
8f3596a6
AD
169 part of it. */
170 if (sym)
171 ++nritems;
f6d0f937
AD
172}
173
8efe435c
AD
174/* The rule currently being defined, and the previous rule.
175 CURRENT_RULE points to the first LHS of the current rule, while
176 PREVIOUS_RULE_END points to the *end* of the previous rule (NULL). */
17ee7397 177symbol_list *current_rule = NULL;
04098407 178static symbol_list *previous_rule_end = NULL;
da4160c3
AD
179
180
8efe435c
AD
181/*----------------------------------------------.
182| Create a new rule for LHS in to the GRAMMAR. |
183`----------------------------------------------*/
da4160c3 184
e9955c83 185void
8f3596a6 186grammar_current_rule_begin (symbol *lhs, location loc)
da4160c3
AD
187{
188 if (!start_flag)
189 {
190 startsymbol = lhs;
17ee7397 191 startsymbol_location = loc;
d0829076 192 start_flag = true;
da4160c3
AD
193 }
194
195 /* Start a new rule and record its lhs. */
196 ++nrules;
8efe435c 197 previous_rule_end = grammar_end;
17ee7397 198 grammar_symbol_append (lhs, loc);
da4160c3
AD
199 current_rule = grammar_end;
200
201 /* Mark the rule's lhs as a nonterminal if not already so. */
da4160c3
AD
202 if (lhs->class == unknown_sym)
203 {
204 lhs->class = nterm_sym;
205 lhs->number = nvars;
206 ++nvars;
207 }
208 else if (lhs->class == token_sym)
17ee7397 209 complain_at (loc, _("rule given for %s, which is a token"), lhs->tag);
da4160c3
AD
210}
211
affac613 212
84866159
AD
213/*-----------------------------------------------------------------.
214| A symbol is typed if it has a declared %type, or if it is a |
215| mid-rule symbol (i.e., the generated LHS replacing a mid-rule |
216| action) that was assigned to, as in `exp: { $$ = 1; } { $$ = $1; |
217| }'. |
218`-----------------------------------------------------------------*/
219
220static bool
221symbol_typed_p (const symbol_list *s)
222{
223 return (s->sym->type_name
6ec2c0f2 224 || s->midrule && s->midrule->used);
84866159
AD
225}
226
8f3596a6
AD
227/*----------------------------------------------------------------.
228| Check that the rule R is properly defined. For instance, there |
229| should be no type clash on the default action. |
230`----------------------------------------------------------------*/
e9955c83
AD
231
232static void
8f3596a6 233grammar_rule_check (const symbol_list *r)
e9955c83 234{
affac613 235 /* Type check.
e9955c83 236
affac613
AD
237 If there is an action, then there is nothing we can do: the user
238 is allowed to shoot herself in the foot.
3f4c0f80 239
affac613
AD
240 Don't worry about the default action if $$ is untyped, since $$'s
241 value can't be used. */
8f3596a6 242 if (!r->action && r->sym->type_name)
e9955c83 243 {
8f3596a6 244 symbol *first_rhs = r->next->sym;
affac613
AD
245 /* If $$ is being set in default way, report if any type mismatch. */
246 if (first_rhs)
247 {
8f3596a6 248 char const *lhs_type = r->sym->type_name;
affac613
AD
249 const char *rhs_type =
250 first_rhs->type_name ? first_rhs->type_name : "";
251 if (!UNIQSTR_EQ (lhs_type, rhs_type))
8f3596a6 252 warn_at (r->location,
affac613
AD
253 _("type clash on default action: <%s> != <%s>"),
254 lhs_type, rhs_type);
255 }
256 /* Warn if there is no default for $$ but we need one. */
257 else
8f3596a6 258 warn_at (r->location,
affac613
AD
259 _("empty rule for typed nonterminal, and no action"));
260 }
e3233bf6 261
8f3596a6
AD
262 /* Check that typed symbol values are used. */
263 {
668c5d19 264 symbol_list const *l = r;
8f3596a6
AD
265 int n = 0;
266 for (; l && l->sym; l = l->next, ++n)
267 if (! (l->used
84866159 268 || !symbol_typed_p (l)
8f3596a6 269 /* The default action, $$ = $1, `uses' both. */
668c5d19
PE
270 || (!r->action && (n == 0 || n == 1))))
271 {
272 if (n)
273 warn_at (r->location, _("unused value: $%d"), n);
274 else
275 warn_at (r->location, _("unset value: $$"));
276 }
8f3596a6 277 }
e9955c83
AD
278}
279
280
8efe435c
AD
281/*-------------------------------------.
282| End the currently being grown rule. |
283`-------------------------------------*/
e9955c83
AD
284
285void
8f3596a6 286grammar_current_rule_end (location loc)
e9955c83
AD
287{
288 /* Put an empty link in the list to mark the end of this rule */
8efe435c 289 grammar_symbol_append (NULL, grammar_end->location);
17ee7397 290 current_rule->location = loc;
8f3596a6 291 grammar_rule_check (current_rule);
e9955c83
AD
292}
293
294
8efe435c
AD
295/*-------------------------------------------------------------------.
296| The previous action turns out the be a mid-rule action. Attach it |
297| to the current rule, i.e., create a dummy symbol, attach it this |
298| mid-rule action, and append this dummy nonterminal to the current |
299| rule. |
300`-------------------------------------------------------------------*/
1485e106 301
8f3596a6 302static void
1485e106
AD
303grammar_midrule_action (void)
304{
305 /* Since the action was written out with this rule's number, we must
306 give the new rule this number by inserting the new rule before
307 it. */
308
8efe435c
AD
309 /* Make a DUMMY nonterminal, whose location is that of the midrule
310 action. Create the MIDRULE. */
17ee7397
PE
311 location dummy_location = current_rule->action_location;
312 symbol *dummy = dummy_symbol_get (dummy_location);
313 symbol_list *midrule = symbol_list_new (dummy, dummy_location);
1485e106
AD
314
315 /* Make a new rule, whose body is empty, before the current one, so
316 that the action just read can belong to it. */
317 ++nrules;
318 ++nritems;
8efe435c
AD
319 /* Attach its location and actions to that of the DUMMY. */
320 midrule->location = dummy_location;
321 midrule->action = current_rule->action;
322 midrule->action_location = dummy_location;
1485e106 323 current_rule->action = NULL;
378f4bd8
AD
324 /* If $$ was used in the action, the LHS of the enclosing rule was
325 incorrectly flagged as used. */
326 midrule->used = current_rule->used;
327 current_rule->used = false;
1485e106 328
8efe435c
AD
329 if (previous_rule_end)
330 previous_rule_end->next = midrule;
1485e106 331 else
8efe435c 332 grammar = midrule;
1485e106 333
8efe435c 334 /* End the dummy's rule. */
84866159
AD
335 midrule->next = symbol_list_new (NULL, dummy_location);
336 grammar_rule_check (midrule);
337 midrule->next->next = current_rule;
1485e106 338
84866159 339 previous_rule_end = midrule->next;
1485e106 340
8efe435c 341 /* Insert the dummy nonterminal replacing the midrule action into
84866159 342 the current rule. Bind it to its dedicated rule. */
8efe435c 343 grammar_current_rule_symbol_append (dummy, dummy_location);
6ec2c0f2 344 grammar_end->midrule = midrule;
1485e106
AD
345}
346
9af3fbce
AD
347/* Set the precedence symbol of the current rule to PRECSYM. */
348
e9955c83 349void
17ee7397 350grammar_current_rule_prec_set (symbol *precsym, location loc)
9af3fbce
AD
351{
352 if (current_rule->ruleprec)
17ee7397 353 complain_at (loc, _("only one %s allowed per rule"), "%prec");
9af3fbce
AD
354 current_rule->ruleprec = precsym;
355}
356
676385e2
PH
357/* Attach dynamic precedence DPREC to the current rule. */
358
359void
17ee7397 360grammar_current_rule_dprec_set (int dprec, location loc)
676385e2
PH
361{
362 if (! glr_parser)
17ee7397 363 warn_at (loc, _("%s affects only GLR parsers"), "%dprec");
676385e2 364 if (dprec <= 0)
17ee7397 365 complain_at (loc, _("%s must be followed by positive number"), "%dprec");
39f41916 366 else if (current_rule->dprec != 0)
17ee7397 367 complain_at (loc, _("only one %s allowed per rule"), "%dprec");
676385e2
PH
368 current_rule->dprec = dprec;
369}
370
371/* Attach a merge function NAME with argument type TYPE to current
372 rule. */
373
374void
17ee7397 375grammar_current_rule_merge_set (uniqstr name, location loc)
676385e2
PH
376{
377 if (! glr_parser)
17ee7397 378 warn_at (loc, _("%s affects only GLR parsers"), "%merge");
39f41916 379 if (current_rule->merger != 0)
17ee7397 380 complain_at (loc, _("only one %s allowed per rule"), "%merge");
39f41916 381 current_rule->merger =
17ee7397 382 get_merge_function (name, current_rule->sym->type_name, loc);
676385e2
PH
383}
384
17ee7397 385/* Attach SYM to the current rule. If needed, move the previous
2e047461
AD
386 action as a mid-rule action. */
387
e9955c83 388void
17ee7397 389grammar_current_rule_symbol_append (symbol *sym, location loc)
2e047461
AD
390{
391 if (current_rule->action)
392 grammar_midrule_action ();
17ee7397 393 grammar_symbol_append (sym, loc);
2e047461
AD
394}
395
2e047461
AD
396/* Attach an ACTION to the current rule. If needed, move the previous
397 action as a mid-rule action. */
398
e9955c83 399void
17ee7397 400grammar_current_rule_action_append (const char *action, location loc)
2e047461
AD
401{
402 if (current_rule->action)
403 grammar_midrule_action ();
404 current_rule->action = action;
17ee7397 405 current_rule->action_location = loc;
2e047461
AD
406}
407
a70083a3 408\f
a70083a3
AD
409/*---------------------------------------------------------------.
410| Convert the rules into the representation using RRHS, RLHS and |
d9b739c3 411| RITEM. |
a70083a3 412`---------------------------------------------------------------*/
1ff442ca 413
4a120d45 414static void
118fb205 415packgram (void)
1ff442ca 416{
9222837b 417 unsigned int itemno = 0;
17ee7397
PE
418 rule_number ruleno = 0;
419 symbol_list *p = grammar;
1ff442ca 420
e9ad4aec
PE
421 ritem = xnmalloc (nritems + 1, sizeof *ritem);
422
423 /* This sentinel is used by build_relations in gram.c. */
424 *ritem++ = 0;
425
da2a7671 426 rules = xnmalloc (nrules, sizeof *rules);
1ff442ca 427
1ff442ca
NF
428 while (p)
429 {
17ee7397 430 symbol *ruleprec = p->ruleprec;
d7e1f00c 431 rules[ruleno].user_number = ruleno;
c3b407f4 432 rules[ruleno].number = ruleno;
bba97eb2 433 rules[ruleno].lhs = p->sym;
99013900 434 rules[ruleno].rhs = ritem + itemno;
da2a7671
PE
435 rules[ruleno].prec = NULL;
436 rules[ruleno].dprec = p->dprec;
437 rules[ruleno].merger = p->merger;
438 rules[ruleno].precsym = NULL;
8efe435c 439 rules[ruleno].location = p->location;
b4afb6bb 440 rules[ruleno].useful = true;
1a2b5d37 441 rules[ruleno].action = p->action;
8efe435c 442 rules[ruleno].action_location = p->action_location;
1ff442ca
NF
443
444 p = p->next;
445 while (p && p->sym)
446 {
17ee7397 447 /* item_number = symbol_number.
5fbb0954 448 But the former needs to contain more: negative rule numbers. */
a49aecd5 449 ritem[itemno++] = symbol_number_as_item_number (p->sym->number);
1ff442ca
NF
450 /* A rule gets by default the precedence and associativity
451 of the last token in it. */
39a06c25 452 if (p->sym->class == token_sym && default_prec)
03b31c0c 453 rules[ruleno].prec = p->sym;
a70083a3
AD
454 if (p)
455 p = p->next;
1ff442ca
NF
456 }
457
458 /* If this rule has a %prec,
a70083a3 459 the specified symbol's precedence replaces the default. */
1ff442ca
NF
460 if (ruleprec)
461 {
03b31c0c
AD
462 rules[ruleno].precsym = ruleprec;
463 rules[ruleno].prec = ruleprec;
1ff442ca 464 }
4b3d3a8e 465 ritem[itemno++] = rule_number_as_item_number (ruleno);
f3849179 466 ++ruleno;
1ff442ca 467
a70083a3
AD
468 if (p)
469 p = p->next;
1ff442ca
NF
470 }
471
35dcf428
PE
472 if (itemno != nritems)
473 abort ();
3067fbef 474
273a74fa 475 if (trace_flag & trace_sets)
3067fbef 476 ritem_print (stderr);
1ff442ca 477}
a70083a3 478\f
fdbcd8e2
AD
479/*------------------------------------------------------------------.
480| Read in the grammar specification and record it in the format |
481| described in gram.h. All actions are copied into ACTION_OBSTACK, |
482| in each case forming the body of a C function (YYACTION) which |
483| contains a switch statement to decide which action to execute. |
484`------------------------------------------------------------------*/
a70083a3
AD
485
486void
487reader (void)
488{
a70083a3 489 /* Initialize the symbol table. */
db8837cb 490 symbols_new ();
b6610515 491
88bce5a2
AD
492 /* Construct the accept symbol. */
493 accept = symbol_get ("$accept", empty_location);
494 accept->class = nterm_sym;
495 accept->number = nvars++;
30171f79 496
a70083a3 497 /* Construct the error token */
39f41916 498 errtoken = symbol_get ("error", empty_location);
d7020c20 499 errtoken->class = token_sym;
72a23c97 500 errtoken->number = ntokens++;
b6610515 501
a70083a3
AD
502 /* Construct a token that represents all undefined literal tokens.
503 It is always token number 2. */
88bce5a2 504 undeftoken = symbol_get ("$undefined", empty_location);
d7020c20 505 undeftoken->class = token_sym;
72a23c97 506 undeftoken->number = ntokens++;
a70083a3 507
331dbc1b 508 /* Initialize the obstacks. */
0dd1580a
RA
509 obstack_init (&pre_prologue_obstack);
510 obstack_init (&post_prologue_obstack);
331dbc1b 511
2b81e969 512 gram_in = xfopen (grammar_file, "r");
e9955c83 513
473d0a75
AD
514 gram__flex_debug = trace_flag & trace_scan;
515 gram_debug = trace_flag & trace_parse;
1d6412ad 516 scanner_initialize ();
78c3da9e 517 gram_parse ();
331dbc1b 518
b275314e
AD
519 /* If something went wrong during the parsing, don't try to
520 continue. */
b4afb6bb 521 if (complaint_issued)
f956c304 522 return;
b275314e 523
e9955c83
AD
524 /* Grammar has been read. Do some checking */
525 if (nrules == 0)
526 fatal (_("no rules in the input grammar"));
527
528 /* Report any undefined symbols and consider them nonterminals. */
529 symbols_check_defined ();
b7c49edf 530
88bce5a2
AD
531 /* If the user did not define her ENDTOKEN, do it now. */
532 if (!endtoken)
b7c49edf 533 {
88bce5a2
AD
534 endtoken = symbol_get ("$end", empty_location);
535 endtoken->class = token_sym;
536 endtoken->number = 0;
b7c49edf 537 /* Value specified by POSIX. */
88bce5a2 538 endtoken->user_token_number = 0;
b7c49edf
AD
539 }
540
e9955c83
AD
541 /* Insert the initial rule, which line is that of the first rule
542 (not that of the start symbol):
543
88bce5a2 544 accept: %start EOF. */
e9955c83 545 {
17ee7397 546 symbol_list *p = symbol_list_new (accept, empty_location);
8efe435c
AD
547 p->location = grammar->location;
548 p->next = symbol_list_new (startsymbol, empty_location);
88bce5a2 549 p->next->next = symbol_list_new (endtoken, empty_location);
8efe435c 550 p->next->next->next = symbol_list_new (NULL, empty_location);
e9955c83
AD
551 p->next->next->next->next = grammar;
552 nrules += 1;
553 nritems += 3;
554 grammar = p;
555 }
556
17ee7397 557 if (! (nsyms <= SYMBOL_NUMBER_MAXIMUM && nsyms == ntokens + nvars))
35dcf428 558 abort ();
b0c4483e 559
2b81e969 560 xfclose (gram_in);
331dbc1b 561
a70083a3
AD
562 /* Assign the symbols their symbol numbers. Write #defines for the
563 token symbols into FDEFINES if requested. */
2f1afb73 564 symbols_pack ();
93ede233 565
a70083a3
AD
566 /* Convert the grammar into the format described in gram.h. */
567 packgram ();
8419d367 568
17ee7397
PE
569 /* The grammar as a symbol_list is no longer needed. */
570 LIST_FREE (symbol_list, grammar);
a70083a3 571}