]> git.saurik.com Git - bison.git/blame - src/reader.c
* data/glr.c (yyreportSyntaxError): Fix off-by-one error in
[bison.git] / src / reader.c
CommitLineData
35dcf428 1/* Input parser for Bison
9c4637fa 2
05ac60f3 3 Copyright (C) 1984, 1986, 1989, 1992, 1998, 2000, 2001, 2002, 2003,
378f4bd8 4 2005, 2006 Free Software Foundation, Inc.
1ff442ca 5
41aca2e0 6 This file is part of Bison, the GNU Compiler Compiler.
1ff442ca 7
41aca2e0
AD
8 Bison is free software; you can redistribute it and/or modify
9 it under the terms of the GNU General Public License as published by
10 the Free Software Foundation; either version 2, or (at your option)
11 any later version.
1ff442ca 12
41aca2e0
AD
13 Bison is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU General Public License for more details.
1ff442ca 17
41aca2e0
AD
18 You should have received a copy of the GNU General Public License
19 along with Bison; see the file COPYING. If not, write to
0fb669f9
PE
20 the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
21 Boston, MA 02110-1301, USA. */
1ff442ca 22
2cec9080 23#include <config.h>
1ff442ca 24#include "system.h"
17ee7397
PE
25
26#include <quotearg.h>
27
28#include "complain.h"
29#include "conflicts.h"
1ff442ca 30#include "files.h"
17ee7397 31#include "getargs.h"
1ff442ca 32#include "gram.h"
17ee7397 33#include "muscle_tab.h"
b2ca4022 34#include "reader.h"
17ee7397
PE
35#include "symlist.h"
36#include "symtab.h"
1ff442ca 37
02d12d0d
PE
38static void check_and_convert_grammar (void);
39
17ee7397 40static symbol_list *grammar = NULL;
d0829076 41static bool start_flag = false;
676385e2 42merger_list *merge_functions;
1ff442ca 43
affac613 44/* Was %union seen? */
d0829076 45bool typed = false;
39a06c25
PE
46
47/* Should rules have a default precedence? */
48bool default_prec = true;
0d533154 49\f
e9955c83
AD
50/*-----------------------.
51| Set the start symbol. |
52`-----------------------*/
1ff442ca 53
e9955c83 54void
a737b216 55grammar_start_symbol_set (symbol *sym, location loc)
1ff442ca
NF
56{
57 if (start_flag)
17ee7397 58 complain_at (loc, _("multiple %s declarations"), "%start");
943819bf
RS
59 else
60 {
d0829076 61 start_flag = true;
a737b216 62 startsymbol = sym;
17ee7397 63 startsymbol_location = loc;
943819bf 64 }
1ff442ca
NF
65}
66
1ff442ca 67
d7020c20 68/*----------------------------------------------------------------.
e9955c83
AD
69| There are two prologues: one before %union, one after. Augment |
70| the current one. |
d7020c20 71`----------------------------------------------------------------*/
1ff442ca 72
e9955c83 73void
17ee7397 74prologue_augment (const char *prologue, location loc)
b6610515 75{
e9955c83
AD
76 struct obstack *oout =
77 !typed ? &pre_prologue_obstack : &post_prologue_obstack;
b6610515 78
05ac60f3 79 obstack_fgrow1 (oout, "]b4_syncline(%d, [[", loc.start.line);
17ee7397
PE
80 MUSCLE_OBSTACK_SGROW (oout,
81 quotearg_style (c_quoting_style, loc.start.file));
6c239755 82 obstack_sgrow (oout, "]])[\n");
e9955c83 83 obstack_sgrow (oout, prologue);
b6610515
RA
84}
85
a70083a3
AD
86\f
87
3e6656f9 88/*-------------------------------------------------------------------.
676385e2
PH
89| Return the merger index for a merging function named NAME, whose |
90| arguments have type TYPE. Records the function, if new, in |
95612cfa 91| MERGER_LIST. |
676385e2
PH
92`-------------------------------------------------------------------*/
93
94static int
17ee7397 95get_merge_function (uniqstr name, uniqstr type, location loc)
676385e2
PH
96{
97 merger_list *syms;
98 merger_list head;
99 int n;
100
101 if (! glr_parser)
102 return 0;
103
104 if (type == NULL)
17ee7397 105 type = uniqstr_new ("");
676385e2
PH
106
107 head.next = merge_functions;
affac613 108 for (syms = &head, n = 1; syms->next; syms = syms->next, n += 1)
17ee7397 109 if (UNIQSTR_EQ (name, syms->next->name))
676385e2 110 break;
a5d50994
AD
111 if (syms->next == NULL)
112 {
da2a7671 113 syms->next = xmalloc (sizeof syms->next[0]);
17ee7397
PE
114 syms->next->name = uniqstr_new (name);
115 syms->next->type = uniqstr_new (type);
a5d50994
AD
116 syms->next->next = NULL;
117 merge_functions = head.next;
118 }
17ee7397 119 else if (!UNIQSTR_EQ (type, syms->next->type))
45a8a65d
PE
120 warn_at (loc, _("result type clash on merge function %s: <%s> != <%s>"),
121 name, type, syms->next->type);
676385e2
PH
122 return n;
123}
124
125/*--------------------------------------.
126| Free all merge-function definitions. |
127`--------------------------------------*/
128
129void
130free_merger_functions (void)
131{
affac613
AD
132 merger_list *L0 = merge_functions;
133 while (L0)
676385e2
PH
134 {
135 merger_list *L1 = L0->next;
136 free (L0);
137 L0 = L1;
138 }
139}
140
a70083a3 141\f
107f7dfb 142/*-------------------------------------------------------------------.
17ee7397 143| Parse the input grammar into a one symbol_list structure. Each |
107f7dfb
AD
144| rule is represented by a sequence of symbols: the left hand side |
145| followed by the contents of the right hand side, followed by a |
146| null pointer instead of a symbol to terminate the rule. The next |
147| symbol is the lhs of the following rule. |
148| |
fdbcd8e2
AD
149| All actions are copied out, labelled by the rule number they apply |
150| to. |
107f7dfb 151`-------------------------------------------------------------------*/
1ff442ca 152
f6d0f937 153/* The (currently) last symbol of GRAMMAR. */
04098407 154static symbol_list *grammar_end = NULL;
f6d0f937 155
52328c6e 156/* Append SYM to the grammar. */
8f3596a6 157static void
17ee7397 158grammar_symbol_append (symbol *sym, location loc)
f6d0f937 159{
17ee7397 160 symbol_list *p = symbol_list_new (sym, loc);
f6d0f937
AD
161
162 if (grammar_end)
163 grammar_end->next = p;
164 else
165 grammar = p;
166
167 grammar_end = p;
8f3596a6 168
e3233bf6 169 /* A null SYM stands for an end of rule; it is not an actual
8f3596a6
AD
170 part of it. */
171 if (sym)
172 ++nritems;
f6d0f937
AD
173}
174
8efe435c
AD
175/* The rule currently being defined, and the previous rule.
176 CURRENT_RULE points to the first LHS of the current rule, while
177 PREVIOUS_RULE_END points to the *end* of the previous rule (NULL). */
17ee7397 178symbol_list *current_rule = NULL;
04098407 179static symbol_list *previous_rule_end = NULL;
da4160c3
AD
180
181
8efe435c
AD
182/*----------------------------------------------.
183| Create a new rule for LHS in to the GRAMMAR. |
184`----------------------------------------------*/
da4160c3 185
e9955c83 186void
8f3596a6 187grammar_current_rule_begin (symbol *lhs, location loc)
da4160c3
AD
188{
189 if (!start_flag)
190 {
191 startsymbol = lhs;
17ee7397 192 startsymbol_location = loc;
d0829076 193 start_flag = true;
da4160c3
AD
194 }
195
196 /* Start a new rule and record its lhs. */
197 ++nrules;
8efe435c 198 previous_rule_end = grammar_end;
17ee7397 199 grammar_symbol_append (lhs, loc);
da4160c3
AD
200 current_rule = grammar_end;
201
202 /* Mark the rule's lhs as a nonterminal if not already so. */
da4160c3
AD
203 if (lhs->class == unknown_sym)
204 {
205 lhs->class = nterm_sym;
206 lhs->number = nvars;
207 ++nvars;
208 }
209 else if (lhs->class == token_sym)
17ee7397 210 complain_at (loc, _("rule given for %s, which is a token"), lhs->tag);
da4160c3
AD
211}
212
affac613 213
d40ba6c2
PE
214/*----------------------------------------------------------------------.
215| A symbol should be used if it has a destructor, or if it is a |
216| mid-rule symbol (i.e., the generated LHS replacing a mid-rule |
217| action) that was assigned to, as in "exp: { $$ = 1; } { $$ = $1; }". |
218`----------------------------------------------------------------------*/
84866159
AD
219
220static bool
d40ba6c2 221symbol_should_be_used (symbol_list const *s)
84866159 222{
d40ba6c2 223 return (s->sym->destructor
c66dfadd 224 || (s->midrule && s->midrule->used));
84866159
AD
225}
226
8f3596a6
AD
227/*----------------------------------------------------------------.
228| Check that the rule R is properly defined. For instance, there |
229| should be no type clash on the default action. |
230`----------------------------------------------------------------*/
e9955c83
AD
231
232static void
8f3596a6 233grammar_rule_check (const symbol_list *r)
e9955c83 234{
affac613 235 /* Type check.
e9955c83 236
affac613
AD
237 If there is an action, then there is nothing we can do: the user
238 is allowed to shoot herself in the foot.
3f4c0f80 239
affac613
AD
240 Don't worry about the default action if $$ is untyped, since $$'s
241 value can't be used. */
8f3596a6 242 if (!r->action && r->sym->type_name)
e9955c83 243 {
8f3596a6 244 symbol *first_rhs = r->next->sym;
affac613
AD
245 /* If $$ is being set in default way, report if any type mismatch. */
246 if (first_rhs)
247 {
8f3596a6 248 char const *lhs_type = r->sym->type_name;
affac613
AD
249 const char *rhs_type =
250 first_rhs->type_name ? first_rhs->type_name : "";
251 if (!UNIQSTR_EQ (lhs_type, rhs_type))
8f3596a6 252 warn_at (r->location,
affac613
AD
253 _("type clash on default action: <%s> != <%s>"),
254 lhs_type, rhs_type);
255 }
256 /* Warn if there is no default for $$ but we need one. */
257 else
8f3596a6 258 warn_at (r->location,
affac613
AD
259 _("empty rule for typed nonterminal, and no action"));
260 }
e3233bf6 261
d40ba6c2 262 /* Check that symbol values that should be used are in fact used. */
8f3596a6 263 {
668c5d19 264 symbol_list const *l = r;
8f3596a6
AD
265 int n = 0;
266 for (; l && l->sym; l = l->next, ++n)
267 if (! (l->used
d40ba6c2 268 || !symbol_should_be_used (l)
8f3596a6 269 /* The default action, $$ = $1, `uses' both. */
668c5d19
PE
270 || (!r->action && (n == 0 || n == 1))))
271 {
272 if (n)
273 warn_at (r->location, _("unused value: $%d"), n);
274 else
275 warn_at (r->location, _("unset value: $$"));
276 }
8f3596a6 277 }
e9955c83
AD
278}
279
280
8efe435c
AD
281/*-------------------------------------.
282| End the currently being grown rule. |
283`-------------------------------------*/
e9955c83
AD
284
285void
8f3596a6 286grammar_current_rule_end (location loc)
e9955c83
AD
287{
288 /* Put an empty link in the list to mark the end of this rule */
8efe435c 289 grammar_symbol_append (NULL, grammar_end->location);
17ee7397 290 current_rule->location = loc;
8f3596a6 291 grammar_rule_check (current_rule);
e9955c83
AD
292}
293
294
8efe435c
AD
295/*-------------------------------------------------------------------.
296| The previous action turns out the be a mid-rule action. Attach it |
297| to the current rule, i.e., create a dummy symbol, attach it this |
298| mid-rule action, and append this dummy nonterminal to the current |
299| rule. |
300`-------------------------------------------------------------------*/
1485e106 301
6b702268 302void
1485e106
AD
303grammar_midrule_action (void)
304{
305 /* Since the action was written out with this rule's number, we must
306 give the new rule this number by inserting the new rule before
307 it. */
308
8efe435c
AD
309 /* Make a DUMMY nonterminal, whose location is that of the midrule
310 action. Create the MIDRULE. */
17ee7397
PE
311 location dummy_location = current_rule->action_location;
312 symbol *dummy = dummy_symbol_get (dummy_location);
313 symbol_list *midrule = symbol_list_new (dummy, dummy_location);
1485e106
AD
314
315 /* Make a new rule, whose body is empty, before the current one, so
316 that the action just read can belong to it. */
317 ++nrules;
318 ++nritems;
8efe435c
AD
319 /* Attach its location and actions to that of the DUMMY. */
320 midrule->location = dummy_location;
321 midrule->action = current_rule->action;
322 midrule->action_location = dummy_location;
1485e106 323 current_rule->action = NULL;
378f4bd8
AD
324 /* If $$ was used in the action, the LHS of the enclosing rule was
325 incorrectly flagged as used. */
326 midrule->used = current_rule->used;
327 current_rule->used = false;
1485e106 328
8efe435c
AD
329 if (previous_rule_end)
330 previous_rule_end->next = midrule;
1485e106 331 else
8efe435c 332 grammar = midrule;
1485e106 333
8efe435c 334 /* End the dummy's rule. */
84866159
AD
335 midrule->next = symbol_list_new (NULL, dummy_location);
336 grammar_rule_check (midrule);
337 midrule->next->next = current_rule;
1485e106 338
84866159 339 previous_rule_end = midrule->next;
1485e106 340
8efe435c 341 /* Insert the dummy nonterminal replacing the midrule action into
84866159 342 the current rule. Bind it to its dedicated rule. */
8efe435c 343 grammar_current_rule_symbol_append (dummy, dummy_location);
6ec2c0f2 344 grammar_end->midrule = midrule;
1485e106
AD
345}
346
9af3fbce
AD
347/* Set the precedence symbol of the current rule to PRECSYM. */
348
e9955c83 349void
17ee7397 350grammar_current_rule_prec_set (symbol *precsym, location loc)
9af3fbce
AD
351{
352 if (current_rule->ruleprec)
17ee7397 353 complain_at (loc, _("only one %s allowed per rule"), "%prec");
9af3fbce
AD
354 current_rule->ruleprec = precsym;
355}
356
676385e2
PH
357/* Attach dynamic precedence DPREC to the current rule. */
358
359void
17ee7397 360grammar_current_rule_dprec_set (int dprec, location loc)
676385e2
PH
361{
362 if (! glr_parser)
17ee7397 363 warn_at (loc, _("%s affects only GLR parsers"), "%dprec");
676385e2 364 if (dprec <= 0)
17ee7397 365 complain_at (loc, _("%s must be followed by positive number"), "%dprec");
39f41916 366 else if (current_rule->dprec != 0)
17ee7397 367 complain_at (loc, _("only one %s allowed per rule"), "%dprec");
676385e2
PH
368 current_rule->dprec = dprec;
369}
370
371/* Attach a merge function NAME with argument type TYPE to current
372 rule. */
373
374void
17ee7397 375grammar_current_rule_merge_set (uniqstr name, location loc)
676385e2
PH
376{
377 if (! glr_parser)
17ee7397 378 warn_at (loc, _("%s affects only GLR parsers"), "%merge");
39f41916 379 if (current_rule->merger != 0)
17ee7397 380 complain_at (loc, _("only one %s allowed per rule"), "%merge");
39f41916 381 current_rule->merger =
17ee7397 382 get_merge_function (name, current_rule->sym->type_name, loc);
676385e2
PH
383}
384
17ee7397 385/* Attach SYM to the current rule. If needed, move the previous
2e047461
AD
386 action as a mid-rule action. */
387
e9955c83 388void
17ee7397 389grammar_current_rule_symbol_append (symbol *sym, location loc)
2e047461
AD
390{
391 if (current_rule->action)
392 grammar_midrule_action ();
17ee7397 393 grammar_symbol_append (sym, loc);
2e047461
AD
394}
395
6b702268 396/* Attach an ACTION to the current rule. */
2e047461 397
e9955c83 398void
17ee7397 399grammar_current_rule_action_append (const char *action, location loc)
2e047461 400{
6b702268
PE
401 /* There's no need to invoke grammar_midrule_action here, since the
402 scanner already did it if necessary. */
2e047461 403 current_rule->action = action;
17ee7397 404 current_rule->action_location = loc;
2e047461
AD
405}
406
a70083a3 407\f
a70083a3
AD
408/*---------------------------------------------------------------.
409| Convert the rules into the representation using RRHS, RLHS and |
d9b739c3 410| RITEM. |
a70083a3 411`---------------------------------------------------------------*/
1ff442ca 412
4a120d45 413static void
118fb205 414packgram (void)
1ff442ca 415{
9222837b 416 unsigned int itemno = 0;
17ee7397
PE
417 rule_number ruleno = 0;
418 symbol_list *p = grammar;
1ff442ca 419
e9ad4aec
PE
420 ritem = xnmalloc (nritems + 1, sizeof *ritem);
421
422 /* This sentinel is used by build_relations in gram.c. */
423 *ritem++ = 0;
424
da2a7671 425 rules = xnmalloc (nrules, sizeof *rules);
1ff442ca 426
1ff442ca
NF
427 while (p)
428 {
17ee7397 429 symbol *ruleprec = p->ruleprec;
d7e1f00c 430 rules[ruleno].user_number = ruleno;
c3b407f4 431 rules[ruleno].number = ruleno;
bba97eb2 432 rules[ruleno].lhs = p->sym;
99013900 433 rules[ruleno].rhs = ritem + itemno;
da2a7671
PE
434 rules[ruleno].prec = NULL;
435 rules[ruleno].dprec = p->dprec;
436 rules[ruleno].merger = p->merger;
437 rules[ruleno].precsym = NULL;
8efe435c 438 rules[ruleno].location = p->location;
b4afb6bb 439 rules[ruleno].useful = true;
1a2b5d37 440 rules[ruleno].action = p->action;
8efe435c 441 rules[ruleno].action_location = p->action_location;
1ff442ca
NF
442
443 p = p->next;
444 while (p && p->sym)
445 {
17ee7397 446 /* item_number = symbol_number.
5fbb0954 447 But the former needs to contain more: negative rule numbers. */
a49aecd5 448 ritem[itemno++] = symbol_number_as_item_number (p->sym->number);
1ff442ca
NF
449 /* A rule gets by default the precedence and associativity
450 of the last token in it. */
39a06c25 451 if (p->sym->class == token_sym && default_prec)
03b31c0c 452 rules[ruleno].prec = p->sym;
a70083a3
AD
453 if (p)
454 p = p->next;
1ff442ca
NF
455 }
456
457 /* If this rule has a %prec,
a70083a3 458 the specified symbol's precedence replaces the default. */
1ff442ca
NF
459 if (ruleprec)
460 {
03b31c0c
AD
461 rules[ruleno].precsym = ruleprec;
462 rules[ruleno].prec = ruleprec;
1ff442ca 463 }
4b3d3a8e 464 ritem[itemno++] = rule_number_as_item_number (ruleno);
f3849179 465 ++ruleno;
1ff442ca 466
a70083a3
AD
467 if (p)
468 p = p->next;
1ff442ca
NF
469 }
470
68cae94e 471 assert (itemno == nritems);
3067fbef 472
273a74fa 473 if (trace_flag & trace_sets)
3067fbef 474 ritem_print (stderr);
1ff442ca 475}
a70083a3 476\f
fdbcd8e2
AD
477/*------------------------------------------------------------------.
478| Read in the grammar specification and record it in the format |
479| described in gram.h. All actions are copied into ACTION_OBSTACK, |
480| in each case forming the body of a C function (YYACTION) which |
481| contains a switch statement to decide which action to execute. |
482`------------------------------------------------------------------*/
a70083a3
AD
483
484void
485reader (void)
486{
a70083a3 487 /* Initialize the symbol table. */
db8837cb 488 symbols_new ();
b6610515 489
88bce5a2
AD
490 /* Construct the accept symbol. */
491 accept = symbol_get ("$accept", empty_location);
492 accept->class = nterm_sym;
493 accept->number = nvars++;
30171f79 494
a70083a3 495 /* Construct the error token */
39f41916 496 errtoken = symbol_get ("error", empty_location);
d7020c20 497 errtoken->class = token_sym;
72a23c97 498 errtoken->number = ntokens++;
b6610515 499
a70083a3
AD
500 /* Construct a token that represents all undefined literal tokens.
501 It is always token number 2. */
88bce5a2 502 undeftoken = symbol_get ("$undefined", empty_location);
d7020c20 503 undeftoken->class = token_sym;
72a23c97 504 undeftoken->number = ntokens++;
a70083a3 505
331dbc1b 506 /* Initialize the obstacks. */
0dd1580a
RA
507 obstack_init (&pre_prologue_obstack);
508 obstack_init (&post_prologue_obstack);
331dbc1b 509
2b81e969 510 gram_in = xfopen (grammar_file, "r");
e9955c83 511
473d0a75
AD
512 gram__flex_debug = trace_flag & trace_scan;
513 gram_debug = trace_flag & trace_parse;
1d6412ad 514 scanner_initialize ();
78c3da9e 515 gram_parse ();
331dbc1b 516
02d12d0d
PE
517 if (! complaint_issued)
518 check_and_convert_grammar ();
519
520 xfclose (gram_in);
521}
522
b275314e 523
02d12d0d
PE
524/*-------------------------------------------------------------.
525| Check the grammar that has just been read, and convert it to |
526| internal form. |
527`-------------------------------------------------------------*/
528
529static void
530check_and_convert_grammar (void)
531{
532 /* Grammar has been read. Do some checking. */
e9955c83
AD
533 if (nrules == 0)
534 fatal (_("no rules in the input grammar"));
535
536 /* Report any undefined symbols and consider them nonterminals. */
537 symbols_check_defined ();
b7c49edf 538
88bce5a2
AD
539 /* If the user did not define her ENDTOKEN, do it now. */
540 if (!endtoken)
b7c49edf 541 {
88bce5a2
AD
542 endtoken = symbol_get ("$end", empty_location);
543 endtoken->class = token_sym;
544 endtoken->number = 0;
b7c49edf 545 /* Value specified by POSIX. */
88bce5a2 546 endtoken->user_token_number = 0;
b7c49edf
AD
547 }
548
02d12d0d 549 /* Insert the initial rule, whose line is that of the first rule
e9955c83
AD
550 (not that of the start symbol):
551
88bce5a2 552 accept: %start EOF. */
e9955c83 553 {
17ee7397 554 symbol_list *p = symbol_list_new (accept, empty_location);
8efe435c
AD
555 p->location = grammar->location;
556 p->next = symbol_list_new (startsymbol, empty_location);
88bce5a2 557 p->next->next = symbol_list_new (endtoken, empty_location);
8efe435c 558 p->next->next->next = symbol_list_new (NULL, empty_location);
e9955c83
AD
559 p->next->next->next->next = grammar;
560 nrules += 1;
561 nritems += 3;
562 grammar = p;
563 }
564
68cae94e 565 assert (nsyms <= SYMBOL_NUMBER_MAXIMUM && nsyms == ntokens + nvars);
b0c4483e 566
a70083a3
AD
567 /* Assign the symbols their symbol numbers. Write #defines for the
568 token symbols into FDEFINES if requested. */
2f1afb73 569 symbols_pack ();
93ede233 570
a70083a3
AD
571 /* Convert the grammar into the format described in gram.h. */
572 packgram ();
8419d367 573
17ee7397
PE
574 /* The grammar as a symbol_list is no longer needed. */
575 LIST_FREE (symbol_list, grammar);
a70083a3 576}