]> git.saurik.com Git - bison.git/blame - src/reader.c
* data/glr.c (YYCHK1): Do not assume YYE is in range.
[bison.git] / src / reader.c
CommitLineData
35dcf428 1/* Input parser for Bison
9c4637fa 2
05ac60f3
PE
3 Copyright (C) 1984, 1986, 1989, 1992, 1998, 2000, 2001, 2002, 2003,
4 2005 Free Software Foundation, Inc.
1ff442ca 5
41aca2e0 6 This file is part of Bison, the GNU Compiler Compiler.
1ff442ca 7
41aca2e0
AD
8 Bison is free software; you can redistribute it and/or modify
9 it under the terms of the GNU General Public License as published by
10 the Free Software Foundation; either version 2, or (at your option)
11 any later version.
1ff442ca 12
41aca2e0
AD
13 Bison is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU General Public License for more details.
1ff442ca 17
41aca2e0
AD
18 You should have received a copy of the GNU General Public License
19 along with Bison; see the file COPYING. If not, write to
0fb669f9
PE
20 the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
21 Boston, MA 02110-1301, USA. */
1ff442ca 22
1ff442ca 23#include "system.h"
17ee7397
PE
24
25#include <quotearg.h>
26
27#include "complain.h"
28#include "conflicts.h"
1ff442ca 29#include "files.h"
17ee7397 30#include "getargs.h"
1ff442ca 31#include "gram.h"
17ee7397 32#include "muscle_tab.h"
6c89f1c1 33#include "output.h"
b2ca4022 34#include "reader.h"
17ee7397
PE
35#include "symlist.h"
36#include "symtab.h"
1ff442ca 37
17ee7397 38static symbol_list *grammar = NULL;
d0829076 39static bool start_flag = false;
676385e2 40merger_list *merge_functions;
1ff442ca 41
d0829076
PE
42/* Has %union been seen? */
43bool typed = false;
39a06c25
PE
44
45/* Should rules have a default precedence? */
46bool default_prec = true;
0d533154 47\f
e9955c83
AD
48/*-----------------------.
49| Set the start symbol. |
50`-----------------------*/
1ff442ca 51
e9955c83 52void
a737b216 53grammar_start_symbol_set (symbol *sym, location loc)
1ff442ca
NF
54{
55 if (start_flag)
17ee7397 56 complain_at (loc, _("multiple %s declarations"), "%start");
943819bf
RS
57 else
58 {
d0829076 59 start_flag = true;
a737b216 60 startsymbol = sym;
17ee7397 61 startsymbol_location = loc;
943819bf 62 }
1ff442ca
NF
63}
64
1ff442ca 65
d7020c20 66/*----------------------------------------------------------------.
e9955c83
AD
67| There are two prologues: one before %union, one after. Augment |
68| the current one. |
d7020c20 69`----------------------------------------------------------------*/
1ff442ca 70
e9955c83 71void
17ee7397 72prologue_augment (const char *prologue, location loc)
b6610515 73{
e9955c83
AD
74 struct obstack *oout =
75 !typed ? &pre_prologue_obstack : &post_prologue_obstack;
b6610515 76
05ac60f3 77 obstack_fgrow1 (oout, "]b4_syncline(%d, [[", loc.start.line);
17ee7397
PE
78 MUSCLE_OBSTACK_SGROW (oout,
79 quotearg_style (c_quoting_style, loc.start.file));
6c239755 80 obstack_sgrow (oout, "]])[\n");
e9955c83 81 obstack_sgrow (oout, prologue);
b6610515
RA
82}
83
a70083a3
AD
84\f
85
3e6656f9 86/*-------------------------------------------------------------------.
676385e2
PH
87| Return the merger index for a merging function named NAME, whose |
88| arguments have type TYPE. Records the function, if new, in |
95612cfa 89| MERGER_LIST. |
676385e2
PH
90`-------------------------------------------------------------------*/
91
92static int
17ee7397 93get_merge_function (uniqstr name, uniqstr type, location loc)
676385e2
PH
94{
95 merger_list *syms;
96 merger_list head;
97 int n;
98
99 if (! glr_parser)
100 return 0;
101
102 if (type == NULL)
17ee7397 103 type = uniqstr_new ("");
676385e2
PH
104
105 head.next = merge_functions;
39f41916 106 for (syms = &head, n = 1; syms->next != NULL; syms = syms->next, n += 1)
17ee7397 107 if (UNIQSTR_EQ (name, syms->next->name))
676385e2 108 break;
a5d50994
AD
109 if (syms->next == NULL)
110 {
da2a7671 111 syms->next = xmalloc (sizeof syms->next[0]);
17ee7397
PE
112 syms->next->name = uniqstr_new (name);
113 syms->next->type = uniqstr_new (type);
a5d50994
AD
114 syms->next->next = NULL;
115 merge_functions = head.next;
116 }
17ee7397 117 else if (!UNIQSTR_EQ (type, syms->next->type))
45a8a65d
PE
118 warn_at (loc, _("result type clash on merge function %s: <%s> != <%s>"),
119 name, type, syms->next->type);
676385e2
PH
120 return n;
121}
122
123/*--------------------------------------.
124| Free all merge-function definitions. |
125`--------------------------------------*/
126
127void
128free_merger_functions (void)
129{
130 merger_list *L0;
131 if (! glr_parser)
132 return;
133 L0 = merge_functions;
134 while (L0 != NULL)
135 {
136 merger_list *L1 = L0->next;
137 free (L0);
138 L0 = L1;
139 }
140}
141
a70083a3 142\f
107f7dfb 143/*-------------------------------------------------------------------.
17ee7397 144| Parse the input grammar into a one symbol_list structure. Each |
107f7dfb
AD
145| rule is represented by a sequence of symbols: the left hand side |
146| followed by the contents of the right hand side, followed by a |
147| null pointer instead of a symbol to terminate the rule. The next |
148| symbol is the lhs of the following rule. |
149| |
fdbcd8e2
AD
150| All actions are copied out, labelled by the rule number they apply |
151| to. |
107f7dfb
AD
152| |
153| Bison used to allow some %directives in the rules sections, but |
154| this is no longer consider appropriate: (i) the documented grammar |
155| doesn't claim it, (ii), it would promote bad style, (iii), error |
156| recovery for %directives consists in skipping the junk until a `%' |
157| is seen and helrp synchronizing. This scheme is definitely wrong |
158| in the rules section. |
159`-------------------------------------------------------------------*/
1ff442ca 160
f6d0f937 161/* The (currently) last symbol of GRAMMAR. */
04098407 162static symbol_list *grammar_end = NULL;
f6d0f937 163
52328c6e 164/* Append SYM to the grammar. */
e9955c83 165void
17ee7397 166grammar_symbol_append (symbol *sym, location loc)
f6d0f937 167{
17ee7397 168 symbol_list *p = symbol_list_new (sym, loc);
f6d0f937
AD
169
170 if (grammar_end)
171 grammar_end->next = p;
172 else
173 grammar = p;
174
175 grammar_end = p;
176}
177
8efe435c
AD
178/* The rule currently being defined, and the previous rule.
179 CURRENT_RULE points to the first LHS of the current rule, while
180 PREVIOUS_RULE_END points to the *end* of the previous rule (NULL). */
17ee7397 181symbol_list *current_rule = NULL;
04098407 182static symbol_list *previous_rule_end = NULL;
da4160c3
AD
183
184
8efe435c
AD
185/*----------------------------------------------.
186| Create a new rule for LHS in to the GRAMMAR. |
187`----------------------------------------------*/
da4160c3 188
e9955c83 189void
17ee7397 190grammar_rule_begin (symbol *lhs, location loc)
da4160c3
AD
191{
192 if (!start_flag)
193 {
194 startsymbol = lhs;
17ee7397 195 startsymbol_location = loc;
d0829076 196 start_flag = true;
da4160c3
AD
197 }
198
199 /* Start a new rule and record its lhs. */
200 ++nrules;
201 ++nritems;
202
8efe435c 203 previous_rule_end = grammar_end;
17ee7397 204 grammar_symbol_append (lhs, loc);
da4160c3
AD
205 current_rule = grammar_end;
206
207 /* Mark the rule's lhs as a nonterminal if not already so. */
208
209 if (lhs->class == unknown_sym)
210 {
211 lhs->class = nterm_sym;
212 lhs->number = nvars;
213 ++nvars;
214 }
215 else if (lhs->class == token_sym)
17ee7397 216 complain_at (loc, _("rule given for %s, which is a token"), lhs->tag);
da4160c3
AD
217}
218
e9955c83
AD
219/* Check that the last rule (CURRENT_RULE) is properly defined. For
220 instance, there should be no type clash on the default action. */
221
222static void
223grammar_current_rule_check (void)
224{
17ee7397 225 symbol *lhs = current_rule->sym;
3f4c0f80 226 char const *lhs_type = lhs->type_name;
17ee7397 227 symbol *first_rhs = current_rule->next->sym;
e9955c83
AD
228
229 /* If there is an action, then there is nothing we can do: the user
3f4c0f80 230 is allowed to shoot herself in the foot. */
e9955c83
AD
231 if (current_rule->action)
232 return;
233
3f4c0f80
PE
234 /* Don't worry about the default action if $$ is untyped, since $$'s
235 value can't be used. */
236 if (! lhs_type)
237 return;
238
239 /* If $$ is being set in default way, report if any type mismatch. */
e9955c83
AD
240 if (first_rhs)
241 {
e9955c83 242 const char *rhs_type = first_rhs->type_name ? first_rhs->type_name : "";
17ee7397 243 if (!UNIQSTR_EQ (lhs_type, rhs_type))
e9273511
PE
244 warn_at (current_rule->location,
245 _("type clash on default action: <%s> != <%s>"),
246 lhs_type, rhs_type);
e9955c83
AD
247 }
248 /* Warn if there is no default for $$ but we need one. */
249 else
e9273511
PE
250 warn_at (current_rule->location,
251 _("empty rule for typed nonterminal, and no action"));
e9955c83
AD
252}
253
254
8efe435c
AD
255/*-------------------------------------.
256| End the currently being grown rule. |
257`-------------------------------------*/
e9955c83
AD
258
259void
17ee7397 260grammar_rule_end (location loc)
e9955c83
AD
261{
262 /* Put an empty link in the list to mark the end of this rule */
8efe435c 263 grammar_symbol_append (NULL, grammar_end->location);
17ee7397 264 current_rule->location = loc;
e9955c83
AD
265 grammar_current_rule_check ();
266}
267
268
8efe435c
AD
269/*-------------------------------------------------------------------.
270| The previous action turns out the be a mid-rule action. Attach it |
271| to the current rule, i.e., create a dummy symbol, attach it this |
272| mid-rule action, and append this dummy nonterminal to the current |
273| rule. |
274`-------------------------------------------------------------------*/
1485e106 275
e9955c83 276void
1485e106
AD
277grammar_midrule_action (void)
278{
279 /* Since the action was written out with this rule's number, we must
280 give the new rule this number by inserting the new rule before
281 it. */
282
8efe435c
AD
283 /* Make a DUMMY nonterminal, whose location is that of the midrule
284 action. Create the MIDRULE. */
17ee7397
PE
285 location dummy_location = current_rule->action_location;
286 symbol *dummy = dummy_symbol_get (dummy_location);
287 symbol_list *midrule = symbol_list_new (dummy, dummy_location);
1485e106
AD
288
289 /* Make a new rule, whose body is empty, before the current one, so
290 that the action just read can belong to it. */
291 ++nrules;
292 ++nritems;
8efe435c
AD
293 /* Attach its location and actions to that of the DUMMY. */
294 midrule->location = dummy_location;
295 midrule->action = current_rule->action;
296 midrule->action_location = dummy_location;
1485e106
AD
297 current_rule->action = NULL;
298
8efe435c
AD
299 if (previous_rule_end)
300 previous_rule_end->next = midrule;
1485e106 301 else
8efe435c 302 grammar = midrule;
1485e106 303
8efe435c
AD
304 /* End the dummy's rule. */
305 previous_rule_end = symbol_list_new (NULL, dummy_location);
306 previous_rule_end->next = current_rule;
1485e106 307
8efe435c 308 midrule->next = previous_rule_end;
1485e106 309
8efe435c
AD
310 /* Insert the dummy nonterminal replacing the midrule action into
311 the current rule. */
312 grammar_current_rule_symbol_append (dummy, dummy_location);
1485e106
AD
313}
314
9af3fbce
AD
315/* Set the precedence symbol of the current rule to PRECSYM. */
316
e9955c83 317void
17ee7397 318grammar_current_rule_prec_set (symbol *precsym, location loc)
9af3fbce
AD
319{
320 if (current_rule->ruleprec)
17ee7397 321 complain_at (loc, _("only one %s allowed per rule"), "%prec");
9af3fbce
AD
322 current_rule->ruleprec = precsym;
323}
324
676385e2
PH
325/* Attach dynamic precedence DPREC to the current rule. */
326
327void
17ee7397 328grammar_current_rule_dprec_set (int dprec, location loc)
676385e2
PH
329{
330 if (! glr_parser)
17ee7397 331 warn_at (loc, _("%s affects only GLR parsers"), "%dprec");
676385e2 332 if (dprec <= 0)
17ee7397 333 complain_at (loc, _("%s must be followed by positive number"), "%dprec");
39f41916 334 else if (current_rule->dprec != 0)
17ee7397 335 complain_at (loc, _("only one %s allowed per rule"), "%dprec");
676385e2
PH
336 current_rule->dprec = dprec;
337}
338
339/* Attach a merge function NAME with argument type TYPE to current
340 rule. */
341
342void
17ee7397 343grammar_current_rule_merge_set (uniqstr name, location loc)
676385e2
PH
344{
345 if (! glr_parser)
17ee7397 346 warn_at (loc, _("%s affects only GLR parsers"), "%merge");
39f41916 347 if (current_rule->merger != 0)
17ee7397 348 complain_at (loc, _("only one %s allowed per rule"), "%merge");
39f41916 349 current_rule->merger =
17ee7397 350 get_merge_function (name, current_rule->sym->type_name, loc);
676385e2
PH
351}
352
17ee7397 353/* Attach SYM to the current rule. If needed, move the previous
2e047461
AD
354 action as a mid-rule action. */
355
e9955c83 356void
17ee7397 357grammar_current_rule_symbol_append (symbol *sym, location loc)
2e047461
AD
358{
359 if (current_rule->action)
360 grammar_midrule_action ();
361 ++nritems;
17ee7397 362 grammar_symbol_append (sym, loc);
2e047461
AD
363}
364
2e047461
AD
365/* Attach an ACTION to the current rule. If needed, move the previous
366 action as a mid-rule action. */
367
e9955c83 368void
17ee7397 369grammar_current_rule_action_append (const char *action, location loc)
2e047461
AD
370{
371 if (current_rule->action)
372 grammar_midrule_action ();
373 current_rule->action = action;
17ee7397 374 current_rule->action_location = loc;
2e047461
AD
375}
376
a70083a3 377\f
a70083a3
AD
378/*---------------------------------------------------------------.
379| Convert the rules into the representation using RRHS, RLHS and |
d9b739c3 380| RITEM. |
a70083a3 381`---------------------------------------------------------------*/
1ff442ca 382
4a120d45 383static void
118fb205 384packgram (void)
1ff442ca 385{
9222837b 386 unsigned int itemno = 0;
17ee7397
PE
387 rule_number ruleno = 0;
388 symbol_list *p = grammar;
1ff442ca 389
da2a7671
PE
390 ritem = xnmalloc (nritems, sizeof *ritem);
391 rules = xnmalloc (nrules, sizeof *rules);
1ff442ca 392
1ff442ca
NF
393 while (p)
394 {
17ee7397 395 symbol *ruleprec = p->ruleprec;
d7e1f00c 396 rules[ruleno].user_number = ruleno;
c3b407f4 397 rules[ruleno].number = ruleno;
bba97eb2 398 rules[ruleno].lhs = p->sym;
99013900 399 rules[ruleno].rhs = ritem + itemno;
da2a7671
PE
400 rules[ruleno].prec = NULL;
401 rules[ruleno].dprec = p->dprec;
402 rules[ruleno].merger = p->merger;
403 rules[ruleno].precsym = NULL;
8efe435c 404 rules[ruleno].location = p->location;
b4afb6bb 405 rules[ruleno].useful = true;
1a2b5d37 406 rules[ruleno].action = p->action;
8efe435c 407 rules[ruleno].action_location = p->action_location;
1ff442ca
NF
408
409 p = p->next;
410 while (p && p->sym)
411 {
17ee7397 412 /* item_number = symbol_number.
5fbb0954 413 But the former needs to contain more: negative rule numbers. */
a49aecd5 414 ritem[itemno++] = symbol_number_as_item_number (p->sym->number);
1ff442ca
NF
415 /* A rule gets by default the precedence and associativity
416 of the last token in it. */
39a06c25 417 if (p->sym->class == token_sym && default_prec)
03b31c0c 418 rules[ruleno].prec = p->sym;
a70083a3
AD
419 if (p)
420 p = p->next;
1ff442ca
NF
421 }
422
423 /* If this rule has a %prec,
a70083a3 424 the specified symbol's precedence replaces the default. */
1ff442ca
NF
425 if (ruleprec)
426 {
03b31c0c
AD
427 rules[ruleno].precsym = ruleprec;
428 rules[ruleno].prec = ruleprec;
1ff442ca 429 }
4b3d3a8e 430 ritem[itemno++] = rule_number_as_item_number (ruleno);
f3849179 431 ++ruleno;
1ff442ca 432
a70083a3
AD
433 if (p)
434 p = p->next;
1ff442ca
NF
435 }
436
35dcf428
PE
437 if (itemno != nritems)
438 abort ();
3067fbef 439
273a74fa 440 if (trace_flag & trace_sets)
3067fbef 441 ritem_print (stderr);
1ff442ca 442}
a70083a3 443\f
fdbcd8e2
AD
444/*------------------------------------------------------------------.
445| Read in the grammar specification and record it in the format |
446| described in gram.h. All actions are copied into ACTION_OBSTACK, |
447| in each case forming the body of a C function (YYACTION) which |
448| contains a switch statement to decide which action to execute. |
449`------------------------------------------------------------------*/
a70083a3
AD
450
451void
452reader (void)
453{
a70083a3 454 /* Initialize the symbol table. */
db8837cb 455 symbols_new ();
b6610515 456
88bce5a2
AD
457 /* Construct the accept symbol. */
458 accept = symbol_get ("$accept", empty_location);
459 accept->class = nterm_sym;
460 accept->number = nvars++;
30171f79 461
a70083a3 462 /* Construct the error token */
39f41916 463 errtoken = symbol_get ("error", empty_location);
d7020c20 464 errtoken->class = token_sym;
72a23c97 465 errtoken->number = ntokens++;
b6610515 466
a70083a3
AD
467 /* Construct a token that represents all undefined literal tokens.
468 It is always token number 2. */
88bce5a2 469 undeftoken = symbol_get ("$undefined", empty_location);
d7020c20 470 undeftoken->class = token_sym;
72a23c97 471 undeftoken->number = ntokens++;
a70083a3 472
331dbc1b 473 /* Initialize the obstacks. */
0dd1580a
RA
474 obstack_init (&pre_prologue_obstack);
475 obstack_init (&post_prologue_obstack);
331dbc1b 476
95612cfa 477 finput = xfopen (grammar_file, "r");
e9955c83
AD
478 gram_in = finput;
479
473d0a75
AD
480 gram__flex_debug = trace_flag & trace_scan;
481 gram_debug = trace_flag & trace_parse;
1d6412ad 482 scanner_initialize ();
78c3da9e 483 gram_parse ();
331dbc1b 484
b275314e
AD
485 /* If something went wrong during the parsing, don't try to
486 continue. */
b4afb6bb 487 if (complaint_issued)
f956c304 488 return;
b275314e 489
e9955c83
AD
490 /* Grammar has been read. Do some checking */
491 if (nrules == 0)
492 fatal (_("no rules in the input grammar"));
493
494 /* Report any undefined symbols and consider them nonterminals. */
495 symbols_check_defined ();
b7c49edf 496
88bce5a2
AD
497 /* If the user did not define her ENDTOKEN, do it now. */
498 if (!endtoken)
b7c49edf 499 {
88bce5a2
AD
500 endtoken = symbol_get ("$end", empty_location);
501 endtoken->class = token_sym;
502 endtoken->number = 0;
b7c49edf 503 /* Value specified by POSIX. */
88bce5a2 504 endtoken->user_token_number = 0;
b7c49edf
AD
505 }
506
e9955c83
AD
507 /* Insert the initial rule, which line is that of the first rule
508 (not that of the start symbol):
509
88bce5a2 510 accept: %start EOF. */
e9955c83 511 {
17ee7397 512 symbol_list *p = symbol_list_new (accept, empty_location);
8efe435c
AD
513 p->location = grammar->location;
514 p->next = symbol_list_new (startsymbol, empty_location);
88bce5a2 515 p->next->next = symbol_list_new (endtoken, empty_location);
8efe435c 516 p->next->next->next = symbol_list_new (NULL, empty_location);
e9955c83
AD
517 p->next->next->next->next = grammar;
518 nrules += 1;
519 nritems += 3;
520 grammar = p;
521 }
522
17ee7397 523 if (! (nsyms <= SYMBOL_NUMBER_MAXIMUM && nsyms == ntokens + nvars))
35dcf428 524 abort ();
b0c4483e 525
331dbc1b
AD
526 xfclose (finput);
527
a70083a3
AD
528 /* Assign the symbols their symbol numbers. Write #defines for the
529 token symbols into FDEFINES if requested. */
2f1afb73 530 symbols_pack ();
93ede233 531
a70083a3
AD
532 /* Convert the grammar into the format described in gram.h. */
533 packgram ();
8419d367 534
17ee7397
PE
535 /* The grammar as a symbol_list is no longer needed. */
536 LIST_FREE (symbol_list, grammar);
a70083a3 537}