]> git.saurik.com Git - bison.git/blame - src/reader.c
and Akim Demaille <akim@epita.fr>
[bison.git] / src / reader.c
CommitLineData
1ff442ca 1/* Input parser for bison
76514394 2 Copyright (C) 1984, 1986, 1989, 1992, 1998, 2000, 2001, 2002
a70083a3 3 Free Software Foundation, Inc.
1ff442ca 4
41aca2e0 5 This file is part of Bison, the GNU Compiler Compiler.
1ff442ca 6
41aca2e0
AD
7 Bison is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 2, or (at your option)
10 any later version.
1ff442ca 11
41aca2e0
AD
12 Bison is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
1ff442ca 16
41aca2e0
AD
17 You should have received a copy of the GNU General Public License
18 along with Bison; see the file COPYING. If not, write to
19 the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
20 Boston, MA 02111-1307, USA. */
1ff442ca
NF
21
22
1ff442ca 23#include "system.h"
2a91a95e
AD
24#include "quotearg.h"
25#include "quote.h"
ceed8467 26#include "getargs.h"
1ff442ca 27#include "files.h"
1ff442ca 28#include "symtab.h"
56c47203 29#include "symlist.h"
82b6d266 30#include "options.h"
1ff442ca 31#include "gram.h"
a0f6b076 32#include "complain.h"
6c89f1c1 33#include "output.h"
b2ca4022 34#include "reader.h"
340ef489 35#include "conflicts.h"
11d82f03 36#include "muscle_tab.h"
1ff442ca 37
1ff442ca 38int lineno;
56c47203 39static symbol_list_t *grammar = NULL;
280a38c3 40static int start_flag = 0;
1ff442ca 41
d7020c20 42/* Nonzero if %union has been seen. */
e9955c83 43int typed = 0;
0d533154 44\f
e9955c83
AD
45/*-----------------------.
46| Set the start symbol. |
47`-----------------------*/
1ff442ca 48
e9955c83 49void
8efe435c 50grammar_start_symbol_set (symbol_t *s, location_t l)
1ff442ca
NF
51{
52 if (start_flag)
27821bff 53 complain (_("multiple %s declarations"), "%start");
943819bf
RS
54 else
55 {
56 start_flag = 1;
e9955c83 57 startsymbol = s;
8efe435c 58 startsymbol_location = l;
943819bf 59 }
1ff442ca
NF
60}
61
1ff442ca 62
d7020c20 63/*----------------------------------------------------------------.
e9955c83
AD
64| There are two prologues: one before %union, one after. Augment |
65| the current one. |
d7020c20 66`----------------------------------------------------------------*/
1ff442ca 67
e9955c83 68void
0c15323d 69prologue_augment (const char *prologue, location_t location)
b6610515 70{
e9955c83
AD
71 struct obstack *oout =
72 !typed ? &pre_prologue_obstack : &post_prologue_obstack;
b6610515 73
e9955c83 74 if (!no_lines_flag)
b6610515 75 {
e9955c83 76 obstack_fgrow2 (oout, muscle_find ("linef"),
0c15323d
AD
77 location.first_line,
78 quotearg_style (c_quoting_style,
79 muscle_find ("filename")));
b6610515 80 }
e9955c83 81 obstack_sgrow (oout, prologue);
b6610515
RA
82}
83
2ba3b73c 84
426cf563 85
a870c567 86
e9955c83
AD
87/*----------------------.
88| Handle the epilogue. |
89`----------------------*/
426cf563 90
e9955c83 91void
0c15323d 92epilogue_set (const char *epilogue, location_t location)
2ba3b73c 93{
e9955c83 94 if (!no_lines_flag)
1ff442ca 95 {
592e8d4d 96 obstack_fgrow2 (&muscle_obstack, muscle_find ("linef"),
0c15323d
AD
97 location.first_line,
98 quotearg_style (c_quoting_style,
99 muscle_find ("filename")));
1ff442ca 100 }
592e8d4d
AD
101 obstack_sgrow (&muscle_obstack, epilogue);
102 obstack_1grow (&muscle_obstack, 0);
103 muscle_insert ("epilogue", obstack_finish (&muscle_obstack));
1ff442ca 104}
1ff442ca 105
a70083a3 106
a70083a3
AD
107\f
108
a70083a3
AD
109/*-------------------------------------------------------------------.
110| Generate a dummy symbol, a nonterminal, whose name cannot conflict |
111| with the user's names. |
112`-------------------------------------------------------------------*/
1ff442ca 113
db8837cb 114static symbol_t *
ee000ba4 115gensym (location_t location)
1ff442ca 116{
274d42ce
AD
117 /* Incremented for each generated symbol */
118 static int gensym_count = 0;
119 static char buf[256];
120
db8837cb 121 symbol_t *sym;
1ff442ca 122
274d42ce 123 sprintf (buf, "@%d", ++gensym_count);
ee000ba4 124 sym = getsym (buf, location);
d7020c20 125 sym->class = nterm_sym;
d9b739c3 126 sym->number = nvars++;
36281465 127 return sym;
1ff442ca 128}
a70083a3 129\f
107f7dfb 130/*-------------------------------------------------------------------.
56c47203 131| Parse the input grammar into a one symbol_list_t structure. Each |
107f7dfb
AD
132| rule is represented by a sequence of symbols: the left hand side |
133| followed by the contents of the right hand side, followed by a |
134| null pointer instead of a symbol to terminate the rule. The next |
135| symbol is the lhs of the following rule. |
136| |
fdbcd8e2
AD
137| All actions are copied out, labelled by the rule number they apply |
138| to. |
107f7dfb
AD
139| |
140| Bison used to allow some %directives in the rules sections, but |
141| this is no longer consider appropriate: (i) the documented grammar |
142| doesn't claim it, (ii), it would promote bad style, (iii), error |
143| recovery for %directives consists in skipping the junk until a `%' |
144| is seen and helrp synchronizing. This scheme is definitely wrong |
145| in the rules section. |
146`-------------------------------------------------------------------*/
1ff442ca 147
f6d0f937 148/* The (currently) last symbol of GRAMMAR. */
56c47203 149symbol_list_t *grammar_end = NULL;
f6d0f937
AD
150
151/* Append S to the GRAMMAR. */
e9955c83 152void
8efe435c 153grammar_symbol_append (symbol_t *symbol, location_t location)
f6d0f937 154{
56c47203 155 symbol_list_t *p = symbol_list_new (symbol, location);
f6d0f937
AD
156
157 if (grammar_end)
158 grammar_end->next = p;
159 else
160 grammar = p;
161
162 grammar_end = p;
163}
164
8efe435c
AD
165/* The rule currently being defined, and the previous rule.
166 CURRENT_RULE points to the first LHS of the current rule, while
167 PREVIOUS_RULE_END points to the *end* of the previous rule (NULL). */
56c47203
AD
168symbol_list_t *current_rule = NULL;
169symbol_list_t *previous_rule_end = NULL;
da4160c3
AD
170
171
8efe435c
AD
172/*----------------------------------------------.
173| Create a new rule for LHS in to the GRAMMAR. |
174`----------------------------------------------*/
da4160c3 175
e9955c83 176void
8efe435c 177grammar_rule_begin (symbol_t *lhs, location_t location)
da4160c3
AD
178{
179 if (!start_flag)
180 {
181 startsymbol = lhs;
8efe435c 182 startsymbol_location = location;
da4160c3
AD
183 start_flag = 1;
184 }
185
186 /* Start a new rule and record its lhs. */
187 ++nrules;
188 ++nritems;
189
8efe435c
AD
190 previous_rule_end = grammar_end;
191 grammar_symbol_append (lhs, location);
da4160c3
AD
192 current_rule = grammar_end;
193
194 /* Mark the rule's lhs as a nonterminal if not already so. */
195
196 if (lhs->class == unknown_sym)
197 {
198 lhs->class = nterm_sym;
199 lhs->number = nvars;
200 ++nvars;
201 }
202 else if (lhs->class == token_sym)
203 complain (_("rule given for %s, which is a token"), lhs->tag);
204}
205
e9955c83
AD
206/* Check that the last rule (CURRENT_RULE) is properly defined. For
207 instance, there should be no type clash on the default action. */
208
209static void
210grammar_current_rule_check (void)
211{
212 symbol_t *lhs = current_rule->sym;
213 symbol_t *first_rhs = current_rule->next->sym;
214
215 /* If there is an action, then there is nothing we can do: the user
216 is allowed to shoot in her foot. */
217 if (current_rule->action)
218 return;
219
220 /* If $$ is being set in default way, report if any type mismatch.
221 */
222 if (first_rhs)
223 {
224 const char *lhs_type = lhs->type_name ? lhs->type_name : "";
225 const char *rhs_type = first_rhs->type_name ? first_rhs->type_name : "";
226 if (strcmp (lhs_type, rhs_type))
227 complain (_("type clash (`%s' `%s') on default action"),
228 lhs_type, rhs_type);
229 }
230 /* Warn if there is no default for $$ but we need one. */
231 else
232 {
233 if (lhs->type_name)
234 complain (_("empty rule for typed nonterminal, and no action"));
235 }
236}
237
238
8efe435c
AD
239/*-------------------------------------.
240| End the currently being grown rule. |
241`-------------------------------------*/
e9955c83
AD
242
243void
8efe435c 244grammar_rule_end (location_t location)
e9955c83
AD
245{
246 /* Put an empty link in the list to mark the end of this rule */
8efe435c
AD
247 grammar_symbol_append (NULL, grammar_end->location);
248 current_rule->location = location;
e9955c83
AD
249 grammar_current_rule_check ();
250}
251
252
8efe435c
AD
253/*-------------------------------------------------------------------.
254| The previous action turns out the be a mid-rule action. Attach it |
255| to the current rule, i.e., create a dummy symbol, attach it this |
256| mid-rule action, and append this dummy nonterminal to the current |
257| rule. |
258`-------------------------------------------------------------------*/
1485e106 259
e9955c83 260void
1485e106
AD
261grammar_midrule_action (void)
262{
263 /* Since the action was written out with this rule's number, we must
264 give the new rule this number by inserting the new rule before
265 it. */
266
8efe435c
AD
267 /* Make a DUMMY nonterminal, whose location is that of the midrule
268 action. Create the MIDRULE. */
8efe435c 269 location_t dummy_location = current_rule->action_location;
ee000ba4 270 symbol_t *dummy = gensym (dummy_location);
56c47203 271 symbol_list_t *midrule = symbol_list_new (dummy, dummy_location);
1485e106
AD
272
273 /* Make a new rule, whose body is empty, before the current one, so
274 that the action just read can belong to it. */
275 ++nrules;
276 ++nritems;
8efe435c
AD
277 /* Attach its location and actions to that of the DUMMY. */
278 midrule->location = dummy_location;
279 midrule->action = current_rule->action;
280 midrule->action_location = dummy_location;
1485e106
AD
281 current_rule->action = NULL;
282
8efe435c
AD
283 if (previous_rule_end)
284 previous_rule_end->next = midrule;
1485e106 285 else
8efe435c 286 grammar = midrule;
1485e106 287
8efe435c
AD
288 /* End the dummy's rule. */
289 previous_rule_end = symbol_list_new (NULL, dummy_location);
290 previous_rule_end->next = current_rule;
1485e106 291
8efe435c 292 midrule->next = previous_rule_end;
1485e106 293
8efe435c
AD
294 /* Insert the dummy nonterminal replacing the midrule action into
295 the current rule. */
296 grammar_current_rule_symbol_append (dummy, dummy_location);
1485e106
AD
297}
298
9af3fbce
AD
299/* Set the precedence symbol of the current rule to PRECSYM. */
300
e9955c83 301void
9af3fbce
AD
302grammar_current_rule_prec_set (symbol_t *precsym)
303{
304 if (current_rule->ruleprec)
305 complain (_("two @prec's in a row"));
306 current_rule->ruleprec = precsym;
307}
308
2e047461
AD
309/* Attach a SYMBOL to the current rule. If needed, move the previous
310 action as a mid-rule action. */
311
e9955c83 312void
8efe435c 313grammar_current_rule_symbol_append (symbol_t *symbol, location_t location)
2e047461
AD
314{
315 if (current_rule->action)
316 grammar_midrule_action ();
317 ++nritems;
8efe435c 318 grammar_symbol_append (symbol, location);
2e047461
AD
319}
320
321
322/* Attach an ACTION to the current rule. If needed, move the previous
323 action as a mid-rule action. */
324
e9955c83 325void
8efe435c 326grammar_current_rule_action_append (const char *action, location_t location)
2e047461
AD
327{
328 if (current_rule->action)
329 grammar_midrule_action ();
330 current_rule->action = action;
8efe435c 331 current_rule->action_location = location;
2e047461
AD
332}
333
a70083a3 334\f
a70083a3
AD
335/*---------------------------------------------------------------.
336| Convert the rules into the representation using RRHS, RLHS and |
d9b739c3 337| RITEM. |
a70083a3 338`---------------------------------------------------------------*/
1ff442ca 339
4a120d45 340static void
118fb205 341packgram (void)
1ff442ca 342{
0c2d3f4c 343 unsigned int itemno;
a70083a3 344 int ruleno;
56c47203 345 symbol_list_t *p;
1ff442ca 346
a900a624 347 ritem = XCALLOC (item_number_t, nritems);
1a2b5d37 348 rules = XCALLOC (rule_t, nrules) - 1;
1ff442ca
NF
349
350 itemno = 0;
351 ruleno = 1;
352
353 p = grammar;
354 while (p)
355 {
db8837cb 356 symbol_t *ruleprec = p->ruleprec;
d7e1f00c 357 rules[ruleno].user_number = ruleno;
c3b407f4 358 rules[ruleno].number = ruleno;
bba97eb2 359 rules[ruleno].lhs = p->sym;
99013900 360 rules[ruleno].rhs = ritem + itemno;
8efe435c 361 rules[ruleno].location = p->location;
1a2b5d37
AD
362 rules[ruleno].useful = TRUE;
363 rules[ruleno].action = p->action;
8efe435c 364 rules[ruleno].action_location = p->action_location;
1ff442ca
NF
365
366 p = p->next;
367 while (p && p->sym)
368 {
a49aecd5 369 /* item_number_t = symbol_number_t.
5fbb0954 370 But the former needs to contain more: negative rule numbers. */
a49aecd5 371 ritem[itemno++] = symbol_number_as_item_number (p->sym->number);
1ff442ca
NF
372 /* A rule gets by default the precedence and associativity
373 of the last token in it. */
d7020c20 374 if (p->sym->class == token_sym)
03b31c0c 375 rules[ruleno].prec = p->sym;
a70083a3
AD
376 if (p)
377 p = p->next;
1ff442ca
NF
378 }
379
380 /* If this rule has a %prec,
a70083a3 381 the specified symbol's precedence replaces the default. */
1ff442ca
NF
382 if (ruleprec)
383 {
03b31c0c
AD
384 rules[ruleno].precsym = ruleprec;
385 rules[ruleno].prec = ruleprec;
1ff442ca 386 }
1ff442ca 387 ritem[itemno++] = -ruleno;
f3849179 388 ++ruleno;
1ff442ca 389
a70083a3
AD
390 if (p)
391 p = p->next;
1ff442ca
NF
392 }
393
5123689b 394 assert (itemno == nritems);
3067fbef
AD
395
396 if (trace_flag)
397 ritem_print (stderr);
1ff442ca 398}
a70083a3 399\f
fdbcd8e2
AD
400/*------------------------------------------------------------------.
401| Read in the grammar specification and record it in the format |
402| described in gram.h. All actions are copied into ACTION_OBSTACK, |
403| in each case forming the body of a C function (YYACTION) which |
404| contains a switch statement to decide which action to execute. |
405`------------------------------------------------------------------*/
a70083a3
AD
406
407void
408reader (void)
409{
e9955c83 410 gram_control_t gram_control;
a70083a3
AD
411 lineno = 1;
412
413 /* Initialize the symbol table. */
db8837cb 414 symbols_new ();
b6610515 415
30171f79 416 /* Construct the axiom symbol. */
ee000ba4 417 axiom = getsym ("$axiom", empty_location);
30171f79 418 axiom->class = nterm_sym;
d9b739c3 419 axiom->number = nvars++;
30171f79 420
a70083a3 421 /* Construct the error token */
ee000ba4 422 errtoken = getsym ("error", empty_location);
d7020c20 423 errtoken->class = token_sym;
72a23c97 424 errtoken->number = ntokens++;
b6610515 425
a70083a3
AD
426 /* Construct a token that represents all undefined literal tokens.
427 It is always token number 2. */
ee000ba4 428 undeftoken = getsym ("$undefined.", empty_location);
d7020c20 429 undeftoken->class = token_sym;
72a23c97 430 undeftoken->number = ntokens++;
a70083a3 431
331dbc1b 432 /* Initialize the obstacks. */
0dd1580a
RA
433 obstack_init (&pre_prologue_obstack);
434 obstack_init (&post_prologue_obstack);
331dbc1b
AD
435
436 finput = xfopen (infile, "r");
e9955c83
AD
437 gram_in = finput;
438
439 gram_debug = !!getenv ("parse");
440 gram__flex_debug = !!getenv ("scan");
1d6412ad 441 scanner_initialize ();
e9955c83 442 gram_parse (&gram_control);
331dbc1b 443
e9955c83
AD
444 /* Grammar has been read. Do some checking */
445 if (nrules == 0)
446 fatal (_("no rules in the input grammar"));
447
448 /* Report any undefined symbols and consider them nonterminals. */
449 symbols_check_defined ();
b7c49edf
AD
450
451 /* If the user did not define her EOFTOKEN, do it now. */
452 if (!eoftoken)
453 {
ee000ba4 454 eoftoken = getsym ("$", empty_location);
b7c49edf 455 eoftoken->class = token_sym;
72a23c97 456 eoftoken->number = 0;
b7c49edf
AD
457 /* Value specified by POSIX. */
458 eoftoken->user_token_number = 0;
459 }
460
e9955c83
AD
461 /* Insert the initial rule, which line is that of the first rule
462 (not that of the start symbol):
463
464 axiom: %start EOF. */
465 {
56c47203 466 symbol_list_t *p = symbol_list_new (axiom, empty_location);
8efe435c
AD
467 p->location = grammar->location;
468 p->next = symbol_list_new (startsymbol, empty_location);
469 p->next->next = symbol_list_new (eoftoken, empty_location);
470 p->next->next->next = symbol_list_new (NULL, empty_location);
e9955c83
AD
471 p->next->next->next->next = grammar;
472 nrules += 1;
473 nritems += 3;
474 grammar = p;
475 }
476
477 if (nsyms > SHRT_MAX)
478 fatal (_("too many symbols (tokens plus nonterminals); maximum %d"),
479 SHRT_MAX);
480
481 assert (nsyms == ntokens + nvars);
b0c4483e 482
331dbc1b
AD
483 xfclose (finput);
484
a70083a3
AD
485 /* Assign the symbols their symbol numbers. Write #defines for the
486 token symbols into FDEFINES if requested. */
2f1afb73 487 symbols_pack ();
93ede233 488
a70083a3
AD
489 /* Convert the grammar into the format described in gram.h. */
490 packgram ();
8419d367 491
56c47203
AD
492 /* The grammar as a symbol_list_t is no longer needed. */
493 LIST_FREE (symbol_list_t, grammar);
a70083a3 494}