]> git.saurik.com Git - bison.git/blame - src/reader.c
Have Bison grammars parsed by a Bison grammar.
[bison.git] / src / reader.c
CommitLineData
1ff442ca 1/* Input parser for bison
76514394 2 Copyright (C) 1984, 1986, 1989, 1992, 1998, 2000, 2001, 2002
a70083a3 3 Free Software Foundation, Inc.
1ff442ca 4
41aca2e0 5 This file is part of Bison, the GNU Compiler Compiler.
1ff442ca 6
41aca2e0
AD
7 Bison is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 2, or (at your option)
10 any later version.
1ff442ca 11
41aca2e0
AD
12 Bison is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
1ff442ca 16
41aca2e0
AD
17 You should have received a copy of the GNU General Public License
18 along with Bison; see the file COPYING. If not, write to
19 the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
20 Boston, MA 02111-1307, USA. */
1ff442ca
NF
21
22
1ff442ca 23#include "system.h"
2a91a95e
AD
24#include "quotearg.h"
25#include "quote.h"
ceed8467 26#include "getargs.h"
1ff442ca 27#include "files.h"
1ff442ca 28#include "symtab.h"
82b6d266 29#include "options.h"
1ff442ca 30#include "gram.h"
a0f6b076 31#include "complain.h"
6c89f1c1 32#include "output.h"
b2ca4022 33#include "reader.h"
340ef489 34#include "conflicts.h"
11d82f03 35#include "muscle_tab.h"
1ff442ca 36
1ff442ca 37int lineno;
280a38c3
AD
38static symbol_list *grammar = NULL;
39static int start_flag = 0;
1ff442ca 40
d7020c20 41/* Nonzero if %union has been seen. */
e9955c83 42int typed = 0;
1ff442ca 43
6255b435 44static symbol_list *
db8837cb 45symbol_list_new (symbol_t *sym)
b29b2ed5
AD
46{
47 symbol_list *res = XMALLOC (symbol_list, 1);
48 res->next = NULL;
49 res->sym = sym;
50 res->line = lineno;
d945f5cd
AD
51 res->action = NULL;
52 res->action_line = 0;
b29b2ed5
AD
53 res->ruleprec = NULL;
54 return res;
55}
56
0d533154 57\f
79282c5a
AD
58/*--------------------------------------------------------------.
59| Get the data type (alternative in the union) of the value for |
60| symbol N in rule RULE. |
61`--------------------------------------------------------------*/
62
e9955c83 63char *
b29b2ed5 64get_type_name (int n, symbol_list *rule)
79282c5a
AD
65{
66 int i;
67 symbol_list *rp;
68
69 if (n < 0)
70 {
71 complain (_("invalid $ value"));
72 return NULL;
73 }
74
75 rp = rule;
76 i = 0;
77
78 while (i < n)
79 {
80 rp = rp->next;
81 if (rp == NULL || rp->sym == NULL)
82 {
83 complain (_("invalid $ value"));
84 return NULL;
85 }
f3849179 86 ++i;
79282c5a
AD
87 }
88
89 return rp->sym->type_name;
90}
a083fbbf 91
1ff442ca 92
e9955c83
AD
93/*-----------------------.
94| Set the start symbol. |
95`-----------------------*/
1ff442ca 96
e9955c83
AD
97void
98grammar_start_symbol_set (symbol_t *s)
1ff442ca
NF
99{
100 if (start_flag)
27821bff 101 complain (_("multiple %s declarations"), "%start");
943819bf
RS
102 else
103 {
104 start_flag = 1;
e9955c83 105 startsymbol = s;
943819bf 106 }
1ff442ca
NF
107}
108
1ff442ca 109
d7020c20 110/*----------------------------------------------------------------.
e9955c83
AD
111| There are two prologues: one before %union, one after. Augment |
112| the current one. |
d7020c20 113`----------------------------------------------------------------*/
1ff442ca 114
e9955c83
AD
115void
116prologue_augment (const char *prologue, int location)
b6610515 117{
e9955c83
AD
118 struct obstack *oout =
119 !typed ? &pre_prologue_obstack : &post_prologue_obstack;
b6610515 120
e9955c83 121 if (!no_lines_flag)
b6610515 122 {
e9955c83
AD
123 obstack_fgrow2 (oout, muscle_find ("linef"),
124 location, quotearg_style (c_quoting_style,
125 muscle_find ("filename")));
b6610515 126 }
e9955c83 127 obstack_sgrow (oout, prologue);
b6610515
RA
128}
129
2ba3b73c 130
426cf563 131
a870c567 132
e9955c83
AD
133/*----------------------.
134| Handle the epilogue. |
135`----------------------*/
426cf563 136
e9955c83
AD
137void
138epilogue_set (const char *epilogue, int location)
2ba3b73c 139{
e9955c83
AD
140 struct obstack el_obstack;
141 obstack_init (&el_obstack);
1ff442ca 142
e9955c83 143 if (!no_lines_flag)
1ff442ca 144 {
e9955c83
AD
145 obstack_fgrow2 (&el_obstack, muscle_find ("linef"),
146 location, quotearg_style (c_quoting_style,
147 muscle_find ("filename")));
1ff442ca 148 }
e9955c83
AD
149 obstack_sgrow (&el_obstack, epilogue);
150 obstack_1grow (&el_obstack, 0);
151 muscle_insert ("epilogue", obstack_finish (&el_obstack));
1ff442ca 152}
1ff442ca 153
a70083a3 154
a70083a3
AD
155\f
156
a70083a3
AD
157/*-------------------------------------------------------------------.
158| Generate a dummy symbol, a nonterminal, whose name cannot conflict |
159| with the user's names. |
160`-------------------------------------------------------------------*/
1ff442ca 161
db8837cb 162static symbol_t *
118fb205 163gensym (void)
1ff442ca 164{
274d42ce
AD
165 /* Incremented for each generated symbol */
166 static int gensym_count = 0;
167 static char buf[256];
168
db8837cb 169 symbol_t *sym;
1ff442ca 170
274d42ce 171 sprintf (buf, "@%d", ++gensym_count);
e9955c83 172 sym = getsym (buf);
d7020c20 173 sym->class = nterm_sym;
d9b739c3 174 sym->number = nvars++;
36281465 175 return sym;
1ff442ca 176}
a70083a3 177\f
107f7dfb
AD
/*-------------------------------------------------------------------.
| Parse the input grammar into a single symbol_list structure.  Each |
| rule is represented by a sequence of symbols: the left hand side   |
| followed by the contents of the right hand side, followed by a     |
| null pointer instead of a symbol to terminate the rule.  The next  |
| symbol is the lhs of the following rule.                           |
|                                                                    |
| All actions are copied out, labelled by the rule number they apply |
| to.                                                                |
|                                                                    |
| Bison used to allow some %directives in the rules sections, but    |
| this is no longer considered appropriate: (i) the documented       |
| grammar doesn't claim it, (ii), it would promote bad style, (iii), |
| error recovery for %directives consists in skipping the junk until |
| a `%' is seen and helps synchronizing.  This scheme is definitely  |
| wrong in the rules section.                                        |
`-------------------------------------------------------------------*/
1ff442ca 195
f6d0f937
AD
196/* The (currently) last symbol of GRAMMAR. */
197symbol_list *grammar_end = NULL;
198
199/* Append S to the GRAMMAR. */
e9955c83 200void
f6d0f937
AD
201grammar_symbol_append (symbol_t *s)
202{
203 symbol_list *p = symbol_list_new (s);
204
205 if (grammar_end)
206 grammar_end->next = p;
207 else
208 grammar = p;
209
210 grammar_end = p;
211}
212
da4160c3
AD
213/* The rule currently being defined, and the previous rule. Point to
214 the first symbol of each list: their lhs. */
215symbol_list *current_rule = NULL;
216symbol_list *previous_rule = NULL;
217
218
219/* Create a new rule for LHS in to the GRAMMAR. */
220
e9955c83 221void
da4160c3
AD
222grammar_rule_begin (symbol_t *lhs)
223{
224 if (!start_flag)
225 {
226 startsymbol = lhs;
227 start_flag = 1;
228 }
229
230 /* Start a new rule and record its lhs. */
231 ++nrules;
232 ++nritems;
233
234 previous_rule = grammar_end;
235 grammar_symbol_append (lhs);
236 current_rule = grammar_end;
237
238 /* Mark the rule's lhs as a nonterminal if not already so. */
239
240 if (lhs->class == unknown_sym)
241 {
242 lhs->class = nterm_sym;
243 lhs->number = nvars;
244 ++nvars;
245 }
246 else if (lhs->class == token_sym)
247 complain (_("rule given for %s, which is a token"), lhs->tag);
248}
249
e9955c83
AD
250/* Check that the last rule (CURRENT_RULE) is properly defined. For
251 instance, there should be no type clash on the default action. */
252
253static void
254grammar_current_rule_check (void)
255{
256 symbol_t *lhs = current_rule->sym;
257 symbol_t *first_rhs = current_rule->next->sym;
258
259 /* If there is an action, then there is nothing we can do: the user
260 is allowed to shoot in her foot. */
261 if (current_rule->action)
262 return;
263
264 /* If $$ is being set in default way, report if any type mismatch.
265 */
266 if (first_rhs)
267 {
268 const char *lhs_type = lhs->type_name ? lhs->type_name : "";
269 const char *rhs_type = first_rhs->type_name ? first_rhs->type_name : "";
270 if (strcmp (lhs_type, rhs_type))
271 complain (_("type clash (`%s' `%s') on default action"),
272 lhs_type, rhs_type);
273 }
274 /* Warn if there is no default for $$ but we need one. */
275 else
276 {
277 if (lhs->type_name)
278 complain (_("empty rule for typed nonterminal, and no action"));
279 }
280}
281
282
/* Close the rule being built: append the NULL-symbol node that
   terminates it, then check the completed rule.  */

void
grammar_rule_end (void)
{
  /* The empty link comes first: the check reads CURRENT_RULE->next.  */
  grammar_symbol_append (NULL);
  grammar_current_rule_check ();
}
292
293
1485e106
AD
294/* The previous action turns out the be a mid-rule action. Attach it
295 to the current rule, i.e., create a dummy symbol, attach it this
296 mid-rule action, and append this dummy nonterminal to the current
297 rule. */
298
e9955c83 299void
1485e106
AD
300grammar_midrule_action (void)
301{
302 /* Since the action was written out with this rule's number, we must
303 give the new rule this number by inserting the new rule before
304 it. */
305
306 /* Make a dummy nonterminal, a gensym. */
307 symbol_t *sdummy = gensym ();
308 symbol_list *midrule_action = symbol_list_new (sdummy);
309
310 /* Make a new rule, whose body is empty, before the current one, so
311 that the action just read can belong to it. */
312 ++nrules;
313 ++nritems;
314 /* Attach its lineno to that of the host rule. */
315 midrule_action->line = current_rule->line;
316 /* Move the action from the host rule to this one. */
317 midrule_action->action = current_rule->action;
318 midrule_action->action_line = current_rule->action_line;
319 current_rule->action = NULL;
320
321 if (previous_rule)
322 previous_rule->next = midrule_action;
323 else
324 grammar = midrule_action;
325
326 /* End of the rule. */
327 previous_rule = symbol_list_new (NULL);
328 previous_rule->next = current_rule;
329
330 midrule_action->next = previous_rule;
331
332 /* Insert the dummy generated by that rule into this rule. */
333 ++nritems;
334 grammar_symbol_append (sdummy);
335}
336
9af3fbce
AD
337/* Set the precedence symbol of the current rule to PRECSYM. */
338
e9955c83 339void
9af3fbce
AD
340grammar_current_rule_prec_set (symbol_t *precsym)
341{
342 if (current_rule->ruleprec)
343 complain (_("two @prec's in a row"));
344 current_rule->ruleprec = precsym;
345}
346
2e047461
AD
347/* Attach a SYMBOL to the current rule. If needed, move the previous
348 action as a mid-rule action. */
349
e9955c83 350void
2e047461
AD
351grammar_current_rule_symbol_append (symbol_t *symbol)
352{
353 if (current_rule->action)
354 grammar_midrule_action ();
355 ++nritems;
356 grammar_symbol_append (symbol);
357}
358
359
360/* Attach an ACTION to the current rule. If needed, move the previous
361 action as a mid-rule action. */
362
e9955c83
AD
363void
364grammar_current_rule_action_append (const char *action, int action_line)
2e047461
AD
365{
366 if (current_rule->action)
367 grammar_midrule_action ();
368 current_rule->action = action;
369 current_rule->action_line = action_line;
370}
371
a70083a3 372\f
a70083a3
AD
373/*---------------------------------------------------------------.
374| Convert the rules into the representation using RRHS, RLHS and |
d9b739c3 375| RITEM. |
a70083a3 376`---------------------------------------------------------------*/
1ff442ca 377
4a120d45 378static void
118fb205 379packgram (void)
1ff442ca 380{
0c2d3f4c 381 unsigned int itemno;
a70083a3
AD
382 int ruleno;
383 symbol_list *p;
1ff442ca 384
a900a624 385 ritem = XCALLOC (item_number_t, nritems);
1a2b5d37 386 rules = XCALLOC (rule_t, nrules) - 1;
1ff442ca
NF
387
388 itemno = 0;
389 ruleno = 1;
390
391 p = grammar;
392 while (p)
393 {
db8837cb 394 symbol_t *ruleprec = p->ruleprec;
d7e1f00c 395 rules[ruleno].user_number = ruleno;
c3b407f4 396 rules[ruleno].number = ruleno;
bba97eb2 397 rules[ruleno].lhs = p->sym;
99013900 398 rules[ruleno].rhs = ritem + itemno;
1a2b5d37
AD
399 rules[ruleno].line = p->line;
400 rules[ruleno].useful = TRUE;
401 rules[ruleno].action = p->action;
402 rules[ruleno].action_line = p->action_line;
1ff442ca
NF
403
404 p = p->next;
405 while (p && p->sym)
406 {
a49aecd5 407 /* item_number_t = symbol_number_t.
5fbb0954 408 But the former needs to contain more: negative rule numbers. */
a49aecd5 409 ritem[itemno++] = symbol_number_as_item_number (p->sym->number);
1ff442ca
NF
410 /* A rule gets by default the precedence and associativity
411 of the last token in it. */
d7020c20 412 if (p->sym->class == token_sym)
03b31c0c 413 rules[ruleno].prec = p->sym;
a70083a3
AD
414 if (p)
415 p = p->next;
1ff442ca
NF
416 }
417
418 /* If this rule has a %prec,
a70083a3 419 the specified symbol's precedence replaces the default. */
1ff442ca
NF
420 if (ruleprec)
421 {
03b31c0c
AD
422 rules[ruleno].precsym = ruleprec;
423 rules[ruleno].prec = ruleprec;
1ff442ca 424 }
1ff442ca 425 ritem[itemno++] = -ruleno;
f3849179 426 ++ruleno;
1ff442ca 427
a70083a3
AD
428 if (p)
429 p = p->next;
1ff442ca
NF
430 }
431
5123689b 432 assert (itemno == nritems);
3067fbef
AD
433
434 if (trace_flag)
435 ritem_print (stderr);
1ff442ca 436}
a70083a3 437\f
fdbcd8e2
AD
438/*------------------------------------------------------------------.
439| Read in the grammar specification and record it in the format |
440| described in gram.h. All actions are copied into ACTION_OBSTACK, |
441| in each case forming the body of a C function (YYACTION) which |
442| contains a switch statement to decide which action to execute. |
443`------------------------------------------------------------------*/
a70083a3
AD
444
445void
446reader (void)
447{
e9955c83 448 gram_control_t gram_control;
a70083a3
AD
449 lineno = 1;
450
11d82f03
MA
451 /* Initialize the muscle obstack. */
452 obstack_init (&muscle_obstack);
82e236e2 453
a70083a3 454 /* Initialize the symbol table. */
db8837cb 455 symbols_new ();
b6610515 456
30171f79
AD
457 /* Construct the axiom symbol. */
458 axiom = getsym ("$axiom");
459 axiom->class = nterm_sym;
d9b739c3 460 axiom->number = nvars++;
30171f79 461
a70083a3
AD
462 /* Construct the error token */
463 errtoken = getsym ("error");
d7020c20 464 errtoken->class = token_sym;
72a23c97 465 errtoken->number = ntokens++;
b6610515 466
a70083a3
AD
467 /* Construct a token that represents all undefined literal tokens.
468 It is always token number 2. */
469 undeftoken = getsym ("$undefined.");
d7020c20 470 undeftoken->class = token_sym;
72a23c97 471 undeftoken->number = ntokens++;
a70083a3 472
331dbc1b
AD
473 /* Initialize the obstacks. */
474 obstack_init (&action_obstack);
331dbc1b 475 obstack_init (&output_obstack);
0dd1580a
RA
476 obstack_init (&pre_prologue_obstack);
477 obstack_init (&post_prologue_obstack);
331dbc1b
AD
478
479 finput = xfopen (infile, "r");
e9955c83
AD
480 gram_in = finput;
481
482 gram_debug = !!getenv ("parse");
483 gram__flex_debug = !!getenv ("scan");
484 gram_parse (&gram_control);
331dbc1b 485
e9955c83
AD
486 /* Grammar has been read. Do some checking */
487 if (nrules == 0)
488 fatal (_("no rules in the input grammar"));
489
490 /* Report any undefined symbols and consider them nonterminals. */
491 symbols_check_defined ();
b7c49edf
AD
492
493 /* If the user did not define her EOFTOKEN, do it now. */
494 if (!eoftoken)
495 {
496 eoftoken = getsym ("$");
497 eoftoken->class = token_sym;
72a23c97 498 eoftoken->number = 0;
b7c49edf
AD
499 /* Value specified by POSIX. */
500 eoftoken->user_token_number = 0;
501 }
502
e9955c83
AD
503 /* Insert the initial rule, which line is that of the first rule
504 (not that of the start symbol):
505
506 axiom: %start EOF. */
507 {
508 symbol_list *p = symbol_list_new (axiom);
509 p->line = grammar->line;
510 p->next = symbol_list_new (startsymbol);
511 p->next->next = symbol_list_new (eoftoken);
512 p->next->next->next = symbol_list_new (NULL);
513 p->next->next->next->next = grammar;
514 nrules += 1;
515 nritems += 3;
516 grammar = p;
517 }
518
519 if (nsyms > SHRT_MAX)
520 fatal (_("too many symbols (tokens plus nonterminals); maximum %d"),
521 SHRT_MAX);
522
523 assert (nsyms == ntokens + nvars);
b0c4483e 524
331dbc1b
AD
525 xfclose (finput);
526
a70083a3
AD
527 /* Assign the symbols their symbol numbers. Write #defines for the
528 token symbols into FDEFINES if requested. */
2f1afb73 529 symbols_pack ();
93ede233 530
a70083a3
AD
531 /* Convert the grammar into the format described in gram.h. */
532 packgram ();
8419d367
AD
533
534 /* The grammar as a symbol_list is no longer needed. */
535 LIST_FREE (symbol_list, grammar);
a70083a3 536}