]> git.saurik.com Git - bison.git/blame - src/reader.c
* src/reader.h, src/reader.c (symbol_list_new): Export it.
[bison.git] / src / reader.c
CommitLineData
1ff442ca 1/* Input parser for bison
76514394 2 Copyright (C) 1984, 1986, 1989, 1992, 1998, 2000, 2001, 2002
a70083a3 3 Free Software Foundation, Inc.
1ff442ca 4
41aca2e0 5 This file is part of Bison, the GNU Compiler Compiler.
1ff442ca 6
41aca2e0
AD
7 Bison is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 2, or (at your option)
10 any later version.
1ff442ca 11
41aca2e0
AD
12 Bison is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
1ff442ca 16
41aca2e0
AD
17 You should have received a copy of the GNU General Public License
18 along with Bison; see the file COPYING. If not, write to
19 the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
20 Boston, MA 02111-1307, USA. */
1ff442ca
NF
21
22
1ff442ca 23#include "system.h"
2a91a95e
AD
24#include "quotearg.h"
25#include "quote.h"
ceed8467 26#include "getargs.h"
1ff442ca 27#include "files.h"
1ff442ca 28#include "symtab.h"
82b6d266 29#include "options.h"
1ff442ca 30#include "gram.h"
a0f6b076 31#include "complain.h"
6c89f1c1 32#include "output.h"
b2ca4022 33#include "reader.h"
340ef489 34#include "conflicts.h"
11d82f03 35#include "muscle_tab.h"
1ff442ca 36
/* Set to 1 by reader () before parsing starts.
   NOTE(review): presumably the current line of the grammar file,
   maintained by code outside this file -- confirm.  */
1ff442ca 37int lineno;
280a38c3
AD
/* Head of the symbol list holding the grammar; NULL until the first
   node is linked in.  */
38static symbol_list *grammar = NULL;
/* Nonzero once a start symbol has been recorded, either by
   grammar_start_symbol_set () or by the first grammar_rule_begin ().  */
39static int start_flag = 0;
1ff442ca 40
d7020c20 41/* Nonzero if %union has been seen.  prologue_augment () uses it to
   choose between the pre- and post-prologue obstacks.  */
e9955c83 42int typed = 0;
1ff442ca 43
1e0bab92 44symbol_list *
8efe435c 45symbol_list_new (symbol_t *sym, location_t location)
b29b2ed5
AD
46{
47 symbol_list *res = XMALLOC (symbol_list, 1);
48 res->next = NULL;
49 res->sym = sym;
8efe435c 50 res->location = location;
d945f5cd 51 res->action = NULL;
b29b2ed5
AD
52 res->ruleprec = NULL;
53 return res;
54}
55
1e0bab92
AD
56symbol_list *
57symbol_list_prepend (symbol_list *list, symbol_t *symbol, location_t location)
58{
59 symbol_list *res = symbol_list_new (symbol, location);
60 res->next = list;
61 return res;
62}
63
0d533154 64\f
79282c5a
AD
65/*--------------------------------------------------------------.
66| Get the data type (alternative in the union) of the value for |
67| symbol N in rule RULE. |
68`--------------------------------------------------------------*/
69
e9955c83 70char *
b29b2ed5 71get_type_name (int n, symbol_list *rule)
79282c5a
AD
72{
73 int i;
74 symbol_list *rp;
75
76 if (n < 0)
77 {
78 complain (_("invalid $ value"));
79 return NULL;
80 }
81
82 rp = rule;
83 i = 0;
84
85 while (i < n)
86 {
87 rp = rp->next;
88 if (rp == NULL || rp->sym == NULL)
89 {
90 complain (_("invalid $ value"));
91 return NULL;
92 }
f3849179 93 ++i;
79282c5a
AD
94 }
95
96 return rp->sym->type_name;
97}
a083fbbf 98
1ff442ca 99
e9955c83
AD
100/*-----------------------.
101| Set the start symbol. |
102`-----------------------*/
1ff442ca 103
e9955c83 104void
8efe435c 105grammar_start_symbol_set (symbol_t *s, location_t l)
1ff442ca
NF
106{
107 if (start_flag)
27821bff 108 complain (_("multiple %s declarations"), "%start");
943819bf
RS
109 else
110 {
111 start_flag = 1;
e9955c83 112 startsymbol = s;
8efe435c 113 startsymbol_location = l;
943819bf 114 }
1ff442ca
NF
115}
116
1ff442ca 117
d7020c20 118/*----------------------------------------------------------------.
e9955c83
AD
119| There are two prologues: one before %union, one after. Augment |
120| the current one. |
d7020c20 121`----------------------------------------------------------------*/
1ff442ca 122
e9955c83 123void
0c15323d 124prologue_augment (const char *prologue, location_t location)
b6610515 125{
e9955c83
AD
126 struct obstack *oout =
127 !typed ? &pre_prologue_obstack : &post_prologue_obstack;
b6610515 128
e9955c83 129 if (!no_lines_flag)
b6610515 130 {
e9955c83 131 obstack_fgrow2 (oout, muscle_find ("linef"),
0c15323d
AD
132 location.first_line,
133 quotearg_style (c_quoting_style,
134 muscle_find ("filename")));
b6610515 135 }
e9955c83 136 obstack_sgrow (oout, prologue);
b6610515
RA
137}
138
2ba3b73c 139
426cf563 140
a870c567 141
e9955c83
AD
142/*----------------------.
143| Handle the epilogue. |
144`----------------------*/
426cf563 145
e9955c83 146void
0c15323d 147epilogue_set (const char *epilogue, location_t location)
2ba3b73c 148{
e9955c83 149 if (!no_lines_flag)
1ff442ca 150 {
592e8d4d 151 obstack_fgrow2 (&muscle_obstack, muscle_find ("linef"),
0c15323d
AD
152 location.first_line,
153 quotearg_style (c_quoting_style,
154 muscle_find ("filename")));
1ff442ca 155 }
592e8d4d
AD
156 obstack_sgrow (&muscle_obstack, epilogue);
157 obstack_1grow (&muscle_obstack, 0);
158 muscle_insert ("epilogue", obstack_finish (&muscle_obstack));
1ff442ca 159}
1ff442ca 160
a70083a3 161
a70083a3
AD
162\f
163
a70083a3
AD
164/*-------------------------------------------------------------------.
165| Generate a dummy symbol, a nonterminal, whose name cannot conflict |
166| with the user's names. |
167`-------------------------------------------------------------------*/
1ff442ca 168
db8837cb 169static symbol_t *
ee000ba4 170gensym (location_t location)
1ff442ca 171{
274d42ce
AD
172 /* Incremented for each generated symbol */
173 static int gensym_count = 0;
174 static char buf[256];
175
db8837cb 176 symbol_t *sym;
1ff442ca 177
274d42ce 178 sprintf (buf, "@%d", ++gensym_count);
ee000ba4 179 sym = getsym (buf, location);
d7020c20 180 sym->class = nterm_sym;
d9b739c3 181 sym->number = nvars++;
36281465 182 return sym;
1ff442ca 183}
a70083a3 184\f
107f7dfb
AD
185/*-------------------------------------------------------------------.
186| Parse the input grammar into a one symbol_list structure. Each |
187| rule is represented by a sequence of symbols: the left hand side |
188| followed by the contents of the right hand side, followed by a |
189| null pointer instead of a symbol to terminate the rule. The next |
190| symbol is the lhs of the following rule. |
191| |
fdbcd8e2
AD
192| All actions are copied out, labelled by the rule number they apply |
193| to. |
107f7dfb
AD
194| |
195| Bison used to allow some %directives in the rules sections, but |
196| this is no longer consider appropriate: (i) the documented grammar |
197| doesn't claim it, (ii), it would promote bad style, (iii), error |
198| recovery for %directives consists in skipping the junk until a `%' |
199| is seen and helps synchronizing. This scheme is definitely wrong |
200| in the rules section. |
201`-------------------------------------------------------------------*/
1ff442ca 202
f6d0f937
AD
203/* The (currently) last symbol of GRAMMAR. */
204symbol_list *grammar_end = NULL;
205
206/* Append S to the GRAMMAR. */
e9955c83 207void
8efe435c 208grammar_symbol_append (symbol_t *symbol, location_t location)
f6d0f937 209{
8efe435c 210 symbol_list *p = symbol_list_new (symbol, location);
f6d0f937
AD
211
212 if (grammar_end)
213 grammar_end->next = p;
214 else
215 grammar = p;
216
217 grammar_end = p;
218}
219
8efe435c
AD
220/* The rule currently being defined, and the previous rule.
   CURRENT_RULE points to the LHS node of the rule being built; it is
   set by grammar_rule_begin ().
   PREVIOUS_RULE_END is the node after which the current rule was
   linked: grammar_rule_begin () saves GRAMMAR_END there before
   appending the LHS, so it is NULL when the current rule is the first
   one, and grammar_midrule_action () moves it to the terminating
   (NULL-symbol) node of the rule it inserts.  */
da4160c3 223symbol_list *current_rule = NULL;
8efe435c 224symbol_list *previous_rule_end = NULL;
da4160c3
AD
225
226
8efe435c
AD
227/*----------------------------------------------.
228| Create a new rule for LHS in to the GRAMMAR. |
229`----------------------------------------------*/
da4160c3 230
e9955c83 231void
8efe435c 232grammar_rule_begin (symbol_t *lhs, location_t location)
da4160c3
AD
233{
234 if (!start_flag)
235 {
236 startsymbol = lhs;
8efe435c 237 startsymbol_location = location;
da4160c3
AD
238 start_flag = 1;
239 }
240
241 /* Start a new rule and record its lhs. */
242 ++nrules;
243 ++nritems;
244
8efe435c
AD
245 previous_rule_end = grammar_end;
246 grammar_symbol_append (lhs, location);
da4160c3
AD
247 current_rule = grammar_end;
248
249 /* Mark the rule's lhs as a nonterminal if not already so. */
250
251 if (lhs->class == unknown_sym)
252 {
253 lhs->class = nterm_sym;
254 lhs->number = nvars;
255 ++nvars;
256 }
257 else if (lhs->class == token_sym)
258 complain (_("rule given for %s, which is a token"), lhs->tag);
259}
260
e9955c83
AD
261/* Check that the last rule (CURRENT_RULE) is properly defined. For
262 instance, there should be no type clash on the default action. */
263
264static void
265grammar_current_rule_check (void)
266{
267 symbol_t *lhs = current_rule->sym;
268 symbol_t *first_rhs = current_rule->next->sym;
269
270 /* If there is an action, then there is nothing we can do: the user
271 is allowed to shoot in her foot. */
272 if (current_rule->action)
273 return;
274
275 /* If $$ is being set in default way, report if any type mismatch.
276 */
277 if (first_rhs)
278 {
279 const char *lhs_type = lhs->type_name ? lhs->type_name : "";
280 const char *rhs_type = first_rhs->type_name ? first_rhs->type_name : "";
281 if (strcmp (lhs_type, rhs_type))
282 complain (_("type clash (`%s' `%s') on default action"),
283 lhs_type, rhs_type);
284 }
285 /* Warn if there is no default for $$ but we need one. */
286 else
287 {
288 if (lhs->type_name)
289 complain (_("empty rule for typed nonterminal, and no action"));
290 }
291}
292
293
8efe435c
AD
294/*-------------------------------------.
295| End the currently being grown rule. |
296`-------------------------------------*/
e9955c83
AD
297
298void
8efe435c 299grammar_rule_end (location_t location)
e9955c83
AD
300{
301 /* Put an empty link in the list to mark the end of this rule */
8efe435c
AD
302 grammar_symbol_append (NULL, grammar_end->location);
303 current_rule->location = location;
e9955c83
AD
304 grammar_current_rule_check ();
305}
306
307
8efe435c
AD
308/*-------------------------------------------------------------------.
309| The previous action turns out the be a mid-rule action. Attach it |
310| to the current rule, i.e., create a dummy symbol, attach it this |
311| mid-rule action, and append this dummy nonterminal to the current |
312| rule. |
313`-------------------------------------------------------------------*/
1485e106 314
e9955c83 315void
1485e106
AD
316grammar_midrule_action (void)
317{
318 /* Since the action was written out with this rule's number, we must
319 give the new rule this number by inserting the new rule before
320 it. */
321
8efe435c
AD
322 /* Make a DUMMY nonterminal, whose location is that of the midrule
323 action. Create the MIDRULE. */
8efe435c 324 location_t dummy_location = current_rule->action_location;
ee000ba4 325 symbol_t *dummy = gensym (dummy_location);
8efe435c 326 symbol_list *midrule = symbol_list_new (dummy, dummy_location);
1485e106
AD
327
328 /* Make a new rule, whose body is empty, before the current one, so
329 that the action just read can belong to it. */
330 ++nrules;
331 ++nritems;
8efe435c
AD
332 /* Attach its location and actions to that of the DUMMY. */
333 midrule->location = dummy_location;
334 midrule->action = current_rule->action;
335 midrule->action_location = dummy_location;
1485e106
AD
336 current_rule->action = NULL;
337
8efe435c
AD
338 if (previous_rule_end)
339 previous_rule_end->next = midrule;
1485e106 340 else
8efe435c 341 grammar = midrule;
1485e106 342
8efe435c
AD
343 /* End the dummy's rule. */
344 previous_rule_end = symbol_list_new (NULL, dummy_location);
345 previous_rule_end->next = current_rule;
1485e106 346
8efe435c 347 midrule->next = previous_rule_end;
1485e106 348
8efe435c
AD
349 /* Insert the dummy nonterminal replacing the midrule action into
350 the current rule. */
351 grammar_current_rule_symbol_append (dummy, dummy_location);
1485e106
AD
352}
353
9af3fbce
AD
354/* Set the precedence symbol of the current rule to PRECSYM. */
355
e9955c83 356void
9af3fbce
AD
357grammar_current_rule_prec_set (symbol_t *precsym)
358{
359 if (current_rule->ruleprec)
360 complain (_("two @prec's in a row"));
361 current_rule->ruleprec = precsym;
362}
363
2e047461
AD
364/* Attach a SYMBOL to the current rule. If needed, move the previous
365 action as a mid-rule action. */
366
e9955c83 367void
8efe435c 368grammar_current_rule_symbol_append (symbol_t *symbol, location_t location)
2e047461
AD
369{
370 if (current_rule->action)
371 grammar_midrule_action ();
372 ++nritems;
8efe435c 373 grammar_symbol_append (symbol, location);
2e047461
AD
374}
375
376
377/* Attach an ACTION to the current rule. If needed, move the previous
378 action as a mid-rule action. */
379
e9955c83 380void
8efe435c 381grammar_current_rule_action_append (const char *action, location_t location)
2e047461
AD
382{
383 if (current_rule->action)
384 grammar_midrule_action ();
385 current_rule->action = action;
8efe435c 386 current_rule->action_location = location;
2e047461
AD
387}
388
a70083a3 389\f
a70083a3
AD
390/*---------------------------------------------------------------.
391| Convert the rules into the representation using RRHS, RLHS and |
d9b739c3 392| RITEM. |
a70083a3 393`---------------------------------------------------------------*/
1ff442ca 394
4a120d45 395static void
118fb205 396packgram (void)
1ff442ca 397{
0c2d3f4c 398 unsigned int itemno;
a70083a3
AD
399 int ruleno;
400 symbol_list *p;
1ff442ca 401
a900a624 402 ritem = XCALLOC (item_number_t, nritems);
1a2b5d37 403 rules = XCALLOC (rule_t, nrules) - 1;
1ff442ca
NF
404
405 itemno = 0;
406 ruleno = 1;
407
408 p = grammar;
409 while (p)
410 {
db8837cb 411 symbol_t *ruleprec = p->ruleprec;
d7e1f00c 412 rules[ruleno].user_number = ruleno;
c3b407f4 413 rules[ruleno].number = ruleno;
bba97eb2 414 rules[ruleno].lhs = p->sym;
99013900 415 rules[ruleno].rhs = ritem + itemno;
8efe435c 416 rules[ruleno].location = p->location;
1a2b5d37
AD
417 rules[ruleno].useful = TRUE;
418 rules[ruleno].action = p->action;
8efe435c 419 rules[ruleno].action_location = p->action_location;
1ff442ca
NF
420
421 p = p->next;
422 while (p && p->sym)
423 {
a49aecd5 424 /* item_number_t = symbol_number_t.
5fbb0954 425 But the former needs to contain more: negative rule numbers. */
a49aecd5 426 ritem[itemno++] = symbol_number_as_item_number (p->sym->number);
1ff442ca
NF
427 /* A rule gets by default the precedence and associativity
428 of the last token in it. */
d7020c20 429 if (p->sym->class == token_sym)
03b31c0c 430 rules[ruleno].prec = p->sym;
a70083a3
AD
431 if (p)
432 p = p->next;
1ff442ca
NF
433 }
434
435 /* If this rule has a %prec,
a70083a3 436 the specified symbol's precedence replaces the default. */
1ff442ca
NF
437 if (ruleprec)
438 {
03b31c0c
AD
439 rules[ruleno].precsym = ruleprec;
440 rules[ruleno].prec = ruleprec;
1ff442ca 441 }
1ff442ca 442 ritem[itemno++] = -ruleno;
f3849179 443 ++ruleno;
1ff442ca 444
a70083a3
AD
445 if (p)
446 p = p->next;
1ff442ca
NF
447 }
448
5123689b 449 assert (itemno == nritems);
3067fbef
AD
450
451 if (trace_flag)
452 ritem_print (stderr);
1ff442ca 453}
a70083a3 454\f
fdbcd8e2
AD
455/*------------------------------------------------------------------.
456| Read in the grammar specification and record it in the format |
457| described in gram.h. All actions are copied into ACTION_OBSTACK, |
458| in each case forming the body of a C function (YYACTION) which |
459| contains a switch statement to decide which action to execute. |
460`------------------------------------------------------------------*/
a70083a3
AD
461
462void
463reader (void)
464{
e9955c83 465 gram_control_t gram_control;
a70083a3
AD
466 lineno = 1;
467
468 /* Initialize the symbol table. */
db8837cb 469 symbols_new ();
b6610515 470
30171f79 471 /* Construct the axiom symbol. */
ee000ba4 472 axiom = getsym ("$axiom", empty_location);
30171f79 473 axiom->class = nterm_sym;
d9b739c3 474 axiom->number = nvars++;
30171f79 475
a70083a3 476 /* Construct the error token */
ee000ba4 477 errtoken = getsym ("error", empty_location);
d7020c20 478 errtoken->class = token_sym;
72a23c97 479 errtoken->number = ntokens++;
b6610515 480
a70083a3
AD
481 /* Construct a token that represents all undefined literal tokens.
482 It is always token number 2. */
ee000ba4 483 undeftoken = getsym ("$undefined.", empty_location);
d7020c20 484 undeftoken->class = token_sym;
72a23c97 485 undeftoken->number = ntokens++;
a70083a3 486
331dbc1b 487 /* Initialize the obstacks. */
0dd1580a
RA
488 obstack_init (&pre_prologue_obstack);
489 obstack_init (&post_prologue_obstack);
331dbc1b
AD
490
491 finput = xfopen (infile, "r");
e9955c83
AD
492 gram_in = finput;
493
494 gram_debug = !!getenv ("parse");
495 gram__flex_debug = !!getenv ("scan");
1d6412ad 496 scanner_initialize ();
e9955c83 497 gram_parse (&gram_control);
331dbc1b 498
e9955c83
AD
499 /* Grammar has been read. Do some checking */
500 if (nrules == 0)
501 fatal (_("no rules in the input grammar"));
502
503 /* Report any undefined symbols and consider them nonterminals. */
504 symbols_check_defined ();
b7c49edf
AD
505
506 /* If the user did not define her EOFTOKEN, do it now. */
507 if (!eoftoken)
508 {
ee000ba4 509 eoftoken = getsym ("$", empty_location);
b7c49edf 510 eoftoken->class = token_sym;
72a23c97 511 eoftoken->number = 0;
b7c49edf
AD
512 /* Value specified by POSIX. */
513 eoftoken->user_token_number = 0;
514 }
515
e9955c83
AD
516 /* Insert the initial rule, which line is that of the first rule
517 (not that of the start symbol):
518
519 axiom: %start EOF. */
520 {
8efe435c
AD
521 symbol_list *p = symbol_list_new (axiom, empty_location);
522 p->location = grammar->location;
523 p->next = symbol_list_new (startsymbol, empty_location);
524 p->next->next = symbol_list_new (eoftoken, empty_location);
525 p->next->next->next = symbol_list_new (NULL, empty_location);
e9955c83
AD
526 p->next->next->next->next = grammar;
527 nrules += 1;
528 nritems += 3;
529 grammar = p;
530 }
531
532 if (nsyms > SHRT_MAX)
533 fatal (_("too many symbols (tokens plus nonterminals); maximum %d"),
534 SHRT_MAX);
535
536 assert (nsyms == ntokens + nvars);
b0c4483e 537
331dbc1b
AD
538 xfclose (finput);
539
a70083a3
AD
540 /* Assign the symbols their symbol numbers. Write #defines for the
541 token symbols into FDEFINES if requested. */
2f1afb73 542 symbols_pack ();
93ede233 543
a70083a3
AD
544 /* Convert the grammar into the format described in gram.h. */
545 packgram ();
8419d367
AD
546
547 /* The grammar as a symbol_list is no longer needed. */
548 LIST_FREE (symbol_list, grammar);
a70083a3 549}