]> git.saurik.com Git - bison.git/blame - src/reader.c
* src/parse-gram.y (YYLLOC_DEFAULT, current_lhs_location): New.
[bison.git] / src / reader.c
CommitLineData
1ff442ca 1/* Input parser for bison
76514394 2 Copyright (C) 1984, 1986, 1989, 1992, 1998, 2000, 2001, 2002
a70083a3 3 Free Software Foundation, Inc.
1ff442ca 4
41aca2e0 5 This file is part of Bison, the GNU Compiler Compiler.
1ff442ca 6
41aca2e0
AD
7 Bison is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 2, or (at your option)
10 any later version.
1ff442ca 11
41aca2e0
AD
12 Bison is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
1ff442ca 16
41aca2e0
AD
17 You should have received a copy of the GNU General Public License
18 along with Bison; see the file COPYING. If not, write to
19 the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
20 Boston, MA 02111-1307, USA. */
1ff442ca
NF
21
22
1ff442ca 23#include "system.h"
2a91a95e
AD
24#include "quotearg.h"
25#include "quote.h"
ceed8467 26#include "getargs.h"
1ff442ca 27#include "files.h"
1ff442ca 28#include "symtab.h"
82b6d266 29#include "options.h"
1ff442ca 30#include "gram.h"
a0f6b076 31#include "complain.h"
6c89f1c1 32#include "output.h"
b2ca4022 33#include "reader.h"
340ef489 34#include "conflicts.h"
11d82f03 35#include "muscle_tab.h"
1ff442ca 36
/* reader () resets this to 1 before the grammar file is parsed. */
1ff442ca 37int lineno;
280a38c3
AD
/* First link of the symbol_list holding the grammar read so far;
   NULL until grammar_symbol_append () stores the first link. */
38static symbol_list *grammar = NULL;
/* Nonzero once a start symbol has been recorded, either by
   grammar_start_symbol_set () or by the first grammar_rule_begin (). */
39static int start_flag = 0;
1ff442ca 40
d7020c20 41/* Nonzero if %union has been seen. */
e9955c83 42int typed = 0;
1ff442ca 43
6255b435 44static symbol_list *
8efe435c 45symbol_list_new (symbol_t *sym, location_t location)
b29b2ed5
AD
46{
47 symbol_list *res = XMALLOC (symbol_list, 1);
48 res->next = NULL;
49 res->sym = sym;
8efe435c 50 res->location = location;
d945f5cd 51 res->action = NULL;
b29b2ed5
AD
52 res->ruleprec = NULL;
53 return res;
54}
55
0d533154 56\f
79282c5a
AD
57/*--------------------------------------------------------------.
58| Get the data type (alternative in the union) of the value for |
59| symbol N in rule RULE. |
60`--------------------------------------------------------------*/
61
e9955c83 62char *
b29b2ed5 63get_type_name (int n, symbol_list *rule)
79282c5a
AD
64{
65 int i;
66 symbol_list *rp;
67
68 if (n < 0)
69 {
70 complain (_("invalid $ value"));
71 return NULL;
72 }
73
74 rp = rule;
75 i = 0;
76
77 while (i < n)
78 {
79 rp = rp->next;
80 if (rp == NULL || rp->sym == NULL)
81 {
82 complain (_("invalid $ value"));
83 return NULL;
84 }
f3849179 85 ++i;
79282c5a
AD
86 }
87
88 return rp->sym->type_name;
89}
a083fbbf 90
1ff442ca 91
e9955c83
AD
92/*-----------------------.
93| Set the start symbol. |
94`-----------------------*/
1ff442ca 95
e9955c83 96void
8efe435c 97grammar_start_symbol_set (symbol_t *s, location_t l)
1ff442ca
NF
98{
99 if (start_flag)
27821bff 100 complain (_("multiple %s declarations"), "%start");
943819bf
RS
101 else
102 {
103 start_flag = 1;
e9955c83 104 startsymbol = s;
8efe435c 105 startsymbol_location = l;
943819bf 106 }
1ff442ca
NF
107}
108
1ff442ca 109
d7020c20 110/*----------------------------------------------------------------.
e9955c83
AD
111| There are two prologues: one before %union, one after. Augment |
112| the current one. |
d7020c20 113`----------------------------------------------------------------*/
1ff442ca 114
e9955c83 115void
0c15323d 116prologue_augment (const char *prologue, location_t location)
b6610515 117{
e9955c83
AD
118 struct obstack *oout =
119 !typed ? &pre_prologue_obstack : &post_prologue_obstack;
b6610515 120
e9955c83 121 if (!no_lines_flag)
b6610515 122 {
e9955c83 123 obstack_fgrow2 (oout, muscle_find ("linef"),
0c15323d
AD
124 location.first_line,
125 quotearg_style (c_quoting_style,
126 muscle_find ("filename")));
b6610515 127 }
e9955c83 128 obstack_sgrow (oout, prologue);
b6610515
RA
129}
130
2ba3b73c 131
426cf563 132
a870c567 133
e9955c83
AD
134/*----------------------.
135| Handle the epilogue. |
136`----------------------*/
426cf563 137
e9955c83 138void
0c15323d 139epilogue_set (const char *epilogue, location_t location)
2ba3b73c 140{
e9955c83 141 if (!no_lines_flag)
1ff442ca 142 {
592e8d4d 143 obstack_fgrow2 (&muscle_obstack, muscle_find ("linef"),
0c15323d
AD
144 location.first_line,
145 quotearg_style (c_quoting_style,
146 muscle_find ("filename")));
1ff442ca 147 }
592e8d4d
AD
148 obstack_sgrow (&muscle_obstack, epilogue);
149 obstack_1grow (&muscle_obstack, 0);
150 muscle_insert ("epilogue", obstack_finish (&muscle_obstack));
1ff442ca 151}
1ff442ca 152
a70083a3 153
a70083a3
AD
154\f
155
a70083a3
AD
156/*-------------------------------------------------------------------.
157| Generate a dummy symbol, a nonterminal, whose name cannot conflict |
158| with the user's names. |
159`-------------------------------------------------------------------*/
1ff442ca 160
db8837cb 161static symbol_t *
118fb205 162gensym (void)
1ff442ca 163{
274d42ce
AD
164 /* Incremented for each generated symbol */
165 static int gensym_count = 0;
166 static char buf[256];
167
db8837cb 168 symbol_t *sym;
1ff442ca 169
274d42ce 170 sprintf (buf, "@%d", ++gensym_count);
e9955c83 171 sym = getsym (buf);
d7020c20 172 sym->class = nterm_sym;
d9b739c3 173 sym->number = nvars++;
36281465 174 return sym;
1ff442ca 175}
a70083a3 176\f
107f7dfb
AD
177/*-------------------------------------------------------------------.
178| Parse the input grammar into a single symbol_list structure. Each |
179| rule is represented by a sequence of symbols: the left hand side |
180| followed by the contents of the right hand side, followed by a |
181| null pointer instead of a symbol to terminate the rule. The next |
182| symbol is the lhs of the following rule. |
183| |
fdbcd8e2
AD
184| All actions are copied out, labelled by the rule number they apply |
185| to. |
107f7dfb
AD
186| |
187| Bison used to allow some %directives in the rules sections, but |
188| this is no longer considered appropriate: (i) the documented grammar |
189| doesn't claim it, (ii), it would promote bad style, (iii), error |
190| recovery for %directives consists in skipping the junk until a `%' |
191| is seen, which helps synchronizing. This scheme is definitely wrong |
192| in the rules section. |
193`-------------------------------------------------------------------*/
1ff442ca 194
f6d0f937
AD
195/* The (currently) last symbol of GRAMMAR. */
196symbol_list *grammar_end = NULL;
197
198/* Append S to the GRAMMAR. */
e9955c83 199void
8efe435c 200grammar_symbol_append (symbol_t *symbol, location_t location)
f6d0f937 201{
8efe435c 202 symbol_list *p = symbol_list_new (symbol, location);
f6d0f937
AD
203
204 if (grammar_end)
205 grammar_end->next = p;
206 else
207 grammar = p;
208
209 grammar_end = p;
210}
211
8efe435c
AD
212/* The rule currently being defined, and the end of the previous rule.
213 CURRENT_RULE points to the link holding the LHS of the current rule.
214 PREVIOUS_RULE_END is the value GRAMMAR_END had just before that LHS
    was appended, i.e., the last link of what precedes the current
    rule, or NULL when nothing precedes it.  grammar_midrule_action ()
    uses and updates it to splice a new rule in front of the current
    one.  */
da4160c3 215symbol_list *current_rule = NULL;
8efe435c 216symbol_list *previous_rule_end = NULL;
da4160c3
AD
217
218
8efe435c
AD
219/*----------------------------------------------.
220| Create a new rule for LHS in to the GRAMMAR. |
221`----------------------------------------------*/
da4160c3 222
e9955c83 223void
8efe435c 224grammar_rule_begin (symbol_t *lhs, location_t location)
da4160c3
AD
225{
226 if (!start_flag)
227 {
228 startsymbol = lhs;
8efe435c 229 startsymbol_location = location;
da4160c3
AD
230 start_flag = 1;
231 }
232
233 /* Start a new rule and record its lhs. */
234 ++nrules;
235 ++nritems;
236
8efe435c
AD
237 previous_rule_end = grammar_end;
238 grammar_symbol_append (lhs, location);
da4160c3
AD
239 current_rule = grammar_end;
240
241 /* Mark the rule's lhs as a nonterminal if not already so. */
242
243 if (lhs->class == unknown_sym)
244 {
245 lhs->class = nterm_sym;
246 lhs->number = nvars;
247 ++nvars;
248 }
249 else if (lhs->class == token_sym)
250 complain (_("rule given for %s, which is a token"), lhs->tag);
251}
252
e9955c83
AD
253/* Check that the last rule (CURRENT_RULE) is properly defined. For
254 instance, there should be no type clash on the default action. */
255
256static void
257grammar_current_rule_check (void)
258{
259 symbol_t *lhs = current_rule->sym;
260 symbol_t *first_rhs = current_rule->next->sym;
261
262 /* If there is an action, then there is nothing we can do: the user
263 is allowed to shoot in her foot. */
264 if (current_rule->action)
265 return;
266
267 /* If $$ is being set in default way, report if any type mismatch.
268 */
269 if (first_rhs)
270 {
271 const char *lhs_type = lhs->type_name ? lhs->type_name : "";
272 const char *rhs_type = first_rhs->type_name ? first_rhs->type_name : "";
273 if (strcmp (lhs_type, rhs_type))
274 complain (_("type clash (`%s' `%s') on default action"),
275 lhs_type, rhs_type);
276 }
277 /* Warn if there is no default for $$ but we need one. */
278 else
279 {
280 if (lhs->type_name)
281 complain (_("empty rule for typed nonterminal, and no action"));
282 }
283}
284
285
8efe435c
AD
286/*-------------------------------------.
287| End the currently being grown rule. |
288`-------------------------------------*/
e9955c83
AD
289
290void
8efe435c 291grammar_rule_end (location_t location)
e9955c83
AD
292{
293 /* Put an empty link in the list to mark the end of this rule */
8efe435c
AD
294 grammar_symbol_append (NULL, grammar_end->location);
295 current_rule->location = location;
e9955c83
AD
296 grammar_current_rule_check ();
297}
298
299
8efe435c
AD
300/*-------------------------------------------------------------------.
301| The previous action turns out the be a mid-rule action. Attach it |
302| to the current rule, i.e., create a dummy symbol, attach it this |
303| mid-rule action, and append this dummy nonterminal to the current |
304| rule. |
305`-------------------------------------------------------------------*/
1485e106 306
e9955c83 307void
1485e106
AD
308grammar_midrule_action (void)
309{
310 /* Since the action was written out with this rule's number, we must
311 give the new rule this number by inserting the new rule before
312 it. */
313
8efe435c
AD
314 /* Make a DUMMY nonterminal, whose location is that of the midrule
315 action. Create the MIDRULE. */
316 symbol_t *dummy = gensym ();
317 location_t dummy_location = current_rule->action_location;
318 symbol_list *midrule = symbol_list_new (dummy, dummy_location);
1485e106
AD
319
320 /* Make a new rule, whose body is empty, before the current one, so
321 that the action just read can belong to it. */
322 ++nrules;
323 ++nritems;
8efe435c
AD
324 /* Attach its location and actions to that of the DUMMY. */
325 midrule->location = dummy_location;
326 midrule->action = current_rule->action;
327 midrule->action_location = dummy_location;
1485e106
AD
328 current_rule->action = NULL;
329
8efe435c
AD
330 if (previous_rule_end)
331 previous_rule_end->next = midrule;
1485e106 332 else
8efe435c 333 grammar = midrule;
1485e106 334
8efe435c
AD
335 /* End the dummy's rule. */
336 previous_rule_end = symbol_list_new (NULL, dummy_location);
337 previous_rule_end->next = current_rule;
1485e106 338
8efe435c 339 midrule->next = previous_rule_end;
1485e106 340
8efe435c
AD
341 /* Insert the dummy nonterminal replacing the midrule action into
342 the current rule. */
343 grammar_current_rule_symbol_append (dummy, dummy_location);
1485e106
AD
344}
345
9af3fbce
AD
346/* Set the precedence symbol of the current rule to PRECSYM. */
347
e9955c83 348void
9af3fbce
AD
349grammar_current_rule_prec_set (symbol_t *precsym)
350{
351 if (current_rule->ruleprec)
352 complain (_("two @prec's in a row"));
353 current_rule->ruleprec = precsym;
354}
355
2e047461
AD
356/* Attach a SYMBOL to the current rule. If needed, move the previous
357 action as a mid-rule action. */
358
e9955c83 359void
8efe435c 360grammar_current_rule_symbol_append (symbol_t *symbol, location_t location)
2e047461
AD
361{
362 if (current_rule->action)
363 grammar_midrule_action ();
364 ++nritems;
8efe435c 365 grammar_symbol_append (symbol, location);
2e047461
AD
366}
367
368
369/* Attach an ACTION to the current rule. If needed, move the previous
370 action as a mid-rule action. */
371
e9955c83 372void
8efe435c 373grammar_current_rule_action_append (const char *action, location_t location)
2e047461
AD
374{
375 if (current_rule->action)
376 grammar_midrule_action ();
377 current_rule->action = action;
8efe435c 378 current_rule->action_location = location;
2e047461
AD
379}
380
a70083a3 381\f
a70083a3
AD
382/*---------------------------------------------------------------.
383| Convert the rules into the representation using RRHS, RLHS and |
d9b739c3 384| RITEM. |
a70083a3 385`---------------------------------------------------------------*/
1ff442ca 386
4a120d45 387static void
118fb205 388packgram (void)
1ff442ca 389{
0c2d3f4c 390 unsigned int itemno;
a70083a3
AD
391 int ruleno;
392 symbol_list *p;
1ff442ca 393
a900a624 394 ritem = XCALLOC (item_number_t, nritems);
1a2b5d37 395 rules = XCALLOC (rule_t, nrules) - 1;
1ff442ca
NF
396
397 itemno = 0;
398 ruleno = 1;
399
400 p = grammar;
401 while (p)
402 {
db8837cb 403 symbol_t *ruleprec = p->ruleprec;
d7e1f00c 404 rules[ruleno].user_number = ruleno;
c3b407f4 405 rules[ruleno].number = ruleno;
bba97eb2 406 rules[ruleno].lhs = p->sym;
99013900 407 rules[ruleno].rhs = ritem + itemno;
8efe435c 408 rules[ruleno].location = p->location;
1a2b5d37
AD
409 rules[ruleno].useful = TRUE;
410 rules[ruleno].action = p->action;
8efe435c 411 rules[ruleno].action_location = p->action_location;
1ff442ca
NF
412
413 p = p->next;
414 while (p && p->sym)
415 {
a49aecd5 416 /* item_number_t = symbol_number_t.
5fbb0954 417 But the former needs to contain more: negative rule numbers. */
a49aecd5 418 ritem[itemno++] = symbol_number_as_item_number (p->sym->number);
1ff442ca
NF
419 /* A rule gets by default the precedence and associativity
420 of the last token in it. */
d7020c20 421 if (p->sym->class == token_sym)
03b31c0c 422 rules[ruleno].prec = p->sym;
a70083a3
AD
423 if (p)
424 p = p->next;
1ff442ca
NF
425 }
426
427 /* If this rule has a %prec,
a70083a3 428 the specified symbol's precedence replaces the default. */
1ff442ca
NF
429 if (ruleprec)
430 {
03b31c0c
AD
431 rules[ruleno].precsym = ruleprec;
432 rules[ruleno].prec = ruleprec;
1ff442ca 433 }
1ff442ca 434 ritem[itemno++] = -ruleno;
f3849179 435 ++ruleno;
1ff442ca 436
a70083a3
AD
437 if (p)
438 p = p->next;
1ff442ca
NF
439 }
440
5123689b 441 assert (itemno == nritems);
3067fbef
AD
442
443 if (trace_flag)
444 ritem_print (stderr);
1ff442ca 445}
a70083a3 446\f
fdbcd8e2
AD
447/*------------------------------------------------------------------.
448| Read in the grammar specification and record it in the format |
449| described in gram.h. All actions are copied into ACTION_OBSTACK, |
450| in each case forming the body of a C function (YYACTION) which |
451| contains a switch statement to decide which action to execute. |
452`------------------------------------------------------------------*/
a70083a3
AD
453
454void
455reader (void)
456{
e9955c83 457 gram_control_t gram_control;
a70083a3
AD
458 lineno = 1;
459
460 /* Initialize the symbol table. */
db8837cb 461 symbols_new ();
b6610515 462
30171f79
AD
463 /* Construct the axiom symbol. */
464 axiom = getsym ("$axiom");
465 axiom->class = nterm_sym;
d9b739c3 466 axiom->number = nvars++;
30171f79 467
a70083a3
AD
468 /* Construct the error token */
469 errtoken = getsym ("error");
d7020c20 470 errtoken->class = token_sym;
72a23c97 471 errtoken->number = ntokens++;
b6610515 472
a70083a3
AD
473 /* Construct a token that represents all undefined literal tokens.
474 It is always token number 2. */
475 undeftoken = getsym ("$undefined.");
d7020c20 476 undeftoken->class = token_sym;
72a23c97 477 undeftoken->number = ntokens++;
a70083a3 478
331dbc1b 479 /* Initialize the obstacks. */
0dd1580a
RA
480 obstack_init (&pre_prologue_obstack);
481 obstack_init (&post_prologue_obstack);
331dbc1b
AD
482
483 finput = xfopen (infile, "r");
e9955c83
AD
484 gram_in = finput;
485
486 gram_debug = !!getenv ("parse");
487 gram__flex_debug = !!getenv ("scan");
1d6412ad 488 scanner_initialize ();
e9955c83 489 gram_parse (&gram_control);
331dbc1b 490
e9955c83
AD
491 /* Grammar has been read. Do some checking */
492 if (nrules == 0)
493 fatal (_("no rules in the input grammar"));
494
495 /* Report any undefined symbols and consider them nonterminals. */
496 symbols_check_defined ();
b7c49edf
AD
497
498 /* If the user did not define her EOFTOKEN, do it now. */
499 if (!eoftoken)
500 {
501 eoftoken = getsym ("$");
502 eoftoken->class = token_sym;
72a23c97 503 eoftoken->number = 0;
b7c49edf
AD
504 /* Value specified by POSIX. */
505 eoftoken->user_token_number = 0;
506 }
507
e9955c83
AD
508 /* Insert the initial rule, which line is that of the first rule
509 (not that of the start symbol):
510
511 axiom: %start EOF. */
512 {
8efe435c
AD
513 symbol_list *p = symbol_list_new (axiom, empty_location);
514 p->location = grammar->location;
515 p->next = symbol_list_new (startsymbol, empty_location);
516 p->next->next = symbol_list_new (eoftoken, empty_location);
517 p->next->next->next = symbol_list_new (NULL, empty_location);
e9955c83
AD
518 p->next->next->next->next = grammar;
519 nrules += 1;
520 nritems += 3;
521 grammar = p;
522 }
523
524 if (nsyms > SHRT_MAX)
525 fatal (_("too many symbols (tokens plus nonterminals); maximum %d"),
526 SHRT_MAX);
527
528 assert (nsyms == ntokens + nvars);
b0c4483e 529
331dbc1b
AD
530 xfclose (finput);
531
a70083a3
AD
532 /* Assign the symbols their symbol numbers. Write #defines for the
533 token symbols into FDEFINES if requested. */
2f1afb73 534 symbols_pack ();
93ede233 535
a70083a3
AD
536 /* Convert the grammar into the format described in gram.h. */
537 packgram ();
8419d367
AD
538
539 /* The grammar as a symbol_list is no longer needed. */
540 LIST_FREE (symbol_list, grammar);
a70083a3 541}