]> git.saurik.com Git - bison.git/blame - src/reader.c
* src/location.h: New, extracted from...
[bison.git] / src / reader.c
CommitLineData
1ff442ca 1/* Input parser for bison
76514394 2 Copyright (C) 1984, 1986, 1989, 1992, 1998, 2000, 2001, 2002
a70083a3 3 Free Software Foundation, Inc.
1ff442ca 4
41aca2e0 5 This file is part of Bison, the GNU Compiler Compiler.
1ff442ca 6
41aca2e0
AD
7 Bison is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 2, or (at your option)
10 any later version.
1ff442ca 11
41aca2e0
AD
12 Bison is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
1ff442ca 16
41aca2e0
AD
17 You should have received a copy of the GNU General Public License
18 along with Bison; see the file COPYING. If not, write to
19 the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
20 Boston, MA 02111-1307, USA. */
1ff442ca
NF
21
22
1ff442ca 23#include "system.h"
2a91a95e
AD
24#include "quotearg.h"
25#include "quote.h"
ceed8467 26#include "getargs.h"
1ff442ca 27#include "files.h"
1ff442ca 28#include "symtab.h"
82b6d266 29#include "options.h"
1ff442ca 30#include "gram.h"
a0f6b076 31#include "complain.h"
6c89f1c1 32#include "output.h"
b2ca4022 33#include "reader.h"
340ef489 34#include "conflicts.h"
11d82f03 35#include "muscle_tab.h"
1ff442ca 36
1ff442ca 37int lineno;
280a38c3
AD
38static symbol_list *grammar = NULL;
39static int start_flag = 0;
1ff442ca 40
d7020c20 41/* Nonzero if %union has been seen. */
e9955c83 42int typed = 0;
1ff442ca 43
6255b435 44static symbol_list *
db8837cb 45symbol_list_new (symbol_t *sym)
b29b2ed5
AD
46{
47 symbol_list *res = XMALLOC (symbol_list, 1);
48 res->next = NULL;
49 res->sym = sym;
50 res->line = lineno;
d945f5cd
AD
51 res->action = NULL;
52 res->action_line = 0;
b29b2ed5
AD
53 res->ruleprec = NULL;
54 return res;
55}
56
0d533154 57\f
79282c5a
AD
58/*--------------------------------------------------------------.
59| Get the data type (alternative in the union) of the value for |
60| symbol N in rule RULE. |
61`--------------------------------------------------------------*/
62
e9955c83 63char *
b29b2ed5 64get_type_name (int n, symbol_list *rule)
79282c5a
AD
65{
66 int i;
67 symbol_list *rp;
68
69 if (n < 0)
70 {
71 complain (_("invalid $ value"));
72 return NULL;
73 }
74
75 rp = rule;
76 i = 0;
77
78 while (i < n)
79 {
80 rp = rp->next;
81 if (rp == NULL || rp->sym == NULL)
82 {
83 complain (_("invalid $ value"));
84 return NULL;
85 }
f3849179 86 ++i;
79282c5a
AD
87 }
88
89 return rp->sym->type_name;
90}
a083fbbf 91
1ff442ca 92
e9955c83
AD
93/*-----------------------.
94| Set the start symbol. |
95`-----------------------*/
1ff442ca 96
e9955c83
AD
97void
98grammar_start_symbol_set (symbol_t *s)
1ff442ca
NF
99{
100 if (start_flag)
27821bff 101 complain (_("multiple %s declarations"), "%start");
943819bf
RS
102 else
103 {
104 start_flag = 1;
e9955c83 105 startsymbol = s;
943819bf 106 }
1ff442ca
NF
107}
108
1ff442ca 109
d7020c20 110/*----------------------------------------------------------------.
e9955c83
AD
111| There are two prologues: one before %union, one after. Augment |
112| the current one. |
d7020c20 113`----------------------------------------------------------------*/
1ff442ca 114
e9955c83 115void
0c15323d 116prologue_augment (const char *prologue, location_t location)
b6610515 117{
e9955c83
AD
118 struct obstack *oout =
119 !typed ? &pre_prologue_obstack : &post_prologue_obstack;
b6610515 120
e9955c83 121 if (!no_lines_flag)
b6610515 122 {
e9955c83 123 obstack_fgrow2 (oout, muscle_find ("linef"),
0c15323d
AD
124 location.first_line,
125 quotearg_style (c_quoting_style,
126 muscle_find ("filename")));
b6610515 127 }
e9955c83 128 obstack_sgrow (oout, prologue);
b6610515
RA
129}
130
2ba3b73c 131
426cf563 132
a870c567 133
e9955c83
AD
134/*----------------------.
135| Handle the epilogue. |
136`----------------------*/
426cf563 137
e9955c83 138void
0c15323d 139epilogue_set (const char *epilogue, location_t location)
2ba3b73c 140{
e9955c83
AD
141 struct obstack el_obstack;
142 obstack_init (&el_obstack);
1ff442ca 143
e9955c83 144 if (!no_lines_flag)
1ff442ca 145 {
e9955c83 146 obstack_fgrow2 (&el_obstack, muscle_find ("linef"),
0c15323d
AD
147 location.first_line,
148 quotearg_style (c_quoting_style,
149 muscle_find ("filename")));
1ff442ca 150 }
e9955c83
AD
151 obstack_sgrow (&el_obstack, epilogue);
152 obstack_1grow (&el_obstack, 0);
153 muscle_insert ("epilogue", obstack_finish (&el_obstack));
1ff442ca 154}
1ff442ca 155
a70083a3 156
a70083a3
AD
157\f
158
a70083a3
AD
159/*-------------------------------------------------------------------.
160| Generate a dummy symbol, a nonterminal, whose name cannot conflict |
161| with the user's names. |
162`-------------------------------------------------------------------*/
1ff442ca 163
db8837cb 164static symbol_t *
118fb205 165gensym (void)
1ff442ca 166{
274d42ce
AD
167 /* Incremented for each generated symbol */
168 static int gensym_count = 0;
169 static char buf[256];
170
db8837cb 171 symbol_t *sym;
1ff442ca 172
274d42ce 173 sprintf (buf, "@%d", ++gensym_count);
e9955c83 174 sym = getsym (buf);
d7020c20 175 sym->class = nterm_sym;
d9b739c3 176 sym->number = nvars++;
36281465 177 return sym;
1ff442ca 178}
a70083a3 179\f
107f7dfb
AD
180/*-------------------------------------------------------------------.
181| Parse the input grammar into a single symbol_list structure. Each |
182| rule is represented by a sequence of symbols: the left hand side |
183| followed by the contents of the right hand side, followed by a |
184| null pointer instead of a symbol to terminate the rule. The next |
185| symbol is the lhs of the following rule. |
186| |
fdbcd8e2
AD
187| All actions are copied out, labelled by the rule number they apply |
188| to. |
107f7dfb
AD
189| |
190| Bison used to allow some %directives in the rules sections, but |
191| this is no longer considered appropriate: (i) the documented grammar |
192| doesn't claim it, (ii), it would promote bad style, (iii), error |
193| recovery for %directives consists in skipping the junk until a `%' |
194| is seen and helps synchronizing. This scheme is definitely wrong |
195| in the rules section. |
196`-------------------------------------------------------------------*/
1ff442ca 197
f6d0f937
AD
198/* The (currently) last symbol of GRAMMAR. */
199symbol_list *grammar_end = NULL;
200
201/* Append S to the GRAMMAR. */
e9955c83 202void
f6d0f937
AD
203grammar_symbol_append (symbol_t *s)
204{
205 symbol_list *p = symbol_list_new (s);
206
207 if (grammar_end)
208 grammar_end->next = p;
209 else
210 grammar = p;
211
212 grammar_end = p;
213}
214
da4160c3
AD
215/* The rule currently being defined, and the previous rule. Point to
216 the first symbol of each list: their lhs. */
217symbol_list *current_rule = NULL;
218symbol_list *previous_rule = NULL;
219
220
221/* Create a new rule for LHS in to the GRAMMAR. */
222
e9955c83 223void
da4160c3
AD
224grammar_rule_begin (symbol_t *lhs)
225{
226 if (!start_flag)
227 {
228 startsymbol = lhs;
229 start_flag = 1;
230 }
231
232 /* Start a new rule and record its lhs. */
233 ++nrules;
234 ++nritems;
235
236 previous_rule = grammar_end;
237 grammar_symbol_append (lhs);
238 current_rule = grammar_end;
239
240 /* Mark the rule's lhs as a nonterminal if not already so. */
241
242 if (lhs->class == unknown_sym)
243 {
244 lhs->class = nterm_sym;
245 lhs->number = nvars;
246 ++nvars;
247 }
248 else if (lhs->class == token_sym)
249 complain (_("rule given for %s, which is a token"), lhs->tag);
250}
251
e9955c83
AD
252/* Check that the last rule (CURRENT_RULE) is properly defined. For
253 instance, there should be no type clash on the default action. */
254
255static void
256grammar_current_rule_check (void)
257{
258 symbol_t *lhs = current_rule->sym;
259 symbol_t *first_rhs = current_rule->next->sym;
260
261 /* If there is an action, then there is nothing we can do: the user
262 is allowed to shoot in her foot. */
263 if (current_rule->action)
264 return;
265
266 /* If $$ is being set in default way, report if any type mismatch.
267 */
268 if (first_rhs)
269 {
270 const char *lhs_type = lhs->type_name ? lhs->type_name : "";
271 const char *rhs_type = first_rhs->type_name ? first_rhs->type_name : "";
272 if (strcmp (lhs_type, rhs_type))
273 complain (_("type clash (`%s' `%s') on default action"),
274 lhs_type, rhs_type);
275 }
276 /* Warn if there is no default for $$ but we need one. */
277 else
278 {
279 if (lhs->type_name)
280 complain (_("empty rule for typed nonterminal, and no action"));
281 }
282}
283
284
285/* End the currently being grown rule. */
286
287void
288grammar_rule_end (void)
289{
290 /* Put an empty link in the list to mark the end of this rule */
291 grammar_symbol_append (NULL);
292 grammar_current_rule_check ();
293}
294
295
1485e106
AD
296/* The previous action turns out the be a mid-rule action. Attach it
297 to the current rule, i.e., create a dummy symbol, attach it this
298 mid-rule action, and append this dummy nonterminal to the current
299 rule. */
300
e9955c83 301void
1485e106
AD
302grammar_midrule_action (void)
303{
304 /* Since the action was written out with this rule's number, we must
305 give the new rule this number by inserting the new rule before
306 it. */
307
308 /* Make a dummy nonterminal, a gensym. */
309 symbol_t *sdummy = gensym ();
310 symbol_list *midrule_action = symbol_list_new (sdummy);
311
312 /* Make a new rule, whose body is empty, before the current one, so
313 that the action just read can belong to it. */
314 ++nrules;
315 ++nritems;
316 /* Attach its lineno to that of the host rule. */
317 midrule_action->line = current_rule->line;
318 /* Move the action from the host rule to this one. */
319 midrule_action->action = current_rule->action;
320 midrule_action->action_line = current_rule->action_line;
321 current_rule->action = NULL;
322
323 if (previous_rule)
324 previous_rule->next = midrule_action;
325 else
326 grammar = midrule_action;
327
328 /* End of the rule. */
329 previous_rule = symbol_list_new (NULL);
330 previous_rule->next = current_rule;
331
332 midrule_action->next = previous_rule;
333
334 /* Insert the dummy generated by that rule into this rule. */
335 ++nritems;
336 grammar_symbol_append (sdummy);
337}
338
9af3fbce
AD
339/* Set the precedence symbol of the current rule to PRECSYM. */
340
e9955c83 341void
9af3fbce
AD
342grammar_current_rule_prec_set (symbol_t *precsym)
343{
344 if (current_rule->ruleprec)
345 complain (_("two @prec's in a row"));
346 current_rule->ruleprec = precsym;
347}
348
2e047461
AD
349/* Attach a SYMBOL to the current rule. If needed, move the previous
350 action as a mid-rule action. */
351
e9955c83 352void
2e047461
AD
353grammar_current_rule_symbol_append (symbol_t *symbol)
354{
355 if (current_rule->action)
356 grammar_midrule_action ();
357 ++nritems;
358 grammar_symbol_append (symbol);
359}
360
361
362/* Attach an ACTION to the current rule. If needed, move the previous
363 action as a mid-rule action. */
364
e9955c83
AD
365void
366grammar_current_rule_action_append (const char *action, int action_line)
2e047461
AD
367{
368 if (current_rule->action)
369 grammar_midrule_action ();
370 current_rule->action = action;
371 current_rule->action_line = action_line;
372}
373
a70083a3 374\f
a70083a3
AD
375/*---------------------------------------------------------------.
376| Convert the rules into the representation using RRHS, RLHS and |
d9b739c3 377| RITEM. |
a70083a3 378`---------------------------------------------------------------*/
1ff442ca 379
4a120d45 380static void
118fb205 381packgram (void)
1ff442ca 382{
0c2d3f4c 383 unsigned int itemno;
a70083a3
AD
384 int ruleno;
385 symbol_list *p;
1ff442ca 386
a900a624 387 ritem = XCALLOC (item_number_t, nritems);
1a2b5d37 388 rules = XCALLOC (rule_t, nrules) - 1;
1ff442ca
NF
389
390 itemno = 0;
391 ruleno = 1;
392
393 p = grammar;
394 while (p)
395 {
db8837cb 396 symbol_t *ruleprec = p->ruleprec;
d7e1f00c 397 rules[ruleno].user_number = ruleno;
c3b407f4 398 rules[ruleno].number = ruleno;
bba97eb2 399 rules[ruleno].lhs = p->sym;
99013900 400 rules[ruleno].rhs = ritem + itemno;
1a2b5d37
AD
401 rules[ruleno].line = p->line;
402 rules[ruleno].useful = TRUE;
403 rules[ruleno].action = p->action;
404 rules[ruleno].action_line = p->action_line;
1ff442ca
NF
405
406 p = p->next;
407 while (p && p->sym)
408 {
a49aecd5 409 /* item_number_t = symbol_number_t.
5fbb0954 410 But the former needs to contain more: negative rule numbers. */
a49aecd5 411 ritem[itemno++] = symbol_number_as_item_number (p->sym->number);
1ff442ca
NF
412 /* A rule gets by default the precedence and associativity
413 of the last token in it. */
d7020c20 414 if (p->sym->class == token_sym)
03b31c0c 415 rules[ruleno].prec = p->sym;
a70083a3
AD
416 if (p)
417 p = p->next;
1ff442ca
NF
418 }
419
420 /* If this rule has a %prec,
a70083a3 421 the specified symbol's precedence replaces the default. */
1ff442ca
NF
422 if (ruleprec)
423 {
03b31c0c
AD
424 rules[ruleno].precsym = ruleprec;
425 rules[ruleno].prec = ruleprec;
1ff442ca 426 }
1ff442ca 427 ritem[itemno++] = -ruleno;
f3849179 428 ++ruleno;
1ff442ca 429
a70083a3
AD
430 if (p)
431 p = p->next;
1ff442ca
NF
432 }
433
5123689b 434 assert (itemno == nritems);
3067fbef
AD
435
436 if (trace_flag)
437 ritem_print (stderr);
1ff442ca 438}
a70083a3 439\f
fdbcd8e2
AD
440/*------------------------------------------------------------------.
441| Read in the grammar specification and record it in the format |
442| described in gram.h. All actions are copied into ACTION_OBSTACK, |
443| in each case forming the body of a C function (YYACTION) which |
444| contains a switch statement to decide which action to execute. |
445`------------------------------------------------------------------*/
a70083a3
AD
446
447void
448reader (void)
449{
e9955c83 450 gram_control_t gram_control;
a70083a3
AD
451 lineno = 1;
452
11d82f03
MA
453 /* Initialize the muscle obstack. */
454 obstack_init (&muscle_obstack);
82e236e2 455
a70083a3 456 /* Initialize the symbol table. */
db8837cb 457 symbols_new ();
b6610515 458
30171f79
AD
459 /* Construct the axiom symbol. */
460 axiom = getsym ("$axiom");
461 axiom->class = nterm_sym;
d9b739c3 462 axiom->number = nvars++;
30171f79 463
a70083a3
AD
464 /* Construct the error token */
465 errtoken = getsym ("error");
d7020c20 466 errtoken->class = token_sym;
72a23c97 467 errtoken->number = ntokens++;
b6610515 468
a70083a3
AD
469 /* Construct a token that represents all undefined literal tokens.
470 It is always token number 2. */
471 undeftoken = getsym ("$undefined.");
d7020c20 472 undeftoken->class = token_sym;
72a23c97 473 undeftoken->number = ntokens++;
a70083a3 474
331dbc1b
AD
475 /* Initialize the obstacks. */
476 obstack_init (&action_obstack);
331dbc1b 477 obstack_init (&output_obstack);
0dd1580a
RA
478 obstack_init (&pre_prologue_obstack);
479 obstack_init (&post_prologue_obstack);
331dbc1b
AD
480
481 finput = xfopen (infile, "r");
e9955c83
AD
482 gram_in = finput;
483
484 gram_debug = !!getenv ("parse");
485 gram__flex_debug = !!getenv ("scan");
1d6412ad 486 scanner_initialize ();
e9955c83 487 gram_parse (&gram_control);
331dbc1b 488
e9955c83
AD
489 /* Grammar has been read. Do some checking */
490 if (nrules == 0)
491 fatal (_("no rules in the input grammar"));
492
493 /* Report any undefined symbols and consider them nonterminals. */
494 symbols_check_defined ();
b7c49edf
AD
495
496 /* If the user did not define her EOFTOKEN, do it now. */
497 if (!eoftoken)
498 {
499 eoftoken = getsym ("$");
500 eoftoken->class = token_sym;
72a23c97 501 eoftoken->number = 0;
b7c49edf
AD
502 /* Value specified by POSIX. */
503 eoftoken->user_token_number = 0;
504 }
505
e9955c83
AD
506 /* Insert the initial rule, which line is that of the first rule
507 (not that of the start symbol):
508
509 axiom: %start EOF. */
510 {
511 symbol_list *p = symbol_list_new (axiom);
512 p->line = grammar->line;
513 p->next = symbol_list_new (startsymbol);
514 p->next->next = symbol_list_new (eoftoken);
515 p->next->next->next = symbol_list_new (NULL);
516 p->next->next->next->next = grammar;
517 nrules += 1;
518 nritems += 3;
519 grammar = p;
520 }
521
522 if (nsyms > SHRT_MAX)
523 fatal (_("too many symbols (tokens plus nonterminals); maximum %d"),
524 SHRT_MAX);
525
526 assert (nsyms == ntokens + nvars);
b0c4483e 527
331dbc1b
AD
528 xfclose (finput);
529
a70083a3
AD
530 /* Assign the symbols their symbol numbers. Write #defines for the
531 token symbols into FDEFINES if requested. */
2f1afb73 532 symbols_pack ();
93ede233 533
a70083a3
AD
534 /* Convert the grammar into the format described in gram.h. */
535 packgram ();
8419d367
AD
536
537 /* The grammar as a symbol_list is no longer needed. */
538 LIST_FREE (symbol_list, grammar);
a70083a3 539}