]> git.saurik.com Git - bison.git/blame - src/reader.c
Renamed file from bison.glr.
[bison.git] / src / reader.c
CommitLineData
1ff442ca 1/* Input parser for bison
76514394 2 Copyright (C) 1984, 1986, 1989, 1992, 1998, 2000, 2001, 2002
a70083a3 3 Free Software Foundation, Inc.
1ff442ca 4
41aca2e0 5 This file is part of Bison, the GNU Compiler Compiler.
1ff442ca 6
41aca2e0
AD
7 Bison is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 2, or (at your option)
10 any later version.
1ff442ca 11
41aca2e0
AD
12 Bison is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
1ff442ca 16
41aca2e0
AD
17 You should have received a copy of the GNU General Public License
18 along with Bison; see the file COPYING. If not, write to
19 the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
20 Boston, MA 02111-1307, USA. */
1ff442ca
NF
21
22
1ff442ca 23#include "system.h"
2a91a95e
AD
24#include "quotearg.h"
25#include "quote.h"
ceed8467 26#include "getargs.h"
1ff442ca 27#include "files.h"
1ff442ca 28#include "symtab.h"
56c47203 29#include "symlist.h"
1ff442ca 30#include "gram.h"
a0f6b076 31#include "complain.h"
6c89f1c1 32#include "output.h"
b2ca4022 33#include "reader.h"
340ef489 34#include "conflicts.h"
11d82f03 35#include "muscle_tab.h"
1ff442ca 36
1ff442ca 37int lineno;
56c47203 38static symbol_list_t *grammar = NULL;
280a38c3 39static int start_flag = 0;
1ff442ca 40
d7020c20 41/* Nonzero if %union has been seen. */
e9955c83 42int typed = 0;
0d533154 43\f
e9955c83
AD
44/*-----------------------.
45| Set the start symbol. |
46`-----------------------*/
1ff442ca 47
e9955c83 48void
8efe435c 49grammar_start_symbol_set (symbol_t *s, location_t l)
1ff442ca
NF
50{
51 if (start_flag)
e776192e 52 complain_at (l, _("multiple %s declarations"), "%start");
943819bf
RS
53 else
54 {
55 start_flag = 1;
e9955c83 56 startsymbol = s;
8efe435c 57 startsymbol_location = l;
943819bf 58 }
1ff442ca
NF
59}
60
1ff442ca 61
d7020c20 62/*----------------------------------------------------------------.
e9955c83
AD
63| There are two prologues: one before %union, one after. Augment |
64| the current one. |
d7020c20 65`----------------------------------------------------------------*/
1ff442ca 66
e9955c83 67void
0c15323d 68prologue_augment (const char *prologue, location_t location)
b6610515 69{
e9955c83
AD
70 struct obstack *oout =
71 !typed ? &pre_prologue_obstack : &post_prologue_obstack;
b6610515 72
e9955c83 73 if (!no_lines_flag)
b6610515 74 {
e9955c83 75 obstack_fgrow2 (oout, muscle_find ("linef"),
0c15323d
AD
76 location.first_line,
77 quotearg_style (c_quoting_style,
78 muscle_find ("filename")));
b6610515 79 }
e9955c83 80 obstack_sgrow (oout, prologue);
b6610515
RA
81}
82
2ba3b73c 83
426cf563 84
a870c567 85
e9955c83
AD
86/*----------------------.
87| Handle the epilogue. |
88`----------------------*/
426cf563 89
e9955c83 90void
0c15323d 91epilogue_set (const char *epilogue, location_t location)
2ba3b73c 92{
e9955c83 93 if (!no_lines_flag)
1ff442ca 94 {
592e8d4d 95 obstack_fgrow2 (&muscle_obstack, muscle_find ("linef"),
0c15323d
AD
96 location.first_line,
97 quotearg_style (c_quoting_style,
98 muscle_find ("filename")));
1ff442ca 99 }
592e8d4d
AD
100 obstack_sgrow (&muscle_obstack, epilogue);
101 obstack_1grow (&muscle_obstack, 0);
102 muscle_insert ("epilogue", obstack_finish (&muscle_obstack));
1ff442ca 103}
1ff442ca 104
a70083a3 105
a70083a3
AD
106\f
107
a70083a3
AD
108/*-------------------------------------------------------------------.
109| Generate a dummy symbol, a nonterminal, whose name cannot conflict |
110| with the user's names. |
111`-------------------------------------------------------------------*/
1ff442ca 112
db8837cb 113static symbol_t *
ee000ba4 114gensym (location_t location)
1ff442ca 115{
274d42ce
AD
116 /* Incremented for each generated symbol */
117 static int gensym_count = 0;
118 static char buf[256];
119
db8837cb 120 symbol_t *sym;
1ff442ca 121
274d42ce 122 sprintf (buf, "@%d", ++gensym_count);
ee000ba4 123 sym = getsym (buf, location);
d7020c20 124 sym->class = nterm_sym;
d9b739c3 125 sym->number = nvars++;
36281465 126 return sym;
1ff442ca 127}
a70083a3 128\f
107f7dfb 129/*-------------------------------------------------------------------.
56c47203 130| Parse the input grammar into a one symbol_list_t structure. Each |
107f7dfb
AD
131| rule is represented by a sequence of symbols: the left hand side |
132| followed by the contents of the right hand side, followed by a |
133| null pointer instead of a symbol to terminate the rule. The next |
134| symbol is the lhs of the following rule. |
135| |
fdbcd8e2
AD
136| All actions are copied out, labelled by the rule number they apply |
137| to. |
107f7dfb
AD
138| |
139| Bison used to allow some %directives in the rules sections, but |
140| this is no longer considered appropriate: (i) the documented grammar |
141| doesn't claim it, (ii), it would promote bad style, (iii), error |
142| recovery for %directives consists in skipping the junk until a `%' |
143| is seen and helps synchronizing. This scheme is definitely wrong |
144| in the rules section. |
145`-------------------------------------------------------------------*/
1ff442ca 146
f6d0f937 147/* The (currently) last symbol of GRAMMAR. */
56c47203 148symbol_list_t *grammar_end = NULL;
f6d0f937
AD
149
150/* Append S to the GRAMMAR. */
e9955c83 151void
8efe435c 152grammar_symbol_append (symbol_t *symbol, location_t location)
f6d0f937 153{
56c47203 154 symbol_list_t *p = symbol_list_new (symbol, location);
f6d0f937
AD
155
156 if (grammar_end)
157 grammar_end->next = p;
158 else
159 grammar = p;
160
161 grammar_end = p;
162}
163
8efe435c
AD
164/* The rule currently being defined, and the previous rule.
165 CURRENT_RULE points to the first LHS of the current rule, while
166 PREVIOUS_RULE_END points to the *end* of the previous rule (NULL). */
56c47203
AD
167symbol_list_t *current_rule = NULL;
168symbol_list_t *previous_rule_end = NULL;
da4160c3
AD
169
170
8efe435c
AD
171/*----------------------------------------------.
172| Create a new rule for LHS in the GRAMMAR. |
173`----------------------------------------------*/
da4160c3 174
e9955c83 175void
8efe435c 176grammar_rule_begin (symbol_t *lhs, location_t location)
da4160c3
AD
177{
178 if (!start_flag)
179 {
180 startsymbol = lhs;
8efe435c 181 startsymbol_location = location;
da4160c3
AD
182 start_flag = 1;
183 }
184
185 /* Start a new rule and record its lhs. */
186 ++nrules;
187 ++nritems;
188
8efe435c
AD
189 previous_rule_end = grammar_end;
190 grammar_symbol_append (lhs, location);
da4160c3
AD
191 current_rule = grammar_end;
192
193 /* Mark the rule's lhs as a nonterminal if not already so. */
194
195 if (lhs->class == unknown_sym)
196 {
197 lhs->class = nterm_sym;
198 lhs->number = nvars;
199 ++nvars;
200 }
201 else if (lhs->class == token_sym)
e776192e 202 complain_at (location, _("rule given for %s, which is a token"), lhs->tag);
da4160c3
AD
203}
204
e9955c83
AD
205/* Check that the last rule (CURRENT_RULE) is properly defined. For
206 instance, there should be no type clash on the default action. */
207
208static void
209grammar_current_rule_check (void)
210{
211 symbol_t *lhs = current_rule->sym;
212 symbol_t *first_rhs = current_rule->next->sym;
213
214 /* If there is an action, then there is nothing we can do: the user
215 is allowed to shoot in her foot. */
216 if (current_rule->action)
217 return;
218
219 /* If $$ is being set in default way, report if any type mismatch.
220 */
221 if (first_rhs)
222 {
223 const char *lhs_type = lhs->type_name ? lhs->type_name : "";
224 const char *rhs_type = first_rhs->type_name ? first_rhs->type_name : "";
225 if (strcmp (lhs_type, rhs_type))
e776192e
AD
226 complain_at (current_rule->location,
227 _("type clash (`%s' `%s') on default action"),
228 lhs_type, rhs_type);
e9955c83
AD
229 }
230 /* Warn if there is no default for $$ but we need one. */
231 else
232 {
233 if (lhs->type_name)
e776192e
AD
234 complain_at (current_rule->location,
235 _("empty rule for typed nonterminal, and no action"));
e9955c83
AD
236 }
237}
238
239
8efe435c
AD
240/*-------------------------------------.
241| End the currently being grown rule. |
242`-------------------------------------*/
e9955c83
AD
243
244void
8efe435c 245grammar_rule_end (location_t location)
e9955c83
AD
246{
247 /* Put an empty link in the list to mark the end of this rule */
8efe435c
AD
248 grammar_symbol_append (NULL, grammar_end->location);
249 current_rule->location = location;
e9955c83
AD
250 grammar_current_rule_check ();
251}
252
253
8efe435c
AD
254/*-------------------------------------------------------------------.
255| The previous action turns out to be a mid-rule action. Attach it |
256| to the current rule, i.e., create a dummy symbol, attach to it this |
257| mid-rule action, and append this dummy nonterminal to the current |
258| rule. |
259`-------------------------------------------------------------------*/
1485e106 260
e9955c83 261void
1485e106
AD
262grammar_midrule_action (void)
263{
264 /* Since the action was written out with this rule's number, we must
265 give the new rule this number by inserting the new rule before
266 it. */
267
8efe435c
AD
268 /* Make a DUMMY nonterminal, whose location is that of the midrule
269 action. Create the MIDRULE. */
8efe435c 270 location_t dummy_location = current_rule->action_location;
ee000ba4 271 symbol_t *dummy = gensym (dummy_location);
56c47203 272 symbol_list_t *midrule = symbol_list_new (dummy, dummy_location);
1485e106
AD
273
274 /* Make a new rule, whose body is empty, before the current one, so
275 that the action just read can belong to it. */
276 ++nrules;
277 ++nritems;
8efe435c
AD
278 /* Attach its location and actions to that of the DUMMY. */
279 midrule->location = dummy_location;
280 midrule->action = current_rule->action;
281 midrule->action_location = dummy_location;
1485e106
AD
282 current_rule->action = NULL;
283
8efe435c
AD
284 if (previous_rule_end)
285 previous_rule_end->next = midrule;
1485e106 286 else
8efe435c 287 grammar = midrule;
1485e106 288
8efe435c
AD
289 /* End the dummy's rule. */
290 previous_rule_end = symbol_list_new (NULL, dummy_location);
291 previous_rule_end->next = current_rule;
1485e106 292
8efe435c 293 midrule->next = previous_rule_end;
1485e106 294
8efe435c
AD
295 /* Insert the dummy nonterminal replacing the midrule action into
296 the current rule. */
297 grammar_current_rule_symbol_append (dummy, dummy_location);
1485e106
AD
298}
299
9af3fbce
AD
300/* Set the precedence symbol of the current rule to PRECSYM. */
301
e9955c83 302void
e776192e 303grammar_current_rule_prec_set (symbol_t *precsym, location_t location)
9af3fbce
AD
304{
305 if (current_rule->ruleprec)
e776192e 306 complain_at (location, _("two @prec's in a row"));
9af3fbce
AD
307 current_rule->ruleprec = precsym;
308}
309
2e047461
AD
310/* Attach a SYMBOL to the current rule. If needed, move the previous
311 action as a mid-rule action. */
312
e9955c83 313void
8efe435c 314grammar_current_rule_symbol_append (symbol_t *symbol, location_t location)
2e047461
AD
315{
316 if (current_rule->action)
317 grammar_midrule_action ();
318 ++nritems;
8efe435c 319 grammar_symbol_append (symbol, location);
2e047461
AD
320}
321
322
323/* Attach an ACTION to the current rule. If needed, move the previous
324 action as a mid-rule action. */
325
e9955c83 326void
8efe435c 327grammar_current_rule_action_append (const char *action, location_t location)
2e047461
AD
328{
329 if (current_rule->action)
330 grammar_midrule_action ();
331 current_rule->action = action;
8efe435c 332 current_rule->action_location = location;
2e047461
AD
333}
334
a70083a3 335\f
a70083a3
AD
336/*---------------------------------------------------------------.
337| Convert the rules into the representation using RRHS, RLHS and |
d9b739c3 338| RITEM. |
a70083a3 339`---------------------------------------------------------------*/
1ff442ca 340
4a120d45 341static void
118fb205 342packgram (void)
1ff442ca 343{
0c2d3f4c 344 unsigned int itemno;
a70083a3 345 int ruleno;
56c47203 346 symbol_list_t *p;
1ff442ca 347
a900a624 348 ritem = XCALLOC (item_number_t, nritems);
1a2b5d37 349 rules = XCALLOC (rule_t, nrules) - 1;
1ff442ca
NF
350
351 itemno = 0;
352 ruleno = 1;
353
354 p = grammar;
355 while (p)
356 {
db8837cb 357 symbol_t *ruleprec = p->ruleprec;
d7e1f00c 358 rules[ruleno].user_number = ruleno;
c3b407f4 359 rules[ruleno].number = ruleno;
bba97eb2 360 rules[ruleno].lhs = p->sym;
99013900 361 rules[ruleno].rhs = ritem + itemno;
8efe435c 362 rules[ruleno].location = p->location;
1a2b5d37
AD
363 rules[ruleno].useful = TRUE;
364 rules[ruleno].action = p->action;
8efe435c 365 rules[ruleno].action_location = p->action_location;
1ff442ca
NF
366
367 p = p->next;
368 while (p && p->sym)
369 {
a49aecd5 370 /* item_number_t = symbol_number_t.
5fbb0954 371 But the former needs to contain more: negative rule numbers. */
a49aecd5 372 ritem[itemno++] = symbol_number_as_item_number (p->sym->number);
1ff442ca
NF
373 /* A rule gets by default the precedence and associativity
374 of the last token in it. */
d7020c20 375 if (p->sym->class == token_sym)
03b31c0c 376 rules[ruleno].prec = p->sym;
a70083a3
AD
377 if (p)
378 p = p->next;
1ff442ca
NF
379 }
380
381 /* If this rule has a %prec,
a70083a3 382 the specified symbol's precedence replaces the default. */
1ff442ca
NF
383 if (ruleprec)
384 {
03b31c0c
AD
385 rules[ruleno].precsym = ruleprec;
386 rules[ruleno].prec = ruleprec;
1ff442ca 387 }
1ff442ca 388 ritem[itemno++] = -ruleno;
f3849179 389 ++ruleno;
1ff442ca 390
a70083a3
AD
391 if (p)
392 p = p->next;
1ff442ca
NF
393 }
394
5123689b 395 assert (itemno == nritems);
3067fbef
AD
396
397 if (trace_flag)
398 ritem_print (stderr);
1ff442ca 399}
a70083a3 400\f
fdbcd8e2
AD
401/*------------------------------------------------------------------.
402| Read in the grammar specification and record it in the format |
403| described in gram.h. All actions are copied into ACTION_OBSTACK, |
404| in each case forming the body of a C function (YYACTION) which |
405| contains a switch statement to decide which action to execute. |
406`------------------------------------------------------------------*/
a70083a3
AD
407
408void
409reader (void)
410{
e9955c83 411 gram_control_t gram_control;
a70083a3
AD
412 lineno = 1;
413
414 /* Initialize the symbol table. */
db8837cb 415 symbols_new ();
b6610515 416
30171f79 417 /* Construct the axiom symbol. */
ee000ba4 418 axiom = getsym ("$axiom", empty_location);
30171f79 419 axiom->class = nterm_sym;
d9b739c3 420 axiom->number = nvars++;
30171f79 421
a70083a3 422 /* Construct the error token */
ee000ba4 423 errtoken = getsym ("error", empty_location);
d7020c20 424 errtoken->class = token_sym;
72a23c97 425 errtoken->number = ntokens++;
b6610515 426
a70083a3
AD
427 /* Construct a token that represents all undefined literal tokens.
428 It is always token number 2. */
ee000ba4 429 undeftoken = getsym ("$undefined.", empty_location);
d7020c20 430 undeftoken->class = token_sym;
72a23c97 431 undeftoken->number = ntokens++;
a70083a3 432
331dbc1b 433 /* Initialize the obstacks. */
0dd1580a
RA
434 obstack_init (&pre_prologue_obstack);
435 obstack_init (&post_prologue_obstack);
331dbc1b
AD
436
437 finput = xfopen (infile, "r");
e9955c83
AD
438 gram_in = finput;
439
440 gram_debug = !!getenv ("parse");
441 gram__flex_debug = !!getenv ("scan");
1d6412ad 442 scanner_initialize ();
e9955c83 443 gram_parse (&gram_control);
331dbc1b 444
e9955c83
AD
445 /* Grammar has been read. Do some checking */
446 if (nrules == 0)
447 fatal (_("no rules in the input grammar"));
448
449 /* Report any undefined symbols and consider them nonterminals. */
450 symbols_check_defined ();
b7c49edf
AD
451
452 /* If the user did not define her EOFTOKEN, do it now. */
453 if (!eoftoken)
454 {
ee000ba4 455 eoftoken = getsym ("$", empty_location);
b7c49edf 456 eoftoken->class = token_sym;
72a23c97 457 eoftoken->number = 0;
b7c49edf
AD
458 /* Value specified by POSIX. */
459 eoftoken->user_token_number = 0;
460 }
461
e9955c83
AD
462 /* Insert the initial rule, which line is that of the first rule
463 (not that of the start symbol):
464
465 axiom: %start EOF. */
466 {
56c47203 467 symbol_list_t *p = symbol_list_new (axiom, empty_location);
8efe435c
AD
468 p->location = grammar->location;
469 p->next = symbol_list_new (startsymbol, empty_location);
470 p->next->next = symbol_list_new (eoftoken, empty_location);
471 p->next->next->next = symbol_list_new (NULL, empty_location);
e9955c83
AD
472 p->next->next->next->next = grammar;
473 nrules += 1;
474 nritems += 3;
475 grammar = p;
476 }
477
478 if (nsyms > SHRT_MAX)
479 fatal (_("too many symbols (tokens plus nonterminals); maximum %d"),
480 SHRT_MAX);
481
482 assert (nsyms == ntokens + nvars);
b0c4483e 483
331dbc1b
AD
484 xfclose (finput);
485
a70083a3
AD
486 /* Assign the symbols their symbol numbers. Write #defines for the
487 token symbols into FDEFINES if requested. */
2f1afb73 488 symbols_pack ();
93ede233 489
a70083a3
AD
490 /* Convert the grammar into the format described in gram.h. */
491 packgram ();
8419d367 492
56c47203
AD
493 /* The grammar as a symbol_list_t is no longer needed. */
494 LIST_FREE (symbol_list_t, grammar);
a70083a3 495}