]> git.saurik.com Git - bison.git/blame - src/reader.c
* src/state.h, src/state.c (state_new): New, extracted from...
[bison.git] / src / reader.c
CommitLineData
1ff442ca 1/* Input parser for bison
76514394 2 Copyright (C) 1984, 1986, 1989, 1992, 1998, 2000, 2001, 2002
a70083a3 3 Free Software Foundation, Inc.
1ff442ca 4
41aca2e0 5 This file is part of Bison, the GNU Compiler Compiler.
1ff442ca 6
41aca2e0
AD
7 Bison is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 2, or (at your option)
10 any later version.
1ff442ca 11
41aca2e0
AD
12 Bison is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
1ff442ca 16
41aca2e0
AD
17 You should have received a copy of the GNU General Public License
18 along with Bison; see the file COPYING. If not, write to
19 the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
20 Boston, MA 02111-1307, USA. */
1ff442ca
NF
21
22
1ff442ca 23#include "system.h"
2a91a95e
AD
24#include "quotearg.h"
25#include "quote.h"
ceed8467 26#include "getargs.h"
1ff442ca 27#include "files.h"
1ff442ca 28#include "symtab.h"
56c47203 29#include "symlist.h"
1ff442ca 30#include "gram.h"
a0f6b076 31#include "complain.h"
6c89f1c1 32#include "output.h"
b2ca4022 33#include "reader.h"
340ef489 34#include "conflicts.h"
11d82f03 35#include "muscle_tab.h"
1ff442ca 36
1ff442ca 37int lineno;
56c47203 38static symbol_list_t *grammar = NULL;
280a38c3 39static int start_flag = 0;
676385e2 40merger_list *merge_functions;
1ff442ca 41
d7020c20 42/* Nonzero if %union has been seen. */
e9955c83 43int typed = 0;
0d533154 44\f
e9955c83
AD
45/*-----------------------.
46| Set the start symbol. |
47`-----------------------*/
1ff442ca 48
e9955c83 49void
8efe435c 50grammar_start_symbol_set (symbol_t *s, location_t l)
1ff442ca
NF
51{
52 if (start_flag)
e776192e 53 complain_at (l, _("multiple %s declarations"), "%start");
943819bf
RS
54 else
55 {
56 start_flag = 1;
e9955c83 57 startsymbol = s;
8efe435c 58 startsymbol_location = l;
943819bf 59 }
1ff442ca
NF
60}
61
1ff442ca 62
d7020c20 63/*----------------------------------------------------------------.
e9955c83
AD
64| There are two prologues: one before %union, one after. Augment |
65| the current one. |
d7020c20 66`----------------------------------------------------------------*/
1ff442ca 67
e9955c83 68void
0c15323d 69prologue_augment (const char *prologue, location_t location)
b6610515 70{
e9955c83
AD
71 struct obstack *oout =
72 !typed ? &pre_prologue_obstack : &post_prologue_obstack;
b6610515 73
e9955c83 74 if (!no_lines_flag)
b6610515 75 {
e9955c83 76 obstack_fgrow2 (oout, muscle_find ("linef"),
0c15323d
AD
77 location.first_line,
78 quotearg_style (c_quoting_style,
79 muscle_find ("filename")));
b6610515 80 }
e9955c83 81 obstack_sgrow (oout, prologue);
b6610515
RA
82}
83
2ba3b73c 84
426cf563 85
a870c567 86
e9955c83
AD
87/*----------------------.
88| Handle the epilogue. |
89`----------------------*/
426cf563 90
e9955c83 91void
0c15323d 92epilogue_set (const char *epilogue, location_t location)
2ba3b73c 93{
e9955c83 94 if (!no_lines_flag)
1ff442ca 95 {
592e8d4d 96 obstack_fgrow2 (&muscle_obstack, muscle_find ("linef"),
0c15323d
AD
97 location.first_line,
98 quotearg_style (c_quoting_style,
99 muscle_find ("filename")));
1ff442ca 100 }
592e8d4d
AD
101 obstack_sgrow (&muscle_obstack, epilogue);
102 obstack_1grow (&muscle_obstack, 0);
103 muscle_insert ("epilogue", obstack_finish (&muscle_obstack));
1ff442ca 104}
1ff442ca 105
a70083a3 106
a70083a3
AD
107\f
108
676385e2
PH
109 /*-------------------------------------------------------------------.
110| Return the merger index for a merging function named NAME, whose |
111| arguments have type TYPE. Records the function, if new, in |
112| merger_list. |
113`-------------------------------------------------------------------*/
114
115static int
116get_merge_function (const char* name, const char* type)
117{
118 merger_list *syms;
119 merger_list head;
120 int n;
121
122 if (! glr_parser)
123 return 0;
124
125 if (type == NULL)
126 type = "";
127
128 head.next = merge_functions;
39f41916 129 for (syms = &head, n = 1; syms->next != NULL; syms = syms->next, n += 1)
676385e2
PH
130 if (strcmp (name, syms->next->name) == 0)
131 break;
132 if (syms->next == NULL) {
133 syms->next = XMALLOC (merger_list, 1);
134 syms->next->name = strdup (name);
135 syms->next->type = strdup (type);
136 syms->next->next = NULL;
137 merge_functions = head.next;
138 } else if (strcmp (type, syms->next->type) != 0)
39f41916 139 warn (_("result type clash on merge function %s: `%s' vs. `%s'"),
676385e2
PH
140 name, type, syms->next->type);
141 return n;
142}
143
144/*--------------------------------------.
145| Free all merge-function definitions. |
146`--------------------------------------*/
147
148void
149free_merger_functions (void)
150{
151 merger_list *L0;
152 if (! glr_parser)
153 return;
154 L0 = merge_functions;
155 while (L0 != NULL)
156 {
157 merger_list *L1 = L0->next;
158 free (L0);
159 L0 = L1;
160 }
161}
162
a70083a3 163\f
107f7dfb 164/*-------------------------------------------------------------------.
56c47203 165| Parse the input grammar into a one symbol_list_t structure. Each |
107f7dfb
AD
166| rule is represented by a sequence of symbols: the left hand side |
167| followed by the contents of the right hand side, followed by a |
168| null pointer instead of a symbol to terminate the rule. The next |
169| symbol is the lhs of the following rule. |
170| |
fdbcd8e2
AD
171| All actions are copied out, labelled by the rule number they apply |
172| to. |
107f7dfb
AD
173| |
174| Bison used to allow some %directives in the rules sections, but |
175| this is no longer consider appropriate: (i) the documented grammar |
176| doesn't claim it, (ii), it would promote bad style, (iii), error |
177| recovery for %directives consists in skipping the junk until a `%' |
178| is seen and helrp synchronizing. This scheme is definitely wrong |
179| in the rules section. |
180`-------------------------------------------------------------------*/
1ff442ca 181
f6d0f937 182/* The (currently) last symbol of GRAMMAR. */
56c47203 183symbol_list_t *grammar_end = NULL;
f6d0f937
AD
184
185/* Append S to the GRAMMAR. */
e9955c83 186void
8efe435c 187grammar_symbol_append (symbol_t *symbol, location_t location)
f6d0f937 188{
56c47203 189 symbol_list_t *p = symbol_list_new (symbol, location);
f6d0f937
AD
190
191 if (grammar_end)
192 grammar_end->next = p;
193 else
194 grammar = p;
195
196 grammar_end = p;
197}
198
8efe435c
AD
199/* The rule currently being defined, and the previous rule.
200 CURRENT_RULE points to the first LHS of the current rule, while
201 PREVIOUS_RULE_END points to the *end* of the previous rule (NULL). */
56c47203
AD
202symbol_list_t *current_rule = NULL;
203symbol_list_t *previous_rule_end = NULL;
da4160c3
AD
204
205
8efe435c
AD
206/*----------------------------------------------.
207| Create a new rule for LHS in to the GRAMMAR. |
208`----------------------------------------------*/
da4160c3 209
e9955c83 210void
8efe435c 211grammar_rule_begin (symbol_t *lhs, location_t location)
da4160c3
AD
212{
213 if (!start_flag)
214 {
215 startsymbol = lhs;
8efe435c 216 startsymbol_location = location;
da4160c3
AD
217 start_flag = 1;
218 }
219
220 /* Start a new rule and record its lhs. */
221 ++nrules;
222 ++nritems;
223
8efe435c
AD
224 previous_rule_end = grammar_end;
225 grammar_symbol_append (lhs, location);
da4160c3
AD
226 current_rule = grammar_end;
227
228 /* Mark the rule's lhs as a nonterminal if not already so. */
229
230 if (lhs->class == unknown_sym)
231 {
232 lhs->class = nterm_sym;
233 lhs->number = nvars;
234 ++nvars;
235 }
236 else if (lhs->class == token_sym)
e776192e 237 complain_at (location, _("rule given for %s, which is a token"), lhs->tag);
da4160c3
AD
238}
239
e9955c83
AD
240/* Check that the last rule (CURRENT_RULE) is properly defined. For
241 instance, there should be no type clash on the default action. */
242
243static void
244grammar_current_rule_check (void)
245{
246 symbol_t *lhs = current_rule->sym;
247 symbol_t *first_rhs = current_rule->next->sym;
248
249 /* If there is an action, then there is nothing we can do: the user
250 is allowed to shoot in her foot. */
251 if (current_rule->action)
252 return;
253
254 /* If $$ is being set in default way, report if any type mismatch.
255 */
256 if (first_rhs)
257 {
258 const char *lhs_type = lhs->type_name ? lhs->type_name : "";
259 const char *rhs_type = first_rhs->type_name ? first_rhs->type_name : "";
260 if (strcmp (lhs_type, rhs_type))
e776192e
AD
261 complain_at (current_rule->location,
262 _("type clash (`%s' `%s') on default action"),
263 lhs_type, rhs_type);
e9955c83
AD
264 }
265 /* Warn if there is no default for $$ but we need one. */
266 else
267 {
268 if (lhs->type_name)
e776192e
AD
269 complain_at (current_rule->location,
270 _("empty rule for typed nonterminal, and no action"));
e9955c83
AD
271 }
272}
273
274
8efe435c
AD
275/*-------------------------------------.
276| End the currently being grown rule. |
277`-------------------------------------*/
e9955c83
AD
278
279void
8efe435c 280grammar_rule_end (location_t location)
e9955c83
AD
281{
282 /* Put an empty link in the list to mark the end of this rule */
8efe435c
AD
283 grammar_symbol_append (NULL, grammar_end->location);
284 current_rule->location = location;
e9955c83
AD
285 grammar_current_rule_check ();
286}
287
288
8efe435c
AD
289/*-------------------------------------------------------------------.
290| The previous action turns out the be a mid-rule action. Attach it |
291| to the current rule, i.e., create a dummy symbol, attach it this |
292| mid-rule action, and append this dummy nonterminal to the current |
293| rule. |
294`-------------------------------------------------------------------*/
1485e106 295
e9955c83 296void
1485e106
AD
297grammar_midrule_action (void)
298{
299 /* Since the action was written out with this rule's number, we must
300 give the new rule this number by inserting the new rule before
301 it. */
302
8efe435c
AD
303 /* Make a DUMMY nonterminal, whose location is that of the midrule
304 action. Create the MIDRULE. */
8efe435c 305 location_t dummy_location = current_rule->action_location;
39f41916 306 symbol_t *dummy = dummy_symbol_get (dummy_location);
56c47203 307 symbol_list_t *midrule = symbol_list_new (dummy, dummy_location);
1485e106
AD
308
309 /* Make a new rule, whose body is empty, before the current one, so
310 that the action just read can belong to it. */
311 ++nrules;
312 ++nritems;
8efe435c
AD
313 /* Attach its location and actions to that of the DUMMY. */
314 midrule->location = dummy_location;
315 midrule->action = current_rule->action;
316 midrule->action_location = dummy_location;
1485e106
AD
317 current_rule->action = NULL;
318
8efe435c
AD
319 if (previous_rule_end)
320 previous_rule_end->next = midrule;
1485e106 321 else
8efe435c 322 grammar = midrule;
1485e106 323
8efe435c
AD
324 /* End the dummy's rule. */
325 previous_rule_end = symbol_list_new (NULL, dummy_location);
326 previous_rule_end->next = current_rule;
1485e106 327
8efe435c 328 midrule->next = previous_rule_end;
1485e106 329
8efe435c
AD
330 /* Insert the dummy nonterminal replacing the midrule action into
331 the current rule. */
332 grammar_current_rule_symbol_append (dummy, dummy_location);
1485e106
AD
333}
334
9af3fbce
AD
335/* Set the precedence symbol of the current rule to PRECSYM. */
336
e9955c83 337void
e776192e 338grammar_current_rule_prec_set (symbol_t *precsym, location_t location)
9af3fbce
AD
339{
340 if (current_rule->ruleprec)
e776192e 341 complain_at (location, _("two @prec's in a row"));
9af3fbce
AD
342 current_rule->ruleprec = precsym;
343}
344
676385e2
PH
345/* Attach dynamic precedence DPREC to the current rule. */
346
347void
348grammar_current_rule_dprec_set (int dprec, location_t location)
349{
350 if (! glr_parser)
351 warn_at (location, _("%%dprec affects only GLR parsers"));
352 if (dprec <= 0)
353 complain_at (location, _("%%dprec must be followed by positive number"));
39f41916 354 else if (current_rule->dprec != 0)
676385e2
PH
355 complain_at (location, _("only one %%dprec allowed per rule"));
356 current_rule->dprec = dprec;
357}
358
359/* Attach a merge function NAME with argument type TYPE to current
360 rule. */
361
362void
363grammar_current_rule_merge_set (const char* name, location_t location)
364{
365 if (! glr_parser)
366 warn_at (location, _("%%merge affects only GLR parsers"));
39f41916 367 if (current_rule->merger != 0)
676385e2 368 complain_at (location, _("only one %%merge allowed per rule"));
39f41916 369 current_rule->merger =
676385e2
PH
370 get_merge_function (name, current_rule->sym->type_name);
371}
372
2e047461
AD
373/* Attach a SYMBOL to the current rule. If needed, move the previous
374 action as a mid-rule action. */
375
e9955c83 376void
8efe435c 377grammar_current_rule_symbol_append (symbol_t *symbol, location_t location)
2e047461
AD
378{
379 if (current_rule->action)
380 grammar_midrule_action ();
381 ++nritems;
8efe435c 382 grammar_symbol_append (symbol, location);
2e047461
AD
383}
384
2e047461
AD
385/* Attach an ACTION to the current rule. If needed, move the previous
386 action as a mid-rule action. */
387
e9955c83 388void
8efe435c 389grammar_current_rule_action_append (const char *action, location_t location)
2e047461
AD
390{
391 if (current_rule->action)
392 grammar_midrule_action ();
393 current_rule->action = action;
8efe435c 394 current_rule->action_location = location;
2e047461
AD
395}
396
a70083a3 397\f
a70083a3
AD
398/*---------------------------------------------------------------.
399| Convert the rules into the representation using RRHS, RLHS and |
d9b739c3 400| RITEM. |
a70083a3 401`---------------------------------------------------------------*/
1ff442ca 402
4a120d45 403static void
118fb205 404packgram (void)
1ff442ca 405{
0c2d3f4c 406 unsigned int itemno;
a70083a3 407 int ruleno;
56c47203 408 symbol_list_t *p;
1ff442ca 409
a900a624 410 ritem = XCALLOC (item_number_t, nritems);
1a2b5d37 411 rules = XCALLOC (rule_t, nrules) - 1;
1ff442ca
NF
412
413 itemno = 0;
414 ruleno = 1;
415
416 p = grammar;
417 while (p)
418 {
db8837cb 419 symbol_t *ruleprec = p->ruleprec;
d7e1f00c 420 rules[ruleno].user_number = ruleno;
c3b407f4 421 rules[ruleno].number = ruleno;
bba97eb2 422 rules[ruleno].lhs = p->sym;
99013900 423 rules[ruleno].rhs = ritem + itemno;
8efe435c 424 rules[ruleno].location = p->location;
1a2b5d37
AD
425 rules[ruleno].useful = TRUE;
426 rules[ruleno].action = p->action;
8efe435c 427 rules[ruleno].action_location = p->action_location;
676385e2
PH
428 rules[ruleno].dprec = p->dprec;
429 rules[ruleno].merger = p->merger;
1ff442ca
NF
430
431 p = p->next;
432 while (p && p->sym)
433 {
a49aecd5 434 /* item_number_t = symbol_number_t.
5fbb0954 435 But the former needs to contain more: negative rule numbers. */
a49aecd5 436 ritem[itemno++] = symbol_number_as_item_number (p->sym->number);
1ff442ca
NF
437 /* A rule gets by default the precedence and associativity
438 of the last token in it. */
d7020c20 439 if (p->sym->class == token_sym)
03b31c0c 440 rules[ruleno].prec = p->sym;
a70083a3
AD
441 if (p)
442 p = p->next;
1ff442ca
NF
443 }
444
445 /* If this rule has a %prec,
a70083a3 446 the specified symbol's precedence replaces the default. */
1ff442ca
NF
447 if (ruleprec)
448 {
03b31c0c
AD
449 rules[ruleno].precsym = ruleprec;
450 rules[ruleno].prec = ruleprec;
1ff442ca 451 }
1ff442ca 452 ritem[itemno++] = -ruleno;
f3849179 453 ++ruleno;
1ff442ca 454
a70083a3
AD
455 if (p)
456 p = p->next;
1ff442ca
NF
457 }
458
5123689b 459 assert (itemno == nritems);
3067fbef
AD
460
461 if (trace_flag)
462 ritem_print (stderr);
1ff442ca 463}
a70083a3 464\f
fdbcd8e2
AD
465/*------------------------------------------------------------------.
466| Read in the grammar specification and record it in the format |
467| described in gram.h. All actions are copied into ACTION_OBSTACK, |
468| in each case forming the body of a C function (YYACTION) which |
469| contains a switch statement to decide which action to execute. |
470`------------------------------------------------------------------*/
a70083a3
AD
471
472void
473reader (void)
474{
e9955c83 475 gram_control_t gram_control;
a70083a3
AD
476 lineno = 1;
477
478 /* Initialize the symbol table. */
db8837cb 479 symbols_new ();
b6610515 480
30171f79 481 /* Construct the axiom symbol. */
39f41916 482 axiom = symbol_get ("$axiom", empty_location);
30171f79 483 axiom->class = nterm_sym;
d9b739c3 484 axiom->number = nvars++;
30171f79 485
a70083a3 486 /* Construct the error token */
39f41916 487 errtoken = symbol_get ("error", empty_location);
d7020c20 488 errtoken->class = token_sym;
72a23c97 489 errtoken->number = ntokens++;
b6610515 490
a70083a3
AD
491 /* Construct a token that represents all undefined literal tokens.
492 It is always token number 2. */
39f41916 493 undeftoken = symbol_get ("$undefined.", empty_location);
d7020c20 494 undeftoken->class = token_sym;
72a23c97 495 undeftoken->number = ntokens++;
a70083a3 496
331dbc1b 497 /* Initialize the obstacks. */
0dd1580a
RA
498 obstack_init (&pre_prologue_obstack);
499 obstack_init (&post_prologue_obstack);
331dbc1b
AD
500
501 finput = xfopen (infile, "r");
e9955c83
AD
502 gram_in = finput;
503
504 gram_debug = !!getenv ("parse");
505 gram__flex_debug = !!getenv ("scan");
1d6412ad 506 scanner_initialize ();
e9955c83 507 gram_parse (&gram_control);
331dbc1b 508
e9955c83
AD
509 /* Grammar has been read. Do some checking */
510 if (nrules == 0)
511 fatal (_("no rules in the input grammar"));
512
513 /* Report any undefined symbols and consider them nonterminals. */
514 symbols_check_defined ();
b7c49edf
AD
515
516 /* If the user did not define her EOFTOKEN, do it now. */
517 if (!eoftoken)
518 {
39f41916 519 eoftoken = symbol_get ("$", empty_location);
b7c49edf 520 eoftoken->class = token_sym;
72a23c97 521 eoftoken->number = 0;
b7c49edf
AD
522 /* Value specified by POSIX. */
523 eoftoken->user_token_number = 0;
524 }
525
e9955c83
AD
526 /* Insert the initial rule, which line is that of the first rule
527 (not that of the start symbol):
528
529 axiom: %start EOF. */
530 {
56c47203 531 symbol_list_t *p = symbol_list_new (axiom, empty_location);
8efe435c
AD
532 p->location = grammar->location;
533 p->next = symbol_list_new (startsymbol, empty_location);
534 p->next->next = symbol_list_new (eoftoken, empty_location);
535 p->next->next->next = symbol_list_new (NULL, empty_location);
e9955c83
AD
536 p->next->next->next->next = grammar;
537 nrules += 1;
538 nritems += 3;
539 grammar = p;
540 }
541
542 if (nsyms > SHRT_MAX)
543 fatal (_("too many symbols (tokens plus nonterminals); maximum %d"),
544 SHRT_MAX);
545
546 assert (nsyms == ntokens + nvars);
b0c4483e 547
331dbc1b
AD
548 xfclose (finput);
549
a70083a3
AD
550 /* Assign the symbols their symbol numbers. Write #defines for the
551 token symbols into FDEFINES if requested. */
2f1afb73 552 symbols_pack ();
93ede233 553
a70083a3
AD
554 /* Convert the grammar into the format described in gram.h. */
555 packgram ();
8419d367 556
56c47203
AD
557 /* The grammar as a symbol_list_t is no longer needed. */
558 LIST_FREE (symbol_list_t, grammar);
a70083a3 559}