]> git.saurik.com Git - bison.git/blame - src/reader.c
Initial revision.
[bison.git] / src / reader.c
CommitLineData
35dcf428 1/* Input parser for Bison
76514394 2 Copyright (C) 1984, 1986, 1989, 1992, 1998, 2000, 2001, 2002
a70083a3 3 Free Software Foundation, Inc.
1ff442ca 4
41aca2e0 5 This file is part of Bison, the GNU Compiler Compiler.
1ff442ca 6
41aca2e0
AD
7 Bison is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 2, or (at your option)
10 any later version.
1ff442ca 11
41aca2e0
AD
12 Bison is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
1ff442ca 16
41aca2e0
AD
17 You should have received a copy of the GNU General Public License
18 along with Bison; see the file COPYING. If not, write to
19 the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
20 Boston, MA 02111-1307, USA. */
1ff442ca
NF
21
22
1ff442ca 23#include "system.h"
2a91a95e 24#include "quotearg.h"
ceed8467 25#include "getargs.h"
1ff442ca 26#include "files.h"
1ff442ca 27#include "symtab.h"
56c47203 28#include "symlist.h"
1ff442ca 29#include "gram.h"
a0f6b076 30#include "complain.h"
6c89f1c1 31#include "output.h"
b2ca4022 32#include "reader.h"
340ef489 33#include "conflicts.h"
11d82f03 34#include "muscle_tab.h"
1ff442ca 35
56c47203 36static symbol_list_t *grammar = NULL;
280a38c3 37static int start_flag = 0;
676385e2 38merger_list *merge_functions;
1ff442ca 39
d7020c20 40/* Nonzero if %union has been seen. */
e9955c83 41int typed = 0;
0d533154 42\f
e9955c83
AD
43/*-----------------------.
44| Set the start symbol. |
45`-----------------------*/
1ff442ca 46
e9955c83 47void
8efe435c 48grammar_start_symbol_set (symbol_t *s, location_t l)
1ff442ca
NF
49{
50 if (start_flag)
e776192e 51 complain_at (l, _("multiple %s declarations"), "%start");
943819bf
RS
52 else
53 {
54 start_flag = 1;
e9955c83 55 startsymbol = s;
8efe435c 56 startsymbol_location = l;
943819bf 57 }
1ff442ca
NF
58}
59
1ff442ca 60
d7020c20 61/*----------------------------------------------------------------.
e9955c83
AD
62| There are two prologues: one before %union, one after. Augment |
63| the current one. |
d7020c20 64`----------------------------------------------------------------*/
1ff442ca 65
e9955c83 66void
0c15323d 67prologue_augment (const char *prologue, location_t location)
b6610515 68{
e9955c83
AD
69 struct obstack *oout =
70 !typed ? &pre_prologue_obstack : &post_prologue_obstack;
b6610515 71
6c239755 72 obstack_fgrow1 (oout, "]b4_syncline([[%d]], [[",
2073702c
PE
73 location.start.line);
74 MUSCLE_OBSTACK_SGROW (oout, quotearg_style (c_quoting_style,
75 location.start.file));
6c239755 76 obstack_sgrow (oout, "]])[\n");
e9955c83 77 obstack_sgrow (oout, prologue);
b6610515
RA
78}
79
2ba3b73c 80
426cf563 81
a870c567 82
e9955c83
AD
83/*----------------------.
84| Handle the epilogue. |
85`----------------------*/
426cf563 86
e9955c83 87void
7ec2d4cd 88epilogue_augment (const char *epilogue, location_t location)
2ba3b73c 89{
7ec2d4cd 90 char *extension = NULL;
6c239755 91 obstack_fgrow1 (&muscle_obstack, "]b4_syncline([[%d]], [[",
2073702c 92 location.start.line);
6c239755 93 MUSCLE_OBSTACK_SGROW (&muscle_obstack,
2073702c 94 quotearg_style (c_quoting_style, location.start.file));
6c239755 95 obstack_sgrow (&muscle_obstack, "]])[\n");
592e8d4d
AD
96 obstack_sgrow (&muscle_obstack, epilogue);
97 obstack_1grow (&muscle_obstack, 0);
7ec2d4cd
AD
98 extension = obstack_finish (&muscle_obstack);
99 muscle_grow ("epilogue", extension, "");
100 obstack_free (&muscle_obstack, extension);
1ff442ca 101}
1ff442ca 102
a70083a3 103
a70083a3
AD
104\f
105
3e6656f9 106/*-------------------------------------------------------------------.
676385e2
PH
107| Return the merger index for a merging function named NAME, whose |
108| arguments have type TYPE. Records the function, if new, in |
95612cfa 109| MERGER_LIST. |
676385e2
PH
110`-------------------------------------------------------------------*/
111
112static int
95612cfa 113get_merge_function (struniq_t name, struniq_t type, location_t loc)
676385e2
PH
114{
115 merger_list *syms;
116 merger_list head;
117 int n;
118
119 if (! glr_parser)
120 return 0;
121
122 if (type == NULL)
95612cfa 123 type = struniq_new ("");
676385e2
PH
124
125 head.next = merge_functions;
39f41916 126 for (syms = &head, n = 1; syms->next != NULL; syms = syms->next, n += 1)
95612cfa 127 if (STRUNIQ_EQ (name, syms->next->name))
676385e2 128 break;
a5d50994
AD
129 if (syms->next == NULL)
130 {
131 syms->next = XMALLOC (merger_list, 1);
95612cfa
AD
132 syms->next->name = struniq_new (name);
133 syms->next->type = struniq_new (type);
a5d50994
AD
134 syms->next->next = NULL;
135 merge_functions = head.next;
136 }
95612cfa 137 else if (!STRUNIQ_EQ (type, syms->next->type))
45a8a65d
PE
138 warn_at (loc, _("result type clash on merge function %s: <%s> != <%s>"),
139 name, type, syms->next->type);
676385e2
PH
140 return n;
141}
142
143/*--------------------------------------.
144| Free all merge-function definitions. |
145`--------------------------------------*/
146
147void
148free_merger_functions (void)
149{
150 merger_list *L0;
151 if (! glr_parser)
152 return;
153 L0 = merge_functions;
154 while (L0 != NULL)
155 {
156 merger_list *L1 = L0->next;
157 free (L0);
158 L0 = L1;
159 }
160}
161
a70083a3 162\f
107f7dfb 163/*-------------------------------------------------------------------.
32e1e0a4 164| Parse the input grammar into a one symbol_list_t structure. Each |
107f7dfb
AD
165| rule is represented by a sequence of symbols: the left hand side |
166| followed by the contents of the right hand side, followed by a |
167| null pointer instead of a symbol to terminate the rule. The next |
168| symbol is the lhs of the following rule. |
169| |
fdbcd8e2
AD
170| All actions are copied out, labelled by the rule number they apply |
171| to. |
107f7dfb
AD
172| |
173| Bison used to allow some %directives in the rules sections, but |
174| this is no longer consider appropriate: (i) the documented grammar |
175| doesn't claim it, (ii), it would promote bad style, (iii), error |
176| recovery for %directives consists in skipping the junk until a `%' |
177| is seen and helrp synchronizing. This scheme is definitely wrong |
178| in the rules section. |
179`-------------------------------------------------------------------*/
1ff442ca 180
f6d0f937 181/* The (currently) last symbol of GRAMMAR. */
56c47203 182symbol_list_t *grammar_end = NULL;
f6d0f937
AD
183
184/* Append S to the GRAMMAR. */
e9955c83 185void
8efe435c 186grammar_symbol_append (symbol_t *symbol, location_t location)
f6d0f937 187{
56c47203 188 symbol_list_t *p = symbol_list_new (symbol, location);
f6d0f937
AD
189
190 if (grammar_end)
191 grammar_end->next = p;
192 else
193 grammar = p;
194
195 grammar_end = p;
196}
197
8efe435c
AD
198/* The rule currently being defined, and the previous rule.
199 CURRENT_RULE points to the first LHS of the current rule, while
200 PREVIOUS_RULE_END points to the *end* of the previous rule (NULL). */
56c47203
AD
201symbol_list_t *current_rule = NULL;
202symbol_list_t *previous_rule_end = NULL;
da4160c3
AD
203
204
8efe435c
AD
205/*----------------------------------------------.
206| Create a new rule for LHS in to the GRAMMAR. |
207`----------------------------------------------*/
da4160c3 208
e9955c83 209void
8efe435c 210grammar_rule_begin (symbol_t *lhs, location_t location)
da4160c3
AD
211{
212 if (!start_flag)
213 {
214 startsymbol = lhs;
8efe435c 215 startsymbol_location = location;
da4160c3
AD
216 start_flag = 1;
217 }
218
219 /* Start a new rule and record its lhs. */
220 ++nrules;
221 ++nritems;
222
8efe435c
AD
223 previous_rule_end = grammar_end;
224 grammar_symbol_append (lhs, location);
da4160c3
AD
225 current_rule = grammar_end;
226
227 /* Mark the rule's lhs as a nonterminal if not already so. */
228
229 if (lhs->class == unknown_sym)
230 {
231 lhs->class = nterm_sym;
232 lhs->number = nvars;
233 ++nvars;
234 }
235 else if (lhs->class == token_sym)
e776192e 236 complain_at (location, _("rule given for %s, which is a token"), lhs->tag);
da4160c3
AD
237}
238
e9955c83
AD
239/* Check that the last rule (CURRENT_RULE) is properly defined. For
240 instance, there should be no type clash on the default action. */
241
242static void
243grammar_current_rule_check (void)
244{
245 symbol_t *lhs = current_rule->sym;
3f4c0f80 246 char const *lhs_type = lhs->type_name;
e9955c83
AD
247 symbol_t *first_rhs = current_rule->next->sym;
248
249 /* If there is an action, then there is nothing we can do: the user
3f4c0f80 250 is allowed to shoot herself in the foot. */
e9955c83
AD
251 if (current_rule->action)
252 return;
253
3f4c0f80
PE
254 /* Don't worry about the default action if $$ is untyped, since $$'s
255 value can't be used. */
256 if (! lhs_type)
257 return;
258
259 /* If $$ is being set in default way, report if any type mismatch. */
e9955c83
AD
260 if (first_rhs)
261 {
e9955c83 262 const char *rhs_type = first_rhs->type_name ? first_rhs->type_name : "";
95612cfa 263 if (!STRUNIQ_EQ (lhs_type, rhs_type))
e9273511
PE
264 warn_at (current_rule->location,
265 _("type clash on default action: <%s> != <%s>"),
266 lhs_type, rhs_type);
e9955c83
AD
267 }
268 /* Warn if there is no default for $$ but we need one. */
269 else
e9273511
PE
270 warn_at (current_rule->location,
271 _("empty rule for typed nonterminal, and no action"));
e9955c83
AD
272}
273
274
8efe435c
AD
275/*-------------------------------------.
276| End the currently being grown rule. |
277`-------------------------------------*/
e9955c83
AD
278
279void
8efe435c 280grammar_rule_end (location_t location)
e9955c83
AD
281{
282 /* Put an empty link in the list to mark the end of this rule */
8efe435c
AD
283 grammar_symbol_append (NULL, grammar_end->location);
284 current_rule->location = location;
e9955c83
AD
285 grammar_current_rule_check ();
286}
287
288
8efe435c
AD
289/*-------------------------------------------------------------------.
290| The previous action turns out the be a mid-rule action. Attach it |
291| to the current rule, i.e., create a dummy symbol, attach it this |
292| mid-rule action, and append this dummy nonterminal to the current |
293| rule. |
294`-------------------------------------------------------------------*/
1485e106 295
e9955c83 296void
1485e106
AD
297grammar_midrule_action (void)
298{
299 /* Since the action was written out with this rule's number, we must
300 give the new rule this number by inserting the new rule before
301 it. */
302
8efe435c
AD
303 /* Make a DUMMY nonterminal, whose location is that of the midrule
304 action. Create the MIDRULE. */
8efe435c 305 location_t dummy_location = current_rule->action_location;
39f41916 306 symbol_t *dummy = dummy_symbol_get (dummy_location);
56c47203 307 symbol_list_t *midrule = symbol_list_new (dummy, dummy_location);
1485e106
AD
308
309 /* Make a new rule, whose body is empty, before the current one, so
310 that the action just read can belong to it. */
311 ++nrules;
312 ++nritems;
8efe435c
AD
313 /* Attach its location and actions to that of the DUMMY. */
314 midrule->location = dummy_location;
315 midrule->action = current_rule->action;
316 midrule->action_location = dummy_location;
1485e106
AD
317 current_rule->action = NULL;
318
8efe435c
AD
319 if (previous_rule_end)
320 previous_rule_end->next = midrule;
1485e106 321 else
8efe435c 322 grammar = midrule;
1485e106 323
8efe435c
AD
324 /* End the dummy's rule. */
325 previous_rule_end = symbol_list_new (NULL, dummy_location);
326 previous_rule_end->next = current_rule;
1485e106 327
8efe435c 328 midrule->next = previous_rule_end;
1485e106 329
8efe435c
AD
330 /* Insert the dummy nonterminal replacing the midrule action into
331 the current rule. */
332 grammar_current_rule_symbol_append (dummy, dummy_location);
1485e106
AD
333}
334
9af3fbce
AD
335/* Set the precedence symbol of the current rule to PRECSYM. */
336
e9955c83 337void
e776192e 338grammar_current_rule_prec_set (symbol_t *precsym, location_t location)
9af3fbce
AD
339{
340 if (current_rule->ruleprec)
473d0a75 341 complain_at (location, _("only one %s allowed per rule"), "%prec");
9af3fbce
AD
342 current_rule->ruleprec = precsym;
343}
344
676385e2
PH
345/* Attach dynamic precedence DPREC to the current rule. */
346
347void
348grammar_current_rule_dprec_set (int dprec, location_t location)
349{
350 if (! glr_parser)
473d0a75 351 warn_at (location, _("%s affects only GLR parsers"), "%dprec");
676385e2 352 if (dprec <= 0)
473d0a75
AD
353 complain_at (location,
354 _("%s must be followed by positive number"), "%dprec");
39f41916 355 else if (current_rule->dprec != 0)
473d0a75 356 complain_at (location, _("only one %s allowed per rule"), "%dprec");
676385e2
PH
357 current_rule->dprec = dprec;
358}
359
360/* Attach a merge function NAME with argument type TYPE to current
361 rule. */
362
363void
95612cfa 364grammar_current_rule_merge_set (struniq_t name, location_t location)
676385e2
PH
365{
366 if (! glr_parser)
473d0a75 367 warn_at (location, _("%s affects only GLR parsers"), "%merge");
39f41916 368 if (current_rule->merger != 0)
473d0a75 369 complain_at (location, _("only one %s allowed per rule"), "%merge");
39f41916 370 current_rule->merger =
a5d50994 371 get_merge_function (name, current_rule->sym->type_name, location);
676385e2
PH
372}
373
2e047461
AD
374/* Attach a SYMBOL to the current rule. If needed, move the previous
375 action as a mid-rule action. */
376
e9955c83 377void
8efe435c 378grammar_current_rule_symbol_append (symbol_t *symbol, location_t location)
2e047461
AD
379{
380 if (current_rule->action)
381 grammar_midrule_action ();
382 ++nritems;
8efe435c 383 grammar_symbol_append (symbol, location);
2e047461
AD
384}
385
2e047461
AD
386/* Attach an ACTION to the current rule. If needed, move the previous
387 action as a mid-rule action. */
388
e9955c83 389void
8efe435c 390grammar_current_rule_action_append (const char *action, location_t location)
2e047461
AD
391{
392 if (current_rule->action)
393 grammar_midrule_action ();
394 current_rule->action = action;
8efe435c 395 current_rule->action_location = location;
2e047461
AD
396}
397
a70083a3 398\f
a70083a3
AD
399/*---------------------------------------------------------------.
400| Convert the rules into the representation using RRHS, RLHS and |
d9b739c3 401| RITEM. |
a70083a3 402`---------------------------------------------------------------*/
1ff442ca 403
4a120d45 404static void
118fb205 405packgram (void)
1ff442ca 406{
9222837b 407 unsigned int itemno = 0;
4b3d3a8e 408 rule_number_t ruleno = 0;
9222837b 409 symbol_list_t *p = grammar;
1ff442ca 410
a900a624 411 ritem = XCALLOC (item_number_t, nritems);
4b3d3a8e 412 rules = XCALLOC (rule_t, nrules);
1ff442ca 413
1ff442ca
NF
414 while (p)
415 {
db8837cb 416 symbol_t *ruleprec = p->ruleprec;
d7e1f00c 417 rules[ruleno].user_number = ruleno;
c3b407f4 418 rules[ruleno].number = ruleno;
bba97eb2 419 rules[ruleno].lhs = p->sym;
99013900 420 rules[ruleno].rhs = ritem + itemno;
8efe435c 421 rules[ruleno].location = p->location;
b4afb6bb 422 rules[ruleno].useful = true;
1a2b5d37 423 rules[ruleno].action = p->action;
8efe435c 424 rules[ruleno].action_location = p->action_location;
676385e2
PH
425 rules[ruleno].dprec = p->dprec;
426 rules[ruleno].merger = p->merger;
1ff442ca
NF
427
428 p = p->next;
429 while (p && p->sym)
430 {
a49aecd5 431 /* item_number_t = symbol_number_t.
5fbb0954 432 But the former needs to contain more: negative rule numbers. */
a49aecd5 433 ritem[itemno++] = symbol_number_as_item_number (p->sym->number);
1ff442ca
NF
434 /* A rule gets by default the precedence and associativity
435 of the last token in it. */
d7020c20 436 if (p->sym->class == token_sym)
03b31c0c 437 rules[ruleno].prec = p->sym;
a70083a3
AD
438 if (p)
439 p = p->next;
1ff442ca
NF
440 }
441
442 /* If this rule has a %prec,
a70083a3 443 the specified symbol's precedence replaces the default. */
1ff442ca
NF
444 if (ruleprec)
445 {
03b31c0c
AD
446 rules[ruleno].precsym = ruleprec;
447 rules[ruleno].prec = ruleprec;
1ff442ca 448 }
4b3d3a8e 449 ritem[itemno++] = rule_number_as_item_number (ruleno);
f3849179 450 ++ruleno;
1ff442ca 451
a70083a3
AD
452 if (p)
453 p = p->next;
1ff442ca
NF
454 }
455
35dcf428
PE
456 if (itemno != nritems)
457 abort ();
3067fbef 458
273a74fa 459 if (trace_flag & trace_sets)
3067fbef 460 ritem_print (stderr);
1ff442ca 461}
a70083a3 462\f
fdbcd8e2
AD
463/*------------------------------------------------------------------.
464| Read in the grammar specification and record it in the format |
465| described in gram.h. All actions are copied into ACTION_OBSTACK, |
466| in each case forming the body of a C function (YYACTION) which |
467| contains a switch statement to decide which action to execute. |
468`------------------------------------------------------------------*/
a70083a3
AD
469
470void
471reader (void)
472{
a70083a3 473 /* Initialize the symbol table. */
db8837cb 474 symbols_new ();
b6610515 475
88bce5a2
AD
476 /* Construct the accept symbol. */
477 accept = symbol_get ("$accept", empty_location);
478 accept->class = nterm_sym;
479 accept->number = nvars++;
30171f79 480
a70083a3 481 /* Construct the error token */
39f41916 482 errtoken = symbol_get ("error", empty_location);
d7020c20 483 errtoken->class = token_sym;
72a23c97 484 errtoken->number = ntokens++;
b6610515 485
a70083a3
AD
486 /* Construct a token that represents all undefined literal tokens.
487 It is always token number 2. */
88bce5a2 488 undeftoken = symbol_get ("$undefined", empty_location);
d7020c20 489 undeftoken->class = token_sym;
72a23c97 490 undeftoken->number = ntokens++;
a70083a3 491
331dbc1b 492 /* Initialize the obstacks. */
0dd1580a
RA
493 obstack_init (&pre_prologue_obstack);
494 obstack_init (&post_prologue_obstack);
331dbc1b 495
95612cfa 496 finput = xfopen (grammar_file, "r");
e9955c83
AD
497 gram_in = finput;
498
473d0a75
AD
499 gram__flex_debug = trace_flag & trace_scan;
500 gram_debug = trace_flag & trace_parse;
1d6412ad 501 scanner_initialize ();
78c3da9e 502 gram_parse ();
331dbc1b 503
b275314e
AD
504 /* If something went wrong during the parsing, don't try to
505 continue. */
b4afb6bb 506 if (complaint_issued)
f956c304 507 return;
b275314e 508
e9955c83
AD
509 /* Grammar has been read. Do some checking */
510 if (nrules == 0)
511 fatal (_("no rules in the input grammar"));
512
513 /* Report any undefined symbols and consider them nonterminals. */
514 symbols_check_defined ();
b7c49edf 515
88bce5a2
AD
516 /* If the user did not define her ENDTOKEN, do it now. */
517 if (!endtoken)
b7c49edf 518 {
88bce5a2
AD
519 endtoken = symbol_get ("$end", empty_location);
520 endtoken->class = token_sym;
521 endtoken->number = 0;
b7c49edf 522 /* Value specified by POSIX. */
88bce5a2 523 endtoken->user_token_number = 0;
b7c49edf
AD
524 }
525
e9955c83
AD
526 /* Insert the initial rule, which line is that of the first rule
527 (not that of the start symbol):
528
88bce5a2 529 accept: %start EOF. */
e9955c83 530 {
88bce5a2 531 symbol_list_t *p = symbol_list_new (accept, empty_location);
8efe435c
AD
532 p->location = grammar->location;
533 p->next = symbol_list_new (startsymbol, empty_location);
88bce5a2 534 p->next->next = symbol_list_new (endtoken, empty_location);
8efe435c 535 p->next->next->next = symbol_list_new (NULL, empty_location);
e9955c83
AD
536 p->next->next->next->next = grammar;
537 nrules += 1;
538 nritems += 3;
539 grammar = p;
540 }
541
35dcf428
PE
542 if (! (nsyms <= SYMBOL_NUMBER_MAX && nsyms == ntokens + nvars))
543 abort ();
b0c4483e 544
331dbc1b
AD
545 xfclose (finput);
546
a70083a3
AD
547 /* Assign the symbols their symbol numbers. Write #defines for the
548 token symbols into FDEFINES if requested. */
2f1afb73 549 symbols_pack ();
93ede233 550
a70083a3
AD
551 /* Convert the grammar into the format described in gram.h. */
552 packgram ();
8419d367 553
56c47203
AD
554 /* The grammar as a symbol_list_t is no longer needed. */
555 LIST_FREE (symbol_list_t, grammar);
a70083a3 556}