]> git.saurik.com Git - bison.git/blame - src/reader.c
(_AT_TEST_GLR_CXXTYPES): Do not include <assert.h>.
[bison.git] / src / reader.c
CommitLineData
35dcf428 1/* Input parser for Bison
76514394 2 Copyright (C) 1984, 1986, 1989, 1992, 1998, 2000, 2001, 2002
a70083a3 3 Free Software Foundation, Inc.
1ff442ca 4
41aca2e0 5 This file is part of Bison, the GNU Compiler Compiler.
1ff442ca 6
41aca2e0
AD
7 Bison is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 2, or (at your option)
10 any later version.
1ff442ca 11
41aca2e0
AD
12 Bison is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
1ff442ca 16
41aca2e0
AD
17 You should have received a copy of the GNU General Public License
18 along with Bison; see the file COPYING. If not, write to
19 the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
20 Boston, MA 02111-1307, USA. */
1ff442ca
NF
21
22
1ff442ca 23#include "system.h"
2a91a95e 24#include "quotearg.h"
ceed8467 25#include "getargs.h"
1ff442ca 26#include "files.h"
1ff442ca 27#include "symtab.h"
56c47203 28#include "symlist.h"
1ff442ca 29#include "gram.h"
a0f6b076 30#include "complain.h"
6c89f1c1 31#include "output.h"
b2ca4022 32#include "reader.h"
340ef489 33#include "conflicts.h"
11d82f03 34#include "muscle_tab.h"
1ff442ca 35
/* Head of the symbol list built while the grammar is read; it is set
   by grammar_symbol_append when the list is empty and may be replaced
   when a rule is inserted in front of the first one.  */
56c47203 36static symbol_list_t *grammar = NULL;
/* Nonzero once a start symbol has been recorded, either through
   grammar_start_symbol_set or by the first call to
   grammar_rule_begin.  */
280a38c3 37static int start_flag = 0;
/* Head of the list of merge functions maintained by
   get_merge_function.  */
676385e2 38merger_list *merge_functions;
1ff442ca 39
d7020c20 40/* Nonzero if %union has been seen. */
e9955c83 41int typed = 0;
0d533154 42\f
e9955c83
AD
43/*-----------------------.
44| Set the start symbol. |
45`-----------------------*/
1ff442ca 46
e9955c83 47void
8efe435c 48grammar_start_symbol_set (symbol_t *s, location_t l)
1ff442ca
NF
49{
50 if (start_flag)
e776192e 51 complain_at (l, _("multiple %s declarations"), "%start");
943819bf
RS
52 else
53 {
54 start_flag = 1;
e9955c83 55 startsymbol = s;
8efe435c 56 startsymbol_location = l;
943819bf 57 }
1ff442ca
NF
58}
59
1ff442ca 60
d7020c20 61/*----------------------------------------------------------------.
e9955c83
AD
62| There are two prologues: one before %union, one after. Augment |
63| the current one. |
d7020c20 64`----------------------------------------------------------------*/
1ff442ca 65
e9955c83 66void
0c15323d 67prologue_augment (const char *prologue, location_t location)
b6610515 68{
e9955c83
AD
69 struct obstack *oout =
70 !typed ? &pre_prologue_obstack : &post_prologue_obstack;
b6610515 71
6c239755
PE
72 obstack_fgrow1 (oout, "]b4_syncline([[%d]], [[",
73 location.first_line);
74 MUSCLE_OBSTACK_SGROW (oout, quotearg_style (c_quoting_style, location.file));
75 obstack_sgrow (oout, "]])[\n");
e9955c83 76 obstack_sgrow (oout, prologue);
b6610515
RA
77}
78
2ba3b73c 79
426cf563 80
a870c567 81
e9955c83
AD
82/*----------------------.
83| Handle the epilogue. |
84`----------------------*/
426cf563 85
e9955c83 86void
7ec2d4cd 87epilogue_augment (const char *epilogue, location_t location)
2ba3b73c 88{
7ec2d4cd 89 char *extension = NULL;
6c239755
PE
90 obstack_fgrow1 (&muscle_obstack, "]b4_syncline([[%d]], [[",
91 location.first_line);
92 MUSCLE_OBSTACK_SGROW (&muscle_obstack,
93 quotearg_style (c_quoting_style, location.file));
94 obstack_sgrow (&muscle_obstack, "]])[\n");
592e8d4d
AD
95 obstack_sgrow (&muscle_obstack, epilogue);
96 obstack_1grow (&muscle_obstack, 0);
7ec2d4cd
AD
97 extension = obstack_finish (&muscle_obstack);
98 muscle_grow ("epilogue", extension, "");
99 obstack_free (&muscle_obstack, extension);
1ff442ca 100}
1ff442ca 101
a70083a3 102
a70083a3
AD
103\f
104
3e6656f9 105/*-------------------------------------------------------------------.
676385e2
PH
106| Return the merger index for a merging function named NAME, whose |
107| arguments have type TYPE. Records the function, if new, in |
95612cfa 108| MERGER_LIST. |
676385e2
PH
109`-------------------------------------------------------------------*/
110
111static int
95612cfa 112get_merge_function (struniq_t name, struniq_t type, location_t loc)
676385e2
PH
113{
114 merger_list *syms;
115 merger_list head;
116 int n;
117
118 if (! glr_parser)
119 return 0;
120
121 if (type == NULL)
95612cfa 122 type = struniq_new ("");
676385e2
PH
123
124 head.next = merge_functions;
39f41916 125 for (syms = &head, n = 1; syms->next != NULL; syms = syms->next, n += 1)
95612cfa 126 if (STRUNIQ_EQ (name, syms->next->name))
676385e2 127 break;
a5d50994
AD
128 if (syms->next == NULL)
129 {
130 syms->next = XMALLOC (merger_list, 1);
95612cfa
AD
131 syms->next->name = struniq_new (name);
132 syms->next->type = struniq_new (type);
a5d50994
AD
133 syms->next->next = NULL;
134 merge_functions = head.next;
135 }
95612cfa 136 else if (!STRUNIQ_EQ (type, syms->next->type))
45a8a65d
PE
137 warn_at (loc, _("result type clash on merge function %s: <%s> != <%s>"),
138 name, type, syms->next->type);
676385e2
PH
139 return n;
140}
141
142/*--------------------------------------.
143| Free all merge-function definitions. |
144`--------------------------------------*/
145
146void
147free_merger_functions (void)
148{
149 merger_list *L0;
150 if (! glr_parser)
151 return;
152 L0 = merge_functions;
153 while (L0 != NULL)
154 {
155 merger_list *L1 = L0->next;
156 free (L0);
157 L0 = L1;
158 }
159}
160
a70083a3 161\f
107f7dfb 162/*-------------------------------------------------------------------.
32e1e0a4 163| Parse the input grammar into one symbol_list_t structure. Each |
107f7dfb
AD
164| rule is represented by a sequence of symbols: the left hand side |
165| followed by the contents of the right hand side, followed by a |
166| null pointer instead of a symbol to terminate the rule. The next |
167| symbol is the lhs of the following rule. |
168| |
fdbcd8e2
AD
169| All actions are copied out, labelled by the rule number they apply |
170| to. |
107f7dfb
AD
171| |
172| Bison used to allow some %directives in the rules sections, but |
173| this is no longer considered appropriate: (i) the documented grammar |
174| doesn't claim it, (ii), it would promote bad style, (iii), error |
175| recovery for %directives consists in skipping the junk until a `%' |
176| is seen, which helps synchronizing. This scheme is definitely wrong |
177| in the rules section. |
178`-------------------------------------------------------------------*/
1ff442ca 179
f6d0f937 180/* The (currently) last symbol of GRAMMAR. */
56c47203 181symbol_list_t *grammar_end = NULL;
f6d0f937
AD
182
183/* Append S to the GRAMMAR. */
e9955c83 184void
8efe435c 185grammar_symbol_append (symbol_t *symbol, location_t location)
f6d0f937 186{
56c47203 187 symbol_list_t *p = symbol_list_new (symbol, location);
f6d0f937
AD
188
189 if (grammar_end)
190 grammar_end->next = p;
191 else
192 grammar = p;
193
194 grammar_end = p;
195}
196
8efe435c
AD
197/* The rule currently being defined, and the previous rule.
198 CURRENT_RULE points to the first LHS of the current rule, while
199 PREVIOUS_RULE_END points to the *end* of the previous rule (NULL). */
56c47203
AD
200symbol_list_t *current_rule = NULL;
201symbol_list_t *previous_rule_end = NULL;
da4160c3
AD
202
203
8efe435c
AD
204/*----------------------------------------------.
205| Create a new rule for LHS in to the GRAMMAR. |
206`----------------------------------------------*/
da4160c3 207
e9955c83 208void
8efe435c 209grammar_rule_begin (symbol_t *lhs, location_t location)
da4160c3
AD
210{
211 if (!start_flag)
212 {
213 startsymbol = lhs;
8efe435c 214 startsymbol_location = location;
da4160c3
AD
215 start_flag = 1;
216 }
217
218 /* Start a new rule and record its lhs. */
219 ++nrules;
220 ++nritems;
221
8efe435c
AD
222 previous_rule_end = grammar_end;
223 grammar_symbol_append (lhs, location);
da4160c3
AD
224 current_rule = grammar_end;
225
226 /* Mark the rule's lhs as a nonterminal if not already so. */
227
228 if (lhs->class == unknown_sym)
229 {
230 lhs->class = nterm_sym;
231 lhs->number = nvars;
232 ++nvars;
233 }
234 else if (lhs->class == token_sym)
e776192e 235 complain_at (location, _("rule given for %s, which is a token"), lhs->tag);
da4160c3
AD
236}
237
e9955c83
AD
238/* Check that the last rule (CURRENT_RULE) is properly defined. For
239 instance, there should be no type clash on the default action. */
240
241static void
242grammar_current_rule_check (void)
243{
244 symbol_t *lhs = current_rule->sym;
3f4c0f80 245 char const *lhs_type = lhs->type_name;
e9955c83
AD
246 symbol_t *first_rhs = current_rule->next->sym;
247
248 /* If there is an action, then there is nothing we can do: the user
3f4c0f80 249 is allowed to shoot herself in the foot. */
e9955c83
AD
250 if (current_rule->action)
251 return;
252
3f4c0f80
PE
253 /* Don't worry about the default action if $$ is untyped, since $$'s
254 value can't be used. */
255 if (! lhs_type)
256 return;
257
258 /* If $$ is being set in default way, report if any type mismatch. */
e9955c83
AD
259 if (first_rhs)
260 {
e9955c83 261 const char *rhs_type = first_rhs->type_name ? first_rhs->type_name : "";
95612cfa 262 if (!STRUNIQ_EQ (lhs_type, rhs_type))
e776192e 263 complain_at (current_rule->location,
45a8a65d 264 _("type clash on default action: <%s> != <%s>"),
e776192e 265 lhs_type, rhs_type);
e9955c83
AD
266 }
267 /* Warn if there is no default for $$ but we need one. */
268 else
3f4c0f80
PE
269 complain_at (current_rule->location,
270 _("empty rule for typed nonterminal, and no action"));
e9955c83
AD
271}
272
273
8efe435c
AD
274/*-------------------------------------.
275| End the currently being grown rule. |
276`-------------------------------------*/
e9955c83
AD
277
278void
8efe435c 279grammar_rule_end (location_t location)
e9955c83
AD
280{
281 /* Put an empty link in the list to mark the end of this rule */
8efe435c
AD
282 grammar_symbol_append (NULL, grammar_end->location);
283 current_rule->location = location;
e9955c83
AD
284 grammar_current_rule_check ();
285}
286
287
8efe435c
AD
288/*-------------------------------------------------------------------.
289| The previous action turns out the be a mid-rule action. Attach it |
290| to the current rule, i.e., create a dummy symbol, attach it this |
291| mid-rule action, and append this dummy nonterminal to the current |
292| rule. |
293`-------------------------------------------------------------------*/
1485e106 294
e9955c83 295void
1485e106
AD
296grammar_midrule_action (void)
297{
298 /* Since the action was written out with this rule's number, we must
299 give the new rule this number by inserting the new rule before
300 it. */
301
8efe435c
AD
302 /* Make a DUMMY nonterminal, whose location is that of the midrule
303 action. Create the MIDRULE. */
8efe435c 304 location_t dummy_location = current_rule->action_location;
39f41916 305 symbol_t *dummy = dummy_symbol_get (dummy_location);
56c47203 306 symbol_list_t *midrule = symbol_list_new (dummy, dummy_location);
1485e106
AD
307
308 /* Make a new rule, whose body is empty, before the current one, so
309 that the action just read can belong to it. */
310 ++nrules;
311 ++nritems;
8efe435c
AD
312 /* Attach its location and actions to that of the DUMMY. */
313 midrule->location = dummy_location;
314 midrule->action = current_rule->action;
315 midrule->action_location = dummy_location;
1485e106
AD
316 current_rule->action = NULL;
317
8efe435c
AD
318 if (previous_rule_end)
319 previous_rule_end->next = midrule;
1485e106 320 else
8efe435c 321 grammar = midrule;
1485e106 322
8efe435c
AD
323 /* End the dummy's rule. */
324 previous_rule_end = symbol_list_new (NULL, dummy_location);
325 previous_rule_end->next = current_rule;
1485e106 326
8efe435c 327 midrule->next = previous_rule_end;
1485e106 328
8efe435c
AD
329 /* Insert the dummy nonterminal replacing the midrule action into
330 the current rule. */
331 grammar_current_rule_symbol_append (dummy, dummy_location);
1485e106
AD
332}
333
9af3fbce
AD
334/* Set the precedence symbol of the current rule to PRECSYM. */
335
e9955c83 336void
e776192e 337grammar_current_rule_prec_set (symbol_t *precsym, location_t location)
9af3fbce
AD
338{
339 if (current_rule->ruleprec)
473d0a75 340 complain_at (location, _("only one %s allowed per rule"), "%prec");
9af3fbce
AD
341 current_rule->ruleprec = precsym;
342}
343
676385e2
PH
344/* Attach dynamic precedence DPREC to the current rule. */
345
346void
347grammar_current_rule_dprec_set (int dprec, location_t location)
348{
349 if (! glr_parser)
473d0a75 350 warn_at (location, _("%s affects only GLR parsers"), "%dprec");
676385e2 351 if (dprec <= 0)
473d0a75
AD
352 complain_at (location,
353 _("%s must be followed by positive number"), "%dprec");
39f41916 354 else if (current_rule->dprec != 0)
473d0a75 355 complain_at (location, _("only one %s allowed per rule"), "%dprec");
676385e2
PH
356 current_rule->dprec = dprec;
357}
358
359/* Attach a merge function NAME with argument type TYPE to current
360 rule. */
361
362void
95612cfa 363grammar_current_rule_merge_set (struniq_t name, location_t location)
676385e2
PH
364{
365 if (! glr_parser)
473d0a75 366 warn_at (location, _("%s affects only GLR parsers"), "%merge");
39f41916 367 if (current_rule->merger != 0)
473d0a75 368 complain_at (location, _("only one %s allowed per rule"), "%merge");
39f41916 369 current_rule->merger =
a5d50994 370 get_merge_function (name, current_rule->sym->type_name, location);
676385e2
PH
371}
372
2e047461
AD
373/* Attach a SYMBOL to the current rule. If needed, move the previous
374 action as a mid-rule action. */
375
e9955c83 376void
8efe435c 377grammar_current_rule_symbol_append (symbol_t *symbol, location_t location)
2e047461
AD
378{
379 if (current_rule->action)
380 grammar_midrule_action ();
381 ++nritems;
8efe435c 382 grammar_symbol_append (symbol, location);
2e047461
AD
383}
384
2e047461
AD
385/* Attach an ACTION to the current rule. If needed, move the previous
386 action as a mid-rule action. */
387
e9955c83 388void
8efe435c 389grammar_current_rule_action_append (const char *action, location_t location)
2e047461
AD
390{
391 if (current_rule->action)
392 grammar_midrule_action ();
393 current_rule->action = action;
8efe435c 394 current_rule->action_location = location;
2e047461
AD
395}
396
a70083a3 397\f
a70083a3
AD
398/*---------------------------------------------------------------.
399| Convert the rules into the representation using RRHS, RLHS and |
d9b739c3 400| RITEM. |
a70083a3 401`---------------------------------------------------------------*/
1ff442ca 402
4a120d45 403static void
118fb205 404packgram (void)
1ff442ca 405{
9222837b 406 unsigned int itemno = 0;
4b3d3a8e 407 rule_number_t ruleno = 0;
9222837b 408 symbol_list_t *p = grammar;
1ff442ca 409
a900a624 410 ritem = XCALLOC (item_number_t, nritems);
4b3d3a8e 411 rules = XCALLOC (rule_t, nrules);
1ff442ca 412
1ff442ca
NF
413 while (p)
414 {
db8837cb 415 symbol_t *ruleprec = p->ruleprec;
d7e1f00c 416 rules[ruleno].user_number = ruleno;
c3b407f4 417 rules[ruleno].number = ruleno;
bba97eb2 418 rules[ruleno].lhs = p->sym;
99013900 419 rules[ruleno].rhs = ritem + itemno;
8efe435c 420 rules[ruleno].location = p->location;
b4afb6bb 421 rules[ruleno].useful = true;
1a2b5d37 422 rules[ruleno].action = p->action;
8efe435c 423 rules[ruleno].action_location = p->action_location;
676385e2
PH
424 rules[ruleno].dprec = p->dprec;
425 rules[ruleno].merger = p->merger;
1ff442ca
NF
426
427 p = p->next;
428 while (p && p->sym)
429 {
a49aecd5 430 /* item_number_t = symbol_number_t.
5fbb0954 431 But the former needs to contain more: negative rule numbers. */
a49aecd5 432 ritem[itemno++] = symbol_number_as_item_number (p->sym->number);
1ff442ca
NF
433 /* A rule gets by default the precedence and associativity
434 of the last token in it. */
d7020c20 435 if (p->sym->class == token_sym)
03b31c0c 436 rules[ruleno].prec = p->sym;
a70083a3
AD
437 if (p)
438 p = p->next;
1ff442ca
NF
439 }
440
441 /* If this rule has a %prec,
a70083a3 442 the specified symbol's precedence replaces the default. */
1ff442ca
NF
443 if (ruleprec)
444 {
03b31c0c
AD
445 rules[ruleno].precsym = ruleprec;
446 rules[ruleno].prec = ruleprec;
1ff442ca 447 }
4b3d3a8e 448 ritem[itemno++] = rule_number_as_item_number (ruleno);
f3849179 449 ++ruleno;
1ff442ca 450
a70083a3
AD
451 if (p)
452 p = p->next;
1ff442ca
NF
453 }
454
35dcf428
PE
455 if (itemno != nritems)
456 abort ();
3067fbef 457
273a74fa 458 if (trace_flag & trace_sets)
3067fbef 459 ritem_print (stderr);
1ff442ca 460}
a70083a3 461\f
fdbcd8e2
AD
462/*------------------------------------------------------------------.
463| Read in the grammar specification and record it in the format |
464| described in gram.h. All actions are copied into ACTION_OBSTACK, |
465| in each case forming the body of a C function (YYACTION) which |
466| contains a switch statement to decide which action to execute. |
467`------------------------------------------------------------------*/
a70083a3
AD
468
469void
470reader (void)
471{
e9955c83 472 gram_control_t gram_control;
a70083a3
AD
473
474 /* Initialize the symbol table. */
db8837cb 475 symbols_new ();
b6610515 476
88bce5a2
AD
477 /* Construct the accept symbol. */
478 accept = symbol_get ("$accept", empty_location);
479 accept->class = nterm_sym;
480 accept->number = nvars++;
30171f79 481
a70083a3 482 /* Construct the error token */
39f41916 483 errtoken = symbol_get ("error", empty_location);
d7020c20 484 errtoken->class = token_sym;
72a23c97 485 errtoken->number = ntokens++;
b6610515 486
a70083a3
AD
487 /* Construct a token that represents all undefined literal tokens.
488 It is always token number 2. */
88bce5a2 489 undeftoken = symbol_get ("$undefined", empty_location);
d7020c20 490 undeftoken->class = token_sym;
72a23c97 491 undeftoken->number = ntokens++;
a70083a3 492
331dbc1b 493 /* Initialize the obstacks. */
0dd1580a
RA
494 obstack_init (&pre_prologue_obstack);
495 obstack_init (&post_prologue_obstack);
331dbc1b 496
95612cfa 497 finput = xfopen (grammar_file, "r");
e9955c83
AD
498 gram_in = finput;
499
473d0a75
AD
500 gram__flex_debug = trace_flag & trace_scan;
501 gram_debug = trace_flag & trace_parse;
1d6412ad 502 scanner_initialize ();
e9955c83 503 gram_parse (&gram_control);
331dbc1b 504
b275314e
AD
505 /* If something went wrong during the parsing, don't try to
506 continue. */
b4afb6bb 507 if (complaint_issued)
f956c304 508 return;
b275314e 509
e9955c83
AD
510 /* Grammar has been read. Do some checking */
511 if (nrules == 0)
512 fatal (_("no rules in the input grammar"));
513
514 /* Report any undefined symbols and consider them nonterminals. */
515 symbols_check_defined ();
b7c49edf 516
88bce5a2
AD
517 /* If the user did not define her ENDTOKEN, do it now. */
518 if (!endtoken)
b7c49edf 519 {
88bce5a2
AD
520 endtoken = symbol_get ("$end", empty_location);
521 endtoken->class = token_sym;
522 endtoken->number = 0;
b7c49edf 523 /* Value specified by POSIX. */
88bce5a2 524 endtoken->user_token_number = 0;
b7c49edf
AD
525 }
526
e9955c83
AD
527 /* Insert the initial rule, which line is that of the first rule
528 (not that of the start symbol):
529
88bce5a2 530 accept: %start EOF. */
e9955c83 531 {
88bce5a2 532 symbol_list_t *p = symbol_list_new (accept, empty_location);
8efe435c
AD
533 p->location = grammar->location;
534 p->next = symbol_list_new (startsymbol, empty_location);
88bce5a2 535 p->next->next = symbol_list_new (endtoken, empty_location);
8efe435c 536 p->next->next->next = symbol_list_new (NULL, empty_location);
e9955c83
AD
537 p->next->next->next->next = grammar;
538 nrules += 1;
539 nritems += 3;
540 grammar = p;
541 }
542
35dcf428
PE
543 if (! (nsyms <= SYMBOL_NUMBER_MAX && nsyms == ntokens + nvars))
544 abort ();
b0c4483e 545
331dbc1b
AD
546 xfclose (finput);
547
a70083a3
AD
548 /* Assign the symbols their symbol numbers. Write #defines for the
549 token symbols into FDEFINES if requested. */
2f1afb73 550 symbols_pack ();
93ede233 551
a70083a3
AD
552 /* Convert the grammar into the format described in gram.h. */
553 packgram ();
8419d367 554
56c47203
AD
555 /* The grammar as a symbol_list_t is no longer needed. */
556 LIST_FREE (symbol_list_t, grammar);
a70083a3 557}