]> git.saurik.com Git - bison.git/blame - src/reader.c
Initial check-in introducing experimental GLR parsing. See entry in
[bison.git] / src / reader.c
CommitLineData
1ff442ca 1/* Input parser for bison
76514394 2 Copyright (C) 1984, 1986, 1989, 1992, 1998, 2000, 2001, 2002
a70083a3 3 Free Software Foundation, Inc.
1ff442ca 4
41aca2e0 5 This file is part of Bison, the GNU Compiler Compiler.
1ff442ca 6
41aca2e0
AD
7 Bison is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 2, or (at your option)
10 any later version.
1ff442ca 11
41aca2e0
AD
12 Bison is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
1ff442ca 16
41aca2e0
AD
17 You should have received a copy of the GNU General Public License
18 along with Bison; see the file COPYING. If not, write to
19 the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
20 Boston, MA 02111-1307, USA. */
1ff442ca
NF
21
22
1ff442ca 23#include "system.h"
2a91a95e
AD
24#include "quotearg.h"
25#include "quote.h"
ceed8467 26#include "getargs.h"
1ff442ca 27#include "files.h"
1ff442ca 28#include "symtab.h"
56c47203 29#include "symlist.h"
1ff442ca 30#include "gram.h"
a0f6b076 31#include "complain.h"
6c89f1c1 32#include "output.h"
b2ca4022 33#include "reader.h"
340ef489 34#include "conflicts.h"
11d82f03 35#include "muscle_tab.h"
1ff442ca 36
1ff442ca 37int lineno;
56c47203 38static symbol_list_t *grammar = NULL;
280a38c3 39static int start_flag = 0;
676385e2 40merger_list *merge_functions;
1ff442ca 41
d7020c20 42/* Nonzero if %union has been seen. */
e9955c83 43int typed = 0;
0d533154 44\f
e9955c83
AD
45/*-----------------------.
46| Set the start symbol. |
47`-----------------------*/
1ff442ca 48
e9955c83 49void
8efe435c 50grammar_start_symbol_set (symbol_t *s, location_t l)
1ff442ca
NF
51{
52 if (start_flag)
e776192e 53 complain_at (l, _("multiple %s declarations"), "%start");
943819bf
RS
54 else
55 {
56 start_flag = 1;
e9955c83 57 startsymbol = s;
8efe435c 58 startsymbol_location = l;
943819bf 59 }
1ff442ca
NF
60}
61
1ff442ca 62
d7020c20 63/*----------------------------------------------------------------.
e9955c83
AD
64| There are two prologues: one before %union, one after. Augment |
65| the current one. |
d7020c20 66`----------------------------------------------------------------*/
1ff442ca 67
e9955c83 68void
0c15323d 69prologue_augment (const char *prologue, location_t location)
b6610515 70{
e9955c83
AD
71 struct obstack *oout =
72 !typed ? &pre_prologue_obstack : &post_prologue_obstack;
b6610515 73
e9955c83 74 if (!no_lines_flag)
b6610515 75 {
e9955c83 76 obstack_fgrow2 (oout, muscle_find ("linef"),
0c15323d
AD
77 location.first_line,
78 quotearg_style (c_quoting_style,
79 muscle_find ("filename")));
b6610515 80 }
e9955c83 81 obstack_sgrow (oout, prologue);
b6610515
RA
82}
83
2ba3b73c 84
426cf563 85
a870c567 86
e9955c83
AD
87/*----------------------.
88| Handle the epilogue. |
89`----------------------*/
426cf563 90
e9955c83 91void
0c15323d 92epilogue_set (const char *epilogue, location_t location)
2ba3b73c 93{
e9955c83 94 if (!no_lines_flag)
1ff442ca 95 {
592e8d4d 96 obstack_fgrow2 (&muscle_obstack, muscle_find ("linef"),
0c15323d
AD
97 location.first_line,
98 quotearg_style (c_quoting_style,
99 muscle_find ("filename")));
1ff442ca 100 }
592e8d4d
AD
101 obstack_sgrow (&muscle_obstack, epilogue);
102 obstack_1grow (&muscle_obstack, 0);
103 muscle_insert ("epilogue", obstack_finish (&muscle_obstack));
1ff442ca 104}
1ff442ca 105
a70083a3 106
a70083a3
AD
107\f
108
676385e2
PH
109 /*-------------------------------------------------------------------.
110| Return the merger index for a merging function named NAME, whose |
111| arguments have type TYPE. Records the function, if new, in |
112| merger_list. |
113`-------------------------------------------------------------------*/
114
115static int
116get_merge_function (const char* name, const char* type)
117{
118 merger_list *syms;
119 merger_list head;
120 int n;
121
122 if (! glr_parser)
123 return 0;
124
125 if (type == NULL)
126 type = "";
127
128 head.next = merge_functions;
129 for (syms = &head, n = 1; syms->next != NULL; syms = syms->next, n += 1)
130 if (strcmp (name, syms->next->name) == 0)
131 break;
132 if (syms->next == NULL) {
133 syms->next = XMALLOC (merger_list, 1);
134 syms->next->name = strdup (name);
135 syms->next->type = strdup (type);
136 syms->next->next = NULL;
137 merge_functions = head.next;
138 } else if (strcmp (type, syms->next->type) != 0)
139 warn (_("result type clash on merge function %s: `%s' vs. `%s'"),
140 name, type, syms->next->type);
141 return n;
142}
143
144/*--------------------------------------.
145| Free all merge-function definitions. |
146`--------------------------------------*/
147
148void
149free_merger_functions (void)
150{
151 merger_list *L0;
152 if (! glr_parser)
153 return;
154 L0 = merge_functions;
155 while (L0 != NULL)
156 {
157 merger_list *L1 = L0->next;
158 free (L0);
159 L0 = L1;
160 }
161}
162
a70083a3
AD
163/*-------------------------------------------------------------------.
164| Generate a dummy symbol, a nonterminal, whose name cannot conflict |
165| with the user's names. |
166`-------------------------------------------------------------------*/
1ff442ca 167
db8837cb 168static symbol_t *
ee000ba4 169gensym (location_t location)
1ff442ca 170{
274d42ce
AD
171 /* Incremented for each generated symbol */
172 static int gensym_count = 0;
173 static char buf[256];
174
db8837cb 175 symbol_t *sym;
1ff442ca 176
274d42ce 177 sprintf (buf, "@%d", ++gensym_count);
ee000ba4 178 sym = getsym (buf, location);
d7020c20 179 sym->class = nterm_sym;
d9b739c3 180 sym->number = nvars++;
36281465 181 return sym;
1ff442ca 182}
a70083a3 183\f
107f7dfb 184/*-------------------------------------------------------------------.
56c47203 185| Parse the input grammar into a one symbol_list_t structure. Each |
107f7dfb
AD
186| rule is represented by a sequence of symbols: the left hand side |
187| followed by the contents of the right hand side, followed by a |
188| null pointer instead of a symbol to terminate the rule. The next |
189| symbol is the lhs of the following rule. |
190| |
fdbcd8e2
AD
191| All actions are copied out, labelled by the rule number they apply |
192| to. |
107f7dfb
AD
193| |
194| Bison used to allow some %directives in the rules sections, but |
195| this is no longer consider appropriate: (i) the documented grammar |
196| doesn't claim it, (ii), it would promote bad style, (iii), error |
197| recovery for %directives consists in skipping the junk until a `%' |
198| is seen and helrp synchronizing. This scheme is definitely wrong |
199| in the rules section. |
200`-------------------------------------------------------------------*/
1ff442ca 201
f6d0f937 202/* The (currently) last symbol of GRAMMAR. */
56c47203 203symbol_list_t *grammar_end = NULL;
f6d0f937
AD
204
205/* Append S to the GRAMMAR. */
e9955c83 206void
8efe435c 207grammar_symbol_append (symbol_t *symbol, location_t location)
f6d0f937 208{
56c47203 209 symbol_list_t *p = symbol_list_new (symbol, location);
f6d0f937
AD
210
211 if (grammar_end)
212 grammar_end->next = p;
213 else
214 grammar = p;
215
216 grammar_end = p;
217}
218
8efe435c
AD
219/* The rule currently being defined, and the previous rule.
220 CURRENT_RULE points to the first LHS of the current rule, while
221 PREVIOUS_RULE_END points to the *end* of the previous rule (NULL). */
56c47203
AD
222symbol_list_t *current_rule = NULL;
223symbol_list_t *previous_rule_end = NULL;
da4160c3
AD
224
225
8efe435c
AD
226/*----------------------------------------------.
227| Create a new rule for LHS in to the GRAMMAR. |
228`----------------------------------------------*/
da4160c3 229
e9955c83 230void
8efe435c 231grammar_rule_begin (symbol_t *lhs, location_t location)
da4160c3
AD
232{
233 if (!start_flag)
234 {
235 startsymbol = lhs;
8efe435c 236 startsymbol_location = location;
da4160c3
AD
237 start_flag = 1;
238 }
239
240 /* Start a new rule and record its lhs. */
241 ++nrules;
242 ++nritems;
243
8efe435c
AD
244 previous_rule_end = grammar_end;
245 grammar_symbol_append (lhs, location);
da4160c3
AD
246 current_rule = grammar_end;
247
248 /* Mark the rule's lhs as a nonterminal if not already so. */
249
250 if (lhs->class == unknown_sym)
251 {
252 lhs->class = nterm_sym;
253 lhs->number = nvars;
254 ++nvars;
255 }
256 else if (lhs->class == token_sym)
e776192e 257 complain_at (location, _("rule given for %s, which is a token"), lhs->tag);
da4160c3
AD
258}
259
e9955c83
AD
260/* Check that the last rule (CURRENT_RULE) is properly defined. For
261 instance, there should be no type clash on the default action. */
262
263static void
264grammar_current_rule_check (void)
265{
266 symbol_t *lhs = current_rule->sym;
267 symbol_t *first_rhs = current_rule->next->sym;
268
269 /* If there is an action, then there is nothing we can do: the user
270 is allowed to shoot in her foot. */
271 if (current_rule->action)
272 return;
273
274 /* If $$ is being set in default way, report if any type mismatch.
275 */
276 if (first_rhs)
277 {
278 const char *lhs_type = lhs->type_name ? lhs->type_name : "";
279 const char *rhs_type = first_rhs->type_name ? first_rhs->type_name : "";
280 if (strcmp (lhs_type, rhs_type))
e776192e
AD
281 complain_at (current_rule->location,
282 _("type clash (`%s' `%s') on default action"),
283 lhs_type, rhs_type);
e9955c83
AD
284 }
285 /* Warn if there is no default for $$ but we need one. */
286 else
287 {
288 if (lhs->type_name)
e776192e
AD
289 complain_at (current_rule->location,
290 _("empty rule for typed nonterminal, and no action"));
e9955c83
AD
291 }
292}
293
294
8efe435c
AD
295/*-------------------------------------.
296| End the currently being grown rule. |
297`-------------------------------------*/
e9955c83
AD
298
299void
8efe435c 300grammar_rule_end (location_t location)
e9955c83
AD
301{
302 /* Put an empty link in the list to mark the end of this rule */
8efe435c
AD
303 grammar_symbol_append (NULL, grammar_end->location);
304 current_rule->location = location;
e9955c83
AD
305 grammar_current_rule_check ();
306}
307
308
8efe435c
AD
309/*-------------------------------------------------------------------.
310| The previous action turns out the be a mid-rule action. Attach it |
311| to the current rule, i.e., create a dummy symbol, attach it this |
312| mid-rule action, and append this dummy nonterminal to the current |
313| rule. |
314`-------------------------------------------------------------------*/
1485e106 315
e9955c83 316void
1485e106
AD
317grammar_midrule_action (void)
318{
319 /* Since the action was written out with this rule's number, we must
320 give the new rule this number by inserting the new rule before
321 it. */
322
8efe435c
AD
323 /* Make a DUMMY nonterminal, whose location is that of the midrule
324 action. Create the MIDRULE. */
8efe435c 325 location_t dummy_location = current_rule->action_location;
ee000ba4 326 symbol_t *dummy = gensym (dummy_location);
56c47203 327 symbol_list_t *midrule = symbol_list_new (dummy, dummy_location);
1485e106
AD
328
329 /* Make a new rule, whose body is empty, before the current one, so
330 that the action just read can belong to it. */
331 ++nrules;
332 ++nritems;
8efe435c
AD
333 /* Attach its location and actions to that of the DUMMY. */
334 midrule->location = dummy_location;
335 midrule->action = current_rule->action;
336 midrule->action_location = dummy_location;
1485e106
AD
337 current_rule->action = NULL;
338
8efe435c
AD
339 if (previous_rule_end)
340 previous_rule_end->next = midrule;
1485e106 341 else
8efe435c 342 grammar = midrule;
1485e106 343
8efe435c
AD
344 /* End the dummy's rule. */
345 previous_rule_end = symbol_list_new (NULL, dummy_location);
346 previous_rule_end->next = current_rule;
1485e106 347
8efe435c 348 midrule->next = previous_rule_end;
1485e106 349
8efe435c
AD
350 /* Insert the dummy nonterminal replacing the midrule action into
351 the current rule. */
352 grammar_current_rule_symbol_append (dummy, dummy_location);
1485e106
AD
353}
354
9af3fbce
AD
355/* Set the precedence symbol of the current rule to PRECSYM. */
356
e9955c83 357void
e776192e 358grammar_current_rule_prec_set (symbol_t *precsym, location_t location)
9af3fbce
AD
359{
360 if (current_rule->ruleprec)
e776192e 361 complain_at (location, _("two @prec's in a row"));
9af3fbce
AD
362 current_rule->ruleprec = precsym;
363}
364
676385e2
PH
365/* Attach dynamic precedence DPREC to the current rule. */
366
367void
368grammar_current_rule_dprec_set (int dprec, location_t location)
369{
370 if (! glr_parser)
371 warn_at (location, _("%%dprec affects only GLR parsers"));
372 if (dprec <= 0)
373 complain_at (location, _("%%dprec must be followed by positive number"));
374 else if (current_rule->dprec != 0)
375 complain_at (location, _("only one %%dprec allowed per rule"));
376 current_rule->dprec = dprec;
377}
378
379/* Attach a merge function NAME with argument type TYPE to current
380 rule. */
381
382void
383grammar_current_rule_merge_set (const char* name, location_t location)
384{
385 if (! glr_parser)
386 warn_at (location, _("%%merge affects only GLR parsers"));
387 if (current_rule->merger != 0)
388 complain_at (location, _("only one %%merge allowed per rule"));
389 current_rule->merger =
390 get_merge_function (name, current_rule->sym->type_name);
391}
392
2e047461
AD
393/* Attach a SYMBOL to the current rule. If needed, move the previous
394 action as a mid-rule action. */
395
e9955c83 396void
8efe435c 397grammar_current_rule_symbol_append (symbol_t *symbol, location_t location)
2e047461
AD
398{
399 if (current_rule->action)
400 grammar_midrule_action ();
401 ++nritems;
8efe435c 402 grammar_symbol_append (symbol, location);
2e047461
AD
403}
404
2e047461
AD
405/* Attach an ACTION to the current rule. If needed, move the previous
406 action as a mid-rule action. */
407
e9955c83 408void
8efe435c 409grammar_current_rule_action_append (const char *action, location_t location)
2e047461
AD
410{
411 if (current_rule->action)
412 grammar_midrule_action ();
413 current_rule->action = action;
8efe435c 414 current_rule->action_location = location;
2e047461
AD
415}
416
a70083a3 417\f
a70083a3
AD
418/*---------------------------------------------------------------.
419| Convert the rules into the representation using RRHS, RLHS and |
d9b739c3 420| RITEM. |
a70083a3 421`---------------------------------------------------------------*/
1ff442ca 422
4a120d45 423static void
118fb205 424packgram (void)
1ff442ca 425{
0c2d3f4c 426 unsigned int itemno;
a70083a3 427 int ruleno;
56c47203 428 symbol_list_t *p;
1ff442ca 429
a900a624 430 ritem = XCALLOC (item_number_t, nritems);
1a2b5d37 431 rules = XCALLOC (rule_t, nrules) - 1;
1ff442ca
NF
432
433 itemno = 0;
434 ruleno = 1;
435
436 p = grammar;
437 while (p)
438 {
db8837cb 439 symbol_t *ruleprec = p->ruleprec;
d7e1f00c 440 rules[ruleno].user_number = ruleno;
c3b407f4 441 rules[ruleno].number = ruleno;
bba97eb2 442 rules[ruleno].lhs = p->sym;
99013900 443 rules[ruleno].rhs = ritem + itemno;
8efe435c 444 rules[ruleno].location = p->location;
1a2b5d37
AD
445 rules[ruleno].useful = TRUE;
446 rules[ruleno].action = p->action;
8efe435c 447 rules[ruleno].action_location = p->action_location;
676385e2
PH
448 rules[ruleno].dprec = p->dprec;
449 rules[ruleno].merger = p->merger;
1ff442ca
NF
450
451 p = p->next;
452 while (p && p->sym)
453 {
a49aecd5 454 /* item_number_t = symbol_number_t.
5fbb0954 455 But the former needs to contain more: negative rule numbers. */
a49aecd5 456 ritem[itemno++] = symbol_number_as_item_number (p->sym->number);
1ff442ca
NF
457 /* A rule gets by default the precedence and associativity
458 of the last token in it. */
d7020c20 459 if (p->sym->class == token_sym)
03b31c0c 460 rules[ruleno].prec = p->sym;
a70083a3
AD
461 if (p)
462 p = p->next;
1ff442ca
NF
463 }
464
465 /* If this rule has a %prec,
a70083a3 466 the specified symbol's precedence replaces the default. */
1ff442ca
NF
467 if (ruleprec)
468 {
03b31c0c
AD
469 rules[ruleno].precsym = ruleprec;
470 rules[ruleno].prec = ruleprec;
1ff442ca 471 }
1ff442ca 472 ritem[itemno++] = -ruleno;
f3849179 473 ++ruleno;
1ff442ca 474
a70083a3
AD
475 if (p)
476 p = p->next;
1ff442ca
NF
477 }
478
5123689b 479 assert (itemno == nritems);
3067fbef
AD
480
481 if (trace_flag)
482 ritem_print (stderr);
1ff442ca 483}
a70083a3 484\f
fdbcd8e2
AD
485/*------------------------------------------------------------------.
486| Read in the grammar specification and record it in the format |
487| described in gram.h. All actions are copied into ACTION_OBSTACK, |
488| in each case forming the body of a C function (YYACTION) which |
489| contains a switch statement to decide which action to execute. |
490`------------------------------------------------------------------*/
a70083a3
AD
491
492void
493reader (void)
494{
e9955c83 495 gram_control_t gram_control;
a70083a3
AD
496 lineno = 1;
497
498 /* Initialize the symbol table. */
db8837cb 499 symbols_new ();
b6610515 500
30171f79 501 /* Construct the axiom symbol. */
ee000ba4 502 axiom = getsym ("$axiom", empty_location);
30171f79 503 axiom->class = nterm_sym;
d9b739c3 504 axiom->number = nvars++;
30171f79 505
a70083a3 506 /* Construct the error token */
ee000ba4 507 errtoken = getsym ("error", empty_location);
d7020c20 508 errtoken->class = token_sym;
72a23c97 509 errtoken->number = ntokens++;
b6610515 510
a70083a3
AD
511 /* Construct a token that represents all undefined literal tokens.
512 It is always token number 2. */
ee000ba4 513 undeftoken = getsym ("$undefined.", empty_location);
d7020c20 514 undeftoken->class = token_sym;
72a23c97 515 undeftoken->number = ntokens++;
a70083a3 516
331dbc1b 517 /* Initialize the obstacks. */
0dd1580a
RA
518 obstack_init (&pre_prologue_obstack);
519 obstack_init (&post_prologue_obstack);
331dbc1b
AD
520
521 finput = xfopen (infile, "r");
e9955c83
AD
522 gram_in = finput;
523
524 gram_debug = !!getenv ("parse");
525 gram__flex_debug = !!getenv ("scan");
1d6412ad 526 scanner_initialize ();
e9955c83 527 gram_parse (&gram_control);
331dbc1b 528
e9955c83
AD
529 /* Grammar has been read. Do some checking */
530 if (nrules == 0)
531 fatal (_("no rules in the input grammar"));
532
533 /* Report any undefined symbols and consider them nonterminals. */
534 symbols_check_defined ();
b7c49edf
AD
535
536 /* If the user did not define her EOFTOKEN, do it now. */
537 if (!eoftoken)
538 {
ee000ba4 539 eoftoken = getsym ("$", empty_location);
b7c49edf 540 eoftoken->class = token_sym;
72a23c97 541 eoftoken->number = 0;
b7c49edf
AD
542 /* Value specified by POSIX. */
543 eoftoken->user_token_number = 0;
544 }
545
e9955c83
AD
546 /* Insert the initial rule, which line is that of the first rule
547 (not that of the start symbol):
548
549 axiom: %start EOF. */
550 {
56c47203 551 symbol_list_t *p = symbol_list_new (axiom, empty_location);
8efe435c
AD
552 p->location = grammar->location;
553 p->next = symbol_list_new (startsymbol, empty_location);
554 p->next->next = symbol_list_new (eoftoken, empty_location);
555 p->next->next->next = symbol_list_new (NULL, empty_location);
e9955c83
AD
556 p->next->next->next->next = grammar;
557 nrules += 1;
558 nritems += 3;
559 grammar = p;
560 }
561
562 if (nsyms > SHRT_MAX)
563 fatal (_("too many symbols (tokens plus nonterminals); maximum %d"),
564 SHRT_MAX);
565
566 assert (nsyms == ntokens + nvars);
b0c4483e 567
331dbc1b
AD
568 xfclose (finput);
569
a70083a3
AD
570 /* Assign the symbols their symbol numbers. Write #defines for the
571 token symbols into FDEFINES if requested. */
2f1afb73 572 symbols_pack ();
93ede233 573
a70083a3
AD
574 /* Convert the grammar into the format described in gram.h. */
575 packgram ();
8419d367 576
56c47203
AD
577 /* The grammar as a symbol_list_t is no longer needed. */
578 LIST_FREE (symbol_list_t, grammar);
a70083a3 579}