]> git.saurik.com Git - bison.git/blame - src/reader.c
(MUSCLE_GROW_STRING_PAIR): Remove; unused.
[bison.git] / src / reader.c
CommitLineData
1ff442ca 1/* Input parser for bison
76514394 2 Copyright (C) 1984, 1986, 1989, 1992, 1998, 2000, 2001, 2002
a70083a3 3 Free Software Foundation, Inc.
1ff442ca 4
41aca2e0 5 This file is part of Bison, the GNU Compiler Compiler.
1ff442ca 6
41aca2e0
AD
7 Bison is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 2, or (at your option)
10 any later version.
1ff442ca 11
41aca2e0
AD
12 Bison is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
1ff442ca 16
41aca2e0
AD
17 You should have received a copy of the GNU General Public License
18 along with Bison; see the file COPYING. If not, write to
19 the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
20 Boston, MA 02111-1307, USA. */
1ff442ca
NF
21
22
1ff442ca 23#include "system.h"
2a91a95e 24#include "quotearg.h"
ceed8467 25#include "getargs.h"
1ff442ca 26#include "files.h"
1ff442ca 27#include "symtab.h"
56c47203 28#include "symlist.h"
1ff442ca 29#include "gram.h"
a0f6b076 30#include "complain.h"
6c89f1c1 31#include "output.h"
b2ca4022 32#include "reader.h"
340ef489 33#include "conflicts.h"
11d82f03 34#include "muscle_tab.h"
1ff442ca 35
56c47203 36static symbol_list_t *grammar = NULL;
280a38c3 37static int start_flag = 0;
676385e2 38merger_list *merge_functions;
1ff442ca 39
d7020c20 40/* Nonzero if %union has been seen. */
e9955c83 41int typed = 0;
0d533154 42\f
e9955c83
AD
43/*-----------------------.
44| Set the start symbol. |
45`-----------------------*/
1ff442ca 46
e9955c83 47void
8efe435c 48grammar_start_symbol_set (symbol_t *s, location_t l)
1ff442ca
NF
49{
50 if (start_flag)
e776192e 51 complain_at (l, _("multiple %s declarations"), "%start");
943819bf
RS
52 else
53 {
54 start_flag = 1;
e9955c83 55 startsymbol = s;
8efe435c 56 startsymbol_location = l;
943819bf 57 }
1ff442ca
NF
58}
59
1ff442ca 60
d7020c20 61/*----------------------------------------------------------------.
e9955c83
AD
62| There are two prologues: one before %union, one after. Augment |
63| the current one. |
d7020c20 64`----------------------------------------------------------------*/
1ff442ca 65
e9955c83 66void
0c15323d 67prologue_augment (const char *prologue, location_t location)
b6610515 68{
e9955c83
AD
69 struct obstack *oout =
70 !typed ? &pre_prologue_obstack : &post_prologue_obstack;
b6610515 71
437c2d80
AD
72 obstack_fgrow2 (oout, "]b4_syncline([[%d]], [[%s]])[\n",
73 location.first_line,
74 quotearg_style (escape_quoting_style, location.file));
e9955c83 75 obstack_sgrow (oout, prologue);
b6610515
RA
76}
77
2ba3b73c 78
426cf563 79
a870c567 80
e9955c83
AD
81/*----------------------.
82| Handle the epilogue. |
83`----------------------*/
426cf563 84
e9955c83 85void
0c15323d 86epilogue_set (const char *epilogue, location_t location)
2ba3b73c 87{
437c2d80
AD
88 obstack_fgrow2 (&muscle_obstack, "]b4_syncline([[%d]], [[%s]])[\n",
89 location.first_line,
90 quotearg_style (escape_quoting_style, location.file));
592e8d4d
AD
91 obstack_sgrow (&muscle_obstack, epilogue);
92 obstack_1grow (&muscle_obstack, 0);
93 muscle_insert ("epilogue", obstack_finish (&muscle_obstack));
1ff442ca 94}
1ff442ca 95
a70083a3 96
a70083a3
AD
97\f
98
676385e2
PH
99 /*-------------------------------------------------------------------.
100| Return the merger index for a merging function named NAME, whose |
101| arguments have type TYPE. Records the function, if new, in |
102| merger_list. |
103`-------------------------------------------------------------------*/
104
105static int
a5d50994
AD
106get_merge_function (const char* name, const char* type,
107 location_t loc)
676385e2
PH
108{
109 merger_list *syms;
110 merger_list head;
111 int n;
112
113 if (! glr_parser)
114 return 0;
115
116 if (type == NULL)
117 type = "";
118
119 head.next = merge_functions;
39f41916 120 for (syms = &head, n = 1; syms->next != NULL; syms = syms->next, n += 1)
676385e2
PH
121 if (strcmp (name, syms->next->name) == 0)
122 break;
a5d50994
AD
123 if (syms->next == NULL)
124 {
125 syms->next = XMALLOC (merger_list, 1);
b906441c
AD
126 syms->next->name = xstrdup (name);
127 syms->next->type = xstrdup (type);
a5d50994
AD
128 syms->next->next = NULL;
129 merge_functions = head.next;
130 }
131 else if (strcmp (type, syms->next->type) != 0)
45a8a65d
PE
132 warn_at (loc, _("result type clash on merge function %s: <%s> != <%s>"),
133 name, type, syms->next->type);
676385e2
PH
134 return n;
135}
136
137/*--------------------------------------.
138| Free all merge-function definitions. |
139`--------------------------------------*/
140
141void
142free_merger_functions (void)
143{
144 merger_list *L0;
145 if (! glr_parser)
146 return;
147 L0 = merge_functions;
148 while (L0 != NULL)
149 {
150 merger_list *L1 = L0->next;
151 free (L0);
152 L0 = L1;
153 }
154}
155
a70083a3 156\f
107f7dfb 157/*-------------------------------------------------------------------.
32e1e0a4 158| Parse the input grammar into one symbol_list_t structure. Each |
107f7dfb
AD
159| rule is represented by a sequence of symbols: the left hand side |
160| followed by the contents of the right hand side, followed by a |
161| null pointer instead of a symbol to terminate the rule. The next |
162| symbol is the lhs of the following rule. |
163| |
fdbcd8e2
AD
164| All actions are copied out, labelled by the rule number they apply |
165| to. |
107f7dfb
AD
166| |
167| Bison used to allow some %directives in the rules sections, but |
168| this is no longer considered appropriate: (i) the documented grammar
169| doesn't claim it, (ii), it would promote bad style, (iii), error |
170| recovery for %directives consists in skipping the junk until a `%' |
171| is seen and helps synchronizing. This scheme is definitely wrong |
172| in the rules section. |
173`-------------------------------------------------------------------*/
1ff442ca 174
f6d0f937 175/* The (currently) last symbol of GRAMMAR. */
56c47203 176symbol_list_t *grammar_end = NULL;
f6d0f937
AD
177
178/* Append S to the GRAMMAR. */
e9955c83 179void
8efe435c 180grammar_symbol_append (symbol_t *symbol, location_t location)
f6d0f937 181{
56c47203 182 symbol_list_t *p = symbol_list_new (symbol, location);
f6d0f937
AD
183
184 if (grammar_end)
185 grammar_end->next = p;
186 else
187 grammar = p;
188
189 grammar_end = p;
190}
191
8efe435c
AD
192/* The rule currently being defined, and the previous rule.
193 CURRENT_RULE points to the first LHS of the current rule, while
194 PREVIOUS_RULE_END points to the *end* of the previous rule (NULL). */
56c47203
AD
195symbol_list_t *current_rule = NULL;
196symbol_list_t *previous_rule_end = NULL;
da4160c3
AD
197
198
8efe435c
AD
199/*----------------------------------------------.
200| Create a new rule for LHS in to the GRAMMAR. |
201`----------------------------------------------*/
da4160c3 202
e9955c83 203void
8efe435c 204grammar_rule_begin (symbol_t *lhs, location_t location)
da4160c3
AD
205{
206 if (!start_flag)
207 {
208 startsymbol = lhs;
8efe435c 209 startsymbol_location = location;
da4160c3
AD
210 start_flag = 1;
211 }
212
213 /* Start a new rule and record its lhs. */
214 ++nrules;
215 ++nritems;
216
8efe435c
AD
217 previous_rule_end = grammar_end;
218 grammar_symbol_append (lhs, location);
da4160c3
AD
219 current_rule = grammar_end;
220
221 /* Mark the rule's lhs as a nonterminal if not already so. */
222
223 if (lhs->class == unknown_sym)
224 {
225 lhs->class = nterm_sym;
226 lhs->number = nvars;
227 ++nvars;
228 }
229 else if (lhs->class == token_sym)
e776192e 230 complain_at (location, _("rule given for %s, which is a token"), lhs->tag);
da4160c3
AD
231}
232
e9955c83
AD
233/* Check that the last rule (CURRENT_RULE) is properly defined. For
234 instance, there should be no type clash on the default action. */
235
236static void
237grammar_current_rule_check (void)
238{
239 symbol_t *lhs = current_rule->sym;
3f4c0f80 240 char const *lhs_type = lhs->type_name;
e9955c83
AD
241 symbol_t *first_rhs = current_rule->next->sym;
242
243 /* If there is an action, then there is nothing we can do: the user
3f4c0f80 244 is allowed to shoot herself in the foot. */
e9955c83
AD
245 if (current_rule->action)
246 return;
247
3f4c0f80
PE
248 /* Don't worry about the default action if $$ is untyped, since $$'s
249 value can't be used. */
250 if (! lhs_type)
251 return;
252
253 /* If $$ is being set in default way, report if any type mismatch. */
e9955c83
AD
254 if (first_rhs)
255 {
e9955c83
AD
256 const char *rhs_type = first_rhs->type_name ? first_rhs->type_name : "";
257 if (strcmp (lhs_type, rhs_type))
e776192e 258 complain_at (current_rule->location,
45a8a65d 259 _("type clash on default action: <%s> != <%s>"),
e776192e 260 lhs_type, rhs_type);
e9955c83
AD
261 }
262 /* Warn if there is no default for $$ but we need one. */
263 else
3f4c0f80
PE
264 complain_at (current_rule->location,
265 _("empty rule for typed nonterminal, and no action"));
e9955c83
AD
266}
267
268
8efe435c
AD
269/*-------------------------------------.
270| End the currently being grown rule. |
271`-------------------------------------*/
e9955c83
AD
272
273void
8efe435c 274grammar_rule_end (location_t location)
e9955c83
AD
275{
276 /* Put an empty link in the list to mark the end of this rule */
8efe435c
AD
277 grammar_symbol_append (NULL, grammar_end->location);
278 current_rule->location = location;
e9955c83
AD
279 grammar_current_rule_check ();
280}
281
282
8efe435c
AD
283/*-------------------------------------------------------------------.
284| The previous action turns out the be a mid-rule action. Attach it |
285| to the current rule, i.e., create a dummy symbol, attach it this |
286| mid-rule action, and append this dummy nonterminal to the current |
287| rule. |
288`-------------------------------------------------------------------*/
1485e106 289
e9955c83 290void
1485e106
AD
291grammar_midrule_action (void)
292{
293 /* Since the action was written out with this rule's number, we must
294 give the new rule this number by inserting the new rule before
295 it. */
296
8efe435c
AD
297 /* Make a DUMMY nonterminal, whose location is that of the midrule
298 action. Create the MIDRULE. */
8efe435c 299 location_t dummy_location = current_rule->action_location;
39f41916 300 symbol_t *dummy = dummy_symbol_get (dummy_location);
56c47203 301 symbol_list_t *midrule = symbol_list_new (dummy, dummy_location);
1485e106
AD
302
303 /* Make a new rule, whose body is empty, before the current one, so
304 that the action just read can belong to it. */
305 ++nrules;
306 ++nritems;
8efe435c
AD
307 /* Attach its location and actions to that of the DUMMY. */
308 midrule->location = dummy_location;
309 midrule->action = current_rule->action;
310 midrule->action_location = dummy_location;
1485e106
AD
311 current_rule->action = NULL;
312
8efe435c
AD
313 if (previous_rule_end)
314 previous_rule_end->next = midrule;
1485e106 315 else
8efe435c 316 grammar = midrule;
1485e106 317
8efe435c
AD
318 /* End the dummy's rule. */
319 previous_rule_end = symbol_list_new (NULL, dummy_location);
320 previous_rule_end->next = current_rule;
1485e106 321
8efe435c 322 midrule->next = previous_rule_end;
1485e106 323
8efe435c
AD
324 /* Insert the dummy nonterminal replacing the midrule action into
325 the current rule. */
326 grammar_current_rule_symbol_append (dummy, dummy_location);
1485e106
AD
327}
328
9af3fbce
AD
329/* Set the precedence symbol of the current rule to PRECSYM. */
330
e9955c83 331void
e776192e 332grammar_current_rule_prec_set (symbol_t *precsym, location_t location)
9af3fbce
AD
333{
334 if (current_rule->ruleprec)
473d0a75 335 complain_at (location, _("only one %s allowed per rule"), "%prec");
9af3fbce
AD
336 current_rule->ruleprec = precsym;
337}
338
676385e2
PH
339/* Attach dynamic precedence DPREC to the current rule. */
340
341void
342grammar_current_rule_dprec_set (int dprec, location_t location)
343{
344 if (! glr_parser)
473d0a75 345 warn_at (location, _("%s affects only GLR parsers"), "%dprec");
676385e2 346 if (dprec <= 0)
473d0a75
AD
347 complain_at (location,
348 _("%s must be followed by positive number"), "%dprec");
39f41916 349 else if (current_rule->dprec != 0)
473d0a75 350 complain_at (location, _("only one %s allowed per rule"), "%dprec");
676385e2
PH
351 current_rule->dprec = dprec;
352}
353
354/* Attach a merge function NAME with argument type TYPE to current
355 rule. */
356
357void
358grammar_current_rule_merge_set (const char* name, location_t location)
359{
360 if (! glr_parser)
473d0a75 361 warn_at (location, _("%s affects only GLR parsers"), "%merge");
39f41916 362 if (current_rule->merger != 0)
473d0a75 363 complain_at (location, _("only one %s allowed per rule"), "%merge");
39f41916 364 current_rule->merger =
a5d50994 365 get_merge_function (name, current_rule->sym->type_name, location);
676385e2
PH
366}
367
2e047461
AD
368/* Attach a SYMBOL to the current rule. If needed, move the previous
369 action as a mid-rule action. */
370
e9955c83 371void
8efe435c 372grammar_current_rule_symbol_append (symbol_t *symbol, location_t location)
2e047461
AD
373{
374 if (current_rule->action)
375 grammar_midrule_action ();
376 ++nritems;
8efe435c 377 grammar_symbol_append (symbol, location);
2e047461
AD
378}
379
2e047461
AD
380/* Attach an ACTION to the current rule. If needed, move the previous
381 action as a mid-rule action. */
382
e9955c83 383void
8efe435c 384grammar_current_rule_action_append (const char *action, location_t location)
2e047461
AD
385{
386 if (current_rule->action)
387 grammar_midrule_action ();
388 current_rule->action = action;
8efe435c 389 current_rule->action_location = location;
2e047461
AD
390}
391
a70083a3 392\f
a70083a3
AD
393/*---------------------------------------------------------------.
394| Convert the rules into the representation using RRHS, RLHS and |
d9b739c3 395| RITEM. |
a70083a3 396`---------------------------------------------------------------*/
1ff442ca 397
4a120d45 398static void
118fb205 399packgram (void)
1ff442ca 400{
9222837b 401 unsigned int itemno = 0;
4b3d3a8e 402 rule_number_t ruleno = 0;
9222837b 403 symbol_list_t *p = grammar;
1ff442ca 404
a900a624 405 ritem = XCALLOC (item_number_t, nritems);
4b3d3a8e 406 rules = XCALLOC (rule_t, nrules);
1ff442ca 407
1ff442ca
NF
408 while (p)
409 {
db8837cb 410 symbol_t *ruleprec = p->ruleprec;
d7e1f00c 411 rules[ruleno].user_number = ruleno;
c3b407f4 412 rules[ruleno].number = ruleno;
bba97eb2 413 rules[ruleno].lhs = p->sym;
99013900 414 rules[ruleno].rhs = ritem + itemno;
8efe435c 415 rules[ruleno].location = p->location;
b4afb6bb 416 rules[ruleno].useful = true;
1a2b5d37 417 rules[ruleno].action = p->action;
8efe435c 418 rules[ruleno].action_location = p->action_location;
676385e2
PH
419 rules[ruleno].dprec = p->dprec;
420 rules[ruleno].merger = p->merger;
1ff442ca
NF
421
422 p = p->next;
423 while (p && p->sym)
424 {
a49aecd5 425 /* item_number_t = symbol_number_t.
5fbb0954 426 But the former needs to contain more: negative rule numbers. */
a49aecd5 427 ritem[itemno++] = symbol_number_as_item_number (p->sym->number);
1ff442ca
NF
428 /* A rule gets by default the precedence and associativity
429 of the last token in it. */
d7020c20 430 if (p->sym->class == token_sym)
03b31c0c 431 rules[ruleno].prec = p->sym;
a70083a3
AD
432 if (p)
433 p = p->next;
1ff442ca
NF
434 }
435
436 /* If this rule has a %prec,
a70083a3 437 the specified symbol's precedence replaces the default. */
1ff442ca
NF
438 if (ruleprec)
439 {
03b31c0c
AD
440 rules[ruleno].precsym = ruleprec;
441 rules[ruleno].prec = ruleprec;
1ff442ca 442 }
4b3d3a8e 443 ritem[itemno++] = rule_number_as_item_number (ruleno);
f3849179 444 ++ruleno;
1ff442ca 445
a70083a3
AD
446 if (p)
447 p = p->next;
1ff442ca
NF
448 }
449
5123689b 450 assert (itemno == nritems);
3067fbef 451
273a74fa 452 if (trace_flag & trace_sets)
3067fbef 453 ritem_print (stderr);
1ff442ca 454}
a70083a3 455\f
fdbcd8e2
AD
456/*------------------------------------------------------------------.
457| Read in the grammar specification and record it in the format |
458| described in gram.h. All actions are copied into ACTION_OBSTACK, |
459| in each case forming the body of a C function (YYACTION) which |
460| contains a switch statement to decide which action to execute. |
461`------------------------------------------------------------------*/
a70083a3
AD
462
463void
464reader (void)
465{
e9955c83 466 gram_control_t gram_control;
a70083a3
AD
467
468 /* Initialize the symbol table. */
db8837cb 469 symbols_new ();
b6610515 470
88bce5a2
AD
471 /* Construct the accept symbol. */
472 accept = symbol_get ("$accept", empty_location);
473 accept->class = nterm_sym;
474 accept->number = nvars++;
30171f79 475
a70083a3 476 /* Construct the error token */
39f41916 477 errtoken = symbol_get ("error", empty_location);
d7020c20 478 errtoken->class = token_sym;
72a23c97 479 errtoken->number = ntokens++;
b6610515 480
a70083a3
AD
481 /* Construct a token that represents all undefined literal tokens.
482 It is always token number 2. */
88bce5a2 483 undeftoken = symbol_get ("$undefined", empty_location);
d7020c20 484 undeftoken->class = token_sym;
72a23c97 485 undeftoken->number = ntokens++;
a70083a3 486
331dbc1b 487 /* Initialize the obstacks. */
0dd1580a
RA
488 obstack_init (&pre_prologue_obstack);
489 obstack_init (&post_prologue_obstack);
331dbc1b
AD
490
491 finput = xfopen (infile, "r");
e9955c83
AD
492 gram_in = finput;
493
473d0a75
AD
494 gram__flex_debug = trace_flag & trace_scan;
495 gram_debug = trace_flag & trace_parse;
1d6412ad 496 scanner_initialize ();
e9955c83 497 gram_parse (&gram_control);
331dbc1b 498
b275314e
AD
499 /* If something went wrong during the parsing, don't try to
500 continue. */
b4afb6bb 501 if (complaint_issued)
f956c304 502 return;
b275314e 503
e9955c83
AD
504 /* Grammar has been read. Do some checking */
505 if (nrules == 0)
506 fatal (_("no rules in the input grammar"));
507
508 /* Report any undefined symbols and consider them nonterminals. */
509 symbols_check_defined ();
b7c49edf 510
88bce5a2
AD
511 /* If the user did not define her ENDTOKEN, do it now. */
512 if (!endtoken)
b7c49edf 513 {
88bce5a2
AD
514 endtoken = symbol_get ("$end", empty_location);
515 endtoken->class = token_sym;
516 endtoken->number = 0;
b7c49edf 517 /* Value specified by POSIX. */
88bce5a2 518 endtoken->user_token_number = 0;
b7c49edf
AD
519 }
520
e9955c83
AD
521 /* Insert the initial rule, which line is that of the first rule
522 (not that of the start symbol):
523
88bce5a2 524 accept: %start EOF. */
e9955c83 525 {
88bce5a2 526 symbol_list_t *p = symbol_list_new (accept, empty_location);
8efe435c
AD
527 p->location = grammar->location;
528 p->next = symbol_list_new (startsymbol, empty_location);
88bce5a2 529 p->next->next = symbol_list_new (endtoken, empty_location);
8efe435c 530 p->next->next->next = symbol_list_new (NULL, empty_location);
e9955c83
AD
531 p->next->next->next->next = grammar;
532 nrules += 1;
533 nritems += 3;
534 grammar = p;
535 }
536
242a6e48 537 if (SYMBOL_NUMBER_MAX < nsyms)
e9955c83 538 fatal (_("too many symbols (tokens plus nonterminals); maximum %d"),
242a6e48 539 SYMBOL_NUMBER_MAX);
e9955c83
AD
540
541 assert (nsyms == ntokens + nvars);
b0c4483e 542
331dbc1b
AD
543 xfclose (finput);
544
a70083a3
AD
545 /* Assign the symbols their symbol numbers. Write #defines for the
546 token symbols into FDEFINES if requested. */
2f1afb73 547 symbols_pack ();
93ede233 548
a70083a3
AD
549 /* Convert the grammar into the format described in gram.h. */
550 packgram ();
8419d367 551
56c47203
AD
552 /* The grammar as a symbol_list_t is no longer needed. */
553 LIST_FREE (symbol_list_t, grammar);
a70083a3 554}