]> git.saurik.com Git - bison.git/blame - src/reader.c
Work around portability problem on Solaris 10: flex-generated
[bison.git] / src / reader.c
CommitLineData
35dcf428 1/* Input parser for Bison
9c4637fa 2
05ac60f3
PE
3 Copyright (C) 1984, 1986, 1989, 1992, 1998, 2000, 2001, 2002, 2003,
4 2005 Free Software Foundation, Inc.
1ff442ca 5
41aca2e0 6 This file is part of Bison, the GNU Compiler Compiler.
1ff442ca 7
41aca2e0
AD
8 Bison is free software; you can redistribute it and/or modify
9 it under the terms of the GNU General Public License as published by
10 the Free Software Foundation; either version 2, or (at your option)
11 any later version.
1ff442ca 12
41aca2e0
AD
13 Bison is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU General Public License for more details.
1ff442ca 17
41aca2e0
AD
18 You should have received a copy of the GNU General Public License
19 along with Bison; see the file COPYING. If not, write to
0fb669f9
PE
20 the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
21 Boston, MA 02110-1301, USA. */
1ff442ca 22
2cec9080 23#include <config.h>
1ff442ca 24#include "system.h"
17ee7397
PE
25
26#include <quotearg.h>
27
28#include "complain.h"
29#include "conflicts.h"
1ff442ca 30#include "files.h"
17ee7397 31#include "getargs.h"
1ff442ca 32#include "gram.h"
17ee7397 33#include "muscle_tab.h"
6c89f1c1 34#include "output.h"
b2ca4022 35#include "reader.h"
17ee7397
PE
36#include "symlist.h"
37#include "symtab.h"
1ff442ca 38
17ee7397 39static symbol_list *grammar = NULL;
d0829076 40static bool start_flag = false;
676385e2 41merger_list *merge_functions;
1ff442ca 42
d0829076
PE
43/* Has %union been seen? */
44bool typed = false;
39a06c25
PE
45
46/* Should rules have a default precedence? */
47bool default_prec = true;
0d533154 48\f
e9955c83
AD
49/*-----------------------.
50| Set the start symbol. |
51`-----------------------*/
1ff442ca 52
e9955c83 53void
a737b216 54grammar_start_symbol_set (symbol *sym, location loc)
1ff442ca
NF
55{
56 if (start_flag)
17ee7397 57 complain_at (loc, _("multiple %s declarations"), "%start");
943819bf
RS
58 else
59 {
d0829076 60 start_flag = true;
a737b216 61 startsymbol = sym;
17ee7397 62 startsymbol_location = loc;
943819bf 63 }
1ff442ca
NF
64}
65
1ff442ca 66
d7020c20 67/*----------------------------------------------------------------.
e9955c83
AD
68| There are two prologues: one before %union, one after. Augment |
69| the current one. |
d7020c20 70`----------------------------------------------------------------*/
1ff442ca 71
e9955c83 72void
17ee7397 73prologue_augment (const char *prologue, location loc)
b6610515 74{
e9955c83
AD
75 struct obstack *oout =
76 !typed ? &pre_prologue_obstack : &post_prologue_obstack;
b6610515 77
05ac60f3 78 obstack_fgrow1 (oout, "]b4_syncline(%d, [[", loc.start.line);
17ee7397
PE
79 MUSCLE_OBSTACK_SGROW (oout,
80 quotearg_style (c_quoting_style, loc.start.file));
6c239755 81 obstack_sgrow (oout, "]])[\n");
e9955c83 82 obstack_sgrow (oout, prologue);
b6610515
RA
83}
84
a70083a3
AD
85\f
86
3e6656f9 87/*-------------------------------------------------------------------.
676385e2
PH
88| Return the merger index for a merging function named NAME, whose |
89| arguments have type TYPE. Records the function, if new, in |
95612cfa 90| MERGER_LIST. |
676385e2
PH
91`-------------------------------------------------------------------*/
92
93static int
17ee7397 94get_merge_function (uniqstr name, uniqstr type, location loc)
676385e2
PH
95{
96 merger_list *syms;
97 merger_list head;
98 int n;
99
100 if (! glr_parser)
101 return 0;
102
103 if (type == NULL)
17ee7397 104 type = uniqstr_new ("");
676385e2
PH
105
106 head.next = merge_functions;
39f41916 107 for (syms = &head, n = 1; syms->next != NULL; syms = syms->next, n += 1)
17ee7397 108 if (UNIQSTR_EQ (name, syms->next->name))
676385e2 109 break;
a5d50994
AD
110 if (syms->next == NULL)
111 {
da2a7671 112 syms->next = xmalloc (sizeof syms->next[0]);
17ee7397
PE
113 syms->next->name = uniqstr_new (name);
114 syms->next->type = uniqstr_new (type);
a5d50994
AD
115 syms->next->next = NULL;
116 merge_functions = head.next;
117 }
17ee7397 118 else if (!UNIQSTR_EQ (type, syms->next->type))
45a8a65d
PE
119 warn_at (loc, _("result type clash on merge function %s: <%s> != <%s>"),
120 name, type, syms->next->type);
676385e2
PH
121 return n;
122}
123
124/*--------------------------------------.
125| Free all merge-function definitions. |
126`--------------------------------------*/
127
128void
129free_merger_functions (void)
130{
131 merger_list *L0;
132 if (! glr_parser)
133 return;
134 L0 = merge_functions;
135 while (L0 != NULL)
136 {
137 merger_list *L1 = L0->next;
138 free (L0);
139 L0 = L1;
140 }
141}
142
a70083a3 143\f
107f7dfb 144/*-------------------------------------------------------------------.
17ee7397 145| Parse the input grammar into a single symbol_list structure. Each |
107f7dfb
AD
146| rule is represented by a sequence of symbols: the left hand side |
147| followed by the contents of the right hand side, followed by a |
148| null pointer instead of a symbol to terminate the rule. The next |
149| symbol is the lhs of the following rule. |
150| |
fdbcd8e2
AD
151| All actions are copied out, labelled by the rule number they apply |
152| to. |
107f7dfb
AD
153| |
154| Bison used to allow some %directives in the rules sections, but |
155| this is no longer considered appropriate: (i) the documented |
156| grammar doesn't claim it, (ii), it would promote bad style, (iii), |
157| error recovery for %directives consists in skipping the junk until |
158| a `%' is seen and helps synchronizing. This scheme is definitely |
159| wrong in the rules section. |
160`-------------------------------------------------------------------*/
1ff442ca 161
f6d0f937 162/* The (currently) last symbol of GRAMMAR. */
04098407 163static symbol_list *grammar_end = NULL;
f6d0f937 164
52328c6e 165/* Append SYM to the grammar. */
e9955c83 166void
17ee7397 167grammar_symbol_append (symbol *sym, location loc)
f6d0f937 168{
17ee7397 169 symbol_list *p = symbol_list_new (sym, loc);
f6d0f937
AD
170
171 if (grammar_end)
172 grammar_end->next = p;
173 else
174 grammar = p;
175
176 grammar_end = p;
177}
178
8efe435c
AD
179/* The rule currently being defined, and the previous rule.
180 CURRENT_RULE points to the first LHS of the current rule, while
181 PREVIOUS_RULE_END points to the *end* of the previous rule (NULL). */
17ee7397 182symbol_list *current_rule = NULL;
04098407 183static symbol_list *previous_rule_end = NULL;
da4160c3
AD
184
185
8efe435c
AD
186/*----------------------------------------------.
187| Create a new rule for LHS in to the GRAMMAR. |
188`----------------------------------------------*/
da4160c3 189
e9955c83 190void
17ee7397 191grammar_rule_begin (symbol *lhs, location loc)
da4160c3
AD
192{
193 if (!start_flag)
194 {
195 startsymbol = lhs;
17ee7397 196 startsymbol_location = loc;
d0829076 197 start_flag = true;
da4160c3
AD
198 }
199
200 /* Start a new rule and record its lhs. */
201 ++nrules;
202 ++nritems;
203
8efe435c 204 previous_rule_end = grammar_end;
17ee7397 205 grammar_symbol_append (lhs, loc);
da4160c3
AD
206 current_rule = grammar_end;
207
208 /* Mark the rule's lhs as a nonterminal if not already so. */
209
210 if (lhs->class == unknown_sym)
211 {
212 lhs->class = nterm_sym;
213 lhs->number = nvars;
214 ++nvars;
215 }
216 else if (lhs->class == token_sym)
17ee7397 217 complain_at (loc, _("rule given for %s, which is a token"), lhs->tag);
da4160c3
AD
218}
219
e9955c83
AD
220/* Check that the last rule (CURRENT_RULE) is properly defined. For
221 instance, there should be no type clash on the default action. */
222
223static void
224grammar_current_rule_check (void)
225{
17ee7397 226 symbol *lhs = current_rule->sym;
3f4c0f80 227 char const *lhs_type = lhs->type_name;
17ee7397 228 symbol *first_rhs = current_rule->next->sym;
e9955c83
AD
229
230 /* If there is an action, then there is nothing we can do: the user
3f4c0f80 231 is allowed to shoot herself in the foot. */
e9955c83
AD
232 if (current_rule->action)
233 return;
234
3f4c0f80
PE
235 /* Don't worry about the default action if $$ is untyped, since $$'s
236 value can't be used. */
237 if (! lhs_type)
238 return;
239
240 /* If $$ is being set in default way, report if any type mismatch. */
e9955c83
AD
241 if (first_rhs)
242 {
e9955c83 243 const char *rhs_type = first_rhs->type_name ? first_rhs->type_name : "";
17ee7397 244 if (!UNIQSTR_EQ (lhs_type, rhs_type))
e9273511
PE
245 warn_at (current_rule->location,
246 _("type clash on default action: <%s> != <%s>"),
247 lhs_type, rhs_type);
e9955c83
AD
248 }
249 /* Warn if there is no default for $$ but we need one. */
250 else
e9273511
PE
251 warn_at (current_rule->location,
252 _("empty rule for typed nonterminal, and no action"));
e9955c83
AD
253}
254
255
8efe435c
AD
256/*-------------------------------------.
257| End the currently being grown rule. |
258`-------------------------------------*/
e9955c83
AD
259
260void
17ee7397 261grammar_rule_end (location loc)
e9955c83
AD
262{
263 /* Put an empty link in the list to mark the end of this rule */
8efe435c 264 grammar_symbol_append (NULL, grammar_end->location);
17ee7397 265 current_rule->location = loc;
e9955c83
AD
266 grammar_current_rule_check ();
267}
268
269
8efe435c
AD
270/*-------------------------------------------------------------------.
271| The previous action turns out the be a mid-rule action. Attach it |
272| to the current rule, i.e., create a dummy symbol, attach it this |
273| mid-rule action, and append this dummy nonterminal to the current |
274| rule. |
275`-------------------------------------------------------------------*/
1485e106 276
e9955c83 277void
1485e106
AD
278grammar_midrule_action (void)
279{
280 /* Since the action was written out with this rule's number, we must
281 give the new rule this number by inserting the new rule before
282 it. */
283
8efe435c
AD
284 /* Make a DUMMY nonterminal, whose location is that of the midrule
285 action. Create the MIDRULE. */
17ee7397
PE
286 location dummy_location = current_rule->action_location;
287 symbol *dummy = dummy_symbol_get (dummy_location);
288 symbol_list *midrule = symbol_list_new (dummy, dummy_location);
1485e106
AD
289
290 /* Make a new rule, whose body is empty, before the current one, so
291 that the action just read can belong to it. */
292 ++nrules;
293 ++nritems;
8efe435c
AD
294 /* Attach its location and actions to that of the DUMMY. */
295 midrule->location = dummy_location;
296 midrule->action = current_rule->action;
297 midrule->action_location = dummy_location;
1485e106
AD
298 current_rule->action = NULL;
299
8efe435c
AD
300 if (previous_rule_end)
301 previous_rule_end->next = midrule;
1485e106 302 else
8efe435c 303 grammar = midrule;
1485e106 304
8efe435c
AD
305 /* End the dummy's rule. */
306 previous_rule_end = symbol_list_new (NULL, dummy_location);
307 previous_rule_end->next = current_rule;
1485e106 308
8efe435c 309 midrule->next = previous_rule_end;
1485e106 310
8efe435c
AD
311 /* Insert the dummy nonterminal replacing the midrule action into
312 the current rule. */
313 grammar_current_rule_symbol_append (dummy, dummy_location);
1485e106
AD
314}
315
9af3fbce
AD
316/* Set the precedence symbol of the current rule to PRECSYM. */
317
e9955c83 318void
17ee7397 319grammar_current_rule_prec_set (symbol *precsym, location loc)
9af3fbce
AD
320{
321 if (current_rule->ruleprec)
17ee7397 322 complain_at (loc, _("only one %s allowed per rule"), "%prec");
9af3fbce
AD
323 current_rule->ruleprec = precsym;
324}
325
676385e2
PH
326/* Attach dynamic precedence DPREC to the current rule. */
327
328void
17ee7397 329grammar_current_rule_dprec_set (int dprec, location loc)
676385e2
PH
330{
331 if (! glr_parser)
17ee7397 332 warn_at (loc, _("%s affects only GLR parsers"), "%dprec");
676385e2 333 if (dprec <= 0)
17ee7397 334 complain_at (loc, _("%s must be followed by positive number"), "%dprec");
39f41916 335 else if (current_rule->dprec != 0)
17ee7397 336 complain_at (loc, _("only one %s allowed per rule"), "%dprec");
676385e2
PH
337 current_rule->dprec = dprec;
338}
339
340/* Attach a merge function NAME with argument type TYPE to current
341 rule. */
342
343void
17ee7397 344grammar_current_rule_merge_set (uniqstr name, location loc)
676385e2
PH
345{
346 if (! glr_parser)
17ee7397 347 warn_at (loc, _("%s affects only GLR parsers"), "%merge");
39f41916 348 if (current_rule->merger != 0)
17ee7397 349 complain_at (loc, _("only one %s allowed per rule"), "%merge");
39f41916 350 current_rule->merger =
17ee7397 351 get_merge_function (name, current_rule->sym->type_name, loc);
676385e2
PH
352}
353
17ee7397 354/* Attach SYM to the current rule. If needed, move the previous
2e047461
AD
355 action as a mid-rule action. */
356
e9955c83 357void
17ee7397 358grammar_current_rule_symbol_append (symbol *sym, location loc)
2e047461
AD
359{
360 if (current_rule->action)
361 grammar_midrule_action ();
362 ++nritems;
17ee7397 363 grammar_symbol_append (sym, loc);
2e047461
AD
364}
365
2e047461
AD
366/* Attach an ACTION to the current rule. If needed, move the previous
367 action as a mid-rule action. */
368
e9955c83 369void
17ee7397 370grammar_current_rule_action_append (const char *action, location loc)
2e047461
AD
371{
372 if (current_rule->action)
373 grammar_midrule_action ();
374 current_rule->action = action;
17ee7397 375 current_rule->action_location = loc;
2e047461
AD
376}
377
a70083a3 378\f
a70083a3
AD
379/*---------------------------------------------------------------.
380| Convert the rules into the representation using RRHS, RLHS and |
d9b739c3 381| RITEM. |
a70083a3 382`---------------------------------------------------------------*/
1ff442ca 383
4a120d45 384static void
118fb205 385packgram (void)
1ff442ca 386{
9222837b 387 unsigned int itemno = 0;
17ee7397
PE
388 rule_number ruleno = 0;
389 symbol_list *p = grammar;
1ff442ca 390
da2a7671
PE
391 ritem = xnmalloc (nritems, sizeof *ritem);
392 rules = xnmalloc (nrules, sizeof *rules);
1ff442ca 393
1ff442ca
NF
394 while (p)
395 {
17ee7397 396 symbol *ruleprec = p->ruleprec;
d7e1f00c 397 rules[ruleno].user_number = ruleno;
c3b407f4 398 rules[ruleno].number = ruleno;
bba97eb2 399 rules[ruleno].lhs = p->sym;
99013900 400 rules[ruleno].rhs = ritem + itemno;
da2a7671
PE
401 rules[ruleno].prec = NULL;
402 rules[ruleno].dprec = p->dprec;
403 rules[ruleno].merger = p->merger;
404 rules[ruleno].precsym = NULL;
8efe435c 405 rules[ruleno].location = p->location;
b4afb6bb 406 rules[ruleno].useful = true;
1a2b5d37 407 rules[ruleno].action = p->action;
8efe435c 408 rules[ruleno].action_location = p->action_location;
1ff442ca
NF
409
410 p = p->next;
411 while (p && p->sym)
412 {
17ee7397 413 /* item_number = symbol_number.
5fbb0954 414 But the former needs to contain more: negative rule numbers. */
a49aecd5 415 ritem[itemno++] = symbol_number_as_item_number (p->sym->number);
1ff442ca
NF
416 /* A rule gets by default the precedence and associativity
417 of the last token in it. */
39a06c25 418 if (p->sym->class == token_sym && default_prec)
03b31c0c 419 rules[ruleno].prec = p->sym;
a70083a3
AD
420 if (p)
421 p = p->next;
1ff442ca
NF
422 }
423
424 /* If this rule has a %prec,
a70083a3 425 the specified symbol's precedence replaces the default. */
1ff442ca
NF
426 if (ruleprec)
427 {
03b31c0c
AD
428 rules[ruleno].precsym = ruleprec;
429 rules[ruleno].prec = ruleprec;
1ff442ca 430 }
4b3d3a8e 431 ritem[itemno++] = rule_number_as_item_number (ruleno);
f3849179 432 ++ruleno;
1ff442ca 433
a70083a3
AD
434 if (p)
435 p = p->next;
1ff442ca
NF
436 }
437
35dcf428
PE
438 if (itemno != nritems)
439 abort ();
3067fbef 440
273a74fa 441 if (trace_flag & trace_sets)
3067fbef 442 ritem_print (stderr);
1ff442ca 443}
a70083a3 444\f
fdbcd8e2
AD
445/*------------------------------------------------------------------.
446| Read in the grammar specification and record it in the format |
447| described in gram.h. All actions are copied into ACTION_OBSTACK, |
448| in each case forming the body of a C function (YYACTION) which |
449| contains a switch statement to decide which action to execute. |
450`------------------------------------------------------------------*/
a70083a3
AD
451
452void
453reader (void)
454{
a70083a3 455 /* Initialize the symbol table. */
db8837cb 456 symbols_new ();
b6610515 457
88bce5a2
AD
458 /* Construct the accept symbol. */
459 accept = symbol_get ("$accept", empty_location);
460 accept->class = nterm_sym;
461 accept->number = nvars++;
30171f79 462
a70083a3 463 /* Construct the error token */
39f41916 464 errtoken = symbol_get ("error", empty_location);
d7020c20 465 errtoken->class = token_sym;
72a23c97 466 errtoken->number = ntokens++;
b6610515 467
a70083a3
AD
468 /* Construct a token that represents all undefined literal tokens.
469 It is always token number 2. */
88bce5a2 470 undeftoken = symbol_get ("$undefined", empty_location);
d7020c20 471 undeftoken->class = token_sym;
72a23c97 472 undeftoken->number = ntokens++;
a70083a3 473
331dbc1b 474 /* Initialize the obstacks. */
0dd1580a
RA
475 obstack_init (&pre_prologue_obstack);
476 obstack_init (&post_prologue_obstack);
331dbc1b 477
2b81e969 478 gram_in = xfopen (grammar_file, "r");
e9955c83 479
473d0a75
AD
480 gram__flex_debug = trace_flag & trace_scan;
481 gram_debug = trace_flag & trace_parse;
1d6412ad 482 scanner_initialize ();
78c3da9e 483 gram_parse ();
331dbc1b 484
b275314e
AD
485 /* If something went wrong during the parsing, don't try to
486 continue. */
b4afb6bb 487 if (complaint_issued)
f956c304 488 return;
b275314e 489
e9955c83
AD
490 /* Grammar has been read. Do some checking */
491 if (nrules == 0)
492 fatal (_("no rules in the input grammar"));
493
494 /* Report any undefined symbols and consider them nonterminals. */
495 symbols_check_defined ();
b7c49edf 496
88bce5a2
AD
497 /* If the user did not define her ENDTOKEN, do it now. */
498 if (!endtoken)
b7c49edf 499 {
88bce5a2
AD
500 endtoken = symbol_get ("$end", empty_location);
501 endtoken->class = token_sym;
502 endtoken->number = 0;
b7c49edf 503 /* Value specified by POSIX. */
88bce5a2 504 endtoken->user_token_number = 0;
b7c49edf
AD
505 }
506
e9955c83
AD
507 /* Insert the initial rule, which line is that of the first rule
508 (not that of the start symbol):
509
88bce5a2 510 accept: %start EOF. */
e9955c83 511 {
17ee7397 512 symbol_list *p = symbol_list_new (accept, empty_location);
8efe435c
AD
513 p->location = grammar->location;
514 p->next = symbol_list_new (startsymbol, empty_location);
88bce5a2 515 p->next->next = symbol_list_new (endtoken, empty_location);
8efe435c 516 p->next->next->next = symbol_list_new (NULL, empty_location);
e9955c83
AD
517 p->next->next->next->next = grammar;
518 nrules += 1;
519 nritems += 3;
520 grammar = p;
521 }
522
17ee7397 523 if (! (nsyms <= SYMBOL_NUMBER_MAXIMUM && nsyms == ntokens + nvars))
35dcf428 524 abort ();
b0c4483e 525
2b81e969 526 xfclose (gram_in);
331dbc1b 527
a70083a3
AD
528 /* Assign the symbols their symbol numbers. Write #defines for the
529 token symbols into FDEFINES if requested. */
2f1afb73 530 symbols_pack ();
93ede233 531
a70083a3
AD
532 /* Convert the grammar into the format described in gram.h. */
533 packgram ();
8419d367 534
17ee7397
PE
535 /* The grammar as a symbol_list is no longer needed. */
536 LIST_FREE (symbol_list, grammar);
a70083a3 537}