]> git.saurik.com Git - bison.git/blame - src/reader.c
* src/getargs.c (report_argmatch): Initialize strtok().
[bison.git] / src / reader.c
CommitLineData
1ff442ca 1/* Input parser for bison
76514394 2 Copyright (C) 1984, 1986, 1989, 1992, 1998, 2000, 2001, 2002
a70083a3 3 Free Software Foundation, Inc.
1ff442ca 4
41aca2e0 5 This file is part of Bison, the GNU Compiler Compiler.
1ff442ca 6
41aca2e0
AD
7 Bison is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 2, or (at your option)
10 any later version.
1ff442ca 11
41aca2e0
AD
12 Bison is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
1ff442ca 16
41aca2e0
AD
17 You should have received a copy of the GNU General Public License
18 along with Bison; see the file COPYING. If not, write to
19 the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
20 Boston, MA 02111-1307, USA. */
1ff442ca
NF
21
22
1ff442ca 23#include "system.h"
2a91a95e
AD
24#include "quotearg.h"
25#include "quote.h"
ceed8467 26#include "getargs.h"
1ff442ca 27#include "files.h"
1ff442ca 28#include "symtab.h"
56c47203 29#include "symlist.h"
82b6d266 30#include "options.h"
1ff442ca 31#include "gram.h"
a0f6b076 32#include "complain.h"
6c89f1c1 33#include "output.h"
b2ca4022 34#include "reader.h"
340ef489 35#include "conflicts.h"
11d82f03 36#include "muscle_tab.h"
1ff442ca 37
1ff442ca 38int lineno;
56c47203 39static symbol_list_t *grammar = NULL;
280a38c3 40static int start_flag = 0;
1ff442ca 41
d7020c20 42/* Nonzero if %union has been seen. */
e9955c83 43int typed = 0;
0d533154 44\f
e9955c83
AD
45/*-----------------------.
46| Set the start symbol. |
47`-----------------------*/
1ff442ca 48
e9955c83 49void
8efe435c 50grammar_start_symbol_set (symbol_t *s, location_t l)
1ff442ca
NF
51{
52 if (start_flag)
e776192e 53 complain_at (l, _("multiple %s declarations"), "%start");
943819bf
RS
54 else
55 {
56 start_flag = 1;
e9955c83 57 startsymbol = s;
8efe435c 58 startsymbol_location = l;
943819bf 59 }
1ff442ca
NF
60}
61
1ff442ca 62
d7020c20 63/*----------------------------------------------------------------.
e9955c83
AD
64| There are two prologues: one before %union, one after. Augment |
65| the current one. |
d7020c20 66`----------------------------------------------------------------*/
1ff442ca 67
e9955c83 68void
0c15323d 69prologue_augment (const char *prologue, location_t location)
b6610515 70{
e9955c83
AD
71 struct obstack *oout =
72 !typed ? &pre_prologue_obstack : &post_prologue_obstack;
b6610515 73
e9955c83 74 if (!no_lines_flag)
b6610515 75 {
e9955c83 76 obstack_fgrow2 (oout, muscle_find ("linef"),
0c15323d
AD
77 location.first_line,
78 quotearg_style (c_quoting_style,
79 muscle_find ("filename")));
b6610515 80 }
e9955c83 81 obstack_sgrow (oout, prologue);
b6610515
RA
82}
83
2ba3b73c 84
426cf563 85
a870c567 86
e9955c83
AD
87/*----------------------.
88| Handle the epilogue. |
89`----------------------*/
426cf563 90
e9955c83 91void
0c15323d 92epilogue_set (const char *epilogue, location_t location)
2ba3b73c 93{
e9955c83 94 if (!no_lines_flag)
1ff442ca 95 {
592e8d4d 96 obstack_fgrow2 (&muscle_obstack, muscle_find ("linef"),
0c15323d
AD
97 location.first_line,
98 quotearg_style (c_quoting_style,
99 muscle_find ("filename")));
1ff442ca 100 }
592e8d4d
AD
101 obstack_sgrow (&muscle_obstack, epilogue);
102 obstack_1grow (&muscle_obstack, 0);
103 muscle_insert ("epilogue", obstack_finish (&muscle_obstack));
1ff442ca 104}
1ff442ca 105
a70083a3 106
a70083a3
AD
107\f
108
a70083a3
AD
109/*-------------------------------------------------------------------.
110| Generate a dummy symbol, a nonterminal, whose name cannot conflict |
111| with the user's names. |
112`-------------------------------------------------------------------*/
1ff442ca 113
db8837cb 114static symbol_t *
ee000ba4 115gensym (location_t location)
1ff442ca 116{
274d42ce
AD
117 /* Incremented for each generated symbol */
118 static int gensym_count = 0;
119 static char buf[256];
120
db8837cb 121 symbol_t *sym;
1ff442ca 122
274d42ce 123 sprintf (buf, "@%d", ++gensym_count);
ee000ba4 124 sym = getsym (buf, location);
d7020c20 125 sym->class = nterm_sym;
d9b739c3 126 sym->number = nvars++;
36281465 127 return sym;
1ff442ca 128}
a70083a3 129\f
107f7dfb 130/*-------------------------------------------------------------------.
56c47203 131| Parse the input grammar into a one symbol_list_t structure. Each |
107f7dfb
AD
132| rule is represented by a sequence of symbols: the left hand side |
133| followed by the contents of the right hand side, followed by a |
134| null pointer instead of a symbol to terminate the rule. The next |
135| symbol is the lhs of the following rule. |
136| |
fdbcd8e2
AD
137| All actions are copied out, labelled by the rule number they apply |
138| to. |
107f7dfb
AD
139| |
140| Bison used to allow some %directives in the rules sections, but |
141| this is no longer consider appropriate: (i) the documented grammar |
142| doesn't claim it, (ii), it would promote bad style, (iii), error |
143| recovery for %directives consists in skipping the junk until a `%' |
144| is seen and helrp synchronizing. This scheme is definitely wrong |
145| in the rules section. |
146`-------------------------------------------------------------------*/
1ff442ca 147
f6d0f937 148/* The (currently) last symbol of GRAMMAR. */
56c47203 149symbol_list_t *grammar_end = NULL;
f6d0f937
AD
150
151/* Append S to the GRAMMAR. */
e9955c83 152void
8efe435c 153grammar_symbol_append (symbol_t *symbol, location_t location)
f6d0f937 154{
56c47203 155 symbol_list_t *p = symbol_list_new (symbol, location);
f6d0f937
AD
156
157 if (grammar_end)
158 grammar_end->next = p;
159 else
160 grammar = p;
161
162 grammar_end = p;
163}
164
8efe435c
AD
165/* The rule currently being defined, and the previous rule.
166 CURRENT_RULE points to the first LHS of the current rule, while
167 PREVIOUS_RULE_END points to the *end* of the previous rule (NULL). */
56c47203
AD
168symbol_list_t *current_rule = NULL;
169symbol_list_t *previous_rule_end = NULL;
da4160c3
AD
170
171
8efe435c
AD
172/*----------------------------------------------.
173| Create a new rule for LHS in to the GRAMMAR. |
174`----------------------------------------------*/
da4160c3 175
e9955c83 176void
8efe435c 177grammar_rule_begin (symbol_t *lhs, location_t location)
da4160c3
AD
178{
179 if (!start_flag)
180 {
181 startsymbol = lhs;
8efe435c 182 startsymbol_location = location;
da4160c3
AD
183 start_flag = 1;
184 }
185
186 /* Start a new rule and record its lhs. */
187 ++nrules;
188 ++nritems;
189
8efe435c
AD
190 previous_rule_end = grammar_end;
191 grammar_symbol_append (lhs, location);
da4160c3
AD
192 current_rule = grammar_end;
193
194 /* Mark the rule's lhs as a nonterminal if not already so. */
195
196 if (lhs->class == unknown_sym)
197 {
198 lhs->class = nterm_sym;
199 lhs->number = nvars;
200 ++nvars;
201 }
202 else if (lhs->class == token_sym)
e776192e 203 complain_at (location, _("rule given for %s, which is a token"), lhs->tag);
da4160c3
AD
204}
205
e9955c83
AD
206/* Check that the last rule (CURRENT_RULE) is properly defined. For
207 instance, there should be no type clash on the default action. */
208
209static void
210grammar_current_rule_check (void)
211{
212 symbol_t *lhs = current_rule->sym;
213 symbol_t *first_rhs = current_rule->next->sym;
214
215 /* If there is an action, then there is nothing we can do: the user
216 is allowed to shoot in her foot. */
217 if (current_rule->action)
218 return;
219
220 /* If $$ is being set in default way, report if any type mismatch.
221 */
222 if (first_rhs)
223 {
224 const char *lhs_type = lhs->type_name ? lhs->type_name : "";
225 const char *rhs_type = first_rhs->type_name ? first_rhs->type_name : "";
226 if (strcmp (lhs_type, rhs_type))
e776192e
AD
227 complain_at (current_rule->location,
228 _("type clash (`%s' `%s') on default action"),
229 lhs_type, rhs_type);
e9955c83
AD
230 }
231 /* Warn if there is no default for $$ but we need one. */
232 else
233 {
234 if (lhs->type_name)
e776192e
AD
235 complain_at (current_rule->location,
236 _("empty rule for typed nonterminal, and no action"));
e9955c83
AD
237 }
238}
239
240
8efe435c
AD
241/*-------------------------------------.
242| End the currently being grown rule. |
243`-------------------------------------*/
e9955c83
AD
244
245void
8efe435c 246grammar_rule_end (location_t location)
e9955c83
AD
247{
248 /* Put an empty link in the list to mark the end of this rule */
8efe435c
AD
249 grammar_symbol_append (NULL, grammar_end->location);
250 current_rule->location = location;
e9955c83
AD
251 grammar_current_rule_check ();
252}
253
254
8efe435c
AD
255/*-------------------------------------------------------------------.
256| The previous action turns out the be a mid-rule action. Attach it |
257| to the current rule, i.e., create a dummy symbol, attach it this |
258| mid-rule action, and append this dummy nonterminal to the current |
259| rule. |
260`-------------------------------------------------------------------*/
1485e106 261
e9955c83 262void
1485e106
AD
263grammar_midrule_action (void)
264{
265 /* Since the action was written out with this rule's number, we must
266 give the new rule this number by inserting the new rule before
267 it. */
268
8efe435c
AD
269 /* Make a DUMMY nonterminal, whose location is that of the midrule
270 action. Create the MIDRULE. */
8efe435c 271 location_t dummy_location = current_rule->action_location;
ee000ba4 272 symbol_t *dummy = gensym (dummy_location);
56c47203 273 symbol_list_t *midrule = symbol_list_new (dummy, dummy_location);
1485e106
AD
274
275 /* Make a new rule, whose body is empty, before the current one, so
276 that the action just read can belong to it. */
277 ++nrules;
278 ++nritems;
8efe435c
AD
279 /* Attach its location and actions to that of the DUMMY. */
280 midrule->location = dummy_location;
281 midrule->action = current_rule->action;
282 midrule->action_location = dummy_location;
1485e106
AD
283 current_rule->action = NULL;
284
8efe435c
AD
285 if (previous_rule_end)
286 previous_rule_end->next = midrule;
1485e106 287 else
8efe435c 288 grammar = midrule;
1485e106 289
8efe435c
AD
290 /* End the dummy's rule. */
291 previous_rule_end = symbol_list_new (NULL, dummy_location);
292 previous_rule_end->next = current_rule;
1485e106 293
8efe435c 294 midrule->next = previous_rule_end;
1485e106 295
8efe435c
AD
296 /* Insert the dummy nonterminal replacing the midrule action into
297 the current rule. */
298 grammar_current_rule_symbol_append (dummy, dummy_location);
1485e106
AD
299}
300
9af3fbce
AD
301/* Set the precedence symbol of the current rule to PRECSYM. */
302
e9955c83 303void
e776192e 304grammar_current_rule_prec_set (symbol_t *precsym, location_t location)
9af3fbce
AD
305{
306 if (current_rule->ruleprec)
e776192e 307 complain_at (location, _("two @prec's in a row"));
9af3fbce
AD
308 current_rule->ruleprec = precsym;
309}
310
2e047461
AD
311/* Attach a SYMBOL to the current rule. If needed, move the previous
312 action as a mid-rule action. */
313
e9955c83 314void
8efe435c 315grammar_current_rule_symbol_append (symbol_t *symbol, location_t location)
2e047461
AD
316{
317 if (current_rule->action)
318 grammar_midrule_action ();
319 ++nritems;
8efe435c 320 grammar_symbol_append (symbol, location);
2e047461
AD
321}
322
323
324/* Attach an ACTION to the current rule. If needed, move the previous
325 action as a mid-rule action. */
326
e9955c83 327void
8efe435c 328grammar_current_rule_action_append (const char *action, location_t location)
2e047461
AD
329{
330 if (current_rule->action)
331 grammar_midrule_action ();
332 current_rule->action = action;
8efe435c 333 current_rule->action_location = location;
2e047461
AD
334}
335
a70083a3 336\f
a70083a3
AD
337/*---------------------------------------------------------------.
338| Convert the rules into the representation using RRHS, RLHS and |
d9b739c3 339| RITEM. |
a70083a3 340`---------------------------------------------------------------*/
1ff442ca 341
4a120d45 342static void
118fb205 343packgram (void)
1ff442ca 344{
0c2d3f4c 345 unsigned int itemno;
a70083a3 346 int ruleno;
56c47203 347 symbol_list_t *p;
1ff442ca 348
a900a624 349 ritem = XCALLOC (item_number_t, nritems);
1a2b5d37 350 rules = XCALLOC (rule_t, nrules) - 1;
1ff442ca
NF
351
352 itemno = 0;
353 ruleno = 1;
354
355 p = grammar;
356 while (p)
357 {
db8837cb 358 symbol_t *ruleprec = p->ruleprec;
d7e1f00c 359 rules[ruleno].user_number = ruleno;
c3b407f4 360 rules[ruleno].number = ruleno;
bba97eb2 361 rules[ruleno].lhs = p->sym;
99013900 362 rules[ruleno].rhs = ritem + itemno;
8efe435c 363 rules[ruleno].location = p->location;
1a2b5d37
AD
364 rules[ruleno].useful = TRUE;
365 rules[ruleno].action = p->action;
8efe435c 366 rules[ruleno].action_location = p->action_location;
1ff442ca
NF
367
368 p = p->next;
369 while (p && p->sym)
370 {
a49aecd5 371 /* item_number_t = symbol_number_t.
5fbb0954 372 But the former needs to contain more: negative rule numbers. */
a49aecd5 373 ritem[itemno++] = symbol_number_as_item_number (p->sym->number);
1ff442ca
NF
374 /* A rule gets by default the precedence and associativity
375 of the last token in it. */
d7020c20 376 if (p->sym->class == token_sym)
03b31c0c 377 rules[ruleno].prec = p->sym;
a70083a3
AD
378 if (p)
379 p = p->next;
1ff442ca
NF
380 }
381
382 /* If this rule has a %prec,
a70083a3 383 the specified symbol's precedence replaces the default. */
1ff442ca
NF
384 if (ruleprec)
385 {
03b31c0c
AD
386 rules[ruleno].precsym = ruleprec;
387 rules[ruleno].prec = ruleprec;
1ff442ca 388 }
1ff442ca 389 ritem[itemno++] = -ruleno;
f3849179 390 ++ruleno;
1ff442ca 391
a70083a3
AD
392 if (p)
393 p = p->next;
1ff442ca
NF
394 }
395
5123689b 396 assert (itemno == nritems);
3067fbef
AD
397
398 if (trace_flag)
399 ritem_print (stderr);
1ff442ca 400}
a70083a3 401\f
fdbcd8e2
AD
402/*------------------------------------------------------------------.
403| Read in the grammar specification and record it in the format |
404| described in gram.h. All actions are copied into ACTION_OBSTACK, |
405| in each case forming the body of a C function (YYACTION) which |
406| contains a switch statement to decide which action to execute. |
407`------------------------------------------------------------------*/
a70083a3
AD
408
409void
410reader (void)
411{
e9955c83 412 gram_control_t gram_control;
a70083a3
AD
413 lineno = 1;
414
415 /* Initialize the symbol table. */
db8837cb 416 symbols_new ();
b6610515 417
30171f79 418 /* Construct the axiom symbol. */
ee000ba4 419 axiom = getsym ("$axiom", empty_location);
30171f79 420 axiom->class = nterm_sym;
d9b739c3 421 axiom->number = nvars++;
30171f79 422
a70083a3 423 /* Construct the error token */
ee000ba4 424 errtoken = getsym ("error", empty_location);
d7020c20 425 errtoken->class = token_sym;
72a23c97 426 errtoken->number = ntokens++;
b6610515 427
a70083a3
AD
428 /* Construct a token that represents all undefined literal tokens.
429 It is always token number 2. */
ee000ba4 430 undeftoken = getsym ("$undefined.", empty_location);
d7020c20 431 undeftoken->class = token_sym;
72a23c97 432 undeftoken->number = ntokens++;
a70083a3 433
331dbc1b 434 /* Initialize the obstacks. */
0dd1580a
RA
435 obstack_init (&pre_prologue_obstack);
436 obstack_init (&post_prologue_obstack);
331dbc1b
AD
437
438 finput = xfopen (infile, "r");
e9955c83
AD
439 gram_in = finput;
440
441 gram_debug = !!getenv ("parse");
442 gram__flex_debug = !!getenv ("scan");
1d6412ad 443 scanner_initialize ();
e9955c83 444 gram_parse (&gram_control);
331dbc1b 445
e9955c83
AD
446 /* Grammar has been read. Do some checking */
447 if (nrules == 0)
448 fatal (_("no rules in the input grammar"));
449
450 /* Report any undefined symbols and consider them nonterminals. */
451 symbols_check_defined ();
b7c49edf
AD
452
453 /* If the user did not define her EOFTOKEN, do it now. */
454 if (!eoftoken)
455 {
ee000ba4 456 eoftoken = getsym ("$", empty_location);
b7c49edf 457 eoftoken->class = token_sym;
72a23c97 458 eoftoken->number = 0;
b7c49edf
AD
459 /* Value specified by POSIX. */
460 eoftoken->user_token_number = 0;
461 }
462
e9955c83
AD
463 /* Insert the initial rule, which line is that of the first rule
464 (not that of the start symbol):
465
466 axiom: %start EOF. */
467 {
56c47203 468 symbol_list_t *p = symbol_list_new (axiom, empty_location);
8efe435c
AD
469 p->location = grammar->location;
470 p->next = symbol_list_new (startsymbol, empty_location);
471 p->next->next = symbol_list_new (eoftoken, empty_location);
472 p->next->next->next = symbol_list_new (NULL, empty_location);
e9955c83
AD
473 p->next->next->next->next = grammar;
474 nrules += 1;
475 nritems += 3;
476 grammar = p;
477 }
478
479 if (nsyms > SHRT_MAX)
480 fatal (_("too many symbols (tokens plus nonterminals); maximum %d"),
481 SHRT_MAX);
482
483 assert (nsyms == ntokens + nvars);
b0c4483e 484
331dbc1b
AD
485 xfclose (finput);
486
a70083a3
AD
487 /* Assign the symbols their symbol numbers. Write #defines for the
488 token symbols into FDEFINES if requested. */
2f1afb73 489 symbols_pack ();
93ede233 490
a70083a3
AD
491 /* Convert the grammar into the format described in gram.h. */
492 packgram ();
8419d367 493
56c47203
AD
494 /* The grammar as a symbol_list_t is no longer needed. */
495 LIST_FREE (symbol_list_t, grammar);
a70083a3 496}