]> git.saurik.com Git - bison.git/blame - src/reader.c
maint: Valgrind on OS X.
[bison.git] / src / reader.c
CommitLineData
35dcf428 1/* Input parser for Bison
9c4637fa 2
ea0a7676 3 Copyright (C) 1984, 1986, 1989, 1992, 1998, 2000-2003, 2005-2007,
c932d613 4 2009-2012 Free Software Foundation, Inc.
1ff442ca 5
41aca2e0 6 This file is part of Bison, the GNU Compiler Compiler.
1ff442ca 7
f16b0819 8 This program is free software: you can redistribute it and/or modify
41aca2e0 9 it under the terms of the GNU General Public License as published by
f16b0819
PE
10 the Free Software Foundation, either version 3 of the License, or
11 (at your option) any later version.
1ff442ca 12
f16b0819 13 This program is distributed in the hope that it will be useful,
41aca2e0
AD
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU General Public License for more details.
1ff442ca 17
41aca2e0 18 You should have received a copy of the GNU General Public License
f16b0819 19 along with this program. If not, see <http://www.gnu.org/licenses/>. */
1ff442ca 20
2cec9080 21#include <config.h>
1ff442ca 22#include "system.h"
17ee7397 23
7685e2f7 24#include <quote.h>
17ee7397
PE
25
26#include "complain.h"
27#include "conflicts.h"
1ff442ca 28#include "files.h"
17ee7397 29#include "getargs.h"
1ff442ca 30#include "gram.h"
23ec25b7 31#include "muscle-tab.h"
b2ca4022 32#include "reader.h"
17ee7397
PE
33#include "symlist.h"
34#include "symtab.h"
e9071366
AD
35#include "scan-gram.h"
36#include "scan-code.h"
1ff442ca 37
07c0db18 38static void prepare_percent_define_front_end_variables (void);
02d12d0d
PE
39static void check_and_convert_grammar (void);
40
17ee7397 41static symbol_list *grammar = NULL;
d0829076 42static bool start_flag = false;
676385e2 43merger_list *merge_functions;
1ff442ca 44
34f98f46 45/* Was %union seen? */
ddc8ede1
PE
46bool union_seen = false;
47
48/* Was a tag seen? */
49bool tag_seen = false;
39a06c25
PE
50
51/* Should rules have a default precedence? */
52bool default_prec = true;
0d533154 53\f
e9955c83
AD
54/*-----------------------.
55| Set the start symbol. |
56`-----------------------*/
1ff442ca 57
e9955c83 58void
a737b216 59grammar_start_symbol_set (symbol *sym, location loc)
1ff442ca
NF
60{
61 if (start_flag)
17ee7397 62 complain_at (loc, _("multiple %s declarations"), "%start");
943819bf
RS
63 else
64 {
d0829076 65 start_flag = true;
a737b216 66 startsymbol = sym;
17ee7397 67 startsymbol_location = loc;
943819bf 68 }
1ff442ca
NF
69}
70
a70083a3
AD
71\f
72
8ee5b538
JD
73/*------------------------------------------------------------------------.
74| Return the merger index for a merging function named NAME. Records the |
75| function, if new, in MERGER_LIST. |
76`------------------------------------------------------------------------*/
676385e2
PH
77
78static int
8ee5b538 79get_merge_function (uniqstr name)
676385e2
PH
80{
81 merger_list *syms;
82 merger_list head;
83 int n;
84
85 if (! glr_parser)
86 return 0;
87
676385e2 88 head.next = merge_functions;
affac613 89 for (syms = &head, n = 1; syms->next; syms = syms->next, n += 1)
17ee7397 90 if (UNIQSTR_EQ (name, syms->next->name))
676385e2 91 break;
a5d50994
AD
92 if (syms->next == NULL)
93 {
da2a7671 94 syms->next = xmalloc (sizeof syms->next[0]);
17ee7397 95 syms->next->name = uniqstr_new (name);
8ee5b538
JD
96 /* After all symbol type declarations have been parsed, packgram invokes
97 record_merge_function_type to set the type. */
98 syms->next->type = NULL;
a5d50994
AD
99 syms->next->next = NULL;
100 merge_functions = head.next;
101 }
676385e2
PH
102 return n;
103}
104
8ee5b538
JD
105/*-------------------------------------------------------------------------.
106| For the existing merging function with index MERGER, record the result |
107| type as TYPE as required by the lhs of the rule whose %merge declaration |
108| is at DECLARATION_LOC. |
109`-------------------------------------------------------------------------*/
110
111static void
112record_merge_function_type (int merger, uniqstr type, location declaration_loc)
113{
114 int merger_find;
115 merger_list *merge_function;
116
117 if (merger <= 0)
118 return;
119
120 if (type == NULL)
121 type = uniqstr_new ("");
122
123 merger_find = 1;
124 for (merge_function = merge_functions;
125 merge_function != NULL && merger_find != merger;
126 merge_function = merge_function->next)
127 merger_find += 1;
4f82b42a 128 aver (merge_function != NULL && merger_find == merger);
dd60572a 129 if (merge_function->type != NULL && !UNIQSTR_EQ (merge_function->type, type))
8ee5b538 130 {
3b452f4e 131 complain_at (declaration_loc,
4c787a31
AD
132 _("result type clash on merge function %s: <%s> != <%s>"),
133 quote (merge_function->name), type, merge_function->type);
3b452f4e
JD
134 complain_at (merge_function->type_declaration_location,
135 _("previous declaration"));
8ee5b538 136 }
dd60572a
JD
137 merge_function->type = uniqstr_new (type);
138 merge_function->type_declaration_location = declaration_loc;
8ee5b538
JD
139}
140
676385e2
PH
141/*--------------------------------------.
142| Free all merge-function definitions. |
143`--------------------------------------*/
144
145void
146free_merger_functions (void)
147{
affac613
AD
148 merger_list *L0 = merge_functions;
149 while (L0)
676385e2
PH
150 {
151 merger_list *L1 = L0->next;
152 free (L0);
153 L0 = L1;
154 }
155}
156
a70083a3 157\f
107f7dfb 158/*-------------------------------------------------------------------.
17ee7397 159| Parse the input grammar into a single symbol_list structure. Each |
107f7dfb
AD
160| rule is represented by a sequence of symbols: the left hand side |
161| followed by the contents of the right hand side, followed by a |
162| null pointer instead of a symbol to terminate the rule. The next |
163| symbol is the lhs of the following rule. |
164| |
fdbcd8e2
AD
165| All actions are copied out, labelled by the rule number they apply |
166| to. |
107f7dfb 167`-------------------------------------------------------------------*/
1ff442ca 168
f6d0f937 169/* The (currently) last symbol of GRAMMAR. */
04098407 170static symbol_list *grammar_end = NULL;
f6d0f937 171
52328c6e 172/* Append SYM to the grammar. */
7685e2f7 173static symbol_list *
17ee7397 174grammar_symbol_append (symbol *sym, location loc)
f6d0f937 175{
3be03b13 176 symbol_list *p = symbol_list_sym_new (sym, loc);
f6d0f937
AD
177
178 if (grammar_end)
179 grammar_end->next = p;
180 else
181 grammar = p;
182
183 grammar_end = p;
8f3596a6 184
e3233bf6 185 /* A null SYM stands for an end of rule; it is not an actual
8f3596a6
AD
186 part of it. */
187 if (sym)
188 ++nritems;
7685e2f7
AR
189
190 return p;
f6d0f937
AD
191}
192
d5e8574b 193static void
992e874a 194assign_named_ref (symbol_list *p, named_ref *name)
d5e8574b
AR
195{
196 symbol *sym = p->content.sym;
197
992e874a 198 if (name->id == sym->tag)
d5e8574b 199 {
992e874a 200 warn_at (name->loc,
d5e8574b
AR
201 _("duplicated symbol name for %s ignored"),
202 quote (sym->tag));
992e874a 203 named_ref_free (name);
d5e8574b
AR
204 }
205 else
992e874a 206 p->named_ref = name;
d5e8574b
AR
207}
208
209
8efe435c
AD
210/* The rule currently being defined, and the previous rule.
211 CURRENT_RULE points to the first LHS of the current rule, while
212 PREVIOUS_RULE_END points to the *end* of the previous rule (NULL). */
e256e17f 213static symbol_list *current_rule = NULL;
04098407 214static symbol_list *previous_rule_end = NULL;
da4160c3
AD
215
216
8efe435c
AD
217/*----------------------------------------------.
218| Create a new rule for LHS in to the GRAMMAR. |
219`----------------------------------------------*/
da4160c3 220
e9955c83 221void
7685e2f7 222grammar_current_rule_begin (symbol *lhs, location loc,
992e874a 223 named_ref *lhs_name)
da4160c3 224{
7685e2f7
AR
225 symbol_list* p;
226
da4160c3
AD
227 /* Start a new rule and record its lhs. */
228 ++nrules;
8efe435c 229 previous_rule_end = grammar_end;
7685e2f7
AR
230
231 p = grammar_symbol_append (lhs, loc);
992e874a 232 if (lhs_name)
8f462efe 233 assign_named_ref (p, named_ref_copy (lhs_name));
7685e2f7 234
da4160c3
AD
235 current_rule = grammar_end;
236
237 /* Mark the rule's lhs as a nonterminal if not already so. */
da4160c3
AD
238 if (lhs->class == unknown_sym)
239 {
240 lhs->class = nterm_sym;
241 lhs->number = nvars;
242 ++nvars;
243 }
244 else if (lhs->class == token_sym)
17ee7397 245 complain_at (loc, _("rule given for %s, which is a token"), lhs->tag);
da4160c3
AD
246}
247
affac613 248
d40ba6c2 249/*----------------------------------------------------------------------.
17bd8a73
JD
250| A symbol should be used if either: |
251| 1. It has a destructor. |
8ffd7912
JD
252| 2. The symbol is a mid-rule symbol (i.e., the generated LHS |
253| replacing a mid-rule action) that was assigned to or used, as in |
254| "exp: { $$ = 1; } { $$ = $1; }". |
d40ba6c2 255`----------------------------------------------------------------------*/
84866159
AD
256
257static bool
8ffd7912 258symbol_should_be_used (symbol_list const *s, bool *midrule_warning)
84866159 259{
95021767 260 if (symbol_destructor_get (s->content.sym)->code)
17bd8a73 261 return true;
8ffd7912
JD
262 if ((s->midrule && s->midrule->action_props.is_value_used)
263 || (s->midrule_parent_rule
264 && symbol_list_n_get (s->midrule_parent_rule,
265 s->midrule_parent_rhs_index)
266 ->action_props.is_value_used))
267 {
268 *midrule_warning = true;
269 return true;
270 }
17bd8a73 271 return false;
84866159
AD
272}
273
8f3596a6
AD
274/*----------------------------------------------------------------.
275| Check that the rule R is properly defined. For instance, there |
276| should be no type clash on the default action. |
277`----------------------------------------------------------------*/
e9955c83
AD
278
279static void
8f3596a6 280grammar_rule_check (const symbol_list *r)
e9955c83 281{
affac613 282 /* Type check.
e9955c83 283
affac613
AD
284 If there is an action, then there is nothing we can do: the user
285 is allowed to shoot herself in the foot.
3f4c0f80 286
affac613
AD
287 Don't worry about the default action if $$ is untyped, since $$'s
288 value can't be used. */
f6857bbf 289 if (!r->action_props.code && r->content.sym->type_name)
e9955c83 290 {
3be03b13 291 symbol *first_rhs = r->next->content.sym;
affac613
AD
292 /* If $$ is being set in default way, report if any type mismatch. */
293 if (first_rhs)
294 {
3be03b13 295 char const *lhs_type = r->content.sym->type_name;
affac613
AD
296 const char *rhs_type =
297 first_rhs->type_name ? first_rhs->type_name : "";
298 if (!UNIQSTR_EQ (lhs_type, rhs_type))
8f3596a6 299 warn_at (r->location,
affac613
AD
300 _("type clash on default action: <%s> != <%s>"),
301 lhs_type, rhs_type);
302 }
303 /* Warn if there is no default for $$ but we need one. */
304 else
8f3596a6 305 warn_at (r->location,
affac613
AD
306 _("empty rule for typed nonterminal, and no action"));
307 }
e3233bf6 308
d40ba6c2 309 /* Check that symbol values that should be used are in fact used. */
8f3596a6 310 {
668c5d19 311 symbol_list const *l = r;
8f3596a6 312 int n = 0;
3be03b13 313 for (; l && l->content.sym; l = l->next, ++n)
8ffd7912
JD
314 {
315 bool midrule_warning = false;
316 if (!l->action_props.is_value_used
317 && symbol_should_be_used (l, &midrule_warning)
318 /* The default action, $$ = $1, `uses' both. */
319 && (r->action_props.code || (n != 0 && n != 1)))
320 {
321 void (*warn_at_ptr)(location, char const*, ...) =
322 midrule_warning ? midrule_value_at : warn_at;
323 if (n)
324 warn_at_ptr (r->location, _("unused value: $%d"), n);
325 else
326 warn_at_ptr (r->location, _("unset value: $$"));
327 }
328 }
8f3596a6 329 }
2c203528
JD
330
331 /* See comments in grammar_current_rule_prec_set for how POSIX
332 mandates this complaint. It's only for identifiers, so skip
333 it for char literals and strings, which are always tokens. */
334 if (r->ruleprec
335 && r->ruleprec->tag[0] != '\'' && r->ruleprec->tag[0] != '"'
336 && !r->ruleprec->declared && !r->ruleprec->prec)
e02df72c
JD
337 warn_at (r->location, _("token for %%prec is not defined: %s"),
338 r->ruleprec->tag);
e9955c83
AD
339}
340
341
8efe435c
AD
342/*-------------------------------------.
343| End the currently being grown rule. |
344`-------------------------------------*/
e9955c83
AD
345
346void
8f3596a6 347grammar_current_rule_end (location loc)
e9955c83
AD
348{
349 /* Put an empty link in the list to mark the end of this rule */
8efe435c 350 grammar_symbol_append (NULL, grammar_end->location);
17ee7397 351 current_rule->location = loc;
e9955c83
AD
352}
353
354
8efe435c
AD
355/*-------------------------------------------------------------------.
356| The previous action turns out the be a mid-rule action. Attach it |
357| to the current rule, i.e., create a dummy symbol, attach it this |
358| mid-rule action, and append this dummy nonterminal to the current |
359| rule. |
360`-------------------------------------------------------------------*/
1485e106 361
6b702268 362void
1485e106
AD
363grammar_midrule_action (void)
364{
365 /* Since the action was written out with this rule's number, we must
366 give the new rule this number by inserting the new rule before
367 it. */
368
8efe435c
AD
369 /* Make a DUMMY nonterminal, whose location is that of the midrule
370 action. Create the MIDRULE. */
f6857bbf 371 location dummy_location = current_rule->action_props.location;
17ee7397 372 symbol *dummy = dummy_symbol_get (dummy_location);
3be03b13 373 symbol_list *midrule = symbol_list_sym_new (dummy, dummy_location);
1485e106 374
d5e8574b 375 /* Remember named_ref of previous action. */
992e874a 376 named_ref *action_name = current_rule->action_props.named_ref;
7685e2f7 377
1485e106
AD
378 /* Make a new rule, whose body is empty, before the current one, so
379 that the action just read can belong to it. */
380 ++nrules;
381 ++nritems;
8efe435c
AD
382 /* Attach its location and actions to that of the DUMMY. */
383 midrule->location = dummy_location;
f6857bbf
JD
384 code_props_rule_action_init (&midrule->action_props,
385 current_rule->action_props.code,
386 current_rule->action_props.location,
7685e2f7 387 midrule, 0);
f6857bbf 388 code_props_none_init (&current_rule->action_props);
1485e106 389
8efe435c
AD
390 if (previous_rule_end)
391 previous_rule_end->next = midrule;
1485e106 392 else
8efe435c 393 grammar = midrule;
1485e106 394
8efe435c 395 /* End the dummy's rule. */
3be03b13 396 midrule->next = symbol_list_sym_new (NULL, dummy_location);
84866159 397 midrule->next->next = current_rule;
1485e106 398
84866159 399 previous_rule_end = midrule->next;
1485e106 400
8efe435c 401 /* Insert the dummy nonterminal replacing the midrule action into
84866159 402 the current rule. Bind it to its dedicated rule. */
992e874a
AR
403 grammar_current_rule_symbol_append (dummy, dummy_location,
404 action_name);
6ec2c0f2 405 grammar_end->midrule = midrule;
ffa4ba3a
JD
406 midrule->midrule_parent_rule = current_rule;
407 midrule->midrule_parent_rhs_index = symbol_list_length (current_rule->next);
1485e106
AD
408}
409
9af3fbce
AD
410/* Set the precedence symbol of the current rule to PRECSYM. */
411
e9955c83 412void
17ee7397 413grammar_current_rule_prec_set (symbol *precsym, location loc)
9af3fbce 414{
2c203528
JD
415 /* POSIX says that any identifier is a nonterminal if it does not
416 appear on the LHS of a grammar rule and is not defined by %token
417 or by one of the directives that assigns precedence to a token. We
418 ignore this here because the only kind of identifier that POSIX
419 allows to follow a %prec is a token and because assuming it's a
420 token now can produce more logical error messages. Nevertheless,
421 grammar_rule_check does obey what we believe is the real intent of
422 POSIX here: that an error be reported for any identifier that
423 appears after %prec but that is not defined separately as a
424 token. */
26b8a438 425 symbol_class_set (precsym, token_sym, loc, false);
9af3fbce 426 if (current_rule->ruleprec)
17ee7397 427 complain_at (loc, _("only one %s allowed per rule"), "%prec");
9af3fbce
AD
428 current_rule->ruleprec = precsym;
429}
430
676385e2
PH
431/* Attach dynamic precedence DPREC to the current rule. */
432
433void
17ee7397 434grammar_current_rule_dprec_set (int dprec, location loc)
676385e2
PH
435{
436 if (! glr_parser)
17ee7397 437 warn_at (loc, _("%s affects only GLR parsers"), "%dprec");
676385e2 438 if (dprec <= 0)
17ee7397 439 complain_at (loc, _("%s must be followed by positive number"), "%dprec");
39f41916 440 else if (current_rule->dprec != 0)
17ee7397 441 complain_at (loc, _("only one %s allowed per rule"), "%dprec");
676385e2
PH
442 current_rule->dprec = dprec;
443}
444
445/* Attach a merge function NAME with argument type TYPE to current
446 rule. */
447
448void
17ee7397 449grammar_current_rule_merge_set (uniqstr name, location loc)
676385e2
PH
450{
451 if (! glr_parser)
17ee7397 452 warn_at (loc, _("%s affects only GLR parsers"), "%merge");
39f41916 453 if (current_rule->merger != 0)
17ee7397 454 complain_at (loc, _("only one %s allowed per rule"), "%merge");
8ee5b538
JD
455 current_rule->merger = get_merge_function (name);
456 current_rule->merger_declaration_location = loc;
676385e2
PH
457}
458
17ee7397 459/* Attach SYM to the current rule. If needed, move the previous
2e047461
AD
460 action as a mid-rule action. */
461
e9955c83 462void
7685e2f7 463grammar_current_rule_symbol_append (symbol *sym, location loc,
992e874a 464 named_ref *name)
2e047461 465{
7685e2f7 466 symbol_list *p;
f6857bbf 467 if (current_rule->action_props.code)
2e047461 468 grammar_midrule_action ();
7685e2f7 469 p = grammar_symbol_append (sym, loc);
992e874a
AR
470 if (name)
471 assign_named_ref(p, name);
2e047461
AD
472}
473
6b702268 474/* Attach an ACTION to the current rule. */
2e047461 475
e9955c83 476void
7685e2f7 477grammar_current_rule_action_append (const char *action, location loc,
992e874a 478 named_ref *name)
2e047461 479{
f6857bbf 480 if (current_rule->action_props.code)
381ecb06 481 grammar_midrule_action ();
ffa4ba3a 482 /* After all symbol declarations have been parsed, packgram invokes
f6857bbf
JD
483 code_props_translate_code. */
484 code_props_rule_action_init (&current_rule->action_props, action, loc,
992e874a 485 current_rule, name);
2e047461
AD
486}
487
a70083a3 488\f
a70083a3
AD
489/*---------------------------------------------------------------.
490| Convert the rules into the representation using RRHS, RLHS and |
d9b739c3 491| RITEM. |
a70083a3 492`---------------------------------------------------------------*/
1ff442ca 493
4a120d45 494static void
118fb205 495packgram (void)
1ff442ca 496{
9222837b 497 unsigned int itemno = 0;
17ee7397
PE
498 rule_number ruleno = 0;
499 symbol_list *p = grammar;
1ff442ca 500
e9ad4aec
PE
501 ritem = xnmalloc (nritems + 1, sizeof *ritem);
502
503 /* This sentinel is used by build_relations in gram.c. */
504 *ritem++ = 0;
505
da2a7671 506 rules = xnmalloc (nrules, sizeof *rules);
1ff442ca 507
1ff442ca
NF
508 while (p)
509 {
e9071366 510 int rule_length = 0;
17ee7397 511 symbol *ruleprec = p->ruleprec;
3be03b13 512 record_merge_function_type (p->merger, p->content.sym->type_name,
8ee5b538 513 p->merger_declaration_location);
d7e1f00c 514 rules[ruleno].user_number = ruleno;
c3b407f4 515 rules[ruleno].number = ruleno;
3be03b13 516 rules[ruleno].lhs = p->content.sym;
99013900 517 rules[ruleno].rhs = ritem + itemno;
da2a7671
PE
518 rules[ruleno].prec = NULL;
519 rules[ruleno].dprec = p->dprec;
520 rules[ruleno].merger = p->merger;
521 rules[ruleno].precsym = NULL;
8efe435c 522 rules[ruleno].location = p->location;
b4afb6bb 523 rules[ruleno].useful = true;
f6857bbf
JD
524 rules[ruleno].action = p->action_props.code;
525 rules[ruleno].action_location = p->action_props.location;
1ff442ca 526
f91b1629
JD
527 /* If the midrule's $$ is set or its $n is used, remove the `$' from the
528 symbol name so that it's a user-defined symbol so that the default
529 %destructor and %printer apply. */
530 if (p->midrule_parent_rule
f6857bbf 531 && (p->action_props.is_value_used
b0f4c4ea 532 || symbol_list_n_get (p->midrule_parent_rule,
f6857bbf
JD
533 p->midrule_parent_rhs_index)
534 ->action_props.is_value_used))
f91b1629
JD
535 p->content.sym->tag += 1;
536
868d2d96
JD
537 /* Don't check the generated rule 0. It has no action, so some rhs
538 symbols may appear unused, but the parsing algorithm ensures that
539 %destructor's are invoked appropriately. */
540 if (p != grammar)
541 grammar_rule_check (p);
ffa4ba3a 542
3be03b13 543 for (p = p->next; p && p->content.sym; p = p->next)
1ff442ca 544 {
e9071366
AD
545 ++rule_length;
546
547 /* Don't allow rule_length == INT_MAX, since that might
548 cause confusion with strtol if INT_MAX == LONG_MAX. */
549 if (rule_length == INT_MAX)
550 fatal_at (rules[ruleno].location, _("rule is too long"));
551
17ee7397 552 /* item_number = symbol_number.
5fbb0954 553 But the former needs to contain more: negative rule numbers. */
3be03b13
JD
554 ritem[itemno++] =
555 symbol_number_as_item_number (p->content.sym->number);
1ff442ca 556 /* A rule gets by default the precedence and associativity
e9071366 557 of its last token. */
3be03b13
JD
558 if (p->content.sym->class == token_sym && default_prec)
559 rules[ruleno].prec = p->content.sym;
1ff442ca
NF
560 }
561
562 /* If this rule has a %prec,
a70083a3 563 the specified symbol's precedence replaces the default. */
1ff442ca
NF
564 if (ruleprec)
565 {
03b31c0c
AD
566 rules[ruleno].precsym = ruleprec;
567 rules[ruleno].prec = ruleprec;
1ff442ca 568 }
e9071366 569 /* An item ends by the rule number (negated). */
4b3d3a8e 570 ritem[itemno++] = rule_number_as_item_number (ruleno);
4f82b42a 571 aver (itemno < ITEM_NUMBER_MAX);
f3849179 572 ++ruleno;
4f82b42a 573 aver (ruleno < RULE_NUMBER_MAX);
1ff442ca 574
a70083a3
AD
575 if (p)
576 p = p->next;
1ff442ca
NF
577 }
578
4f82b42a 579 aver (itemno == nritems);
3067fbef 580
273a74fa 581 if (trace_flag & trace_sets)
3067fbef 582 ritem_print (stderr);
1ff442ca 583}
a70083a3 584\f
fdbcd8e2
AD
585/*------------------------------------------------------------------.
586| Read in the grammar specification and record it in the format |
587| described in gram.h. All actions are copied into ACTION_OBSTACK, |
588| in each case forming the body of a C function (YYACTION) which |
589| contains a switch statement to decide which action to execute. |
590`------------------------------------------------------------------*/
a70083a3
AD
591
592void
593reader (void)
594{
a70083a3 595 /* Initialize the symbol table. */
db8837cb 596 symbols_new ();
b6610515 597
88bce5a2
AD
598 /* Construct the accept symbol. */
599 accept = symbol_get ("$accept", empty_location);
600 accept->class = nterm_sym;
601 accept->number = nvars++;
30171f79 602
a70083a3 603 /* Construct the error token */
39f41916 604 errtoken = symbol_get ("error", empty_location);
d7020c20 605 errtoken->class = token_sym;
72a23c97 606 errtoken->number = ntokens++;
b6610515 607
a70083a3
AD
608 /* Construct a token that represents all undefined literal tokens.
609 It is always token number 2. */
88bce5a2 610 undeftoken = symbol_get ("$undefined", empty_location);
d7020c20 611 undeftoken->class = token_sym;
72a23c97 612 undeftoken->number = ntokens++;
a70083a3 613
2b81e969 614 gram_in = xfopen (grammar_file, "r");
e9955c83 615
473d0a75
AD
616 gram__flex_debug = trace_flag & trace_scan;
617 gram_debug = trace_flag & trace_parse;
e9071366 618 gram_scanner_initialize ();
78c3da9e 619 gram_parse ();
07c0db18 620 prepare_percent_define_front_end_variables ();
331dbc1b 621
07c0db18
JD
622 if (! complaint_issued)
623 check_and_convert_grammar ();
624
625 xfclose (gram_in);
626}
627
/* Give the %define front-end variables their default values where
   they were not set, then check the values they ended up with.  */
static void
prepare_percent_define_front_end_variables (void)
{
  char *lr_type;

  /* Set %define front-end variable defaults.  */
  muscle_percent_define_default ("lr.keep-unreachable-states", "false");

  /* IELR would be a better default, but LALR is historically the
     default.  */
  muscle_percent_define_default ("lr.type", "lalr");

  /* The default for lr.default-reductions depends on the LR type
     actually in effect.  */
  lr_type = muscle_percent_define_get ("lr.type");
  muscle_percent_define_default ("lr.default-reductions",
                                 (strcmp (lr_type, "canonical-lr") == 0
                                  ? "accepting"
                                  : "most"));
  free (lr_type);

  /* Check %define front-end variables.  Each group is a variable name
     followed by its valid values and a NULL; a further NULL ends the
     table.  */
  {
    static char const * const valid_values[] = {
      "lr.type", "lalr", "ielr", "canonical-lr", NULL,
      "lr.default-reductions", "most", "consistent", "accepting", NULL,
      NULL
    };
    muscle_percent_define_check_values (valid_values);
  }
}
656
b275314e 657
02d12d0d
PE
658/*-------------------------------------------------------------.
659| Check the grammar that has just been read, and convert it to |
660| internal form. |
661`-------------------------------------------------------------*/
662
663static void
664check_and_convert_grammar (void)
665{
666 /* Grammar has been read. Do some checking. */
e9955c83
AD
667 if (nrules == 0)
668 fatal (_("no rules in the input grammar"));
669
88bce5a2
AD
670 /* If the user did not define her ENDTOKEN, do it now. */
671 if (!endtoken)
b7c49edf 672 {
88bce5a2
AD
673 endtoken = symbol_get ("$end", empty_location);
674 endtoken->class = token_sym;
675 endtoken->number = 0;
b7c49edf 676 /* Value specified by POSIX. */
88bce5a2 677 endtoken->user_token_number = 0;
b7c49edf
AD
678 }
679
24985964
JD
680 /* Report any undefined symbols and consider them nonterminals. */
681 symbols_check_defined ();
682
4d7370cb
JD
683 /* Find the start symbol if no %start. */
684 if (!start_flag)
685 {
686 symbol_list *node;
687 for (node = grammar;
3be03b13 688 node != NULL && symbol_is_dummy (node->content.sym);
4d7370cb
JD
689 node = node->next)
690 {
691 for (node = node->next;
3be03b13 692 node != NULL && node->content.sym != NULL;
4d7370cb
JD
693 node = node->next)
694 ;
695 }
4f82b42a 696 aver (node != NULL);
3be03b13
JD
697 grammar_start_symbol_set (node->content.sym,
698 node->content.sym->location);
4d7370cb
JD
699 }
700
02d12d0d 701 /* Insert the initial rule, whose line is that of the first rule
e9955c83
AD
702 (not that of the start symbol):
703
88bce5a2 704 accept: %start EOF. */
e9955c83 705 {
3be03b13 706 symbol_list *p = symbol_list_sym_new (accept, empty_location);
8efe435c 707 p->location = grammar->location;
3be03b13
JD
708 p->next = symbol_list_sym_new (startsymbol, empty_location);
709 p->next->next = symbol_list_sym_new (endtoken, empty_location);
710 p->next->next->next = symbol_list_sym_new (NULL, empty_location);
e9955c83
AD
711 p->next->next->next->next = grammar;
712 nrules += 1;
713 nritems += 3;
714 grammar = p;
715 }
716
4f82b42a 717 aver (nsyms <= SYMBOL_NUMBER_MAXIMUM && nsyms == ntokens + nvars);
b0c4483e 718
a70083a3
AD
719 /* Assign the symbols their symbol numbers. Write #defines for the
720 token symbols into FDEFINES if requested. */
2f1afb73 721 symbols_pack ();
93ede233 722
574add85
JD
723 /* Scan rule actions after invoking symbol_check_alias_consistency (in
724 symbols_pack above) so that token types are set correctly before the rule
725 action type checking.
726
727 Before invoking grammar_rule_check (in packgram below) on any rule, make
728 sure all actions have already been scanned in order to set `used' flags.
729 Otherwise, checking that a midrule's $$ should be set will not always work
730 properly because the check must forward-reference the midrule's parent
731 rule. For the same reason, all the `used' flags must be set before
732 checking whether to remove `$' from any midrule symbol name (also in
733 packgram). */
14462c2b
JD
734 {
735 symbol_list *sym;
736 for (sym = grammar; sym; sym = sym->next)
737 code_props_translate_code (&sym->action_props);
738 }
574add85 739
a70083a3 740 /* Convert the grammar into the format described in gram.h. */
6d0ef4ec 741 packgram ();
8419d367 742
17ee7397 743 /* The grammar as a symbol_list is no longer needed. */
17bd8a73 744 symbol_list_free (grammar);
a70083a3 745}