]> git.saurik.com Git - bison.git/blame - src/reader.c
xml: match DOT output and xml2dot.xsl processing
[bison.git] / src / reader.c
CommitLineData
/* Input parser for Bison

   Copyright (C) 1984, 1986, 1989, 1992, 1998, 2000-2003, 2005-2007,
   2009-2012 Free Software Foundation, Inc.

   This file is part of Bison, the GNU Compiler Compiler.

   This program is free software: you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation, either version 3 of the License, or
   (at your option) any later version.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program.  If not, see <http://www.gnu.org/licenses/>.  */
1ff442ca 20
2cec9080 21#include <config.h>
1ff442ca 22#include "system.h"
17ee7397 23
7685e2f7 24#include <quote.h>
17ee7397
PE
25
26#include "complain.h"
27#include "conflicts.h"
1ff442ca 28#include "files.h"
17ee7397 29#include "getargs.h"
1ff442ca 30#include "gram.h"
23ec25b7 31#include "muscle-tab.h"
b2ca4022 32#include "reader.h"
17ee7397
PE
33#include "symlist.h"
34#include "symtab.h"
e9071366
AD
35#include "scan-gram.h"
36#include "scan-code.h"
1ff442ca 37
07c0db18 38static void prepare_percent_define_front_end_variables (void);
02d12d0d
PE
39static void check_and_convert_grammar (void);
40
17ee7397 41static symbol_list *grammar = NULL;
d0829076 42static bool start_flag = false;
676385e2 43merger_list *merge_functions;
1ff442ca 44
34f98f46 45/* Was %union seen? */
ddc8ede1
PE
46bool union_seen = false;
47
48/* Was a tag seen? */
49bool tag_seen = false;
39a06c25
PE
50
51/* Should rules have a default precedence? */
52bool default_prec = true;
0d533154 53\f
e9955c83
AD
54/*-----------------------.
55| Set the start symbol. |
56`-----------------------*/
1ff442ca 57
e9955c83 58void
a737b216 59grammar_start_symbol_set (symbol *sym, location loc)
1ff442ca
NF
60{
61 if (start_flag)
17ee7397 62 complain_at (loc, _("multiple %s declarations"), "%start");
943819bf
RS
63 else
64 {
d0829076 65 start_flag = true;
a737b216 66 startsymbol = sym;
17ee7397 67 startsymbol_location = loc;
943819bf 68 }
1ff442ca
NF
69}
70
a70083a3
AD
71\f
72
8ee5b538
JD
73/*------------------------------------------------------------------------.
74| Return the merger index for a merging function named NAME. Records the |
75| function, if new, in MERGER_LIST. |
76`------------------------------------------------------------------------*/
676385e2
PH
77
78static int
8ee5b538 79get_merge_function (uniqstr name)
676385e2
PH
80{
81 merger_list *syms;
82 merger_list head;
83 int n;
84
85 if (! glr_parser)
86 return 0;
87
676385e2 88 head.next = merge_functions;
affac613 89 for (syms = &head, n = 1; syms->next; syms = syms->next, n += 1)
17ee7397 90 if (UNIQSTR_EQ (name, syms->next->name))
676385e2 91 break;
a5d50994
AD
92 if (syms->next == NULL)
93 {
da2a7671 94 syms->next = xmalloc (sizeof syms->next[0]);
17ee7397 95 syms->next->name = uniqstr_new (name);
8ee5b538
JD
96 /* After all symbol type declarations have been parsed, packgram invokes
97 record_merge_function_type to set the type. */
98 syms->next->type = NULL;
a5d50994
AD
99 syms->next->next = NULL;
100 merge_functions = head.next;
101 }
676385e2
PH
102 return n;
103}
104
8ee5b538
JD
105/*-------------------------------------------------------------------------.
106| For the existing merging function with index MERGER, record the result |
107| type as TYPE as required by the lhs of the rule whose %merge declaration |
108| is at DECLARATION_LOC. |
109`-------------------------------------------------------------------------*/
110
111static void
112record_merge_function_type (int merger, uniqstr type, location declaration_loc)
113{
114 int merger_find;
115 merger_list *merge_function;
116
117 if (merger <= 0)
118 return;
119
120 if (type == NULL)
121 type = uniqstr_new ("");
122
123 merger_find = 1;
124 for (merge_function = merge_functions;
125 merge_function != NULL && merger_find != merger;
126 merge_function = merge_function->next)
127 merger_find += 1;
4f82b42a 128 aver (merge_function != NULL && merger_find == merger);
dd60572a 129 if (merge_function->type != NULL && !UNIQSTR_EQ (merge_function->type, type))
8ee5b538 130 {
24d96dd3
TR
131 unsigned indent = 0;
132 complain_at_indent (declaration_loc, &indent,
133 _("result type clash on merge function %s: "
134 "<%s> != <%s>"),
135 quote (merge_function->name), type,
136 merge_function->type);
137 indent += SUB_INDENT;
138 complain_at_indent (merge_function->type_declaration_location, &indent,
139 _("previous declaration"));
140 }
dd60572a
JD
141 merge_function->type = uniqstr_new (type);
142 merge_function->type_declaration_location = declaration_loc;
8ee5b538
JD
143}
144
676385e2
PH
145/*--------------------------------------.
146| Free all merge-function definitions. |
147`--------------------------------------*/
148
149void
150free_merger_functions (void)
151{
affac613
AD
152 merger_list *L0 = merge_functions;
153 while (L0)
676385e2
PH
154 {
155 merger_list *L1 = L0->next;
156 free (L0);
157 L0 = L1;
158 }
159}
160
a70083a3 161\f
/*-------------------------------------------------------------------.
| Parse the input grammar into a single symbol_list structure.  Each |
| rule is represented by a sequence of symbols: the left hand side   |
| followed by the contents of the right hand side, followed by a     |
| null pointer instead of a symbol to terminate the rule.  The next  |
| symbol is the lhs of the following rule.                           |
|                                                                    |
| All actions are copied out, labelled by the rule number they apply |
| to.                                                                |
`-------------------------------------------------------------------*/
1ff442ca 172
f6d0f937 173/* The (currently) last symbol of GRAMMAR. */
04098407 174static symbol_list *grammar_end = NULL;
f6d0f937 175
52328c6e 176/* Append SYM to the grammar. */
7685e2f7 177static symbol_list *
17ee7397 178grammar_symbol_append (symbol *sym, location loc)
f6d0f937 179{
3be03b13 180 symbol_list *p = symbol_list_sym_new (sym, loc);
f6d0f937
AD
181
182 if (grammar_end)
183 grammar_end->next = p;
184 else
185 grammar = p;
186
187 grammar_end = p;
8f3596a6 188
e3233bf6 189 /* A null SYM stands for an end of rule; it is not an actual
8f3596a6
AD
190 part of it. */
191 if (sym)
192 ++nritems;
7685e2f7
AR
193
194 return p;
f6d0f937
AD
195}
196
d5e8574b 197static void
992e874a 198assign_named_ref (symbol_list *p, named_ref *name)
d5e8574b
AR
199{
200 symbol *sym = p->content.sym;
201
992e874a 202 if (name->id == sym->tag)
d5e8574b 203 {
992e874a 204 warn_at (name->loc,
d5e8574b
AR
205 _("duplicated symbol name for %s ignored"),
206 quote (sym->tag));
992e874a 207 named_ref_free (name);
d5e8574b
AR
208 }
209 else
992e874a 210 p->named_ref = name;
d5e8574b
AR
211}
212
213
8efe435c
AD
214/* The rule currently being defined, and the previous rule.
215 CURRENT_RULE points to the first LHS of the current rule, while
216 PREVIOUS_RULE_END points to the *end* of the previous rule (NULL). */
e256e17f 217static symbol_list *current_rule = NULL;
04098407 218static symbol_list *previous_rule_end = NULL;
da4160c3
AD
219
220
8efe435c
AD
221/*----------------------------------------------.
222| Create a new rule for LHS in to the GRAMMAR. |
223`----------------------------------------------*/
da4160c3 224
e9955c83 225void
7685e2f7 226grammar_current_rule_begin (symbol *lhs, location loc,
992e874a 227 named_ref *lhs_name)
da4160c3 228{
7685e2f7
AR
229 symbol_list* p;
230
da4160c3
AD
231 /* Start a new rule and record its lhs. */
232 ++nrules;
8efe435c 233 previous_rule_end = grammar_end;
7685e2f7
AR
234
235 p = grammar_symbol_append (lhs, loc);
992e874a 236 if (lhs_name)
8f462efe 237 assign_named_ref (p, named_ref_copy (lhs_name));
7685e2f7 238
da4160c3
AD
239 current_rule = grammar_end;
240
241 /* Mark the rule's lhs as a nonterminal if not already so. */
da4160c3
AD
242 if (lhs->class == unknown_sym)
243 {
244 lhs->class = nterm_sym;
245 lhs->number = nvars;
246 ++nvars;
247 }
248 else if (lhs->class == token_sym)
17ee7397 249 complain_at (loc, _("rule given for %s, which is a token"), lhs->tag);
da4160c3
AD
250}
251
affac613 252
d40ba6c2 253/*----------------------------------------------------------------------.
17bd8a73
JD
254| A symbol should be used if either: |
255| 1. It has a destructor. |
8ffd7912
JD
256| 2. The symbol is a mid-rule symbol (i.e., the generated LHS |
257| replacing a mid-rule action) that was assigned to or used, as in |
258| "exp: { $$ = 1; } { $$ = $1; }". |
d40ba6c2 259`----------------------------------------------------------------------*/
84866159
AD
260
261static bool
8ffd7912 262symbol_should_be_used (symbol_list const *s, bool *midrule_warning)
84866159 263{
95021767 264 if (symbol_destructor_get (s->content.sym)->code)
17bd8a73 265 return true;
8ffd7912
JD
266 if ((s->midrule && s->midrule->action_props.is_value_used)
267 || (s->midrule_parent_rule
268 && symbol_list_n_get (s->midrule_parent_rule,
269 s->midrule_parent_rhs_index)
270 ->action_props.is_value_used))
271 {
272 *midrule_warning = true;
273 return true;
274 }
17bd8a73 275 return false;
84866159
AD
276}
277
8f3596a6
AD
278/*----------------------------------------------------------------.
279| Check that the rule R is properly defined. For instance, there |
280| should be no type clash on the default action. |
281`----------------------------------------------------------------*/
e9955c83
AD
282
283static void
8f3596a6 284grammar_rule_check (const symbol_list *r)
e9955c83 285{
affac613 286 /* Type check.
e9955c83 287
affac613
AD
288 If there is an action, then there is nothing we can do: the user
289 is allowed to shoot herself in the foot.
3f4c0f80 290
affac613
AD
291 Don't worry about the default action if $$ is untyped, since $$'s
292 value can't be used. */
f6857bbf 293 if (!r->action_props.code && r->content.sym->type_name)
e9955c83 294 {
3be03b13 295 symbol *first_rhs = r->next->content.sym;
affac613
AD
296 /* If $$ is being set in default way, report if any type mismatch. */
297 if (first_rhs)
298 {
3be03b13 299 char const *lhs_type = r->content.sym->type_name;
affac613
AD
300 const char *rhs_type =
301 first_rhs->type_name ? first_rhs->type_name : "";
302 if (!UNIQSTR_EQ (lhs_type, rhs_type))
8f3596a6 303 warn_at (r->location,
affac613
AD
304 _("type clash on default action: <%s> != <%s>"),
305 lhs_type, rhs_type);
306 }
307 /* Warn if there is no default for $$ but we need one. */
308 else
8f3596a6 309 warn_at (r->location,
affac613
AD
310 _("empty rule for typed nonterminal, and no action"));
311 }
e3233bf6 312
d40ba6c2 313 /* Check that symbol values that should be used are in fact used. */
8f3596a6 314 {
668c5d19 315 symbol_list const *l = r;
8f3596a6 316 int n = 0;
3be03b13 317 for (; l && l->content.sym; l = l->next, ++n)
8ffd7912
JD
318 {
319 bool midrule_warning = false;
320 if (!l->action_props.is_value_used
321 && symbol_should_be_used (l, &midrule_warning)
322 /* The default action, $$ = $1, `uses' both. */
323 && (r->action_props.code || (n != 0 && n != 1)))
324 {
325 void (*warn_at_ptr)(location, char const*, ...) =
326 midrule_warning ? midrule_value_at : warn_at;
327 if (n)
328 warn_at_ptr (r->location, _("unused value: $%d"), n);
329 else
330 warn_at_ptr (r->location, _("unset value: $$"));
331 }
332 }
8f3596a6 333 }
2c203528
JD
334
335 /* See comments in grammar_current_rule_prec_set for how POSIX
336 mandates this complaint. It's only for identifiers, so skip
337 it for char literals and strings, which are always tokens. */
338 if (r->ruleprec
339 && r->ruleprec->tag[0] != '\'' && r->ruleprec->tag[0] != '"'
340 && !r->ruleprec->declared && !r->ruleprec->prec)
e02df72c
JD
341 warn_at (r->location, _("token for %%prec is not defined: %s"),
342 r->ruleprec->tag);
e9955c83
AD
343}
344
345
8efe435c
AD
346/*-------------------------------------.
347| End the currently being grown rule. |
348`-------------------------------------*/
e9955c83
AD
349
350void
8f3596a6 351grammar_current_rule_end (location loc)
e9955c83
AD
352{
353 /* Put an empty link in the list to mark the end of this rule */
8efe435c 354 grammar_symbol_append (NULL, grammar_end->location);
17ee7397 355 current_rule->location = loc;
e9955c83
AD
356}
357
358
8efe435c
AD
359/*-------------------------------------------------------------------.
360| The previous action turns out the be a mid-rule action. Attach it |
361| to the current rule, i.e., create a dummy symbol, attach it this |
362| mid-rule action, and append this dummy nonterminal to the current |
363| rule. |
364`-------------------------------------------------------------------*/
1485e106 365
6b702268 366void
1485e106
AD
367grammar_midrule_action (void)
368{
369 /* Since the action was written out with this rule's number, we must
370 give the new rule this number by inserting the new rule before
371 it. */
372
8efe435c
AD
373 /* Make a DUMMY nonterminal, whose location is that of the midrule
374 action. Create the MIDRULE. */
f6857bbf 375 location dummy_location = current_rule->action_props.location;
17ee7397 376 symbol *dummy = dummy_symbol_get (dummy_location);
3be03b13 377 symbol_list *midrule = symbol_list_sym_new (dummy, dummy_location);
1485e106 378
d5e8574b 379 /* Remember named_ref of previous action. */
992e874a 380 named_ref *action_name = current_rule->action_props.named_ref;
7685e2f7 381
1485e106
AD
382 /* Make a new rule, whose body is empty, before the current one, so
383 that the action just read can belong to it. */
384 ++nrules;
385 ++nritems;
8efe435c
AD
386 /* Attach its location and actions to that of the DUMMY. */
387 midrule->location = dummy_location;
f6857bbf
JD
388 code_props_rule_action_init (&midrule->action_props,
389 current_rule->action_props.code,
390 current_rule->action_props.location,
7685e2f7 391 midrule, 0);
f6857bbf 392 code_props_none_init (&current_rule->action_props);
1485e106 393
8efe435c
AD
394 if (previous_rule_end)
395 previous_rule_end->next = midrule;
1485e106 396 else
8efe435c 397 grammar = midrule;
1485e106 398
8efe435c 399 /* End the dummy's rule. */
3be03b13 400 midrule->next = symbol_list_sym_new (NULL, dummy_location);
84866159 401 midrule->next->next = current_rule;
1485e106 402
84866159 403 previous_rule_end = midrule->next;
1485e106 404
8efe435c 405 /* Insert the dummy nonterminal replacing the midrule action into
84866159 406 the current rule. Bind it to its dedicated rule. */
992e874a
AR
407 grammar_current_rule_symbol_append (dummy, dummy_location,
408 action_name);
6ec2c0f2 409 grammar_end->midrule = midrule;
ffa4ba3a
JD
410 midrule->midrule_parent_rule = current_rule;
411 midrule->midrule_parent_rhs_index = symbol_list_length (current_rule->next);
1485e106
AD
412}
413
9af3fbce
AD
414/* Set the precedence symbol of the current rule to PRECSYM. */
415
e9955c83 416void
17ee7397 417grammar_current_rule_prec_set (symbol *precsym, location loc)
9af3fbce 418{
2c203528
JD
419 /* POSIX says that any identifier is a nonterminal if it does not
420 appear on the LHS of a grammar rule and is not defined by %token
421 or by one of the directives that assigns precedence to a token. We
422 ignore this here because the only kind of identifier that POSIX
423 allows to follow a %prec is a token and because assuming it's a
424 token now can produce more logical error messages. Nevertheless,
425 grammar_rule_check does obey what we believe is the real intent of
426 POSIX here: that an error be reported for any identifier that
427 appears after %prec but that is not defined separately as a
428 token. */
26b8a438 429 symbol_class_set (precsym, token_sym, loc, false);
9af3fbce 430 if (current_rule->ruleprec)
17ee7397 431 complain_at (loc, _("only one %s allowed per rule"), "%prec");
9af3fbce
AD
432 current_rule->ruleprec = precsym;
433}
434
676385e2
PH
435/* Attach dynamic precedence DPREC to the current rule. */
436
437void
17ee7397 438grammar_current_rule_dprec_set (int dprec, location loc)
676385e2
PH
439{
440 if (! glr_parser)
17ee7397 441 warn_at (loc, _("%s affects only GLR parsers"), "%dprec");
676385e2 442 if (dprec <= 0)
17ee7397 443 complain_at (loc, _("%s must be followed by positive number"), "%dprec");
39f41916 444 else if (current_rule->dprec != 0)
17ee7397 445 complain_at (loc, _("only one %s allowed per rule"), "%dprec");
676385e2
PH
446 current_rule->dprec = dprec;
447}
448
449/* Attach a merge function NAME with argument type TYPE to current
450 rule. */
451
452void
17ee7397 453grammar_current_rule_merge_set (uniqstr name, location loc)
676385e2
PH
454{
455 if (! glr_parser)
17ee7397 456 warn_at (loc, _("%s affects only GLR parsers"), "%merge");
39f41916 457 if (current_rule->merger != 0)
17ee7397 458 complain_at (loc, _("only one %s allowed per rule"), "%merge");
8ee5b538
JD
459 current_rule->merger = get_merge_function (name);
460 current_rule->merger_declaration_location = loc;
676385e2
PH
461}
462
17ee7397 463/* Attach SYM to the current rule. If needed, move the previous
2e047461
AD
464 action as a mid-rule action. */
465
e9955c83 466void
7685e2f7 467grammar_current_rule_symbol_append (symbol *sym, location loc,
992e874a 468 named_ref *name)
2e047461 469{
7685e2f7 470 symbol_list *p;
f6857bbf 471 if (current_rule->action_props.code)
2e047461 472 grammar_midrule_action ();
7685e2f7 473 p = grammar_symbol_append (sym, loc);
992e874a
AR
474 if (name)
475 assign_named_ref(p, name);
2e047461
AD
476}
477
6b702268 478/* Attach an ACTION to the current rule. */
2e047461 479
e9955c83 480void
7685e2f7 481grammar_current_rule_action_append (const char *action, location loc,
992e874a 482 named_ref *name)
2e047461 483{
f6857bbf 484 if (current_rule->action_props.code)
381ecb06 485 grammar_midrule_action ();
ffa4ba3a 486 /* After all symbol declarations have been parsed, packgram invokes
f6857bbf
JD
487 code_props_translate_code. */
488 code_props_rule_action_init (&current_rule->action_props, action, loc,
992e874a 489 current_rule, name);
2e047461
AD
490}
491
a70083a3 492\f
a70083a3
AD
493/*---------------------------------------------------------------.
494| Convert the rules into the representation using RRHS, RLHS and |
d9b739c3 495| RITEM. |
a70083a3 496`---------------------------------------------------------------*/
1ff442ca 497
4a120d45 498static void
118fb205 499packgram (void)
1ff442ca 500{
9222837b 501 unsigned int itemno = 0;
17ee7397
PE
502 rule_number ruleno = 0;
503 symbol_list *p = grammar;
1ff442ca 504
e9ad4aec
PE
505 ritem = xnmalloc (nritems + 1, sizeof *ritem);
506
507 /* This sentinel is used by build_relations in gram.c. */
508 *ritem++ = 0;
509
da2a7671 510 rules = xnmalloc (nrules, sizeof *rules);
1ff442ca 511
1ff442ca
NF
512 while (p)
513 {
e9071366 514 int rule_length = 0;
17ee7397 515 symbol *ruleprec = p->ruleprec;
3be03b13 516 record_merge_function_type (p->merger, p->content.sym->type_name,
8ee5b538 517 p->merger_declaration_location);
d7e1f00c 518 rules[ruleno].user_number = ruleno;
c3b407f4 519 rules[ruleno].number = ruleno;
3be03b13 520 rules[ruleno].lhs = p->content.sym;
99013900 521 rules[ruleno].rhs = ritem + itemno;
da2a7671
PE
522 rules[ruleno].prec = NULL;
523 rules[ruleno].dprec = p->dprec;
524 rules[ruleno].merger = p->merger;
525 rules[ruleno].precsym = NULL;
8efe435c 526 rules[ruleno].location = p->location;
b4afb6bb 527 rules[ruleno].useful = true;
f6857bbf
JD
528 rules[ruleno].action = p->action_props.code;
529 rules[ruleno].action_location = p->action_props.location;
1ff442ca 530
f91b1629
JD
531 /* If the midrule's $$ is set or its $n is used, remove the `$' from the
532 symbol name so that it's a user-defined symbol so that the default
533 %destructor and %printer apply. */
534 if (p->midrule_parent_rule
f6857bbf 535 && (p->action_props.is_value_used
b0f4c4ea 536 || symbol_list_n_get (p->midrule_parent_rule,
f6857bbf
JD
537 p->midrule_parent_rhs_index)
538 ->action_props.is_value_used))
f91b1629
JD
539 p->content.sym->tag += 1;
540
868d2d96
JD
541 /* Don't check the generated rule 0. It has no action, so some rhs
542 symbols may appear unused, but the parsing algorithm ensures that
543 %destructor's are invoked appropriately. */
544 if (p != grammar)
545 grammar_rule_check (p);
ffa4ba3a 546
3be03b13 547 for (p = p->next; p && p->content.sym; p = p->next)
1ff442ca 548 {
e9071366
AD
549 ++rule_length;
550
551 /* Don't allow rule_length == INT_MAX, since that might
552 cause confusion with strtol if INT_MAX == LONG_MAX. */
553 if (rule_length == INT_MAX)
554 fatal_at (rules[ruleno].location, _("rule is too long"));
555
17ee7397 556 /* item_number = symbol_number.
5fbb0954 557 But the former needs to contain more: negative rule numbers. */
3be03b13
JD
558 ritem[itemno++] =
559 symbol_number_as_item_number (p->content.sym->number);
1ff442ca 560 /* A rule gets by default the precedence and associativity
e9071366 561 of its last token. */
3be03b13
JD
562 if (p->content.sym->class == token_sym && default_prec)
563 rules[ruleno].prec = p->content.sym;
1ff442ca
NF
564 }
565
566 /* If this rule has a %prec,
a70083a3 567 the specified symbol's precedence replaces the default. */
1ff442ca
NF
568 if (ruleprec)
569 {
03b31c0c
AD
570 rules[ruleno].precsym = ruleprec;
571 rules[ruleno].prec = ruleprec;
1ff442ca 572 }
e9071366 573 /* An item ends by the rule number (negated). */
4b3d3a8e 574 ritem[itemno++] = rule_number_as_item_number (ruleno);
4f82b42a 575 aver (itemno < ITEM_NUMBER_MAX);
f3849179 576 ++ruleno;
4f82b42a 577 aver (ruleno < RULE_NUMBER_MAX);
1ff442ca 578
a70083a3
AD
579 if (p)
580 p = p->next;
1ff442ca
NF
581 }
582
4f82b42a 583 aver (itemno == nritems);
3067fbef 584
273a74fa 585 if (trace_flag & trace_sets)
3067fbef 586 ritem_print (stderr);
1ff442ca 587}
a70083a3 588\f
fdbcd8e2
AD
589/*------------------------------------------------------------------.
590| Read in the grammar specification and record it in the format |
591| described in gram.h. All actions are copied into ACTION_OBSTACK, |
592| in each case forming the body of a C function (YYACTION) which |
593| contains a switch statement to decide which action to execute. |
594`------------------------------------------------------------------*/
a70083a3
AD
595
596void
597reader (void)
598{
a70083a3 599 /* Initialize the symbol table. */
db8837cb 600 symbols_new ();
b6610515 601
88bce5a2
AD
602 /* Construct the accept symbol. */
603 accept = symbol_get ("$accept", empty_location);
604 accept->class = nterm_sym;
605 accept->number = nvars++;
30171f79 606
a70083a3 607 /* Construct the error token */
39f41916 608 errtoken = symbol_get ("error", empty_location);
d7020c20 609 errtoken->class = token_sym;
72a23c97 610 errtoken->number = ntokens++;
b6610515 611
a70083a3
AD
612 /* Construct a token that represents all undefined literal tokens.
613 It is always token number 2. */
88bce5a2 614 undeftoken = symbol_get ("$undefined", empty_location);
d7020c20 615 undeftoken->class = token_sym;
72a23c97 616 undeftoken->number = ntokens++;
a70083a3 617
2b81e969 618 gram_in = xfopen (grammar_file, "r");
e9955c83 619
473d0a75
AD
620 gram__flex_debug = trace_flag & trace_scan;
621 gram_debug = trace_flag & trace_parse;
e9071366 622 gram_scanner_initialize ();
78c3da9e 623 gram_parse ();
07c0db18 624 prepare_percent_define_front_end_variables ();
331dbc1b 625
07c0db18
JD
626 if (! complaint_issued)
627 check_and_convert_grammar ();
628
629 xfclose (gram_in);
630}
631
/* Give the %define front-end variables their default values, then
   check that the values in effect are among the accepted ones.  */
static void
prepare_percent_define_front_end_variables (void)
{
  /* Each group is a variable name followed by its accepted values and
     a terminating NULL; a final NULL ends the table.  */
  static char const * const accepted_values[] = {
    "lr.type", "lalr", "ielr", "canonical-lr", NULL,
    "lr.default-reductions", "most", "consistent", "accepting", NULL,
    NULL
  };
  char *lr_type;
  char const *default_reductions;

  /* Set %define front-end variable defaults.  */
  muscle_percent_define_default ("lr.keep-unreachable-states", "false");

  /* IELR would be a better default, but LALR is historically the
     default.  */
  muscle_percent_define_default ("lr.type", "lalr");

  /* The default for lr.default-reductions depends on lr.type.  */
  lr_type = muscle_percent_define_get ("lr.type");
  default_reductions
    = strcmp (lr_type, "canonical-lr") == 0 ? "accepting" : "most";
  muscle_percent_define_default ("lr.default-reductions", default_reductions);
  free (lr_type);

  /* Check %define front-end variables.  */
  muscle_percent_define_check_values (accepted_values);
}
660
b275314e 661
02d12d0d
PE
662/*-------------------------------------------------------------.
663| Check the grammar that has just been read, and convert it to |
664| internal form. |
665`-------------------------------------------------------------*/
666
667static void
668check_and_convert_grammar (void)
669{
670 /* Grammar has been read. Do some checking. */
e9955c83
AD
671 if (nrules == 0)
672 fatal (_("no rules in the input grammar"));
673
88bce5a2
AD
674 /* If the user did not define her ENDTOKEN, do it now. */
675 if (!endtoken)
b7c49edf 676 {
88bce5a2
AD
677 endtoken = symbol_get ("$end", empty_location);
678 endtoken->class = token_sym;
679 endtoken->number = 0;
b7c49edf 680 /* Value specified by POSIX. */
88bce5a2 681 endtoken->user_token_number = 0;
b7c49edf
AD
682 }
683
24985964
JD
684 /* Report any undefined symbols and consider them nonterminals. */
685 symbols_check_defined ();
686
4d7370cb
JD
687 /* Find the start symbol if no %start. */
688 if (!start_flag)
689 {
690 symbol_list *node;
691 for (node = grammar;
3be03b13 692 node != NULL && symbol_is_dummy (node->content.sym);
4d7370cb
JD
693 node = node->next)
694 {
695 for (node = node->next;
3be03b13 696 node != NULL && node->content.sym != NULL;
4d7370cb
JD
697 node = node->next)
698 ;
699 }
4f82b42a 700 aver (node != NULL);
3be03b13
JD
701 grammar_start_symbol_set (node->content.sym,
702 node->content.sym->location);
4d7370cb
JD
703 }
704
02d12d0d 705 /* Insert the initial rule, whose line is that of the first rule
e9955c83
AD
706 (not that of the start symbol):
707
88bce5a2 708 accept: %start EOF. */
e9955c83 709 {
3be03b13 710 symbol_list *p = symbol_list_sym_new (accept, empty_location);
8efe435c 711 p->location = grammar->location;
3be03b13
JD
712 p->next = symbol_list_sym_new (startsymbol, empty_location);
713 p->next->next = symbol_list_sym_new (endtoken, empty_location);
714 p->next->next->next = symbol_list_sym_new (NULL, empty_location);
e9955c83
AD
715 p->next->next->next->next = grammar;
716 nrules += 1;
717 nritems += 3;
718 grammar = p;
719 }
720
4f82b42a 721 aver (nsyms <= SYMBOL_NUMBER_MAXIMUM && nsyms == ntokens + nvars);
b0c4483e 722
a70083a3
AD
723 /* Assign the symbols their symbol numbers. Write #defines for the
724 token symbols into FDEFINES if requested. */
2f1afb73 725 symbols_pack ();
93ede233 726
574add85
JD
727 /* Scan rule actions after invoking symbol_check_alias_consistency (in
728 symbols_pack above) so that token types are set correctly before the rule
729 action type checking.
730
731 Before invoking grammar_rule_check (in packgram below) on any rule, make
732 sure all actions have already been scanned in order to set `used' flags.
733 Otherwise, checking that a midrule's $$ should be set will not always work
734 properly because the check must forward-reference the midrule's parent
735 rule. For the same reason, all the `used' flags must be set before
736 checking whether to remove `$' from any midrule symbol name (also in
737 packgram). */
14462c2b
JD
738 {
739 symbol_list *sym;
740 for (sym = grammar; sym; sym = sym->next)
741 code_props_translate_code (&sym->action_props);
742 }
574add85 743
a70083a3 744 /* Convert the grammar into the format described in gram.h. */
6d0ef4ec 745 packgram ();
8419d367 746
17ee7397 747 /* The grammar as a symbol_list is no longer needed. */
17bd8a73 748 symbol_list_free (grammar);
a70083a3 749}