X-Git-Url: https://git.saurik.com/bison.git/blobdiff_plain/5bab9d08626390e7db164e57e1d402bf54da61f6..70b7c357476ed3525ddb5d2739e70690cfebb207:/src/reader.c?ds=sidebyside diff --git a/src/reader.c b/src/reader.c index 78faa6b3..15392b50 100644 --- a/src/reader.c +++ b/src/reader.c @@ -1,7 +1,7 @@ /* Input parser for Bison - Copyright (C) 1984, 1986, 1989, 1992, 1998, 2000, 2001, 2002, 2003, - 2005, 2006, 2007, 2009 Free Software Foundation, Inc. + Copyright (C) 1984, 1986, 1989, 1992, 1998, 2000-2003, 2005-2007, + 2009-2013 Free Software Foundation, Inc. This file is part of Bison, the GNU Compiler Compiler. @@ -21,7 +21,7 @@ #include #include "system.h" -#include +#include #include "complain.h" #include "conflicts.h" @@ -35,6 +35,7 @@ #include "scan-gram.h" #include "scan-code.h" +static void prepare_percent_define_front_end_variables (void); static void check_and_convert_grammar (void); static symbol_list *grammar = NULL; @@ -58,7 +59,7 @@ void grammar_start_symbol_set (symbol *sym, location loc) { if (start_flag) - complain_at (loc, _("multiple %s declarations"), "%start"); + complain (&loc, complaint, _("multiple %s declarations"), "%start"); else { start_flag = true; @@ -93,7 +94,7 @@ get_merge_function (uniqstr name) syms->next = xmalloc (sizeof syms->next[0]); syms->next->name = uniqstr_new (name); /* After all symbol type declarations have been parsed, packgram invokes - record_merge_function_type to set the type. */ + record_merge_function_type to set the type. 
*/ syms->next->type = NULL; syms->next->next = NULL; merge_functions = head.next; @@ -127,18 +128,23 @@ record_merge_function_type (int merger, uniqstr type, location declaration_loc) aver (merge_function != NULL && merger_find == merger); if (merge_function->type != NULL && !UNIQSTR_EQ (merge_function->type, type)) { - complain_at (declaration_loc, - _("result type clash on merge function `%s': <%s> != <%s>"), - merge_function->name, type, merge_function->type); - complain_at (merge_function->type_declaration_location, - _("previous declaration")); + unsigned indent = 0; + complain_indent (&declaration_loc, complaint, &indent, + _("result type clash on merge function %s: " + "<%s> != <%s>"), + quote (merge_function->name), type, + merge_function->type); + indent += SUB_INDENT; + complain_indent (&merge_function->type_declaration_location, complaint, + &indent, + _("previous declaration")); } merge_function->type = uniqstr_new (type); merge_function->type_declaration_location = declaration_loc; } /*--------------------------------------. -| Free all merge-function definitions. | +| Free all merge-function definitions. | `--------------------------------------*/ void @@ -169,7 +175,7 @@ free_merger_functions (void) static symbol_list *grammar_end = NULL; /* Append SYM to the grammar. */ -static void +static symbol_list * grammar_symbol_append (symbol *sym, location loc) { symbol_list *p = symbol_list_sym_new (sym, loc); @@ -185,8 +191,27 @@ grammar_symbol_append (symbol *sym, location loc) part of it. */ if (sym) ++nritems; + + return p; } +static void +assign_named_ref (symbol_list *p, named_ref *name) +{ + symbol *sym = p->content.sym; + + if (name->id == sym->tag) + { + complain (&name->loc, Wother, + _("duplicated symbol name for %s ignored"), + quote (sym->tag)); + named_ref_free (name); + } + else + p->named_ref = name; +} + + /* The rule currently being defined, and the previous rule. 
CURRENT_RULE points to the first LHS of the current rule, while PREVIOUS_RULE_END points to the *end* of the previous rule (NULL). */ @@ -199,12 +224,19 @@ static symbol_list *previous_rule_end = NULL; `----------------------------------------------*/ void -grammar_current_rule_begin (symbol *lhs, location loc) +grammar_current_rule_begin (symbol *lhs, location loc, + named_ref *lhs_name) { + symbol_list* p; + /* Start a new rule and record its lhs. */ ++nrules; previous_rule_end = grammar_end; - grammar_symbol_append (lhs, loc); + + p = grammar_symbol_append (lhs, loc); + if (lhs_name) + assign_named_ref (p, named_ref_copy (lhs_name)); + current_rule = grammar_end; /* Mark the rule's lhs as a nonterminal if not already so. */ @@ -215,29 +247,33 @@ grammar_current_rule_begin (symbol *lhs, location loc) ++nvars; } else if (lhs->class == token_sym) - complain_at (loc, _("rule given for %s, which is a token"), lhs->tag); + complain (&loc, complaint, _("rule given for %s, which is a token"), + lhs->tag); } /*----------------------------------------------------------------------. | A symbol should be used if either: | | 1. It has a destructor. | -| 2. --warnings=midrule-values and the symbol is a mid-rule symbol | -| (i.e., the generated LHS replacing a mid-rule action) that was | -| assigned to or used, as in "exp: { $$ = 1; } { $$ = $1; }". | +| 2. The symbol is a mid-rule symbol (i.e., the generated LHS | +| replacing a mid-rule action) that was assigned to or used, as in | +| "exp: { $$ = 1; } { $$ = $1; }". 
| `----------------------------------------------------------------------*/ static bool -symbol_should_be_used (symbol_list const *s) +symbol_should_be_used (symbol_list const *s, bool *midrule_warning) { - if (symbol_destructor_get (s->content.sym)->code) + if (symbol_code_props_get (s->content.sym, destructor)->code) return true; - if (warnings_flag & warnings_midrule_values) - return ((s->midrule && s->midrule->action_props.is_value_used) - || (s->midrule_parent_rule - && symbol_list_n_get (s->midrule_parent_rule, - s->midrule_parent_rhs_index) - ->action_props.is_value_used)); + if ((s->midrule && s->midrule->action_props.is_value_used) + || (s->midrule_parent_rule + && symbol_list_n_get (s->midrule_parent_rule, + s->midrule_parent_rhs_index) + ->action_props.is_value_used)) + { + *midrule_warning = true; + return true; + } return false; } @@ -261,19 +297,19 @@ grammar_rule_check (const symbol_list *r) symbol *first_rhs = r->next->content.sym; /* If $$ is being set in default way, report if any type mismatch. */ if (first_rhs) - { - char const *lhs_type = r->content.sym->type_name; - const char *rhs_type = - first_rhs->type_name ? first_rhs->type_name : ""; - if (!UNIQSTR_EQ (lhs_type, rhs_type)) - warn_at (r->location, - _("type clash on default action: <%s> != <%s>"), - lhs_type, rhs_type); - } + { + char const *lhs_type = r->content.sym->type_name; + const char *rhs_type = + first_rhs->type_name ? first_rhs->type_name : ""; + if (!UNIQSTR_EQ (lhs_type, rhs_type)) + complain (&r->location, Wother, + _("type clash on default action: <%s> != <%s>"), + lhs_type, rhs_type); + } /* Warn if there is no default for $$ but we need one. */ else - warn_at (r->location, - _("empty rule for typed nonterminal, and no action")); + complain (&r->location, Wother, + _("empty rule for typed nonterminal, and no action")); } /* Check that symbol values that should be used are in fact used. 
*/ @@ -281,17 +317,30 @@ grammar_rule_check (const symbol_list *r) symbol_list const *l = r; int n = 0; for (; l && l->content.sym; l = l->next, ++n) - if (! (l->action_props.is_value_used - || !symbol_should_be_used (l) - /* The default action, $$ = $1, `uses' both. */ - || (!r->action_props.code && (n == 0 || n == 1)))) - { - if (n) - warn_at (r->location, _("unused value: $%d"), n); - else - warn_at (r->location, _("unset value: $$")); - } + { + bool midrule_warning = false; + if (!l->action_props.is_value_used + && symbol_should_be_used (l, &midrule_warning) + /* The default action, $$ = $1, `uses' both. */ + && (r->action_props.code || (n != 0 && n != 1))) + { + warnings warn_flag = midrule_warning ? Wmidrule_values : Wother; + if (n) + complain (&l->location, warn_flag, _("unused value: $%d"), n); + else + complain (&l->location, warn_flag, _("unset value: $$")); + } + } } + + /* See comments in grammar_current_rule_prec_set for how POSIX + mandates this complaint. It's only for identifiers, so skip + it for char literals and strings, which are always tokens. */ + if (r->ruleprec + && r->ruleprec->tag[0] != '\'' && r->ruleprec->tag[0] != '"' + && r->ruleprec->status != declared && !r->ruleprec->prec) + complain (&r->location, Wother, + _("token for %%prec is not defined: %s"), r->ruleprec->tag); } @@ -328,6 +377,9 @@ grammar_midrule_action (void) symbol *dummy = dummy_symbol_get (dummy_location); symbol_list *midrule = symbol_list_sym_new (dummy, dummy_location); + /* Remember named_ref of previous action. */ + named_ref *action_name = current_rule->action_props.named_ref; + /* Make a new rule, whose body is empty, before the current one, so that the action just read can belong to it. 
*/ ++nrules; @@ -337,7 +389,8 @@ grammar_midrule_action (void) code_props_rule_action_init (&midrule->action_props, current_rule->action_props.code, current_rule->action_props.location, - midrule); + midrule, 0, + current_rule->action_props.is_predicate); code_props_none_init (&current_rule->action_props); if (previous_rule_end) @@ -353,7 +406,8 @@ grammar_midrule_action (void) /* Insert the dummy nonterminal replacing the midrule action into the current rule. Bind it to its dedicated rule. */ - grammar_current_rule_symbol_append (dummy, dummy_location); + grammar_current_rule_symbol_append (dummy, dummy_location, + action_name); grammar_end->midrule = midrule; midrule->midrule_parent_rule = current_rule; midrule->midrule_parent_rhs_index = symbol_list_length (current_rule->next); @@ -364,9 +418,19 @@ grammar_midrule_action (void) void grammar_current_rule_prec_set (symbol *precsym, location loc) { + /* POSIX says that any identifier is a nonterminal if it does not + appear on the LHS of a grammar rule and is not defined by %token + or by one of the directives that assigns precedence to a token. We + ignore this here because the only kind of identifier that POSIX + allows to follow a %prec is a token and because assuming it's a + token now can produce more logical error messages. Nevertheless, + grammar_rule_check does obey what we believe is the real intent of + POSIX here: that an error be reported for any identifier that + appears after %prec but that is not defined separately as a + token. */ symbol_class_set (precsym, token_sym, loc, false); if (current_rule->ruleprec) - complain_at (loc, _("only one %s allowed per rule"), "%prec"); + complain (&loc, complaint, _("only one %s allowed per rule"), "%prec"); current_rule->ruleprec = precsym; } @@ -376,11 +440,13 @@ void grammar_current_rule_dprec_set (int dprec, location loc) { if (! 
glr_parser) - warn_at (loc, _("%s affects only GLR parsers"), "%dprec"); + complain (&loc, Wother, _("%s affects only GLR parsers"), + "%dprec"); if (dprec <= 0) - complain_at (loc, _("%s must be followed by positive number"), "%dprec"); + complain (&loc, complaint, _("%s must be followed by positive number"), + "%dprec"); else if (current_rule->dprec != 0) - complain_at (loc, _("only one %s allowed per rule"), "%dprec"); + complain (&loc, complaint, _("only one %s allowed per rule"), "%dprec"); current_rule->dprec = dprec; } @@ -391,9 +457,10 @@ void grammar_current_rule_merge_set (uniqstr name, location loc) { if (! glr_parser) - warn_at (loc, _("%s affects only GLR parsers"), "%merge"); + complain (&loc, Wother, _("%s affects only GLR parsers"), + "%merge"); if (current_rule->merger != 0) - complain_at (loc, _("only one %s allowed per rule"), "%merge"); + complain (&loc, complaint, _("only one %s allowed per rule"), "%merge"); current_rule->merger = get_merge_function (name); current_rule->merger_declaration_location = loc; } @@ -402,24 +469,31 @@ grammar_current_rule_merge_set (uniqstr name, location loc) action as a mid-rule action. */ void -grammar_current_rule_symbol_append (symbol *sym, location loc) +grammar_current_rule_symbol_append (symbol *sym, location loc, + named_ref *name) { + symbol_list *p; if (current_rule->action_props.code) grammar_midrule_action (); - grammar_symbol_append (sym, loc); + p = grammar_symbol_append (sym, loc); + if (name) + assign_named_ref(p, name); + if (sym->status == undeclared || sym->status == used) + sym->status = needed; } /* Attach an ACTION to the current rule. */ void -grammar_current_rule_action_append (const char *action, location loc) +grammar_current_rule_action_append (const char *action, location loc, + named_ref *name, bool is_predicate) { if (current_rule->action_props.code) grammar_midrule_action (); /* After all symbol declarations have been parsed, packgram invokes code_props_translate_code. 
*/ code_props_rule_action_init (&current_rule->action_props, action, loc, - current_rule); + current_rule, name, is_predicate); } @@ -447,7 +521,7 @@ packgram (void) int rule_length = 0; symbol *ruleprec = p->ruleprec; record_merge_function_type (p->merger, p->content.sym->type_name, - p->merger_declaration_location); + p->merger_declaration_location); rules[ruleno].user_number = ruleno; rules[ruleno].number = ruleno; rules[ruleno].lhs = p->content.sym; @@ -460,49 +534,50 @@ packgram (void) rules[ruleno].useful = true; rules[ruleno].action = p->action_props.code; rules[ruleno].action_location = p->action_props.location; + rules[ruleno].is_predicate = p->action_props.is_predicate; /* If the midrule's $$ is set or its $n is used, remove the `$' from the - symbol name so that it's a user-defined symbol so that the default - %destructor and %printer apply. */ + symbol name so that it's a user-defined symbol so that the default + %destructor and %printer apply. */ if (p->midrule_parent_rule && (p->action_props.is_value_used - || symbol_list_n_get (p->midrule_parent_rule, - p->midrule_parent_rhs_index) + || symbol_list_n_get (p->midrule_parent_rule, + p->midrule_parent_rhs_index) ->action_props.is_value_used)) - p->content.sym->tag += 1; + p->content.sym->tag += 1; /* Don't check the generated rule 0. It has no action, so some rhs - symbols may appear unused, but the parsing algorithm ensures that - %destructor's are invoked appropriately. */ + symbols may appear unused, but the parsing algorithm ensures that + %destructor's are invoked appropriately. */ if (p != grammar) - grammar_rule_check (p); + grammar_rule_check (p); for (p = p->next; p && p->content.sym; p = p->next) - { - ++rule_length; + { + ++rule_length; - /* Don't allow rule_length == INT_MAX, since that might - cause confusion with strtol if INT_MAX == LONG_MAX. 
*/ - if (rule_length == INT_MAX) - fatal_at (rules[ruleno].location, _("rule is too long")); + /* Don't allow rule_length == INT_MAX, since that might + cause confusion with strtol if INT_MAX == LONG_MAX. */ + if (rule_length == INT_MAX) + complain (&rules[ruleno].location, fatal, _("rule is too long")); - /* item_number = symbol_number. - But the former needs to contain more: negative rule numbers. */ - ritem[itemno++] = + /* item_number = symbol_number. + But the former needs to contain more: negative rule numbers. */ + ritem[itemno++] = symbol_number_as_item_number (p->content.sym->number); - /* A rule gets by default the precedence and associativity - of its last token. */ - if (p->content.sym->class == token_sym && default_prec) - rules[ruleno].prec = p->content.sym; - } + /* A rule gets by default the precedence and associativity + of its last token. */ + if (p->content.sym->class == token_sym && default_prec) + rules[ruleno].prec = p->content.sym; + } /* If this rule has a %prec, the specified symbol's precedence replaces the default. */ if (ruleprec) - { - rules[ruleno].precsym = ruleprec; - rules[ruleno].prec = ruleprec; - } + { + rules[ruleno].precsym = ruleprec; + rules[ruleno].prec = ruleprec; + } /* An item ends by the rule number (negated). */ ritem[itemno++] = rule_number_as_item_number (ruleno); aver (itemno < ITEM_NUMBER_MAX); @@ -510,7 +585,7 @@ packgram (void) aver (ruleno < RULE_NUMBER_MAX); if (p) - p = p->next; + p = p->next; } aver (itemno == nritems); @@ -554,39 +629,47 @@ reader (void) gram_debug = trace_flag & trace_parse; gram_scanner_initialize (); gram_parse (); + prepare_percent_define_front_end_variables (); + + if (complaint_status < status_complaint) + check_and_convert_grammar (); - /* IELR would be a better default, but LALR is historically the default. */ + xfclose (gram_in); +} + +static void +prepare_percent_define_front_end_variables (void) +{ + /* Set %define front-end variable defaults. 
*/ + muscle_percent_define_default ("lr.keep-unreachable-state", "false"); { char *lr_type; - muscle_percent_define_default ("lr.type", "LALR"); + /* IELR would be a better default, but LALR is historically the + default. */ + muscle_percent_define_default ("lr.type", "lalr"); lr_type = muscle_percent_define_get ("lr.type"); - if (0 != strcmp (lr_type, "canonical LR")) - muscle_percent_define_default ("lr.default-reductions", "all"); + if (STRNEQ (lr_type, "canonical-lr")) + muscle_percent_define_default ("lr.default-reduction", "most"); else - muscle_percent_define_default ("lr.default-reductions", "accepting"); + muscle_percent_define_default ("lr.default-reduction", "accepting"); free (lr_type); } - /* Check front-end %define variable values. */ + /* Check %define front-end variables. */ { static char const * const values[] = { - "lr.type", "LALR", "IELR", "canonical LR", NULL, - "lr.default-reductions", "all", "consistent", "accepting", NULL, + "lr.type", "lalr", "ielr", "canonical-lr", NULL, + "lr.default-reduction", "most", "consistent", "accepting", NULL, NULL }; muscle_percent_define_check_values (values); } - - if (! complaint_issued) - check_and_convert_grammar (); - - xfclose (gram_in); } /*-------------------------------------------------------------. | Check the grammar that has just been read, and convert it to | -| internal form. | +| internal form. | `-------------------------------------------------------------*/ static void @@ -594,10 +677,7 @@ check_and_convert_grammar (void) { /* Grammar has been read. Do some checking. */ if (nrules == 0) - fatal (_("no rules in the input grammar")); - - /* Report any undefined symbols and consider them nonterminals. */ - symbols_check_defined (); + complain (NULL, fatal, _("no rules in the input grammar")); /* If the user did not define her ENDTOKEN, do it now. 
*/ if (!endtoken) @@ -609,6 +689,9 @@ check_and_convert_grammar (void) endtoken->user_token_number = 0; } + /* Report any undefined symbols and consider them nonterminals. */ + symbols_check_defined (); + /* Find the start symbol if no %start. */ if (!start_flag) {