From 965537bc543191a048b6afb2404c878f06ab877f Mon Sep 17 00:00:00 2001 From: "Joel E. Denny" Date: Fri, 18 Aug 2006 10:25:50 +0000 Subject: [PATCH] Don't allow an undeclared string literal, but allow a string literal to be used before its declaration. * src/reader.c (check_and_convert_grammar): Don't invoke packgram if symbols_pack complained. * src/symtab.c (symbol_new): Don't count a string literal as a new symbol. (symbol_class_set): Don't count a string literal as a new token, and don't assign it a symbol number since symbol_make_alias does that. (symbol_make_alias): It's not necessary to decrement the symbol and token counts anymore. Don't assume that an alias declaration occurs before any uses of the identifier or string, and thus don't assert that one of them has the highest symbol number so far. (symbol_check_alias_consistency): Complain if there's a string literal that wasn't declared as an alias. (symbols_pack): Bail if symbol_check_alias_consistency failed since symbol_pack asserts that every token has been assigned a symbol number although undeclared string literals have not. * tests/regression.at (String alias declared after use, Undeclared string literal): New test cases. (Characters Escapes, Web2c Actions): Declare string literals as aliases. * tests/sets.at (Firsts): Likewise. --- ChangeLog | 25 +++++++++++++++++++++ src/reader.c | 3 ++- src/symtab.c | 18 ++++++++------- tests/regression.at | 53 ++++++++++++++++++++++++++++++++++++++++++++- tests/sets.at | 1 + 5 files changed, 90 insertions(+), 10 deletions(-) diff --git a/ChangeLog b/ChangeLog index 8fa43858..3a683551 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,28 @@ +2006-08-18 Joel E. Denny + + Don't allow an undeclared string literal, but allow a string literal to + be used before its declaration. + * src/reader.c (check_and_convert_grammar): Don't invoke packgram if + symbols_pack complained. + * src/symtab.c (symbol_new): Don't count a string literal as a new + symbol. 
+ (symbol_class_set): Don't count a string literal as a new token, and + don't assign it a symbol number since symbol_make_alias does that. + (symbol_make_alias): It's not necessary to decrement the symbol and + token counts anymore. Don't assume that an alias declaration occurs + before any uses of the identifier or string, and thus don't assert that + one of them has the highest symbol number so far. + (symbol_check_alias_consistency): Complain if there's a string literal + that wasn't declared as an alias. + (symbols_pack): Bail if symbol_check_alias_consistency failed since + symbol_pack asserts that every token has been assigned a symbol number + although undeclared string literals have not. + * tests/regression.at (String alias declared after use, Undeclared + string literal): New test cases. + (Characters Escapes, Web2c Actions): Declare string literals as + aliases. + * tests/sets.at (Firsts): Likewise. + 2006-08-14 Joel E. Denny In the grammar scanner, STRING_FINISH unclosed constructs and return diff --git a/src/reader.c b/src/reader.c index 5084618f..e31fe4c5 100644 --- a/src/reader.c +++ b/src/reader.c @@ -630,7 +630,8 @@ check_and_convert_grammar (void) symbols_pack (); /* Convert the grammar into the format described in gram.h. */ - packgram (); + if (!complaint_issued) + packgram (); /* The grammar as a symbol_list is no longer needed. 
*/ LIST_FREE (symbol_list, grammar); diff --git a/src/symtab.c b/src/symtab.c index 8b3e30d3..28f49fb4 100644 --- a/src/symtab.c +++ b/src/symtab.c @@ -79,7 +79,8 @@ symbol_new (uniqstr tag, location loc) if (nsyms == SYMBOL_NUMBER_MAXIMUM) fatal (_("too many symbols in input grammar (limit is %d)"), SYMBOL_NUMBER_MAXIMUM); - nsyms++; + if (tag[0] != '"') + nsyms++; return res; } @@ -266,7 +267,8 @@ symbol_class_set (symbol *sym, symbol_class class, location loc, bool declaring) if (class == nterm_sym && sym->class != nterm_sym) sym->number = nvars++; - else if (class == token_sym && sym->number == NUMBER_UNDEFINED) + else if (class == token_sym && sym->number == NUMBER_UNDEFINED + && sym->tag[0] != '"') sym->number = ntokens++; sym->class = class; @@ -361,12 +363,7 @@ symbol_make_alias (symbol *sym, symbol *symval, location loc) sym->user_token_number = USER_NUMBER_ALIAS; symval->alias = sym; sym->alias = symval; - /* sym and symval combined are only one symbol. */ - nsyms--; - ntokens--; - assert (ntokens == sym->number || ntokens == symval->number); - sym->number = symval->number = - (symval->number < sym->number) ? symval->number : sym->number; + symval->number = sym->number; symbol_type_set (symval, sym->type_name, loc); } } @@ -383,6 +380,9 @@ symbol_check_alias_consistency (symbol *this) symbol *alias = this; symbol *orig = this->alias; + if (this->tag[0] == '"' && !this->alias) + complain_at (this->location, _("%s undeclared"), this->tag); + /* Check only those that _are_ the aliases. 
*/ if (!(this->alias && this->user_token_number == USER_NUMBER_ALIAS)) return; @@ -723,6 +723,8 @@ symbols_pack (void) symbols = xcalloc (nsyms, sizeof *symbols); symbols_do (symbol_check_alias_consistency_processor, NULL); + if (complaint_issued) + return; symbols_do (symbol_pack_processor, NULL); symbols_token_translations_init (); diff --git a/tests/regression.at b/tests/regression.at index 923a8933..1c5ccbbf 100644 --- a/tests/regression.at +++ b/tests/regression.at @@ -489,7 +489,9 @@ AT_DATA_GRAMMAR([input.y], void yyerror (const char *s); int yylex (void); %} -[%% +[%token QUOTES "\"" +%token TICK "'" +%% exp: '\'' "\'" | '\"' "\"" @@ -700,6 +702,10 @@ statement: struct_stat; struct_stat: /* empty. */ | if else; if: "if" "const" "then" statement; else: "else" statement; +%token IF "if"; +%token CONST "const"; +%token THEN "then"; +%token ELSE "else"; %% ]]) @@ -1108,3 +1114,48 @@ Stack now 0 ]]) AT_CLEANUP + + + +## --------------------------------- ## +## String alias declared after use. ## +## --------------------------------- ## + +AT_SETUP([String alias declared after use]) + +# Bison once incorrectly asserted that the symbol number for either a token or +# its alias was the highest symbol number so far at the point of the alias +# declaration. That was true unless the declaration appeared after their first +# uses. + +AT_DATA([input.y], +[[%% +start: 'a' "A" 'b'; +%token 'a' "A"; +]]) + +AT_CHECK([bison -t -o input.c input.y]) + +AT_CLEANUP + + + +## --------------------------- ## +## Undeclared string literal. ## +## --------------------------- ## + +AT_SETUP([Undeclared string literal]) + +# Bison once allowed a string literal to be used in the grammar without any +# declaration assigning it as an alias of another token. 
+ +AT_DATA([input.y], +[[%% +start: "abc"; +]]) + +AT_CHECK([bison -t -o input.c input.y], [1], [], +[[input.y:2.8-12: "abc" undeclared +]]) + +AT_CLEANUP diff --git a/tests/sets.at b/tests/sets.at index a731315b..941d0cc2 100644 --- a/tests/sets.at +++ b/tests/sets.at @@ -196,6 +196,7 @@ AT_DATA([input.y], [[%nonassoc '<' '>' %left '+' '-' %right '^' '=' +%token EXP "exp" %% exp: exp '<' exp -- 2.45.2