From 007a50a493714fdda37f9600e068cf705c8ec3f1 Mon Sep 17 00:00:00 2001 From: Akim Demaille Date: Tue, 9 Apr 2002 19:19:59 +0000 Subject: [PATCH] * src/gram.h, src/gram.c (error_token_number): Remove, use errtoken->number. * src/reader.c (reader): Don't specify the user token number (2) for $undefined, as it uselessly prevents using it. * src/gram.h (token_number_t): Move to... * src/symtab.h: here. (state_t.number): Is a token_number_t. * src/print.c, src/reader.c: Use undeftoken->number instead of hard coded 2. (Even though this 2 is not the same as above: the number of the undeftoken remains being 2, it is its user token number which might not be 2). * src/output.c (prepare_tokens): Rename the `maxtok' muscle with `user_token_number_max'. Output `undef_token_number'. * data/bison.simple, data/bison.c++: Use them. Be sure to map invalid yylex return values to `undef_token_number'. This saves us from gratuitous SEGV. * tests/conflicts.at (Solved SR Conflicts) (Unresolved SR Conflicts): Adjust. * tests/regression.at (Web2c Actions): Adjust. --- ChangeLog | 25 +++++++++++++++++++++++++ NEWS | 8 ++++++++ data/bison.c++ | 11 ++++++++--- data/bison.simple | 9 ++++++--- src/gram.c | 2 -- src/gram.h | 5 ----- src/output.c | 7 ++++--- src/print.c | 2 +- src/reader.c | 7 ++----- src/state.h | 2 +- src/symtab.h | 4 +++- tests/conflicts.at | 8 ++++---- tests/regression.at | 10 +++++----- 13 files changed, 67 insertions(+), 33 deletions(-) diff --git a/ChangeLog b/ChangeLog index ae96b84d..06220777 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,28 @@ +2002-04-09 Akim Demaille + + * src/gram.h, src/gram.c (error_token_number): Remove, use + errtoken->number. + * src/reader.c (reader): Don't specify the user token number (2) + for $undefined, as it uselessly prevents using it. + * src/gram.h (token_number_t): Move to... + * src/symtab.h: here. + (state_t.number): Is a token_number_t. + * src/print.c, src/reader.c: Use undeftoken->number instead of + hard coded 2. 
+ (Even though this 2 is not the same as above: the number of the + undeftoken remains being 2, it is its user token number which + might not be 2). + * src/output.c (prepare_tokens): Rename the `maxtok' muscle with + `user_token_number_max'. + Output `undef_token_number'. + * data/bison.simple, data/bison.c++: Use them. + Be sure to map invalid yylex return values to + `undef_token_number'. This saves us from gratuitous SEGV. + + * tests/conflicts.at (Solved SR Conflicts) + (Unresolved SR Conflicts): Adjust. + * tests/regression.at (Web2c Actions): Adjust. + 2002-04-08 Akim Demaille * data/bison.c++: s/b4_item_number_max/b4_rhs_number_max/. diff --git a/NEWS b/NEWS index 7abc9511..f339c659 100644 --- a/NEWS +++ b/NEWS @@ -3,6 +3,14 @@ Bison News Changes in version 1.49a: +* Undefined token + The undefined token was systematically mapped to 2, which prevented + the user from using 2. This is no longer the case. + +* Invalid yylex return values + If yylex returned a code out of range, yyparse could die. This is + no longer the case. + * Large grammars Are now supported (large token numbers, large grammar size (= sum of the LHS and RHS lengths). diff --git a/data/bison.c++ b/data/bison.c++ index d5f22ce8..bfb7fc7a 100644 --- a/data/bison.c++ +++ b/data/bison.c++ @@ -257,7 +257,8 @@ namespace yy static const int errcode_; static const int ntokens_; static const int initdepth_; - static const unsigned maxtok_; + static const unsigned user_token_number_max_; + static const TokenNumberType undef_token_; /* State. */ int n_; @@ -722,7 +723,10 @@ yy::b4_name::translate_ (int token) { b4_translate }; - return (unsigned)(token) <= maxtok_ ? 
translate_[[token]] : nsym_; + if ((unsigned) token <= user_token_number_max_) + return translate_[[token]]; + else + return undef_token_; } const int yy::b4_name::eof_ = 0; @@ -737,7 +741,8 @@ const int yy::b4_name::errcode_ = 256; const int yy::b4_name::ntokens_ = b4_ntokens; const int yy::b4_name::initdepth_ = b4_initdepth; -const unsigned yy::b4_name::maxtok_ = b4_maxtok; +const unsigned yy::b4_name::user_token_number_max_ = b4_user_token_number_max; +const yy::b4_name::TokenNumberType yy::b4_name::undef_token_ = b4_undef_token_number; b4_epilogue diff --git a/data/bison.simple b/data/bison.simple index 4de66f50..b5591b66 100644 --- a/data/bison.simple +++ b/data/bison.simple @@ -1,4 +1,4 @@ -m4_divert(-1) +m4_divert(-1) -*- C -*- # b4_sint_type(MAX) # ----------------- @@ -265,11 +265,14 @@ b4_token_defines(b4_tokens) #define YYNRULES b4_nrules /* YYNRULES -- Number of states. */ #define YYNSTATES b4_nstates -#define YYMAXUTOK b4_maxtok /* YYTRANSLATE(YYLEX) -- Bison symbol number corresponding to YYLEX. */ +#define YYUNDEFTOK b4_undef_token_number +#define YYMAXUTOK b4_user_token_number_max + typedef b4_uint_type(b4_token_number_max) yy_token_number_type; -#define YYTRANSLATE(x) ((unsigned)(x) <= b4_maxtok ? yytranslate[[x]] : b4_nsym) +#define YYTRANSLATE(X) \ + ((unsigned)(X) <= YYMAXUTOK ? yytranslate[[X]] : YYUNDEFTOK) /* YYTRANSLATE[[YYLEX]] -- Bison symbol number corresponding to YYLEX. */ static const yy_token_number_type yytranslate[[]] = diff --git a/src/gram.c b/src/gram.c index d13da817..9189a1cf 100644 --- a/src/gram.c +++ b/src/gram.c @@ -48,8 +48,6 @@ int semantic_parser = 0; int pure_parser = 0; -int error_token_number = 0; - /*--------------------------------------. | Return the number of symbols in RHS. 
| diff --git a/src/gram.h b/src/gram.h index 26682913..d7b9df4e 100644 --- a/src/gram.h +++ b/src/gram.h @@ -154,7 +154,6 @@ extern symbol_t **symbols; /* TOKEN_TRANSLATION -- a table indexed by a token number as returned by the user's yylex routine, it yields the internal token number used by the parser and throughout bison. */ -typedef short token_number_t; extern token_number_t *token_translations; extern int max_user_token_number; @@ -169,10 +168,6 @@ extern int semantic_parser; extern int pure_parser; -/* ERROR_TOKEN_NUMBER is the token number of the error token. */ - -extern int error_token_number; - /* Report the length of the RHS. */ int rule_rhs_length PARAMS ((rule_t *rule)); diff --git a/src/output.c b/src/output.c index edb75700..035d0ad8 100644 --- a/src/output.c +++ b/src/output.c @@ -417,7 +417,7 @@ action_row (state_t *state) /* Do not use any default reduction if there is a shift for error */ - if (symbol == error_token_number) + if (symbol == errtoken->number) nodefault = 1; } @@ -614,7 +614,7 @@ token_definitions_output (FILE *out) if (number == SALIAS) continue; /* Skip error token. */ - if (symbol->number == error_token_number) + if (symbol == errtoken) continue; if (symbol->tag[0] == '\'') continue; /* skip literal character */ @@ -1078,7 +1078,8 @@ prepare (void) MUSCLE_INSERT_INT ("nsym", nsyms); MUSCLE_INSERT_INT ("debug", debug_flag); MUSCLE_INSERT_INT ("final", final_state); - MUSCLE_INSERT_INT ("maxtok", max_user_token_number); + MUSCLE_INSERT_INT ("undef_token_number", undeftoken->number); + MUSCLE_INSERT_INT ("user_token_number_max", max_user_token_number); MUSCLE_INSERT_INT ("error_verbose", error_verbose); MUSCLE_INSERT_STRING ("prefix", spec_name_prefix ? 
spec_name_prefix : "yy"); diff --git a/src/print.c b/src/print.c index d1df2fd7..e8c4bd07 100644 --- a/src/print.c +++ b/src/print.c @@ -386,7 +386,7 @@ print_grammar (FILE *out) /* TERMINAL (type #) : rule #s terminal is on RHS */ fprintf (out, "%s\n\n", _("Terminals, with rules where they appear")); for (i = 0; i < max_user_token_number + 1; i++) - if (token_translations[i] != 2) + if (token_translations[i] != undeftoken->number) { buffer[0] = 0; column = strlen (escape (symbols[token_translations[i]]->tag)); diff --git a/src/reader.c b/src/reader.c index 97118c53..f76f5c84 100644 --- a/src/reader.c +++ b/src/reader.c @@ -237,7 +237,7 @@ symbol_translation (symbol_t *this) && this->user_token_number != SALIAS) { /* A token which translation has already been set? */ - if (token_translations[this->user_token_number] != 2) + if (token_translations[this->user_token_number] != undeftoken->number) complain (_("tokens %s and %s both assigned number %d"), symbols[token_translations[this->user_token_number]]->tag, this->tag, this->user_token_number); @@ -1697,7 +1697,7 @@ token_translations_init (void) token number for $undefined., which represents all invalid inputs. */ for (i = 0; i < max_user_token_number + 1; i++) - token_translations[i] = 2; + token_translations[i] = undeftoken->number; symbols_do (symbol_translation, NULL); } @@ -1718,8 +1718,6 @@ packsymbols (void) token_translations_init (); - error_token_number = errtoken->number; - if (startval->class == unknown_sym) fatal (_("the start symbol %s is undefined"), startval->tag); else if (startval->class == token_sym) @@ -1831,7 +1829,6 @@ reader (void) undeftoken = getsym ("$undefined."); undeftoken->class = token_sym; undeftoken->number = ntokens++; - undeftoken->user_token_number = 2; /* Initialize the obstacks. 
*/ obstack_init (&action_obstack); diff --git a/src/state.h b/src/state.h index b4fa6b08..3dc74acf 100644 --- a/src/state.h +++ b/src/state.h @@ -122,7 +122,7 @@ shifts *shifts_new PARAMS ((int n)); /* Is the SHIFTS->shifts[Shift] then handling of the error token?. */ #define SHIFT_IS_ERROR(Shifts, Shift) \ - (SHIFT_SYMBOL (Shifts, Shift) == error_token_number) + (SHIFT_SYMBOL (Shifts, Shift) == errtoken->number) /* When resolving a SR conflicts, if the reduction wins, the shift is disabled. */ diff --git a/src/symtab.h b/src/symtab.h index caa04793..df566eb5 100644 --- a/src/symtab.h +++ b/src/symtab.h @@ -41,6 +41,8 @@ typedef enum nterm_sym /* non-terminal */ } symbol_class; +/* Internal token numbers. */ +typedef short token_number_t; #define SUNDEF -1 /* For undefined user number. */ #define SALIAS -9991 /* for symbol generated with an alias */ @@ -50,7 +52,7 @@ struct symbol_s char *tag; /* Its type. */ char *type_name; - short number; + token_number_t number; short prec; associativity assoc; int user_token_number; diff --git a/tests/conflicts.at b/tests/conflicts.at index 46add71b..713b49e0 100644 --- a/tests/conflicts.at +++ b/tests/conflicts.at @@ -151,8 +151,8 @@ Terminals, with rules where they appear $ (0) 0 error (256) -NUM (257) 2 -OP (258) 1 +NUM (258) 2 +OP (259) 1 Nonterminals, with rules where they appear @@ -257,8 +257,8 @@ Terminals, with rules where they appear $ (0) 0 error (256) -NUM (257) 2 -OP (258) 1 +NUM (258) 2 +OP (259) 1 Nonterminals, with rules where they appear diff --git a/tests/regression.at b/tests/regression.at index 51c87ca2..4487dee5 100644 --- a/tests/regression.at +++ b/tests/regression.at @@ -336,8 +336,8 @@ $ (0) 0 ';' (59) 5 '=' (61) 5 error (256) -undef_id_tok (257) 5 -const_id_tok (258) 5 +undef_id_tok (258) 5 +const_id_tok (259) 5 Nonterminals, with rules where they appear $axiom (7) on left: 0 @@ -460,8 +460,8 @@ AT_CHECK([[sed -n 's/ *$//;/^static const.*\[\] =/,/^}/p' input.c]], 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 1, 3, 4, 5, - 6 + 2, 2, 2, 2, 2, 2, 1, 2, 3, 4, + 5, 6 }; static const short yyprhs[] = { @@ -483,7 +483,7 @@ static const char *const yytname[] = }; static const short yytoknum[] = { - 0, 256, 2, 257, 258, 259, 260, -1 + 0, 256, 257, 258, 259, 260, 261, -1 }; static const short yyr1[] = { -- 2.45.2