From: Paul Eggert Date: Sun, 9 Jul 2006 20:36:33 +0000 (+0000) Subject: * NEWS: Instead of %union, you can define and use your own union type X-Git-Tag: v2.3b~332 X-Git-Url: https://git.saurik.com/bison.git/commitdiff_plain/ddc8ede1abfd807faf5ddb0ee6c56cead0db84e3?ds=sidebyside * NEWS: Instead of %union, you can define and use your own union type YYSTYPE if your grammar contains at least one tag. Your YYSTYPE need not be a macro; it can be a typedef. * doc/bison.texinfo (Value Type, Multiple Types, Location Type): (Union Decl, Decl Summary): Document this. * data/glr.c (YYSTYPE): Implement this. * data/glr.cc (YYSTYPE): Likewise. * data/lalr1.cc (YYSTYPE): Likewise. * data/yacc.c (YYSTYPE): Likewise. * src/output.c (prepare): Output tag_seen_flag. * src/parse-gram.y (declaration, grammar_declaration): Use 'union_seen' rather than 'typed' to determine whether %union has been seen, since grammars can now be typed without %union. (symbol_declaration, type.opt, symbol_def): Keep track of whether a tag has been seen. * src/reader.c (union_seen, tag_seen): New vars. (typed): remove. * src/reader.h (union_seen, tag_seen, typed): Likewise. * src/scan-code.l (untyped_var_seen): New variable. (handle_action_dollar): Adjust to above changes. (handle_action_dollar, handle_action_at): Improve overflow checking for outlandish numbers. * tests/input.at (AT_CHECK_UNUSED_VALUES): Redo test to avoid new diagnostics generated by above changes. * tests/regression.at (YYSTYPE typedef): Add test to check for type tags without %union. --- diff --git a/ChangeLog b/ChangeLog index 8a95c859..8aecae0b 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,46 @@ +2006-07-09 Paul Eggert + + * NEWS: Instead of %union, you can define and use your own union type + YYSTYPE if your grammar contains at least one tag. + Your YYSTYPE need not be a macro; it can be a typedef. + * doc/bison.texinfo (Value Type, Multiple Types, Location Type): + (Union Decl, Decl Summary): Document this. 
+ * data/glr.c (YYSTYPE): Implement this. + * data/glr.cc (YYSTYPE): Likewise. + * data/lalr1.cc (YYSTYPE): Likewise. + * data/yacc.c (YYSTYPE): Likewise. + * src/output.c (prepare): Output tag_seen_flag. + * src/parse-gram.y (declaration, grammar_declaration): + Use 'union_seen' rather than 'typed' to determine whether + %union has been seen, since grammars can now be typed without + %union. + (symbol_declaration, type.opt, symbol_def): + Keep track of whether a tag has been seen. + * src/reader.c (union_seen, tag_seen): New vars. + (typed): remove. + * src/reader.h (union_seen, tag_seen, typed): Likewise. + * src/scan-code.l (untyped_var_seen): New variable. + (handle_action_dollar): Adjust to above changes. + (handle_action_dollar, handle_action_at): + Improve overflow checking for outlandish numbers. + * tests/input.at (AT_CHECK_UNUSED_VALUES): Redo test to + avoid new diagnostics generated by above changes. + * tests/regression.at (YYSTYPE typedef): Add test to check + for type tags without %union. + + * src/symlist.c (symbol_list_length): Return int, not unsigned + int, since callers expect int. This may need to get revisited + once we have proper integer overflow checking. + + * src/scan-gram.h (gram_scanner_cursor): Remove decl, since this + object is now static. + + * src/getargs.c (flags_argmatch): Return void, not int, + to pacify ./configure --enable-gcc-warnings. + + * src/flex-scanner.h (STRING_FREE): Don't use FLEX_PREFIX (last_string) + since last_string is already defined to FLEX_PREFIX (last_string). + 2006-07-09 Akim Demaille Implement --warnings/-W. diff --git a/NEWS b/NEWS index 0b6b2341..d133f193 100644 --- a/NEWS +++ b/NEWS @@ -3,6 +3,12 @@ Bison News Changes in version 2.3+: +* Instead of %union, you can define and use your own union type + YYSTYPE if your grammar contains at least one tag. + Your YYSTYPE need not be a macro; it can be a typedef. + This change is for compatibility with other Yacc implementations, + and is required by POSIX. 
+ * Locations columns and lines start at 1. In accordance with the GNU Coding Standards and Emacs. @@ -19,7 +25,7 @@ Changes in version 2.3+: `%{ ... %}' syntax. To generate the pre-prologue, Bison concatenates all prologue blocks that you've declared before the first %union. To generate the post-prologue, Bison concatenates all prologue blocks that you've - declared after the first %union. + declared after the first %union. Previous releases of Bison inserted the pre-prologue into both the header file and the code file in all cases except for LALR(1) parsers in C. In the diff --git a/data/glr.c b/data/glr.c index 8da20487..836df503 100644 --- a/data/glr.c +++ b/data/glr.c @@ -175,14 +175,15 @@ m4_define([b4_shared_declarations], b4_token_enums(b4_tokens) -[#if ! defined YYSTYPE && ! defined YYSTYPE_IS_DECLARED +[#ifndef YYSTYPE ]m4_ifdef([b4_stype], -[typedef union b4_union_name -b4_user_stype - YYSTYPE;], -[typedef int YYSTYPE;])[ -# define YYSTYPE_IS_DECLARED 1 -# define YYSTYPE_IS_TRIVIAL 1 +[[typedef union ]b4_union_name[ +]b4_user_stype[ + YYSTYPE; +# define YYSTYPE_IS_TRIVIAL 1]], +[m4_if(b4_tag_seen_flag, 0, +[[typedef int YYSTYPE; +# define YYSTYPE_IS_TRIVIAL 1]])])[ #endif #if ! defined YYLTYPE && ! 
defined YYLTYPE_IS_DECLARED diff --git a/data/glr.cc b/data/glr.cc index e742c03d..59b9e3dd 100644 --- a/data/glr.cc +++ b/data/glr.cc @@ -288,7 +288,9 @@ namespace ]b4_namespace[ [ union semantic_type b4_user_stype ;], -[ typedef int semantic_type;])[ +[m4_if(b4_tag_seen_flag, 0, +[[ typedef int semantic_type;]], +[[ typedef YYSTYPE semantic_type;]])])[ #else typedef YYSTYPE semantic_type; #endif diff --git a/data/lalr1.cc b/data/lalr1.cc index 87b64fa1..000cd0d1 100644 --- a/data/lalr1.cc +++ b/data/lalr1.cc @@ -110,7 +110,9 @@ namespace ]b4_namespace[ [ union semantic_type b4_user_stype ;], -[ typedef int semantic_type;])[ +[m4_if(b4_tag_seen_flag, 0, +[[ typedef int semantic_type;]], +[[ typedef YYSTYPE semantic_type;]])])[ #else typedef YYSTYPE semantic_type; #endif diff --git a/data/yacc.c b/data/yacc.c index e297c643..cdc69203 100644 --- a/data/yacc.c +++ b/data/yacc.c @@ -189,13 +189,15 @@ b4_token_enums_defines(b4_tokens)[ #if ! defined YYSTYPE && ! defined YYSTYPE_IS_DECLARED ]m4_ifdef([b4_stype], -[[typedef union ]b4_union_name -b4_user_stype - YYSTYPE;], -[typedef int YYSTYPE;])[ +[[typedef union ]b4_union_name[ +]b4_user_stype[ + YYSTYPE; +# define YYSTYPE_IS_TRIVIAL 1]], +[m4_if(b4_tag_seen_flag, 0, +[[typedef int YYSTYPE; +# define YYSTYPE_IS_TRIVIAL 1]])])[ # define yystype YYSTYPE /* obsolescent; will be withdrawn */ # define YYSTYPE_IS_DECLARED 1 -# define YYSTYPE_IS_TRIVIAL 1 #endif ]b4_locations_if([#if ! defined YYLTYPE && ! defined YYLTYPE_IS_DECLARED @@ -1499,13 +1501,15 @@ b4_token_enums_defines(b4_tokens) [#if ! defined YYSTYPE && ! 
defined YYSTYPE_IS_DECLARED ]m4_ifdef([b4_stype], -[[typedef union ]b4_union_name -b4_user_stype - YYSTYPE;], -[typedef int YYSTYPE;])[ +[[typedef union ]b4_union_name[ +]b4_user_stype[ + YYSTYPE; +# define YYSTYPE_IS_TRIVIAL 1]], +[m4_if(b4_tag_seen_flag, 0, +[[typedef int YYSTYPE; +# define YYSTYPE_IS_TRIVIAL 1]])])[ # define yystype YYSTYPE /* obsolescent; will be withdrawn */ # define YYSTYPE_IS_DECLARED 1 -# define YYSTYPE_IS_TRIVIAL 1 #endif ]b4_pure_if([], diff --git a/doc/bison.texinfo b/doc/bison.texinfo index 0e181b86..73afb8c3 100644 --- a/doc/bison.texinfo +++ b/doc/bison.texinfo @@ -3092,7 +3092,8 @@ the semantic values of all language constructs. This was true in the @acronym{RPN} and infix calculator examples (@pxref{RPN Calc, ,Reverse Polish Notation Calculator}). -Bison's default is to use type @code{int} for all semantic values. To +Bison normally uses the type @code{int} for semantic values if your +program uses the same data type for all language constructs. To specify some other type, define @code{YYSTYPE} as a macro, like this: @example @@ -3119,9 +3120,11 @@ requires you to do two things: @itemize @bullet @item -Specify the entire collection of possible data types, with the +Specify the entire collection of possible data types, either by using the @code{%union} Bison declaration (@pxref{Union Decl, ,The Collection of -Value Types}). +Value Types}), or by using a @code{typedef} or a @code{#define} to +define @code{YYSTYPE} to be a union type whose member names are +the type tags. @item Choose one of those types for each symbol (terminal or nonterminal) for @@ -3500,7 +3503,7 @@ since all tokens and groupings always use the same type. You can specify the type of locations by defining a macro called @code{YYLTYPE}, just as you can specify the semantic value type by -defining @code{YYSTYPE} (@pxref{Value Type}). +defining a @code{YYSTYPE} macro (@pxref{Value Type}). 
When @code{YYLTYPE} is not defined, Bison uses a default structure type with four members: @@ -3895,6 +3898,35 @@ only the first @code{%union} declaration can specify a tag. Note that, unlike making a @code{union} declaration in C, you need not write a semicolon after the closing brace. +Instead of @code{%union}, you can define and use your own union type +@code{YYSTYPE} if your grammar contains at least one +@samp{<@var{type}>} tag. For example, you can put the following into +a header file @file{parser.h}: + +@example +@group +union YYSTYPE @{ + double val; + symrec *tptr; +@}; +typedef union YYSTYPE YYSTYPE; +@end group +@end example + +@noindent +and then your grammar can use the following +instead of @code{%union}: + +@example +@group +%@{ +#include "parser.h" +%@} +%type <val> expr +%token <tptr> ID +@end group +@end example + @node Type Decl @subsection Nonterminal Symbols @cindex declaring value types, nonterminals @@ -4212,10 +4244,13 @@ names defined in the grammar as well as a few other declarations. If the parser output file is named @file{@var{name}.c} then this file is named @file{@var{name}.h}. -Unless @code{YYSTYPE} is already defined as a macro, the output header -declares @code{YYSTYPE}. Therefore, if you are using a @code{%union} +For C parsers, the output header declares @code{YYSTYPE} unless +@code{YYSTYPE} is already defined as a macro or you have used a +@code{<@var{type}>} tag without using @code{%union}. +Therefore, if you are using a @code{%union} (@pxref{Multiple Types, ,More Than One Value Type}) with components that require other definitions, or if you have defined a @code{YYSTYPE} macro +or type definition (@pxref{Value Type, ,Data Types of Semantic Values}), you need to arrange for these definitions to be propagated to all modules, e.g., by putting them in a prerequisite header that is included both by your @@ -4227,7 +4262,7 @@ Parser}. 
If you have also used locations, the output header declares @code{YYLTYPE} and @code{yylloc} using a protocol similar to that of -@code{YYSTYPE} and @code{yylval}. @xref{Locations, ,Tracking +the @code{YYSTYPE} macro and @code{yylval}. @xref{Locations, ,Tracking Locations}. This output file is normally essential if you wish to put the definition diff --git a/src/output.c b/src/output.c index 0d648770..534d82af 100644 --- a/src/output.c +++ b/src/output.c @@ -590,6 +590,7 @@ prepare (void) MUSCLE_INSERT_BOOL ("locations_flag", locations_flag); MUSCLE_INSERT_BOOL ("pure_flag", pure_parser); MUSCLE_INSERT_BOOL ("synclines_flag", !no_lines_flag); + MUSCLE_INSERT_BOOL ("tag_seen_flag", tag_seen); MUSCLE_INSERT_BOOL ("yacc_flag", yacc_flag); /* File names. */ diff --git a/src/parse-gram.y b/src/parse-gram.y index 16ae62b8..11fa187f 100644 --- a/src/parse-gram.y +++ b/src/parse-gram.y @@ -57,7 +57,7 @@ static char const *char_name (char); static void add_param (char const *, char *, location); static symbol_class current_class = unknown_sym; -static uniqstr current_type = 0; +static uniqstr current_type = NULL; static symbol *current_lhs; static location current_lhs_location; static int current_prec = 0; @@ -209,7 +209,7 @@ declaration: grammar_declaration | PROLOGUE { - prologue_augment (translate_code ($1, @1), @1, typed); + prologue_augment (translate_code ($1, @1), @1, union_seen); } | "%after-header" "{...}" { @@ -322,7 +322,7 @@ grammar_declaration: { char const *body = $3; - if (typed) + if (union_seen) { /* Concatenate the union bodies, turning the first one's trailing '}' into '\n', and omitting the second one's '{'. 
*/ @@ -331,7 +331,7 @@ grammar_declaration: body++; } - typed = true; + union_seen = true; muscle_code_grow ("stype", body, @3); } ; @@ -352,6 +352,7 @@ symbol_declaration: } | "%type" TYPE symbols.1 { + tag_seen = true; symbol_list *list; for (list = $3; list; list = list->next) symbol_type_set (list->sym, $2, @2); @@ -382,7 +383,7 @@ precedence_declarator: type.opt: /* Nothing. */ { current_type = NULL; } -| TYPE { current_type = $1; } +| TYPE { current_type = $1; tag_seen = true; } ; /* One or more nonterminals to be %typed. */ @@ -396,6 +397,7 @@ symbol_def: TYPE { current_type = $1; + tag_seen = true; } | id { diff --git a/src/reader.c b/src/reader.c index aca5fb62..800ecc22 100644 --- a/src/reader.c +++ b/src/reader.c @@ -45,7 +45,10 @@ static bool start_flag = false; merger_list *merge_functions; /* Was %union seen? */ -bool typed = false; +bool union_seen = false; + +/* Was a tag seen? */ +bool tag_seen = false; /* Should rules have a default precedence? */ bool default_prec = true; diff --git a/src/reader.h b/src/reader.h index 52c059c4..43985965 100644 --- a/src/reader.h +++ b/src/reader.h @@ -59,7 +59,10 @@ void free_merger_functions (void); extern merger_list *merge_functions; /* Was %union seen? */ -extern bool typed; +extern bool union_seen; + +/* Was a tag seen? */ +extern bool tag_seen; /* Should rules have a default precedence? */ extern bool default_prec; diff --git a/src/scan-code.l b/src/scan-code.l index 703f2a6f..ec01f354 100644 --- a/src/scan-code.l +++ b/src/scan-code.l @@ -55,6 +55,9 @@ static void handle_action_dollar (symbol_list *rule, char *cp, static void handle_action_at (symbol_list *rule, char *cp, location at_loc); static location the_location; static location *loc = &the_location; + +/* True if an untyped $$ or $n was seen. */ +static bool untyped_var_seen; %} /* C and C++ comments in code. 
*/ %x SC_COMMENT SC_LINE_COMMENT @@ -262,48 +265,62 @@ handle_action_dollar (symbol_list *rule, char *text, location dollar_loc) ++cp; *cp = '\0'; ++cp; + if (untyped_var_seen) + complain_at (dollar_loc, _("explicit type given in untyped grammar")); + tag_seen = true; } if (*cp == '$') { if (!type_name) type_name = symbol_list_n_type_name_get (rule, dollar_loc, 0); - if (!type_name && typed) + + if (!type_name) { - if (rule->midrule_parent_rule) - complain_at (dollar_loc, - _("$$ for the midrule at $%d of `%s' has no declared" - " type"), - rule->midrule_parent_rhs_index, - effective_rule->sym->tag); + if (union_seen | tag_seen) + { + if (rule->midrule_parent_rule) + complain_at (dollar_loc, + _("$$ for the midrule at $%d of `%s'" + " has no declared type"), + rule->midrule_parent_rhs_index, + effective_rule->sym->tag); + else + complain_at (dollar_loc, _("$$ of `%s' has no declared type"), + rule->sym->tag); + } else - complain_at (dollar_loc, _("$$ of `%s' has no declared type"), - rule->sym->tag); + untyped_var_seen = true; + type_name = ""; } - if (!type_name) - type_name = ""; + obstack_fgrow1 (&obstack_for_string, "]b4_lhs_value([%s])[", type_name); rule->used = true; } else { - long int num; - set_errno (0); - num = strtol (cp, 0, 10); - if (INT_MIN <= num && num <= effective_rule_length && ! 
get_errno ()) + long int num = strtol (cp, NULL, 10); + + if (1 - INT_MAX + effective_rule_length <= num + && num <= effective_rule_length) { int n = num; - if (1-n > max_left_semantic_context) - max_left_semantic_context = 1-n; - if (!type_name && n > 0) + if (max_left_semantic_context < 1 - n) + max_left_semantic_context = 1 - n; + if (!type_name && 0 < n) type_name = symbol_list_n_type_name_get (effective_rule, dollar_loc, n); - if (!type_name && typed) - complain_at (dollar_loc, _("$%d of `%s' has no declared type"), - n, effective_rule->sym->tag); if (!type_name) - type_name = ""; + { + if (union_seen | tag_seen) + complain_at (dollar_loc, _("$%d of `%s' has no declared type"), + n, effective_rule->sym->tag); + else + untyped_var_seen = true; + type_name = ""; + } + obstack_fgrow3 (&obstack_for_string, "]b4_rhs_value(%d, %d, [%s])[", effective_rule_length, n, type_name); @@ -336,11 +353,10 @@ handle_action_at (symbol_list *rule, char *text, location at_loc) obstack_sgrow (&obstack_for_string, "]b4_lhs_location["); else { - long int num; - set_errno (0); - num = strtol (cp, 0, 10); + long int num = strtol (cp, NULL, 10); - if (INT_MIN <= num && num <= effective_rule_length && ! get_errno ()) + if (1 - INT_MAX + effective_rule_length <= num + && num <= effective_rule_length) { int n = num; obstack_fgrow2 (&obstack_for_string, "]b4_rhs_location(%d, %d)[", @@ -356,9 +372,9 @@ handle_action_at (symbol_list *rule, char *text, location at_loc) | Initialize the scanner. | `-------------------------*/ -/* Translate the dollars and ats in \a a, whose location is l. - Depending on the \a sc_context (SC_RULE_ACTION, SC_SYMBOL_ACTION, - INITIAL), the processing is different. */ +/* Translate the dollars and ats in \a a, whose location is \a l. The + translation is for \a rule, in the context \a sc_context + (SC_RULE_ACTION, SC_SYMBOL_ACTION, INITIAL). 
*/ static const char * translate_action (int sc_context, symbol_list *rule, const char *a, location l) diff --git a/tests/input.at b/tests/input.at index df1ab704..223428f1 100644 --- a/tests/input.at +++ b/tests/input.at @@ -107,12 +107,12 @@ c: INT | INT { $]1[ } INT { } INT { }; d: INT | INT { } INT { $]1[ } INT { }; e: INT | INT { } INT { } INT { $]1[ }; f: INT | INT { } INT { } INT { $]$[ = $]1[ + $]3[ + $]5[; }; -g: INT | INT { $]$[ } INT { $]$[ } INT { }; -h: INT | INT { $]$[ } INT { $]$[ = $]2[ } INT { }; +g: INT | INT { $$; } INT { $$; } INT { }; +h: INT | INT { $$; } INT { $$ = $2; } INT { }; i: INT | INT INT { } { $]$[ = $]1[ + $]2[; }; j: INT | INT INT { $$ = 1; } { $]$[ = $]1[ + $]2[; }; -k: INT | INT INT { $]$[; } { $]$[ = $]3[; } { }; -l: INT | INT { $]$[ = $]1[; } INT { $]$[ = $]2[ + $]3[; } INT { $]$[ = $]4[ + $]5[; };]]m4_ifval($1, [ +k: INT | INT INT { $$; } { $$ = $3; } { }; +l: INT | INT { $$ = $1; } INT { $$ = $2 + $3; } INT { $$ = $4 + $5; };]]m4_ifval($1, [ _AT_UNUSED_VALUES_DECLARATIONS]) ) @@ -131,22 +131,22 @@ input.y:14.10-35: warning: unused value: $]5[ input.y:15.10-36: warning: unset value: $]$[ input.y:15.10-36: warning: unused value: $]3[ input.y:15.10-36: warning: unused value: $]5[ -input.y:17.10-38: warning: unset value: $]$[ -input.y:17.10-38: warning: unused value: $]1[ -input.y:17.10-38: warning: unused value: $]2[ -input.y:17.10-38: warning: unused value: $]3[ -input.y:17.10-38: warning: unused value: $]4[ -input.y:17.10-38: warning: unused value: $]5[ -input.y:18.10-43: warning: unset value: $]$[ -input.y:18.10-43: warning: unused value: $]1[ -input.y:18.10-43: warning: unused value: $]3[ -input.y:18.10-43: warning: unused value: $]4[ -input.y:18.10-43: warning: unused value: $]5[ +input.y:17.10-58: warning: unset value: $]$[ +input.y:17.10-58: warning: unused value: $]1[ +input.y:17.10-58: warning: unused value: $]2[ +input.y:17.10-58: warning: unused value: $]3[ +input.y:17.10-58: warning: unused value: $]4[ 
+input.y:17.10-58: warning: unused value: $]5[ +input.y:18.10-72: warning: unset value: $]$[ +input.y:18.10-72: warning: unused value: $]1[ +input.y:18.10-72: warning: unused value: $]3[ +input.y:18.10-72: warning: unused value: $]4[ +input.y:18.10-72: warning: unused value: $]5[ input.y:20.10-55: warning: unused value: $]3[ -input.y:21.10-41: warning: unset value: $]$[ -input.y:21.10-41: warning: unused value: $]1[ -input.y:21.10-41: warning: unused value: $]2[ -input.y:21.10-41: warning: unused value: $]4[ +input.y:21.10-68: warning: unset value: $]$[ +input.y:21.10-68: warning: unused value: $]1[ +input.y:21.10-68: warning: unused value: $]2[ +input.y:21.10-68: warning: unused value: $]4[ ]])]) diff --git a/tests/regression.at b/tests/regression.at index 7e22a90c..70133797 100644 --- a/tests/regression.at +++ b/tests/regression.at @@ -49,6 +49,33 @@ AT_CLEANUP +## ----------------- ## +## YYSTYPE typedef. ## +## ----------------- ## + +AT_SETUP([YYSTYPE typedef]) + +AT_DATA_GRAMMAR([input.y], +[[%{ +void yyerror (char const *); +int yylex (void); +typedef union { char const *val; } YYSTYPE; +%} + +%type program + +%% + +program: { $$ = ""; }; +]]) + +AT_CHECK([bison -o input.c input.y]) +AT_COMPILE([input.o], [-c input.c]) + +AT_CLEANUP + + + ## ------------------------------------- ## ## Early token definitions with --yacc. ## ## ------------------------------------- ##