From: Akim Demaille Date: Tue, 21 Oct 2008 23:00:29 +0000 (-0500) Subject: Use variants to support objects as semantic values. X-Git-Tag: v2.7.90~1186 X-Git-Url: https://git.saurik.com/bison.git/commitdiff_plain/5ab8c47bcf5088cf420db6e03cd44bfa68e92ca0?ds=sidebyside Use variants to support objects as semantic values. This patch was inspired by work by Michiel De Wilde. But he used Boost variants which (i) requires Boost on the user side, (ii) is slow, and (iii) has useless overhead (the parser knows the type of the semantic value there is no reason to duplicate this information as Boost.Variants do). This implementation reserves a buffer large enough to store the largest objects. yy::variant implements this buffer. It was implemented with Quentin Hocquet. * src/output.c (type_names_output): New. (output_skeleton): Invoke it. * data/c++.m4 (b4_variant_if): New. (b4_symbol_value): If needed, provide a definition for variants. * data/lalr1.cc (b4_symbol_value, b4_symbol_action_) (b4_symbol_variant, _b4_char_sizeof_counter, _b4_char_sizeof_dummy) (b4_char_sizeof, yy::variant): New. (parser::parse): If variants are requested, define parser::union_type, parser::variant, change the definition of semantic_type, construct $$ before running the user action instead of performing a default $$ = $1. * examples/variant.yy: New. Based on an example by Michiel De Wilde. --- diff --git a/ChangeLog b/ChangeLog index 83ae569c..a7e2b729 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,29 @@ +2008-11-03 Akim Demaille + + Use variants to support objects as semantic values. + This patch was inspired by work by Michiel De Wilde. But he used Boost + variants which (i) requires Boost on the user side, (ii) is slow, and + (iii) has useless overhead (the parser knows the type of the semantic value + there is no reason to duplicate this information as Boost.Variants do). + + This implementation reserves a buffer large enough to store the largest + objects. yy::variant implements this buffer. 
It was implemented with + Quentin Hocquet. + + * src/output.c (type_names_output): New. + (output_skeleton): Invoke it. + * data/c++.m4 (b4_variant_if): New. + (b4_symbol_value): If needed, provide a definition for variants. + * data/lalr1.cc (b4_symbol_value, b4_symbol_action_) + (b4_symbol_variant, _b4_char_sizeof_counter, _b4_char_sizeof_dummy) + (b4_char_sizeof, yy::variant): New. + (parser::parse): If variants are requested, define + parser::union_type, parser::variant, change the definition of + semantic_type, construct $$ before running the user action instead + of performing a default $$ = $1. + * examples/variant.yy: New. + Based on an example by Michiel De Wilde. + 2008-11-03 Akim Demaille Parameterize the extraction of semantic values. diff --git a/THANKS b/THANKS index 6785e2a8..91459166 100644 --- a/THANKS +++ b/THANKS @@ -55,6 +55,7 @@ Martin Nylin martin.nylin@linuxmail.org Matt Kraai kraai@alumni.cmu.edu Matt Rosing rosing@peakfive.com Michael Hayes m.hayes@elec.canterbury.ac.nz +Michiel De Wilde mdewilde.agilent@gmail.com Mickael Labau labau_m@epita.fr Mike Castle dalgoda@ix.netcom.com Neil Booth NeilB@earthling.net @@ -71,6 +72,7 @@ Per Allansson per@appgate.com Peter Fales psfales@lucent.com Peter Hamorsky hamo@upjs.sk Piotr Gackiewicz gacek@intertel.com.pl +Quentin Hocquet hocquet@gostai.com Quoc Peyrot chojin@lrde.epita.fr R Blake blakers@mac.com Raja R Harinath harinath@cs.umn.edu diff --git a/data/c++.m4 b/data/c++.m4 index a896b778..ba17dff1 100644 --- a/data/c++.m4 +++ b/data/c++.m4 @@ -97,6 +97,11 @@ m4_map_sep([ b4_token_enum], [, ## Semantic Values. 
## ## ----------------- ## +# b4_variant_if([IF-VARIANT-ARE-USED], [IF-NOT]) +# ---------------------------------------------- +m4_define([b4_variant_if], +[b4_percent_define_ifdef([[variant]], [$1], [$2])]) + # b4_lhs_value([TYPE]) # -------------------- diff --git a/data/lalr1.cc b/data/lalr1.cc index 07a301ff..de2c1def 100644 --- a/data/lalr1.cc +++ b/data/lalr1.cc @@ -18,6 +18,75 @@ m4_include(b4_pkgdatadir/[c++.m4]) +# How the semantic value is extracted when using variants. +b4_variant_if([ + # b4_symbol_value(VAL, [TYPE]) + # ---------------------------- + m4_define([b4_symbol_value], + [m4_ifval([$2], + [$1.as<$2>()], + [$1])]) +]) # b4_variant_if + + +# b4_symbol_action_(SYMBOL-TAG, SYMBOL-NUM, SYMBOL-TYPENAME) +# ---------------------------------------------------------- +# Invoke b4_dollar_dollar(SYMBOL_TYPENAME) for each symbol. +m4_define([b4_symbol_action_], +[m4_ifval($3, +[ case $2: // $1 + b4_dollar_dollar($@); + break; +])]) + + +# b4_symbol_variant(YYTYPE, YYVAL, ACTION) +# ---------------------------------------- +# Run some ACTION ("build", or "destroy") on YYVAL of symbol type +# YYTYPE. +m4_define([b4_symbol_variant], +[m4_pushdef([b4_dollar_dollar], + [$2.$3<$][3>()])dnl + switch ($1) + { +m4_map([b4_symbol_action_], m4_defn([b4_type_names])) + default: + break; + } +m4_popdef([b4_dollar_dollar])dnl +]) + + +# _b4_char_sizeof_counter +# ----------------------- +# A counter used by _b4_char_sizeof_dummy to create fresh symbols. +m4_define([_b4_char_sizeof_counter], +[0]) + +# _b4_char_sizeof_dummy +# --------------------- +# At each call return a new C++ identifier. 
+m4_define([_b4_char_sizeof_dummy], +[m4_define([_b4_char_sizeof_counter], m4_incr(_b4_char_sizeof_counter))dnl +dummy[]_b4_char_sizeof_counter]) + + +# b4_char_sizeof(SYMBOL-TAG, SYMBOL-NUM, SYMBOL-TYPENAME) +# ------------------------------------------------------- +# To be mapped on the list of type names to produce: +# +# char dummy1[sizeof(type_name_1)]; +# char dummy2[sizeof(type_name_2)]; +# +# for defined type names. +# $3 is doubly-quoted, do not quote it again. +m4_define([b4_char_sizeof], +[m4_ifval($3, +[ + char _b4_char_sizeof_dummy@{sizeof($3)@}; // $1])dnl +]) + + m4_define([b4_parser_class_name], [b4_percent_define_get([[parser_class_name]])]) @@ -52,6 +121,51 @@ dnl FIXME: This is wrong, we want computed header guards. ]b4_namespace_open[ class position; class location; +]b4_variant_if([[ + /// A char[S] buffer to store and retrieve objects. + /// + /// Sort of a variant, but does not keep track of the nature + /// of the stored data, since that knowledge is available + /// via the current state. + template <size_t S> + struct variant + { + /// Instantiate a \a T in here. + template <typename T> + inline void + build() + { + new (buffer) T; + } + + /// Destroy the stored \a T. + template <typename T> + inline void + destroy() + { + reinterpret_cast<T&>(buffer).~T(); + } + + /// Accessor to a built \a T. + template <typename T> + inline T& + as() + { + return reinterpret_cast<T&>(buffer); + } + + /// Const accessor to a built \a T (for %printer). + template <typename T> + inline const T& + as() const + { + return reinterpret_cast<const T&>(buffer); + } + + /// A buffer large enough to store any of the semantic values. + char buffer[S]; + }; +]])[ ]b4_namespace_close[ #include "location.hh" @@ -99,16 +213,23 @@ do { \ class ]b4_parser_class_name[ { public: - /// Symbol semantic values. #ifndef YYSTYPE -]m4_ifdef([b4_stype], +]b4_variant_if( +[ /// An auxiliary type to compute the largest semantic type. + union union_type + {]m4_map([b4_char_sizeof], m4_defn([b4_type_names]))[ + }; + + /// Symbol semantic values. 
+ typedef variant<sizeof(union_type)> semantic_type;], +[ /// Symbol semantic values. +m4_ifdef([b4_stype], [ union semantic_type - { -b4_user_stype + {b4_user_stype };], [m4_if(b4_tag_seen_flag, 0, [[ typedef int semantic_type;]], -[[ typedef YYSTYPE semantic_type;]])])[ +[[ typedef YYSTYPE semantic_type;]])])])[ #else typedef YYSTYPE semantic_type; #endif @@ -631,7 +752,6 @@ m4_ifdef([b4_lex_param], [, ]b4_lex_param))[; /* Discard the token being shifted. */ yychar = yyempty_; - yysemantic_stack_.push (yylval); yylocation_stack_.push (yylloc); @@ -656,7 +776,11 @@ m4_ifdef([b4_lex_param], [, ]b4_lex_param))[; | yyreduce -- Do a reduction. | `-----------------------------*/ yyreduce: - yylen = yyr2_[yyn]; + yylen = yyr2_[yyn];]b4_variant_if([ + /* Variants are always initialized to an empty instance of the + correct type. The default $$=$1 rule is NOT applied when using + variants */ + ]b4_symbol_variant([[yyr1_@{yyn@}]], [yyval], [build])[],[ /* If YYLEN is nonzero, implement the default value of the action: `$$ = $1'. Otherwise, use the top of the stack. @@ -666,7 +790,7 @@ m4_ifdef([b4_lex_param], [, ]b4_lex_param))[; if (yylen) yyval = yysemantic_stack_@{yylen - 1@}; else - yyval = yysemantic_stack_@{0@}; + yyval = yysemantic_stack_@{0@};])[ { slice<location_type, location_stack_type> slice (yylocation_stack_, yylen); @@ -684,7 +808,6 @@ m4_ifdef([b4_lex_param], [, ]b4_lex_param))[; yypop_ (yylen); yylen = 0; YY_STACK_PRINT (); - yysemantic_stack_.push (yyval); yylocation_stack_.push (yyloc); diff --git a/examples/variant.yy b/examples/variant.yy new file mode 100644 index 00000000..fafb6556 --- /dev/null +++ b/examples/variant.yy @@ -0,0 +1,108 @@ +/* Test file for C++ parsers using variants. + Based on an example by Michiel De Wilde <mdewilde.agilent@gmail.com>. 
*/ +%language "C++" +%debug +%defines +%define variant + +%code requires // *.hh +{ +#include <string> +} + +%code // *.cc +{ +#include <algorithm> +#include <iostream> +#include <sstream> + +static yy::parser::token_type yylex(yy::parser::semantic_type* yylval); +} + +%token <std::string> TEXT +%token <int> NUMBER +%printer { debug_stream() << $$; } <int> <std::string> +%token END_OF_FILE 0 + +%type <std::string> text result + +%% + +result: + text { std::cout << $1 << std::endl; } +; + +text: + /* nothing */ { /* This will generate an empty string */ } +| text TEXT { std::swap($$,$1); $$.append($2); } +| text NUMBER { + std::swap($$,$1); + std::ostringstream o; + o << ' ' << $2; + $$.append(o.str()); + } +; +%% + +// The yylex function providing subsequent tokens: +// TEXT "I have three numbers for you:" +// NUMBER 1 +// NUMBER 2 +// NUMBER 3 +// TEXT " and that's all!" +// END_OF_FILE + +static +yy::parser::token_type +yylex(yy::parser::semantic_type* yylval) +{ + static int stage = 0; + yy::parser::token_type result; + + switch (stage) + { + case 0: + yylval->build<std::string>(); + yylval->as<std::string>() = std::string("I have three numbers for you:"); + result = yy::parser::token::TEXT; + break; + case 1: + case 2: + case 3: + yylval->build<int>(); + yylval->as<int>() = stage; + result = yy::parser::token::NUMBER; + break; + case 4: + yylval->build<std::string>(); + yylval->as<std::string>() = std::string(" and that's all!"); + result = yy::parser::token::TEXT; + break; + default: + result = yy::parser::token::END_OF_FILE; + break; + } + + stage++; + return result; +} + +// Mandatory error function +void +yy::parser::error(const yy::parser::location_type& yylloc, + const std::string& message) +{ + std::cerr << yylloc << ": " << message << std::endl; +} + +int +main(int argc, char *argv[]) +{ + yy::parser p; + p.set_debug_level(!!getenv("YYDEBUG")); + p.parse(); +} + +// Local Variables: +// mode: C++ +// End: diff --git a/src/output.c b/src/output.c index 585b90e0..8457ba19 100644 --- a/src/output.c +++ b/src/output.c @@ -283,6 +283,31 @@ prepare_states (void) +/*-----------------------------------------------. 
+| For each symbol type, its tags and type name. | +`-----------------------------------------------*/ + +static void +type_names_output (FILE *out) +{ + int i; + char const *sep = ""; + + fputs ("m4_define([b4_type_names],\n[", out); + for (i = 0; i < nsyms; ++i) + { + symbol *sym = symbols[i]; + /* Symbol-name, Symbol-number, optional typename. */ + fprintf (out, "%s[", i ? ",\n" : ""); + escaped_output (out, sym->tag); + fprintf (out, ", %d, [[%s]]]", + sym->number, + sym->type_name ? sym->type_name : ""); + } + fputs ("])\n\n", out); +} + + /*---------------------------------. | Output the user actions to OUT. | `---------------------------------*/ @@ -461,7 +486,6 @@ prepare_actions (void) muscle_insert_unsigned_int_table ("conflicting_rules", conflict_list, 0, 1, conflict_list_cnt); } - /*---------------------------. | Call the skeleton parser. | @@ -567,6 +591,7 @@ output_skeleton (void) /* Output the definitions of all the muscles. */ fputs ("m4_init()\n", out); + type_names_output (out); user_actions_output (out); merger_output (out); token_definitions_output (out);