From 092778750428c0d35378691bb4db94ef5b5ab988 Mon Sep 17 00:00:00 2001 From: Akim Demaille <demaille@gostai.com> Date: Tue, 19 Aug 2008 21:39:03 +0200 Subject: [PATCH] Make it possible to return a symbol_type from yylex. * data/lalr1.cc (b4_lex_symbol_if): New. (parse): When lex_symbol is defined, expect yylex to return the complete lookahead. * etc/bench.pl.in (generate_grammar_list): Extend to support this yylex interface. (bench_variant_parser): Exercise it. --- ChangeLog | 10 ++++++ data/lalr1.cc | 15 ++++++--- etc/bench.pl.in | 83 +++++++++++++++++++++++++++++++------------------ 3 files changed, 73 insertions(+), 35 deletions(-) diff --git a/ChangeLog b/ChangeLog index 5e9114d0..fe8e5504 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,13 @@ +2008-11-11 Akim Demaille <demaille@gostai.com> + + Make it possible to return a symbol_type from yylex. + * data/lalr1.cc (b4_lex_symbol_if): New. + (parse): When lex_symbol is defined, expect yylex to return the + complete lookahead. + * etc/bench.pl.in (generate_grammar_list): Extend to support this + yylex interface. + (bench_variant_parser): Exercise it. + 2008-11-11 Akim Demaille <demaille@gostai.com> Remove useless bench case. diff --git a/data/lalr1.cc b/data/lalr1.cc index c286ee08..5a9d5c08 100644 --- a/data/lalr1.cc +++ b/data/lalr1.cc @@ -53,6 +53,12 @@ b4_variant_if([ ]) # b4_variant_if +# b4_lex_symbol_if([IF-YYLEX-RETURNS-A-COMPLETE-SYMBOL], [IF-NOT]) +# ---------------------------------------------------------------- +m4_define([b4_lex_symbol_if], +[b4_percent_define_ifdef([[lex_symbol]], [$1], [$2])]) + + # b4_assert_if([IF-ASSERTIONS-ARE-USED], [IF-NOT]) # ------------------------------------------------ m4_define([b4_assert_if], @@ -1144,14 +1150,15 @@ m4_popdef([b4_at_dollar])])dnl /* Read a lookahead token. 
*/ if (yyempty) { - YYCDEBUG << "Reading a token: "; - yyla.type = yytranslate_ (]b4_c_function_call([yylex], [int], + YYCDEBUG << "Reading a token: "; +]b4_lex_symbol_if( +[ yyla = yylex();], +[ yyla.type = yytranslate_ (b4_c_function_call([yylex], [int], [[YYSTYPE*], [&yyla.value]][]dnl b4_locations_if([, [[location*], [&yyla.location]]])dnl -m4_ifdef([b4_lex_param], [, ]b4_lex_param))[); +m4_ifdef([b4_lex_param], [, ]b4_lex_param)));])[ yyempty = false; } - YY_SYMBOL_PRINT ("Next token is", yyla); /* If the proper action on seeing token YYLA.TYPE is to reduce or diff --git a/etc/bench.pl.in b/etc/bench.pl.in index c7bd83f2..f8fca8ac 100755 --- a/etc/bench.pl.in +++ b/etc/bench.pl.in @@ -580,11 +580,13 @@ sub generate_grammar_list ($$@) my ($base, $max, @directive) = @_; my $directives = directives ($base, @directive); my $variant = grep { /%define "?variant"?/ } @directive; + my $lex_symbol = grep { /%define "?lex_symbol"?/ } @directive; my $out = new IO::File ">$base.y" or die; print $out <<EOF; %language "C++" %defines +%locations $directives %code requires // *.h @@ -598,22 +600,18 @@ $directives #include <iostream> #include <sstream> -// Prototype of the yylex function providing subsequent tokens. -static yy::parser::token_type yylex(yy::parser::semantic_type* yylval); - #define STAGE_MAX ($max * 10) // max = $max +#define USE_LEX_SYMBOL $lex_symbol #define USE_VARIANTS $variant -#if USE_VARIANTS -# define IF_VARIANTS(True, False) True -#else -# define IF_VARIANTS(True, False) False -#endif -#ifdef ONE_STAGE_BUILD -# define IF_ONE_STAGE_BUILD(True, False) True + // Prototype of the yylex function providing subsequent tokens. + static +#if USE_LEX_SYMBOL + yy::parser::symbol_type yylex(); #else -# define IF_ONE_STAGE_BUILD(True, False) False + yy::parser::token_type yylex(yy::parser::semantic_type* yylval, + yy::parser::location_type* yylloc); #endif // Conversion to string. 
@@ -627,6 +625,8 @@ static yy::parser::token_type yylex(yy::parser::semantic_type* yylval); return o.str (); } } + +%token END_OF_FILE 0 EOF if ($variant) @@ -636,7 +636,6 @@ EOF %token <int> NUMBER %printer { std::cerr << "Number: " << $$; } <int> %printer { std::cerr << "Text: " << $$; } <std::string> -%token END_OF_FILE 0 %type <std::string> text result %% @@ -660,7 +659,6 @@ EOF %token <ival> NUMBER %printer { std::cerr << "Number: " << $$; } <ival> %printer { std::cerr << "Text: " << *$$; } <sval> -%token END_OF_FILE 0 %type <sval> text result %% @@ -678,39 +676,63 @@ EOF print $out <<'EOF'; %% +# + static -yy::parser::token_type -yylex(yy::parser::semantic_type* yylval) +#if USE_LEX_SYMBOL +yy::parser::symbol_type yylex() +#else +yy::parser::token_type yylex(yy::parser::semantic_type* yylval, + yy::parser::location_type* yylloc) +#endif { + typedef yy::parser::token token; static int stage = -1; ++stage; if (stage == STAGE_MAX) - return yy::parser::token::END_OF_FILE; + { +#if USE_LEX_SYMBOL + return yy::parser::make_symbol <token::END_OF_FILE> (yy::location()); +#else + *yylloc = yy::location (); + return token::END_OF_FILE; +#endif + } else if (stage % 2) { -#if USE_VARIANTS -# ifdef ONE_STAGE_BUILD +#if USE_LEX_SYMBOL + return yy::parser::make_symbol <token::NUMBER> (stage, yy::location()); +#elif defined ONE_STAGE_BUILD yylval->build(stage); -# else + *yylloc = yy::location (); + return token::NUMBER; +#elif USE_VARIANTS yylval->build<int>() = stage; -# endif + *yylloc = yy::location (); + return token::NUMBER; #else yylval->ival = stage; + *yylloc = yy::location (); + return token::NUMBER; #endif - return yy::parser::token::NUMBER; } else { -#if USE_VARIANTS -# ifdef ONE_STAGE_BUILD +#if USE_LEX_SYMBOL + return yy::parser::make_symbol <token::TEXT> ("A string.", yy::location()); +#elif defined ONE_STAGE_BUILD yylval->build(std::string("A string.")); -# else + *yylloc = yy::location (); + return token::TEXT; +#elif USE_VARIANTS 
yylval->build<std::string>() = std::string("A string."); -# endif + *yylloc = yy::location (); + return token::TEXT; #else yylval->sval = new std::string("A string."); + *yylloc = yy::location (); + return token::TEXT; #endif - return yy::parser::token::TEXT; } abort(); } @@ -886,7 +908,7 @@ sub bench_push_parser () =item C<bench_variant_parser ()> -Bench the C++ lalr1.cc parser using Boost.Variants or %union. +Bench the C++ lalr1.cc parser using variants or %union. =cut @@ -894,11 +916,10 @@ sub bench_variant_parser () { bench ('list', qw( - [ %debug ] - & - [ %d variant + [ + %d variant & - [ #d ONE_STAGE_BUILD ] + [ #d ONE_STAGE_BUILD | %d lex_symbol ] ] ) ); -- 2.47.2