From cb823b6f0c5d025e3667f994222037476c41ea1a Mon Sep 17 00:00:00 2001 From: Akim Demaille Date: Thu, 23 Oct 2008 20:01:48 -0500 Subject: [PATCH] Support parametric types. There are two issues to handle: first scanning nested angle bracket pairs to support types such as std::pair< std::string, std::list > >. Another issue is to address idiosyncracies of C++: do not glue two closing angle brackets together (otherwise it's operator>>), and avoid sticking blindly a TYPE to the opening <, as it can result in '<:' which is a digraph for '['. * src/scan-gram.l (brace_level): Rename as... (nesting): this. (SC_TAG): New. Implement support for complex tags. (tag): Accept \n, but not <. * data/lalr1.cc (b4_symbol_value, b4_symbol_value_template) (b4_symbol_variant): Leave space around types as parameters. * examples/variant.yy: Use nested template types and leading ::. * src/parse-gram.y (TYPE, TYPE_TAG_ANY, TYPE_TAG_NONE, type.opt): Rename as... (TAG, TAG_ANY, TAG_NONE, tag.opt): these. * tests/c++.at: Test parametric types. --- ChangeLog | 25 ++++++++++++++ data/lalr1.cc | 6 ++-- examples/variant.yy | 9 ++--- src/parse-gram.y | 30 ++++++++--------- src/scan-gram.l | 80 ++++++++++++++++++++++++++++++++++++--------- tests/c++.at | 24 ++++++++------ 6 files changed, 127 insertions(+), 47 deletions(-) diff --git a/ChangeLog b/ChangeLog index 70fd84d8..4543fdd4 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,28 @@ +2008-11-15 Akim Demaille + + Support parametric types. + There are two issues to handle: first scanning nested angle bracket pairs + to support types such as std::pair< std::string, std::list > >. + + Another issue is to address idiosyncracies of C++: do not glue two closing + angle brackets together (otherwise it's operator>>), and avoid sticking + blindly a TYPE to the opening <, as it can result in '<:' which is a + digraph for '['. + + * src/scan-gram.l (brace_level): Rename as... + (nesting): this. + (SC_TAG): New. + Implement support for complex tags. 
+ (tag): Accept + , but not <. + * data/lalr1.cc (b4_symbol_value, b4_symbol_value_template) + (b4_symbol_variant): Leave space around types as parameters. + * examples/variant.yy: Use nested template types and leading ::. + * src/parse-gram.y (TYPE, TYPE_TAG_ANY, TYPE_TAG_NONE, type.opt): + Rename as... + (TAG, TAG_ANY, TAG_NONE, tag.opt): these. + * tests/c++.at: Test parametric types. + 2008-11-15 Akim Demaille Test token.prefix. diff --git a/data/lalr1.cc b/data/lalr1.cc index c3678887..6e5042d4 100644 --- a/data/lalr1.cc +++ b/data/lalr1.cc @@ -72,7 +72,7 @@ b4_variant_if([ # ---------------------------- m4_define([b4_symbol_value], [m4_ifval([$2], - [$1.as<$2>()], + [$1.as< $2 >()], [$1])]) # b4_symbol_value_template(VAL, [TYPE]) @@ -80,7 +80,7 @@ b4_variant_if([ # Same as b4_symbol_value, but used in a template method. m4_define([b4_symbol_value_template], [m4_ifval([$2], - [$1.template as<$2>()], + [$1.template as< $2 >()], [$1])]) ]) # b4_variant_if @@ -366,7 +366,7 @@ m4_map([b4_symbol_constructor_definition_], m4_defn([b4_symbol_numbers]))])]) # YYTYPE. 
m4_define([b4_symbol_variant], [m4_pushdef([b4_dollar_dollar], - [$2.$3<$][3>(m4_shift3($@))])dnl + [$2.$3< $][3 >(m4_shift3($@))])dnl switch ($1) { m4_map([b4_type_action_], m4_defn([b4_type_names]))[]dnl diff --git a/examples/variant.yy b/examples/variant.yy index 016c4b2a..fe5dce0f 100644 --- a/examples/variant.yy +++ b/examples/variant.yy @@ -46,13 +46,14 @@ typedef std::list strings_type; } } -%token TEXT; +%token <::std::string> TEXT; %token NUMBER; -%printer { debug_stream () << $$; } ; +%printer { debug_stream () << $$; } + <::std::string> <::std::list>; %token END_OF_FILE 0; -%type item; -%type list; +%type <::std::string> item; +%type <::std::list> list; %% diff --git a/src/parse-gram.y b/src/parse-gram.y index 8e3b7320..cada04f7 100644 --- a/src/parse-gram.y +++ b/src/parse-gram.y @@ -167,9 +167,9 @@ static int current_prec = 0; %token PIPE "|" %token PROLOGUE "%{...%}" %token SEMICOLON ";" -%token TYPE "type" -%token TYPE_TAG_ANY "<*>" -%token TYPE_TAG_NONE "<>" +%token TAG "" +%token TAG_ANY "<*>" +%token TAG_NONE "<>" %type CHAR %printer { fputs (char_name ($$), stderr); } CHAR @@ -183,8 +183,8 @@ static int current_prec = 0; %printer { fprintf (stderr, "{\n%s\n}", $$); } braceless content.opt "{...}" "%{...%}" EPILOGUE -%type TYPE ID ID_COLON variable -%printer { fprintf (stderr, "<%s>", $$); } TYPE +%type TAG ID ID_COLON variable +%printer { fprintf (stderr, "<%s>", $$); } TAG %printer { fputs ($$, stderr); } ID variable %printer { fprintf (stderr, "%s:", $$); } ID_COLON @@ -387,7 +387,7 @@ symbol_declaration: current_class = unknown_sym; current_type = NULL; } -| "%type" TYPE symbols.1 +| "%type" TAG symbols.1 { symbol_list *list; tag_seen = true; @@ -398,7 +398,7 @@ symbol_declaration: ; precedence_declaration: - precedence_declarator type.opt symbols.prec + precedence_declarator tag.opt symbols.prec { symbol_list *list; ++current_prec; @@ -419,9 +419,9 @@ precedence_declarator: | "%precedence" { $$ = precedence_assoc; } ; -type.opt: +tag.opt: /* 
Nothing. */ { current_type = NULL; } -| TYPE { current_type = $1; tag_seen = true; } +| TAG { current_type = $1; tag_seen = true; } ; /* Just like symbols.1 but accept INT for the sake of POSIX. */ @@ -451,15 +451,15 @@ generic_symlist: ; generic_symlist_item: - symbol { $$ = symbol_list_sym_new ($1, @1); } -| TYPE { $$ = symbol_list_type_new ($1, @1); } -| "<*>" { $$ = symbol_list_default_tagged_new (@1); } -| "<>" { $$ = symbol_list_default_tagless_new (@1); } + symbol { $$ = symbol_list_sym_new ($1, @1); } +| TAG { $$ = symbol_list_type_new ($1, @1); } +| "<*>" { $$ = symbol_list_default_tagged_new (@1); } +| "<>" { $$ = symbol_list_default_tagless_new (@1); } ; /* One token definition. */ symbol_def: - TYPE + TAG { current_type = $1; tag_seen = true; @@ -538,7 +538,7 @@ rhs: { grammar_current_rule_prec_set ($3, @3); } | rhs "%dprec" INT { grammar_current_rule_dprec_set ($3, @3); } -| rhs "%merge" TYPE +| rhs "%merge" TAG { grammar_current_rule_merge_set ($3, @3); } ; diff --git a/src/scan-gram.l b/src/scan-gram.l index 7ea0d685..5ef8edba 100644 --- a/src/scan-gram.l +++ b/src/scan-gram.l @@ -78,6 +78,8 @@ static void unexpected_newline (boundary, char const *); /* A identifier was just read in directives/rules. Special state to capture the sequence `identifier :'. */ %x SC_AFTER_IDENTIFIER + /* A complex tag, with nested angle brackets. */ +%x SC_TAG /* Three types of user code: - prologue (code between `%{' `%}' in the first section, before %%); @@ -96,8 +98,10 @@ int [0-9]+ /* POSIX says that a tag must be both an id and a C union member, but historically almost any character is allowed in a tag. We disallow - NUL and newline, as this simplifies our implementation. */ -tag [^\0\n>]+ + NUL, as this simplifies our implementation. We disallow angle + brackets to match them in nested pairs: several languages use them + for generics/template types. */ +tag [^\0<>]+ /* Zero or more instances of backslash-newline.
Following GCC, allow white space between the backslash and the newline. */ @@ -105,8 +109,9 @@ splice (\\[ \f\t\v]*\n)* %% %{ - /* Nesting level of the current code in braces. */ - int braces_level IF_LINT (= 0); + /* Nesting level. Either for nested braces, or nested angle brackets + (but not mixed). */ + int nesting IF_LINT (= 0); /* Parent context state, when applicable. */ int context_state IF_LINT (= 0); @@ -205,8 +210,6 @@ splice (\\[ \f\t\v]*\n)* "=" return EQUAL; "|" return PIPE; ";" return SEMICOLON; - "<*>" return TYPE_TAG_ANY; - "<>" return TYPE_TAG_NONE; {id} { val->uniqstr = uniqstr_new (yytext); @@ -235,18 +238,25 @@ splice (\\[ \f\t\v]*\n)* /* Code in between braces. */ "{" { STRING_GROW; - braces_level = 0; + nesting = 0; code_start = loc->start; BEGIN SC_BRACED_CODE; } /* A type. */ + "<*>" return TAG_ANY; + "<>" return TAG_NONE; "<"{tag}">" { obstack_grow (&obstack_for_string, yytext + 1, yyleng - 2); STRING_FINISH; val->uniqstr = uniqstr_new (last_string); STRING_FREE; - return TYPE; + return TAG; + } + "<" { + nesting = 0; + token_start = loc->start; + BEGIN SC_TAG; } "%%" { @@ -267,6 +277,17 @@ splice (\\[ \f\t\v]*\n)* } + /*--------------------------------------------------------------. + | Supporting \0 complexifies our implementation for no expected | + | added value. | + `--------------------------------------------------------------*/ + + +{ + \0 complain_at (*loc, _("invalid null character")); +} + + /*-----------------------------------------------------------------. | Scanning after an identifier, checking whether a colon is next. | `-----------------------------------------------------------------*/ @@ -386,11 +407,40 @@ splice (\\[ \f\t\v]*\n)* } } - + /*-----------------------------------------------------------. + | Scanning a Bison nested tag. The initial angle bracket is | + | already eaten. 
| + `-----------------------------------------------------------*/ + + { - \0 complain_at (*loc, _("invalid null character")); -} + ">" { + --nesting; + if (nesting < 0) + { + STRING_FINISH; + loc->start = token_start; + val->uniqstr = uniqstr_new (last_string); + STRING_FREE; + BEGIN INITIAL; + return TAG; + } + STRING_GROW; + } + + [^<>]+ STRING_GROW; + "<"+ STRING_GROW; nesting += yyleng; + <> { + unexpected_eof (token_start, ">"); + STRING_FINISH; + loc->start = token_start; + val->uniqstr = uniqstr_new (last_string); + STRING_FREE; + BEGIN INITIAL; + return TAG; + } +} /*----------------------------. | Decode escaped characters. | @@ -509,13 +559,13 @@ splice (\\[ \f\t\v]*\n)* { - "{"|"<"{splice}"%" STRING_GROW; braces_level++; - "%"{splice}">" STRING_GROW; braces_level--; + "{"|"<"{splice}"%" STRING_GROW; nesting++; + "%"{splice}">" STRING_GROW; nesting--; "}" { obstack_1grow (&obstack_for_string, '}'); - --braces_level; - if (braces_level < 0) + --nesting; + if (nesting < 0) { STRING_FINISH; loc->start = code_start; diff --git a/tests/c++.at b/tests/c++.at index d236f619..0cfcaf05 100644 --- a/tests/c++.at +++ b/tests/c++.at @@ -51,7 +51,7 @@ typedef std::list strings_type; #include #include - static + static #if defined USE_LEX_SYMBOL yy::parser::symbol_type yylex (); #else @@ -86,26 +86,30 @@ typedef std::list strings_type; %token TEXT; %token NUMBER; -%printer { debug_stream() << $][$; } ; %token END_OF_FILE 0; %type item; -%type list result; +// Using the template type to exercise its parsing. +// Starting with :: to ensure we don't output "<::" which starts with the +// digraph for the left square bracket.
+%type <::std::list> list result; +%printer { debug_stream() << $][$; } + <::std::string> <::std::list<::std::string>>; %% result: - list { std::cout << $][1; } + list { std::cout << $][1; } ; list: - /* nothing */ { /* Generates an empty string list */ } -| list item { std::swap($][$,$][1); $$.push_back($][2); } + /* nothing */ { /* Generates an empty string list */ } +| list item { std::swap($][$,$][1); $$.push_back($][2); } ; item: - TEXT { std::swap($][$,$][1); } -| NUMBER { $][$ = string_cast($][1); } + TEXT { std::swap($][$,$][1); } +| NUMBER { $][$ = string_cast($][1); } ; %% @@ -164,7 +168,7 @@ yy::parser::token_type yylex(yy::parser::semantic_type* yylval, void yy::parser::error(const yy::parser::location_type&, - const std::string& message) + const std::string& message) { std::cerr << message << std::endl; } @@ -363,5 +367,5 @@ AT_CHECK_NAMESPACE([[foo: :bar]], [[-]]) # contains single occurrences of `:'. AT_CHECK_NAMESPACE([[foo[3]::bar::baz]], [[-]]) AT_CHECK_NAMESPACE([[foo::bar,baz]], [[-]]) -AT_CHECK_NAMESPACE([[foo::bar::(baz]], [[-]]) +AT_CHECK_NAMESPACE([[foo::bar::(baz /* Pacify Emacs ) */]], [[-]]) AT_CLEANUP -- 2.45.2