Support parametric types.

author Akim Demaille <demaille@gostai.com>

Fri, 24 Oct 2008 01:01:48 +0000 (20:01 -0500)

committer Akim Demaille <demaille@gostai.com>

Sat, 15 Nov 2008 13:30:05 +0000 (14:30 +0100)
author Akim Demaille <demaille@gostai.com>
Fri, 24 Oct 2008 01:01:48 +0000 (20:01 -0500)
committer Akim Demaille <demaille@gostai.com>
Sat, 15 Nov 2008 13:30:05 +0000 (14:30 +0100)
diff --git a/ChangeLog b/ChangeLog

index 70fd84d88fa66b9e473dc2cefdaa3853e013feaa..4543fdd47db62e9edf58f72dd23c099d6ae896e2 100644 (file)
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,28 @@
+2008-11-15  Akim Demaille  <demaille@gostai.com>
+
+       Support parametric types.
+       There are two issues to handle: first scanning nested angle bracket pairs
+       to support types such as std::pair< std::string, std::list<std::string> >.
+       
+       Another issue is to address idiosyncrasies of C++: do not glue two closing
+       angle brackets together (otherwise it's operator>>), and avoid sticking
+       blindly a TYPE to the opening <, as it can result in '<:' which is a
+       digraph for '['.
+       
+       * src/scan-gram.l (brace_level): Rename as...
+       (nesting): this.
+       (SC_TAG): New.
+       Implement support for complex tags.
+       (tag): Accept \n,
+       but not <.
+       * data/lalr1.cc (b4_symbol_value, b4_symbol_value_template)
+       (b4_symbol_variant): Leave space around types as parameters.
+       * examples/variant.yy: Use nested template types and leading ::.
+       * src/parse-gram.y (TYPE, TYPE_TAG_ANY, TYPE_TAG_NONE, type.opt):
+       Rename as...
+       (TAG, TAG_ANY, TAG_NONE, tag.opt): these.
+       * tests/c++.at: Test parametric types.
+
  2008-11-15  Akim Demaille  <akim@betelgeuse.gostai.ensta.fr>
  
         Test token.prefix.
diff --git a/data/lalr1.cc b/data/lalr1.cc

index c3678887386728d64805d2ac7217748f958a86b2..6e5042d410c04b1ed3f63d52651b3b7c982219e0 100644 (file)
--- a/data/lalr1.cc
+++ b/data/lalr1.cc
@@ -72,7 +72,7 @@ b4_variant_if([
    # ----------------------------
    m4_define([b4_symbol_value],
    [m4_ifval([$2],
-            [$1.as<$2>()],
+            [$1.as< $2 >()],
              [$1])])
  
    # b4_symbol_value_template(VAL, [TYPE])
@@ -80,7 +80,7 @@ b4_variant_if([
    # Same as b4_symbol_value, but used in a template method.
    m4_define([b4_symbol_value_template],
    [m4_ifval([$2],
-            [$1.template as<$2>()],
+            [$1.template as< $2 >()],
              [$1])])
  ]) # b4_variant_if
  
@@ -366,7 +366,7 @@ m4_map([b4_symbol_constructor_definition_], m4_defn([b4_symbol_numbers]))])])
  # YYTYPE.
  m4_define([b4_symbol_variant],
  [m4_pushdef([b4_dollar_dollar],
-            [$2.$3<$][3>(m4_shift3($@))])dnl
+            [$2.$3< $][3 >(m4_shift3($@))])dnl
    switch ($1)
      {
  m4_map([b4_type_action_], m4_defn([b4_type_names]))[]dnl
diff --git a/examples/variant.yy b/examples/variant.yy

index 016c4b2ad07d3c4a6a20d5fd138a3cb30cc04b86..fe5dce0fa9219b7b85ee1a4997c38fcf7f6a3c22 100644 (file)
--- a/examples/variant.yy
+++ b/examples/variant.yy
@@ -46,13 +46,14 @@ typedef std::list<std::string> strings_type;
    }
  }
  
-%token <std::string> TEXT;
+%token <::std::string> TEXT;
  %token <int> NUMBER;
-%printer { debug_stream () << $$; } <int> <std::string> <strings_type>;
+%printer { debug_stream () << $$; }
+   <int> <::std::string> <::std::list<std::string>>;
  %token END_OF_FILE 0;
  
-%type <std::string> item;
-%type <strings_type> list;
+%type <::std::string> item;
+%type <::std::list<std::string>> list;
  
  %%
  
diff --git a/src/parse-gram.y b/src/parse-gram.y

index 8e3b73208b72a3dde420fed4c1237c431e03f122..cada04f7687adf9f8a1188ae32e05d3a3069cd45 100644 (file)
--- a/src/parse-gram.y
+++ b/src/parse-gram.y
@@ -167,9 +167,9 @@ static int current_prec = 0;
  %token PIPE            "|"
  %token PROLOGUE        "%{...%}"
  %token SEMICOLON       ";"
-%token TYPE            "type"
-%token TYPE_TAG_ANY    "<*>"
-%token TYPE_TAG_NONE   "<>"
+%token TAG             "<tag>"
+%token TAG_ANY         "<*>"
+%token TAG_NONE        "<>"
  
  %type <character> CHAR
  %printer { fputs (char_name ($$), stderr); } CHAR
@@ -183,8 +183,8 @@ static int current_prec = 0;
  %printer { fprintf (stderr, "{\n%s\n}", $$); }
          braceless content.opt "{...}" "%{...%}" EPILOGUE
  
-%type <uniqstr> TYPE ID ID_COLON variable
-%printer { fprintf (stderr, "<%s>", $$); } TYPE
+%type <uniqstr> TAG ID ID_COLON variable
+%printer { fprintf (stderr, "<%s>", $$); } TAG
  %printer { fputs ($$, stderr); } ID variable
  %printer { fprintf (stderr, "%s:", $$); } ID_COLON
  
@@ -387,7 +387,7 @@ symbol_declaration:
        current_class = unknown_sym;
        current_type = NULL;
      }
-| "%type" TYPE symbols.1
+| "%type" TAG symbols.1
      {
        symbol_list *list;
        tag_seen = true;
@@ -398,7 +398,7 @@ symbol_declaration:
  ;
  
  precedence_declaration:
-  precedence_declarator type.opt symbols.prec
+  precedence_declarator tag.opt symbols.prec
      {
        symbol_list *list;
        ++current_prec;
@@ -419,9 +419,9 @@ precedence_declarator:
  | "%precedence" { $$ = precedence_assoc; }
  ;
  
-type.opt:
+tag.opt:
    /* Nothing. */ { current_type = NULL; }
-| TYPE           { current_type = $1; tag_seen = true; }
+| TAG            { current_type = $1; tag_seen = true; }
  ;
  
  /* Just like symbols.1 but accept INT for the sake of POSIX.  */
@@ -451,15 +451,15 @@ generic_symlist:
  ;
  
  generic_symlist_item:
-  symbol            { $$ = symbol_list_sym_new ($1, @1); }
-| TYPE              { $$ = symbol_list_type_new ($1, @1); }
-| "<*>"             { $$ = symbol_list_default_tagged_new (@1); }
-| "<>"             { $$ = symbol_list_default_tagless_new (@1); }
+  symbol    { $$ = symbol_list_sym_new ($1, @1); }
+| TAG       { $$ = symbol_list_type_new ($1, @1); }
+| "<*>"     { $$ = symbol_list_default_tagged_new (@1); }
+| "<>"      { $$ = symbol_list_default_tagless_new (@1); }
  ;
  
  /* One token definition.  */
  symbol_def:
-  TYPE
+  TAG
       {
         current_type = $1;
         tag_seen = true;
@@ -538,7 +538,7 @@ rhs:
      { grammar_current_rule_prec_set ($3, @3); }
  | rhs "%dprec" INT
      { grammar_current_rule_dprec_set ($3, @3); }
-| rhs "%merge" TYPE
+| rhs "%merge" TAG
      { grammar_current_rule_merge_set ($3, @3); }
  ;
  
diff --git a/src/scan-gram.l b/src/scan-gram.l

index 7ea0d6855d448e985cc3cd679b5456484b8098f6..5ef8edba25c30cd857acee5538f34e9e25d172f8 100644 (file)
--- a/src/scan-gram.l
+++ b/src/scan-gram.l
@@ -78,6 +78,8 @@ static void unexpected_newline (boundary, char const *);
   /* A identifier was just read in directives/rules.  Special state
      to capture the sequence `identifier :'. */
  %x SC_AFTER_IDENTIFIER
+ /* A complex tag, with nested angle brackets. */
+%x SC_TAG
  
   /* Three types of user code:
      - prologue (code between `%{' `%}' in the first section, before %%);
@@ -96,8 +98,10 @@ int    [0-9]+
  
  /* POSIX says that a tag must be both an id and a C union member, but
     historically almost any character is allowed in a tag.  We disallow
-   NUL and newline, as this simplifies our implementation.  */
-tag     [^\0\n>]+
+   NUL, as this simplifies our implementation.  We disallow angle
+   bracket to match them in nested pairs: several languages use them
+   for generics/template types.  */
+tag     [^\0<>]+
  
  /* Zero or more instances of backslash-newline.  Following GCC, allow
     white space between the backslash and the newline.  */
@@ -105,8 +109,9 @@ splice       (\\[ \f\t\v]*\n)*
  
  %%
  %{
-  /* Nesting level of the current code in braces.  */
-  int braces_level IF_LINT (= 0);
+  /* Nesting level.  Either for nested braces, or nested angle brackets
+     (but not mixed).  */
+  int nesting IF_LINT (= 0);
  
    /* Parent context state, when applicable.  */
    int context_state IF_LINT (= 0);
@@ -205,8 +210,6 @@ splice       (\\[ \f\t\v]*\n)*
    "="                     return EQUAL;
    "|"                     return PIPE;
    ";"                     return SEMICOLON;
-  "<*>"                   return TYPE_TAG_ANY;
-  "<>"                    return TYPE_TAG_NONE;
  
    {id} {
      val->uniqstr = uniqstr_new (yytext);
@@ -235,18 +238,25 @@ splice     (\\[ \f\t\v]*\n)*
    /* Code in between braces.  */
    "{" {
      STRING_GROW;
-    braces_level = 0;
+    nesting = 0;
      code_start = loc->start;
      BEGIN SC_BRACED_CODE;
    }
  
    /* A type. */
+  "<*>"       return TAG_ANY;
+  "<>"        return TAG_NONE;
    "<"{tag}">" {
      obstack_grow (&obstack_for_string, yytext + 1, yyleng - 2);
      STRING_FINISH;
      val->uniqstr = uniqstr_new (last_string);
      STRING_FREE;
-    return TYPE;
+    return TAG;
+  }
+  "<"         {
+    nesting = 0;
+    token_start = loc->start;
+    BEGIN SC_TAG;
    }
  
    "%%" {
@@ -267,6 +277,17 @@ splice      (\\[ \f\t\v]*\n)*
  }
  
  
+  /*--------------------------------------------------------------.
+  | Supporting \0 complexifies our implementation for no expected |
+  | added value.                                                  |
+  `--------------------------------------------------------------*/
+
+<SC_ESCAPED_CHARACTER,SC_ESCAPED_STRING,SC_TAG>
+{
+  \0       complain_at (*loc, _("invalid null character"));
+}
+
+
    /*-----------------------------------------------------------------.
    | Scanning after an identifier, checking whether a colon is next.  |
    `-----------------------------------------------------------------*/
@@ -386,11 +407,40 @@ splice     (\\[ \f\t\v]*\n)*
    }
  }
  
-<SC_ESCAPED_CHARACTER,SC_ESCAPED_STRING>
+  /*-----------------------------------------------------------.
+  | Scanning a Bison nested tag.  The initial angle bracket is |
+  | already eaten.                                             |
+  `-----------------------------------------------------------*/
+
+<SC_TAG>
  {
-  \0       complain_at (*loc, _("invalid null character"));
-}
+  ">" {
+    --nesting;
+    if (nesting < 0)
+      {
+        STRING_FINISH;
+        loc->start = token_start;
+        val->uniqstr = uniqstr_new (last_string);
+        STRING_FREE;
+        BEGIN INITIAL;
+        return TAG;
+      }
+    STRING_GROW;
+  }
+
+  [^<>]+ STRING_GROW;
+  "<"+   STRING_GROW; nesting += yyleng;
  
+  <<EOF>> {
+    unexpected_eof (token_start, ">");
+    STRING_FINISH;
+    loc->start = token_start;
+    val->uniqstr = uniqstr_new (last_string);
+    STRING_FREE;
+    BEGIN INITIAL;
+    return TAG;
+  }
+}
  
    /*----------------------------.
    | Decode escaped characters.  |
@@ -509,13 +559,13 @@ splice     (\\[ \f\t\v]*\n)*
  
  <SC_BRACED_CODE>
  {
-  "{"|"<"{splice}"%"  STRING_GROW; braces_level++;
-  "%"{splice}">"      STRING_GROW; braces_level--;
+  "{"|"<"{splice}"%"  STRING_GROW; nesting++;
+  "%"{splice}">"      STRING_GROW; nesting--;
    "}" {
      obstack_1grow (&obstack_for_string, '}');
  
-    --braces_level;
-    if (braces_level < 0)
+    --nesting;
+    if (nesting < 0)
        {
         STRING_FINISH;
         loc->start = code_start;
diff --git a/tests/c++.at b/tests/c++.at

index d236f61903cf8b630f119ee812fca94fdcfadbba..0cfcaf0549e45dbc9969342931ea1f5d745e4244 100644 (file)
--- a/tests/c++.at
+++ b/tests/c++.at
@@ -51,7 +51,7 @@ typedef std::list<std::string> strings_type;
  #include <iterator>
  #include <sstream>
  
- static
+  static
  #if defined USE_LEX_SYMBOL
    yy::parser::symbol_type yylex ();
  #else
@@ -86,26 +86,30 @@ typedef std::list<std::string> strings_type;
  
  %token <std::string> TEXT;
  %token <int> NUMBER;
-%printer { debug_stream() << $][$; } <int> <std::string> <strings_type>;
  %token END_OF_FILE 0;
  
  %type <std::string> item;
-%type <strings_type> list result;
+// Using the template type to exercise its parsing.
+// Starting with :: to ensure we don't output "<::" which starts with the
+// digraph for the left square bracket.
+%type <::std::list<std::string>> list result;
  
+%printer { debug_stream() << $][$; }
+  <int> <::std::string> <::std::list<::std::string>>;
  %%
  
  result:
-  list         { std::cout << $][1; }
+  list          { std::cout << $][1; }
  ;
  
  list:
-  /* nothing */        { /* Generates an empty string list */ }
-| list item    { std::swap($][$,$][1); $$.push_back($][2); }
+  /* nothing */ { /* Generates an empty string list */ }
+| list item     { std::swap($][$,$][1); $$.push_back($][2); }
  ;
  
  item:
-  TEXT         { std::swap($][$,$][1); }
-| NUMBER       { $][$ = string_cast($][1); }
+  TEXT          { std::swap($][$,$][1); }
+| NUMBER        { $][$ = string_cast($][1); }
  ;
  %%
  
@@ -164,7 +168,7 @@ yy::parser::token_type yylex(yy::parser::semantic_type* yylval,
  
  void
  yy::parser::error(const yy::parser::location_type&,
-                 const std::string& message)
+                  const std::string& message)
  {
    std::cerr << message << std::endl;
  }
@@ -363,5 +367,5 @@ AT_CHECK_NAMESPACE([[foo: :bar]], [[-]])
  # contains single occurrences of `:'.
  AT_CHECK_NAMESPACE([[foo[3]::bar::baz]], [[-]])
  AT_CHECK_NAMESPACE([[foo::bar,baz]], [[-]])
-AT_CHECK_NAMESPACE([[foo::bar::(baz]], [[-]])
+AT_CHECK_NAMESPACE([[foo::bar::(baz /* Pacify Emacs ) */]], [[-]])
  AT_CLEANUP
author	Akim Demaille <demaille@gostai.com>
	Fri, 24 Oct 2008 01:01:48 +0000 (20:01 -0500)
committer	Akim Demaille <demaille@gostai.com>
	Sat, 15 Nov 2008 13:30:05 +0000 (14:30 +0100)
ChangeLog		patch \| blob \| blame \| history
data/lalr1.cc		patch \| blob \| blame \| history
examples/variant.yy		patch \| blob \| blame \| history
src/parse-gram.y		patch \| blob \| blame \| history
src/scan-gram.l		patch \| blob \| blame \| history
tests/c++.at		patch \| blob \| blame \| history