Add support for hex token numbers.

[bison.git] / src / scan-gram.l
diff --git a/src/scan-gram.l b/src/scan-gram.l

index 77fe5beb63823a7595e0423110c49565135a221f..849b72e15fece6fd555cbc5a261b7667c002699a 100644 (file)
--- a/src/scan-gram.l
+++ b/src/scan-gram.l
@@ -1,6 +1,6 @@
  /* Bison Grammar Scanner                             -*- C -*-
  
-   Copyright (C) 2002, 2003 Free Software Foundation, Inc.
+   Copyright (C) 2002, 2003, 2004 Free Software Foundation, Inc.
  
     This file is part of Bison, the GNU Compiler Compiler.
  
@@ -20,7 +20,7 @@
     02111-1307  USA
  */
  
-%option debug nodefault noyywrap never-interactive
+%option debug nodefault nounput noyywrap never-interactive
  %option prefix="gram_" outfile="lex.yy.c"
  
  %{
@@ -101,8 +101,9 @@ static int rule_length;
  static void handle_dollar (int token_type, char *cp, location loc);
  static void handle_at (int token_type, char *cp, location loc);
  static void handle_syncline (char *args);
+static unsigned long int scan_integer (char const *p, int base, location loc);
  static int convert_ucn_to_byte (char const *hex_text);
-static void unexpected_end_of_file (boundary, char const *);
+static void unexpected_eof (boundary, char const *);
  
  %}
  %x SC_COMMENT SC_LINE_COMMENT SC_YACC_COMMENT
@@ -182,6 +183,7 @@ splice       (\\[ \f\t\v]*\n)*
  {
    "%binary"               return PERCENT_NONASSOC;
    "%debug"                return PERCENT_DEBUG;
+  "%default"[-_]"prec"    return PERCENT_DEFAULT_PREC;
    "%define"               return PERCENT_DEFINE;
    "%defines"              return PERCENT_DEFINES;
    "%destructor"                  token_type = PERCENT_DESTRUCTOR; BEGIN SC_PRE_CODE;
@@ -190,14 +192,17 @@ splice     (\\[ \f\t\v]*\n)*
    "%expect"               return PERCENT_EXPECT;
    "%file-prefix"          return PERCENT_FILE_PREFIX;
    "%fixed"[-_]"output"[-_]"files"   return PERCENT_YACC;
+  "%initial-action"       token_type = PERCENT_INITIAL_ACTION; BEGIN SC_PRE_CODE;
    "%glr-parser"           return PERCENT_GLR_PARSER;
    "%left"                 return PERCENT_LEFT;
    "%lex-param"           token_type = PERCENT_LEX_PARAM; BEGIN SC_PRE_CODE;
    "%locations"            return PERCENT_LOCATIONS;
    "%merge"               return PERCENT_MERGE;
    "%name"[-_]"prefix"     return PERCENT_NAME_PREFIX;
+  "%no"[-_]"default"[-_]"prec" return PERCENT_NO_DEFAULT_PREC;
    "%no"[-_]"lines"        return PERCENT_NO_LINES;
    "%nonassoc"             return PERCENT_NONASSOC;
+  "%nondeterministic-parser"   return PERCENT_NONDETERMINISTIC_PARSER;
    "%nterm"                return PERCENT_NTERM;
    "%output"               return PERCENT_OUTPUT;
    "%parse-param"         token_type = PERCENT_PARSE_PARAM; BEGIN SC_PRE_CODE;
@@ -231,15 +236,11 @@ splice     (\\[ \f\t\v]*\n)*
    }
  
    {int} {
-    unsigned long num;
-    set_errno (0);
-    num = strtoul (yytext, 0, 10);
-    if (INT_MAX < num || get_errno ())
-      {
-       complain_at (*loc, _("integer out of range: %s"), quote (yytext));
-       num = INT_MAX;
-      }
-    val->integer = num;
+    val->integer = scan_integer (yytext, 10, *loc);
+    return INT;
+  }
+  0[xX][0-9abcdefABCDEF]+ {
+    val->integer = scan_integer (yytext, 16, *loc);
      return INT;
    }
  
@@ -323,7 +324,7 @@ splice       (\\[ \f\t\v]*\n)*
  {
    "*/"     BEGIN context_state;
    .|\n    ;
-  <<EOF>>  unexpected_end_of_file (token_start, "*/");
+  <<EOF>>  unexpected_eof (token_start, "*/"); BEGIN context_state;
  }
  
  
@@ -334,7 +335,7 @@ splice       (\\[ \f\t\v]*\n)*
  <SC_COMMENT>
  {
    "*"{splice}"/"  STRING_GROW; BEGIN context_state;
-  <<EOF>>        unexpected_end_of_file (token_start, "*/");
+  <<EOF>>        unexpected_eof (token_start, "*/"); BEGIN context_state;
  }
  
  
@@ -367,8 +368,9 @@ splice       (\\[ \f\t\v]*\n)*
      return STRING;
    }
  
+  \0       complain_at (*loc, _("invalid null character"));
    .|\n     STRING_GROW;
-  <<EOF>>   unexpected_end_of_file (token_start, "\"");
+  <<EOF>>   unexpected_eof (token_start, "\""); BEGIN INITIAL;
  }
  
    /*---------------------------------------------------------------.
@@ -393,8 +395,9 @@ splice       (\\[ \f\t\v]*\n)*
      return ID;
    }
  
+  \0       complain_at (*loc, _("invalid null character"));
    .|\n     STRING_GROW;
-  <<EOF>>   unexpected_end_of_file (token_start, "'");
+  <<EOF>>   unexpected_eof (token_start, "'"); BEGIN INITIAL;
  }
  
  
@@ -405,19 +408,23 @@ splice     (\\[ \f\t\v]*\n)*
  <SC_ESCAPED_STRING,SC_ESCAPED_CHARACTER>
  {
    \\[0-7]{1,3} {
-    unsigned long c = strtoul (yytext + 1, 0, 8);
+    unsigned long int c = strtoul (yytext + 1, 0, 8);
      if (UCHAR_MAX < c)
        complain_at (*loc, _("invalid escape sequence: %s"), quote (yytext));
+    else if (! c) 
+      complain_at (*loc, _("invalid null character: %s"), quote (yytext));
      else
        obstack_1grow (&obstack_for_string, c);
    }
  
    \\x[0-9abcdefABCDEF]+ {
-    unsigned long c;
+    unsigned long int c;
      set_errno (0);
      c = strtoul (yytext + 2, 0, 16);
      if (UCHAR_MAX < c || get_errno ())
        complain_at (*loc, _("invalid escape sequence: %s"), quote (yytext));
+    else if (! c)
+      complain_at (*loc, _("invalid null character: %s"), quote (yytext));
      else
        obstack_1grow (&obstack_for_string, c);
    }
@@ -437,6 +444,8 @@ splice       (\\[ \f\t\v]*\n)*
      int c = convert_ucn_to_byte (yytext);
      if (c < 0)
        complain_at (*loc, _("invalid escape sequence: %s"), quote (yytext));
+    else if (! c)
+      complain_at (*loc, _("invalid null character: %s"), quote (yytext));
      else
        obstack_1grow (&obstack_for_string, c);
    }
@@ -456,7 +465,7 @@ splice       (\\[ \f\t\v]*\n)*
  {
    "'"                  STRING_GROW; BEGIN context_state;
    \\{splice}[^$@\[\]]  STRING_GROW;
-  <<EOF>>              unexpected_end_of_file (token_start, "'");
+  <<EOF>>              unexpected_eof (token_start, "'"); BEGIN context_state;
  }
  
  
@@ -469,7 +478,10 @@ splice      (\\[ \f\t\v]*\n)*
  {
    "\""                 STRING_GROW; BEGIN context_state;
    \\{splice}[^$@\[\]]  STRING_GROW;
-  <<EOF>>              unexpected_end_of_file (token_start, "\"");
+  <<EOF>> {
+    unexpected_eof (token_start, "\"");
+    BEGIN context_state;
+  }
  }
  
  
@@ -536,7 +548,7 @@ splice       (\\[ \f\t\v]*\n)*
        }
    }
  
-  <<EOF>>  unexpected_end_of_file (scanner_cursor, "{}");
+  <<EOF>>  unexpected_eof (scanner_cursor, "{}"); BEGIN INITIAL;
  }
  
  
@@ -589,7 +601,7 @@ splice       (\\[ \f\t\v]*\n)*
    "$"("<"{tag}">")?(-?[0-9]+|"$")  handle_dollar (token_type, yytext, *loc);
    "@"(-?[0-9]+|"$")               handle_at (token_type, yytext, *loc);
  
-  <<EOF>>  unexpected_end_of_file (code_start, "}");
+  <<EOF>>  unexpected_eof (code_start, "}"); BEGIN INITIAL;
  }
  
  
@@ -607,7 +619,7 @@ splice       (\\[ \f\t\v]*\n)*
      return PROLOGUE;
    }
  
-  <<EOF>>  unexpected_end_of_file (code_start, "%}");
+  <<EOF>>  unexpected_eof (code_start, "%}"); BEGIN INITIAL;
  }
  
  
@@ -645,6 +657,11 @@ splice      (\\[ \f\t\v]*\n)*
  
  %%
  
+/* Keeps track of the maximum number of semantic values to the left of
+   a handle (those referenced by $0, $-1, etc.) that are required by
+   the semantic actions of this grammar.  */
+int max_left_semantic_context = 0;
+
  /* Set *LOC and adjust scanner cursor to account for token TOKEN of
     size SIZE.  */
  
@@ -770,13 +787,15 @@ handle_action_dollar (char *text, location loc)
      }
    else
      {
-      long num;
+      long int num;
        set_errno (0);
        num = strtol (cp, 0, 10);
  
        if (INT_MIN <= num && num <= rule_length && ! get_errno ())
         {
           int n = num;
+         if (1-n > max_left_semantic_context)
+           max_left_semantic_context = 1-n;
           if (!type_name && n > 0)
             type_name = symbol_list_n_type_name_get (current_rule, loc, n);
           if (!type_name && typed)
@@ -796,10 +815,10 @@ handle_action_dollar (char *text, location loc)
  }
  
  
-/*-----------------------------------------------------------------.
-| Dispatch onto handle_action_dollar, or handle_destructor_dollar, |
-| depending upon TOKEN_TYPE.                                       |
-`-----------------------------------------------------------------*/
+/*----------------------------------------------------------------.
+| Map `$?' onto the proper M4 symbol, depending on its TOKEN_TYPE |
+| (are we in an action?).                                         |
+`----------------------------------------------------------------*/
  
  static void
  handle_dollar (int token_type, char *text, location loc)
@@ -812,6 +831,7 @@ handle_dollar (int token_type, char *text, location loc)
        break;
  
      case PERCENT_DESTRUCTOR:
+    case PERCENT_INITIAL_ACTION:
      case PERCENT_PRINTER:
        if (text[1] == '$')
         {
@@ -837,7 +857,7 @@ static inline bool
  handle_action_at (char *text, location loc)
  {
    char *cp = text + 1;
-  locations_flag = 1;
+  locations_flag = true;
  
    if (! current_rule)
      return false;
@@ -846,7 +866,7 @@ handle_action_at (char *text, location loc)
      obstack_sgrow (&obstack_for_string, "]b4_lhs_location[");
    else
      {
-      long num;
+      long int num;
        set_errno (0);
        num = strtol (cp, 0, 10);
  
@@ -864,10 +884,10 @@ handle_action_at (char *text, location loc)
  }
  
  
-/*-------------------------------------------------------------------.
-| Dispatch onto handle_action_at, or handle_destructor_at, depending |
-| upon CODE_KIND.                                                    |
-`-------------------------------------------------------------------*/
+/*----------------------------------------------------------------.
+| Map `@?' onto the proper M4 symbol, depending on its TOKEN_TYPE |
+| (are we in an action?).                                         |
+`----------------------------------------------------------------*/
  
  static void
  handle_at (int token_type, char *text, location loc)
@@ -878,6 +898,7 @@ handle_at (int token_type, char *text, location loc)
        handle_action_at (text, loc);
        return;
  
+    case PERCENT_INITIAL_ACTION:
      case PERCENT_DESTRUCTOR:
      case PERCENT_PRINTER:
        if (text[1] == '$')
@@ -895,6 +916,25 @@ handle_at (int token_type, char *text, location loc)
  }
  
  
+/*------------------------------------------------------.
+| Scan NUMBER for a base-BASE integer at location LOC.  |
+`------------------------------------------------------*/
+
+static unsigned long int
+scan_integer (char const *number, int base, location loc)
+{
+  unsigned long int num;
+  set_errno (0);
+  num = strtoul (number, 0, base);
+  if (INT_MAX < num || get_errno ())
+    {
+      complain_at (loc, _("integer out of range: %s"), quote (number));
+      num = INT_MAX;
+    }
+  return num;
+}
+
+
  /*------------------------------------------------------------------.
  | Convert universal character name UCN to a single-byte character,  |
  | and return that character.  Return -1 if UCN does not correspond  |
@@ -904,7 +944,7 @@ handle_at (int token_type, char *text, location loc)
  static int
  convert_ucn_to_byte (char const *ucn)
  {
-  unsigned long code = strtoul (ucn + 2, 0, 16);
+  unsigned long int code = strtoul (ucn + 2, 0, 16);
  
    /* FIXME: Currently we assume Unicode-compatible unibyte characters
       on ASCII hosts (i.e., Latin-1 on hosts with 8-bit bytes).  On
@@ -971,25 +1011,15 @@ handle_syncline (char *args)
  /*------------------------------------------------------------------------.
  | Report an unexpected EOF in a token or comment starting at START.       |
  | An end of file was encountered and the expected TOKEN_END was missing.  |
-| After reporting the problem, pretend that TOKEN_END was found.          |
  `------------------------------------------------------------------------*/
  
  static void
-unexpected_end_of_file (boundary start, char const *token_end)
+unexpected_eof (boundary start, char const *token_end)
  {
-  size_t i = strlen (token_end);
-
    location loc;
    loc.start = start;
    loc.end = scanner_cursor;
    complain_at (loc, _("missing `%s' at end of file"), token_end);
-
-  /* Adjust scanner cursor so that any later message does not count
-     the characters about to be inserted.  */
-  scanner_cursor.column -= i;
-
-  while (i != 0)
-    unput (token_end[--i]);
  }