* src/output.c (output, prepare): Make sure the values of the

[bison.git] / src / lex.c
diff --git a/src/lex.c b/src/lex.c

index 4eefa737abb4e1a9ca24d3104c8de7f72bace11d..78b8a8f391bf84f8db7981a39e008575f18dd8e4 100644 (file)
--- a/src/lex.c
+++ b/src/lex.c
@@ -1,5 +1,5 @@
  /* Token-reader for Bison's input parser,
  /* Token-reader for Bison's input parser,
-   Copyright 1984, 1986, 1989, 1992, 2000 Free Software Foundation, Inc.
+   Copyright 1984, 1986, 1989, 1992, 2000, 2001 Free Software Foundation, Inc.
  
     This file is part of Bison, the GNU Compiler Compiler.
  
  
     This file is part of Bison, the GNU Compiler Compiler.
  
@@ -21,30 +21,37 @@
  #include "system.h"
  #include "getargs.h"
  #include "files.h"
  #include "system.h"
  #include "getargs.h"
  #include "files.h"
-#include "getopt.h"            /* for optarg */
  #include "symtab.h"
  #include "symtab.h"
+#include "options.h"
  #include "lex.h"
  #include "lex.h"
-#include "xalloc.h"
  #include "complain.h"
  #include "gram.h"
  #include "quote.h"
  
  /* Buffer for storing the current token.  */
  #include "complain.h"
  #include "gram.h"
  #include "quote.h"
  
  /* Buffer for storing the current token.  */
-struct obstack token_obstack;
-char *token_buffer = NULL;
+static struct obstack token_obstack;
+const char *token_buffer = NULL;
  
  
-bucket *symval;
+bucket *symval = NULL;
  int numval;
  
  int numval;
  
-static int unlexed;            /* these two describe a token to be reread */
-static bucket *unlexed_symval; /* by the next call to lex */
-
+/* A token to be reread, see unlex and lex. */
+static token_t unlexed = tok_undef;
+static bucket *unlexed_symval = NULL;
+static const char *unlexed_token_buffer = NULL;
  
  void
  
  void
-init_lex (void)
+lex_init (void)
  {
    obstack_init (&token_obstack);
  {
    obstack_init (&token_obstack);
-  unlexed = -1;
+  unlexed = tok_undef;
+}
+
+
+void
+lex_free (void)
+{
+  obstack_free (&token_obstack, NULL);
  }
  
  
  }
  
  
@@ -145,7 +152,7 @@ xgetc (FILE *f)
     being, I prefer to have literalchar behave like quotearg, and change
     my mind later if I was wrong.  */
  
     being, I prefer to have literalchar behave like quotearg, and change
     my mind later if I was wrong.  */
  
-static int
+int
  literalchar (struct obstack *out, int *pcode, char term)
  {
    int c;
  literalchar (struct obstack *out, int *pcode, char term)
  {
    int c;
@@ -234,10 +241,10 @@ literalchar (struct obstack *out, int *pcode, char term)
         }
        else
         {
         }
        else
         {
-         char buf [] = "c";
-         buf[0] = c;
+         char badchar [] = "c";
+         badchar[0] = c;
           complain (_("unknown escape sequence: `\\' followed by `%s'"),
           complain (_("unknown escape sequence: `\\' followed by `%s'"),
-                   quote (buf));
+                   quote (badchar));
           code = '?';
         }
      }                          /* has \ */
           code = '?';
         }
      }                          /* has \ */
@@ -313,9 +320,10 @@ literalchar (struct obstack *out, int *pcode, char term)
  
  
  void
  
  
  void
-unlex (int token)
+unlex (token_t token)
  {
    unlexed = token;
  {
    unlexed = token;
+  unlexed_token_buffer = token_buffer;
    unlexed_symval = symval;
  }
  
    unlexed_symval = symval;
  }
  
@@ -356,12 +364,13 @@ lex (void)
    /* Just to make sure. */
    token_buffer = NULL;
  
    /* Just to make sure. */
    token_buffer = NULL;
  
-  if (unlexed >= 0)
+  if (unlexed != tok_undef)
      {
      {
+      token_t res = unlexed;
        symval = unlexed_symval;
        symval = unlexed_symval;
-      c = unlexed;
-      unlexed = -1;
-      return c;
+      token_buffer = unlexed_token_buffer;
+      unlexed = tok_undef;
+      return res;
      }
  
    c = skip_white_space ();
      }
  
    c = skip_white_space ();
@@ -417,9 +426,8 @@ lex (void)
      case '\'':
        /* parse the literal token and compute character code in  code  */
  
      case '\'':
        /* parse the literal token and compute character code in  code  */
  
-      translations = -1;
        {
        {
-       int code, discode;
+       int code;
  
         obstack_1grow (&token_obstack, '\'');
         literalchar (&token_obstack, &code, '\'');
  
         obstack_1grow (&token_obstack, '\'');
         literalchar (&token_obstack, &code, '\'');
@@ -427,6 +435,7 @@ lex (void)
         c = getc (finput);
         if (c != '\'')
           {
         c = getc (finput);
         if (c != '\'')
           {
+           int discode;
             complain (_("use \"...\" for multi-character literal tokens"));
             while (1)
               if (!literalchar (0, &discode, '\''))
             complain (_("use \"...\" for multi-character literal tokens"));
             while (1)
               if (!literalchar (0, &discode, '\''))
@@ -437,7 +446,7 @@ lex (void)
         token_buffer = obstack_finish (&token_obstack);
         symval = getsym (token_buffer);
         symval->class = token_sym;
         token_buffer = obstack_finish (&token_obstack);
         symval = getsym (token_buffer);
         symval->class = token_sym;
-       if (!symval->user_token_number)
+       if (symval->user_token_number == SUNDEF)
           symval->user_token_number = code;
         return tok_identifier;
        }
           symval->user_token_number = code;
         return tok_identifier;
        }
@@ -445,7 +454,6 @@ lex (void)
      case '\"':
        /* parse the literal string token and treat as an identifier */
  
      case '\"':
        /* parse the literal string token and treat as an identifier */
  
-      translations = -1;
        {
         int code;               /* ignored here */
  
        {
         int code;               /* ignored here */
  
@@ -463,32 +471,40 @@ lex (void)
        }
  
      case ',':
        }
  
      case ',':
+      token_buffer = ",";
        return tok_comma;
  
      case ':':
        return tok_comma;
  
      case ':':
+      token_buffer = ":";
        return tok_colon;
  
      case ';':
        return tok_colon;
  
      case ';':
+      token_buffer = ";";
        return tok_semicolon;
  
      case '|':
        return tok_semicolon;
  
      case '|':
+      token_buffer = "|";
        return tok_bar;
  
      case '{':
        return tok_bar;
  
      case '{':
+      token_buffer = "{";
        return tok_left_curly;
  
      case '=':
        return tok_left_curly;
  
      case '=':
+      obstack_1grow (&token_obstack, c);
        do
         {
           c = getc (finput);
        do
         {
           c = getc (finput);
+         obstack_1grow (&token_obstack, c);
           if (c == '\n')
             lineno++;
         }
        while (c == ' ' || c == '\n' || c == '\t');
           if (c == '\n')
             lineno++;
         }
        while (c == ' ' || c == '\n' || c == '\t');
+      obstack_1grow (&token_obstack, '\0');
+      token_buffer = obstack_finish (&token_obstack);
  
        if (c == '{')
         {
  
        if (c == '{')
         {
-         token_buffer = "={";
           return tok_left_curly;
         }
        else
           return tok_left_curly;
         }
        else
@@ -505,71 +521,44 @@ lex (void)
        return parse_percent_token ();
  
      default:
        return parse_percent_token ();
  
      default:
+      obstack_1grow (&token_obstack, c);
+      obstack_1grow (&token_obstack, '\0');
+      token_buffer = obstack_finish (&token_obstack);
        return tok_illegal;
      }
  }
  
        return tok_illegal;
      }
  }
  
-/* the following table dictates the action taken for the various %
-   directives.  A set_flag value causes the named flag to be set.  A
-   retval action returns the code.  */
-struct percent_table_struct
-{
-  const char *name;
-  void *set_flag;
-  int retval;
-};
+/* This function is a strcmp, which doesn't differentiate `-' and `_'
+   chars.  */
  
  
-struct percent_table_struct percent_table[] =
+static int
+option_strcmp (const char *left, const char *right)
  {
  {
-  { "token",           NULL,                   tok_token },
-  { "term",            NULL,                   tok_token },
-  { "nterm",           NULL,                   tok_nterm },
-  { "type",            NULL,                   tok_type },
-  { "guard",           NULL,                   tok_guard },
-  { "union",           NULL,                   tok_union },
-  { "expect",          NULL,                   tok_expect },
-  { "thong",           NULL,                   tok_thong },
-  { "start",           NULL,                   tok_start },
-  { "left",            NULL,                   tok_left },
-  { "right",           NULL,                   tok_right },
-  { "nonassoc",                NULL,                   tok_nonassoc },
-  { "binary",          NULL,                   tok_nonassoc },
-  { "prec",            NULL,                   tok_prec },
-  { "locations",       &locations_flag,        tok_noop },     /* -l */
-  { "no_lines",                &no_lines_flag,         tok_noop },     /* -l */
-  { "raw",             &raw_flag,              tok_noop },     /* -r */
-  { "token_table",     &token_table_flag,      tok_noop },     /* -k */
-  { "yacc",            &yacc_flag,             tok_noop },     /* -y */
-  { "fixed_output_files",&yacc_flag,           tok_noop },     /* -y */
-  { "defines",         &defines_flag,          tok_noop },     /* -d */
-  { "no_parser",       &no_parser_flag,        tok_noop },     /* -n */
-#if 0
-  /* For the time being, this is not enabled yet, while it's possible
-     though, since we use obstacks.  The only risk is with semantic
-     parsers which will output an `include' of an output file: be sure
-     that the naem included is indeed the name of the output file.  */
-  { "output_file",     &spec_outfile,          tok_setopt },   /* -o */
-  { "file_prefix",     &spec_file_prefix,      tok_setopt },   /* -b */
-  { "name_prefix",     &spec_name_prefix,      tok_setopt },   /* -p */
-#endif
-  { "verbose",         &verbose_flag,          tok_noop },     /* -v */
-  { "debug",           &debug_flag,            tok_noop },     /* -t */
-  { "semantic_parser", &semantic_parser,       tok_noop },
-  { "pure_parser",     &pure_parser,           tok_noop },
-
-  { NULL, NULL, tok_illegal}
-};
+  const unsigned char *l, *r;
+  int c;
+
+  assert (left);
+  assert (right);
+  l = (const unsigned char *)left;
+  r = (const unsigned char *)right;
+  while (((c = *l - *r++) == 0 && *l != '\0')
+        || ((*l == '-' || *l == '_') && (*r == '_' || *r == '-')))
+    l++;
+  return c;
+}
  
  /* Parse a token which starts with %.
     Assumes the % has already been read and discarded.  */
  
  
  /* Parse a token which starts with %.
     Assumes the % has already been read and discarded.  */
  
-int
+token_t
  parse_percent_token (void)
  {
  parse_percent_token (void)
  {
-  int c;
-  struct percent_table_struct *tx;
+  const struct option_table_struct *tx = NULL;
+  const char *arg = NULL;
+  /* Where the ARG was found in token_buffer. */
+  size_t arg_offset = 0;
  
  
-  c = getc (finput);
+  int c = getc (finput);
  
    switch (c)
      {
  
    switch (c)
      {
@@ -579,6 +568,8 @@ parse_percent_token (void)
      case '{':
        return tok_percent_left_curly;
  
      case '{':
        return tok_percent_left_curly;
  
+      /* FIXME: Who the heck are those 5 guys!?! `%<' = `%left'!!!
+        Let's ask for their removal.  */
      case '<':
        return tok_left;
  
      case '<':
        return tok_left;
  
@@ -601,30 +592,83 @@ parse_percent_token (void)
    obstack_1grow (&token_obstack, '%');
    while (isalpha (c) || c == '_' || c == '-')
      {
    obstack_1grow (&token_obstack, '%');
    while (isalpha (c) || c == '_' || c == '-')
      {
-      if (c == '-')
-       c = '_';
+      if (c == '_')
+       c = '-';
        obstack_1grow (&token_obstack, c);
        c = getc (finput);
      }
  
        obstack_1grow (&token_obstack, c);
        c = getc (finput);
      }
  
-  ungetc (c, finput);
+  /* %DIRECTIVE="ARG".  Separate into
+     TOKEN_BUFFER = `%DIRECTIVE\0ARG\0'.
+     This is a bit hackish, but once we move to a Bison parser,
+     things will be cleaned up.  */
+  if (c == '=')
+    {
+      /* End of the directive.  We skip the `='. */
+      obstack_1grow (&token_obstack, '\0');
+      /* Fetch the ARG if present. */
+      c = getc (finput);
+      if (c == '"')
+       {
+         int code;
+         arg_offset = obstack_object_size (&token_obstack);
+         /* Read up to and including `"'.  Do not append the closing
+            `"' in the output: it's not part of the ARG.  */
+         while (literalchar (NULL, &code, '"'))
+           obstack_1grow (&token_obstack, code);
+       }
+      /* else: should be an error. */
+    }
+  else
+    ungetc (c, finput);
+
    obstack_1grow (&token_obstack, '\0');
    token_buffer = obstack_finish (&token_obstack);
    obstack_1grow (&token_obstack, '\0');
    token_buffer = obstack_finish (&token_obstack);
+  if (arg_offset)
+    arg = token_buffer + arg_offset;
  
    /* table lookup % directive */
  
    /* table lookup % directive */
-  for (tx = percent_table; tx->name; tx++)
-    if (strcmp (token_buffer + 1, tx->name) == 0)
+  for (tx = option_table; tx->name; tx++)
+    if ((tx->access == opt_percent || tx->access == opt_both)
+       && option_strcmp (token_buffer + 1, tx->name) == 0)
        break;
  
        break;
  
-  if (tx->retval == tok_setopt)
+  if (arg && tx->ret_val != tok_stropt)
+    fatal (_("`%s' supports no argument: %s"), token_buffer, quote (arg));
+
+
+  switch (tx->ret_val)
      {
      {
-      *((char **) (tx->set_flag)) = optarg;
+    case tok_stropt:
+      assert (tx->set_flag);
+      if (arg)
+       {
+         /* Keep only the first assignment: command line options have
+            already been processed, and we want them to have
+            precedence.  Side effect: if this %-option is used
+            several times, only the first is honored.  Bah.  */
+         if (!*((char **) (tx->set_flag)))
+           *((char **) (tx->set_flag)) = xstrdup (arg);
+       }
+      else
+       fatal (_("`%s' requires an argument"), token_buffer);
        return tok_noop;
        return tok_noop;
-    }
-  if (tx->set_flag)
-    {
+      break;
+
+    case tok_intopt:
+      assert (tx->set_flag);
        *((int *) (tx->set_flag)) = 1;
        return tok_noop;
        *((int *) (tx->set_flag)) = 1;
        return tok_noop;
+      break;
+
+    case tok_obsolete:
+      fatal (_("`%s' is no longer supported"), token_buffer);
+      return tok_noop;
+      break;
+
+    default:
+      return tx->ret_val;
+      break;
      }
      }
-  return tx->retval;
+  abort ();
  }
  }