X-Git-Url: https://git.saurik.com/bison.git/blobdiff_plain/f17bcd1f66dab24135ca3352491ac8fa4385fe37..e89a22bfab22e4d2ee73be49dcb66b51f8d0e892:/src/lex.c diff --git a/src/lex.c b/src/lex.c index 987777be..1d2424e2 100644 --- a/src/lex.c +++ b/src/lex.c @@ -1,5 +1,5 @@ /* Token-reader for Bison's input parser, - Copyright 1984, 1986, 1989, 1992, 2000 Free Software Foundation, Inc. + Copyright 1984, 1986, 1989, 1992, 2000, 2001 Free Software Foundation, Inc. This file is part of Bison, the GNU Compiler Compiler. @@ -21,30 +21,36 @@ #include "system.h" #include "getargs.h" #include "files.h" -#include "getopt.h" /* for optarg */ #include "symtab.h" #include "lex.h" -#include "xalloc.h" #include "complain.h" #include "gram.h" #include "quote.h" /* Buffer for storing the current token. */ struct obstack token_obstack; -char *token_buffer = NULL; +const char *token_buffer = NULL; -bucket *symval; +bucket *symval = NULL; int numval; -static int unlexed; /* these two describe a token to be reread */ -static bucket *unlexed_symval; /* by the next call to lex */ - +/* A token to be reread, see unlex and lex. */ +static token_t unlexed = tok_undef; +static bucket *unlexed_symval = NULL; +static const char *unlexed_token_buffer = NULL; void -init_lex (void) +lex_init (void) { obstack_init (&token_obstack); - unlexed = -1; + unlexed = tok_undef; +} + + +void +lex_free (void) +{ + obstack_free (&token_obstack, NULL); } @@ -234,10 +240,10 @@ literalchar (struct obstack *out, int *pcode, char term) } else { - char buf [] = "c"; - buf[0] = c; + char badchar [] = "c"; + badchar[0] = c; complain (_("unknown escape sequence: `\\' followed by `%s'"), - quote (buf)); + quote (badchar)); code = '?'; } } /* has \ */ @@ -313,9 +319,10 @@ literalchar (struct obstack *out, int *pcode, char term) void -unlex (int token) +unlex (token_t token) { unlexed = token; + unlexed_token_buffer = token_buffer; unlexed_symval = symval; } @@ -356,12 +363,13 @@ lex (void) /* Just to make sure. */ token_buffer = NULL; - if (unlexed >= 0) + if (unlexed != tok_undef) { + token_t res = unlexed; symval = unlexed_symval; - c = unlexed; - unlexed = -1; - return c; + token_buffer = unlexed_token_buffer; + unlexed = tok_undef; + return res; } c = skip_white_space (); @@ -417,9 +425,8 @@ lex (void) case '\'': /* parse the literal token and compute character code in code */ - translations = -1; { - int code, discode; + int code; obstack_1grow (&token_obstack, '\''); literalchar (&token_obstack, &code, '\''); @@ -427,6 +434,7 @@ lex (void) c = getc (finput); if (c != '\'') { + int discode; complain (_("use \"...\" for multi-character literal tokens")); while (1) if (!literalchar (0, &discode, '\'')) @@ -437,7 +445,7 @@ lex (void) token_buffer = obstack_finish (&token_obstack); symval = getsym (token_buffer); symval->class = token_sym; - if (!symval->user_token_number) + if (symval->user_token_number == SUNDEF) symval->user_token_number = code; return tok_identifier; } @@ -445,7 +453,6 @@ lex (void) case '\"': /* parse the literal string token and treat as an identifier */ - translations = -1; { int code; /* ignored here */ @@ -463,32 +470,40 @@ lex (void) } case ',': + token_buffer = ","; return tok_comma; case ':': + token_buffer = ":"; return tok_colon; case ';': + token_buffer = ";"; return tok_semicolon; case '|': + token_buffer = "|"; return tok_bar; case '{': + token_buffer = "{"; return tok_left_curly; case '=': + obstack_1grow (&token_obstack, c); do { c = getc (finput); + obstack_1grow (&token_obstack, c); if (c == '\n') lineno++; } while (c == ' ' || c == '\n' || c == '\t'); + obstack_1grow (&token_obstack, '\0'); + token_buffer = obstack_finish (&token_obstack); if (c == '{') { - token_buffer = "={"; return tok_left_curly; } else @@ -505,6 +520,9 @@ lex (void) return parse_percent_token (); default: + obstack_1grow (&token_obstack, c); + obstack_1grow (&token_obstack, '\0'); + token_buffer = obstack_finish (&token_obstack); return tok_illegal; } } @@ -516,7 +534,7 @@ struct percent_table_struct { const char *name; void *set_flag; - int retval; + token_t retval; }; struct percent_table_struct percent_table[] = @@ -535,97 +553,162 @@ struct percent_table_struct percent_table[] = { "nonassoc", NULL, tok_nonassoc }, { "binary", NULL, tok_nonassoc }, { "prec", NULL, tok_prec }, - { "locations", &locations_flag, tok_noop }, /* -l */ - { "no_lines", &no_lines_flag, tok_noop }, /* -l */ - { "raw", &raw_flag, tok_noop }, /* -r */ - { "token_table", &token_table_flag, tok_noop }, /* -k */ - { "yacc", &yacc_flag, tok_noop }, /* -y */ - { "fixed_output_files",&yacc_flag, tok_noop }, /* -y */ - { "defines", &defines_flag, tok_noop }, /* -d */ - { "no_parser", &no_parser_flag, tok_noop }, /* -n */ -#if 0 - /* For the time being, this is not enabled yet, while it's possible - though, since we use obstacks. The only risk is with semantic - parsers which will output an `include' of an output file: be sure - that the naem included is indeed the name of the output file. */ - { "output_file", &spec_outfile, tok_setopt }, /* -o */ - { "file_prefix", &spec_file_prefix, tok_setopt }, /* -b */ - { "name_prefix", &spec_name_prefix, tok_setopt }, /* -p */ -#endif - { "verbose", &verbose_flag, tok_noop }, /* -v */ - { "debug", &debug_flag, tok_noop }, /* -t */ - { "semantic_parser", &semantic_parser, tok_noop }, - { "pure_parser", &pure_parser, tok_noop }, -/* {"help", , tok_noop}, *//* -h */ -/* {"version", , tok_noop}, *//* -V */ + { "locations", &locations_flag, tok_intopt }, /* -l */ + { "no-lines", &no_lines_flag, tok_intopt }, /* -l */ + { "raw", NULL, tok_obsolete }, /* -r */ + { "token-table", &token_table_flag, tok_intopt }, /* -k */ + { "yacc", &yacc_flag, tok_intopt }, /* -y */ + { "fixed-output-files",&yacc_flag, tok_intopt }, /* -y */ + { "defines", &defines_flag, tok_intopt }, /* -d */ + { "no-parser", &no_parser_flag, tok_intopt }, /* -n */ + { "graph", &graph_flag, tok_intopt }, /* -g */ + + /* FIXME: semantic parsers which will output an `include' of an + output file: be sure that the name included is indeed the name of + the output file. */ + { "output", &spec_outfile, tok_stropt }, /* -o */ + { "file-prefix", &spec_file_prefix, tok_stropt }, /* -b */ + { "name-prefix", &spec_name_prefix, tok_stropt }, /* -p */ + + { "verbose", &verbose_flag, tok_intopt }, /* -v */ + { "debug", &debug_flag, tok_intopt }, /* -t */ + { "semantic-parser", &semantic_parser, tok_intopt }, + { "pure-parser", &pure_parser, tok_intopt }, + { NULL, NULL, tok_illegal} }; /* Parse a token which starts with %. Assumes the % has already been read and discarded. */ -int +token_t parse_percent_token (void) { - int c; - struct percent_table_struct *tx; + struct percent_table_struct *tx = NULL; + const char *arg = NULL; + /* Where the ARG was found in token_buffer. */ + size_t arg_offset = 0; - c = getc (finput); + int c = getc (finput); + obstack_1grow (&token_obstack, '%'); + obstack_1grow (&token_obstack, c); switch (c) { case '%': + token_buffer = obstack_finish (&token_obstack); return tok_two_percents; case '{': + token_buffer = obstack_finish (&token_obstack); return tok_percent_left_curly; + /* The following guys are here for backward compatibility with + very ancient Yacc versions. The paper of Johnson mentions + them (as ancient :). */ case '<': + token_buffer = obstack_finish (&token_obstack); return tok_left; case '>': + token_buffer = obstack_finish (&token_obstack); return tok_right; case '2': + token_buffer = obstack_finish (&token_obstack); return tok_nonassoc; case '0': + token_buffer = obstack_finish (&token_obstack); return tok_token; case '=': + token_buffer = obstack_finish (&token_obstack); return tok_prec; } if (!isalpha (c)) - return tok_illegal; + { + token_buffer = obstack_finish (&token_obstack); + return tok_illegal; + } - obstack_1grow (&token_obstack, '%'); - while (isalpha (c) || c == '_' || c == '-') + while (c = getc (finput), isalpha (c) || c == '_' || c == '-') { - if (c == '-') - c = '_'; + if (c == '_') + c = '-'; obstack_1grow (&token_obstack, c); + } + + /* %DIRECTIVE="ARG". Separate into + TOKEN_BUFFER = `%DIRECTIVE\0ARG\0'. + This is a bit hackish, but once we move to a Bison parser, + things will be cleaned up. */ + if (c == '=') + { + /* End of the directive. We skip the `='. */ + obstack_1grow (&token_obstack, '\0'); + /* Fetch the ARG if present. */ c = getc (finput); + if (c == '"') + { + int code; + arg_offset = obstack_object_size (&token_obstack); + /* Read up to and including `"'. Do not append the closing + `"' in the output: it's not part of the ARG. */ + while (literalchar (NULL, &code, '"')) + obstack_1grow (&token_obstack, code); + } + /* else: should be an error. */ } + else + ungetc (c, finput); - ungetc (c, finput); obstack_1grow (&token_obstack, '\0'); token_buffer = obstack_finish (&token_obstack); + if (arg_offset) + arg = token_buffer + arg_offset; /* table lookup % directive */ for (tx = percent_table; tx->name; tx++) if (strcmp (token_buffer + 1, tx->name) == 0) break; - if (tx->retval == tok_setopt) + if (arg && tx->retval != tok_stropt) + fatal (_("`%s' supports no argument: %s"), token_buffer, quote (arg)); + + switch (tx->retval) { - *((char **) (tx->set_flag)) = optarg; + case tok_stropt: + assert (tx->set_flag); + if (arg) + { + /* Keep only the first assignment: command line options have + already been processed, and we want them to have + precedence. Side effect: if this %-option is used + several times, only the first is honored. Bah. */ + if (!*((char **) (tx->set_flag))) + *((char **) (tx->set_flag)) = xstrdup (arg); + } + else + fatal (_("`%s' requires an argument"), token_buffer); return tok_noop; - } - if (tx->set_flag) - { + break; + + case tok_intopt: + assert (tx->set_flag); *((int *) (tx->set_flag)) = 1; return tok_noop; + break; + + case tok_obsolete: + fatal (_("`%s' is no longer supported"), token_buffer); + return tok_noop; + break; + + default: + return tx->retval; + break; } - return tx->retval; + abort (); }