X-Git-Url: https://git.saurik.com/bison.git/blobdiff_plain/342b8b6e2156e923797745af2da2596e3086ccd5..ea6cfe9ebd960e2cb4573c2c6f02b02931ac1f86:/src/lex.c?ds=sidebyside diff --git a/src/lex.c b/src/lex.c index 4eb8f313..489fdf9c 100644 --- a/src/lex.c +++ b/src/lex.c @@ -21,7 +21,6 @@ #include "system.h" #include "getargs.h" #include "files.h" -#include "getopt.h" /* for optarg */ #include "symtab.h" #include "options.h" #include "lex.h" @@ -30,17 +29,16 @@ #include "quote.h" /* Buffer for storing the current token. */ -struct obstack token_obstack; +static struct obstack token_obstack; const char *token_buffer = NULL; -bucket *symval; +bucket *symval = NULL; int numval; -/* these two describe a token to be reread */ +/* A token to be reread, see unlex and lex. */ static token_t unlexed = tok_undef; -/* by the next call to lex */ static bucket *unlexed_symval = NULL; - +static const char *unlexed_token_buffer = NULL; void lex_init (void) @@ -132,7 +130,7 @@ skip_white_space (void) | Do a getc, but give error message if EOF encountered | `-----------------------------------------------------*/ -static int +int xgetc (FILE *f) { int c = getc (f); @@ -142,75 +140,62 @@ xgetc (FILE *f) } -/*------------------------------------------------------------------. -| Read one literal character from finput. Process \ escapes. | -| Append the normalized string version of the char to OUT. Assign | -| the character code to *PCODE. Return 1 unless the character is an | -| unescaped `term' or \n report error for \n. | -`------------------------------------------------------------------*/ +/*---------------------------------------------------------------. +| Read one literal character from FINPUT, process \-escapes, and | +| return the character. | +`---------------------------------------------------------------*/ -/* FIXME: We could directly work in the obstack, but that would make - it more difficult to move to quotearg some day. So for the time - being, I prefer have literalchar behave like quotearg, and change - my mind later if I was wrong. */ - -static int -literalchar (struct obstack *out, int *pcode, char term) +char +literalchar (void) { int c; - char buf[4096]; - char *cp; - int code; - int wasquote = 0; + int res; c = xgetc (finput); if (c == '\n') { complain (_("unescaped newline in constant")); ungetc (c, finput); - code = '?'; - wasquote = 1; + res = '?'; } else if (c != '\\') { - code = c; - if (c == term) - wasquote = 1; + res = c; } else { c = xgetc (finput); if (c == 't') - code = '\t'; + res = '\t'; else if (c == 'n') - code = '\n'; + res = '\n'; else if (c == 'a') - code = '\007'; + res = '\007'; else if (c == 'r') - code = '\r'; + res = '\r'; else if (c == 'f') - code = '\f'; + res = '\f'; else if (c == 'b') - code = '\b'; + res = '\b'; else if (c == 'v') - code = '\013'; + res = '\013'; else if (c == '\\') - code = '\\'; + res = '\\'; else if (c == '\'') - code = '\''; + res = '\''; else if (c == '\"') - code = '\"'; + res = '\"'; else if (c <= '7' && c >= '0') { - code = 0; + res = 0; while (c <= '7' && c >= '0') { - code = (code * 8) + (c - '0'); - if (code >= 256 || code < 0) + res = (res * 8) + (c - '0'); + if (res >= 256 || res < 0) { complain (_("octal value outside range 0...255: `\\%o'"), - code); - code &= 0xFF; + res); + res &= 0xFF; break; } c = xgetc (finput); @@ -220,21 +205,21 @@ literalchar (struct obstack *out, int *pcode, char term) else if (c == 'x') { c = xgetc (finput); - code = 0; + res = 0; while (1) { if (c >= '0' && c <= '9') - code *= 16, code += c - '0'; + res *= 16, res += c - '0'; else if (c >= 'a' && c <= 'f') - code *= 16, code += c - 'a' + 10; + res *= 16, res += c - 'a' + 10; else if (c >= 'A' && c <= 'F') - code *= 16, code += c - 'A' + 10; + res *= 16, res += c - 'A' + 10; else break; - if (code >= 256 || code < 0) + if (res >= 256 || res < 0) { - complain (_("hexadecimal value above 255: `\\x%x'"), code); - code &= 0xFF; + complain (_("hexadecimal value above 255: `\\x%x'"), res); + res &= 0xFF; break; } c = xgetc (finput); @@ -247,77 +232,11 @@ literalchar (struct obstack *out, int *pcode, char term) badchar[0] = c; complain (_("unknown escape sequence: `\\' followed by `%s'"), quote (badchar)); - code = '?'; + res = '?'; } } /* has \ */ - /* now fill BUF with the canonical name for this character as a - literal token. Do not use what the user typed, so that `\012' - and `\n' can be interchangeable. */ - - cp = buf; - if (code == term && wasquote) - *cp++ = code; - else if (code == '\\') - { - *cp++ = '\\'; - *cp++ = '\\'; - } - else if (code == '\'') - { - *cp++ = '\\'; - *cp++ = '\''; - } - else if (code == '\"') - { - *cp++ = '\\'; - *cp++ = '\"'; - } - else if (code >= 040 && code < 0177) - *cp++ = code; - else if (code == '\t') - { - *cp++ = '\\'; - *cp++ = 't'; - } - else if (code == '\n') - { - *cp++ = '\\'; - *cp++ = 'n'; - } - else if (code == '\r') - { - *cp++ = '\\'; - *cp++ = 'r'; - } - else if (code == '\v') - { - *cp++ = '\\'; - *cp++ = 'v'; - } - else if (code == '\b') - { - *cp++ = '\\'; - *cp++ = 'b'; - } - else if (code == '\f') - { - *cp++ = '\\'; - *cp++ = 'f'; - } - else - { - *cp++ = '\\'; - *cp++ = code / 0100 + '0'; - *cp++ = ((code / 010) & 07) + '0'; - *cp++ = (code & 07) + '0'; - } - *cp = '\0'; - - if (out) - obstack_sgrow (out, buf); - *pcode = code; - return !wasquote; + return res; } @@ -325,6 +244,7 @@ void unlex (token_t token) { unlexed = token; + unlexed_token_buffer = token_buffer; unlexed_symval = symval; } @@ -369,6 +289,7 @@ lex (void) { token_t res = unlexed; symval = unlexed_symval; + token_buffer = unlexed_token_buffer; unlexed = tok_undef; return res; } @@ -427,25 +348,24 @@ lex (void) /* parse the literal token and compute character code in code */ { - int code, discode; + int code = literalchar (); obstack_1grow (&token_obstack, '\''); - literalchar (&token_obstack, &code, '\''); + obstack_1grow (&token_obstack, code); c = getc (finput); if (c != '\'') { complain (_("use \"...\" for multi-character literal tokens")); - while (1) - if (!literalchar (0, &discode, '\'')) - break; + while (literalchar () != '\'') + /* Skip. */; } obstack_1grow (&token_obstack, '\''); obstack_1grow (&token_obstack, '\0'); token_buffer = obstack_finish (&token_obstack); symval = getsym (token_buffer); symval->class = token_sym; - if (!symval->user_token_number) + if (symval->user_token_number == SUNDEF) symval->user_token_number = code; return tok_identifier; } @@ -458,8 +378,12 @@ lex (void) obstack_1grow (&token_obstack, '\"'); /* Read up to and including ". */ - while (literalchar (&token_obstack, &code, '\"')) - /* nothing */; + do + { + code = literalchar (); + obstack_1grow (&token_obstack, code); + } + while (code != '\"'); obstack_1grow (&token_obstack, '\0'); token_buffer = obstack_finish (&token_obstack); @@ -552,49 +476,90 @@ option_strcmp (const char *left, const char *right) token_t parse_percent_token (void) { - const struct option_table_struct *tx; + const struct option_table_struct *tx = NULL; + const char *arg = NULL; + /* Where the ARG was found in token_buffer. */ + size_t arg_offset = 0; int c = getc (finput); + obstack_1grow (&token_obstack, '%'); + obstack_1grow (&token_obstack, c); switch (c) { case '%': + token_buffer = obstack_finish (&token_obstack); return tok_two_percents; case '{': + token_buffer = obstack_finish (&token_obstack); return tok_percent_left_curly; + /* The following guys are here for backward compatibility with + very ancient Yacc versions. The paper of Johnson mentions + them (as ancient :). */ case '<': + token_buffer = obstack_finish (&token_obstack); return tok_left; case '>': + token_buffer = obstack_finish (&token_obstack); return tok_right; case '2': + token_buffer = obstack_finish (&token_obstack); return tok_nonassoc; case '0': + token_buffer = obstack_finish (&token_obstack); return tok_token; case '=': + token_buffer = obstack_finish (&token_obstack); return tok_prec; } if (!isalpha (c)) - return tok_illegal; + { + token_buffer = obstack_finish (&token_obstack); + return tok_illegal; + } - obstack_1grow (&token_obstack, '%'); - while (isalpha (c) || c == '_' || c == '-') + while (c = getc (finput), isalpha (c) || c == '_' || c == '-') { if (c == '_') c = '-'; obstack_1grow (&token_obstack, c); + } + + /* %DIRECTIVE="ARG". Separate into + TOKEN_BUFFER = `%DIRECTIVE\0ARG\0'. + This is a bit hackish, but once we move to a Bison parser, + things will be cleaned up. */ + if (c == '=') + { + /* End of the directive. We skip the `='. */ + obstack_1grow (&token_obstack, '\0'); + /* Fetch the ARG if present. */ c = getc (finput); + if (c == '"') + { + int code; + arg_offset = obstack_object_size (&token_obstack); + /* Read up to and including `"'. Do not append the closing + `"' in the output: it's not part of the ARG. */ + while ((code = literalchar ()) != '"') + obstack_1grow (&token_obstack, code); + } + /* else: should be an error. */ } + else + ungetc (c, finput); - ungetc (c, finput); obstack_1grow (&token_obstack, '\0'); token_buffer = obstack_finish (&token_obstack); + if (arg_offset) + arg = token_buffer + arg_offset; /* table lookup % directive */ for (tx = option_table; tx->name; tx++) @@ -602,27 +567,42 @@ parse_percent_token (void) && option_strcmp (token_buffer + 1, tx->name) == 0) break; - if (tx->set_flag) - { - *((int *) (tx->set_flag)) = 1; - return tok_noop; - } + if (arg && tx->ret_val != tok_stropt) + fatal (_("`%s' supports no argument: %s"), token_buffer, quote (arg)); + switch (tx->ret_val) { - case tok_setopt: - *((char **) (tx->set_flag)) = optarg; + case tok_stropt: + assert (tx->set_flag); + if (arg) + { + /* Keep only the first assignment: command line options have + already been processed, and we want them to have + precedence. Side effect: if this %-option is used + several times, only the first is honored. Bah. */ + if (!*((char **) (tx->set_flag))) + *((char **) (tx->set_flag)) = xstrdup (arg); + } + else + fatal (_("`%s' requires an argument"), token_buffer); + return tok_noop; + break; + + case tok_intopt: + assert (tx->set_flag); + *((int *) (tx->set_flag)) = 1; return tok_noop; break; case tok_obsolete: fatal (_("`%s' is no longer supported"), token_buffer); + return tok_noop; break; default: - /* Other cases do not apply here. */ + return tx->ret_val; break; } - - return tx->ret_val; + abort (); }