X-Git-Url: https://git.saurik.com/bison.git/blobdiff_plain/92790e5bf0c5b23e4a1d92ab8a9204e69db1eaf2..c47dc59adc0864783aedce4c60a35016716aef98:/src/lex.c diff --git a/src/lex.c b/src/lex.c index b2458a4f..516f7997 100644 --- a/src/lex.c +++ b/src/lex.c @@ -32,12 +32,12 @@ static struct obstack token_obstack; const char *token_buffer = NULL; -bucket *symval = NULL; +symbol_t *symval = NULL; int numval; /* A token to be reread, see unlex and lex. */ static token_t unlexed = tok_undef; -static bucket *unlexed_symval = NULL; +static symbol_t *unlexed_symval = NULL; static const char *unlexed_token_buffer = NULL; void @@ -130,7 +130,7 @@ skip_white_space (void) | Do a getc, but give error message if EOF encountered | `-----------------------------------------------------*/ -static int +int xgetc (FILE *f) { int c = getc (f); @@ -140,67 +140,62 @@ xgetc (FILE *f) } -/*-----------------------------------------------------------------. -| Read one literal character from FINPUT. Process \-escapes. | -| Append the char to OUT and assign it *PCODE. Return 1 unless the | -| character is an unescaped `term' or \n report error for \n. | -`-----------------------------------------------------------------*/ +/*---------------------------------------------------------------. +| Read one literal character from FINPUT, process \-escapes, and | +| return the character. | +`---------------------------------------------------------------*/ -int -literalchar (struct obstack *out, int *pcode, char term) +char +literalchar (void) { int c; - int code; - int wasquote = 0; + int res; c = xgetc (finput); if (c == '\n') { complain (_("unescaped newline in constant")); ungetc (c, finput); - code = '?'; - wasquote = 1; + res = '?'; } else if (c != '\\') { - code = c; - if (c == term) - wasquote = 1; + res = c; } else { c = xgetc (finput); if (c == 't') - code = '\t'; + res = '\t'; else if (c == 'n') - code = '\n'; + res = '\n'; else if (c == 'a') - code = '\007'; + res = '\007'; else if (c == 'r') - code = '\r'; + res = '\r'; else if (c == 'f') - code = '\f'; + res = '\f'; else if (c == 'b') - code = '\b'; + res = '\b'; else if (c == 'v') - code = '\013'; + res = '\013'; else if (c == '\\') - code = '\\'; + res = '\\'; else if (c == '\'') - code = '\''; + res = '\''; else if (c == '\"') - code = '\"'; + res = '\"'; else if (c <= '7' && c >= '0') { - code = 0; + res = 0; while (c <= '7' && c >= '0') { - code = (code * 8) + (c - '0'); - if (code >= 256 || code < 0) + res = (res * 8) + (c - '0'); + if (res >= 256 || res < 0) { complain (_("octal value outside range 0...255: `\\%o'"), - code); - code &= 0xFF; + res); + res &= 0xFF; break; } c = xgetc (finput); @@ -210,21 +205,21 @@ literalchar (struct obstack *out, int *pcode, char term) else if (c == 'x') { c = xgetc (finput); - code = 0; + res = 0; while (1) { if (c >= '0' && c <= '9') - code *= 16, code += c - '0'; + res *= 16, res += c - '0'; else if (c >= 'a' && c <= 'f') - code *= 16, code += c - 'a' + 10; + res *= 16, res += c - 'a' + 10; else if (c >= 'A' && c <= 'F') - code *= 16, code += c - 'A' + 10; + res *= 16, res += c - 'A' + 10; else break; - if (code >= 256 || code < 0) + if (res >= 256 || res < 0) { - complain (_("hexadecimal value above 255: `\\x%x'"), code); - code &= 0xFF; + complain (_("hexadecimal value above 255: `\\x%x'"), res); + res &= 0xFF; break; } c = xgetc (finput); @@ -237,14 +232,11 @@ literalchar (struct obstack *out, int *pcode, char term) badchar[0] = c; complain (_("unknown escape sequence: `\\' followed by `%s'"), quote (badchar)); - code = '?'; + res = '?'; } } /* has \ */ - if (out) - obstack_1grow (out, code); - *pcode = code; - return !wasquote; + return res; } @@ -356,27 +348,29 @@ lex (void) /* parse the literal token and compute character code in code */ { - int code; + int code = literalchar (); obstack_1grow (&token_obstack, '\''); - literalchar (&token_obstack, &code, '\''); + obstack_1grow (&token_obstack, code); c = getc (finput); if (c != '\'') { - int discode; complain (_("use \"...\" for multi-character literal tokens")); - while (1) - if (!literalchar (0, &discode, '\'')) - break; + while (literalchar () != '\'') + /* Skip. */; } obstack_1grow (&token_obstack, '\''); obstack_1grow (&token_obstack, '\0'); token_buffer = obstack_finish (&token_obstack); symval = getsym (token_buffer); - symval->class = token_sym; - if (symval->user_token_number == SUNDEF) - symval->user_token_number = code; + if (symval->number == NUMBER_UNDEFINED) + { + symval->number = ntokens++; + symval->class = token_sym; + if (symval->user_token_number == SUNDEF) + symval->user_token_number = code; + } return tok_identifier; } @@ -388,13 +382,21 @@ lex (void) obstack_1grow (&token_obstack, '\"'); /* Read up to and including ". */ - while (literalchar (&token_obstack, &code, '\"')) - /* nothing */; + do + { + code = literalchar (); + obstack_1grow (&token_obstack, code); + } + while (code != '\"'); obstack_1grow (&token_obstack, '\0'); token_buffer = obstack_finish (&token_obstack); symval = getsym (token_buffer); - symval->class = token_sym; + if (symval->number == NUMBER_UNDEFINED) + { + symval->number = ntokens++; + symval->class = token_sym; + } return tok_identifier; } @@ -488,43 +490,50 @@ parse_percent_token (void) size_t arg_offset = 0; int c = getc (finput); + obstack_1grow (&token_obstack, '%'); + obstack_1grow (&token_obstack, c); - switch (c) + if (!isalpha (c)) { - case '%': - return tok_two_percents; + obstack_1grow (&token_obstack, '\0'); + token_buffer = obstack_finish (&token_obstack); - case '{': - return tok_percent_left_curly; + switch (c) + { + case '%': + return tok_two_percents; - /* FIXME: Who the heck are those 5 guys!?! `%<' = `%left'!!! - Let's ask for there removal. */ - case '<': - return tok_left; + case '{': + return tok_percent_left_curly; - case '>': - return tok_right; + /* The following guys are here for backward compatibility with + very ancient Yacc versions. The paper of Johnson mentions + them (as ancient :). */ + case '<': + return tok_left; - case '2': - return tok_nonassoc; + case '>': + return tok_right; - case '0': - return tok_token; + case '2': + return tok_nonassoc; - case '=': - return tok_prec; - } + case '0': + return tok_token; - if (!isalpha (c)) - return tok_illegal; + case '=': + return tok_prec; - obstack_1grow (&token_obstack, '%'); - while (isalpha (c) || c == '_' || c == '-') + default: + return tok_illegal; + } + } + + while (c = getc (finput), isalpha (c) || c == '_' || c == '-') { if (c == '_') c = '-'; obstack_1grow (&token_obstack, c); - c = getc (finput); } /* %DIRECTIVE="ARG". Separate into @@ -543,7 +552,7 @@ parse_percent_token (void) arg_offset = obstack_object_size (&token_obstack); /* Read up to and including `"'. Do not append the closing `"' in the output: it's not part of the ARG. */ - while (literalchar (NULL, &code, '"')) + while ((code = literalchar ()) != '"') obstack_1grow (&token_obstack, code); } /* else: should be an error. */