X-Git-Url: https://git.saurik.com/bison.git/blobdiff_plain/340ef489223a5d1f9c40674ea2973435cb8ecc1b..8c7d6a3de82ca7fcbdfa6ad31c3bf70f63c6a069:/src/lex.c?ds=inline diff --git a/src/lex.c b/src/lex.c index e2a94ad7..4bd0a6f2 100644 --- a/src/lex.c +++ b/src/lex.c @@ -1,5 +1,5 @@ /* Token-reader for Bison's input parser, - Copyright (C) 1984, 1986, 1989, 1992, 2000 Free Software Foundation, Inc. + Copyright 1984, 1986, 1989, 1992, 2000, 2001 Free Software Foundation, Inc. This file is part of Bison, the GNU Compiler Compiler. @@ -24,18 +24,14 @@ #include "getopt.h" /* for optarg */ #include "symtab.h" #include "lex.h" -#include "alloc.h" +#include "xalloc.h" #include "complain.h" #include "gram.h" - -/* functions from main.c */ -extern char *printable_version PARAMS ((int)); +#include "quote.h" /* Buffer for storing the current token. */ -char *token_buffer; - -/* Allocated size of token_buffer, not including space for terminator. */ -int maxtoken; +struct obstack token_obstack; +const char *token_buffer = NULL; bucket *symval; int numval; @@ -47,22 +43,11 @@ static bucket *unlexed_symval; /* by the next call to lex */ void init_lex (void) { - maxtoken = 100; - token_buffer = NEW2 (maxtoken + 1, char); + obstack_init (&token_obstack); unlexed = -1; } -char * -grow_token_buffer (char *p) -{ - int offset = p - token_buffer; - maxtoken *= 2; - token_buffer = (char *) xrealloc (token_buffer, maxtoken + 1); - return token_buffer + offset; -} - - int skip_white_space (void) { @@ -78,6 +63,7 @@ skip_white_space (void) switch (c) { case '/': + /* FIXME: Should probably be merged with copy_comment. */ c = getc (finput); if (c != '*' && c != '/') { @@ -132,7 +118,11 @@ skip_white_space (void) } } -/* do a getc, but give error message if EOF encountered */ + +/*-----------------------------------------------------. +| Do a getc, but give error message if EOF encountered | +`-----------------------------------------------------*/ + static int xgetc (FILE *f) { @@ -145,16 +135,22 @@ xgetc (FILE *f) /*------------------------------------------------------------------. | Read one literal character from finput. Process \ escapes. | -| Append the normalized string version of the char to *PP. Assign | +| Append the normalized string version of the char to OUT. Assign | | the character code to *PCODE. Return 1 unless the character is an | -| unescaped `term' or \n report error for \n | +| unescaped `term' or \n report error for \n. | `------------------------------------------------------------------*/ +/* FIXME: We could directly work in the obstack, but that would make + it more difficult to move to quotearg some day. So for the time + being, I prefer have literalchar behave like quotearg, and change + my mind later if I was wrong. */ + static int -literalchar (char **pp, int *pcode, char term) +literalchar (struct obstack *out, int *pcode, char term) { int c; - char *p; + char buf[4096]; + char *cp; int code; int wasquote = 0; @@ -238,74 +234,79 @@ literalchar (char **pp, int *pcode, char term) } else { + char badchar [] = "c"; + badchar[0] = c; complain (_("unknown escape sequence: `\\' followed by `%s'"), - printable_version (c)); + quote (badchar)); code = '?'; } } /* has \ */ - /* now fill token_buffer with the canonical name for this character - as a literal token. Do not use what the user typed, - so that `\012' and `\n' can be interchangeable. */ + /* now fill BUF with the canonical name for this character as a + literal token. Do not use what the user typed, so that `\012' + and `\n' can be interchangeable. */ - p = *pp; + cp = buf; if (code == term && wasquote) - *p++ = code; + *cp++ = code; else if (code == '\\') { - *p++ = '\\'; - *p++ = '\\'; + *cp++ = '\\'; + *cp++ = '\\'; } else if (code == '\'') { - *p++ = '\\'; - *p++ = '\''; + *cp++ = '\\'; + *cp++ = '\''; } else if (code == '\"') { - *p++ = '\\'; - *p++ = '\"'; + *cp++ = '\\'; + *cp++ = '\"'; } else if (code >= 040 && code < 0177) - *p++ = code; + *cp++ = code; else if (code == '\t') { - *p++ = '\\'; - *p++ = 't'; + *cp++ = '\\'; + *cp++ = 't'; } else if (code == '\n') { - *p++ = '\\'; - *p++ = 'n'; + *cp++ = '\\'; + *cp++ = 'n'; } else if (code == '\r') { - *p++ = '\\'; - *p++ = 'r'; + *cp++ = '\\'; + *cp++ = 'r'; } else if (code == '\v') { - *p++ = '\\'; - *p++ = 'v'; + *cp++ = '\\'; + *cp++ = 'v'; } else if (code == '\b') { - *p++ = '\\'; - *p++ = 'b'; + *cp++ = '\\'; + *cp++ = 'b'; } else if (code == '\f') { - *p++ = '\\'; - *p++ = 'f'; + *cp++ = '\\'; + *cp++ = 'f'; } else { - *p++ = '\\'; - *p++ = code / 0100 + '0'; - *p++ = ((code / 010) & 07) + '0'; - *p++ = (code & 07) + '0'; + *cp++ = '\\'; + *cp++ = code / 0100 + '0'; + *cp++ = ((code / 010) & 07) + '0'; + *cp++ = (code & 07) + '0'; } - *pp = p; + *cp = '\0'; + + if (out) + obstack_sgrow (out, buf); *pcode = code; return !wasquote; } @@ -318,12 +319,42 @@ unlex (int token) unlexed_symval = symval; } +/*-----------------------------------------------------------------. +| We just read `<' from FIN. Store in TOKEN_BUFFER, the type name | +| specified between the `<...>'. | +`-----------------------------------------------------------------*/ -int +void +read_type_name (FILE *fin) +{ + int c = getc (fin); + + while (c != '>') + { + if (c == EOF) + fatal (_("unterminated type name at end of file")); + if (c == '\n') + { + complain (_("unterminated type name")); + ungetc (c, fin); + break; + } + + obstack_1grow (&token_obstack, c); + c = getc (fin); + } + obstack_1grow (&token_obstack, '\0'); + token_buffer = obstack_finish (&token_obstack); +} + + +token_t lex (void) { int c; - char *p; + + /* Just to make sure. */ + token_buffer = NULL; if (unlexed >= 0) { @@ -334,14 +365,12 @@ lex (void) } c = skip_white_space (); - *token_buffer = c; /* for error messages (token buffer always valid) */ - token_buffer[1] = 0; switch (c) { case EOF: - strcpy (token_buffer, "EOF"); - return ENDFILE; + token_buffer = "EOF"; + return tok_eof; case 'A': case 'B': case 'C': case 'D': case 'E': case 'F': case 'G': case 'H': case 'I': case 'J': @@ -357,39 +386,32 @@ lex (void) case 'z': case '.': case '_': - p = token_buffer; while (isalnum (c) || c == '_' || c == '.') { - if (p == token_buffer + maxtoken) - p = grow_token_buffer (p); - - *p++ = c; + obstack_1grow (&token_obstack, c); c = getc (finput); } - - *p = 0; + obstack_1grow (&token_obstack, '\0'); + token_buffer = obstack_finish (&token_obstack); ungetc (c, finput); symval = getsym (token_buffer); - return IDENTIFIER; + return tok_identifier; case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9': { numval = 0; - p = token_buffer; while (isdigit (c)) { - if (p == token_buffer + maxtoken) - p = grow_token_buffer (p); - - *p++ = c; + obstack_1grow (&token_obstack, c); numval = numval * 10 + c - '0'; c = getc (finput); } - *p = 0; + obstack_1grow (&token_obstack, '\0'); + token_buffer = obstack_finish (&token_obstack); ungetc (c, finput); - return NUMBER; + return tok_number; } case '\'': @@ -398,30 +420,26 @@ lex (void) translations = -1; { int code, discode; - char discard[10], *dp; - p = token_buffer; - *p++ = '\''; - literalchar (&p, &code, '\''); + obstack_1grow (&token_obstack, '\''); + literalchar (&token_obstack, &code, '\''); c = getc (finput); if (c != '\'') { complain (_("use \"...\" for multi-character literal tokens")); while (1) - { - dp = discard; - if (!literalchar (&dp, &discode, '\'')) - break; - } + if (!literalchar (0, &discode, '\'')) + break; } - *p++ = '\''; - *p = 0; + obstack_1grow (&token_obstack, '\''); + obstack_1grow (&token_obstack, '\0'); + token_buffer = obstack_finish (&token_obstack); symval = getsym (token_buffer); - symval->class = STOKEN; + symval->class = token_sym; if (!symval->user_token_number) symval->user_token_number = code; - return IDENTIFIER; + return tok_identifier; } case '\"': @@ -430,35 +448,34 @@ lex (void) translations = -1; { int code; /* ignored here */ - p = token_buffer; - *p++ = '\"'; - while (literalchar (&p, &code, '\"')) /* read up to and including " */ - { - if (p >= token_buffer + maxtoken - 4) - p = grow_token_buffer (p); - } - *p = 0; + + obstack_1grow (&token_obstack, '\"'); + /* Read up to and including ". */ + while (literalchar (&token_obstack, &code, '\"')) + /* nothing */; + obstack_1grow (&token_obstack, '\0'); + token_buffer = obstack_finish (&token_obstack); symval = getsym (token_buffer); - symval->class = STOKEN; + symval->class = token_sym; - return IDENTIFIER; + return tok_identifier; } case ',': - return COMMA; + return tok_comma; case ':': - return COLON; + return tok_colon; case ';': - return SEMICOLON; + return tok_semicolon; case '|': - return BAR; + return tok_bar; case '{': - return LEFT_CURLY; + return tok_left_curly; case '=': do @@ -471,97 +488,78 @@ lex (void) if (c == '{') { - strcpy (token_buffer, "={"); - return LEFT_CURLY; + token_buffer = "={"; + return tok_left_curly; } else { ungetc (c, finput); - return ILLEGAL; + return tok_illegal; } case '<': - p = token_buffer; - c = getc (finput); - while (c != '>') - { - if (c == EOF) - fatal (_("unterminated type name at end of file")); - if (c == '\n') - { - complain (_("unterminated type name")); - ungetc (c, finput); - break; - } - - if (p == token_buffer + maxtoken) - p = grow_token_buffer (p); - - *p++ = c; - c = getc (finput); - } - *p = 0; - return TYPENAME; - + read_type_name (finput); + return tok_typename; case '%': return parse_percent_token (); default: - return ILLEGAL; + return tok_illegal; } } /* the following table dictates the action taken for the various % - directives. A setflag value causes the named flag to be set. A + directives. A set_flag value causes the named flag to be set. A retval action returns the code. */ struct percent_table_struct { const char *name; - void *setflag; + void *set_flag; int retval; -} -percent_table[] = +}; + +struct percent_table_struct percent_table[] = { - { "token", NULL, TOKEN }, - { "term", NULL, TOKEN }, - { "nterm", NULL, NTERM }, - { "type", NULL, TYPE }, - { "guard", NULL, GUARD }, - { "union", NULL, UNION }, - { "expect", NULL, EXPECT }, - { "thong", NULL, THONG }, - { "start", NULL, START }, - { "left", NULL, LEFT }, - { "right", NULL, RIGHT }, - { "nonassoc", NULL, NONASSOC }, - { "binary", NULL, NONASSOC }, - { "semantic_parser", NULL, SEMANTIC_PARSER }, - { "pure_parser", NULL, PURE_PARSER }, - { "prec", NULL, PREC }, - { "no_lines", &nolinesflag, NOOP}, /* -l */ - { "raw", &rawtoknumflag, NOOP }, /* -r */ - { "token_table", &toknumflag, NOOP}, /* -k */ + { "token", NULL, tok_token }, + { "term", NULL, tok_token }, + { "nterm", NULL, tok_nterm }, + { "type", NULL, tok_type }, + { "guard", NULL, tok_guard }, + { "union", NULL, tok_union }, + { "expect", NULL, tok_expect }, + { "thong", NULL, tok_thong }, + { "start", NULL, tok_start }, + { "left", NULL, tok_left }, + { "right", NULL, tok_right }, + { "nonassoc", NULL, tok_nonassoc }, + { "binary", NULL, tok_nonassoc }, + { "prec", NULL, tok_prec }, + { "locations", &locations_flag, tok_noop }, /* -l */ + { "no_lines", &no_lines_flag, tok_noop }, /* -l */ + { "raw", NULL, tok_obsolete }, /* -r */ + { "token_table", &token_table_flag, tok_noop }, /* -k */ + { "yacc", &yacc_flag, tok_noop }, /* -y */ + { "fixed_output_files",&yacc_flag, tok_noop }, /* -y */ + { "defines", &defines_flag, tok_noop }, /* -d */ + { "no_parser", &no_parser_flag, tok_noop }, /* -n */ #if 0 - /* These can be utilized after main is reoganized so - open_files() is deferred 'til after read_declarations(). - But %{ and %union both put information into files - that have to be opened before read_declarations(). - */ - { "yacc", &fixed_outfiles, NOOP}, /* -y */ - { "fixed_output_files", &fixed_outfiles, NOOP}, /* -y */ - { "defines", &definesflag, NOOP}, /* -d */ - { "no_parser", &noparserflag, NOOP}, /* -n */ - { "output_file", &spec_outfile, SETOPT}, /* -o */ - { "file_prefix", &spec_file_prefix, SETOPT}, /* -b */ - { "name_prefix", &spec_name_prefix, SETOPT}, /* -p */ - /* These would be acceptable, but they do not affect processing */ - { "verbose", &verboseflag, NOOP}, /* -v */ - { "debug", &debugflag, NOOP}, /* -t */ -/* {"help", , NOOP}, *//* -h */ -/* {"version", , NOOP}, *//* -V */ + /* For the time being, this is not enabled yet, while it's possible + though, since we use obstacks. The only risk is with semantic + parsers which will output an `include' of an output file: be sure + that the naem included is indeed the name of the output file. */ + { "output_file", &spec_outfile, tok_setopt }, /* -o */ + { "file_prefix", &spec_file_prefix, tok_setopt }, /* -b */ + { "name_prefix", &spec_name_prefix, tok_setopt }, /* -p */ #endif - { NULL, NULL, ILLEGAL} + { "header_extension", NULL, tok_hdrext}, + { "source_extension", NULL, tok_srcext}, + { "verbose", &verbose_flag, tok_noop }, /* -v */ + { "debug", &debug_flag, tok_noop }, /* -t */ + { "semantic_parser", &semantic_parser, tok_noop }, + { "pure_parser", &pure_parser, tok_noop }, + + { NULL, NULL, tok_illegal} }; /* Parse a token which starts with %. @@ -571,71 +569,72 @@ int parse_percent_token (void) { int c; - char *p; struct percent_table_struct *tx; - p = token_buffer; c = getc (finput); - *p++ = '%'; - *p++ = c; /* for error msg */ - *p = 0; switch (c) { case '%': - return TWO_PERCENTS; + return tok_two_percents; case '{': - return PERCENT_LEFT_CURLY; + return tok_percent_left_curly; case '<': - return LEFT; + return tok_left; case '>': - return RIGHT; + return tok_right; case '2': - return NONASSOC; + return tok_nonassoc; case '0': - return TOKEN; + return tok_token; case '=': - return PREC; + return tok_prec; } + if (!isalpha (c)) - return ILLEGAL; + return tok_illegal; - p = token_buffer; - *p++ = '%'; + obstack_1grow (&token_obstack, '%'); while (isalpha (c) || c == '_' || c == '-') { - if (p == token_buffer + maxtoken) - p = grow_token_buffer (p); - if (c == '-') c = '_'; - *p++ = c; + obstack_1grow (&token_obstack, c); c = getc (finput); } ungetc (c, finput); - - *p = 0; + obstack_1grow (&token_obstack, '\0'); + token_buffer = obstack_finish (&token_obstack); /* table lookup % directive */ for (tx = percent_table; tx->name; tx++) if (strcmp (token_buffer + 1, tx->name) == 0) break; - if (tx->retval == SETOPT) + + if (tx->set_flag) { - *((char **) (tx->setflag)) = optarg; - return NOOP; + *((int *) (tx->set_flag)) = 1; + return tok_noop; } - if (tx->setflag) + + switch (tx->retval) { - *((int *) (tx->setflag)) = 1; - return NOOP; + case tok_setopt: + *((char **) (tx->set_flag)) = optarg; + return tok_noop; + break; + + case tok_obsolete: + fatal (_("`%s' is no longer supported"), token_buffer); + break; } + return tx->retval; }