X-Git-Url: https://git.saurik.com/bison.git/blobdiff_plain/3628146521aa93cdd499fa5058d21c7be33014bf..f282676b7e6c9ae1b66233dbfc522bb685c3ece2:/src/lex.c diff --git a/src/lex.c b/src/lex.c index b59793bc..c5d73082 100644 --- a/src/lex.c +++ b/src/lex.c @@ -18,49 +18,16 @@ the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */ - -/* - lex is the entry point. It is called from reader.c. - It returns one of the token-type codes defined in lex.h. - When an identifier is seen, the code IDENTIFIER is returned - and the name is looked up in the symbol table using symtab.c; - symval is set to a pointer to the entry found. */ - -#include #include "system.h" +#include "getargs.h" #include "files.h" #include "getopt.h" /* for optarg */ #include "symtab.h" #include "lex.h" -#include "alloc.h" +#include "xalloc.h" #include "complain.h" - -/* flags set by % directives */ -extern int definesflag; /* for -d */ -extern int toknumflag; /* for -k */ -extern int noparserflag; /* for -n */ -extern int fixed_outfiles; /* for -y */ -extern int nolinesflag; /* for -l */ -extern int rawtoknumflag; /* for -r */ -extern int verboseflag; /* for -v */ -extern int debugflag; /* for -t */ -extern char *spec_name_prefix; /* for -p */ -extern char *spec_file_prefix; /* for -b */ -/*spec_outfile is declared in files.h, for -o */ - -extern int translations; - -void init_lex PARAMS((void)); -char *grow_token_buffer PARAMS((char *)); -int skip_white_space PARAMS((void)); -int safegetc PARAMS((FILE *)); -int literalchar PARAMS((char **, int *, char)); -void unlex PARAMS((int)); -int lex PARAMS((void)); -int parse_percent_token PARAMS((void)); - -/* functions from main.c */ -extern char *printable_version PARAMS((int)); +#include "gram.h" +#include "quote.h" /* Buffer for storing the current token. */ char *token_buffer; @@ -79,7 +46,7 @@ void init_lex (void) { maxtoken = 100; - token_buffer = NEW2 (maxtoken + 1, char); + token_buffer = XCALLOC (char, maxtoken + 1); unlexed = -1; } @@ -89,7 +56,7 @@ grow_token_buffer (char *p) { int offset = p - token_buffer; maxtoken *= 2; - token_buffer = (char *) xrealloc(token_buffer, maxtoken + 1); + token_buffer = XREALLOC (token_buffer, char, maxtoken + 1); return token_buffer + offset; } @@ -97,10 +64,10 @@ grow_token_buffer (char *p) int skip_white_space (void) { - register int c; - register int inside; + int c; + int inside; - c = getc(finput); + c = getc (finput); for (;;) { @@ -109,7 +76,8 @@ skip_white_space (void) switch (c) { case '/': - c = getc(finput); + /* FIXME: Should probably be merged with copy_comment. */ + c = getc (finput); if (c != '*' && c != '/') { complain (_("unexpected `/' found and ignored")); @@ -117,7 +85,7 @@ skip_white_space (void) } cplus_comment = (c == '/'); - c = getc(finput); + c = getc (finput); inside = 1; while (inside) @@ -125,12 +93,12 @@ skip_white_space (void) if (!cplus_comment && c == '*') { while (c == '*') - c = getc(finput); + c = getc (finput); if (c == '/') { inside = 0; - c = getc(finput); + c = getc (finput); } } else if (c == '\n') @@ -138,12 +106,12 @@ skip_white_space (void) lineno++; if (cplus_comment) inside = 0; - c = getc(finput); + c = getc (finput); } else if (c == EOF) fatal (_("unterminated comment")); else - c = getc(finput); + c = getc (finput); } break; @@ -154,7 +122,7 @@ skip_white_space (void) case ' ': case '\t': case '\f': - c = getc(finput); + c = getc (finput); break; default: @@ -163,35 +131,41 @@ skip_white_space (void) } } -/* do a getc, but give error message if EOF encountered */ -int -safegetc (FILE *f) + +/*-----------------------------------------------------. +| Do a getc, but give error message if EOF encountered | +`-----------------------------------------------------*/ + +static int +xgetc (FILE *f) { - register int c = getc(f); + int c = getc (f); if (c == EOF) fatal (_("unexpected end of file")); return c; } -/* read one literal character from finput. process \ escapes. - append the normalized string version of the char to *pp. - assign the character code to *pcode - return 1 unless the character is an unescaped `term' or \n - report error for \n -*/ -int + +/*------------------------------------------------------------------. +| Read one literal character from finput. Process \ escapes. | +| Append the normalized string version of the char to *PP. Assign | +| the character code to *PCODE. Return 1 unless the character is an | +| unescaped `term' or \n report error for \n | +`------------------------------------------------------------------*/ + +static int literalchar (char **pp, int *pcode, char term) { - register int c; - register char *p; - register int code; + int c; + char *p; + int code; int wasquote = 0; - c = safegetc(finput); + c = xgetc (finput); if (c == '\n') { complain (_("unescaped newline in constant")); - ungetc(c, finput); + ungetc (c, finput); code = '?'; wasquote = 1; } @@ -203,17 +177,27 @@ literalchar (char **pp, int *pcode, char term) } else { - c = safegetc(finput); - if (c == 't') code = '\t'; - else if (c == 'n') code = '\n'; - else if (c == 'a') code = '\007'; - else if (c == 'r') code = '\r'; - else if (c == 'f') code = '\f'; - else if (c == 'b') code = '\b'; - else if (c == 'v') code = '\013'; - else if (c == '\\') code = '\\'; - else if (c == '\'') code = '\''; - else if (c == '\"') code = '\"'; + c = xgetc (finput); + if (c == 't') + code = '\t'; + else if (c == 'n') + code = '\n'; + else if (c == 'a') + code = '\007'; + else if (c == 'r') + code = '\r'; + else if (c == 'f') + code = '\f'; + else if (c == 'b') + code = '\b'; + else if (c == 'v') + code = '\013'; + else if (c == '\\') + code = '\\'; + else if (c == '\'') + code = '\''; + else if (c == '\"') + code = '\"'; else if (c <= '7' && c >= '0') { code = 0; @@ -227,42 +211,43 @@ literalchar (char **pp, int *pcode, char term) code &= 0xFF; break; } - c = safegetc(finput); + c = xgetc (finput); } - ungetc(c, finput); + ungetc (c, finput); } else if (c == 'x') { - c = safegetc(finput); + c = xgetc (finput); code = 0; while (1) { if (c >= '0' && c <= '9') - code *= 16, code += c - '0'; + code *= 16, code += c - '0'; else if (c >= 'a' && c <= 'f') - code *= 16, code += c - 'a' + 10; + code *= 16, code += c - 'a' + 10; else if (c >= 'A' && c <= 'F') - code *= 16, code += c - 'A' + 10; + code *= 16, code += c - 'A' + 10; else break; - if (code >= 256 || code<0) + if (code >= 256 || code < 0) { - complain (_("hexadecimal value above 255: `\\x%x'"), - code); + complain (_("hexadecimal value above 255: `\\x%x'"), code); code &= 0xFF; break; } - c = safegetc(finput); + c = xgetc (finput); } - ungetc(c, finput); + ungetc (c, finput); } else { + char buf [] = "c"; + buf[0] = c; complain (_("unknown escape sequence: `\\' followed by `%s'"), - printable_version(c)); + quote (buf)); code = '?'; } - } /* has \ */ + } /* has \ */ /* now fill token_buffer with the canonical name for this character as a literal token. Do not use what the user typed, @@ -271,17 +256,53 @@ literalchar (char **pp, int *pcode, char term) p = *pp; if (code == term && wasquote) *p++ = code; - else if (code == '\\') {*p++ = '\\'; *p++ = '\\';} - else if (code == '\'') {*p++ = '\\'; *p++ = '\'';} - else if (code == '\"') {*p++ = '\\'; *p++ = '\"';} + else if (code == '\\') + { + *p++ = '\\'; + *p++ = '\\'; + } + else if (code == '\'') + { + *p++ = '\\'; + *p++ = '\''; + } + else if (code == '\"') + { + *p++ = '\\'; + *p++ = '\"'; + } else if (code >= 040 && code < 0177) *p++ = code; - else if (code == '\t') {*p++ = '\\'; *p++ = 't';} - else if (code == '\n') {*p++ = '\\'; *p++ = 'n';} - else if (code == '\r') {*p++ = '\\'; *p++ = 'r';} - else if (code == '\v') {*p++ = '\\'; *p++ = 'v';} - else if (code == '\b') {*p++ = '\\'; *p++ = 'b';} - else if (code == '\f') {*p++ = '\\'; *p++ = 'f';} + else if (code == '\t') + { + *p++ = '\\'; + *p++ = 't'; + } + else if (code == '\n') + { + *p++ = '\\'; + *p++ = 'n'; + } + else if (code == '\r') + { + *p++ = '\\'; + *p++ = 'r'; + } + else if (code == '\v') + { + *p++ = '\\'; + *p++ = 'v'; + } + else if (code == '\b') + { + *p++ = '\\'; + *p++ = 'b'; + } + else if (code == '\f') + { + *p++ = '\\'; + *p++ = 'f'; + } else { *p++ = '\\'; @@ -291,7 +312,7 @@ literalchar (char **pp, int *pcode, char term) } *pp = p; *pcode = code; - return ! wasquote; + return !wasquote; } @@ -302,11 +323,42 @@ unlex (int token) unlexed_symval = symval; } +/*-----------------------------------------------------------------. +| We just read `<' from FIN. Store in TOKEN_BUFFER, the type name | +| specified between the `<...>'. | +`-----------------------------------------------------------------*/ + +void +read_type_name (FILE *fin) +{ + char *p = token_buffer; + int c = getc (fin); + + while (c != '>') + { + if (c == EOF) + fatal (_("unterminated type name at end of file")); + if (c == '\n') + { + complain (_("unterminated type name")); + ungetc (c, fin); + break; + } + + if (p == token_buffer + maxtoken) + p = grow_token_buffer (p); + + *p++ = c; + c = getc (fin); + } + *p = 0; +} + int lex (void) { - register int c; + int c; char *p; if (unlexed >= 0) @@ -317,66 +369,67 @@ lex (void) return c; } - c = skip_white_space(); - *token_buffer = c; /* for error messages (token buffer always valid) */ + c = skip_white_space (); + /* for error messages (token buffer always valid) */ + *token_buffer = c; token_buffer[1] = 0; switch (c) { case EOF: - strcpy(token_buffer, "EOF"); + strcpy (token_buffer, "EOF"); return ENDFILE; - case 'A': case 'B': case 'C': case 'D': case 'E': - case 'F': case 'G': case 'H': case 'I': case 'J': - case 'K': case 'L': case 'M': case 'N': case 'O': - case 'P': case 'Q': case 'R': case 'S': case 'T': - case 'U': case 'V': case 'W': case 'X': case 'Y': + case 'A': case 'B': case 'C': case 'D': case 'E': + case 'F': case 'G': case 'H': case 'I': case 'J': + case 'K': case 'L': case 'M': case 'N': case 'O': + case 'P': case 'Q': case 'R': case 'S': case 'T': + case 'U': case 'V': case 'W': case 'X': case 'Y': case 'Z': - case 'a': case 'b': case 'c': case 'd': case 'e': - case 'f': case 'g': case 'h': case 'i': case 'j': - case 'k': case 'l': case 'm': case 'n': case 'o': - case 'p': case 'q': case 'r': case 's': case 't': - case 'u': case 'v': case 'w': case 'x': case 'y': + case 'a': case 'b': case 'c': case 'd': case 'e': + case 'f': case 'g': case 'h': case 'i': case 'j': + case 'k': case 'l': case 'm': case 'n': case 'o': + case 'p': case 'q': case 'r': case 's': case 't': + case 'u': case 'v': case 'w': case 'x': case 'y': case 'z': - case '.': case '_': + case '.': case '_': + p = token_buffer; - while (isalnum(c) || c == '_' || c == '.') + while (isalnum (c) || c == '_' || c == '.') { if (p == token_buffer + maxtoken) - p = grow_token_buffer(p); + p = grow_token_buffer (p); *p++ = c; - c = getc(finput); + c = getc (finput); } *p = 0; - ungetc(c, finput); - symval = getsym(token_buffer); + ungetc (c, finput); + symval = getsym (token_buffer); return IDENTIFIER; - case '0': case '1': case '2': case '3': case '4': - case '5': case '6': case '7': case '8': case '9': + case '0': case '1': case '2': case '3': case '4': + case '5': case '6': case '7': case '8': case '9': { numval = 0; p = token_buffer; - while (isdigit(c)) + while (isdigit (c)) { if (p == token_buffer + maxtoken) - p = grow_token_buffer(p); + p = grow_token_buffer (p); *p++ = c; - numval = numval*10 + c - '0'; - c = getc(finput); + numval = numval * 10 + c - '0'; + c = getc (finput); } *p = 0; - ungetc(c, finput); + ungetc (c, finput); return NUMBER; } case '\'': - /* parse the literal token and compute character code in code */ translations = -1; @@ -386,46 +439,46 @@ lex (void) p = token_buffer; *p++ = '\''; - literalchar(&p, &code, '\''); + literalchar (&p, &code, '\''); - c = getc(finput); + c = getc (finput); if (c != '\'') { complain (_("use \"...\" for multi-character literal tokens")); while (1) { dp = discard; - if (! literalchar(&dp, &discode, '\'')) + if (!literalchar (&dp, &discode, '\'')) break; } } *p++ = '\''; *p = 0; - symval = getsym(token_buffer); - symval->class = STOKEN; - if (! symval->user_token_number) + symval = getsym (token_buffer); + symval->class = token_sym; + if (!symval->user_token_number) symval->user_token_number = code; return IDENTIFIER; } case '\"': - /* parse the literal string token and treat as an identifier */ translations = -1; { - int code; /* ignored here */ + int code; /* ignored here */ p = token_buffer; *p++ = '\"'; - while (literalchar(&p, &code, '\"')) /* read up to and including " */ + /* Read up to and including ". */ + while (literalchar (&p, &code, '\"')) { if (p >= token_buffer + maxtoken - 4) - p = grow_token_buffer(p); + p = grow_token_buffer (p); } *p = 0; - symval = getsym(token_buffer); - symval->class = STOKEN; + symval = getsym (token_buffer); + symval->class = token_sym; return IDENTIFIER; } @@ -448,107 +501,86 @@ lex (void) case '=': do { - c = getc(finput); - if (c == '\n') lineno++; + c = getc (finput); + if (c == '\n') + lineno++; } - while(c==' ' || c=='\n' || c=='\t'); + while (c == ' ' || c == '\n' || c == '\t'); if (c == '{') { - strcpy(token_buffer, "={"); + strcpy (token_buffer, "={"); return LEFT_CURLY; } else { - ungetc(c, finput); + ungetc (c, finput); return ILLEGAL; } case '<': - p = token_buffer; - c = getc(finput); - while (c != '>') - { - if (c == EOF) - fatal (_("unterminated type name at end of file")); - if (c == '\n') - { - complain (_("unterminated type name")); - ungetc(c, finput); - break; - } - - if (p == token_buffer + maxtoken) - p = grow_token_buffer(p); - - *p++ = c; - c = getc(finput); - } - *p = 0; + read_type_name (finput); return TYPENAME; - case '%': - return parse_percent_token(); + return parse_percent_token (); default: return ILLEGAL; } } -/* the following table dictates the action taken for the various - % directives. A setflag value causes the named flag to be - set. A retval action returns the code. -*/ -struct percent_table_struct { - char *name; - void *setflag; - int retval; -} percent_table[] = +/* the following table dictates the action taken for the various % + directives. A set_flag value causes the named flag to be set. A + retval action returns the code. */ +struct percent_table_struct { - {"token", NULL, TOKEN}, - {"term", NULL, TOKEN}, - {"nterm", NULL, NTERM}, - {"type", NULL, TYPE}, - {"guard", NULL, GUARD}, - {"union", NULL, UNION}, - {"expect", NULL, EXPECT}, - {"thong", NULL, THONG}, - {"start", NULL, START}, - {"left", NULL, LEFT}, - {"right", NULL, RIGHT}, - {"nonassoc", NULL, NONASSOC}, - {"binary", NULL, NONASSOC}, - {"semantic_parser", NULL, SEMANTIC_PARSER}, - {"pure_parser", NULL, PURE_PARSER}, - {"prec", NULL, PREC}, - - {"no_lines", &nolinesflag, NOOP}, /* -l */ - {"raw", &rawtoknumflag, NOOP}, /* -r */ - {"token_table", &toknumflag, NOOP}, /* -k */ - + const char *name; + void *set_flag; + int retval; +} +percent_table[] = +{ + { "token", NULL, TOKEN }, + { "term", NULL, TOKEN }, + { "nterm", NULL, NTERM }, + { "type", NULL, TYPE }, + { "guard", NULL, GUARD }, + { "union", NULL, UNION }, + { "expect", NULL, EXPECT }, + { "thong", NULL, THONG }, + { "start", NULL, START }, + { "left", NULL, LEFT }, + { "right", NULL, RIGHT }, + { "nonassoc", NULL, NONASSOC }, + { "binary", NULL, NONASSOC }, + { "semantic_parser", NULL, SEMANTIC_PARSER }, + { "pure_parser", NULL, PURE_PARSER }, + { "prec", NULL, PREC }, + { "locations", &locations_flag, NOOP}, /* -l */ + { "no_lines", &no_lines_flag, NOOP}, /* -l */ + { "raw", &raw_flag, NOOP }, /* -r */ + { "token_table", &token_table_flag, NOOP}, /* -k */ #if 0 - /* These can be utilized after main is reoganized so - open_files() is deferred 'til after read_declarations(). - But %{ and %union both put information into files - that have to be opened before read_declarations(). + /* These can be utilized after main is reoganized so + open_files() is deferred 'til after read_declarations(). + But %{ and %union both put information into files + that have to be opened before read_declarations(). */ - {"yacc", &fixed_outfiles, NOOP}, /* -y */ - {"fixed_output_files", &fixed_outfiles, NOOP}, /* -y */ - {"defines", &definesflag, NOOP}, /* -d */ - {"no_parser", &noparserflag, NOOP}, /* -n */ - {"output_file", &spec_outfile, SETOPT}, /* -o */ - {"file_prefix", &spec_file_prefix, SETOPT}, /* -b */ - {"name_prefix", &spec_name_prefix, SETOPT}, /* -p */ - - /* These would be acceptable, but they do not affect processing */ - {"verbose", &verboseflag, NOOP}, /* -v */ - {"debug", &debugflag, NOOP}, /* -t */ - /* {"help", , NOOP},*/ /* -h */ - /* {"version", , NOOP},*/ /* -V */ + { "yacc", &yacc_flag, NOOP}, /* -y */ + { "fixed_output_files", &yacc_flag, NOOP}, /* -y */ + { "defines", &defines_flag, NOOP}, /* -d */ + { "no_parser", &no_parser_flag, NOOP}, /* -n */ + { "output_file", &spec_outfile, SETOPT}, /* -o */ + { "file_prefix", &spec_file_prefix, SETOPT}, /* -b */ + { "name_prefix", &spec_name_prefix, SETOPT}, /* -p */ + /* These would be acceptable, but they do not affect processing */ + { "verbose", &verbose_flag, NOOP}, /* -v */ + { "debug", &debug_flag, NOOP}, /* -t */ +/* {"help", , NOOP}, *//* -h */ +/* {"version", , NOOP}, *//* -V */ #endif - - {NULL, NULL, ILLEGAL} + { NULL, NULL, ILLEGAL} }; /* Parse a token which starts with %. @@ -557,14 +589,14 @@ struct percent_table_struct { int parse_percent_token (void) { - register int c; - register char *p; - register struct percent_table_struct *tx; + int c; + char *p; + struct percent_table_struct *tx; p = token_buffer; - c = getc(finput); + c = getc (finput); *p++ = '%'; - *p++ = c; /* for error msg */ + *p++ = c; /* for error msg */ *p = 0; switch (c) @@ -590,37 +622,38 @@ parse_percent_token (void) case '=': return PREC; } - if (!isalpha(c)) + if (!isalpha (c)) return ILLEGAL; p = token_buffer; *p++ = '%'; - while (isalpha(c) || c == '_' || c == '-') + while (isalpha (c) || c == '_' || c == '-') { if (p == token_buffer + maxtoken) - p = grow_token_buffer(p); + p = grow_token_buffer (p); - if (c == '-') c = '_'; + if (c == '-') + c = '_'; *p++ = c; - c = getc(finput); + c = getc (finput); } - ungetc(c, finput); + ungetc (c, finput); *p = 0; /* table lookup % directive */ for (tx = percent_table; tx->name; tx++) - if (strcmp(token_buffer+1, tx->name) == 0) + if (strcmp (token_buffer + 1, tx->name) == 0) break; if (tx->retval == SETOPT) { - *((char **)(tx->setflag)) = optarg; + *((char **) (tx->set_flag)) = optarg; return NOOP; } - if (tx->setflag) + if (tx->set_flag) { - *((int *)(tx->setflag)) = 1; + *((int *) (tx->set_flag)) = 1; return NOOP; } return tx->retval;