From e9071366c3104e4e4a2490c16d53e7eeef670f45 Mon Sep 17 00:00:00 2001 From: Akim Demaille Date: Tue, 6 Jun 2006 16:40:06 +0000 Subject: [PATCH] Extract the parsing of user actions from the grammar scanner. As a consequence, the relation between the grammar scanner and parser is much simpler. We can also split "composite tokens" back into simple tokens. * src/gram.h (ITEM_NUMBER_MAX, RULE_NUMBER_MAX): New. * src/scan-gram.l (add_column_width, adjust_location): Move to and rename as... * src/location.h, src/location.c (add_column_width) (location_compute): these. Fix the column count: the initial column is 0. (location_print): Be robust to ending column being 0. * src/location.h (boundary_set): New. * src/main.c: Adjust to scanner_free being renamed as gram_scanner_free. * src/output.c: Include scan-code.h. * src/parse-gram.y: Include scan-gram.h and scan-code.h. Use boundary_set. (PERCENT_DESTRUCTOR, PERCENT_PRINTER, PERCENT_INITIAL_ACTION) (PERCENT_LEX_PARAM, PERCENT_PARSE_PARAM): Remove the {...} part, which is now, again, a separate token. Adjust all dependencies. Wherever actions with $ and @ are used, use translate_code. (action): Remove this nonterminal which is now useless. * src/reader.c: Include assert.h, scan-gram.h and scan-code.h. (grammar_current_rule_action_append): Use translate_code. (packgram): Bound check ruleno, itemno, and rule_length. * src/reader.h (gram_in, gram__flex_debug, scanner_cursor) (last_string, last_braced_code_loc, max_left_semantic_context) (scanner_initialize, scanner_free, scanner_last_string_free) (gram_out, gram_lineno, YY_DECL_): Move to... * src/scan-gram.h: this new file. (YY_DECL): Rename as... (GRAM_DECL): this. * src/scan-code.h, src/scan-code.l, src/scan-code-c.c: New. * src/scan-gram.l (gram_get_lineno, gram_get_in, gram_get_out): (gram_get_leng, gram_get_text, gram_set_lineno, gram_set_in): (gram_set_out, gram_get_debug, gram_set_debug, gram_lex_destroy): Move these declarations, and... 
(obstack_for_string, STRING_GROW, STRING_FINISH, STRING_FREE): these to... * src/flex-scanner.h: this new file. * src/scan-gram.l (rule_length, rule_length_overflow) (increment_rule_length): Remove. (last_braced_code_loc): Rename as... (gram_last_braced_code_loc): this. Adjust to the changes of the parser. Move all the handling of $ and @ into... * src/scan-code.l: here. * src/scan-gram.l (handle_dollar, handle_at): Remove. (handle_action_dollar, handle_action_at): Move to... * src/scan-code.l: here. * src/Makefile.am (bison_SOURCES): Add flex-scanner.h, scan-code.h, scan-code-c.c, scan-gram.h. (EXTRA_bison_SOURCES): Add scan-code.l. (BUILT_SOURCES): Add scan-code.c. (yacc): Be robust to white spaces. * tests/conflicts.at, tests/input.at, tests/reduce.at, * tests/regression.at: Adjust the column numbers. * tests/regression.at: Adjust the error message. --- ChangeLog | 65 +++- src/Makefile.am | 19 +- src/gram.h | 4 +- src/location.c | 80 +++- src/location.h | 14 + src/main.c | 5 +- src/output.c | 1 + src/parse-gram.c | 527 +++++++++++++-------------- src/parse-gram.h | 6 +- src/parse-gram.y | 87 ++--- src/reader.c | 29 +- src/reader.h | 20 - src/scan-action.l | 866 ++++++++++++++++++++++++++++++++++++++++++++ src/scan-code-c.c | 2 + src/scan-code.h | 47 +++ src/scan-code.l | 358 ++++++++++++++++++ src/scan-gram.h | 44 +++ src/scan-gram.l | 427 +++------------------- src/system.h | 2 + tests/input.at | 26 +- tests/regression.at | 4 +- 21 files changed, 1857 insertions(+), 776 deletions(-) create mode 100644 src/scan-action.l create mode 100644 src/scan-code-c.c create mode 100644 src/scan-code.h create mode 100644 src/scan-code.l create mode 100644 src/scan-gram.h diff --git a/ChangeLog b/ChangeLog index 511d1799..143fd8be 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,4 +1,65 @@ -$Id$ +2006-06-06 Akim Demaille + + Extract the parsing of user actions from the grammar scanner. + As a consequence, the relation between the grammar scanner and + parser is much simpler. 
We can also split "composite tokens" back + into simple tokens. + * src/gram.h (ITEM_NUMBER_MAX, RULE_NUMBER_MAX): New. + * src/scan-gram.l (add_column_width, adjust_location): Move to and + rename as... + * src/location.h, src/location.c (add_column_width) + (location_compute): these. + Fix the column count: the initial column is 0. + (location_print): Be robust to ending column being 0. + * src/location.h (boundary_set): New. + * src/main.c: Adjust to scanner_free being renamed as + gram_scanner_free. + * src/output.c: Include scan-code.h. + * src/parse-gram.y: Include scan-gram.h and scan-code.h. + Use boundary_set. + (PERCENT_DESTRUCTOR, PERCENT_PRINTER, PERCENT_INITIAL_ACTION) + (PERCENT_LEX_PARAM, PERCENT_PARSE_PARAM): Remove the {...} part, + which is now, again, a separate token. + Adjust all dependencies. + Wherever actions with $ and @ are used, use translate_code. + (action): Remove this nonterminal which is now useless. + * src/reader.c: Include assert.h, scan-gram.h and scan-code.h. + (grammar_current_rule_action_append): Use translate_code. + (packgram): Bound check ruleno, itemno, and rule_length. + * src/reader.h (gram_in, gram__flex_debug, scanner_cursor) + (last_string, last_braced_code_loc, max_left_semantic_context) + (scanner_initialize, scanner_free, scanner_last_string_free) + (gram_out, gram_lineno, YY_DECL_): Move to... + * src/scan-gram.h: this new file. + (YY_DECL): Rename as... + (GRAM_DECL): this. + * src/scan-code.h, src/scan-code.l, src/scan-code-c.c: New. + * src/scan-gram.l (gram_get_lineno, gram_get_in, gram_get_out): + (gram_get_leng, gram_get_text, gram_set_lineno, gram_set_in): + (gram_set_out, gram_get_debug, gram_set_debug, gram_lex_destroy): + Move these declarations, and... + (obstack_for_string, STRING_GROW, STRING_FINISH, STRING_FREE): + these to... + * src/flex-scanner.h: this new file. + * src/scan-gram.l (rule_length, rule_length_overflow) + (increment_rule_length): Remove. + (last_braced_code_loc): Rename as... 
+ (gram_last_braced_code_loc): this. + Adjust to the changes of the parser. + Move all the handling of $ and @ into... + * src/scan-code.l: here. + * src/scan-gram.l (handle_dollar, handle_at): Remove. + (handle_action_dollar, handle_action_at): Move to... + * src/scan-code.l: here. + * src/Makefile.am (bison_SOURCES): Add flex-scanner.h, + scan-code.h, scan-code-c.c, scan-gram.h. + (EXTRA_bison_SOURCES): Add scan-code.l. + (BUILT_SOURCES): Add scan-code.c. + (yacc): Be robust to white spaces. + + * tests/conflicts.at, tests/input.at, tests/reduce.at, + * tests/regression.at: Adjust the column numbers. + * tests/regression.at: Adjust the error message. 2006-06-06 Joel E. Denny @@ -16057,3 +16118,5 @@ $Id$ Copying and distribution of this file, with or without modification, are permitted provided the copyright notice and this notice are preserved. + +$Id$ diff --git a/src/Makefile.am b/src/Makefile.am index fd148f10..7f336922 100644 --- a/src/Makefile.am +++ b/src/Makefile.am @@ -1,4 +1,4 @@ -## Copyright (C) 2001, 2002, 2003, 2004, 2005 Free Software Foundation, Inc. +## Copyright (C) 2001, 2002, 2003, 2004, 2005, 2006 Free Software Foundation, Inc. 
## This program is free software; you can redistribute it and/or modify ## it under the terms of the GNU General Public License as published by @@ -39,6 +39,7 @@ bison_SOURCES = \ conflicts.c conflicts.h \ derives.c derives.h \ files.c files.h \ + flex-scanner.h \ getargs.c getargs.h \ gram.c gram.h \ lalr.h lalr.c \ @@ -54,8 +55,9 @@ bison_SOURCES = \ reduce.c reduce.h \ revision.c revision.h \ relation.c relation.h \ - scan-gram-c.c \ - scan-skel-c.c scan-skel.h \ + scan-code.h scan-code-c.c \ + scan-gram.h scan-gram-c.c \ + scan-skel.h scan-skel-c.c \ state.c state.h \ symlist.c symlist.h \ symtab.c symtab.h \ @@ -65,15 +67,20 @@ bison_SOURCES = \ vcg.c vcg.h \ vcg_defaults.h -EXTRA_bison_SOURCES = scan-skel.l scan-gram.l +EXTRA_bison_SOURCES = scan-code.l scan-skel.l scan-gram.l -BUILT_SOURCES = revision.c scan-skel.c scan-gram.c parse-gram.c parse-gram.h +BUILT_SOURCES = \ +parse-gram.c parse-gram.h \ +revision.c \ +scan-code.c \ +scan-skel.c \ +scan-gram.c \ MOSTLYCLEANFILES = yacc yacc: echo '#! /bin/sh' >$@ - echo 'exec $(bindir)/bison -y "$$@"' >>$@ + echo "exec '$(bindir)/bison' -y \"$$@\"" >>$@ chmod a+x $@ echo: diff --git a/src/gram.h b/src/gram.h index b8f316a0..3b742d56 100644 --- a/src/gram.h +++ b/src/gram.h @@ -1,6 +1,6 @@ /* Data definitions for internal representation of Bison's input. - Copyright (C) 1984, 1986, 1989, 1992, 2001, 2002, 2003, 2004, 2005 + Copyright (C) 1984, 1986, 1989, 1992, 2001, 2002, 2003, 2004, 2005, 2006 Free Software Foundation, Inc. This file is part of Bison, the GNU Compiler Compiler. @@ -115,6 +115,7 @@ extern int ntokens; extern int nvars; typedef int item_number; +#define ITEM_NUMBER_MAX INT_MAX extern item_number *ritem; extern unsigned int nritems; @@ -146,6 +147,7 @@ item_number_is_symbol_number (item_number i) /* Rule numbers. 
*/ typedef int rule_number; +#define RULE_NUMBER_MAX INT_MAX extern rule_number nrules; static inline item_number diff --git a/src/location.c b/src/location.c index ecd3658b..2213f810 100644 --- a/src/location.c +++ b/src/location.c @@ -1,6 +1,5 @@ /* Locations for Bison - - Copyright (C) 2002, 2005 Free Software Foundation, Inc. + Copyright (C) 2002, 2005, 2006 Free Software Foundation, Inc. This file is part of Bison, the GNU Compiler Compiler. @@ -28,11 +27,80 @@ location const empty_location; +/* If BUF is null, add BUFSIZE (which in this case must be less than + INT_MAX) to COLUMN; otherwise, add mbsnwidth (BUF, BUFSIZE, 0) to + COLUMN. If an overflow occurs, or might occur but is undetectable, + return INT_MAX. Assume COLUMN is nonnegative. */ + +static inline int +add_column_width (int column, char const *buf, size_t bufsize) +{ + size_t width; + unsigned int remaining_columns = INT_MAX - column; + + if (buf) + { + if (INT_MAX / 2 <= bufsize) + return INT_MAX; + width = mbsnwidth (buf, bufsize, 0); + } + else + width = bufsize; + + return width <= remaining_columns ? column + width : INT_MAX; +} + +/* Set *LOC and adjust scanner cursor to account for token TOKEN of + size SIZE. 
*/ + +void +location_compute (location *loc, boundary *cur, char const *token, size_t size) +{ + int line = cur->line; + int column = cur->column; + char const *p0 = token; + char const *p = token; + char const *lim = token + size; + + loc->start = *cur; + + for (p = token; p < lim; p++) + switch (*p) + { + case '\n': + line += line < INT_MAX; + column = 1; + p0 = p + 1; + break; + + case '\t': + column = add_column_width (column, p0, p - p0); + column = add_column_width (column, NULL, 8 - ((column - 1) & 7)); + p0 = p + 1; + break; + + default: + break; + } + + cur->line = line; + cur->column = column = add_column_width (column, p0, p - p0); + + loc->end = *cur; + + if (line == INT_MAX && loc->start.line != INT_MAX) + warn_at (*loc, _("line number overflow")); + if (column == INT_MAX && loc->start.column != INT_MAX) + warn_at (*loc, _("column number overflow")); +} + + /* Output to OUT the location LOC. Warning: it uses quotearg's slot 3. */ void location_print (FILE *out, location loc) { + int end_col = 0 < loc.end.column ? loc.end.column - 1 : 0; fprintf (out, "%s:%d.%d", quotearg_n_style (3, escape_quoting_style, loc.start.file), loc.start.line, loc.start.column); @@ -40,9 +108,9 @@ location_print (FILE *out, location loc) if (loc.start.file != loc.end.file) fprintf (out, "-%s:%d.%d", quotearg_n_style (3, escape_quoting_style, loc.end.file), - loc.end.line, loc.end.column - 1); + loc.end.line, end_col); else if (loc.start.line < loc.end.line) - fprintf (out, "-%d.%d", loc.end.line, loc.end.column - 1); - else if (loc.start.column < loc.end.column - 1) - fprintf (out, "-%d", loc.end.column - 1); + fprintf (out, "-%d.%d", loc.end.line, end_col); + else if (loc.start.column < end_col) + fprintf (out, "-%d", end_col); } diff --git a/src/location.h b/src/location.h index 49d2a2ed..542c6320 100644 --- a/src/location.h +++ b/src/location.h @@ -40,6 +40,15 @@ typedef struct } boundary; +/* Set the position of \a b. 
*/ +static inline void +boundary_set (boundary *b, const char *f, int l, int c) +{ + b->file = f; + b->line = l; + b->column = c; +} + /* Return nonzero if A and B are equal boundaries. */ static inline bool equal_boundaries (boundary a, boundary b) @@ -64,6 +73,11 @@ typedef struct extern location const empty_location; +/* Set *LOC and adjust scanner cursor to account for token TOKEN of + size SIZE. */ +void location_compute (location *loc, + boundary *cur, char const *token, size_t size); + void location_print (FILE *out, location loc); #endif /* ! defined LOCATION_H_ */ diff --git a/src/main.c b/src/main.c index 8769fef8..4d7fd331 100644 --- a/src/main.c +++ b/src/main.c @@ -1,6 +1,7 @@ /* Top level entry point of Bison. - Copyright (C) 1984, 1986, 1989, 1992, 1995, 2000, 2001, 2002, 2004, 2005 + Copyright (C) 1984, 1986, 1989, 1992, 1995, 2000, 2001, 2002, 2004, 2005, + 2006 Free Software Foundation, Inc. This file is part of Bison, the GNU Compiler Compiler. @@ -169,7 +170,7 @@ main (int argc, char *argv[]) /* The scanner memory cannot be released right after parsing, as it contains things such as user actions, prologue, epilogue etc. */ - scanner_free (); + gram_scanner_free (); muscle_free (); uniqstrs_free (); timevar_pop (TV_FREE); diff --git a/src/output.c b/src/output.c index 6a02bb33..1391959a 100644 --- a/src/output.c +++ b/src/output.c @@ -36,6 +36,7 @@ #include "muscle_tab.h" #include "output.h" #include "reader.h" +#include "scan-code.h" /* max_left_semantic_context */ #include "scan-skel.h" #include "symtab.h" #include "tables.h" diff --git a/src/parse-gram.c b/src/parse-gram.c index 2b77f3ea..00c39a5f 100644 --- a/src/parse-gram.c +++ b/src/parse-gram.c @@ -1,4 +1,4 @@ -/* A Bison parser, made by GNU Bison 2.2a. */ +/* A Bison parser, made by GNU Bison 2.1b. */ /* Skeleton implementation for Bison's Yacc-like parsers in C @@ -47,7 +47,7 @@ #define YYBISON 1 /* Bison version. 
*/ -#define YYBISON_VERSION "2.2a" +#define YYBISON_VERSION "2.1b" /* Skeleton name. */ #define YYSKELETON_NAME "yacc.c" @@ -182,7 +182,7 @@ /* Copy the first part of user declarations. */ -#line 1 "parse-gram.y" +#line 1 "../../src/parse-gram.y" /* Bison Grammar Parser -*- C -*- Copyright (C) 2002, 2003, 2004, 2005, 2006 Free Software Foundation, Inc. @@ -217,6 +217,8 @@ #include "quotearg.h" #include "reader.h" #include "symlist.h" +#include "scan-gram.h" +#include "scan-code.h" #include "strverscmp.h" #define YYLLOC_DEFAULT(Current, Rhs, N) (Current) = lloc_default (Rhs, N) @@ -276,7 +278,7 @@ static int current_prec = 0; #if ! defined YYSTYPE && ! defined YYSTYPE_IS_DECLARED typedef union YYSTYPE -#line 94 "parse-gram.y" +#line 95 "../../src/parse-gram.y" { symbol *symbol; symbol_list *list; @@ -286,7 +288,7 @@ typedef union YYSTYPE uniqstr uniqstr; } /* Line 193 of yacc.c. */ -#line 290 "parse-gram.c" +#line 292 "../../src/parse-gram.c" YYSTYPE; # define yystype YYSTYPE /* obsolescent; will be withdrawn */ # define YYSTYPE_IS_DECLARED 1 @@ -311,7 +313,7 @@ typedef struct YYLTYPE /* Line 216 of yacc.c. */ -#line 315 "parse-gram.c" +#line 317 "../../src/parse-gram.c" #ifdef short # undef short @@ -528,16 +530,16 @@ union yyalloc /* YYFINAL -- State number of the termination state. */ #define YYFINAL 3 /* YYLAST -- Last index in YYTABLE. */ -#define YYLAST 161 +#define YYLAST 164 /* YYNTOKENS -- Number of terminals. */ #define YYNTOKENS 52 /* YYNNTS -- Number of nonterminals. */ -#define YYNNTS 26 +#define YYNNTS 24 /* YYNRULES -- Number of rules. */ -#define YYNRULES 82 +#define YYNRULES 80 /* YYNRULES -- Number of states. */ -#define YYNSTATES 111 +#define YYNSTATES 114 /* YYTRANSLATE(YYLEX) -- Bison symbol number corresponding to YYLEX. 
*/ #define YYUNDEFTOK 2 @@ -588,54 +590,54 @@ static const yytype_uint8 yytranslate[] = static const yytype_uint8 yyprhs[] = { 0, 0, 3, 8, 9, 12, 14, 16, 18, 21, - 25, 27, 29, 32, 35, 39, 41, 43, 45, 47, - 51, 53, 55, 59, 61, 63, 66, 69, 71, 73, - 75, 77, 79, 81, 84, 86, 89, 92, 94, 96, - 97, 101, 102, 106, 110, 114, 116, 118, 120, 121, - 123, 125, 128, 130, 132, 135, 138, 142, 144, 147, - 149, 152, 154, 157, 160, 161, 165, 167, 171, 174, - 175, 178, 181, 185, 189, 193, 195, 197, 198, 201, - 203, 205, 206 + 25, 27, 29, 32, 35, 39, 41, 44, 47, 49, + 53, 55, 57, 61, 64, 66, 69, 72, 74, 76, + 78, 80, 82, 84, 87, 89, 93, 97, 99, 101, + 102, 106, 107, 111, 115, 119, 121, 123, 125, 126, + 128, 130, 133, 135, 137, 140, 143, 147, 149, 152, + 154, 157, 159, 162, 165, 166, 170, 172, 176, 179, + 180, 183, 186, 190, 194, 198, 200, 202, 204, 206, + 207 }; /* YYRHS -- A `-1'-separated list of the rules' RHS. */ static const yytype_int8 yyrhs[] = { - 53, 0, -1, 54, 48, 66, 77, -1, -1, 54, - 55, -1, 56, -1, 49, -1, 17, -1, 19, 76, - -1, 19, 76, 76, -1, 20, -1, 21, -1, 22, - 4, -1, 23, 4, -1, 24, 43, 76, -1, 25, - -1, 26, -1, 27, -1, 28, -1, 29, 43, 76, - -1, 31, -1, 32, -1, 33, 43, 76, -1, 34, - -1, 35, -1, 36, 76, -1, 37, 76, -1, 39, - -1, 40, -1, 41, -1, 44, -1, 60, -1, 57, - -1, 38, 72, -1, 10, -1, 8, 63, -1, 9, - 63, -1, 18, -1, 30, -1, -1, 6, 58, 65, - -1, -1, 5, 59, 65, -1, 7, 42, 63, -1, - 61, 62, 63, -1, 11, -1, 12, -1, 13, -1, - -1, 42, -1, 72, -1, 63, 72, -1, 42, -1, - 46, -1, 46, 4, -1, 46, 75, -1, 46, 4, - 75, -1, 64, -1, 65, 64, -1, 67, -1, 66, - 67, -1, 68, -1, 56, 44, -1, 1, 44, -1, - -1, 47, 69, 70, -1, 71, -1, 70, 45, 71, - -1, 70, 44, -1, -1, 71, 72, -1, 71, 73, - -1, 71, 14, 72, -1, 71, 15, 4, -1, 71, - 16, 42, -1, 46, -1, 75, -1, -1, 74, 51, - -1, 3, -1, 3, -1, -1, 48, 50, -1 + 53, 0, -1, 54, 48, 66, 75, -1, -1, 54, + 55, -1, 56, -1, 49, -1, 17, -1, 19, 74, + -1, 19, 74, 74, -1, 20, -1, 21, -1, 22, + 4, -1, 23, 4, -1, 24, 43, 74, -1, 25, + -1, 26, 51, -1, 27, 
51, -1, 28, -1, 29, + 43, 74, -1, 31, -1, 32, -1, 33, 43, 74, + -1, 34, 51, -1, 35, -1, 36, 74, -1, 37, + 74, -1, 39, -1, 40, -1, 41, -1, 44, -1, + 60, -1, 57, -1, 38, 72, -1, 10, -1, 8, + 51, 63, -1, 9, 51, 63, -1, 18, -1, 30, + -1, -1, 6, 58, 65, -1, -1, 5, 59, 65, + -1, 7, 42, 63, -1, 61, 62, 63, -1, 11, + -1, 12, -1, 13, -1, -1, 42, -1, 72, -1, + 63, 72, -1, 42, -1, 46, -1, 46, 4, -1, + 46, 73, -1, 46, 4, 73, -1, 64, -1, 65, + 64, -1, 67, -1, 66, 67, -1, 68, -1, 56, + 44, -1, 1, 44, -1, -1, 47, 69, 70, -1, + 71, -1, 70, 45, 71, -1, 70, 44, -1, -1, + 71, 72, -1, 71, 51, -1, 71, 14, 72, -1, + 71, 15, 4, -1, 71, 16, 42, -1, 46, -1, + 73, -1, 3, -1, 3, -1, -1, 48, 50, -1 }; /* YYRLINE[YYN] -- source line where rule number YYN was defined. */ static const yytype_uint16 yyrline[] = { - 0, 202, 202, 210, 212, 216, 217, 218, 219, 224, - 225, 226, 227, 228, 229, 230, 235, 239, 240, 241, - 242, 243, 244, 245, 246, 247, 248, 249, 250, 251, - 252, 256, 257, 258, 262, 278, 285, 292, 296, 303, - 303, 308, 308, 313, 323, 338, 339, 340, 344, 345, - 351, 352, 357, 361, 366, 372, 378, 389, 390, 399, - 400, 406, 407, 408, 415, 415, 419, 420, 421, 426, - 427, 429, 430, 432, 434, 439, 440, 456, 456, 462, - 471, 476, 478 + 0, 194, 194, 202, 204, 208, 209, 211, 212, 217, + 218, 219, 220, 221, 222, 223, 228, 232, 233, 234, + 235, 236, 237, 238, 239, 240, 241, 242, 243, 244, + 245, 249, 250, 251, 255, 271, 279, 287, 291, 298, + 298, 303, 303, 308, 318, 333, 334, 335, 339, 340, + 345, 346, 351, 355, 360, 366, 372, 383, 384, 393, + 394, 400, 401, 402, 409, 409, 413, 414, 415, 420, + 421, 423, 426, 428, 430, 435, 436, 441, 450, 455, + 457 }; #endif @@ -645,14 +647,14 @@ static const yytype_uint16 yyrline[] = static const char *const yytname[] = { "\"end of file\"", "error", "$undefined", "\"string\"", "\"integer\"", - "\"%token\"", "\"%nterm\"", "\"%type\"", "\"%destructor {...}\"", - "\"%printer {...}\"", "\"%union {...}\"", "\"%left\"", "\"%right\"", + "\"%token\"", "\"%nterm\"", 
"\"%type\"", "\"%destructor\"", + "\"%printer\"", "\"%union {...}\"", "\"%left\"", "\"%right\"", "\"%nonassoc\"", "\"%prec\"", "\"%dprec\"", "\"%merge\"", "\"%debug\"", "\"%default-prec\"", "\"%define\"", "\"%defines\"", "\"%error-verbose\"", "\"%expect\"", "\"%expect-rr\"", "\"%file-prefix\"", "\"%glr-parser\"", - "\"%initial-action {...}\"", "\"%lex-param {...}\"", "\"%locations\"", + "\"%initial-action\"", "\"%lex-param\"", "\"%locations\"", "\"%name-prefix\"", "\"%no-default-prec\"", "\"%no-lines\"", - "\"%nondeterministic-parser\"", "\"%output\"", "\"%parse-param {...}\"", + "\"%nondeterministic-parser\"", "\"%output\"", "\"%parse-param\"", "\"%pure-parser\"", "\"%require\"", "\"%skeleton\"", "\"%start\"", "\"%token-table\"", "\"%verbose\"", "\"%yacc\"", "\"type\"", "\"=\"", "\";\"", "\"|\"", "\"identifier\"", "\"identifier:\"", "\"%%\"", @@ -661,8 +663,8 @@ static const char *const yytname[] = "symbol_declaration", "@1", "@2", "precedence_declaration", "precedence_declarator", "type.opt", "symbols.1", "symbol_def", "symbol_defs.1", "grammar", "rules_or_grammar_declaration", "rules", - "@3", "rhses.1", "rhs", "symbol", "action", "@4", "string_as_id", - "string_content", "epilogue.opt", 0 + "@3", "rhses.1", "rhs", "symbol", "string_as_id", "string_content", + "epilogue.opt", 0 }; #endif @@ -690,22 +692,22 @@ static const yytype_uint8 yyr1[] = 57, 59, 57, 57, 60, 61, 61, 61, 62, 62, 63, 63, 64, 64, 64, 64, 64, 65, 65, 66, 66, 67, 67, 67, 69, 68, 70, 70, 70, 71, - 71, 71, 71, 71, 71, 72, 72, 74, 73, 75, - 76, 77, 77 + 71, 71, 71, 71, 71, 72, 72, 73, 74, 75, + 75 }; /* YYR2[YYN] -- Number of symbols composing right hand side of rule YYN. 
*/ static const yytype_uint8 yyr2[] = { 0, 2, 4, 0, 2, 1, 1, 1, 2, 3, - 1, 1, 2, 2, 3, 1, 1, 1, 1, 3, - 1, 1, 3, 1, 1, 2, 2, 1, 1, 1, - 1, 1, 1, 2, 1, 2, 2, 1, 1, 0, + 1, 1, 2, 2, 3, 1, 2, 2, 1, 3, + 1, 1, 3, 2, 1, 2, 2, 1, 1, 1, + 1, 1, 1, 2, 1, 3, 3, 1, 1, 0, 3, 0, 3, 3, 3, 1, 1, 1, 0, 1, 1, 2, 1, 1, 2, 2, 3, 1, 2, 1, 2, 1, 2, 2, 0, 3, 1, 3, 2, 0, - 2, 2, 3, 3, 3, 1, 1, 0, 2, 1, - 1, 0, 2 + 2, 2, 3, 3, 3, 1, 1, 1, 1, 0, + 2 }; /* YYDEFACT[STATE-NAME] -- Default rule to reduce with in state @@ -715,98 +717,98 @@ static const yytype_uint8 yydefact[] = { 3, 0, 0, 1, 41, 39, 0, 0, 0, 34, 45, 46, 47, 7, 37, 0, 10, 11, 0, 0, - 0, 15, 16, 17, 18, 0, 38, 20, 21, 0, - 23, 24, 0, 0, 0, 27, 28, 29, 30, 0, - 6, 4, 5, 32, 31, 48, 0, 0, 0, 79, - 75, 35, 50, 76, 36, 80, 8, 12, 13, 0, - 0, 0, 25, 26, 33, 0, 64, 0, 0, 59, - 61, 49, 0, 52, 53, 57, 42, 40, 43, 51, - 9, 14, 19, 22, 63, 69, 62, 0, 60, 2, - 44, 54, 55, 58, 65, 66, 82, 56, 68, 69, - 0, 0, 0, 70, 71, 0, 67, 72, 73, 74, - 78 + 0, 15, 0, 0, 18, 0, 38, 20, 21, 0, + 0, 24, 0, 0, 0, 27, 28, 29, 30, 0, + 6, 4, 5, 32, 31, 48, 0, 0, 0, 0, + 0, 78, 8, 12, 13, 0, 16, 17, 0, 0, + 23, 25, 26, 77, 75, 33, 76, 0, 64, 0, + 0, 59, 61, 49, 0, 52, 53, 57, 42, 40, + 43, 50, 35, 36, 9, 14, 19, 22, 63, 69, + 62, 0, 60, 2, 44, 54, 55, 58, 51, 65, + 66, 80, 56, 68, 69, 0, 0, 0, 71, 70, + 67, 72, 73, 74 }; /* YYDEFGOTO[NTERM-NUM]. */ static const yytype_int8 yydefgoto[] = { - -1, 1, 2, 41, 67, 43, 47, 46, 44, 45, - 72, 51, 75, 76, 68, 69, 70, 85, 94, 95, - 52, 104, 105, 53, 56, 89 + -1, 1, 2, 41, 69, 43, 47, 46, 44, 45, + 74, 80, 77, 78, 70, 71, 72, 89, 99, 100, + 81, 66, 52, 93 }; /* YYPACT[STATE-NUM] -- Index in YYTABLE of the portion describing STATE-NUM. 
*/ -#define YYPACT_NINF -69 +#define YYPACT_NINF -72 static const yytype_int8 yypact[] = { - -69, 5, 112, -69, -69, -69, -35, 0, 0, -69, - -69, -69, -69, -69, -69, 13, -69, -69, 20, 31, - -18, -69, -69, -69, -69, -6, -69, -69, -69, -5, - -69, -69, 13, 13, 0, -69, -69, -69, -69, 69, - -69, -69, -69, -69, -69, -2, -38, -38, 0, -69, - -69, 0, -69, -69, 0, -69, 13, -69, -69, 13, - 13, 13, -69, -69, -69, -8, -69, 3, 21, -69, - -69, -69, 0, -69, 6, -69, -38, -38, 0, -69, - -69, -69, -69, -69, -69, -69, -69, 2, -69, -69, - 0, 39, -69, -69, -33, -1, -69, -69, -69, -69, - 0, 44, 1, -69, -69, 4, -1, -69, -69, -69, - -69 + -72, 7, 115, -72, -72, -72, -22, -17, -16, -72, + -72, -72, -72, -72, -72, 26, -72, -72, 32, 33, + -3, -72, -8, -6, -72, 4, -72, -72, -72, 9, + 2, -72, 26, 26, -2, -72, -72, -72, -72, 72, + -72, -72, -72, -72, -72, 12, -40, -40, -2, -2, + -2, -72, 26, -72, -72, 26, -72, -72, 26, 26, + -72, -72, -72, -72, -72, -72, -72, 11, -72, 13, + 3, -72, -72, -72, -2, -72, 19, -72, -40, -40, + -2, -72, -2, -2, -72, -72, -72, -72, -72, -72, + -72, 18, -72, -72, -2, 53, -72, -72, -72, -19, + 16, -72, -72, -72, -72, -2, 55, 21, -72, -72, + 16, -72, -72, -72 }; /* YYPGOTO[NTERM-NUM]. */ static const yytype_int8 yypgoto[] = { - -69, -69, -69, -69, 47, -69, -69, -69, -69, -69, - -69, -7, -58, 7, -69, -15, -69, -69, -69, -42, - -34, -69, -69, -68, 30, -69 + -72, -72, -72, -72, 67, -72, -72, -72, -72, -72, + -72, -32, -51, 23, -72, 5, -72, -72, -72, -30, + -34, -71, 6, -72 }; /* YYTABLE[YYPACT[STATE-NUM]]. What to do in state STATE-NUM. If positive, shift that token. If negative, reduce the rule which number is the opposite. If zero, do what YYDEFACT says. If YYTABLE_NINF, syntax error. 
*/ -#define YYTABLE_NINF -82 +#define YYTABLE_NINF -80 static const yytype_int8 yytable[] = { - 64, 54, 49, 49, 73, 3, 92, 48, 74, 49, - 91, 98, 99, 100, 101, 102, 55, 79, 93, 93, - 79, -81, 65, 97, 57, 59, 4, 5, 6, 7, - 8, 9, 10, 11, 12, 58, 84, 60, 61, 14, - 71, 78, 49, 109, 79, 50, 50, 86, 108, 42, - -77, 26, 96, 88, 77, 110, 79, 106, 0, 34, - 0, 103, 62, 63, 0, 90, 107, 0, 66, 87, - 65, 0, 103, 0, 4, 5, 6, 7, 8, 9, - 10, 11, 12, 0, 0, 0, 80, 14, 0, 81, - 82, 83, 0, 0, 0, 0, 0, 0, 0, 26, - 0, 0, 0, 0, 0, 0, 0, 34, 0, 0, - 0, 0, 0, 0, 0, 0, 66, 4, 5, 6, - 7, 8, 9, 10, 11, 12, 0, 0, 0, 13, - 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, - 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, - 34, 35, 36, 37, 0, 0, 38, 0, 0, 0, - 39, 40 + 65, 63, 75, -79, 67, 96, 76, 3, 4, 5, + 6, 7, 8, 9, 10, 11, 12, 82, 83, 63, + 48, 14, 63, 95, 102, 103, 104, 97, 97, 51, + 105, 106, 107, 26, 49, 50, 53, 54, 61, 62, + 55, 34, 94, 56, 64, 57, 98, 58, 98, 98, + 68, 91, 59, 60, 73, 88, 63, 90, 84, 112, + 98, 85, 64, 113, 86, 87, 109, 108, 101, 42, + 79, 111, 0, 67, 110, 92, 109, 4, 5, 6, + 7, 8, 9, 10, 11, 12, 0, 0, 0, 0, + 14, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 26, 0, 0, 0, 0, 0, 0, 0, + 34, 0, 0, 0, 0, 0, 0, 0, 0, 68, + 4, 5, 6, 7, 8, 9, 10, 11, 12, 0, + 0, 0, 13, 14, 15, 16, 17, 18, 19, 20, + 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, + 31, 32, 33, 34, 35, 36, 37, 0, 0, 38, + 0, 0, 0, 39, 40 }; static const yytype_int8 yycheck[] = { - 34, 8, 3, 3, 42, 0, 74, 42, 46, 3, - 4, 44, 45, 14, 15, 16, 3, 51, 76, 77, - 54, 0, 1, 91, 4, 43, 5, 6, 7, 8, - 9, 10, 11, 12, 13, 4, 44, 43, 43, 18, - 42, 48, 3, 42, 78, 46, 46, 44, 4, 2, - 51, 30, 50, 68, 47, 51, 90, 99, -1, 38, - -1, 95, 32, 33, -1, 72, 100, -1, 47, 48, - 1, -1, 106, -1, 5, 6, 7, 8, 9, 10, - 11, 12, 13, -1, -1, -1, 56, 18, -1, 59, - 60, 61, -1, -1, -1, -1, -1, -1, -1, 30, - -1, -1, -1, -1, -1, -1, -1, 38, -1, -1, - -1, -1, -1, -1, -1, -1, 47, 5, 6, 7, - 8, 9, 10, 11, 12, 13, -1, -1, -1, 17, - 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, - 28, 29, 30, 31, 
32, 33, 34, 35, 36, 37, - 38, 39, 40, 41, -1, -1, 44, -1, -1, -1, - 48, 49 + 34, 3, 42, 0, 1, 76, 46, 0, 5, 6, + 7, 8, 9, 10, 11, 12, 13, 49, 50, 3, + 42, 18, 3, 4, 95, 44, 45, 78, 79, 3, + 14, 15, 16, 30, 51, 51, 4, 4, 32, 33, + 43, 38, 74, 51, 46, 51, 80, 43, 82, 83, + 47, 48, 43, 51, 42, 44, 3, 44, 52, 4, + 94, 55, 46, 42, 58, 59, 100, 51, 50, 2, + 47, 105, -1, 1, 104, 70, 110, 5, 6, 7, + 8, 9, 10, 11, 12, 13, -1, -1, -1, -1, + 18, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, 30, -1, -1, -1, -1, -1, -1, -1, + 38, -1, -1, -1, -1, -1, -1, -1, -1, 47, + 5, 6, 7, 8, 9, 10, 11, 12, 13, -1, + -1, -1, 17, 18, 19, 20, 21, 22, 23, 24, + 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, + 35, 36, 37, 38, 39, 40, 41, -1, -1, 44, + -1, -1, -1, 48, 49 }; /* YYSTOS[STATE-NUM] -- The (internal number of the) accessing @@ -817,14 +819,14 @@ static const yytype_uint8 yystos[] = 11, 12, 13, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 44, 48, - 49, 55, 56, 57, 60, 61, 59, 58, 42, 3, - 46, 63, 72, 75, 63, 3, 76, 4, 4, 43, - 43, 43, 76, 76, 72, 1, 47, 56, 66, 67, - 68, 42, 62, 42, 46, 64, 65, 65, 63, 72, - 76, 76, 76, 76, 44, 69, 44, 48, 67, 77, - 63, 4, 75, 64, 70, 71, 50, 75, 44, 45, - 14, 15, 16, 72, 73, 74, 71, 72, 4, 42, - 51 + 49, 55, 56, 57, 60, 61, 59, 58, 42, 51, + 51, 3, 74, 4, 4, 43, 51, 51, 43, 43, + 51, 74, 74, 3, 46, 72, 73, 1, 47, 56, + 66, 67, 68, 42, 62, 42, 46, 64, 65, 65, + 63, 72, 63, 63, 74, 74, 74, 74, 44, 69, + 44, 48, 67, 75, 63, 4, 73, 64, 72, 70, + 71, 50, 73, 44, 45, 14, 15, 16, 51, 72, + 71, 72, 4, 42 }; #define yyerrok (yyerrstatus = 0) @@ -973,84 +975,64 @@ yy_symbol_value_print (yyoutput, yytype, yyvaluep, yylocationp) switch (yytype) { case 3: /* "\"string\"" */ -#line 179 "parse-gram.y" +#line 175 "../../src/parse-gram.y" { fprintf (stderr, "\"%s\"", (yyvaluep->chars)); }; -#line 979 "parse-gram.c" +#line 981 "../../src/parse-gram.c" break; case 4: /* "\"integer\"" */ -#line 192 "parse-gram.y" +#line 184 
"../../src/parse-gram.y" { fprintf (stderr, "%d", (yyvaluep->integer)); }; -#line 984 "parse-gram.c" - break; - case 8: /* "\"%destructor {...}\"" */ -#line 181 "parse-gram.y" - { fprintf (stderr, "{\n%s\n}", (yyvaluep->chars)); }; -#line 989 "parse-gram.c" - break; - case 9: /* "\"%printer {...}\"" */ -#line 181 "parse-gram.y" - { fprintf (stderr, "{\n%s\n}", (yyvaluep->chars)); }; -#line 994 "parse-gram.c" +#line 986 "../../src/parse-gram.c" break; case 10: /* "\"%union {...}\"" */ -#line 181 "parse-gram.y" - { fprintf (stderr, "{\n%s\n}", (yyvaluep->chars)); }; -#line 999 "parse-gram.c" - break; - case 26: /* "\"%initial-action {...}\"" */ -#line 181 "parse-gram.y" - { fprintf (stderr, "{\n%s\n}", (yyvaluep->chars)); }; -#line 1004 "parse-gram.c" - break; - case 27: /* "\"%lex-param {...}\"" */ -#line 181 "parse-gram.y" - { fprintf (stderr, "{\n%s\n}", (yyvaluep->chars)); }; -#line 1009 "parse-gram.c" - break; - case 34: /* "\"%parse-param {...}\"" */ -#line 181 "parse-gram.y" +#line 177 "../../src/parse-gram.y" { fprintf (stderr, "{\n%s\n}", (yyvaluep->chars)); }; -#line 1014 "parse-gram.c" +#line 991 "../../src/parse-gram.c" break; case 42: /* "\"type\"" */ -#line 190 "parse-gram.y" +#line 182 "../../src/parse-gram.y" { fprintf (stderr, "<%s>", (yyvaluep->uniqstr)); }; -#line 1019 "parse-gram.c" +#line 996 "../../src/parse-gram.c" break; case 46: /* "\"identifier\"" */ -#line 194 "parse-gram.y" +#line 186 "../../src/parse-gram.y" { fprintf (stderr, "%s", (yyvaluep->symbol)->tag); }; -#line 1024 "parse-gram.c" +#line 1001 "../../src/parse-gram.c" break; case 47: /* "\"identifier:\"" */ -#line 196 "parse-gram.y" +#line 188 "../../src/parse-gram.y" { fprintf (stderr, "%s:", (yyvaluep->symbol)->tag); }; -#line 1029 "parse-gram.c" +#line 1006 "../../src/parse-gram.c" break; case 49: /* "\"%{...%}\"" */ -#line 181 "parse-gram.y" +#line 177 "../../src/parse-gram.y" { fprintf (stderr, "{\n%s\n}", (yyvaluep->chars)); }; -#line 1034 "parse-gram.c" +#line 1011 
"../../src/parse-gram.c" break; case 50: /* "\"epilogue\"" */ -#line 181 "parse-gram.y" +#line 177 "../../src/parse-gram.y" { fprintf (stderr, "{\n%s\n}", (yyvaluep->chars)); }; -#line 1039 "parse-gram.c" +#line 1016 "../../src/parse-gram.c" + break; + case 51: /* "\"{...}\"" */ +#line 177 "../../src/parse-gram.y" + { fprintf (stderr, "{\n%s\n}", (yyvaluep->chars)); }; +#line 1021 "../../src/parse-gram.c" break; case 72: /* "symbol" */ -#line 194 "parse-gram.y" +#line 186 "../../src/parse-gram.y" { fprintf (stderr, "%s", (yyvaluep->symbol)->tag); }; -#line 1044 "parse-gram.c" +#line 1026 "../../src/parse-gram.c" break; - case 75: /* "string_as_id" */ -#line 194 "parse-gram.y" + case 73: /* "string_as_id" */ +#line 186 "../../src/parse-gram.y" { fprintf (stderr, "%s", (yyvaluep->symbol)->tag); }; -#line 1049 "parse-gram.c" +#line 1031 "../../src/parse-gram.c" break; - case 76: /* "string_content" */ -#line 179 "parse-gram.y" + case 74: /* "string_content" */ +#line 175 "../../src/parse-gram.y" { fprintf (stderr, "\"%s\"", (yyvaluep->chars)); }; -#line 1054 "parse-gram.c" +#line 1036 "../../src/parse-gram.c" break; default: break; @@ -1560,16 +1542,15 @@ YYLTYPE yylloc; /* User initialization code. */ -#line 84 "parse-gram.y" +#line 86 "../../src/parse-gram.y" { /* Bison's grammar can initial empty locations, hence a default location is needed. */ - yylloc.start.file = yylloc.end.file = current_file; - yylloc.start.line = yylloc.end.line = 1; - yylloc.start.column = yylloc.end.column = 0; + boundary_set (&yylloc.start, current_file, 1, 0); + boundary_set (&yylloc.end, current_file, 1, 0); } /* Line 1078 of yacc.c. 
*/ -#line 1573 "parse-gram.c" +#line 1554 "../../src/parse-gram.c" yylsp[0] = yylloc; goto yysetstate; @@ -1754,17 +1735,18 @@ yyreduce: switch (yyn) { case 6: -#line 217 "parse-gram.y" - { prologue_augment ((yyvsp[(1) - (1)].chars), (yylsp[(1) - (1)])); } +#line 209 "../../src/parse-gram.y" + { prologue_augment (translate_code ((yyvsp[(1) - (1)].chars), (yylsp[(1) - (1)])), + (yylsp[(1) - (1)])); } break; case 7: -#line 218 "parse-gram.y" +#line 211 "../../src/parse-gram.y" { debug_flag = true; } break; case 8: -#line 220 "parse-gram.y" +#line 213 "../../src/parse-gram.y" { static char one[] = "1"; muscle_insert ((yyvsp[(2) - (2)].chars), one); @@ -1772,37 +1754,37 @@ yyreduce: break; case 9: -#line 224 "parse-gram.y" +#line 217 "../../src/parse-gram.y" { muscle_insert ((yyvsp[(2) - (3)].chars), (yyvsp[(3) - (3)].chars)); } break; case 10: -#line 225 "parse-gram.y" +#line 218 "../../src/parse-gram.y" { defines_flag = true; } break; case 11: -#line 226 "parse-gram.y" +#line 219 "../../src/parse-gram.y" { error_verbose = true; } break; case 12: -#line 227 "parse-gram.y" +#line 220 "../../src/parse-gram.y" { expected_sr_conflicts = (yyvsp[(2) - (2)].integer); } break; case 13: -#line 228 "parse-gram.y" +#line 221 "../../src/parse-gram.y" { expected_rr_conflicts = (yyvsp[(2) - (2)].integer); } break; case 14: -#line 229 "parse-gram.y" +#line 222 "../../src/parse-gram.y" { spec_file_prefix = (yyvsp[(3) - (3)].chars); } break; case 15: -#line 231 "parse-gram.y" +#line 224 "../../src/parse-gram.y" { nondeterministic_parser = true; glr_parser = true; @@ -1810,86 +1792,86 @@ yyreduce: break; case 16: -#line 236 "parse-gram.y" +#line 229 "../../src/parse-gram.y" { - muscle_code_grow ("initial_action", (yyvsp[(1) - (1)].chars), (yylsp[(1) - (1)])); + muscle_code_grow ("initial_action", translate_symbol_action ((yyvsp[(2) - (2)].chars), (yylsp[(2) - (2)])), (yylsp[(2) - (2)])); } break; case 17: -#line 239 "parse-gram.y" - { add_param ("lex_param", (yyvsp[(1) - (1)].chars), 
(yylsp[(1) - (1)])); } +#line 232 "../../src/parse-gram.y" + { add_param ("lex_param", (yyvsp[(2) - (2)].chars), (yylsp[(2) - (2)])); } break; case 18: -#line 240 "parse-gram.y" +#line 233 "../../src/parse-gram.y" { locations_flag = true; } break; case 19: -#line 241 "parse-gram.y" +#line 234 "../../src/parse-gram.y" { spec_name_prefix = (yyvsp[(3) - (3)].chars); } break; case 20: -#line 242 "parse-gram.y" +#line 235 "../../src/parse-gram.y" { no_lines_flag = true; } break; case 21: -#line 243 "parse-gram.y" +#line 236 "../../src/parse-gram.y" { nondeterministic_parser = true; } break; case 22: -#line 244 "parse-gram.y" +#line 237 "../../src/parse-gram.y" { spec_outfile = (yyvsp[(3) - (3)].chars); } break; case 23: -#line 245 "parse-gram.y" - { add_param ("parse_param", (yyvsp[(1) - (1)].chars), (yylsp[(1) - (1)])); } +#line 238 "../../src/parse-gram.y" + { add_param ("parse_param", (yyvsp[(2) - (2)].chars), (yylsp[(2) - (2)])); } break; case 24: -#line 246 "parse-gram.y" +#line 239 "../../src/parse-gram.y" { pure_parser = true; } break; case 25: -#line 247 "parse-gram.y" +#line 240 "../../src/parse-gram.y" { version_check (&(yylsp[(2) - (2)]), (yyvsp[(2) - (2)].chars)); } break; case 26: -#line 248 "parse-gram.y" +#line 241 "../../src/parse-gram.y" { skeleton = (yyvsp[(2) - (2)].chars); } break; case 27: -#line 249 "parse-gram.y" +#line 242 "../../src/parse-gram.y" { token_table_flag = true; } break; case 28: -#line 250 "parse-gram.y" +#line 243 "../../src/parse-gram.y" { report_flag = report_states; } break; case 29: -#line 251 "parse-gram.y" +#line 244 "../../src/parse-gram.y" { yacc_flag = true; } break; case 33: -#line 259 "parse-gram.y" +#line 252 "../../src/parse-gram.y" { grammar_start_symbol_set ((yyvsp[(2) - (2)].symbol), (yylsp[(2) - (2)])); } break; case 34: -#line 263 "parse-gram.y" +#line 256 "../../src/parse-gram.y" { char const *body = (yyvsp[(1) - (1)].chars); @@ -1908,46 +1890,48 @@ yyreduce: break; case 35: -#line 279 "parse-gram.y" +#line 272 
"../../src/parse-gram.y" { symbol_list *list; - for (list = (yyvsp[(2) - (2)].list); list; list = list->next) - symbol_destructor_set (list->sym, (yyvsp[(1) - (2)].chars), (yylsp[(1) - (2)])); - symbol_list_free ((yyvsp[(2) - (2)].list)); + const char *action = translate_symbol_action ((yyvsp[(2) - (3)].chars), (yylsp[(2) - (3)])); + for (list = (yyvsp[(3) - (3)].list); list; list = list->next) + symbol_destructor_set (list->sym, action, (yylsp[(2) - (3)])); + symbol_list_free ((yyvsp[(3) - (3)].list)); } break; case 36: -#line 286 "parse-gram.y" +#line 280 "../../src/parse-gram.y" { symbol_list *list; - for (list = (yyvsp[(2) - (2)].list); list; list = list->next) - symbol_printer_set (list->sym, (yyvsp[(1) - (2)].chars), (yylsp[(1) - (2)])); - symbol_list_free ((yyvsp[(2) - (2)].list)); + const char *action = translate_symbol_action ((yyvsp[(2) - (3)].chars), (yylsp[(2) - (3)])); + for (list = (yyvsp[(3) - (3)].list); list; list = list->next) + symbol_printer_set (list->sym, action, (yylsp[(2) - (3)])); + symbol_list_free ((yyvsp[(3) - (3)].list)); } break; case 37: -#line 293 "parse-gram.y" +#line 288 "../../src/parse-gram.y" { default_prec = true; } break; case 38: -#line 297 "parse-gram.y" +#line 292 "../../src/parse-gram.y" { default_prec = false; } break; case 39: -#line 303 "parse-gram.y" +#line 298 "../../src/parse-gram.y" { current_class = nterm_sym; } break; case 40: -#line 304 "parse-gram.y" +#line 299 "../../src/parse-gram.y" { current_class = unknown_sym; current_type = NULL; @@ -1955,12 +1939,12 @@ yyreduce: break; case 41: -#line 308 "parse-gram.y" +#line 303 "../../src/parse-gram.y" { current_class = token_sym; } break; case 42: -#line 309 "parse-gram.y" +#line 304 "../../src/parse-gram.y" { current_class = unknown_sym; current_type = NULL; @@ -1968,7 +1952,7 @@ yyreduce: break; case 43: -#line 314 "parse-gram.y" +#line 309 "../../src/parse-gram.y" { symbol_list *list; for (list = (yyvsp[(3) - (3)].list); list; list = list->next) @@ -1978,7 +1962,7 
@@ yyreduce: break; case 44: -#line 324 "parse-gram.y" +#line 319 "../../src/parse-gram.y" { symbol_list *list; ++current_prec; @@ -1993,49 +1977,49 @@ yyreduce: break; case 45: -#line 338 "parse-gram.y" +#line 333 "../../src/parse-gram.y" { (yyval.assoc) = left_assoc; } break; case 46: -#line 339 "parse-gram.y" +#line 334 "../../src/parse-gram.y" { (yyval.assoc) = right_assoc; } break; case 47: -#line 340 "parse-gram.y" +#line 335 "../../src/parse-gram.y" { (yyval.assoc) = non_assoc; } break; case 48: -#line 344 "parse-gram.y" +#line 339 "../../src/parse-gram.y" { current_type = NULL; } break; case 49: -#line 345 "parse-gram.y" +#line 340 "../../src/parse-gram.y" { current_type = (yyvsp[(1) - (1)].uniqstr); } break; case 50: -#line 351 "parse-gram.y" +#line 345 "../../src/parse-gram.y" { (yyval.list) = symbol_list_new ((yyvsp[(1) - (1)].symbol), (yylsp[(1) - (1)])); } break; case 51: -#line 352 "parse-gram.y" +#line 346 "../../src/parse-gram.y" { (yyval.list) = symbol_list_prepend ((yyvsp[(1) - (2)].list), (yyvsp[(2) - (2)].symbol), (yylsp[(2) - (2)])); } break; case 52: -#line 358 "parse-gram.y" +#line 352 "../../src/parse-gram.y" { current_type = (yyvsp[(1) - (1)].uniqstr); } break; case 53: -#line 362 "parse-gram.y" +#line 356 "../../src/parse-gram.y" { symbol_class_set ((yyvsp[(1) - (1)].symbol), current_class, (yylsp[(1) - (1)]), true); symbol_type_set ((yyvsp[(1) - (1)].symbol), current_type, (yylsp[(1) - (1)])); @@ -2043,7 +2027,7 @@ yyreduce: break; case 54: -#line 367 "parse-gram.y" +#line 361 "../../src/parse-gram.y" { symbol_class_set ((yyvsp[(1) - (2)].symbol), current_class, (yylsp[(1) - (2)]), true); symbol_type_set ((yyvsp[(1) - (2)].symbol), current_type, (yylsp[(1) - (2)])); @@ -2052,7 +2036,7 @@ yyreduce: break; case 55: -#line 373 "parse-gram.y" +#line 367 "../../src/parse-gram.y" { symbol_class_set ((yyvsp[(1) - (2)].symbol), current_class, (yylsp[(1) - (2)]), true); symbol_type_set ((yyvsp[(1) - (2)].symbol), current_type, (yylsp[(1) - (2)])); 
@@ -2061,7 +2045,7 @@ yyreduce: break; case 56: -#line 379 "parse-gram.y" +#line 373 "../../src/parse-gram.y" { symbol_class_set ((yyvsp[(1) - (3)].symbol), current_class, (yylsp[(1) - (3)]), true); symbol_type_set ((yyvsp[(1) - (3)].symbol), current_type, (yylsp[(1) - (3)])); @@ -2071,91 +2055,92 @@ yyreduce: break; case 63: -#line 409 "parse-gram.y" +#line 403 "../../src/parse-gram.y" { yyerrok; } break; case 64: -#line 415 "parse-gram.y" +#line 409 "../../src/parse-gram.y" { current_lhs = (yyvsp[(1) - (1)].symbol); current_lhs_location = (yylsp[(1) - (1)]); } break; case 66: -#line 419 "parse-gram.y" +#line 413 "../../src/parse-gram.y" { grammar_current_rule_end ((yylsp[(1) - (1)])); } break; case 67: -#line 420 "parse-gram.y" +#line 414 "../../src/parse-gram.y" { grammar_current_rule_end ((yylsp[(3) - (3)])); } break; case 69: -#line 426 "parse-gram.y" +#line 420 "../../src/parse-gram.y" { grammar_current_rule_begin (current_lhs, current_lhs_location); } break; case 70: -#line 428 "parse-gram.y" +#line 422 "../../src/parse-gram.y" { grammar_current_rule_symbol_append ((yyvsp[(2) - (2)].symbol), (yylsp[(2) - (2)])); } break; + case 71: +#line 424 "../../src/parse-gram.y" + { grammar_current_rule_action_append (gram_last_string, + gram_last_braced_code_loc); } + break; + case 72: -#line 431 "parse-gram.y" +#line 427 "../../src/parse-gram.y" { grammar_current_rule_prec_set ((yyvsp[(3) - (3)].symbol), (yylsp[(3) - (3)])); } break; case 73: -#line 433 "parse-gram.y" +#line 429 "../../src/parse-gram.y" { grammar_current_rule_dprec_set ((yyvsp[(3) - (3)].integer), (yylsp[(3) - (3)])); } break; case 74: -#line 435 "parse-gram.y" +#line 431 "../../src/parse-gram.y" { grammar_current_rule_merge_set ((yyvsp[(3) - (3)].uniqstr), (yylsp[(3) - (3)])); } break; case 75: -#line 439 "parse-gram.y" +#line 435 "../../src/parse-gram.y" { (yyval.symbol) = (yyvsp[(1) - (1)].symbol); } break; case 76: -#line 440 "parse-gram.y" +#line 436 "../../src/parse-gram.y" { (yyval.symbol) = 
(yyvsp[(1) - (1)].symbol); } break; case 77: -#line 456 "parse-gram.y" - { grammar_current_rule_action_append (last_string, last_braced_code_loc); } - break; - - case 79: -#line 463 "parse-gram.y" +#line 442 "../../src/parse-gram.y" { (yyval.symbol) = symbol_get (quotearg_style (c_quoting_style, (yyvsp[(1) - (1)].chars)), (yylsp[(1) - (1)])); symbol_class_set ((yyval.symbol), token_sym, (yylsp[(1) - (1)]), false); } break; - case 80: -#line 472 "parse-gram.y" + case 78: +#line 451 "../../src/parse-gram.y" { (yyval.chars) = (yyvsp[(1) - (1)].chars); } break; - case 82: -#line 479 "parse-gram.y" + case 80: +#line 458 "../../src/parse-gram.y" { - muscle_code_grow ("epilogue", (yyvsp[(2) - (2)].chars), (yylsp[(2) - (2)])); - scanner_last_string_free (); + muscle_code_grow ("epilogue", translate_code ((yyvsp[(2) - (2)].chars), (yylsp[(2) - (2)])), (yylsp[(2) - (2)])); + gram_scanner_last_string_free (); } break; /* Line 1267 of yacc.c. */ -#line 2159 "parse-gram.c" +#line 2144 "../../src/parse-gram.c" default: break; } YY_SYMBOL_PRINT ("-> $$ =", yyr1[yyn], &yyval, &yyloc); @@ -2375,7 +2360,7 @@ yyreturn: } -#line 485 "parse-gram.y" +#line 464 "../../src/parse-gram.y" @@ -2457,7 +2442,7 @@ add_param (char const *type, char *decl, location loc) free (name); } - scanner_last_string_free (); + gram_scanner_last_string_free (); } static void diff --git a/src/parse-gram.h b/src/parse-gram.h index 2d37fc10..2c5a09db 100644 --- a/src/parse-gram.h +++ b/src/parse-gram.h @@ -1,4 +1,4 @@ -/* A Bison parser, made by GNU Bison 2.2a. */ +/* A Bison parser, made by GNU Bison 2.1b. */ /* Skeleton interface for Bison's Yacc-like parsers in C @@ -148,7 +148,7 @@ #if ! defined YYSTYPE && ! defined YYSTYPE_IS_DECLARED typedef union YYSTYPE -#line 94 "parse-gram.y" +#line 95 "../../src/parse-gram.y" { symbol *symbol; symbol_list *list; @@ -158,7 +158,7 @@ typedef union YYSTYPE uniqstr uniqstr; } /* Line 1529 of yacc.c. 
*/ -#line 162 "parse-gram.h" +#line 162 "../../src/parse-gram.h" YYSTYPE; # define yystype YYSTYPE /* obsolescent; will be withdrawn */ # define YYSTYPE_IS_DECLARED 1 diff --git a/src/parse-gram.y b/src/parse-gram.y index e189e143..a0c708b3 100644 --- a/src/parse-gram.y +++ b/src/parse-gram.y @@ -32,6 +32,8 @@ #include "quotearg.h" #include "reader.h" #include "symlist.h" +#include "scan-gram.h" +#include "scan-code.h" #include "strverscmp.h" #define YYLLOC_DEFAULT(Current, Rhs, N) (Current) = lloc_default (Rhs, N) @@ -84,9 +86,8 @@ static int current_prec = 0; { /* Bison's grammar can initial empty locations, hence a default location is needed. */ - @$.start.file = @$.end.file = current_file; - @$.start.line = @$.end.line = 1; - @$.start.column = @$.end.column = 0; + boundary_set (&@$.start, current_file, 1, 0); + boundary_set (&@$.end, current_file, 1, 0); } /* Only NUMBERS have a value. */ @@ -109,8 +110,8 @@ static int current_prec = 0; %token PERCENT_NTERM "%nterm" %token PERCENT_TYPE "%type" -%token PERCENT_DESTRUCTOR "%destructor {...}" -%token PERCENT_PRINTER "%printer {...}" +%token PERCENT_DESTRUCTOR "%destructor" +%token PERCENT_PRINTER "%printer" %token PERCENT_UNION "%union {...}" @@ -137,8 +138,8 @@ static int current_prec = 0; PERCENT_EXPECT_RR "%expect-rr" PERCENT_FILE_PREFIX "%file-prefix" PERCENT_GLR_PARSER "%glr-parser" - PERCENT_INITIAL_ACTION "%initial-action {...}" - PERCENT_LEX_PARAM "%lex-param {...}" + PERCENT_INITIAL_ACTION "%initial-action" + PERCENT_LEX_PARAM "%lex-param" PERCENT_LOCATIONS "%locations" PERCENT_NAME_PREFIX "%name-prefix" PERCENT_NO_DEFAULT_PREC "%no-default-prec" @@ -146,7 +147,7 @@ static int current_prec = 0; PERCENT_NONDETERMINISTIC_PARSER "%nondeterministic-parser" PERCENT_OUTPUT "%output" - PERCENT_PARSE_PARAM "%parse-param {...}" + PERCENT_PARSE_PARAM "%parse-param" PERCENT_PURE_PARSER "%pure-parser" PERCENT_REQUIRE "%require" PERCENT_SKELETON "%skeleton" @@ -167,23 +168,14 @@ static int current_prec = 0; %token 
EPILOGUE "epilogue" %token BRACED_CODE "{...}" - %type STRING string_content - "%destructor {...}" - "%initial-action {...}" - "%lex-param {...}" - "%parse-param {...}" - "%printer {...}" + "{...}" "%union {...}" PROLOGUE EPILOGUE %printer { fprintf (stderr, "\"%s\"", $$); } STRING string_content %printer { fprintf (stderr, "{\n%s\n}", $$); } - "%destructor {...}" - "%initial-action {...}" - "%lex-param {...}" - "%parse-param {...}" - "%printer {...}" + "{...}" "%union {...}" PROLOGUE EPILOGUE %type TYPE @@ -214,7 +206,8 @@ declarations: declaration: grammar_declaration -| PROLOGUE { prologue_augment ($1, @1); } +| PROLOGUE { prologue_augment (translate_code ($1, @1), + @1); } | "%debug" { debug_flag = true; } | "%define" string_content { @@ -232,17 +225,17 @@ declaration: nondeterministic_parser = true; glr_parser = true; } -| "%initial-action {...}" +| "%initial-action" "{...}" { - muscle_code_grow ("initial_action", $1, @1); + muscle_code_grow ("initial_action", translate_symbol_action ($2, @2), @2); } -| "%lex-param {...}" { add_param ("lex_param", $1, @1); } +| "%lex-param" "{...}" { add_param ("lex_param", $2, @2); } | "%locations" { locations_flag = true; } | "%name-prefix" "=" string_content { spec_name_prefix = $3; } | "%no-lines" { no_lines_flag = true; } | "%nondeterministic-parser" { nondeterministic_parser = true; } | "%output" "=" string_content { spec_outfile = $3; } -| "%parse-param {...}" { add_param ("parse_param", $1, @1); } +| "%parse-param" "{...}" { add_param ("parse_param", $2, @2); } | "%pure-parser" { pure_parser = true; } | "%require" string_content { version_check (&@2, $2); } | "%skeleton" string_content { skeleton = $2; } @@ -275,19 +268,21 @@ grammar_declaration: typed = true; muscle_code_grow ("stype", body, @1); } -| "%destructor {...}" symbols.1 +| "%destructor" "{...}" symbols.1 { symbol_list *list; - for (list = $2; list; list = list->next) - symbol_destructor_set (list->sym, $1, @1); - symbol_list_free ($2); + const char *action 
= translate_symbol_action ($2, @2); + for (list = $3; list; list = list->next) + symbol_destructor_set (list->sym, action, @2); + symbol_list_free ($3); } -| "%printer {...}" symbols.1 +| "%printer" "{...}" symbols.1 { symbol_list *list; - for (list = $2; list; list = list->next) - symbol_printer_set (list->sym, $1, @1); - symbol_list_free ($2); + const char *action = translate_symbol_action ($2, @2); + for (list = $3; list; list = list->next) + symbol_printer_set (list->sym, action, @2); + symbol_list_free ($3); } | "%default-prec" { @@ -346,7 +341,6 @@ type.opt: ; /* One or more nonterminals to be %typed. */ - symbols.1: symbol { $$ = symbol_list_new ($1, @1); } | symbols.1 symbol { $$ = symbol_list_prepend ($1, $2, @2); } @@ -426,7 +420,9 @@ rhs: { grammar_current_rule_begin (current_lhs, current_lhs_location); } | rhs symbol { grammar_current_rule_symbol_append ($2, @2); } -| rhs action +| rhs "{...}" + { grammar_current_rule_action_append (gram_last_string, + gram_last_braced_code_loc); } | rhs "%prec" symbol { grammar_current_rule_prec_set ($3, @3); } | rhs "%dprec" INT @@ -440,23 +436,6 @@ symbol: | string_as_id { $$ = $1; } ; -/* Handle the semantics of an action specially, with a mid-rule - action, so that grammar_current_rule_action_append is invoked - immediately after the braced code is read by the scanner. - - This implementation relies on the LALR(1) parsing algorithm. - If grammar_current_rule_action_append were executed in a normal - action for this rule, then when the input grammar contains two - successive actions, the scanner would have to read both actions - before reducing this rule. That wouldn't work, since the scanner - relies on all preceding input actions being processed by - grammar_current_rule_action_append before it scans the next - action. */ -action: - { grammar_current_rule_action_append (last_string, last_braced_code_loc); } - BRACED_CODE -; - /* A string used as an ID: quote it. 
*/ string_as_id: STRING @@ -477,8 +456,8 @@ epilogue.opt: /* Nothing. */ | "%%" EPILOGUE { - muscle_code_grow ("epilogue", $2, @2); - scanner_last_string_free (); + muscle_code_grow ("epilogue", translate_code ($2, @2), @2); + gram_scanner_last_string_free (); } ; @@ -563,7 +542,7 @@ add_param (char const *type, char *decl, location loc) free (name); } - scanner_last_string_free (); + gram_scanner_last_string_free (); } static void diff --git a/src/reader.c b/src/reader.c index d07ce5ca..33b62c2c 100644 --- a/src/reader.c +++ b/src/reader.c @@ -22,6 +22,7 @@ #include #include "system.h" +#include #include @@ -34,6 +35,8 @@ #include "reader.h" #include "symlist.h" #include "symtab.h" +#include "scan-gram.h" +#include "scan-code.h" static void check_and_convert_grammar (void); @@ -77,6 +80,8 @@ prologue_augment (const char *prologue, location loc) !typed ? &pre_prologue_obstack : &post_prologue_obstack; obstack_fgrow1 (oout, "]b4_syncline(%d, [[", loc.start.line); + /* FIXME: Protection of M4 characters missing here. See + output.c:escaped_output. */ MUSCLE_OBSTACK_SGROW (oout, quotearg_style (c_quoting_style, loc.start.file)); obstack_sgrow (oout, "]])[\n"); @@ -398,9 +403,7 @@ grammar_current_rule_symbol_append (symbol *sym, location loc) void grammar_current_rule_action_append (const char *action, location loc) { - /* There's no need to invoke grammar_midrule_action here, since the - scanner already did it if necessary. 
*/ - current_rule->action = action; + current_rule->action = translate_rule_action (current_rule, action, loc); current_rule->action_location = loc; } @@ -426,6 +429,7 @@ packgram (void) while (p) { + int rule_length = 0; symbol *ruleprec = p->ruleprec; rules[ruleno].user_number = ruleno; rules[ruleno].number = ruleno; @@ -440,18 +444,22 @@ packgram (void) rules[ruleno].action = p->action; rules[ruleno].action_location = p->action_location; - p = p->next; - while (p && p->sym) + for (p = p->next; p && p->sym; p = p->next) { + ++rule_length; + + /* Don't allow rule_length == INT_MAX, since that might + cause confusion with strtol if INT_MAX == LONG_MAX. */ + if (rule_length == INT_MAX) + fatal_at (rules[ruleno].location, _("rule is too long")); + /* item_number = symbol_number. But the former needs to contain more: negative rule numbers. */ ritem[itemno++] = symbol_number_as_item_number (p->sym->number); /* A rule gets by default the precedence and associativity - of the last token in it. */ + of its last token. */ if (p->sym->class == token_sym && default_prec) rules[ruleno].prec = p->sym; - if (p) - p = p->next; } /* If this rule has a %prec, @@ -461,8 +469,11 @@ packgram (void) rules[ruleno].precsym = ruleprec; rules[ruleno].prec = ruleprec; } + /* An item ends by the rule number (negated). */ ritem[itemno++] = rule_number_as_item_number (ruleno); + assert (itemno < ITEM_NUMBER_MAX); ++ruleno; + assert (ruleno < RULE_NUMBER_MAX); if (p) p = p->next; @@ -511,7 +522,7 @@ reader (void) gram__flex_debug = trace_flag & trace_scan; gram_debug = trace_flag & trace_parse; - scanner_initialize (); + gram_scanner_initialize (); gram_parse (); if (! complaint_issued) diff --git a/src/reader.h b/src/reader.h index f110f70e..ec289f2d 100644 --- a/src/reader.h +++ b/src/reader.h @@ -35,26 +35,6 @@ typedef struct merger_list uniqstr type; } merger_list; -/* From the scanner. 
*/ -extern FILE *gram_in; -extern int gram__flex_debug; -extern boundary scanner_cursor; -extern char *last_string; -extern location last_braced_code_loc; -extern int max_left_semantic_context; -void scanner_initialize (void); -void scanner_free (void); -void scanner_last_string_free (void); - -/* These are declared by the scanner, but not used. We put them here - to pacify "make syntax-check". */ -extern FILE *gram_out; -extern int gram_lineno; - -# define YY_DECL int gram_lex (YYSTYPE *val, location *loc) -YY_DECL; - - /* From the parser. */ extern int gram_debug; int gram_parse (void); diff --git a/src/scan-action.l b/src/scan-action.l new file mode 100644 index 00000000..f8dbf679 --- /dev/null +++ b/src/scan-action.l @@ -0,0 +1,866 @@ +/* Bison Grammar Scanner -*- C -*- + + Copyright (C) 2002, 2003, 2004, 2005 Free Software Foundation, Inc. + + This file is part of Bison, the GNU Compiler Compiler. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. 
+ + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + 02110-1301 USA +*/ + +%option debug nodefault nounput noyywrap never-interactive +%option prefix="gram_" outfile="lex.yy.c" + +%{ +#include "system.h" + +#include +#include +#include + +#include "complain.h" +#include "files.h" +#include "getargs.h" +#include "gram.h" +#include "quotearg.h" +#include "reader.h" +#include "uniqstr.h" + +#define YY_USER_INIT \ + do \ + { \ + scanner_cursor.file = current_file; \ + scanner_cursor.line = 1; \ + scanner_cursor.column = 1; \ + code_start = scanner_cursor; \ + } \ + while (0) + +/* Location of scanner cursor. */ +boundary scanner_cursor; + +static void adjust_location (location *, char const *, size_t); +#define YY_USER_ACTION adjust_location (loc, yytext, yyleng); + +static size_t no_cr_read (FILE *, char *, size_t); +#define YY_INPUT(buf, result, size) ((result) = no_cr_read (yyin, buf, size)) + +/* Within well-formed rules, RULE_LENGTH is the number of values in + the current rule so far, which says where to find `$0' with respect + to the top of the stack. It is not the same as the rule->length in + the case of mid rule actions. + + Outside of well-formed rules, RULE_LENGTH has an undefined value. 
*/ +int rule_length; + +static void handle_dollar (int token_type, char *cp, location loc); +static void handle_at (int token_type, char *cp, location loc); +static void handle_syncline (char *args); +static unsigned long int scan_integer (char const *p, int base, location loc); +static int convert_ucn_to_byte (char const *hex_text); +static void unexpected_eof (boundary, char const *); +static void unexpected_newline (boundary, char const *); + +%} +%x SC_COMMENT SC_LINE_COMMENT SC_YACC_COMMENT +%x SC_STRING SC_CHARACTER +%x SC_ESCAPED_STRING SC_ESCAPED_CHARACTER +%x SC_PRE_CODE SC_BRACED_CODE SC_PROLOGUE SC_EPILOGUE + +letter [.abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ_] +id {letter}({letter}|[0-9])* +directive %{letter}({letter}|[0-9]|-)* +int [0-9]+ + +/* POSIX says that a tag must be both an id and a C union member, but + historically almost any character is allowed in a tag. We disallow + NUL and newline, as this simplifies our implementation. */ +tag [^\0\n>]+ + +/* Zero or more instances of backslash-newline. Following GCC, allow + white space between the backslash and the newline. */ +splice (\\[ \f\t\v]*\n)* + +%% +%{ + /* Nesting level of the current code in braces. */ + int braces_level IF_LINT (= 0); + + /* Parent context state, when applicable. */ + int context_state IF_LINT (= 0); + + /* Token type to return, when applicable. */ + int token_type IF_LINT (= 0); + + /* Where containing code started, when applicable. Its initial + value is relevant only when yylex is invoked in the SC_EPILOGUE + start condition. */ + boundary code_start = scanner_cursor; + + /* Where containing comment or string or character literal started, + when applicable. */ + boundary token_start IF_LINT (= scanner_cursor); +%} + + + /*-----------------------. + | Scanning white space. | + `-----------------------*/ + + +{ + /* Comments and white space. 
*/ + "," warn_at (*loc, _("stray `,' treated as white space")); + [ \f\n\t\v] | + "//".* ; + "/*" { + token_start = loc->start; + context_state = YY_START; + BEGIN SC_YACC_COMMENT; + } + + /* #line directives are not documented, and may be withdrawn or + modified in future versions of Bison. */ + ^"#line "{int}" \"".*"\"\n" { + handle_syncline (yytext + sizeof "#line " - 1); + } +} + + + /*----------------------------. + | Scanning Bison directives. | + `----------------------------*/ + +{ + + /* Code in between braces. */ + "{" { + STRING_GROW; + token_type = BRACED_CODE; + braces_level = 0; + code_start = loc->start; + BEGIN SC_BRACED_CODE; + } + +} + + + /*------------------------------------------------------------. + | Scanning a C comment. The initial `/ *' is already eaten. | + `------------------------------------------------------------*/ + + +{ + "*"{splice}"/" STRING_GROW; BEGIN context_state; + <> unexpected_eof (token_start, "*/"); BEGIN context_state; +} + + + /*--------------------------------------------------------------. + | Scanning a line comment. The initial `//' is already eaten. | + `--------------------------------------------------------------*/ + + +{ + "\n" STRING_GROW; BEGIN context_state; + {splice} STRING_GROW; + <> BEGIN context_state; +} + + + /*------------------------------------------------. + | Scanning a Bison string, including its escapes. | + | The initial quote is already eaten. | + `------------------------------------------------*/ + + +{ + "\"" { + STRING_FINISH; + loc->start = token_start; + val->chars = last_string; + rule_length++; + BEGIN INITIAL; + return STRING; + } + \n unexpected_newline (token_start, "\""); BEGIN INITIAL; + <> unexpected_eof (token_start, "\""); BEGIN INITIAL; +} + + /*----------------------------------------------------------. + | Scanning a Bison character literal, decoding its escapes. | + | The initial quote is already eaten. 
| + `----------------------------------------------------------*/ + + +{ + "'" { + unsigned char last_string_1; + STRING_GROW; + STRING_FINISH; + loc->start = token_start; + val->symbol = symbol_get (quotearg_style (escape_quoting_style, + last_string), + *loc); + symbol_class_set (val->symbol, token_sym, *loc); + last_string_1 = last_string[1]; + symbol_user_token_number_set (val->symbol, last_string_1, *loc); + STRING_FREE; + rule_length++; + BEGIN INITIAL; + return ID; + } + \n unexpected_newline (token_start, "'"); BEGIN INITIAL; + <> unexpected_eof (token_start, "'"); BEGIN INITIAL; +} + + +{ + \0 complain_at (*loc, _("invalid null character")); +} + + + /*----------------------------. + | Decode escaped characters. | + `----------------------------*/ + + +{ + \\[0-7]{1,3} { + unsigned long int c = strtoul (yytext + 1, 0, 8); + if (UCHAR_MAX < c) + complain_at (*loc, _("invalid escape sequence: %s"), quote (yytext)); + else if (! c) + complain_at (*loc, _("invalid null character: %s"), quote (yytext)); + else + obstack_1grow (&obstack_for_string, c); + } + + \\x[0-9abcdefABCDEF]+ { + unsigned long int c; + set_errno (0); + c = strtoul (yytext + 2, 0, 16); + if (UCHAR_MAX < c || get_errno ()) + complain_at (*loc, _("invalid escape sequence: %s"), quote (yytext)); + else if (! c) + complain_at (*loc, _("invalid null character: %s"), quote (yytext)); + else + obstack_1grow (&obstack_for_string, c); + } + + \\a obstack_1grow (&obstack_for_string, '\a'); + \\b obstack_1grow (&obstack_for_string, '\b'); + \\f obstack_1grow (&obstack_for_string, '\f'); + \\n obstack_1grow (&obstack_for_string, '\n'); + \\r obstack_1grow (&obstack_for_string, '\r'); + \\t obstack_1grow (&obstack_for_string, '\t'); + \\v obstack_1grow (&obstack_for_string, '\v'); + + /* \\[\"\'?\\] would be shorter, but it confuses xgettext. 
*/ + \\("\""|"'"|"?"|"\\") obstack_1grow (&obstack_for_string, yytext[1]); + + \\(u|U[0-9abcdefABCDEF]{4})[0-9abcdefABCDEF]{4} { + int c = convert_ucn_to_byte (yytext); + if (c < 0) + complain_at (*loc, _("invalid escape sequence: %s"), quote (yytext)); + else if (! c) + complain_at (*loc, _("invalid null character: %s"), quote (yytext)); + else + obstack_1grow (&obstack_for_string, c); + } + \\(.|\n) { + complain_at (*loc, _("unrecognized escape sequence: %s"), quote (yytext)); + STRING_GROW; + } +} + + /*--------------------------------------------. + | Scanning user-code characters and strings. | + `--------------------------------------------*/ + + +{ + {splice}|\\{splice}[^\n$@\[\]] STRING_GROW; +} + + +{ + "'" STRING_GROW; BEGIN context_state; + \n unexpected_newline (token_start, "'"); BEGIN context_state; + <> unexpected_eof (token_start, "'"); BEGIN context_state; +} + + +{ + "\"" STRING_GROW; BEGIN context_state; + \n unexpected_newline (token_start, "\""); BEGIN context_state; + <> unexpected_eof (token_start, "\""); BEGIN context_state; +} + + + /*---------------------------------------------------. + | Strings, comments etc. can be found in user code. | + `---------------------------------------------------*/ + + +{ + "'" { + STRING_GROW; + context_state = YY_START; + token_start = loc->start; + BEGIN SC_CHARACTER; + } + "\"" { + STRING_GROW; + context_state = YY_START; + token_start = loc->start; + BEGIN SC_STRING; + } + "/"{splice}"*" { + STRING_GROW; + context_state = YY_START; + token_start = loc->start; + BEGIN SC_COMMENT; + } + "/"{splice}"/" { + STRING_GROW; + context_state = YY_START; + BEGIN SC_LINE_COMMENT; + } +} + + + /*---------------------------------------------------------------. + | Scanning some code in braces (%union and actions). The initial | + | "{" is already eaten. 
| + `---------------------------------------------------------------*/ + + +<SC_BRACED_CODE> +{ + "{"|"<"{splice}"%" STRING_GROW; braces_level++; + "%"{splice}">" STRING_GROW; braces_level--; + "}" { + bool outer_brace = --braces_level < 0; + + /* As an undocumented Bison extension, append `;' before the last + brace in braced code, so that the user code can omit trailing + `;'. But do not append `;' if emulating Yacc, since Yacc does + not append one. + + FIXME: Bison should warn if a semicolon seems to be necessary + here, and should omit the semicolon if it seems unnecessary + (e.g., after ';', '{', or '}', each followed by comments or + white space). Such a warning shouldn't depend on --yacc; it + should depend on a new --pedantic option, which would cause + Bison to warn if it detects an extension to POSIX. --pedantic + should also diagnose other Bison extensions like %yacc. + Perhaps there should also be a GCC-style --pedantic-errors + option, so that such warnings are diagnosed as errors. */ + if (outer_brace && token_type == BRACED_CODE && ! yacc_flag) + obstack_1grow (&obstack_for_string, ';'); + + obstack_1grow (&obstack_for_string, '}'); + + if (outer_brace) + { + STRING_FINISH; + rule_length++; + loc->start = code_start; + val->chars = last_string; + BEGIN INITIAL; + return token_type; + } + } + + /* Tokenize `<<%' correctly (as `<<' `%') rather than incorrectly + (as `<' `<%'). */ + "<"{splice}"<" STRING_GROW; + + "$"("<"{tag}">")?(-?[0-9]+|"$") handle_dollar (token_type, yytext, *loc); + "@"(-?[0-9]+|"$") handle_at (token_type, yytext, *loc); + + <<EOF>> unexpected_eof (code_start, "}"); BEGIN INITIAL; +} + + + /*--------------------------------------------------------------. + | Scanning some prologue: from "%{" (already scanned) to "%}". 
| + `--------------------------------------------------------------*/ + + +{ + "%}" { + STRING_FINISH; + loc->start = code_start; + val->chars = last_string; + BEGIN INITIAL; + return PROLOGUE; + } + + <> unexpected_eof (code_start, "%}"); BEGIN INITIAL; +} + + + /*---------------------------------------------------------------. + | Scanning the epilogue (everything after the second "%%", which | + | has already been eaten). | + `---------------------------------------------------------------*/ + + +{ + <> { + STRING_FINISH; + loc->start = code_start; + val->chars = last_string; + BEGIN INITIAL; + return EPILOGUE; + } +} + + + /*-----------------------------------------. + | Escape M4 quoting characters in C code. | + `-----------------------------------------*/ + + +{ + \$ obstack_sgrow (&obstack_for_string, "$]["); + \@ obstack_sgrow (&obstack_for_string, "@@"); + \[ obstack_sgrow (&obstack_for_string, "@{"); + \] obstack_sgrow (&obstack_for_string, "@}"); +} + + + /*-----------------------------------------------------. + | By default, grow the string obstack with the input. | + `-----------------------------------------------------*/ + +. | +\n STRING_GROW; + +%% + +/* Keeps track of the maximum number of semantic values to the left of + a handle (those referenced by $0, $-1, etc.) are required by the + semantic actions of this grammar. */ +int max_left_semantic_context = 0; + +/* Set *LOC and adjust scanner cursor to account for token TOKEN of + size SIZE. 
*/
+
+/* Tab stops are every 8 columns, and a newline restarts the column
+   count at 1.  LINE and COLUMN saturate at INT_MAX instead of
+   overflowing, because signed overflow is undefined behavior.  The
+   cursor column is assumed to be nonnegative on entry.  */
+
+static void
+adjust_location (location *loc, char const *token, size_t size)
+{
+  int line = scanner_cursor.line;
+  int column = scanner_cursor.column;
+  char const *p0 = token;	/* Start of the run not yet counted.  */
+  char const *p;
+  char const *lim = token + size;
+
+  loc->start = scanner_cursor;
+
+  for (p = token; ; p++)
+    {
+      bool at_end = p == lim;
+
+      if (at_end || *p == '\t')
+        {
+          /* Add the display width of the pending run [P0, P).  Runs
+             of INT_MAX / 2 bytes or more are not measured, since
+             their width might not be representable.  */
+          size_t run = p - p0;
+          size_t width = run < INT_MAX / 2 ? mbsnwidth (p0, run, 0) : INT_MAX;
+          size_t room = INT_MAX - column;
+          column = width <= room ? column + width : INT_MAX;
+          if (at_end)
+            break;
+
+          /* Advance to the next tab stop.  */
+          width = 8 - ((column - 1) & 7);
+          room = INT_MAX - column;
+          column = width <= room ? column + width : INT_MAX;
+          p0 = p + 1;
+        }
+      else if (*p == '\n')
+        {
+          /* Whatever preceded the newline no longer matters for the
+             column.  */
+          line += line < INT_MAX;
+          column = 1;
+          p0 = p + 1;
+        }
+    }
+
+  scanner_cursor.line = line;
+  scanner_cursor.column = column;
+
+  loc->end = scanner_cursor;
+}
+
+
+/* Read bytes from FP into buffer BUF of size SIZE.  Return the
+   number of bytes stored in BUF after removing '\r' from the input,
+   treating \r\n and isolated \r as \n.  */
+
+static size_t
+no_cr_read (FILE *fp, char *buf, size_t size)
+{
+  size_t bytes_read = fread (buf, 1, size, fp);
+  if (bytes_read)
+    {
+      /* W is the write position and R the read position; both start
+         just after the first '\r', if there is one.  */
+      char *w = memchr (buf, '\r', bytes_read);
+      if (w)
+        {
+          char const *r = ++w;
+          char const *lim = buf + bytes_read;
+
+          for (;;)
+            {
+              /* Found an '\r'.  Treat it like '\n', but ignore any
+                 '\n' that immediately follows.  */
+              w[-1] = '\n';
+              if (r == lim)
+                {
+                  /* The '\r' was the last byte read: look at the next
+                     byte of the stream, and push it back unless it is
+                     the '\n' to be ignored.  */
+                  int ch = getc (fp);
+                  if (ch != '\n' && ungetc (ch, fp) != ch)
+                    break;
+                }
+              else if (*r == '\n')
+                r++;
+
+              /* Copy until the next '\r'.  */
+              do
+                {
+                  if (r == lim)
+                    return w - buf;
+                }
+              while ((*w++ = *r++) != '\r');
+            }
+
+          return w - buf;
+        }
+    }
+
+  return bytes_read;
+}
+
+
+/*------------------------------------------------------------------.
+| TEXT is pointing to a wannabe semantic value (i.e., a `$').       |
+|                                                                   |
+| Possible inputs: $[<TYPE-NAME>]($|integer)                        |
+|                                                                   |
+| Output to OBSTACK_FOR_STRING a reference to this semantic value.  |
+| Return false, emitting nothing, when there is no current rule;    |
+| return true otherwise.  TEXT is modified in place: the `>' that   |
+| ends an explicit type name is overwritten with a null byte.       |
+`------------------------------------------------------------------*/
+
+static inline bool
+handle_action_dollar (char *text, location loc)
+{
+  const char *type_name = NULL;
+  char *cp = text + 1;
+
+  if (! current_rule)
+    return false;
+
+  /* Get the type name if explicit.  */
+  if (*cp == '<')
+    {
+      type_name = ++cp;
+      while (*cp != '>')
+        ++cp;
+      *cp = '\0';
+      ++cp;
+    }
+
+  if (*cp == '$')
+    {
+      if (!type_name)
+        type_name = symbol_list_n_type_name_get (current_rule, loc, 0);
+      if (!type_name && typed)
+        complain_at (loc, _("$$ of `%s' has no declared type"),
+                     current_rule->sym->tag);
+      if (!type_name)
+        type_name = "";
+      obstack_fgrow1 (&obstack_for_string,
+                      "]b4_lhs_value([%s])[", type_name);
+    }
+  else
+    {
+      long int num;
+      set_errno (0);
+      num = strtol (cp, 0, 10);
+
+      /* The lower bound is 1 - INT_MAX rather than INT_MIN so that
+         `1 - n' below cannot overflow.  */
+      if (1 - INT_MAX <= num && num <= rule_length && ! get_errno ())
+        {
+          int n = num;
+          if (1-n > max_left_semantic_context)
+            max_left_semantic_context = 1-n;
+          if (!type_name && n > 0)
+            type_name = symbol_list_n_type_name_get (current_rule, loc, n);
+          if (!type_name && typed)
+            complain_at (loc, _("$%d of `%s' has no declared type"),
+                         n, current_rule->sym->tag);
+          if (!type_name)
+            type_name = "";
+          obstack_fgrow3 (&obstack_for_string,
+                          "]b4_rhs_value(%d, %d, [%s])[",
+                          rule_length, n, type_name);
+        }
+      else
+        complain_at (loc, _("integer out of range: %s"), quote (text));
+    }
+
+  return true;
+}
+
+
+/*----------------------------------------------------------------.
+| Map `$?' onto the proper M4 symbol, depending on its TOKEN_TYPE |
+| (are we in an action?).  Anything that cannot be mapped is      |
+| reported as an invalid value.                                   |
+`----------------------------------------------------------------*/
+
+static void
+handle_dollar (int token_type, char *text, location loc)
+{
+  switch (token_type)
+    {
+    case BRACED_CODE:
+      if (handle_action_dollar (text, loc))
+        return;
+      break;
+
+    case PERCENT_DESTRUCTOR:
+    case PERCENT_INITIAL_ACTION:
+    case PERCENT_PRINTER:
+      /* Only `$$' is meaningful in these directives.  */
+      if (text[1] == '$')
+        {
+          obstack_sgrow (&obstack_for_string, "]b4_dollar_dollar[");
+          return;
+        }
+      break;
+
+    default:
+      break;
+    }
+
+  complain_at (loc, _("invalid value: %s"), quote (text));
+}
+
+
+/*------------------------------------------------------.
+| TEXT is a location token (i.e., a `@...').
Output to |
+| OBSTACK_FOR_STRING a reference to this location.     |
+| Return false, emitting nothing, when there is no     |
+| current rule; return true otherwise.                 |
+`------------------------------------------------------*/
+
+static inline bool
+handle_action_at (char *text, location loc)
+{
+  char *cp = text + 1;
+  /* Any `@' reference turns locations on, even outside a rule.  */
+  locations_flag = true;
+
+  if (! current_rule)
+    return false;
+
+  if (*cp == '$')
+    obstack_sgrow (&obstack_for_string, "]b4_lhs_location[");
+  else
+    {
+      long int num;
+      set_errno (0);
+      num = strtol (cp, 0, 10);
+
+      if (INT_MIN <= num && num <= rule_length && ! get_errno ())
+        {
+          int n = num;
+          obstack_fgrow2 (&obstack_for_string, "]b4_rhs_location(%d, %d)[",
+                          rule_length, n);
+        }
+      else
+        complain_at (loc, _("integer out of range: %s"), quote (text));
+    }
+
+  return true;
+}
+
+
+/*----------------------------------------------------------------.
+| Map `@?' onto the proper M4 symbol, depending on its TOKEN_TYPE |
+| (are we in an action?).  Anything that cannot be mapped is      |
+| reported as an invalid value.                                   |
+`----------------------------------------------------------------*/
+
+static void
+handle_at (int token_type, char *text, location loc)
+{
+  switch (token_type)
+    {
+    case BRACED_CODE:
+      /* As in handle_dollar: braced code outside of a rule has
+         nothing for `@n' to refer to, so diagnose it instead of
+         silently dropping the text.  */
+      if (handle_action_at (text, loc))
+        return;
+      break;
+
+    case PERCENT_INITIAL_ACTION:
+    case PERCENT_DESTRUCTOR:
+    case PERCENT_PRINTER:
+      /* Only `@$' is meaningful in these directives.  */
+      if (text[1] == '$')
+        {
+          obstack_sgrow (&obstack_for_string, "]b4_at_dollar[");
+          return;
+        }
+      break;
+
+    default:
+      break;
+    }
+
+  complain_at (loc, _("invalid value: %s"), quote (text));
+}
+
+
+/*------------------------------------------------------.
+| Scan NUMBER for a base-BASE integer at location LOC.  |
+| If the value exceeds INT_MAX or the conversion fails, |
+| complain at LOC and return INT_MAX.                   |
+`------------------------------------------------------*/
+
+static unsigned long int
+scan_integer (char const *number, int base, location loc)
+{
+  unsigned long int num;
+  set_errno (0);
+  num = strtoul (number, 0, base);
+  if (INT_MAX < num || get_errno ())
+    {
+      complain_at (loc, _("integer out of range: %s"), quote (number));
+      num = INT_MAX;
+    }
+  return num;
+}
+
+
+/*------------------------------------------------------------------.
+| Convert universal character name UCN to a single-byte character, |
+| and return that character.  Return -1 if UCN does not correspond |
+| to a single-byte character.  UCN starts with the two-character   |
+| `\u' or `\U' prefix, which is skipped.                            |
+`------------------------------------------------------------------*/
+
+static int
+convert_ucn_to_byte (char const *ucn)
+{
+  unsigned long int code = strtoul (ucn + 2, 0, 16);
+
+  /* FIXME: Currently we assume Unicode-compatible unibyte characters
+     on ASCII hosts (i.e., Latin-1 on hosts with 8-bit bytes).  On
+     non-ASCII hosts we support only the portable C character set.
+     These limitations should be removed once we add support for
+     multibyte characters.  */
+
+  if (UCHAR_MAX < code)
+    return -1;
+
+#if ! ('$' == 0x24 && '@' == 0x40 && '`' == 0x60 && '~' == 0x7e)
+  {
+    /* A non-ASCII host.  Use CODE to index into a table of the C
+       basic execution character set, which is guaranteed to exist on
+       all Standard C platforms.  This table also includes '$', '@',
+       and '`', which are not in the basic execution character set but
+       which are unibyte characters on all the platforms that we know
+       about.  */
+    static signed char const table[] =
+      {
+        '\0',   -1,   -1,   -1,   -1,   -1,   -1, '\a',
+        '\b', '\t', '\n', '\v', '\f', '\r',   -1,   -1,
+          -1,   -1,   -1,   -1,   -1,   -1,   -1,   -1,
+          -1,   -1,   -1,   -1,   -1,   -1,   -1,   -1,
+         ' ',  '!',  '"',  '#',  '$',  '%',  '&', '\'',
+         '(',  ')',  '*',  '+',  ',',  '-',  '.',  '/',
+         '0',  '1',  '2',  '3',  '4',  '5',  '6',  '7',
+         '8',  '9',  ':',  ';',  '<',  '=',  '>',  '?',
+         '@',  'A',  'B',  'C',  'D',  'E',  'F',  'G',
+         'H',  'I',  'J',  'K',  'L',  'M',  'N',  'O',
+         'P',  'Q',  'R',  'S',  'T',  'U',  'V',  'W',
+         'X',  'Y',  'Z',  '[', '\\',  ']',  '^',  '_',
+         '`',  'a',  'b',  'c',  'd',  'e',  'f',  'g',
+         'h',  'i',  'j',  'k',  'l',  'm',  'n',  'o',
+         'p',  'q',  'r',  's',  't',  'u',  'v',  'w',
+         'x',  'y',  'z',  '{',  '|',  '}',  '~'
+      };
+
+    /* Return the table entry directly as an int, so that -1 never
+       takes a detour through an unsigned type.  */
+    if (sizeof table <= code)
+      return -1;
+    return table[code];
+  }
+#else
+  /* CODE is at most UCHAR_MAX here.  */
+  return (int) code;
+#endif
+}
+
+
+/*----------------------------------------------------------------.
+| Handle `#line INT "FILE"'.
ARGS has already skipped `#line '.  |
+| A directive without a quoted file name is ignored, and a line   |
+| number that does not fit in an int saturates at the int range.  |
+| ARGS is modified in place: the closing quote is overwritten     |
+| with a null byte.                                               |
+`----------------------------------------------------------------*/
+
+static void
+handle_syncline (char *args)
+{
+  char *after_num;
+  long int lineno = strtol (args, &after_num, 10);
+  char *file = strchr (after_num, '"');
+  char *file_end = file ? strchr (file + 1, '"') : NULL;
+
+  /* The caller is expected to pass a well-formed directive, but do
+     not dereference a null pointer if a quote is missing.  */
+  if (! file_end)
+    return;
+  file++;
+  *file_end = '\0';
+
+  /* strtol saturates at LONG_MIN or LONG_MAX; narrow the result to
+     int explicitly instead of relying on an implementation-defined
+     conversion.  */
+  if (INT_MAX < lineno)
+    lineno = INT_MAX;
+  else if (lineno < INT_MIN)
+    lineno = INT_MIN;
+
+  scanner_cursor.file = current_file = uniqstr_new (file);
+  scanner_cursor.line = lineno;
+  scanner_cursor.column = 1;
+}
+
+
+/*----------------------------------------------------------------.
+| For a token or comment starting at START, report message MSGID, |
+| which should say that an end marker was found before            |
+| the expected TOKEN_END.  The reported location runs from START  |
+| to the current scanner cursor.                                  |
+`----------------------------------------------------------------*/
+
+static void
+unexpected_end (boundary start, char const *msgid, char const *token_end)
+{
+  location loc;
+  loc.start = start;
+  loc.end = scanner_cursor;
+  /* MSGID is translated here; callers only mark it with N_.  */
+  complain_at (loc, _(msgid), token_end);
+}
+
+
+/*------------------------------------------------------------------------.
+| Report an unexpected EOF in a token or comment starting at START.       |
+| An end of file was encountered and the expected TOKEN_END was missing.  |
+`------------------------------------------------------------------------*/
+
+static void
+unexpected_eof (boundary start, char const *token_end)
+{
+  unexpected_end (start, N_("missing `%s' at end of file"), token_end);
+}
+
+
+/*----------------------------------------.
+| Likewise, but for unexpected newlines.  |
+`----------------------------------------*/
+
+static void
+unexpected_newline (boundary start, char const *token_end)
+{
+  unexpected_end (start, N_("missing `%s' at end of line"), token_end);
+}
+
+
+/*-------------------------.
+| Initialize the scanner.  |
+`-------------------------*/
+
+void
+scanner_initialize (void)
+{
+  obstack_init (&obstack_for_string);
+}
+
+
+/*-----------------------------------------------.
+| Free all the memory allocated to the scanner.
|
+| Also delete the current Flex input buffer.     |
+`-----------------------------------------------*/
+
+void
+scanner_free (void)
+{
+  /* A null object pointer asks obstack_free to release every object
+     in the obstack.  */
+  obstack_free (&obstack_for_string, 0);
+  /* Reclaim Flex's buffers.  */
+  yy_delete_buffer (YY_CURRENT_BUFFER);
+}
diff --git a/src/scan-code-c.c b/src/scan-code-c.c
new file mode 100644
index 00000000..fd608569
--- /dev/null
+++ b/src/scan-code-c.c
@@ -0,0 +1,2 @@
+#include
+#include "scan-code.c"
diff --git a/src/scan-code.h b/src/scan-code.h
new file mode 100644
index 00000000..8d979357
--- /dev/null
+++ b/src/scan-code.h
@@ -0,0 +1,47 @@
+/* Bison Action Scanner
+
+   Copyright (C) 2006 Free Software Foundation, Inc.
+
+   This file is part of Bison, the GNU Compiler Compiler.
+
+   This program is free software; you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation; either version 2 of the License, or
+   (at your option) any later version.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program; if not, write to the Free Software
+   Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+   02110-1301  USA
+*/
+
+#ifndef SCAN_CODE_H_
+# define SCAN_CODE_H_
+
+# include "location.h"
+# include "symlist.h"
+
+/* The maximum number of semantic values to the left of a handle
+   (those referenced by $0, $-1, etc.) that are required by the
+   semantic actions of this grammar.  */
+extern int max_left_semantic_context;
+
+void code_scanner_free (void);
+
+/* Translate the action A, located at L, whose $$, $1 etc. refer to
+   the values of the rule R.  */
+const char *translate_rule_action (symbol_list *r, const char *a, location l);
+
+/* The action A refers to $$ and @$ only, referring to a symbol.
*/ +const char *translate_symbol_action (const char *a, location l); + +/* The action contains no special escapes, just protect M4 special + symbols. */ +const char *translate_code (const char *a, location l); + +#endif /* !SCAN_CODE_H_ */ diff --git a/src/scan-code.l b/src/scan-code.l new file mode 100644 index 00000000..70b250bc --- /dev/null +++ b/src/scan-code.l @@ -0,0 +1,358 @@ +/* Bison Action Scanner -*- C -*- + + Copyright (C) 2006 Free Software Foundation, Inc. + + This file is part of Bison, the GNU Compiler Compiler. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + 02110-1301 USA +*/ + +%option debug nodefault nounput noyywrap never-interactive +%option prefix="code_" outfile="lex.yy.c" + +%{ +/* Work around a bug in flex 2.5.31. See Debian bug 333231 + . */ +#undef code_wrap +#define code_wrap() 1 + +#define FLEX_PREFIX(Id) code_ ## Id +#include "flex-scanner.h" +#include "reader.h" +#include "getargs.h" +#include +#include +#include + +#include "scan-code.h" + +/* The current calling start condition: SC_RULE_ACTION or + SC_SYMBOL_ACTION. 
*/ +# define YY_DECL const char *code_lex (int sc_context) +YY_DECL; + +#define YY_USER_ACTION location_compute (loc, &loc->end, yytext, yyleng); + +static void handle_action_dollar (char *cp, location loc); +static void handle_action_at (char *cp, location loc); +static location the_location; +static location *loc = &the_location; + +/* The rule being processed. */ +symbol_list *current_rule; +%} + /* C and C++ comments in code. */ +%x SC_COMMENT SC_LINE_COMMENT + /* Strings and characters in code. */ +%x SC_STRING SC_CHARACTER + /* Whether in a rule or symbol action. Specifies the translation + of $ and @. */ +%x SC_RULE_ACTION SC_SYMBOL_ACTION + + +/* POSIX says that a tag must be both an id and a C union member, but + historically almost any character is allowed in a tag. We disallow + NUL and newline, as this simplifies our implementation. */ +tag [^\0\n>]+ + +/* Zero or more instances of backslash-newline. Following GCC, allow + white space between the backslash and the newline. */ +splice (\\[ \f\t\v]*\n)* + +%% + +%{ + /* This scanner is special: it is invoked only once, henceforth + is expected to return only once. This initialization is + therefore done once per action to translate. */ + assert (sc_context == SC_SYMBOL_ACTION + || sc_context == SC_RULE_ACTION + || sc_context == INITIAL); + BEGIN sc_context; +%} + + /*------------------------------------------------------------. + | Scanning a C comment. The initial `/ *' is already eaten. | + `------------------------------------------------------------*/ + + +{ + "*"{splice}"/" STRING_GROW; BEGIN sc_context; +} + + + /*--------------------------------------------------------------. + | Scanning a line comment. The initial `//' is already eaten. | + `--------------------------------------------------------------*/ + + +{ + "\n" STRING_GROW; BEGIN sc_context; + {splice} STRING_GROW; +} + + + /*--------------------------------------------. + | Scanning user-code characters and strings. 
| + `--------------------------------------------*/ + + +{ + {splice}|\\{splice}. STRING_GROW; +} + + +{ + "'" STRING_GROW; BEGIN sc_context; +} + + +{ + "\"" STRING_GROW; BEGIN sc_context; +} + + +{ + "'" { + STRING_GROW; + BEGIN SC_CHARACTER; + } + "\"" { + STRING_GROW; + BEGIN SC_STRING; + } + "/"{splice}"*" { + STRING_GROW; + BEGIN SC_COMMENT; + } + "/"{splice}"/" { + STRING_GROW; + BEGIN SC_LINE_COMMENT; + } +} + + +{ + "$"("<"{tag}">")?(-?[0-9]+|"$") handle_action_dollar (yytext, *loc); + "@"(-?[0-9]+|"$") handle_action_at (yytext, *loc); + + "$" { + warn_at (*loc, _("stray `$'")); + obstack_sgrow (&obstack_for_string, "$]["); + } + "@" { + warn_at (*loc, _("stray `@'")); + obstack_sgrow (&obstack_for_string, "@@"); + } +} + + +{ + "$$" obstack_sgrow (&obstack_for_string, "]b4_dollar_dollar["); + "@$" obstack_sgrow (&obstack_for_string, "]b4_at_dollar["); +} + + + /*-----------------------------------------. + | Escape M4 quoting characters in C code. | + `-----------------------------------------*/ + +<*> +{ + \$ obstack_sgrow (&obstack_for_string, "$]["); + \@ obstack_sgrow (&obstack_for_string, "@@"); + \[ obstack_sgrow (&obstack_for_string, "@{"); + \] obstack_sgrow (&obstack_for_string, "@}"); +} + + /*-----------------------------------------------------. + | By default, grow the string obstack with the input. | + `-----------------------------------------------------*/ + +<*>.|\n STRING_GROW; + + /* End of processing. */ +<*><> { + obstack_1grow (&obstack_for_string, '\0'); + return obstack_finish (&obstack_for_string); + } + +%% + +/* Keeps track of the maximum number of semantic values to the left of + a handle (those referenced by $0, $-1, etc.) are required by the + semantic actions of this grammar. */ +int max_left_semantic_context = 0; + + +/*------------------------------------------------------------------. +| TEXT is pointing to a wannabee semantic value (i.e., a `$'). 
|
+|                                                                   |
+| Possible inputs: $[<TYPE-NAME>]($|integer)                        |
+|                                                                   |
+| Output to OBSTACK_FOR_STRING a reference to this semantic value.  |
+| The reference is resolved against CURRENT_RULE, and the symbol    |
+| it designates is marked as used.  TEXT is modified in place: the  |
+| `>' that ends an explicit type name is overwritten with a null    |
+| byte.                                                             |
+`------------------------------------------------------------------*/
+
+static void
+handle_action_dollar (char *text, location loc)
+{
+  const char *type_name = NULL;
+  char *cp = text + 1;
+  int rule_length = symbol_list_length (current_rule->next);
+
+  /* Get the type name if explicit.  */
+  if (*cp == '<')
+    {
+      type_name = ++cp;
+      while (*cp != '>')
+        ++cp;
+      *cp = '\0';
+      ++cp;
+    }
+
+  if (*cp == '$')
+    {
+      if (!type_name)
+        type_name = symbol_list_n_type_name_get (current_rule, loc, 0);
+      if (!type_name && typed)
+        complain_at (loc, _("$$ of `%s' has no declared type"),
+                     current_rule->sym->tag);
+      if (!type_name)
+        type_name = "";
+      obstack_fgrow1 (&obstack_for_string,
+                      "]b4_lhs_value([%s])[", type_name);
+      current_rule->used = true;
+    }
+  else
+    {
+      long int num;
+      set_errno (0);
+      num = strtol (cp, 0, 10);
+
+      /* The lower bound is 1 - INT_MAX rather than INT_MIN so that
+         `1 - n' below cannot overflow.  */
+      if (1 - INT_MAX <= num && num <= rule_length && ! get_errno ())
+        {
+          int n = num;
+          if (1-n > max_left_semantic_context)
+            max_left_semantic_context = 1-n;
+          if (!type_name && n > 0)
+            type_name = symbol_list_n_type_name_get (current_rule, loc, n);
+          if (!type_name && typed)
+            complain_at (loc, _("$%d of `%s' has no declared type"),
+                         n, current_rule->sym->tag);
+          if (!type_name)
+            type_name = "";
+          obstack_fgrow3 (&obstack_for_string,
+                          "]b4_rhs_value(%d, %d, [%s])[",
+                          rule_length, n, type_name);
+          symbol_list_n_used_set (current_rule, n, true);
+        }
+      else
+        complain_at (loc, _("integer out of range: %s"), quote (text));
+    }
+}
+
+
+/*------------------------------------------------------.
+| TEXT is a location token (i.e., a `@...').  Output to |
+| OBSTACK_FOR_STRING a reference to this location.
|
+`------------------------------------------------------*/
+
+static void
+handle_action_at (char *text, location loc)
+{
+  char const *ref = text + 1;
+  int rule_length = symbol_list_length (current_rule->next);
+  long int num;
+
+  /* Any `@' reference turns locations on.  */
+  locations_flag = true;
+
+  /* `@$': the location of the left-hand side.  */
+  if (*ref == '$')
+    {
+      obstack_sgrow (&obstack_for_string, "]b4_lhs_location[");
+      return;
+    }
+
+  /* `@N': the location of the Nth right-hand side symbol.  */
+  set_errno (0);
+  num = strtol (ref, 0, 10);
+  if (num < INT_MIN || rule_length < num || get_errno ())
+    {
+      complain_at (loc, _("integer out of range: %s"), quote (text));
+      return;
+    }
+
+  obstack_fgrow2 (&obstack_for_string, "]b4_rhs_location(%d, %d)[",
+                  rule_length, (int) num);
+}
+
+
+/*------------------------.
+| Translating user code.  |
+`------------------------*/
+
+/* Scan the string A with code_lex, started in the start condition
+   SC_CONTEXT (SC_RULE_ACTION, SC_SYMBOL_ACTION, or INITIAL), and
+   return what code_lex returns.  The scanner's location is reset
+   to the start of L before scanning.  The string obstack is
+   initialized on the first call.  */
+
+static const char *
+translate_action (int sc_context, const char *a, location l)
+{
+  static bool initialized = false;
+  const char *translation;
+
+  if (!initialized)
+    {
+      obstack_init (&obstack_for_string);
+      /* The initial buffer, never used.  */
+      yy_delete_buffer (YY_CURRENT_BUFFER);
+      yy_flex_debug = 0;
+      initialized = true;
+    }
+
+  loc->end = l.start;
+  loc->start = l.start;
+
+  /* Scan from a temporary in-memory buffer holding A, and discard
+     that buffer once the scan is over.  */
+  yy_switch_to_buffer (yy_scan_string (a));
+  translation = code_lex (sc_context);
+  yy_delete_buffer (YY_CURRENT_BUFFER);
+
+  return translation;
+}
+
+/* Translate the action A of the rule R; R becomes the current
+   rule for the `$' and `@' handlers.  */
+const char *
+translate_rule_action (symbol_list *r, const char *a, location l)
+{
+  current_rule = r;
+  return translate_action (SC_RULE_ACTION, a, l);
+}
+
+/* Translate the action A in the SC_SYMBOL_ACTION context.  */
+const char *
+translate_symbol_action (const char *a, location l)
+{
+  return translate_action (SC_SYMBOL_ACTION, a, l);
+}
+
+/* Translate the code A in the INITIAL context.  */
+const char *
+translate_code (const char *a, location l)
+{
+  return translate_action (INITIAL, a, l);
+}
+
+/*-----------------------------------------------.
+| Free all the memory allocated to the scanner.
|
+`-----------------------------------------------*/
+
+void
+code_scanner_free (void)
+{
+  /* A null object pointer asks obstack_free to release every object
+     in the obstack.
+     NOTE(review): the obstack is only initialized by the first call
+     to translate_action; presumably this function is never reached
+     before that -- confirm against the caller.  */
+  obstack_free (&obstack_for_string, 0);
+  /* Reclaim Flex's buffers.  */
+  yy_delete_buffer (YY_CURRENT_BUFFER);
+}
diff --git a/src/scan-gram.h b/src/scan-gram.h
new file mode 100644
index 00000000..4e00dfa8
--- /dev/null
+++ b/src/scan-gram.h
@@ -0,0 +1,44 @@
+/* Bison Grammar Scanner
+
+   Copyright (C) 2006 Free Software Foundation, Inc.
+
+   This file is part of Bison, the GNU Compiler Compiler.
+
+   This program is free software; you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation; either version 2 of the License, or
+   (at your option) any later version.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program; if not, write to the Free Software
+   Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+   02110-1301  USA
+*/
+
+#ifndef SCAN_GRAM_H_
+# define SCAN_GRAM_H_
+
+/* From the scanner.  */
+extern FILE *gram_in;
+extern int gram__flex_debug;
+extern boundary gram_scanner_cursor;
+extern char *gram_last_string;
+extern location gram_last_braced_code_loc;
+void gram_scanner_initialize (void);
+void gram_scanner_free (void);
+void gram_scanner_last_string_free (void);
+
+/* These are declared by the scanner, but not used.  We put them here
+   to pacify "make syntax-check".
*/ +extern FILE *gram_out; +extern int gram_lineno; + +# define GRAM_LEX_DECL int gram_lex (YYSTYPE *val, location *loc) +GRAM_LEX_DECL; + +#endif /* !SCAN_GRAM_H_ */ diff --git a/src/scan-gram.l b/src/scan-gram.l index cf704c77..329e5087 100644 --- a/src/scan-gram.l +++ b/src/scan-gram.l @@ -29,112 +29,48 @@ #undef gram_wrap #define gram_wrap() 1 -#include "system.h" - -#include -#include +#define FLEX_PREFIX(Id) gram_ ## Id +#include "flex-scanner.h" #include "complain.h" #include "files.h" -#include "getargs.h" +#include "getargs.h" /* yacc_flag */ #include "gram.h" #include "quotearg.h" #include "reader.h" #include "uniqstr.h" +#include +#include + +#include "scan-gram.h" + +#define YY_DECL GRAM_LEX_DECL + #define YY_USER_INIT \ - do \ - { \ - scanner_cursor.file = current_file; \ - scanner_cursor.line = 1; \ - scanner_cursor.column = 1; \ - code_start = scanner_cursor; \ - } \ - while (0) - -/* Pacify "gcc -Wmissing-prototypes" when flex 2.5.31 is used. */ -int gram_get_lineno (void); -FILE *gram_get_in (void); -FILE *gram_get_out (void); -int gram_get_leng (void); -char *gram_get_text (void); -void gram_set_lineno (int); -void gram_set_in (FILE *); -void gram_set_out (FILE *); -int gram_get_debug (void); -void gram_set_debug (int); -int gram_lex_destroy (void); + code_start = scanner_cursor = loc->start; \ /* Location of scanner cursor. */ boundary scanner_cursor; -static void adjust_location (location *, char const *, size_t); -#define YY_USER_ACTION adjust_location (loc, yytext, yyleng); +#define YY_USER_ACTION location_compute (loc, &scanner_cursor, yytext, yyleng); static size_t no_cr_read (FILE *, char *, size_t); #define YY_INPUT(buf, result, size) ((result) = no_cr_read (yyin, buf, size)) - -/* OBSTACK_FOR_STRING -- Used to store all the characters that we need to - keep (to construct ID, STRINGS etc.). Use the following macros to - use it. 
- - Use STRING_GROW to append what has just been matched, and - STRING_FINISH to end the string (it puts the ending 0). - STRING_FINISH also stores this string in LAST_STRING, which can be - used, and which is used by STRING_FREE to free the last string. */ - -static struct obstack obstack_for_string; - /* A string representing the most recently saved token. */ char *last_string; -/* The location of the most recently saved token, if it was a - BRACED_CODE token; otherwise, this has an unspecified value. */ -location last_braced_code_loc; - -#define STRING_GROW \ - obstack_grow (&obstack_for_string, yytext, yyleng) - -#define STRING_FINISH \ - do { \ - obstack_1grow (&obstack_for_string, '\0'); \ - last_string = obstack_finish (&obstack_for_string); \ - } while (0) - -#define STRING_FREE \ - obstack_free (&obstack_for_string, last_string) - void -scanner_last_string_free (void) +gram_scanner_last_string_free (void) { STRING_FREE; } -/* Within well-formed rules, RULE_LENGTH is the number of values in - the current rule so far, which says where to find `$0' with respect - to the top of the stack. It is not the same as the rule->length in - the case of mid rule actions. - - Outside of well-formed rules, RULE_LENGTH has an undefined value. */ -static int rule_length; - -static void rule_length_overflow (location) __attribute__ ((__noreturn__)); - -/* Increment the rule length by one, checking for overflow. */ -static inline void -increment_rule_length (location loc) -{ - rule_length++; - - /* Don't allow rule_length == INT_MAX, since that might cause - confusion with strtol if INT_MAX == LONG_MAX. */ - if (rule_length == INT_MAX) - rule_length_overflow (loc); -} +/* The location of the most recently saved token, if it was a + BRACED_CODE token; otherwise, this has an unspecified value. 
*/ +location gram_last_braced_code_loc; -static void handle_dollar (int token_type, char *cp, location loc); -static void handle_at (int token_type, char *cp, location loc); static void handle_syncline (char *, location); static unsigned long int scan_integer (char const *p, int base, location loc); static int convert_ucn_to_byte (char const *hex_text); @@ -142,11 +78,26 @@ static void unexpected_eof (boundary, char const *); static void unexpected_newline (boundary, char const *); %} -%x SC_COMMENT SC_LINE_COMMENT SC_YACC_COMMENT -%x SC_STRING SC_CHARACTER -%x SC_AFTER_IDENTIFIER + /* A C-like comment in directives/rules. */ +%x SC_YACC_COMMENT + /* Strings and characters in directives/rules. */ %x SC_ESCAPED_STRING SC_ESCAPED_CHARACTER -%x SC_PRE_CODE SC_BRACED_CODE SC_PROLOGUE SC_EPILOGUE + /* A identifier was just read in directives/rules. Special state + to capture the sequence `identifier :'. */ +%x SC_AFTER_IDENTIFIER + /* A keyword that should be followed by some code was read (e.g. + %printer). */ +%x SC_PRE_CODE + + /* Three types of user code: + - prologue (code between `%{' `%}' in the first section, before %%); + - actions, printers, union, etc, (between braced in the middle section); + - epilogue (everything after the second %%). */ +%x SC_PROLOGUE SC_BRACED_CODE SC_EPILOGUE + /* C and C++ comments in code. */ +%x SC_COMMENT SC_LINE_COMMENT + /* Strings and characters in code. */ +%x SC_STRING SC_CHARACTER letter [.abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ_] id {letter}({letter}|[0-9])* @@ -221,17 +172,17 @@ splice (\\[ \f\t\v]*\n)* "%default"[-_]"prec" return PERCENT_DEFAULT_PREC; "%define" return PERCENT_DEFINE; "%defines" return PERCENT_DEFINES; - "%destructor" token_type = PERCENT_DESTRUCTOR; BEGIN SC_PRE_CODE; + "%destructor" /* FIXME: Remove once %union handled differently. 
*/ token_type = BRACED_CODE; return PERCENT_DESTRUCTOR; "%dprec" return PERCENT_DPREC; "%error"[-_]"verbose" return PERCENT_ERROR_VERBOSE; "%expect" return PERCENT_EXPECT; "%expect"[-_]"rr" return PERCENT_EXPECT_RR; "%file-prefix" return PERCENT_FILE_PREFIX; "%fixed"[-_]"output"[-_]"files" return PERCENT_YACC; - "%initial-action" token_type = PERCENT_INITIAL_ACTION; BEGIN SC_PRE_CODE; + "%initial-action" /* FIXME: Remove once %union handled differently. */ token_type = BRACED_CODE; return PERCENT_INITIAL_ACTION; "%glr-parser" return PERCENT_GLR_PARSER; "%left" return PERCENT_LEFT; - "%lex-param" token_type = PERCENT_LEX_PARAM; BEGIN SC_PRE_CODE; + "%lex-param" /* FIXME: Remove once %union handled differently. */ token_type = BRACED_CODE; return PERCENT_LEX_PARAM; "%locations" return PERCENT_LOCATIONS; "%merge" return PERCENT_MERGE; "%name"[-_]"prefix" return PERCENT_NAME_PREFIX; @@ -241,9 +192,9 @@ splice (\\[ \f\t\v]*\n)* "%nondeterministic-parser" return PERCENT_NONDETERMINISTIC_PARSER; "%nterm" return PERCENT_NTERM; "%output" return PERCENT_OUTPUT; - "%parse-param" token_type = PERCENT_PARSE_PARAM; BEGIN SC_PRE_CODE; - "%prec" rule_length--; return PERCENT_PREC; - "%printer" token_type = PERCENT_PRINTER; BEGIN SC_PRE_CODE; + "%parse-param" /* FIXME: Remove once %union handled differently. */ token_type = BRACED_CODE; return PERCENT_PARSE_PARAM; + "%prec" return PERCENT_PREC; + "%printer" /* FIXME: Remove once %union handled differently. 
*/ token_type = BRACED_CODE; return PERCENT_PRINTER; "%pure"[-_]"parser" return PERCENT_PURE_PARSER; "%require" return PERCENT_REQUIRE; "%right" return PERCENT_RIGHT; @@ -262,13 +213,12 @@ splice (\\[ \f\t\v]*\n)* } "=" return EQUAL; - "|" rule_length = 0; return PIPE; + "|" return PIPE; ";" return SEMICOLON; {id} { val->symbol = symbol_get (yytext, *loc); id_loc = *loc; - increment_rule_length (*loc); BEGIN SC_AFTER_IDENTIFIER; } @@ -335,7 +285,6 @@ splice (\\[ \f\t\v]*\n)* { ":" { - rule_length = 0; *loc = id_loc; BEGIN INITIAL; return ID_COLON; @@ -401,7 +350,6 @@ splice (\\[ \f\t\v]*\n)* STRING_FINISH; loc->start = token_start; val->chars = last_string; - increment_rule_length (*loc); BEGIN INITIAL; return STRING; } @@ -428,7 +376,6 @@ splice (\\[ \f\t\v]*\n)* last_string_1 = last_string[1]; symbol_user_token_number_set (val->symbol, last_string_1, *loc); STRING_FREE; - increment_rule_length (*loc); BEGIN INITIAL; return ID; } @@ -501,7 +448,7 @@ splice (\\[ \f\t\v]*\n)* { - {splice}|\\{splice}[^\n$@\[\]] STRING_GROW; + {splice}|\\{splice}[^\n\[\]] STRING_GROW; } @@ -622,8 +569,7 @@ splice (\\[ \f\t\v]*\n)* STRING_FINISH; loc->start = code_start; val->chars = last_string; - increment_rule_length (*loc); - last_braced_code_loc = *loc; + gram_last_braced_code_loc = *loc; BEGIN INITIAL; return token_type; } @@ -633,18 +579,6 @@ splice (\\[ \f\t\v]*\n)* (as `<' `<%'). */ "<"{splice}"<" STRING_GROW; - "$"("<"{tag}">")?(-?[0-9]+|"$") handle_dollar (token_type, yytext, *loc); - "@"(-?[0-9]+|"$") handle_at (token_type, yytext, *loc); - - "$" { - warn_at (*loc, _("stray `$'")); - obstack_sgrow (&obstack_for_string, "$]["); - } - "@" { - warn_at (*loc, _("stray `@'")); - obstack_sgrow (&obstack_for_string, "@@"); - } - <> unexpected_eof (code_start, "}"); BEGIN INITIAL; } @@ -684,19 +618,6 @@ splice (\\[ \f\t\v]*\n)* } - /*-----------------------------------------. - | Escape M4 quoting characters in C code. 
| - `-----------------------------------------*/ - - -{ - \$ obstack_sgrow (&obstack_for_string, "$]["); - \@ obstack_sgrow (&obstack_for_string, "@@"); - \[ obstack_sgrow (&obstack_for_string, "@{"); - \] obstack_sgrow (&obstack_for_string, "@}"); -} - - /*-----------------------------------------------------. | By default, grow the string obstack with the input. | `-----------------------------------------------------*/ @@ -706,79 +627,6 @@ splice (\\[ \f\t\v]*\n)* %% -/* Keeps track of the maximum number of semantic values to the left of - a handle (those referenced by $0, $-1, etc.) are required by the - semantic actions of this grammar. */ -int max_left_semantic_context = 0; - -/* If BUF is null, add BUFSIZE (which in this case must be less than - INT_MAX) to COLUMN; otherwise, add mbsnwidth (BUF, BUFSIZE, 0) to - COLUMN. If an overflow occurs, or might occur but is undetectable, - return INT_MAX. Assume COLUMN is nonnegative. */ - -static inline int -add_column_width (int column, char const *buf, size_t bufsize) -{ - size_t width; - unsigned int remaining_columns = INT_MAX - column; - - if (buf) - { - if (INT_MAX / 2 <= bufsize) - return INT_MAX; - width = mbsnwidth (buf, bufsize, 0); - } - else - width = bufsize; - - return width <= remaining_columns ? column + width : INT_MAX; -} - -/* Set *LOC and adjust scanner cursor to account for token TOKEN of - size SIZE. 
*/ - -static void -adjust_location (location *loc, char const *token, size_t size) -{ - int line = scanner_cursor.line; - int column = scanner_cursor.column; - char const *p0 = token; - char const *p = token; - char const *lim = token + size; - - loc->start = scanner_cursor; - - for (p = token; p < lim; p++) - switch (*p) - { - case '\n': - line += line < INT_MAX; - column = 1; - p0 = p + 1; - break; - - case '\t': - column = add_column_width (column, p0, p - p0); - column = add_column_width (column, NULL, 8 - ((column - 1) & 7)); - p0 = p + 1; - break; - - default: - break; - } - - scanner_cursor.line = line; - scanner_cursor.column = column = add_column_width (column, p0, p - p0); - - loc->end = scanner_cursor; - - if (line == INT_MAX && loc->start.line != INT_MAX) - warn_at (*loc, _("line number overflow")); - if (column == INT_MAX && loc->start.column != INT_MAX) - warn_at (*loc, _("column number overflow")); -} - - /* Read bytes from FP into buffer BUF of size SIZE. Return the number of bytes read. Remove '\r' from input, treating \r\n and isolated \r as \n. */ @@ -826,173 +674,6 @@ no_cr_read (FILE *fp, char *buf, size_t size) } -/*------------------------------------------------------------------. -| TEXT is pointing to a wannabee semantic value (i.e., a `$'). | -| | -| Possible inputs: $[]($|integer) | -| | -| Output to OBSTACK_FOR_STRING a reference to this semantic value. | -`------------------------------------------------------------------*/ - -static inline bool -handle_action_dollar (char *text, location loc) -{ - const char *type_name = NULL; - char *cp = text + 1; - - if (! current_rule) - return false; - - /* Get the type name if explicit. 
*/ - if (*cp == '<') - { - type_name = ++cp; - while (*cp != '>') - ++cp; - *cp = '\0'; - ++cp; - } - - if (*cp == '$') - { - if (!type_name) - type_name = symbol_list_n_type_name_get (current_rule, loc, 0); - if (!type_name && typed) - complain_at (loc, _("$$ of `%s' has no declared type"), - current_rule->sym->tag); - if (!type_name) - type_name = ""; - obstack_fgrow1 (&obstack_for_string, - "]b4_lhs_value([%s])[", type_name); - current_rule->used = true; - } - else - { - long int num = strtol (cp, NULL, 10); - - if (1 - INT_MAX + rule_length <= num && num <= rule_length) - { - int n = num; - if (max_left_semantic_context < 1 - n) - max_left_semantic_context = 1 - n; - if (!type_name && 0 < n) - type_name = symbol_list_n_type_name_get (current_rule, loc, n); - if (!type_name && typed) - complain_at (loc, _("$%d of `%s' has no declared type"), - n, current_rule->sym->tag); - if (!type_name) - type_name = ""; - obstack_fgrow3 (&obstack_for_string, - "]b4_rhs_value(%d, %d, [%s])[", - rule_length, n, type_name); - symbol_list_n_used_set (current_rule, n, true); - } - else - complain_at (loc, _("integer out of range: %s"), quote (text)); - } - - return true; -} - - -/*----------------------------------------------------------------. -| Map `$?' onto the proper M4 symbol, depending on its TOKEN_TYPE | -| (are we in an action?). | -`----------------------------------------------------------------*/ - -static void -handle_dollar (int token_type, char *text, location loc) -{ - switch (token_type) - { - case BRACED_CODE: - if (handle_action_dollar (text, loc)) - return; - break; - - case PERCENT_DESTRUCTOR: - case PERCENT_INITIAL_ACTION: - case PERCENT_PRINTER: - if (text[1] == '$') - { - obstack_sgrow (&obstack_for_string, "]b4_dollar_dollar["); - return; - } - break; - - default: - break; - } - - complain_at (loc, _("invalid value: %s"), quote (text)); -} - - -/*------------------------------------------------------. -| TEXT is a location token (i.e., a `@...'). 
Output to | -| OBSTACK_FOR_STRING a reference to this location. | -`------------------------------------------------------*/ - -static inline bool -handle_action_at (char *text, location loc) -{ - char *cp = text + 1; - locations_flag = true; - - if (! current_rule) - return false; - - if (*cp == '$') - obstack_sgrow (&obstack_for_string, "]b4_lhs_location["); - else - { - long int num = strtol (cp, NULL, 10); - - if (1 - INT_MAX + rule_length <= num && num <= rule_length) - { - int n = num; - obstack_fgrow2 (&obstack_for_string, "]b4_rhs_location(%d, %d)[", - rule_length, n); - } - else - complain_at (loc, _("integer out of range: %s"), quote (text)); - } - - return true; -} - - -/*----------------------------------------------------------------. -| Map `@?' onto the proper M4 symbol, depending on its TOKEN_TYPE | -| (are we in an action?). | -`----------------------------------------------------------------*/ - -static void -handle_at (int token_type, char *text, location loc) -{ - switch (token_type) - { - case BRACED_CODE: - handle_action_at (text, loc); - return; - - case PERCENT_INITIAL_ACTION: - case PERCENT_DESTRUCTOR: - case PERCENT_PRINTER: - if (text[1] == '$') - { - obstack_sgrow (&obstack_for_string, "]b4_at_dollar["); - return; - } - break; - - default: - break; - } - - complain_at (loc, _("invalid value: %s"), quote (text)); -} - /*------------------------------------------------------. | Scan NUMBER for a base-BASE integer at location LOC. | @@ -1087,20 +768,8 @@ handle_syncline (char *args, location loc) warn_at (loc, _("line number overflow")); lineno = INT_MAX; } - scanner_cursor.file = current_file = uniqstr_new (file); - scanner_cursor.line = lineno; - scanner_cursor.column = 1; -} - - -/*---------------------------------. -| Report a rule that is too long. 
| -`---------------------------------*/ - -static void -rule_length_overflow (location loc) -{ - fatal_at (loc, _("rule is too long")); + current_file = uniqstr_new (file); + boundary_set (&scanner_cursor, current_file, lineno, 1); } @@ -1148,7 +817,7 @@ unexpected_newline (boundary start, char const *token_end) `-------------------------*/ void -scanner_initialize (void) +gram_scanner_initialize (void) { obstack_init (&obstack_for_string); } @@ -1159,7 +828,7 @@ scanner_initialize (void) `-----------------------------------------------*/ void -scanner_free (void) +gram_scanner_free (void) { obstack_free (&obstack_for_string, 0); /* Reclaim Flex's buffers. */ diff --git a/src/system.h b/src/system.h index 63157904..2468deed 100644 --- a/src/system.h +++ b/src/system.h @@ -113,6 +113,8 @@ char *base_name (char const *name); # define ATTRIBUTE_UNUSED __attribute__ ((__unused__)) #endif +#define FUNCTION_PRINT() fprintf (stderr, "%s: ", __func__) + /*------. | NLS. | `------*/ diff --git a/tests/input.at b/tests/input.at index 82fbe063..afacd201 100644 --- a/tests/input.at +++ b/tests/input.at @@ -25,33 +25,17 @@ AT_BANNER([[Input Processing.]]) ## Invalid $n. ## ## ------------ ## -AT_SETUP([Invalid dollar-n]) +AT_SETUP([Invalid \$n and @n]) AT_DATA([input.y], [[%% exp: { $$ = $1 ; }; -]]) - -AT_CHECK([bison input.y], [1], [], -[[input.y:2.13-14: integer out of range: `$1' -]]) - -AT_CLEANUP - - -## ------------ ## -## Invalid @n. 
## -## ------------ ## - -AT_SETUP([Invalid @n]) - -AT_DATA([input.y], -[[%% exp: { @$ = @1 ; }; ]]) AT_CHECK([bison input.y], [1], [], -[[input.y:2.13-14: integer out of range: `@1' +[[input.y:2.13-14: integer out of range: `$1' +input.y:3.13-14: integer out of range: `@1' ]]) AT_CLEANUP @@ -200,11 +184,11 @@ AT_SETUP([Torturing the Scanner]) AT_DATA([input.y], []) AT_CHECK([bison input.y], [1], [], -[[input.y:1.1: syntax error, unexpected end of file +[[input.y:1.0: syntax error, unexpected end of file ]]) -AT_DATA([input.y], +AT_DATA([input.y], [{} ]) AT_CHECK([bison input.y], [1], [], diff --git a/tests/regression.at b/tests/regression.at index 6a37f759..04beccc3 100644 --- a/tests/regression.at +++ b/tests/regression.at @@ -346,9 +346,7 @@ AT_DATA([input.y], ]]) AT_CHECK([bison input.y], [1], [], -[[input.y:3.1: missing `{' in "%destructor {...}" -input.y:4.1: missing `{' in "%initial-action {...}" -input.y:4.1: syntax error, unexpected %initial-action {...}, expecting string or identifier +[[input.y:3.1-15: syntax error, unexpected %initial-action, expecting {...} ]]) AT_CLEANUP -- 2.45.2