X-Git-Url: https://git.saurik.com/bison.git/blobdiff_plain/7067cb3620a61461b33d4192ce4ed81c3b5c27ea..ccdc1577ef7fb32a5e30cb655337e7cd42b98987:/src/output.c?ds=inline diff --git a/src/output.c b/src/output.c index a8ddf822..d6560970 100644 --- a/src/output.c +++ b/src/output.c @@ -1,170 +1,48 @@ -/* Output the generated parsing program for bison, - Copyright (C) 1984, 1986, 1989, 1992, 2000, 2001, 2002 - Free Software Foundation, Inc. +/* Output the generated parsing program for Bison. + + Copyright (C) 1984, 1986, 1989, 1992, 2000-2012 Free Software + Foundation, Inc. This file is part of Bison, the GNU Compiler Compiler. - Bison is free software; you can redistribute it and/or modify it - under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 2, or (at your option) - any later version. + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. - Bison is distributed in the hope that it will be useful, but - WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - General Public License for more details. + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. You should have received a copy of the GNU General Public License - along with Bison; see the file COPYING. If not, write to the Free - Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA - 02111-1307, USA. */ - - -/* The parser tables consist of these tables. Marked ones needed only - for the semantic parser. Double marked are output only if switches - are set. - - YYTRANSLATE = vector mapping yylex's token numbers into bison's - token numbers. - - ++ YYTNAME = vector of string-names indexed by bison token number. - - ++ YYTOKNUM = vector of yylex token numbers corresponding to - entries in YYTNAME. - - YYRLINE = vector of line-numbers of all rules. For yydebug - printouts. - - YYRHS = vector of items of all rules. This is exactly what RITEMS - contains. For yydebug and for semantic parser. - - YYPRHS[R] = index in YYRHS of first item for rule R. - - YYR1[R] = symbol number of symbol that rule R derives. - - YYR2[R] = number of symbols composing right hand side of rule R. - - + YYSTOS[S] = the symbol number of the symbol that leads to state - S. - - YYDEFACT[S] = default rule to reduce with in state s, when YYTABLE - doesn't specify something else to do. Zero means the default is an - error. - - YYDEFGOTO[I] = default state to go to after a reduction of a rule - that generates variable NTOKENS + I, except when YYTABLE specifies - something else to do. - - YYPACT[S] = index in YYTABLE of the portion describing state S. - The lookahead token's type is used to index that portion to find - out what to do. + along with this program. If not, see . */ - If the value in YYTABLE is positive, we shift the token and go to - that state. - - If the value is negative, it is minus a rule number to reduce by. - - If the value is zero, the default action from YYDEFACT[S] is used. - - YYPGOTO[I] = the index in YYTABLE of the portion describing what to - do after reducing a rule that derives variable I + NTOKENS. This - portion is indexed by the parser state number, S, as of before the - text for this nonterminal was read. The value from YYTABLE is the - state to go to if the corresponding value in YYCHECK is S. - - YYTABLE = a vector filled with portions for different uses, found - via YYPACT and YYPGOTO. - - YYCHECK = a vector indexed in parallel with YYTABLE. It indicates, - in a roundabout way, the bounds of the portion you are trying to - examine. - - Suppose that the portion of yytable starts at index P and the index - to be examined within the portion is I. Then if YYCHECK[P+I] != I, - I is outside the bounds of what is actually allocated, and the - default (from YYDEFACT or YYDEFGOTO) should be used. Otherwise, - YYTABLE[P+I] should be used. +#include +#include "system.h" - YYFINAL = the state number of the termination state. YYFLAG = most - negative short int. Used to flag ?? */ +#include +#include +#include +#include +#include +#include +#include +#include -#include "system.h" -#include "bitsetv.h" -#include "quotearg.h" -#include "error.h" -#include "getargs.h" +#include "complain.h" #include "files.h" +#include "getargs.h" #include "gram.h" -#include "LR0.h" -#include "complain.h" +#include "muscle-tab.h" #include "output.h" -#include "lalr.h" #include "reader.h" +#include "scan-code.h" /* max_left_semantic_context */ +#include "scan-skel.h" #include "symtab.h" -#include "conflicts.h" -#include "muscle_tab.h" - -/* From lib/readpipe.h. */ -FILE *readpipe PARAMS ((const char *, ...)); - -/* From src/scan-skel.l. */ -int skel_lex PARAMS ((void)); -extern FILE *skel_in; - -static int nvectors; -static int nentries; -static short **froms = NULL; -static short **tos = NULL; -static short *tally = NULL; -static short *width = NULL; -static short *actrow = NULL; -static short *state_count = NULL; -static short *order = NULL; -static short *base = NULL; -static short *pos = NULL; - -/* TABLE_SIZE is the allocated size of both TABLE and CHECK. - We start with the original hard-coded value: SHRT_MAX - (yes, not USHRT_MAX). */ -static size_t table_size = SHRT_MAX; -static short *table = NULL; -static short *check = NULL; -static int lowzero; -static int high; - -struct obstack muscle_obstack; -static struct obstack format_obstack; - -int error_verbose = 0; +#include "tables.h" - -/*----------------------------------------------------------------. -| If TABLE (and CHECK) appear to be small to be addressed at | -| DESIRED, grow them. Note that TABLE[DESIRED] is to be used, so | -| the desired size is at least DESIRED + 1. | -`----------------------------------------------------------------*/ - -static void -table_grow (size_t desired) -{ - size_t old_size = table_size; - - while (table_size <= desired) - table_size *= 2; - - if (trace_flag) - fprintf (stderr, "growing table and check from: %d to %d\n", - old_size, table_size); - - table = XREALLOC (table, short, table_size); - check = XREALLOC (check, short, table_size); - - for (/* Nothing. */; old_size < table_size; ++old_size) - { - table[old_size] = 0; - check[old_size] = -1; - } -} +static struct obstack format_obstack; /*-------------------------------------------------------------------. @@ -175,154 +53,201 @@ table_grow (size_t desired) `-------------------------------------------------------------------*/ -#define GENERATE_MUSCLE_INSERT_TABLE(Name, Type) \ - \ -static void \ -Name (const char *name, \ - Type *table_data, \ - Type first, \ - int begin, \ - int end) \ -{ \ - Type max = first; \ - int i; \ - int j = 1; \ - \ - obstack_fgrow1 (&format_obstack, "%6d", first); \ - for (i = begin; i < end; ++i) \ - { \ - obstack_1grow (&format_obstack, ','); \ - if (j >= 10) \ - { \ - obstack_sgrow (&format_obstack, "\n "); \ - j = 1; \ - } \ - else \ - ++j; \ - obstack_fgrow1 (&format_obstack, "%6d", table_data[i]); \ - if (table_data[i] > max) \ - max = table_data[i]; \ - } \ - obstack_1grow (&format_obstack, 0); \ - muscle_insert (name, obstack_finish (&format_obstack)); \ - \ - /* Build `NAME_max' in the obstack. */ \ - obstack_fgrow1 (&format_obstack, "%s_max", name); \ - obstack_1grow (&format_obstack, 0); \ - MUSCLE_INSERT_LONG_INT (obstack_finish (&format_obstack), \ - (long int) max); \ +#define GENERATE_MUSCLE_INSERT_TABLE(Name, Type) \ + \ +static void \ +Name (char const *name, \ + Type *table_data, \ + Type first, \ + int begin, \ + int end) \ +{ \ + Type min = first; \ + Type max = first; \ + long int lmin; \ + long int lmax; \ + int i; \ + int j = 1; \ + \ + obstack_fgrow1 (&format_obstack, "%6d", first); \ + for (i = begin; i < end; ++i) \ + { \ + obstack_1grow (&format_obstack, ','); \ + if (j >= 10) \ + { \ + obstack_sgrow (&format_obstack, "\n "); \ + j = 1; \ + } \ + else \ + ++j; \ + obstack_fgrow1 (&format_obstack, "%6d", table_data[i]); \ + if (table_data[i] < min) \ + min = table_data[i]; \ + if (max < table_data[i]) \ + max = table_data[i]; \ + } \ + obstack_1grow (&format_obstack, 0); \ + muscle_insert (name, obstack_finish (&format_obstack)); \ + \ + lmin = min; \ + lmax = max; \ + /* Build `NAME_min' and `NAME_max' in the obstack. */ \ + obstack_fgrow1 (&format_obstack, "%s_min", name); \ + obstack_1grow (&format_obstack, 0); \ + MUSCLE_INSERT_LONG_INT (obstack_finish (&format_obstack), lmin); \ + obstack_fgrow1 (&format_obstack, "%s_max", name); \ + obstack_1grow (&format_obstack, 0); \ + MUSCLE_INSERT_LONG_INT (obstack_finish (&format_obstack), lmax); \ } GENERATE_MUSCLE_INSERT_TABLE(muscle_insert_unsigned_int_table, unsigned int) -GENERATE_MUSCLE_INSERT_TABLE(muscle_insert_short_table, short) -GENERATE_MUSCLE_INSERT_TABLE(muscle_insert_symbol_number_table, symbol_number_t) -GENERATE_MUSCLE_INSERT_TABLE(muscle_insert_item_number_table, item_number_t) +GENERATE_MUSCLE_INSERT_TABLE(muscle_insert_int_table, int) +GENERATE_MUSCLE_INSERT_TABLE(muscle_insert_base_table, base_number) +GENERATE_MUSCLE_INSERT_TABLE(muscle_insert_rule_number_table, rule_number) +GENERATE_MUSCLE_INSERT_TABLE(muscle_insert_symbol_number_table, symbol_number) +GENERATE_MUSCLE_INSERT_TABLE(muscle_insert_state_number_table, state_number) -/*-----------------------------------------------------------------. -| Prepare the muscles related to the tokens: translate, tname, and | -| toknum. | -`-----------------------------------------------------------------*/ +/*--------------------------------------------------------------------. +| Print to OUT a representation of STRING escaped both for C and M4. | +`--------------------------------------------------------------------*/ static void -prepare_tokens (void) +escaped_output (FILE *out, char const *string) { + char const *p; + fprintf (out, "[["); + + for (p = quotearg_style (c_quoting_style, string); *p; p++) + switch (*p) + { + case '$': fputs ("$][", out); break; + case '@': fputs ("@@", out); break; + case '[': fputs ("@{", out); break; + case ']': fputs ("@}", out); break; + default: fputc (*p, out); break; + } + + fprintf (out, "]]"); +} + + +/*------------------------------------------------------------------. +| Prepare the muscles related to the symbols: translate, tname, and | +| toknum. | +`------------------------------------------------------------------*/ + +static void +prepare_symbols (void) +{ + MUSCLE_INSERT_INT ("tokens_number", ntokens); + MUSCLE_INSERT_INT ("nterms_number", nvars); + MUSCLE_INSERT_INT ("symbols_number", nsyms); + MUSCLE_INSERT_INT ("undef_token_number", undeftoken->number); + MUSCLE_INSERT_INT ("user_token_number_max", max_user_token_number); + muscle_insert_symbol_number_table ("translate", - token_translations, - 0, 1, max_user_token_number + 1); + token_translations, + token_translations[0], + 1, max_user_token_number + 1); + /* tname -- token names. */ { int i; - int j = 0; + /* We assume that the table will be output starting at column 2. */ + int j = 2; + struct quoting_options *qo = clone_quoting_options (0); + set_quoting_style (qo, c_quoting_style); + set_quoting_flags (qo, QA_SPLIT_TRIGRAPHS); for (i = 0; i < nsyms; i++) { - /* Be sure not to use twice the same quotearg slot. */ - const char *cp = - quotearg_n_style (1, c_quoting_style, - quotearg_style (escape_quoting_style, - symbols[i]->tag)); - /* Width of the next token, including the two quotes, the coma - and the space. */ - int strsize = strlen (cp) + 2; - - if (j + strsize > 75) - { - obstack_sgrow (&format_obstack, "\n "); - j = 2; - } - - obstack_sgrow (&format_obstack, cp); - obstack_sgrow (&format_obstack, ", "); - j += strsize; + char *cp = quotearg_alloc (symbols[i]->tag, -1, qo); + /* Width of the next token, including the two quotes, the + comma and the space. */ + int width = strlen (cp) + 2; + + if (j + width > 75) + { + obstack_sgrow (&format_obstack, "\n "); + j = 1; + } + + if (i) + obstack_1grow (&format_obstack, ' '); + MUSCLE_OBSTACK_SGROW (&format_obstack, cp); + free (cp); + obstack_1grow (&format_obstack, ','); + j += width; } - /* Add a NULL entry to list of tokens (well, 0, as NULL might not be - defined). */ - obstack_sgrow (&format_obstack, "0"); + free (qo); + obstack_sgrow (&format_obstack, " ]b4_null["); /* Finish table and store. */ obstack_1grow (&format_obstack, 0); muscle_insert ("tname", obstack_finish (&format_obstack)); } - /* Output YYTOKNUM. */ + /* Output YYTOKNUM. */ { int i; - short *values = XCALLOC (short, ntokens + 1); - for (i = 0; i < ntokens + 1; ++i) + int *values = xnmalloc (ntokens, sizeof *values); + for (i = 0; i < ntokens; ++i) values[i] = symbols[i]->user_token_number; - muscle_insert_short_table ("toknum", values, - 0, 1, ntokens + 1); + muscle_insert_int_table ("toknum", values, + values[0], 1, ntokens); free (values); } } -/*-------------------------------------------------------------. -| Prepare the muscles related to the rules: rhs, prhs, r1, r2, | -| rline. | -`-------------------------------------------------------------*/ +/*----------------------------------------------------------------. +| Prepare the muscles related to the rules: r1, r2, rline, dprec, | +| merger, immediate. | +`----------------------------------------------------------------*/ static void prepare_rules (void) { - int r; - unsigned int i = 0; - item_number_t *rhs = XMALLOC (item_number_t, nritems); - unsigned int *prhs = XMALLOC (unsigned int, nrules + 1); - unsigned int *rline = XMALLOC (unsigned int, nrules + 1); - symbol_number_t *r1 = XMALLOC (symbol_number_t, nrules + 1); - unsigned int *r2 = XMALLOC (unsigned int, nrules + 1); - - for (r = 1; r < nrules + 1; ++r) + unsigned int *rline = xnmalloc (nrules, sizeof *rline); + symbol_number *r1 = xnmalloc (nrules, sizeof *r1); + unsigned int *r2 = xnmalloc (nrules, sizeof *r2); + int *dprec = xnmalloc (nrules, sizeof *dprec); + int *merger = xnmalloc (nrules, sizeof *merger); + int *immediate = xnmalloc (nrules, sizeof *immediate); + + rule_number r; + for (r = 0; r < nrules; ++r) { - item_number_t *rhsp; - /* Index of rule R in RHS. */ - prhs[r] = i; - /* RHS of the rule R. */ - for (rhsp = rules[r].rhs; *rhsp >= 0; ++rhsp) - rhs[i++] = *rhsp; /* LHS of the rule R. */ r1[r] = rules[r].lhs->number; /* Length of rule R's RHS. */ - r2[r] = i - prhs[r]; - /* Separator in RHS. */ - rhs[i++] = -1; + r2[r] = rule_rhs_length(&rules[r]); /* Line where rule was defined. */ - rline[r] = rules[r].line; + rline[r] = rules[r].location.start.line; + /* Dynamic precedence (GLR). */ + dprec[r] = rules[r].dprec; + /* Merger-function index (GLR). */ + merger[r] = rules[r].merger; + /* Immediate reduction flags (GLR). */ + immediate[r] = rules[r].is_predicate; } - assert (i == nritems); - muscle_insert_item_number_table ("rhs", rhs, ritem[0], 1, nritems); - muscle_insert_unsigned_int_table ("prhs", prhs, 0, 1, nrules + 1); - muscle_insert_unsigned_int_table ("rline", rline, 0, 1, nrules + 1); - muscle_insert_symbol_number_table ("r1", r1, 0, 1, nrules + 1); - muscle_insert_unsigned_int_table ("r2", r2, 0, 1, nrules + 1); + muscle_insert_unsigned_int_table ("rline", rline, 0, 0, nrules); + muscle_insert_symbol_number_table ("r1", r1, 0, 0, nrules); + muscle_insert_unsigned_int_table ("r2", r2, 0, 0, nrules); + muscle_insert_int_table ("dprec", dprec, 0, 0, nrules); + muscle_insert_int_table ("merger", merger, 0, 0, nrules); + muscle_insert_int_table ("immediate", immediate, 0, 0, nrules); + + MUSCLE_INSERT_INT ("rules_number", nrules); + MUSCLE_INSERT_INT ("max_left_semantic_context", max_left_semantic_context); - free (rhs); - free (prhs); free (rline); free (r1); free (r2); + free (dprec); + free (merger); + free (immediate); } /*--------------------------------------------. @@ -332,619 +257,316 @@ prepare_rules (void) static void prepare_states (void) { - size_t i; - symbol_number_t *values = - (symbol_number_t *) alloca (sizeof (symbol_number_t) * nstates); + state_number i; + symbol_number *values = xnmalloc (nstates, sizeof *values); for (i = 0; i < nstates; ++i) values[i] = states[i]->accessing_symbol; muscle_insert_symbol_number_table ("stos", values, - 0, 1, nstates); + 0, 1, nstates); + free (values); + + MUSCLE_INSERT_INT ("last", high); + MUSCLE_INSERT_INT ("final_state_number", final_state->number); + MUSCLE_INSERT_INT ("states_number", nstates); } -/*------------------------------------------------------------------. -| Decide what to do for each type of token if seen as the lookahead | -| token in specified state. The value returned is used as the | -| default action (yydefact) for the state. In addition, actrow is | -| filled with what to do for each kind of token, index by symbol | -| number, with zero meaning do the default action. The value | -| SHRT_MIN, a very negative number, means this situation is an | -| error. The parser recognizes this value specially. | -| | -| This is where conflicts are resolved. The loop over lookahead | -| rules considered lower-numbered rules last, and the last rule | -| considered that likes a token gets to handle it. | -`------------------------------------------------------------------*/ +/*-------------------------------------------------------. +| Compare two symbols by type-name, and then by number. | +`-------------------------------------------------------*/ static int -action_row (state_t *state) +symbol_type_name_cmp (const symbol **lhs, const symbol **rhs) { - int i; - int default_rule = 0; - reductions *redp = state->reductions; - shifts *shiftp = state->shifts; - errs *errp = state->errs; - /* set nonzero to inhibit having any default reduction */ - int nodefault = 0; - - for (i = 0; i < ntokens; i++) - actrow[i] = 0; - - if (redp->nreds >= 1) - { - int j; - /* loop over all the rules available here which require - lookahead */ - for (i = state->nlookaheads - 1; i >= 0; --i) - /* and find each token which the rule finds acceptable - to come next */ - for (j = 0; j < ntokens; j++) - /* and record this rule as the rule to use if that - token follows. */ - if (bitset_test (LA[state->lookaheadsp + i], j)) - actrow[j] = -LArule[state->lookaheadsp + i]->number; - } - - /* Now see which tokens are allowed for shifts in this state. For - them, record the shift as the thing to do. So shift is preferred - to reduce. */ - for (i = 0; i < shiftp->nshifts; i++) - { - symbol_number_t symbol; - int shift_state = shiftp->shifts[i]; - if (!shift_state) - continue; - - symbol = states[shift_state]->accessing_symbol; - - if (ISVAR (symbol)) - break; - - actrow[symbol] = shift_state; - - /* Do not use any default reduction if there is a shift for - error */ - if (symbol == errtoken->number) - nodefault = 1; - } - - /* See which tokens are an explicit error in this state (due to - %nonassoc). For them, record SHRT_MIN as the action. */ - for (i = 0; i < errp->nerrs; i++) - { - int symbol = errp->errs[i]; - actrow[symbol] = SHRT_MIN; - } - - /* Now find the most common reduction and make it the default action - for this state. */ - - if (redp->nreds >= 1 && !nodefault) - { - if (state->consistent) - default_rule = redp->rules[0]; - else - { - int max = 0; - for (i = 0; i < state->nlookaheads; i++) - { - int count = 0; - int rule = -LArule[state->lookaheadsp + i]->number; - int j; - - for (j = 0; j < ntokens; j++) - if (actrow[j] == rule) - count++; - - if (count > max) - { - max = count; - default_rule = rule; - } - } - - /* actions which match the default are replaced with zero, - which means "use the default" */ - - if (max > 0) - { - int j; - for (j = 0; j < ntokens; j++) - if (actrow[j] == default_rule) - actrow[j] = 0; - - default_rule = -default_rule; - } - } - } - - /* If have no default rule, the default is an error. - So replace any action which says "error" with "use default". */ - - if (default_rule == 0) - for (i = 0; i < ntokens; i++) - if (actrow[i] == SHRT_MIN) - actrow[i] = 0; - - return default_rule; + int res = UNIQSTR_CMP((*lhs)->type_name, (*rhs)->type_name); + if (res) + return res; + return (*lhs)->number - (*rhs)->number; } -static void -save_row (int state) -{ - int i; - int count; - short *sp; - short *sp1; - short *sp2; - - count = 0; - for (i = 0; i < ntokens; i++) - if (actrow[i] != 0) - count++; - - if (count == 0) - return; - - froms[state] = sp1 = sp = XCALLOC (short, count); - tos[state] = sp2 = XCALLOC (short, count); - - for (i = 0; i < ntokens; i++) - if (actrow[i] != 0) - { - *sp1++ = i; - *sp2++ = actrow[i]; - } +/*----------------------------------------------------------------. +| Return a (malloc'ed) table of the symbols sorted by type-name. | +`----------------------------------------------------------------*/ - tally[state] = count; - width[state] = sp1[-1] - sp[0] + 1; +static symbol ** +symbols_by_type_name (void) +{ + typedef int (*qcmp_type) (const void *, const void *); + symbol **res = xmemdup (symbols, nsyms * sizeof *res); + qsort (res, nsyms, sizeof *res, (qcmp_type) &symbol_type_name_cmp); + return res; } /*------------------------------------------------------------------. -| Figure out the actions for the specified state, indexed by | -| lookahead token type. | -| | -| The YYDEFACT table is output now. The detailed info is saved for | -| putting into YYTABLE later. | +| Define b4_type_names, which is a list of (lists of the numbers of | +| symbols with same type-name). | `------------------------------------------------------------------*/ static void -token_actions (void) -{ - size_t i; - short *yydefact = XCALLOC (short, nstates); - - actrow = XCALLOC (short, ntokens); - for (i = 0; i < nstates; ++i) - { - yydefact[i] = action_row (states[i]); - save_row (i); - } - - muscle_insert_short_table ("defact", yydefact, - yydefact[0], 1, nstates); - XFREE (actrow); - XFREE (yydefact); -} - - -/*-----------------------------. -| Output the actions to OOUT. | -`-----------------------------*/ - -void -actions_output (FILE *out) -{ - int rule; - for (rule = 1; rule < nrules + 1; ++rule) - if (rules[rule].action) - { - fprintf (out, " case %d:\n", rule); - - if (!no_lines_flag) - fprintf (out, muscle_find ("linef"), - rules[rule].action_line, - quotearg_style (c_quoting_style, - muscle_find ("filename"))); - /* As a Bison extension, add the ending semicolon. Since some - Yacc don't do that, help people using bison as a Yacc - finding their missing semicolons. */ - fprintf (out, "{ %s%s }\n break;\n\n", - rules[rule].action, - yacc_flag ? ";" : ""); - } -} - - -/*---------------------------------------. -| Output the tokens definition to OOUT. | -`---------------------------------------*/ - -void -token_definitions_output (FILE *out) +type_names_output (FILE *out) { int i; - int first = 1; - for (i = 0; i < ntokens; ++i) + symbol **syms = symbols_by_type_name (); + fputs ("m4_define([b4_type_names],\n[", out); + for (i = 0; i < nsyms; /* nothing */) { - symbol_t *symbol = symbols[i]; - int number = symbol->user_token_number; - - /* At this stage, if there are literal aliases, they are part of - SYMBOLS, so we should not find symbols which are the aliases - here. */ - assert (number != USER_NUMBER_ALIAS); - - /* Skip error token. */ - if (symbol == errtoken) - continue; - - /* If this string has an alias, then it is necessarily the alias - which is to be output. */ - if (symbol->alias) - symbol = symbol->alias; - - /* Don't output literal chars or strings (when defined only as a - string). Note that must be done after the alias resolution: - think about `%token 'f' "f"'. */ - if (symbol->tag[0] == '\'' || symbol->tag[0] == '\"') - continue; - - /* Don't #define nonliteral tokens whose names contain periods - or '$' (as does the default value of the EOF token). */ - if (strchr (symbol->tag, '.') || strchr (symbol->tag, '$')) - continue; - - fprintf (out, "%s[[[%s]], [%d]]", - first ? "" : ",\n", symbol->tag, number); - - first = 0; + // The index of the first symbol of the current type-name. + int i0 = i; + fputs (i ? ",\n[" : "[", out); + for (; i < nsyms && syms[i]->type_name == syms[i0]->type_name; ++i) + fprintf (out, "%s%d", i != i0 ? ", " : "", syms[i]->number); + fputs ("]", out); } + fputs ("])\n\n", out); + free (syms); } +/*-------------------------------------. +| The list of all the symbol numbers. | +`-------------------------------------*/ + static void -save_column (int symbol, int default_state) +symbol_numbers_output (FILE *out) { int i; - short *sp; - short *sp1; - short *sp2; - int count; - int symno = symbol - ntokens + nstates; - - short begin = goto_map[symbol]; - short end = goto_map[symbol + 1]; - - count = 0; - for (i = begin; i < end; i++) - if (to_state[i] != default_state) - count++; - - if (count == 0) - return; - - froms[symno] = sp1 = sp = XCALLOC (short, count); - tos[symno] = sp2 = XCALLOC (short, count); - - for (i = begin; i < end; i++) - if (to_state[i] != default_state) - { - *sp1++ = from_state[i]; - *sp2++ = to_state[i]; - } - - tally[symno] = count; - width[symno] = sp1[-1] - sp[0] + 1; + fputs ("m4_define([b4_symbol_numbers],\n[", out); + for (i = 0; i < nsyms; ++i) + fprintf (out, "%s[%d]", i ? ", " : "", i); + fputs ("])\n\n", out); } -static int -default_goto (int symbol) -{ - size_t i; - size_t m = goto_map[symbol]; - size_t n = goto_map[symbol + 1]; - int default_state = -1; - int max = 0; - - if (m == n) - return -1; - for (i = 0; i < nstates; i++) - state_count[i] = 0; +/*---------------------------------. +| Output the user actions to OUT. | +`---------------------------------*/ - for (i = m; i < n; i++) - state_count[to_state[i]]++; +static void +user_actions_output (FILE *out) +{ + rule_number r; - for (i = 0; i < nstates; i++) - if (state_count[i] > max) + fputs ("m4_define([b4_actions], \n[", out); + for (r = 0; r < nrules; ++r) + if (rules[r].action) { - max = state_count[i]; - default_state = i; + fprintf (out, "b4_%scase(%d, [b4_syncline(%d, ", + rules[r].is_predicate ? "predicate_" : "", + r + 1, rules[r].action_location.start.line); + escaped_output (out, rules[r].action_location.start.file); + fprintf (out, ")\n[ %s]])\n\n", rules[r].action); } - - return default_state; + fputs ("])\n\n", out); } - -/*-------------------------------------------------------------------. -| Figure out what to do after reducing with each rule, depending on | -| the saved state from before the beginning of parsing the data that | -| matched this rule. | -| | -| The YYDEFGOTO table is output now. The detailed info is saved for | -| putting into YYTABLE later. | -`-------------------------------------------------------------------*/ +/*------------------------------------. +| Output the merge functions to OUT. | +`------------------------------------*/ static void -goto_actions (void) +merger_output (FILE *out) { - int i; - short *yydefgoto = XMALLOC (short, nsyms - ntokens); + int n; + merger_list* p; - state_count = XCALLOC (short, nstates); - for (i = ntokens; i < nsyms; ++i) + fputs ("m4_define([b4_mergers], \n[[", out); + for (n = 1, p = merge_functions; p != NULL; n += 1, p = p->next) { - int default_state = default_goto (i); - save_column (i, default_state); - yydefgoto[i - ntokens] = default_state; + if (p->type[0] == '\0') + fprintf (out, " case %d: *yy0 = %s (*yy0, *yy1); break;\n", + n, p->name); + else + fprintf (out, " case %d: yy0->%s = %s (*yy0, *yy1); break;\n", + n, p->type, p->name); } - - muscle_insert_short_table ("defgoto", yydefgoto, - yydefgoto[0], 1, nsyms - ntokens); - XFREE (state_count); - XFREE (yydefgoto); + fputs ("]])\n\n", out); } -/* The next few functions decide how to pack the actions and gotos - information into yytable. */ +/*---------------------------------------------. +| Prepare the muscles for symbol definitions. | +`---------------------------------------------*/ static void -sort_actions (void) +prepare_symbol_definitions (void) { int i; + for (i = 0; i < nsyms; ++i) + { + symbol *sym = symbols[i]; + const char *key; + const char *value; - order = XCALLOC (short, nvectors); - nentries = 0; - - for (i = 0; i < nvectors; i++) - if (tally[i] > 0) - { - int k; - int t = tally[i]; - int w = width[i]; - int j = nentries - 1; - - while (j >= 0 && (width[order[j]] < w)) - j--; - - while (j >= 0 && (width[order[j]] == w) && (tally[order[j]] < t)) - j--; +#define SET_KEY(Entry) \ + obstack_fgrow2 (&format_obstack, "symbol(%d, %s)", \ + i, Entry); \ + obstack_1grow (&format_obstack, 0); \ + key = obstack_finish (&format_obstack); - for (k = nentries - 1; k > j; k--) - order[k + 1] = order[k]; +#define SET_KEY2(Entry, Suffix) \ + obstack_fgrow3 (&format_obstack, "symbol(%d, %s_%s)", \ + i, Entry, Suffix); \ + obstack_1grow (&format_obstack, 0); \ + key = obstack_finish (&format_obstack); - order[j + 1] = i; - nentries++; - } -} + // Whether the symbol has an identifier. + value = symbol_id_get (sym); + SET_KEY("has_id"); + MUSCLE_INSERT_INT (key, !!value); + // Its identifier. + SET_KEY("id"); + MUSCLE_INSERT_STRING (key, value ? value : ""); -static int -matching_state (int vector) -{ - int i = order[vector]; - int t; - int w; - int prev; + // Its tag. Typically for documentation purpose. + SET_KEY("tag"); + MUSCLE_INSERT_STRING (key, sym->tag); - if (i >= (int) nstates) - return -1; + SET_KEY("user_number"); + MUSCLE_INSERT_INT (key, sym->user_token_number); - t = tally[i]; - w = width[i]; + SET_KEY("is_token"); + MUSCLE_INSERT_INT (key, + i < ntokens && sym != errtoken && sym != undeftoken); - for (prev = vector - 1; prev >= 0; prev--) - { - int j = order[prev]; - int k; - int match = 1; + SET_KEY("number"); + MUSCLE_INSERT_INT (key, sym->number); - if (width[j] != w || tally[j] != t) - return -1; + SET_KEY("has_type"); + MUSCLE_INSERT_INT (key, !!sym->type_name); - for (k = 0; match && k < t; k++) - if (tos[j][k] != tos[i][k] || froms[j][k] != froms[i][k]) - match = 0; + SET_KEY("type"); + MUSCLE_INSERT_STRING (key, sym->type_name ? sym->type_name : ""); - if (match) - return j; + { + int j; + for (j = 0; j < CODE_PROPS_SIZE; ++j) + { + /* "printer", not "%printer". */ + char const *pname = code_props_type_string (j) + 1; + code_props const *p = symbol_code_props_get (sym, j); + SET_KEY2("has", pname); + MUSCLE_INSERT_INT (key, !!p->code); + + if (p->code) + { + SET_KEY2(pname, "file"); + MUSCLE_INSERT_STRING (key, p->location.start.file); + + SET_KEY2(pname, "line"); + MUSCLE_INSERT_INT (key, p->location.start.line); + + SET_KEY(pname); + MUSCLE_INSERT_STRING_RAW (key, p->code); + } + } + } +#undef SET_KEY2 +#undef SET_KEY } - - return -1; } -static int -pack_vector (int vector) -{ - int i = order[vector]; - int j; - int t = tally[i]; - int loc = 0; - short *from = froms[i]; - short *to = tos[i]; - - assert (t); - - for (j = lowzero - from[0]; j < (int) table_size; j++) - { - int k; - int ok = 1; - - for (k = 0; ok && k < t; k++) - { - loc = j + from[k]; - if (loc > (int) table_size) - table_grow (loc); - - if (table[loc] != 0) - ok = 0; - } - - for (k = 0; ok && k < vector; k++) - if (pos[k] == j) - ok = 0; - - if (ok) - { - for (k = 0; k < t; k++) - { - loc = j + from[k]; - table[loc] = to[k]; - check[loc] = from[k]; - } - - while (table[lowzero] != 0) - lowzero++; - - if (loc > high) - high = loc; - - return j; - } - } -#define pack_vector_succeeded 0 - assert (pack_vector_succeeded); - return 0; -} - +/*--------------------------------------. +| Output the tokens definition to OUT. | +`--------------------------------------*/ static void -pack_table (void) +token_definitions_output (FILE *out) { int i; - int place; - int state; - - base = XCALLOC (short, nvectors); - pos = XCALLOC (short, nentries); - table = XCALLOC (short, table_size); - check = XCALLOC (short, table_size); - - lowzero = 0; - high = 0; - - for (i = 0; i < nvectors; i++) - base[i] = SHRT_MIN; + char const *sep = ""; - for (i = 0; i < (int) table_size; i++) - check[i] = -1; - - for (i = 0; i < nentries; i++) - { - state = matching_state (i); - - if (state < 0) - place = pack_vector (i); - else - place = base[state]; - - pos[i] = place; - base[order[i]] = place; - } - - for (i = 0; i < nvectors; i++) + fputs ("m4_define([b4_tokens], \n[", out); + for (i = 0; i < ntokens; ++i) { - XFREE (froms[i]); - XFREE (tos[i]); + symbol *sym = symbols[i]; + int number = sym->user_token_number; + uniqstr id = symbol_id_get (sym); + + /* At this stage, if there are literal string aliases, they are + part of SYMBOLS, so we should not find their aliased symbols + here. */ + aver (number != USER_NUMBER_HAS_STRING_ALIAS); + + /* Skip error token and tokens without identifier. */ + if (sym != errtoken && id) + { + fprintf (out, "%s[[[%s]], %d]", + sep, id, number); + sep = ",\n"; + } } - - XFREE (froms); - XFREE (tos); - XFREE (pos); -} - -/* the following functions output yytable, yycheck - and the vectors whose elements index the portion starts */ - -static void -output_base (void) -{ - /* Output pact. */ - muscle_insert_short_table ("pact", base, - base[0], 1, nstates); - - /* Output pgoto. */ - muscle_insert_short_table ("pgoto", base, - base[nstates], nstates + 1, nvectors); - XFREE (base); + fputs ("])\n\n", out); } static void -output_table (void) +prepare_actions (void) { - muscle_insert_short_table ("table", table, - table[0], 1, high + 1); - XFREE (table); + /* Figure out the actions for the specified state, indexed by + lookahead token type. */ + + muscle_insert_rule_number_table ("defact", yydefact, + yydefact[0], 1, nstates); + + /* Figure out what to do after reducing with each rule, depending on + the saved state from before the beginning of parsing the data + that matched this rule. */ + muscle_insert_state_number_table ("defgoto", yydefgoto, + yydefgoto[0], 1, nsyms - ntokens); + + + /* Output PACT. */ + muscle_insert_base_table ("pact", base, + base[0], 1, nstates); + MUSCLE_INSERT_INT ("pact_ninf", base_ninf); + + /* Output PGOTO. */ + muscle_insert_base_table ("pgoto", base, + base[nstates], nstates + 1, nvectors); + + muscle_insert_base_table ("table", table, + table[0], 1, high + 1); + MUSCLE_INSERT_INT ("table_ninf", table_ninf); + + muscle_insert_base_table ("check", check, + check[0], 1, high + 1); + + /* GLR parsing slightly modifies YYTABLE and YYCHECK (and thus + YYPACT) so that in states with unresolved conflicts, the default + reduction is not used in the conflicted entries, so that there is + a place to put a conflict pointer. + + This means that YYCONFLP and YYCONFL are nonsense for a non-GLR + parser, so we could avoid accidents by not writing them out in + that case. Nevertheless, it seems even better to be able to use + the GLR skeletons even without the non-deterministic tables. */ + muscle_insert_unsigned_int_table ("conflict_list_heads", conflict_table, + conflict_table[0], 1, high + 1); + muscle_insert_unsigned_int_table ("conflicting_rules", conflict_list, + 0, 1, conflict_list_cnt); } -static void -output_check (void) -{ - muscle_insert_short_table ("check", check, - check[0], 1, high + 1); - XFREE (check); -} - -/*-----------------------------------------------------------------. -| Compute and output yydefact, yydefgoto, yypact, yypgoto, yytable | -| and yycheck. | -`-----------------------------------------------------------------*/ +/*--------------------------------------------. +| Output the definitions of all the muscles. | +`--------------------------------------------*/ static void -output_actions (void) +muscles_output (FILE *out) { - size_t i; - nvectors = nstates + nvars; - - froms = XCALLOC (short *, nvectors); - tos = XCALLOC (short *, nvectors); - tally = XCALLOC (short, nvectors); - width = XCALLOC (short, nvectors); - - token_actions (); - bitsetv_free (LA); - free (LArule); - - goto_actions (); - XFREE (goto_map + ntokens); - XFREE (from_state); - XFREE (to_state); - - sort_actions (); - pack_table (); - - output_base (); - output_table (); - - output_check (); - - for (i = 0; i < nstates; ++i) - { - free (states[i]->shifts); - XFREE (states[i]->reductions); - free (states[i]->errs); - free (states[i]); - } - XFREE (states); + fputs ("m4_init()\n", out); + merger_output (out); + symbol_numbers_output (out); + token_definitions_output (out); + type_names_output (out); + user_actions_output (out); + // Must be last. + muscles_m4_output (out); } - /*---------------------------. | Call the skeleton parser. | @@ -953,122 +575,144 @@ output_actions (void) static void output_skeleton (void) { - /* Store the definition of all the muscles. */ - const char *tempdir = getenv ("TMPDIR"); - char *tempfile = NULL; - FILE *out = NULL; - int fd; - - if (tempdir == NULL) - tempdir = DEFAULT_TMPDIR; - tempfile = xmalloc (strlen (tempdir) + 11); - sprintf (tempfile, "%s/bsnXXXXXX", tempdir); - fd = mkstemp (tempfile); - if (fd == -1) - error (EXIT_FAILURE, errno, "%s", tempfile); - - out = fdopen (fd, "w"); - if (out == NULL) - error (EXIT_FAILURE, errno, "%s", tempfile); - - /* There are no comments, especially not `#': we do want M4 expansion - after `#': think of CPP macros! */ - fputs ("m4_changecom()\n", out); - fputs ("m4_init()\n", out); - - fputs ("m4_define([b4_actions], \n[[", out); - actions_output (out); - fputs ("]])\n\n", out); - - fputs ("m4_define([b4_tokens], \n[", out); - token_definitions_output (out); - fputs ("])\n\n", out); + int filter_fd[2]; + pid_t pid; + + /* Compute the names of the package data dir and skeleton files. */ + char const *m4 = (m4 = getenv ("M4")) ? m4 : M4; + char const *datadir = pkgdatadir (); + char *m4sugar = xconcatenated_filename (datadir, "m4sugar/m4sugar.m4", NULL); + char *m4bison = xconcatenated_filename (datadir, "bison.m4", NULL); + char *skel = (IS_PATH_WITH_DIR (skeleton) + ? xstrdup (skeleton) + : xconcatenated_filename (datadir, skeleton, NULL)); + + /* Test whether m4sugar.m4 is readable, to check for proper + installation. A faulty installation can cause deadlock, so a + cheap sanity check is worthwhile. */ + xfclose (xfopen (m4sugar, "r")); + + /* Create an m4 subprocess connected to us via two pipes. */ + + if (trace_flag & trace_tools) + fprintf (stderr, "running: %s %s - %s %s\n", + m4, m4sugar, m4bison, skel); + + /* Some future version of GNU M4 (most likely 1.6) may treat the -dV in a + position-dependent manner. Keep it as the first argument so that all + files are traced. + + See the thread starting at + + for details. */ + { + char const *argv[10]; + int i = 0; + argv[i++] = m4; + + /* When POSIXLY_CORRECT is set, GNU M4 1.6 and later disable GNU + extensions, which Bison's skeletons depend on. With older M4, + it has no effect. M4 1.4.12 added a -g/--gnu command-line + option to make it explicit that a program wants GNU M4 + extensions even when POSIXLY_CORRECT is set. + + See the thread starting at + + for details. */ + if (*M4_GNU_OPTION) + argv[i++] = M4_GNU_OPTION; + + argv[i++] = "-I"; + argv[i++] = datadir; + if (trace_flag & trace_m4) + argv[i++] = "-dV"; + argv[i++] = m4sugar; + argv[i++] = "-"; + argv[i++] = m4bison; + argv[i++] = skel; + argv[i++] = NULL; + aver (i <= ARRAY_CARDINALITY (argv)); + + /* The ugly cast is because gnulib gets the const-ness wrong. */ + pid = create_pipe_bidi ("m4", m4, (char **)(void*)argv, false, true, + true, filter_fd); + } - muscles_m4_output (out); + free (m4sugar); + free (m4bison); + free (skel); - fputs ("m4_wrap([m4_divert_pop(0)])\n", out); - fputs ("m4_divert_push(0)dnl\n", out); - xfclose (out); + if (trace_flag & trace_muscles) + muscles_output (stderr); + { + FILE *out = xfdopen (filter_fd[1], "w"); + muscles_output (out); + xfclose (out); + } - /* Invoke m4 on the definition of the muscles, and the skeleton. */ + /* Read and process m4's output. */ + timevar_push (TV_M4); { - const char *bison_pkgdatadir = getenv ("BISON_PKGDATADIR"); - const char *m4 = getenv ("M4"); - int pkg_data_len; - char *full_skeleton; - - if (!m4) - m4 = M4; - if (!bison_pkgdatadir) - bison_pkgdatadir = PKGDATADIR; - pkg_data_len = strlen (bison_pkgdatadir); - full_skeleton = XMALLOC (char, pkg_data_len + strlen (skeleton) + 1); - if (bison_pkgdatadir[pkg_data_len-1] == '/') - sprintf (full_skeleton, "%s%s", bison_pkgdatadir, skeleton); - else - sprintf (full_skeleton, "%s/%s", bison_pkgdatadir, skeleton); - if (trace_flag) - fprintf (stderr, - "running: %s -I %s m4sugar/m4sugar.m4 %s %s\n", - m4, bison_pkgdatadir, tempfile, full_skeleton); - skel_in = readpipe (m4, - "-I", bison_pkgdatadir, - "m4sugar/m4sugar.m4", - tempfile, - full_skeleton, - NULL); - XFREE (full_skeleton); - if (!skel_in) - error (EXIT_FAILURE, errno, "cannot run m4"); - skel_lex (); - - /* If `debugging', keep this file alive. */ - if (!trace_flag) - unlink (tempfile); + FILE *in = xfdopen (filter_fd[0], "r"); + scan_skel (in); + /* scan_skel should have read all of M4's output. Otherwise, when we + close the pipe, we risk letting M4 report a broken-pipe to the + Bison user. */ + aver (feof (in)); + xfclose (in); } + wait_subprocess (pid, "m4", false, false, true, true, NULL); + timevar_pop (TV_M4); } static void prepare (void) { - MUSCLE_INSERT_INT ("last", high); - MUSCLE_INSERT_INT ("flag", SHRT_MIN); - MUSCLE_INSERT_INT ("pure", pure_parser); - MUSCLE_INSERT_INT ("nsym", nsyms); - MUSCLE_INSERT_INT ("debug", debug_flag); - MUSCLE_INSERT_INT ("final", final_state); - MUSCLE_INSERT_INT ("undef_token_number", undeftoken->number); - MUSCLE_INSERT_INT ("user_token_number_max", max_user_token_number); - MUSCLE_INSERT_INT ("error_verbose", error_verbose); - MUSCLE_INSERT_STRING ("prefix", spec_name_prefix ? spec_name_prefix : "yy"); - - /* FIXME: This is wrong: the muscles should decide whether they hold - a copy or not, but the situation is too obscure currently. */ - MUSCLE_INSERT_STRING ("output_infix", output_infix ? output_infix : ""); - MUSCLE_INSERT_STRING ("output_prefix", short_base_name); - MUSCLE_INSERT_STRING ("output_parser_name", parser_file_name); - MUSCLE_INSERT_STRING ("output_header_name", spec_defines_file); - - MUSCLE_INSERT_INT ("nnts", nvars); - MUSCLE_INSERT_INT ("nrules", nrules); - MUSCLE_INSERT_INT ("nstates", nstates); - MUSCLE_INSERT_INT ("ntokens", ntokens); - - MUSCLE_INSERT_INT ("locations_flag", locations_flag); - MUSCLE_INSERT_INT ("defines_flag", defines_flag); - - /* Copy definitions in directive. */ - obstack_1grow (&pre_prologue_obstack, 0); - obstack_1grow (&post_prologue_obstack, 0); - muscle_insert ("pre_prologue", obstack_finish (&pre_prologue_obstack)); - muscle_insert ("post_prologue", obstack_finish (&post_prologue_obstack)); - - /* Find the right skeleton file. */ - if (!skeleton) - skeleton = "bison.simple"; - - /* Parse the skeleton file and output the needed parsers. */ - muscle_insert ("skeleton", skeleton); + /* BISON_USE_PUSH_FOR_PULL is for the test suite and should not be + documented for the user. */ + char const *cp = getenv ("BISON_USE_PUSH_FOR_PULL"); + bool use_push_for_pull_flag = cp && *cp && strtol (cp, 0, 10); + + /* Flags. */ + MUSCLE_INSERT_BOOL ("defines_flag", defines_flag); + MUSCLE_INSERT_BOOL ("glr_flag", glr_parser); + MUSCLE_INSERT_BOOL ("nondeterministic_flag", nondeterministic_parser); + MUSCLE_INSERT_BOOL ("synclines_flag", !no_lines_flag); + MUSCLE_INSERT_BOOL ("tag_seen_flag", tag_seen); + MUSCLE_INSERT_BOOL ("token_table_flag", token_table_flag); + MUSCLE_INSERT_BOOL ("use_push_for_pull_flag", use_push_for_pull_flag); + MUSCLE_INSERT_BOOL ("yacc_flag", yacc_flag); + + /* File names. */ + if (spec_name_prefix) + MUSCLE_INSERT_STRING ("prefix", spec_name_prefix); + + MUSCLE_INSERT_STRING ("file_name_all_but_ext", all_but_ext); + +#define DEFINE(Name) MUSCLE_INSERT_STRING (#Name, Name ? Name : "") + DEFINE (dir_prefix); + DEFINE (parser_file_name); + DEFINE (spec_defines_file); + DEFINE (spec_file_prefix); + DEFINE (spec_graph_file); + DEFINE (spec_name_prefix); + DEFINE (spec_outfile); + DEFINE (spec_verbose_file); +#undef DEFINE + + /* Find the right skeleton file, and add muscles about the skeletons. */ + if (skeleton) + MUSCLE_INSERT_C_STRING ("skeleton", skeleton); + else + skeleton = language->skeleton; + + /* About the skeletons. */ + { + /* b4_pkgdatadir is used inside m4_include in the skeletons, so digraphs + would never be expanded. Hopefully no one has M4-special characters in + his Bison installation path. */ + MUSCLE_INSERT_STRING_RAW ("pkgdatadir", pkgdatadir ()); + } } @@ -1081,19 +725,23 @@ output (void) { obstack_init (&format_obstack); - prepare_tokens (); + prepare_symbols (); prepare_rules (); prepare_states (); - output_actions (); + prepare_actions (); + prepare_symbol_definitions (); prepare (); /* Process the selected skeleton file. */ output_skeleton (); - obstack_free (&muscle_obstack, NULL); obstack_free (&format_obstack, NULL); - obstack_free (&action_obstack, NULL); - obstack_free (&pre_prologue_obstack, NULL); - obstack_free (&post_prologue_obstack, NULL); +} + +char const * +pkgdatadir (void) +{ + char const *cp = getenv ("BISON_PKGDATADIR"); + return cp ? cp : PKGDATADIR; }