1 /* Output the generated parsing program for Bison.
3 Copyright (C) 1984, 1986, 1989, 1992, 2000-2012 Free Software
6 This file is part of Bison, the GNU Compiler Compiler.
8 This program is free software: you can redistribute it and/or modify
9 it under the terms of the GNU General Public License as published by
10 the Free Software Foundation, either version 3 of the License, or
11 (at your option) any later version.
13 This program is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU General Public License for more details.
18 You should have received a copy of the GNU General Public License
19 along with this program. If not, see <http://www.gnu.org/licenses/>. */
24 #include <concat-filename.h>
25 #include <configmake.h>
27 #include <get-errno.h>
29 #include <spawn-pipe.h>
31 #include <wait-process.h>
37 #include "muscle-tab.h"
40 #include "scan-code.h" /* max_left_semantic_context */
41 #include "scan-skel.h"
/* Obstack in which muscle values and muscle names are assembled
   before being handed to the muscle table. */
45 static struct obstack format_obstack
;
48 /*-------------------------------------------------------------------.
49 | Create a function NAME which associates to the muscle NAME the |
50 | result of formatting the FIRST and then TABLE_DATA[BEGIN..END[ (of |
51 | TYPE), and to the muscles NAME_min and NAME_max, the minimum and maximum values. |
53 `-------------------------------------------------------------------*/
/* NAME is the identifier of the function being generated and TYPE is
   the element type of the table it handles.  The generated function
   formats FIRST and then every table_data[i] for BEGIN <= i < END as
   comma-separated "%6d" fields, stores the resulting text as the
   muscle named by its NAME argument, and finally inserts the muscles
   NAME_min and NAME_max.
   NOTE(review): some continuation lines of this macro are not visible
   in this excerpt; confirm details against the complete definition. */
56 #define GENERATE_MUSCLE_INSERT_TABLE(Name, Type) \
59 Name (char const *name, \
72 obstack_fgrow1 (&format_obstack, "%6d", first); \
73 for (i = begin; i < end; ++i) \
75 obstack_1grow (&format_obstack, ','); \
78 obstack_sgrow (&format_obstack, "\n "); \
83 obstack_fgrow1 (&format_obstack, "%6d", table_data[i]); \
84 if (table_data[i] < min) \
85 min = table_data[i]; \
86 if (max < table_data[i]) \
87 max = table_data[i]; \
89 obstack_1grow (&format_obstack, 0); \
90 muscle_insert (name, obstack_finish (&format_obstack)); \
94 /* Build `NAME_min' and `NAME_max' in the obstack. */ \
95 obstack_fgrow1 (&format_obstack, "%s_min", name); \
96 obstack_1grow (&format_obstack, 0); \
97 MUSCLE_INSERT_LONG_INT (obstack_finish (&format_obstack), lmin); \
98 obstack_fgrow1 (&format_obstack, "%s_max", name); \
99 obstack_1grow (&format_obstack, 0); \
100 MUSCLE_INSERT_LONG_INT (obstack_finish (&format_obstack), lmax); \
/* Instantiate one table-inserting function per element type:
   unsigned int, int, base_number, rule_number, symbol_number and
   state_number. */
103 GENERATE_MUSCLE_INSERT_TABLE(muscle_insert_unsigned_int_table
, unsigned int)
104 GENERATE_MUSCLE_INSERT_TABLE(muscle_insert_int_table
, int)
105 GENERATE_MUSCLE_INSERT_TABLE(muscle_insert_base_table
, base_number
)
106 GENERATE_MUSCLE_INSERT_TABLE(muscle_insert_rule_number_table
, rule_number
)
107 GENERATE_MUSCLE_INSERT_TABLE(muscle_insert_symbol_number_table
, symbol_number
)
108 GENERATE_MUSCLE_INSERT_TABLE(muscle_insert_state_number_table
, state_number
)
111 /*----------------------------------------------------------------.
112 | Print to OUT a representation of CP quoted and escaped for M4. |
113 `----------------------------------------------------------------*/
/* Characters that are special to M4 are rewritten as follows: `$' as
   `$][', `@' as `@@', `[' as `@{' and `]' as `@}'.  Every other
   character is written to OUT unchanged. */
116 quoted_output (FILE *out
, char const *cp
)
123 case '$': fputs ("$][", out
); break;
124 case '@': fputs ("@@", out
); break;
125 case '[': fputs ("@{", out
); break;
126 case ']': fputs ("@}", out
); break;
127 default: fputc (*cp
, out
); break;
133 /*----------------------------------------------------------------.
134 | Print to OUT a representation of STRING quoted and escaped both for C and for M4. |
136 `----------------------------------------------------------------*/
/* STRING is first quoted in C style by quotearg_style; the quoted
   text is then written through quoted_output, which applies the M4
   escaping. */
139 string_output (FILE *out
, char const *string
)
141 quoted_output (out
, quotearg_style (c_quoting_style
, string
));
145 /*------------------------------------------------------------------.
146 | Prepare the muscles related to the symbols: translate, tname, and toknum. |
148 `------------------------------------------------------------------*/
151 prepare_symbols (void)
/* Scalar muscles: the symbol counts, the number of the undefined
   token and the largest user token number. */
153 MUSCLE_INSERT_INT ("tokens_number", ntokens
);
154 MUSCLE_INSERT_INT ("nterms_number", nvars
);
155 MUSCLE_INSERT_INT ("symbols_number", nsyms
);
156 MUSCLE_INSERT_INT ("undef_token_number", undeftoken
->number
);
157 MUSCLE_INSERT_INT ("user_token_number_max", max_user_token_number
);
/* translate -- entry 0 of token_translations is passed as the FIRST
   value, and the remaining entries are taken from index 1 up to and
   including max_user_token_number. */
159 muscle_insert_symbol_number_table ("translate",
161 token_translations
[0],
162 1, max_user_token_number
+ 1);
164 /* tname -- token names. */
167 /* We assume that the table will be output starting at column 2. */
/* Every symbol tag is quoted as a C string, with the
   QA_SPLIT_TRIGRAPHS flag set on the quoting options. */
169 struct quoting_options
*qo
= clone_quoting_options (0);
170 set_quoting_style (qo
, c_quoting_style
);
171 set_quoting_flags (qo
, QA_SPLIT_TRIGRAPHS
);
172 for (i
= 0; i
< nsyms
; i
++)
174 char *cp
= quotearg_alloc (symbols
[i
]->tag
, -1, qo
);
175 /* Width of the next token, including the two quotes, the
176 comma and the space. */
177 int width
= strlen (cp
) + 2;
181 obstack_sgrow (&format_obstack
, "\n ");
186 obstack_1grow (&format_obstack
, ' ');
187 obstack_escape (&format_obstack
, cp
);
189 obstack_1grow (&format_obstack
, ',');
/* Append the b4_null entry to the list of names. */
193 obstack_sgrow (&format_obstack
, " ]b4_null[");
195 /* Finish table and store. */
196 obstack_1grow (&format_obstack
, 0);
197 muscle_insert ("tname", obstack_finish (&format_obstack
));
200 /* Output YYTOKNUM. */
/* toknum -- the user_token_number of each of the first ntokens
   symbols. */
203 int *values
= xnmalloc (ntokens
, sizeof *values
);
204 for (i
= 0; i
< ntokens
; ++i
)
205 values
[i
] = symbols
[i
]->user_token_number
;
206 muscle_insert_int_table ("toknum", values
,
207 values
[0], 1, ntokens
);
213 /*----------------------------------------------------------------.
214 | Prepare the muscles related to the rules: r1, r2, rline, dprec, |
215 | merger, immediate. |
216 `----------------------------------------------------------------*/
221 unsigned int *rline
= xnmalloc (nrules
, sizeof *rline
);
222 symbol_number
*r1
= xnmalloc (nrules
, sizeof *r1
);
223 unsigned int *r2
= xnmalloc (nrules
, sizeof *r2
);
224 int *dprec
= xnmalloc (nrules
, sizeof *dprec
);
225 int *merger
= xnmalloc (nrules
, sizeof *merger
);
226 int *immediate
= xnmalloc (nrules
, sizeof *immediate
);
229 for (r
= 0; r
< nrules
; ++r
)
231 /* LHS of the rule R. */
232 r1
[r
] = rules
[r
].lhs
->number
;
233 /* Length of rule R's RHS. */
234 r2
[r
] = rule_rhs_length(&rules
[r
]);
235 /* Line where rule was defined. */
236 rline
[r
] = rules
[r
].location
.start
.line
;
237 /* Dynamic precedence (GLR). */
238 dprec
[r
] = rules
[r
].dprec
;
239 /* Merger-function index (GLR). */
240 merger
[r
] = rules
[r
].merger
;
241 /* Immediate reduction flags (GLR). */
242 immediate
[r
] = rules
[r
].is_predicate
;
245 muscle_insert_unsigned_int_table ("rline", rline
, 0, 0, nrules
);
246 muscle_insert_symbol_number_table ("r1", r1
, 0, 0, nrules
);
247 muscle_insert_unsigned_int_table ("r2", r2
, 0, 0, nrules
);
248 muscle_insert_int_table ("dprec", dprec
, 0, 0, nrules
);
249 muscle_insert_int_table ("merger", merger
, 0, 0, nrules
);
250 muscle_insert_int_table ("immediate", immediate
, 0, 0, nrules
);
252 MUSCLE_INSERT_INT ("rules_number", nrules
);
253 MUSCLE_INSERT_INT ("max_left_semantic_context", max_left_semantic_context
);
263 /*--------------------------------------------.
264 | Prepare the muscles related to the states. |
265 `--------------------------------------------*/
268 prepare_states (void)
/* stos -- the accessing symbol of each state, indexed by state. */
271 symbol_number
*values
= xnmalloc (nstates
, sizeof *values
);
272 for (i
= 0; i
< nstates
; ++i
)
273 values
[i
] = states
[i
]->accessing_symbol
;
274 muscle_insert_symbol_number_table ("stos", values
,
/* Scalar muscles: last, final_state_number and states_number. */
278 MUSCLE_INSERT_INT ("last", high
);
279 MUSCLE_INSERT_INT ("final_state_number", final_state
->number
);
280 MUSCLE_INSERT_INT ("states_number", nstates
);
284 /*-------------------------------------------------------.
285 | Compare two symbols by type-name, and then by number. |
286 `-------------------------------------------------------*/
/* LHS and RHS each point to a pointer to a symbol, as is the case
   when sorting an array of symbol pointers. */
289 symbol_type_name_cmp (const symbol
**lhs
, const symbol
**rhs
)
/* RES is the UNIQSTR_CMP ordering of the two type names. */
291 int res
= UNIQSTR_CMP((*lhs
)->type_name
, (*rhs
)->type_name
);
/* Order by symbol number. */
294 return (*lhs
)->number
- (*rhs
)->number
;
298 /*----------------------------------------------------------------.
299 | Return a (malloc'ed) table of the symbols sorted by type-name. |
300 `----------------------------------------------------------------*/
303 symbols_by_type_name (void)
/* qsort expects a comparison function taking `const void *'
   arguments, hence the cast of symbol_type_name_cmp below. */
305 typedef int (*qcmp_type
) (const void *, const void *);
/* Sort a copy of the SYMBOLS array (made with xmemdup), not the
   array itself. */
306 symbol
**res
= xmemdup (symbols
, nsyms
* sizeof *res
);
307 qsort (res
, nsyms
, sizeof *res
, (qcmp_type
) &symbol_type_name_cmp
);
312 /*------------------------------------------------------------------.
313 | Define b4_type_names, which is a list of (lists of the numbers of |
314 | symbols with same type-name). |
315 `------------------------------------------------------------------*/
318 type_names_output (FILE *out
)
/* The symbols are obtained from symbols_by_type_name, so the loops
   below can treat symbols with the same type_name as one run. */
321 symbol
**syms
= symbols_by_type_name ();
322 fputs ("m4_define([b4_type_names],\n[", out
);
323 for (i
= 0; i
< nsyms
; /* nothing */)
325 // The index of the first symbol of the current type-name.
/* Groups after the first one are separated by ",\n". */
327 fputs (i
? ",\n[" : "[", out
);
/* Print the numbers of all the consecutive symbols whose type_name
   equals that of syms[i0], separated by ", ". */
328 for (; i
< nsyms
&& syms
[i
]->type_name
== syms
[i0
]->type_name
; ++i
)
329 fprintf (out
, "%s%d", i
!= i0
? ", " : "", syms
[i
]->number
);
332 fputs ("])\n\n", out
);
337 /*-------------------------------------.
338 | The list of all the symbol numbers. |
339 `-------------------------------------*/
/* Define b4_symbol_numbers as the list `[0], [1], ...' containing
   every index below nsyms, separated by ", ". */
342 symbol_numbers_output (FILE *out
)
345 fputs ("m4_define([b4_symbol_numbers],\n[", out
);
346 for (i
= 0; i
< nsyms
; ++i
)
347 fprintf (out
, "%s[%d]", i
? ", " : "", i
);
348 fputs ("])\n\n", out
);
352 /*---------------------------------.
353 | Output the user actions to OUT. |
354 `---------------------------------*/
/* Define b4_actions.  Each rule R printed by the loop yields a
   `b4_case' (or `b4_predicate_case' when the rule's is_predicate is
   set) whose first argument is R + 1, followed by a b4_syncline
   built from the line and file of the action's start location, and
   then the action text itself. */
357 user_actions_output (FILE *out
)
361 fputs ("m4_define([b4_actions], \n[", out
);
362 for (r
= 0; r
< nrules
; ++r
)
365 fprintf (out
, "b4_%scase(%d, [b4_syncline(%d, ",
366 rules
[r
].is_predicate
? "predicate_" : "",
367 r
+ 1, rules
[r
].action_location
.start
.line
);
/* The file name goes through string_output so that it is quoted
   before being printed. */
368 string_output (out
, rules
[r
].action_location
.start
.file
);
369 fprintf (out
, ")\n[ %s]])\n\n", rules
[r
].action
);
371 fputs ("])\n\n", out
);
374 /*------------------------------------.
375 | Output the merge functions to OUT. |
376 `------------------------------------*/
/* Define b4_mergers as a sequence of `case N:' lines, one for each
   element of the merge_functions list, with N counting from 1. */
379 merger_output (FILE *out
)
384 fputs ("m4_define([b4_mergers], \n[[", out
);
385 for (n
= 1, p
= merge_functions
; p
!= NULL
; n
+= 1, p
= p
->next
)
/* With an empty type the result is assigned to *yy0 itself;
   otherwise it is assigned to the member of yy0 named by the
   type. */
387 if (p
->type
[0] == '\0')
388 fprintf (out
, " case %d: *yy0 = %s (*yy0, *yy1); break;\n",
391 fprintf (out
, " case %d: yy0->%s = %s (*yy0, *yy1); break;\n",
392 n
, p
->type
, p
->name
);
394 fputs ("]])\n\n", out
);
398 /*---------------------------------------------.
399 | Prepare the muscles for symbol definitions. |
400 `---------------------------------------------*/
403 prepare_symbol_definitions (void)
/* Every symbol gets its own family of muscles, whose names are
   built with SET_KEY and SET_KEY2. */
406 for (i
= 0; i
< nsyms
; ++i
)
408 symbol
*sym
= symbols
[i
];
/* SET_KEY formats a muscle name of the form "symbol(<number>,
   <Entry>)" in the obstack and leaves it in KEY; SET_KEY2 does the
   same with the form "symbol(<number>, <Entry>_<Suffix>)". */
412 #define SET_KEY(Entry) \
413 obstack_fgrow2 (&format_obstack, "symbol(%d, %s)", \
415 obstack_1grow (&format_obstack, 0); \
416 key = obstack_finish (&format_obstack);
418 #define SET_KEY2(Entry, Suffix) \
419 obstack_fgrow3 (&format_obstack, "symbol(%d, %s_%s)", \
421 obstack_1grow (&format_obstack, 0); \
422 key = obstack_finish (&format_obstack);
424 // Whether the symbol has an identifier.
425 value
= symbol_id_get (sym
);
427 MUSCLE_INSERT_INT (key
, !!value
);
/* The identifier itself, or the empty string when there is none. */
431 MUSCLE_INSERT_STRING (key
, value
? value
: "");
433 // Its tag. Typically for documentation purposes.
435 MUSCLE_INSERT_STRING (key
, sym
->tag
);
437 SET_KEY("user_number");
438 MUSCLE_INSERT_INT (key
, sym
->user_token_number
);
/* Nonzero when I is below ntokens and SYM is neither errtoken nor
   undeftoken. */
441 MUSCLE_INSERT_INT (key
,
442 i
< ntokens
&& sym
!= errtoken
&& sym
!= undeftoken
);
445 MUSCLE_INSERT_INT (key
, sym
->number
);
/* Whether the symbol has a type name, then the type name itself
   (the empty string when there is none). */
448 MUSCLE_INSERT_INT (key
, !!sym
->type_name
);
451 MUSCLE_INSERT_STRING (key
, sym
->type_name
? sym
->type_name
: "");
/* One group of muscles for each kind of code property of the
   symbol. */
455 for (j
= 0; j
< CODE_PROPS_SIZE
; ++j
)
457 /* "printer", not "%printer". */
458 char const *pname
= code_props_type_string (j
) + 1;
459 code_props
const *p
= symbol_code_props_get (sym
, j
);
460 SET_KEY2("has", pname
);
461 MUSCLE_INSERT_INT (key
, !!p
->code
);
465 SET_KEY2(pname
, "file");
466 MUSCLE_INSERT_STRING (key
, p
->location
.start
.file
);
468 SET_KEY2(pname
, "line");
469 MUSCLE_INSERT_INT (key
, p
->location
.start
.line
);
472 MUSCLE_INSERT_STRING_RAW (key
, p
->code
);
/* Insert the muscles for the parser tables: defact, defgoto, pact,
   pgoto, table, check, conflict_list_heads and conflicting_rules,
   together with the scalars pact_ninf and table_ninf. */
483 prepare_actions (void)
485 /* Figure out the actions for the specified state, indexed by
486 lookahead token type. */
488 muscle_insert_rule_number_table ("defact", yydefact
,
489 yydefact
[0], 1, nstates
);
491 /* Figure out what to do after reducing with each rule, depending on
492 the saved state from before the beginning of parsing the data
493 that matched this rule. */
494 muscle_insert_state_number_table ("defgoto", yydefgoto
,
495 yydefgoto
[0], 1, nsyms
- ntokens
);
/* pact -- the entries of BASE below index nstates. */
499 muscle_insert_base_table ("pact", base
,
500 base
[0], 1, nstates
);
501 MUSCLE_INSERT_INT ("pact_ninf", base_ninf
);
/* pgoto -- taken from the same BASE array as pact, starting at index
   nstates and ending before index nvectors. */
504 muscle_insert_base_table ("pgoto", base
,
505 base
[nstates
], nstates
+ 1, nvectors
);
/* table and check both cover indices 0 through HIGH. */
507 muscle_insert_base_table ("table", table
,
508 table
[0], 1, high
+ 1);
509 MUSCLE_INSERT_INT ("table_ninf", table_ninf
);
511 muscle_insert_base_table ("check", check
,
512 check
[0], 1, high
+ 1);
514 /* GLR parsing slightly modifies YYTABLE and YYCHECK (and thus
515 YYPACT) so that in states with unresolved conflicts, the default
516 reduction is not used in the conflicted entries, so that there is
517 a place to put a conflict pointer.
519 This means that YYCONFLP and YYCONFL are nonsense for a non-GLR
520 parser, so we could avoid accidents by not writing them out in
521 that case. Nevertheless, it seems even better to be able to use
522 the GLR skeletons even without the non-deterministic tables. */
523 muscle_insert_unsigned_int_table ("conflict_list_heads", conflict_table
,
524 conflict_table
[0], 1, high
+ 1);
525 muscle_insert_unsigned_int_table ("conflicting_rules", conflict_list
,
526 0, 1, conflict_list_cnt
);
530 /*--------------------------------------------.
531 | Output the definitions of all the muscles. |
532 `--------------------------------------------*/
/* The output begins with `m4_init()', continues with the m4
   definitions produced by the *_output functions of this file, and
   ends with the muscle table written by muscles_m4_output. */
535 muscles_output (FILE *out
)
537 fputs ("m4_init()\n", out
);
539 symbol_numbers_output (out
);
540 type_names_output (out
);
541 user_actions_output (out
);
543 muscles_m4_output (out
);
546 /*---------------------------.
547 | Call the skeleton parser. |
548 `---------------------------*/
551 output_skeleton (void)
556 /* Compute the names of the package data dir and skeleton files. */
/* The value of the M4 environment variable, when there is one, is
   used instead of the M4 macro. */
557 char const *m4
= (m4
= getenv ("M4")) ? m4
: M4
;
558 char const *datadir
= pkgdatadir ();
559 char *m4sugar
= xconcatenated_filename (datadir
, "m4sugar/m4sugar.m4", NULL
);
560 char *m4bison
= xconcatenated_filename (datadir
, "bison.m4", NULL
);
/* A skeleton name for which IS_PATH_WITH_DIR is false is looked up
   in DATADIR. */
561 char *skel
= (IS_PATH_WITH_DIR (skeleton
)
563 : xconcatenated_filename (datadir
, skeleton
, NULL
));
565 /* Test whether m4sugar.m4 is readable, to check for proper
566 installation. A faulty installation can cause deadlock, so a
567 cheap sanity check is worthwhile. */
568 xfclose (xfopen (m4sugar
, "r"));
570 /* Create an m4 subprocess connected to us via two pipes. */
572 if (trace_flag
& trace_tools
)
573 fprintf (stderr
, "running: %s %s - %s %s\n",
574 m4
, m4sugar
, m4bison
, skel
);
576 /* Some future version of GNU M4 (most likely 1.6) may treat the -dV in a
577 position-dependent manner. Keep it as the first argument so that all
580 See the thread starting at
581 <http://lists.gnu.org/archive/html/bug-bison/2008-07/msg00000.html>
584 char const *argv
[10];
588 /* When POSIXLY_CORRECT is set, GNU M4 1.6 and later disable GNU
589 extensions, which Bison's skeletons depend on. With older M4,
590 it has no effect. M4 1.4.12 added a -g/--gnu command-line
591 option to make it explicit that a program wants GNU M4
592 extensions even when POSIXLY_CORRECT is set.
594 See the thread starting at
595 <http://lists.gnu.org/archive/html/bug-bison/2008-07/msg00000.html>
598 argv
[i
++] = M4_GNU_OPTION
;
602 if (trace_flag
& trace_m4
)
609 aver (i
<= ARRAY_CARDINALITY (argv
));
611 /* The ugly cast is because gnulib gets the const-ness wrong. */
612 pid
= create_pipe_bidi ("m4", m4
, (char **)(void*)argv
, false, true,
/* When muscle tracing is requested, the muscles are also dumped to
   stderr. */
620 if (trace_flag
& trace_muscles
)
621 muscles_output (stderr
);
/* Write the muscles to the stream opened on filter_fd[1]. */
623 FILE *out
= xfdopen (filter_fd
[1], "w");
624 muscles_output (out
);
628 /* Read and process m4's output. */
629 timevar_push (TV_M4
);
631 FILE *in
= xfdopen (filter_fd
[0], "r");
633 /* scan_skel should have read all of M4's output. Otherwise, when we
634 close the pipe, we risk letting M4 report a broken-pipe to the
639 wait_subprocess (pid
, "m4", false, false, true, true, NULL
);
646 /* BISON_USE_PUSH_FOR_PULL is for the test suite and should not be
647 documented for the user. */
648 char const *cp
= getenv ("BISON_USE_PUSH_FOR_PULL");
649 bool use_push_for_pull_flag
= cp
&& *cp
&& strtol (cp
, 0, 10);
652 MUSCLE_INSERT_BOOL ("defines_flag", defines_flag
);
653 MUSCLE_INSERT_BOOL ("glr_flag", glr_parser
);
654 MUSCLE_INSERT_BOOL ("nondeterministic_flag", nondeterministic_parser
);
655 MUSCLE_INSERT_BOOL ("synclines_flag", !no_lines_flag
);
656 MUSCLE_INSERT_BOOL ("tag_seen_flag", tag_seen
);
657 MUSCLE_INSERT_BOOL ("token_table_flag", token_table_flag
);
658 MUSCLE_INSERT_BOOL ("use_push_for_pull_flag", use_push_for_pull_flag
);
659 MUSCLE_INSERT_BOOL ("yacc_flag", yacc_flag
);
662 if (spec_name_prefix
)
663 MUSCLE_INSERT_STRING ("prefix", spec_name_prefix
);
665 MUSCLE_INSERT_STRING ("file_name_all_but_ext", all_but_ext
);
667 #define DEFINE(Name) MUSCLE_INSERT_STRING (#Name, Name ? Name : "")
669 DEFINE (parser_file_name
);
670 DEFINE (spec_defines_file
);
671 DEFINE (spec_file_prefix
);
672 DEFINE (spec_graph_file
);
673 DEFINE (spec_name_prefix
);
674 DEFINE (spec_outfile
);
675 DEFINE (spec_verbose_file
);
678 /* Find the right skeleton file, and add muscles about the skeletons. */
680 MUSCLE_INSERT_C_STRING ("skeleton", skeleton
);
682 skeleton
= language
->skeleton
;
684 /* About the skeletons. */
686 /* b4_pkgdatadir is used inside m4_include in the skeletons, so digraphs
687 would never be expanded. Hopefully no one has M4-special characters in
688 his Bison installation path. */
689 MUSCLE_INSERT_STRING_RAW ("pkgdatadir", pkgdatadir ());
694 /*----------------------------------------------------------.
695 | Output the parsing tables and the parser code to ftable. |
696 `----------------------------------------------------------*/
701 obstack_init (&format_obstack
);
707 prepare_symbol_definitions ();
711 /* Process the selected skeleton file. */
714 obstack_free (&format_obstack
, NULL
);
720 char const *cp
= getenv ("BISON_PKGDATADIR");
721 return cp
? cp
: PKGDATADIR
;