1 /* Output the generated parsing program for Bison.
3 Copyright (C) 1984, 1986, 1989, 1992, 2000-2012 Free Software
6 This file is part of Bison, the GNU Compiler Compiler.
8 This program is free software: you can redistribute it and/or modify
9 it under the terms of the GNU General Public License as published by
10 the Free Software Foundation, either version 3 of the License, or
11 (at your option) any later version.
13 This program is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU General Public License for more details.
18 You should have received a copy of the GNU General Public License
19 along with this program. If not, see <http://www.gnu.org/licenses/>. */
24 #include <configmake.h>
26 #include <get-errno.h>
28 #include <spawn-pipe.h>
30 #include <wait-process.h>
36 #include "muscle-tab.h"
39 #include "scan-code.h" /* max_left_semantic_context */
40 #include "scan-skel.h"
44 # define ARRAY_CARDINALITY(Array) (sizeof (Array) / sizeof *(Array))
46 static struct obstack format_obstack
;
49 /*-------------------------------------------------------------------.
50 | Create a function NAME which associates to the muscle NAME the |
51 | result of formatting the FIRST and then TABLE_DATA[BEGIN..END[ (of |
52 | TYPE), and to the muscle NAME_max, the max value of the |
54 `-------------------------------------------------------------------*/
57 #define GENERATE_MUSCLE_INSERT_TABLE(Name, Type) \
60 Name (char const *name, \
73 obstack_fgrow1 (&format_obstack, "%6d", first); \
74 for (i = begin; i < end; ++i) \
76 obstack_1grow (&format_obstack, ','); \
79 obstack_sgrow (&format_obstack, "\n "); \
84 obstack_fgrow1 (&format_obstack, "%6d", table_data[i]); \
85 if (table_data[i] < min) \
86 min = table_data[i]; \
87 if (max < table_data[i]) \
88 max = table_data[i]; \
90 obstack_1grow (&format_obstack, 0); \
91 muscle_insert (name, obstack_finish (&format_obstack)); \
95 /* Build `NAME_min' and `NAME_max' in the obstack. */ \
96 obstack_fgrow1 (&format_obstack, "%s_min", name); \
97 obstack_1grow (&format_obstack, 0); \
98 MUSCLE_INSERT_LONG_INT (obstack_finish (&format_obstack), lmin); \
99 obstack_fgrow1 (&format_obstack, "%s_max", name); \
100 obstack_1grow (&format_obstack, 0); \
101 MUSCLE_INSERT_LONG_INT (obstack_finish (&format_obstack), lmax); \
104 GENERATE_MUSCLE_INSERT_TABLE(muscle_insert_unsigned_int_table
, unsigned int)
105 GENERATE_MUSCLE_INSERT_TABLE(muscle_insert_int_table
, int)
106 GENERATE_MUSCLE_INSERT_TABLE(muscle_insert_base_table
, base_number
)
107 GENERATE_MUSCLE_INSERT_TABLE(muscle_insert_rule_number_table
, rule_number
)
108 GENERATE_MUSCLE_INSERT_TABLE(muscle_insert_symbol_number_table
, symbol_number
)
109 GENERATE_MUSCLE_INSERT_TABLE(muscle_insert_state_number_table
, state_number
)
112 /*--------------------------------------------------------------------.
113 | Print to OUT a representation of STRING escaped both for C and M4. |
114 `--------------------------------------------------------------------*/
117 escaped_output (FILE *out
, char const *string
)
122 for (p
= quotearg_style (c_quoting_style
, string
); *p
; p
++)
125 case '$': fputs ("$][", out
); break;
126 case '@': fputs ("@@", out
); break;
127 case '[': fputs ("@{", out
); break;
128 case ']': fputs ("@}", out
); break;
129 default: fputc (*p
, out
); break;
136 /*------------------------------------------------------------------.
137 | Prepare the muscles related to the symbols: translate, tname, and |
139 `------------------------------------------------------------------*/
142 prepare_symbols (void)
144 MUSCLE_INSERT_BOOL ("token_table", token_table_flag
);
145 MUSCLE_INSERT_INT ("tokens_number", ntokens
);
146 MUSCLE_INSERT_INT ("nterms_number", nvars
);
147 MUSCLE_INSERT_INT ("symbols_number", nsyms
);
148 MUSCLE_INSERT_INT ("undef_token_number", undeftoken
->number
);
149 MUSCLE_INSERT_INT ("user_token_number_max", max_user_token_number
);
151 muscle_insert_symbol_number_table ("translate",
153 token_translations
[0],
154 1, max_user_token_number
+ 1);
156 /* tname -- token names. */
159 /* We assume that the table will be output starting at column 2. */
161 struct quoting_options
*qo
= clone_quoting_options (0);
162 set_quoting_style (qo
, c_quoting_style
);
163 set_quoting_flags (qo
, QA_SPLIT_TRIGRAPHS
);
164 for (i
= 0; i
< nsyms
; i
++)
166 char *cp
= quotearg_alloc (symbols
[i
]->tag
, -1, qo
);
167 /* Width of the next token, including the two quotes, the
168 comma and the space. */
169 int width
= strlen (cp
) + 2;
173 obstack_sgrow (&format_obstack
, "\n ");
178 obstack_1grow (&format_obstack
, ' ');
179 MUSCLE_OBSTACK_SGROW (&format_obstack
, cp
);
181 obstack_1grow (&format_obstack
, ',');
185 obstack_sgrow (&format_obstack
, " ]b4_null[");
187 /* Finish table and store. */
188 obstack_1grow (&format_obstack
, 0);
189 muscle_insert ("tname", obstack_finish (&format_obstack
));
192 /* Output YYTOKNUM. */
195 int *values
= xnmalloc (ntokens
, sizeof *values
);
196 for (i
= 0; i
< ntokens
; ++i
)
197 values
[i
] = symbols
[i
]->user_token_number
;
198 muscle_insert_int_table ("toknum", values
,
199 values
[0], 1, ntokens
);
205 /*----------------------------------------------------------------.
206 | Prepare the muscles related to the rules: r1, r2, rline, dprec, |
207 | merger, immediate. |
208 `----------------------------------------------------------------*/
213 unsigned int *rline
= xnmalloc (nrules
, sizeof *rline
);
214 symbol_number
*r1
= xnmalloc (nrules
, sizeof *r1
);
215 unsigned int *r2
= xnmalloc (nrules
, sizeof *r2
);
216 int *dprec
= xnmalloc (nrules
, sizeof *dprec
);
217 int *merger
= xnmalloc (nrules
, sizeof *merger
);
218 int *immediate
= xnmalloc (nrules
, sizeof *immediate
);
221 for (r
= 0; r
< nrules
; ++r
)
223 /* LHS of the rule R. */
224 r1
[r
] = rules
[r
].lhs
->number
;
225 /* Length of rule R's RHS. */
226 r2
[r
] = rule_rhs_length(&rules
[r
]);
227 /* Line where rule was defined. */
228 rline
[r
] = rules
[r
].location
.start
.line
;
229 /* Dynamic precedence (GLR). */
230 dprec
[r
] = rules
[r
].dprec
;
231 /* Merger-function index (GLR). */
232 merger
[r
] = rules
[r
].merger
;
233 /* Immediate reduction flags (GLR). */
234 immediate
[r
] = rules
[r
].is_predicate
;
237 muscle_insert_unsigned_int_table ("rline", rline
, 0, 0, nrules
);
238 muscle_insert_symbol_number_table ("r1", r1
, 0, 0, nrules
);
239 muscle_insert_unsigned_int_table ("r2", r2
, 0, 0, nrules
);
240 muscle_insert_int_table ("dprec", dprec
, 0, 0, nrules
);
241 muscle_insert_int_table ("merger", merger
, 0, 0, nrules
);
242 muscle_insert_int_table ("immediate", immediate
, 0, 0, nrules
);
244 MUSCLE_INSERT_INT ("rules_number", nrules
);
245 MUSCLE_INSERT_INT ("max_left_semantic_context", max_left_semantic_context
);
255 /*--------------------------------------------.
256 | Prepare the muscles related to the states. |
257 `--------------------------------------------*/
260 prepare_states (void)
263 symbol_number
*values
= xnmalloc (nstates
, sizeof *values
);
264 for (i
= 0; i
< nstates
; ++i
)
265 values
[i
] = states
[i
]->accessing_symbol
;
266 muscle_insert_symbol_number_table ("stos", values
,
270 MUSCLE_INSERT_INT ("last", high
);
271 MUSCLE_INSERT_INT ("final_state_number", final_state
->number
);
272 MUSCLE_INSERT_INT ("states_number", nstates
);
276 /*-------------------------------------------------------.
277 | Compare two symbols by type-name, and then by number. |
278 `-------------------------------------------------------*/
281 symbol_type_name_cmp (const symbol
**lhs
, const symbol
**rhs
)
283 int res
= UNIQSTR_CMP((*lhs
)->type_name
, (*rhs
)->type_name
);
286 return (*lhs
)->number
- (*rhs
)->number
;
290 /*----------------------------------------------------------------.
291 | Return a (malloc'ed) table of the symbols sorted by type-name. |
292 `----------------------------------------------------------------*/
295 symbols_by_type_name (void)
297 typedef int (*qcmp_type
) (const void *, const void *);
298 symbol
**res
= xmemdup (symbols
, nsyms
* sizeof *res
);
299 qsort (res
, nsyms
, sizeof *res
, (qcmp_type
) &symbol_type_name_cmp
);
304 /*------------------------------------------------------------------.
305 | Define b4_type_names, which is a list of (lists of the numbers of |
306 | symbols with same type-name). |
307 `------------------------------------------------------------------*/
310 type_names_output (FILE *out
)
313 symbol
**syms
= symbols_by_type_name ();
314 fputs ("m4_define([b4_type_names],\n[", out
);
315 for (i
= 0; i
< nsyms
; /* nothing */)
317 // The index of the first symbol of the current type-name.
319 fputs (i
? ",\n[" : "[", out
);
320 for (; i
< nsyms
&& syms
[i
]->type_name
== syms
[i0
]->type_name
; ++i
)
321 fprintf (out
, "%s%d", i
!= i0
? ", " : "", syms
[i
]->number
);
324 fputs ("])\n\n", out
);
329 /*-------------------------------------.
330 | The list of all the symbol numbers. |
331 `-------------------------------------*/
334 symbol_numbers_output (FILE *out
)
337 fputs ("m4_define([b4_symbol_numbers],\n[", out
);
338 for (i
= 0; i
< nsyms
; ++i
)
339 fprintf (out
, "%s[%d]", i
? ", " : "", i
);
340 fputs ("])\n\n", out
);
344 /*---------------------------------.
345 | Output the user actions to OUT. |
346 `---------------------------------*/
349 user_actions_output (FILE *out
)
353 fputs ("m4_define([b4_actions], \n[", out
);
354 for (r
= 0; r
< nrules
; ++r
)
357 fprintf (out
, "b4_%scase(%d, [b4_syncline(%d, ",
358 rules
[r
].is_predicate
? "predicate_" : "",
359 r
+ 1, rules
[r
].action_location
.start
.line
);
360 escaped_output (out
, rules
[r
].action_location
.start
.file
);
361 fprintf (out
, ")\n[ %s]])\n\n", rules
[r
].action
);
363 fputs ("])\n\n", out
);
366 /*------------------------------------.
367 | Output the merge functions to OUT. |
368 `------------------------------------*/
371 merger_output (FILE *out
)
376 fputs ("m4_define([b4_mergers], \n[[", out
);
377 for (n
= 1, p
= merge_functions
; p
!= NULL
; n
+= 1, p
= p
->next
)
379 if (p
->type
[0] == '\0')
380 fprintf (out
, " case %d: *yy0 = %s (*yy0, *yy1); break;\n",
383 fprintf (out
, " case %d: yy0->%s = %s (*yy0, *yy1); break;\n",
384 n
, p
->type
, p
->name
);
386 fputs ("]])\n\n", out
);
390 /*---------------------------------------------.
391 | Prepare the muscles for symbol definitions. |
392 `---------------------------------------------*/
395 prepare_symbol_definitions (void)
398 for (i
= 0; i
< nsyms
; ++i
)
400 symbol
*sym
= symbols
[i
];
404 #define SET_KEY(Entry) \
405 obstack_fgrow2 (&format_obstack, "symbol(%d, %s)", i, Entry); \
406 obstack_1grow (&format_obstack, 0); \
407 key = obstack_finish (&format_obstack);
409 // Whether the symbol has an identifier.
410 value
= symbol_id_get (sym
);
412 MUSCLE_INSERT_INT (key
, !!value
);
416 MUSCLE_INSERT_STRING (key
, value
? value
: "");
418 // Its tag. Typically for documentation purpose.
420 MUSCLE_INSERT_STRING (key
, sym
->tag
);
422 SET_KEY("user_number");
423 MUSCLE_INSERT_INT (key
, sym
->user_token_number
);
426 MUSCLE_INSERT_INT (key
,
427 i
< ntokens
&& sym
!= errtoken
&& sym
!= undeftoken
);
430 MUSCLE_INSERT_INT (key
, sym
->number
);
433 MUSCLE_INSERT_INT (key
, !!sym
->type_name
);
436 MUSCLE_INSERT_STRING (key
, sym
->type_name
? sym
->type_name
: "");
438 #define CODE_PROP(PropName) \
440 code_props const *p = symbol_ ## PropName ## _get (sym); \
441 SET_KEY("has_" #PropName); \
442 MUSCLE_INSERT_INT (key, !!p->code); \
446 SET_KEY(#PropName "_file"); \
447 MUSCLE_INSERT_STRING (key, p->location.start.file); \
449 SET_KEY(#PropName "_line"); \
450 MUSCLE_INSERT_INT (key, p->location.start.line); \
452 SET_KEY(#PropName); \
453 MUSCLE_INSERT_STRING_RAW (key, p->code); \
457 CODE_PROP(destructor
);
465 /*--------------------------------------.
466 | Output the tokens definition to OUT. |
467 `--------------------------------------*/
470 token_definitions_output (FILE *out
)
473 char const *sep
= "";
475 fputs ("m4_define([b4_tokens], \n[", out
);
476 for (i
= 0; i
< ntokens
; ++i
)
478 symbol
*sym
= symbols
[i
];
479 int number
= sym
->user_token_number
;
480 uniqstr id
= symbol_id_get (sym
);
482 /* At this stage, if there are literal string aliases, they are
483 part of SYMBOLS, so we should not find their aliased symbols
485 aver (number
!= USER_NUMBER_HAS_STRING_ALIAS
);
487 /* Skip error token and tokens without identifier. */
488 if (sym
!= errtoken
&& id
)
490 fprintf (out
, "%s[[[%s]], %d]",
495 fputs ("])\n\n", out
);
500 prepare_actions (void)
502 /* Figure out the actions for the specified state, indexed by
503 lookahead token type. */
505 muscle_insert_rule_number_table ("defact", yydefact
,
506 yydefact
[0], 1, nstates
);
508 /* Figure out what to do after reducing with each rule, depending on
509 the saved state from before the beginning of parsing the data
510 that matched this rule. */
511 muscle_insert_state_number_table ("defgoto", yydefgoto
,
512 yydefgoto
[0], 1, nsyms
- ntokens
);
516 muscle_insert_base_table ("pact", base
,
517 base
[0], 1, nstates
);
518 MUSCLE_INSERT_INT ("pact_ninf", base_ninf
);
521 muscle_insert_base_table ("pgoto", base
,
522 base
[nstates
], nstates
+ 1, nvectors
);
524 muscle_insert_base_table ("table", table
,
525 table
[0], 1, high
+ 1);
526 MUSCLE_INSERT_INT ("table_ninf", table_ninf
);
528 muscle_insert_base_table ("check", check
,
529 check
[0], 1, high
+ 1);
531 /* GLR parsing slightly modifies YYTABLE and YYCHECK (and thus
532 YYPACT) so that in states with unresolved conflicts, the default
533 reduction is not used in the conflicted entries, so that there is
534 a place to put a conflict pointer.
536 This means that YYCONFLP and YYCONFL are nonsense for a non-GLR
537 parser, so we could avoid accidents by not writing them out in
538 that case. Nevertheless, it seems even better to be able to use
539 the GLR skeletons even without the non-deterministic tables. */
540 muscle_insert_unsigned_int_table ("conflict_list_heads", conflict_table
,
541 conflict_table
[0], 1, high
+ 1);
542 muscle_insert_unsigned_int_table ("conflicting_rules", conflict_list
,
543 0, 1, conflict_list_cnt
);
547 /*--------------------------------------------.
548 | Output the definitions of all the muscles. |
549 `--------------------------------------------*/
552 muscles_output (FILE *out
)
554 fputs ("m4_init()\n", out
);
556 symbol_numbers_output (out
);
557 token_definitions_output (out
);
558 type_names_output (out
);
559 user_actions_output (out
);
561 muscles_m4_output (out
);
564 /*---------------------------.
565 | Call the skeleton parser. |
566 `---------------------------*/
569 output_skeleton (void)
573 char const *argv
[10];
576 /* Compute the names of the package data dir and skeleton files. */
577 char const m4sugar
[] = "m4sugar/m4sugar.m4";
578 char const m4bison
[] = "bison.m4";
583 char const *m4
= (p
= getenv ("M4")) ? p
: M4
;
584 char const *pkgdatadir
= compute_pkgdatadir ();
585 size_t skeleton_size
= strlen (skeleton
) + 1;
586 size_t pkgdatadirlen
= strlen (pkgdatadir
);
587 while (pkgdatadirlen
&& pkgdatadir
[pkgdatadirlen
- 1] == '/')
589 full_skeleton
= xmalloc (pkgdatadirlen
+ 1
590 + (skeleton_size
< sizeof m4sugar
591 ? sizeof m4sugar
: skeleton_size
));
592 strncpy (full_skeleton
, pkgdatadir
, pkgdatadirlen
);
593 full_skeleton
[pkgdatadirlen
] = '/';
594 strcpy (full_skeleton
+ pkgdatadirlen
+ 1, m4sugar
);
595 full_m4sugar
= xstrdup (full_skeleton
);
596 strcpy (full_skeleton
+ pkgdatadirlen
+ 1, m4bison
);
597 full_m4bison
= xstrdup (full_skeleton
);
598 if (mbschr (skeleton
, '/'))
599 strcpy (full_skeleton
, skeleton
);
601 strcpy (full_skeleton
+ pkgdatadirlen
+ 1, skeleton
);
603 /* Test whether m4sugar.m4 is readable, to check for proper
604 installation. A faulty installation can cause deadlock, so a
605 cheap sanity check is worthwhile. */
606 xfclose (xfopen (full_m4sugar
, "r"));
608 /* Create an m4 subprocess connected to us via two pipes. */
610 if (trace_flag
& trace_tools
)
611 fprintf (stderr
, "running: %s %s - %s %s\n",
612 m4
, full_m4sugar
, full_m4bison
, full_skeleton
);
614 /* Some future version of GNU M4 (most likely 1.6) may treat the -dV in a
615 position-dependent manner. Keep it as the first argument so that all
618 See the thread starting at
619 <http://lists.gnu.org/archive/html/bug-bison/2008-07/msg00000.html>
625 /* When POSIXLY_CORRECT is set, GNU M4 1.6 and later disable GNU
626 extensions, which Bison's skeletons depend on. With older M4,
627 it has no effect. M4 1.4.12 added a -g/--gnu command-line
628 option to make it explicit that a program wants GNU M4
629 extensions even when POSIXLY_CORRECT is set.
631 See the thread starting at
632 <http://lists.gnu.org/archive/html/bug-bison/2008-07/msg00000.html>
635 argv
[i
++] = M4_GNU_OPTION
;
638 argv
[i
++] = pkgdatadir
;
639 if (trace_flag
& trace_m4
)
641 argv
[i
++] = full_m4sugar
;
643 argv
[i
++] = full_m4bison
;
644 argv
[i
++] = full_skeleton
;
646 aver (i
<= ARRAY_CARDINALITY (argv
));
649 /* The ugly cast is because gnulib gets the const-ness wrong. */
650 pid
= create_pipe_bidi ("m4", m4
, (char **)(void*)argv
, false, true,
654 free (full_skeleton
);
656 if (trace_flag
& trace_muscles
)
657 muscles_output (stderr
);
659 FILE *out
= fdopen (filter_fd
[1], "w");
661 error (EXIT_FAILURE
, get_errno (),
663 muscles_output (out
);
667 /* Read and process m4's output. */
668 timevar_push (TV_M4
);
669 in
= fdopen (filter_fd
[0], "r");
671 error (EXIT_FAILURE
, get_errno (),
674 /* scan_skel should have read all of M4's output. Otherwise, when we
675 close the pipe, we risk letting M4 report a broken-pipe to the
679 wait_subprocess (pid
, "m4", false, false, true, true, NULL
);
686 /* BISON_USE_PUSH_FOR_PULL is for the test suite and should not be documented
688 char const *use_push_for_pull_env
= getenv ("BISON_USE_PUSH_FOR_PULL");
689 bool use_push_for_pull_flag
= false;
690 if (use_push_for_pull_env
!= NULL
691 && use_push_for_pull_env
[0] != '\0'
692 && STRNEQ (use_push_for_pull_env
, "0"))
693 use_push_for_pull_flag
= true;
696 MUSCLE_INSERT_BOOL ("defines_flag", defines_flag
);
697 MUSCLE_INSERT_BOOL ("glr_flag", glr_parser
);
698 MUSCLE_INSERT_BOOL ("nondeterministic_flag", nondeterministic_parser
);
699 MUSCLE_INSERT_BOOL ("synclines_flag", !no_lines_flag
);
700 MUSCLE_INSERT_BOOL ("tag_seen_flag", tag_seen
);
701 MUSCLE_INSERT_BOOL ("use_push_for_pull_flag", use_push_for_pull_flag
);
702 MUSCLE_INSERT_BOOL ("yacc_flag", yacc_flag
);
705 if (spec_name_prefix
)
706 MUSCLE_INSERT_STRING ("prefix", spec_name_prefix
);
708 MUSCLE_INSERT_STRING ("file_name_all_but_ext", all_but_ext
);
710 #define DEFINE(Name) MUSCLE_INSERT_STRING (#Name, Name ? Name : "")
712 DEFINE (parser_file_name
);
713 DEFINE (spec_defines_file
);
714 DEFINE (spec_file_prefix
);
715 DEFINE (spec_graph_file
);
716 DEFINE (spec_name_prefix
);
717 DEFINE (spec_outfile
);
718 DEFINE (spec_verbose_file
);
721 /* Find the right skeleton file, and add muscles about the skeletons. */
723 MUSCLE_INSERT_C_STRING ("skeleton", skeleton
);
725 skeleton
= language
->skeleton
;
727 /* About the skeletons. */
729 /* b4_pkgdatadir is used inside m4_include in the skeletons, so digraphs
730 would never be expanded. Hopefully no one has M4-special characters in
731 his Bison installation path. */
732 MUSCLE_INSERT_STRING_RAW ("pkgdatadir", compute_pkgdatadir ());
737 /*----------------------------------------------------------.
738 | Output the parsing tables and the parser code to ftable. |
739 `----------------------------------------------------------*/
744 obstack_init (&format_obstack
);
750 prepare_symbol_definitions ();
754 /* Process the selected skeleton file. */
757 obstack_free (&format_obstack
, NULL
);
761 compute_pkgdatadir (void)
763 char const *pkgdatadir
= getenv ("BISON_PKGDATADIR");
764 return pkgdatadir
? pkgdatadir
: PKGDATADIR
;