1 /* Output the generated parsing program for Bison.
3 Copyright (C) 1984, 1986, 1989, 1992, 2000, 2001, 2002, 2003, 2004,
4 2005, 2006, 2007, 2008, 2009 Free Software Foundation, Inc.
6 This file is part of Bison, the GNU Compiler Compiler.
8 This program is free software: you can redistribute it and/or modify
9 it under the terms of the GNU General Public License as published by
10 the Free Software Foundation, either version 3 of the License, or
11 (at your option) any later version.
13 This program is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU General Public License for more details.
18 You should have received a copy of the GNU General Public License
19 along with this program. If not, see <http://www.gnu.org/licenses/>. */
24 #include <configmake.h>
26 #include <get-errno.h>
35 #include "muscle_tab.h"
38 #include "scan-code.h" /* max_left_semantic_context */
39 #include "scan-skel.h"
44 static struct obstack format_obstack
;
/*-------------------------------------------------------------------.
| Create a function NAME which associates to the muscle NAME the     |
| result of formatting the FIRST and then TABLE_DATA[BEGIN..END[ (of |
| TYPE), and to the muscles NAME_min and NAME_max, the min and max   |
| values of the TABLE_DATA.                                          |
`-------------------------------------------------------------------*/
55 #define GENERATE_MUSCLE_INSERT_TABLE(Name, Type) \
58 Name (char const *name, \
71 obstack_fgrow1 (&format_obstack, "%6d", first); \
72 for (i = begin; i < end; ++i) \
74 obstack_1grow (&format_obstack, ','); \
77 obstack_sgrow (&format_obstack, "\n "); \
82 obstack_fgrow1 (&format_obstack, "%6d", table_data[i]); \
83 if (table_data[i] < min) \
84 min = table_data[i]; \
85 if (max < table_data[i]) \
86 max = table_data[i]; \
88 obstack_1grow (&format_obstack, 0); \
89 muscle_insert (name, obstack_finish (&format_obstack)); \
93 /* Build `NAME_min' and `NAME_max' in the obstack. */ \
94 obstack_fgrow1 (&format_obstack, "%s_min", name); \
95 obstack_1grow (&format_obstack, 0); \
96 MUSCLE_INSERT_LONG_INT (obstack_finish (&format_obstack), lmin); \
97 obstack_fgrow1 (&format_obstack, "%s_max", name); \
98 obstack_1grow (&format_obstack, 0); \
99 MUSCLE_INSERT_LONG_INT (obstack_finish (&format_obstack), lmax); \
102 GENERATE_MUSCLE_INSERT_TABLE(muscle_insert_unsigned_int_table
, unsigned int)
103 GENERATE_MUSCLE_INSERT_TABLE(muscle_insert_int_table
, int)
104 GENERATE_MUSCLE_INSERT_TABLE(muscle_insert_base_table
, base_number
)
105 GENERATE_MUSCLE_INSERT_TABLE(muscle_insert_rule_number_table
, rule_number
)
106 GENERATE_MUSCLE_INSERT_TABLE(muscle_insert_symbol_number_table
, symbol_number
)
107 GENERATE_MUSCLE_INSERT_TABLE(muscle_insert_state_number_table
, state_number
)
110 /*--------------------------------------------------------------------.
111 | Print to OUT a representation of STRING escaped both for C and M4. |
112 `--------------------------------------------------------------------*/
/* Write STRING to OUT.  The text is first quoted with quotearg_style
   (c_quoting_style); the quoted form is then emitted one character at
   a time, with the characters listed below rewritten.  */
115 escaped_output (FILE *out
, char const *string
)
/* Walk the C-quoted form of STRING until its terminating NUL.  */
120 for (p
= quotearg_style (c_quoting_style
, string
); *p
; p
++)
/* `$' is emitted as `$][', `@' as `@@', `[' as `@{' and `]' as `@}';
   any other character is copied to OUT unchanged.  */
123 case '$': fputs ("$][", out
); break;
124 case '@': fputs ("@@", out
); break;
125 case '[': fputs ("@{", out
); break;
126 case ']': fputs ("@}", out
); break;
127 default: fputc (*p
, out
); break;
/*------------------------------------------------------------------.
| Prepare the muscles related to the symbols: translate, tname, and |
| toknum.                                                           |
`------------------------------------------------------------------*/
140 prepare_symbols (void)
142 MUSCLE_INSERT_BOOL ("token_table", token_table_flag
);
143 MUSCLE_INSERT_INT ("tokens_number", ntokens
);
144 MUSCLE_INSERT_INT ("nterms_number", nvars
);
145 MUSCLE_INSERT_INT ("symbols_number", nsyms
);
146 MUSCLE_INSERT_INT ("undef_token_number", undeftoken
->number
);
147 MUSCLE_INSERT_INT ("user_token_number_max", max_user_token_number
);
149 muscle_insert_symbol_number_table ("translate",
151 token_translations
[0],
152 1, max_user_token_number
+ 1);
154 /* tname -- token names. */
157 /* We assume that the table will be output starting at column 2. */
159 struct quoting_options
*qo
= clone_quoting_options (0);
160 set_quoting_style (qo
, c_quoting_style
);
161 set_quoting_flags (qo
, QA_SPLIT_TRIGRAPHS
);
162 for (i
= 0; i
< nsyms
; i
++)
164 char *cp
= quotearg_alloc (symbols
[i
]->tag
, -1, qo
);
165 /* Width of the next token, including the two quotes, the
166 comma and the space. */
167 int width
= strlen (cp
) + 2;
171 obstack_sgrow (&format_obstack
, "\n ");
176 obstack_1grow (&format_obstack
, ' ');
177 MUSCLE_OBSTACK_SGROW (&format_obstack
, cp
);
179 obstack_1grow (&format_obstack
, ',');
183 obstack_sgrow (&format_obstack
, " ]b4_null[");
185 /* Finish table and store. */
186 obstack_1grow (&format_obstack
, 0);
187 muscle_insert ("tname", obstack_finish (&format_obstack
));
190 /* Output YYTOKNUM. */
193 int *values
= xnmalloc (ntokens
, sizeof *values
);
194 for (i
= 0; i
< ntokens
; ++i
)
195 values
[i
] = symbols
[i
]->user_token_number
;
196 muscle_insert_int_table ("toknum", values
,
197 values
[0], 1, ntokens
);
/*----------------------------------------------------------------.
| Prepare the muscles related to the rules: r1, r2, rline, dprec, |
| merger.                                                         |
`----------------------------------------------------------------*/
211 unsigned int *rline
= xnmalloc (nrules
, sizeof *rline
);
212 symbol_number
*r1
= xnmalloc (nrules
, sizeof *r1
);
213 unsigned int *r2
= xnmalloc (nrules
, sizeof *r2
);
214 int *dprec
= xnmalloc (nrules
, sizeof *dprec
);
215 int *merger
= xnmalloc (nrules
, sizeof *merger
);
218 for (r
= 0; r
< nrules
; ++r
)
220 /* LHS of the rule R. */
221 r1
[r
] = rules
[r
].lhs
->number
;
222 /* Length of rule R's RHS. */
223 r2
[r
] = rule_rhs_length(&rules
[r
]);
224 /* Line where rule was defined. */
225 rline
[r
] = rules
[r
].location
.start
.line
;
226 /* Dynamic precedence (GLR). */
227 dprec
[r
] = rules
[r
].dprec
;
228 /* Merger-function index (GLR). */
229 merger
[r
] = rules
[r
].merger
;
232 muscle_insert_unsigned_int_table ("rline", rline
, 0, 0, nrules
);
233 muscle_insert_symbol_number_table ("r1", r1
, 0, 0, nrules
);
234 muscle_insert_unsigned_int_table ("r2", r2
, 0, 0, nrules
);
235 muscle_insert_int_table ("dprec", dprec
, 0, 0, nrules
);
236 muscle_insert_int_table ("merger", merger
, 0, 0, nrules
);
238 MUSCLE_INSERT_INT ("rules_number", nrules
);
239 MUSCLE_INSERT_INT ("max_left_semantic_context", max_left_semantic_context
);
248 /*--------------------------------------------.
249 | Prepare the muscles related to the states. |
250 `--------------------------------------------*/
/* Insert the state-related muscles: the "stos" table plus the scalars
   "last", "final_state_number" and "states_number".  */
253 prepare_states (void)
/* Scratch table with one slot per state.  */
256 symbol_number
*values
= xnmalloc (nstates
, sizeof *values
);
/* VALUES[I] is the accessing symbol of state I.  */
257 for (i
= 0; i
< nstates
; ++i
)
258 values
[i
] = states
[i
]->accessing_symbol
;
/* Publish VALUES as the "stos" table.
   NOTE(review): the remaining arguments of this call are not visible
   in this excerpt -- confirm them against the complete file.  */
259 muscle_insert_symbol_number_table ("stos", values
,
/* Scalar muscles taken from HIGH, FINAL_STATE->number and NSTATES.  */
263 MUSCLE_INSERT_INT ("last", high
);
264 MUSCLE_INSERT_INT ("final_state_number", final_state
->number
);
265 MUSCLE_INSERT_INT ("states_number", nstates
);
269 /*-------------------------------------------------------.
270 | Compare two symbols by type-name, and then by number. |
271 `-------------------------------------------------------*/
274 symbol_type_name_cmp (const symbol
**lhs
, const symbol
**rhs
)
276 int res
= UNIQSTR_CMP((*lhs
)->type_name
, (*rhs
)->type_name
);
279 return (*lhs
)->number
- (*rhs
)->number
;
283 /*----------------------------------------------------------------.
284 | Return a (malloc'ed) table of the symbols sorted by type-name. |
285 `----------------------------------------------------------------*/
288 symbols_by_type_name (void)
290 typedef int (*qcmp_type
) (const void *, const void *);
291 symbol
**res
= xmemdup (symbols
, nsyms
* sizeof *res
);
292 qsort (res
, nsyms
, sizeof *res
, (qcmp_type
) &symbol_type_name_cmp
);
297 /*------------------------------------------------------------------.
298 | Define b4_type_names, which is a list of (lists of the numbers of |
299 | symbols with same type-name). |
300 `------------------------------------------------------------------*/
303 type_names_output (FILE *out
)
306 symbol
**syms
= symbols_by_type_name ();
307 fputs ("m4_define([b4_type_names],\n[", out
);
308 for (i
= 0; i
< nsyms
; /* nothing */)
310 // The index of the first symbol of the current type-name.
312 fputs (i
? ",\n[" : "[", out
);
313 for (; i
< nsyms
&& syms
[i
]->type_name
== syms
[i0
]->type_name
; ++i
)
314 fprintf (out
, "%s%d", i
!= i0
? ", " : "", syms
[i
]->number
);
317 fputs ("])\n\n", out
);
322 /*-------------------------------------.
323 | The list of all the symbol numbers. |
324 `-------------------------------------*/
327 symbol_numbers_output (FILE *out
)
330 fputs ("m4_define([b4_symbol_numbers],\n[", out
);
331 for (i
= 0; i
< nsyms
; ++i
)
332 fprintf (out
, "%s[%d]", i
? ", " : "", i
);
333 fputs ("])\n\n", out
);
337 /*---------------------------------.
338 | Output the user actions to OUT. |
339 `---------------------------------*/
/* Define b4_actions in OUT from the RULES table.  */
342 user_actions_output (FILE *out
)
/* Open the m4 definition.  */
346 fputs ("m4_define([b4_actions], \n[", out
);
/* Visit the rules in order.  */
347 for (r
= 0; r
< nrules
; ++r
)
/* Start a b4_case entry numbered R + 1 (cases are 1-based) and open a
   b4_syncline with the start line of the rule's action.  */
350 fprintf (out
, "b4_case(%d, [b4_syncline(%d, ", r
+ 1,
351 rules
[r
].action_location
.start
.line
);
/* The file name goes through escaped_output before reaching OUT.  */
352 escaped_output (out
, rules
[r
].action_location
.start
.file
);
/* Close the b4_syncline, then emit the action text and close the
   b4_case entry.  */
353 fprintf (out
, ")\n[ %s]])\n\n", rules
[r
].action
);
/* Close the m4 definition.  */
355 fputs ("])\n\n", out
);
358 /*------------------------------------.
359 | Output the merge functions to OUT. |
360 `------------------------------------*/
363 merger_output (FILE *out
)
368 fputs ("m4_define([b4_mergers], \n[[", out
);
369 for (n
= 1, p
= merge_functions
; p
!= NULL
; n
+= 1, p
= p
->next
)
371 if (p
->type
[0] == '\0')
372 fprintf (out
, " case %d: *yy0 = %s (*yy0, *yy1); break;\n",
375 fprintf (out
, " case %d: yy0->%s = %s (*yy0, *yy1); break;\n",
376 n
, p
->type
, p
->name
);
378 fputs ("]])\n\n", out
);
382 /*---------------------------------------------.
383 | Prepare the muscles for symbol definitions. |
384 `---------------------------------------------*/
387 prepare_symbol_definitions (void)
390 for (i
= 0; i
< nsyms
; ++i
)
392 symbol
*sym
= symbols
[i
];
396 #define SET_KEY(Entry) \
397 obstack_fgrow2 (&format_obstack, "symbol(%d, %s)", i, Entry); \
398 obstack_1grow (&format_obstack, 0); \
399 key = obstack_finish (&format_obstack);
401 // Whether the symbol has an identifier.
402 value
= symbol_id_get (sym
);
404 MUSCLE_INSERT_INT (key
, !!value
);
408 MUSCLE_INSERT_STRING (key
, value
? value
: "");
410 // Its tag. Typically for documentation purpose.
412 MUSCLE_INSERT_STRING (key
, sym
->tag
);
414 SET_KEY("user_number");
415 MUSCLE_INSERT_INT (key
, sym
->user_token_number
);
418 MUSCLE_INSERT_INT (key
,
419 i
< ntokens
&& sym
!= errtoken
&& sym
!= undeftoken
);
422 MUSCLE_INSERT_INT (key
, sym
->number
);
425 MUSCLE_INSERT_INT (key
, !!sym
->type_name
);
428 MUSCLE_INSERT_STRING (key
, sym
->type_name
? sym
->type_name
: "");
430 #define CODE_PROP(PropName) \
432 code_props const *p = symbol_ ## PropName ## _get (sym); \
433 SET_KEY("has_" #PropName); \
434 MUSCLE_INSERT_INT (key, !!p->code); \
438 SET_KEY(#PropName "_file"); \
439 MUSCLE_INSERT_STRING (key, p->location.start.file); \
441 SET_KEY(#PropName "_line"); \
442 MUSCLE_INSERT_INT (key, p->location.start.line); \
444 SET_KEY(#PropName); \
445 MUSCLE_INSERT_STRING_RAW (key, p->code); \
449 CODE_PROP(destructor
);
457 /*--------------------------------------.
458 | Output the tokens definition to OUT. |
459 `--------------------------------------*/
462 token_definitions_output (FILE *out
)
465 char const *sep
= "";
467 fputs ("m4_define([b4_tokens], \n[", out
);
468 for (i
= 0; i
< ntokens
; ++i
)
470 symbol
*sym
= symbols
[i
];
471 int number
= sym
->user_token_number
;
473 /* At this stage, if there are literal aliases, they are part of
474 SYMBOLS, so we should not find symbols which are the aliases
476 aver (number
!= USER_NUMBER_ALIAS
);
478 /* Skip error token. */
482 /* If this string has an alias, then it is necessarily the alias
483 which is to be output. */
487 /* Don't output literal chars or strings (when defined only as a
488 string). Note that must be done after the alias resolution:
489 think about `%token 'f' "f"'. */
490 if (sym
->tag
[0] == '\'' || sym
->tag
[0] == '\"')
493 /* Don't #define nonliteral tokens whose names contain periods
494 or '$' (as does the default value of the EOF token). */
495 if (strchr (sym
->tag
, '.') || strchr (sym
->tag
, '$'))
498 fprintf (out
, "%s[[[%s]], %d]",
499 sep
, sym
->tag
, number
);
502 fputs ("])\n\n", out
);
507 prepare_actions (void)
509 /* Figure out the actions for the specified state, indexed by
510 lookahead token type. */
512 muscle_insert_rule_number_table ("defact", yydefact
,
513 yydefact
[0], 1, nstates
);
515 /* Figure out what to do after reducing with each rule, depending on
516 the saved state from before the beginning of parsing the data
517 that matched this rule. */
518 muscle_insert_state_number_table ("defgoto", yydefgoto
,
519 yydefgoto
[0], 1, nsyms
- ntokens
);
523 muscle_insert_base_table ("pact", base
,
524 base
[0], 1, nstates
);
525 MUSCLE_INSERT_INT ("pact_ninf", base_ninf
);
528 muscle_insert_base_table ("pgoto", base
,
529 base
[nstates
], nstates
+ 1, nvectors
);
531 muscle_insert_base_table ("table", table
,
532 table
[0], 1, high
+ 1);
533 MUSCLE_INSERT_INT ("table_ninf", table_ninf
);
535 muscle_insert_base_table ("check", check
,
536 check
[0], 1, high
+ 1);
538 /* GLR parsing slightly modifies YYTABLE and YYCHECK (and thus
539 YYPACT) so that in states with unresolved conflicts, the default
540 reduction is not used in the conflicted entries, so that there is
541 a place to put a conflict pointer.
543 This means that YYCONFLP and YYCONFL are nonsense for a non-GLR
544 parser, so we could avoid accidents by not writing them out in
545 that case. Nevertheless, it seems even better to be able to use
546 the GLR skeletons even without the non-deterministic tables. */
547 muscle_insert_unsigned_int_table ("conflict_list_heads", conflict_table
,
548 conflict_table
[0], 1, high
+ 1);
549 muscle_insert_unsigned_int_table ("conflicting_rules", conflict_list
,
550 0, 1, conflict_list_cnt
);
554 /*--------------------------------------------.
555 | Output the definitions of all the muscles. |
556 `--------------------------------------------*/
/* Write to OUT the m4 input made of the muscle definitions; the
   pieces are emitted in the order of the calls below.  */
559 muscles_output (FILE *out
)
/* The m4_init() call comes before any definition.  */
561 fputs ("m4_init()\n", out
);
/* b4_symbol_numbers.  */
563 symbol_numbers_output (out
);
/* b4_tokens.  */
564 token_definitions_output (out
);
/* b4_type_names.  */
565 type_names_output (out
);
/* b4_actions.  */
566 user_actions_output (out
);
/* The muscle table is emitted after all of the definitions above.  */
568 muscles_m4_output (out
);
571 /*---------------------------.
572 | Call the skeleton parser. |
573 `---------------------------*/
576 output_skeleton (void)
583 /* Compute the names of the package data dir and skeleton files. */
584 char const m4sugar
[] = "m4sugar/m4sugar.m4";
585 char const m4bison
[] = "bison.m4";
590 char const *m4
= (p
= getenv ("M4")) ? p
: M4
;
591 char const *pkgdatadir
= compute_pkgdatadir ();
592 size_t skeleton_size
= strlen (skeleton
) + 1;
593 size_t pkgdatadirlen
= strlen (pkgdatadir
);
594 while (pkgdatadirlen
&& pkgdatadir
[pkgdatadirlen
- 1] == '/')
596 full_skeleton
= xmalloc (pkgdatadirlen
+ 1
597 + (skeleton_size
< sizeof m4sugar
598 ? sizeof m4sugar
: skeleton_size
));
599 strncpy (full_skeleton
, pkgdatadir
, pkgdatadirlen
);
600 full_skeleton
[pkgdatadirlen
] = '/';
601 strcpy (full_skeleton
+ pkgdatadirlen
+ 1, m4sugar
);
602 full_m4sugar
= xstrdup (full_skeleton
);
603 strcpy (full_skeleton
+ pkgdatadirlen
+ 1, m4bison
);
604 full_m4bison
= xstrdup (full_skeleton
);
605 if (strchr (skeleton
, '/'))
606 strcpy (full_skeleton
, skeleton
);
608 strcpy (full_skeleton
+ pkgdatadirlen
+ 1, skeleton
);
610 /* Test whether m4sugar.m4 is readable, to check for proper
611 installation. A faulty installation can cause deadlock, so a
612 cheap sanity check is worthwhile. */
613 xfclose (xfopen (full_m4sugar
, "r"));
615 /* Create an m4 subprocess connected to us via two pipes. */
617 if (trace_flag
& trace_tools
)
618 fprintf (stderr
, "running: %s %s - %s %s\n",
619 m4
, full_m4sugar
, full_m4bison
, full_skeleton
);
621 /* Some future version of GNU M4 (most likely 1.6) may treat the -dV in a
622 position-dependent manner. Keep it as the first argument so that all
625 See the thread starting at
626 <http://lists.gnu.org/archive/html/bug-bison/2008-07/msg00000.html>
632 argv
[i
++] = pkgdatadir
;
633 if (trace_flag
& trace_m4
)
635 argv
[i
++] = full_m4sugar
;
637 argv
[i
++] = full_m4bison
;
638 argv
[i
++] = full_skeleton
;
641 /* When POSIXLY_CORRECT is set, some future versions of GNU M4 (most likely
642 2.0) may drop some of the GNU extensions that Bison's skeletons depend
643 upon. So that the next release of Bison is forward compatible with those
644 future versions of GNU M4, we unset POSIXLY_CORRECT here.
646 FIXME: A user might set POSIXLY_CORRECT to affect processes run from
647 macros like m4_syscmd in a custom skeleton. For now, Bison makes no
648 promises about the behavior of custom skeletons, so this scenario is not a
649 concern. However, we eventually want to eliminate this shortcoming. The
650 next release of GNU M4 (1.4.12 or 1.6) will accept the -g command-line
651 option as a no-op, and later releases will accept it to indicate that
652 POSIXLY_CORRECT should be ignored. Once the GNU M4 versions that accept
653 -g are pervasive, Bison should use -g instead of unsetting
656 See the thread starting at
657 <http://lists.gnu.org/archive/html/bug-bison/2008-07/msg00000.html>
659 unsetenv ("POSIXLY_CORRECT");
661 pid
= create_subpipe (argv
, filter_fd
);
664 free (full_skeleton
);
666 if (trace_flag
& trace_muscles
)
667 muscles_output (stderr
);
669 FILE *out
= fdopen (filter_fd
[0], "w");
671 error (EXIT_FAILURE
, get_errno (),
673 muscles_output (out
);
677 /* Read and process m4's output. */
678 timevar_push (TV_M4
);
679 end_of_output_subpipe (pid
, filter_fd
);
680 in
= fdopen (filter_fd
[1], "r");
682 error (EXIT_FAILURE
, get_errno (),
686 reap_subpipe (pid
, m4
);
693 /* BISON_USE_PUSH_FOR_PULL is for the test suite and should not be documented
695 char const *use_push_for_pull_env
= getenv ("BISON_USE_PUSH_FOR_PULL");
696 bool use_push_for_pull_flag
= false;
697 if (use_push_for_pull_env
!= NULL
698 && use_push_for_pull_env
[0] != '\0'
699 && 0 != strcmp (use_push_for_pull_env
, "0"))
700 use_push_for_pull_flag
= true;
703 MUSCLE_INSERT_BOOL ("debug_flag", debug_flag
);
704 MUSCLE_INSERT_BOOL ("defines_flag", defines_flag
);
705 MUSCLE_INSERT_BOOL ("glr_flag", glr_parser
);
706 MUSCLE_INSERT_BOOL ("locations_flag", locations_flag
);
707 MUSCLE_INSERT_BOOL ("nondeterministic_flag", nondeterministic_parser
);
708 MUSCLE_INSERT_BOOL ("synclines_flag", !no_lines_flag
);
709 MUSCLE_INSERT_BOOL ("tag_seen_flag", tag_seen
);
710 MUSCLE_INSERT_BOOL ("use_push_for_pull_flag", use_push_for_pull_flag
);
711 MUSCLE_INSERT_BOOL ("yacc_flag", yacc_flag
);
714 if (spec_name_prefix
)
715 MUSCLE_INSERT_STRING ("prefix", spec_name_prefix
);
717 MUSCLE_INSERT_STRING ("file_name_all_but_ext", all_but_ext
);
719 #define DEFINE(Name) MUSCLE_INSERT_STRING (#Name, Name ? Name : "")
721 DEFINE (parser_file_name
);
722 DEFINE (spec_defines_file
);
723 DEFINE (spec_file_prefix
);
724 DEFINE (spec_graph_file
);
725 DEFINE (spec_name_prefix
);
726 DEFINE (spec_outfile
);
727 DEFINE (spec_verbose_file
);
730 /* Find the right skeleton file, and add muscles about the skeletons. */
732 MUSCLE_INSERT_C_STRING ("skeleton", skeleton
);
734 skeleton
= language
->skeleton
;
736 /* About the skeletons. */
738 /* b4_pkgdatadir is used inside m4_include in the skeletons, so digraphs
739 would never be expanded. Hopefully no one has M4-special characters in
740 his Bison installation path. */
741 MUSCLE_INSERT_STRING_RAW ("pkgdatadir", compute_pkgdatadir ());
746 /*----------------------------------------------------------.
747 | Output the parsing tables and the parser code to ftable. |
748 `----------------------------------------------------------*/
753 obstack_init (&format_obstack
);
759 prepare_symbol_definitions ();
763 /* Process the selected skeleton file. */
766 obstack_free (&format_obstack
, NULL
);
770 compute_pkgdatadir (void)
772 char const *pkgdatadir
= getenv ("BISON_PKGDATADIR");
773 return pkgdatadir
? pkgdatadir
: PKGDATADIR
;