/* Output the generated parsing program for Bison.

   Copyright (C) 1984, 1986, 1989, 1992, 2000, 2001, 2002, 2003, 2004,
   2005, 2006, 2007, 2008, 2009 Free Software Foundation, Inc.

   This file is part of Bison, the GNU Compiler Compiler.

   This program is free software: you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation, either version 3 of the License, or
   (at your option) any later version.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program.  If not, see <http://www.gnu.org/licenses/>.  */
24 #include <configmake.h>
26 #include <get-errno.h>
35 #include "muscle-tab.h"
38 #include "scan-code.h" /* max_left_semantic_context */
39 #include "scan-skel.h"
44 static struct obstack format_obstack
;
/*-------------------------------------------------------------------.
| Create a function NAME which associates to the muscle NAME the     |
| result of formatting the FIRST and then TABLE_DATA[BEGIN..END[ (of |
| TYPE), and to the muscle NAME_max, the max value of the            |
| TABLE_DATA.                                                        |
`-------------------------------------------------------------------*/

/* The generated function also defines the muscle NAME_min.  Entries
   are separated by commas and formatted with "%6d".

   NOTE(review): the listing this macro was recovered from had lost
   its short lines.  The parameter list after NAME, the local
   declarations, the wrapping counter J (new line after ten entries)
   and the `lmin'/`lmax' assignments are restored, not visible in the
   listing; confirm against upstream Bison.  */

#define GENERATE_MUSCLE_INSERT_TABLE(Name, Type) \
 \
static void \
Name (char const *name, \
      Type *table_data, \
      Type first, \
      int begin, \
      int end) \
{ \
  Type min = first; \
  Type max = first; \
  long int lmin; \
  long int lmax; \
  int i; \
  int j = 1; \
 \
  obstack_fgrow1 (&format_obstack, "%6d", first); \
  for (i = begin; i < end; ++i) \
    { \
      obstack_1grow (&format_obstack, ','); \
      if (j >= 10) \
        { \
          obstack_sgrow (&format_obstack, "\n "); \
          j = 1; \
        } \
      else \
        ++j; \
      obstack_fgrow1 (&format_obstack, "%6d", table_data[i]); \
      if (table_data[i] < min) \
        min = table_data[i]; \
      if (max < table_data[i]) \
        max = table_data[i]; \
    } \
  obstack_1grow (&format_obstack, 0); \
  muscle_insert (name, obstack_finish (&format_obstack)); \
 \
  lmin = min; \
  lmax = max; \
  /* Build `NAME_min' and `NAME_max' in the obstack. */ \
  obstack_fgrow1 (&format_obstack, "%s_min", name); \
  obstack_1grow (&format_obstack, 0); \
  MUSCLE_INSERT_LONG_INT (obstack_finish (&format_obstack), lmin); \
  obstack_fgrow1 (&format_obstack, "%s_max", name); \
  obstack_1grow (&format_obstack, 0); \
  MUSCLE_INSERT_LONG_INT (obstack_finish (&format_obstack), lmax); \
}
102 GENERATE_MUSCLE_INSERT_TABLE(muscle_insert_unsigned_int_table
, unsigned int)
103 GENERATE_MUSCLE_INSERT_TABLE(muscle_insert_int_table
, int)
104 GENERATE_MUSCLE_INSERT_TABLE(muscle_insert_base_table
, base_number
)
105 GENERATE_MUSCLE_INSERT_TABLE(muscle_insert_rule_number_table
, rule_number
)
106 GENERATE_MUSCLE_INSERT_TABLE(muscle_insert_symbol_number_table
, symbol_number
)
107 GENERATE_MUSCLE_INSERT_TABLE(muscle_insert_state_number_table
, state_number
)
110 /*--------------------------------------------------------------------.
111 | Print to OUT a representation of STRING escaped both for C and M4. |
112 `--------------------------------------------------------------------*/
115 escaped_output (FILE *out
, char const *string
)
120 for (p
= quotearg_style (c_quoting_style
, string
); *p
; p
++)
123 case '$': fputs ("$][", out
); break;
124 case '@': fputs ("@@", out
); break;
125 case '[': fputs ("@{", out
); break;
126 case ']': fputs ("@}", out
); break;
127 default: fputc (*p
, out
); break;
134 /*------------------------------------------------------------------.
135 | Prepare the muscles related to the symbols: translate, tname, and |
137 `------------------------------------------------------------------*/
140 prepare_symbols (void)
142 MUSCLE_INSERT_BOOL ("token_table", token_table_flag
);
143 MUSCLE_INSERT_INT ("tokens_number", ntokens
);
144 MUSCLE_INSERT_INT ("nterms_number", nvars
);
145 MUSCLE_INSERT_INT ("symbols_number", nsyms
);
146 MUSCLE_INSERT_INT ("undef_token_number", undeftoken
->number
);
147 MUSCLE_INSERT_INT ("user_token_number_max", max_user_token_number
);
149 muscle_insert_symbol_number_table ("translate",
151 token_translations
[0],
152 1, max_user_token_number
+ 1);
154 /* tname -- token names. */
157 /* We assume that the table will be output starting at column 2. */
159 struct quoting_options
*qo
= clone_quoting_options (0);
160 set_quoting_style (qo
, c_quoting_style
);
161 set_quoting_flags (qo
, QA_SPLIT_TRIGRAPHS
);
162 for (i
= 0; i
< nsyms
; i
++)
164 char *cp
= quotearg_alloc (symbols
[i
]->tag
, -1, qo
);
165 /* Width of the next token, including the two quotes, the
166 comma and the space. */
167 int width
= strlen (cp
) + 2;
171 obstack_sgrow (&format_obstack
, "\n ");
176 obstack_1grow (&format_obstack
, ' ');
177 MUSCLE_OBSTACK_SGROW (&format_obstack
, cp
);
179 obstack_1grow (&format_obstack
, ',');
183 obstack_sgrow (&format_obstack
, " ]b4_null[");
185 /* Finish table and store. */
186 obstack_1grow (&format_obstack
, 0);
187 muscle_insert ("tname", obstack_finish (&format_obstack
));
190 /* Output YYTOKNUM. */
193 int *values
= xnmalloc (ntokens
, sizeof *values
);
194 for (i
= 0; i
< ntokens
; ++i
)
195 values
[i
] = symbols
[i
]->user_token_number
;
196 muscle_insert_int_table ("toknum", values
,
197 values
[0], 1, ntokens
);
203 /*----------------------------------------------------------------.
204 | Prepare the muscles related to the rules: r1, r2, rline, dprec, |
206 `----------------------------------------------------------------*/
211 unsigned int *rline
= xnmalloc (nrules
, sizeof *rline
);
212 symbol_number
*r1
= xnmalloc (nrules
, sizeof *r1
);
213 unsigned int *r2
= xnmalloc (nrules
, sizeof *r2
);
214 int *dprec
= xnmalloc (nrules
, sizeof *dprec
);
215 int *merger
= xnmalloc (nrules
, sizeof *merger
);
218 for (r
= 0; r
< nrules
; ++r
)
220 /* LHS of the rule R. */
221 r1
[r
] = rules
[r
].lhs
->number
;
222 /* Length of rule R's RHS. */
223 r2
[r
] = rule_rhs_length(&rules
[r
]);
224 /* Line where rule was defined. */
225 rline
[r
] = rules
[r
].location
.start
.line
;
226 /* Dynamic precedence (GLR). */
227 dprec
[r
] = rules
[r
].dprec
;
228 /* Merger-function index (GLR). */
229 merger
[r
] = rules
[r
].merger
;
232 muscle_insert_unsigned_int_table ("rline", rline
, 0, 0, nrules
);
233 muscle_insert_symbol_number_table ("r1", r1
, 0, 0, nrules
);
234 muscle_insert_unsigned_int_table ("r2", r2
, 0, 0, nrules
);
235 muscle_insert_int_table ("dprec", dprec
, 0, 0, nrules
);
236 muscle_insert_int_table ("merger", merger
, 0, 0, nrules
);
238 MUSCLE_INSERT_INT ("rules_number", nrules
);
239 MUSCLE_INSERT_INT ("max_left_semantic_context", max_left_semantic_context
);
248 /*--------------------------------------------.
249 | Prepare the muscles related to the states. |
250 `--------------------------------------------*/
253 prepare_states (void)
256 symbol_number
*values
= xnmalloc (nstates
, sizeof *values
);
257 for (i
= 0; i
< nstates
; ++i
)
258 values
[i
] = states
[i
]->accessing_symbol
;
259 muscle_insert_symbol_number_table ("stos", values
,
263 MUSCLE_INSERT_INT ("last", high
);
264 MUSCLE_INSERT_INT ("final_state_number", final_state
->number
);
265 MUSCLE_INSERT_INT ("states_number", nstates
);
269 /*-------------------------------------------------------.
270 | Compare two symbols by type-name, and then by number. |
271 `-------------------------------------------------------*/
274 symbol_type_name_cmp (const symbol
**lhs
, const symbol
**rhs
)
276 int res
= UNIQSTR_CMP((*lhs
)->type_name
, (*rhs
)->type_name
);
279 return (*lhs
)->number
- (*rhs
)->number
;
283 /*----------------------------------------------------------------.
284 | Return a (malloc'ed) table of the symbols sorted by type-name. |
285 `----------------------------------------------------------------*/
288 symbols_by_type_name (void)
290 typedef int (*qcmp_type
) (const void *, const void *);
291 symbol
**res
= xmemdup (symbols
, nsyms
* sizeof *res
);
292 qsort (res
, nsyms
, sizeof *res
, (qcmp_type
) &symbol_type_name_cmp
);
297 /*------------------------------------------------------------------.
298 | Define b4_type_names, which is a list of (lists of the numbers of |
299 | symbols with same type-name). |
300 `------------------------------------------------------------------*/
303 type_names_output (FILE *out
)
306 symbol
**syms
= symbols_by_type_name ();
307 fputs ("m4_define([b4_type_names],\n[", out
);
308 for (i
= 0; i
< nsyms
; /* nothing */)
310 // The index of the first symbol of the current type-name.
312 fputs (i
? ",\n[" : "[", out
);
313 for (; i
< nsyms
&& syms
[i
]->type_name
== syms
[i0
]->type_name
; ++i
)
314 fprintf (out
, "%s%d", i
!= i0
? ", " : "", syms
[i
]->number
);
317 fputs ("])\n\n", out
);
322 /*-------------------------------------.
323 | The list of all the symbol numbers. |
324 `-------------------------------------*/
327 symbol_numbers_output (FILE *out
)
330 fputs ("m4_define([b4_symbol_numbers],\n[", out
);
331 for (i
= 0; i
< nsyms
; ++i
)
332 fprintf (out
, "%s[%d]", i
? ", " : "", i
);
333 fputs ("])\n\n", out
);
337 /*---------------------------------.
338 | Output the user actions to OUT. |
339 `---------------------------------*/
342 user_actions_output (FILE *out
)
346 fputs ("m4_define([b4_actions], \n[", out
);
347 for (r
= 0; r
< nrules
; ++r
)
350 fprintf (out
, "b4_case(%d, [b4_syncline(%d, ", r
+ 1,
351 rules
[r
].action_location
.start
.line
);
352 escaped_output (out
, rules
[r
].action_location
.start
.file
);
353 fprintf (out
, ")\n[ %s]])\n\n", rules
[r
].action
);
355 fputs ("])\n\n", out
);
358 /*------------------------------------.
359 | Output the merge functions to OUT. |
360 `------------------------------------*/
363 merger_output (FILE *out
)
368 fputs ("m4_define([b4_mergers], \n[[", out
);
369 for (n
= 1, p
= merge_functions
; p
!= NULL
; n
+= 1, p
= p
->next
)
371 if (p
->type
[0] == '\0')
372 fprintf (out
, " case %d: *yy0 = %s (*yy0, *yy1); break;\n",
375 fprintf (out
, " case %d: yy0->%s = %s (*yy0, *yy1); break;\n",
376 n
, p
->type
, p
->name
);
378 fputs ("]])\n\n", out
);
382 /*---------------------------------------------.
383 | Prepare the muscles for symbol definitions. |
384 `---------------------------------------------*/
387 prepare_symbol_definitions (void)
390 for (i
= 0; i
< nsyms
; ++i
)
392 symbol
*sym
= symbols
[i
];
396 #define SET_KEY(Entry) \
397 obstack_fgrow2 (&format_obstack, "symbol(%d, %s)", i, Entry); \
398 obstack_1grow (&format_obstack, 0); \
399 key = obstack_finish (&format_obstack);
401 // Whether the symbol has an identifier.
402 value
= symbol_id_get (sym
);
404 MUSCLE_INSERT_INT (key
, !!value
);
408 MUSCLE_INSERT_STRING (key
, value
? value
: "");
410 // Its tag. Typically for documentation purpose.
412 MUSCLE_INSERT_STRING (key
, sym
->tag
);
414 SET_KEY("user_number");
415 MUSCLE_INSERT_INT (key
, sym
->user_token_number
);
418 MUSCLE_INSERT_INT (key
,
419 i
< ntokens
&& sym
!= errtoken
&& sym
!= undeftoken
);
422 MUSCLE_INSERT_INT (key
, sym
->number
);
425 MUSCLE_INSERT_INT (key
, !!sym
->type_name
);
428 MUSCLE_INSERT_STRING (key
, sym
->type_name
? sym
->type_name
: "");
430 #define CODE_PROP(PropName) \
432 code_props const *p = symbol_ ## PropName ## _get (sym); \
433 SET_KEY("has_" #PropName); \
434 MUSCLE_INSERT_INT (key, !!p->code); \
438 SET_KEY(#PropName "_file"); \
439 MUSCLE_INSERT_STRING (key, p->location.start.file); \
441 SET_KEY(#PropName "_line"); \
442 MUSCLE_INSERT_INT (key, p->location.start.line); \
444 SET_KEY(#PropName); \
445 MUSCLE_INSERT_STRING_RAW (key, p->code); \
449 CODE_PROP(destructor
);
457 /*--------------------------------------.
458 | Output the tokens definition to OUT. |
459 `--------------------------------------*/
462 token_definitions_output (FILE *out
)
465 char const *sep
= "";
467 fputs ("m4_define([b4_tokens], \n[", out
);
468 for (i
= 0; i
< ntokens
; ++i
)
470 symbol
*sym
= symbols
[i
];
471 int number
= sym
->user_token_number
;
472 uniqstr id
= symbol_id_get (sym
);
474 /* At this stage, if there are literal aliases, they are part of
475 SYMBOLS, so we should not find symbols which are the aliases
477 aver (number
!= USER_NUMBER_ALIAS
);
479 /* Skip error token and tokens without identifier. */
480 if (sym
!= errtoken
&& id
)
482 fprintf (out
, "%s[[[%s]], %d]",
487 fputs ("])\n\n", out
);
492 prepare_actions (void)
494 /* Figure out the actions for the specified state, indexed by
495 lookahead token type. */
497 muscle_insert_rule_number_table ("defact", yydefact
,
498 yydefact
[0], 1, nstates
);
500 /* Figure out what to do after reducing with each rule, depending on
501 the saved state from before the beginning of parsing the data
502 that matched this rule. */
503 muscle_insert_state_number_table ("defgoto", yydefgoto
,
504 yydefgoto
[0], 1, nsyms
- ntokens
);
508 muscle_insert_base_table ("pact", base
,
509 base
[0], 1, nstates
);
510 MUSCLE_INSERT_INT ("pact_ninf", base_ninf
);
513 muscle_insert_base_table ("pgoto", base
,
514 base
[nstates
], nstates
+ 1, nvectors
);
516 muscle_insert_base_table ("table", table
,
517 table
[0], 1, high
+ 1);
518 MUSCLE_INSERT_INT ("table_ninf", table_ninf
);
520 muscle_insert_base_table ("check", check
,
521 check
[0], 1, high
+ 1);
523 /* GLR parsing slightly modifies YYTABLE and YYCHECK (and thus
524 YYPACT) so that in states with unresolved conflicts, the default
525 reduction is not used in the conflicted entries, so that there is
526 a place to put a conflict pointer.
528 This means that YYCONFLP and YYCONFL are nonsense for a non-GLR
529 parser, so we could avoid accidents by not writing them out in
530 that case. Nevertheless, it seems even better to be able to use
531 the GLR skeletons even without the non-deterministic tables. */
532 muscle_insert_unsigned_int_table ("conflict_list_heads", conflict_table
,
533 conflict_table
[0], 1, high
+ 1);
534 muscle_insert_unsigned_int_table ("conflicting_rules", conflict_list
,
535 0, 1, conflict_list_cnt
);
/*-------------------------------------------.
| Output the definitions of all the muscles. |
`-------------------------------------------*/

/* Writes `m4_init()' to OUT, then the M4 definitions produced by the
   individual *_output functions, and finally the muscle table
   itself.

   NOTE(review): the `merger_output (out);' call sat on a line missing
   from the damaged listing and is restored; confirm against upstream
   Bison.  */

static void
muscles_output (FILE *out)
{
  fputs ("m4_init()\n", out);
  merger_output (out);
  symbol_numbers_output (out);
  token_definitions_output (out);
  type_names_output (out);
  user_actions_output (out);
  muscles_m4_output (out);
}
556 /*---------------------------.
557 | Call the skeleton parser. |
558 `---------------------------*/
561 output_skeleton (void)
568 /* Compute the names of the package data dir and skeleton files. */
569 char const m4sugar
[] = "m4sugar/m4sugar.m4";
570 char const m4bison
[] = "bison.m4";
575 char const *m4
= (p
= getenv ("M4")) ? p
: M4
;
576 char const *pkgdatadir
= compute_pkgdatadir ();
577 size_t skeleton_size
= strlen (skeleton
) + 1;
578 size_t pkgdatadirlen
= strlen (pkgdatadir
);
579 while (pkgdatadirlen
&& pkgdatadir
[pkgdatadirlen
- 1] == '/')
581 full_skeleton
= xmalloc (pkgdatadirlen
+ 1
582 + (skeleton_size
< sizeof m4sugar
583 ? sizeof m4sugar
: skeleton_size
));
584 strncpy (full_skeleton
, pkgdatadir
, pkgdatadirlen
);
585 full_skeleton
[pkgdatadirlen
] = '/';
586 strcpy (full_skeleton
+ pkgdatadirlen
+ 1, m4sugar
);
587 full_m4sugar
= xstrdup (full_skeleton
);
588 strcpy (full_skeleton
+ pkgdatadirlen
+ 1, m4bison
);
589 full_m4bison
= xstrdup (full_skeleton
);
590 if (strchr (skeleton
, '/'))
591 strcpy (full_skeleton
, skeleton
);
593 strcpy (full_skeleton
+ pkgdatadirlen
+ 1, skeleton
);
595 /* Test whether m4sugar.m4 is readable, to check for proper
596 installation. A faulty installation can cause deadlock, so a
597 cheap sanity check is worthwhile. */
598 xfclose (xfopen (full_m4sugar
, "r"));
600 /* Create an m4 subprocess connected to us via two pipes. */
602 if (trace_flag
& trace_tools
)
603 fprintf (stderr
, "running: %s %s - %s %s\n",
604 m4
, full_m4sugar
, full_m4bison
, full_skeleton
);
606 /* Some future version of GNU M4 (most likely 1.6) may treat the -dV in a
607 position-dependent manner. Keep it as the first argument so that all
610 See the thread starting at
611 <http://lists.gnu.org/archive/html/bug-bison/2008-07/msg00000.html>
617 argv
[i
++] = pkgdatadir
;
618 if (trace_flag
& trace_m4
)
620 argv
[i
++] = full_m4sugar
;
622 argv
[i
++] = full_m4bison
;
623 argv
[i
++] = full_skeleton
;
626 /* When POSIXLY_CORRECT is set, some future versions of GNU M4 (most likely
627 2.0) may drop some of the GNU extensions that Bison's skeletons depend
628 upon. So that the next release of Bison is forward compatible with those
629 future versions of GNU M4, we unset POSIXLY_CORRECT here.
631 FIXME: A user might set POSIXLY_CORRECT to affect processes run from
632 macros like m4_syscmd in a custom skeleton. For now, Bison makes no
633 promises about the behavior of custom skeletons, so this scenario is not a
634 concern. However, we eventually want to eliminate this shortcoming. The
635 next release of GNU M4 (1.4.12 or 1.6) will accept the -g command-line
636 option as a no-op, and later releases will accept it to indicate that
637 POSIXLY_CORRECT should be ignored. Once the GNU M4 versions that accept
638 -g are pervasive, Bison should use -g instead of unsetting
641 See the thread starting at
642 <http://lists.gnu.org/archive/html/bug-bison/2008-07/msg00000.html>
644 unsetenv ("POSIXLY_CORRECT");
646 pid
= create_subpipe (argv
, filter_fd
);
649 free (full_skeleton
);
651 if (trace_flag
& trace_muscles
)
652 muscles_output (stderr
);
654 FILE *out
= fdopen (filter_fd
[0], "w");
656 error (EXIT_FAILURE
, get_errno (),
658 muscles_output (out
);
662 /* Read and process m4's output. */
663 timevar_push (TV_M4
);
664 end_of_output_subpipe (pid
, filter_fd
);
665 in
= fdopen (filter_fd
[1], "r");
667 error (EXIT_FAILURE
, get_errno (),
671 reap_subpipe (pid
, m4
);
678 /* BISON_USE_PUSH_FOR_PULL is for the test suite and should not be documented
680 char const *use_push_for_pull_env
= getenv ("BISON_USE_PUSH_FOR_PULL");
681 bool use_push_for_pull_flag
= false;
682 if (use_push_for_pull_env
!= NULL
683 && use_push_for_pull_env
[0] != '\0'
684 && 0 != strcmp (use_push_for_pull_env
, "0"))
685 use_push_for_pull_flag
= true;
688 MUSCLE_INSERT_BOOL ("defines_flag", defines_flag
);
689 MUSCLE_INSERT_BOOL ("glr_flag", glr_parser
);
690 MUSCLE_INSERT_BOOL ("nondeterministic_flag", nondeterministic_parser
);
691 MUSCLE_INSERT_BOOL ("synclines_flag", !no_lines_flag
);
692 MUSCLE_INSERT_BOOL ("tag_seen_flag", tag_seen
);
693 MUSCLE_INSERT_BOOL ("use_push_for_pull_flag", use_push_for_pull_flag
);
694 MUSCLE_INSERT_BOOL ("yacc_flag", yacc_flag
);
697 if (spec_name_prefix
)
698 MUSCLE_INSERT_STRING ("prefix", spec_name_prefix
);
700 MUSCLE_INSERT_STRING ("file_name_all_but_ext", all_but_ext
);
702 #define DEFINE(Name) MUSCLE_INSERT_STRING (#Name, Name ? Name : "")
704 DEFINE (parser_file_name
);
705 DEFINE (spec_defines_file
);
706 DEFINE (spec_file_prefix
);
707 DEFINE (spec_graph_file
);
708 DEFINE (spec_name_prefix
);
709 DEFINE (spec_outfile
);
710 DEFINE (spec_verbose_file
);
713 /* Find the right skeleton file, and add muscles about the skeletons. */
715 MUSCLE_INSERT_C_STRING ("skeleton", skeleton
);
717 skeleton
= language
->skeleton
;
719 /* About the skeletons. */
721 /* b4_pkgdatadir is used inside m4_include in the skeletons, so digraphs
722 would never be expanded. Hopefully no one has M4-special characters in
723 his Bison installation path. */
724 MUSCLE_INSERT_STRING_RAW ("pkgdatadir", compute_pkgdatadir ());
729 /*----------------------------------------------------------.
730 | Output the parsing tables and the parser code to ftable. |
731 `----------------------------------------------------------*/
736 obstack_init (&format_obstack
);
742 prepare_symbol_definitions ();
746 /* Process the selected skeleton file. */
749 obstack_free (&format_obstack
, NULL
);
753 compute_pkgdatadir (void)
755 char const *pkgdatadir
= getenv ("BISON_PKGDATADIR");
756 return pkgdatadir
? pkgdatadir
: PKGDATADIR
;