1 /* Output the generated parsing program for Bison.
3 Copyright (C) 1984, 1986, 1989, 1992, 2000, 2001, 2002, 2003, 2004,
4 2005, 2006, 2007, 2008, 2009, 2010 Free Software Foundation, Inc.
6 This file is part of Bison, the GNU Compiler Compiler.
8 This program is free software: you can redistribute it and/or modify
9 it under the terms of the GNU General Public License as published by
10 the Free Software Foundation, either version 3 of the License, or
11 (at your option) any later version.
13 This program is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU General Public License for more details.
18 You should have received a copy of the GNU General Public License
19 along with this program. If not, see <http://www.gnu.org/licenses/>. */
24 #include <configmake.h>
26 #include <get-errno.h>
30 #include <wait-process.h>
36 #include "muscle-tab.h"
39 #include "scan-code.h" /* max_left_semantic_context */
40 #include "scan-skel.h"
/* Number of elements of the array ARRAY: its total size divided by the
   size of its first element.  Only meaningful for a true array, not for
   a pointer.  */
44 # define ARRAY_CARDINALITY(Array) (sizeof (Array) / sizeof *(Array))
/* Scratch obstack in which muscle values and muscle keys are formatted
   before being handed to the muscle table.  */
46 static struct obstack format_obstack
;
49 /*-------------------------------------------------------------------.
50 | Create a function NAME which associates to the muscle NAME the |
51 | result of formatting the FIRST and then TABLE_DATA[BEGIN..END[ (of |
52 | TYPE), and to the muscles NAME_min and NAME_max, the min and max |
| values of the table data. |
54 `-------------------------------------------------------------------*/
/* The generated function prints FIRST and then every TABLE_DATA[i] for
   I in [BEGIN, END[ with "%6d", separated by commas, and inserts the
   resulting string as the muscle NAME.  While looping it tracks the
   smallest and largest TABLE_DATA[i]; the muscles NAME_min and NAME_max
   are then inserted from LMIN and LMAX.  The macro is instantiated
   below once per element type used for the tables of this file.
   NOTE(review): elements are printed with "%6d" whatever TYPE is;
   presumably every TYPE is int-compatible -- confirm.  */
57 #define GENERATE_MUSCLE_INSERT_TABLE(Name, Type) \
60 Name (char const *name, \
73 obstack_fgrow1 (&format_obstack, "%6d", first); \
74 for (i = begin; i < end; ++i) \
76 obstack_1grow (&format_obstack, ','); \
79 obstack_sgrow (&format_obstack, "\n "); \
84 obstack_fgrow1 (&format_obstack, "%6d", table_data[i]); \
85 if (table_data[i] < min) \
86 min = table_data[i]; \
87 if (max < table_data[i]) \
88 max = table_data[i]; \
90 obstack_1grow (&format_obstack, 0); \
91 muscle_insert (name, obstack_finish (&format_obstack)); \
95 /* Build `NAME_min' and `NAME_max' in the obstack. */ \
96 obstack_fgrow1 (&format_obstack, "%s_min", name); \
97 obstack_1grow (&format_obstack, 0); \
98 MUSCLE_INSERT_LONG_INT (obstack_finish (&format_obstack), lmin); \
99 obstack_fgrow1 (&format_obstack, "%s_max", name); \
100 obstack_1grow (&format_obstack, 0); \
101 MUSCLE_INSERT_LONG_INT (obstack_finish (&format_obstack), lmax); \
104 GENERATE_MUSCLE_INSERT_TABLE(muscle_insert_unsigned_int_table
, unsigned int)
105 GENERATE_MUSCLE_INSERT_TABLE(muscle_insert_int_table
, int)
106 GENERATE_MUSCLE_INSERT_TABLE(muscle_insert_base_table
, base_number
)
107 GENERATE_MUSCLE_INSERT_TABLE(muscle_insert_rule_number_table
, rule_number
)
108 GENERATE_MUSCLE_INSERT_TABLE(muscle_insert_symbol_number_table
, symbol_number
)
109 GENERATE_MUSCLE_INSERT_TABLE(muscle_insert_state_number_table
, state_number
)
112 /*--------------------------------------------------------------------.
113 | Print to OUT a representation of STRING escaped both for C and M4. |
114 `--------------------------------------------------------------------*/
/* Emit to OUT the C-quoted form of STRING (quotearg_style with
   c_quoting_style), rewriting some characters on the way: `$' becomes
   "$][", `@' becomes "@@", `[' becomes "@{" and `]' becomes "@}".  Any
   other character is copied unchanged.  */
117 escaped_output (FILE *out
, char const *string
)
122 for (p
= quotearg_style (c_quoting_style
, string
); *p
; p
++)
125 case '$': fputs ("$][", out
); break;
126 case '@': fputs ("@@", out
); break;
127 case '[': fputs ("@{", out
); break;
128 case ']': fputs ("@}", out
); break;
129 default: fputc (*p
, out
); break;
136 /*------------------------------------------------------------------.
137 | Prepare the muscles related to the symbols: translate, tname, and |
| toknum. |
139 `------------------------------------------------------------------*/
/* Insert the symbol-related muscles: the token_table flag, the counts
   tokens_number, nterms_number and symbols_number, undef_token_number
   and user_token_number_max, then the tables "translate", "tname" (the
   C-quoted symbol tags, closed by b4_null) and "toknum".  */
142 prepare_symbols (void)
144 MUSCLE_INSERT_BOOL ("token_table", token_table_flag
);
145 MUSCLE_INSERT_INT ("tokens_number", ntokens
);
146 MUSCLE_INSERT_INT ("nterms_number", nvars
);
147 MUSCLE_INSERT_INT ("symbols_number", nsyms
);
148 MUSCLE_INSERT_INT ("undef_token_number", undeftoken
->number
);
149 MUSCLE_INSERT_INT ("user_token_number_max", max_user_token_number
);
151 muscle_insert_symbol_number_table ("translate",
153 token_translations
[0],
154 1, max_user_token_number
+ 1);
156 /* tname -- token names. */
159 /* We assume that the table will be output starting at column 2. */
161 struct quoting_options
*qo
= clone_quoting_options (0);
162 set_quoting_style (qo
, c_quoting_style
);
163 set_quoting_flags (qo
, QA_SPLIT_TRIGRAPHS
);
164 for (i
= 0; i
< nsyms
; i
++)
166 char *cp
= quotearg_alloc (symbols
[i
]->tag
, -1, qo
);
167 /* Width of the next token, including the two quotes, the
168 comma and the space. */
169 int width
= strlen (cp
) + 2;
173 obstack_sgrow (&format_obstack
, "\n ");
178 obstack_1grow (&format_obstack
, ' ');
179 MUSCLE_OBSTACK_SGROW (&format_obstack
, cp
);
181 obstack_1grow (&format_obstack
, ',');
185 obstack_sgrow (&format_obstack
, " ]b4_null[");
187 /* Finish table and store. */
188 obstack_1grow (&format_obstack
, 0);
189 muscle_insert ("tname", obstack_finish (&format_obstack
));
192 /* Output YYTOKNUM. */
/* values[i] is the user token number of the I-th symbol, for the
   NTOKENS first symbols; the table is emitted as values[0] followed by
   the entries 1 to NTOKENS - 1.  */
195 int *values
= xnmalloc (ntokens
, sizeof *values
);
196 for (i
= 0; i
< ntokens
; ++i
)
197 values
[i
] = symbols
[i
]->user_token_number
;
198 muscle_insert_int_table ("toknum", values
,
199 values
[0], 1, ntokens
);
205 /*----------------------------------------------------------------.
206 | Prepare the muscles related to the rules: r1, r2, rline, dprec, |
| and merger. |
208 `----------------------------------------------------------------*/
/* Build one NRULES-long array per rule property (defining line, LHS
   symbol number, RHS length, dynamic precedence, merger index), insert
   them as the muscles "rline", "r1", "r2", "dprec" and "merger", then
   insert rules_number and max_left_semantic_context.  */
213 unsigned int *rline
= xnmalloc (nrules
, sizeof *rline
);
214 symbol_number
*r1
= xnmalloc (nrules
, sizeof *r1
);
215 unsigned int *r2
= xnmalloc (nrules
, sizeof *r2
);
216 int *dprec
= xnmalloc (nrules
, sizeof *dprec
);
217 int *merger
= xnmalloc (nrules
, sizeof *merger
);
220 for (r
= 0; r
< nrules
; ++r
)
222 /* LHS of the rule R. */
223 r1
[r
] = rules
[r
].lhs
->number
;
224 /* Length of rule R's RHS. */
225 r2
[r
] = rule_rhs_length(&rules
[r
]);
226 /* Line where rule was defined. */
227 rline
[r
] = rules
[r
].location
.start
.line
;
228 /* Dynamic precedence (GLR). */
229 dprec
[r
] = rules
[r
].dprec
;
230 /* Merger-function index (GLR). */
231 merger
[r
] = rules
[r
].merger
;
/* Each table is emitted as a leading 0 entry (the FIRST argument)
   followed by the NRULES per-rule values.  */
234 muscle_insert_unsigned_int_table ("rline", rline
, 0, 0, nrules
);
235 muscle_insert_symbol_number_table ("r1", r1
, 0, 0, nrules
);
236 muscle_insert_unsigned_int_table ("r2", r2
, 0, 0, nrules
);
237 muscle_insert_int_table ("dprec", dprec
, 0, 0, nrules
);
238 muscle_insert_int_table ("merger", merger
, 0, 0, nrules
);
240 MUSCLE_INSERT_INT ("rules_number", nrules
);
241 MUSCLE_INSERT_INT ("max_left_semantic_context", max_left_semantic_context
);
250 /*--------------------------------------------.
251 | Prepare the muscles related to the states. |
252 `--------------------------------------------*/
/* Collect the accessing symbol of each of the NSTATES states into a
   temporary array and insert it as the muscle "stos", then insert the
   scalar muscles last (from HIGH), final_state_number and
   states_number.  */
255 prepare_states (void)
258 symbol_number
*values
= xnmalloc (nstates
, sizeof *values
);
259 for (i
= 0; i
< nstates
; ++i
)
260 values
[i
] = states
[i
]->accessing_symbol
;
261 muscle_insert_symbol_number_table ("stos", values
,
265 MUSCLE_INSERT_INT ("last", high
);
266 MUSCLE_INSERT_INT ("final_state_number", final_state
->number
);
267 MUSCLE_INSERT_INT ("states_number", nstates
);
271 /*-------------------------------------------------------.
272 | Compare two symbols by type-name, and then by number. |
273 `-------------------------------------------------------*/
/* Comparison callback over `symbol **' elements (symbols_by_type_name
   passes it to qsort).  The type names of *LHS and *RHS are compared
   with UNIQSTR_CMP; the final return orders by ascending symbol
   number.  */
276 symbol_type_name_cmp (const symbol
**lhs
, const symbol
**rhs
)
278 int res
= UNIQSTR_CMP((*lhs
)->type_name
, (*rhs
)->type_name
);
281 return (*lhs
)->number
- (*rhs
)->number
;
285 /*----------------------------------------------------------------.
286 | Return a (malloc'ed) table of the symbols sorted by type-name. |
287 `----------------------------------------------------------------*/
/* Duplicate the SYMBOLS array (NSYMS pointers) with xmemdup and sort
   the copy with symbol_type_name_cmp.  The cast through qcmp_type
   adapts the typed comparator to the `const void *' signature that
   qsort expects.  */
290 symbols_by_type_name (void)
292 typedef int (*qcmp_type
) (const void *, const void *);
293 symbol
**res
= xmemdup (symbols
, nsyms
* sizeof *res
);
294 qsort (res
, nsyms
, sizeof *res
, (qcmp_type
) &symbol_type_name_cmp
);
299 /*------------------------------------------------------------------.
300 | Define b4_type_names, which is a list of (lists of the numbers of |
301 | symbols with same type-name). |
302 `------------------------------------------------------------------*/
/* Write the M4 definition of b4_type_names to OUT.  The symbols, as
   sorted by symbols_by_type_name, are split into runs sharing the same
   type_name; each run is printed as a bracketed, comma-separated list
   of symbol numbers, and the runs themselves are separated by ",\n".
   NOTE(review): type names are compared with `==', i.e. by pointer;
   presumably valid because they are uniqstr values -- confirm.  */
305 type_names_output (FILE *out
)
308 symbol
**syms
= symbols_by_type_name ();
309 fputs ("m4_define([b4_type_names],\n[", out
);
310 for (i
= 0; i
< nsyms
; /* nothing */)
312 // The index of the first symbol of the current type-name.
314 fputs (i
? ",\n[" : "[", out
);
315 for (; i
< nsyms
&& syms
[i
]->type_name
== syms
[i0
]->type_name
; ++i
)
316 fprintf (out
, "%s%d", i
!= i0
? ", " : "", syms
[i
]->number
);
319 fputs ("])\n\n", out
);
324 /*-------------------------------------.
325 | The list of all the symbol numbers. |
326 `-------------------------------------*/
/* Write the M4 definition of b4_symbol_numbers to OUT: the bracketed
   numbers [0], [1], ... up to NSYMS - 1, separated by ", ".  */
329 symbol_numbers_output (FILE *out
)
332 fputs ("m4_define([b4_symbol_numbers],\n[", out
);
333 for (i
= 0; i
< nsyms
; ++i
)
334 fprintf (out
, "%s[%d]", i
? ", " : "", i
);
335 fputs ("])\n\n", out
);
339 /*---------------------------------.
340 | Output the user actions to OUT. |
341 `---------------------------------*/
/* Write the M4 definition of b4_actions to OUT.  For a rule R handled
   by the loop, the entry is b4_case(R + 1, ...): a b4_syncline built
   from the starting line of the action and its file name (passed
   through escaped_output), followed by the action code itself.  */
344 user_actions_output (FILE *out
)
348 fputs ("m4_define([b4_actions], \n[", out
);
349 for (r
= 0; r
< nrules
; ++r
)
352 fprintf (out
, "b4_case(%d, [b4_syncline(%d, ", r
+ 1,
353 rules
[r
].action_location
.start
.line
);
354 escaped_output (out
, rules
[r
].action_location
.start
.file
);
355 fprintf (out
, ")\n[ %s]])\n\n", rules
[r
].action
);
357 fputs ("])\n\n", out
);
360 /*------------------------------------.
361 | Output the merge functions to OUT. |
362 `------------------------------------*/
/* Write the M4 definition of b4_mergers to OUT.  The MERGE_FUNCTIONS
   list is walked with a counter N starting at 1, and a `case N:' line
   is printed for the mergers.  Two forms exist: one assigning the
   result to *yy0, tested against an empty TYPE, and one assigning it
   to the yy0->TYPE member.  */
365 merger_output (FILE *out
)
370 fputs ("m4_define([b4_mergers], \n[[", out
);
371 for (n
= 1, p
= merge_functions
; p
!= NULL
; n
+= 1, p
= p
->next
)
373 if (p
->type
[0] == '\0')
374 fprintf (out
, " case %d: *yy0 = %s (*yy0, *yy1); break;\n",
377 fprintf (out
, " case %d: yy0->%s = %s (*yy0, *yy1); break;\n",
378 n
, p
->type
, p
->name
);
380 fputs ("]])\n\n", out
);
384 /*---------------------------------------------.
385 | Prepare the muscles for symbol definitions. |
386 `---------------------------------------------*/
/* For each of the NSYMS symbols, insert muscles describing it.  Keys
   are built by SET_KEY in FORMAT_OBSTACK and have the form
   "symbol(NUM, FIELD)", where NUM is the index of the symbol in
   SYMBOLS.  The values inserted here cover the identifier, the tag,
   the user number ("user_number"), the symbol number and the type
   name; CODE_PROP adds has_PROP, PROP_file, PROP_line and PROP for a
   code property (destructor is instantiated below).  */
389 prepare_symbol_definitions (void)
392 for (i
= 0; i
< nsyms
; ++i
)
394 symbol
*sym
= symbols
[i
];
398 #define SET_KEY(Entry) \
399 obstack_fgrow2 (&format_obstack, "symbol(%d, %s)", i, Entry); \
400 obstack_1grow (&format_obstack, 0); \
401 key = obstack_finish (&format_obstack);
403 // Whether the symbol has an identifier.
404 value
= symbol_id_get (sym
);
406 MUSCLE_INSERT_INT (key
, !!value
);
410 MUSCLE_INSERT_STRING (key
, value
? value
: "");
412 // Its tag. Typically for documentation purpose.
414 MUSCLE_INSERT_STRING (key
, sym
->tag
);
416 SET_KEY("user_number");
417 MUSCLE_INSERT_INT (key
, sym
->user_token_number
);
420 MUSCLE_INSERT_INT (key
,
421 i
< ntokens
&& sym
!= errtoken
&& sym
!= undeftoken
);
424 MUSCLE_INSERT_INT (key
, sym
->number
);
427 MUSCLE_INSERT_INT (key
, !!sym
->type_name
);
430 MUSCLE_INSERT_STRING (key
, sym
->type_name
? sym
->type_name
: "");
432 #define CODE_PROP(PropName) \
434 code_props const *p = symbol_ ## PropName ## _get (sym); \
435 SET_KEY("has_" #PropName); \
436 MUSCLE_INSERT_INT (key, !!p->code); \
440 SET_KEY(#PropName "_file"); \
441 MUSCLE_INSERT_STRING (key, p->location.start.file); \
443 SET_KEY(#PropName "_line"); \
444 MUSCLE_INSERT_INT (key, p->location.start.line); \
446 SET_KEY(#PropName); \
447 MUSCLE_INSERT_STRING_RAW (key, p->code); \
451 CODE_PROP(destructor
);
459 /*--------------------------------------.
460 | Output the tokens definition to OUT. |
461 `--------------------------------------*/
/* Write the M4 definition of b4_tokens to OUT, walking the NTOKENS
   first entries of SYMBOLS.  An entry is printed only for a symbol
   that is not the error token and that has an identifier
   (symbol_id_get); SEP starts out empty.  */
464 token_definitions_output (FILE *out
)
467 char const *sep
= "";
469 fputs ("m4_define([b4_tokens], \n[", out
);
470 for (i
= 0; i
< ntokens
; ++i
)
472 symbol
*sym
= symbols
[i
];
473 int number
= sym
->user_token_number
;
474 uniqstr id
= symbol_id_get (sym
);
476 /* At this stage, if there are literal string aliases, they are
477 part of SYMBOLS, so we should not find their aliased symbols
479 aver (number
!= USER_NUMBER_HAS_STRING_ALIAS
);
481 /* Skip error token and tokens without identifier. */
482 if (sym
!= errtoken
&& id
)
484 fprintf (out
, "%s[[[%s]], %d]",
489 fputs ("])\n\n", out
);
/* Insert the parser tables as muscles: defact, defgoto, pact (with
   pact_ninf), pgoto, table (with table_ninf), check, and the two
   conflict tables conflict_list_heads and conflicting_rules.  Apart
   from conflicting_rules, whose first entry is the constant 0, each
   table is emitted as its first element followed by the remaining
   ones.  */
494 prepare_actions (void)
496 /* Figure out the actions for the specified state, indexed by
497 lookahead token type. */
499 muscle_insert_rule_number_table ("defact", yydefact
,
500 yydefact
[0], 1, nstates
);
502 /* Figure out what to do after reducing with each rule, depending on
503 the saved state from before the beginning of parsing the data
504 that matched this rule. */
505 muscle_insert_state_number_table ("defgoto", yydefgoto
,
506 yydefgoto
[0], 1, nsyms
- ntokens
);
/* BASE holds both tables: entries [0, NSTATES[ are emitted as pact,
   entries [NSTATES, NVECTORS[ as pgoto.  */
510 muscle_insert_base_table ("pact", base
,
511 base
[0], 1, nstates
);
512 MUSCLE_INSERT_INT ("pact_ninf", base_ninf
);
515 muscle_insert_base_table ("pgoto", base
,
516 base
[nstates
], nstates
+ 1, nvectors
);
518 muscle_insert_base_table ("table", table
,
519 table
[0], 1, high
+ 1);
520 MUSCLE_INSERT_INT ("table_ninf", table_ninf
);
522 muscle_insert_base_table ("check", check
,
523 check
[0], 1, high
+ 1);
525 /* GLR parsing slightly modifies YYTABLE and YYCHECK (and thus
526 YYPACT) so that in states with unresolved conflicts, the default
527 reduction is not used in the conflicted entries, so that there is
528 a place to put a conflict pointer.
530 This means that YYCONFLP and YYCONFL are nonsense for a non-GLR
531 parser, so we could avoid accidents by not writing them out in
532 that case. Nevertheless, it seems even better to be able to use
533 the GLR skeletons even without the non-deterministic tables. */
534 muscle_insert_unsigned_int_table ("conflict_list_heads", conflict_table
,
535 conflict_table
[0], 1, high
+ 1);
536 muscle_insert_unsigned_int_table ("conflicting_rules", conflict_list
,
537 0, 1, conflict_list_cnt
);
541 /*--------------------------------------------.
542 | Output the definitions of all the muscles. |
543 `--------------------------------------------*/
/* Write to OUT the M4 input derived from the grammar: m4_init(), then
   the symbol numbers, the token definitions, the type names and the
   user actions, and finally the muscles themselves through
   muscles_m4_output.  */
546 muscles_output (FILE *out
)
548 fputs ("m4_init()\n", out
);
550 symbol_numbers_output (out
);
551 token_definitions_output (out
);
552 type_names_output (out
);
553 user_actions_output (out
);
555 muscles_m4_output (out
);
558 /*---------------------------.
559 | Call the skeleton parser. |
560 `---------------------------*/
/* Run M4 on the selected skeleton.  The full paths of m4sugar.m4,
   bison.m4 and the skeleton are built from the package data directory;
   the M4 program is the value of the M4 environment variable when set,
   the configured M4 otherwise.  M4 is started with create_pipe_bidi,
   the muscles are written to FILTER_FD[1], M4's output is read back
   from FILTER_FD[0], and the subprocess is finally waited for.  */
563 output_skeleton (void)
567 char const *argv
[10];
570 /* Compute the names of the package data dir and skeleton files. */
571 char const m4sugar
[] = "m4sugar/m4sugar.m4";
572 char const m4bison
[] = "bison.m4";
577 char const *m4
= (p
= getenv ("M4")) ? p
: M4
;
578 char const *pkgdatadir
= compute_pkgdatadir ();
579 size_t skeleton_size
= strlen (skeleton
) + 1;
580 size_t pkgdatadirlen
= strlen (pkgdatadir
);
581 while (pkgdatadirlen
&& pkgdatadir
[pkgdatadirlen
- 1] == '/')
/* One buffer is reused for the three paths: room for the directory, a
   slash, and the longer of the m4sugar name and the skeleton name.
   Both of those sizes already count the terminating null byte, and
   the bison.m4 name is shorter than the m4sugar one.  */
583 full_skeleton
= xmalloc (pkgdatadirlen
+ 1
584 + (skeleton_size
< sizeof m4sugar
585 ? sizeof m4sugar
: skeleton_size
));
586 strncpy (full_skeleton
, pkgdatadir
, pkgdatadirlen
);
587 full_skeleton
[pkgdatadirlen
] = '/';
588 strcpy (full_skeleton
+ pkgdatadirlen
+ 1, m4sugar
);
589 full_m4sugar
= xstrdup (full_skeleton
);
590 strcpy (full_skeleton
+ pkgdatadirlen
+ 1, m4bison
);
591 full_m4bison
= xstrdup (full_skeleton
);
/* A skeleton name containing a slash is copied as is over the whole
   buffer; the other strcpy places the name after the directory and
   its slash.  */
592 if (strchr (skeleton
, '/'))
593 strcpy (full_skeleton
, skeleton
);
595 strcpy (full_skeleton
+ pkgdatadirlen
+ 1, skeleton
);
597 /* Test whether m4sugar.m4 is readable, to check for proper
598 installation. A faulty installation can cause deadlock, so a
599 cheap sanity check is worthwhile. */
600 xfclose (xfopen (full_m4sugar
, "r"));
602 /* Create an m4 subprocess connected to us via two pipes. */
604 if (trace_flag
& trace_tools
)
605 fprintf (stderr
, "running: %s %s - %s %s\n",
606 m4
, full_m4sugar
, full_m4bison
, full_skeleton
);
608 /* Some future version of GNU M4 (most likely 1.6) may treat the -dV in a
609 position-dependent manner. Keep it as the first argument so that all
612 See the thread starting at
613 <http://lists.gnu.org/archive/html/bug-bison/2008-07/msg00000.html>
619 /* When POSIXLY_CORRECT is set, GNU M4 1.6 and later disable GNU
620 extensions, which Bison's skeletons depend on. With older M4,
621 it has no effect. M4 1.4.12 added a -g/--gnu command-line
622 option to make it explicit that a program wants GNU M4
623 extensions even when POSIXLY_CORRECT is set.
625 See the thread starting at
626 <http://lists.gnu.org/archive/html/bug-bison/2008-07/msg00000.html>
629 argv
[i
++] = M4_GNU_OPTION
;
632 argv
[i
++] = pkgdatadir
;
633 if (trace_flag
& trace_m4
)
635 argv
[i
++] = full_m4sugar
;
637 argv
[i
++] = full_m4bison
;
638 argv
[i
++] = full_skeleton
;
640 aver (i
<= ARRAY_CARDINALITY (argv
));
643 /* The ugly cast is because gnulib gets the const-ness wrong. */
644 pid
= create_pipe_bidi ("m4", m4
, (char **)(void*)argv
, false, true,
648 free (full_skeleton
);
650 if (trace_flag
& trace_muscles
)
651 muscles_output (stderr
);
653 FILE *out
= fdopen (filter_fd
[1], "w");
655 error (EXIT_FAILURE
, get_errno (),
657 muscles_output (out
);
661 /* Read and process m4's output. */
662 timevar_push (TV_M4
);
663 in
= fdopen (filter_fd
[0], "r");
665 error (EXIT_FAILURE
, get_errno (),
668 /* scan_skel should have read all of M4's output. Otherwise, when we
669 close the pipe, we risk letting M4 report a broken-pipe to the
673 wait_subprocess (pid
, "m4", false, false, true, true, NULL
);
// Publish the run-time configuration as muscles: the boolean flags,
// "prefix" (when spec_name_prefix is set), "file_name_all_but_ext",
// one string muscle per variable passed to DEFINE (named after the
// variable, empty when the variable is null), then "skeleton" and
// "pkgdatadir".
// use_push_for_pull_flag is true when the BISON_USE_PUSH_FOR_PULL
// environment variable is set to anything but the empty string or "0".
680 /* BISON_USE_PUSH_FOR_PULL is for the test suite and should not be documented
682 char const *use_push_for_pull_env
= getenv ("BISON_USE_PUSH_FOR_PULL");
683 bool use_push_for_pull_flag
= false;
684 if (use_push_for_pull_env
!= NULL
685 && use_push_for_pull_env
[0] != '\0'
686 && 0 != strcmp (use_push_for_pull_env
, "0"))
687 use_push_for_pull_flag
= true;
690 MUSCLE_INSERT_BOOL ("defines_flag", defines_flag
);
691 MUSCLE_INSERT_BOOL ("glr_flag", glr_parser
);
692 MUSCLE_INSERT_BOOL ("nondeterministic_flag", nondeterministic_parser
);
693 MUSCLE_INSERT_BOOL ("synclines_flag", !no_lines_flag
);
694 MUSCLE_INSERT_BOOL ("tag_seen_flag", tag_seen
);
695 MUSCLE_INSERT_BOOL ("use_push_for_pull_flag", use_push_for_pull_flag
);
696 MUSCLE_INSERT_BOOL ("yacc_flag", yacc_flag
);
699 if (spec_name_prefix
)
700 MUSCLE_INSERT_STRING ("prefix", spec_name_prefix
);
702 MUSCLE_INSERT_STRING ("file_name_all_but_ext", all_but_ext
);
704 #define DEFINE(Name) MUSCLE_INSERT_STRING (#Name, Name ? Name : "")
706 DEFINE (parser_file_name
);
707 DEFINE (spec_defines_file
);
708 DEFINE (spec_file_prefix
);
709 DEFINE (spec_graph_file
);
710 DEFINE (spec_name_prefix
);
711 DEFINE (spec_outfile
);
712 DEFINE (spec_verbose_file
);
715 /* Find the right skeleton file, and add muscles about the skeletons. */
717 MUSCLE_INSERT_C_STRING ("skeleton", skeleton
);
719 skeleton
= language
->skeleton
;
721 /* About the skeletons. */
723 /* b4_pkgdatadir is used inside m4_include in the skeletons, so digraphs
724 would never be expanded. Hopefully no one has M4-special characters in
725 his Bison installation path. */
726 MUSCLE_INSERT_STRING_RAW ("pkgdatadir", compute_pkgdatadir ());
731 /*----------------------------------------------------------.
732 | Output the parsing tables and the parser code to ftable. |
733 `----------------------------------------------------------*/
/* FORMAT_OBSTACK is live only for the duration of this sequence: it is
   initialized first, used while the muscles are prepared, and released
   entirely (obstack_free with NULL) at the end.  */
738 obstack_init (&format_obstack
);
744 prepare_symbol_definitions ();
748 /* Process the selected skeleton file. */
751 obstack_free (&format_obstack
, NULL
);
/* Return the package data directory: the value of the BISON_PKGDATADIR
   environment variable when it is set, the compile-time PKGDATADIR
   otherwise.  The result is not allocated here, so the caller must not
   free it.  */
755 compute_pkgdatadir (void)
757 char const *pkgdatadir
= getenv ("BISON_PKGDATADIR");
758 return pkgdatadir
? pkgdatadir
: PKGDATADIR
;