X-Git-Url: https://git.saurik.com/bison.git/blobdiff_plain/4323e0dac386d777d070c68564f1c0041b06935d..d0a304384f4207bdf7f8b1038c2226c312f5e954:/doc/bison.texi diff --git a/doc/bison.texi b/doc/bison.texi index 62ff0459..3b9f2da2 100644 --- a/doc/bison.texi +++ b/doc/bison.texi @@ -298,6 +298,7 @@ Handling Context Dependencies Debugging Your Parser * Understanding:: Understanding the structure of your parser. +* Graphviz:: Getting a visual representation of the parser. * Tracing:: Tracing the execution of your parser. Tracing Your Parser @@ -331,6 +332,7 @@ C++ Location Values * C++ position:: One point in the source file * C++ location:: Two points in the source file +* User Defined Location Type:: Required interface for locations A Complete C++ Example @@ -4783,6 +4785,10 @@ incoming terminals during the second phase of error recovery, the current lookahead and the entire stack (except the current right-hand side symbols) when the parser returns immediately, and @item +the current lookahead and the entire stack (including the current right-hand +side symbols) when the C++ parser (@file{lalr1.cc}) catches an exception in +@code{parse}, +@item the start symbol, when the parser succeeds. @end itemize @@ -5204,6 +5210,23 @@ Values, ,Semantic Values of Tokens}. If you have declared @code{%code requires} or @code{%code provides}, the output header also contains their code. @xref{%code Summary}. + +@cindex Header guard +The generated header is protected against multiple inclusions with a C +preprocessor guard: @samp{YY_@var{PREFIX}_@var{FILE}_INCLUDED}, where +@var{PREFIX} and @var{FILE} are the prefix (@pxref{Multiple Parsers, +,Multiple Parsers in the Same Program}) and generated file name turned +uppercase, with each series of non alphanumerical characters converted to a +single underscore. + +For instance with @samp{%define api.prefix "calc"} and @samp{%defines +"lib/parse.h"}, the header will be guarded as follows. 
+@example +#ifndef YY_CALC_LIB_PARSE_H_INCLUDED +# define YY_CALC_LIB_PARSE_H_INCLUDED +... +#endif /* ! YY_CALC_LIB_PARSE_H_INCLUDED */ +@end example @end deffn @deffn {Directive} %defines @var{defines-file} @@ -5452,6 +5475,22 @@ The parser namespace is @code{foo} and @code{yylex} is referenced as @end itemize @c namespace +@c ================================================== api.location.type +@item @code{api.location.type} +@findex %define api.location.type + +@itemize @bullet +@item Language(s): C++, Java + +@item Purpose: Define the location type. +@xref{User Defined Location Type}. + +@item Accepted Values: String + +@item Default Value: none + +@item History: introduced in Bison 2.7 +@end itemize @c ================================================== api.prefix @item api.prefix @@ -5460,7 +5499,7 @@ The parser namespace is @code{foo} and @code{yylex} is referenced as @itemize @bullet @item Language(s): All -@item Purpose: Rename exported symbols +@item Purpose: Rename exported symbols. @xref{Multiple Parsers, ,Multiple Parsers in the Same Program}. @item Accepted Values: String @@ -5508,9 +5547,9 @@ More user feedback will help to stabilize it.) -@c ================================================== api.tokens.prefix -@item api.tokens.prefix -@findex %define api.tokens.prefix +@c ================================================== api.token.prefix +@item api.token.prefix +@findex %define api.token.prefix @itemize @item Languages(s): all @@ -5521,7 +5560,7 @@ target language. For instance @example %token FILE for ERROR -%define api.tokens.prefix "TOK_" +%define api.token.prefix "TOK_" %% start: FILE for ERROR; @end example @@ -5542,8 +5581,10 @@ letters, underscores, and ---not at the beginning--- digits). @item Default Value: empty +@item History: +introduced in Bison 2.8 @end itemize -@c api.tokens.prefix +@c api.token.prefix @c ================================================== lex_symbol @@ -5568,10 +5609,10 @@ Boolean. 
@c lex_symbol -@c ================================================== lr.default-reductions +@c ================================================== lr.default-reduction -@item lr.default-reductions -@findex %define lr.default-reductions +@item lr.default-reduction +@findex %define lr.default-reduction @itemize @bullet @item Language(s): all @@ -5587,12 +5628,15 @@ feedback will help to stabilize it.) @item @code{accepting} if @code{lr.type} is @code{canonical-lr}. @item @code{most} otherwise. @end itemize +@item History: +introduced as @code{lr.default-reductions} in 2.5, renamed as +@code{lr.default-reduction} in 2.8. @end itemize -@c ============================================ lr.keep-unreachable-states +@c ============================================ lr.keep-unreachable-state -@item lr.keep-unreachable-states -@findex %define lr.keep-unreachable-states +@item lr.keep-unreachable-state +@findex %define lr.keep-unreachable-state @itemize @bullet @item Language(s): all @@ -5601,7 +5645,10 @@ remain in the parser tables. @xref{Unreachable States}. @item Accepted Values: Boolean @item Default Value: @code{false} @end itemize -@c lr.keep-unreachable-states +introduced as @code{lr.keep_unreachable_states} in 2.3b, renamed as +@code{lr.keep-unreachable-states} in 2.5, and as +@code{lr.keep-unreachable-state} in 2.8. +@c lr.keep-unreachable-state @c ================================================== lr.type @@ -7645,7 +7692,7 @@ and the benefits of IELR, @pxref{Bibliography,,Denny 2008 March}, and @node Default Reductions @subsection Default Reductions @cindex default reductions -@findex %define lr.default-reductions +@findex %define lr.default-reduction @findex %nonassoc After parser table construction, Bison identifies the reduction with the @@ -7727,9 +7774,9 @@ token for which there is a conflict. The correct action in this case is to split the parse instead. To adjust which states have default reductions enabled, use the -@code{%define lr.default-reductions} directive.
+@code{%define lr.default-reduction} directive. -@deffn {Directive} {%define lr.default-reductions @var{WHERE}} +@deffn {Directive} {%define lr.default-reduction @var{WHERE}} Specify the kind of states that are permitted to contain default reductions. The accepted values of @var{WHERE} are: @itemize @@ -7852,7 +7899,7 @@ parser community for years, for the publication that introduces LAC, @node Unreachable States @subsection Unreachable States -@findex %define lr.keep-unreachable-states +@findex %define lr.keep-unreachable-state @cindex unreachable states If there exists no sequence of transitions from the parser's start state to @@ -7865,7 +7912,7 @@ resolution because they are useless in the generated parser. However, keeping unreachable states is sometimes useful when trying to understand the relationship between the parser and the grammar. -@deffn {Directive} {%define lr.keep-unreachable-states @var{VALUE}} +@deffn {Directive} {%define lr.keep-unreachable-state @var{VALUE}} Request that Bison allow unreachable states to remain in the parser tables. @var{VALUE} must be a Boolean. The default is @code{false}. @end deffn @@ -8376,6 +8423,7 @@ automaton, and how to enable and understand the parser run-time traces. @menu * Understanding:: Understanding the structure of your parser. +* Graphviz:: Getting a visual representation of the parser. * Tracing:: Tracing the execution of your parser. @end menu @@ -8792,6 +8840,114 @@ precedence of @samp{/} with respect to @samp{+}, @samp{-}, and @samp{*}, but also because the associativity of @samp{/} is not specified. +@c ================================================= Graphical Representation + +@node Graphviz +@section Visualizing Your Parser +@cindex dot + +As another means to gain better understanding of the shift/reduce +automaton corresponding to the Bison parser, a DOT file can be generated. 
Note +that debugging a real grammar with this is tedious at best, and impractical +most of the time, because the generated files are huge (the generation of +a PDF or PNG file from it will take very long, and more often than not it will +fail due to memory exhaustion). This option was rather designed for beginners, +to help them understand LR parsers. + +This file is generated when the @option{--graph} option is specified +(@pxref{Invocation, , Invoking Bison}). Its name is made by removing +@samp{.tab.c} or @samp{.c} from the parser implementation file name, and +adding @samp{.dot} instead. If the grammar file is @file{foo.y}, the +Graphviz output file is called @file{foo.dot}. + +The following grammar file, @file{rr.y}, will be used in the sequel: + +@example +%% +@group +exp: a ";" | b "."; +a: "0"; +b: "0"; +@end group +@end example + +The graphical output is very similar to the textual one, and as such it is +more easily understood by making direct comparisons between them. See +@ref{Debugging, , Debugging Your Parser} for a detailed analysis of the +textual report. + +@subheading Graphical Representation of States + +The items (pointed rules) for each state are grouped together in graph nodes. +Their numbering is the same as in the verbose file. See the following points, +about transitions, for examples. + +When invoked with @option{--report=lookaheads}, the lookahead tokens, when +needed, are shown next to the relevant rule between square brackets as a +comma separated list. This is the case in the figure for the representation of +reductions, below. + +@sp 1 + +The transitions are represented as directed edges between the current and +the target states. + +@subheading Graphical Representation of Shifts + +Shifts are shown as solid arrows, labelled with the lookahead token for that +shift. The following describes a shift in the @file{rr.output} file: + +@example +@group +state 3 + + 1 exp: a . 
";" + + ";" shift, and go to state 6 +@end group +@end example + +A Graphviz rendering of this portion of the graph could be: + +@center @image{figs/example-shift, 100pt} + +@subheading Graphical Representation of Reductions + +Reductions are shown as solid arrows, leading to a diamond-shaped node +bearing the number of the reduction rule. The arrow is labelled with the +appropriate comma separated lookahead tokens. If the reduction is the default +action for the given state, there is no such label. + +This is how reductions are represented in the verbose file @file{rr.output}: +@example +state 1 + + 3 a: "0" . [";"] + 4 b: "0" . ["."] + + "." reduce using rule 4 (b) + $default reduce using rule 3 (a) +@end example + +A Graphviz rendering of this portion of the graph could be: + +@center @image{figs/example-reduce, 120pt} + +When unresolved conflicts are present, because in deterministic parsing +a single decision can be made, Bison can arbitrarily choose to disable a +reduction, see @ref{Shift/Reduce, , Shift/Reduce Conflicts}. Discarded actions +are distinguished by a red filling color on these nodes, just like how they are +reported between square brackets in the verbose file. + +The reduction corresponding to the rule number 0 is the acceptation state. It +is shown as a blue diamond, labelled "Acc". + +@subheading Graphical representation of go tos + +The @samp{go to} jump transitions are represented as dotted lines bearing +the name of the rule being jumped to. + +@c ================================================= Tracing @node Tracing @section Tracing Your Parser @@ -8824,8 +8980,8 @@ Prologue}). If the @code{%define} variable @code{api.prefix} is used (@pxref{Multiple Parsers, ,Multiple Parsers in the Same Program}), for instance @samp{%define api.prefix x}, then if @code{CDEBUG} is defined, its value controls the -tracing feature (enabled iff nonzero); otherwise tracing is enabled iff -@code{YYDEBUG} is nonzero. 
+tracing feature (enabled if and only if nonzero); otherwise tracing is +enabled if and only if @code{YYDEBUG} is nonzero. @item the option @option{-t} (POSIX Yacc compliant) @itemx the option @option{--debug} (Bison extension) @@ -9247,6 +9403,10 @@ unexpected number of conflicts is an error, and an expected number of conflicts is not reported, so @option{-W} and @option{--warning} then have no effect on the conflict report. +@item deprecated +Deprecated constructs whose support will be removed in future versions of +Bison. + @item other All warnings not categorized above. These warnings are enabled by default. @@ -9259,12 +9419,33 @@ All the warnings. @item none Turn off all the warnings. @item error -Treat warnings as errors. +See @option{-Werror}, below. @end table A category can be turned off by prefixing its name with @samp{no-}. For instance, @option{-Wno-yacc} will hide the warnings about POSIX Yacc incompatibilities. + +@item -Werror[=@var{category}] +@itemx -Wno-error[=@var{category}] +Enable warnings falling in @var{category}, and treat them as errors. If no +@var{category} is given, it defaults to making all enabled warnings into errors. + +@var{category} is the same as for @option{--warnings}, with the exception that +it may not be prefixed with @samp{no-} (see above). + +Prefixed with @samp{no}, it deactivates the error treatment for this +@var{category}. However, the warning itself won't be disabled, or enabled, by +this option. + +Note that the precedence of the @samp{=} and @samp{,} operators is such that +the following commands are @emph{not} equivalent, as the first will not treat +S/R conflicts as errors. + +@example +$ bison -Werror=yacc,conflicts-sr input.y +$ bison -Werror=yacc,error=conflicts-sr input.y +@end example @end table @noindent @@ -9384,13 +9565,23 @@ separated list of @var{things} among: Description of the grammar, conflicts (resolved and unresolved), and parser's automaton. 
+@item itemset +Implies @code{state} and augments the description of the automaton with +the full set of items for each state, instead of its core only. + @item lookahead Implies @code{state} and augments the description of the automaton with each rule's lookahead set. -@item itemset -Implies @code{state} and augments the description of the automaton with -the full set of items for each state, instead of its core only. +@item solved +Implies @code{state}. Explain how conflicts were solved thanks to +precedence and associativity directives. + +@item all +Enable all the items. + +@item none +Do not generate the report. @end table @item --report-file=@var{file} @@ -9508,8 +9699,10 @@ in the following files: @table @file @item position.hh @itemx location.hh -The definition of the classes @code{position} and @code{location}, -used for location tracking when enabled. @xref{C++ Location Values}. +The definition of the classes @code{position} and @code{location}, used for +location tracking when enabled. These files are not generated if the +@code{%define} variable @code{api.location.type} is defined. @xref{C++ +Location Values}. @item stack.hh An auxiliary class @code{stack} used by the parser. @@ -9668,10 +9861,13 @@ is some time and/or some talented C++ hacker willing to contribute to Bison. @c - %define filename_type "const symbol::Symbol" When the directive @code{%locations} is used, the C++ parser supports -location tracking, see @ref{Tracking Locations}. Two auxiliary classes -define a @code{position}, a single point in a file, and a @code{location}, a -range composed of a pair of @code{position}s (possibly spanning several -files). +location tracking, see @ref{Tracking Locations}. + +By default, two auxiliary classes define a @code{position}, a single point +in a file, and a @code{location}, a range composed of a pair of +@code{position}s (possibly spanning several files). 
But if the +@code{%define} variable @code{api.location.type} is defined, then these +classes will not be generated, and the user defined type will be used. @tindex uint In this section @code{uint} is an abbreviation for @code{unsigned int}: in @@ -9680,6 +9876,7 @@ genuine code only the latter is used. @menu * C++ position:: One point in the source file * C++ location:: Two points in the source file +* User Defined Location Type:: Required interface for locations @end menu @node C++ position @@ -9783,6 +9980,63 @@ Report @var{p} on @var{o}, taking care of special cases such as: no @code{filename} defined, or equal filename/line or column. @end deftypefun +@node User Defined Location Type +@subsubsection User Defined Location Type +@findex %define api.location.type + +Instead of using the built-in types you may use the @code{%define} variable +@code{api.location.type} to specify your own type: + +@example +%define api.location.type @var{LocationType} +@end example + +The requirements over your @var{LocationType} are: +@itemize +@item +it must be copyable; + +@item +in order to compute the (default) value of @code{@@$} in a reduction, the +parser basically runs +@example +@@$.begin = @@$1.begin; +@@$.end = @@$@var{N}.end; // The location of last right-hand side symbol. +@end example +@noindent +so there must be copyable @code{begin} and @code{end} members; + +@item +alternatively you may redefine the computation of the default location, in +which case these members are not required (@pxref{Location Default Action}); + +@item +if traces are enabled, then there must exist an @samp{std::ostream& + operator<< (std::ostream& o, const @var{LocationType}& s)} function. +@end itemize + +@sp 1 + +In programs with several C++ parsers, you may also use the @code{%define} +variable @code{api.location.type} to share a common set of built-in +definitions for @code{position} and @code{location}. 
For instance, one +parser @file{master/parser.yy} might use: + +@example +%defines +%locations +%define namespace "master::" +@end example + +@noindent +to generate the @file{master/position.hh} and @file{master/location.hh} +files, reused by other parsers as follows: + +@example +%define api.location.type "master::location" +%code requires @{ #include <master/location.hh> @} +@end example + @node C++ Parser Interface @subsection C++ Parser Interface @c - define parser_class_name @@ -9836,6 +10090,11 @@ Instantiate a syntax-error exception. @deftypemethod {parser} {int} parse () Run the syntactic analysis, and return 0 on success, 1 otherwise. + +@cindex exceptions +The whole function is wrapped in a @code{try}/@code{catch} block, so that +when an exception is thrown, the @code{%destructor}s are called to release +the lookahead symbol, and the symbols pushed on the stack. @end deftypemethod @deftypemethod {parser} {std::ostream&} debug_stream () @@ -9957,7 +10216,7 @@ So for each token type, Bison generates named constructors as follows. @deftypemethod {symbol_type} {} make_@var{token} (const @var{value_type}& @var{value}, const location_type& @var{location}) @deftypemethodx {symbol_type} {} make_@var{token} (const location_type& @var{location}) Build a complete terminal symbol for the token type @var{token} (not -including the @code{api.tokens.prefix}) whose possible semantic value is +including the @code{api.token.prefix}) whose possible semantic value is @var{value} of adequate @var{value_type}. If location tracking is enabled, also pass the @var{location}. @end deftypemethod @@ -9965,7 +10224,7 @@ also pass the @var{location}. For instance, given the following declarations: @example -%define api.tokens.prefix "TOK_" +%define api.token.prefix "TOK_" %token IDENTIFIER; %token INTEGER; %token COLON; @@ -10287,11 +10546,11 @@ The token numbered as 0 corresponds to end of file; the following line allows for nicer error messages referring to ``end of file'' instead of ``$end''. 
Similarly user friendly names are provided for each symbol. To avoid name clashes in the generated files (@pxref{Calc++ Scanner}), prefix -tokens with @code{TOK_} (@pxref{%define Summary,,api.tokens.prefix}). +tokens with @code{TOK_} (@pxref{%define Summary,,api.token.prefix}). @comment file: calc++-parser.yy @example -%define api.tokens.prefix "TOK_" +%define api.token.prefix "TOK_" %token END 0 "end of file" ASSIGN ":=" @@ -10320,9 +10579,8 @@ tags. No @code{%destructor} is needed to enable memory deallocation during error recovery; the memory, for strings for instance, will be reclaimed by the regular destructors. All the values are printed using their -@code{operator<<}. +@code{operator<<} (@pxref{Printer Decl, , Printing Semantic Values}). -@c FIXME: Document %printer, and mention that it takes a braced-code operand. @comment file: calc++-parser.yy @example %printer @{ yyoutput << $$; @} <*>; @@ -10664,11 +10922,11 @@ class defines a @dfn{position}, a single point in a file; Bison itself defines a class representing a @dfn{location}, a range composed of a pair of positions (possibly spanning several files). The location class is an inner class of the parser; the name is @code{Location} by default, and may also be -renamed using @samp{%define location_type "@var{class-name}"}. +renamed using @code{%define api.location.type "@var{class-name}"}. The location class treats the position as a completely opaque value. By default, the class name is @code{Position}, but this can be changed -with @samp{%define position_type "@var{class-name}"}. This class must +with @code{%define api.position.type "@var{class-name}"}. This class must be supplied by the user. @@ -10830,7 +11088,7 @@ In both cases, the scanner has to implement the following methods. @deftypemethod {Lexer} {void} yyerror (Location @var{loc}, String @var{msg}) This method is defined by the user to emit an error message. The first parameter is omitted if location tracking is not active. 
Its type can be -changed using @samp{%define location_type "@var{class-name}".} +changed using @code{%define api.location.type "@var{class-name}".} @end deftypemethod @deftypemethod {Lexer} {int} yylex () @@ -10848,7 +11106,7 @@ Return respectively the first position of the last token that @code{yylex} returned, and the first position beyond it. These methods are not needed unless location tracking is active. -The return type can be changed using @samp{%define position_type +The return type can be changed using @code{%define api.position.type "@var{class-name}".} @end deftypemethod @@ -11110,10 +11368,11 @@ comma-separated list. Default is @code{java.io.IOException}. @xref{Java Scanner Interface}. @end deffn -@deffn {Directive} {%define location_type} "@var{class}" +@deffn {Directive} {%define api.location.type} "@var{class}" The name of the class used for locations (a range between two positions). This class is generated as an inner class of the parser class by @command{bison}. Default is @code{Location}. +Formerly named @code{location_type}. @xref{Java Location Values}. @end deffn @@ -11128,9 +11387,10 @@ The name of the parser class. Default is @code{YYParser} or @xref{Java Bison Interface}. @end deffn -@deffn {Directive} {%define position_type} "@var{class}" +@deffn {Directive} {%define api.position.type} "@var{class}" The name of the class used for positions. This class must be supplied by the user. Default is @code{Position}. +Formerly named @code{position_type}. @xref{Java Location Values}. @end deffn @@ -12084,13 +12344,6 @@ parse a single token. @xref{Push Parser Function, ,The Push Parser Function More user feedback will help to stabilize it.) @end deffn -@deffn {Macro} YYPARSE_PARAM -An obsolete macro for specifying the name of a parameter that -@code{yyparse} should accept. The use of this macro is deprecated, and -is supported only for Yacc like parsers. @xref{Pure Calling,, Calling -Conventions for Pure Parsers}. 
-@end deffn - @deffn {Macro} YYRECOVERING The expression @code{YYRECOVERING ()} yields 1 when the parser is recovering from a syntax error, and 0 otherwise. @@ -12432,10 +12685,12 @@ London, Department of Computer Science, TR-00-12 (December 2000). @c LocalWords: toString deftypeivar deftypeivarx deftypeop YYParser strictfp @c LocalWords: superclasses boolean getErrorVerbose setErrorVerbose deftypecv @c LocalWords: getDebugStream setDebugStream getDebugLevel setDebugLevel url -@c LocalWords: bisonVersion deftypecvx bisonSkeleton getStartPos getEndPos +@c LocalWords: bisonVersion deftypecvx bisonSkeleton getStartPos getEndPos uint @c LocalWords: getLVal defvar deftypefn deftypefnx gotos msgfmt Corbett LALR's -@c LocalWords: subdirectory Solaris nonassociativity perror schemas Malloy -@c LocalWords: Scannerless ispell american +@c LocalWords: subdirectory Solaris nonassociativity perror schemas Malloy ints +@c LocalWords: Scannerless ispell american ChangeLog smallexample CSTYPE CLTYPE +@c LocalWords: clval CDEBUG cdebug deftypeopx yyterminate LocationType +@c LocalWords: errorVerbose @c Local Variables: @c ispell-dictionary: "american"