From db433e9db8298261691fa2dd8f4759b4c4fdf195 Mon Sep 17 00:00:00 2001 From: Akim Demaille Date: Wed, 29 Aug 2001 12:16:04 +0000 Subject: [PATCH] * doc/bison.texinfo (Location Tracking Calc): New node. --- ChangeLog | 4 + doc/bison.info | 208 +++++++++++++++-------------- doc/bison.info-1 | 8 ++ doc/bison.info-2 | 333 ++++++++++++++++++++++++++-------------------- doc/bison.info-3 | 251 +++++++++++++++++++--------------- doc/bison.info-4 | 107 +++++++++++++++ doc/bison.info-5 | 3 + doc/bison.texinfo | 206 ++++++++++++++++++++++++++++ 8 files changed, 765 insertions(+), 355 deletions(-) diff --git a/ChangeLog b/ChangeLog index 7a6507ec..3fe071ee 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,7 @@ +2001-08-29 Robert Anisko + + * doc/bison.texinfo (Location Tracking Calc): New node. + 2001-08-29 Paul Eggert * src/output.c (output): Do not define const, as this now diff --git a/doc/bison.info b/doc/bison.info index 905b3e6c..017768da 100644 --- a/doc/bison.info +++ b/doc/bison.info @@ -31,111 +31,115 @@ instead of in the original English.  Indirect: bison.info-1: 1313 -bison.info-2: 50345 -bison.info-3: 99970 -bison.info-4: 148168 -bison.info-5: 191130 +bison.info-2: 50688 +bison.info-3: 100578 +bison.info-4: 150128 +bison.info-5: 197515  Tag Table: (Indirect) Node: Top1313 -Node: Introduction8686 -Node: Conditions9961 -Node: Copying11425 -Node: Concepts30628 -Node: Language and Grammar31707 -Node: Grammar in Bison36723 -Node: Semantic Values38647 -Node: Semantic Actions40748 -Node: Locations Overview41937 -Node: Bison Parser43384 -Node: Stages45696 -Node: Grammar Layout46979 -Node: Examples48236 -Node: RPN Calc49371 -Node: Rpcalc Decls50345 -Node: Rpcalc Rules51932 -Node: Rpcalc Input53732 -Node: Rpcalc Line55193 -Node: Rpcalc Expr56308 -Node: Rpcalc Lexer58253 -Node: Rpcalc Main60825 -Node: Rpcalc Error61223 -Node: Rpcalc Gen62231 -Node: Rpcalc Compile63380 -Node: Infix Calc64255 -Node: Simple Error Recovery66962 -Node: Multi-function Calc68848 -Node: Mfcalc Decl70414 -Node: Mfcalc Rules72437 -Node: Mfcalc Symtab73817 -Node: Exercises80190 -Node: Grammar File80696 -Node: Grammar Outline81544 -Node: C Declarations82278 -Node: Bison Declarations82858 -Node: Grammar Rules83270 -Node: C Code83730 -Node: Symbols84660 -Node: Rules89741 -Node: Recursion91380 -Node: Semantics93099 -Node: Value Type94193 -Node: Multiple Types94865 -Node: Actions95882 -Node: Action Types98667 -Node: Mid-Rule Actions99970 -Node: Locations105540 -Node: Location Type106188 -Node: Actions and Locations106746 -Node: Location Default Action108902 -Node: Declarations110365 -Node: Token Decl111684 -Node: Precedence Decl113697 -Node: Union Decl115248 -Node: Type Decl116092 -Node: Expect Decl116998 -Node: Start Decl118544 -Node: Pure Decl118922 -Node: Decl Summary120599 -Node: Multiple Parsers125982 -Node: Interface127476 -Node: Parser Function128348 -Node: Lexical129183 -Node: Calling Convention130589 -Node: Token Values133360 -Node: Token Positions134509 -Node: Pure Calling135394 -Node: Error Reporting138326 -Node: Action Features140448 -Node: Algorithm143743 -Node: Look-Ahead146036 -Node: Shift/Reduce148168 -Node: Precedence151080 -Node: Why Precedence151731 -Node: Using Precedence153596 -Node: Precedence Examples154564 -Node: How Precedence155265 -Node: Contextual Precedence156414 -Node: Parser States158205 -Node: Reduce/Reduce159448 -Node: Mystery Conflicts163009 -Node: Stack Overflow166395 -Node: Error Recovery167768 -Node: Context Dependency172904 -Node: Semantic Tokens173752 -Node: Lexical Tie-ins176769 -Node: Tie-in Recovery178317 -Node: Debugging180489 -Node: Invocation183790 -Node: Bison Options185042 -Node: Environment Variables188654 -Node: Option Cross Key189502 -Node: VMS Invocation190346 -Node: Table of Symbols191130 -Node: Glossary198769 -Node: Copying This Manual205073 -Node: GNU Free Documentation License205282 -Node: Index225147 +Node: Introduction8966 +Node: Conditions10241 +Node: Copying11705 +Node: Concepts30908 +Node: Language and Grammar31987 +Node: Grammar in Bison37003 +Node: Semantic Values38927 +Node: Semantic Actions41028 +Node: Locations Overview42217 +Node: Bison Parser43664 +Node: Stages45976 +Node: Grammar Layout47259 +Node: Examples48516 +Node: RPN Calc49714 +Node: Rpcalc Decls50688 +Node: Rpcalc Rules52275 +Node: Rpcalc Input54075 +Node: Rpcalc Line55536 +Node: Rpcalc Expr56651 +Node: Rpcalc Lexer58596 +Node: Rpcalc Main61168 +Node: Rpcalc Error61566 +Node: Rpcalc Gen62574 +Node: Rpcalc Compile63723 +Node: Infix Calc64598 +Node: Simple Error Recovery67305 +Node: Location Tracking Calc69194 +Node: Ltcalc Decls69924 +Node: Ltcalc Rules70833 +Node: Ltcalc Lexer72894 +Node: Multi-function Calc75232 +Node: Mfcalc Decl76799 +Node: Mfcalc Rules78822 +Node: Mfcalc Symtab80202 +Node: Exercises86575 +Node: Grammar File87081 +Node: Grammar Outline87929 +Node: C Declarations88663 +Node: Bison Declarations89243 +Node: Grammar Rules89655 +Node: C Code90115 +Node: Symbols91045 +Node: Rules96126 +Node: Recursion97765 +Node: Semantics99484 +Node: Value Type100578 +Node: Multiple Types101250 +Node: Actions102267 +Node: Action Types105052 +Node: Mid-Rule Actions106355 +Node: Locations111925 +Node: Location Type112573 +Node: Actions and Locations113131 +Node: Location Default Action115287 +Node: Declarations116750 +Node: Token Decl118069 +Node: Precedence Decl120082 +Node: Union Decl121633 +Node: Type Decl122477 +Node: Expect Decl123383 +Node: Start Decl124929 +Node: Pure Decl125307 +Node: Decl Summary126984 +Node: Multiple Parsers132367 +Node: Interface133861 +Node: Parser Function134733 +Node: Lexical135568 +Node: Calling Convention136974 +Node: Token Values139745 +Node: Token Positions140894 +Node: Pure Calling141779 +Node: Error Reporting144711 +Node: Action Features146833 +Node: Algorithm150128 +Node: Look-Ahead152421 +Node: Shift/Reduce154553 +Node: Precedence157465 +Node: Why Precedence158116 +Node: Using Precedence159981 +Node: Precedence Examples160949 +Node: How Precedence161650 +Node: Contextual Precedence162799 +Node: Parser States164590 +Node: Reduce/Reduce165833 +Node: Mystery Conflicts169394 +Node: Stack Overflow172780 +Node: Error Recovery174153 +Node: Context Dependency179289 +Node: Semantic Tokens180137 +Node: Lexical Tie-ins183154 +Node: Tie-in Recovery184702 +Node: Debugging186874 +Node: Invocation190175 +Node: Bison Options191427 +Node: Environment Variables195039 +Node: Option Cross Key195887 +Node: VMS Invocation196731 +Node: Table of Symbols197515 +Node: Glossary205154 +Node: Copying This Manual211458 +Node: GNU Free Documentation License211667 +Node: Index231532  End Tag Table diff --git a/doc/bison.info-1 b/doc/bison.info-1 index 3961f742..4867aa6f 100644 --- a/doc/bison.info-1 +++ b/doc/bison.info-1 @@ -83,6 +83,7 @@ Examples * Infix Calc:: Infix (algebraic) notation calculator. Operator precedence is introduced. * Simple Error Recovery:: Continuing after syntax errors. +* Location Tracking Calc:: Demonstrating the use of @N and @$. * Multi-function Calc:: Calculator with memory and trig functions. It uses multiple data-types for semantic values. * Exercises:: Ideas for improving the multi-function calculator. @@ -103,6 +104,12 @@ Grammar Rules for `rpcalc' * Rpcalc Line:: * Rpcalc Expr:: +Location Tracking Calculator: `ltcalc' + +* Decls: Ltcalc Decls. Bison and C declarations for ltcalc. +* Rules: Ltcalc Rules. Grammar rules for ltcalc, with explanations. +* Lexer: Ltcalc Lexer. The lexical analyzer. + Multi-Function Calculator: `mfcalc' * Decl: Mfcalc Decl. Bison declarations for multi-function calculator. @@ -1036,6 +1043,7 @@ the Info file and into a source file to try them. * Infix Calc:: Infix (algebraic) notation calculator. Operator precedence is introduced. * Simple Error Recovery:: Continuing after syntax errors. +* Location Tracking Calc:: Demonstrating the use of @N and @$. * Multi-function Calc:: Calculator with memory and trig functions. It uses multiple data-types for semantic values. * Exercises:: Ideas for improving the multi-function calculator. diff --git a/doc/bison.info-2 b/doc/bison.info-2 index 21b3e2df..db638a46 100644 --- a/doc/bison.info-2 +++ b/doc/bison.info-2 @@ -494,7 +494,7 @@ Precedence. 9  -File: bison.info, Node: Simple Error Recovery, Next: Multi-function Calc, Prev: Infix Calc, Up: Examples +File: bison.info, Node: Simple Error Recovery, Next: Location Tracking Calc, Prev: Infix Calc, Up: Examples Simple Error Recovery ===================== @@ -533,7 +533,192 @@ the current line of input. We won't discuss this issue further because it is not specific to Bison programs.  -File: bison.info, Node: Multi-function Calc, Next: Exercises, Prev: Simple Error Recovery, Up: Examples +File: bison.info, Node: Location Tracking Calc, Next: Multi-function Calc, Prev: Simple Error Recovery, Up: Examples + +Location Tracking Calculator: `ltcalc' +====================================== + + This example extends the infix notation calculator with location +tracking. This feature will be used to improve error reporting, and +provide better error messages. + + For the sake of clarity, we will switch for this example to an +integer calculator, since most of the work needed to use locations will +be done in the lexical analyser. + +* Menu: + +* Decls: Ltcalc Decls. Bison and C declarations for ltcalc. +* Rules: Ltcalc Rules. Grammar rules for ltcalc, with explanations. +* Lexer: Ltcalc Lexer. The lexical analyzer. + + +File: bison.info, Node: Ltcalc Decls, Next: Ltcalc Rules, Up: Location Tracking Calc + +Declarations for `ltcalc' +------------------------- + + The C and Bison declarations for the location tracking calculator +are the same as the declarations for the infix notation calculator. + + /* Location tracking calculator. */ + + %{ + #define YYSTYPE int + #include + %} + + /* Bison declarations. */ + %token NUM + + %left '-' '+' + %left '*' '/' + %left NEG + %right '^' + + %% /* Grammar follows */ + + In the code above, there are no declarations specific to locations. +Defining a data type for storing locations is not needed: we will use +the type provided by default (*note Data Types of Locations: Location +Type.), which is a four member structure with the following integer +fields: `first_line', `first_column', `last_line' and `last_column'. + + +File: bison.info, Node: Ltcalc Rules, Next: Ltcalc Lexer, Prev: Ltcalc Decls, Up: Location Tracking Calc + +Grammar Rules for `ltcalc' +-------------------------- + + Whether you choose to handle locations or not has no effect on the +syntax of your language. Therefore, grammar rules for this example +will be very close to those of the previous example: we will only +modify them to benefit from the new informations we will have. + + Here, we will use locations to report divisions by zero, and locate +the wrong expressions or subexpressions. + + input : /* empty */ + | input line + ; + + line : '\n' + | exp '\n' { printf ("%d\n", $1); } + ; + + exp : NUM { $$ = $1; } + | exp '+' exp { $$ = $1 + $3; } + | exp '-' exp { $$ = $1 - $3; } + | exp '*' exp { $$ = $1 * $3; } + | exp '/' exp + { + if ($3) + $$ = $1 / $3; + else + { + $$ = 1; + printf("Division by zero, l%d,c%d-l%d,c%d", + @3.first_line, @3.first_column, + @3.last_line, @3.last_column); + } + } + | '-' exp %preg NEG { $$ = -$2; } + | exp '^' exp { $$ = pow ($1, $3); } + | '(' exp ')' { $$ = $2; } + + This code shows how to reach locations inside of semantic actions, by +using the pseudo-variables `@N' for rule components, and the +pseudo-variable `@$' for groupings. + + In this example, we never assign a value to `@$', because the output +parser can do this automatically. By default, before executing the C +code of each action, `@$' is set to range from the beginning of `@1' to +the end of `@N', for a rule with N components. + + Of course, this behavior can be redefined (*note Default Action for +Locations: Location Default Action.), and for very specific rules, `@$' +can be computed by hand. + + +File: bison.info, Node: Ltcalc Lexer, Prev: Ltcalc Rules, Up: Location Tracking Calc + +The `ltcalc' Lexical Analyzer. +------------------------------ + + Until now, we relied on Bison's defaults to enable location +tracking. The next step is to rewrite the lexical analyser, and make it +able to feed the parser with locations of tokens, as he already does +for semantic values. + + To do so, we must take into account every single character of the +input text, to avoid the computed locations of being fuzzy or wrong: + + int + yylex (void) + { + int c; + + /* skip white space */ + while ((c = getchar ()) == ' ' || c == '\t') + ++yylloc.last_column; + + /* step */ + yylloc.first_line = yylloc.last_line; + yylloc.first_column = yylloc.last_column; + + /* process numbers */ + if (isdigit (c)) + { + yylval = c - '0'; + ++yylloc.last_column; + while (isdigit (c = getchar ())) + { + ++yylloc.last_column; + yylval = yylval * 10 + c - '0'; + } + ungetc (c, stdin); + return NUM; + } + + /* return end-of-file */ + if (c == EOF) + return 0; + + /* return single chars and update location */ + if (c == '\n') + { + ++yylloc.last_line; + yylloc.last_column = 0; + } + else + ++yylloc.last_column; + return c; + } + + Basically, the lexical analyzer does the same processing as before: +it skips blanks and tabs, and reads numbers or single-character tokens. +In addition to this, it updates the `yylloc' global variable (of type +`YYLTYPE'), where the location of tokens is stored. + + Now, each time this function returns a token, the parser has it's +number as well as it's semantic value, and it's position in the text. +The last needed change is to initialize `yylloc', for example in the +controlling function: + + int + main (void) + { + yylloc.first_line = yylloc.last_line = 1; + yylloc.first_column = yylloc.last_column = 0; + return yyparse (); + } + + Remember that computing locations is not a matter of syntax. Every +character must be associated to a location update, whether it is in +valid input, in comments, in literal strings, and so on... + + +File: bison.info, Node: Multi-function Calc, Next: Exercises, Prev: Location Tracking Calc, Up: Examples Multi-Function Calculator: `mfcalc' =================================== @@ -1240,147 +1425,3 @@ associated with X and Y. This says when, why and how to use the exceptional action in the middle of a rule. - -File: bison.info, Node: Value Type, Next: Multiple Types, Up: Semantics - -Data Types of Semantic Values ------------------------------ - - In a simple program it may be sufficient to use the same data type -for the semantic values of all language constructs. This was true in -the RPN and infix calculator examples (*note Reverse Polish Notation -Calculator: RPN Calc.). - - Bison's default is to use type `int' for all semantic values. To -specify some other type, define `YYSTYPE' as a macro, like this: - - #define YYSTYPE double - -This macro definition must go in the C declarations section of the -grammar file (*note Outline of a Bison Grammar: Grammar Outline.). - - -File: bison.info, Node: Multiple Types, Next: Actions, Prev: Value Type, Up: Semantics - -More Than One Value Type ------------------------- - - In most programs, you will need different data types for different -kinds of tokens and groupings. For example, a numeric constant may -need type `int' or `long', while a string constant needs type `char *', -and an identifier might need a pointer to an entry in the symbol table. - - To use more than one data type for semantic values in one parser, -Bison requires you to do two things: - - * Specify the entire collection of possible data types, with the - `%union' Bison declaration (*note The Collection of Value Types: - Union Decl.). - - * Choose one of those types for each symbol (terminal or - nonterminal) for which semantic values are used. This is done for - tokens with the `%token' Bison declaration (*note Token Type - Names: Token Decl.) and for groupings with the `%type' Bison - declaration (*note Nonterminal Symbols: Type Decl.). - - -File: bison.info, Node: Actions, Next: Action Types, Prev: Multiple Types, Up: Semantics - -Actions -------- - - An action accompanies a syntactic rule and contains C code to be -executed each time an instance of that rule is recognized. The task of -most actions is to compute a semantic value for the grouping built by -the rule from the semantic values associated with tokens or smaller -groupings. - - An action consists of C statements surrounded by braces, much like a -compound statement in C. It can be placed at any position in the rule; -it is executed at that position. Most rules have just one action at -the end of the rule, following all the components. Actions in the -middle of a rule are tricky and used only for special purposes (*note -Actions in Mid-Rule: Mid-Rule Actions.). - - The C code in an action can refer to the semantic values of the -components matched by the rule with the construct `$N', which stands for -the value of the Nth component. The semantic value for the grouping -being constructed is `$$'. (Bison translates both of these constructs -into array element references when it copies the actions into the parser -file.) - - Here is a typical example: - - exp: ... - | exp '+' exp - { $$ = $1 + $3; } - -This rule constructs an `exp' from two smaller `exp' groupings -connected by a plus-sign token. In the action, `$1' and `$3' refer to -the semantic values of the two component `exp' groupings, which are the -first and third symbols on the right hand side of the rule. The sum is -stored into `$$' so that it becomes the semantic value of the -addition-expression just recognized by the rule. If there were a -useful semantic value associated with the `+' token, it could be -referred to as `$2'. - - If you don't specify an action for a rule, Bison supplies a default: -`$$ = $1'. Thus, the value of the first symbol in the rule becomes the -value of the whole rule. Of course, the default rule is valid only if -the two data types match. There is no meaningful default action for an -empty rule; every empty rule must have an explicit action unless the -rule's value does not matter. - - `$N' with N zero or negative is allowed for reference to tokens and -groupings on the stack _before_ those that match the current rule. -This is a very risky practice, and to use it reliably you must be -certain of the context in which the rule is applied. Here is a case in -which you can use this reliably: - - foo: expr bar '+' expr { ... } - | expr bar '-' expr { ... } - ; - - bar: /* empty */ - { previous_expr = $0; } - ; - - As long as `bar' is used only in the fashion shown here, `$0' always -refers to the `expr' which precedes `bar' in the definition of `foo'. - - -File: bison.info, Node: Action Types, Next: Mid-Rule Actions, Prev: Actions, Up: Semantics - -Data Types of Values in Actions -------------------------------- - - If you have chosen a single data type for semantic values, the `$$' -and `$N' constructs always have that data type. - - If you have used `%union' to specify a variety of data types, then -you must declare a choice among these types for each terminal or -nonterminal symbol that can have a semantic value. Then each time you -use `$$' or `$N', its data type is determined by which symbol it refers -to in the rule. In this example, - - exp: ... - | exp '+' exp - { $$ = $1 + $3; } - -`$1' and `$3' refer to instances of `exp', so they all have the data -type declared for the nonterminal symbol `exp'. If `$2' were used, it -would have the data type declared for the terminal symbol `'+'', -whatever that might be. - - Alternatively, you can specify the data type when you refer to the -value, by inserting `' after the `$' at the beginning of the -reference. For example, if you have defined types as shown here: - - %union { - int itype; - double dtype; - } - -then you can write `$1' to refer to the first subunit of the -rule as an integer, or `$1' to refer to it as a double. - diff --git a/doc/bison.info-3 b/doc/bison.info-3 index 7640c5c0..c80fae48 100644 --- a/doc/bison.info-3 +++ b/doc/bison.info-3 @@ -28,6 +28,150 @@ License", "Conditions for Using Bison" and this permission notice may be included in translations approved by the Free Software Foundation instead of in the original English. + +File: bison.info, Node: Value Type, Next: Multiple Types, Up: Semantics + +Data Types of Semantic Values +----------------------------- + + In a simple program it may be sufficient to use the same data type +for the semantic values of all language constructs. This was true in +the RPN and infix calculator examples (*note Reverse Polish Notation +Calculator: RPN Calc.). + + Bison's default is to use type `int' for all semantic values. To +specify some other type, define `YYSTYPE' as a macro, like this: + + #define YYSTYPE double + +This macro definition must go in the C declarations section of the +grammar file (*note Outline of a Bison Grammar: Grammar Outline.). + + +File: bison.info, Node: Multiple Types, Next: Actions, Prev: Value Type, Up: Semantics + +More Than One Value Type +------------------------ + + In most programs, you will need different data types for different +kinds of tokens and groupings. For example, a numeric constant may +need type `int' or `long', while a string constant needs type `char *', +and an identifier might need a pointer to an entry in the symbol table. + + To use more than one data type for semantic values in one parser, +Bison requires you to do two things: + + * Specify the entire collection of possible data types, with the + `%union' Bison declaration (*note The Collection of Value Types: + Union Decl.). + + * Choose one of those types for each symbol (terminal or + nonterminal) for which semantic values are used. This is done for + tokens with the `%token' Bison declaration (*note Token Type + Names: Token Decl.) and for groupings with the `%type' Bison + declaration (*note Nonterminal Symbols: Type Decl.). + + +File: bison.info, Node: Actions, Next: Action Types, Prev: Multiple Types, Up: Semantics + +Actions +------- + + An action accompanies a syntactic rule and contains C code to be +executed each time an instance of that rule is recognized. The task of +most actions is to compute a semantic value for the grouping built by +the rule from the semantic values associated with tokens or smaller +groupings. + + An action consists of C statements surrounded by braces, much like a +compound statement in C. It can be placed at any position in the rule; +it is executed at that position. Most rules have just one action at +the end of the rule, following all the components. Actions in the +middle of a rule are tricky and used only for special purposes (*note +Actions in Mid-Rule: Mid-Rule Actions.). + + The C code in an action can refer to the semantic values of the +components matched by the rule with the construct `$N', which stands for +the value of the Nth component. The semantic value for the grouping +being constructed is `$$'. (Bison translates both of these constructs +into array element references when it copies the actions into the parser +file.) + + Here is a typical example: + + exp: ... + | exp '+' exp + { $$ = $1 + $3; } + +This rule constructs an `exp' from two smaller `exp' groupings +connected by a plus-sign token. In the action, `$1' and `$3' refer to +the semantic values of the two component `exp' groupings, which are the +first and third symbols on the right hand side of the rule. The sum is +stored into `$$' so that it becomes the semantic value of the +addition-expression just recognized by the rule. If there were a +useful semantic value associated with the `+' token, it could be +referred to as `$2'. + + If you don't specify an action for a rule, Bison supplies a default: +`$$ = $1'. Thus, the value of the first symbol in the rule becomes the +value of the whole rule. Of course, the default rule is valid only if +the two data types match. There is no meaningful default action for an +empty rule; every empty rule must have an explicit action unless the +rule's value does not matter. + + `$N' with N zero or negative is allowed for reference to tokens and +groupings on the stack _before_ those that match the current rule. +This is a very risky practice, and to use it reliably you must be +certain of the context in which the rule is applied. Here is a case in +which you can use this reliably: + + foo: expr bar '+' expr { ... } + | expr bar '-' expr { ... } + ; + + bar: /* empty */ + { previous_expr = $0; } + ; + + As long as `bar' is used only in the fashion shown here, `$0' always +refers to the `expr' which precedes `bar' in the definition of `foo'. + + +File: bison.info, Node: Action Types, Next: Mid-Rule Actions, Prev: Actions, Up: Semantics + +Data Types of Values in Actions +------------------------------- + + If you have chosen a single data type for semantic values, the `$$' +and `$N' constructs always have that data type. + + If you have used `%union' to specify a variety of data types, then +you must declare a choice among these types for each terminal or +nonterminal symbol that can have a semantic value. Then each time you +use `$$' or `$N', its data type is determined by which symbol it refers +to in the rule. In this example, + + exp: ... + | exp '+' exp + { $$ = $1 + $3; } + +`$1' and `$3' refer to instances of `exp', so they all have the data +type declared for the nonterminal symbol `exp'. If `$2' were used, it +would have the data type declared for the terminal symbol `'+'', +whatever that might be. + + Alternatively, you can specify the data type when you refer to the +value, by inserting `' after the `$' at the beginning of the +reference. For example, if you have defined types as shown here: + + %union { + int itype; + double dtype; + } + +then you can write `$1' to refer to the first subunit of the +rule as an integer, or `$1' to refer to it as a double. +  File: bison.info, Node: Mid-Rule Actions, Prev: Action Types, Up: Semantics @@ -1171,110 +1315,3 @@ useful in actions. textual position of the Nth component of the current rule. *Note Tracking Locations: Locations. - -File: bison.info, Node: Algorithm, Next: Error Recovery, Prev: Interface, Up: Top - -The Bison Parser Algorithm -************************** - - As Bison reads tokens, it pushes them onto a stack along with their -semantic values. The stack is called the "parser stack". Pushing a -token is traditionally called "shifting". - - For example, suppose the infix calculator has read `1 + 5 *', with a -`3' to come. The stack will have four elements, one for each token -that was shifted. - - But the stack does not always have an element for each token read. -When the last N tokens and groupings shifted match the components of a -grammar rule, they can be combined according to that rule. This is -called "reduction". Those tokens and groupings are replaced on the -stack by a single grouping whose symbol is the result (left hand side) -of that rule. Running the rule's action is part of the process of -reduction, because this is what computes the semantic value of the -resulting grouping. - - For example, if the infix calculator's parser stack contains this: - - 1 + 5 * 3 - -and the next input token is a newline character, then the last three -elements can be reduced to 15 via the rule: - - expr: expr '*' expr; - -Then the stack contains just these three elements: - - 1 + 15 - -At this point, another reduction can be made, resulting in the single -value 16. Then the newline token can be shifted. - - The parser tries, by shifts and reductions, to reduce the entire -input down to a single grouping whose symbol is the grammar's -start-symbol (*note Languages and Context-Free Grammars: Language and -Grammar.). - - This kind of parser is known in the literature as a bottom-up parser. - -* Menu: - -* Look-Ahead:: Parser looks one token ahead when deciding what to do. -* Shift/Reduce:: Conflicts: when either shifting or reduction is valid. -* Precedence:: Operator precedence works by resolving conflicts. -* Contextual Precedence:: When an operator's precedence depends on context. -* Parser States:: The parser is a finite-state-machine with stack. -* Reduce/Reduce:: When two rules are applicable in the same situation. -* Mystery Conflicts:: Reduce/reduce conflicts that look unjustified. -* Stack Overflow:: What happens when stack gets full. How to avoid it. - - -File: bison.info, Node: Look-Ahead, Next: Shift/Reduce, Up: Algorithm - -Look-Ahead Tokens -================= - - The Bison parser does _not_ always reduce immediately as soon as the -last N tokens and groupings match a rule. This is because such a -simple strategy is inadequate to handle most languages. Instead, when a -reduction is possible, the parser sometimes "looks ahead" at the next -token in order to decide what to do. - - When a token is read, it is not immediately shifted; first it -becomes the "look-ahead token", which is not on the stack. Now the -parser can perform one or more reductions of tokens and groupings on -the stack, while the look-ahead token remains off to the side. When no -more reductions should take place, the look-ahead token is shifted onto -the stack. This does not mean that all possible reductions have been -done; depending on the token type of the look-ahead token, some rules -may choose to delay their application. - - Here is a simple case where look-ahead is needed. These three rules -define expressions which contain binary addition operators and postfix -unary factorial operators (`!'), and allow parentheses for grouping. - - expr: term '+' expr - | term - ; - - term: '(' expr ')' - | term '!' - | NUMBER - ; - - Suppose that the tokens `1 + 2' have been read and shifted; what -should be done? If the following token is `)', then the first three -tokens must be reduced to form an `expr'. This is the only valid -course, because shifting the `)' would produce a sequence of symbols -`term ')'', and no rule allows this. - - If the following token is `!', then it must be shifted immediately so -that `2 !' can be reduced to make a `term'. If instead the parser were -to reduce before shifting, `1 + 2' would become an `expr'. It would -then be impossible to shift the `!' because doing so would produce on -the stack the sequence of symbols `expr '!''. No rule allows that -sequence. - - The current look-ahead token is stored in the variable `yychar'. -*Note Special Features for Use in Actions: Action Features. - diff --git a/doc/bison.info-4 b/doc/bison.info-4 index a385c81c..6bd65ae9 100644 --- a/doc/bison.info-4 +++ b/doc/bison.info-4 @@ -28,6 +28,113 @@ License", "Conditions for Using Bison" and this permission notice may be included in translations approved by the Free Software Foundation instead of in the original English. + +File: bison.info, Node: Algorithm, Next: Error Recovery, Prev: Interface, Up: Top + +The Bison Parser Algorithm +************************** + + As Bison reads tokens, it pushes them onto a stack along with their +semantic values. The stack is called the "parser stack". Pushing a +token is traditionally called "shifting". + + For example, suppose the infix calculator has read `1 + 5 *', with a +`3' to come. The stack will have four elements, one for each token +that was shifted. + + But the stack does not always have an element for each token read. +When the last N tokens and groupings shifted match the components of a +grammar rule, they can be combined according to that rule. This is +called "reduction". Those tokens and groupings are replaced on the +stack by a single grouping whose symbol is the result (left hand side) +of that rule. Running the rule's action is part of the process of +reduction, because this is what computes the semantic value of the +resulting grouping. + + For example, if the infix calculator's parser stack contains this: + + 1 + 5 * 3 + +and the next input token is a newline character, then the last three +elements can be reduced to 15 via the rule: + + expr: expr '*' expr; + +Then the stack contains just these three elements: + + 1 + 15 + +At this point, another reduction can be made, resulting in the single +value 16. Then the newline token can be shifted. + + The parser tries, by shifts and reductions, to reduce the entire +input down to a single grouping whose symbol is the grammar's +start-symbol (*note Languages and Context-Free Grammars: Language and +Grammar.). + + This kind of parser is known in the literature as a bottom-up parser. + +* Menu: + +* Look-Ahead:: Parser looks one token ahead when deciding what to do. +* Shift/Reduce:: Conflicts: when either shifting or reduction is valid. +* Precedence:: Operator precedence works by resolving conflicts. +* Contextual Precedence:: When an operator's precedence depends on context. +* Parser States:: The parser is a finite-state-machine with stack. +* Reduce/Reduce:: When two rules are applicable in the same situation. +* Mystery Conflicts:: Reduce/reduce conflicts that look unjustified. +* Stack Overflow:: What happens when stack gets full. How to avoid it. + + +File: bison.info, Node: Look-Ahead, Next: Shift/Reduce, Up: Algorithm + +Look-Ahead Tokens +================= + + The Bison parser does _not_ always reduce immediately as soon as the +last N tokens and groupings match a rule. This is because such a +simple strategy is inadequate to handle most languages. Instead, when a +reduction is possible, the parser sometimes "looks ahead" at the next +token in order to decide what to do. + + When a token is read, it is not immediately shifted; first it +becomes the "look-ahead token", which is not on the stack. Now the +parser can perform one or more reductions of tokens and groupings on +the stack, while the look-ahead token remains off to the side. When no +more reductions should take place, the look-ahead token is shifted onto +the stack. This does not mean that all possible reductions have been +done; depending on the token type of the look-ahead token, some rules +may choose to delay their application. + + Here is a simple case where look-ahead is needed. These three rules +define expressions which contain binary addition operators and postfix +unary factorial operators (`!'), and allow parentheses for grouping. + + expr: term '+' expr + | term + ; + + term: '(' expr ')' + | term '!' + | NUMBER + ; + + Suppose that the tokens `1 + 2' have been read and shifted; what +should be done? If the following token is `)', then the first three +tokens must be reduced to form an `expr'. This is the only valid +course, because shifting the `)' would produce a sequence of symbols +`term ')'', and no rule allows this. + + If the following token is `!', then it must be shifted immediately so +that `2 !' can be reduced to make a `term'. If instead the parser were +to reduce before shifting, `1 + 2' would become an `expr'. It would +then be impossible to shift the `!' because doing so would produce on +the stack the sequence of symbols `expr '!''. No rule allows that +sequence. + + The current look-ahead token is stored in the variable `yychar'. +*Note Special Features for Use in Actions: Action Features. +  File: bison.info, Node: Shift/Reduce, Next: Precedence, Prev: Look-Ahead, Up: Algorithm diff --git a/doc/bison.info-5 b/doc/bison.info-5 index 965c6fac..927438b2 100644 --- a/doc/bison.info-5 +++ b/doc/bison.info-5 @@ -855,6 +855,7 @@ Index * C-language interface: Interface. * calc: Infix Calc. * calculator, infix notation: Infix Calc. +* calculator, location tracking: Location Tracking Calc. * calculator, multi-function: Multi-function Calc. * calculator, simple: RPN Calc. * character token: Symbols. @@ -925,8 +926,10 @@ Index * location <1>: Locations. * location: Locations Overview. * location actions: Actions and Locations. +* location tracking calculator: Location Tracking Calc. * look-ahead token: Look-Ahead. * LR(1): Mystery Conflicts. +* ltcalc: Location Tracking Calc. * main function in simple example: Rpcalc Main. * mfcalc: Multi-function Calc. * mid-rule actions: Mid-Rule Actions. diff --git a/doc/bison.texinfo b/doc/bison.texinfo index 517fcfdc..db9f2cf0 100644 --- a/doc/bison.texinfo +++ b/doc/bison.texinfo @@ -184,6 +184,7 @@ Examples * Infix Calc:: Infix (algebraic) notation calculator. Operator precedence is introduced. * Simple Error Recovery:: Continuing after syntax errors. +* Location Tracking Calc:: Demonstrating the use of @@@var{n} and @@$. * Multi-function Calc:: Calculator with memory and trig functions. It uses multiple data-types for semantic values. * Exercises:: Ideas for improving the multi-function calculator. @@ -204,6 +205,12 @@ Grammar Rules for @code{rpcalc} * Rpcalc Line:: * Rpcalc Expr:: +Location Tracking Calculator: @code{ltcalc} + +* Decls: Ltcalc Decls. Bison and C declarations for ltcalc. +* Rules: Ltcalc Rules. Grammar rules for ltcalc, with explanations. +* Lexer: Ltcalc Lexer. The lexical analyzer. + Multi-Function Calculator: @code{mfcalc} * Decl: Mfcalc Decl. Bison declarations for multi-function calculator. @@ -794,6 +801,7 @@ to try them. * Infix Calc:: Infix (algebraic) notation calculator. Operator precedence is introduced. * Simple Error Recovery:: Continuing after syntax errors. +* Location Tracking Calc:: Demonstrating the use of @@@var{n} and @@$. * Multi-function Calc:: Calculator with memory and trig functions. It uses multiple data-types for semantic values. * Exercises:: Ideas for improving the multi-function calculator. @@ -1358,6 +1366,204 @@ input lines; it would also have to discard the rest of the current line of input. We won't discuss this issue further because it is not specific to Bison programs. +@node Location Tracking Calc +@section Location Tracking Calculator: @code{ltcalc} +@cindex location tracking calculator +@cindex @code{ltcalc} +@cindex calculator, location tracking + +This example extends the infix notation calculator with location tracking. +This feature will be used to improve error reporting, and provide better +error messages. + +For the sake of clarity, we will switch for this example to an integer +calculator, since most of the work needed to use locations will be done +in the lexical analyser. + +@menu +* Decls: Ltcalc Decls. Bison and C declarations for ltcalc. +* Rules: Ltcalc Rules. Grammar rules for ltcalc, with explanations. +* Lexer: Ltcalc Lexer. The lexical analyzer. +@end menu + +@node Ltcalc Decls +@subsection Declarations for @code{ltcalc} + +The C and Bison declarations for the location tracking calculator are the same +as the declarations for the infix notation calculator. + +@example +/* Location tracking calculator. */ + +%@{ +#define YYSTYPE int +#include +%@} + +/* Bison declarations. */ +%token NUM + +%left '-' '+' +%left '*' '/' +%left NEG +%right '^' + +%% /* Grammar follows */ +@end example + +In the code above, there are no declarations specific to locations. Defining +a data type for storing locations is not needed: we will use the type provided +by default (@pxref{Location Type, ,Data Types of Locations}), which is a four +member structure with the following integer fields: @code{first_line}, +@code{first_column}, @code{last_line} and @code{last_column}. + +@node Ltcalc Rules +@subsection Grammar Rules for @code{ltcalc} + +Whether you choose to handle locations or not has no effect on the syntax of +your language. Therefore, grammar rules for this example will be very close to +those of the previous example: we will only modify them to benefit from the new +informations we will have. + +Here, we will use locations to report divisions by zero, and locate the wrong +expressions or subexpressions. + +@example +@group +input : /* empty */ + | input line +; +@end group + +@group +line : '\n' + | exp '\n' @{ printf ("%d\n", $1); @} +; +@end group + +@group +exp : NUM @{ $$ = $1; @} + | exp '+' exp @{ $$ = $1 + $3; @} + | exp '-' exp @{ $$ = $1 - $3; @} + | exp '*' exp @{ $$ = $1 * $3; @} +@end group + | exp '/' exp +@group + @{ + if ($3) + $$ = $1 / $3; + else + @{ + $$ = 1; + printf("Division by zero, l%d,c%d-l%d,c%d", + @@3.first_line, @@3.first_column, + @@3.last_line, @@3.last_column); + @} + @} +@end group +@group + | '-' exp %preg NEG @{ $$ = -$2; @} + | exp '^' exp @{ $$ = pow ($1, $3); @} + | '(' exp ')' @{ $$ = $2; @} +@end group +@end example + +This code shows how to reach locations inside of semantic actions, by +using the pseudo-variables @code{@@@var{n}} for rule components, and the +pseudo-variable @code{@@$} for groupings. + +In this example, we never assign a value to @code{@@$}, because the +output parser can do this automatically. By default, before executing +the C code of each action, @code{@@$} is set to range from the beginning +of @code{@@1} to the end of @code{@@@var{n}}, for a rule with @var{n} +components. + +Of course, this behavior can be redefined (@pxref{Location Default +Action, , Default Action for Locations}), and for very specific rules, +@code{@@$} can be computed by hand. + +@node Ltcalc Lexer +@subsection The @code{ltcalc} Lexical Analyzer. + +Until now, we relied on Bison's defaults to enable location tracking. The next +step is to rewrite the lexical analyser, and make it able to feed the parser +with locations of tokens, as he already does for semantic values. + +To do so, we must take into account every single character of the input text, +to avoid the computed locations of being fuzzy or wrong: + +@example +@group +int +yylex (void) +@{ + int c; + + /* skip white space */ + while ((c = getchar ()) == ' ' || c == '\t') + ++yylloc.last_column; + + /* step */ + yylloc.first_line = yylloc.last_line; + yylloc.first_column = yylloc.last_column; +@end group + +@group + /* process numbers */ + if (isdigit (c)) + @{ + yylval = c - '0'; + ++yylloc.last_column; + while (isdigit (c = getchar ())) + @{ + ++yylloc.last_column; + yylval = yylval * 10 + c - '0'; + @} + ungetc (c, stdin); + return NUM; + @} +@end group + + /* return end-of-file */ + if (c == EOF) + return 0; + + /* return single chars and update location */ + if (c == '\n') + @{ + ++yylloc.last_line; + yylloc.last_column = 0; + @} + else + ++yylloc.last_column; + return c; +@} +@end example + +Basically, the lexical analyzer does the same processing as before: it skips +blanks and tabs, and reads numbers or single-character tokens. In addition +to this, it updates the @code{yylloc} global variable (of type @code{YYLTYPE}), +where the location of tokens is stored. + +Now, each time this function returns a token, the parser has it's number as +well as it's semantic value, and it's position in the text. The last needed +change is to initialize @code{yylloc}, for example in the controlling +function: + +@example +int +main (void) +@{ + yylloc.first_line = yylloc.last_line = 1; + yylloc.first_column = yylloc.last_column = 0; + return yyparse (); +@} +@end example + +Remember that computing locations is not a matter of syntax. Every character +must be associated to a location update, whether it is in valid input, in +comments, in literal strings, and so on... + @node Multi-function Calc @section Multi-Function Calculator: @code{mfcalc} @cindex multi-function calculator -- 2.50.0