From 5202b6ac1d3662fe283240aab1b29f35607995cd Mon Sep 17 00:00:00 2001 From: Valentin Tolmer Date: Tue, 5 Mar 2013 12:29:50 +0100 Subject: [PATCH 1/1] gram: correct token numbering in precedence declarations In a precedence declaration, when tokens are declared with a literal character (e.g., 'a') or with an identifier (e.g., B), Bison behaved differently: the literal tokens would be numbered first, and then the other ones, leading to the following grammar: %right A B 'c' 'd' being numbered as such: 'c' 'd' A B. * src/parse-gram.y (symbol.prec): Set the symbol number when reading the symbols. * tests/conflicts.at (Token declaration order: literals vs. identifiers): New. Signed-off-by: Akim Demaille --- NEWS | 24 ++++++++++--- src/parse-gram.y | 13 +++++-- tests/conflicts.at | 86 ++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 117 insertions(+), 6 deletions(-) diff --git a/NEWS b/NEWS index ef9f4fa9..3bfc1f94 100644 --- a/NEWS +++ b/NEWS @@ -284,11 +284,27 @@ GNU Bison NEWS It used to be an error only if used in non GLR mode, _and_ if there are reduce/reduce conflicts. -** Token numbering has changed to preserve the user-defined order +** Tokens are numbered in their order of appearance - When declaring %token A B, the numbering for A is inferior to B. Up to now, - when declaring associativity at the same time, with %left (or %right, - %precedence, %nonassoc), B was inferior to A. + Contributed by Valentin Tolmer. + + With '%token A B', A had a number less than the one of B. However, + precedence declarations used to generate a reversed order. This is now + fixed, and introducing tokens with any of %token, %left, %right, + %precedence, or %nonassoc yields the same result. + + When mixing declarations of tokens with a literal character (e.g., 'a') + or with an identifier (e.g., B) in a precedence declaration, Bison + numbered the literal characters first. 
For example + + %right A B 'c' 'd' + + would lead to the tokens declared in this order: 'c' 'd' A B. Again, the + input order is now preserved. + + These changes were made so that one can remove useless precedence and + associativity declarations (i.e., map %nonassoc, %left or %right to + %precedence, or to %token) and get exactly the same output. ** Useless precedence and associativity diff --git a/src/parse-gram.y b/src/parse-gram.y index 4e887e81..e4d36cc7 100644 --- a/src/parse-gram.y +++ b/src/parse-gram.y @@ -503,8 +503,17 @@ symbols.prec: ; symbol.prec: - symbol { $$ = $1; } -| symbol INT { $$ = $1; symbol_user_token_number_set ($1, $2, @2); } + symbol + { + $$ = $1; + symbol_class_set ($1, token_sym, @1, false); + } +| symbol INT + { + $$ = $1; + symbol_user_token_number_set ($1, $2, @2); + symbol_class_set ($1, token_sym, @1, false); + } ; /* One or more symbols to be %typed. */ diff --git a/tests/conflicts.at b/tests/conflicts.at index 07ff1786..2d67a358 100644 --- a/tests/conflicts.at +++ b/tests/conflicts.at @@ -87,6 +87,92 @@ AT_BISON_OPTION_POPDEFS AT_CLEANUP +## --------------------------------------------------- ## +## Token declaration order: literals vs. identifiers. ## +## --------------------------------------------------- ## + +# This test checks that when several tokens are declared by the same keyword, +# some of them defined as a character ('a'), others as simple textual reference +# (A), they are declared correctly left to right. +# Previously, the following test would declare the states in the order 'o' 'p' +# M N, instead of M N 'o' 'p'. + +AT_SETUP([Token declaration order: literals vs. 
identifiers]) + +AT_DATA_GRAMMAR([[input.y]], +[[%token 'a' 'b' C D +%token E F 'g' 'h' +%right 'i' 'j' K L +%right M N 'o' 'p' +%% +exp: 'a' + | 'b' + | C + | D + | E + | F + | 'g' + | 'h' + | 'i' + | 'j' + | K + | L + | M + | N + | 'o' + | 'p' +; +%% +]]) + +AT_BISON_CHECK([[--report=all -o input.c input.y]], 0, [], [ignore]) +AT_CHECK([[cat input.output | sed -n '/^State 0$/,/^State 1$/p']], 0, +[[State 0 + + 0 $accept: . exp $end + 1 exp: . 'a' + 2 | . 'b' + 3 | . C + 4 | . D + 5 | . E + 6 | . F + 7 | . 'g' + 8 | . 'h' + 9 | . 'i' + 10 | . 'j' + 11 | . K + 12 | . L + 13 | . M + 14 | . N + 15 | . 'o' + 16 | . 'p' + + 'a' shift, and go to state 1 + 'b' shift, and go to state 2 + C shift, and go to state 3 + D shift, and go to state 4 + E shift, and go to state 5 + F shift, and go to state 6 + 'g' shift, and go to state 7 + 'h' shift, and go to state 8 + 'i' shift, and go to state 9 + 'j' shift, and go to state 10 + K shift, and go to state 11 + L shift, and go to state 12 + M shift, and go to state 13 + N shift, and go to state 14 + 'o' shift, and go to state 15 + 'p' shift, and go to state 16 + + exp go to state 17 + + +State 1 +]]) + +AT_CLEANUP + + ## ------------------------------- ## ## Useless associativity warning. ## ## ------------------------------- ## -- 2.45.2