From bf70fa8729594f339189575d3a90782014eef6d0 Mon Sep 17 00:00:00 2001 From: "Joel E. Denny" Date: Fri, 6 Jan 2006 20:48:33 +0000 Subject: [PATCH] * data/glr.c (yyGLRStateSet): Add yybool* yylookaheadStatuses member to use during nondeterministic operation to track which stacks have actually needed the current lookahead. (yyinitStateSet, yyfreeStateSet, yyremoveDeletes, yysplitStack): Allocate, deallocate, resize, and otherwise shuffle space for yylookaheadStatuses in parallel with yystates member of yyGLRStateSet. (yysplitStack, yyprocessOneStack, yyparse): Set lookahead status appropriately during nondeterministic operation. (yySemanticOption): Add int yyrawchar, YYSTYPE yyval, and YYLTYPE yyloc members to store the current lookahead to be used by the deferred user action. (yyaddDeferredAction): Add size_t yyk parameter specifying the stack from which the RHS is taken. Set the lookahead members of the new yySemanticOption according to the lookahead status for stack yyk. (yyglrShiftDefer, yyglrReduce): Pass yyk parameter on to yyaddDeferredAction. (yyresolveAction): Set yychar, yylval, and yylloc to the lookahead members of yySemanticOption before invoking yyuserAction, and then set them back to their current values afterward. (yyparse): Set yychar = YYEMPTY where yytoken = YYEMPTY. (yyreportAmbiguity): Add /*ARGSUSED*/ to pacify lint. * tests/glr-regression.at: Remove `.' from the ends of recent test case titles for consistency. (Leaked merged semantic value if user action cuts parse): In order to suppress lint warnings, use arguments in merge function, and assign char value < 128 in main. (Incorrect lookahead during deterministic GLR): New test case. (Incorrect lookahead during nondeterministic GLR): New test case. --- ChangeLog | 31 ++++ data/glr.c | 82 ++++++++- tests/glr-regression.at | 358 +++++++++++++++++++++++++++++++++++++++- 3 files changed, 458 insertions(+), 13 deletions(-) diff --git a/ChangeLog b/ChangeLog index dfdb3529..d4fd13d8 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,34 @@ +2006-01-06 Joel E. Denny + + * data/glr.c (yyGLRStateSet): Add yybool* yylookaheadStatuses member to + use during nondeterministic operation to track which stacks have + actually needed the current lookahead. + (yyinitStateSet, yyfreeStateSet, yyremoveDeletes, yysplitStack): + Allocate, deallocate, resize, and otherwise shuffle space for + yylookaheadStatuses in parallel with yystates member of yyGLRStateSet. + (yysplitStack, yyprocessOneStack, yyparse): Set lookahead status + appropriately during nondeterministic operation. + (yySemanticOption): Add int yyrawchar, YYSTYPE yyval, and YYLTYPE yyloc + members to store the current lookahead to be used by the deferred + user action. + (yyaddDeferredAction): Add size_t yyk parameter specifying the stack + from which the RHS is taken. Set the lookahead members of the new + yySemanticOption according to the lookahead status for stack yyk. + (yyglrShiftDefer, yyglrReduce): Pass yyk parameter on to + yyaddDeferredAction. + (yyresolveAction): Set yychar, yylval, and yylloc to the lookahead + members of yySemanticOption before invoking yyuserAction, and then set + them back to their current values afterward. + (yyparse): Set yychar = YYEMPTY where yytoken = YYEMPTY. + (yyreportAmbiguity): Add /*ARGSUSED*/ to pacify lint. + * tests/glr-regression.at: Remove `.' from the ends of recent test case + titles for consistency. + (Leaked merged semantic value if user action cuts parse): In order to + suppress lint warnings, use arguments in merge function, and assign + char value < 128 in main. + (Incorrect lookahead during deterministic GLR): New test case. + (Incorrect lookahead during nondeterministic GLR): New test case. + 2006-01-06 Joel E. Denny * data/c.m4 (b4_yy_symbol_print_generate): In yy_symbol_print, accept diff --git a/data/glr.c b/data/glr.c index e488f7c9..f3d7f990 100644 --- a/data/glr.c +++ b/data/glr.c @@ -761,6 +761,11 @@ struct yyGLRState { struct yyGLRStateSet { yyGLRState** yystates; + /** During nondeterministic operation, yylookaheadStatuses tracks which + * stacks have actually needed the current lookahead. During deterministic + * operation, yylookaheadStatuses[0] is not maintained since it would merely + * duplicate yychar != YYEMPTY. */ + yybool* yylookaheadStatuses; size_t yysize, yycapacity; }; @@ -771,6 +776,10 @@ struct yySemanticOption { yyRuleNum yyrule; /** The last RHS state in the list of states to be reduced. */ yyGLRState* yystate; + /** The lookahead for this reduction. */ + int yyrawchar; + YYSTYPE yyval; + YYLTYPE yyloc; /** Next sibling in chain of options. To facilitate merging, * options are chained in decreasing order by address. */ yySemanticOption* yynext; @@ -1092,14 +1101,24 @@ yynewGLRStackItem (yyGLRStack* yystackp, yybool yyisState) return yynewItem; } +/** Stack #K = the stack from which RHS is taken. This might not be the stack + * containing STATE, to which the deferred action is added. */ static void -yyaddDeferredAction (yyGLRStack* yystackp, yyGLRState* yystate, +yyaddDeferredAction (yyGLRStack* yystackp, size_t yyk, yyGLRState* yystate, yyGLRState* rhs, yyRuleNum yyrule) { yySemanticOption* yynewOption = &yynewGLRStackItem (yystackp, yyfalse)->yyoption; yynewOption->yystate = rhs; yynewOption->yyrule = yyrule; + if (yystackp->yytops.yylookaheadStatuses[yyk]) + { + yynewOption->yyrawchar = yychar; + yynewOption->yyval = yylval; + yynewOption->yyloc = yylloc; + } + else + yynewOption->yyrawchar = YYEMPTY; yynewOption->yynext = yystate->yysemantics.yyfirstVal; yystate->yysemantics.yyfirstVal = yynewOption; @@ -1118,12 +1137,20 @@ yyinitStateSet (yyGLRStateSet* yyset) if (! yyset->yystates) return yyfalse; yyset->yystates[0] = NULL; + yyset->yylookaheadStatuses = + (yybool*) YYMALLOC (16 * sizeof yyset->yylookaheadStatuses[0]); + if (! yyset->yylookaheadStatuses) + { + YYFREE (yyset->yystates); + return yyfalse; + } return yytrue; } static void yyfreeStateSet (yyGLRStateSet* yyset) { YYFREE (yyset->yystates); + YYFREE (yyset->yylookaheadStatuses); } /** Initialize STACK to a single empty stack, with total maximum @@ -1270,6 +1297,13 @@ yyremoveDeletes (yyGLRStack* yystackp) else { yystackp->yytops.yystates[yyj] = yystackp->yytops.yystates[yyi]; + /* In the current implementation, it's unnecessary to copy + yystackp->yytops.yylookaheadStatuses[yyi] since, after + yyremoveDeletes returns, the parser immediately either enters + deterministic operation or shifts a token. However, it doesn't + hurt, and the code might evolve to need it. */ + yystackp->yytops.yylookaheadStatuses[yyj] = + yystackp->yytops.yylookaheadStatuses[yyi]; if (yyj != yyi) { YYDPRINTF ((stderr, "Rename stack %lu -> %lu.\n", @@ -1318,7 +1352,7 @@ yyglrShiftDefer (yyGLRStack* yystackp, size_t yyk, yyStateNum yylrState, yystackp->yytops.yystates[yyk] = yynewState; /* Invokes YY_RESERVE_GLRSTACK. */ - yyaddDeferredAction (yystackp, yynewState, rhs, yyrule); + yyaddDeferredAction (yystackp, yyk, yynewState, rhs, yyrule); } /** Pop the symbols consumed by reduction #RULE from the top of stack @@ -1470,7 +1504,7 @@ yyglrReduce (yyGLRStack* yystackp, size_t yyk, yyRuleNum yyrule, { if (yyp->yylrState == yynewLRState && yyp->yypred == yys) { - yyaddDeferredAction (yystackp, yyp, yys0, yyrule); + yyaddDeferredAction (yystackp, yyk, yyp, yys0, yyrule); yymarkStackDeleted (yystackp, yyk); YYDPRINTF ((stderr, "Merging stack %lu into stack %lu.\n", (unsigned long int) yyk, @@ -1497,6 +1531,7 @@ yysplitStack (yyGLRStack* yystackp, size_t yyk) if (yystackp->yytops.yysize >= yystackp->yytops.yycapacity) { yyGLRState** yynewStates; + yybool* yynewLookaheadStatuses; if (! ((yystackp->yytops.yycapacity <= (YYSIZEMAX / (2 * sizeof yynewStates[0]))) && (yynewStates = @@ -1505,9 +1540,17 @@ yysplitStack (yyGLRStack* yystackp, size_t yyk) * sizeof yynewStates[0]))))) yyMemoryExhausted (yystackp); yystackp->yytops.yystates = yynewStates; + if (! (yynewLookaheadStatuses = + (yybool*) YYREALLOC (yystackp->yytops.yylookaheadStatuses, + ((yystackp->yytops.yycapacity) + * sizeof yynewLookaheadStatuses[0])))) + yyMemoryExhausted (yystackp); + yystackp->yytops.yylookaheadStatuses = yynewLookaheadStatuses; } yystackp->yytops.yystates[yystackp->yytops.yysize] = yystackp->yytops.yystates[yyk]; + yystackp->yytops.yylookaheadStatuses[yystackp->yytops.yysize] + = yystackp->yytops.yylookaheadStatuses[yyk]; yystackp->yytops.yysize += 1; return yystackp->yytops.yysize-1; } @@ -1647,6 +1690,10 @@ yyresolveAction (yySemanticOption* yyopt, yyGLRStack* yystackp, { yyGLRStackItem yyrhsVals[YYMAXRHS + YYMAXLEFT + 1]; int yynrhs; + int yychar_current; + YYSTYPE yylval_current; + YYLTYPE yylloc_current; + YYRESULTTAG yyresult; yynrhs = yyrhsLength (yyopt->yyrule); YYCHK (yyresolveStates (yyopt->yystate, yynrhs, yystackp]b4_user_args[)); @@ -1654,9 +1701,19 @@ yyresolveAction (yySemanticOption* yyopt, yyGLRStack* yystackp, if (yynrhs == 0) /* Set default location. */ yyrhsVals[YYMAXRHS + YYMAXLEFT - 1].yystate.yyloc = yyopt->yystate->yyloc;]])[ - return yyuserAction (yyopt->yyrule, yynrhs, - yyrhsVals + YYMAXRHS + YYMAXLEFT - 1, - yyvalp, yylocp, yystackp]b4_user_args[); + yychar_current = yychar; + yylval_current = yylval; + yylloc_current = yylloc; + yychar = yyopt->yyrawchar; + yylval = yyopt->yyval; + yylloc = yyopt->yyloc; + yyresult = yyuserAction (yyopt->yyrule, yynrhs, + yyrhsVals + YYMAXRHS + YYMAXLEFT - 1, + yyvalp, yylocp, yystackp]b4_user_args[); + yychar = yychar_current; + yylval = yylval_current; + yylloc = yylloc_current; + return yyresult; } #if YYDEBUG @@ -1710,7 +1767,7 @@ yyreportTree (yySemanticOption* yyx, int yyindent) static void yyreportAmbiguity (yySemanticOption* yyx0, yySemanticOption* yyx1, yyGLRStack* yystackp]b4_pure_formals[) __attribute__ ((__noreturn__)); -static void +/*ARGSUSED*/ static void yyreportAmbiguity (yySemanticOption* yyx0, yySemanticOption* yyx1, yyGLRStack* yystackp]b4_pure_formals[) { @@ -1885,6 +1942,7 @@ yyprocessOneStack (yyGLRStack* yystackp, size_t yyk, } else { + yystackp->yytops.yylookaheadStatuses[yyk] = yytrue; if (*yytokenp == YYEMPTY) { YYDPRINTF ((stderr, "Reading a token: ")); @@ -2147,6 +2205,7 @@ yyrecoverSyntaxError (yyGLRStack* yystackp]b4_user_formals[) YYDPRINTF ((stderr, "Starting parse\n")); + yychar = YYEMPTY; yytoken = YYEMPTY; yylval = yyval_default; ]b4_location_if([ @@ -2221,7 +2280,10 @@ b4_syncline([@oline@], [@ofile@])])dnl { YY_SYMBOL_PRINT ("Shifting", yytoken, &yylval, &yylloc); if (yytoken != YYEOF) - yytoken = YYEMPTY; + { + yychar = YYEMPTY; + yytoken = YYEMPTY; + } yyposn += 1; yyglrShift (&yystack, 0, yyaction, yyposn, &yylval, &yylloc); if (0 < yystack.yyerrState) @@ -2244,6 +2306,9 @@ b4_syncline([@oline@], [@ofile@])])dnl size_t yys; size_t yyn = yystack.yytops.yysize; + for (yys = 0; yys < yyn; yys += 1) + yystackp->yytops.yylookaheadStatuses[yys] = yychar != YYEMPTY; + /* yyprocessOneStack returns one of three things: - An error flag. If the caller is yyprocessOneStack, it @@ -2274,6 +2339,7 @@ b4_syncline([@oline@], [@ofile@])])dnl before the loop to make sure the user destructor for yylval isn't called twice. */ yytoken_to_shift = yytoken; + yychar = YYEMPTY; yytoken = YYEMPTY; yyposn += 1; for (yys = 0; yys < yyn; yys += 1) diff --git a/tests/glr-regression.at b/tests/glr-regression.at index 2298688a..7ccb37fa 100644 --- a/tests/glr-regression.at +++ b/tests/glr-regression.at @@ -816,7 +816,7 @@ AT_CLEANUP ## Corrupted semantic options if user action cuts parse. ## ## ------------------------------------------------------------------------- ## -AT_SETUP([Corrupted semantic options if user action cuts parse.]) +AT_SETUP([Corrupted semantic options if user action cuts parse]) AT_DATA_GRAMMAR([glr-regr10.y], [[ @@ -858,7 +858,7 @@ main (void) { int index; for (index = 0; index < GARBAGE_SIZE; index+=1) - garbage[index] = 132; + garbage[index] = 108; return yyparse (); } ]]) @@ -877,7 +877,7 @@ AT_CLEANUP ## Undesirable destructors if user action cuts parse. ## ## ------------------------------------------------------------------------- ## -AT_SETUP([Undesirable destructors if user action cuts parse.]) +AT_SETUP([Undesirable destructors if user action cuts parse]) AT_DATA_GRAMMAR([glr-regr11.y], [[ @@ -943,7 +943,7 @@ AT_CLEANUP ## Leaked merged semantic value if user action cuts parse. ## ## ------------------------------------------------------------------------- ## -AT_SETUP([Leaked merged semantic value if user action cuts parse.]) +AT_SETUP([Leaked merged semantic value if user action cuts parse]) AT_DATA_GRAMMAR([glr-regr12.y], [[ @@ -974,7 +974,8 @@ static int merge (YYSTYPE s1, YYSTYPE s2) { /* Not invoked. */ - return 0; + char dummy = s1.dummy + s2.dummy; + return dummy; } static void @@ -1010,3 +1011,350 @@ AT_COMPILE([glr-regr12]) AT_CHECK([[./glr-regr12]], 0, [], []) AT_CLEANUP + + +## ------------------------------------------------------------------------- ## +## Incorrect lookahead during deterministic GLR. See ## +## . ## +## ------------------------------------------------------------------------- ## + +AT_SETUP([Incorrect lookahead during deterministic GLR]) + +AT_DATA_GRAMMAR([glr-regr13.y], +[[ +/* Tests: + - Defaulted state with initial yychar: yychar == YYEMPTY. + - Nondefaulted state: yychar != YYEMPTY. + - Defaulted state after lookahead: yychar != YYEMPTY. + - Defaulted state after shift: yychar == YYEMPTY. */ + +%{ + #include + static void yyerror (char const *); + static int yylex (void); + static void print_look_ahead (char const *); + #define USE(value) +%} + +%union { char value; } +%type 'a' 'b' +%glr-parser +%locations + +%% + +start: + defstate_init defstate_shift 'b' { + USE ($3); + print_look_ahead ("start <- defstate_init defstate_shift 'b'"); + } + ; +defstate_init: + { + print_look_ahead ("defstate_init <- empty string"); + } + ; +defstate_shift: + nondefstate defstate_look 'a' { + USE ($3); + print_look_ahead ("defstate_shift <- nondefstate defstate_look 'a'"); + } + ; +defstate_look: + { + print_look_ahead ("defstate_look <- empty string"); + } + ; +nondefstate: + { + print_look_ahead ("nondefstate <- empty string"); + } + | 'b' { + USE ($1); + print_look_ahead ("nondefstate <- 'b'"); + } + ; + +%% + +static void +yyerror (char const *msg) +{ + fprintf (stderr, "%s\n", msg); +} + +static int +yylex (void) +{ + static char const *input = "ab"; + static int index = 0; + yylloc.first_line = yylloc.last_line = 1; + yylloc.first_column = yylloc.last_column = index+1; + yylval.value = input[index] + 'A' - 'a'; + return input[index++]; +} + +static void +print_look_ahead (char const *reduction) +{ + printf ("%s:\n yychar=", reduction); + if (yychar == YYEMPTY) + printf ("YYEMPTY"); + else if (yychar == YYEOF) + printf ("YYEOF"); + else + { + printf ("'%c', yylval='", yychar); + if (yylval.value > ' ') + printf ("%c", yylval.value); + printf ("', yylloc=(%d,%d),(%d,%d)", + yylloc.first_line, yylloc.first_column, + yylloc.last_line, yylloc.last_column); + } + printf ("\n"); +} + +int +main (void) +{ + yychar = '#'; /* Not a token in the grammar. */ + yylval.value = '!'; + return yyparse (); +} +]]) + +AT_CHECK([[bison -t -o glr-regr13.c glr-regr13.y]], 0, [], []) +AT_COMPILE([glr-regr13]) + +AT_CHECK([[./glr-regr13]], 0, +[defstate_init <- empty string: + yychar=YYEMPTY +nondefstate <- empty string: + yychar='a', yylval='A', yylloc=(1,1),(1,1) +defstate_look <- empty string: + yychar='a', yylval='A', yylloc=(1,1),(1,1) +defstate_shift <- nondefstate defstate_look 'a': + yychar=YYEMPTY +start <- defstate_init defstate_shift 'b': + yychar=YYEMPTY +], []) + +AT_CLEANUP + + +## ------------------------------------------------------------------------- ## +## Incorrect lookahead during nondeterministic GLR. ## +## ------------------------------------------------------------------------- ## + +AT_SETUP([Incorrect lookahead during nondeterministic GLR]) + +AT_DATA_GRAMMAR([glr-regr14.y], +[[ +/* Tests: + - Conflicting actions (split-off parse, which copies lookahead status, + which is necessarily yytrue) and nonconflicting actions (non-split-off + parse) for nondefaulted state: yychar != YYEMPTY. + - Merged deferred actions (lookahead status and RHS from different stack + than the target state) and nonmerged deferred actions (same stack). + - Defaulted state after lookahead: yychar != YYEMPTY. + - Defaulted state after shift: yychar == YYEMPTY. + - yychar != YYEMPTY but lookahead status is yyfalse (a previous stack has + seen the lookahead but current stack has not). + - Exceeding stack capacity (stack explosion), and thus reallocating + lookahead status array. + Note that it does not seem possible to see the initial yychar value during + nondeterministic operation since: + - In order to preserve the initial yychar, only defaulted states may be + entered. + - If only defaulted states are entered, there are no conflicts, so + nondeterministic operation does not start. */ + +%union { char value; } + +%{ + #include + static void yyerror (char const *); + static int yylex (void); + static void print_look_ahead (char const *); + static char merge (union YYSTYPE, union YYSTYPE); + #define USE(value) +%} + +%type 'a' 'b' 'c' 'd' stack_explosion +%glr-parser +%locations + +%% + +start: + merge 'c' stack_explosion { + USE ($2); USE ($3); + print_look_ahead ("start <- merge 'c' stack_explosion"); + } + ; + +/* When merging the 2 deferred actions, the lookahead statuses are + different. */ +merge: + nonconflict1 'a' 'b' nonconflict2 %dprec 1 { + USE ($2); USE ($3); + print_look_ahead ("merge <- nonconflict1 'a' 'b' nonconflict2"); + } + | conflict defstate_look 'a' nonconflict2 'b' defstate_shift %dprec 2 { + USE ($3); USE ($5); + print_look_ahead ("merge <- conflict defstate_look 'a' nonconflict2 'b'" + " defstate_shift"); + } + ; + +nonconflict1: + { + print_look_ahead ("nonconflict1 <- empty string"); + } + ; +nonconflict2: + { + print_look_ahead ("nonconflict2 <- empty string"); + } + | 'a' { + USE ($1); + print_look_ahead ("nonconflict2 <- 'a'"); + } + ; +conflict: + { + print_look_ahead ("conflict <- empty string"); + } + ; +defstate_look: + { + print_look_ahead ("defstate_look <- empty string"); + } + ; + +/* yychar != YYEMPTY but lookahead status is yyfalse. */ +defstate_shift: + { + print_look_ahead ("defstate_shift <- empty string"); + } + ; + +stack_explosion: + { $$ = '\0'; } + | alt1 stack_explosion %merge { $$ = $2; } + | alt2 stack_explosion %merge { $$ = $2; } + | alt3 stack_explosion %merge { $$ = $2; } + ; +alt1: + 'd' no_look { + USE ($1); + if (yychar != 'd' && yychar != YYEOF) + { + fprintf (stderr, "Incorrect lookahead during stack explosion.\n"); + } + } + ; +alt2: + 'd' no_look { + USE ($1); + if (yychar != 'd' && yychar != YYEOF) + { + fprintf (stderr, "Incorrect lookahead during stack explosion.\n"); + } + } + ; +alt3: + 'd' no_look { + USE ($1); + if (yychar != 'd' && yychar != YYEOF) + { + fprintf (stderr, "Incorrect lookahead during stack explosion.\n"); + } + } + ; +no_look: + { + if (yychar != YYEMPTY) + { + fprintf (stderr, + "Found lookahead where shouldn't during stack explosion.\n"); + } + } + ; + +%% + +static void +yyerror (char const *msg) +{ + fprintf (stderr, "%s\n", msg); +} + +static int +yylex (void) +{ + static char const *input = "abcdddd"; + static int index = 0; + yylloc.first_line = yylloc.last_line = 1; + yylloc.first_column = yylloc.last_column = index+1; + yylval.value = input[index] + 'A' - 'a'; + return input[index++]; +} + +static void +print_look_ahead (char const *reduction) +{ + printf ("%s:\n yychar=", reduction); + if (yychar == YYEMPTY) + printf ("YYEMPTY"); + else if (yychar == YYEOF) + printf ("YYEOF"); + else + { + printf ("'%c', yylval='", yychar); + if (yylval.value > ' ') + printf ("%c", yylval.value); + printf ("', yylloc=(%d,%d),(%d,%d)", + yylloc.first_line, yylloc.first_column, + yylloc.last_line, yylloc.last_column); + } + printf ("\n"); +} + +static char +merge (union YYSTYPE s1, union YYSTYPE s2) +{ + char dummy = s1.value + s2.value; + return dummy; +} + +int +main (void) +{ + yychar = '#'; /* Not a token in the grammar. */ + yylval.value = '!'; + return yyparse (); +} +]]) + +AT_CHECK([[bison -t -o glr-regr14.c glr-regr14.y]], 0, [], +[glr-regr14.y: conflicts: 3 reduce/reduce +]) +AT_COMPILE([glr-regr14]) + +AT_CHECK([[./glr-regr14]], 0, +[conflict <- empty string: + yychar='a', yylval='A', yylloc=(1,1),(1,1) +defstate_look <- empty string: + yychar='a', yylval='A', yylloc=(1,1),(1,1) +nonconflict2 <- empty string: + yychar='b', yylval='B', yylloc=(1,2),(1,2) +defstate_shift <- empty string: + yychar=YYEMPTY +merge <- conflict defstate_look 'a' nonconflict2 'b' defstate_shift: + yychar=YYEMPTY +start <- merge 'c' stack_explosion: + yychar=YYEOF +], []) + +AT_CLEANUP -- 2.47.2