From 24985964740b3b28ebfa2fe1de05c0e483947fbf Mon Sep 17 00:00:00 2001 From: "Joel E. Denny" Date: Sat, 8 Aug 2009 20:19:01 -0400 Subject: [PATCH] Make it easier to write deterministic tests. Continues Akim's work from his 2009-06-10 commits. * src/reader.c (check_and_convert_grammar): Don't add any symbols after the first symbols_do invocation. * src/symtab.c (symbols_sorted): New static global. (user_token_number_redeclaration): Update comments. (symbol_from_uniqstr): If a new symbol is being created, assert that symbols_sorted hasn't been allocated yet. (symbols_free): Free symbols_sorted. (symbols_cmp, symbols_cmp_qsort): New functions. (symbols_do): Sort symbol_table into symbols_sorted on first invocation. * tests/input.at (Numbered tokens): Recombine tests now that the output should be deterministic across multiple numbers. (cherry picked from commit 83b60c97ee1f98bb1f15ffa38acdc4cc765515f5) --- ChangeLog | 17 +++++++++++++++++ src/reader.c | 6 +++--- src/symtab.c | 42 +++++++++++++++++++++++++++++++++++++----- tests/input.at | 40 +++++++++++++++------------------------- 4 files changed, 72 insertions(+), 33 deletions(-) diff --git a/ChangeLog b/ChangeLog index 914a3ece..a8b78fb8 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,20 @@ +2009-08-13 Joel E. Denny + + Make it easier to write deterministic tests. + Continues Akim's work from his 2009-06-10 commits. + * src/reader.c (check_and_convert_grammar): Don't add any + symbols after the first symbols_do invocation. + * src/symtab.c (symbols_sorted): New static global. + (user_token_number_redeclaration): Update comments. + (symbol_from_uniqstr): If a new symbol is being created, assert + that symbols_sorted hasn't been allocated yet. + (symbols_free): Free symbols_sorted. + (symbols_cmp, symbols_cmp_qsort): New functions. + (symbols_do): Sort symbol_table into symbols_sorted on first + invocation. + * tests/input.at (Numbered tokens): Recombine tests now that the + output should be deterministic across multiple numbers. + 2009-08-12 Akim Demaille distcheck: fix. diff --git a/src/reader.c b/src/reader.c index 060d4d38..4ac5977d 100644 --- a/src/reader.c +++ b/src/reader.c @@ -641,9 +641,6 @@ check_and_convert_grammar (void) if (nrules == 0) fatal (_("no rules in the input grammar")); - /* Report any undefined symbols and consider them nonterminals. */ - symbols_check_defined (); - /* If the user did not define her ENDTOKEN, do it now. */ if (!endtoken) { @@ -654,6 +651,9 @@ check_and_convert_grammar (void) endtoken->user_token_number = 0; } + /* Report any undefined symbols and consider them nonterminals. */ + symbols_check_defined (); + /* Find the start symbol if no %start. */ if (!start_flag) { diff --git a/src/symtab.c b/src/symtab.c index fd2a8b36..7d18f92d 100644 --- a/src/symtab.c +++ b/src/symtab.c @@ -29,6 +29,13 @@ #include "gram.h" #include "symtab.h" +/*-------------------------------------------------------------------. +| Symbols sorted by tag. Allocated by the first invocation of | +| symbols_do, after which no more symbols should be created. | +`-------------------------------------------------------------------*/ + +static symbol **symbols_sorted = NULL; + /*------------------------. | Distinguished symbols. | `------------------------*/ @@ -548,10 +555,10 @@ static void user_token_number_redeclaration (int num, symbol *first, symbol *second) { /* User token numbers are not assigned during the parsing, but in a - second step, via a (nondeterministic) traversal of the symbol - hash table. + second step, via a traversal of the symbol table sorted on tag. - Make errors deterministic: keep the first declaration first. */ + However, error messages make more sense if we keep the first + declaration first. */ if (location_cmp (first->location, second->location) > 0) { symbol* tmp = first; @@ -695,6 +702,7 @@ symbol_from_uniqstr (const uniqstr key, location loc) if (!entry) { /* First insertion in the hash. */ + aver (!symbols_sorted); entry = symbol_new (key, loc); if (!hash_insert (symbol_table, entry)) xalloc_die (); @@ -789,6 +797,7 @@ symbols_free (void) hash_free (symbol_table); hash_free (semantic_type_table); free (symbols); + free (symbols_sorted); } @@ -797,13 +806,36 @@ symbols_free (void) | terminals. | `---------------------------------------------------------------*/ +static int +symbols_cmp (symbol const *a, symbol const *b) +{ + return strcmp (a->tag, b->tag); +} + +static int +symbols_cmp_qsort (void const *a, void const *b) +{ + return symbols_cmp (*(symbol * const *)a, *(symbol * const *)b); +} + static void symbols_do (Hash_processor processor, void *processor_data) { - hash_do_for_each (symbol_table, processor, processor_data); + size_t count = hash_get_n_entries (symbol_table); + if (!symbols_sorted) + { + symbols_sorted = xnmalloc (count, sizeof *symbols_sorted); + hash_get_entries (symbol_table, (void**)symbols_sorted, count); + qsort (symbols_sorted, count, sizeof *symbols_sorted, + symbols_cmp_qsort); + } + { + size_t i; + for (i = 0; i < count; ++i) + processor (symbols_sorted[i], processor_data); + } } - /*--------------------------------------------------------------. | Check that all the symbols are defined. Report any undefined | | symbols and consider them nonterminals. | diff --git a/tests/input.at b/tests/input.at index 4d5d0bbc..84f5a997 100644 --- a/tests/input.at +++ b/tests/input.at @@ -680,33 +680,23 @@ AT_CLEANUP AT_SETUP([Numbered tokens]) -AT_DATA_GRAMMAR([1.y], -[[%token DECIMAL 11259375 - HEXADECIMAL 0xabcdef +AT_DATA_GRAMMAR([redecl.y], +[[%token DECIMAL_1 11259375 + HEXADECIMAL_1 0xabcdef + HEXADECIMAL_2 0xFEDCBA + DECIMAL_2 16702650 %% -start: DECIMAL; +start: DECIMAL_1 HEXADECIMAL_2; ]]) -AT_BISON_CHECK([1.y], [1], [], -[[1.y:10.10-20: user token number 11259375 redeclaration for HEXADECIMAL -1.y:9.8-14: previous declaration for DECIMAL +AT_BISON_CHECK([redecl.y], [1], [], +[[redecl.y:10.10-22: user token number 11259375 redeclaration for HEXADECIMAL_1 +redecl.y:9.8-16: previous declaration for DECIMAL_1 +redecl.y:12.10-18: user token number 16702650 redeclaration for DECIMAL_2 +redecl.y:11.10-22: previous declaration for HEXADECIMAL_2 ]]) - -AT_DATA_GRAMMAR([2.y], -[[%token HEXADECIMAL 0xabcdef - DECIMAL 11259375 -%% -start: HEXADECIMAL; -]]) - -AT_BISON_CHECK([2.y], [1], [], -[[2.y:10.10-16: user token number 11259375 redeclaration for DECIMAL -2.y:9.8-18: previous declaration for HEXADECIMAL -]]) - - -AT_DATA_GRAMMAR([3.y], +AT_DATA_GRAMMAR([too-large.y], [[%token TOO_LARGE_DEC 999999999999999999999 TOO_LARGE_HEX 0xFFFFFFFFFFFFFFFFFFF %% @@ -714,9 +704,9 @@ start: TOO_LARGE_DEC TOO_LARGE_HEX %% ]]) -AT_BISON_CHECK([3.y], [1], [], -[[3.y:9.22-42: integer out of range: `999999999999999999999' -3.y:10.24-44: integer out of range: `0xFFFFFFFFFFFFFFFFFFF' +AT_BISON_CHECK([too-large.y], [1], [], +[[too-large.y:9.22-42: integer out of range: `999999999999999999999' +too-large.y:10.24-44: integer out of range: `0xFFFFFFFFFFFFFFFFFFF' ]]) AT_CLEANUP -- 2.45.2