1 /* Symbol table manager for Bison.
3 Copyright (C) 1984, 1989, 2000-2002, 2004-2012 Free Software
6 This file is part of Bison, the GNU Compiler Compiler.
8 This program is free software: you can redistribute it and/or modify
9 it under the terms of the GNU General Public License as published by
10 the Free Software Foundation, either version 3 of the License, or
11 (at your option) any later version.
13 This program is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU General Public License for more details.
18 You should have received a copy of the GNU General Public License
19 along with this program. If not, see <http://www.gnu.org/licenses/>. */
30 /*-------------------------------------------------------------------.
31 | Symbols sorted by tag. Allocated by the first invocation of |
32 | symbols_do, after which no more symbols should be created. |
33 `-------------------------------------------------------------------*/
35 static symbol
**symbols_sorted
= NULL
;
37 /*------------------------.
38 | Distinguished symbols. |
39 `------------------------*/
41 symbol
*errtoken
= NULL
;
42 symbol
*undeftoken
= NULL
;
43 symbol
*endtoken
= NULL
;
44 symbol
*accept
= NULL
;
45 symbol
*startsymbol
= NULL
;
46 location startsymbol_location
;
48 /*---------------------------------------.
49 | Default %destructor's and %printer's. |
50 `---------------------------------------*/
52 static code_props default_tagged_code_props
[CODE_PROPS_SIZE
] =
57 static code_props default_tagless_code_props
[CODE_PROPS_SIZE
] =
63 /*---------------------------------.
64 | Create a new symbol, named TAG. |
65 `---------------------------------*/
68 symbol_new (uniqstr tag
, location loc
)
70 symbol
*res
= xmalloc (sizeof *res
);
74 /* If the tag is not a string (starts with a double quote), check
75 that it is valid for Yacc. */
76 if (tag
[0] != '\"' && tag
[0] != '\'' && strchr (tag
, '-'))
77 yacc_at (loc
, _("POSIX Yacc forbids dashes in symbol names: %s"),
83 res
->type_name
= NULL
;
84 for (int i
= 0; i
< CODE_PROPS_SIZE
; ++i
)
85 code_props_none_init (&res
->props
[i
]);
87 res
->number
= NUMBER_UNDEFINED
;
89 res
->assoc
= undef_assoc
;
90 res
->user_token_number
= USER_NUMBER_UNDEFINED
;
93 res
->class = unknown_sym
;
94 res
->status
= undeclared
;
96 if (nsyms
== SYMBOL_NUMBER_MAXIMUM
)
97 fatal (_("too many symbols in input grammar (limit is %d)"),
98 SYMBOL_NUMBER_MAXIMUM
);
103 /*-------------------------------------------------------.
104 | Name of the code_props type: %destructor or %printer. |
105 `-------------------------------------------------------*/
108 code_props_type_string (code_props_type kind
)
113 return "%destructor";
120 /*----------------------------------------.
121 | Create a new semantic type, named TAG. |
122 `----------------------------------------*/
124 static semantic_type
*
125 semantic_type_new (uniqstr tag
)
127 semantic_type
*res
= xmalloc (sizeof *res
);
129 uniqstr_assert (tag
);
131 for (int i
= 0; i
< CODE_PROPS_SIZE
; ++i
)
132 code_props_none_init (&res
->props
[i
]);
142 #define SYMBOL_ATTR_PRINT(Attr) \
144 fprintf (f, " %s { %s }", #Attr, s->Attr)
146 #define SYMBOL_CODE_PRINT(Attr) \
147 if (s->props[Attr].code) \
148 fprintf (f, " %s { %s }", #Attr, s->props[Attr].code)
151 symbol_print (symbol
*s
, FILE *f
)
155 fprintf (f
, "\"%s\"", s
->tag
);
156 SYMBOL_ATTR_PRINT (type_name
);
157 SYMBOL_CODE_PRINT (destructor
);
158 SYMBOL_CODE_PRINT (printer
);
161 fprintf (f
, "<NULL>");
164 #undef SYMBOL_ATTR_PRINT
165 #undef SYMBOL_CODE_PRINT
168 /*----------------------------------.
169 | Whether S is a valid identifier. |
170 `----------------------------------*/
173 is_identifier (uniqstr s
)
175 static char const alphanum
[26 + 26 + 1 + 10] =
176 "abcdefghijklmnopqrstuvwxyz"
177 "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
180 if (!s
|| ! memchr (alphanum
, *s
, sizeof alphanum
- 10))
183 if (! memchr (alphanum
, *s
, sizeof alphanum
))
189 /*-----------------------------------------------.
190 | Get the identifier associated to this symbol. |
191 `-----------------------------------------------*/
193 symbol_id_get (symbol
const *sym
)
195 aver (sym
->user_token_number
!= USER_NUMBER_HAS_STRING_ALIAS
);
198 return is_identifier (sym
->tag
) ? sym
->tag
: 0;
202 /*------------------------------------------------------------------.
203 | Complain that S's WHAT is redeclared at SECOND, and was first set |
205 `------------------------------------------------------------------*/
208 symbol_redeclaration (symbol
*s
, const char *what
, location first
,
211 complain_at (second
, _("%s redeclaration for %s"), what
, s
->tag
);
212 complain_at (first
, _("previous declaration"));
216 semantic_type_redeclaration (semantic_type
*s
, const char *what
, location first
,
219 complain_at (second
, _("%s redeclaration for <%s>"), what
, s
->tag
);
220 complain_at (first
, _("previous declaration"));
225 /*-----------------------------------------------------------------.
226 | Set the TYPE_NAME associated with SYM. Does nothing if passed 0 |
228 `-----------------------------------------------------------------*/
231 symbol_type_set (symbol
*sym
, uniqstr type_name
, location loc
)
236 symbol_redeclaration (sym
, "%type", sym
->type_location
, loc
);
237 uniqstr_assert (type_name
);
238 sym
->type_name
= type_name
;
239 sym
->type_location
= loc
;
243 /*--------------------------------------------------------.
244 | Set the DESTRUCTOR or PRINTER associated with the SYM. |
245 `--------------------------------------------------------*/
248 symbol_code_props_set (symbol
*sym
, code_props_type kind
,
249 code_props
const *code
)
251 if (sym
->props
[kind
].code
)
252 symbol_redeclaration (sym
, code_props_type_string (kind
),
253 sym
->props
[kind
].location
,
255 sym
->props
[kind
] = *code
;
258 /*-----------------------------------------------------.
259 | Set the DESTRUCTOR or PRINTER associated with TYPE. |
260 `-----------------------------------------------------*/
263 semantic_type_code_props_set (semantic_type
*type
,
264 code_props_type kind
,
265 code_props
const *code
)
267 if (type
->props
[kind
].code
)
268 semantic_type_redeclaration (type
, code_props_type_string (kind
),
269 type
->props
[kind
].location
,
271 type
->props
[kind
] = *code
;
274 /*---------------------------------------------------.
275 | Get the computed %destructor or %printer for SYM. |
276 `---------------------------------------------------*/
279 symbol_code_props_get (symbol
const *sym
,
280 code_props_type kind
)
282 /* Per-symbol code props. */
283 if (sym
->props
[kind
].code
)
284 return &sym
->props
[kind
];
286 /* Per-type code props. */
289 code_props
const *code
=
290 &semantic_type_get (sym
->type_name
)->props
[kind
];
295 /* Apply default code props's only to user-defined symbols. */
296 if (sym
->tag
[0] == '$' || sym
== errtoken
)
297 return &code_props_none
;
300 return &default_tagged_code_props
[kind
];
301 return &default_tagless_code_props
[kind
];
304 /*-----------------------------------------------------------------.
305 | Set the PRECEDENCE associated with SYM. Does nothing if invoked |
306 | with UNDEF_ASSOC as ASSOC. |
307 `-----------------------------------------------------------------*/
310 symbol_precedence_set (symbol
*sym
, int prec
, assoc a
, location loc
)
312 if (a
!= undef_assoc
)
315 symbol_redeclaration (sym
, assoc_to_string (a
), sym
->prec_location
,
319 sym
->prec_location
= loc
;
322 /* Only terminals have a precedence. */
323 symbol_class_set (sym
, token_sym
, loc
, false);
327 /*------------------------------------.
328 | Set the CLASS associated with SYM. |
329 `------------------------------------*/
332 symbol_class_set (symbol
*sym
, symbol_class
class, location loc
, bool declaring
)
335 if (sym
->class != unknown_sym
&& sym
->class != class)
337 complain_at (loc
, _("symbol %s redefined"), sym
->tag
);
338 // Don't report both "redefined" and "redeclared".
342 if (class == nterm_sym
&& sym
->class != nterm_sym
)
343 sym
->number
= nvars
++;
344 else if (class == token_sym
&& sym
->number
== NUMBER_UNDEFINED
)
345 sym
->number
= ntokens
++;
351 if (sym
->status
== declared
&& !warned
)
352 warn_at (loc
, _("symbol %s redeclared"), sym
->tag
);
353 sym
->status
= declared
;
358 /*------------------------------------------------.
359 | Set the USER_TOKEN_NUMBER associated with SYM. |
360 `------------------------------------------------*/
363 symbol_user_token_number_set (symbol
*sym
, int user_token_number
, location loc
)
365 int *user_token_numberp
;
367 if (sym
->user_token_number
!= USER_NUMBER_HAS_STRING_ALIAS
)
368 user_token_numberp
= &sym
->user_token_number
;
370 user_token_numberp
= &sym
->alias
->user_token_number
;
371 if (*user_token_numberp
!= USER_NUMBER_UNDEFINED
372 && *user_token_numberp
!= user_token_number
)
373 complain_at (loc
, _("redefining user token number of %s"), sym
->tag
);
375 *user_token_numberp
= user_token_number
;
376 /* User defined $end token? */
377 if (user_token_number
== 0)
380 /* It is always mapped to 0, so it was already counted in
382 if (endtoken
->number
!= NUMBER_UNDEFINED
)
384 endtoken
->number
= 0;
389 /*----------------------------------------------------------.
390 | If SYM is not defined, report an error, and consider it a |
392 `----------------------------------------------------------*/
395 symbol_check_defined (symbol
*sym
)
397 if (sym
->class == unknown_sym
)
402 warn_at (sym
->location
,
403 _("symbol %s is used, but is not defined as a token"
404 " and has no rules"),
409 complain_at (sym
->location
,
410 _("symbol %s is used, but is not defined as a token"
411 " and has no rules"),
415 /* If declared, then sym->class != unknown_sym. */
419 sym
->class = nterm_sym
;
420 sym
->number
= nvars
++;
427 symbol_check_defined_processor (void *sym
, void *null ATTRIBUTE_UNUSED
)
429 return symbol_check_defined (sym
);
434 symbol_make_alias (symbol
*sym
, symbol
*str
, location loc
)
437 warn_at (loc
, _("symbol %s used more than once as a literal string"),
440 warn_at (loc
, _("symbol %s given more than one literal string"),
444 str
->class = token_sym
;
445 str
->user_token_number
= sym
->user_token_number
;
446 sym
->user_token_number
= USER_NUMBER_HAS_STRING_ALIAS
;
449 str
->number
= sym
->number
;
450 symbol_type_set (str
, sym
->type_name
, loc
);
455 /*---------------------------------------------------------.
456 | Check that THIS, and its alias, have same precedence and |
458 `---------------------------------------------------------*/
461 symbol_check_alias_consistency (symbol
*this)
464 symbol
*str
= this->alias
;
466 /* Check only the symbol in the symbol-string pair. */
468 && this->user_token_number
== USER_NUMBER_HAS_STRING_ALIAS
))
471 if (str
->type_name
!= sym
->type_name
)
474 symbol_type_set (sym
, str
->type_name
, str
->type_location
);
476 symbol_type_set (str
, sym
->type_name
, sym
->type_location
);
480 for (int i
= 0; i
< CODE_PROPS_SIZE
; ++i
)
481 if (str
->props
[i
].code
)
482 symbol_code_props_set (sym
, i
, &str
->props
[i
]);
483 else if (sym
->props
[i
].code
)
484 symbol_code_props_set (str
, i
, &sym
->props
[i
]);
486 if (sym
->prec
|| str
->prec
)
489 symbol_precedence_set (sym
, str
->prec
, str
->assoc
,
492 symbol_precedence_set (str
, sym
->prec
, sym
->assoc
,
498 symbol_check_alias_consistency_processor (void *this,
499 void *null ATTRIBUTE_UNUSED
)
501 symbol_check_alias_consistency (this);
506 /*-------------------------------------------------------------------.
507 | Assign a symbol number, and write the definition of the token name |
508 | into FDEFINES. Put in SYMBOLS. |
509 `-------------------------------------------------------------------*/
512 symbol_pack (symbol
*this)
514 aver (this->number
!= NUMBER_UNDEFINED
);
515 if (this->class == nterm_sym
)
516 this->number
+= ntokens
;
517 else if (this->user_token_number
== USER_NUMBER_HAS_STRING_ALIAS
)
520 symbols
[this->number
] = this;
525 symbol_pack_processor (void *this, void *null ATTRIBUTE_UNUSED
)
527 return symbol_pack (this);
532 user_token_number_redeclaration (int num
, symbol
*first
, symbol
*second
)
534 /* User token numbers are not assigned during the parsing, but in a
535 second step, via a traversal of the symbol table sorted on tag.
537 However, error messages make more sense if we keep the first
538 declaration first. */
539 if (location_cmp (first
->location
, second
->location
) > 0)
545 complain_at (second
->location
,
546 _("user token number %d redeclaration for %s"),
548 complain_at (first
->location
, _("previous declaration for %s"),
552 /*--------------------------------------------------.
553 | Put THIS in TOKEN_TRANSLATIONS if it is a token. |
554 `--------------------------------------------------*/
557 symbol_translation (symbol
*this)
560 if (this->class == token_sym
561 && this->user_token_number
!= USER_NUMBER_HAS_STRING_ALIAS
)
563 /* A token which translation has already been set? */
564 if (token_translations
[this->user_token_number
] != undeftoken
->number
)
565 user_token_number_redeclaration
566 (this->user_token_number
,
567 symbols
[token_translations
[this->user_token_number
]],
570 token_translations
[this->user_token_number
] = this->number
;
577 symbol_translation_processor (void *this, void *null ATTRIBUTE_UNUSED
)
579 return symbol_translation (this);
583 /*---------------------------------------.
584 | Symbol and semantic type hash tables. |
585 `---------------------------------------*/
587 /* Initial capacity of symbol and semantic type hash table. */
588 #define HT_INITIAL_CAPACITY 257
590 static struct hash_table
*symbol_table
= NULL
;
591 static struct hash_table
*semantic_type_table
= NULL
;
594 hash_compare_symbol (const symbol
*m1
, const symbol
*m2
)
596 /* Since tags are unique, we can compare the pointers themselves. */
597 return UNIQSTR_EQ (m1
->tag
, m2
->tag
);
601 hash_compare_semantic_type (const semantic_type
*m1
, const semantic_type
*m2
)
603 /* Since names are unique, we can compare the pointers themselves. */
604 return UNIQSTR_EQ (m1
->tag
, m2
->tag
);
608 hash_symbol_comparator (void const *m1
, void const *m2
)
610 return hash_compare_symbol (m1
, m2
);
614 hash_semantic_type_comparator (void const *m1
, void const *m2
)
616 return hash_compare_semantic_type (m1
, m2
);
620 hash_symbol (const symbol
*m
, size_t tablesize
)
622 /* Since tags are unique, we can hash the pointer itself. */
623 return ((uintptr_t) m
->tag
) % tablesize
;
627 hash_semantic_type (const semantic_type
*m
, size_t tablesize
)
629 /* Since names are unique, we can hash the pointer itself. */
630 return ((uintptr_t) m
->tag
) % tablesize
;
634 hash_symbol_hasher (void const *m
, size_t tablesize
)
636 return hash_symbol (m
, tablesize
);
640 hash_semantic_type_hasher (void const *m
, size_t tablesize
)
642 return hash_semantic_type (m
, tablesize
);
645 /*-------------------------------.
646 | Create the symbol hash table. |
647 `-------------------------------*/
652 symbol_table
= hash_initialize (HT_INITIAL_CAPACITY
,
655 hash_symbol_comparator
,
657 semantic_type_table
= hash_initialize (HT_INITIAL_CAPACITY
,
659 hash_semantic_type_hasher
,
660 hash_semantic_type_comparator
,
665 /*----------------------------------------------------------------.
666 | Find the symbol named KEY, and return it. If it does not exist |
668 `----------------------------------------------------------------*/
671 symbol_from_uniqstr (const uniqstr key
, location loc
)
677 entry
= hash_lookup (symbol_table
, &probe
);
681 /* First insertion in the hash. */
682 aver (!symbols_sorted
);
683 entry
= symbol_new (key
, loc
);
684 if (!hash_insert (symbol_table
, entry
))
691 /*-----------------------------------------------------------------------.
692 | Find the semantic type named KEY, and return it. If it does not exist |
694 `-----------------------------------------------------------------------*/
697 semantic_type_from_uniqstr (const uniqstr key
)
700 semantic_type
*entry
;
703 entry
= hash_lookup (semantic_type_table
, &probe
);
707 /* First insertion in the hash. */
708 entry
= semantic_type_new (key
);
709 if (!hash_insert (semantic_type_table
, entry
))
716 /*----------------------------------------------------------------.
717 | Find the symbol named KEY, and return it. If it does not exist |
719 `----------------------------------------------------------------*/
722 symbol_get (const char *key
, location loc
)
724 return symbol_from_uniqstr (uniqstr_new (key
), loc
);
728 /*-----------------------------------------------------------------------.
729 | Find the semantic type named KEY, and return it. If it does not exist |
731 `-----------------------------------------------------------------------*/
734 semantic_type_get (const char *key
)
736 return semantic_type_from_uniqstr (uniqstr_new (key
));
740 /*------------------------------------------------------------------.
741 | Generate a dummy nonterminal, whose name cannot conflict with the |
743 `------------------------------------------------------------------*/
746 dummy_symbol_get (location loc
)
748 /* Incremented for each generated symbol. */
749 static int dummy_count
= 0;
750 static char buf
[256];
754 sprintf (buf
, "$@%d", ++dummy_count
);
755 sym
= symbol_get (buf
, loc
);
756 sym
->class = nterm_sym
;
757 sym
->number
= nvars
++;
762 symbol_is_dummy (const symbol
*sym
)
764 return sym
->tag
[0] == '@' || (sym
->tag
[0] == '$' && sym
->tag
[1] == '@');
767 /*-------------------.
768 | Free the symbols. |
769 `-------------------*/
774 hash_free (symbol_table
);
775 hash_free (semantic_type_table
);
777 free (symbols_sorted
);
781 /*---------------------------------------------------------------.
782 | Look for undefined symbols, report an error, and consider them |
784 `---------------------------------------------------------------*/
787 symbols_cmp (symbol
const *a
, symbol
const *b
)
789 return strcmp (a
->tag
, b
->tag
);
793 symbols_cmp_qsort (void const *a
, void const *b
)
795 return symbols_cmp (*(symbol
* const *)a
, *(symbol
* const *)b
);
799 symbols_do (Hash_processor processor
, void *processor_data
,
800 struct hash_table
*table
, symbol
**sorted
)
802 size_t count
= hash_get_n_entries (table
);
805 sorted
= xnmalloc (count
, sizeof *sorted
);
806 hash_get_entries (table
, (void**)sorted
, count
);
807 qsort (sorted
, count
, sizeof *sorted
, symbols_cmp_qsort
);
811 for (i
= 0; i
< count
; ++i
)
812 processor (sorted
[i
], processor_data
);
816 /*--------------------------------------------------------------.
817 | Check that all the symbols are defined. Report any undefined |
818 | symbols and consider them nonterminals. |
819 `--------------------------------------------------------------*/
822 symbols_check_defined (void)
824 symbols_do (symbol_check_defined_processor
, NULL
,
825 symbol_table
, symbols_sorted
);
828 /*------------------------------------------------------------------.
829 | Set TOKEN_TRANSLATIONS. Check that no two symbols share the same |
831 `------------------------------------------------------------------*/
834 symbols_token_translations_init (void)
836 bool num_256_available_p
= true;
839 /* Find the highest user token number, and whether 256, the POSIX
840 preferred user token number for the error token, is used. */
841 max_user_token_number
= 0;
842 for (i
= 0; i
< ntokens
; ++i
)
844 symbol
*this = symbols
[i
];
845 if (this->user_token_number
!= USER_NUMBER_UNDEFINED
)
847 if (this->user_token_number
> max_user_token_number
)
848 max_user_token_number
= this->user_token_number
;
849 if (this->user_token_number
== 256)
850 num_256_available_p
= false;
854 /* If 256 is not used, assign it to error, to follow POSIX. */
855 if (num_256_available_p
856 && errtoken
->user_token_number
== USER_NUMBER_UNDEFINED
)
857 errtoken
->user_token_number
= 256;
859 /* Set the missing user numbers. */
860 if (max_user_token_number
< 256)
861 max_user_token_number
= 256;
863 for (i
= 0; i
< ntokens
; ++i
)
865 symbol
*this = symbols
[i
];
866 if (this->user_token_number
== USER_NUMBER_UNDEFINED
)
867 this->user_token_number
= ++max_user_token_number
;
868 if (this->user_token_number
> max_user_token_number
)
869 max_user_token_number
= this->user_token_number
;
872 token_translations
= xnmalloc (max_user_token_number
+ 1,
873 sizeof *token_translations
);
875 /* Initialize all entries for literal tokens to the internal token
876 number for $undefined, which represents all invalid inputs. */
877 for (i
= 0; i
< max_user_token_number
+ 1; i
++)
878 token_translations
[i
] = undeftoken
->number
;
879 symbols_do (symbol_translation_processor
, NULL
,
880 symbol_table
, symbols_sorted
);
884 /*----------------------------------------------------------------.
885 | Assign symbol numbers, and write definition of token names into |
886 | FDEFINES. Set up vectors SYMBOL_TABLE, TAGS of symbols. |
887 `----------------------------------------------------------------*/
892 symbols_do (symbol_check_alias_consistency_processor
, NULL
,
893 symbol_table
, symbols_sorted
);
895 symbols
= xcalloc (nsyms
, sizeof *symbols
);
896 symbols_do (symbol_pack_processor
, NULL
, symbol_table
, symbols_sorted
);
898 /* Aliases leave empty slots in symbols, so remove them. */
902 int nsyms_old
= nsyms
;
903 for (writei
= 0, readi
= 0; readi
< nsyms_old
; readi
+= 1)
905 if (symbols
[readi
] == NULL
)
912 symbols
[writei
] = symbols
[readi
];
913 symbols
[writei
]->number
= writei
;
914 if (symbols
[writei
]->alias
)
915 symbols
[writei
]->alias
->number
= writei
;
920 symbols
= xnrealloc (symbols
, nsyms
, sizeof *symbols
);
922 symbols_token_translations_init ();
924 if (startsymbol
->class == unknown_sym
)
925 fatal_at (startsymbol_location
,
926 _("the start symbol %s is undefined"),
928 else if (startsymbol
->class == token_sym
)
929 fatal_at (startsymbol_location
,
930 _("the start symbol %s is a token"),
935 /*--------------------------------------------------.
936 | Set default tagged/tagless %destructor/%printer. |
937 `--------------------------------------------------*/
940 default_tagged_code_props_set (code_props_type kind
, code_props
const *code
)
942 if (default_tagged_code_props
[kind
].code
)
944 complain_at (code
->location
,
945 _("redeclaration for default tagged %s"),
946 code_props_type_string (kind
));
947 complain_at (default_tagged_code_props
[kind
].location
,
948 _("previous declaration"));
950 default_tagged_code_props
[kind
] = *code
;
954 default_tagless_code_props_set (code_props_type kind
, code_props
const *code
)
956 if (default_tagless_code_props
[kind
].code
)
958 complain_at (code
->location
,
959 _("redeclaration for default tagless %s"),
960 code_props_type_string (kind
));
961 complain_at (default_tagless_code_props
[kind
].location
,
962 _("previous declaration"));
964 default_tagless_code_props
[kind
] = *code
;