]>
Commit | Line | Data |
---|---|---|
f7d4d87a | 1 | /* Data definitions for internal representation of bison's input, |
d7e1f00c | 2 | Copyright (C) 1984, 1986, 1989, 1992, 2001, 2002 |
99013900 | 3 | Free Software Foundation, Inc. |
f7d4d87a | 4 | |
b2ca4022 | 5 | This file is part of Bison, the GNU Compiler Compiler. |
f7d4d87a | 6 | |
b2ca4022 AD |
7 | Bison is free software; you can redistribute it and/or modify |
8 | it under the terms of the GNU General Public License as published by | |
9 | the Free Software Foundation; either version 2, or (at your option) | |
10 | any later version. | |
f7d4d87a | 11 | |
b2ca4022 AD |
12 | Bison is distributed in the hope that it will be useful, |
13 | but WITHOUT ANY WARRANTY; without even the implied warranty of | |
14 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
15 | GNU General Public License for more details. | |
f7d4d87a | 16 | |
b2ca4022 AD |
17 | You should have received a copy of the GNU General Public License |
18 | along with Bison; see the file COPYING. If not, write to | |
19 | the Free Software Foundation, Inc., 59 Temple Place - Suite 330, | |
20 | Boston, MA 02111-1307, USA. */ | |
f7d4d87a | 21 | |
b2ca4022 AD |
22 | #ifndef GRAM_H_ |
23 | # define GRAM_H_ | |
f7d4d87a | 24 | |
aea13e97 | 25 | /* Representation of the grammar rules: |
f7d4d87a | 26 | |
aea13e97 AD |
27 | NTOKENS is the number of tokens, and NVARS is the number of |
28 | variables (nonterminals). NSYMS is the total number, ntokens + | |
b2ca4022 | 29 | nvars. |
f7d4d87a | 30 | |
b2ca4022 | 31 | Each symbol (either token or variable) receives a symbol number. |
aea13e97 AD |
32 | Numbers 0 to NTOKENS - 1 are for tokens, and NTOKENS to NSYMS - 1 |
33 | are for variables. Symbol number zero is the end-of-input token. | |
34 | This token is counted in ntokens. The true number of token values | |
35 | assigned is NTOKENS reduced by one for each alias declaration. | |
36 | ||
37 | The rules receive rule numbers 1 to NRULES in the order they are | |
38 | written. More precisely Bison augments the grammar with the | |
39 | initial rule, `$axiom: START-SYMBOL EOF', which is numbered 1, all | |
40 | the user rules are 2, 3 etc. Each time a rule number is presented | |
41 | to the user, we subtract 1, so *displayed* rule numbers are 0, 1, | |
42 | 2... | |
43 | ||
44 | Internally, we cannot use the number 0 for a rule because for | |
45 | instance RITEM stores both symbol (the RHS) and rule numbers: the | |
46 | symbols are shorts >= 0, and rule numbers are stored negated. | |
47 | Therefore 0 cannot be used, since it would be both the rule number | |
48 | 0 and the token EOF. | |
49 | ||
50 | Actions and guards are accessed via the rule number. | |
f7d4d87a | 51 | |
b2ed6e58 | 52 | The rules themselves are described by several arrays, amongst which |
1a2b5d37 | 53 | are RITEM and RULES. |
b2ed6e58 | 54 | |
1a2b5d37 | 55 | RULES is an array of struct rule_s, whose members are: |
b2ed6e58 | 56 | |
aea13e97 AD |
57 | RULES[R].lhs -- the symbol number of the left hand side of rule R. |
58 | If -1, the rule has been thrown out by reduce.c and should be | |
59 | ignored. | |
b2ed6e58 | 60 | |
aea13e97 AD |
61 | RULES[R].rhs -- a pointer into RITEM to the beginning of the portion |
62 | for rule R. | |
f7d4d87a | 63 | |
1a2b5d37 | 64 | RULES[R].prec -- the precedence level of R. |
652a871c | 65 | |
aea13e97 AD |
66 | RULES[R].precsym -- the symbol-number of the symbol in %prec for R |
67 | (if any). | |
652a871c | 68 | |
1a2b5d37 | 69 | RULES[R].assoc -- the associativity of R. |
e41dc700 | 70 | |
1a2b5d37 | 71 | RULES[R].line -- the line where R was defined. |
652a871c | 72 | |
1a2b5d37 | 73 | RULES[R].useful -- TRUE iff the rule is used. |
68f1e3ed | 74 | |
b2ed6e58 AD |
75 | The right hand side is stored as symbol numbers in a portion of |
76 | RITEM. | |
f7d4d87a | 77 | |
b2ca4022 AD |
78 | The length of the portion is one greater than the number of symbols |
79 | in the rule's right hand side. The last element in the portion | |
80 | contains minus R, which identifies it as the end of a portion and | |
81 | says which rule it is for. | |
f7d4d87a | 82 | |
b2ed6e58 | 83 | The portions of RITEM come in order of increasing rule number and |
b2ca4022 AD |
84 | are followed by an element which is zero to mark the end. nitems |
85 | is the total length of ritem, not counting the final zero. Each | |
aea13e97 | 86 | element of RITEM is called an "item" and its index in RITEM is an |
b2ca4022 | 87 | item number. |
f7d4d87a | 88 | |
b2ca4022 AD |
89 | Item numbers are used in the finite state machine to represent |
90 | places that parsing can get to. | |
f7d4d87a | 91 | |
aea13e97 | 92 | SYMBOLS[I]->prec records the precedence level of each symbol. |
f7d4d87a | 93 | |
b2ca4022 AD |
94 | Precedence levels are assigned in increasing order starting with 1 |
95 | so that numerically higher precedence values mean tighter binding | |
96 | as they ought to. Zero as a symbol or rule's precedence means none | |
97 | is assigned. | |
f7d4d87a | 98 | |
aea13e97 | 99 | Associativities are recorded similarly in SYMBOLS[I]->assoc. */ |
f7d4d87a | 100 | |
bba97eb2 | 101 | #include "symtab.h" |
f7d4d87a DM |
102 | |
103 | #define ISTOKEN(s) ((s) < ntokens) | |
104 | #define ISVAR(s) ((s) >= ntokens) | |
105 | ||
f7d4d87a DM |
106 | extern int nitems; |
107 | extern int nrules; | |
108 | extern int nsyms; | |
109 | extern int ntokens; | |
110 | extern int nvars; | |
111 | ||
112 | extern short *ritem; | |
75142d45 | 113 | extern int nritems; |
b2ed6e58 | 114 | |
f7d4d87a DM |
115 | extern int start_symbol; |
116 | ||
652a871c AD |
117 | typedef struct rule_s |
118 | { | |
c3b407f4 AD |
119 | /* The number of the rule in the source. It is usually the index in |
120 | RULES too, except if there are useless rules. */ | |
d7e1f00c AD |
121 | short user_number; |
122 | ||
123 | /* The index in RULES. Usually the rule number in the source, | |
124 | except if some rules are useless. */ | |
c3b407f4 AD |
125 | short number; |
126 | ||
bba97eb2 | 127 | bucket *lhs; |
99013900 | 128 | short *rhs; |
652a871c AD |
129 | short prec; |
130 | short precsym; | |
aea13e97 | 131 | associativity assoc; |
e41dc700 | 132 | short line; |
68f1e3ed | 133 | bool useful; |
f499b062 | 134 | |
3f96f4dc AD |
135 | const char *action; |
136 | short action_line; | |
f499b062 AD |
137 | |
138 | const char *guard; | |
139 | short guard_line; | |
652a871c AD |
140 | } rule_t; |
141 | ||
1a2b5d37 | 142 | extern struct rule_s *rules; |
652a871c | 143 | |
0e78e603 AD |
144 | /* Table of the symbols, indexed by the symbol number. */ |
145 | extern struct bucket **symbols; | |
146 | ||
b2ca4022 AD |
147 | /* token translation table: indexed by a token number as returned by |
148 | the user's yylex routine, it yields the internal token number used | |
342b8b6e | 149 | by the parser and throughout bison. */ |
f7d4d87a DM |
150 | |
151 | extern short *token_translations; | |
f7d4d87a DM |
152 | extern int max_user_token_number; |
153 | ||
b2ca4022 AD |
154 | /* SEMANTIC_PARSER is nonzero if the input file says to use the hairy |
155 | parser that provides for semantic error recovery. If it is zero, | |
156 | the yacc-compatible simplified parser is used. */ | |
f7d4d87a DM |
157 | |
158 | extern int semantic_parser; | |
159 | ||
b2ca4022 AD |
160 | /* PURE_PARSER is nonzero if we should generate a parser that is all pure |
161 | and reentrant. */ | |
f7d4d87a DM |
162 | |
163 | extern int pure_parser; | |
164 | ||
b2ca4022 | 165 | /* ERROR_TOKEN_NUMBER is the token number of the error token. */ |
f7d4d87a DM |
166 | |
167 | extern int error_token_number; | |
3067fbef | 168 | |
c3b407f4 AD |
169 | /* Report the length of the RHS. */ |
170 | int rule_rhs_length PARAMS ((rule_t *rule)); | |
3067fbef AD |
171 | |
172 | /* Dump RITEM for traces. */ | |
c2713865 AD |
173 | void ritem_print PARAMS ((FILE *out)); |
174 | ||
175 | /* Return the size of the longest rule RHS. */ | |
176 | size_t ritem_longest_rhs PARAMS ((void)); | |
177 | ||
b2ca4022 | 178 | #endif /* !GRAM_H_ */ |