]> git.saurik.com Git - bison.git/blob - src/parse-gram.y
* data/glr.c (yyreportTree): Make room in yystates for the state
[bison.git] / src / parse-gram.y
1 %{/* Bison Grammar Parser -*- C -*-
2
3 Copyright (C) 2002, 2003, 2004, 2005, 2006 Free Software Foundation, Inc.
4
5 This file is part of Bison, the GNU Compiler Compiler.
6
7 This program is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 2 of the License, or
10 (at your option) any later version.
11
12 This program is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
16
17 You should have received a copy of the GNU General Public License
18 along with this program; if not, write to the Free Software
19 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
20 02110-1301 USA
21 */
22
23 #include <config.h>
24 #include "system.h"
25
26 #include "complain.h"
27 #include "conflicts.h"
28 #include "files.h"
29 #include "getargs.h"
30 #include "gram.h"
31 #include "muscle_tab.h"
32 #include "quotearg.h"
33 #include "reader.h"
34 #include "symlist.h"
35 #include "strverscmp.h"
36
/* Have the generated parser compute the location of each reduced rule
   by calling lloc_default (defined after the grammar).  */
37 #define YYLLOC_DEFAULT(Current, Rhs, N) (Current) = lloc_default (Rhs, N)
38 static YYLTYPE lloc_default (YYLTYPE const *, int);
39
/* Print locations with location_print.  */
40 #define YY_LOCATION_PRINT(File, Loc) \
41 location_print (File, Loc)
42
/* Handle %require: complain and exit if VERSION is newer than this
   program's version.  */
43 static void version_check (location const *loc, char const *version);
44
45 /* Request detailed syntax error messages, and pass them to GRAM_ERROR.
46 FIXME: depends on the undocumented availability of YYLLOC. */
47 #undef yyerror
48 #define yyerror(Msg) \
49 gram_error (&yylloc, Msg)
50 static void gram_error (location const *, char const *);
51
/* Record a %lex-param or %parse-param declaration.  */
52 static void add_param (char const *, char *, location);
53
/* State shared between the semantic actions of the grammar below.
   current_class: class given to the symbols defined by the %token or
     %nterm declaration being parsed; reset to unknown_sym afterwards.
   current_type: type tag applied to the symbols being declared; reset
     to NULL at the end of each declaration.
   current_lhs, current_lhs_location: left-hand side symbol of the rule
     being parsed and its location, recorded when ID_COLON is reduced.
   current_prec: precedence level counter, incremented once for each
     precedence declaration.  */
54 static symbol_class current_class = unknown_sym;
55 static uniqstr current_type = 0;
56 static symbol *current_lhs;
57 static location current_lhs_location;
58 static int current_prec = 0;
59
/* When the "fast" fixed-width integer types are available (detected
   through their *_MAX macros), define the YYTYPE_* macros to them.
   NOTE(review): presumably these select the integer types the parser
   skeleton uses for its tables -- confirm against the skeleton.  */
60 #ifdef UINT_FAST8_MAX
61 # define YYTYPE_UINT8 uint_fast8_t
62 #endif
63 #ifdef INT_FAST8_MAX
64 # define YYTYPE_INT8 int_fast8_t
65 #endif
66 #ifdef UINT_FAST16_MAX
67 # define YYTYPE_UINT16 uint_fast16_t
68 #endif
69 #ifdef INT_FAST16_MAX
70 # define YYTYPE_INT16 int_fast16_t
71 #endif
72 %}
73
/* Options for the parser generated from this grammar.  Locations are
   tracked, the parser is pure, syntax error messages are verbose, and
   external names use the "gram_" prefix.  (%defines was previously
   listed twice; once is enough.)  */
%debug
%verbose
%defines
%locations
%pure-parser
%error-verbose
%name-prefix="gram_"
82
83 %initial-action
84 {
85 /* Bison's grammar can begin with empty locations (an empty rule may
86 be reduced first), hence a default location is needed. */
/* Start at line 1, column 0 of current_file.  */
87 @$.start.file = @$.end.file = current_file;
88 @$.start.line = @$.end.line = 1;
89 @$.start.column = @$.end.column = 0;
90 }
91
92 /* Semantic value types.  The %type declarations in this file say
   which tokens and nonterminals carry which member; symbols without
   a %type have no value. */
93 %union
94 {
95 symbol *symbol;
96 symbol_list *list;
97 int integer;
98 char *chars;
99 assoc assoc;
100 uniqstr uniqstr;
101 };
102
103 /* Define the tokens together with their human representation.
   The quoted string is an alias: the grammar rules below refer to
   most tokens by it (e.g., "%token" rather than PERCENT_TOKEN).
   GRAM_EOF is given the explicit token number 0. */
104 %token GRAM_EOF 0 "end of file"
105 %token STRING "string"
106 %token INT "integer"
107
108 %token PERCENT_TOKEN "%token"
109 %token PERCENT_NTERM "%nterm"
110
111 %token PERCENT_TYPE "%type"
112 %token PERCENT_DESTRUCTOR "%destructor {...}"
113 %token PERCENT_PRINTER "%printer {...}"
114
115 %token PERCENT_UNION "%union {...}"
116
117 %token PERCENT_LEFT "%left"
118 %token PERCENT_RIGHT "%right"
119 %token PERCENT_NONASSOC "%nonassoc"
120
121 %token PERCENT_PREC "%prec"
122 %token PERCENT_DPREC "%dprec"
123 %token PERCENT_MERGE "%merge"
124
125
126 /*----------------------.
127 | Global Declarations. |
128 `----------------------*/
129
/* Tokens for the directives accepted by the `declaration' and
   `grammar_declaration' rules, in alphabetical order.  */
130 %token
131 PERCENT_DEBUG "%debug"
132 PERCENT_DEFAULT_PREC "%default-prec"
133 PERCENT_DEFINE "%define"
134 PERCENT_DEFINES "%defines"
135 PERCENT_ERROR_VERBOSE "%error-verbose"
136 PERCENT_EXPECT "%expect"
137 PERCENT_EXPECT_RR "%expect-rr"
138 PERCENT_FILE_PREFIX "%file-prefix"
139 PERCENT_GLR_PARSER "%glr-parser"
140 PERCENT_INITIAL_ACTION "%initial-action {...}"
141 PERCENT_LEX_PARAM "%lex-param {...}"
142 PERCENT_LOCATIONS "%locations"
143 PERCENT_NAME_PREFIX "%name-prefix"
144 PERCENT_NO_DEFAULT_PREC "%no-default-prec"
145 PERCENT_NO_LINES "%no-lines"
146 PERCENT_NONDETERMINISTIC_PARSER
147 "%nondeterministic-parser"
148 PERCENT_OUTPUT "%output"
149 PERCENT_PARSE_PARAM "%parse-param {...}"
150 PERCENT_PURE_PARSER "%pure-parser"
151 PERCENT_REQUIRE "%require"
152 PERCENT_SKELETON "%skeleton"
153 PERCENT_START "%start"
154 PERCENT_TOKEN_TABLE "%token-table"
155 PERCENT_VERBOSE "%verbose"
156 PERCENT_YACC "%yacc"
157 ;
158
/* Remaining tokens: type tags, punctuation, identifiers, the section
   separator, and the code blocks (prologue, epilogue, braced code).  */
159 %token TYPE "type"
160 %token EQUAL "="
161 %token SEMICOLON ";"
162 %token PIPE "|"
163 %token ID "identifier"
164 %token ID_COLON "identifier:"
165 %token PERCENT_PERCENT "%%"
166 %token PROLOGUE "%{...%}"
167 %token EPILOGUE "epilogue"
168 %token BRACED_CODE "{...}"
169
170
/* Semantic value types of the symbols, each followed by the %printer
   used to display such values.  All printers write to stderr.  */

/* String-valued symbols.  Plain strings are printed between double
   quotes, code blocks between braces on their own lines.  */
171 %type <chars> STRING string_content
172 "%destructor {...}"
173 "%initial-action {...}"
174 "%lex-param {...}"
175 "%parse-param {...}"
176 "%printer {...}"
177 "%union {...}"
178 PROLOGUE EPILOGUE
179 %printer { fprintf (stderr, "\"%s\"", $$); }
180 STRING string_content
181 %printer { fprintf (stderr, "{\n%s\n}", $$); }
182 "%destructor {...}"
183 "%initial-action {...}"
184 "%lex-param {...}"
185 "%parse-param {...}"
186 "%printer {...}"
187 "%union {...}"
188 PROLOGUE EPILOGUE
/* Type tags are printed between angle brackets.  */
189 %type <uniqstr> TYPE
190 %printer { fprintf (stderr, "<%s>", $$); } TYPE
191 %type <integer> INT
192 %printer { fprintf (stderr, "%d", $$); } INT
/* Symbols are printed by their tag; ID_COLON adds a trailing colon.  */
193 %type <symbol> ID symbol string_as_id
194 %printer { fprintf (stderr, "%s", $$->tag); } ID symbol string_as_id
195 %type <symbol> ID_COLON
196 %printer { fprintf (stderr, "%s:", $$->tag); } ID_COLON
/* These two nonterminals have a value but no printer.  */
197 %type <assoc> precedence_declarator
198 %type <list> symbols.1
199 %%
200
/* The whole input: the declarations, the "%%" separator, the grammar
   rules, and an optional epilogue.  This is the first rule of the
   grammar.  */
201 input:
202 declarations "%%" grammar epilogue.opt
203 ;
204
205
206 /*------------------------------------.
207 | Declarations: before the first %%. |
208 `------------------------------------*/
209
/* Zero or more declarations (left-recursive list, possibly empty).  */
210 declarations:
211 /* Nothing */
212 | declarations declaration
213 ;
214
/* One declaration of the first section.  Besides the declarations that
   are also allowed in the grammar section (grammar_declaration), this
   covers the prologue and the directives that set a global flag or
   variable, grow a muscle, or record a parameter.  */
215 declaration:
216 grammar_declaration
217 | PROLOGUE { prologue_augment ($1, @1); }
218 | "%debug" { debug_flag = true; }
219 | "%define" string_content
220 {
/* A %define without a value inserts the muscle with the value "1".  */
221 static char one[] = "1";
222 muscle_insert ($2, one);
223 }
224 | "%define" string_content string_content { muscle_insert ($2, $3); }
225 | "%defines" { defines_flag = true; }
226 | "%error-verbose" { error_verbose = true; }
227 | "%expect" INT { expected_sr_conflicts = $2; }
228 | "%expect-rr" INT { expected_rr_conflicts = $2; }
229 | "%file-prefix" "=" string_content { spec_file_prefix = $3; }
230 | "%glr-parser"
231 {
/* %glr-parser also sets the nondeterministic_parser flag.  */
232 nondeterministic_parser = true;
233 glr_parser = true;
234 }
235 | "%initial-action {...}"
236 {
237 muscle_code_grow ("initial_action", $1, @1);
238 }
239 | "%lex-param {...}" { add_param ("lex_param", $1, @1); }
240 | "%locations" { locations_flag = true; }
241 | "%name-prefix" "=" string_content { spec_name_prefix = $3; }
242 | "%no-lines" { no_lines_flag = true; }
243 | "%nondeterministic-parser" { nondeterministic_parser = true; }
244 | "%output" "=" string_content { spec_outfile = $3; }
245 | "%parse-param {...}" { add_param ("parse_param", $1, @1); }
246 | "%pure-parser" { pure_parser = true; }
247 | "%require" string_content { version_check (&@2, $2); }
248 | "%skeleton" string_content { skeleton = $2; }
249 | "%token-table" { token_table_flag = true; }
250 | "%verbose" { report_flag = report_states; }
251 | "%yacc" { yacc_flag = true; }
252 | /*FIXME: Err? What is this horror doing here? */ ";"
253 ;
254
/* Declarations that are accepted both in the declarations section and
   (followed by ";") in the grammar section.  */
255 grammar_declaration:
256 precedence_declaration
257 | symbol_declaration
258 | "%start" symbol
259 {
260 grammar_start_symbol_set ($2, @2);
261 }
262 | "%union {...}"
263 {
264 char const *body = $1;
265
/* `typed' is true once a %union has already been seen.  */
266 if (typed)
267 {
268 /* Concatenate the union bodies, turning the first one's
269 trailing '}' into '\n', and omitting the second one's '{'. */
270 char *code = muscle_find ("stype");
271 code[strlen (code) - 1] = '\n';
272 body++;
273 }
274
275 typed = true;
276 muscle_code_grow ("stype", body, @1);
277 }
278 | "%destructor {...}" symbols.1
279 {
/* Attach the destructor code to every listed symbol, then release
   the list.  */
280 symbol_list *list;
281 for (list = $2; list; list = list->next)
282 symbol_destructor_set (list->sym, $1, @1);
283 symbol_list_free ($2);
284 }
285 | "%printer {...}" symbols.1
286 {
/* Likewise for the printer code.  */
287 symbol_list *list;
288 for (list = $2; list; list = list->next)
289 symbol_printer_set (list->sym, $1, @1);
290 symbol_list_free ($2);
291 }
292 | "%default-prec"
293 {
294 default_prec = true;
295 }
296 | "%no-default-prec"
297 {
298 default_prec = false;
299 }
300 ;
301
/* %nterm, %token and %type declarations.  For %nterm and %token a
   mid-rule action sets current_class before the symbol definitions are
   parsed, and the final action resets current_class and current_type.
   For %type, the tag is applied to every listed symbol and the list is
   then released.  */
302 symbol_declaration:
303 "%nterm" { current_class = nterm_sym; } symbol_defs.1
304 {
305 current_class = unknown_sym;
306 current_type = NULL;
307 }
308 | "%token" { current_class = token_sym; } symbol_defs.1
309 {
310 current_class = unknown_sym;
311 current_type = NULL;
312 }
313 | "%type" TYPE symbols.1
314 {
315 symbol_list *list;
316 for (list = $3; list; list = list->next)
317 symbol_type_set (list->sym, $2, @2);
318 symbol_list_free ($3);
319 }
320 ;
321
/* %left, %right or %nonassoc, an optional type tag, and the symbols
   concerned.  Each declaration gets a new precedence level (current_prec
   is incremented first); every listed symbol receives that level, the
   associativity $1, and the type left in current_type by type.opt.  */
322 precedence_declaration:
323 precedence_declarator type.opt symbols.1
324 {
325 symbol_list *list;
326 ++current_prec;
327 for (list = $3; list; list = list->next)
328 {
329 symbol_type_set (list->sym, current_type, @2);
330 symbol_precedence_set (list->sym, current_prec, $1, @1);
331 }
332 symbol_list_free ($3);
333 current_type = NULL;
334 }
335 ;
336
/* The associativity keyword; its value is the matching assoc
   constant.  */
337 precedence_declarator:
338 "%left" { $$ = left_assoc; }
339 | "%right" { $$ = right_assoc; }
340 | "%nonassoc" { $$ = non_assoc; }
341 ;
342
/* An optional type tag.  It has no semantic value: the result is left
   in current_type (NULL when the tag is absent).  */
343 type.opt:
344 /* Nothing. */ { current_type = NULL; }
345 | TYPE { current_type = $1; }
346 ;
347
348 /* One or more symbols, as listed after %type, %destructor, %printer
   or a precedence declarator.  Each further symbol is added with
   symbol_list_prepend. */
349
350 symbols.1:
351 symbol { $$ = symbol_list_new ($1, @1); }
352 | symbols.1 symbol { $$ = symbol_list_prepend ($1, $2, @2); }
353 ;
354
355 /* One symbol definition inside a %token or %nterm declaration.
   A TYPE only updates current_type, which then applies to the
   definitions that follow it.  An ID is given the class in
   current_class and the type in current_type; it may be followed by a
   user token number (INT), a string alias, or both. */
356 symbol_def:
357 TYPE
358 {
359 current_type = $1;
360 }
361 | ID
362 {
363 symbol_class_set ($1, current_class, @1, true);
364 symbol_type_set ($1, current_type, @1);
365 }
366 | ID INT
367 {
368 symbol_class_set ($1, current_class, @1, true);
369 symbol_type_set ($1, current_type, @1);
370 symbol_user_token_number_set ($1, $2, @2);
371 }
372 | ID string_as_id
373 {
374 symbol_class_set ($1, current_class, @1, true);
375 symbol_type_set ($1, current_type, @1);
376 symbol_make_alias ($1, $2, @$);
377 }
378 | ID INT string_as_id
379 {
380 symbol_class_set ($1, current_class, @1, true);
381 symbol_type_set ($1, current_type, @1);
382 symbol_user_token_number_set ($1, $2, @2);
383 symbol_make_alias ($1, $3, @$);
384 }
385 ;
386
387 /* One or more symbol definitions (left-recursive list). */
388 symbol_defs.1:
389 symbol_def
390 | symbol_defs.1 symbol_def
391 ;
392
393
394 /*------------------------------------------.
395 | The grammar section: between the two %%. |
396 `------------------------------------------*/
397
/* The grammar section: one or more rules or declarations (it cannot be
   empty).  */
398 grammar:
399 rules_or_grammar_declaration
400 | grammar rules_or_grammar_declaration
401 ;
402
403 /* As a Bison extension, one can use the grammar declarations in the
404 body of the grammar.  Such a declaration must be followed by ";".
   On a syntax error, input is skipped up to the next ";" and error
   recovery is ended with yyerrok. */
405 rules_or_grammar_declaration:
406 rules
407 | grammar_declaration ";"
408 | error ";"
409 {
410 yyerrok;
411 }
412 ;
413
/* All the rules sharing one left-hand side.  The mid-rule action
   records the left-hand side symbol and its location before any
   right-hand side is parsed, for use by `rhs'.  */
414 rules:
415 ID_COLON { current_lhs = $1; current_lhs_location = @1; } rhses.1
416 ;
417
/* One or more right-hand sides separated by "|".  Each completed
   right-hand side ends the current rule.  Any number of ";" may follow
   a right-hand side; they have no action.  */
418 rhses.1:
419 rhs { grammar_current_rule_end (@1); }
420 | rhses.1 "|" rhs { grammar_current_rule_end (@3); }
421 | rhses.1 ";"
422 ;
423
/* One right-hand side, built left to right.  The empty alternative is
   reduced first and begins a new rule for current_lhs; each following
   item appends a symbol or an action, or sets the rule's %prec, %dprec
   or %merge.  */
424 rhs:
425 /* Nothing. */
426 { grammar_current_rule_begin (current_lhs, current_lhs_location); }
427 | rhs symbol
428 { grammar_current_rule_symbol_append ($2, @2); }
429 | rhs action
430 | rhs "%prec" symbol
431 { grammar_current_rule_prec_set ($3, @3); }
432 | rhs "%dprec" INT
433 { grammar_current_rule_dprec_set ($3, @3); }
434 | rhs "%merge" TYPE
435 { grammar_current_rule_merge_set ($3, @3); }
436 ;
437
/* A symbol reference: either an identifier or a string used as a
   token name.  */
438 symbol:
439 ID { $$ = $1; }
440 | string_as_id { $$ = $1; }
441 ;
442
443 /* Handle the semantics of an action specially, with a mid-rule
444 action, so that grammar_current_rule_action_append is invoked
445 immediately after the braced code is read by the scanner.
446
447 This implementation relies on the LALR(1) parsing algorithm.
448 If grammar_current_rule_action_append were executed in a normal
449 action for this rule, then when the input grammar contains two
450 successive actions, the scanner would have to read both actions
451 before reducing this rule. That wouldn't work, since the scanner
452 relies on all preceding input actions being processed by
453 grammar_current_rule_action_append before it scans the next
454 action.
   NOTE(review): last_string and last_braced_code_loc are not defined
   in this file; presumably the scanner sets them when it reads the
   braced code -- confirm. */
455 action:
456 { grammar_current_rule_action_append (last_string, last_braced_code_loc); }
457 BRACED_CODE
458 ;
459
460 /* A string used as an ID: quote it.  The symbol is looked up under
   the C-quoted form of the string and is given the token class. */
461 string_as_id:
462 STRING
463 {
464 $$ = symbol_get (quotearg_style (c_quoting_style, $1), @1);
465 symbol_class_set ($$, token_sym, @1, false);
466 }
467 ;
468
469 /* A string used for its contents. Don't quote it: the value is the
   STRING's value, unchanged. */
470 string_content:
471 STRING
472 { $$ = $1; }
473 ;
474
475
/* The optional epilogue: a second "%%" followed by the epilogue text,
   which is appended to the "epilogue" muscle before the scanner's last
   string is freed.  */
476 epilogue.opt:
477 /* Nothing. */
478 | "%%" EPILOGUE
479 {
480 muscle_code_grow ("epilogue", $2, @2);
481 scanner_last_string_free ();
482 }
483 ;
484
485 %%
486
487
488 /* Return the location of the left-hand side of a rule whose
489 right-hand side is RHS[1] ... RHS[N]. Ignore empty nonterminals in
490 the right-hand side, and return an empty location equal to the end
491 boundary of RHS[0] if the right-hand side is empty. */
492
493 static YYLTYPE
494 lloc_default (YYLTYPE const *rhs, int n)
495 {
496 int i;
497 YYLTYPE loc;
498
499 /* SGI MIPSpro 7.4.1m miscompiles "loc.start = loc.end = rhs[n].end;".
500 The bug is fixed in 7.4.2m, but play it safe for now. */
501 loc.start = rhs[n].end;
502 loc.end = rhs[n].end;
503
504 /* Ignore empty nonterminals the start of the the right-hand side.
505 Do not bother to ignore them at the end of the right-hand side,
506 since empty nonterminals have the same end as their predecessors. */
507 for (i = 1; i <= n; i++)
508 if (! equal_boundaries (rhs[i].start, rhs[i].end))
509 {
510 loc.start = rhs[i].start;
511 break;
512 }
513
514 return loc;
515 }
516
517
518 /* Add a lex-param or a parse-param (depending on TYPE) with
519 declaration DECL and location LOC. */
520
521 static void
522 add_param (char const *type, char *decl, location loc)
523 {
524 static char const alphanum[26 + 26 + 1 + 10] =
525 "abcdefghijklmnopqrstuvwxyz"
526 "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
527 "_"
528 "0123456789";
529 char const *name_start = NULL;
530 char *p;
531
532 /* Stop on last actual character. */
533 for (p = decl; p[1]; p++)
534 if ((p == decl
535 || ! memchr (alphanum, p[-1], sizeof alphanum))
536 && memchr (alphanum, p[0], sizeof alphanum - 10))
537 name_start = p;
538
539 /* Strip the surrounding '{' and '}', and any blanks just inside
540 the braces. */
541 while (*--p == ' ' || *p == '\t')
542 continue;
543 p[1] = '\0';
544 while (*++decl == ' ' || *decl == '\t')
545 continue;
546
547 if (! name_start)
548 complain_at (loc, _("missing identifier in parameter declaration"));
549 else
550 {
551 char *name;
552 size_t name_len;
553
554 for (name_len = 1;
555 memchr (alphanum, name_start[name_len], sizeof alphanum);
556 name_len++)
557 continue;
558
559 name = xmalloc (name_len + 1);
560 memcpy (name, name_start, name_len);
561 name[name_len] = '\0';
562 muscle_pair_list_grow (type, decl, name);
563 free (name);
564 }
565
566 scanner_last_string_free ();
567 }
568
569 static void
570 version_check (location const *loc, char const *version)
571 {
572 if (strverscmp (version, PACKAGE_VERSION) > 0)
573 {
574 complain_at (*loc, "require bison %s, but have %s",
575 version, PACKAGE_VERSION);
576 exit (63);
577 }
578 }
579
580 static void
581 gram_error (location const *loc, char const *msg)
582 {
583 complain_at (*loc, "%s", msg);
584 }
585
586 char const *
587 token_name (int type)
588 {
589 return yytname[YYTRANSLATE (type)];
590 }