]> git.saurik.com Git - bison.git/blob - src/parse-gram.y
11cd489e36f578999f3d6e5c5ddc9b040385d23f
[bison.git] / src / parse-gram.y
1 %{/* Bison Grammar Parser -*- C -*-
2
3 Copyright (C) 2002, 2003, 2004, 2005, 2006 Free Software Foundation, Inc.
4
5 This file is part of Bison, the GNU Compiler Compiler.
6
7 This program is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 2 of the License, or
10 (at your option) any later version.
11
12 This program is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
16
17 You should have received a copy of the GNU General Public License
18 along with this program; if not, write to the Free Software
19 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
20 02110-1301 USA
21 */
22
23 #include <config.h>
24 #include "system.h"
25
26 #include "complain.h"
27 #include "conflicts.h"
28 #include "files.h"
29 #include "getargs.h"
30 #include "gram.h"
31 #include "muscle_tab.h"
32 #include "output.h"
33 #include "quotearg.h"
34 #include "reader.h"
35 #include "symlist.h"
36 #include "strverscmp.h"
37
38 #define YYLLOC_DEFAULT(Current, Rhs, N) (Current) = lloc_default (Rhs, N)
39 static YYLTYPE lloc_default (YYLTYPE const *, int);
40
41 #define YY_LOCATION_PRINT(File, Loc) \
42 location_print (File, Loc)
43
44 static void version_check (location const *loc, char const *version);
45
46 /* Request detailed syntax error messages, and pass them to GRAM_ERROR.
47 FIXME: depends on the undocumented availability of YYLLOC. */
48 #undef yyerror
49 #define yyerror(Msg) \
50 gram_error (&yylloc, Msg)
51 static void gram_error (location const *, char const *);
52
53 static void add_param (char const *, char *, location);
54
55 static symbol_class current_class = unknown_sym;
56 static uniqstr current_type = 0;
57 static symbol *current_lhs;
58 static location current_lhs_location;
59 static int current_prec = 0;
60
61 #ifdef UINT_FAST8_MAX
62 # define YYTYPE_UINT8 uint_fast8_t
63 #endif
64 #ifdef INT_FAST8_MAX
65 # define YYTYPE_INT8 int_fast8_t
66 #endif
67 #ifdef UINT_FAST16_MAX
68 # define YYTYPE_UINT16 uint_fast16_t
69 #endif
70 #ifdef INT_FAST16_MAX
71 # define YYTYPE_INT16 int_fast16_t
72 #endif
73 %}
74
75 %debug
76 %verbose
77 %defines
78 %locations
79 %pure-parser
80 %error-verbose
81 %defines
82 %name-prefix="gram_"
83
84 %initial-action
85 {
86 /* Bison's grammar can have initial empty locations, hence a default
87 location is needed. */
88 @$.start.file = @$.end.file = current_file;
89 @$.start.line = @$.end.line = 1;
90 @$.start.column = @$.end.column = 0;
91 }
92
93 /* Only NUMBERS have a value. */
94 %union
95 {
96 symbol *symbol;
97 symbol_list *list;
98 int integer;
99 char *chars;
100 assoc assoc;
101 uniqstr uniqstr;
102 };
103
104 /* Define the tokens together with their human representation. */
105 %token GRAM_EOF 0 "end of file"
106 %token STRING "string"
107 %token INT "integer"
108
109 %token PERCENT_TOKEN "%token"
110 %token PERCENT_NTERM "%nterm"
111
112 %token PERCENT_TYPE "%type"
113 %token PERCENT_DESTRUCTOR "%destructor {...}"
114 %token PERCENT_PRINTER "%printer {...}"
115
116 %token PERCENT_UNION "%union {...}"
117
118 %token PERCENT_LEFT "%left"
119 %token PERCENT_RIGHT "%right"
120 %token PERCENT_NONASSOC "%nonassoc"
121
122 %token PERCENT_PREC "%prec"
123 %token PERCENT_DPREC "%dprec"
124 %token PERCENT_MERGE "%merge"
125
126
127 /*----------------------.
128 | Global Declarations. |
129 `----------------------*/
130
131 %token
132 PERCENT_DEBUG "%debug"
133 PERCENT_DEFAULT_PREC "%default-prec"
134 PERCENT_DEFINE "%define"
135 PERCENT_DEFINES "%defines"
136 PERCENT_ERROR_VERBOSE "%error-verbose"
137 PERCENT_EXPECT "%expect"
138 PERCENT_EXPECT_RR "%expect-rr"
139 PERCENT_FILE_PREFIX "%file-prefix"
140 PERCENT_GLR_PARSER "%glr-parser"
141 PERCENT_INITIAL_ACTION "%initial-action {...}"
142 PERCENT_LEX_PARAM "%lex-param {...}"
143 PERCENT_LOCATIONS "%locations"
144 PERCENT_NAME_PREFIX "%name-prefix"
145 PERCENT_NO_DEFAULT_PREC "%no-default-prec"
146 PERCENT_NO_LINES "%no-lines"
147 PERCENT_NONDETERMINISTIC_PARSER
148 "%nondeterministic-parser"
149 PERCENT_OUTPUT "%output"
150 PERCENT_PARSE_PARAM "%parse-param {...}"
151 PERCENT_PURE_PARSER "%pure-parser"
152 PERCENT_REQUIRE "%require"
153 PERCENT_SKELETON "%skeleton"
154 PERCENT_START "%start"
155 PERCENT_TOKEN_TABLE "%token-table"
156 PERCENT_VERBOSE "%verbose"
157 PERCENT_YACC "%yacc"
158 ;
159
160 %token TYPE "type"
161 %token EQUAL "="
162 %token SEMICOLON ";"
163 %token PIPE "|"
164 %token ID "identifier"
165 %token ID_COLON "identifier:"
166 %token PERCENT_PERCENT "%%"
167 %token PROLOGUE "%{...%}"
168 %token EPILOGUE "epilogue"
169 %token BRACED_CODE "{...}"
170
171
172 %type <chars> STRING string_content
173 "%destructor {...}"
174 "%initial-action {...}"
175 "%lex-param {...}"
176 "%parse-param {...}"
177 "%printer {...}"
178 "%union {...}"
179 PROLOGUE EPILOGUE
180 %printer { fprintf (stderr, "\"%s\"", $$); }
181 STRING string_content
182 %printer { fprintf (stderr, "{\n%s\n}", $$); }
183 "%destructor {...}"
184 "%initial-action {...}"
185 "%lex-param {...}"
186 "%parse-param {...}"
187 "%printer {...}"
188 "%union {...}"
189 PROLOGUE EPILOGUE
190 %type <uniqstr> TYPE
191 %printer { fprintf (stderr, "<%s>", $$); } TYPE
192 %type <integer> INT
193 %printer { fprintf (stderr, "%d", $$); } INT
194 %type <symbol> ID symbol string_as_id
195 %printer { fprintf (stderr, "%s", $$->tag); } ID symbol string_as_id
196 %type <symbol> ID_COLON
197 %printer { fprintf (stderr, "%s:", $$->tag); } ID_COLON
198 %type <assoc> precedence_declarator
199 %type <list> symbols.1
200 %%
201
202 input:
203 declarations "%%" grammar epilogue.opt
204 ;
205
206
207 /*------------------------------------.
208 | Declarations: before the first %%. |
209 `------------------------------------*/
210
211 declarations:
212 /* Nothing */
213 | declarations declaration
214 ;
215
216 declaration:
217 grammar_declaration
218 | PROLOGUE { prologue_augment ($1, @1); }
219 | "%debug" { debug_flag = true; }
220 | "%define" string_content
221 {
222 static char one[] = "1";
223 muscle_insert ($2, one);
224 }
225 | "%define" string_content string_content { muscle_insert ($2, $3); }
226 | "%defines" { defines_flag = true; }
227 | "%error-verbose" { error_verbose = true; }
228 | "%expect" INT { expected_sr_conflicts = $2; }
229 | "%expect-rr" INT { expected_rr_conflicts = $2; }
230 | "%file-prefix" "=" string_content { spec_file_prefix = $3; }
231 | "%glr-parser"
232 {
233 nondeterministic_parser = true;
234 glr_parser = true;
235 }
236 | "%initial-action {...}"
237 {
238 muscle_code_grow ("initial_action", $1, @1);
239 }
240 | "%lex-param {...}" { add_param ("lex_param", $1, @1); }
241 | "%locations" { locations_flag = true; }
242 | "%name-prefix" "=" string_content { spec_name_prefix = $3; }
243 | "%no-lines" { no_lines_flag = true; }
244 | "%nondeterministic-parser" { nondeterministic_parser = true; }
245 | "%output" "=" string_content { spec_outfile = $3; }
246 | "%parse-param {...}" { add_param ("parse_param", $1, @1); }
247 | "%pure-parser" { pure_parser = true; }
248 | "%require" string_content { version_check (&@2, $2); }
249 | "%skeleton" string_content { skeleton = $2; }
250 | "%token-table" { token_table_flag = true; }
251 | "%verbose" { report_flag = report_states; }
252 | "%yacc" { yacc_flag = true; }
253 | /*FIXME: Err? What is this horror doing here? */ ";"
254 ;
255
256 grammar_declaration:
257 precedence_declaration
258 | symbol_declaration
259 | "%start" symbol
260 {
261 grammar_start_symbol_set ($2, @2);
262 }
263 | "%union {...}"
264 {
265 char const *body = $1;
266
267 if (typed)
268 {
269 /* Concatenate the union bodies, turning the first one's
270 trailing '}' into '\n', and omitting the second one's '{'. */
271 char *code = muscle_find ("stype");
272 code[strlen (code) - 1] = '\n';
273 body++;
274 }
275
276 typed = true;
277 muscle_code_grow ("stype", body, @1);
278 }
279 | "%destructor {...}" symbols.1
280 {
281 symbol_list *list;
282 for (list = $2; list; list = list->next)
283 symbol_destructor_set (list->sym, $1, @1);
284 symbol_list_free ($2);
285 }
286 | "%printer {...}" symbols.1
287 {
288 symbol_list *list;
289 for (list = $2; list; list = list->next)
290 symbol_printer_set (list->sym, $1, @1);
291 symbol_list_free ($2);
292 }
293 | "%default-prec"
294 {
295 default_prec = true;
296 }
297 | "%no-default-prec"
298 {
299 default_prec = false;
300 }
301 ;
302
303 symbol_declaration:
304 "%nterm" { current_class = nterm_sym; } symbol_defs.1
305 {
306 current_class = unknown_sym;
307 current_type = NULL;
308 }
309 | "%token" { current_class = token_sym; } symbol_defs.1
310 {
311 current_class = unknown_sym;
312 current_type = NULL;
313 }
314 | "%type" TYPE symbols.1
315 {
316 symbol_list *list;
317 for (list = $3; list; list = list->next)
318 symbol_type_set (list->sym, $2, @2);
319 symbol_list_free ($3);
320 }
321 ;
322
323 precedence_declaration:
324 precedence_declarator type.opt symbols.1
325 {
326 symbol_list *list;
327 ++current_prec;
328 for (list = $3; list; list = list->next)
329 {
330 symbol_type_set (list->sym, current_type, @2);
331 symbol_precedence_set (list->sym, current_prec, $1, @1);
332 }
333 symbol_list_free ($3);
334 current_type = NULL;
335 }
336 ;
337
338 precedence_declarator:
339 "%left" { $$ = left_assoc; }
340 | "%right" { $$ = right_assoc; }
341 | "%nonassoc" { $$ = non_assoc; }
342 ;
343
344 type.opt:
345 /* Nothing. */ { current_type = NULL; }
346 | TYPE { current_type = $1; }
347 ;
348
349 /* One or more nonterminals to be %typed. */
350
351 symbols.1:
352 symbol { $$ = symbol_list_new ($1, @1); }
353 | symbols.1 symbol { $$ = symbol_list_prepend ($1, $2, @2); }
354 ;
355
356 /* One token definition. */
357 symbol_def:
358 TYPE
359 {
360 current_type = $1;
361 }
362 | ID
363 {
364 symbol_class_set ($1, current_class, @1, true);
365 symbol_type_set ($1, current_type, @1);
366 }
367 | ID INT
368 {
369 symbol_class_set ($1, current_class, @1, true);
370 symbol_type_set ($1, current_type, @1);
371 symbol_user_token_number_set ($1, $2, @2);
372 }
373 | ID string_as_id
374 {
375 symbol_class_set ($1, current_class, @1, true);
376 symbol_type_set ($1, current_type, @1);
377 symbol_make_alias ($1, $2, @$);
378 }
379 | ID INT string_as_id
380 {
381 symbol_class_set ($1, current_class, @1, true);
382 symbol_type_set ($1, current_type, @1);
383 symbol_user_token_number_set ($1, $2, @2);
384 symbol_make_alias ($1, $3, @$);
385 }
386 ;
387
388 /* One or more symbol definitions. */
389 symbol_defs.1:
390 symbol_def
391 | symbol_defs.1 symbol_def
392 ;
393
394
395 /*------------------------------------------.
396 | The grammar section: between the two %%. |
397 `------------------------------------------*/
398
399 grammar:
400 rules_or_grammar_declaration
401 | grammar rules_or_grammar_declaration
402 ;
403
404 /* As a Bison extension, one can use the grammar declarations in the
405 body of the grammar. */
406 rules_or_grammar_declaration:
407 rules
408 | grammar_declaration ";"
409 | error ";"
410 {
411 yyerrok;
412 }
413 ;
414
415 rules:
416 ID_COLON { current_lhs = $1; current_lhs_location = @1; } rhses.1
417 ;
418
419 rhses.1:
420 rhs { grammar_current_rule_end (@1); }
421 | rhses.1 "|" rhs { grammar_current_rule_end (@3); }
422 | rhses.1 ";"
423 ;
424
425 rhs:
426 /* Nothing. */
427 { grammar_current_rule_begin (current_lhs, current_lhs_location); }
428 | rhs symbol
429 { grammar_current_rule_symbol_append ($2, @2); }
430 | rhs action
431 | rhs "%prec" symbol
432 { grammar_current_rule_prec_set ($3, @3); }
433 | rhs "%dprec" INT
434 { grammar_current_rule_dprec_set ($3, @3); }
435 | rhs "%merge" TYPE
436 { grammar_current_rule_merge_set ($3, @3); }
437 ;
438
439 symbol:
440 ID { $$ = $1; }
441 | string_as_id { $$ = $1; }
442 ;
443
444 /* Handle the semantics of an action specially, with a mid-rule
445 action, so that grammar_current_rule_action_append is invoked
446 immediately after the braced code is read by the scanner.
447
448 This implementation relies on the LALR(1) parsing algorithm.
449 If grammar_current_rule_action_append were executed in a normal
450 action for this rule, then when the input grammar contains two
451 successive actions, the scanner would have to read both actions
452 before reducing this rule. That wouldn't work, since the scanner
453 relies on all preceding input actions being processed by
454 grammar_current_rule_action_append before it scans the next
455 action. */
456 action:
457 { grammar_current_rule_action_append (last_string, last_braced_code_loc); }
458 BRACED_CODE
459 ;
460
461 /* A string used as an ID: quote it. */
462 string_as_id:
463 STRING
464 {
465 $$ = symbol_get (quotearg_style (c_quoting_style, $1), @1);
466 symbol_class_set ($$, token_sym, @1, false);
467 }
468 ;
469
470 /* A string used for its contents. Don't quote it. */
471 string_content:
472 STRING
473 { $$ = $1; }
474 ;
475
476
477 epilogue.opt:
478 /* Nothing. */
479 | "%%" EPILOGUE
480 {
481 muscle_code_grow ("epilogue", $2, @2);
482 scanner_last_string_free ();
483 }
484 ;
485
486 %%
487
488
489 /* Return the location of the left-hand side of a rule whose
490 right-hand side is RHS[1] ... RHS[N]. Ignore empty nonterminals in
491 the right-hand side, and return an empty location equal to the end
492 boundary of RHS[0] if the right-hand side is empty. */
493
494 static YYLTYPE
495 lloc_default (YYLTYPE const *rhs, int n)
496 {
497 int i;
498 YYLTYPE loc;
499
500 /* SGI MIPSpro 7.4.1m miscompiles "loc.start = loc.end = rhs[n].end;".
501 The bug is fixed in 7.4.2m, but play it safe for now. */
502 loc.start = rhs[n].end;
503 loc.end = rhs[n].end;
504
505 /* Ignore empty nonterminals the start of the the right-hand side.
506 Do not bother to ignore them at the end of the right-hand side,
507 since empty nonterminals have the same end as their predecessors. */
508 for (i = 1; i <= n; i++)
509 if (! equal_boundaries (rhs[i].start, rhs[i].end))
510 {
511 loc.start = rhs[i].start;
512 break;
513 }
514
515 return loc;
516 }
517
518
519 /* Add a lex-param or a parse-param (depending on TYPE) with
520 declaration DECL and location LOC. */
521
522 static void
523 add_param (char const *type, char *decl, location loc)
524 {
525 static char const alphanum[26 + 26 + 1 + 10] =
526 "abcdefghijklmnopqrstuvwxyz"
527 "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
528 "_"
529 "0123456789";
530 char const *name_start = NULL;
531 char *p;
532
533 /* Stop on last actual character. */
534 for (p = decl; p[1]; p++)
535 if ((p == decl
536 || ! memchr (alphanum, p[-1], sizeof alphanum))
537 && memchr (alphanum, p[0], sizeof alphanum - 10))
538 name_start = p;
539
540 /* Strip the surrounding '{' and '}', and any blanks just inside
541 the braces. */
542 while (*--p == ' ' || *p == '\t')
543 continue;
544 p[1] = '\0';
545 while (*++decl == ' ' || *decl == '\t')
546 continue;
547
548 if (! name_start)
549 complain_at (loc, _("missing identifier in parameter declaration"));
550 else
551 {
552 char *name;
553 size_t name_len;
554
555 for (name_len = 1;
556 memchr (alphanum, name_start[name_len], sizeof alphanum);
557 name_len++)
558 continue;
559
560 name = xmalloc (name_len + 1);
561 memcpy (name, name_start, name_len);
562 name[name_len] = '\0';
563 muscle_pair_list_grow (type, decl, name);
564 free (name);
565 }
566
567 scanner_last_string_free ();
568 }
569
570 static void
571 version_check (location const *loc, char const *version)
572 {
573 if (strverscmp (version, PACKAGE_VERSION) > 0)
574 {
575 complain_at (*loc, "require bison %s, but have %s",
576 version, PACKAGE_VERSION);
577 exit (63);
578 }
579 }
580
581 static void
582 gram_error (location const *loc, char const *msg)
583 {
584 complain_at (*loc, "%s", msg);
585 }
586
587 char const *
588 token_name (int type)
589 {
590 return yytname[YYTRANSLATE (type)];
591 }