]> git.saurik.com Git - bison.git/blame - src/parse-gram.y
* src/scan-gram.l: No longer "parse" things after `%union' until
[bison.git] / src / parse-gram.y
CommitLineData
12ffdd28 1%{/* Bison Grammar Parser -*- C -*-
a737b216 2
073f9288 3 Copyright (C) 2002, 2003, 2004, 2005, 2006 Free Software Foundation, Inc.
e9955c83
AD
4
5 This file is part of Bison, the GNU Compiler Compiler.
6
7 This program is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 2 of the License, or
10 (at your option) any later version.
11
12 This program is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
16
17 You should have received a copy of the GNU General Public License
18 along with this program; if not, write to the Free Software
0fb669f9
PE
19 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
20 02110-1301 USA
e9955c83
AD
21*/
22
2cec9080 23#include <config.h>
e9955c83 24#include "system.h"
3d2cbc26 25
b275314e 26#include "complain.h"
3d2cbc26 27#include "conflicts.h"
e9955c83
AD
28#include "files.h"
29#include "getargs.h"
e9955c83 30#include "gram.h"
3d2cbc26 31#include "muscle_tab.h"
ca407bdf 32#include "quotearg.h"
e9955c83 33#include "reader.h"
3d2cbc26 34#include "symlist.h"
e9071366
AD
35#include "scan-gram.h"
36#include "scan-code.h"
b50d2359 37#include "strverscmp.h"
e9955c83 38
b7295522
PE
39#define YYLLOC_DEFAULT(Current, Rhs, N) (Current) = lloc_default (Rhs, N)
40static YYLTYPE lloc_default (YYLTYPE const *, int);
4cdb01db 41
b233d555 42#define YY_LOCATION_PRINT(File, Loc) \
f0064700 43 location_print (File, Loc)
b233d555 44
b50d2359
AD
45static void version_check (location const *loc, char const *version);
46
6e649e65 47/* Request detailed syntax error messages, and pass them to GRAM_ERROR.
ad8a3efc 48 FIXME: depends on the undocumented availability of YYLLOC. */
e9955c83
AD
49#undef yyerror
50#define yyerror(Msg) \
f0064700 51 gram_error (&yylloc, Msg)
3d2cbc26 52static void gram_error (location const *, char const *);
e9955c83 53
e9ce5688 54static void add_param (char const *, char *, location);
1773ceee 55
04098407
PE
56static symbol_class current_class = unknown_sym;
57static uniqstr current_type = 0;
877519f8
PE
58static symbol *current_lhs;
59static location current_lhs_location;
04098407 60static int current_prec = 0;
d42cf844
PE
61
62#ifdef UINT_FAST8_MAX
63# define YYTYPE_UINT8 uint_fast8_t
64#endif
65#ifdef INT_FAST8_MAX
66# define YYTYPE_INT8 int_fast8_t
67#endif
68#ifdef UINT_FAST16_MAX
69# define YYTYPE_UINT16 uint_fast16_t
70#endif
71#ifdef INT_FAST16_MAX
72# define YYTYPE_INT16 int_fast16_t
73#endif
e9955c83
AD
74%}
75
12ffdd28 76%debug
82b248ad 77%verbose
12ffdd28
PE
78%defines
79%locations
80%pure-parser
81%error-verbose
82%defines
83%name-prefix="gram_"
84
cd3684cf
AD
85%initial-action
86{
87 /* Bison's grammar can have initial empty locations, hence a default
88 location is needed. */
e9071366
AD
89 boundary_set (&@$.start, current_file, 1, 0);
90 boundary_set (&@$.end, current_file, 1, 0);
cd3684cf 91}
e9955c83
AD
92
93/* Only NUMBERS have a value. */
94%union
95{
3d2cbc26
PE
96 symbol *symbol;
97 symbol_list *list;
e9955c83 98 int integer;
3d2cbc26
PE
99 char *chars;
100 assoc assoc;
101 uniqstr uniqstr;
58d7a1a1 102 unsigned char character;
e9955c83
AD
103};
104
f9a85a15 105/* Define the tokens together with their human representation. */
3d38c03a
AD
106%token GRAM_EOF 0 "end of file"
107%token STRING "string"
3d38c03a 108%token INT "integer"
e9955c83 109
9280d3ef
AD
110%token PERCENT_TOKEN "%token"
111%token PERCENT_NTERM "%nterm"
366eea36 112
9280d3ef 113%token PERCENT_TYPE "%type"
e9071366
AD
114%token PERCENT_DESTRUCTOR "%destructor"
115%token PERCENT_PRINTER "%printer"
366eea36 116
9280d3ef
AD
117%token PERCENT_LEFT "%left"
118%token PERCENT_RIGHT "%right"
119%token PERCENT_NONASSOC "%nonassoc"
04e60654 120
3d38c03a
AD
121%token PERCENT_PREC "%prec"
122%token PERCENT_DPREC "%dprec"
123%token PERCENT_MERGE "%merge"
e9955c83 124
e9955c83 125
ae7453f2
AD
126/*----------------------.
127| Global Declarations. |
128`----------------------*/
129
130%token
cd3684cf 131 PERCENT_DEBUG "%debug"
39a06c25 132 PERCENT_DEFAULT_PREC "%default-prec"
cd3684cf
AD
133 PERCENT_DEFINE "%define"
134 PERCENT_DEFINES "%defines"
135 PERCENT_ERROR_VERBOSE "%error-verbose"
136 PERCENT_EXPECT "%expect"
d6328241 137 PERCENT_EXPECT_RR "%expect-rr"
cd3684cf
AD
138 PERCENT_FILE_PREFIX "%file-prefix"
139 PERCENT_GLR_PARSER "%glr-parser"
e9071366
AD
140 PERCENT_INITIAL_ACTION "%initial-action"
141 PERCENT_LEX_PARAM "%lex-param"
cd3684cf
AD
142 PERCENT_LOCATIONS "%locations"
143 PERCENT_NAME_PREFIX "%name-prefix"
22fccf95 144 PERCENT_NO_DEFAULT_PREC "%no-default-prec"
cd3684cf
AD
145 PERCENT_NO_LINES "%no-lines"
146 PERCENT_NONDETERMINISTIC_PARSER
f0064700 147 "%nondeterministic-parser"
cd3684cf 148 PERCENT_OUTPUT "%output"
e9071366 149 PERCENT_PARSE_PARAM "%parse-param"
cd3684cf 150 PERCENT_PURE_PARSER "%pure-parser"
f0064700 151 PERCENT_REQUIRE "%require"
cd3684cf
AD
152 PERCENT_SKELETON "%skeleton"
153 PERCENT_START "%start"
154 PERCENT_TOKEN_TABLE "%token-table"
155 PERCENT_VERBOSE "%verbose"
156 PERCENT_YACC "%yacc"
ae7453f2 157;
e9955c83 158
58d7a1a1
AD
159%token BRACED_CODE "{...}"
160%token CHAR "char"
161%token EPILOGUE "epilogue"
3d38c03a 162%token EQUAL "="
3d38c03a 163%token ID "identifier"
b7295522 164%token ID_COLON "identifier:"
e9955c83 165%token PERCENT_PERCENT "%%"
58d7a1a1 166%token PIPE "|"
3d38c03a 167%token PROLOGUE "%{...%}"
58d7a1a1
AD
168%token SEMICOLON ";"
169%token TYPE "type"
170
171%type <character> CHAR
172%printer { fprintf (stderr, "'%c' (%d)", $$, $$); } CHAR
3d38c03a 173
58d7a1a1
AD
174%type <chars> STRING string_content "{...}" PROLOGUE EPILOGUE
175%printer { fprintf (stderr, "\"%s\"", $$); } STRING string_content
176%printer { fprintf (stderr, "{\n%s\n}", $$); } "{...}" PROLOGUE EPILOGUE
177
178%type <uniqstr> TYPE ID ID_COLON
82b248ad 179%printer { fprintf (stderr, "<%s>", $$); } TYPE
58d7a1a1
AD
180%printer { fprintf (stderr, "%s", $$); } ID
181%printer { fprintf (stderr, "%s:", $$); } ID_COLON
182
e9955c83 183%type <integer> INT
82b248ad 184%printer { fprintf (stderr, "%d", $$); } INT
58d7a1a1
AD
185
186%type <symbol> id id_colon symbol string_as_id
187%printer { fprintf (stderr, "%s", $$->tag); } id symbol string_as_id
188%printer { fprintf (stderr, "%s:", $$->tag); } id_colon
189
2c569025 190%type <assoc> precedence_declarator
1e0bab92 191%type <list> symbols.1
e9955c83 192%%
2c569025 193
8efe435c 194input:
2c569025 195 declarations "%%" grammar epilogue.opt
e9955c83
AD
196;
197
2c569025
AD
198
199 /*------------------------------------.
200 | Declarations: before the first %%. |
201 `------------------------------------*/
202
203declarations:
e9955c83 204 /* Nothing */
b7295522 205| declarations declaration
e9955c83
AD
206;
207
2c569025
AD
208declaration:
209 grammar_declaration
e9071366
AD
210| PROLOGUE { prologue_augment (translate_code ($1, @1),
211 @1); }
d0829076 212| "%debug" { debug_flag = true; }
c66dfadd
PE
213| "%define" string_content
214 {
215 static char one[] = "1";
216 muscle_insert ($2, one);
217 }
e9955c83 218| "%define" string_content string_content { muscle_insert ($2, $3); }
d0829076
PE
219| "%defines" { defines_flag = true; }
220| "%error-verbose" { error_verbose = true; }
d6328241 221| "%expect" INT { expected_sr_conflicts = $2; }
04098407 222| "%expect-rr" INT { expected_rr_conflicts = $2; }
e9955c83 223| "%file-prefix" "=" string_content { spec_file_prefix = $3; }
cd3684cf 224| "%glr-parser"
c66dfadd
PE
225 {
226 nondeterministic_parser = true;
227 glr_parser = true;
228 }
e9071366 229| "%initial-action" "{...}"
c66dfadd 230 {
e9071366 231 muscle_code_grow ("initial_action", translate_symbol_action ($2, @2), @2);
c66dfadd 232 }
e9071366 233| "%lex-param" "{...}" { add_param ("lex_param", $2, @2); }
d0829076 234| "%locations" { locations_flag = true; }
e9955c83 235| "%name-prefix" "=" string_content { spec_name_prefix = $3; }
d0829076 236| "%no-lines" { no_lines_flag = true; }
04098407 237| "%nondeterministic-parser" { nondeterministic_parser = true; }
e9955c83 238| "%output" "=" string_content { spec_outfile = $3; }
e9071366 239| "%parse-param" "{...}" { add_param ("parse_param", $2, @2); }
916708d5 240| "%pure-parser" { pure_parser = true; }
b50d2359 241| "%require" string_content { version_check (&@2, $2); }
e9955c83 242| "%skeleton" string_content { skeleton = $2; }
d0829076 243| "%token-table" { token_table_flag = true; }
9dd5b378 244| "%verbose" { report_flag = report_states; }
d0829076 245| "%yacc" { yacc_flag = true; }
cd3684cf 246| /*FIXME: Err? What is this horror doing here? */ ";"
e9955c83
AD
247;
248
2c569025
AD
249grammar_declaration:
250 precedence_declaration
251| symbol_declaration
e9955c83
AD
252| "%start" symbol
253 {
8efe435c 254 grammar_start_symbol_set ($2, @2);
e9955c83 255 }
e9071366 256| "%destructor" "{...}" symbols.1
9280d3ef 257 {
3d2cbc26 258 symbol_list *list;
e9071366
AD
259 const char *action = translate_symbol_action ($2, @2);
260 for (list = $3; list; list = list->next)
261 symbol_destructor_set (list->sym, action, @2);
262 symbol_list_free ($3);
9280d3ef 263 }
e9071366 264| "%printer" "{...}" symbols.1
366eea36 265 {
3d2cbc26 266 symbol_list *list;
e9071366
AD
267 const char *action = translate_symbol_action ($2, @2);
268 for (list = $3; list; list = list->next)
269 symbol_printer_set (list->sym, action, @2);
270 symbol_list_free ($3);
366eea36 271 }
22fccf95 272| "%default-prec"
39a06c25 273 {
22fccf95
PE
274 default_prec = true;
275 }
276| "%no-default-prec"
277 {
278 default_prec = false;
39a06c25 279 }
2c569025
AD
280;
281
58d7a1a1
AD
282
283/*----------*
284 | %union. |
285 *----------*/
286
287%token PERCENT_UNION "%union";
288
289union_name:
290 /* Nothing. */ {}
291| ID { muscle_code_grow ("union_name", $1, @1); }
292;
293
294grammar_declaration:
295 "%union" union_name "{...}"
296 {
297 char const *body = $3;
298
299 if (typed)
300 {
301 /* Concatenate the union bodies, turning the first one's
302 trailing '}' into '\n', and omitting the second one's '{'. */
303 char *code = muscle_find ("stype");
304 code[strlen (code) - 1] = '\n';
305 body++;
306 }
307
308 typed = true;
309 muscle_code_grow ("stype", body, @3);
310 }
311;
312
313
314
315
2c569025
AD
316symbol_declaration:
317 "%nterm" { current_class = nterm_sym; } symbol_defs.1
e9955c83
AD
318 {
319 current_class = unknown_sym;
320 current_type = NULL;
321 }
2c569025 322| "%token" { current_class = token_sym; } symbol_defs.1
e9955c83 323 {
2c569025 324 current_class = unknown_sym;
e9955c83
AD
325 current_type = NULL;
326 }
1e0bab92 327| "%type" TYPE symbols.1
e9955c83 328 {
3d2cbc26 329 symbol_list *list;
1e0bab92 330 for (list = $3; list; list = list->next)
1a31ed21 331 symbol_type_set (list->sym, $2, @2);
dafdc66f 332 symbol_list_free ($3);
e9955c83
AD
333 }
334;
335
2c569025 336precedence_declaration:
1e0bab92
AD
337 precedence_declarator type.opt symbols.1
338 {
3d2cbc26 339 symbol_list *list;
1e0bab92
AD
340 ++current_prec;
341 for (list = $3; list; list = list->next)
342 {
1a31ed21
AD
343 symbol_type_set (list->sym, current_type, @2);
344 symbol_precedence_set (list->sym, current_prec, $1, @1);
1e0bab92 345 }
dafdc66f 346 symbol_list_free ($3);
1e0bab92
AD
347 current_type = NULL;
348 }
e9955c83
AD
349;
350
2c569025 351precedence_declarator:
e9955c83
AD
352 "%left" { $$ = left_assoc; }
353| "%right" { $$ = right_assoc; }
354| "%nonassoc" { $$ = non_assoc; }
355;
356
357type.opt:
87fbb0bf 358 /* Nothing. */ { current_type = NULL; }
e9955c83
AD
359| TYPE { current_type = $1; }
360;
361
362/* One or more nonterminals to be %typed. */
1e0bab92
AD
363symbols.1:
364 symbol { $$ = symbol_list_new ($1, @1); }
365| symbols.1 symbol { $$ = symbol_list_prepend ($1, $2, @2); }
e9955c83
AD
366;
367
e9955c83
AD
368/* One token definition. */
369symbol_def:
370 TYPE
371 {
372 current_type = $1;
373 }
58d7a1a1 374| id
e9955c83 375 {
073f9288 376 symbol_class_set ($1, current_class, @1, true);
1a31ed21 377 symbol_type_set ($1, current_type, @1);
e9955c83 378 }
58d7a1a1 379| id INT
e9955c83 380 {
073f9288 381 symbol_class_set ($1, current_class, @1, true);
1a31ed21 382 symbol_type_set ($1, current_type, @1);
e776192e 383 symbol_user_token_number_set ($1, $2, @2);
e9955c83 384 }
58d7a1a1 385| id string_as_id
e9955c83 386 {
073f9288 387 symbol_class_set ($1, current_class, @1, true);
1a31ed21 388 symbol_type_set ($1, current_type, @1);
a5d50994 389 symbol_make_alias ($1, $2, @$);
e9955c83 390 }
58d7a1a1 391| id INT string_as_id
e9955c83 392 {
073f9288 393 symbol_class_set ($1, current_class, @1, true);
1a31ed21 394 symbol_type_set ($1, current_type, @1);
e776192e 395 symbol_user_token_number_set ($1, $2, @2);
a5d50994 396 symbol_make_alias ($1, $3, @$);
e9955c83
AD
397 }
398;
399
400/* One or more symbol definitions. */
401symbol_defs.1:
402 symbol_def
e9955c83 403| symbol_defs.1 symbol_def
e9955c83
AD
404;
405
2c569025
AD
406
407 /*------------------------------------------.
408 | The grammar section: between the two %%. |
409 `------------------------------------------*/
410
411grammar:
1921f1d7
AD
412 rules_or_grammar_declaration
413| grammar rules_or_grammar_declaration
414;
415
416/* As a Bison extension, one can use the grammar declarations in the
b7295522 417 body of the grammar. */
1921f1d7 418rules_or_grammar_declaration:
e9955c83 419 rules
8d0a98bb 420| grammar_declaration ";"
b275314e
AD
421| error ";"
422 {
423 yyerrok;
424 }
e9955c83
AD
425;
426
427rules:
58d7a1a1 428 id_colon { current_lhs = $1; current_lhs_location = @1; } rhses.1
e9955c83
AD
429;
430
431rhses.1:
8f3596a6
AD
432 rhs { grammar_current_rule_end (@1); }
433| rhses.1 "|" rhs { grammar_current_rule_end (@3); }
8d0a98bb 434| rhses.1 ";"
e9955c83
AD
435;
436
437rhs:
438 /* Nothing. */
8f3596a6 439 { grammar_current_rule_begin (current_lhs, current_lhs_location); }
e9955c83 440| rhs symbol
8efe435c 441 { grammar_current_rule_symbol_append ($2, @2); }
e9071366
AD
442| rhs "{...}"
443 { grammar_current_rule_action_append (gram_last_string,
444 gram_last_braced_code_loc); }
e9955c83 445| rhs "%prec" symbol
e776192e 446 { grammar_current_rule_prec_set ($3, @3); }
676385e2
PH
447| rhs "%dprec" INT
448 { grammar_current_rule_dprec_set ($3, @3); }
449| rhs "%merge" TYPE
450 { grammar_current_rule_merge_set ($3, @3); }
e9955c83
AD
451;
452
58d7a1a1
AD
453
454/*---------------*
455 | Identifiers. |
456 *---------------*/
457
458/* Identifiers are returned as uniqstr by the scanner. Depending on
459 their use, we may need to make them genuine symbols. */
460
461id:
462 ID { $$ = symbol_get ($1, @1); }
463| CHAR { char cp[4] = { '\'', $1, '\'', 0 };
464 $$ = symbol_get (quotearg_style (escape_quoting_style, cp),
465 @1);
466 symbol_class_set ($$, token_sym, @1, false);
467 symbol_user_token_number_set ($$, $1, @1);
468 }
469;
470
471id_colon:
472 ID_COLON { $$ = symbol_get ($1, @1); }
473;
474
475
e9955c83 476symbol:
58d7a1a1
AD
477 id
478| string_as_id
e9955c83
AD
479;
480
ca407bdf 481/* A string used as an ID: quote it. */
e9955c83
AD
482string_as_id:
483 STRING
484 {
ca407bdf 485 $$ = symbol_get (quotearg_style (c_quoting_style, $1), @1);
073f9288 486 symbol_class_set ($$, token_sym, @1, false);
e9955c83
AD
487 }
488;
489
ca407bdf 490/* A string used for its contents. Don't quote it. */
e9955c83
AD
491string_content:
492 STRING
ca407bdf
PE
493 { $$ = $1; }
494;
e9955c83
AD
495
496
497epilogue.opt:
498 /* Nothing. */
e9955c83
AD
499| "%%" EPILOGUE
500 {
e9071366
AD
501 muscle_code_grow ("epilogue", translate_code ($2, @2), @2);
502 gram_scanner_last_string_free ();
e9955c83
AD
503 }
504;
505
e9955c83 506%%
b7295522
PE
507
508
509/* Return the location of the left-hand side of a rule whose
510 right-hand side is RHS[1] ... RHS[N]. Ignore empty nonterminals in
511 the right-hand side, and return an empty location equal to the end
512 boundary of RHS[0] if the right-hand side is empty. */
513
514static YYLTYPE
515lloc_default (YYLTYPE const *rhs, int n)
516{
517 int i;
a737b216 518 YYLTYPE loc;
62cb8a99
PE
519
520 /* SGI MIPSpro 7.4.1m miscompiles "loc.start = loc.end = rhs[n].end;".
521 The bug is fixed in 7.4.2m, but play it safe for now. */
522 loc.start = rhs[n].end;
523 loc.end = rhs[n].end;
b7295522 524
5320ca4d
PE
525 /* Ignore empty nonterminals the start of the the right-hand side.
526 Do not bother to ignore them at the end of the right-hand side,
527 since empty nonterminals have the same end as their predecessors. */
b7295522
PE
528 for (i = 1; i <= n; i++)
529 if (! equal_boundaries (rhs[i].start, rhs[i].end))
530 {
a737b216 531 loc.start = rhs[i].start;
b7295522
PE
532 break;
533 }
534
a737b216 535 return loc;
b7295522
PE
536}
537
538
539/* Add a lex-param or a parse-param (depending on TYPE) with
540 declaration DECL and location LOC. */
541
1773ceee 542static void
e9ce5688 543add_param (char const *type, char *decl, location loc)
1773ceee 544{
ead9e56e 545 static char const alphanum[26 + 26 + 1 + 10] =
1773ceee
PE
546 "abcdefghijklmnopqrstuvwxyz"
547 "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
ead9e56e
PE
548 "_"
549 "0123456789";
1773ceee 550 char const *name_start = NULL;
e9ce5688 551 char *p;
1773ceee 552
e503aa60
AD
553 /* Stop on last actual character. */
554 for (p = decl; p[1]; p++)
ead9e56e
PE
555 if ((p == decl
556 || ! memchr (alphanum, p[-1], sizeof alphanum))
557 && memchr (alphanum, p[0], sizeof alphanum - 10))
1773ceee
PE
558 name_start = p;
559
ead9e56e
PE
560 /* Strip the surrounding '{' and '}', and any blanks just inside
561 the braces. */
562 while (*--p == ' ' || *p == '\t')
563 continue;
e503aa60 564 p[1] = '\0';
ead9e56e
PE
565 while (*++decl == ' ' || *decl == '\t')
566 continue;
e9ce5688 567
1773ceee
PE
568 if (! name_start)
569 complain_at (loc, _("missing identifier in parameter declaration"));
570 else
571 {
572 char *name;
573 size_t name_len;
574
575 for (name_len = 1;
ead9e56e 576 memchr (alphanum, name_start[name_len], sizeof alphanum);
1773ceee
PE
577 name_len++)
578 continue;
579
580 name = xmalloc (name_len + 1);
581 memcpy (name, name_start, name_len);
582 name[name_len] = '\0';
583 muscle_pair_list_grow (type, decl, name);
584 free (name);
585 }
586
e9071366 587 gram_scanner_last_string_free ();
1773ceee
PE
588}
589
b50d2359
AD
590static void
591version_check (location const *loc, char const *version)
592{
593 if (strverscmp (version, PACKAGE_VERSION) > 0)
9b8a5ce0
AD
594 {
595 complain_at (*loc, "require bison %s, but have %s",
596 version, PACKAGE_VERSION);
597 exit (63);
598 }
b50d2359
AD
599}
600
1fec91df 601static void
3d2cbc26 602gram_error (location const *loc, char const *msg)
e9955c83 603{
ad8a3efc 604 complain_at (*loc, "%s", msg);
e9955c83 605}
e9ce5688
PE
606
607char const *
608token_name (int type)
609{
fc01665e 610 return yytname[YYTRANSLATE (type)];
e9ce5688 611}