]> git.saurik.com Git - bison.git/blame - src/parse-gram.y
Between Bison releases, manually append `+' to the previous Bison
[bison.git] / src / parse-gram.y
CommitLineData
12ffdd28 1%{/* Bison Grammar Parser -*- C -*-
a737b216 2
073f9288 3 Copyright (C) 2002, 2003, 2004, 2005, 2006 Free Software Foundation, Inc.
e9955c83
AD
4
5 This file is part of Bison, the GNU Compiler Compiler.
6
7 This program is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 2 of the License, or
10 (at your option) any later version.
11
12 This program is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
16
17 You should have received a copy of the GNU General Public License
18 along with this program; if not, write to the Free Software
0fb669f9
PE
19 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
20 02110-1301 USA
e9955c83
AD
21*/
22
2cec9080 23#include <config.h>
e9955c83 24#include "system.h"
3d2cbc26 25
b275314e 26#include "complain.h"
3d2cbc26 27#include "conflicts.h"
e9955c83
AD
28#include "files.h"
29#include "getargs.h"
e9955c83 30#include "gram.h"
3d2cbc26 31#include "muscle_tab.h"
ca407bdf 32#include "quotearg.h"
e9955c83 33#include "reader.h"
3d2cbc26 34#include "symlist.h"
b50d2359 35#include "strverscmp.h"
e9955c83 36
b7295522
PE
37#define YYLLOC_DEFAULT(Current, Rhs, N) (Current) = lloc_default (Rhs, N)
38static YYLTYPE lloc_default (YYLTYPE const *, int);
4cdb01db 39
b233d555 40#define YY_LOCATION_PRINT(File, Loc) \
f0064700 41 location_print (File, Loc)
b233d555 42
b50d2359
AD
43static void version_check (location const *loc, char const *version);
44
6e649e65 45/* Request detailed syntax error messages, and pass them to GRAM_ERROR.
ad8a3efc 46 FIXME: depends on the undocumented availability of YYLLOC. */
e9955c83
AD
47#undef yyerror
48#define yyerror(Msg) \
f0064700 49 gram_error (&yylloc, Msg)
3d2cbc26 50static void gram_error (location const *, char const *);
e9955c83 51
e9ce5688 52static void add_param (char const *, char *, location);
1773ceee 53
04098407
PE
54static symbol_class current_class = unknown_sym;
55static uniqstr current_type = 0;
877519f8
PE
56static symbol *current_lhs;
57static location current_lhs_location;
04098407 58static int current_prec = 0;
d42cf844
PE
59
60#ifdef UINT_FAST8_MAX
61# define YYTYPE_UINT8 uint_fast8_t
62#endif
63#ifdef INT_FAST8_MAX
64# define YYTYPE_INT8 int_fast8_t
65#endif
66#ifdef UINT_FAST16_MAX
67# define YYTYPE_UINT16 uint_fast16_t
68#endif
69#ifdef INT_FAST16_MAX
70# define YYTYPE_INT16 int_fast16_t
71#endif
e9955c83
AD
72%}
73
12ffdd28 74%debug
82b248ad 75%verbose
12ffdd28
PE
76%defines
77%locations
78%pure-parser
79%error-verbose
80%defines
81%name-prefix="gram_"
82
cd3684cf
AD
83%initial-action
84{
85 /* Bison's grammar can have initial empty locations, hence a default
86 location is needed. */
87 @$.start.file = @$.end.file = current_file;
88 @$.start.line = @$.end.line = 1;
89 @$.start.column = @$.end.column = 0;
90}
e9955c83
AD
91
92/* Semantic value types; the %type declarations below say which symbols carry which. */
93%union
94{
3d2cbc26
PE
95 symbol *symbol;
96 symbol_list *list;
e9955c83 97 int integer;
3d2cbc26
PE
98 char *chars;
99 assoc assoc;
100 uniqstr uniqstr;
e9955c83
AD
101};
102
f9a85a15 103/* Define the tokens together with their human representation. */
3d38c03a
AD
104%token GRAM_EOF 0 "end of file"
105%token STRING "string"
3d38c03a 106%token INT "integer"
e9955c83 107
9280d3ef
AD
108%token PERCENT_TOKEN "%token"
109%token PERCENT_NTERM "%nterm"
366eea36 110
9280d3ef 111%token PERCENT_TYPE "%type"
e9ce5688
PE
112%token PERCENT_DESTRUCTOR "%destructor {...}"
113%token PERCENT_PRINTER "%printer {...}"
366eea36 114
e9ce5688 115%token PERCENT_UNION "%union {...}"
366eea36 116
9280d3ef
AD
117%token PERCENT_LEFT "%left"
118%token PERCENT_RIGHT "%right"
119%token PERCENT_NONASSOC "%nonassoc"
04e60654 120
3d38c03a
AD
121%token PERCENT_PREC "%prec"
122%token PERCENT_DPREC "%dprec"
123%token PERCENT_MERGE "%merge"
e9955c83 124
e9955c83 125
ae7453f2
AD
126/*----------------------.
127| Global Declarations. |
128`----------------------*/
129
130%token
cd3684cf 131 PERCENT_DEBUG "%debug"
39a06c25 132 PERCENT_DEFAULT_PREC "%default-prec"
cd3684cf
AD
133 PERCENT_DEFINE "%define"
134 PERCENT_DEFINES "%defines"
135 PERCENT_ERROR_VERBOSE "%error-verbose"
136 PERCENT_EXPECT "%expect"
d6328241 137 PERCENT_EXPECT_RR "%expect-rr"
cd3684cf
AD
138 PERCENT_FILE_PREFIX "%file-prefix"
139 PERCENT_GLR_PARSER "%glr-parser"
140 PERCENT_INITIAL_ACTION "%initial-action {...}"
141 PERCENT_LEX_PARAM "%lex-param {...}"
142 PERCENT_LOCATIONS "%locations"
143 PERCENT_NAME_PREFIX "%name-prefix"
22fccf95 144 PERCENT_NO_DEFAULT_PREC "%no-default-prec"
cd3684cf
AD
145 PERCENT_NO_LINES "%no-lines"
146 PERCENT_NONDETERMINISTIC_PARSER
f0064700 147 "%nondeterministic-parser"
cd3684cf
AD
148 PERCENT_OUTPUT "%output"
149 PERCENT_PARSE_PARAM "%parse-param {...}"
150 PERCENT_PURE_PARSER "%pure-parser"
f0064700 151 PERCENT_REQUIRE "%require"
cd3684cf
AD
152 PERCENT_SKELETON "%skeleton"
153 PERCENT_START "%start"
154 PERCENT_TOKEN_TABLE "%token-table"
155 PERCENT_VERBOSE "%verbose"
156 PERCENT_YACC "%yacc"
ae7453f2 157;
e9955c83 158
3d38c03a
AD
159%token TYPE "type"
160%token EQUAL "="
161%token SEMICOLON ";"
3d38c03a
AD
162%token PIPE "|"
163%token ID "identifier"
b7295522 164%token ID_COLON "identifier:"
e9955c83 165%token PERCENT_PERCENT "%%"
3d38c03a
AD
166%token PROLOGUE "%{...%}"
167%token EPILOGUE "epilogue"
168%token BRACED_CODE "{...}"
169
e9955c83 170
3d2cbc26 171%type <chars> STRING string_content
e9ce5688 172 "%destructor {...}"
cd3684cf 173 "%initial-action {...}"
e9ce5688
PE
174 "%lex-param {...}"
175 "%parse-param {...}"
176 "%printer {...}"
177 "%union {...}"
3d2cbc26 178 PROLOGUE EPILOGUE
82b248ad 179%printer { fprintf (stderr, "\"%s\"", $$); }
f0064700 180 STRING string_content
82b248ad
AD
181%printer { fprintf (stderr, "{\n%s\n}", $$); }
182 "%destructor {...}"
183 "%initial-action {...}"
184 "%lex-param {...}"
185 "%parse-param {...}"
186 "%printer {...}"
187 "%union {...}"
82b248ad 188 PROLOGUE EPILOGUE
3d2cbc26 189%type <uniqstr> TYPE
82b248ad 190%printer { fprintf (stderr, "<%s>", $$); } TYPE
e9955c83 191%type <integer> INT
82b248ad
AD
192%printer { fprintf (stderr, "%d", $$); } INT
193%type <symbol> ID symbol string_as_id
2f4f028d 194%printer { fprintf (stderr, "%s", $$->tag); } ID symbol string_as_id
82b248ad
AD
195%type <symbol> ID_COLON
196%printer { fprintf (stderr, "%s:", $$->tag); } ID_COLON
2c569025 197%type <assoc> precedence_declarator
1e0bab92 198%type <list> symbols.1
e9955c83 199%%
2c569025 200
8efe435c 201input:
2c569025 202 declarations "%%" grammar epilogue.opt
e9955c83
AD
203;
204
2c569025
AD
205
206 /*------------------------------------.
207 | Declarations: before the first %%. |
208 `------------------------------------*/
209
210declarations:
e9955c83 211 /* Nothing */
b7295522 212| declarations declaration
e9955c83
AD
213;
214
2c569025
AD
215declaration:
216 grammar_declaration
217| PROLOGUE { prologue_augment ($1, @1); }
d0829076 218| "%debug" { debug_flag = true; }
c66dfadd
PE
219| "%define" string_content
220 {
221 static char one[] = "1";
222 muscle_insert ($2, one);
223 }
e9955c83 224| "%define" string_content string_content { muscle_insert ($2, $3); }
d0829076
PE
225| "%defines" { defines_flag = true; }
226| "%error-verbose" { error_verbose = true; }
d6328241 227| "%expect" INT { expected_sr_conflicts = $2; }
04098407 228| "%expect-rr" INT { expected_rr_conflicts = $2; }
e9955c83 229| "%file-prefix" "=" string_content { spec_file_prefix = $3; }
cd3684cf 230| "%glr-parser"
c66dfadd
PE
231 {
232 nondeterministic_parser = true;
233 glr_parser = true;
234 }
cd3684cf 235| "%initial-action {...}"
c66dfadd
PE
236 {
237 muscle_code_grow ("initial_action", $1, @1);
238 }
e9ce5688 239| "%lex-param {...}" { add_param ("lex_param", $1, @1); }
d0829076 240| "%locations" { locations_flag = true; }
e9955c83 241| "%name-prefix" "=" string_content { spec_name_prefix = $3; }
d0829076 242| "%no-lines" { no_lines_flag = true; }
04098407 243| "%nondeterministic-parser" { nondeterministic_parser = true; }
e9955c83 244| "%output" "=" string_content { spec_outfile = $3; }
cd3684cf 245| "%parse-param {...}" { add_param ("parse_param", $1, @1); }
916708d5 246| "%pure-parser" { pure_parser = true; }
b50d2359 247| "%require" string_content { version_check (&@2, $2); }
e9955c83 248| "%skeleton" string_content { skeleton = $2; }
d0829076 249| "%token-table" { token_table_flag = true; }
9dd5b378 250| "%verbose" { report_flag = report_states; }
d0829076 251| "%yacc" { yacc_flag = true; }
cd3684cf 252| /*FIXME: Err? What is this horror doing here? */ ";"
e9955c83
AD
253;
254
2c569025
AD
255grammar_declaration:
256 precedence_declaration
257| symbol_declaration
e9955c83
AD
258| "%start" symbol
259 {
8efe435c 260 grammar_start_symbol_set ($2, @2);
e9955c83 261 }
e9ce5688 262| "%union {...}"
2c569025 263 {
1221b78a
PE
264 char const *body = $1;
265
266 if (typed)
267 {
268 /* Concatenate the union bodies, turning the first one's
269 trailing '}' into '\n', and omitting the second one's '{'. */
270 char *code = muscle_find ("stype");
271 code[strlen (code) - 1] = '\n';
272 body++;
273 }
274
d0829076 275 typed = true;
1221b78a 276 muscle_code_grow ("stype", body, @1);
2c569025 277 }
e9ce5688 278| "%destructor {...}" symbols.1
9280d3ef 279 {
3d2cbc26 280 symbol_list *list;
e9ce5688
PE
281 for (list = $2; list; list = list->next)
282 symbol_destructor_set (list->sym, $1, @1);
283 symbol_list_free ($2);
9280d3ef 284 }
e9ce5688 285| "%printer {...}" symbols.1
366eea36 286 {
3d2cbc26 287 symbol_list *list;
e9ce5688 288 for (list = $2; list; list = list->next)
e2a8c0f5 289 symbol_printer_set (list->sym, $1, @1);
e9ce5688 290 symbol_list_free ($2);
366eea36 291 }
22fccf95 292| "%default-prec"
39a06c25 293 {
22fccf95
PE
294 default_prec = true;
295 }
296| "%no-default-prec"
297 {
298 default_prec = false;
39a06c25 299 }
2c569025
AD
300;
301
302symbol_declaration:
303 "%nterm" { current_class = nterm_sym; } symbol_defs.1
e9955c83
AD
304 {
305 current_class = unknown_sym;
306 current_type = NULL;
307 }
2c569025 308| "%token" { current_class = token_sym; } symbol_defs.1
e9955c83 309 {
2c569025 310 current_class = unknown_sym;
e9955c83
AD
311 current_type = NULL;
312 }
1e0bab92 313| "%type" TYPE symbols.1
e9955c83 314 {
3d2cbc26 315 symbol_list *list;
1e0bab92 316 for (list = $3; list; list = list->next)
1a31ed21 317 symbol_type_set (list->sym, $2, @2);
dafdc66f 318 symbol_list_free ($3);
e9955c83
AD
319 }
320;
321
2c569025 322precedence_declaration:
1e0bab92
AD
323 precedence_declarator type.opt symbols.1
324 {
3d2cbc26 325 symbol_list *list;
1e0bab92
AD
326 ++current_prec;
327 for (list = $3; list; list = list->next)
328 {
1a31ed21
AD
329 symbol_type_set (list->sym, current_type, @2);
330 symbol_precedence_set (list->sym, current_prec, $1, @1);
1e0bab92 331 }
dafdc66f 332 symbol_list_free ($3);
1e0bab92
AD
333 current_type = NULL;
334 }
e9955c83
AD
335;
336
2c569025 337precedence_declarator:
e9955c83
AD
338 "%left" { $$ = left_assoc; }
339| "%right" { $$ = right_assoc; }
340| "%nonassoc" { $$ = non_assoc; }
341;
342
343type.opt:
87fbb0bf 344 /* Nothing. */ { current_type = NULL; }
e9955c83
AD
345| TYPE { current_type = $1; }
346;
347
348/* One or more symbols, as listed after %type, %destructor, %printer, or a precedence declarator. */
e9955c83 349
1e0bab92
AD
350symbols.1:
351 symbol { $$ = symbol_list_new ($1, @1); }
352| symbols.1 symbol { $$ = symbol_list_prepend ($1, $2, @2); }
e9955c83
AD
353;
354
e9955c83
AD
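355/* One symbol definition (a token or a nonterminal, depending on current_class), or a <type> tag that sets the type of the definitions that follow. */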
356symbol_def:
357 TYPE
358 {
359 current_type = $1;
360 }
361| ID
362 {
073f9288 363 symbol_class_set ($1, current_class, @1, true);
1a31ed21 364 symbol_type_set ($1, current_type, @1);
e9955c83
AD
365 }
366| ID INT
367 {
073f9288 368 symbol_class_set ($1, current_class, @1, true);
1a31ed21 369 symbol_type_set ($1, current_type, @1);
e776192e 370 symbol_user_token_number_set ($1, $2, @2);
e9955c83
AD
371 }
372| ID string_as_id
373 {
073f9288 374 symbol_class_set ($1, current_class, @1, true);
1a31ed21 375 symbol_type_set ($1, current_type, @1);
a5d50994 376 symbol_make_alias ($1, $2, @$);
e9955c83
AD
377 }
378| ID INT string_as_id
379 {
073f9288 380 symbol_class_set ($1, current_class, @1, true);
1a31ed21 381 symbol_type_set ($1, current_type, @1);
e776192e 382 symbol_user_token_number_set ($1, $2, @2);
a5d50994 383 symbol_make_alias ($1, $3, @$);
e9955c83
AD
384 }
385;
386
387/* One or more symbol definitions. */
388symbol_defs.1:
389 symbol_def
e9955c83 390| symbol_defs.1 symbol_def
e9955c83
AD
391;
392
2c569025
AD
393
394 /*------------------------------------------.
395 | The grammar section: between the two %%. |
396 `------------------------------------------*/
397
398grammar:
1921f1d7
AD
399 rules_or_grammar_declaration
400| grammar rules_or_grammar_declaration
401;
402
403/* As a Bison extension, one can use the grammar declarations in the
b7295522 404 body of the grammar. */
1921f1d7 405rules_or_grammar_declaration:
e9955c83 406 rules
8d0a98bb 407| grammar_declaration ";"
b275314e
AD
408| error ";"
409 {
410 yyerrok;
411 }
e9955c83
AD
412;
413
414rules:
b7295522 415 ID_COLON { current_lhs = $1; current_lhs_location = @1; } rhses.1
e9955c83
AD
416;
417
418rhses.1:
8f3596a6
AD
419 rhs { grammar_current_rule_end (@1); }
420| rhses.1 "|" rhs { grammar_current_rule_end (@3); }
8d0a98bb 421| rhses.1 ";"
e9955c83
AD
422;
423
424rhs:
425 /* Nothing. */
8f3596a6 426 { grammar_current_rule_begin (current_lhs, current_lhs_location); }
e9955c83 427| rhs symbol
8efe435c 428 { grammar_current_rule_symbol_append ($2, @2); }
e9955c83 429| rhs action
e9955c83 430| rhs "%prec" symbol
e776192e 431 { grammar_current_rule_prec_set ($3, @3); }
676385e2
PH
432| rhs "%dprec" INT
433 { grammar_current_rule_dprec_set ($3, @3); }
434| rhs "%merge" TYPE
435 { grammar_current_rule_merge_set ($3, @3); }
e9955c83
AD
436;
437
438symbol:
439 ID { $$ = $1; }
440| string_as_id { $$ = $1; }
e9955c83
AD
441;
442
6b702268
PE
443/* Handle the semantics of an action specially, with a mid-rule
444 action, so that grammar_current_rule_action_append is invoked
445 immediately after the braced code is read by the scanner.
446
447 This implementation relies on the LALR(1) parsing algorithm.
448 If grammar_current_rule_action_append were executed in a normal
449 action for this rule, then when the input grammar contains two
450 successive actions, the scanner would have to read both actions
451 before reducing this rule. That wouldn't work, since the scanner
452 relies on all preceding input actions being processed by
453 grammar_current_rule_action_append before it scans the next
454 action. */
e9955c83 455action:
6b702268 456 { grammar_current_rule_action_append (last_string, last_braced_code_loc); }
e9955c83 457 BRACED_CODE
e9955c83
AD
458;
459
ca407bdf 460/* A string used as an ID: quote it. */
e9955c83
AD
461string_as_id:
462 STRING
463 {
ca407bdf 464 $$ = symbol_get (quotearg_style (c_quoting_style, $1), @1);
073f9288 465 symbol_class_set ($$, token_sym, @1, false);
e9955c83
AD
466 }
467;
468
ca407bdf 469/* A string used for its contents. Don't quote it. */
e9955c83
AD
470string_content:
471 STRING
ca407bdf
PE
472 { $$ = $1; }
473;
e9955c83
AD
474
475
476epilogue.opt:
477 /* Nothing. */
e9955c83
AD
478| "%%" EPILOGUE
479 {
cd3684cf 480 muscle_code_grow ("epilogue", $2, @2);
7ec2d4cd 481 scanner_last_string_free ();
e9955c83
AD
482 }
483;
484
e9955c83 485%%
b7295522
PE
486
487
488/* Return the location of the left-hand side of a rule whose
489 right-hand side is RHS[1] ... RHS[N]. Ignore empty nonterminals in
490 the right-hand side, and return an empty location equal to the end
491 boundary of RHS[0] if the right-hand side is empty. */
492
493static YYLTYPE
494lloc_default (YYLTYPE const *rhs, int n)
495{
496 int i;
a737b216 497 YYLTYPE loc;
62cb8a99
PE
498
499 /* SGI MIPSpro 7.4.1m miscompiles "loc.start = loc.end = rhs[n].end;".
500 The bug is fixed in 7.4.2m, but play it safe for now. */
501 loc.start = rhs[n].end;
502 loc.end = rhs[n].end;
b7295522 503
5320ca4d
PE
504 /* Ignore empty nonterminals the start of the the right-hand side.
505 Do not bother to ignore them at the end of the right-hand side,
506 since empty nonterminals have the same end as their predecessors. */
b7295522
PE
507 for (i = 1; i <= n; i++)
508 if (! equal_boundaries (rhs[i].start, rhs[i].end))
509 {
a737b216 510 loc.start = rhs[i].start;
b7295522
PE
511 break;
512 }
513
a737b216 514 return loc;
b7295522
PE
515}
516
517
518/* Add a lex-param or a parse-param (depending on TYPE) with
519 declaration DECL and location LOC. */
520
1773ceee 521static void
e9ce5688 522add_param (char const *type, char *decl, location loc)
1773ceee 523{
ead9e56e 524 static char const alphanum[26 + 26 + 1 + 10] =
1773ceee
PE
525 "abcdefghijklmnopqrstuvwxyz"
526 "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
ead9e56e
PE
527 "_"
528 "0123456789";
1773ceee 529 char const *name_start = NULL;
e9ce5688 530 char *p;
1773ceee 531
e503aa60
AD
532 /* Stop on last actual character. */
533 for (p = decl; p[1]; p++)
ead9e56e
PE
534 if ((p == decl
535 || ! memchr (alphanum, p[-1], sizeof alphanum))
536 && memchr (alphanum, p[0], sizeof alphanum - 10))
1773ceee
PE
537 name_start = p;
538
ead9e56e
PE
539 /* Strip the surrounding '{' and '}', and any blanks just inside
540 the braces. */
541 while (*--p == ' ' || *p == '\t')
542 continue;
e503aa60 543 p[1] = '\0';
ead9e56e
PE
544 while (*++decl == ' ' || *decl == '\t')
545 continue;
e9ce5688 546
1773ceee
PE
547 if (! name_start)
548 complain_at (loc, _("missing identifier in parameter declaration"));
549 else
550 {
551 char *name;
552 size_t name_len;
553
554 for (name_len = 1;
ead9e56e 555 memchr (alphanum, name_start[name_len], sizeof alphanum);
1773ceee
PE
556 name_len++)
557 continue;
558
559 name = xmalloc (name_len + 1);
560 memcpy (name, name_start, name_len);
561 name[name_len] = '\0';
562 muscle_pair_list_grow (type, decl, name);
563 free (name);
564 }
565
566 scanner_last_string_free ();
567}
568
b50d2359
AD
569static void
570version_check (location const *loc, char const *version)
571{
572 if (strverscmp (version, PACKAGE_VERSION) > 0)
9b8a5ce0
AD
573 {
574 complain_at (*loc, "require bison %s, but have %s",
575 version, PACKAGE_VERSION);
576 exit (63);
577 }
b50d2359
AD
578}
579
1fec91df 580static void
3d2cbc26 581gram_error (location const *loc, char const *msg)
e9955c83 582{
ad8a3efc 583 complain_at (*loc, "%s", msg);
e9955c83 584}
e9ce5688
PE
585
586char const *
587token_name (int type)
588{
fc01665e 589 return yytname[YYTRANSLATE (type)];
e9ce5688 590}