]> git.saurik.com Git - bison.git/blame - src/parse-gram.y
* m4/cxx.m4 (BISON_TEST_FOR_WORKING_CXX_COMPILER): Check that
[bison.git] / src / parse-gram.y
CommitLineData
12ffdd28 1%{/* Bison Grammar Parser -*- C -*-
a737b216 2
073f9288 3 Copyright (C) 2002, 2003, 2004, 2005, 2006 Free Software Foundation, Inc.
e9955c83
AD
4
5 This file is part of Bison, the GNU Compiler Compiler.
6
7 This program is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 2 of the License, or
10 (at your option) any later version.
11
12 This program is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
16
17 You should have received a copy of the GNU General Public License
18 along with this program; if not, write to the Free Software
0fb669f9
PE
19 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
20 02110-1301 USA
e9955c83
AD
21*/
22
2cec9080 23#include <config.h>
e9955c83 24#include "system.h"
3d2cbc26 25
b275314e 26#include "complain.h"
3d2cbc26 27#include "conflicts.h"
e9955c83
AD
28#include "files.h"
29#include "getargs.h"
e9955c83 30#include "gram.h"
3d2cbc26
PE
31#include "muscle_tab.h"
32#include "output.h"
ca407bdf 33#include "quotearg.h"
e9955c83 34#include "reader.h"
3d2cbc26 35#include "symlist.h"
b50d2359 36#include "strverscmp.h"
e9955c83 37
b7295522
PE
38#define YYLLOC_DEFAULT(Current, Rhs, N) (Current) = lloc_default (Rhs, N)
39static YYLTYPE lloc_default (YYLTYPE const *, int);
4cdb01db 40
b233d555 41#define YY_LOCATION_PRINT(File, Loc) \
f0064700 42 location_print (File, Loc)
b233d555 43
b50d2359
AD
44static void version_check (location const *loc, char const *version);
45
6e649e65 46/* Request detailed syntax error messages, and pass them to GRAM_ERROR.
ad8a3efc 47 FIXME: depends on the undocumented availability of YYLLOC. */
e9955c83
AD
48#undef yyerror
49#define yyerror(Msg) \
f0064700 50 gram_error (&yylloc, Msg)
3d2cbc26 51static void gram_error (location const *, char const *);
e9955c83 52
e9ce5688 53static void add_param (char const *, char *, location);
1773ceee 54
04098407
PE
55static symbol_class current_class = unknown_sym;
56static uniqstr current_type = 0;
877519f8
PE
57static symbol *current_lhs;
58static location current_lhs_location;
04098407 59static int current_prec = 0;
d42cf844
PE
60
61#ifdef UINT_FAST8_MAX
62# define YYTYPE_UINT8 uint_fast8_t
63#endif
64#ifdef INT_FAST8_MAX
65# define YYTYPE_INT8 int_fast8_t
66#endif
67#ifdef UINT_FAST16_MAX
68# define YYTYPE_UINT16 uint_fast16_t
69#endif
70#ifdef INT_FAST16_MAX
71# define YYTYPE_INT16 int_fast16_t
72#endif
e9955c83
AD
73%}
74
/* Options for the parser generated from this grammar.  Each
   directive is listed once.  */
%debug
%verbose
%defines
%locations
%pure-parser
%error-verbose
%name-prefix="gram_"
83
cd3684cf
AD
84%initial-action
85{
86 /* Bison's grammar can have initial empty locations, hence a default
87 location is needed. */
88 @$.start.file = @$.end.file = current_file;
89 @$.start.line = @$.end.line = 1;
90 @$.start.column = @$.end.column = 0;
91}
e9955c83
AD
92
93/* Types of the semantic values carried by tokens and nonterminals. */
94%union
95{
3d2cbc26
PE
96 symbol *symbol;
97 symbol_list *list;
e9955c83 98 int integer;
3d2cbc26
PE
99 char *chars;
100 assoc assoc;
101 uniqstr uniqstr;
e9955c83
AD
102};
103
f9a85a15 104/* Define the tokens together with their human representation. */
3d38c03a
AD
105%token GRAM_EOF 0 "end of file"
106%token STRING "string"
3d38c03a 107%token INT "integer"
e9955c83 108
9280d3ef
AD
109%token PERCENT_TOKEN "%token"
110%token PERCENT_NTERM "%nterm"
366eea36 111
9280d3ef 112%token PERCENT_TYPE "%type"
e9ce5688
PE
113%token PERCENT_DESTRUCTOR "%destructor {...}"
114%token PERCENT_PRINTER "%printer {...}"
366eea36 115
e9ce5688 116%token PERCENT_UNION "%union {...}"
366eea36 117
9280d3ef
AD
118%token PERCENT_LEFT "%left"
119%token PERCENT_RIGHT "%right"
120%token PERCENT_NONASSOC "%nonassoc"
04e60654 121
3d38c03a
AD
122%token PERCENT_PREC "%prec"
123%token PERCENT_DPREC "%dprec"
124%token PERCENT_MERGE "%merge"
e9955c83 125
e9955c83 126
ae7453f2
AD
127/*----------------------.
128| Global Declarations. |
129`----------------------*/
130
131%token
cd3684cf 132 PERCENT_DEBUG "%debug"
39a06c25 133 PERCENT_DEFAULT_PREC "%default-prec"
cd3684cf
AD
134 PERCENT_DEFINE "%define"
135 PERCENT_DEFINES "%defines"
136 PERCENT_ERROR_VERBOSE "%error-verbose"
137 PERCENT_EXPECT "%expect"
d6328241 138 PERCENT_EXPECT_RR "%expect-rr"
cd3684cf
AD
139 PERCENT_FILE_PREFIX "%file-prefix"
140 PERCENT_GLR_PARSER "%glr-parser"
141 PERCENT_INITIAL_ACTION "%initial-action {...}"
142 PERCENT_LEX_PARAM "%lex-param {...}"
143 PERCENT_LOCATIONS "%locations"
144 PERCENT_NAME_PREFIX "%name-prefix"
22fccf95 145 PERCENT_NO_DEFAULT_PREC "%no-default-prec"
cd3684cf
AD
146 PERCENT_NO_LINES "%no-lines"
147 PERCENT_NONDETERMINISTIC_PARSER
f0064700 148 "%nondeterministic-parser"
cd3684cf
AD
149 PERCENT_OUTPUT "%output"
150 PERCENT_PARSE_PARAM "%parse-param {...}"
151 PERCENT_PURE_PARSER "%pure-parser"
f0064700 152 PERCENT_REQUIRE "%require"
cd3684cf
AD
153 PERCENT_SKELETON "%skeleton"
154 PERCENT_START "%start"
155 PERCENT_TOKEN_TABLE "%token-table"
156 PERCENT_VERBOSE "%verbose"
157 PERCENT_YACC "%yacc"
ae7453f2 158;
e9955c83 159
3d38c03a
AD
160%token TYPE "type"
161%token EQUAL "="
162%token SEMICOLON ";"
3d38c03a
AD
163%token PIPE "|"
164%token ID "identifier"
b7295522 165%token ID_COLON "identifier:"
e9955c83 166%token PERCENT_PERCENT "%%"
3d38c03a
AD
167%token PROLOGUE "%{...%}"
168%token EPILOGUE "epilogue"
169%token BRACED_CODE "{...}"
170
e9955c83 171
3d2cbc26 172%type <chars> STRING string_content
e9ce5688 173 "%destructor {...}"
cd3684cf 174 "%initial-action {...}"
e9ce5688
PE
175 "%lex-param {...}"
176 "%parse-param {...}"
177 "%printer {...}"
178 "%union {...}"
3d2cbc26 179 PROLOGUE EPILOGUE
82b248ad 180%printer { fprintf (stderr, "\"%s\"", $$); }
f0064700 181 STRING string_content
82b248ad
AD
182%printer { fprintf (stderr, "{\n%s\n}", $$); }
183 "%destructor {...}"
184 "%initial-action {...}"
185 "%lex-param {...}"
186 "%parse-param {...}"
187 "%printer {...}"
188 "%union {...}"
82b248ad 189 PROLOGUE EPILOGUE
3d2cbc26 190%type <uniqstr> TYPE
82b248ad 191%printer { fprintf (stderr, "<%s>", $$); } TYPE
e9955c83 192%type <integer> INT
82b248ad
AD
193%printer { fprintf (stderr, "%d", $$); } INT
194%type <symbol> ID symbol string_as_id
2f4f028d 195%printer { fprintf (stderr, "%s", $$->tag); } ID symbol string_as_id
82b248ad
AD
196%type <symbol> ID_COLON
197%printer { fprintf (stderr, "%s:", $$->tag); } ID_COLON
2c569025 198%type <assoc> precedence_declarator
1e0bab92 199%type <list> symbols.1
e9955c83 200%%
2c569025 201
8efe435c 202input:
2c569025 203 declarations "%%" grammar epilogue.opt
e9955c83
AD
204;
205
2c569025
AD
206
207 /*------------------------------------.
208 | Declarations: before the first %%. |
209 `------------------------------------*/
210
211declarations:
e9955c83 212 /* Nothing */
b7295522 213| declarations declaration
e9955c83
AD
214;
215
2c569025
AD
216declaration:
217 grammar_declaration
218| PROLOGUE { prologue_augment ($1, @1); }
d0829076 219| "%debug" { debug_flag = true; }
c66dfadd
PE
220| "%define" string_content
221 {
222 static char one[] = "1";
223 muscle_insert ($2, one);
224 }
e9955c83 225| "%define" string_content string_content { muscle_insert ($2, $3); }
d0829076
PE
226| "%defines" { defines_flag = true; }
227| "%error-verbose" { error_verbose = true; }
d6328241 228| "%expect" INT { expected_sr_conflicts = $2; }
04098407 229| "%expect-rr" INT { expected_rr_conflicts = $2; }
e9955c83 230| "%file-prefix" "=" string_content { spec_file_prefix = $3; }
cd3684cf 231| "%glr-parser"
c66dfadd
PE
232 {
233 nondeterministic_parser = true;
234 glr_parser = true;
235 }
cd3684cf 236| "%initial-action {...}"
c66dfadd
PE
237 {
238 muscle_code_grow ("initial_action", $1, @1);
239 }
e9ce5688 240| "%lex-param {...}" { add_param ("lex_param", $1, @1); }
d0829076 241| "%locations" { locations_flag = true; }
e9955c83 242| "%name-prefix" "=" string_content { spec_name_prefix = $3; }
d0829076 243| "%no-lines" { no_lines_flag = true; }
04098407 244| "%nondeterministic-parser" { nondeterministic_parser = true; }
e9955c83 245| "%output" "=" string_content { spec_outfile = $3; }
cd3684cf 246| "%parse-param {...}" { add_param ("parse_param", $1, @1); }
916708d5 247| "%pure-parser" { pure_parser = true; }
b50d2359 248| "%require" string_content { version_check (&@2, $2); }
e9955c83 249| "%skeleton" string_content { skeleton = $2; }
d0829076 250| "%token-table" { token_table_flag = true; }
9dd5b378 251| "%verbose" { report_flag = report_states; }
d0829076 252| "%yacc" { yacc_flag = true; }
cd3684cf 253| /*FIXME: Err? What is this horror doing here? */ ";"
e9955c83
AD
254;
255
2c569025
AD
256grammar_declaration:
257 precedence_declaration
258| symbol_declaration
e9955c83
AD
259| "%start" symbol
260 {
8efe435c 261 grammar_start_symbol_set ($2, @2);
e9955c83 262 }
e9ce5688 263| "%union {...}"
2c569025 264 {
1221b78a
PE
265 char const *body = $1;
266
267 if (typed)
268 {
269 /* Concatenate the union bodies, turning the first one's
270 trailing '}' into '\n', and omitting the second one's '{'. */
271 char *code = muscle_find ("stype");
272 code[strlen (code) - 1] = '\n';
273 body++;
274 }
275
d0829076 276 typed = true;
1221b78a 277 muscle_code_grow ("stype", body, @1);
2c569025 278 }
e9ce5688 279| "%destructor {...}" symbols.1
9280d3ef 280 {
3d2cbc26 281 symbol_list *list;
e9ce5688
PE
282 for (list = $2; list; list = list->next)
283 symbol_destructor_set (list->sym, $1, @1);
284 symbol_list_free ($2);
9280d3ef 285 }
e9ce5688 286| "%printer {...}" symbols.1
366eea36 287 {
3d2cbc26 288 symbol_list *list;
e9ce5688 289 for (list = $2; list; list = list->next)
e2a8c0f5 290 symbol_printer_set (list->sym, $1, @1);
e9ce5688 291 symbol_list_free ($2);
366eea36 292 }
22fccf95 293| "%default-prec"
39a06c25 294 {
22fccf95
PE
295 default_prec = true;
296 }
297| "%no-default-prec"
298 {
299 default_prec = false;
39a06c25 300 }
2c569025
AD
301;
302
303symbol_declaration:
304 "%nterm" { current_class = nterm_sym; } symbol_defs.1
e9955c83
AD
305 {
306 current_class = unknown_sym;
307 current_type = NULL;
308 }
2c569025 309| "%token" { current_class = token_sym; } symbol_defs.1
e9955c83 310 {
2c569025 311 current_class = unknown_sym;
e9955c83
AD
312 current_type = NULL;
313 }
1e0bab92 314| "%type" TYPE symbols.1
e9955c83 315 {
3d2cbc26 316 symbol_list *list;
1e0bab92 317 for (list = $3; list; list = list->next)
1a31ed21 318 symbol_type_set (list->sym, $2, @2);
dafdc66f 319 symbol_list_free ($3);
e9955c83
AD
320 }
321;
322
2c569025 323precedence_declaration:
1e0bab92
AD
324 precedence_declarator type.opt symbols.1
325 {
3d2cbc26 326 symbol_list *list;
1e0bab92
AD
327 ++current_prec;
328 for (list = $3; list; list = list->next)
329 {
1a31ed21
AD
330 symbol_type_set (list->sym, current_type, @2);
331 symbol_precedence_set (list->sym, current_prec, $1, @1);
1e0bab92 332 }
dafdc66f 333 symbol_list_free ($3);
1e0bab92
AD
334 current_type = NULL;
335 }
e9955c83
AD
336;
337
2c569025 338precedence_declarator:
e9955c83
AD
339 "%left" { $$ = left_assoc; }
340| "%right" { $$ = right_assoc; }
341| "%nonassoc" { $$ = non_assoc; }
342;
343
344type.opt:
87fbb0bf 345 /* Nothing. */ { current_type = NULL; }
e9955c83
AD
346| TYPE { current_type = $1; }
347;
348
349/* One or more symbols, as listed after %type, %destructor, %printer, or a precedence declarator. */
e9955c83 350
1e0bab92
AD
351symbols.1:
352 symbol { $$ = symbol_list_new ($1, @1); }
353| symbols.1 symbol { $$ = symbol_list_prepend ($1, $2, @2); }
e9955c83
AD
354;
355
e9955c83
AD
356/* One token definition. */
357symbol_def:
358 TYPE
359 {
360 current_type = $1;
361 }
362| ID
363 {
073f9288 364 symbol_class_set ($1, current_class, @1, true);
1a31ed21 365 symbol_type_set ($1, current_type, @1);
e9955c83
AD
366 }
367| ID INT
368 {
073f9288 369 symbol_class_set ($1, current_class, @1, true);
1a31ed21 370 symbol_type_set ($1, current_type, @1);
e776192e 371 symbol_user_token_number_set ($1, $2, @2);
e9955c83
AD
372 }
373| ID string_as_id
374 {
073f9288 375 symbol_class_set ($1, current_class, @1, true);
1a31ed21 376 symbol_type_set ($1, current_type, @1);
a5d50994 377 symbol_make_alias ($1, $2, @$);
e9955c83
AD
378 }
379| ID INT string_as_id
380 {
073f9288 381 symbol_class_set ($1, current_class, @1, true);
1a31ed21 382 symbol_type_set ($1, current_type, @1);
e776192e 383 symbol_user_token_number_set ($1, $2, @2);
a5d50994 384 symbol_make_alias ($1, $3, @$);
e9955c83
AD
385 }
386;
387
388/* One or more symbol definitions. */
389symbol_defs.1:
390 symbol_def
e9955c83 391| symbol_defs.1 symbol_def
e9955c83
AD
392;
393
2c569025
AD
394
395 /*------------------------------------------.
396 | The grammar section: between the two %%. |
397 `------------------------------------------*/
398
399grammar:
1921f1d7
AD
400 rules_or_grammar_declaration
401| grammar rules_or_grammar_declaration
402;
403
404/* As a Bison extension, one can use the grammar declarations in the
b7295522 405 body of the grammar. */
1921f1d7 406rules_or_grammar_declaration:
e9955c83 407 rules
8d0a98bb 408| grammar_declaration ";"
b275314e
AD
409| error ";"
410 {
411 yyerrok;
412 }
e9955c83
AD
413;
414
415rules:
b7295522 416 ID_COLON { current_lhs = $1; current_lhs_location = @1; } rhses.1
e9955c83
AD
417;
418
419rhses.1:
8f3596a6
AD
420 rhs { grammar_current_rule_end (@1); }
421| rhses.1 "|" rhs { grammar_current_rule_end (@3); }
8d0a98bb 422| rhses.1 ";"
e9955c83
AD
423;
424
425rhs:
426 /* Nothing. */
8f3596a6 427 { grammar_current_rule_begin (current_lhs, current_lhs_location); }
e9955c83 428| rhs symbol
8efe435c 429 { grammar_current_rule_symbol_append ($2, @2); }
e9955c83 430| rhs action
e9955c83 431| rhs "%prec" symbol
e776192e 432 { grammar_current_rule_prec_set ($3, @3); }
676385e2
PH
433| rhs "%dprec" INT
434 { grammar_current_rule_dprec_set ($3, @3); }
435| rhs "%merge" TYPE
436 { grammar_current_rule_merge_set ($3, @3); }
e9955c83
AD
437;
438
439symbol:
440 ID { $$ = $1; }
441| string_as_id { $$ = $1; }
e9955c83
AD
442;
443
6b702268
PE
444/* Handle the semantics of an action specially, with a mid-rule
445 action, so that grammar_current_rule_action_append is invoked
446 immediately after the braced code is read by the scanner.
447
448 This implementation relies on the LALR(1) parsing algorithm.
449 If grammar_current_rule_action_append were executed in a normal
450 action for this rule, then when the input grammar contains two
451 successive actions, the scanner would have to read both actions
452 before reducing this rule. That wouldn't work, since the scanner
453 relies on all preceding input actions being processed by
454 grammar_current_rule_action_append before it scans the next
455 action. */
e9955c83 456action:
6b702268 457 { grammar_current_rule_action_append (last_string, last_braced_code_loc); }
e9955c83 458 BRACED_CODE
e9955c83
AD
459;
460
ca407bdf 461/* A string used as an ID: quote it. */
e9955c83
AD
462string_as_id:
463 STRING
464 {
ca407bdf 465 $$ = symbol_get (quotearg_style (c_quoting_style, $1), @1);
073f9288 466 symbol_class_set ($$, token_sym, @1, false);
e9955c83
AD
467 }
468;
469
ca407bdf 470/* A string used for its contents. Don't quote it. */
e9955c83
AD
471string_content:
472 STRING
ca407bdf
PE
473 { $$ = $1; }
474;
e9955c83
AD
475
476
477epilogue.opt:
478 /* Nothing. */
e9955c83
AD
479| "%%" EPILOGUE
480 {
cd3684cf 481 muscle_code_grow ("epilogue", $2, @2);
7ec2d4cd 482 scanner_last_string_free ();
e9955c83
AD
483 }
484;
485
e9955c83 486%%
b7295522
PE
487
488
489/* Return the location of the left-hand side of a rule whose
490 right-hand side is RHS[1] ... RHS[N]. Ignore empty nonterminals in
491 the right-hand side, and return an empty location equal to the end
492 boundary of RHS[0] if the right-hand side is empty. */
493
494static YYLTYPE
495lloc_default (YYLTYPE const *rhs, int n)
496{
497 int i;
a737b216 498 YYLTYPE loc;
62cb8a99
PE
499
500 /* SGI MIPSpro 7.4.1m miscompiles "loc.start = loc.end = rhs[n].end;".
501 The bug is fixed in 7.4.2m, but play it safe for now. */
502 loc.start = rhs[n].end;
503 loc.end = rhs[n].end;
b7295522 504
5320ca4d
PE
505 /* Ignore empty nonterminals the start of the the right-hand side.
506 Do not bother to ignore them at the end of the right-hand side,
507 since empty nonterminals have the same end as their predecessors. */
b7295522
PE
508 for (i = 1; i <= n; i++)
509 if (! equal_boundaries (rhs[i].start, rhs[i].end))
510 {
a737b216 511 loc.start = rhs[i].start;
b7295522
PE
512 break;
513 }
514
a737b216 515 return loc;
b7295522
PE
516}
517
518
519/* Add a lex-param or a parse-param (depending on TYPE) with
520 declaration DECL and location LOC. */
521
1773ceee 522static void
e9ce5688 523add_param (char const *type, char *decl, location loc)
1773ceee 524{
ead9e56e 525 static char const alphanum[26 + 26 + 1 + 10] =
1773ceee
PE
526 "abcdefghijklmnopqrstuvwxyz"
527 "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
ead9e56e
PE
528 "_"
529 "0123456789";
1773ceee 530 char const *name_start = NULL;
e9ce5688 531 char *p;
1773ceee 532
e503aa60
AD
533 /* Stop on last actual character. */
534 for (p = decl; p[1]; p++)
ead9e56e
PE
535 if ((p == decl
536 || ! memchr (alphanum, p[-1], sizeof alphanum))
537 && memchr (alphanum, p[0], sizeof alphanum - 10))
1773ceee
PE
538 name_start = p;
539
ead9e56e
PE
540 /* Strip the surrounding '{' and '}', and any blanks just inside
541 the braces. */
542 while (*--p == ' ' || *p == '\t')
543 continue;
e503aa60 544 p[1] = '\0';
ead9e56e
PE
545 while (*++decl == ' ' || *decl == '\t')
546 continue;
e9ce5688 547
1773ceee
PE
548 if (! name_start)
549 complain_at (loc, _("missing identifier in parameter declaration"));
550 else
551 {
552 char *name;
553 size_t name_len;
554
555 for (name_len = 1;
ead9e56e 556 memchr (alphanum, name_start[name_len], sizeof alphanum);
1773ceee
PE
557 name_len++)
558 continue;
559
560 name = xmalloc (name_len + 1);
561 memcpy (name, name_start, name_len);
562 name[name_len] = '\0';
563 muscle_pair_list_grow (type, decl, name);
564 free (name);
565 }
566
567 scanner_last_string_free ();
568}
569
b50d2359
AD
570static void
571version_check (location const *loc, char const *version)
572{
573 if (strverscmp (version, PACKAGE_VERSION) > 0)
9b8a5ce0
AD
574 {
575 complain_at (*loc, "require bison %s, but have %s",
576 version, PACKAGE_VERSION);
577 exit (63);
578 }
b50d2359
AD
579}
580
1fec91df 581static void
3d2cbc26 582gram_error (location const *loc, char const *msg)
e9955c83 583{
ad8a3efc 584 complain_at (*loc, "%s", msg);
e9955c83 585}
e9ce5688
PE
586
587char const *
588token_name (int type)
589{
fc01665e 590 return yytname[YYTRANSLATE (type)];
e9ce5688 591}