]> git.saurik.com Git - bison.git/blame - src/scan-gram.l
and Akim Demaille <akim@epita.fr>
[bison.git] / src / scan-gram.l
CommitLineData
e9955c83
AD
1/* Bison Grammar Scanner -*- C -*-
2 Copyright (C) 2002 Free Software Foundation, Inc.
3
4 This file is part of Bison, the GNU Compiler Compiler.
5
6 This program is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 2 of the License, or
9 (at your option) any later version.
10
11 This program is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
15
16 You should have received a copy of the GNU General Public License
17 along with this program; if not, write to the Free Software
18 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
19 02111-1307 USA
20*/
21
22%option debug nodefault noyywrap nounput never-interactive stack
23%option prefix="gram_" outfile="lex.yy.c"
24
25%{
26#include "system.h"
27#include "complain.h"
28#include "quote.h"
29#include "getargs.h"
30#include "gram.h"
31#include "reader.h"
32
33/* Each time we match a string, move the end cursor to its end. */
8efe435c
AD
34#define YY_USER_INIT \
35do { \
36 LOCATION_RESET (*yylloc); \
37 /* This is only to avoid GCC warnings. */ \
38 if (yycontrol) {;}; \
39} while (0)
40
e9955c83
AD
41#define YY_USER_ACTION LOCATION_COLUMNS (*yylloc, yyleng)
/* Hand the length of the end-of-line run just matched to
   LOCATION_LINES and add it to LINENO.  The two updates are wrapped
   in `do ... while (0)' so that they stay together when the macro is
   used as the body of an unbraced `if' or loop.  */
#define YY_LINES                                \
do {                                            \
  LOCATION_LINES (*yylloc, yyleng);             \
  lineno += yyleng;                             \
} while (0)
43#define YY_STEP LOCATION_STEP (*yylloc)
44
44995b2e
AD
45
46/* STRING_OBSTACK -- Used to store all the characters that we need to
47 keep (to construct ID, STRINGS etc.). Use the following macros to
48 use it.
49
1d6412ad
AD
50 Use YY_OBS_GROW to append what has just been matched, and
51 YY_OBS_FINISH to end the string (it puts the ending 0).
52 YY_OBS_FINISH also stores this string in LAST_STRING, which can be
53 used, and which is used by YY_OBS_FREE to free the last string. */
44995b2e
AD
54
55static struct obstack string_obstack;
56char *last_string;
57
44995b2e
AD
58#define YY_OBS_GROW \
59 obstack_grow (&string_obstack, yytext, yyleng)
60
61#define YY_OBS_FINISH \
62 do { \
63 obstack_1grow (&string_obstack, '\0'); \
64 last_string = obstack_finish (&string_obstack); \
44995b2e
AD
65 } while (0)
66
67#define YY_OBS_FREE \
68 do { \
69 obstack_free (&string_obstack, last_string); \
70 } while (0)
e9955c83 71
4cdb01db
AD
72void
73scanner_last_string_free (void)
74{
75 YY_OBS_FREE;
76}
77
78
44995b2e 79
e9955c83
AD
80static int braces_level = 0;
81static int percent_percent_count = 0;
82
9280d3ef
AD
83static void handle_action_dollar PARAMS ((char *cp, location_t location));
84static void handle_destructor_dollar PARAMS ((char *cp, location_t location));
e9955c83
AD
85static void handle_at PARAMS ((char *cp));
86
87%}
88%x SC_COMMENT
89%x SC_STRING SC_CHARACTER
90%x SC_ESCAPED_STRING SC_ESCAPED_CHARACTER
91%x SC_BRACED_CODE SC_PROLOGUE SC_EPILOGUE
92
93id [.a-zA-Z][.a-zA-Z_0-9]*
94int [0-9]+
95eols (\n|\r|\n\r|\r\n)+
96blanks [ \t\f]+
97
98%%
99%{
100 /* At each yylex invocation, mark the current position as the
101 start of the next token. */
102#define TR_POS 0
103#if TR_POS
8efe435c 104 fprintf (stderr, "FOO1: %p: ", yylloc);
e9955c83
AD
105 LOCATION_PRINT (stderr, *yylloc);
106 fprintf (stderr, "\n");
107#endif
108 YY_STEP;
109#if TR_POS
110 fprintf (stderr, "BAR1: ");
111 LOCATION_PRINT (stderr, *yylloc);
112 fprintf (stderr, "\n");
113#endif
114%}
115
116
117 /*----------------------------.
118 | Scanning Bison directives. |
119 `----------------------------*/
120<INITIAL>
121{
122 "%binary" return PERCENT_NONASSOC;
123 "%debug" return PERCENT_DEBUG;
124 "%define" return PERCENT_DEFINE;
125 "%defines" return PERCENT_DEFINES;
9280d3ef 126 "%destructor" return PERCENT_DESTRUCTOR;
e9955c83
AD
127 "%error"[-_]"verbose" return PERCENT_ERROR_VERBOSE;
128 "%expect" return PERCENT_EXPECT;
129 "%file-prefix" return PERCENT_FILE_PREFIX;
130 "%fixed"[-_]"output"[-_]"files" return PERCENT_YACC;
131 "%left" return PERCENT_LEFT;
132 "%locations" return PERCENT_LOCATIONS;
133 "%name"[-_]"prefix" return PERCENT_NAME_PREFIX;
134 "%no"[-_]"lines" return PERCENT_NO_LINES;
135 "%nonassoc" return PERCENT_NONASSOC;
136 "%nterm" return PERCENT_NTERM;
137 "%output" return PERCENT_OUTPUT;
138 "%prec" return PERCENT_PREC;
139 "%pure"[-_]"parser" return PERCENT_PURE_PARSER;
140 "%right" return PERCENT_RIGHT;
141 "%skeleton" return PERCENT_SKELETON;
142 "%start" return PERCENT_START;
143 "%term" return PERCENT_TOKEN;
144 "%token" return PERCENT_TOKEN;
145 "%token"[-_]"table" return PERCENT_TOKEN_TABLE;
146 "%type" return PERCENT_TYPE;
147 "%union" return PERCENT_UNION;
148 "%verbose" return PERCENT_VERBOSE;
149 "%yacc" return PERCENT_YACC;
150
151 "=" return EQUAL;
152 ":" return COLON;
153 "|" return PIPE;
154 ";" return SEMICOLON;
155
156 {eols} YY_LINES; YY_STEP;
157 {blanks} YY_STEP;
158 {id} {
ee000ba4 159 yylval->symbol = getsym (yytext, *yylloc);
e9955c83
AD
160 return ID;
161 }
162
163 {int} yylval->integer = strtol (yytext, 0, 10); return INT;
164
165 /* Characters. We don't check there is only one. */
1d6412ad 166 \' YY_OBS_GROW; yy_push_state (SC_ESCAPED_CHARACTER);
e9955c83
AD
167
168 /* Strings. */
1d6412ad 169 \" YY_OBS_GROW; yy_push_state (SC_ESCAPED_STRING);
e9955c83
AD
170
171 /* Comments. */
172 "/*" yy_push_state (SC_COMMENT);
173 "//".* YY_STEP;
174
175 /* Prologue. */
1d6412ad 176 "%{" yy_push_state (SC_PROLOGUE);
e9955c83
AD
177
178 /* Code in between braces. */
1d6412ad 179 "{" YY_OBS_GROW; ++braces_level; yy_push_state (SC_BRACED_CODE);
e9955c83
AD
180
181 /* A type. */
4cdb01db 182 "<"[^>]+">" {
4cdb01db
AD
183 obstack_grow (&string_obstack, yytext + 1, yyleng - 2);
184 YY_OBS_FINISH;
185 yylval->string = last_string;
186 return TYPE;
187 }
188
e9955c83
AD
189
190 "%%" {
191 if (++percent_percent_count == 2)
192 yy_push_state (SC_EPILOGUE);
193 return PERCENT_PERCENT;
194 }
195
196 . {
197 LOCATION_PRINT (stderr, *yylloc);
198 fprintf (stderr, ": invalid character: `%c'\n", *yytext);
199 YY_STEP;
200 }
201}
202
203
204 /*------------------------------------------------------------.
205 | Whatever the start condition (but those which correspond to |
206 | entity `swallowed' by Bison: SC_ESCAPED_STRING and |
207 | SC_ESCAPED_CHARACTER), no M4 character must escape as is. |
208 `------------------------------------------------------------*/
209
210<SC_COMMENT,SC_STRING,SC_CHARACTER,SC_BRACED_CODE,SC_PROLOGUE,SC_EPILOGUE>
211{
1d6412ad
AD
212 \[ if (YY_START != SC_COMMENT) obstack_sgrow (&string_obstack, "@<:@");
213 \] if (YY_START != SC_COMMENT) obstack_sgrow (&string_obstack, "@:>@");
e9955c83
AD
214}
215
216
217
218 /*-----------------------------------------------------------.
219 | Scanning a C comment. The initial `/ *' is already eaten. |
220 `-----------------------------------------------------------*/
221
222<SC_COMMENT>
223{
224 "*/" { /* End of the comment. */
225 if (yy_top_state () == INITIAL)
226 {
227 YY_STEP;
228 }
229 else
230 {
44995b2e 231 YY_OBS_GROW;
e9955c83
AD
232 }
233 yy_pop_state ();
234 }
235
44995b2e
AD
236 [^\[\]*\n\r]+ if (yy_top_state () != INITIAL) YY_OBS_GROW;
237 {eols} if (yy_top_state () != INITIAL) YY_OBS_GROW; YY_LINES;
238 . /* Stray `*'. */if (yy_top_state () != INITIAL) YY_OBS_GROW;
e9955c83
AD
239
240 <<EOF>> {
241 LOCATION_PRINT (stderr, *yylloc);
242 fprintf (stderr, ": unexpected end of file in a comment\n");
243 yy_pop_state ();
244 }
245}
246
247
248 /*----------------------------------------------------------------.
249 | Scanning a C string, including its escapes. The initial `"' is |
250 | already eaten. |
251 `----------------------------------------------------------------*/
252
253<SC_ESCAPED_STRING>
254{
255 \" {
256 assert (yy_top_state () == INITIAL);
44995b2e
AD
257 YY_OBS_GROW;
258 YY_OBS_FINISH;
4cdb01db 259 yylval->string = last_string;
e9955c83
AD
260 yy_pop_state ();
261 return STRING;
262 }
263
44995b2e 264 [^\"\n\r\\]+ YY_OBS_GROW;
e9955c83
AD
265
266 {eols} obstack_1grow (&string_obstack, '\n'); YY_LINES;
267
268 <<EOF>> {
269 LOCATION_PRINT (stderr, *yylloc);
270 fprintf (stderr, ": unexpected end of file in a string\n");
271 assert (yy_top_state () == INITIAL);
44995b2e 272 YY_OBS_FINISH;
4cdb01db 273 yylval->string = last_string;
e9955c83
AD
274 yy_pop_state ();
275 return STRING;
276 }
277}
278
279 /*---------------------------------------------------------------.
280 | Scanning a C character, decoding its escapes. The initial "'" |
281 | is already eaten. |
282 `---------------------------------------------------------------*/
283
<SC_ESCAPED_CHARACTER>
{
  /* Closing quote.  The obstack holds the whole literal, both quotes
     included, so LAST_STRING[1] is the first (already decoded)
     character.  */
  \' {
    YY_OBS_GROW;
    assert (yy_top_state () == INITIAL);
    {
      YY_OBS_FINISH;
      yylval->symbol = getsym (last_string, *yylloc);
      symbol_class_set (yylval->symbol, token_sym);
      /* Go through `unsigned char' so that characters above 127 do
         not turn into negative token numbers where `char' is
         signed.  */
      symbol_user_token_number_set (yylval->symbol,
                                    (unsigned char) last_string[1]);
      YY_OBS_FREE;
      yy_pop_state ();
      return ID;
    }
  }

  [^\'\n\r\\]           YY_OBS_GROW;

  /* Any run of end-of-lines is stored as a single newline.  */
  {eols}                obstack_1grow (&string_obstack, '\n'); YY_LINES;

  /* Unterminated literal: report it and return what was gathered.  */
  <<EOF>> {
    LOCATION_PRINT (stderr, *yylloc);
    fprintf (stderr, ": unexpected end of file in a character\n");
    assert (yy_top_state () == INITIAL);
    YY_OBS_FINISH;
    yylval->string = last_string;
    yy_pop_state ();
    return CHARACTER;
  }
}
314
315
316 /*----------------------------.
317 | Decode escaped characters. |
318 `----------------------------*/
319
<SC_ESCAPED_STRING,SC_ESCAPED_CHARACTER>
{
  /* Octal escape, exactly three digits.  Three octal digits can
     encode up to 511, so values that do not fit in a byte are
     rejected.  */
  \\[0-7]{3} {
    long c = strtol (yytext + 1, 0, 8);
    if (c > 255)
      {
        LOCATION_PRINT (stderr, *yylloc);
        fprintf (stderr, ": invalid escape: %s\n", yytext);
        YY_STEP;
      }
    else
      obstack_1grow (&string_obstack, c);
  }

  /* Hexadecimal escape, exactly two digits.  */
  \\x[0-9a-fA-F]{2} {
    obstack_1grow (&string_obstack, strtol (yytext + 2, 0, 16));
  }

  \\a                   obstack_1grow (&string_obstack, '\a');
  \\b                   obstack_1grow (&string_obstack, '\b');
  \\f                   obstack_1grow (&string_obstack, '\f');
  \\n                   obstack_1grow (&string_obstack, '\n');
  \\r                   obstack_1grow (&string_obstack, '\r');
  \\t                   obstack_1grow (&string_obstack, '\t');
  \\v                   obstack_1grow (&string_obstack, '\v');

  /* Escapes that stand for the escaped character itself: backslash,
     double quote, single quote and question mark.  */
  \\[\\\"\'?]           obstack_1grow (&string_obstack, yytext[1]);

  /* Anything else: warn, and keep the sequence verbatim.  */
  \\. {
    LOCATION_PRINT (stderr, *yylloc);
    fprintf (stderr, ": unrecognized escape: %s\n", yytext);
    YY_OBS_GROW;
  }
}
352
353
354 /*----------------------------------------------------------.
355 | Scanning a C character without decoding its escapes. The |
356 | initial "'" is already eaten. |
357 `----------------------------------------------------------*/
358
359<SC_CHARACTER>
360{
361 \' {
44995b2e 362 YY_OBS_GROW;
e9955c83
AD
363 assert (yy_top_state () != INITIAL);
364 yy_pop_state ();
365 }
366
44995b2e
AD
367 [^\[\]\'\n\r\\] YY_OBS_GROW;
368 \\. YY_OBS_GROW;
e9955c83 369
44995b2e 370 {eols} YY_OBS_GROW; YY_LINES;
e9955c83
AD
371
372 <<EOF>> {
373 LOCATION_PRINT (stderr, *yylloc);
374 fprintf (stderr, ": unexpected end of file in a character\n");
375 assert (yy_top_state () != INITIAL);
376 yy_pop_state ();
377 }
378}
379
380
381 /*----------------------------------------------------------------.
382 | Scanning a C string, without decoding its escapes. The initial |
383 | `"' is already eaten. |
384 `----------------------------------------------------------------*/
385
386<SC_STRING>
387{
388 \" {
389 assert (yy_top_state () != INITIAL);
44995b2e 390 YY_OBS_GROW;
e9955c83
AD
391 yy_pop_state ();
392 }
393
44995b2e
AD
394 [^\[\]\"\n\r\\]+ YY_OBS_GROW;
395 \\. YY_OBS_GROW;
e9955c83 396
44995b2e 397 {eols} YY_OBS_GROW; YY_LINES;
e9955c83
AD
398
399 <<EOF>> {
400 LOCATION_PRINT (stderr, *yylloc);
401 fprintf (stderr, ": unexpected end of file in a string\n");
402 assert (yy_top_state () != INITIAL);
403 yy_pop_state ();
404 }
405}
406
407
408 /*---------------------------------------------------.
409 | Strings, comments etc. can be found in user code. |
410 `---------------------------------------------------*/
411
412<SC_BRACED_CODE,SC_PROLOGUE,SC_EPILOGUE>
413{
414 /* Characters. We don't check there is only one. */
44995b2e 415 \' YY_OBS_GROW; yy_push_state (SC_CHARACTER);
e9955c83
AD
416
417 /* Strings. */
44995b2e 418 \" YY_OBS_GROW; yy_push_state (SC_STRING);
e9955c83
AD
419
420 /* Comments. */
44995b2e
AD
421 "/*" YY_OBS_GROW; yy_push_state (SC_COMMENT);
422 "//".* YY_OBS_GROW;
e9955c83
AD
423}
424
425
426 /*---------------------------------------------------------------.
427 | Scanning some code in braces (%union and actions). The initial |
428 | "{" is already eaten. |
429 `---------------------------------------------------------------*/
430
431<SC_BRACED_CODE>
432{
433 "}" {
44995b2e 434 YY_OBS_GROW;
e9955c83
AD
435 if (--braces_level == 0)
436 {
437 yy_pop_state ();
44995b2e 438 YY_OBS_FINISH;
4cdb01db 439 yylval->string = last_string;
e9955c83
AD
440 return BRACED_CODE;
441 }
442 }
443
44995b2e 444 "{" YY_OBS_GROW; braces_level++;
e9955c83 445
9280d3ef
AD
446 "$"("<"[^>]+">")?(-?[0-9]+|"$") {
447 switch (current_braced_code)
448 {
449 case action_braced_code:
450 handle_action_dollar (yytext, *yylloc);
451 break;
452
453 case destructor_braced_code:
454 handle_destructor_dollar (yytext, *yylloc);
455 break;
456 }
457 }
458 "@"(-?[0-9]+|"$") { handle_at (yytext); }
e9955c83 459
6c35d22c 460 [^$@\[\]/\'\"\{\}\n\r]+ YY_OBS_GROW;
44995b2e 461 {eols} YY_OBS_GROW; YY_LINES;
e9955c83
AD
462
463 /* A loose $, or /, etc. */
44995b2e 464 . YY_OBS_GROW;
e9955c83
AD
465
466 <<EOF>> {
467 LOCATION_PRINT (stderr, *yylloc);
468 fprintf (stderr, ": unexpected end of file in a braced code\n");
469 yy_pop_state ();
44995b2e 470 YY_OBS_FINISH;
4cdb01db
AD
471 yylval->string = last_string;
472 return BRACED_CODE;
e9955c83
AD
473 }
474
475}
476
477
478 /*--------------------------------------------------------------.
479 | Scanning some prologue: from "%{" (already scanned) to "%}". |
480 `--------------------------------------------------------------*/
481
<SC_PROLOGUE>
{
  /* End of the prologue: hand the gathered text to the parser.  */
  "%}" {
    yy_pop_state ();
    YY_OBS_FINISH;
    yylval->string = last_string;
    return PROLOGUE;
  }

  [^%\[\]/\'\"\n\r]+            YY_OBS_GROW;

  /* A run of `%' followed by ordinary text.  Brackets, quotes and
     slashes are excluded so that the rules which escape M4 characters
     and which recognize strings, characters and comments still get to
     see them.  */
  "%"+[^%\}\[\]/\'\"\n\r]+      YY_OBS_GROW;
  {eols}                        YY_OBS_GROW; YY_LINES;

  /* A lone `%' or `/' (e.g. a division).  With `%option nodefault'
     there is no implicit rule to fall back on, so copy it
     explicitly.  Being only one character long and listed last, this
     rule never takes precedence over the others.  */
  .                             YY_OBS_GROW;

  /* Unterminated prologue: report it and return what was gathered.  */
  <<EOF>> {
    LOCATION_PRINT (stderr, *yylloc);
    fprintf (stderr, ": unexpected end of file in a prologue\n");
    yy_pop_state ();
    YY_OBS_FINISH;
    yylval->string = last_string;
    return PROLOGUE;
  }

}
505
506
507 /*---------------------------------------------------------------.
508 | Scanning the epilogue (everything after the second "%%", which |
509 | has already been eaten). |
510 `---------------------------------------------------------------*/
511
512<SC_EPILOGUE>
513{
44995b2e 514 ([^\[\]]|{eols})+ YY_OBS_GROW;
e9955c83
AD
515
516 <<EOF>> {
517 yy_pop_state ();
44995b2e 518 YY_OBS_FINISH;
4cdb01db 519 yylval->string = last_string;
e9955c83
AD
520 return EPILOGUE;
521 }
522}
523
524
525%%
526
527/*------------------------------------------------------------------.
528| CP is pointing to a wannabee semantic value (i.e., a `$'). |
529| |
530| Possible inputs: $[<TYPENAME>]($|integer) |
531| |
532| Output to the STRING_OBSTACK a reference to this semantic value. |
533`------------------------------------------------------------------*/
534
535static void
9280d3ef 536handle_action_dollar (char *cp, location_t location)
e9955c83
AD
537{
538 const char *type_name = NULL;
539
e9955c83
AD
540 ++cp;
541
542 /* Get the type name if explicit. */
543 if (*cp == '<')
544 {
545 type_name = ++cp;
546 while (*cp != '>')
547 ++cp;
548 *cp = '\0';
549 ++cp;
550 }
551
552 if (*cp == '$')
553 {
554 if (!type_name)
56c47203 555 type_name = symbol_list_n_type_name_get (current_rule, location, 0);
e9955c83 556 if (!type_name && typed)
56c47203
AD
557 complain_at (location, _("$$ of `%s' has no declared type"),
558 current_rule->sym->tag);
e9955c83
AD
559 if (!type_name)
560 type_name = "";
561 obstack_fgrow1 (&string_obstack,
562 "]b4_lhs_value([%s])[", type_name);
563 }
564 else if (isdigit (*cp) || *cp == '-')
565 {
dafdc66f
AD
566 /* RULE_LENGTH is the number of values in the current rule so
567 far, which says where to find `$0' with respect to the top of
568 the stack. It is not the same as the rule->length in the
569 case of mid rule actions. */
570 int rule_length = symbol_list_length (current_rule->next);
e9955c83
AD
571 int n = strtol (cp, &cp, 10);
572
573 if (n > rule_length)
56c47203 574 complain_at (location, _("invalid value: %s%d"), "$", n);
e9955c83
AD
575 else
576 {
577 if (!type_name && n > 0)
56c47203
AD
578 type_name = symbol_list_n_type_name_get (current_rule, location,
579 n);
e9955c83 580 if (!type_name && typed)
56c47203 581 complain_at (location, _("$%d of `%s' has no declared type"),
e9955c83
AD
582 n, current_rule->sym->tag);
583 if (!type_name)
584 type_name = "";
585 obstack_fgrow3 (&string_obstack,
586 "]b4_rhs_value([%d], [%d], [%s])[",
587 rule_length, n, type_name);
588 }
589 }
590 else
591 {
592 char buf[] = "$c";
593 buf[1] = *cp;
9280d3ef
AD
594 complain_at (location, _("%s is invalid"), quote (buf));
595 }
596}
597
598
599/*---------------------------------------------------------------.
600| CP is pointing to $$ in a destructor. This should probably be |
601| done once the grammar completely parsed, instead of during its |
602| parsing, since that means %type must be specified before |
603| %destructor. |
604`---------------------------------------------------------------*/
605
606static void
607handle_destructor_dollar (char *cp, location_t location)
608{
609 ++cp;
610 if (*cp == '$')
611 {
612 /* FIXME: We should find something more robust. */
613 obstack_sgrow (&string_obstack, "b4_dollar_dollar");
614 }
615 else
616 {
617 char buf[] = "$c";
618 buf[1] = *cp;
619 complain_at (location, _("%s is invalid"), quote (buf));
e9955c83
AD
620 }
621}
622
623/*-------------------------------------------------------.
624| CP is pointing to a location (i.e., a `@'). Output to |
625| STRING_OBSTACK a reference to this location. |
626`-------------------------------------------------------*/
627
628static void
629handle_at (char *cp)
630{
e9955c83
AD
631 locations_flag = 1;
632 ++cp;
633
634 if (*cp == '$')
635 {
636 obstack_sgrow (&string_obstack, "]b4_lhs_location[");
637 }
638 else if (isdigit (*cp) || *cp == '-')
639 {
dafdc66f
AD
640 /* RULE_LENGTH is the number of values in the current rule so
641 far, which says where to find `$0' with respect to the top of
642 the stack. It is not the same as the rule->length in the
643 case of mid rule actions. */
644 int rule_length = symbol_list_length (current_rule->next);
e9955c83 645 int n = strtol (cp, &cp, 10);
dafdc66f 646
e9955c83
AD
647 if (n > rule_length)
648 complain (_("invalid value: %s%d"), "@", n);
649 else
650 obstack_fgrow2 (&string_obstack, "]b4_rhs_location([%d], [%d])[",
651 rule_length, n);
652 }
653 else
654 {
655 char buf[] = "@c";
656 buf[1] = *cp;
657 complain (_("%s is invalid"), quote (buf));
658 }
659}
4cdb01db 660
1d6412ad
AD
661void
662scanner_initialize (void)
663{
664 obstack_init (&string_obstack);
665}
666
667
4cdb01db
AD
668void
669scanner_free (void)
670{
671 obstack_free (&string_obstack, 0);
672}