]> git.saurik.com Git - bison.git/blame - src/scan-gram.l
* src/scan-gram.l (SC_PROLOGUE): Don't eat characters amongst
[bison.git] / src / scan-gram.l
CommitLineData
e9955c83
AD
1/* Bison Grammar Scanner -*- C -*-
2 Copyright (C) 2002 Free Software Foundation, Inc.
3
4 This file is part of Bison, the GNU Compiler Compiler.
5
6 This program is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 2 of the License, or
9 (at your option) any later version.
10
11 This program is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
15
16 You should have received a copy of the GNU General Public License
17 along with this program; if not, write to the Free Software
18 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
19 02111-1307 USA
20*/
21
22%option debug nodefault noyywrap nounput never-interactive stack
23%option prefix="gram_" outfile="lex.yy.c"
24
25%{
26#include "system.h"
27#include "complain.h"
28#include "quote.h"
29#include "getargs.h"
30#include "gram.h"
31#include "reader.h"
32
/* Location tracking.  YYLLOC is the location of the token being
   scanned and YYLENG the length of the text that was just matched.  */

/* Each time we match a string, move the end cursor to its end.  */
#define YY_USER_ACTION  LOCATION_COLUMNS (*yylloc, yyleng)

/* Account for the end-of-lines that were just matched, both in the
   location and in LINENO.  The body is wrapped in `do ... while (0)'
   so that both statements stay together when the macro is used as
   the body of an unbraced `if' or `else'.  */
#define YY_LINES					\
  do							\
    {							\
      LOCATION_LINES (*yylloc, yyleng);			\
      lineno += yyleng;					\
    }							\
  while (0)

/* Step the location with LOCATION_STEP; the scanner does this when it
   starts a new token or discards text.  */
#define YY_STEP  LOCATION_STEP (*yylloc)
37
44995b2e
AD
38
39/* STRING_OBSTACK -- Used to store all the characters that we need to
40 keep (to construct ID, STRINGS etc.). Use the following macros to
41 use it.
42
1d6412ad
AD
43 Use YY_OBS_GROW to append what has just been matched, and
44 YY_OBS_FINISH to end the string (it puts the ending 0).
45 YY_OBS_FINISH also stores this string in LAST_STRING, which can be
46 used, and which is used by YY_OBS_FREE to free the last string. */
44995b2e
AD
47
48static struct obstack string_obstack;
49char *last_string;
50
44995b2e
AD
51#define YY_OBS_GROW \
52 obstack_grow (&string_obstack, yytext, yyleng)
53
54#define YY_OBS_FINISH \
55 do { \
56 obstack_1grow (&string_obstack, '\0'); \
57 last_string = obstack_finish (&string_obstack); \
44995b2e
AD
58 } while (0)
59
60#define YY_OBS_FREE \
61 do { \
62 obstack_free (&string_obstack, last_string); \
63 } while (0)
e9955c83 64
4cdb01db
AD
65void
66scanner_last_string_free (void)
67{
68 YY_OBS_FREE;
69}
70
71
e9955c83
AD
72/* This is only to avoid GCC warnings. */
73#define YY_USER_INIT if (yycontrol) {;};
74
44995b2e 75
e9955c83
AD
76static int braces_level = 0;
77static int percent_percent_count = 0;
78
79static void handle_dollar PARAMS ((char *cp));
80static void handle_at PARAMS ((char *cp));
81
82%}
/* Exclusive start conditions: one per kind of entity the scanner can
   be in the middle of.  */
%x SC_COMMENT
%x SC_STRING SC_CHARACTER
%x SC_ESCAPED_STRING SC_ESCAPED_CHARACTER
%x SC_BRACED_CODE SC_PROLOGUE SC_EPILOGUE

/* Named patterns.  A symbol name starts with a letter, a period or
   an underscore, and continues with those or digits.  The leading
   underscore used to be rejected, which made `_foo' scan as an
   invalid character followed by `foo'.  */
id	 [.a-zA-Z_][.a-zA-Z_0-9]*
int	 [0-9]+
eols	 (\n|\r|\n\r|\r\n)+
blanks	 [ \t\f]+
92
93%%
%{
  /* This code runs at each yylex invocation: mark the current
     position as the start of the next token.  Set TR_POS to 1 to
     trace the location before and after that step.  */
#define TR_POS 0
#if TR_POS
  fputs ("FOO1: ", stderr);
  LOCATION_PRINT (stderr, *yylloc);
  fputc ('\n', stderr);
#endif

  YY_STEP;

#if TR_POS
  fputs ("BAR1: ", stderr);
  LOCATION_PRINT (stderr, *yylloc);
  fputc ('\n', stderr);
#endif
%}
110
111
112 /*----------------------------.
113 | Scanning Bison directives. |
114 `----------------------------*/
<INITIAL>
{
  /* Directives.  Alternate spellings of the same directive are
     chained with `|' so that they share a single action.  */
  "%binary"				|
  "%nonassoc"				return PERCENT_NONASSOC;
  "%debug"				return PERCENT_DEBUG;
  "%define"				return PERCENT_DEFINE;
  "%defines"				return PERCENT_DEFINES;
  "%error"[-_]"verbose"			return PERCENT_ERROR_VERBOSE;
  "%expect"				return PERCENT_EXPECT;
  "%file-prefix"			return PERCENT_FILE_PREFIX;
  "%fixed"[-_]"output"[-_]"files"	|
  "%yacc"				return PERCENT_YACC;
  "%left"				return PERCENT_LEFT;
  "%locations"				return PERCENT_LOCATIONS;
  "%name"[-_]"prefix"			return PERCENT_NAME_PREFIX;
  "%no"[-_]"lines"			return PERCENT_NO_LINES;
  "%nterm"				return PERCENT_NTERM;
  "%output"				return PERCENT_OUTPUT;
  "%prec"				return PERCENT_PREC;
  "%pure"[-_]"parser"			return PERCENT_PURE_PARSER;
  "%right"				return PERCENT_RIGHT;
  "%skeleton"				return PERCENT_SKELETON;
  "%start"				return PERCENT_START;
  "%term"				|
  "%token"				return PERCENT_TOKEN;
  "%token"[-_]"table"			return PERCENT_TOKEN_TABLE;
  "%type"				return PERCENT_TYPE;
  "%union"				return PERCENT_UNION;
  "%verbose"				return PERCENT_VERBOSE;

  /* Punctuation.  */
  "="		return EQUAL;
  ":"		return COLON;
  "|"		return PIPE;
  ";"		return SEMICOLON;

  /* White space: nothing is returned, only the location moves.  */
  {eols}	{ YY_LINES; YY_STEP; }
  {blanks}	{ YY_STEP; }

  /* Symbols and integers.  */
  {id}		{
    yylval->symbol = getsym (yytext);
    return ID;
  }

  {int}		{
    yylval->integer = strtol (yytext, NULL, 10);
    return INT;
  }

  /* Characters.  We don't check there is only one.  The opening
     quote is kept.  */
  \'		{ YY_OBS_GROW; yy_push_state (SC_ESCAPED_CHARACTER); }

  /* Strings.  The opening quote is kept.  */
  \"		{ YY_OBS_GROW; yy_push_state (SC_ESCAPED_STRING); }

  /* Comments.  Nothing is stored for them at this level.  */
  "/*"		{ yy_push_state (SC_COMMENT); }
  "//".*	{ YY_STEP; }

  /* Prologue.  */
  "%{"		{ yy_push_state (SC_PROLOGUE); }

  /* Code in between braces.  */
  "{"		{
    YY_OBS_GROW;
    ++braces_level;
    yy_push_state (SC_BRACED_CODE);
  }

  /* A type: the text between the angle brackets is returned.  */
  "<"[^>]+">"	{
    obstack_grow (&string_obstack, yytext + 1, yyleng - 2);
    YY_OBS_FINISH;
    yylval->string = last_string;
    return TYPE;
  }

  /* Section separator.  The second one starts the epilogue.  */
  "%%"		{
    percent_percent_count++;
    if (percent_percent_count == 2)
      yy_push_state (SC_EPILOGUE);
    return PERCENT_PERCENT;
  }

  /* Anything else is reported and skipped.  */
  .		{
    LOCATION_PRINT (stderr, *yylloc);
    fprintf (stderr, ": invalid character: `%c'\n", *yytext);
    YY_STEP;
  }
}
196
197
198 /*------------------------------------------------------------.
199 | Whatever the start condition (but those which correspond to |
200 | entity `swallowed' by Bison: SC_ESCAPED_STRING and |
201 | SC_ESCAPED_CHARACTER), no M4 character must escape as is. |
202 `------------------------------------------------------------*/
203
<SC_COMMENT,SC_STRING,SC_CHARACTER,SC_BRACED_CODE,SC_PROLOGUE,SC_EPILOGUE>
{
  /* `[' and `]' must not reach the output as is: they are replaced
     by the quadrigraphs `@<:@' and `@:>@'.

     A comment opened from INITIAL is not stored at all, so its
     brackets are simply dropped.  A comment opened from user code is
     copied to the output, so its brackets must be kept (quoted) like
     everywhere else; they used to be dropped, turning a comment such
     as `a[i]' into `ai'.  SC_COMMENT is only ever entered through
     yy_push_state, so yy_top_state is safe to call there.  */
  \[	{
    if (YY_START != SC_COMMENT || yy_top_state () != INITIAL)
      obstack_sgrow (&string_obstack, "@<:@");
  }

  \]	{
    if (YY_START != SC_COMMENT || yy_top_state () != INITIAL)
      obstack_sgrow (&string_obstack, "@:>@");
  }
}
209
210
211
212 /*-----------------------------------------------------------.
213 | Scanning a C comment. The initial `/ *' is already eaten. |
214 `-----------------------------------------------------------*/
215
<SC_COMMENT>
{
  /* The text of a comment is stored only when the comment was opened
     from user code, i.e., when the enclosing start condition is not
     INITIAL.  */

  /* End of the comment.  */
  "*/"		{
    if (yy_top_state () != INITIAL)
      {
	YY_OBS_GROW;
      }
    else
      {
	YY_STEP;
      }
    yy_pop_state ();
  }

  /* Ordinary text.  */
  [^\[\]*\n\r]+	{
    if (yy_top_state () != INITIAL)
      YY_OBS_GROW;
  }

  /* End-of-lines are always counted.  */
  {eols}	{
    if (yy_top_state () != INITIAL)
      YY_OBS_GROW;
    YY_LINES;
  }

  /* Stray `*'.  */
  .		{
    if (yy_top_state () != INITIAL)
      YY_OBS_GROW;
  }

  <<EOF>>	{
    LOCATION_PRINT (stderr, *yylloc);
    fprintf (stderr, ": unexpected end of file in a comment\n");
    yy_pop_state ();
  }
}
240
241
242 /*----------------------------------------------------------------.
243 | Scanning a C string, including its escapes. The initial `"' is |
244 | already eaten. |
245 `----------------------------------------------------------------*/
246
<SC_ESCAPED_STRING>
{
  /* Closing quote: the string, quotes included, is complete.  */
  \"		{
    assert (yy_top_state () == INITIAL);
    YY_OBS_GROW;
    YY_OBS_FINISH;
    yy_pop_state ();
    yylval->string = last_string;
    return STRING;
  }

  /* Ordinary text.  Backslashes are handled by the escape rules.  */
  [^\"\n\r\\]+	{ YY_OBS_GROW; }

  /* Any run of end-of-lines is stored as a single `\n'.  */
  {eols}	{
    obstack_1grow (&string_obstack, '\n');
    YY_LINES;
  }

  /* Unterminated string: report it, and return what was gathered.  */
  <<EOF>>	{
    LOCATION_PRINT (stderr, *yylloc);
    fprintf (stderr, ": unexpected end of file in a string\n");
    assert (yy_top_state () == INITIAL);
    YY_OBS_FINISH;
    yy_pop_state ();
    yylval->string = last_string;
    return STRING;
  }
}
272
273 /*---------------------------------------------------------------.
274 | Scanning a C character, decoding its escapes. The initial "'" |
275 | is already eaten. |
276 `---------------------------------------------------------------*/
277
<SC_ESCAPED_CHARACTER>
{
  /* Closing quote: the literal, quotes included, names a token whose
     user number is the code of the character.  */
  \'		{
    YY_OBS_GROW;
    assert (yy_top_state () == INITIAL);
    YY_OBS_FINISH;
    yylval->symbol = getsym (last_string);
    symbol_class_set (yylval->symbol, token_sym);
    /* LAST_STRING[0] is the opening quote, LAST_STRING[1] the
       character itself.  Go through `unsigned char' so that a
       character above 127 does not yield a negative token number
       where plain `char' is signed.  */
    symbol_user_token_number_set (yylval->symbol,
				  (unsigned char) last_string[1]);
    YY_OBS_FREE;
    yy_pop_state ();
    return ID;
  }

  /* Ordinary character.  Backslashes are handled by the escape
     rules.  */
  [^\'\n\r\\]	YY_OBS_GROW;

  /* Any run of end-of-lines is stored as a single `\n'.  */
  {eols}	obstack_1grow (&string_obstack, '\n'); YY_LINES;

  /* Unterminated literal: report it, and return what was gathered.  */
  <<EOF>>	{
    LOCATION_PRINT (stderr, *yylloc);
    fprintf (stderr, ": unexpected end of file in a character\n");
    assert (yy_top_state () == INITIAL);
    YY_OBS_FINISH;
    yylval->string = last_string;
    yy_pop_state ();
    return CHARACTER;
  }
}
308
309
310 /*----------------------------.
311 | Decode escaped characters. |
312 `----------------------------*/
313
<SC_ESCAPED_STRING,SC_ESCAPED_CHARACTER>
{
  /* Octal escape: one to three digits, as in C (exactly three used
     to be required, so `\0' was reported as unrecognized).  Values
     that do not fit in a byte are rejected.  */
  \\[0-7]{1,3}	{
    long c = strtol (yytext + 1, 0, 8);
    if (c > 255)
      {
	LOCATION_PRINT (stderr, *yylloc);
	fprintf (stderr, ": invalid escape: %s\n", yytext);
	YY_STEP;
      }
    else
      obstack_1grow (&string_obstack, c);
  }

  /* Hexadecimal escape: exactly two digits.  */
  \\x[0-9a-fA-F]{2}	{
    obstack_1grow (&string_obstack, strtol (yytext + 2, 0, 16));
  }

  \\a	obstack_1grow (&string_obstack, '\a');
  \\b	obstack_1grow (&string_obstack, '\b');
  \\f	obstack_1grow (&string_obstack, '\f');
  \\n	obstack_1grow (&string_obstack, '\n');
  \\r	obstack_1grow (&string_obstack, '\r');
  \\t	obstack_1grow (&string_obstack, '\t');
  \\v	obstack_1grow (&string_obstack, '\v');

  /* Escapes that stand for the escaped character itself.  `\'' and
     `\?' are valid C escapes too; they used to be reported as
     unrecognized, which made the literal '\'' draw a diagnostic.  */
  \\[\\\"\'?]	obstack_1grow (&string_obstack, yytext[1]);

  /* Anything else is reported and kept as is.  The character after
     the backslash is optional so that a backslash followed by an end
     of line or by the end of file is still matched: with `%option
     nodefault' unmatched input aborts the scanner.  The end of line
     itself is left to the {eols} rule, which counts it.  */
  \\.?	{
    LOCATION_PRINT (stderr, *yylloc);
    fprintf (stderr, ": unrecognized escape: %s\n", yytext);
    YY_OBS_GROW;
  }
}
346
347
348 /*----------------------------------------------------------.
349 | Scanning a C character without decoding its escapes. The |
350 | initial "'" is already eaten. |
351 `----------------------------------------------------------*/
352
<SC_CHARACTER>
{
  /* Closing quote.  */
  \'		{
    YY_OBS_GROW;
    assert (yy_top_state () != INITIAL);
    yy_pop_state ();
  }

  /* Ordinary character.  */
  [^\[\]\'\n\r\\]	YY_OBS_GROW;

  /* Escapes are copied without being decoded.  The character after
     the backslash is optional so that a backslash followed by an end
     of line or by the end of file is still matched: with `%option
     nodefault' unmatched input aborts the scanner.  The end of line
     itself is left to the {eols} rule, which counts it.  */
  \\.?		YY_OBS_GROW;

  {eols}	YY_OBS_GROW; YY_LINES;

  <<EOF>>	{
    LOCATION_PRINT (stderr, *yylloc);
    fprintf (stderr, ": unexpected end of file in a character\n");
    assert (yy_top_state () != INITIAL);
    yy_pop_state ();
  }
}
373
374
375 /*----------------------------------------------------------------.
376 | Scanning a C string, without decoding its escapes. The initial |
377 | `"' is already eaten. |
378 `----------------------------------------------------------------*/
379
<SC_STRING>
{
  /* Closing quote.  */
  \"		{
    assert (yy_top_state () != INITIAL);
    YY_OBS_GROW;
    yy_pop_state ();
  }

  /* Ordinary text.  */
  [^\[\]\"\n\r\\]+	YY_OBS_GROW;

  /* Escapes are copied without being decoded.  The character after
     the backslash is optional so that a backslash followed by an end
     of line (a line continuation in C) or by the end of file is
     still matched: with `%option nodefault' unmatched input aborts
     the scanner.  The end of line itself is left to the {eols} rule,
     which counts it.  */
  \\.?		YY_OBS_GROW;

  {eols}	YY_OBS_GROW; YY_LINES;

  <<EOF>>	{
    LOCATION_PRINT (stderr, *yylloc);
    fprintf (stderr, ": unexpected end of file in a string\n");
    assert (yy_top_state () != INITIAL);
    yy_pop_state ();
  }
}
400
401
402 /*---------------------------------------------------.
403 | Strings, comments etc. can be found in user code. |
404 `---------------------------------------------------*/
405
<SC_BRACED_CODE,SC_PROLOGUE,SC_EPILOGUE>
{
  /* Whatever opens a nested entity is copied, then the matching
     start condition takes over until the entity is closed.  */

  /* Characters.  We don't check there is only one.  */
  \'		{
    YY_OBS_GROW;
    yy_push_state (SC_CHARACTER);
  }

  /* Strings.  */
  \"		{
    YY_OBS_GROW;
    yy_push_state (SC_STRING);
  }

  /* Comments.  */
  "/*"		{
    YY_OBS_GROW;
    yy_push_state (SC_COMMENT);
  }

  "//".*	{ YY_OBS_GROW; }
}
418
419
420 /*---------------------------------------------------------------.
421 | Scanning some code in braces (%union and actions). The initial |
422 | "{" is already eaten. |
423 `---------------------------------------------------------------*/
424
<SC_BRACED_CODE>
{
  /* A closing brace.  When it matches the opening one, the code is
     complete and returned, braces included.  */
  "}"		{
    YY_OBS_GROW;
    if (--braces_level == 0)
      {
	yy_pop_state ();
	YY_OBS_FINISH;
	yylval->string = last_string;
	return BRACED_CODE;
      }
  }

  /* A nested opening brace.  */
  "{"		YY_OBS_GROW; braces_level++;

  /* References to semantic values and to locations.  The optional
     type stops at the first `>': with a greedy `.*' a line holding
     several typed references, e.g. `$<a>1 + $<b>2', was matched as a
     single reference and everything after the first one was lost.  */
  "$"("<"[^>\n]*">")?(-?[0-9]+|"$")	{ handle_dollar (yytext); }
  "@"(-?[0-9]+|"$")			{ handle_at (yytext); }

  /* Ordinary text.  */
  [^$@\[\]/\'\"\{\}\n\r]+	YY_OBS_GROW;
  {eols}	YY_OBS_GROW; YY_LINES;

  /* A loose $, or /, or etc.  */
  .		YY_OBS_GROW;

  /* Unterminated code: report it, and return what was gathered.  */
  <<EOF>>	{
    LOCATION_PRINT (stderr, *yylloc);
    fprintf (stderr, ": unexpected end of file in a braced code\n");
    yy_pop_state ();
    YY_OBS_FINISH;
    yylval->string = last_string;
    return BRACED_CODE;
  }
}
459
460
461 /*--------------------------------------------------------------.
462 | Scanning some prologue: from "%{" (already scanned) to "%}". |
463 `--------------------------------------------------------------*/
464
<SC_PROLOGUE>
{
  /* End of the prologue: return the code gathered so far.  The
     delimiters themselves are not stored.  */
  "%}"		{
    yy_pop_state ();
    YY_OBS_FINISH;
    yylval->string = last_string;
    return PROLOGUE;
  }

  /* Ordinary text.  */
  [^%\[\]/\'\"\n\r]+	YY_OBS_GROW;
  "%"+[^%\}\n\r]+	YY_OBS_GROW;
  {eols}		YY_OBS_GROW; YY_LINES;

  /* A `/' that does not start a comment (as in `<sys/types.h>' or in
     a division), or a `%' followed by an end of line, by `%}' or by
     the end of file.  None of the rules above matches these, and
     with `%option nodefault' unmatched input aborts the scanner.
     This rule must stay after {eols} so that a lone `\r' is still
     counted as an end of line.  */
  .			YY_OBS_GROW;

  /* Unterminated prologue: report it, and return what was
     gathered.  */
  <<EOF>>	{
    LOCATION_PRINT (stderr, *yylloc);
    fprintf (stderr, ": unexpected end of file in a prologue\n");
    yy_pop_state ();
    YY_OBS_FINISH;
    yylval->string = last_string;
    return PROLOGUE;
  }
}
488
489
490 /*---------------------------------------------------------------.
491 | Scanning the epilogue (everything after the second "%%", which |
492 | has already been eaten). |
493 `---------------------------------------------------------------*/
494
<SC_EPILOGUE>
{
  /* Everything but the brackets is copied as is.  A negated
     character class also matches end-of-lines, so they need not be
     listed separately.  */
  [^\[\]]+	{ YY_OBS_GROW; }

  /* The end of the file is the end of the epilogue.  */
  <<EOF>>	{
    yy_pop_state ();
    YY_OBS_FINISH;
    yylval->string = last_string;
    return EPILOGUE;
  }
}
506
507
508%%
509
510/*------------------------------------------------------------------.
511| CP is pointing to a wannabee semantic value (i.e., a `$'). |
512| |
513| Possible inputs: $[<TYPENAME>]($|integer) |
514| |
515| Output to the STRING_OBSTACK a reference to this semantic value. |
516`------------------------------------------------------------------*/
517
518static void
519handle_dollar (char *cp)
520{
521 const char *type_name = NULL;
522
523 /* RULE_LENGTH is the number of values in the current rule so far,
524 which says where to find `$0' with respect to the top of the
525 stack. It is not the same as the rule->length in the case of mid
526 rule actions. */
527 int rule_length = 0;
528 symbol_list *rhs;
529 for (rhs = current_rule->next; rhs; rhs = rhs->next)
530 ++rule_length;
531
532 ++cp;
533
534 /* Get the type name if explicit. */
535 if (*cp == '<')
536 {
537 type_name = ++cp;
538 while (*cp != '>')
539 ++cp;
540 *cp = '\0';
541 ++cp;
542 }
543
544 if (*cp == '$')
545 {
546 if (!type_name)
547 type_name = get_type_name (0, current_rule);
548 if (!type_name && typed)
549 complain (_("$$ of `%s' has no declared type"),
550 current_rule->sym->tag);
551 if (!type_name)
552 type_name = "";
553 obstack_fgrow1 (&string_obstack,
554 "]b4_lhs_value([%s])[", type_name);
555 }
556 else if (isdigit (*cp) || *cp == '-')
557 {
558 int n = strtol (cp, &cp, 10);
559
560 if (n > rule_length)
561 complain (_("invalid value: %s%d"), "$", n);
562 else
563 {
564 if (!type_name && n > 0)
565 type_name = get_type_name (n, current_rule);
566 if (!type_name && typed)
567 complain (_("$%d of `%s' has no declared type"),
568 n, current_rule->sym->tag);
569 if (!type_name)
570 type_name = "";
571 obstack_fgrow3 (&string_obstack,
572 "]b4_rhs_value([%d], [%d], [%s])[",
573 rule_length, n, type_name);
574 }
575 }
576 else
577 {
578 char buf[] = "$c";
579 buf[1] = *cp;
580 complain (_("%s is invalid"), quote (buf));
581 }
582}
583
584/*-------------------------------------------------------.
585| CP is pointing to a location (i.e., a `@'). Output to |
586| STRING_OBSTACK a reference to this location. |
587`-------------------------------------------------------*/
588
589static void
590handle_at (char *cp)
591{
592 /* RULE_LENGTH is the number of values in the current rule so far,
593 which says where to find `$0' with respect to the top of the
594 stack. It is not the same as the rule->length in the case of mid
595 rule actions. */
596 int rule_length = 0;
597 symbol_list *rhs;
598 for (rhs = current_rule->next; rhs; rhs = rhs->next)
599 ++rule_length;
600
601 locations_flag = 1;
602 ++cp;
603
604 if (*cp == '$')
605 {
606 obstack_sgrow (&string_obstack, "]b4_lhs_location[");
607 }
608 else if (isdigit (*cp) || *cp == '-')
609 {
610 int n = strtol (cp, &cp, 10);
611 if (n > rule_length)
612 complain (_("invalid value: %s%d"), "@", n);
613 else
614 obstack_fgrow2 (&string_obstack, "]b4_rhs_location([%d], [%d])[",
615 rule_length, n);
616 }
617 else
618 {
619 char buf[] = "@c";
620 buf[1] = *cp;
621 complain (_("%s is invalid"), quote (buf));
622 }
623}
4cdb01db 624
1d6412ad
AD
625void
626scanner_initialize (void)
627{
628 obstack_init (&string_obstack);
629}
630
631
4cdb01db
AD
632void
633scanner_free (void)
634{
635 obstack_free (&string_obstack, 0);
636}