]> git.saurik.com Git - bison.git/blob - src/scan-gram.l
* src/scan-gram.l (SC_PROLOGUE): Don't eat characters amongst
[bison.git] / src / scan-gram.l
1 /* Bison Grammar Scanner -*- C -*-
2 Copyright (C) 2002 Free Software Foundation, Inc.
3
4 This file is part of Bison, the GNU Compiler Compiler.
5
6 This program is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 2 of the License, or
9 (at your option) any later version.
10
11 This program is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
15
16 You should have received a copy of the GNU General Public License
17 along with this program; if not, write to the Free Software
18 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
19 02111-1307 USA
20 */
21
22 %option debug nodefault noyywrap nounput never-interactive stack
23 %option prefix="gram_" outfile="lex.yy.c"
24
25 %{
26 #include "system.h"
27 #include "complain.h"
28 #include "quote.h"
29 #include "getargs.h"
30 #include "gram.h"
31 #include "reader.h"
32
/* Each time we match a string, move the end cursor to its end.  */
#define YY_USER_ACTION  LOCATION_COLUMNS (*yylloc, yyleng)

/* Account for the end-of-line text that was just matched: update
   *yylloc through LOCATION_LINES and add yyleng to lineno.  Wrapped in
   do/while (0) so that the macro expands to exactly one statement and
   stays correct as the body of an unbraced `if' or `else'.  */
#define YY_LINES                                \
  do {                                          \
    LOCATION_LINES (*yylloc, yyleng);           \
    lineno += yyleng;                           \
  } while (0)

/* Mark the current position as the start of the next token.  */
#define YY_STEP  LOCATION_STEP (*yylloc)
37
38
39 /* STRING_OBSTACK -- Used to store all the characters that we need to
40 keep (to construct ID, STRINGS etc.). Use the following macros to
41 use it.
42
43 Use YY_OBS_GROW to append what has just been matched, and
44 YY_OBS_FINISH to end the string (it puts the ending 0).
45 YY_OBS_FINISH also stores this string in LAST_STRING, which can be
46 used, and which is used by YY_OBS_FREE to free the last string. */
47
/* string_obstack is private to the scanner; last_string is not
   declared static, so it is visible to other translation units.
   YY_OBS_GROW appends the yyleng bytes of yytext, YY_OBS_FINISH
   appends a terminating NUL and records the finished object in
   last_string, and YY_OBS_FREE passes last_string to obstack_free.  */
48 static struct obstack string_obstack;
49 char *last_string;
50
51 #define YY_OBS_GROW \
52 obstack_grow (&string_obstack, yytext, yyleng)
53
54 #define YY_OBS_FINISH \
55 do { \
56 obstack_1grow (&string_obstack, '\0'); \
57 last_string = obstack_finish (&string_obstack); \
58 } while (0)
59
60 #define YY_OBS_FREE \
61 do { \
62 obstack_free (&string_obstack, last_string); \
63 } while (0)
64
65 void
66 scanner_last_string_free (void)
67 {
68 YY_OBS_FREE;
69 }
70
71
72 /* This is only to avoid GCC warnings. */
73 #define YY_USER_INIT if (yycontrol) {;};
74
75
/* braces_level is the `{' nesting depth while scanning braced code:
   it is incremented on each `{' and decremented on each `}'.
   percent_percent_count is the number of `%%' seen so far; the second
   one makes the scanner enter the epilogue.  handle_dollar and
   handle_at are called on the `$' and `@' references matched in braced
   code.  */
76 static int braces_level = 0;
77 static int percent_percent_count = 0;
78
79 static void handle_dollar PARAMS ((char *cp));
80 static void handle_at PARAMS ((char *cp));
81
82 %}
/* Start conditions.  SC_ESCAPED_STRING and SC_ESCAPED_CHARACTER scan
   literals met in INITIAL and decode their escapes; SC_STRING and
   SC_CHARACTER scan literals met inside user code and copy them
   without decoding.  SC_BRACED_CODE, SC_PROLOGUE and SC_EPILOGUE scan
   the three kinds of user code.  The named patterns that follow are
   shared by the rules: identifiers, decimal integers, runs of
   end-of-lines (any mix of \n and \r), and runs of blanks.  */
83 %x SC_COMMENT
84 %x SC_STRING SC_CHARACTER
85 %x SC_ESCAPED_STRING SC_ESCAPED_CHARACTER
86 %x SC_BRACED_CODE SC_PROLOGUE SC_EPILOGUE
87
88 id [.a-zA-Z][.a-zA-Z_0-9]*
89 int [0-9]+
90 eols (\n|\r|\n\r|\r\n)+
91 blanks [ \t\f]+
92
92
93 %%
94 %{
  /* TR_POS is a compile-time tracing switch, 0 here.  When nonzero,
     the location is printed on stderr before (tagged FOO1) and after
     (tagged BAR1) the YY_STEP below.  */
95 /* At each yylex invocation, mark the current position as the
96 start of the next token. */
97 #define TR_POS 0
98 #if TR_POS
99 fprintf (stderr, "FOO1: ");
100 LOCATION_PRINT (stderr, *yylloc);
101 fprintf (stderr, "\n");
102 #endif
103 YY_STEP;
104 #if TR_POS
105 fprintf (stderr, "BAR1: ");
106 LOCATION_PRINT (stderr, *yylloc);
107 fprintf (stderr, "\n");
108 #endif
109 %}
110
111
112 /*----------------------------.
113 | Scanning Bison directives. |
114 `----------------------------*/
115 <INITIAL>
116 {
  /* Rules for the INITIAL start condition.

     Each percent directive returns its own token code.  Some
     spellings accept either a dash or an underscore as separator, and
     some directives share a code: %binary and %nonassoc, %term and
     %token, %fixed-output-files and %yacc.

     End-of-lines go through YY_LINES then YY_STEP, blanks and line
     comments through YY_STEP only; none of them returns a token.
     Identifiers return ID with the symbol given by getsym, integers
     return INT with the value read in base 10.

     A quote character is appended to STRING_OBSTACK and pushes the
     matching escape-decoding start condition.  A C comment opener
     pushes SC_COMMENT, the prologue opener pushes SC_PROLOGUE, and an
     opening brace is appended, counted in braces_level, and pushes
     SC_BRACED_CODE.  A type between angle brackets returns TYPE with
     the text between the brackets.

     Every double percent returns PERCENT_PERCENT; the second one also
     pushes SC_EPILOGUE.  Any other character is reported on stderr and
     skipped.  */
117 "%binary" return PERCENT_NONASSOC;
118 "%debug" return PERCENT_DEBUG;
119 "%define" return PERCENT_DEFINE;
120 "%defines" return PERCENT_DEFINES;
121 "%error"[-_]"verbose" return PERCENT_ERROR_VERBOSE;
122 "%expect" return PERCENT_EXPECT;
123 "%file-prefix" return PERCENT_FILE_PREFIX;
124 "%fixed"[-_]"output"[-_]"files" return PERCENT_YACC;
125 "%left" return PERCENT_LEFT;
126 "%locations" return PERCENT_LOCATIONS;
127 "%name"[-_]"prefix" return PERCENT_NAME_PREFIX;
128 "%no"[-_]"lines" return PERCENT_NO_LINES;
129 "%nonassoc" return PERCENT_NONASSOC;
130 "%nterm" return PERCENT_NTERM;
131 "%output" return PERCENT_OUTPUT;
132 "%prec" return PERCENT_PREC;
133 "%pure"[-_]"parser" return PERCENT_PURE_PARSER;
134 "%right" return PERCENT_RIGHT;
135 "%skeleton" return PERCENT_SKELETON;
136 "%start" return PERCENT_START;
137 "%term" return PERCENT_TOKEN;
138 "%token" return PERCENT_TOKEN;
139 "%token"[-_]"table" return PERCENT_TOKEN_TABLE;
140 "%type" return PERCENT_TYPE;
141 "%union" return PERCENT_UNION;
142 "%verbose" return PERCENT_VERBOSE;
143 "%yacc" return PERCENT_YACC;
144
145 "=" return EQUAL;
146 ":" return COLON;
147 "|" return PIPE;
148 ";" return SEMICOLON;
149
150 {eols} YY_LINES; YY_STEP;
151 {blanks} YY_STEP;
152 {id} {
153 yylval->symbol = getsym (yytext);
154 return ID;
155 }
156
157 {int} yylval->integer = strtol (yytext, 0, 10); return INT;
158
159 /* Characters. We don't check there is only one. */
160 \' YY_OBS_GROW; yy_push_state (SC_ESCAPED_CHARACTER);
161
162 /* Strings. */
163 \" YY_OBS_GROW; yy_push_state (SC_ESCAPED_STRING);
164
165 /* Comments. */
166 "/*" yy_push_state (SC_COMMENT);
167 "//".* YY_STEP;
168
169 /* Prologue. */
170 "%{" yy_push_state (SC_PROLOGUE);
171
172 /* Code in between braces. */
173 "{" YY_OBS_GROW; ++braces_level; yy_push_state (SC_BRACED_CODE);
174
175 /* A type. */
176 "<"[^>]+">" {
177 obstack_grow (&string_obstack, yytext + 1, yyleng - 2);
178 YY_OBS_FINISH;
179 yylval->string = last_string;
180 return TYPE;
181 }
182
183
184 "%%" {
185 if (++percent_percent_count == 2)
186 yy_push_state (SC_EPILOGUE);
187 return PERCENT_PERCENT;
188 }
189
190 . {
191 LOCATION_PRINT (stderr, *yylloc);
192 fprintf (stderr, ": invalid character: `%c'\n", *yytext);
193 YY_STEP;
194 }
195 }
196
197
198 /*------------------------------------------------------------.
199 | Whatever the start condition (but those which correspond to |
200 | entity `swallowed' by Bison: SC_ESCAPED_STRING and |
201 | SC_ESCAPED_CHARACTER), no M4 character must escape as is. |
202 `------------------------------------------------------------*/
203
<SC_COMMENT,SC_STRING,SC_CHARACTER,SC_BRACED_CODE,SC_PROLOGUE,SC_EPILOGUE>
{
  /* `[' and `]' are never copied as is: they are appended to
     STRING_OBSTACK as "@<:@" and "@:>@".  The only text that is not
     copied at all is a comment opened from INITIAL, so that is the
     only case where the brackets are dropped; a comment nested in
     user code is copied by the SC_COMMENT rules and must keep its
     brackets, in quoted form.  yy_top_state is only evaluated while
     in SC_COMMENT, which is always entered through yy_push_state.  */
  \[ {
    if (YY_START != SC_COMMENT || yy_top_state () != INITIAL)
      obstack_sgrow (&string_obstack, "@<:@");
  }

  \] {
    if (YY_START != SC_COMMENT || yy_top_state () != INITIAL)
      obstack_sgrow (&string_obstack, "@:>@");
  }
}
209
210
211
212 /*-----------------------------------------------------------.
213 | Scanning a C comment. The initial `/ *' is already eaten. |
214 `-----------------------------------------------------------*/
215
216 <SC_COMMENT>
217 {
  /* yy_top_state () tells where the comment was opened.  When it is
     INITIAL the text of the comment is discarded; otherwise every
     piece matched here, the closing delimiter included, is appended
     to STRING_OBSTACK.  End-of-lines go through YY_LINES in both
     cases.  The closing delimiter pops back to the enclosing start
     condition, after a YY_STEP when that condition is INITIAL.  An
     end of file inside the comment is reported on stderr and pops
     the state too.  */
218 "*/" { /* End of the comment. */
219 if (yy_top_state () == INITIAL)
220 {
221 YY_STEP;
222 }
223 else
224 {
225 YY_OBS_GROW;
226 }
227 yy_pop_state ();
228 }
229
230 [^\[\]*\n\r]+ if (yy_top_state () != INITIAL) YY_OBS_GROW;
231 {eols} if (yy_top_state () != INITIAL) YY_OBS_GROW; YY_LINES;
232 . /* Stray `*'. */if (yy_top_state () != INITIAL) YY_OBS_GROW;
233
234 <<EOF>> {
235 LOCATION_PRINT (stderr, *yylloc);
236 fprintf (stderr, ": unexpected end of file in a comment\n");
237 yy_pop_state ();
238 }
239 }
240
241
242 /*----------------------------------------------------------------.
243 | Scanning a C string, including its escapes. The initial `"' is |
244 | already eaten. |
245 `----------------------------------------------------------------*/
246
247 <SC_ESCAPED_STRING>
248 {
  /* Entered from INITIAL only (asserted), once the opening quote has
     been appended to STRING_OBSTACK.  Ordinary text is appended as
     is; a run of end-of-lines is stored as one newline character and
     counted with YY_LINES.  The closing quote is appended as well,
     then the finished string is handed over in yylval->string and
     STRING is returned.  An end of file inside the string is reported
     on stderr, after which the text collected so far is returned as a
     STRING too.  */
249 \" {
250 assert (yy_top_state () == INITIAL);
251 YY_OBS_GROW;
252 YY_OBS_FINISH;
253 yylval->string = last_string;
254 yy_pop_state ();
255 return STRING;
256 }
257
258 [^\"\n\r\\]+ YY_OBS_GROW;
259
260 {eols} obstack_1grow (&string_obstack, '\n'); YY_LINES;
261
262 <<EOF>> {
263 LOCATION_PRINT (stderr, *yylloc);
264 fprintf (stderr, ": unexpected end of file in a string\n");
265 assert (yy_top_state () == INITIAL);
266 YY_OBS_FINISH;
267 yylval->string = last_string;
268 yy_pop_state ();
269 return STRING;
270 }
271 }
272
273 /*---------------------------------------------------------------.
274 | Scanning a C character, decoding its escapes. The initial "'" |
275 | is already eaten. |
276 `---------------------------------------------------------------*/
277
<SC_ESCAPED_CHARACTER>
{
  /* Closing quote.  The finished text is the quoted character, quotes
     included; it names a symbol that is made a token whose user
     number is the character itself.  The character is read through
     unsigned char so that characters above 127 do not yield a
     negative token number where plain char is signed.  The string is
     released right after use.  */
  \' {
    YY_OBS_GROW;
    assert (yy_top_state () == INITIAL);
    {
      YY_OBS_FINISH;
      yylval->symbol = getsym (last_string);
      symbol_class_set (yylval->symbol, token_sym);
      symbol_user_token_number_set (yylval->symbol,
                                    (unsigned char) last_string[1]);
      YY_OBS_FREE;
      yy_pop_state ();
      return ID;
    }
  }

  [^\'\n\r\\]      YY_OBS_GROW;

  /* A run of end-of-lines is stored as one newline character.  */
  {eols}           obstack_1grow (&string_obstack, '\n'); YY_LINES;

  /* End of file inside the literal: report it on stderr and return
     what was collected so far.  */
  <<EOF>> {
    LOCATION_PRINT (stderr, *yylloc);
    fprintf (stderr, ": unexpected end of file in a character\n");
    assert (yy_top_state () == INITIAL);
    YY_OBS_FINISH;
    yylval->string = last_string;
    yy_pop_state ();
    return CHARACTER;
  }
}
308
309
310 /*----------------------------.
311 | Decode escaped characters. |
312 `----------------------------*/
313
<SC_ESCAPED_STRING,SC_ESCAPED_CHARACTER>
{
  /* Three-digit octal escape; values above 255 are reported on stderr
     and nothing is stored.  */
  \\[0-7]{3} {
    long c = strtol (yytext + 1, 0, 8);
    if (c > 255)
      {
        LOCATION_PRINT (stderr, *yylloc);
        fprintf (stderr, ": invalid escape: %s\n", yytext);
        YY_STEP;
      }
    else
      obstack_1grow (&string_obstack, c);
  }

  /* Two-digit hexadecimal escape.  */
  \\x[0-9a-fA-F]{2} {
    obstack_1grow (&string_obstack, strtol (yytext + 2, 0, 16));
  }

  \\a              obstack_1grow (&string_obstack, '\a');
  \\b              obstack_1grow (&string_obstack, '\b');
  \\f              obstack_1grow (&string_obstack, '\f');
  \\n              obstack_1grow (&string_obstack, '\n');
  \\r              obstack_1grow (&string_obstack, '\r');
  \\t              obstack_1grow (&string_obstack, '\t');
  \\v              obstack_1grow (&string_obstack, '\v');

  /* Escapes that stand for the escaped character itself: backslash,
     double quote, single quote and question mark.  The single quote
     must be here so that the character literal made of an escaped
     quote is decoded instead of being reported as unrecognized.  */
  \\[\\\"\'?]      obstack_1grow (&string_obstack, yytext[1]);

  /* Anything else is reported on stderr and kept undecoded.  */
  \\. {
    LOCATION_PRINT (stderr, *yylloc);
    fprintf (stderr, ": unrecognized escape: %s\n", yytext);
    YY_OBS_GROW;
  }
}
346
347
348 /*----------------------------------------------------------.
349 | Scanning a C character without decoding its escapes. The |
350 | initial "'" is already eaten. |
351 `----------------------------------------------------------*/
352
353 <SC_CHARACTER>
354 {
  /* Character literal met inside user code, hence never entered from
     INITIAL (asserted).  Everything is appended to STRING_OBSTACK
     undecoded: plain characters, backslash followed by any character,
     end-of-lines (also counted with YY_LINES) and the closing quote,
     which pops back to the enclosing state.  An end of file inside
     the literal is reported on stderr and pops the state.  */
355 \' {
356 YY_OBS_GROW;
357 assert (yy_top_state () != INITIAL);
358 yy_pop_state ();
359 }
360
361 [^\[\]\'\n\r\\] YY_OBS_GROW;
362 \\. YY_OBS_GROW;
363
364 {eols} YY_OBS_GROW; YY_LINES;
365
366 <<EOF>> {
367 LOCATION_PRINT (stderr, *yylloc);
368 fprintf (stderr, ": unexpected end of file in a character\n");
369 assert (yy_top_state () != INITIAL);
370 yy_pop_state ();
371 }
372 }
373
374
375 /*----------------------------------------------------------------.
376 | Scanning a C string, without decoding its escapes. The initial |
377 | `"' is already eaten. |
378 `----------------------------------------------------------------*/
379
380 <SC_STRING>
381 {
  /* String literal met inside user code, hence never entered from
     INITIAL (asserted).  Everything is appended to STRING_OBSTACK
     undecoded: plain text, backslash followed by any character,
     end-of-lines (also counted with YY_LINES) and the closing quote,
     which pops back to the enclosing state.  An end of file inside
     the literal is reported on stderr and pops the state.  */
382 \" {
383 assert (yy_top_state () != INITIAL);
384 YY_OBS_GROW;
385 yy_pop_state ();
386 }
387
388 [^\[\]\"\n\r\\]+ YY_OBS_GROW;
389 \\. YY_OBS_GROW;
390
391 {eols} YY_OBS_GROW; YY_LINES;
392
393 <<EOF>> {
394 LOCATION_PRINT (stderr, *yylloc);
395 fprintf (stderr, ": unexpected end of file in a string\n");
396 assert (yy_top_state () != INITIAL);
397 yy_pop_state ();
398 }
399 }
400
401
402 /*---------------------------------------------------.
403 | Strings, comments etc. can be found in user code. |
404 `---------------------------------------------------*/
405
406 <SC_BRACED_CODE,SC_PROLOGUE,SC_EPILOGUE>
407 {
  /* Shared by the three kinds of user code.  A quote or a C comment
     opener is appended to STRING_OBSTACK and pushes the start
     condition that copies the rest of the construct (SC_CHARACTER,
     SC_STRING or SC_COMMENT).  A line comment is appended in one
     piece, as far as its pattern matches.  */
408 /* Characters. We don't check there is only one. */
409 \' YY_OBS_GROW; yy_push_state (SC_CHARACTER);
410
411 /* Strings. */
412 \" YY_OBS_GROW; yy_push_state (SC_STRING);
413
414 /* Comments. */
415 "/*" YY_OBS_GROW; yy_push_state (SC_COMMENT);
416 "//".* YY_OBS_GROW;
417 }
418
419
420 /*---------------------------------------------------------------.
421 | Scanning some code in braces (%union and actions). The initial |
422 | "{" is already eaten. |
423 `---------------------------------------------------------------*/
424
<SC_BRACED_CODE>
{
  /* A closing brace is always copied.  When it closes the outermost
     brace the collected code is finished and returned as a
     BRACED_CODE token.  */
  "}" {
    YY_OBS_GROW;
    if (--braces_level == 0)
      {
        yy_pop_state ();
        YY_OBS_FINISH;
        yylval->string = last_string;
        return BRACED_CODE;
      }
  }

  "{"                              YY_OBS_GROW; braces_level++;

  /* References to semantic values and locations.  The optional type
     name stops at the first `>': with a greedy `.*' there, two typed
     references on one line were matched as a single token and the
     code between them was lost.  */
  "$"("<"[^>\n]*">")?(-?[0-9]+|"$") { handle_dollar (yytext); }
  "@"(-?[0-9]+|"$")                 { handle_at (yytext); }

  [^$@\[\]/\'\"\{\}\n\r]+          YY_OBS_GROW;
  {eols}                           YY_OBS_GROW; YY_LINES;

  /* A loose $, or /, or etc. */
  .                                YY_OBS_GROW;

  /* End of file inside the braces: report it on stderr and return
     what was collected so far.  */
  <<EOF>> {
    LOCATION_PRINT (stderr, *yylloc);
    fprintf (stderr, ": unexpected end of file in a braced code\n");
    yy_pop_state ();
    YY_OBS_FINISH;
    yylval->string = last_string;
    return BRACED_CODE;
  }

}
459
460
461 /*--------------------------------------------------------------.
462 | Scanning some prologue: from "%{" (already scanned) to "%}". |
463 `--------------------------------------------------------------*/
464
<SC_PROLOGUE>
{
  /* End of the prologue: return the collected code.  */
  "%}" {
    yy_pop_state ();
    YY_OBS_FINISH;
    yylval->string = last_string;
    return PROLOGUE;
  }

  [^%\[\]/\'\"\n\r]+     YY_OBS_GROW;

  /* A `%' that does not start the closing delimiter, and a `/' that
     does not start a comment, are copied one character at a time.
     The longer delimiter and comment-opener patterns still win when
     they apply.  Copying a single character means that whatever
     follows (a quote, a bracket, a comment, an end of line) is
     scanned by its own rule, and no input is left without a matching
     rule.  */
  "%"                    YY_OBS_GROW;
  "/"                    YY_OBS_GROW;

  {eols}                 YY_OBS_GROW; YY_LINES;

  /* End of file inside the prologue: report it on stderr and return
     what was collected so far.  */
  <<EOF>> {
    LOCATION_PRINT (stderr, *yylloc);
    fprintf (stderr, ": unexpected end of file in a prologue\n");
    yy_pop_state ();
    YY_OBS_FINISH;
    yylval->string = last_string;
    return PROLOGUE;
  }

}
488
489
490 /*---------------------------------------------------------------.
491 | Scanning the epilogue (everything after the second "%%", which |
492 | has already been eaten. |
493 `---------------------------------------------------------------*/
494
495 <SC_EPILOGUE>
496 {
  /* Text free of square brackets, end-of-lines included, is appended
     to STRING_OBSTACK in chunks.  At end of file the state is popped
     and the collected text is returned in yylval->string as an
     EPILOGUE token.  */
497 ([^\[\]]|{eols})+ YY_OBS_GROW;
498
499 <<EOF>> {
500 yy_pop_state ();
501 YY_OBS_FINISH;
502 yylval->string = last_string;
503 return EPILOGUE;
504 }
505 }
506
507
508 %%
509
510 /*------------------------------------------------------------------.
511 | CP is pointing to a wannabee semantic value (i.e., a `$'). |
512 | |
513 | Possible inputs: $[<TYPENAME>]($|integer) |
514 | |
515 | Output to the STRING_OBSTACK a reference to this semantic value. |
516 `------------------------------------------------------------------*/
517
518 static void
519 handle_dollar (char *cp)
520 {
521 const char *type_name = NULL;
522
523 /* RULE_LENGTH is the number of values in the current rule so far,
524 which says where to find `$0' with respect to the top of the
525 stack. It is not the same as the rule->length in the case of mid
526 rule actions. */
527 int rule_length = 0;
528 symbol_list *rhs;
529 for (rhs = current_rule->next; rhs; rhs = rhs->next)
530 ++rule_length;
531
532 ++cp;
533
534 /* Get the type name if explicit. */
535 if (*cp == '<')
536 {
537 type_name = ++cp;
538 while (*cp != '>')
539 ++cp;
540 *cp = '\0';
541 ++cp;
542 }
543
544 if (*cp == '$')
545 {
546 if (!type_name)
547 type_name = get_type_name (0, current_rule);
548 if (!type_name && typed)
549 complain (_("$$ of `%s' has no declared type"),
550 current_rule->sym->tag);
551 if (!type_name)
552 type_name = "";
553 obstack_fgrow1 (&string_obstack,
554 "]b4_lhs_value([%s])[", type_name);
555 }
556 else if (isdigit (*cp) || *cp == '-')
557 {
558 int n = strtol (cp, &cp, 10);
559
560 if (n > rule_length)
561 complain (_("invalid value: %s%d"), "$", n);
562 else
563 {
564 if (!type_name && n > 0)
565 type_name = get_type_name (n, current_rule);
566 if (!type_name && typed)
567 complain (_("$%d of `%s' has no declared type"),
568 n, current_rule->sym->tag);
569 if (!type_name)
570 type_name = "";
571 obstack_fgrow3 (&string_obstack,
572 "]b4_rhs_value([%d], [%d], [%s])[",
573 rule_length, n, type_name);
574 }
575 }
576 else
577 {
578 char buf[] = "$c";
579 buf[1] = *cp;
580 complain (_("%s is invalid"), quote (buf));
581 }
582 }
583
584 /*-------------------------------------------------------.
585 | CP is pointing to a location (i.e., a `@'). Output to |
586 | STRING_OBSTACK a reference to this location. |
587 `-------------------------------------------------------*/
588
589 static void
590 handle_at (char *cp)
591 {
592 /* RULE_LENGTH is the number of values in the current rule so far,
593 which says where to find `$0' with respect to the top of the
594 stack. It is not the same as the rule->length in the case of mid
595 rule actions. */
596 int rule_length = 0;
597 symbol_list *rhs;
598 for (rhs = current_rule->next; rhs; rhs = rhs->next)
599 ++rule_length;
600
601 locations_flag = 1;
602 ++cp;
603
604 if (*cp == '$')
605 {
606 obstack_sgrow (&string_obstack, "]b4_lhs_location[");
607 }
608 else if (isdigit (*cp) || *cp == '-')
609 {
610 int n = strtol (cp, &cp, 10);
611 if (n > rule_length)
612 complain (_("invalid value: %s%d"), "@", n);
613 else
614 obstack_fgrow2 (&string_obstack, "]b4_rhs_location([%d], [%d])[",
615 rule_length, n);
616 }
617 else
618 {
619 char buf[] = "@c";
620 buf[1] = *cp;
621 complain (_("%s is invalid"), quote (buf));
622 }
623 }
624
625 void
626 scanner_initialize (void)
627 {
628 obstack_init (&string_obstack);
629 }
630
631
632 void
633 scanner_free (void)
634 {
635 obstack_free (&string_obstack, 0);
636 }