]> git.saurik.com Git - bison.git/blob - src/scan-gram.l
Have Bison grammars parsed by a Bison grammar.
[bison.git] / src / scan-gram.l
1 /* Bison Grammar Scanner -*- C -*-
2 Copyright (C) 2002 Free Software Foundation, Inc.
3
4 This file is part of Bison, the GNU Compiler Compiler.
5
6 This program is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 2 of the License, or
9 (at your option) any later version.
10
11 This program is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
15
16 You should have received a copy of the GNU General Public License
17 along with this program; if not, write to the Free Software
18 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
19 02111-1307 USA
20 */
21
22 %option debug nodefault noyywrap nounput never-interactive stack
23 %option prefix="gram_" outfile="lex.yy.c"
24
25 %{
/* C prologue of the scanner: project headers, the helper macros used
   by the rule actions below, and the scanner's file-local state.  */
26 #include "system.h"
27 #include "complain.h"
28 #include "quote.h"
29 #include "getargs.h"
30 #include "gram.h"
31 #include "reader.h"
32
/* Location bookkeeping.  YY_USER_ACTION is flex's per-match hook; here
   it applies LOCATION_COLUMNS to the current location with the match
   length.  YY_LINES applies LOCATION_LINES and also adds yyleng to
   `lineno'.  YY_STEP applies LOCATION_STEP.
   NOTE(review): YY_LINES expands to two statements, so it is not safe
   as the sole body of an unbraced `if'.
   NOTE(review): yyleng counts matched characters, so a CR-LF pair
   presumably advances `lineno' by two -- confirm.  */
33 /* Each time we match a string, move the end cursor to its end. */
34 #define YY_USER_ACTION LOCATION_COLUMNS (*yylloc, yyleng)
35 #define YY_LINES LOCATION_LINES (*yylloc, yyleng); lineno += yyleng;
36 #define YY_STEP LOCATION_STEP (*yylloc)
37
/* YY_INIT (re)initializes the obstack, YY_GROW appends the current
   match (yytext, yyleng), and YY_FINISH appends a terminating NUL and
   stores the finished object in yylval->string.  YY_FINISH is also a
   two-statement macro.
   NOTE(review): YY_INIT calls obstack_init each time it is used;
   whether earlier contents are released is not visible here.  */
38 /* Appending to the STRING_OBSTACK. */
39 #define YY_INIT obstack_init (&string_obstack)
40 #define YY_GROW obstack_grow (&string_obstack, yytext, yyleng)
41 #define YY_FINISH obstack_1grow (&string_obstack, '\0'); yylval->string = obstack_finish (&string_obstack);
42
43 /* This is only to avoid GCC warnings. */
44 #define YY_USER_INIT if (yycontrol) {;};
45
/* Buffer in which the text of the token being built is accumulated.  */
46 static struct obstack string_obstack;
/* Nesting depth of `{' ... `}' while scanning braced code.  */
47 static int braces_level = 0;
/* Number of `%%' separators seen so far; the second one starts the
   epilogue.  */
48 static int percent_percent_count = 0;
49
/* Translators for `$...' and `@...' references found in braced code;
   defined in the user-code section at the end of this file.  */
50 static void handle_dollar PARAMS ((char *cp));
51 static void handle_at PARAMS ((char *cp));
52
53 %}
%x SC_COMMENT
%x SC_STRING SC_CHARACTER
%x SC_ESCAPED_STRING SC_ESCAPED_CHARACTER
%x SC_BRACED_CODE SC_PROLOGUE SC_EPILOGUE

  /* Named patterns used by the rules below.
     id:     a grammar symbol name: letters, `.', `_' and digits, not
             starting with a digit.  A leading `_' is accepted, as
             yacc names allow it.
     int:    an unsigned decimal integer.
     eols:   one or more line terminators (LF, CR, LF-CR or CR-LF).
     blanks: horizontal white space and form feeds.  */
id	 [.a-zA-Z_][.a-zA-Z_0-9]*
int	 [0-9]+
eols     (\n|\r|\n\r|\r\n)+
blanks   [ \t\f]+

63
64 %%
65 %{
/* Code placed at the top of the rules section: flex runs it at the
   start of every call to the scanning function, before any rule is
   tried.  */
66 /* At each yylex invocation, mark the current position as the
67 start of the next token. */
/* TR_POS is a compile-time switch: set it to 1 to print the location
   on stderr before ("FOO1") and after ("BAR1") the YY_STEP below.  */
68 #define TR_POS 0
69 #if TR_POS
70 fprintf (stderr, "FOO1: ");
71 LOCATION_PRINT (stderr, *yylloc);
72 fprintf (stderr, "\n");
73 #endif
74 YY_STEP;
75 #if TR_POS
76 fprintf (stderr, "BAR1: ");
77 LOCATION_PRINT (stderr, *yylloc);
78 fprintf (stderr, "\n");
79 #endif
80 %}
81
82
83 /*----------------------------.
84 | Scanning Bison directives. |
85 `----------------------------*/
86 <INITIAL>
87 {
  /* Rules for the top level of the grammar file, i.e. outside of any
     code block, string, character literal or comment.  */

  /* Directive keywords.  Where a `[-_]' appears, either a dash or an
     underscore is accepted.  Some spellings are synonyms and return
     the same token: %binary/%nonassoc, %term/%token and
     %fixed-output-files/%yacc.  */
88 "%binary" return PERCENT_NONASSOC;
89 "%debug" return PERCENT_DEBUG;
90 "%define" return PERCENT_DEFINE;
91 "%defines" return PERCENT_DEFINES;
92 "%error"[-_]"verbose" return PERCENT_ERROR_VERBOSE;
93 "%expect" return PERCENT_EXPECT;
94 "%file-prefix" return PERCENT_FILE_PREFIX;
95 "%fixed"[-_]"output"[-_]"files" return PERCENT_YACC;
96 "%left" return PERCENT_LEFT;
97 "%locations" return PERCENT_LOCATIONS;
98 "%name"[-_]"prefix" return PERCENT_NAME_PREFIX;
99 "%no"[-_]"lines" return PERCENT_NO_LINES;
100 "%nonassoc" return PERCENT_NONASSOC;
101 "%nterm" return PERCENT_NTERM;
102 "%output" return PERCENT_OUTPUT;
103 "%prec" return PERCENT_PREC;
104 "%pure"[-_]"parser" return PERCENT_PURE_PARSER;
105 "%right" return PERCENT_RIGHT;
106 "%skeleton" return PERCENT_SKELETON;
107 "%start" return PERCENT_START;
108 "%term" return PERCENT_TOKEN;
109 "%token" return PERCENT_TOKEN;
110 "%token"[-_]"table" return PERCENT_TOKEN_TABLE;
111 "%type" return PERCENT_TYPE;
112 "%union" return PERCENT_UNION;
113 "%verbose" return PERCENT_VERBOSE;
114 "%yacc" return PERCENT_YACC;
115
  /* Punctuation.  */
116 "=" return EQUAL;
117 ":" return COLON;
118 "|" return PIPE;
119 ";" return SEMICOLON;
120
  /* White space produces no token: it only moves the location
     forward.  Line ends also update the line counters.  */
121 {eols} YY_LINES; YY_STEP;
122 {blanks} YY_STEP;
  /* Identifier: copy its text into a fresh obstack string, then look
     the symbol up with getsym.  */
123 {id} {
124 YY_INIT; YY_GROW; YY_FINISH;
125 yylval->symbol = getsym (yylval->string);
126 return ID;
127 }
128
  /* Decimal integer, converted with strtol in base 10.  */
129 {int} yylval->integer = strtol (yytext, 0, 10); return INT;
130
  /* The opening quote is kept in the obstack; the rest of the literal
     is collected in SC_ESCAPED_CHARACTER.  */
131 /* Characters. We don't check there is only one. */
132 \' YY_INIT; YY_GROW; yy_push_state (SC_ESCAPED_CHARACTER);
133
  /* Same scheme for strings, collected in SC_ESCAPED_STRING.  */
134 /* Strings. */
135 \" YY_INIT; YY_GROW; yy_push_state (SC_ESCAPED_STRING);
136
  /* A C comment is handled in SC_COMMENT; a `//' comment runs to the
     end of the line and is skipped.  */
137 /* Comments. */
138 "/*" yy_push_state (SC_COMMENT);
139 "//".* YY_STEP;
140
  /* The `%{' marker itself is not stored (no YY_GROW).  */
141 /* Prologue. */
142 "%{" YY_INIT; yy_push_state (SC_PROLOGUE);
143
  /* The opening brace is stored and counted in braces_level.  */
144 /* Code in between braces. */
145 "{" YY_INIT; YY_GROW; ++braces_level; yy_push_state (SC_BRACED_CODE);
146
  /* Only the text between the angle brackets is stored
     (yytext + 1, yyleng - 2).  */
147 /* A type. */
148 "<"[^>]+">" YY_INIT; obstack_grow (&string_obstack, yytext + 1, yyleng - 2); YY_FINISH; return TYPE;
149
  /* Section separator.  On its second occurrence the scanner switches
     to SC_EPILOGUE for the remainder of the input.  */
150 "%%" {
151 if (++percent_percent_count == 2)
152 yy_push_state (SC_EPILOGUE);
153 return PERCENT_PERCENT;
154 }
155
  /* Anything else: report it on stderr with its location and skip it;
     no token is returned.  */
156 . {
157 LOCATION_PRINT (stderr, *yylloc);
158 fprintf (stderr, ": invalid character: `%c'\n", *yytext);
159 YY_STEP;
160 }
161 }
162
163
164 /*------------------------------------------------------------.
165 | Whatever the start condition (but those which correspond to |
166 | entity `swallowed' by Bison: SC_ESCAPED_STRING and |
167 | SC_ESCAPED_CHARACTER), no M4 character must escape as is. |
168 `------------------------------------------------------------*/
169
170 <SC_COMMENT,SC_STRING,SC_CHARACTER,SC_BRACED_CODE,SC_PROLOGUE,SC_EPILOGUE>
171 {
  /* In all the start conditions listed above, square brackets are not
     copied as is: `[' is appended to the obstack as "@<:@" and `]' as
     "@:>@".  */
172 \[ obstack_sgrow (&string_obstack, "@<:@");
173 \] obstack_sgrow (&string_obstack, "@:>@");
174 }
175
176
177
178 /*-----------------------------------------------------------.
179 | Scanning a C comment. The initial `/ *' is already eaten. |
180 `-----------------------------------------------------------*/
181
182 <SC_COMMENT>
183 {
  /* This state is entered both from INITIAL and from user code.
     yy_top_state () tells which: when the enclosing state is INITIAL
     the comment text is dropped, otherwise it is appended to the
     obstack so that it stays part of the user's code.  */
184 "*/" { /* End of the comment. */
185 if (yy_top_state () == INITIAL)
186 {
187 YY_STEP;
188 }
189 else
190 {
191 YY_GROW;
192 }
193 yy_pop_state ();
194 }
195
  /* Comment body.  Brackets are excluded here (they have their own
     rules), and so are `*' and line ends.  YY_LINES sits outside the
     `if' and therefore runs in both cases.  */
196 [^\[\]*\n\r]+ if (yy_top_state () != INITIAL) YY_GROW;
197 {eols} if (yy_top_state () != INITIAL) YY_GROW; YY_LINES;
198 . /* Stray `*'. */if (yy_top_state () != INITIAL) YY_GROW;
199
  /* Unterminated comment: report it and return to the enclosing
     state.  */
200 <<EOF>> {
201 LOCATION_PRINT (stderr, *yylloc);
202 fprintf (stderr, ": unexpected end of file in a comment\n");
203 yy_pop_state ();
204 }
205 }
206
207
208 /*----------------------------------------------------------------.
209 | Scanning a C string, including its escapes. The initial `"' is |
210 | already eaten. |
211 `----------------------------------------------------------------*/
212
213 <SC_ESCAPED_STRING>
214 {
  /* Closing quote: store it, finish the obstack string (which then
     includes both quotes) and return it as a STRING token.  This
     state is only expected to be entered from INITIAL.  */
215 \" {
216 assert (yy_top_state () == INITIAL);
217 YY_GROW;
218 YY_FINISH;
219 yy_pop_state ();
220 return STRING;
221 }
222
  /* Ordinary characters are copied as is.  Backslash sequences are
     excluded here; they are handled by the escape-decoding rules
     shared with SC_ESCAPED_CHARACTER.  */
223 [^\"\n\r\\]+ YY_GROW;
224
  /* Any run of line ends inside the string is stored as one '\n'.  */
225 {eols} obstack_1grow (&string_obstack, '\n'); YY_LINES;
226
  /* Unterminated string: report it, then return what was collected so
     far as a STRING token.  */
227 <<EOF>> {
228 LOCATION_PRINT (stderr, *yylloc);
229 fprintf (stderr, ": unexpected end of file in a string\n");
230 assert (yy_top_state () == INITIAL);
231 YY_FINISH;
232 yy_pop_state ();
233 return STRING;
234 }
235 }
236
237 /*---------------------------------------------------------------.
238 | Scanning a C character, decoding its escapes. The initial "'" |
239 | is already eaten. |
240 `---------------------------------------------------------------*/
241
<SC_ESCAPED_CHARACTER>
{
  /* Closing quote.  The obstack then holds the whole literal, quotes
     included, with its escapes already decoded by the shared escape
     rules.  The literal is registered as a token symbol whose user
     number is the value of the character following the opening
     quote.  */
  \' {
    YY_GROW;
    assert (yy_top_state () == INITIAL);
    {
      /* Read the character as an unsigned char: going through a plain
	 `char' would sign-extend bytes >= 0x80 on platforms where
	 `char' is signed and yield a huge token number.  */
      unsigned char c;
      YY_FINISH;
      /* Fetch the character before yylval is reused for the symbol.  */
      c = (unsigned char) yylval->string[1];
      yylval->symbol = getsym (yylval->string);
      symbol_class_set (yylval->symbol, token_sym);
      symbol_user_token_number_set (yylval->symbol, (unsigned int) c);
      yy_pop_state ();
      return ID;
    }
  }

  /* Ordinary characters, one at a time.  Backslash sequences are left
     to the escape-decoding rules.  */
  [^\'\n\r\\]      YY_GROW;

  /* Any run of line ends inside the literal is stored as one '\n'.  */
  {eols}    obstack_1grow (&string_obstack, '\n'); YY_LINES;

  /* Unterminated literal: report it and return what was collected.
     NOTE(review): this path returns CHARACTER whereas the normal path
     returns ID -- confirm that the parser expects this.  */
  <<EOF>> {
    LOCATION_PRINT (stderr, *yylloc);
    fprintf (stderr, ": unexpected end of file in a character\n");
    assert (yy_top_state () == INITIAL);
    YY_FINISH;
    yy_pop_state ();
    return CHARACTER;
  }
}
272
273
274 /*----------------------------.
275 | Decode escaped characters. |
276 `----------------------------*/
277
<SC_ESCAPED_STRING,SC_ESCAPED_CHARACTER>
{
  /* Three-digit octal escape.  Values that do not fit in a byte are
     reported and dropped.  */
  \\[0-7]{3}		{
    long c = strtol (yytext + 1, 0, 8);
    if (c > 255)
      {
	LOCATION_PRINT (stderr, *yylloc);
	fprintf (stderr, ": invalid escape: %s\n", yytext);
	YY_STEP;
      }
    else
      obstack_1grow (&string_obstack, c);
  }

  /* Two-digit hexadecimal escape.  */
  \\x[0-9a-fA-F]{2}	{
    obstack_1grow (&string_obstack, strtol (yytext + 2, 0, 16));
  }

  /* Simple C escapes, replaced by the character they denote.  */
  \\a	obstack_1grow (&string_obstack, '\a');
  \\b	obstack_1grow (&string_obstack, '\b');
  \\f	obstack_1grow (&string_obstack, '\f');
  \\n	obstack_1grow (&string_obstack, '\n');
  \\r	obstack_1grow (&string_obstack, '\r');
  \\t	obstack_1grow (&string_obstack, '\t');
  \\v	obstack_1grow (&string_obstack, '\v');

  /* Escapes that stand for the escaped character itself: backslash,
     double quote, single quote and question mark.  The single quote
     must be listed here, otherwise the literal '\'' falls through to
     the catch-all rule below and ends up with the backslash as its
     value.  */
  \\[\\\"\'?]	obstack_1grow (&string_obstack, yytext[1]);

  /* Anything else: warn, and keep the sequence verbatim.  */
  \\.	{
    LOCATION_PRINT (stderr, *yylloc);
    fprintf (stderr, ": unrecognized escape: %s\n", yytext);
    YY_GROW;
  }
}
310
311
312 /*----------------------------------------------------------.
313 | Scanning a C character without decoding its escapes. The |
314 | initial "'" is already eaten. |
315 `----------------------------------------------------------*/
316
317 <SC_CHARACTER>
318 {
  /* Character literal found inside user code: its text is copied to
     the obstack unchanged (escapes are not decoded) and no token is
     returned.  This state is never expected directly above
     INITIAL.  */
319 \' {
320 YY_GROW;
321 assert (yy_top_state () != INITIAL);
322 yy_pop_state ();
323 }
324
  /* Ordinary characters, and backslash followed by any character
     other than a newline, are copied verbatim.
     NOTE(review): a backslash immediately followed by a line end
     appears to match no rule in this state (`.' does not match a
     newline), which matters because the scanner is built with
     `nodefault' -- confirm.  */
325 [^\[\]\'\n\r\\] YY_GROW;
326 \\. YY_GROW;
327
328 {eols} YY_GROW; YY_LINES;
329
  /* Unterminated literal: report it and return to the enclosing
     state.  */
330 <<EOF>> {
331 LOCATION_PRINT (stderr, *yylloc);
332 fprintf (stderr, ": unexpected end of file in a character\n");
333 assert (yy_top_state () != INITIAL);
334 yy_pop_state ();
335 }
336 }
337
338
339 /*----------------------------------------------------------------.
340 | Scanning a C string, without decoding its escapes. The initial |
341 | `"' is already eaten. |
342 `----------------------------------------------------------------*/
343
344 <SC_STRING>
345 {
  /* String literal found inside user code: its text is copied to the
     obstack unchanged (escapes are not decoded) and no token is
     returned.  This state is never expected directly above
     INITIAL.  */
346 \" {
347 assert (yy_top_state () != INITIAL);
348 YY_GROW;
349 yy_pop_state ();
350 }
351
  /* Ordinary characters, and backslash followed by any character
     other than a newline, are copied verbatim.
     NOTE(review): a backslash immediately followed by a line end
     appears to match no rule in this state (`.' does not match a
     newline), which matters because the scanner is built with
     `nodefault' -- confirm.  */
352 [^\[\]\"\n\r\\]+ YY_GROW;
353 \\. YY_GROW;
354
355 {eols} YY_GROW; YY_LINES;
356
  /* Unterminated string: report it and return to the enclosing
     state.  */
357 <<EOF>> {
358 LOCATION_PRINT (stderr, *yylloc);
359 fprintf (stderr, ": unexpected end of file in a string\n");
360 assert (yy_top_state () != INITIAL);
361 yy_pop_state ();
362 }
363 }
364
365
366 /*---------------------------------------------------.
367 | Strings, comments etc. can be found in user code. |
368 `---------------------------------------------------*/
369
370 <SC_BRACED_CODE,SC_PROLOGUE,SC_EPILOGUE>
371 {
  /* Rules shared by the three user-code states.  The opening
     delimiter is copied to the obstack, then a sub-state is pushed
     that copies the rest of the literal or comment, so that
     delimiters appearing inside it are not interpreted as code.  */
372 /* Characters. We don't check there is only one. */
373 \' YY_GROW; yy_push_state (SC_CHARACTER);
374
375 /* Strings. */
376 \" YY_GROW; yy_push_state (SC_STRING);
377
  /* A `//' comment is copied up to, but not including, the end of the
     line.  */
378 /* Comments. */
379 "/*" YY_GROW; yy_push_state (SC_COMMENT);
380 "//".* YY_GROW;
381 }
382
383
384 /*---------------------------------------------------------------.
385 | Scanning some code in braces (%union and actions). The initial |
386 | "{" is already eaten. |
387 `---------------------------------------------------------------*/
388
<SC_BRACED_CODE>
{
  /* A closing brace always goes into the code.  When it balances the
     brace that opened the block, the block is complete and is
     returned as a BRACED_CODE token.  */
  "}" {
    YY_GROW;
    if (--braces_level == 0)
      {
	yy_pop_state ();
	YY_FINISH;
	return BRACED_CODE;
      }
  }

  /* Nested braces are copied and counted.  */
  "{"                  YY_GROW; braces_level++;

  /* References to semantic values and locations are translated by
     handle_dollar and handle_at.  The optional explicit type stops at
     the first `>': a greedy `.*' would run up to the last `>' on the
     line and swallow the code in between.  */
  "$"("<"[^>\n\r]*">")?(-?[0-9]+|"$") { handle_dollar (yytext); }
  "@"(-?[0-9]+|"$")                    { handle_at (yytext); }

  /* Plain code.  Characters that start some other rule (brackets,
     `$', `/', quotes, `@', braces, line ends) are excluded.  */
  [^\[\]$/\'\"@\{\}\n\r]+ YY_GROW;
  {eols}       YY_GROW; YY_LINES;

  /* A loose $, or /, or etc. */
  .            YY_GROW;

  /* Unterminated block: report it and return what was collected, as
     the same token kind a complete block would produce.  */
  <<EOF>> {
    LOCATION_PRINT (stderr, *yylloc);
    fprintf (stderr, ": unexpected end of file in a braced code\n");
    yy_pop_state ();
    YY_FINISH;
    return BRACED_CODE;
  }

}
421
422
423 /*--------------------------------------------------------------.
424 | Scanning some prologue: from "%{" (already scanned) to "%}". |
425 `--------------------------------------------------------------*/
426
<SC_PROLOGUE>
{
  /* End of the prologue.  The closing marker is not stored.  */
  "%}" {
    yy_pop_state ();
    YY_FINISH;
    return PROLOGUE;
  }

  /* Plain text.  */
  [^\[\]%\n\r]+	  YY_GROW;

  /* Percent signs followed by ordinary text.  Brackets are excluded
     so that they always reach the M4 quoting rules instead of being
     copied raw.  */
  "%"+[^%\}\[\]\n\r]+ YY_GROW;
  {eols}	  YY_GROW; YY_LINES;

  /* A `%' that none of the rules above can take: before a line end,
     before a bracket, or before the `%' of the closing marker.  The
     scanner is built with `nodefault', so without this rule such
     input would match nothing.  "%}" is longer and still wins when it
     applies.  */
  "%"		  YY_GROW;

  /* Unterminated prologue: report it and return what was
     collected.  */
  <<EOF>> {
    LOCATION_PRINT (stderr, *yylloc);
    fprintf (stderr, ": unexpected end of file in a prologue\n");
    yy_pop_state ();
    YY_FINISH;
    return PROLOGUE;
  }

}
448
449
450 /*---------------------------------------------------------------.
451 | Scanning the epilogue (everything after the second "%%", which |
452 | has already been eaten). |
453 `---------------------------------------------------------------*/
454
455 <SC_EPILOGUE>
456 {
  /* Everything except square brackets (which have their own rules),
     line ends included, is copied to the obstack.  */
457 ([^\[\]]|{eols})+ YY_GROW;
458
  /* End of input is the normal end of the epilogue: finish the string
     and return it as an EPILOGUE token.  */
459 <<EOF>> {
460 yy_pop_state ();
461 YY_FINISH;
462 return EPILOGUE;
463 }
464 }
465
466
467 %%
468
469 /*------------------------------------------------------------------.
470 | CP is pointing to a wannabee semantic value (i.e., a `$'). |
471 | |
472 | Possible inputs: $[<TYPENAME>]($|integer) |
473 | |
474 | Output to the STRING_OBSTACK a reference to this semantic value. |
475 `------------------------------------------------------------------*/
476
477 static void
478 handle_dollar (char *cp)
479 {
480 const char *type_name = NULL;
481
482 /* RULE_LENGTH is the number of values in the current rule so far,
483 which says where to find `$0' with respect to the top of the
484 stack. It is not the same as the rule->length in the case of mid
485 rule actions. */
486 int rule_length = 0;
487 symbol_list *rhs;
488 for (rhs = current_rule->next; rhs; rhs = rhs->next)
489 ++rule_length;
490
491 ++cp;
492
493 /* Get the type name if explicit. */
494 if (*cp == '<')
495 {
496 type_name = ++cp;
497 while (*cp != '>')
498 ++cp;
499 *cp = '\0';
500 ++cp;
501 }
502
503 if (*cp == '$')
504 {
505 if (!type_name)
506 type_name = get_type_name (0, current_rule);
507 if (!type_name && typed)
508 complain (_("$$ of `%s' has no declared type"),
509 current_rule->sym->tag);
510 if (!type_name)
511 type_name = "";
512 obstack_fgrow1 (&string_obstack,
513 "]b4_lhs_value([%s])[", type_name);
514 }
515 else if (isdigit (*cp) || *cp == '-')
516 {
517 int n = strtol (cp, &cp, 10);
518
519 if (n > rule_length)
520 complain (_("invalid value: %s%d"), "$", n);
521 else
522 {
523 if (!type_name && n > 0)
524 type_name = get_type_name (n, current_rule);
525 if (!type_name && typed)
526 complain (_("$%d of `%s' has no declared type"),
527 n, current_rule->sym->tag);
528 if (!type_name)
529 type_name = "";
530 obstack_fgrow3 (&string_obstack,
531 "]b4_rhs_value([%d], [%d], [%s])[",
532 rule_length, n, type_name);
533 }
534 }
535 else
536 {
537 char buf[] = "$c";
538 buf[1] = *cp;
539 complain (_("%s is invalid"), quote (buf));
540 }
541 }
542
543 /*-------------------------------------------------------.
544 | CP is pointing to a location (i.e., a `@'). Output to |
545 | STRING_OBSTACK a reference to this location. |
546 `-------------------------------------------------------*/
547
548 static void
549 handle_at (char *cp)
550 {
551 /* RULE_LENGTH is the number of values in the current rule so far,
552 which says where to find `$0' with respect to the top of the
553 stack. It is not the same as the rule->length in the case of mid
554 rule actions. */
555 int rule_length = 0;
556 symbol_list *rhs;
557 for (rhs = current_rule->next; rhs; rhs = rhs->next)
558 ++rule_length;
559
560 locations_flag = 1;
561 ++cp;
562
563 if (*cp == '$')
564 {
565 obstack_sgrow (&string_obstack, "]b4_lhs_location[");
566 }
567 else if (isdigit (*cp) || *cp == '-')
568 {
569 int n = strtol (cp, &cp, 10);
570 if (n > rule_length)
571 complain (_("invalid value: %s%d"), "@", n);
572 else
573 obstack_fgrow2 (&string_obstack, "]b4_rhs_location([%d], [%d])[",
574 rule_length, n);
575 }
576 else
577 {
578 char buf[] = "@c";
579 buf[1] = *cp;
580 complain (_("%s is invalid"), quote (buf));
581 }
582 }