]> git.saurik.com Git - bison.git/blame - src/scan-gram.l
Revamp to fix POSIX incompatibilities, to count columns correctly, and
[bison.git] / src / scan-gram.l
CommitLineData
e9955c83
AD
1/* Bison Grammar Scanner -*- C -*-
2 Copyright (C) 2002 Free Software Foundation, Inc.
3
4 This file is part of Bison, the GNU Compiler Compiler.
5
6 This program is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 2 of the License, or
9 (at your option) any later version.
10
11 This program is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
15
16 You should have received a copy of the GNU General Public License
17 along with this program; if not, write to the Free Software
18 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
19 02111-1307 USA
20*/
21
22%option debug nodefault noyywrap nounput never-interactive stack
23%option prefix="gram_" outfile="lex.yy.c"
24
25%{
26#include "system.h"
d8d3f94a 27#include "mbswidth.h"
e9955c83
AD
28#include "complain.h"
29#include "quote.h"
30#include "getargs.h"
31#include "gram.h"
32#include "reader.h"
33
34/* Each time we match a string, move the end cursor to its end. */
8efe435c
AD
35#define YY_USER_INIT \
36do { \
37 LOCATION_RESET (*yylloc); \
1a715ef2 38 yylloc->file = infile; \
8efe435c
AD
39 /* This is only to avoid GCC warnings. */ \
40 if (yycontrol) {;}; \
41} while (0)
42
d8d3f94a
PE
43#define YY_USER_ACTION extend_location (yylloc, yytext, yyleng);
44#define YY_STEP LOCATION_STEP (*yylloc)
45
46#define YY_INPUT(buf, result, size) ((result) = no_cr_read (yyin, buf, size))
47
48
/* Fill BUF with up to SIZE bytes read from FP and return how many
   bytes BUF holds afterwards.  Carriage returns never reach the
   caller: a lone '\r' is turned into '\n', and a "\r\n" pair is
   collapsed into a single '\n', even when the pair straddles the end
   of this read.  */

static size_t
no_cr_read (FILE *fp, char *buf, size_t size)
{
  size_t nread = fread (buf, 1, size, fp);
  char *dst;
  char const *src;
  char const *end;

  if (nread == 0)
    return nread;

  /* Nothing to rewrite unless the buffer holds at least one '\r'.  */
  dst = memchr (buf, '\r', nread);
  if (dst == NULL)
    return nread;

  /* DST is where the next output byte goes (currently on the '\r'),
     SRC is the next input byte to examine.  */
  src = dst + 1;
  end = buf + nread;

  for (;;)
    {
      /* The byte under DST was an '\r': emit '\n' in its place.  */
      *dst++ = '\n';

      if (src != end)
        {
          /* Swallow the '\n' of a "\r\n" pair.  */
          if (*src == '\n')
            src++;
        }
      else
        {
          /* The '\r' was the last byte read: peek at the stream to
             see whether a '\n' follows, and push back anything else.
             Give up if the push-back fails.  */
          int ch = getc (fp);
          if (ch != '\n' && ungetc (ch, fp) != ch)
            break;
        }

      /* Shift input down until the next '\r' or the end of input.  */
      for (;;)
        {
          char c;
          if (src == end)
            return dst - buf;
          c = *src++;
          if (c == '\r')
            break;
          *dst++ = c;
        }
    }

  return dst - buf;
}
94
95
96/* Extend *LOC to account for token TOKEN of size SIZE. */
97
98static void
99extend_location (location_t *loc, char const *token, int size)
100{
101 int line = loc->last_line;
102 int column = loc->last_column;
103 char const *p0 = token;
104 char const *p = token;
105 char const *lim = token + size;
106
107 for (p = token; p < lim; p++)
108 switch (*p)
109 {
110 case '\r':
111 /* \r shouldn't survive no_cr_read. */
112 abort ();
113
114 case '\n':
115 line++;
116 column = 1;
117 p0 = p + 1;
118 break;
119
120 case '\t':
121 column += mbsnwidth (p0, p - p0, 0);
122 column += 8 - ((column - 1) & 7);
123 p0 = p + 1;
124 break;
125 }
126
127 loc->last_line = line;
128 loc->last_column = column + mbsnwidth (p0, p - p0, 0);
129}
130
131
e9955c83 132
44995b2e
AD
133/* STRING_OBSTACK -- Used to store all the characters that we need to
134 keep (to construct ID, STRINGS etc.). Use the following macros to
135 use it.
136
1d6412ad
AD
137 Use YY_OBS_GROW to append what has just been matched, and
138 YY_OBS_FINISH to end the string (it puts the ending 0).
139 YY_OBS_FINISH also stores this string in LAST_STRING, which can be
140 used, and which is used by YY_OBS_FREE to free the last string. */
44995b2e
AD
141
142static struct obstack string_obstack;
143char *last_string;
144
44995b2e
AD
145#define YY_OBS_GROW \
146 obstack_grow (&string_obstack, yytext, yyleng)
147
148#define YY_OBS_FINISH \
149 do { \
150 obstack_1grow (&string_obstack, '\0'); \
151 last_string = obstack_finish (&string_obstack); \
44995b2e
AD
152 } while (0)
153
154#define YY_OBS_FREE \
155 do { \
156 obstack_free (&string_obstack, last_string); \
157 } while (0)
e9955c83 158
4cdb01db
AD
/* Give LAST_STRING back to STRING_OBSTACK by expanding YY_OBS_FREE,
   i.e. obstack_free (&string_obstack, last_string).  */
159void
160scanner_last_string_free (void)
161{
162 YY_OBS_FREE;
163}
164
165
e9955c83
AD
166static int braces_level = 0;
167static int percent_percent_count = 0;
168
efcb44dd
PE
169/* Within well-formed rules, RULE_LENGTH is the number of values in
170 the current rule so far, which says where to find `$0' with respect
171 to the top of the stack. It is not the same as the rule->length in
172 the case of mid rule actions.
173
174 Outside of well-formed rules, RULE_LENGTH has an undefined value. */
175static int rule_length;
176
d33cb3ae
PE
177static void handle_dollar (braced_code_t code_kind,
178 char *cp, location_t location);
179static void handle_at (braced_code_t code_kind,
180 char *cp, location_t location);
d8d3f94a 181static int convert_ucn_to_byte (char const *hex_text);
e9955c83
AD
182
183%}
d8d3f94a 184%x SC_COMMENT SC_LINE_COMMENT SC_YACC_COMMENT
e9955c83
AD
185%x SC_STRING SC_CHARACTER
186%x SC_ESCAPED_STRING SC_ESCAPED_CHARACTER
187%x SC_BRACED_CODE SC_PROLOGUE SC_EPILOGUE
188
d8d3f94a
PE
189letter [.abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ_]
190id {letter}({letter}|[0-9])*
e9955c83 191int [0-9]+
d8d3f94a
PE
192
193/* POSIX says that a tag must be both an id and a C union member, but
194 historically almost any character is allowed in a tag. We disallow
195 NUL and newline, as this simplifies our implementation. */
196tag [^\0\n>]+
197
198/* Zero or more instances of backslash-newline. Following GCC, allow
199 white space between the backslash and the newline. */
200splice (\\[ \f\t\v]*\n)*
e9955c83
AD
201
202%%
203%{
204 /* At each yylex invocation, mark the current position as the
205 start of the next token. */
e9955c83 206 YY_STEP;
e9955c83
AD
207%}
208
209
210 /*----------------------------.
211 | Scanning Bison directives. |
212 `----------------------------*/
213<INITIAL>
214{
215 "%binary" return PERCENT_NONASSOC;
216 "%debug" return PERCENT_DEBUG;
217 "%define" return PERCENT_DEFINE;
218 "%defines" return PERCENT_DEFINES;
9280d3ef 219 "%destructor" return PERCENT_DESTRUCTOR;
676385e2 220 "%dprec" return PERCENT_DPREC;
e9955c83
AD
221 "%error"[-_]"verbose" return PERCENT_ERROR_VERBOSE;
222 "%expect" return PERCENT_EXPECT;
223 "%file-prefix" return PERCENT_FILE_PREFIX;
224 "%fixed"[-_]"output"[-_]"files" return PERCENT_YACC;
ae7453f2 225 "%glr-parser" return PERCENT_GLR_PARSER;
e9955c83
AD
226 "%left" return PERCENT_LEFT;
227 "%locations" return PERCENT_LOCATIONS;
676385e2 228 "%merge" return PERCENT_MERGE;
e9955c83
AD
229 "%name"[-_]"prefix" return PERCENT_NAME_PREFIX;
230 "%no"[-_]"lines" return PERCENT_NO_LINES;
231 "%nonassoc" return PERCENT_NONASSOC;
232 "%nterm" return PERCENT_NTERM;
233 "%output" return PERCENT_OUTPUT;
ae7453f2 234 "%parse-param" return PERCENT_PARSE_PARAM;
d8d3f94a 235 "%prec" rule_length--; return PERCENT_PREC;
366eea36 236 "%printer" return PERCENT_PRINTER;
e9955c83
AD
237 "%pure"[-_]"parser" return PERCENT_PURE_PARSER;
238 "%right" return PERCENT_RIGHT;
ae7453f2 239 "%lex-param" return PERCENT_LEX_PARAM;
e9955c83
AD
240 "%skeleton" return PERCENT_SKELETON;
241 "%start" return PERCENT_START;
242 "%term" return PERCENT_TOKEN;
243 "%token" return PERCENT_TOKEN;
244 "%token"[-_]"table" return PERCENT_TOKEN_TABLE;
245 "%type" return PERCENT_TYPE;
246 "%union" return PERCENT_UNION;
247 "%verbose" return PERCENT_VERBOSE;
248 "%yacc" return PERCENT_YACC;
249
250 "=" return EQUAL;
d8d3f94a
PE
251 ":" rule_length = 0; return COLON;
252 "|" rule_length = 0; return PIPE;
ae7453f2 253 "," return COMMA;
e9955c83
AD
254 ";" return SEMICOLON;
255
d8d3f94a
PE
256 [ \f\n\t\v]+ YY_STEP;
257
e9955c83 258 {id} {
39f41916 259 yylval->symbol = symbol_get (yytext, *yylloc);
efcb44dd 260 rule_length++;
e9955c83
AD
261 return ID;
262 }
263
d8d3f94a
PE
264 {int} {
265 unsigned long num;
266 errno = 0;
267 num = strtoul (yytext, 0, 10);
268 if (INT_MAX < num || errno)
269 {
270 complain_at (*yylloc, _("%s is invalid"), yytext);
271 num = INT_MAX;
272 }
273 yylval->integer = num;
274 return INT;
275 }
e9955c83
AD
276
277 /* Characters. We don't check there is only one. */
db2cc12f 278 "'" YY_OBS_GROW; yy_push_state (SC_ESCAPED_CHARACTER);
e9955c83
AD
279
280 /* Strings. */
db2cc12f 281 "\"" YY_OBS_GROW; yy_push_state (SC_ESCAPED_STRING);
e9955c83
AD
282
283 /* Comments. */
d8d3f94a 284 "/*" BEGIN SC_YACC_COMMENT;
e9955c83
AD
285 "//".* YY_STEP;
286
287 /* Prologue. */
1d6412ad 288 "%{" yy_push_state (SC_PROLOGUE);
e9955c83
AD
289
290 /* Code in between braces. */
1d6412ad 291 "{" YY_OBS_GROW; ++braces_level; yy_push_state (SC_BRACED_CODE);
e9955c83
AD
292
293 /* A type. */
d8d3f94a 294 "<"{tag}">" {
4cdb01db
AD
295 obstack_grow (&string_obstack, yytext + 1, yyleng - 2);
296 YY_OBS_FINISH;
297 yylval->string = last_string;
298 return TYPE;
299 }
300
e9955c83
AD
301
302 "%%" {
303 if (++percent_percent_count == 2)
304 yy_push_state (SC_EPILOGUE);
305 return PERCENT_PERCENT;
306 }
307
308 . {
309 LOCATION_PRINT (stderr, *yylloc);
db2cc12f 310 fprintf (stderr, _(": invalid character: `%c'\n"), *yytext);
e9955c83
AD
311 YY_STEP;
312 }
313}
314
315
d8d3f94a
PE
316 /*-------------------------------------------------------------------.
317 | Whatever the start condition (but those which correspond to |
318 | entities `swallowed' by Bison: SC_YACC_COMMENT, SC_ESCAPED_STRING, |
319 | and SC_ESCAPED_CHARACTER), no M4 character must escape as is. |
320 `-------------------------------------------------------------------*/
e9955c83 321
d8d3f94a 322<SC_COMMENT,SC_LINE_COMMENT,SC_STRING,SC_CHARACTER,SC_BRACED_CODE,SC_PROLOGUE,SC_EPILOGUE>
e9955c83 323{
d8d3f94a
PE
324 \[ obstack_sgrow (&string_obstack, "@<:@");
325 \] obstack_sgrow (&string_obstack, "@:>@");
e9955c83
AD
326}
327
328
d8d3f94a
PE
329 /*---------------------------------------------------------------.
330 | Scanning a Yacc comment. The initial `/ *' is already eaten. |
331 `---------------------------------------------------------------*/
e9955c83 332
d8d3f94a 333<SC_YACC_COMMENT>
e9955c83 334{
d8d3f94a
PE
335 "*/" {
336 YY_STEP;
337 BEGIN INITIAL;
e9955c83
AD
338 }
339
d8d3f94a
PE
340 [^*]+|"*" ;
341
342 <<EOF>> {
343 LOCATION_PRINT (stderr, *yylloc);
344 fprintf (stderr, _(": unexpected end of file in a comment\n"));
345 BEGIN INITIAL;
346 }
347}
348
349
350 /*------------------------------------------------------------.
351 | Scanning a C comment. The initial `/ *' is already eaten. |
352 `------------------------------------------------------------*/
353
354<SC_COMMENT>
355{
356 "*"{splice}"/" YY_OBS_GROW; yy_pop_state ();
357 [^*\[\]]+|"*" YY_OBS_GROW;
e9955c83
AD
358
359 <<EOF>> {
360 LOCATION_PRINT (stderr, *yylloc);
db2cc12f 361 fprintf (stderr, _(": unexpected end of file in a comment\n"));
e9955c83
AD
362 yy_pop_state ();
363 }
364}
365
366
d8d3f94a
PE
367 /*--------------------------------------------------------------.
368 | Scanning a line comment. The initial `//' is already eaten. |
369 `--------------------------------------------------------------*/
370
371<SC_LINE_COMMENT>
372{
373 "\n" YY_OBS_GROW; yy_pop_state ();
374 ([^\n\[\]]|{splice})+ YY_OBS_GROW;
375 <<EOF>> yy_pop_state ();
376}
377
378
e9955c83
AD
379 /*----------------------------------------------------------------.
380 | Scanning a C string, including its escapes. The initial `"' is |
381 | already eaten. |
382 `----------------------------------------------------------------*/
383
384<SC_ESCAPED_STRING>
385{
db2cc12f 386 "\"" {
e9955c83 387 assert (yy_top_state () == INITIAL);
44995b2e
AD
388 YY_OBS_GROW;
389 YY_OBS_FINISH;
4cdb01db 390 yylval->string = last_string;
e9955c83 391 yy_pop_state ();
efcb44dd 392 rule_length++;
e9955c83
AD
393 return STRING;
394 }
395
d8d3f94a 396 [^\"\\]+ YY_OBS_GROW;
e9955c83
AD
397
398 <<EOF>> {
399 LOCATION_PRINT (stderr, *yylloc);
db2cc12f 400 fprintf (stderr, _(": unexpected end of file in a string\n"));
e9955c83 401 assert (yy_top_state () == INITIAL);
44995b2e 402 YY_OBS_FINISH;
4cdb01db 403 yylval->string = last_string;
e9955c83
AD
404 yy_pop_state ();
405 return STRING;
406 }
407}
408
409 /*---------------------------------------------------------------.
410 | Scanning a C character, decoding its escapes. The initial "'" |
411 | is already eaten. |
412 `---------------------------------------------------------------*/
413
414<SC_ESCAPED_CHARACTER>
415{
db2cc12f 416 "'" {
44995b2e 417 YY_OBS_GROW;
e9955c83
AD
418 assert (yy_top_state () == INITIAL);
419 {
44995b2e 420 YY_OBS_FINISH;
39f41916 421 yylval->symbol = symbol_get (last_string, *yylloc);
e776192e 422 symbol_class_set (yylval->symbol, token_sym, *yylloc);
e68d4575
PE
423 symbol_user_token_number_set (yylval->symbol,
424 (unsigned char) last_string[1], *yylloc);
44995b2e 425 YY_OBS_FREE;
e9955c83 426 yy_pop_state ();
efcb44dd 427 rule_length++;
e9955c83
AD
428 return ID;
429 }
430 }
431
d8d3f94a 432 [^'\\]+ YY_OBS_GROW;
e9955c83
AD
433
434 <<EOF>> {
435 LOCATION_PRINT (stderr, *yylloc);
db2cc12f 436 fprintf (stderr, _(": unexpected end of file in a character\n"));
e9955c83 437 assert (yy_top_state () == INITIAL);
44995b2e 438 YY_OBS_FINISH;
4cdb01db 439 yylval->string = last_string;
e9955c83
AD
440 yy_pop_state ();
441 return CHARACTER;
442 }
443}
444
445
446 /*----------------------------.
447 | Decode escaped characters. |
448 `----------------------------*/
449
450<SC_ESCAPED_STRING,SC_ESCAPED_CHARACTER>
451{
d8d3f94a
PE
452 \\[0-7]{1,3} {
453 unsigned long c = strtoul (yytext + 1, 0, 8);
454 if (UCHAR_MAX < c)
e9955c83
AD
455 {
456 LOCATION_PRINT (stderr, *yylloc);
db2cc12f 457 fprintf (stderr, _(": invalid escape: %s\n"), quote (yytext));
e9955c83
AD
458 YY_STEP;
459 }
460 else
461 obstack_1grow (&string_obstack, c);
462 }
463
d8d3f94a
PE
464 \\x[0-9a-fA-F]+ {
465 unsigned long c;
466 errno = 0;
467 c = strtoul (yytext + 2, 0, 16);
468 if (UCHAR_MAX < c || errno)
469 {
470 LOCATION_PRINT (stderr, *yylloc);
471 fprintf (stderr, _(": invalid escape: %s\n"), quote (yytext));
472 YY_STEP;
473 }
474 else
475 obstack_1grow (&string_obstack, c);
e9955c83
AD
476 }
477
478 \\a obstack_1grow (&string_obstack, '\a');
479 \\b obstack_1grow (&string_obstack, '\b');
480 \\f obstack_1grow (&string_obstack, '\f');
481 \\n obstack_1grow (&string_obstack, '\n');
482 \\r obstack_1grow (&string_obstack, '\r');
483 \\t obstack_1grow (&string_obstack, '\t');
484 \\v obstack_1grow (&string_obstack, '\v');
d8d3f94a
PE
485 \\[\"'?\\] obstack_1grow (&string_obstack, yytext[1]);
486 \\(u|U[0-9a-fA-F]{4})[0-9a-fA-F]{4} {
487 int c = convert_ucn_to_byte (yytext);
488 if (c < 0)
489 {
490 LOCATION_PRINT (stderr, *yylloc);
491 fprintf (stderr, _(": invalid escape: %s\n"), quote (yytext));
492 YY_STEP;
493 }
494 else
495 obstack_1grow (&string_obstack, c);
496 }
4f25ebb0 497 \\(.|\n) {
e9955c83 498 LOCATION_PRINT (stderr, *yylloc);
db2cc12f 499 fprintf (stderr, _(": unrecognized escape: %s\n"), quote (yytext));
44995b2e 500 YY_OBS_GROW;
e9955c83 501 }
4f25ebb0
AD
502 /* Flex wants this rule, in case of a `\<<EOF>>'. */
503 \\ YY_OBS_GROW;
e9955c83
AD
504}
505
506
507 /*----------------------------------------------------------.
508 | Scanning a C character without decoding its escapes. The |
509 | initial "'" is already eaten. |
510 `----------------------------------------------------------*/
511
512<SC_CHARACTER>
513{
db2cc12f 514 "'" {
44995b2e 515 YY_OBS_GROW;
e9955c83
AD
516 assert (yy_top_state () != INITIAL);
517 yy_pop_state ();
518 }
519
d8d3f94a
PE
520 [^'\[\]\\]+ YY_OBS_GROW;
521 \\{splice}[^\[\]] YY_OBS_GROW;
522 {splice} YY_OBS_GROW;
523 /* Needed for `\<<EOF>>', `\\<<newline>>[', and `\\<<newline>>]'. */
4f25ebb0 524 \\ YY_OBS_GROW;
e9955c83 525
e9955c83
AD
526 <<EOF>> {
527 LOCATION_PRINT (stderr, *yylloc);
db2cc12f 528 fprintf (stderr, _(": unexpected end of file in a character\n"));
e9955c83
AD
529 assert (yy_top_state () != INITIAL);
530 yy_pop_state ();
531 }
532}
533
534
535 /*----------------------------------------------------------------.
536 | Scanning a C string, without decoding its escapes. The initial |
537 | `"' is already eaten. |
538 `----------------------------------------------------------------*/
539
540<SC_STRING>
541{
db2cc12f 542 "\"" {
e9955c83 543 assert (yy_top_state () != INITIAL);
44995b2e 544 YY_OBS_GROW;
e9955c83
AD
545 yy_pop_state ();
546 }
547
d8d3f94a
PE
548 [^\"\[\]\\]+ YY_OBS_GROW;
549 \\{splice}[^\[\]] YY_OBS_GROW;
550 {splice} YY_OBS_GROW;
551 /* Needed for `\<<EOF>>', `\\<<newline>>[', and `\\<<newline>>]'. */
4f25ebb0 552 \\ YY_OBS_GROW;
e9955c83 553
e9955c83
AD
554 <<EOF>> {
555 LOCATION_PRINT (stderr, *yylloc);
db2cc12f 556 fprintf (stderr, _(": unexpected end of file in a string\n"));
e9955c83
AD
557 assert (yy_top_state () != INITIAL);
558 yy_pop_state ();
559 }
560}
561
562
563 /*---------------------------------------------------.
564 | Strings, comments etc. can be found in user code. |
565 `---------------------------------------------------*/
566
567<SC_BRACED_CODE,SC_PROLOGUE,SC_EPILOGUE>
568{
569 /* Characters. We don't check there is only one. */
db2cc12f 570 "'" YY_OBS_GROW; yy_push_state (SC_CHARACTER);
e9955c83
AD
571
572 /* Strings. */
db2cc12f 573 "\"" YY_OBS_GROW; yy_push_state (SC_STRING);
e9955c83
AD
574
575 /* Comments. */
d8d3f94a
PE
576 "/"{splice}"*" YY_OBS_GROW; yy_push_state (SC_COMMENT);
577 "/"{splice}"/" YY_OBS_GROW; yy_push_state (SC_LINE_COMMENT);
4f25ebb0
AD
578
579 /* Not comments. */
580 "/" YY_OBS_GROW;
e9955c83
AD
581}
582
583
584 /*---------------------------------------------------------------.
585 | Scanning some code in braces (%union and actions). The initial |
586 | "{" is already eaten. |
587 `---------------------------------------------------------------*/
588
589<SC_BRACED_CODE>
590{
591 "}" {
44995b2e 592 YY_OBS_GROW;
e9955c83
AD
593 if (--braces_level == 0)
594 {
595 yy_pop_state ();
44995b2e 596 YY_OBS_FINISH;
4cdb01db 597 yylval->string = last_string;
efcb44dd 598 rule_length++;
e9955c83
AD
599 return BRACED_CODE;
600 }
601 }
602
44995b2e 603 "{" YY_OBS_GROW; braces_level++;
e9955c83 604
d8d3f94a 605 "$"("<"{tag}">")?(-?[0-9]+|"$") { handle_dollar (current_braced_code,
f25bfb75
AD
606 yytext, *yylloc); }
607 "@"(-?[0-9]+|"$") { handle_at (current_braced_code,
608 yytext, *yylloc); }
e9955c83 609
d8d3f94a 610 [^$@\[\]/'\"\{\}]+ YY_OBS_GROW;
e9955c83 611
d8d3f94a 612 /* A stray $, /, etc. */
44995b2e 613 . YY_OBS_GROW;
e9955c83
AD
614
615 <<EOF>> {
616 LOCATION_PRINT (stderr, *yylloc);
db2cc12f 617 fprintf (stderr, _(": unexpected end of file in a braced code\n"));
e9955c83 618 yy_pop_state ();
44995b2e 619 YY_OBS_FINISH;
4cdb01db
AD
620 yylval->string = last_string;
621 return BRACED_CODE;
e9955c83
AD
622 }
623
624}
625
626
627 /*--------------------------------------------------------------.
628 | Scanning some prologue: from "%{" (already scanned) to "%}". |
629 `--------------------------------------------------------------*/
630
631<SC_PROLOGUE>
632{
633 "%}" {
634 yy_pop_state ();
44995b2e 635 YY_OBS_FINISH;
4cdb01db 636 yylval->string = last_string;
e9955c83
AD
637 return PROLOGUE;
638 }
639
d8d3f94a 640 [^%\[\]/'\"]+ YY_OBS_GROW;
4f25ebb0 641 "%" YY_OBS_GROW;
e9955c83
AD
642
643 <<EOF>> {
644 LOCATION_PRINT (stderr, *yylloc);
db2cc12f 645 fprintf (stderr, _(": unexpected end of file in a prologue\n"));
e9955c83 646 yy_pop_state ();
44995b2e 647 YY_OBS_FINISH;
4cdb01db 648 yylval->string = last_string;
e9955c83
AD
649 return PROLOGUE;
650 }
e9955c83
AD
651}
652
653
654 /*---------------------------------------------------------------.
655 | Scanning the epilogue (everything after the second "%%", which |
d8d3f94a 656 | has already been eaten). |
e9955c83
AD
657 `---------------------------------------------------------------*/
658
659<SC_EPILOGUE>
660{
d8d3f94a 661 [^\[\]]+ YY_OBS_GROW;
e9955c83
AD
662
663 <<EOF>> {
664 yy_pop_state ();
44995b2e 665 YY_OBS_FINISH;
4cdb01db 666 yylval->string = last_string;
e9955c83
AD
667 return EPILOGUE;
668 }
669}
670
671
672%%
673
674/*------------------------------------------------------------------.
366eea36 675| TEXT is pointing to a wannabe semantic value (i.e., a `$'). |
e9955c83
AD
676| |
677| Possible inputs: $[<TYPENAME>]($|integer) |
678| |
679| Output to the STRING_OBSTACK a reference to this semantic value. |
680`------------------------------------------------------------------*/
681
f25bfb75 682static inline void
366eea36 683handle_action_dollar (char *text, location_t location)
e9955c83
AD
684{
685 const char *type_name = NULL;
366eea36 686 char *cp = text + 1;
e9955c83
AD
687
688 /* Get the type name if explicit. */
689 if (*cp == '<')
690 {
691 type_name = ++cp;
692 while (*cp != '>')
693 ++cp;
694 *cp = '\0';
695 ++cp;
696 }
697
698 if (*cp == '$')
699 {
700 if (!type_name)
56c47203 701 type_name = symbol_list_n_type_name_get (current_rule, location, 0);
e9955c83 702 if (!type_name && typed)
56c47203 703 complain_at (location, _("$$ of `%s' has no declared type"),
97650f4e 704 current_rule->sym->tag);
e9955c83
AD
705 if (!type_name)
706 type_name = "";
707 obstack_fgrow1 (&string_obstack,
708 "]b4_lhs_value([%s])[", type_name);
709 }
d8d3f94a 710 else
e9955c83 711 {
d8d3f94a
PE
712 long num;
713 errno = 0;
714 num = strtol (cp, 0, 10);
e9955c83 715
d8d3f94a 716 if (INT_MIN <= num && num <= rule_length && ! errno)
e9955c83 717 {
d8d3f94a 718 int n = num;
e9955c83 719 if (!type_name && n > 0)
56c47203
AD
720 type_name = symbol_list_n_type_name_get (current_rule, location,
721 n);
e9955c83 722 if (!type_name && typed)
56c47203 723 complain_at (location, _("$%d of `%s' has no declared type"),
97650f4e 724 n, current_rule->sym->tag);
e9955c83
AD
725 if (!type_name)
726 type_name = "";
727 obstack_fgrow3 (&string_obstack,
728 "]b4_rhs_value([%d], [%d], [%s])[",
729 rule_length, n, type_name);
730 }
d8d3f94a
PE
731 else
732 complain_at (location, _("invalid value: %s"), text);
9280d3ef
AD
733 }
734}
735
736
366eea36 737/*---------------------------------------------------------------.
d8d3f94a 738| TEXT is expected to be $$ in some code associated to a symbol: |
366eea36
AD
739| destructor or printer. |
740`---------------------------------------------------------------*/
9280d3ef 741
f25bfb75 742static inline void
366eea36 743handle_symbol_code_dollar (char *text, location_t location)
9280d3ef 744{
366eea36 745 char *cp = text + 1;
9280d3ef 746 if (*cp == '$')
366eea36 747 obstack_sgrow (&string_obstack, "]b4_dollar_dollar[");
9280d3ef 748 else
d8d3f94a 749 complain_at (location, _("%s is invalid"), quote_n (1, text));
e9955c83
AD
750}
751
f25bfb75
AD
752
753/*-----------------------------------------------------------------.
754| Dispatch onto handle_action_dollar, or handle_symbol_code_dollar, |
755| depending upon BRACED_CODE_KIND. |
756`-----------------------------------------------------------------*/
e9955c83
AD
757
758static void
f25bfb75
AD
759handle_dollar (braced_code_t braced_code_kind,
760 char *text, location_t location)
761{
762 switch (braced_code_kind)
763 {
764 case action_braced_code:
765 handle_action_dollar (text, location);
766 break;
767
768 case destructor_braced_code:
366eea36
AD
769 case printer_braced_code:
770 handle_symbol_code_dollar (text, location);
f25bfb75
AD
771 break;
772 }
773}
774
775
776/*------------------------------------------------------.
777| TEXT is a location token (i.e., a `@...'). Output to |
778| STRING_OBSTACK a reference to this location. |
779`------------------------------------------------------*/
780
781static inline void
782handle_action_at (char *text, location_t location)
e9955c83 783{
366eea36 784 char *cp = text + 1;
e9955c83 785 locations_flag = 1;
e9955c83 786
366eea36 787 if (*cp == '$')
e9955c83
AD
788 {
789 obstack_sgrow (&string_obstack, "]b4_lhs_location[");
790 }
d8d3f94a 791 else
e9955c83 792 {
d8d3f94a
PE
793 long num;
794 errno = 0;
795 num = strtol (cp, 0, 10);
dafdc66f 796
d8d3f94a
PE
797 if (INT_MIN <= num && num <= rule_length && ! errno)
798 {
799 int n = num;
800 obstack_fgrow2 (&string_obstack, "]b4_rhs_location([%d], [%d])[",
801 rule_length, n);
802 }
e9955c83 803 else
d8d3f94a 804 complain_at (location, _("invalid value: %s"), text);
f25bfb75
AD
805 }
806}
807
808
366eea36 809/*---------------------------------------------------------------.
d8d3f94a 810| TEXT is expected to be @$ in some code associated to a symbol: |
366eea36
AD
811| destructor or printer. |
812`---------------------------------------------------------------*/
f25bfb75
AD
813
814static inline void
366eea36 815handle_symbol_code_at (char *text, location_t location)
f25bfb75 816{
366eea36
AD
817 char *cp = text + 1;
818 if (*cp == '$')
819 obstack_sgrow (&string_obstack, "]b4_at_dollar[");
f25bfb75 820 else
d8d3f94a 821 complain_at (location, _("%s is invalid"), quote_n (1, text));
e9955c83 822}
4cdb01db 823
f25bfb75
AD
824
825/*-------------------------------------------------------------------.
826| Dispatch onto handle_action_at, or handle_symbol_code_at, depending |
827| upon BRACED_CODE_KIND. |
828`-------------------------------------------------------------------*/
829
830static void
831handle_at (braced_code_t braced_code_kind,
832 char *text, location_t location)
833{
834 switch (braced_code_kind)
835 {
836 case action_braced_code:
837 handle_action_at (text, location);
838 break;
839
840 case destructor_braced_code:
366eea36
AD
841 case printer_braced_code:
842 handle_symbol_code_at (text, location);
f25bfb75
AD
843 break;
844 }
845}
846
847
d8d3f94a
PE
848/*------------------------------------------------------------------.
849| Convert universal character name UCN to a single-byte character, |
850| and return that character. Return -1 if UCN does not correspond |
851| to a single-byte character. |
852`------------------------------------------------------------------*/
853
/* UCN is the text of a universal character name, i.e. a backslash,
   `u' or `U', then hexadecimal digits.  Return the single-byte
   character it designates, or -1 if there is none.  */

static int
convert_ucn_to_byte (char const *ucn)
{
  /* Skip the two-character `\u' / `\U' introducer.  */
  unsigned long code = strtoul (ucn + 2, 0, 16);
  int byte;

  /* FIXME: Currently we assume Unicode-compatible unibyte characters
     on ASCII hosts (i.e., Latin-1 on hosts with 8-bit bytes).  On
     non-ASCII hosts we support only the portable C character set.
     These limitations should be removed once we add support for
     multibyte characters.  */

  if (UCHAR_MAX < code)
    return -1;

  /* CODE is at most UCHAR_MAX here.  Keep the result in an int from
     now on so that -1 is never round-tripped through an unsigned
     type.  */
  byte = (int) code;

#if ! ('$' == 0x24 && '@' == 0x40 && '`' == 0x60 && '~' == 0x7e)
  {
    /* A non-ASCII host.  Use CODE to index into a table of the C
       basic execution character set, which is guaranteed to exist on
       all Standard C platforms.  This table also includes '$', '@',
       and '`', which are not in the basic execution character set but
       which are unibyte characters on all the platforms that we know
       about.  */
    static signed char const table[] =
      {
        '\0',   -1,   -1,   -1,   -1,   -1,   -1, '\a',
        '\b', '\t', '\n', '\v', '\f', '\r',   -1,   -1,
          -1,   -1,   -1,   -1,   -1,   -1,   -1,   -1,
          -1,   -1,   -1,   -1,   -1,   -1,   -1,   -1,
         ' ',  '!',  '"',  '#',  '$',  '%',  '&', '\'',
         '(',  ')',  '*',  '+',  ',',  '-',  '.',  '/',
         '0',  '1',  '2',  '3',  '4',  '5',  '6',  '7',
         '8',  '9',  ':',  ';',  '<',  '=',  '>',  '?',
         '@',  'A',  'B',  'C',  'D',  'E',  'F',  'G',
         'H',  'I',  'J',  'K',  'L',  'M',  'N',  'O',
         'P',  'Q',  'R',  'S',  'T',  'U',  'V',  'W',
         'X',  'Y',  'Z',  '[', '\\',  ']',  '^',  '_',
         '`',  'a',  'b',  'c',  'd',  'e',  'f',  'g',
         'h',  'i',  'j',  'k',  'l',  'm',  'n',  'o',
         'p',  'q',  'r',  's',  't',  'u',  'v',  'w',
         'x',  'y',  'z',  '{',  '|',  '}',  '~'
      };

    byte = code < sizeof table ? table[code] : -1;
  }
#endif

  return byte;
}
902
903
f25bfb75
AD
904/*-------------------------.
905| Initialize the scanner. |
906`-------------------------*/
907
1d6412ad
AD
/* Prepare STRING_OBSTACK, the obstack the YY_OBS_* macros grow and
   finish strings in, by calling obstack_init on it.  */
908void
909scanner_initialize (void)
910{
911 obstack_init (&string_obstack);
912}
913
914
f25bfb75
AD
915/*-----------------------------------------------.
916| Free all the memory allocated to the scanner. |
917`-----------------------------------------------*/
918
4cdb01db
AD
/* Tear down the scanner: release STRING_OBSTACK, then delete the
   Flex buffer currently in use.  */
919void
920scanner_free (void)
921{
/* NOTE(review): a null object pointer presumably tells obstack_free
   to release everything held by the obstack -- confirm against the
   obstack documentation. */
922 obstack_free (&string_obstack, 0);
536545f3
AD
923 /* Reclaim Flex's buffers. */
924 yy_delete_buffer (YY_CURRENT_BUFFER);
4cdb01db 925}