]> git.saurik.com Git - bison.git/blame - src/scan-gram.l
* src/scan-gram.l: When it starts with `%', complain about the
[bison.git] / src / scan-gram.l
CommitLineData
e9955c83
AD
1/* Bison Grammar Scanner -*- C -*-
2 Copyright (C) 2002 Free Software Foundation, Inc.
3
4 This file is part of Bison, the GNU Compiler Compiler.
5
6 This program is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 2 of the License, or
9 (at your option) any later version.
10
11 This program is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
15
16 You should have received a copy of the GNU General Public License
17 along with this program; if not, write to the Free Software
18 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
19 02111-1307 USA
20*/
21
22%option debug nodefault noyywrap nounput never-interactive stack
23%option prefix="gram_" outfile="lex.yy.c"
24
25%{
26#include "system.h"
d8d3f94a 27#include "mbswidth.h"
e9955c83
AD
28#include "complain.h"
29#include "quote.h"
30#include "getargs.h"
31#include "gram.h"
32#include "reader.h"
33
34/* Each time we match a string, move the end cursor to its end. */
8efe435c
AD
35#define YY_USER_INIT \
36do { \
37 LOCATION_RESET (*yylloc); \
1a715ef2 38 yylloc->file = infile; \
8efe435c
AD
39 /* This is only to avoid GCC warnings. */ \
40 if (yycontrol) {;}; \
41} while (0)
42
d8d3f94a
PE
43#define YY_USER_ACTION extend_location (yylloc, yytext, yyleng);
44#define YY_STEP LOCATION_STEP (*yylloc)
45
46#define YY_INPUT(buf, result, size) ((result) = no_cr_read (yyin, buf, size))
47
48
/* Fill BUF with at most SIZE bytes read from FP and return how many
   bytes BUF holds afterwards.  Carriage returns never reach the
   caller: both "\r\n" and a lone '\r' are delivered as a single
   '\n', so the returned count may be smaller than the number of
   bytes consumed from FP.  */

static size_t
no_cr_read (FILE *fp, char *buf, size_t size)
{
  size_t nread = fread (buf, 1, size, fp);
  char const *end = buf + nread;
  char *first_cr = nread ? memchr (buf, '\r', nread) : NULL;
  char const *src;
  char *dst;

  /* Nothing read, or nothing to translate.  */
  if (!first_cr)
    return nread;

  /* Bytes ahead of the first '\r' are already where they belong;
     compact the rest of the buffer in place from there.  */
  src = first_cr;
  dst = first_cr;
  while (src != end)
    {
      char c = *src++;

      if (c == '\r')
        {
          c = '\n';
          if (src == end)
            {
              /* The '\r' was the last byte fetched, so its possible
                 '\n' partner is still in the stream.  Swallow it if
                 present, otherwise hand the byte back.  */
              int next = getc (fp);
              if (next != '\n')
                ungetc (next, fp);
            }
          else if (*src == '\n')
            src++;
        }

      *dst++ = c;
    }

  return dst - buf;
}
94
95
96/* Extend *LOC to account for token TOKEN of size SIZE. */
97
98static void
99extend_location (location_t *loc, char const *token, int size)
100{
101 int line = loc->last_line;
102 int column = loc->last_column;
103 char const *p0 = token;
104 char const *p = token;
105 char const *lim = token + size;
106
107 for (p = token; p < lim; p++)
108 switch (*p)
109 {
110 case '\r':
111 /* \r shouldn't survive no_cr_read. */
112 abort ();
113
114 case '\n':
115 line++;
116 column = 1;
117 p0 = p + 1;
118 break;
119
120 case '\t':
121 column += mbsnwidth (p0, p - p0, 0);
122 column += 8 - ((column - 1) & 7);
123 p0 = p + 1;
124 break;
125 }
126
127 loc->last_line = line;
128 loc->last_column = column + mbsnwidth (p0, p - p0, 0);
129}
130
131
e9955c83 132
44995b2e
AD
133/* STRING_OBSTACK -- Used to store all the characters that we need to
134 keep (to construct ID, STRINGS etc.). Use the following macros to
135 use it.
136
1d6412ad
AD
137 Use YY_OBS_GROW to append what has just been matched, and
138 YY_OBS_FINISH to end the string (it puts the ending 0).
139 YY_OBS_FINISH also stores this string in LAST_STRING, which can be
140 used, and which is used by YY_OBS_FREE to free the last string. */
44995b2e
AD
141
142static struct obstack string_obstack;
143char *last_string;
144
44995b2e
AD
145#define YY_OBS_GROW \
146 obstack_grow (&string_obstack, yytext, yyleng)
147
148#define YY_OBS_FINISH \
149 do { \
150 obstack_1grow (&string_obstack, '\0'); \
151 last_string = obstack_finish (&string_obstack); \
44995b2e
AD
152 } while (0)
153
154#define YY_OBS_FREE \
155 do { \
156 obstack_free (&string_obstack, last_string); \
157 } while (0)
e9955c83 158
4cdb01db
AD
159void
160scanner_last_string_free (void)
161{
162 YY_OBS_FREE;
163}
164
165
e9955c83
AD
166static int braces_level = 0;
167static int percent_percent_count = 0;
168
efcb44dd
PE
169/* Within well-formed rules, RULE_LENGTH is the number of values in
170 the current rule so far, which says where to find `$0' with respect
171 to the top of the stack. It is not the same as the rule->length in
172 the case of mid rule actions.
173
174 Outside of well-formed rules, RULE_LENGTH has an undefined value. */
175static int rule_length;
176
d33cb3ae
PE
177static void handle_dollar (braced_code_t code_kind,
178 char *cp, location_t location);
179static void handle_at (braced_code_t code_kind,
180 char *cp, location_t location);
d8d3f94a 181static int convert_ucn_to_byte (char const *hex_text);
e9955c83
AD
182
183%}
d8d3f94a 184%x SC_COMMENT SC_LINE_COMMENT SC_YACC_COMMENT
e9955c83
AD
185%x SC_STRING SC_CHARACTER
186%x SC_ESCAPED_STRING SC_ESCAPED_CHARACTER
187%x SC_BRACED_CODE SC_PROLOGUE SC_EPILOGUE
188
29c01725
AD
189letter [.abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ_]
190id {letter}({letter}|[0-9])*
191directive %{letter}({letter}|[0-9]|-)*
192int [0-9]+
d8d3f94a
PE
193
194/* POSIX says that a tag must be both an id and a C union member, but
195 historically almost any character is allowed in a tag. We disallow
196 NUL and newline, as this simplifies our implementation. */
197tag [^\0\n>]+
198
199/* Zero or more instances of backslash-newline. Following GCC, allow
200 white space between the backslash and the newline. */
201splice (\\[ \f\t\v]*\n)*
e9955c83
AD
202
203%%
204%{
205 /* At each yylex invocation, mark the current position as the
206 start of the next token. */
e9955c83 207 YY_STEP;
e9955c83
AD
208%}
209
210
211 /*----------------------------.
212 | Scanning Bison directives. |
213 `----------------------------*/
214<INITIAL>
215{
216 "%binary" return PERCENT_NONASSOC;
217 "%debug" return PERCENT_DEBUG;
218 "%define" return PERCENT_DEFINE;
219 "%defines" return PERCENT_DEFINES;
9280d3ef 220 "%destructor" return PERCENT_DESTRUCTOR;
676385e2 221 "%dprec" return PERCENT_DPREC;
e9955c83
AD
222 "%error"[-_]"verbose" return PERCENT_ERROR_VERBOSE;
223 "%expect" return PERCENT_EXPECT;
224 "%file-prefix" return PERCENT_FILE_PREFIX;
225 "%fixed"[-_]"output"[-_]"files" return PERCENT_YACC;
ae7453f2 226 "%glr-parser" return PERCENT_GLR_PARSER;
e9955c83
AD
227 "%left" return PERCENT_LEFT;
228 "%locations" return PERCENT_LOCATIONS;
676385e2 229 "%merge" return PERCENT_MERGE;
e9955c83
AD
230 "%name"[-_]"prefix" return PERCENT_NAME_PREFIX;
231 "%no"[-_]"lines" return PERCENT_NO_LINES;
232 "%nonassoc" return PERCENT_NONASSOC;
233 "%nterm" return PERCENT_NTERM;
234 "%output" return PERCENT_OUTPUT;
ae7453f2 235 "%parse-param" return PERCENT_PARSE_PARAM;
d8d3f94a 236 "%prec" rule_length--; return PERCENT_PREC;
366eea36 237 "%printer" return PERCENT_PRINTER;
e9955c83
AD
238 "%pure"[-_]"parser" return PERCENT_PURE_PARSER;
239 "%right" return PERCENT_RIGHT;
ae7453f2 240 "%lex-param" return PERCENT_LEX_PARAM;
e9955c83
AD
241 "%skeleton" return PERCENT_SKELETON;
242 "%start" return PERCENT_START;
243 "%term" return PERCENT_TOKEN;
244 "%token" return PERCENT_TOKEN;
245 "%token"[-_]"table" return PERCENT_TOKEN_TABLE;
246 "%type" return PERCENT_TYPE;
247 "%union" return PERCENT_UNION;
248 "%verbose" return PERCENT_VERBOSE;
249 "%yacc" return PERCENT_YACC;
250
29c01725
AD
251 {directive} {
252 complain_at (*yylloc, _("invalid directive: %s"), quote (yytext));
253 YY_STEP;
254 }
255
e9955c83 256 "=" return EQUAL;
d8d3f94a
PE
257 ":" rule_length = 0; return COLON;
258 "|" rule_length = 0; return PIPE;
ae7453f2 259 "," return COMMA;
e9955c83
AD
260 ";" return SEMICOLON;
261
d8d3f94a
PE
262 [ \f\n\t\v]+ YY_STEP;
263
e9955c83 264 {id} {
39f41916 265 yylval->symbol = symbol_get (yytext, *yylloc);
efcb44dd 266 rule_length++;
e9955c83
AD
267 return ID;
268 }
269
d8d3f94a
PE
270 {int} {
271 unsigned long num;
272 errno = 0;
273 num = strtoul (yytext, 0, 10);
274 if (INT_MAX < num || errno)
275 {
c4d720cd 276 complain_at (*yylloc, _("invalid value: %s"), quote (yytext));
d8d3f94a
PE
277 num = INT_MAX;
278 }
279 yylval->integer = num;
280 return INT;
281 }
e9955c83
AD
282
283 /* Characters. We don't check there is only one. */
db2cc12f 284 "'" YY_OBS_GROW; yy_push_state (SC_ESCAPED_CHARACTER);
e9955c83
AD
285
286 /* Strings. */
db2cc12f 287 "\"" YY_OBS_GROW; yy_push_state (SC_ESCAPED_STRING);
e9955c83
AD
288
289 /* Comments. */
d8d3f94a 290 "/*" BEGIN SC_YACC_COMMENT;
e9955c83
AD
291 "//".* YY_STEP;
292
293 /* Prologue. */
1d6412ad 294 "%{" yy_push_state (SC_PROLOGUE);
e9955c83
AD
295
296 /* Code in between braces. */
1d6412ad 297 "{" YY_OBS_GROW; ++braces_level; yy_push_state (SC_BRACED_CODE);
e9955c83
AD
298
299 /* A type. */
d8d3f94a 300 "<"{tag}">" {
4cdb01db
AD
301 obstack_grow (&string_obstack, yytext + 1, yyleng - 2);
302 YY_OBS_FINISH;
303 yylval->string = last_string;
304 return TYPE;
305 }
306
e9955c83
AD
307
308 "%%" {
309 if (++percent_percent_count == 2)
310 yy_push_state (SC_EPILOGUE);
311 return PERCENT_PERCENT;
312 }
313
314 . {
c4d720cd 315 complain_at (*yylloc, _("invalid character: %s"), quote (yytext));
e9955c83
AD
316 YY_STEP;
317 }
318}
319
320
d8d3f94a
PE
321 /*-------------------------------------------------------------------.
322 | Whatever the start condition (but those which correspond to |
323 | entities `swallowed' by Bison: SC_YACC_COMMENT, SC_ESCAPED_STRING, |
324 | and SC_ESCAPED_CHARACTER), no M4 character must escape as is. |
325 `-------------------------------------------------------------------*/
e9955c83 326
d8d3f94a 327<SC_COMMENT,SC_LINE_COMMENT,SC_STRING,SC_CHARACTER,SC_BRACED_CODE,SC_PROLOGUE,SC_EPILOGUE>
e9955c83 328{
d8d3f94a
PE
329 \[ obstack_sgrow (&string_obstack, "@<:@");
330 \] obstack_sgrow (&string_obstack, "@:>@");
e9955c83
AD
331}
332
333
d8d3f94a
PE
334 /*---------------------------------------------------------------.
335 | Scanning a Yacc comment. The initial `/ *' is already eaten. |
336 `---------------------------------------------------------------*/
e9955c83 337
d8d3f94a 338<SC_YACC_COMMENT>
e9955c83 339{
d8d3f94a
PE
340 "*/" {
341 YY_STEP;
342 BEGIN INITIAL;
e9955c83
AD
343 }
344
d8d3f94a
PE
345 [^*]+|"*" ;
346
347 <<EOF>> {
c4d720cd 348 complain_at (*yylloc, _("unexpected end of file in a comment"));
d8d3f94a
PE
349 BEGIN INITIAL;
350 }
351}
352
353
354 /*------------------------------------------------------------.
355 | Scanning a C comment. The initial `/ *' is already eaten. |
356 `------------------------------------------------------------*/
357
358<SC_COMMENT>
359{
360 "*"{splice}"/" YY_OBS_GROW; yy_pop_state ();
361 [^*\[\]]+|"*" YY_OBS_GROW;
e9955c83
AD
362
363 <<EOF>> {
c4d720cd 364 complain_at (*yylloc, _("unexpected end of file in a comment"));
e9955c83
AD
365 yy_pop_state ();
366 }
367}
368
369
d8d3f94a
PE
370 /*--------------------------------------------------------------.
371 | Scanning a line comment. The initial `//' is already eaten. |
372 `--------------------------------------------------------------*/
373
374<SC_LINE_COMMENT>
375{
376 "\n" YY_OBS_GROW; yy_pop_state ();
377 ([^\n\[\]]|{splice})+ YY_OBS_GROW;
378 <<EOF>> yy_pop_state ();
379}
380
381
e9955c83
AD
382 /*----------------------------------------------------------------.
383 | Scanning a C string, including its escapes. The initial `"' is |
384 | already eaten. |
385 `----------------------------------------------------------------*/
386
387<SC_ESCAPED_STRING>
388{
db2cc12f 389 "\"" {
e9955c83 390 assert (yy_top_state () == INITIAL);
44995b2e
AD
391 YY_OBS_GROW;
392 YY_OBS_FINISH;
4cdb01db 393 yylval->string = last_string;
e9955c83 394 yy_pop_state ();
efcb44dd 395 rule_length++;
e9955c83
AD
396 return STRING;
397 }
398
d8d3f94a 399 [^\"\\]+ YY_OBS_GROW;
e9955c83
AD
400
401 <<EOF>> {
c4d720cd 402 complain_at (*yylloc, _("unexpected end of file in a string"));
e9955c83 403 assert (yy_top_state () == INITIAL);
44995b2e 404 YY_OBS_FINISH;
4cdb01db 405 yylval->string = last_string;
e9955c83
AD
406 yy_pop_state ();
407 return STRING;
408 }
409}
410
411 /*---------------------------------------------------------------.
412 | Scanning a C character, decoding its escapes. The initial "'" |
413 | is already eaten. |
414 `---------------------------------------------------------------*/
415
416<SC_ESCAPED_CHARACTER>
417{
db2cc12f 418 "'" {
44995b2e 419 YY_OBS_GROW;
e9955c83
AD
420 assert (yy_top_state () == INITIAL);
421 {
44995b2e 422 YY_OBS_FINISH;
39f41916 423 yylval->symbol = symbol_get (last_string, *yylloc);
e776192e 424 symbol_class_set (yylval->symbol, token_sym, *yylloc);
e68d4575
PE
425 symbol_user_token_number_set (yylval->symbol,
426 (unsigned char) last_string[1], *yylloc);
44995b2e 427 YY_OBS_FREE;
e9955c83 428 yy_pop_state ();
efcb44dd 429 rule_length++;
e9955c83
AD
430 return ID;
431 }
432 }
433
c4d720cd 434 [^\'\\]+ YY_OBS_GROW;
e9955c83
AD
435
436 <<EOF>> {
c4d720cd 437 complain_at (*yylloc, _("unexpected end of file in a character"));
e9955c83 438 assert (yy_top_state () == INITIAL);
44995b2e 439 YY_OBS_FINISH;
4cdb01db 440 yylval->string = last_string;
e9955c83
AD
441 yy_pop_state ();
442 return CHARACTER;
443 }
444}
445
446
447 /*----------------------------.
448 | Decode escaped characters. |
449 `----------------------------*/
450
451<SC_ESCAPED_STRING,SC_ESCAPED_CHARACTER>
452{
d8d3f94a
PE
453 \\[0-7]{1,3} {
454 unsigned long c = strtoul (yytext + 1, 0, 8);
455 if (UCHAR_MAX < c)
e9955c83 456 {
c4d720cd 457 complain_at (*yylloc, _("invalid escape: %s"), quote (yytext));
e9955c83
AD
458 YY_STEP;
459 }
460 else
461 obstack_1grow (&string_obstack, c);
462 }
463
d8d3f94a
PE
464 \\x[0-9a-fA-F]+ {
465 unsigned long c;
466 errno = 0;
467 c = strtoul (yytext + 2, 0, 16);
468 if (UCHAR_MAX < c || errno)
469 {
c4d720cd 470 complain_at (*yylloc, _("invalid escape: %s"), quote (yytext));
d8d3f94a
PE
471 YY_STEP;
472 }
473 else
474 obstack_1grow (&string_obstack, c);
e9955c83
AD
475 }
476
477 \\a obstack_1grow (&string_obstack, '\a');
478 \\b obstack_1grow (&string_obstack, '\b');
479 \\f obstack_1grow (&string_obstack, '\f');
480 \\n obstack_1grow (&string_obstack, '\n');
481 \\r obstack_1grow (&string_obstack, '\r');
482 \\t obstack_1grow (&string_obstack, '\t');
483 \\v obstack_1grow (&string_obstack, '\v');
c4d720cd 484 \\[\"\'?\\] obstack_1grow (&string_obstack, yytext[1]);
d8d3f94a
PE
485 \\(u|U[0-9a-fA-F]{4})[0-9a-fA-F]{4} {
486 int c = convert_ucn_to_byte (yytext);
487 if (c < 0)
488 {
c4d720cd 489 complain_at (*yylloc, _("invalid escape: %s"), quote (yytext));
d8d3f94a
PE
490 YY_STEP;
491 }
492 else
493 obstack_1grow (&string_obstack, c);
494 }
4f25ebb0 495 \\(.|\n) {
c4d720cd 496 complain_at (*yylloc, _("unrecognized escape: %s"), quote (yytext));
44995b2e 497 YY_OBS_GROW;
e9955c83 498 }
4f25ebb0
AD
499 /* Flex wants this rule, in case of a `\<<EOF>>'. */
500 \\ YY_OBS_GROW;
e9955c83
AD
501}
502
503
504 /*----------------------------------------------------------.
505 | Scanning a C character without decoding its escapes. The |
506 | initial "'" is already eaten. |
507 `----------------------------------------------------------*/
508
509<SC_CHARACTER>
510{
db2cc12f 511 "'" {
44995b2e 512 YY_OBS_GROW;
e9955c83
AD
513 assert (yy_top_state () != INITIAL);
514 yy_pop_state ();
515 }
516
d8d3f94a
PE
517 [^'\[\]\\]+ YY_OBS_GROW;
518 \\{splice}[^\[\]] YY_OBS_GROW;
519 {splice} YY_OBS_GROW;
520 /* Needed for `\<<EOF>>', `\\<<newline>>[', and `\\<<newline>>]'. */
4f25ebb0 521 \\ YY_OBS_GROW;
e9955c83 522
e9955c83 523 <<EOF>> {
c4d720cd 524 complain_at (*yylloc, _("unexpected end of file in a character"));
e9955c83
AD
525 assert (yy_top_state () != INITIAL);
526 yy_pop_state ();
527 }
528}
529
530
531 /*----------------------------------------------------------------.
532 | Scanning a C string, without decoding its escapes. The initial |
533 | `"' is already eaten. |
534 `----------------------------------------------------------------*/
535
536<SC_STRING>
537{
db2cc12f 538 "\"" {
e9955c83 539 assert (yy_top_state () != INITIAL);
44995b2e 540 YY_OBS_GROW;
e9955c83
AD
541 yy_pop_state ();
542 }
543
d8d3f94a
PE
544 [^\"\[\]\\]+ YY_OBS_GROW;
545 \\{splice}[^\[\]] YY_OBS_GROW;
546 {splice} YY_OBS_GROW;
547 /* Needed for `\<<EOF>>', `\\<<newline>>[', and `\\<<newline>>]'. */
4f25ebb0 548 \\ YY_OBS_GROW;
e9955c83 549
e9955c83 550 <<EOF>> {
c4d720cd 551 complain_at (*yylloc, _("unexpected end of file in a string"));
e9955c83
AD
552 assert (yy_top_state () != INITIAL);
553 yy_pop_state ();
554 }
555}
556
557
558 /*---------------------------------------------------.
559 | Strings, comments etc. can be found in user code. |
560 `---------------------------------------------------*/
561
562<SC_BRACED_CODE,SC_PROLOGUE,SC_EPILOGUE>
563{
564 /* Characters. We don't check there is only one. */
db2cc12f 565 "'" YY_OBS_GROW; yy_push_state (SC_CHARACTER);
e9955c83
AD
566
567 /* Strings. */
db2cc12f 568 "\"" YY_OBS_GROW; yy_push_state (SC_STRING);
e9955c83
AD
569
570 /* Comments. */
d8d3f94a
PE
571 "/"{splice}"*" YY_OBS_GROW; yy_push_state (SC_COMMENT);
572 "/"{splice}"/" YY_OBS_GROW; yy_push_state (SC_LINE_COMMENT);
4f25ebb0
AD
573
574 /* Not comments. */
575 "/" YY_OBS_GROW;
e9955c83
AD
576}
577
578
579 /*---------------------------------------------------------------.
580 | Scanning some code in braces (%union and actions). The initial |
581 | "{" is already eaten. |
582 `---------------------------------------------------------------*/
583
584<SC_BRACED_CODE>
585{
586 "}" {
44995b2e 587 YY_OBS_GROW;
e9955c83
AD
588 if (--braces_level == 0)
589 {
590 yy_pop_state ();
44995b2e 591 YY_OBS_FINISH;
4cdb01db 592 yylval->string = last_string;
efcb44dd 593 rule_length++;
e9955c83
AD
594 return BRACED_CODE;
595 }
596 }
597
44995b2e 598 "{" YY_OBS_GROW; braces_level++;
e9955c83 599
d8d3f94a 600 "$"("<"{tag}">")?(-?[0-9]+|"$") { handle_dollar (current_braced_code,
f25bfb75
AD
601 yytext, *yylloc); }
602 "@"(-?[0-9]+|"$") { handle_at (current_braced_code,
603 yytext, *yylloc); }
e9955c83 604
c4d720cd 605 [^$@\[\]/\'\"\{\}]+ YY_OBS_GROW;
e9955c83 606
d8d3f94a 607 /* A stray $, or /, or etc. */
44995b2e 608 . YY_OBS_GROW;
e9955c83
AD
609
610 <<EOF>> {
c4d720cd 611 complain_at (*yylloc, _("unexpected end of file in a braced code"));
e9955c83 612 yy_pop_state ();
44995b2e 613 YY_OBS_FINISH;
4cdb01db
AD
614 yylval->string = last_string;
615 return BRACED_CODE;
e9955c83
AD
616 }
617
618}
619
620
621 /*--------------------------------------------------------------.
622 | Scanning some prologue: from "%{" (already scanned) to "%}". |
623 `--------------------------------------------------------------*/
624
625<SC_PROLOGUE>
626{
627 "%}" {
628 yy_pop_state ();
44995b2e 629 YY_OBS_FINISH;
4cdb01db 630 yylval->string = last_string;
e9955c83
AD
631 return PROLOGUE;
632 }
633
c4d720cd 634 [^%\[\]/\'\"]+ YY_OBS_GROW;
4f25ebb0 635 "%" YY_OBS_GROW;
e9955c83
AD
636
637 <<EOF>> {
c4d720cd 638 complain_at (*yylloc, _("unexpected end of file in a prologue"));
e9955c83 639 yy_pop_state ();
44995b2e 640 YY_OBS_FINISH;
4cdb01db 641 yylval->string = last_string;
e9955c83
AD
642 return PROLOGUE;
643 }
e9955c83
AD
644}
645
646
647 /*---------------------------------------------------------------.
648 | Scanning the epilogue (everything after the second "%%", which |
d8d3f94a 649 | has already been eaten). |
e9955c83
AD
650 `---------------------------------------------------------------*/
651
652<SC_EPILOGUE>
653{
d8d3f94a 654 [^\[\]]+ YY_OBS_GROW;
e9955c83
AD
655
656 <<EOF>> {
657 yy_pop_state ();
44995b2e 658 YY_OBS_FINISH;
4cdb01db 659 yylval->string = last_string;
e9955c83
AD
660 return EPILOGUE;
661 }
662}
663
664
665%%
666
667/*------------------------------------------------------------------.
366eea36 668| TEXT is pointing to a wannabee semantic value (i.e., a `$'). |
e9955c83
AD
669| |
670| Possible inputs: $[<TYPENAME>]($|integer) |
671| |
672| Output to the STRING_OBSTACK a reference to this semantic value. |
673`------------------------------------------------------------------*/
674
f25bfb75 675static inline void
366eea36 676handle_action_dollar (char *text, location_t location)
e9955c83
AD
677{
678 const char *type_name = NULL;
366eea36 679 char *cp = text + 1;
e9955c83
AD
680
681 /* Get the type name if explicit. */
682 if (*cp == '<')
683 {
684 type_name = ++cp;
685 while (*cp != '>')
686 ++cp;
687 *cp = '\0';
688 ++cp;
689 }
690
691 if (*cp == '$')
692 {
693 if (!type_name)
56c47203 694 type_name = symbol_list_n_type_name_get (current_rule, location, 0);
e9955c83 695 if (!type_name && typed)
56c47203 696 complain_at (location, _("$$ of `%s' has no declared type"),
97650f4e 697 current_rule->sym->tag);
e9955c83
AD
698 if (!type_name)
699 type_name = "";
700 obstack_fgrow1 (&string_obstack,
701 "]b4_lhs_value([%s])[", type_name);
702 }
d8d3f94a 703 else
e9955c83 704 {
d8d3f94a
PE
705 long num;
706 errno = 0;
707 num = strtol (cp, 0, 10);
e9955c83 708
d8d3f94a 709 if (INT_MIN <= num && num <= rule_length && ! errno)
e9955c83 710 {
d8d3f94a 711 int n = num;
e9955c83 712 if (!type_name && n > 0)
56c47203
AD
713 type_name = symbol_list_n_type_name_get (current_rule, location,
714 n);
e9955c83 715 if (!type_name && typed)
56c47203 716 complain_at (location, _("$%d of `%s' has no declared type"),
97650f4e 717 n, current_rule->sym->tag);
e9955c83
AD
718 if (!type_name)
719 type_name = "";
720 obstack_fgrow3 (&string_obstack,
721 "]b4_rhs_value([%d], [%d], [%s])[",
722 rule_length, n, type_name);
723 }
d8d3f94a 724 else
c4d720cd 725 complain_at (location, _("invalid value: %s"), quote (text));
9280d3ef
AD
726 }
727}
728
729
366eea36 730/*---------------------------------------------------------------.
d8d3f94a 731| TEXT is expected to be $$ in some code associated to a symbol: |
366eea36
AD
732| destructor or printer. |
733`---------------------------------------------------------------*/
9280d3ef 734
f25bfb75 735static inline void
366eea36 736handle_symbol_code_dollar (char *text, location_t location)
9280d3ef 737{
366eea36 738 char *cp = text + 1;
9280d3ef 739 if (*cp == '$')
366eea36 740 obstack_sgrow (&string_obstack, "]b4_dollar_dollar[");
9280d3ef 741 else
c4d720cd 742 complain_at (location, _("invalid value: %s"), quote (text));
e9955c83
AD
743}
744
f25bfb75
AD
745
746/*-----------------------------------------------------------------.
747| Dispatch onto handle_action_dollar, or handle_destructor_dollar, |
748| depending upon CODE_KIND. |
749`-----------------------------------------------------------------*/
e9955c83
AD
750
751static void
f25bfb75
AD
752handle_dollar (braced_code_t braced_code_kind,
753 char *text, location_t location)
754{
755 switch (braced_code_kind)
756 {
757 case action_braced_code:
758 handle_action_dollar (text, location);
759 break;
760
761 case destructor_braced_code:
366eea36
AD
762 case printer_braced_code:
763 handle_symbol_code_dollar (text, location);
f25bfb75
AD
764 break;
765 }
766}
767
768
769/*------------------------------------------------------.
770| TEXT is a location token (i.e., a `@...'). Output to |
771| STRING_OBSTACK a reference to this location. |
772`------------------------------------------------------*/
773
774static inline void
775handle_action_at (char *text, location_t location)
e9955c83 776{
366eea36 777 char *cp = text + 1;
e9955c83 778 locations_flag = 1;
e9955c83 779
366eea36 780 if (*cp == '$')
e9955c83
AD
781 {
782 obstack_sgrow (&string_obstack, "]b4_lhs_location[");
783 }
d8d3f94a 784 else
e9955c83 785 {
d8d3f94a
PE
786 long num;
787 errno = 0;
788 num = strtol (cp, 0, 10);
dafdc66f 789
d8d3f94a
PE
790 if (INT_MIN <= num && num <= rule_length && ! errno)
791 {
792 int n = num;
793 obstack_fgrow2 (&string_obstack, "]b4_rhs_location([%d], [%d])[",
794 rule_length, n);
795 }
e9955c83 796 else
c4d720cd 797 complain_at (location, _("invalid value: %s"), quote (text));
f25bfb75
AD
798 }
799}
800
801
366eea36 802/*---------------------------------------------------------------.
d8d3f94a 803| TEXT is expected to be @$ in some code associated to a symbol: |
366eea36
AD
804| destructor or printer. |
805`---------------------------------------------------------------*/
f25bfb75
AD
806
807static inline void
366eea36 808handle_symbol_code_at (char *text, location_t location)
f25bfb75 809{
366eea36
AD
810 char *cp = text + 1;
811 if (*cp == '$')
812 obstack_sgrow (&string_obstack, "]b4_at_dollar[");
f25bfb75 813 else
c4d720cd 814 complain_at (location, _("invalid value: %s"), quote (text));
e9955c83 815}
4cdb01db 816
f25bfb75
AD
817
818/*-------------------------------------------------------------------.
819| Dispatch onto handle_action_at, or handle_destructor_at, depending |
820| upon CODE_KIND. |
821`-------------------------------------------------------------------*/
822
823static void
824handle_at (braced_code_t braced_code_kind,
825 char *text, location_t location)
826{
827 switch (braced_code_kind)
828 {
829 case action_braced_code:
830 handle_action_at (text, location);
831 break;
832
833 case destructor_braced_code:
366eea36
AD
834 case printer_braced_code:
835 handle_symbol_code_at (text, location);
f25bfb75
AD
836 break;
837 }
838}
839
840
d8d3f94a
PE
/*------------------------------------------------------------------.
| Convert universal character name UCN to a single-byte character,  |
| and return that character.  Return -1 if UCN does not correspond  |
| to a single-byte character.                                       |
|                                                                   |
| UCN points at the backslash of the escape; its hexadecimal digits |
| start two bytes further on.                                       |
`------------------------------------------------------------------*/

static int
convert_ucn_to_byte (char const *ucn)
{
  unsigned long code = strtoul (ucn + 2, 0, 16);
  int byte;

  /* FIXME: Currently we assume Unicode-compatible unibyte characters
     on ASCII hosts (i.e., Latin-1 on hosts with 8-bit bytes).  On
     non-ASCII hosts we support only the portable C character set.
     These limitations should be removed once we add support for
     multibyte characters.  */

  if (UCHAR_MAX < code)
    return -1;

  /* CODE is now known to be at most UCHAR_MAX.  The result is kept
     in an int from here on, so that a -1 "no such character" marker
     is never stored in the unsigned CODE and converted back.  */
  byte = (int) code;

#if ! ('$' == 0x24 && '@' == 0x40 && '`' == 0x60 && '~' == 0x7e)
  {
    /* A non-ASCII host.  Use CODE to index into a table of the C
       basic execution character set, which is guaranteed to exist on
       all Standard C platforms.  This table also includes '$', '@',
       and '`', which are not in the basic execution character set but
       which are unibyte characters on all the platforms that we know
       about.  */
    static signed char const table[] =
      {
        '\0',   -1,   -1,   -1,   -1,   -1,   -1, '\a',
        '\b', '\t', '\n', '\v', '\f', '\r',   -1,   -1,
          -1,   -1,   -1,   -1,   -1,   -1,   -1,   -1,
          -1,   -1,   -1,   -1,   -1,   -1,   -1,   -1,
         ' ',  '!',  '"',  '#',  '$',  '%',  '&', '\'',
         '(',  ')',  '*',  '+',  ',',  '-',  '.',  '/',
         '0',  '1',  '2',  '3',  '4',  '5',  '6',  '7',
         '8',  '9',  ':',  ';',  '<',  '=',  '>',  '?',
         '@',  'A',  'B',  'C',  'D',  'E',  'F',  'G',
         'H',  'I',  'J',  'K',  'L',  'M',  'N',  'O',
         'P',  'Q',  'R',  'S',  'T',  'U',  'V',  'W',
         'X',  'Y',  'Z',  '[', '\\',  ']',  '^',  '_',
         '`',  'a',  'b',  'c',  'd',  'e',  'f',  'g',
         'h',  'i',  'j',  'k',  'l',  'm',  'n',  'o',
         'p',  'q',  'r',  's',  't',  'u',  'v',  'w',
         'x',  'y',  'z',  '{',  '|',  '}',  '~'
      };

    byte = code < sizeof table ? table[code] : -1;
  }
#endif

  return byte;
}
895
896
f25bfb75
AD
897/*-------------------------.
898| Initialize the scanner. |
899`-------------------------*/
900
1d6412ad
AD
901void
902scanner_initialize (void)
903{
904 obstack_init (&string_obstack);
905}
906
907
f25bfb75
AD
908/*-----------------------------------------------.
909| Free all the memory allocated to the scanner. |
910`-----------------------------------------------*/
911
4cdb01db
AD
912void
913scanner_free (void)
914{
915 obstack_free (&string_obstack, 0);
536545f3
AD
916 /* Reclaim Flex's buffers. */
917 yy_delete_buffer (YY_CURRENT_BUFFER);
4cdb01db 918}