]> git.saurik.com Git - bison.git/blame - src/scan-gram.l
Use more accurate diagnostics, e.g.
[bison.git] / src / scan-gram.l
CommitLineData
e9955c83
AD
1/* Bison Grammar Scanner -*- C -*-
2 Copyright (C) 2002 Free Software Foundation, Inc.
3
4 This file is part of Bison, the GNU Compiler Compiler.
5
6 This program is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 2 of the License, or
9 (at your option) any later version.
10
11 This program is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
15
16 You should have received a copy of the GNU General Public License
17 along with this program; if not, write to the Free Software
18 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
19 02111-1307 USA
20*/
21
22%option debug nodefault noyywrap nounput never-interactive stack
23%option prefix="gram_" outfile="lex.yy.c"
24
25%{
26#include "system.h"
d8d3f94a 27#include "mbswidth.h"
e9955c83
AD
28#include "complain.h"
29#include "quote.h"
30#include "getargs.h"
31#include "gram.h"
32#include "reader.h"
33
34/* Run once by Flex before the first scan: reset the location and record the name of the input file. */
8efe435c
AD
35#define YY_USER_INIT \
36do { \
37 LOCATION_RESET (*yylloc); \
1a715ef2 38 yylloc->file = infile; \
8efe435c
AD
39 /* This is only to avoid GCC warnings. */ \
40 if (yycontrol) {;}; \
41} while (0)
42
d8d3f94a
PE
/* Each time we match a string, move the end cursor to its end. */
43#define YY_USER_ACTION extend_location (yylloc, yytext, yyleng);
/* Mark the current position as the start of the next token. */
44#define YY_STEP LOCATION_STEP (*yylloc)
45
/* Make Flex fill its buffer through no_cr_read, so that the rules
   never see a carriage return. */
46#define YY_INPUT(buf, result, size) ((result) = no_cr_read (yyin, buf, size))
47
48
/* Read up to SIZE bytes from FP into BUF and return the number of
   bytes left in BUF after end-of-line normalization: every "\r\n"
   pair and every isolated '\r' is replaced by a single '\n', so the
   result may be shorter than what fread delivered.

   When the data read ends in '\r', one extra byte is peeked from FP:
   a following '\n' is consumed (it belongs to the same line ending),
   anything else is pushed back with ungetc.  */

static size_t
no_cr_read (FILE *fp, char *buf, size_t size)
{
  size_t nread = fread (buf, 1, size, fp);
  char *dst;
  char const *src;
  char const *end;

  if (nread == 0)
    return nread;

  dst = memchr (buf, '\r', nread);
  if (dst == NULL)
    /* No carriage return at all: nothing to rewrite.  */
    return nread;

  /* DST is the write cursor and SRC the read cursor of an in-place
     compaction; both start just past the first '\r'.  */
  dst++;
  src = dst;
  end = buf + nread;

  for (;;)
    {
      /* The byte just written was a '\r'.  Turn it into '\n', and
         swallow a '\n' that immediately follows it.  */
      dst[-1] = '\n';
      if (src == end)
        {
          /* The '\r' was the last byte read: look one byte ahead in
             the stream itself.  */
          int next = getc (fp);
          if (next != '\n' && ungetc (next, fp) != next)
            break;
        }
      else if (*src == '\n')
        src++;

      /* Copy bytes down until the next '\r' has been copied, or the
         input is exhausted.  */
      for (;;)
        {
          char c;
          if (src == end)
            return dst - buf;
          c = *src++;
          *dst++ = c;
          if (c == '\r')
            break;
        }
    }

  return dst - buf;
}
94
95
96/* Extend *LOC to account for token TOKEN of size SIZE. */
97
98static void
99extend_location (location_t *loc, char const *token, int size)
100{
101 int line = loc->last_line;
102 int column = loc->last_column;
103 char const *p0 = token;
104 char const *p = token;
105 char const *lim = token + size;
106
107 for (p = token; p < lim; p++)
108 switch (*p)
109 {
110 case '\r':
111 /* \r shouldn't survive no_cr_read. */
112 abort ();
113
114 case '\n':
115 line++;
116 column = 1;
117 p0 = p + 1;
118 break;
119
120 case '\t':
121 column += mbsnwidth (p0, p - p0, 0);
122 column += 8 - ((column - 1) & 7);
123 p0 = p + 1;
124 break;
125 }
126
127 loc->last_line = line;
128 loc->last_column = column + mbsnwidth (p0, p - p0, 0);
129}
130
131
e9955c83 132
44995b2e
AD
133/* STRING_OBSTACK -- Used to store all the characters that we need to
134 keep (to construct ID, STRINGS etc.). Use the following macros to
135 use it.
136
1d6412ad
AD
137 Use YY_OBS_GROW to append what has just been matched, and
138 YY_OBS_FINISH to end the string (it puts the ending 0).
139 YY_OBS_FINISH also stores this string in LAST_STRING, which can be
140 used, and which is used by YY_OBS_FREE to free the last string. */
44995b2e
AD
141
/* The obstack itself is private to the scanner... */
142static struct obstack string_obstack;
/* ...but the most recently finished string has external linkage. */
143char *last_string;
144
44995b2e
AD
/* Append the text of the current match (yytext, yyleng bytes). */
145#define YY_OBS_GROW \
146 obstack_grow (&string_obstack, yytext, yyleng)
147
/* Terminate the growing string with a NUL and publish it in last_string. */
148#define YY_OBS_FINISH \
149 do { \
150 obstack_1grow (&string_obstack, '\0'); \
151 last_string = obstack_finish (&string_obstack); \
44995b2e
AD
152 } while (0)
153
/* Hand last_string back to the obstack. */
154#define YY_OBS_FREE \
155 do { \
156 obstack_free (&string_obstack, last_string); \
157 } while (0)
e9955c83 158
4cdb01db
AD
159void
160scanner_last_string_free (void)
161{
162 YY_OBS_FREE;
163}
164
165
e9955c83
AD
/* Number of `%%' separators scanned so far; scanning the second one
   switches the scanner to the epilogue. */
166static int percent_percent_count = 0;
167
efcb44dd
PE
168/* Within well-formed rules, RULE_LENGTH is the number of values in
169 the current rule so far, which says where to find `$0' with respect
170 to the top of the stack. It is not the same as the rule->length in
171 the case of mid rule actions.
172
173 Outside of well-formed rules, RULE_LENGTH has an undefined value. */
174static int rule_length;
175
d33cb3ae
PE
/* Helpers used by the rule actions; they are defined after the rules
   section. */
176static void handle_dollar (braced_code_t code_kind,
177 char *cp, location_t location);
178static void handle_at (braced_code_t code_kind,
179 char *cp, location_t location);
d8d3f94a 180static int convert_ucn_to_byte (char const *hex_text);
e9955c83
AD
181
182%}
d8d3f94a 183%x SC_COMMENT SC_LINE_COMMENT SC_YACC_COMMENT
e9955c83
AD
184%x SC_STRING SC_CHARACTER
185%x SC_ESCAPED_STRING SC_ESCAPED_CHARACTER
186%x SC_BRACED_CODE SC_PROLOGUE SC_EPILOGUE
187
29c01725
AD
/* Characters that may start an identifier: ASCII letters, `_', and
   also `.'. */
188letter [.abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ_]
/* An identifier: a letter followed by any number of letters or digits. */
189id {letter}({letter}|[0-9])*
/* Anything shaped like a `%' directive (dashes allowed after the first
   character); the rules use it to diagnose unknown directives. */
190directive %{letter}({letter}|[0-9]|-)*
/* An unsigned decimal integer. */
191int [0-9]+
d8d3f94a
PE
192
193/* POSIX says that a tag must be both an id and a C union member, but
194 historically almost any character is allowed in a tag. We disallow
195 NUL and newline, as this simplifies our implementation. */
196tag [^\0\n>]+
197
198/* Zero or more instances of backslash-newline. Following GCC, allow
199 white space between the backslash and the newline. */
200splice (\\[ \f\t\v]*\n)*
e9955c83
AD
201
202%%
203%{
1a9e39f1
PE
204 int braces_level IF_LINT (= 0);
205
e9955c83
AD
206 /* At each yylex invocation, mark the current position as the
207 start of the next token. */
e9955c83 208 YY_STEP;
e9955c83
AD
209%}
210
211
212 /*----------------------------.
213 | Scanning Bison directives. |
214 `----------------------------*/
215<INITIAL>
216{
217 "%binary" return PERCENT_NONASSOC;
218 "%debug" return PERCENT_DEBUG;
219 "%define" return PERCENT_DEFINE;
220 "%defines" return PERCENT_DEFINES;
9280d3ef 221 "%destructor" return PERCENT_DESTRUCTOR;
676385e2 222 "%dprec" return PERCENT_DPREC;
e9955c83
AD
223 "%error"[-_]"verbose" return PERCENT_ERROR_VERBOSE;
224 "%expect" return PERCENT_EXPECT;
225 "%file-prefix" return PERCENT_FILE_PREFIX;
226 "%fixed"[-_]"output"[-_]"files" return PERCENT_YACC;
ae7453f2 227 "%glr-parser" return PERCENT_GLR_PARSER;
e9955c83
AD
228 "%left" return PERCENT_LEFT;
229 "%locations" return PERCENT_LOCATIONS;
676385e2 230 "%merge" return PERCENT_MERGE;
e9955c83
AD
231 "%name"[-_]"prefix" return PERCENT_NAME_PREFIX;
232 "%no"[-_]"lines" return PERCENT_NO_LINES;
233 "%nonassoc" return PERCENT_NONASSOC;
234 "%nterm" return PERCENT_NTERM;
235 "%output" return PERCENT_OUTPUT;
ae7453f2 236 "%parse-param" return PERCENT_PARSE_PARAM;
d8d3f94a 237 "%prec" rule_length--; return PERCENT_PREC;
366eea36 238 "%printer" return PERCENT_PRINTER;
e9955c83
AD
239 "%pure"[-_]"parser" return PERCENT_PURE_PARSER;
240 "%right" return PERCENT_RIGHT;
ae7453f2 241 "%lex-param" return PERCENT_LEX_PARAM;
e9955c83
AD
242 "%skeleton" return PERCENT_SKELETON;
243 "%start" return PERCENT_START;
244 "%term" return PERCENT_TOKEN;
245 "%token" return PERCENT_TOKEN;
246 "%token"[-_]"table" return PERCENT_TOKEN_TABLE;
247 "%type" return PERCENT_TYPE;
248 "%union" return PERCENT_UNION;
249 "%verbose" return PERCENT_VERBOSE;
250 "%yacc" return PERCENT_YACC;
251
29c01725
AD
252 {directive} {
253 complain_at (*yylloc, _("invalid directive: %s"), quote (yytext));
254 YY_STEP;
255 }
256
e9955c83 257 "=" return EQUAL;
d8d3f94a
PE
258 ":" rule_length = 0; return COLON;
259 "|" rule_length = 0; return PIPE;
ae7453f2 260 "," return COMMA;
e9955c83
AD
261 ";" return SEMICOLON;
262
d8d3f94a
PE
263 [ \f\n\t\v]+ YY_STEP;
264
e9955c83 265 {id} {
39f41916 266 yylval->symbol = symbol_get (yytext, *yylloc);
efcb44dd 267 rule_length++;
e9955c83
AD
268 return ID;
269 }
270
d8d3f94a
PE
271 {int} {
272 unsigned long num;
273 errno = 0;
274 num = strtoul (yytext, 0, 10);
275 if (INT_MAX < num || errno)
276 {
98f2caaa 277 complain_at (*yylloc, _("integer out of range: %s"), quote (yytext));
d8d3f94a
PE
278 num = INT_MAX;
279 }
280 yylval->integer = num;
281 return INT;
282 }
e9955c83
AD
283
284 /* Characters. We don't check there is only one. */
db2cc12f 285 "'" YY_OBS_GROW; yy_push_state (SC_ESCAPED_CHARACTER);
e9955c83
AD
286
287 /* Strings. */
db2cc12f 288 "\"" YY_OBS_GROW; yy_push_state (SC_ESCAPED_STRING);
e9955c83
AD
289
290 /* Comments. */
d8d3f94a 291 "/*" BEGIN SC_YACC_COMMENT;
e9955c83
AD
292 "//".* YY_STEP;
293
294 /* Prologue. */
1d6412ad 295 "%{" yy_push_state (SC_PROLOGUE);
e9955c83
AD
296
297 /* Code in between braces. */
1a9e39f1 298 "{" YY_OBS_GROW; braces_level = 0; yy_push_state (SC_BRACED_CODE);
e9955c83
AD
299
300 /* A type. */
d8d3f94a 301 "<"{tag}">" {
4cdb01db
AD
302 obstack_grow (&string_obstack, yytext + 1, yyleng - 2);
303 YY_OBS_FINISH;
304 yylval->string = last_string;
305 return TYPE;
306 }
307
e9955c83
AD
308
309 "%%" {
310 if (++percent_percent_count == 2)
311 yy_push_state (SC_EPILOGUE);
312 return PERCENT_PERCENT;
313 }
314
315 . {
c4d720cd 316 complain_at (*yylloc, _("invalid character: %s"), quote (yytext));
e9955c83
AD
317 YY_STEP;
318 }
319}
320
321
d8d3f94a
PE
322 /*-------------------------------------------------------------------.
323 | Whatever the start condition (but those which correspond to |
324 | entities `swallowed' by Bison: SC_YACC_COMMENT, SC_ESCAPED_STRING, |
325 | and SC_ESCAPED_CHARACTER), no M4 character must escape as is. |
326 `-------------------------------------------------------------------*/
e9955c83 327
d8d3f94a 328<SC_COMMENT,SC_LINE_COMMENT,SC_STRING,SC_CHARACTER,SC_BRACED_CODE,SC_PROLOGUE,SC_EPILOGUE>
e9955c83 329{
d8d3f94a
PE
330 \[ obstack_sgrow (&string_obstack, "@<:@");
331 \] obstack_sgrow (&string_obstack, "@:>@");
e9955c83
AD
332}
333
334
d8d3f94a
PE
335 /*---------------------------------------------------------------.
336 | Scanning a Yacc comment. The initial `/ *' is already eaten. |
337 `---------------------------------------------------------------*/
e9955c83 338
d8d3f94a 339<SC_YACC_COMMENT>
e9955c83 340{
d8d3f94a
PE
341 "*/" {
342 YY_STEP;
343 BEGIN INITIAL;
e9955c83
AD
344 }
345
d8d3f94a
PE
346 [^*]+|"*" ;
347
348 <<EOF>> {
c4d720cd 349 complain_at (*yylloc, _("unexpected end of file in a comment"));
d8d3f94a
PE
350 BEGIN INITIAL;
351 }
352}
353
354
355 /*------------------------------------------------------------.
356 | Scanning a C comment. The initial `/ *' is already eaten. |
357 `------------------------------------------------------------*/
358
359<SC_COMMENT>
360{
361 "*"{splice}"/" YY_OBS_GROW; yy_pop_state ();
362 [^*\[\]]+|"*" YY_OBS_GROW;
e9955c83
AD
363
364 <<EOF>> {
c4d720cd 365 complain_at (*yylloc, _("unexpected end of file in a comment"));
e9955c83
AD
366 yy_pop_state ();
367 }
368}
369
370
d8d3f94a
PE
371 /*--------------------------------------------------------------.
372 | Scanning a line comment. The initial `//' is already eaten. |
373 `--------------------------------------------------------------*/
374
375<SC_LINE_COMMENT>
376{
377 "\n" YY_OBS_GROW; yy_pop_state ();
378 ([^\n\[\]]|{splice})+ YY_OBS_GROW;
379 <<EOF>> yy_pop_state ();
380}
381
382
e9955c83
AD
383 /*----------------------------------------------------------------.
384 | Scanning a C string, including its escapes. The initial `"' is |
385 | already eaten. |
386 `----------------------------------------------------------------*/
387
388<SC_ESCAPED_STRING>
389{
db2cc12f 390 "\"" {
e9955c83 391 assert (yy_top_state () == INITIAL);
44995b2e
AD
392 YY_OBS_GROW;
393 YY_OBS_FINISH;
4cdb01db 394 yylval->string = last_string;
e9955c83 395 yy_pop_state ();
efcb44dd 396 rule_length++;
e9955c83
AD
397 return STRING;
398 }
399
d8d3f94a 400 [^\"\\]+ YY_OBS_GROW;
e9955c83
AD
401
402 <<EOF>> {
c4d720cd 403 complain_at (*yylloc, _("unexpected end of file in a string"));
e9955c83 404 assert (yy_top_state () == INITIAL);
44995b2e 405 YY_OBS_FINISH;
4cdb01db 406 yylval->string = last_string;
e9955c83
AD
407 yy_pop_state ();
408 return STRING;
409 }
410}
411
412 /*---------------------------------------------------------------.
413 | Scanning a C character, decoding its escapes. The initial "'" |
414 | is already eaten. |
415 `---------------------------------------------------------------*/
416
417<SC_ESCAPED_CHARACTER>
418{
db2cc12f 419 "'" {
44995b2e 420 YY_OBS_GROW;
e9955c83
AD
421 assert (yy_top_state () == INITIAL);
422 {
44995b2e 423 YY_OBS_FINISH;
39f41916 424 yylval->symbol = symbol_get (last_string, *yylloc);
e776192e 425 symbol_class_set (yylval->symbol, token_sym, *yylloc);
e68d4575
PE
426 symbol_user_token_number_set (yylval->symbol,
427 (unsigned char) last_string[1], *yylloc);
44995b2e 428 YY_OBS_FREE;
e9955c83 429 yy_pop_state ();
efcb44dd 430 rule_length++;
e9955c83
AD
431 return ID;
432 }
433 }
434
c4d720cd 435 [^\'\\]+ YY_OBS_GROW;
e9955c83
AD
436
437 <<EOF>> {
98f2caaa 438 complain_at (*yylloc, _("unexpected end of file in a character literal"));
e9955c83 439 assert (yy_top_state () == INITIAL);
44995b2e 440 YY_OBS_FINISH;
4cdb01db 441 yylval->string = last_string;
e9955c83
AD
442 yy_pop_state ();
443 return CHARACTER;
444 }
445}
446
447
448 /*----------------------------.
449 | Decode escaped characters. |
450 `----------------------------*/
451
452<SC_ESCAPED_STRING,SC_ESCAPED_CHARACTER>
453{
d8d3f94a
PE
454 \\[0-7]{1,3} {
455 unsigned long c = strtoul (yytext + 1, 0, 8);
456 if (UCHAR_MAX < c)
e9955c83 457 {
98f2caaa
PE
458 complain_at (*yylloc, _("invalid escape sequence: %s"),
459 quote (yytext));
e9955c83
AD
460 YY_STEP;
461 }
462 else
463 obstack_1grow (&string_obstack, c);
464 }
465
d8d3f94a
PE
466 \\x[0-9a-fA-F]+ {
467 unsigned long c;
468 errno = 0;
469 c = strtoul (yytext + 2, 0, 16);
470 if (UCHAR_MAX < c || errno)
471 {
98f2caaa
PE
472 complain_at (*yylloc, _("invalid escape sequence: %s"),
473 quote (yytext));
d8d3f94a
PE
474 YY_STEP;
475 }
476 else
477 obstack_1grow (&string_obstack, c);
e9955c83
AD
478 }
479
480 \\a obstack_1grow (&string_obstack, '\a');
481 \\b obstack_1grow (&string_obstack, '\b');
482 \\f obstack_1grow (&string_obstack, '\f');
483 \\n obstack_1grow (&string_obstack, '\n');
484 \\r obstack_1grow (&string_obstack, '\r');
485 \\t obstack_1grow (&string_obstack, '\t');
486 \\v obstack_1grow (&string_obstack, '\v');
c4d720cd 487 \\[\"\'?\\] obstack_1grow (&string_obstack, yytext[1]);
d8d3f94a
PE
488 \\(u|U[0-9a-fA-F]{4})[0-9a-fA-F]{4} {
489 int c = convert_ucn_to_byte (yytext);
490 if (c < 0)
491 {
98f2caaa
PE
492 complain_at (*yylloc, _("invalid escape sequence: %s"),
493 quote (yytext));
d8d3f94a
PE
494 YY_STEP;
495 }
496 else
497 obstack_1grow (&string_obstack, c);
498 }
4f25ebb0 499 \\(.|\n) {
98f2caaa
PE
500 complain_at (*yylloc, _("unrecognized escape sequence: %s"),
501 quote (yytext));
44995b2e 502 YY_OBS_GROW;
e9955c83 503 }
4f25ebb0
AD
504 /* Flex wants this rule, in case of a `\<<EOF>>'. */
505 \\ YY_OBS_GROW;
e9955c83
AD
506}
507
508
509 /*----------------------------------------------------------.
510 | Scanning a C character without decoding its escapes. The |
511 | initial "'" is already eaten. |
512 `----------------------------------------------------------*/
513
514<SC_CHARACTER>
515{
db2cc12f 516 "'" {
44995b2e 517 YY_OBS_GROW;
e9955c83
AD
518 assert (yy_top_state () != INITIAL);
519 yy_pop_state ();
520 }
521
d8d3f94a
PE
522 [^'\[\]\\]+ YY_OBS_GROW;
523 \\{splice}[^\[\]] YY_OBS_GROW;
524 {splice} YY_OBS_GROW;
525 /* Needed for `\<<EOF>>', `\\<<newline>>[', and `\\<<newline>>]'. */
4f25ebb0 526 \\ YY_OBS_GROW;
e9955c83 527
e9955c83 528 <<EOF>> {
98f2caaa 529 complain_at (*yylloc, _("unexpected end of file in a character literal"));
e9955c83
AD
530 assert (yy_top_state () != INITIAL);
531 yy_pop_state ();
532 }
533}
534
535
536 /*----------------------------------------------------------------.
537 | Scanning a C string, without decoding its escapes. The initial |
538 | `"' is already eaten. |
539 `----------------------------------------------------------------*/
540
541<SC_STRING>
542{
db2cc12f 543 "\"" {
e9955c83 544 assert (yy_top_state () != INITIAL);
44995b2e 545 YY_OBS_GROW;
e9955c83
AD
546 yy_pop_state ();
547 }
548
d8d3f94a
PE
549 [^\"\[\]\\]+ YY_OBS_GROW;
550 \\{splice}[^\[\]] YY_OBS_GROW;
551 {splice} YY_OBS_GROW;
552 /* Needed for `\<<EOF>>', `\\<<newline>>[', and `\\<<newline>>]'. */
4f25ebb0 553 \\ YY_OBS_GROW;
e9955c83 554
e9955c83 555 <<EOF>> {
c4d720cd 556 complain_at (*yylloc, _("unexpected end of file in a string"));
e9955c83
AD
557 assert (yy_top_state () != INITIAL);
558 yy_pop_state ();
559 }
560}
561
562
563 /*---------------------------------------------------.
564 | Strings, comments etc. can be found in user code. |
565 `---------------------------------------------------*/
566
567<SC_BRACED_CODE,SC_PROLOGUE,SC_EPILOGUE>
568{
569 /* Characters. We don't check there is only one. */
db2cc12f 570 "'" YY_OBS_GROW; yy_push_state (SC_CHARACTER);
e9955c83
AD
571
572 /* Strings. */
db2cc12f 573 "\"" YY_OBS_GROW; yy_push_state (SC_STRING);
e9955c83
AD
574
575 /* Comments. */
d8d3f94a
PE
576 "/"{splice}"*" YY_OBS_GROW; yy_push_state (SC_COMMENT);
577 "/"{splice}"/" YY_OBS_GROW; yy_push_state (SC_LINE_COMMENT);
4f25ebb0
AD
578
579 /* Not comments. */
580 "/" YY_OBS_GROW;
e9955c83
AD
581}
582
583
584 /*---------------------------------------------------------------.
585 | Scanning some code in braces (%union and actions). The initial |
586 | "{" is already eaten. |
587 `---------------------------------------------------------------*/
588
589<SC_BRACED_CODE>
590{
1a9e39f1
PE
591 "{"|"<"{splice}"%" YY_OBS_GROW; braces_level++;
592 "%"{splice}">" YY_OBS_GROW; braces_level--;
e9955c83 593 "}" {
44995b2e 594 YY_OBS_GROW;
1a9e39f1
PE
595 braces_level--;
596 if (braces_level < 0)
e9955c83
AD
597 {
598 yy_pop_state ();
44995b2e 599 YY_OBS_FINISH;
4cdb01db 600 yylval->string = last_string;
efcb44dd 601 rule_length++;
e9955c83
AD
602 return BRACED_CODE;
603 }
604 }
605
d8d3f94a 606 "$"("<"{tag}">")?(-?[0-9]+|"$") { handle_dollar (current_braced_code,
f25bfb75
AD
607 yytext, *yylloc); }
608 "@"(-?[0-9]+|"$") { handle_at (current_braced_code,
609 yytext, *yylloc); }
e9955c83 610
1a9e39f1
PE
611 /* `"<"{splice}"<"' tokenizes `<<%' correctly (as `<<' `%') rather
612 than incorrectly (as `<' `<%'). */
613 [^\"$%\'/<@\[\]\{\}]+|[$%/<@]|"<"{splice}"<" YY_OBS_GROW;
e9955c83
AD
614
615 <<EOF>> {
98f2caaa 616 complain_at (*yylloc, _("unexpected end of file in `{ ... }'"));
e9955c83 617 yy_pop_state ();
44995b2e 618 YY_OBS_FINISH;
4cdb01db
AD
619 yylval->string = last_string;
620 return BRACED_CODE;
e9955c83
AD
621 }
622
623}
624
625
626 /*--------------------------------------------------------------.
627 | Scanning some prologue: from "%{" (already scanned) to "%}". |
628 `--------------------------------------------------------------*/
629
630<SC_PROLOGUE>
631{
632 "%}" {
633 yy_pop_state ();
44995b2e 634 YY_OBS_FINISH;
4cdb01db 635 yylval->string = last_string;
e9955c83
AD
636 return PROLOGUE;
637 }
638
c4d720cd 639 [^%\[\]/\'\"]+ YY_OBS_GROW;
4f25ebb0 640 "%" YY_OBS_GROW;
e9955c83
AD
641
642 <<EOF>> {
98f2caaa 643 complain_at (*yylloc, _("unexpected end of file in `%%{ ... %%}'"));
e9955c83 644 yy_pop_state ();
44995b2e 645 YY_OBS_FINISH;
4cdb01db 646 yylval->string = last_string;
e9955c83
AD
647 return PROLOGUE;
648 }
e9955c83
AD
649}
650
651
652 /*---------------------------------------------------------------.
653 | Scanning the epilogue (everything after the second "%%", which |
d8d3f94a 654 | has already been eaten). |
e9955c83
AD
655 `---------------------------------------------------------------*/
656
657<SC_EPILOGUE>
658{
d8d3f94a 659 [^\[\]]+ YY_OBS_GROW;
e9955c83
AD
660
661 <<EOF>> {
662 yy_pop_state ();
44995b2e 663 YY_OBS_FINISH;
4cdb01db 664 yylval->string = last_string;
e9955c83
AD
665 return EPILOGUE;
666 }
667}
668
669
670%%
671
672/*------------------------------------------------------------------.
366eea36 673| TEXT is pointing to a wannabee semantic value (i.e., a `$'). |
e9955c83
AD
674| |
675| Possible inputs: $[<TYPENAME>]($|integer) |
676| |
677| Output to the STRING_OBSTACK a reference to this semantic value. |
678`------------------------------------------------------------------*/
679
f25bfb75 680static inline void
366eea36 681handle_action_dollar (char *text, location_t location)
e9955c83
AD
682{
683 const char *type_name = NULL;
366eea36 684 char *cp = text + 1;
e9955c83
AD
685
686 /* Get the type name if explicit. */
687 if (*cp == '<')
688 {
689 type_name = ++cp;
690 while (*cp != '>')
691 ++cp;
692 *cp = '\0';
693 ++cp;
694 }
695
696 if (*cp == '$')
697 {
698 if (!type_name)
56c47203 699 type_name = symbol_list_n_type_name_get (current_rule, location, 0);
e9955c83 700 if (!type_name && typed)
56c47203 701 complain_at (location, _("$$ of `%s' has no declared type"),
97650f4e 702 current_rule->sym->tag);
e9955c83
AD
703 if (!type_name)
704 type_name = "";
705 obstack_fgrow1 (&string_obstack,
706 "]b4_lhs_value([%s])[", type_name);
707 }
d8d3f94a 708 else
e9955c83 709 {
d8d3f94a
PE
710 long num;
711 errno = 0;
712 num = strtol (cp, 0, 10);
e9955c83 713
d8d3f94a 714 if (INT_MIN <= num && num <= rule_length && ! errno)
e9955c83 715 {
d8d3f94a 716 int n = num;
e9955c83 717 if (!type_name && n > 0)
56c47203
AD
718 type_name = symbol_list_n_type_name_get (current_rule, location,
719 n);
e9955c83 720 if (!type_name && typed)
56c47203 721 complain_at (location, _("$%d of `%s' has no declared type"),
97650f4e 722 n, current_rule->sym->tag);
e9955c83
AD
723 if (!type_name)
724 type_name = "";
725 obstack_fgrow3 (&string_obstack,
726 "]b4_rhs_value([%d], [%d], [%s])[",
727 rule_length, n, type_name);
728 }
d8d3f94a 729 else
98f2caaa 730 complain_at (location, _("integer out of range: %s"), quote (text));
9280d3ef
AD
731 }
732}
733
734
366eea36 735/*---------------------------------------------------------------.
d8d3f94a 736| TEXT is expected to be $$ in some code associated to a symbol: |
366eea36
AD
737| destructor or printer. |
738`---------------------------------------------------------------*/
9280d3ef 739
f25bfb75 740static inline void
366eea36 741handle_symbol_code_dollar (char *text, location_t location)
9280d3ef 742{
366eea36 743 char *cp = text + 1;
9280d3ef 744 if (*cp == '$')
366eea36 745 obstack_sgrow (&string_obstack, "]b4_dollar_dollar[");
9280d3ef 746 else
c4d720cd 747 complain_at (location, _("invalid value: %s"), quote (text));
e9955c83
AD
748}
749
f25bfb75
AD
750
751/*-----------------------------------------------------------------.
752| Dispatch onto handle_action_dollar, or handle_destructor_dollar, |
753| depending upon CODE_KIND. |
754`-----------------------------------------------------------------*/
e9955c83
AD
755
756static void
f25bfb75
AD
757handle_dollar (braced_code_t braced_code_kind,
758 char *text, location_t location)
759{
760 switch (braced_code_kind)
761 {
762 case action_braced_code:
763 handle_action_dollar (text, location);
764 break;
765
766 case destructor_braced_code:
366eea36
AD
767 case printer_braced_code:
768 handle_symbol_code_dollar (text, location);
f25bfb75
AD
769 break;
770 }
771}
772
773
774/*------------------------------------------------------.
775| TEXT is a location token (i.e., a `@...'). Output to |
776| STRING_OBSTACK a reference to this location. |
777`------------------------------------------------------*/
778
779static inline void
780handle_action_at (char *text, location_t location)
e9955c83 781{
366eea36 782 char *cp = text + 1;
e9955c83 783 locations_flag = 1;
e9955c83 784
366eea36 785 if (*cp == '$')
e9955c83
AD
786 {
787 obstack_sgrow (&string_obstack, "]b4_lhs_location[");
788 }
d8d3f94a 789 else
e9955c83 790 {
d8d3f94a
PE
791 long num;
792 errno = 0;
793 num = strtol (cp, 0, 10);
dafdc66f 794
d8d3f94a
PE
795 if (INT_MIN <= num && num <= rule_length && ! errno)
796 {
797 int n = num;
798 obstack_fgrow2 (&string_obstack, "]b4_rhs_location([%d], [%d])[",
799 rule_length, n);
800 }
e9955c83 801 else
98f2caaa 802 complain_at (location, _("integer out of range: %s"), quote (text));
f25bfb75
AD
803 }
804}
805
806
366eea36 807/*---------------------------------------------------------------.
d8d3f94a 808| TEXT is expected to be @$ in some code associated to a symbol: |
366eea36
AD
809| destructor or printer. |
810`---------------------------------------------------------------*/
f25bfb75
AD
811
812static inline void
366eea36 813handle_symbol_code_at (char *text, location_t location)
f25bfb75 814{
366eea36
AD
815 char *cp = text + 1;
816 if (*cp == '$')
817 obstack_sgrow (&string_obstack, "]b4_at_dollar[");
f25bfb75 818 else
c4d720cd 819 complain_at (location, _("invalid value: %s"), quote (text));
e9955c83 820}
4cdb01db 821
f25bfb75
AD
822
823/*-------------------------------------------------------------------.
824| Dispatch onto handle_action_at, or handle_destructor_at, depending |
825| upon CODE_KIND. |
826`-------------------------------------------------------------------*/
827
828static void
829handle_at (braced_code_t braced_code_kind,
830 char *text, location_t location)
831{
832 switch (braced_code_kind)
833 {
834 case action_braced_code:
835 handle_action_at (text, location);
836 break;
837
838 case destructor_braced_code:
366eea36
AD
839 case printer_braced_code:
840 handle_symbol_code_at (text, location);
f25bfb75
AD
841 break;
842 }
843}
844
845
d8d3f94a
PE
/*------------------------------------------------------------------.
| Convert universal character name UCN to a single-byte character,  |
| and return that character.  Return -1 if UCN does not correspond  |
| to a single-byte character.                                       |
|                                                                   |
| UCN points at the backslash of a `\uXXXX' or `\UXXXXXXXX' escape, |
| as matched by the scanner, so only hexadecimal digits follow the  |
| first two characters.                                             |
`------------------------------------------------------------------*/

static int
convert_ucn_to_byte (char const *ucn)
{
  /* Skip the backslash and the `u' or `U'.  */
  unsigned long code = strtoul (ucn + 2, 0, 16);

  /* FIXME: Currently we assume Unicode-compatible unibyte characters
     on ASCII hosts (i.e., Latin-1 on hosts with 8-bit bytes).  On
     non-ASCII hosts we support only the portable C character set.
     These limitations should be removed once we add support for
     multibyte characters.  */

  if (UCHAR_MAX < code)
    return -1;

#if ! ('$' == 0x24 && '@' == 0x40 && '`' == 0x60 && '~' == 0x7e)
  {
    /* A non-ASCII host.  Use CODE to index into a table of the C
       basic execution character set, which is guaranteed to exist on
       all Standard C platforms.  This table also includes '$', '@',
       and '`', which are not in the basic execution character set but
       which are unibyte characters on all the platforms that we know
       about.

       The elements are `short' rather than `signed char' so that
       host character codes above SCHAR_MAX stay distinct from the -1
       that marks an unsupported code point.  */
    static short const table[] =
      {
        '\0',   -1,   -1,   -1,   -1,   -1,   -1, '\a',
        '\b', '\t', '\n', '\v', '\f', '\r',   -1,   -1,
          -1,   -1,   -1,   -1,   -1,   -1,   -1,   -1,
          -1,   -1,   -1,   -1,   -1,   -1,   -1,   -1,
         ' ',  '!',  '"',  '#',  '$',  '%',  '&', '\'',
         '(',  ')',  '*',  '+',  ',',  '-',  '.',  '/',
         '0',  '1',  '2',  '3',  '4',  '5',  '6',  '7',
         '8',  '9',  ':',  ';',  '<',  '=',  '>',  '?',
         '@',  'A',  'B',  'C',  'D',  'E',  'F',  'G',
         'H',  'I',  'J',  'K',  'L',  'M',  'N',  'O',
         'P',  'Q',  'R',  'S',  'T',  'U',  'V',  'W',
         'X',  'Y',  'Z',  '[', '\\',  ']',  '^',  '_',
         '`',  'a',  'b',  'c',  'd',  'e',  'f',  'g',
         'h',  'i',  'j',  'k',  'l',  'm',  'n',  'o',
         'p',  'q',  'r',  's',  't',  'u',  'v',  'w',
         'x',  'y',  'z',  '{',  '|',  '}',  '~'
      };

    /* Return the int directly: routing -1 through the unsigned CODE
       would rely on an implementation-defined conversion.  */
    if (sizeof table / sizeof table[0] <= code)
      return -1;
    return table[code];
  }
#else
  /* An ASCII host: CODE is already the byte value, and it is known
     to be at most UCHAR_MAX here.  */
  return (int) code;
#endif
}
900
901
f25bfb75
AD
902/*-------------------------.
903| Initialize the scanner. |
904`-------------------------*/
905
1d6412ad
AD
906void
907scanner_initialize (void)
908{
909 obstack_init (&string_obstack);
910}
911
912
f25bfb75
AD
913/*-----------------------------------------------.
914| Free all the memory allocated to the scanner. |
915`-----------------------------------------------*/
916
4cdb01db
AD
917void
918scanner_free (void)
919{
920 obstack_free (&string_obstack, 0);
536545f3
AD
921 /* Reclaim Flex's buffers. */
922 yy_delete_buffer (YY_CURRENT_BUFFER);
4cdb01db 923}