]> git.saurik.com Git - bison.git/blame - src/scan-gram.l
* tests/regression.at (Invalid inputs): Adjust to the recent
[bison.git] / src / scan-gram.l
CommitLineData
e9955c83
AD
1/* Bison Grammar Scanner -*- C -*-
2 Copyright (C) 2002 Free Software Foundation, Inc.
3
4 This file is part of Bison, the GNU Compiler Compiler.
5
6 This program is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 2 of the License, or
9 (at your option) any later version.
10
11 This program is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
15
16 You should have received a copy of the GNU General Public License
17 along with this program; if not, write to the Free Software
18 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
19 02111-1307 USA
20*/
21
22%option debug nodefault noyywrap nounput never-interactive stack
23%option prefix="gram_" outfile="lex.yy.c"
24
25%{
26#include "system.h"
d8d3f94a 27#include "mbswidth.h"
e9955c83
AD
28#include "complain.h"
29#include "quote.h"
30#include "getargs.h"
31#include "gram.h"
32#include "reader.h"
33
34/* Each time we match a string, move the end cursor to its end. */
8efe435c
AD
35#define YY_USER_INIT \
36do { \
37 LOCATION_RESET (*yylloc); \
1a715ef2 38 yylloc->file = infile; \
8efe435c
AD
39 /* This is only to avoid GCC warnings. */ \
40 if (yycontrol) {;}; \
41} while (0)
42
d8d3f94a
PE
43#define YY_USER_ACTION extend_location (yylloc, yytext, yyleng);
44#define YY_STEP LOCATION_STEP (*yylloc)
45
46#define YY_INPUT(buf, result, size) ((result) = no_cr_read (yyin, buf, size))
47
48
/* Fill BUF with up to SIZE bytes read from FP and return the number
   of bytes BUF holds afterwards.  Carriage returns never reach the
   caller: both "\r\n" and an isolated "\r" are delivered as a single
   "\n", so the returned count may be smaller than the count read.  */

static size_t
no_cr_read (FILE *fp, char *buf, size_t size)
{
  size_t nread = fread (buf, 1, size, fp);
  char *first_cr = nread ? memchr (buf, '\r', nread) : 0;
  char const *src;
  char const *end;
  char *dst;

  /* Nothing to rewrite: hand the data back untouched.  */
  if (!first_cr)
    return nread;

  /* Everything before the first '\r' is already in place, so start
     compacting there.  DST never runs ahead of SRC.  */
  dst = first_cr;
  src = first_cr;
  end = buf + nread;

  while (src != end)
    {
      char byte = *src++;

      if (byte != '\r')
        {
          *dst++ = byte;
          continue;
        }

      /* A '\r' becomes '\n'; a '\n' that immediately follows it is
         dropped.  */
      *dst++ = '\n';

      if (src != end)
        {
          if (*src == '\n')
            src++;
        }
      else
        {
          /* The '\r' was the last byte read, so a matching '\n' (if
             any) is still in the stream.  Consume it, or push back
             whatever else was read.  Either way the buffer is
             exhausted and the loop ends.  */
          int next = getc (fp);
          if (next != '\n')
            ungetc (next, fp);
        }
    }

  return dst - buf;
}
94
95
96/* Extend *LOC to account for token TOKEN of size SIZE. */
97
98static void
99extend_location (location_t *loc, char const *token, int size)
100{
101 int line = loc->last_line;
102 int column = loc->last_column;
103 char const *p0 = token;
104 char const *p = token;
105 char const *lim = token + size;
106
107 for (p = token; p < lim; p++)
108 switch (*p)
109 {
110 case '\r':
111 /* \r shouldn't survive no_cr_read. */
112 abort ();
113
114 case '\n':
115 line++;
116 column = 1;
117 p0 = p + 1;
118 break;
119
120 case '\t':
121 column += mbsnwidth (p0, p - p0, 0);
122 column += 8 - ((column - 1) & 7);
123 p0 = p + 1;
124 break;
125 }
126
127 loc->last_line = line;
128 loc->last_column = column + mbsnwidth (p0, p - p0, 0);
129}
130
131
e9955c83 132
44995b2e
AD
133/* STRING_OBSTACK -- Used to store all the characters that we need to
134 keep (to construct ID, STRINGS etc.). Use the following macros to
135 use it.
136
1d6412ad
AD
137 Use YY_OBS_GROW to append what has just been matched, and
138 YY_OBS_FINISH to end the string (it puts the ending 0).
139 YY_OBS_FINISH also stores this string in LAST_STRING, which can be
140 used, and which is used by YY_OBS_FREE to free the last string. */
44995b2e
AD
141
142static struct obstack string_obstack;
143char *last_string;
144
44995b2e
AD
145#define YY_OBS_GROW \
146 obstack_grow (&string_obstack, yytext, yyleng)
147
148#define YY_OBS_FINISH \
149 do { \
150 obstack_1grow (&string_obstack, '\0'); \
151 last_string = obstack_finish (&string_obstack); \
44995b2e
AD
152 } while (0)
153
154#define YY_OBS_FREE \
155 do { \
156 obstack_free (&string_obstack, last_string); \
157 } while (0)
e9955c83 158
4cdb01db
AD
159void
160scanner_last_string_free (void)
161{
162 YY_OBS_FREE;
163}
164
165
e9955c83
AD
166static int percent_percent_count = 0;
167
efcb44dd
PE
168/* Within well-formed rules, RULE_LENGTH is the number of values in
169 the current rule so far, which says where to find `$0' with respect
170 to the top of the stack. It is not the same as the rule->length in
171 the case of mid rule actions.
172
173 Outside of well-formed rules, RULE_LENGTH has an undefined value. */
174static int rule_length;
175
d33cb3ae
PE
176static void handle_dollar (braced_code_t code_kind,
177 char *cp, location_t location);
178static void handle_at (braced_code_t code_kind,
179 char *cp, location_t location);
900c5db5 180static void handle_syncline (char *args, location_t *location);
d8d3f94a 181static int convert_ucn_to_byte (char const *hex_text);
e9955c83
AD
182
183%}
d8d3f94a 184%x SC_COMMENT SC_LINE_COMMENT SC_YACC_COMMENT
e9955c83
AD
185%x SC_STRING SC_CHARACTER
186%x SC_ESCAPED_STRING SC_ESCAPED_CHARACTER
187%x SC_BRACED_CODE SC_PROLOGUE SC_EPILOGUE
188
29c01725
AD
189letter [.abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ_]
190id {letter}({letter}|[0-9])*
191directive %{letter}({letter}|[0-9]|-)*
192int [0-9]+
d8d3f94a
PE
193
194/* POSIX says that a tag must be both an id and a C union member, but
195 historically almost any character is allowed in a tag. We disallow
196 NUL and newline, as this simplifies our implementation. */
197tag [^\0\n>]+
198
199/* Zero or more instances of backslash-newline. Following GCC, allow
200 white space between the backslash and the newline. */
201splice (\\[ \f\t\v]*\n)*
e9955c83
AD
202
203%%
204%{
1a9e39f1
PE
205 int braces_level IF_LINT (= 0);
206
e9955c83
AD
207 /* At each yylex invocation, mark the current position as the
208 start of the next token. */
e9955c83 209 YY_STEP;
e9955c83
AD
210%}
211
212
213 /*----------------------------.
214 | Scanning Bison directives. |
215 `----------------------------*/
216<INITIAL>
217{
218 "%binary" return PERCENT_NONASSOC;
219 "%debug" return PERCENT_DEBUG;
220 "%define" return PERCENT_DEFINE;
221 "%defines" return PERCENT_DEFINES;
9280d3ef 222 "%destructor" return PERCENT_DESTRUCTOR;
676385e2 223 "%dprec" return PERCENT_DPREC;
e9955c83
AD
224 "%error"[-_]"verbose" return PERCENT_ERROR_VERBOSE;
225 "%expect" return PERCENT_EXPECT;
226 "%file-prefix" return PERCENT_FILE_PREFIX;
227 "%fixed"[-_]"output"[-_]"files" return PERCENT_YACC;
ae7453f2 228 "%glr-parser" return PERCENT_GLR_PARSER;
e9955c83
AD
229 "%left" return PERCENT_LEFT;
230 "%locations" return PERCENT_LOCATIONS;
676385e2 231 "%merge" return PERCENT_MERGE;
e9955c83
AD
232 "%name"[-_]"prefix" return PERCENT_NAME_PREFIX;
233 "%no"[-_]"lines" return PERCENT_NO_LINES;
234 "%nonassoc" return PERCENT_NONASSOC;
235 "%nterm" return PERCENT_NTERM;
236 "%output" return PERCENT_OUTPUT;
ae7453f2 237 "%parse-param" return PERCENT_PARSE_PARAM;
d8d3f94a 238 "%prec" rule_length--; return PERCENT_PREC;
366eea36 239 "%printer" return PERCENT_PRINTER;
e9955c83
AD
240 "%pure"[-_]"parser" return PERCENT_PURE_PARSER;
241 "%right" return PERCENT_RIGHT;
ae7453f2 242 "%lex-param" return PERCENT_LEX_PARAM;
e9955c83
AD
243 "%skeleton" return PERCENT_SKELETON;
244 "%start" return PERCENT_START;
245 "%term" return PERCENT_TOKEN;
246 "%token" return PERCENT_TOKEN;
247 "%token"[-_]"table" return PERCENT_TOKEN_TABLE;
248 "%type" return PERCENT_TYPE;
249 "%union" return PERCENT_UNION;
250 "%verbose" return PERCENT_VERBOSE;
251 "%yacc" return PERCENT_YACC;
252
29c01725
AD
253 {directive} {
254 complain_at (*yylloc, _("invalid directive: %s"), quote (yytext));
255 YY_STEP;
256 }
257
900c5db5
AD
258 ^"#line "{int}" \""[^\"]*"\"\n" handle_syncline (yytext + strlen ("#line "), yylloc); YY_STEP;
259
e9955c83 260 "=" return EQUAL;
d8d3f94a
PE
261 ":" rule_length = 0; return COLON;
262 "|" rule_length = 0; return PIPE;
ae7453f2 263 "," return COMMA;
e9955c83
AD
264 ";" return SEMICOLON;
265
d8d3f94a
PE
266 [ \f\n\t\v]+ YY_STEP;
267
e9955c83 268 {id} {
39f41916 269 yylval->symbol = symbol_get (yytext, *yylloc);
efcb44dd 270 rule_length++;
e9955c83
AD
271 return ID;
272 }
273
d8d3f94a
PE
274 {int} {
275 unsigned long num;
276 errno = 0;
277 num = strtoul (yytext, 0, 10);
278 if (INT_MAX < num || errno)
279 {
98f2caaa 280 complain_at (*yylloc, _("integer out of range: %s"), quote (yytext));
d8d3f94a
PE
281 num = INT_MAX;
282 }
283 yylval->integer = num;
284 return INT;
285 }
e9955c83
AD
286
287 /* Characters. We don't check there is only one. */
db2cc12f 288 "'" YY_OBS_GROW; yy_push_state (SC_ESCAPED_CHARACTER);
e9955c83
AD
289
290 /* Strings. */
db2cc12f 291 "\"" YY_OBS_GROW; yy_push_state (SC_ESCAPED_STRING);
e9955c83
AD
292
293 /* Comments. */
d8d3f94a 294 "/*" BEGIN SC_YACC_COMMENT;
e9955c83
AD
295 "//".* YY_STEP;
296
297 /* Prologue. */
1d6412ad 298 "%{" yy_push_state (SC_PROLOGUE);
e9955c83
AD
299
300 /* Code in between braces. */
1a9e39f1 301 "{" YY_OBS_GROW; braces_level = 0; yy_push_state (SC_BRACED_CODE);
e9955c83
AD
302
303 /* A type. */
d8d3f94a 304 "<"{tag}">" {
4cdb01db
AD
305 obstack_grow (&string_obstack, yytext + 1, yyleng - 2);
306 YY_OBS_FINISH;
307 yylval->string = last_string;
308 return TYPE;
309 }
310
e9955c83
AD
311
312 "%%" {
313 if (++percent_percent_count == 2)
314 yy_push_state (SC_EPILOGUE);
315 return PERCENT_PERCENT;
316 }
317
318 . {
c4d720cd 319 complain_at (*yylloc, _("invalid character: %s"), quote (yytext));
e9955c83
AD
320 YY_STEP;
321 }
322}
323
324
d8d3f94a
PE
325 /*-------------------------------------------------------------------.
326 | Whatever the start condition (but those which correspond to |
327 | entities `swallowed' by Bison: SC_YACC_COMMENT, SC_ESCAPED_STRING, |
328 | and SC_ESCAPED_CHARACTER), no M4 character must escape as is. |
329 `-------------------------------------------------------------------*/
e9955c83 330
d8d3f94a 331<SC_COMMENT,SC_LINE_COMMENT,SC_STRING,SC_CHARACTER,SC_BRACED_CODE,SC_PROLOGUE,SC_EPILOGUE>
e9955c83 332{
d8d3f94a
PE
333 \[ obstack_sgrow (&string_obstack, "@<:@");
334 \] obstack_sgrow (&string_obstack, "@:>@");
e9955c83
AD
335}
336
337
d8d3f94a
PE
338 /*---------------------------------------------------------------.
339 | Scanning a Yacc comment. The initial `/ *' is already eaten. |
340 `---------------------------------------------------------------*/
e9955c83 341
d8d3f94a 342<SC_YACC_COMMENT>
e9955c83 343{
d8d3f94a
PE
344 "*/" {
345 YY_STEP;
346 BEGIN INITIAL;
e9955c83
AD
347 }
348
d8d3f94a
PE
349 [^*]+|"*" ;
350
351 <<EOF>> {
c4d720cd 352 complain_at (*yylloc, _("unexpected end of file in a comment"));
d8d3f94a
PE
353 BEGIN INITIAL;
354 }
355}
356
357
358 /*------------------------------------------------------------.
359 | Scanning a C comment. The initial `/ *' is already eaten. |
360 `------------------------------------------------------------*/
361
362<SC_COMMENT>
363{
364 "*"{splice}"/" YY_OBS_GROW; yy_pop_state ();
365 [^*\[\]]+|"*" YY_OBS_GROW;
e9955c83
AD
366
367 <<EOF>> {
c4d720cd 368 complain_at (*yylloc, _("unexpected end of file in a comment"));
e9955c83
AD
369 yy_pop_state ();
370 }
371}
372
373
d8d3f94a
PE
374 /*--------------------------------------------------------------.
375 | Scanning a line comment. The initial `//' is already eaten. |
376 `--------------------------------------------------------------*/
377
378<SC_LINE_COMMENT>
379{
380 "\n" YY_OBS_GROW; yy_pop_state ();
381 ([^\n\[\]]|{splice})+ YY_OBS_GROW;
382 <<EOF>> yy_pop_state ();
383}
384
385
e9955c83
AD
386 /*----------------------------------------------------------------.
387 | Scanning a C string, including its escapes. The initial `"' is |
388 | already eaten. |
389 `----------------------------------------------------------------*/
390
391<SC_ESCAPED_STRING>
392{
db2cc12f 393 "\"" {
e9955c83 394 assert (yy_top_state () == INITIAL);
44995b2e
AD
395 YY_OBS_GROW;
396 YY_OBS_FINISH;
4cdb01db 397 yylval->string = last_string;
e9955c83 398 yy_pop_state ();
efcb44dd 399 rule_length++;
e9955c83
AD
400 return STRING;
401 }
402
d8d3f94a 403 [^\"\\]+ YY_OBS_GROW;
e9955c83
AD
404
405 <<EOF>> {
c4d720cd 406 complain_at (*yylloc, _("unexpected end of file in a string"));
e9955c83 407 assert (yy_top_state () == INITIAL);
44995b2e 408 YY_OBS_FINISH;
4cdb01db 409 yylval->string = last_string;
e9955c83
AD
410 yy_pop_state ();
411 return STRING;
412 }
413}
414
415 /*---------------------------------------------------------------.
416 | Scanning a C character, decoding its escapes. The initial "'" |
417 | is already eaten. |
418 `---------------------------------------------------------------*/
419
420<SC_ESCAPED_CHARACTER>
421{
db2cc12f 422 "'" {
44995b2e 423 YY_OBS_GROW;
e9955c83
AD
424 assert (yy_top_state () == INITIAL);
425 {
44995b2e 426 YY_OBS_FINISH;
39f41916 427 yylval->symbol = symbol_get (last_string, *yylloc);
e776192e 428 symbol_class_set (yylval->symbol, token_sym, *yylloc);
e68d4575
PE
429 symbol_user_token_number_set (yylval->symbol,
430 (unsigned char) last_string[1], *yylloc);
44995b2e 431 YY_OBS_FREE;
e9955c83 432 yy_pop_state ();
efcb44dd 433 rule_length++;
e9955c83
AD
434 return ID;
435 }
436 }
437
c4d720cd 438 [^\'\\]+ YY_OBS_GROW;
e9955c83
AD
439
440 <<EOF>> {
98f2caaa 441 complain_at (*yylloc, _("unexpected end of file in a character literal"));
e9955c83 442 assert (yy_top_state () == INITIAL);
44995b2e 443 YY_OBS_FINISH;
4cdb01db 444 yylval->string = last_string;
e9955c83
AD
445 yy_pop_state ();
446 return CHARACTER;
447 }
448}
449
450
451 /*----------------------------.
452 | Decode escaped characters. |
453 `----------------------------*/
454
455<SC_ESCAPED_STRING,SC_ESCAPED_CHARACTER>
456{
d8d3f94a
PE
457 \\[0-7]{1,3} {
458 unsigned long c = strtoul (yytext + 1, 0, 8);
459 if (UCHAR_MAX < c)
e9955c83 460 {
98f2caaa
PE
461 complain_at (*yylloc, _("invalid escape sequence: %s"),
462 quote (yytext));
e9955c83
AD
463 YY_STEP;
464 }
465 else
466 obstack_1grow (&string_obstack, c);
467 }
468
d8d3f94a
PE
469 \\x[0-9a-fA-F]+ {
470 unsigned long c;
471 errno = 0;
472 c = strtoul (yytext + 2, 0, 16);
473 if (UCHAR_MAX < c || errno)
474 {
98f2caaa
PE
475 complain_at (*yylloc, _("invalid escape sequence: %s"),
476 quote (yytext));
d8d3f94a
PE
477 YY_STEP;
478 }
479 else
480 obstack_1grow (&string_obstack, c);
e9955c83
AD
481 }
482
483 \\a obstack_1grow (&string_obstack, '\a');
484 \\b obstack_1grow (&string_obstack, '\b');
485 \\f obstack_1grow (&string_obstack, '\f');
486 \\n obstack_1grow (&string_obstack, '\n');
487 \\r obstack_1grow (&string_obstack, '\r');
488 \\t obstack_1grow (&string_obstack, '\t');
489 \\v obstack_1grow (&string_obstack, '\v');
c4d720cd 490 \\[\"\'?\\] obstack_1grow (&string_obstack, yytext[1]);
d8d3f94a
PE
491 \\(u|U[0-9a-fA-F]{4})[0-9a-fA-F]{4} {
492 int c = convert_ucn_to_byte (yytext);
493 if (c < 0)
494 {
98f2caaa
PE
495 complain_at (*yylloc, _("invalid escape sequence: %s"),
496 quote (yytext));
d8d3f94a
PE
497 YY_STEP;
498 }
499 else
500 obstack_1grow (&string_obstack, c);
501 }
4f25ebb0 502 \\(.|\n) {
98f2caaa
PE
503 complain_at (*yylloc, _("unrecognized escape sequence: %s"),
504 quote (yytext));
44995b2e 505 YY_OBS_GROW;
e9955c83 506 }
4f25ebb0
AD
507 /* Flex wants this rule, in case of a `\<<EOF>>'. */
508 \\ YY_OBS_GROW;
e9955c83
AD
509}
510
511
512 /*----------------------------------------------------------.
513 | Scanning a C character without decoding its escapes. The |
514 | initial "'" is already eaten. |
515 `----------------------------------------------------------*/
516
517<SC_CHARACTER>
518{
db2cc12f 519 "'" {
44995b2e 520 YY_OBS_GROW;
e9955c83
AD
521 assert (yy_top_state () != INITIAL);
522 yy_pop_state ();
523 }
524
d8d3f94a
PE
525 [^'\[\]\\]+ YY_OBS_GROW;
526 \\{splice}[^\[\]] YY_OBS_GROW;
527 {splice} YY_OBS_GROW;
528 /* Needed for `\<<EOF>>', `\\<<newline>>[', and `\\<<newline>>]'. */
4f25ebb0 529 \\ YY_OBS_GROW;
e9955c83 530
e9955c83 531 <<EOF>> {
98f2caaa 532 complain_at (*yylloc, _("unexpected end of file in a character literal"));
e9955c83
AD
533 assert (yy_top_state () != INITIAL);
534 yy_pop_state ();
535 }
536}
537
538
539 /*----------------------------------------------------------------.
540 | Scanning a C string, without decoding its escapes. The initial |
541 | `"' is already eaten. |
542 `----------------------------------------------------------------*/
543
544<SC_STRING>
545{
db2cc12f 546 "\"" {
e9955c83 547 assert (yy_top_state () != INITIAL);
44995b2e 548 YY_OBS_GROW;
e9955c83
AD
549 yy_pop_state ();
550 }
551
d8d3f94a
PE
552 [^\"\[\]\\]+ YY_OBS_GROW;
553 \\{splice}[^\[\]] YY_OBS_GROW;
554 {splice} YY_OBS_GROW;
555 /* Needed for `\<<EOF>>', `\\<<newline>>[', and `\\<<newline>>]'. */
4f25ebb0 556 \\ YY_OBS_GROW;
e9955c83 557
e9955c83 558 <<EOF>> {
c4d720cd 559 complain_at (*yylloc, _("unexpected end of file in a string"));
e9955c83
AD
560 assert (yy_top_state () != INITIAL);
561 yy_pop_state ();
562 }
563}
564
565
566 /*---------------------------------------------------.
567 | Strings, comments etc. can be found in user code. |
568 `---------------------------------------------------*/
569
570<SC_BRACED_CODE,SC_PROLOGUE,SC_EPILOGUE>
571{
572 /* Characters. We don't check there is only one. */
db2cc12f 573 "'" YY_OBS_GROW; yy_push_state (SC_CHARACTER);
e9955c83
AD
574
575 /* Strings. */
db2cc12f 576 "\"" YY_OBS_GROW; yy_push_state (SC_STRING);
e9955c83
AD
577
578 /* Comments. */
d8d3f94a
PE
579 "/"{splice}"*" YY_OBS_GROW; yy_push_state (SC_COMMENT);
580 "/"{splice}"/" YY_OBS_GROW; yy_push_state (SC_LINE_COMMENT);
4f25ebb0
AD
581
582 /* Not comments. */
583 "/" YY_OBS_GROW;
e9955c83
AD
584}
585
586
587 /*---------------------------------------------------------------.
588 | Scanning some code in braces (%union and actions). The initial |
589 | "{" is already eaten. |
590 `---------------------------------------------------------------*/
591
592<SC_BRACED_CODE>
593{
1a9e39f1
PE
594 "{"|"<"{splice}"%" YY_OBS_GROW; braces_level++;
595 "%"{splice}">" YY_OBS_GROW; braces_level--;
e9955c83 596 "}" {
44995b2e 597 YY_OBS_GROW;
1a9e39f1
PE
598 braces_level--;
599 if (braces_level < 0)
e9955c83
AD
600 {
601 yy_pop_state ();
44995b2e 602 YY_OBS_FINISH;
4cdb01db 603 yylval->string = last_string;
efcb44dd 604 rule_length++;
e9955c83
AD
605 return BRACED_CODE;
606 }
607 }
608
d8d3f94a 609 "$"("<"{tag}">")?(-?[0-9]+|"$") { handle_dollar (current_braced_code,
f25bfb75
AD
610 yytext, *yylloc); }
611 "@"(-?[0-9]+|"$") { handle_at (current_braced_code,
612 yytext, *yylloc); }
e9955c83 613
1a9e39f1
PE
614 /* `"<"{splice}"<"' tokenizes `<<%' correctly (as `<<' `%') rather
615 than incorrectly (as `<' `<%'). */
616 [^\"$%\'/<@\[\]\{\}]+|[$%/<@]|"<"{splice}"<" YY_OBS_GROW;
e9955c83
AD
617
618 <<EOF>> {
98f2caaa 619 complain_at (*yylloc, _("unexpected end of file in `{ ... }'"));
e9955c83 620 yy_pop_state ();
44995b2e 621 YY_OBS_FINISH;
4cdb01db
AD
622 yylval->string = last_string;
623 return BRACED_CODE;
e9955c83
AD
624 }
625
626}
627
628
629 /*--------------------------------------------------------------.
630 | Scanning some prologue: from "%{" (already scanned) to "%}". |
631 `--------------------------------------------------------------*/
632
633<SC_PROLOGUE>
634{
635 "%}" {
636 yy_pop_state ();
44995b2e 637 YY_OBS_FINISH;
4cdb01db 638 yylval->string = last_string;
e9955c83
AD
639 return PROLOGUE;
640 }
641
c4d720cd 642 [^%\[\]/\'\"]+ YY_OBS_GROW;
4f25ebb0 643 "%" YY_OBS_GROW;
e9955c83
AD
644
645 <<EOF>> {
98f2caaa 646 complain_at (*yylloc, _("unexpected end of file in `%%{ ... %%}'"));
e9955c83 647 yy_pop_state ();
44995b2e 648 YY_OBS_FINISH;
4cdb01db 649 yylval->string = last_string;
e9955c83
AD
650 return PROLOGUE;
651 }
e9955c83
AD
652}
653
654
655 /*---------------------------------------------------------------.
656 | Scanning the epilogue (everything after the second "%%", which |
d8d3f94a 657 | has already been eaten). |
e9955c83
AD
658 `---------------------------------------------------------------*/
659
660<SC_EPILOGUE>
661{
d8d3f94a 662 [^\[\]]+ YY_OBS_GROW;
e9955c83
AD
663
664 <<EOF>> {
665 yy_pop_state ();
44995b2e 666 YY_OBS_FINISH;
4cdb01db 667 yylval->string = last_string;
e9955c83
AD
668 return EPILOGUE;
669 }
670}
671
672
673%%
674
675/*------------------------------------------------------------------.
366eea36 676| TEXT is pointing to a wannabee semantic value (i.e., a `$'). |
e9955c83
AD
677| |
678| Possible inputs: $[<TYPENAME>]($|integer) |
679| |
680| Output to the STRING_OBSTACK a reference to this semantic value. |
681`------------------------------------------------------------------*/
682
f25bfb75 683static inline void
366eea36 684handle_action_dollar (char *text, location_t location)
e9955c83
AD
685{
686 const char *type_name = NULL;
366eea36 687 char *cp = text + 1;
e9955c83
AD
688
689 /* Get the type name if explicit. */
690 if (*cp == '<')
691 {
692 type_name = ++cp;
693 while (*cp != '>')
694 ++cp;
695 *cp = '\0';
696 ++cp;
697 }
698
699 if (*cp == '$')
700 {
701 if (!type_name)
56c47203 702 type_name = symbol_list_n_type_name_get (current_rule, location, 0);
e9955c83 703 if (!type_name && typed)
56c47203 704 complain_at (location, _("$$ of `%s' has no declared type"),
97650f4e 705 current_rule->sym->tag);
e9955c83
AD
706 if (!type_name)
707 type_name = "";
708 obstack_fgrow1 (&string_obstack,
709 "]b4_lhs_value([%s])[", type_name);
710 }
d8d3f94a 711 else
e9955c83 712 {
d8d3f94a
PE
713 long num;
714 errno = 0;
715 num = strtol (cp, 0, 10);
e9955c83 716
d8d3f94a 717 if (INT_MIN <= num && num <= rule_length && ! errno)
e9955c83 718 {
d8d3f94a 719 int n = num;
e9955c83 720 if (!type_name && n > 0)
56c47203
AD
721 type_name = symbol_list_n_type_name_get (current_rule, location,
722 n);
e9955c83 723 if (!type_name && typed)
56c47203 724 complain_at (location, _("$%d of `%s' has no declared type"),
97650f4e 725 n, current_rule->sym->tag);
e9955c83
AD
726 if (!type_name)
727 type_name = "";
728 obstack_fgrow3 (&string_obstack,
729 "]b4_rhs_value([%d], [%d], [%s])[",
730 rule_length, n, type_name);
731 }
d8d3f94a 732 else
98f2caaa 733 complain_at (location, _("integer out of range: %s"), quote (text));
9280d3ef
AD
734 }
735}
736
737
366eea36 738/*---------------------------------------------------------------.
d8d3f94a 739| TEXT is expected to be $$ in some code associated to a symbol: |
366eea36
AD
740| destructor or printer. |
741`---------------------------------------------------------------*/
9280d3ef 742
f25bfb75 743static inline void
366eea36 744handle_symbol_code_dollar (char *text, location_t location)
9280d3ef 745{
366eea36 746 char *cp = text + 1;
9280d3ef 747 if (*cp == '$')
366eea36 748 obstack_sgrow (&string_obstack, "]b4_dollar_dollar[");
9280d3ef 749 else
c4d720cd 750 complain_at (location, _("invalid value: %s"), quote (text));
e9955c83
AD
751}
752
f25bfb75
AD
753
754/*-----------------------------------------------------------------.
755| Dispatch onto handle_action_dollar, or handle_destructor_dollar, |
756| depending upon CODE_KIND. |
757`-----------------------------------------------------------------*/
e9955c83
AD
758
759static void
f25bfb75
AD
760handle_dollar (braced_code_t braced_code_kind,
761 char *text, location_t location)
762{
763 switch (braced_code_kind)
764 {
765 case action_braced_code:
766 handle_action_dollar (text, location);
767 break;
768
769 case destructor_braced_code:
366eea36
AD
770 case printer_braced_code:
771 handle_symbol_code_dollar (text, location);
f25bfb75
AD
772 break;
773 }
774}
775
776
777/*------------------------------------------------------.
778| TEXT is a location token (i.e., a `@...'). Output to |
779| STRING_OBSTACK a reference to this location. |
780`------------------------------------------------------*/
781
782static inline void
783handle_action_at (char *text, location_t location)
e9955c83 784{
366eea36 785 char *cp = text + 1;
e9955c83 786 locations_flag = 1;
e9955c83 787
366eea36 788 if (*cp == '$')
e9955c83
AD
789 {
790 obstack_sgrow (&string_obstack, "]b4_lhs_location[");
791 }
d8d3f94a 792 else
e9955c83 793 {
d8d3f94a
PE
794 long num;
795 errno = 0;
796 num = strtol (cp, 0, 10);
dafdc66f 797
d8d3f94a
PE
798 if (INT_MIN <= num && num <= rule_length && ! errno)
799 {
800 int n = num;
801 obstack_fgrow2 (&string_obstack, "]b4_rhs_location([%d], [%d])[",
802 rule_length, n);
803 }
e9955c83 804 else
98f2caaa 805 complain_at (location, _("integer out of range: %s"), quote (text));
f25bfb75
AD
806 }
807}
808
809
366eea36 810/*---------------------------------------------------------------.
d8d3f94a 811| TEXT is expected to be @$ in some code associated to a symbol: |
366eea36
AD
812| destructor or printer. |
813`---------------------------------------------------------------*/
f25bfb75
AD
814
815static inline void
366eea36 816handle_symbol_code_at (char *text, location_t location)
f25bfb75 817{
366eea36
AD
818 char *cp = text + 1;
819 if (*cp == '$')
820 obstack_sgrow (&string_obstack, "]b4_at_dollar[");
f25bfb75 821 else
c4d720cd 822 complain_at (location, _("invalid value: %s"), quote (text));
e9955c83 823}
4cdb01db 824
f25bfb75
AD
825
826/*-------------------------------------------------------------------.
827| Dispatch onto handle_action_at, or handle_destructor_at, depending |
828| upon CODE_KIND. |
829`-------------------------------------------------------------------*/
830
831static void
832handle_at (braced_code_t braced_code_kind,
833 char *text, location_t location)
834{
835 switch (braced_code_kind)
836 {
837 case action_braced_code:
838 handle_action_at (text, location);
839 break;
840
841 case destructor_braced_code:
366eea36
AD
842 case printer_braced_code:
843 handle_symbol_code_at (text, location);
f25bfb75
AD
844 break;
845 }
846}
847
848
d8d3f94a
PE
/*-----------------------------------------------------------------.
| Convert universal character name UCN to a single-byte            |
| character, and return that character.  Return -1 if UCN does     |
| not correspond to a single-byte character.                       |
`-----------------------------------------------------------------*/

static int
convert_ucn_to_byte (char const *ucn)
{
  /* Skip the two-character `\u' or `\U' prefix; the rest is
     hexadecimal.  */
  char const *hex_digits = ucn + 2;
  unsigned long ucs = strtoul (hex_digits, 0, 16);

  /* FIXME: Currently we assume Unicode-compatible unibyte characters
     on ASCII hosts (i.e., Latin-1 on hosts with 8-bit bytes).  On
     non-ASCII hosts we support only the portable C character set.
     These limitations should be removed once we add support for
     multibyte characters.  */

  if (ucs > UCHAR_MAX)
    return -1;

#if ! ('$' == 0x24 && '@' == 0x40 && '`' == 0x60 && '~' == 0x7e)
  {
    /* A non-ASCII host.  Use UCS to index into a table of the C
       basic execution character set, which is guaranteed to exist on
       all Standard C platforms.  This table also includes '$', '@',
       and '`', which are not in the basic execution character set but
       which are unibyte characters on all the platforms that we know
       about.  */
    static signed char const table[] =
      {
        '\0',   -1,   -1,   -1,   -1,   -1,   -1, '\a',
        '\b', '\t', '\n', '\v', '\f', '\r',   -1,   -1,
          -1,   -1,   -1,   -1,   -1,   -1,   -1,   -1,
          -1,   -1,   -1,   -1,   -1,   -1,   -1,   -1,
         ' ',  '!',  '"',  '#',  '$',  '%',  '&', '\'',
         '(',  ')',  '*',  '+',  ',',  '-',  '.',  '/',
         '0',  '1',  '2',  '3',  '4',  '5',  '6',  '7',
         '8',  '9',  ':',  ';',  '<',  '=',  '>',  '?',
         '@',  'A',  'B',  'C',  'D',  'E',  'F',  'G',
         'H',  'I',  'J',  'K',  'L',  'M',  'N',  'O',
         'P',  'Q',  'R',  'S',  'T',  'U',  'V',  'W',
         'X',  'Y',  'Z',  '[', '\\',  ']',  '^',  '_',
         '`',  'a',  'b',  'c',  'd',  'e',  'f',  'g',
         'h',  'i',  'j',  'k',  'l',  'm',  'n',  'o',
         'p',  'q',  'r',  's',  't',  'u',  'v',  'w',
         'x',  'y',  'z',  '{',  '|',  '}',  '~'
      };

    /* Code points past the end of the table have no mapping.  */
    if (ucs < sizeof table)
      ucs = table[ucs];
    else
      ucs = -1;
  }
#endif

  return ucs;
}
903
904
900c5db5
AD
905/*----------------------------------------------------------------.
906| Handle `#line INT "FILE"'. ARGS has already skipped `#line '. |
907`----------------------------------------------------------------*/
908
909static void
910handle_syncline (char *args, location_t *location)
911{
912 int lineno = strtol (args, &args, 10);
913 const char *file = NULL;
914 file = strchr (args, '"') + 1;
915 *strchr (file, '"') = 0;
916 /* FIXME: Leaking... Can't free, as some locations are still
917 pointing to the old file name. */
918 infile = xstrdup (file);
919 location->file = infile;
920 location->last_line = lineno;
921}
922
f25bfb75
AD
923/*-------------------------.
924| Initialize the scanner. |
925`-------------------------*/
926
1d6412ad
AD
927void
928scanner_initialize (void)
929{
930 obstack_init (&string_obstack);
931}
932
933
f25bfb75
AD
934/*-----------------------------------------------.
935| Free all the memory allocated to the scanner. |
936`-----------------------------------------------*/
937
4cdb01db
AD
938void
939scanner_free (void)
940{
941 obstack_free (&string_obstack, 0);
536545f3
AD
942 /* Reclaim Flex's buffers. */
943 yy_delete_buffer (YY_CURRENT_BUFFER);
4cdb01db 944}