]> git.saurik.com Git - bison.git/blame - src/scan-gram.l
Regenerate.
[bison.git] / src / scan-gram.l
CommitLineData
e9955c83 1/* Bison Grammar Scanner -*- C -*-
3b1e470c 2
05ac60f3 3 Copyright (C) 2002, 2003, 2004, 2005 Free Software Foundation, Inc.
e9955c83
AD
4
5 This file is part of Bison, the GNU Compiler Compiler.
6
7 This program is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 2 of the License, or
10 (at your option) any later version.
11
12 This program is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
16
17 You should have received a copy of the GNU General Public License
18 along with this program; if not, write to the Free Software
0fb669f9
PE
19 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
20 02110-1301 USA
e9955c83
AD
21*/
22
aa418041 23%option debug nodefault nounput noyywrap never-interactive
e9955c83
AD
24%option prefix="gram_" outfile="lex.yy.c"
25
26%{
4f6e011e
PE
27/* Work around a bug in flex 2.5.31. See Debian bug 333231
28 <http://bugs.debian.org/cgi-bin/bugreport.cgi?bug=333231>. */
29#undef gram_wrap
30#define gram_wrap() 1
31
e9955c83 32#include "system.h"
223ff46e
PE
33
34#include <mbswidth.h>
35#include <get-errno.h>
36#include <quote.h>
37
e9955c83 38#include "complain.h"
3f2d73f1 39#include "files.h"
e9955c83
AD
40#include "getargs.h"
41#include "gram.h"
ca407bdf 42#include "quotearg.h"
e9955c83 43#include "reader.h"
223ff46e 44#include "uniqstr.h"
e9955c83 45
3f2d73f1
PE
46#define YY_USER_INIT \
47 do \
48 { \
49 scanner_cursor.file = current_file; \
50 scanner_cursor.line = 1; \
51 scanner_cursor.column = 1; \
379f0ac8 52 code_start = scanner_cursor; \
3f2d73f1
PE
53 } \
54 while (0)
8efe435c 55
dc9701e8
PE
56/* Pacify "gcc -Wmissing-prototypes" when flex 2.5.31 is used. */
57int gram_get_lineno (void);
58FILE *gram_get_in (void);
59FILE *gram_get_out (void);
60int gram_get_leng (void);
61char *gram_get_text (void);
62void gram_set_lineno (int);
63void gram_set_in (FILE *);
64void gram_set_out (FILE *);
65int gram_get_debug (void);
66void gram_set_debug (int);
67int gram_lex_destroy (void);
68
3f2d73f1
PE
69/* Location of scanner cursor. */
70boundary scanner_cursor;
41141c56 71
223ff46e 72static void adjust_location (location *, char const *, size_t);
3f2d73f1 73#define YY_USER_ACTION adjust_location (loc, yytext, yyleng);
d8d3f94a 74
6c30d641 75static size_t no_cr_read (FILE *, char *, size_t);
d8d3f94a
PE
76#define YY_INPUT(buf, result, size) ((result) = no_cr_read (yyin, buf, size))
77
78
223ff46e 79/* OBSTACK_FOR_STRING -- Used to store all the characters that we need to
44995b2e
AD
80 keep (to construct ID, STRINGS etc.). Use the following macros to
81 use it.
82
41141c56
PE
83 Use STRING_GROW to append what has just been matched, and
84 STRING_FINISH to end the string (it puts the ending 0).
85 STRING_FINISH also stores this string in LAST_STRING, which can be
86 used, and which is used by STRING_FREE to free the last string. */
44995b2e 87
223ff46e 88static struct obstack obstack_for_string;
44995b2e 89
7ec2d4cd
AD
90/* A string representing the most recently saved token. */
91static char *last_string;
92
93
41141c56 94#define STRING_GROW \
223ff46e 95 obstack_grow (&obstack_for_string, yytext, yyleng)
44995b2e 96
41141c56 97#define STRING_FINISH \
44995b2e 98 do { \
223ff46e
PE
99 obstack_1grow (&obstack_for_string, '\0'); \
100 last_string = obstack_finish (&obstack_for_string); \
44995b2e
AD
101 } while (0)
102
41141c56 103#define STRING_FREE \
223ff46e 104 obstack_free (&obstack_for_string, last_string)
e9955c83 105
7ec2d4cd
AD
106void
107scanner_last_string_free (void)
108{
41141c56 109 STRING_FREE;
7ec2d4cd 110}
e9955c83 111
efcb44dd
PE
112/* Within well-formed rules, RULE_LENGTH is the number of values in
113 the current rule so far, which says where to find `$0' with respect
114 to the top of the stack. It is not the same as the rule->length in
115 the case of mid rule actions.
116
117 Outside of well-formed rules, RULE_LENGTH has an undefined value. */
118static int rule_length;
119
624a35e2
PE
120static void handle_dollar (int token_type, char *cp, location loc);
121static void handle_at (int token_type, char *cp, location loc);
3f2d73f1 122static void handle_syncline (char *args);
1452af69 123static unsigned long int scan_integer (char const *p, int base, location loc);
d8d3f94a 124static int convert_ucn_to_byte (char const *hex_text);
aa418041 125static void unexpected_eof (boundary, char const *);
4febdd96 126static void unexpected_newline (boundary, char const *);
e9955c83
AD
127
128%}
d8d3f94a 129%x SC_COMMENT SC_LINE_COMMENT SC_YACC_COMMENT
e9955c83 130%x SC_STRING SC_CHARACTER
3f2d73f1 131%x SC_AFTER_IDENTIFIER
e9955c83 132%x SC_ESCAPED_STRING SC_ESCAPED_CHARACTER
624a35e2 133%x SC_PRE_CODE SC_BRACED_CODE SC_PROLOGUE SC_EPILOGUE
e9955c83 134
29c01725
AD
135letter [.abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ_]
136id {letter}({letter}|[0-9])*
137directive %{letter}({letter}|[0-9]|-)*
624a35e2 138int [0-9]+
d8d3f94a
PE
139
140/* POSIX says that a tag must be both an id and a C union member, but
141 historically almost any character is allowed in a tag. We disallow
142 NUL and newline, as this simplifies our implementation. */
143tag [^\0\n>]+
144
145/* Zero or more instances of backslash-newline. Following GCC, allow
146 white space between the backslash and the newline. */
147splice (\\[ \f\t\v]*\n)*
e9955c83
AD
148
149%%
150%{
a706a1cc 151 /* Nesting level of the current code in braces. */
1a9e39f1
PE
152 int braces_level IF_LINT (= 0);
153
3f2d73f1
PE
154 /* Parent context state, when applicable. */
155 int context_state IF_LINT (= 0);
a706a1cc 156
624a35e2
PE
157 /* Token type to return, when applicable. */
158 int token_type IF_LINT (= 0);
159
3f2d73f1 160 /* Location of most recent identifier, when applicable. */
a2bc9dbc 161 location id_loc IF_LINT (= empty_location);
3f2d73f1 162
a2bc9dbc
PE
163 /* Where containing code started, when applicable. Its initial
164 value is relevant only when yylex is invoked in the SC_EPILOGUE
165 start condition. */
166 boundary code_start = scanner_cursor;
3f2d73f1 167
223ff46e
PE
168 /* Where containing comment or string or character literal started,
169 when applicable. */
a2bc9dbc 170 boundary token_start IF_LINT (= scanner_cursor);
e9955c83
AD
171%}
172
173
3f2d73f1
PE
174 /*-----------------------.
175 | Scanning white space. |
176 `-----------------------*/
177
624a35e2 178<INITIAL,SC_AFTER_IDENTIFIER,SC_PRE_CODE>
3f2d73f1 179{
4febdd96 180 /* Comments and white space. */
83adb046 181 "," warn_at (*loc, _("stray `,' treated as white space"));
4febdd96 182 [ \f\n\t\v] |
3f2d73f1 183 "//".* ;
83adb046
PE
184 "/*" {
185 token_start = loc->start;
186 context_state = YY_START;
187 BEGIN SC_YACC_COMMENT;
188 }
3f2d73f1
PE
189
190 /* #line directives are not documented, and may be withdrawn or
191 modified in future versions of Bison. */
192 ^"#line "{int}" \"".*"\"\n" {
193 handle_syncline (yytext + sizeof "#line " - 1);
194 }
195}
196
197
e9955c83
AD
198 /*----------------------------.
199 | Scanning Bison directives. |
200 `----------------------------*/
201<INITIAL>
202{
203 "%binary" return PERCENT_NONASSOC;
204 "%debug" return PERCENT_DEBUG;
39a06c25 205 "%default"[-_]"prec" return PERCENT_DEFAULT_PREC;
e9955c83
AD
206 "%define" return PERCENT_DEFINE;
207 "%defines" return PERCENT_DEFINES;
624a35e2 208 "%destructor" token_type = PERCENT_DESTRUCTOR; BEGIN SC_PRE_CODE;
676385e2 209 "%dprec" return PERCENT_DPREC;
e9955c83
AD
210 "%error"[-_]"verbose" return PERCENT_ERROR_VERBOSE;
211 "%expect" return PERCENT_EXPECT;
d6328241 212 "%expect"[-_]"rr" return PERCENT_EXPECT_RR;
e9955c83
AD
213 "%file-prefix" return PERCENT_FILE_PREFIX;
214 "%fixed"[-_]"output"[-_]"files" return PERCENT_YACC;
cd3684cf 215 "%initial-action" token_type = PERCENT_INITIAL_ACTION; BEGIN SC_PRE_CODE;
ae7453f2 216 "%glr-parser" return PERCENT_GLR_PARSER;
e9955c83 217 "%left" return PERCENT_LEFT;
624a35e2 218 "%lex-param" token_type = PERCENT_LEX_PARAM; BEGIN SC_PRE_CODE;
e9955c83 219 "%locations" return PERCENT_LOCATIONS;
676385e2 220 "%merge" return PERCENT_MERGE;
e9955c83 221 "%name"[-_]"prefix" return PERCENT_NAME_PREFIX;
22fccf95 222 "%no"[-_]"default"[-_]"prec" return PERCENT_NO_DEFAULT_PREC;
e9955c83
AD
223 "%no"[-_]"lines" return PERCENT_NO_LINES;
224 "%nonassoc" return PERCENT_NONASSOC;
916708d5 225 "%nondeterministic-parser" return PERCENT_NONDETERMINISTIC_PARSER;
e9955c83
AD
226 "%nterm" return PERCENT_NTERM;
227 "%output" return PERCENT_OUTPUT;
624a35e2 228 "%parse-param" token_type = PERCENT_PARSE_PARAM; BEGIN SC_PRE_CODE;
d8d3f94a 229 "%prec" rule_length--; return PERCENT_PREC;
624a35e2 230 "%printer" token_type = PERCENT_PRINTER; BEGIN SC_PRE_CODE;
e9955c83 231 "%pure"[-_]"parser" return PERCENT_PURE_PARSER;
b50d2359 232 "%require" return PERCENT_REQUIRE;
e9955c83
AD
233 "%right" return PERCENT_RIGHT;
234 "%skeleton" return PERCENT_SKELETON;
235 "%start" return PERCENT_START;
236 "%term" return PERCENT_TOKEN;
237 "%token" return PERCENT_TOKEN;
238 "%token"[-_]"table" return PERCENT_TOKEN_TABLE;
239 "%type" return PERCENT_TYPE;
624a35e2 240 "%union" token_type = PERCENT_UNION; BEGIN SC_PRE_CODE;
e9955c83
AD
241 "%verbose" return PERCENT_VERBOSE;
242 "%yacc" return PERCENT_YACC;
243
3f2d73f1 244 {directive} {
41141c56 245 complain_at (*loc, _("invalid directive: %s"), quote (yytext));
412f8a59 246 }
900c5db5 247
e9955c83 248 "=" return EQUAL;
d8d3f94a 249 "|" rule_length = 0; return PIPE;
e9955c83
AD
250 ";" return SEMICOLON;
251
3f2d73f1 252 {id} {
41141c56 253 val->symbol = symbol_get (yytext, *loc);
3f2d73f1 254 id_loc = *loc;
efcb44dd 255 rule_length++;
3f2d73f1 256 BEGIN SC_AFTER_IDENTIFIER;
e9955c83
AD
257 }
258
d8d3f94a 259 {int} {
1452af69
PE
260 val->integer = scan_integer (yytext, 10, *loc);
261 return INT;
262 }
263 0[xX][0-9abcdefABCDEF]+ {
264 val->integer = scan_integer (yytext, 16, *loc);
d8d3f94a
PE
265 return INT;
266 }
e9955c83
AD
267
268 /* Characters. We don't check there is only one. */
3f2d73f1 269 "'" STRING_GROW; token_start = loc->start; BEGIN SC_ESCAPED_CHARACTER;
e9955c83
AD
270
271 /* Strings. */
ca407bdf 272 "\"" token_start = loc->start; BEGIN SC_ESCAPED_STRING;
e9955c83
AD
273
274 /* Prologue. */
3f2d73f1 275 "%{" code_start = loc->start; BEGIN SC_PROLOGUE;
e9955c83
AD
276
277 /* Code in between braces. */
3f2d73f1
PE
278 "{" {
279 STRING_GROW;
624a35e2 280 token_type = BRACED_CODE;
3f2d73f1
PE
281 braces_level = 0;
282 code_start = loc->start;
283 BEGIN SC_BRACED_CODE;
284 }
e9955c83
AD
285
286 /* A type. */
d8d3f94a 287 "<"{tag}">" {
223ff46e 288 obstack_grow (&obstack_for_string, yytext + 1, yyleng - 2);
41141c56 289 STRING_FINISH;
223ff46e 290 val->uniqstr = uniqstr_new (last_string);
41141c56 291 STRING_FREE;
4cdb01db
AD
292 return TYPE;
293 }
294
a706a1cc
PE
295 "%%" {
296 static int percent_percent_count;
e9955c83 297 if (++percent_percent_count == 2)
a2bc9dbc 298 BEGIN SC_EPILOGUE;
e9955c83
AD
299 return PERCENT_PERCENT;
300 }
301
a706a1cc 302 . {
41141c56 303 complain_at (*loc, _("invalid character: %s"), quote (yytext));
3f2d73f1 304 }
379f0ac8
PE
305
306 <<EOF>> {
307 loc->start = loc->end = scanner_cursor;
308 yyterminate ();
309 }
3f2d73f1
PE
310}
311
312
313 /*-----------------------------------------------------------------.
314 | Scanning after an identifier, checking whether a colon is next. |
315 `-----------------------------------------------------------------*/
316
317<SC_AFTER_IDENTIFIER>
318{
319 ":" {
320 rule_length = 0;
321 *loc = id_loc;
322 BEGIN INITIAL;
323 return ID_COLON;
324 }
325 . {
326 scanner_cursor.column -= mbsnwidth (yytext, yyleng, 0);
327 yyless (0);
328 *loc = id_loc;
329 BEGIN INITIAL;
330 return ID;
331 }
332 <<EOF>> {
333 *loc = id_loc;
334 BEGIN INITIAL;
335 return ID;
e9955c83
AD
336 }
337}
338
339
d8d3f94a
PE
340 /*---------------------------------------------------------------.
341 | Scanning a Yacc comment. The initial `/ *' is already eaten. |
342 `---------------------------------------------------------------*/
e9955c83 343
d8d3f94a 344<SC_YACC_COMMENT>
e9955c83 345{
3f2d73f1 346 "*/" BEGIN context_state;
a706a1cc 347 .|\n ;
aa418041 348 <<EOF>> unexpected_eof (token_start, "*/"); BEGIN context_state;
d8d3f94a
PE
349}
350
351
352 /*------------------------------------------------------------.
353 | Scanning a C comment. The initial `/ *' is already eaten. |
354 `------------------------------------------------------------*/
355
356<SC_COMMENT>
357{
3f2d73f1 358 "*"{splice}"/" STRING_GROW; BEGIN context_state;
aa418041 359 <<EOF>> unexpected_eof (token_start, "*/"); BEGIN context_state;
e9955c83
AD
360}
361
362
d8d3f94a
PE
363 /*--------------------------------------------------------------.
364 | Scanning a line comment. The initial `//' is already eaten. |
365 `--------------------------------------------------------------*/
366
367<SC_LINE_COMMENT>
368{
3f2d73f1 369 "\n" STRING_GROW; BEGIN context_state;
41141c56 370 {splice} STRING_GROW;
3f2d73f1 371 <<EOF>> BEGIN context_state;
d8d3f94a
PE
372}
373
374
4febdd96
PE
375 /*------------------------------------------------.
376 | Scanning a Bison string, including its escapes. |
377 | The initial quote is already eaten. |
378 `------------------------------------------------*/
e9955c83
AD
379
380<SC_ESCAPED_STRING>
381{
db2cc12f 382 "\"" {
41141c56 383 STRING_FINISH;
3f2d73f1 384 loc->start = token_start;
223ff46e 385 val->chars = last_string;
efcb44dd 386 rule_length++;
a706a1cc 387 BEGIN INITIAL;
e9955c83
AD
388 return STRING;
389 }
4febdd96
PE
390 \n unexpected_newline (token_start, "\""); BEGIN INITIAL;
391 <<EOF>> unexpected_eof (token_start, "\""); BEGIN INITIAL;
e9955c83
AD
392}
393
4febdd96
PE
394 /*----------------------------------------------------------.
395 | Scanning a Bison character literal, decoding its escapes. |
396 | The initial quote is already eaten. |
397 `----------------------------------------------------------*/
e9955c83
AD
398
399<SC_ESCAPED_CHARACTER>
400{
db2cc12f 401 "'" {
3b1e470c 402 unsigned char last_string_1;
41141c56
PE
403 STRING_GROW;
404 STRING_FINISH;
3f2d73f1 405 loc->start = token_start;
ca407bdf
PE
406 val->symbol = symbol_get (quotearg_style (escape_quoting_style,
407 last_string),
408 *loc);
41141c56 409 symbol_class_set (val->symbol, token_sym, *loc);
3b1e470c
PE
410 last_string_1 = last_string[1];
411 symbol_user_token_number_set (val->symbol, last_string_1, *loc);
41141c56 412 STRING_FREE;
a706a1cc
PE
413 rule_length++;
414 BEGIN INITIAL;
415 return ID;
e9955c83 416 }
4febdd96
PE
417 \n unexpected_newline (token_start, "'"); BEGIN INITIAL;
418 <<EOF>> unexpected_eof (token_start, "'"); BEGIN INITIAL;
419}
a706a1cc 420
4febdd96
PE
421<SC_ESCAPED_CHARACTER,SC_ESCAPED_STRING>
422{
92ac3705 423 \0 complain_at (*loc, _("invalid null character"));
e9955c83
AD
424}
425
426
427 /*----------------------------.
428 | Decode escaped characters. |
429 `----------------------------*/
430
431<SC_ESCAPED_STRING,SC_ESCAPED_CHARACTER>
432{
d8d3f94a 433 \\[0-7]{1,3} {
1452af69 434 unsigned long int c = strtoul (yytext + 1, 0, 8);
d8d3f94a 435 if (UCHAR_MAX < c)
3f2d73f1 436 complain_at (*loc, _("invalid escape sequence: %s"), quote (yytext));
05ac60f3 437 else if (! c)
92ac3705 438 complain_at (*loc, _("invalid null character: %s"), quote (yytext));
e9955c83 439 else
223ff46e 440 obstack_1grow (&obstack_for_string, c);
e9955c83
AD
441 }
442
6b0d38ab 443 \\x[0-9abcdefABCDEF]+ {
1452af69 444 unsigned long int c;
223ff46e 445 set_errno (0);
d8d3f94a 446 c = strtoul (yytext + 2, 0, 16);
223ff46e 447 if (UCHAR_MAX < c || get_errno ())
3f2d73f1 448 complain_at (*loc, _("invalid escape sequence: %s"), quote (yytext));
92ac3705
PE
449 else if (! c)
450 complain_at (*loc, _("invalid null character: %s"), quote (yytext));
d8d3f94a 451 else
223ff46e 452 obstack_1grow (&obstack_for_string, c);
e9955c83
AD
453 }
454
223ff46e
PE
455 \\a obstack_1grow (&obstack_for_string, '\a');
456 \\b obstack_1grow (&obstack_for_string, '\b');
457 \\f obstack_1grow (&obstack_for_string, '\f');
458 \\n obstack_1grow (&obstack_for_string, '\n');
459 \\r obstack_1grow (&obstack_for_string, '\r');
460 \\t obstack_1grow (&obstack_for_string, '\t');
461 \\v obstack_1grow (&obstack_for_string, '\v');
412f8a59
PE
462
463 /* \\[\"\'?\\] would be shorter, but it confuses xgettext. */
223ff46e 464 \\("\""|"'"|"?"|"\\") obstack_1grow (&obstack_for_string, yytext[1]);
412f8a59 465
6b0d38ab 466 \\(u|U[0-9abcdefABCDEF]{4})[0-9abcdefABCDEF]{4} {
d8d3f94a
PE
467 int c = convert_ucn_to_byte (yytext);
468 if (c < 0)
3f2d73f1 469 complain_at (*loc, _("invalid escape sequence: %s"), quote (yytext));
92ac3705
PE
470 else if (! c)
471 complain_at (*loc, _("invalid null character: %s"), quote (yytext));
d8d3f94a 472 else
223ff46e 473 obstack_1grow (&obstack_for_string, c);
d8d3f94a 474 }
4f25ebb0 475 \\(.|\n) {
3f2d73f1 476 complain_at (*loc, _("unrecognized escape sequence: %s"), quote (yytext));
41141c56 477 STRING_GROW;
e9955c83
AD
478 }
479}
480
4febdd96
PE
481 /*--------------------------------------------.
482 | Scanning user-code characters and strings. |
483 `--------------------------------------------*/
e9955c83 484
4febdd96
PE
485<SC_CHARACTER,SC_STRING>
486{
487 {splice}|\\{splice}[^\n$@\[\]] STRING_GROW;
488}
e9955c83
AD
489
490<SC_CHARACTER>
491{
4febdd96
PE
492 "'" STRING_GROW; BEGIN context_state;
493 \n unexpected_newline (token_start, "'"); BEGIN context_state;
494 <<EOF>> unexpected_eof (token_start, "'"); BEGIN context_state;
e9955c83
AD
495}
496
e9955c83
AD
497<SC_STRING>
498{
4febdd96
PE
499 "\"" STRING_GROW; BEGIN context_state;
500 \n unexpected_newline (token_start, "\""); BEGIN context_state;
501 <<EOF>> unexpected_eof (token_start, "\""); BEGIN context_state;
e9955c83
AD
502}
503
504
505 /*---------------------------------------------------.
506 | Strings, comments etc. can be found in user code. |
507 `---------------------------------------------------*/
508
509<SC_BRACED_CODE,SC_PROLOGUE,SC_EPILOGUE>
510{
3f2d73f1
PE
511 "'" {
512 STRING_GROW;
513 context_state = YY_START;
514 token_start = loc->start;
515 BEGIN SC_CHARACTER;
516 }
517 "\"" {
518 STRING_GROW;
519 context_state = YY_START;
520 token_start = loc->start;
521 BEGIN SC_STRING;
522 }
523 "/"{splice}"*" {
524 STRING_GROW;
525 context_state = YY_START;
526 token_start = loc->start;
527 BEGIN SC_COMMENT;
528 }
529 "/"{splice}"/" {
530 STRING_GROW;
531 context_state = YY_START;
532 BEGIN SC_LINE_COMMENT;
533 }
e9955c83
AD
534}
535
536
624a35e2
PE
537 /*---------------------------------------------------------------.
538 | Scanning after %union etc., possibly followed by white space. |
539 | For %union only, allow arbitrary C code to appear before the |
540 | following brace, as an extension to POSIX. |
541 `---------------------------------------------------------------*/
542
543<SC_PRE_CODE>
544{
545 . {
546 bool valid = yytext[0] == '{' || token_type == PERCENT_UNION;
547 scanner_cursor.column -= mbsnwidth (yytext, yyleng, 0);
548 yyless (0);
549
550 if (valid)
551 {
552 braces_level = -1;
553 code_start = loc->start;
554 BEGIN SC_BRACED_CODE;
555 }
556 else
557 {
6d07bacf 558 complain_at (*loc, _("missing `{' in %s"),
624a35e2
PE
559 token_name (token_type));
560 obstack_sgrow (&obstack_for_string, "{}");
561 STRING_FINISH;
562 val->chars = last_string;
563 BEGIN INITIAL;
564 return token_type;
565 }
566 }
379f0ac8 567
aa418041 568 <<EOF>> unexpected_eof (scanner_cursor, "{}"); BEGIN INITIAL;
624a35e2
PE
569}
570
571
e9955c83
AD
572 /*---------------------------------------------------------------.
573 | Scanning some code in braces (%union and actions). The initial |
574 | "{" is already eaten. |
575 `---------------------------------------------------------------*/
576
577<SC_BRACED_CODE>
578{
41141c56
PE
579 "{"|"<"{splice}"%" STRING_GROW; braces_level++;
580 "%"{splice}">" STRING_GROW; braces_level--;
e9955c83 581 "}" {
25522739
PE
582 bool outer_brace = --braces_level < 0;
583
584 /* As an undocumented Bison extension, append `;' before the last
585 brace in braced code, so that the user code can omit trailing
586 `;'. But do not append `;' if emulating Yacc, since Yacc does
587 not append one.
588
589 FIXME: Bison should warn if a semicolon seems to be necessary
590 here, and should omit the semicolon if it seems unnecessary
591 (e.g., after ';', '{', or '}', each followed by comments or
592 white space). Such a warning shouldn't depend on --yacc; it
593 should depend on a new --pedantic option, which would cause
594 Bison to warn if it detects an extension to POSIX. --pedantic
595 should also diagnose other Bison extensions like %yacc.
596 Perhaps there should also be a GCC-style --pedantic-errors
597 option, so that such warnings are diagnosed as errors. */
1deb9bdc 598 if (outer_brace && token_type == BRACED_CODE && ! yacc_flag)
25522739
PE
599 obstack_1grow (&obstack_for_string, ';');
600
601 obstack_1grow (&obstack_for_string, '}');
602
603 if (outer_brace)
e9955c83 604 {
41141c56 605 STRING_FINISH;
624a35e2 606 rule_length++;
3f2d73f1 607 loc->start = code_start;
223ff46e 608 val->chars = last_string;
a706a1cc 609 BEGIN INITIAL;
624a35e2 610 return token_type;
e9955c83
AD
611 }
612 }
613
a706a1cc
PE
  /* Tokenize `<<%' correctly (as `<<' `%') rather than incorrectly
     (as `<' `<%').  */
41141c56 616 "<"{splice}"<" STRING_GROW;
a706a1cc 617
624a35e2
PE
618 "$"("<"{tag}">")?(-?[0-9]+|"$") handle_dollar (token_type, yytext, *loc);
619 "@"(-?[0-9]+|"$") handle_at (token_type, yytext, *loc);
e9955c83 620
302c0aee
PE
621 "$" {
622 warn_at (*loc, _("stray `$'"));
623 obstack_sgrow (&obstack_for_string, "$][");
624 }
625 "@" {
626 warn_at (*loc, _("stray `@'"));
627 obstack_sgrow (&obstack_for_string, "@@");
628 }
629
aa418041 630 <<EOF>> unexpected_eof (code_start, "}"); BEGIN INITIAL;
e9955c83
AD
631}
632
633
634 /*--------------------------------------------------------------.
635 | Scanning some prologue: from "%{" (already scanned) to "%}". |
636 `--------------------------------------------------------------*/
637
638<SC_PROLOGUE>
639{
640 "%}" {
41141c56 641 STRING_FINISH;
3f2d73f1 642 loc->start = code_start;
223ff46e 643 val->chars = last_string;
a706a1cc 644 BEGIN INITIAL;
e9955c83
AD
645 return PROLOGUE;
646 }
647
aa418041 648 <<EOF>> unexpected_eof (code_start, "%}"); BEGIN INITIAL;
e9955c83
AD
649}
650
651
652 /*---------------------------------------------------------------.
653 | Scanning the epilogue (everything after the second "%%", which |
d8d3f94a 654 | has already been eaten). |
e9955c83
AD
655 `---------------------------------------------------------------*/
656
657<SC_EPILOGUE>
658{
e9955c83 659 <<EOF>> {
41141c56 660 STRING_FINISH;
3f2d73f1 661 loc->start = code_start;
223ff46e 662 val->chars = last_string;
a706a1cc 663 BEGIN INITIAL;
e9955c83
AD
664 return EPILOGUE;
665 }
666}
667
668
4febdd96
PE
669 /*-----------------------------------------.
670 | Escape M4 quoting characters in C code. |
671 `-----------------------------------------*/
a706a1cc
PE
672
673<SC_COMMENT,SC_LINE_COMMENT,SC_STRING,SC_CHARACTER,SC_BRACED_CODE,SC_PROLOGUE,SC_EPILOGUE>
674{
223ff46e
PE
675 \$ obstack_sgrow (&obstack_for_string, "$][");
676 \@ obstack_sgrow (&obstack_for_string, "@@");
677 \[ obstack_sgrow (&obstack_for_string, "@{");
678 \] obstack_sgrow (&obstack_for_string, "@}");
a706a1cc
PE
679}
680
681
4febdd96
PE
682 /*-----------------------------------------------------.
683 | By default, grow the string obstack with the input. |
684 `-----------------------------------------------------*/
685
686<SC_COMMENT,SC_LINE_COMMENT,SC_BRACED_CODE,SC_PROLOGUE,SC_EPILOGUE,SC_STRING,SC_CHARACTER,SC_ESCAPED_STRING,SC_ESCAPED_CHARACTER>. |
687<SC_COMMENT,SC_LINE_COMMENT,SC_BRACED_CODE,SC_PROLOGUE,SC_EPILOGUE>\n STRING_GROW;
688
e9955c83
AD
689%%
690
cd3684cf
AD
/* The maximum number of semantic values to the left of a handle
   (those referenced by $0, $-1, etc.) that are required by the
   semantic actions of this grammar.  */
int max_left_semantic_context = 0;
695
3f2d73f1
PE
696/* Set *LOC and adjust scanner cursor to account for token TOKEN of
697 size SIZE. */
6c30d641
PE
698
699static void
223ff46e 700adjust_location (location *loc, char const *token, size_t size)
6c30d641 701{
3f2d73f1
PE
702 int line = scanner_cursor.line;
703 int column = scanner_cursor.column;
6c30d641
PE
704 char const *p0 = token;
705 char const *p = token;
706 char const *lim = token + size;
707
3f2d73f1
PE
708 loc->start = scanner_cursor;
709
6c30d641
PE
710 for (p = token; p < lim; p++)
711 switch (*p)
712 {
6c30d641
PE
713 case '\n':
714 line++;
715 column = 1;
716 p0 = p + 1;
717 break;
718
719 case '\t':
720 column += mbsnwidth (p0, p - p0, 0);
721 column += 8 - ((column - 1) & 7);
722 p0 = p + 1;
723 break;
724 }
725
3f2d73f1
PE
726 scanner_cursor.line = line;
727 scanner_cursor.column = column + mbsnwidth (p0, p - p0, 0);
728
729 loc->end = scanner_cursor;
6c30d641
PE
730}
731
732
/* Read bytes from FP into buffer BUF of size SIZE.  Return the
   number of bytes read.  Remove '\r' from input, treating \r\n
   and isolated \r as \n.

   The conversion is done in place: W is the write position and R the
   read position within BUF, so the returned count can be smaller
   than what fread delivered.  When a '\r' is the very last byte of
   the chunk, one more character is fetched from FP to see whether it
   is the '\n' of a \r\n pair; if it is not, it is pushed back.  */

static size_t
no_cr_read (FILE *fp, char *buf, size_t size)
{
  size_t bytes_read = fread (buf, 1, size, fp);
  if (bytes_read)
    {
      char *w = memchr (buf, '\r', bytes_read);
      if (w)
        {
          char const *r = ++w;
          char const *lim = buf + bytes_read;

          for (;;)
            {
              /* Found an '\r'.  Treat it like '\n', but ignore any
                 '\n' that immediately follows.  */
              w[-1] = '\n';
              if (r == lim)
                {
                  /* The '\r' ended the chunk: peek at the stream.
                     ungetc (EOF, fp) returns EOF, so end of file
                     does not take the `break'.  */
                  int ch = getc (fp);
                  if (ch != '\n' && ungetc (ch, fp) != ch)
                    break;
                }
              else if (*r == '\n')
                r++;

              /* Copy until the next '\r'.  */
              do
                {
                  if (r == lim)
                    return w - buf;
                }
              while ((*w++ = *r++) != '\r');
            }

          return w - buf;
        }
    }

  return bytes_read;
}
778
779
e9955c83 780/*------------------------------------------------------------------.
366eea36 781| TEXT is pointing to a wannabee semantic value (i.e., a `$'). |
e9955c83
AD
782| |
783| Possible inputs: $[<TYPENAME>]($|integer) |
784| |
223ff46e 785| Output to OBSTACK_FOR_STRING a reference to this semantic value. |
e9955c83
AD
786`------------------------------------------------------------------*/
787
624a35e2 788static inline bool
223ff46e 789handle_action_dollar (char *text, location loc)
e9955c83
AD
790{
791 const char *type_name = NULL;
366eea36 792 char *cp = text + 1;
e9955c83 793
624a35e2
PE
794 if (! current_rule)
795 return false;
796
e9955c83
AD
797 /* Get the type name if explicit. */
798 if (*cp == '<')
799 {
800 type_name = ++cp;
801 while (*cp != '>')
802 ++cp;
803 *cp = '\0';
804 ++cp;
805 }
806
807 if (*cp == '$')
808 {
809 if (!type_name)
223ff46e 810 type_name = symbol_list_n_type_name_get (current_rule, loc, 0);
e9955c83 811 if (!type_name && typed)
223ff46e 812 complain_at (loc, _("$$ of `%s' has no declared type"),
97650f4e 813 current_rule->sym->tag);
e9955c83
AD
814 if (!type_name)
815 type_name = "";
223ff46e 816 obstack_fgrow1 (&obstack_for_string,
e9955c83
AD
817 "]b4_lhs_value([%s])[", type_name);
818 }
d8d3f94a 819 else
e9955c83 820 {
1452af69 821 long int num;
223ff46e 822 set_errno (0);
d8d3f94a 823 num = strtol (cp, 0, 10);
e9955c83 824
223ff46e 825 if (INT_MIN <= num && num <= rule_length && ! get_errno ())
e9955c83 826 {
d8d3f94a 827 int n = num;
affac613
AD
828 if (max_left_semantic_context < 1 - n)
829 max_left_semantic_context = 1 - n;
830 if (!type_name && 0 < n)
223ff46e 831 type_name = symbol_list_n_type_name_get (current_rule, loc, n);
e9955c83 832 if (!type_name && typed)
223ff46e
PE
833 complain_at (loc, _("$%d of `%s' has no declared type"),
834 n, current_rule->sym->tag);
e9955c83
AD
835 if (!type_name)
836 type_name = "";
223ff46e 837 obstack_fgrow3 (&obstack_for_string,
05ac60f3 838 "]b4_rhs_value(%d, %d, [%s])[",
e9955c83 839 rule_length, n, type_name);
affac613
AD
840 if (typed)
841 symbol_list_n_used_set (current_rule, n, true);
e9955c83 842 }
d8d3f94a 843 else
223ff46e 844 complain_at (loc, _("integer out of range: %s"), quote (text));
9280d3ef 845 }
9280d3ef 846
624a35e2 847 return true;
e9955c83
AD
848}
849
f25bfb75 850
cd3684cf
AD
851/*----------------------------------------------------------------.
852| Map `$?' onto the proper M4 symbol, depending on its TOKEN_TYPE |
853| (are we in an action?). |
854`----------------------------------------------------------------*/
e9955c83
AD
855
856static void
624a35e2 857handle_dollar (int token_type, char *text, location loc)
f25bfb75 858{
624a35e2 859 switch (token_type)
f25bfb75 860 {
624a35e2
PE
861 case BRACED_CODE:
862 if (handle_action_dollar (text, loc))
863 return;
f25bfb75
AD
864 break;
865
624a35e2 866 case PERCENT_DESTRUCTOR:
cd3684cf 867 case PERCENT_INITIAL_ACTION:
624a35e2
PE
868 case PERCENT_PRINTER:
869 if (text[1] == '$')
870 {
871 obstack_sgrow (&obstack_for_string, "]b4_dollar_dollar[");
872 return;
873 }
874 break;
875
876 default:
f25bfb75
AD
877 break;
878 }
624a35e2
PE
879
880 complain_at (loc, _("invalid value: %s"), quote (text));
f25bfb75
AD
881}
882
883
884/*------------------------------------------------------.
885| TEXT is a location token (i.e., a `@...'). Output to |
223ff46e 886| OBSTACK_FOR_STRING a reference to this location. |
f25bfb75
AD
887`------------------------------------------------------*/
888
624a35e2 889static inline bool
223ff46e 890handle_action_at (char *text, location loc)
e9955c83 891{
366eea36 892 char *cp = text + 1;
d0829076 893 locations_flag = true;
e9955c83 894
624a35e2
PE
895 if (! current_rule)
896 return false;
897
366eea36 898 if (*cp == '$')
624a35e2 899 obstack_sgrow (&obstack_for_string, "]b4_lhs_location[");
d8d3f94a 900 else
e9955c83 901 {
1452af69 902 long int num;
223ff46e 903 set_errno (0);
d8d3f94a 904 num = strtol (cp, 0, 10);
dafdc66f 905
223ff46e 906 if (INT_MIN <= num && num <= rule_length && ! get_errno ())
d8d3f94a
PE
907 {
908 int n = num;
05ac60f3 909 obstack_fgrow2 (&obstack_for_string, "]b4_rhs_location(%d, %d)[",
d8d3f94a
PE
910 rule_length, n);
911 }
e9955c83 912 else
223ff46e 913 complain_at (loc, _("integer out of range: %s"), quote (text));
f25bfb75 914 }
f25bfb75 915
624a35e2 916 return true;
e9955c83 917}
4cdb01db 918
f25bfb75 919
cd3684cf
AD
920/*----------------------------------------------------------------.
921| Map `@?' onto the proper M4 symbol, depending on its TOKEN_TYPE |
922| (are we in an action?). |
923`----------------------------------------------------------------*/
f25bfb75
AD
924
925static void
624a35e2 926handle_at (int token_type, char *text, location loc)
f25bfb75 927{
624a35e2 928 switch (token_type)
f25bfb75 929 {
624a35e2 930 case BRACED_CODE:
223ff46e 931 handle_action_at (text, loc);
624a35e2
PE
932 return;
933
cd3684cf 934 case PERCENT_INITIAL_ACTION:
624a35e2
PE
935 case PERCENT_DESTRUCTOR:
936 case PERCENT_PRINTER:
937 if (text[1] == '$')
938 {
939 obstack_sgrow (&obstack_for_string, "]b4_at_dollar[");
940 return;
941 }
f25bfb75
AD
942 break;
943
624a35e2 944 default:
f25bfb75
AD
945 break;
946 }
624a35e2
PE
947
948 complain_at (loc, _("invalid value: %s"), quote (text));
f25bfb75
AD
949}
950
951
1452af69
PE
952/*------------------------------------------------------.
953| Scan NUMBER for a base-BASE integer at location LOC. |
954`------------------------------------------------------*/
955
956static unsigned long int
957scan_integer (char const *number, int base, location loc)
958{
959 unsigned long int num;
960 set_errno (0);
961 num = strtoul (number, 0, base);
962 if (INT_MAX < num || get_errno ())
963 {
964 complain_at (loc, _("integer out of range: %s"), quote (number));
965 num = INT_MAX;
966 }
967 return num;
968}
969
970
d8d3f94a
PE
/*-------------------------------------------------------------------.
| Convert universal character name UCN to a single-byte character,   |
| and return that character.  Return -1 if UCN does not correspond   |
| to a single-byte character.  The hexadecimal digits are read       |
| starting at UCN + 2, i.e. after a two-character introducer.        |
`-------------------------------------------------------------------*/

static int
convert_ucn_to_byte (char const *ucn)
{
  unsigned long int code = strtoul (ucn + 2, 0, 16);
  int byte;

  /* FIXME: Currently we assume Unicode-compatible unibyte characters
     on ASCII hosts (i.e., Latin-1 on hosts with 8-bit bytes).  On
     non-ASCII hosts we support only the portable C character set.
     These limitations should be removed once we add support for
     multibyte characters.  */

  if (UCHAR_MAX < code)
    return -1;

  /* CODE now fits in an unsigned char, hence in an int.  */
  byte = code;

#if ! ('$' == 0x24 && '@' == 0x40 && '`' == 0x60 && '~' == 0x7e)
  {
    /* A non-ASCII host.  Use CODE to index into a table of the C
       basic execution character set, which is guaranteed to exist on
       all Standard C platforms.  This table also includes '$', '@',
       and '`', which are not in the basic execution character set but
       which are unibyte characters on all the platforms that we know
       about.

       The elements are ints rather than signed chars: on such hosts a
       character constant may exceed SCHAR_MAX, and storing it in a
       signed char would make a valid character look like the -1
       failure marker (or some other negative value).  */
    static int const table[] =
      {
        '\0',   -1,   -1,   -1,   -1,   -1,   -1, '\a',
        '\b', '\t', '\n', '\v', '\f', '\r',   -1,   -1,
          -1,   -1,   -1,   -1,   -1,   -1,   -1,   -1,
          -1,   -1,   -1,   -1,   -1,   -1,   -1,   -1,
         ' ',  '!',  '"',  '#',  '$',  '%',  '&', '\'',
         '(',  ')',  '*',  '+',  ',',  '-',  '.',  '/',
         '0',  '1',  '2',  '3',  '4',  '5',  '6',  '7',
         '8',  '9',  ':',  ';',  '<',  '=',  '>',  '?',
         '@',  'A',  'B',  'C',  'D',  'E',  'F',  'G',
         'H',  'I',  'J',  'K',  'L',  'M',  'N',  'O',
         'P',  'Q',  'R',  'S',  'T',  'U',  'V',  'W',
         'X',  'Y',  'Z',  '[', '\\',  ']',  '^',  '_',
         '`',  'a',  'b',  'c',  'd',  'e',  'f',  'g',
         'h',  'i',  'j',  'k',  'l',  'm',  'n',  'o',
         'p',  'q',  'r',  's',  't',  'u',  'v',  'w',
         'x',  'y',  'z',  '{',  '|',  '}',  '~'
      };

    /* Compare against the element count, not the size in bytes.  */
    byte = code < sizeof table / sizeof table[0] ? table[code] : -1;
  }
#endif

  return byte;
}
1025
1026
900c5db5
AD
1027/*----------------------------------------------------------------.
1028| Handle `#line INT "FILE"'. ARGS has already skipped `#line '. |
1029`----------------------------------------------------------------*/
1030
1031static void
3f2d73f1 1032handle_syncline (char *args)
900c5db5
AD
1033{
1034 int lineno = strtol (args, &args, 10);
1035 const char *file = NULL;
1036 file = strchr (args, '"') + 1;
1037 *strchr (file, '"') = 0;
dca81a78 1038 scanner_cursor.file = current_file = uniqstr_new (file);
3f2d73f1
PE
1039 scanner_cursor.line = lineno;
1040 scanner_cursor.column = 1;
900c5db5
AD
1041}
1042
a706a1cc 1043
4febdd96
PE
1044/*----------------------------------------------------------------.
1045| For a token or comment starting at START, report message MSGID, |
1046| which should say that an end marker was found before |
1047| the expected TOKEN_END. |
1048`----------------------------------------------------------------*/
1049
1050static void
1051unexpected_end (boundary start, char const *msgid, char const *token_end)
1052{
1053 location loc;
1054 loc.start = start;
1055 loc.end = scanner_cursor;
1056 complain_at (loc, _(msgid), token_end);
1057}
1058
1059
3f2d73f1
PE
1060/*------------------------------------------------------------------------.
1061| Report an unexpected EOF in a token or comment starting at START. |
1062| An end of file was encountered and the expected TOKEN_END was missing. |
3f2d73f1 1063`------------------------------------------------------------------------*/
a706a1cc
PE
1064
1065static void
aa418041 1066unexpected_eof (boundary start, char const *token_end)
a706a1cc 1067{
4febdd96
PE
1068 unexpected_end (start, N_("missing `%s' at end of file"), token_end);
1069}
1070
1071
1072/*----------------------------------------.
1073| Likewise, but for unexpected newlines. |
1074`----------------------------------------*/
1075
1076static void
1077unexpected_newline (boundary start, char const *token_end)
1078{
1079 unexpected_end (start, N_("missing `%s' at end of line"), token_end);
a706a1cc
PE
1080}
1081
1082
f25bfb75
AD
1083/*-------------------------.
1084| Initialize the scanner. |
1085`-------------------------*/
1086
1d6412ad
AD
1087void
1088scanner_initialize (void)
1089{
223ff46e 1090 obstack_init (&obstack_for_string);
1d6412ad
AD
1091}
1092
1093
f25bfb75
AD
1094/*-----------------------------------------------.
1095| Free all the memory allocated to the scanner. |
1096`-----------------------------------------------*/
1097
4cdb01db
AD
1098void
1099scanner_free (void)
1100{
223ff46e 1101 obstack_free (&obstack_for_string, 0);
536545f3
AD
1102 /* Reclaim Flex's buffers. */
1103 yy_delete_buffer (YY_CURRENT_BUFFER);
4cdb01db 1104}