]> git.saurik.com Git - bison.git/blame - src/scan-gram.l
* doc/bison.texinfo (Bison Options): Say more accurately what
[bison.git] / src / scan-gram.l
CommitLineData
e9955c83 1/* Bison Grammar Scanner -*- C -*-
3b1e470c 2
073f9288 3 Copyright (C) 2002, 2003, 2004, 2005, 2006 Free Software Foundation, Inc.
e9955c83
AD
4
5 This file is part of Bison, the GNU Compiler Compiler.
6
7 This program is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 2 of the License, or
10 (at your option) any later version.
11
12 This program is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
16
17 You should have received a copy of the GNU General Public License
18 along with this program; if not, write to the Free Software
0fb669f9
PE
19 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
20 02110-1301 USA
e9955c83
AD
21*/
22
aa418041 23%option debug nodefault nounput noyywrap never-interactive
e9955c83
AD
24%option prefix="gram_" outfile="lex.yy.c"
25
26%{
4f6e011e
PE
27/* Work around a bug in flex 2.5.31. See Debian bug 333231
28 <http://bugs.debian.org/cgi-bin/bugreport.cgi?bug=333231>. */
29#undef gram_wrap
30#define gram_wrap() 1
31
e9955c83 32#include "system.h"
223ff46e
PE
33
34#include <mbswidth.h>
223ff46e
PE
35#include <quote.h>
36
e9955c83 37#include "complain.h"
3f2d73f1 38#include "files.h"
e9955c83
AD
39#include "getargs.h"
40#include "gram.h"
ca407bdf 41#include "quotearg.h"
e9955c83 42#include "reader.h"
4517da37 43#include "verify.h"
223ff46e 44#include "uniqstr.h"
e9955c83 45
3f2d73f1
PE
46#define YY_USER_INIT \
47 do \
48 { \
49 scanner_cursor.file = current_file; \
50 scanner_cursor.line = 1; \
51 scanner_cursor.column = 1; \
379f0ac8 52 code_start = scanner_cursor; \
3f2d73f1
PE
53 } \
54 while (0)
8efe435c 55
dc9701e8
PE
56/* Pacify "gcc -Wmissing-prototypes" when flex 2.5.31 is used. */
57int gram_get_lineno (void);
58FILE *gram_get_in (void);
59FILE *gram_get_out (void);
60int gram_get_leng (void);
61char *gram_get_text (void);
62void gram_set_lineno (int);
63void gram_set_in (FILE *);
64void gram_set_out (FILE *);
65int gram_get_debug (void);
66void gram_set_debug (int);
67int gram_lex_destroy (void);
68
3f2d73f1
PE
69/* Location of scanner cursor. */
70boundary scanner_cursor;
41141c56 71
223ff46e 72static void adjust_location (location *, char const *, size_t);
3f2d73f1 73#define YY_USER_ACTION adjust_location (loc, yytext, yyleng);
d8d3f94a 74
6c30d641 75static size_t no_cr_read (FILE *, char *, size_t);
d8d3f94a
PE
76#define YY_INPUT(buf, result, size) ((result) = no_cr_read (yyin, buf, size))
77
78
223ff46e 79/* OBSTACK_FOR_STRING -- Used to store all the characters that we need to
44995b2e
AD
80 keep (to construct ID, STRINGS etc.). Use the following macros to
81 use it.
82
41141c56
PE
83 Use STRING_GROW to append what has just been matched, and
84 STRING_FINISH to end the string (it puts the ending 0).
85 STRING_FINISH also stores this string in LAST_STRING, which can be
86 used, and which is used by STRING_FREE to free the last string. */
44995b2e 87
223ff46e 88static struct obstack obstack_for_string;
44995b2e 89
7ec2d4cd
AD
90/* A string representing the most recently saved token. */
91static char *last_string;
92
93
41141c56 94#define STRING_GROW \
223ff46e 95 obstack_grow (&obstack_for_string, yytext, yyleng)
44995b2e 96
41141c56 97#define STRING_FINISH \
44995b2e 98 do { \
223ff46e
PE
99 obstack_1grow (&obstack_for_string, '\0'); \
100 last_string = obstack_finish (&obstack_for_string); \
44995b2e
AD
101 } while (0)
102
41141c56 103#define STRING_FREE \
223ff46e 104 obstack_free (&obstack_for_string, last_string)
e9955c83 105
7ec2d4cd
AD
106void
107scanner_last_string_free (void)
108{
41141c56 109 STRING_FREE;
7ec2d4cd 110}
e9955c83 111
efcb44dd
PE
112/* Within well-formed rules, RULE_LENGTH is the number of values in
113 the current rule so far, which says where to find `$0' with respect
114 to the top of the stack. It is not the same as the rule->length in
115 the case of mid rule actions.
116
117 Outside of well-formed rules, RULE_LENGTH has an undefined value. */
118static int rule_length;
119
4517da37
PE
120static void rule_length_overflow (location) __attribute__ ((__noreturn__));
121
122/* Increment the rule length by one, checking for overflow. */
123static inline void
124increment_rule_length (location loc)
125{
126 rule_length++;
127
128 /* Don't allow rule_length == INT_MAX, since that might cause
129 confusion with strtol if INT_MAX == LONG_MAX. */
130 if (rule_length == INT_MAX)
131 rule_length_overflow (loc);
132}
133
624a35e2
PE
134static void handle_dollar (int token_type, char *cp, location loc);
135static void handle_at (int token_type, char *cp, location loc);
4517da37 136static void handle_syncline (char *, location);
1452af69 137static unsigned long int scan_integer (char const *p, int base, location loc);
d8d3f94a 138static int convert_ucn_to_byte (char const *hex_text);
aa418041 139static void unexpected_eof (boundary, char const *);
4febdd96 140static void unexpected_newline (boundary, char const *);
e9955c83
AD
141
142%}
d8d3f94a 143%x SC_COMMENT SC_LINE_COMMENT SC_YACC_COMMENT
e9955c83 144%x SC_STRING SC_CHARACTER
3f2d73f1 145%x SC_AFTER_IDENTIFIER
e9955c83 146%x SC_ESCAPED_STRING SC_ESCAPED_CHARACTER
624a35e2 147%x SC_PRE_CODE SC_BRACED_CODE SC_PROLOGUE SC_EPILOGUE
e9955c83 148
29c01725
AD
149letter [.abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ_]
150id {letter}({letter}|[0-9])*
151directive %{letter}({letter}|[0-9]|-)*
624a35e2 152int [0-9]+
d8d3f94a
PE
153
154/* POSIX says that a tag must be both an id and a C union member, but
155 historically almost any character is allowed in a tag. We disallow
156 NUL and newline, as this simplifies our implementation. */
157tag [^\0\n>]+
158
159/* Zero or more instances of backslash-newline. Following GCC, allow
160 white space between the backslash and the newline. */
161splice (\\[ \f\t\v]*\n)*
e9955c83
AD
162
163%%
164%{
a706a1cc 165 /* Nesting level of the current code in braces. */
1a9e39f1
PE
166 int braces_level IF_LINT (= 0);
167
3f2d73f1
PE
168 /* Parent context state, when applicable. */
169 int context_state IF_LINT (= 0);
a706a1cc 170
624a35e2
PE
171 /* Token type to return, when applicable. */
172 int token_type IF_LINT (= 0);
173
3f2d73f1 174 /* Location of most recent identifier, when applicable. */
a2bc9dbc 175 location id_loc IF_LINT (= empty_location);
3f2d73f1 176
a2bc9dbc
PE
177 /* Where containing code started, when applicable. Its initial
178 value is relevant only when yylex is invoked in the SC_EPILOGUE
179 start condition. */
180 boundary code_start = scanner_cursor;
3f2d73f1 181
223ff46e
PE
182 /* Where containing comment or string or character literal started,
183 when applicable. */
a2bc9dbc 184 boundary token_start IF_LINT (= scanner_cursor);
e9955c83
AD
185%}
186
187
3f2d73f1
PE
188 /*-----------------------.
189 | Scanning white space. |
190 `-----------------------*/
191
624a35e2 192<INITIAL,SC_AFTER_IDENTIFIER,SC_PRE_CODE>
3f2d73f1 193{
4febdd96 194 /* Comments and white space. */
83adb046 195 "," warn_at (*loc, _("stray `,' treated as white space"));
4febdd96 196 [ \f\n\t\v] |
3f2d73f1 197 "//".* ;
83adb046
PE
198 "/*" {
199 token_start = loc->start;
200 context_state = YY_START;
201 BEGIN SC_YACC_COMMENT;
202 }
3f2d73f1
PE
203
204 /* #line directives are not documented, and may be withdrawn or
205 modified in future versions of Bison. */
206 ^"#line "{int}" \"".*"\"\n" {
4517da37 207 handle_syncline (yytext + sizeof "#line " - 1, *loc);
3f2d73f1
PE
208 }
209}
210
211
e9955c83
AD
212 /*----------------------------.
213 | Scanning Bison directives. |
214 `----------------------------*/
215<INITIAL>
216{
217 "%binary" return PERCENT_NONASSOC;
218 "%debug" return PERCENT_DEBUG;
39a06c25 219 "%default"[-_]"prec" return PERCENT_DEFAULT_PREC;
e9955c83
AD
220 "%define" return PERCENT_DEFINE;
221 "%defines" return PERCENT_DEFINES;
624a35e2 222 "%destructor" token_type = PERCENT_DESTRUCTOR; BEGIN SC_PRE_CODE;
676385e2 223 "%dprec" return PERCENT_DPREC;
e9955c83
AD
224 "%error"[-_]"verbose" return PERCENT_ERROR_VERBOSE;
225 "%expect" return PERCENT_EXPECT;
d6328241 226 "%expect"[-_]"rr" return PERCENT_EXPECT_RR;
e9955c83
AD
227 "%file-prefix" return PERCENT_FILE_PREFIX;
228 "%fixed"[-_]"output"[-_]"files" return PERCENT_YACC;
cd3684cf 229 "%initial-action" token_type = PERCENT_INITIAL_ACTION; BEGIN SC_PRE_CODE;
ae7453f2 230 "%glr-parser" return PERCENT_GLR_PARSER;
e9955c83 231 "%left" return PERCENT_LEFT;
624a35e2 232 "%lex-param" token_type = PERCENT_LEX_PARAM; BEGIN SC_PRE_CODE;
e9955c83 233 "%locations" return PERCENT_LOCATIONS;
676385e2 234 "%merge" return PERCENT_MERGE;
e9955c83 235 "%name"[-_]"prefix" return PERCENT_NAME_PREFIX;
22fccf95 236 "%no"[-_]"default"[-_]"prec" return PERCENT_NO_DEFAULT_PREC;
e9955c83
AD
237 "%no"[-_]"lines" return PERCENT_NO_LINES;
238 "%nonassoc" return PERCENT_NONASSOC;
916708d5 239 "%nondeterministic-parser" return PERCENT_NONDETERMINISTIC_PARSER;
e9955c83
AD
240 "%nterm" return PERCENT_NTERM;
241 "%output" return PERCENT_OUTPUT;
624a35e2 242 "%parse-param" token_type = PERCENT_PARSE_PARAM; BEGIN SC_PRE_CODE;
d8d3f94a 243 "%prec" rule_length--; return PERCENT_PREC;
624a35e2 244 "%printer" token_type = PERCENT_PRINTER; BEGIN SC_PRE_CODE;
e9955c83 245 "%pure"[-_]"parser" return PERCENT_PURE_PARSER;
b50d2359 246 "%require" return PERCENT_REQUIRE;
e9955c83
AD
247 "%right" return PERCENT_RIGHT;
248 "%skeleton" return PERCENT_SKELETON;
249 "%start" return PERCENT_START;
250 "%term" return PERCENT_TOKEN;
251 "%token" return PERCENT_TOKEN;
252 "%token"[-_]"table" return PERCENT_TOKEN_TABLE;
253 "%type" return PERCENT_TYPE;
624a35e2 254 "%union" token_type = PERCENT_UNION; BEGIN SC_PRE_CODE;
e9955c83
AD
255 "%verbose" return PERCENT_VERBOSE;
256 "%yacc" return PERCENT_YACC;
257
3f2d73f1 258 {directive} {
41141c56 259 complain_at (*loc, _("invalid directive: %s"), quote (yytext));
412f8a59 260 }
900c5db5 261
e9955c83 262 "=" return EQUAL;
d8d3f94a 263 "|" rule_length = 0; return PIPE;
e9955c83
AD
264 ";" return SEMICOLON;
265
3f2d73f1 266 {id} {
41141c56 267 val->symbol = symbol_get (yytext, *loc);
3f2d73f1 268 id_loc = *loc;
4517da37 269 increment_rule_length (*loc);
3f2d73f1 270 BEGIN SC_AFTER_IDENTIFIER;
e9955c83
AD
271 }
272
d8d3f94a 273 {int} {
1452af69
PE
274 val->integer = scan_integer (yytext, 10, *loc);
275 return INT;
276 }
277 0[xX][0-9abcdefABCDEF]+ {
278 val->integer = scan_integer (yytext, 16, *loc);
d8d3f94a
PE
279 return INT;
280 }
e9955c83
AD
281
282 /* Characters. We don't check there is only one. */
3f2d73f1 283 "'" STRING_GROW; token_start = loc->start; BEGIN SC_ESCAPED_CHARACTER;
e9955c83
AD
284
285 /* Strings. */
ca407bdf 286 "\"" token_start = loc->start; BEGIN SC_ESCAPED_STRING;
e9955c83
AD
287
288 /* Prologue. */
3f2d73f1 289 "%{" code_start = loc->start; BEGIN SC_PROLOGUE;
e9955c83
AD
290
291 /* Code in between braces. */
3f2d73f1
PE
292 "{" {
293 STRING_GROW;
624a35e2 294 token_type = BRACED_CODE;
3f2d73f1
PE
295 braces_level = 0;
296 code_start = loc->start;
297 BEGIN SC_BRACED_CODE;
298 }
e9955c83
AD
299
300 /* A type. */
d8d3f94a 301 "<"{tag}">" {
223ff46e 302 obstack_grow (&obstack_for_string, yytext + 1, yyleng - 2);
41141c56 303 STRING_FINISH;
223ff46e 304 val->uniqstr = uniqstr_new (last_string);
41141c56 305 STRING_FREE;
4cdb01db
AD
306 return TYPE;
307 }
308
a706a1cc
PE
309 "%%" {
310 static int percent_percent_count;
e9955c83 311 if (++percent_percent_count == 2)
a2bc9dbc 312 BEGIN SC_EPILOGUE;
e9955c83
AD
313 return PERCENT_PERCENT;
314 }
315
a706a1cc 316 . {
41141c56 317 complain_at (*loc, _("invalid character: %s"), quote (yytext));
3f2d73f1 318 }
379f0ac8
PE
319
320 <<EOF>> {
321 loc->start = loc->end = scanner_cursor;
322 yyterminate ();
323 }
3f2d73f1
PE
324}
325
326
327 /*-----------------------------------------------------------------.
328 | Scanning after an identifier, checking whether a colon is next. |
329 `-----------------------------------------------------------------*/
330
331<SC_AFTER_IDENTIFIER>
332{
333 ":" {
334 rule_length = 0;
335 *loc = id_loc;
336 BEGIN INITIAL;
337 return ID_COLON;
338 }
339 . {
340 scanner_cursor.column -= mbsnwidth (yytext, yyleng, 0);
341 yyless (0);
342 *loc = id_loc;
343 BEGIN INITIAL;
344 return ID;
345 }
346 <<EOF>> {
347 *loc = id_loc;
348 BEGIN INITIAL;
349 return ID;
e9955c83
AD
350 }
351}
352
353
d8d3f94a
PE
354 /*---------------------------------------------------------------.
355 | Scanning a Yacc comment. The initial `/ *' is already eaten. |
356 `---------------------------------------------------------------*/
e9955c83 357
d8d3f94a 358<SC_YACC_COMMENT>
e9955c83 359{
3f2d73f1 360 "*/" BEGIN context_state;
a706a1cc 361 .|\n ;
aa418041 362 <<EOF>> unexpected_eof (token_start, "*/"); BEGIN context_state;
d8d3f94a
PE
363}
364
365
366 /*------------------------------------------------------------.
367 | Scanning a C comment. The initial `/ *' is already eaten. |
368 `------------------------------------------------------------*/
369
370<SC_COMMENT>
371{
3f2d73f1 372 "*"{splice}"/" STRING_GROW; BEGIN context_state;
aa418041 373 <<EOF>> unexpected_eof (token_start, "*/"); BEGIN context_state;
e9955c83
AD
374}
375
376
d8d3f94a
PE
377 /*--------------------------------------------------------------.
378 | Scanning a line comment. The initial `//' is already eaten. |
379 `--------------------------------------------------------------*/
380
381<SC_LINE_COMMENT>
382{
3f2d73f1 383 "\n" STRING_GROW; BEGIN context_state;
41141c56 384 {splice} STRING_GROW;
3f2d73f1 385 <<EOF>> BEGIN context_state;
d8d3f94a
PE
386}
387
388
4febdd96
PE
389 /*------------------------------------------------.
390 | Scanning a Bison string, including its escapes. |
391 | The initial quote is already eaten. |
392 `------------------------------------------------*/
e9955c83
AD
393
394<SC_ESCAPED_STRING>
395{
db2cc12f 396 "\"" {
41141c56 397 STRING_FINISH;
3f2d73f1 398 loc->start = token_start;
223ff46e 399 val->chars = last_string;
4517da37 400 increment_rule_length (*loc);
a706a1cc 401 BEGIN INITIAL;
e9955c83
AD
402 return STRING;
403 }
4febdd96
PE
404 \n unexpected_newline (token_start, "\""); BEGIN INITIAL;
405 <<EOF>> unexpected_eof (token_start, "\""); BEGIN INITIAL;
e9955c83
AD
406}
407
4febdd96
PE
408 /*----------------------------------------------------------.
409 | Scanning a Bison character literal, decoding its escapes. |
410 | The initial quote is already eaten. |
411 `----------------------------------------------------------*/
e9955c83
AD
412
413<SC_ESCAPED_CHARACTER>
414{
db2cc12f 415 "'" {
3b1e470c 416 unsigned char last_string_1;
41141c56
PE
417 STRING_GROW;
418 STRING_FINISH;
3f2d73f1 419 loc->start = token_start;
ca407bdf
PE
420 val->symbol = symbol_get (quotearg_style (escape_quoting_style,
421 last_string),
422 *loc);
073f9288 423 symbol_class_set (val->symbol, token_sym, *loc, false);
3b1e470c
PE
424 last_string_1 = last_string[1];
425 symbol_user_token_number_set (val->symbol, last_string_1, *loc);
41141c56 426 STRING_FREE;
4517da37 427 increment_rule_length (*loc);
a706a1cc
PE
428 BEGIN INITIAL;
429 return ID;
e9955c83 430 }
4febdd96
PE
431 \n unexpected_newline (token_start, "'"); BEGIN INITIAL;
432 <<EOF>> unexpected_eof (token_start, "'"); BEGIN INITIAL;
433}
a706a1cc 434
4febdd96
PE
435<SC_ESCAPED_CHARACTER,SC_ESCAPED_STRING>
436{
92ac3705 437 \0 complain_at (*loc, _("invalid null character"));
e9955c83
AD
438}
439
440
441 /*----------------------------.
442 | Decode escaped characters. |
443 `----------------------------*/
444
445<SC_ESCAPED_STRING,SC_ESCAPED_CHARACTER>
446{
d8d3f94a 447 \\[0-7]{1,3} {
4517da37 448 unsigned long int c = strtoul (yytext + 1, NULL, 8);
d8d3f94a 449 if (UCHAR_MAX < c)
3f2d73f1 450 complain_at (*loc, _("invalid escape sequence: %s"), quote (yytext));
05ac60f3 451 else if (! c)
92ac3705 452 complain_at (*loc, _("invalid null character: %s"), quote (yytext));
e9955c83 453 else
223ff46e 454 obstack_1grow (&obstack_for_string, c);
e9955c83
AD
455 }
456
6b0d38ab 457 \\x[0-9abcdefABCDEF]+ {
4517da37
PE
458 verify (UCHAR_MAX < ULONG_MAX);
459 unsigned long int c = strtoul (yytext + 2, NULL, 16);
460 if (UCHAR_MAX < c)
3f2d73f1 461 complain_at (*loc, _("invalid escape sequence: %s"), quote (yytext));
92ac3705
PE
462 else if (! c)
463 complain_at (*loc, _("invalid null character: %s"), quote (yytext));
d8d3f94a 464 else
223ff46e 465 obstack_1grow (&obstack_for_string, c);
e9955c83
AD
466 }
467
223ff46e
PE
468 \\a obstack_1grow (&obstack_for_string, '\a');
469 \\b obstack_1grow (&obstack_for_string, '\b');
470 \\f obstack_1grow (&obstack_for_string, '\f');
471 \\n obstack_1grow (&obstack_for_string, '\n');
472 \\r obstack_1grow (&obstack_for_string, '\r');
473 \\t obstack_1grow (&obstack_for_string, '\t');
474 \\v obstack_1grow (&obstack_for_string, '\v');
412f8a59
PE
475
476 /* \\[\"\'?\\] would be shorter, but it confuses xgettext. */
223ff46e 477 \\("\""|"'"|"?"|"\\") obstack_1grow (&obstack_for_string, yytext[1]);
412f8a59 478
6b0d38ab 479 \\(u|U[0-9abcdefABCDEF]{4})[0-9abcdefABCDEF]{4} {
d8d3f94a
PE
480 int c = convert_ucn_to_byte (yytext);
481 if (c < 0)
3f2d73f1 482 complain_at (*loc, _("invalid escape sequence: %s"), quote (yytext));
92ac3705
PE
483 else if (! c)
484 complain_at (*loc, _("invalid null character: %s"), quote (yytext));
d8d3f94a 485 else
223ff46e 486 obstack_1grow (&obstack_for_string, c);
d8d3f94a 487 }
4f25ebb0 488 \\(.|\n) {
3f2d73f1 489 complain_at (*loc, _("unrecognized escape sequence: %s"), quote (yytext));
41141c56 490 STRING_GROW;
e9955c83
AD
491 }
492}
493
4febdd96
PE
494 /*--------------------------------------------.
495 | Scanning user-code characters and strings. |
496 `--------------------------------------------*/
e9955c83 497
4febdd96
PE
498<SC_CHARACTER,SC_STRING>
499{
500 {splice}|\\{splice}[^\n$@\[\]] STRING_GROW;
501}
e9955c83
AD
502
503<SC_CHARACTER>
504{
4febdd96
PE
505 "'" STRING_GROW; BEGIN context_state;
506 \n unexpected_newline (token_start, "'"); BEGIN context_state;
507 <<EOF>> unexpected_eof (token_start, "'"); BEGIN context_state;
e9955c83
AD
508}
509
e9955c83
AD
510<SC_STRING>
511{
4febdd96
PE
512 "\"" STRING_GROW; BEGIN context_state;
513 \n unexpected_newline (token_start, "\""); BEGIN context_state;
514 <<EOF>> unexpected_eof (token_start, "\""); BEGIN context_state;
e9955c83
AD
515}
516
517
518 /*---------------------------------------------------.
519 | Strings, comments etc. can be found in user code. |
520 `---------------------------------------------------*/
521
522<SC_BRACED_CODE,SC_PROLOGUE,SC_EPILOGUE>
523{
3f2d73f1
PE
524 "'" {
525 STRING_GROW;
526 context_state = YY_START;
527 token_start = loc->start;
528 BEGIN SC_CHARACTER;
529 }
530 "\"" {
531 STRING_GROW;
532 context_state = YY_START;
533 token_start = loc->start;
534 BEGIN SC_STRING;
535 }
536 "/"{splice}"*" {
537 STRING_GROW;
538 context_state = YY_START;
539 token_start = loc->start;
540 BEGIN SC_COMMENT;
541 }
542 "/"{splice}"/" {
543 STRING_GROW;
544 context_state = YY_START;
545 BEGIN SC_LINE_COMMENT;
546 }
e9955c83
AD
547}
548
549
624a35e2
PE
550 /*---------------------------------------------------------------.
551 | Scanning after %union etc., possibly followed by white space. |
552 | For %union only, allow arbitrary C code to appear before the |
553 | following brace, as an extension to POSIX. |
554 `---------------------------------------------------------------*/
555
556<SC_PRE_CODE>
557{
558 . {
559 bool valid = yytext[0] == '{' || token_type == PERCENT_UNION;
560 scanner_cursor.column -= mbsnwidth (yytext, yyleng, 0);
561 yyless (0);
562
563 if (valid)
564 {
565 braces_level = -1;
566 code_start = loc->start;
567 BEGIN SC_BRACED_CODE;
568 }
569 else
570 {
6d07bacf 571 complain_at (*loc, _("missing `{' in %s"),
624a35e2
PE
572 token_name (token_type));
573 obstack_sgrow (&obstack_for_string, "{}");
574 STRING_FINISH;
575 val->chars = last_string;
576 BEGIN INITIAL;
577 return token_type;
578 }
579 }
379f0ac8 580
aa418041 581 <<EOF>> unexpected_eof (scanner_cursor, "{}"); BEGIN INITIAL;
624a35e2
PE
582}
583
584
e9955c83
AD
585 /*---------------------------------------------------------------.
586 | Scanning some code in braces (%union and actions). The initial |
587 | "{" is already eaten. |
588 `---------------------------------------------------------------*/
589
590<SC_BRACED_CODE>
591{
41141c56
PE
592 "{"|"<"{splice}"%" STRING_GROW; braces_level++;
593 "%"{splice}">" STRING_GROW; braces_level--;
e9955c83 594 "}" {
25522739
PE
595 bool outer_brace = --braces_level < 0;
596
597 /* As an undocumented Bison extension, append `;' before the last
598 brace in braced code, so that the user code can omit trailing
599 `;'. But do not append `;' if emulating Yacc, since Yacc does
600 not append one.
601
602 FIXME: Bison should warn if a semicolon seems to be necessary
603 here, and should omit the semicolon if it seems unnecessary
604 (e.g., after ';', '{', or '}', each followed by comments or
605 white space). Such a warning shouldn't depend on --yacc; it
606 should depend on a new --pedantic option, which would cause
607 Bison to warn if it detects an extension to POSIX. --pedantic
608 should also diagnose other Bison extensions like %yacc.
609 Perhaps there should also be a GCC-style --pedantic-errors
610 option, so that such warnings are diagnosed as errors. */
1deb9bdc 611 if (outer_brace && token_type == BRACED_CODE && ! yacc_flag)
25522739
PE
612 obstack_1grow (&obstack_for_string, ';');
613
614 obstack_1grow (&obstack_for_string, '}');
615
616 if (outer_brace)
e9955c83 617 {
41141c56 618 STRING_FINISH;
3f2d73f1 619 loc->start = code_start;
223ff46e 620 val->chars = last_string;
4517da37 621 increment_rule_length (*loc);
a706a1cc 622 BEGIN INITIAL;
624a35e2 623 return token_type;
e9955c83
AD
624 }
625 }
626
a706a1cc
PE
627 /* Tokenize `<<%' correctly (as `<<' `%') rather than incorrectly
628 (as `<' `<%'). */
41141c56 629 "<"{splice}"<" STRING_GROW;
a706a1cc 630
624a35e2
PE
631 "$"("<"{tag}">")?(-?[0-9]+|"$") handle_dollar (token_type, yytext, *loc);
632 "@"(-?[0-9]+|"$") handle_at (token_type, yytext, *loc);
e9955c83 633
302c0aee
PE
634 "$" {
635 warn_at (*loc, _("stray `$'"));
636 obstack_sgrow (&obstack_for_string, "$][");
637 }
638 "@" {
639 warn_at (*loc, _("stray `@'"));
640 obstack_sgrow (&obstack_for_string, "@@");
641 }
642
aa418041 643 <<EOF>> unexpected_eof (code_start, "}"); BEGIN INITIAL;
e9955c83
AD
644}
645
646
647 /*--------------------------------------------------------------.
648 | Scanning some prologue: from "%{" (already scanned) to "%}". |
649 `--------------------------------------------------------------*/
650
651<SC_PROLOGUE>
652{
653 "%}" {
41141c56 654 STRING_FINISH;
3f2d73f1 655 loc->start = code_start;
223ff46e 656 val->chars = last_string;
a706a1cc 657 BEGIN INITIAL;
e9955c83
AD
658 return PROLOGUE;
659 }
660
aa418041 661 <<EOF>> unexpected_eof (code_start, "%}"); BEGIN INITIAL;
e9955c83
AD
662}
663
664
665 /*---------------------------------------------------------------.
666 | Scanning the epilogue (everything after the second "%%", which |
d8d3f94a 667 | has already been eaten). |
e9955c83
AD
668 `---------------------------------------------------------------*/
669
670<SC_EPILOGUE>
671{
e9955c83 672 <<EOF>> {
41141c56 673 STRING_FINISH;
3f2d73f1 674 loc->start = code_start;
223ff46e 675 val->chars = last_string;
a706a1cc 676 BEGIN INITIAL;
e9955c83
AD
677 return EPILOGUE;
678 }
679}
680
681
4febdd96
PE
682 /*-----------------------------------------.
683 | Escape M4 quoting characters in C code. |
684 `-----------------------------------------*/
a706a1cc
PE
685
686<SC_COMMENT,SC_LINE_COMMENT,SC_STRING,SC_CHARACTER,SC_BRACED_CODE,SC_PROLOGUE,SC_EPILOGUE>
687{
223ff46e
PE
688 \$ obstack_sgrow (&obstack_for_string, "$][");
689 \@ obstack_sgrow (&obstack_for_string, "@@");
690 \[ obstack_sgrow (&obstack_for_string, "@{");
691 \] obstack_sgrow (&obstack_for_string, "@}");
a706a1cc
PE
692}
693
694
4febdd96
PE
695 /*-----------------------------------------------------.
696 | By default, grow the string obstack with the input. |
697 `-----------------------------------------------------*/
698
699<SC_COMMENT,SC_LINE_COMMENT,SC_BRACED_CODE,SC_PROLOGUE,SC_EPILOGUE,SC_STRING,SC_CHARACTER,SC_ESCAPED_STRING,SC_ESCAPED_CHARACTER>. |
700<SC_COMMENT,SC_LINE_COMMENT,SC_BRACED_CODE,SC_PROLOGUE,SC_EPILOGUE>\n STRING_GROW;
701
e9955c83
AD
702%%
703
cd3684cf
AD
704/* Keeps track of the maximum number of semantic values to the left of
705 a handle (those referenced by $0, $-1, etc.) that are required by the
25005f6a
PH
706 semantic actions of this grammar. */
707int max_left_semantic_context = 0;
708
4517da37
PE
/* Return COLUMN advanced by the width of a piece of text, saturating
   at INT_MAX.

   When BUF is non-null, the width is mbsnwidth (BUF, BUFSIZE, 0).
   When BUF is null, BUFSIZE itself is the width (and must then be
   less than INT_MAX).  INT_MAX is returned whenever the sum overflows
   or might overflow undetectably.  COLUMN is assumed nonnegative.  */

static inline int
add_column_width (int column, char const *buf, size_t bufsize)
{
  /* Number of columns that can still be added before hitting INT_MAX.  */
  unsigned int room = INT_MAX - column;
  size_t width = bufsize;

  if (buf != NULL)
    {
      /* Refuse buffers so large that the measured width could not be
         trusted to fit.  */
      if (INT_MAX / 2 <= bufsize)
        return INT_MAX;
      width = mbsnwidth (buf, bufsize, 0);
    }

  if (room < width)
    return INT_MAX;

  return column + width;
}
731
3f2d73f1
PE
732/* Set *LOC and adjust scanner cursor to account for token TOKEN of
733 size SIZE. */
6c30d641
PE
734
735static void
223ff46e 736adjust_location (location *loc, char const *token, size_t size)
6c30d641 737{
3f2d73f1
PE
738 int line = scanner_cursor.line;
739 int column = scanner_cursor.column;
6c30d641
PE
740 char const *p0 = token;
741 char const *p = token;
742 char const *lim = token + size;
743
3f2d73f1
PE
744 loc->start = scanner_cursor;
745
6c30d641
PE
746 for (p = token; p < lim; p++)
747 switch (*p)
748 {
6c30d641 749 case '\n':
4517da37 750 line += line < INT_MAX;
6c30d641
PE
751 column = 1;
752 p0 = p + 1;
753 break;
754
755 case '\t':
4517da37
PE
756 {
757 column = add_column_width (column, p0, p - p0);
758 column = add_column_width (column, NULL, 8 - ((column - 1) & 7));
759 p0 = p + 1;
760 break;
761 }
6c30d641
PE
762 }
763
3f2d73f1 764 scanner_cursor.line = line;
4517da37 765 scanner_cursor.column = column = add_column_width (column, p0, p - p0);
3f2d73f1
PE
766
767 loc->end = scanner_cursor;
4517da37
PE
768
769 if (line == INT_MAX && loc->start.line != INT_MAX)
770 warn_at (*loc, _("line number overflow"));
771 if (column == INT_MAX && loc->start.column != INT_MAX)
772 warn_at (*loc, _("column number overflow"));
6c30d641
PE
773}
774
775
/* Read up to SIZE bytes from FP into BUF and return the number of
   bytes stored there.  Carriage returns are normalized away: both
   "\r\n" and a lone '\r' are delivered as a single '\n', so the count
   returned may be smaller than the count read.  */

static size_t
no_cr_read (FILE *fp, char *buf, size_t size)
{
  size_t bytes_read = fread (buf, 1, size, fp);
  char *first_cr = bytes_read ? memchr (buf, '\r', bytes_read) : NULL;
  char const *src;
  char const *end;
  char *dst;

  /* Common case: nothing to rewrite.  */
  if (!first_cr)
    return bytes_read;

  /* Compact the buffer in place, starting at the first '\r'.  DST
     never gets ahead of SRC.  */
  end = buf + bytes_read;
  src = first_cr;
  dst = first_cr;

  while (src < end)
    {
      char c = *src++;

      if (c == '\r')
        {
          c = '\n';

          if (src < end)
            {
              /* "\r\n" inside the buffer: drop the '\n'.  */
              if (*src == '\n')
                src++;
            }
          else
            {
              /* The '\r' is the last byte read.  Peek at the stream
                 so that a "\r\n" split across two reads is still
                 collapsed; anything else is pushed back.  */
              int ch = getc (fp);
              if (ch != '\n')
                ungetc (ch, fp);
            }
        }

      *dst++ = c;
    }

  return dst - buf;
}
821
822
e9955c83 823/*------------------------------------------------------------------.
366eea36 824| TEXT is pointing to a wannabe semantic value (i.e., a `$'). |
e9955c83
AD
825| |
826| Possible inputs: $[<TYPENAME>]($|integer) |
827| |
223ff46e 828| Output to OBSTACK_FOR_STRING a reference to this semantic value. |
e9955c83
AD
829`------------------------------------------------------------------*/
830
624a35e2 831static inline bool
223ff46e 832handle_action_dollar (char *text, location loc)
e9955c83
AD
833{
834 const char *type_name = NULL;
366eea36 835 char *cp = text + 1;
e9955c83 836
624a35e2
PE
837 if (! current_rule)
838 return false;
839
e9955c83
AD
840 /* Get the type name if explicit. */
841 if (*cp == '<')
842 {
843 type_name = ++cp;
844 while (*cp != '>')
845 ++cp;
846 *cp = '\0';
847 ++cp;
848 }
849
850 if (*cp == '$')
851 {
852 if (!type_name)
223ff46e 853 type_name = symbol_list_n_type_name_get (current_rule, loc, 0);
e9955c83 854 if (!type_name && typed)
223ff46e 855 complain_at (loc, _("$$ of `%s' has no declared type"),
97650f4e 856 current_rule->sym->tag);
e9955c83
AD
857 if (!type_name)
858 type_name = "";
223ff46e 859 obstack_fgrow1 (&obstack_for_string,
e9955c83 860 "]b4_lhs_value([%s])[", type_name);
8f3596a6 861 current_rule->used = true;
e9955c83 862 }
d8d3f94a 863 else
e9955c83 864 {
4517da37 865 long int num = strtol (cp, NULL, 10);
e9955c83 866
4517da37 867 if (1 - INT_MAX + rule_length <= num && num <= rule_length)
e9955c83 868 {
d8d3f94a 869 int n = num;
affac613
AD
870 if (max_left_semantic_context < 1 - n)
871 max_left_semantic_context = 1 - n;
872 if (!type_name && 0 < n)
223ff46e 873 type_name = symbol_list_n_type_name_get (current_rule, loc, n);
e9955c83 874 if (!type_name && typed)
223ff46e
PE
875 complain_at (loc, _("$%d of `%s' has no declared type"),
876 n, current_rule->sym->tag);
e9955c83
AD
877 if (!type_name)
878 type_name = "";
223ff46e 879 obstack_fgrow3 (&obstack_for_string,
05ac60f3 880 "]b4_rhs_value(%d, %d, [%s])[",
e9955c83 881 rule_length, n, type_name);
8f3596a6 882 symbol_list_n_used_set (current_rule, n, true);
e9955c83 883 }
d8d3f94a 884 else
223ff46e 885 complain_at (loc, _("integer out of range: %s"), quote (text));
9280d3ef 886 }
9280d3ef 887
624a35e2 888 return true;
e9955c83
AD
889}
890
f25bfb75 891
cd3684cf
AD
892/*----------------------------------------------------------------.
893| Map `$?' onto the proper M4 symbol, depending on its TOKEN_TYPE |
894| (are we in an action?). |
895`----------------------------------------------------------------*/
e9955c83
AD
896
897static void
624a35e2 898handle_dollar (int token_type, char *text, location loc)
f25bfb75 899{
624a35e2 900 switch (token_type)
f25bfb75 901 {
624a35e2
PE
902 case BRACED_CODE:
903 if (handle_action_dollar (text, loc))
904 return;
f25bfb75
AD
905 break;
906
624a35e2 907 case PERCENT_DESTRUCTOR:
cd3684cf 908 case PERCENT_INITIAL_ACTION:
624a35e2
PE
909 case PERCENT_PRINTER:
910 if (text[1] == '$')
911 {
912 obstack_sgrow (&obstack_for_string, "]b4_dollar_dollar[");
913 return;
914 }
915 break;
916
917 default:
f25bfb75
AD
918 break;
919 }
624a35e2
PE
920
921 complain_at (loc, _("invalid value: %s"), quote (text));
f25bfb75
AD
922}
923
924
925/*------------------------------------------------------.
926| TEXT is a location token (i.e., a `@...'). Output to |
223ff46e 927| OBSTACK_FOR_STRING a reference to this location. |
f25bfb75
AD
928`------------------------------------------------------*/
929
624a35e2 930static inline bool
223ff46e 931handle_action_at (char *text, location loc)
e9955c83 932{
366eea36 933 char *cp = text + 1;
d0829076 934 locations_flag = true;
e9955c83 935
624a35e2
PE
936 if (! current_rule)
937 return false;
938
366eea36 939 if (*cp == '$')
624a35e2 940 obstack_sgrow (&obstack_for_string, "]b4_lhs_location[");
d8d3f94a 941 else
e9955c83 942 {
4517da37 943 long int num = strtol (cp, NULL, 10);
dafdc66f 944
4517da37 945 if (1 - INT_MAX + rule_length <= num && num <= rule_length)
d8d3f94a
PE
946 {
947 int n = num;
05ac60f3 948 obstack_fgrow2 (&obstack_for_string, "]b4_rhs_location(%d, %d)[",
d8d3f94a
PE
949 rule_length, n);
950 }
e9955c83 951 else
223ff46e 952 complain_at (loc, _("integer out of range: %s"), quote (text));
f25bfb75 953 }
f25bfb75 954
624a35e2 955 return true;
e9955c83 956}
4cdb01db 957
f25bfb75 958
cd3684cf
AD
959/*----------------------------------------------------------------.
960| Map `@?' onto the proper M4 symbol, depending on its TOKEN_TYPE |
961| (are we in an action?). |
962`----------------------------------------------------------------*/
f25bfb75
AD
963
964static void
624a35e2 965handle_at (int token_type, char *text, location loc)
f25bfb75 966{
624a35e2 967 switch (token_type)
f25bfb75 968 {
624a35e2 969 case BRACED_CODE:
223ff46e 970 handle_action_at (text, loc);
624a35e2
PE
971 return;
972
cd3684cf 973 case PERCENT_INITIAL_ACTION:
624a35e2
PE
974 case PERCENT_DESTRUCTOR:
975 case PERCENT_PRINTER:
976 if (text[1] == '$')
977 {
978 obstack_sgrow (&obstack_for_string, "]b4_at_dollar[");
979 return;
980 }
f25bfb75
AD
981 break;
982
624a35e2 983 default:
f25bfb75
AD
984 break;
985 }
624a35e2
PE
986
987 complain_at (loc, _("invalid value: %s"), quote (text));
f25bfb75
AD
988}
989
990
1452af69
PE
991/*------------------------------------------------------.
992| Scan NUMBER for a base-BASE integer at location LOC. |
993`------------------------------------------------------*/
994
995static unsigned long int
996scan_integer (char const *number, int base, location loc)
997{
4517da37
PE
998 verify (INT_MAX < ULONG_MAX);
999 unsigned long int num = strtoul (number, NULL, base);
1000
1001 if (INT_MAX < num)
1452af69
PE
1002 {
1003 complain_at (loc, _("integer out of range: %s"), quote (number));
1004 num = INT_MAX;
1005 }
4517da37 1006
1452af69
PE
1007 return num;
1008}
1009
1010
/*--------------------------------------------------------------.
| Return the single-byte character denoted by the universal     |
| character name UCN, whose hexadecimal code starts at UCN + 2, |
| or -1 if no single-byte character corresponds to it.          |
`--------------------------------------------------------------*/

static int
convert_ucn_to_byte (char const *ucn)
{
  /* Every unsigned char value must be returnable as an int.  */
  verify (UCHAR_MAX <= INT_MAX);
  unsigned long int value = strtoul (ucn + 2, NULL, 16);

  /* FIXME: For now, unibyte characters are assumed to be
     Unicode-compatible on ASCII hosts (that is, Latin-1 where bytes
     have 8 bits), and only the portable C character set is
     supported on non-ASCII hosts.  Lift these restrictions when
     multibyte characters become supported.  */

  if (value > UCHAR_MAX)
    return -1;

#if ! ('$' == 0x24 && '@' == 0x40 && '`' == 0x60 && '~' == 0x7e)
  {
    /* This host is not ASCII.  Translate VALUE through a table,
       indexed by ASCII code, of the C basic execution character
       set, which every Standard C platform provides.  '$', '@' and
       '`' are listed too: they are outside that set, yet they are
       unibyte characters on all the platforms that we know about.
       Entries equal to -1 have no portable equivalent.  */
    static signed char const basic_charset[] =
      {
        '\0',   -1,   -1,   -1,   -1,   -1,   -1, '\a',
        '\b', '\t', '\n', '\v', '\f', '\r',   -1,   -1,
          -1,   -1,   -1,   -1,   -1,   -1,   -1,   -1,
          -1,   -1,   -1,   -1,   -1,   -1,   -1,   -1,
         ' ',  '!',  '"',  '#',  '$',  '%',  '&', '\'',
         '(',  ')',  '*',  '+',  ',',  '-',  '.',  '/',
         '0',  '1',  '2',  '3',  '4',  '5',  '6',  '7',
         '8',  '9',  ':',  ';',  '<',  '=',  '>',  '?',
         '@',  'A',  'B',  'C',  'D',  'E',  'F',  'G',
         'H',  'I',  'J',  'K',  'L',  'M',  'N',  'O',
         'P',  'Q',  'R',  'S',  'T',  'U',  'V',  'W',
         'X',  'Y',  'Z',  '[', '\\',  ']',  '^',  '_',
         '`',  'a',  'b',  'c',  'd',  'e',  'f',  'g',
         'h',  'i',  'j',  'k',  'l',  'm',  'n',  'o',
         'p',  'q',  'r',  's',  't',  'u',  'v',  'w',
         'x',  'y',  'z',  '{',  '|',  '}',  '~'
      };

    if (value < sizeof basic_charset)
      value = basic_charset[value];
    else
      value = -1;
  }
#endif

  return value;
}
1066
1067
900c5db5
AD
1068/*----------------------------------------------------------------.
1069| Handle `#line INT "FILE"'. ARGS has already skipped `#line '. |
1070`----------------------------------------------------------------*/
1071
1072static void
4517da37 1073handle_syncline (char *args, location loc)
900c5db5 1074{
4517da37
PE
1075 char *after_num;
1076 unsigned long int lineno = strtoul (args, &after_num, 10);
1077 char *file = strchr (after_num, '"') + 1;
1078 *strchr (file, '"') = '\0';
1079 if (INT_MAX <= lineno)
1080 {
1081 warn_at (loc, _("line number overflow"));
1082 lineno = INT_MAX;
1083 }
dca81a78 1084 scanner_cursor.file = current_file = uniqstr_new (file);
3f2d73f1
PE
1085 scanner_cursor.line = lineno;
1086 scanner_cursor.column = 1;
900c5db5
AD
1087}
1088
a706a1cc 1089
4517da37
PE
1090/*---------------------------------.
1091| Report a rule that is too long. |
1092`---------------------------------*/
1093
1094static void
1095rule_length_overflow (location loc)
1096{
1097 fatal_at (loc, _("rule is too long"));
1098}
1099
1100
4febdd96
PE
1101/*----------------------------------------------------------------.
1102| For a token or comment starting at START, report message MSGID, |
1103| which should say that an end marker was found before |
1104| the expected TOKEN_END. |
1105`----------------------------------------------------------------*/
1106
1107static void
1108unexpected_end (boundary start, char const *msgid, char const *token_end)
1109{
1110 location loc;
1111 loc.start = start;
1112 loc.end = scanner_cursor;
1113 complain_at (loc, _(msgid), token_end);
1114}
1115
1116
3f2d73f1
PE
1117/*------------------------------------------------------------------------.
1118| Report an unexpected EOF in a token or comment starting at START. |
1119| An end of file was encountered and the expected TOKEN_END was missing. |
3f2d73f1 1120`------------------------------------------------------------------------*/
a706a1cc
PE
1121
1122static void
aa418041 1123unexpected_eof (boundary start, char const *token_end)
a706a1cc 1124{
4febdd96
PE
1125 unexpected_end (start, N_("missing `%s' at end of file"), token_end);
1126}
1127
1128
1129/*----------------------------------------.
1130| Likewise, but for unexpected newlines. |
1131`----------------------------------------*/
1132
1133static void
1134unexpected_newline (boundary start, char const *token_end)
1135{
1136 unexpected_end (start, N_("missing `%s' at end of line"), token_end);
a706a1cc
PE
1137}
1138
1139
f25bfb75
AD
1140/*-------------------------.
1141| Initialize the scanner. |
1142`-------------------------*/
1143
1d6412ad
AD
1144void
1145scanner_initialize (void)
1146{
223ff46e 1147 obstack_init (&obstack_for_string);
1d6412ad
AD
1148}
1149
1150
f25bfb75
AD
1151/*-----------------------------------------------.
1152| Free all the memory allocated to the scanner. |
1153`-----------------------------------------------*/
1154
4cdb01db
AD
1155void
1156scanner_free (void)
1157{
223ff46e 1158 obstack_free (&obstack_for_string, 0);
536545f3
AD
1159 /* Reclaim Flex's buffers. */
1160 yy_delete_buffer (YY_CURRENT_BUFFER);
4cdb01db 1161}