]> git.saurik.com Git - bison.git/blame - src/scan-gram.l
(add_param): Fix bug where NUL was mishandled.
[bison.git] / src / scan-gram.l
CommitLineData
e9955c83 1/* Bison Grammar Scanner -*- C -*-
3b1e470c 2
1452af69 3 Copyright (C) 2002, 2003, 2004 Free Software Foundation, Inc.
e9955c83
AD
4
5 This file is part of Bison, the GNU Compiler Compiler.
6
7 This program is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 2 of the License, or
10 (at your option) any later version.
11
12 This program is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
16
17 You should have received a copy of the GNU General Public License
18 along with this program; if not, write to the Free Software
19 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
20 02111-1307 USA
21*/
22
aa418041 23%option debug nodefault nounput noyywrap never-interactive
e9955c83
AD
24%option prefix="gram_" outfile="lex.yy.c"
25
26%{
27#include "system.h"
223ff46e
PE
28
29#include <mbswidth.h>
30#include <get-errno.h>
31#include <quote.h>
32
e9955c83 33#include "complain.h"
3f2d73f1 34#include "files.h"
e9955c83
AD
35#include "getargs.h"
36#include "gram.h"
37#include "reader.h"
223ff46e 38#include "uniqstr.h"
e9955c83 39
3f2d73f1
PE
40#define YY_USER_INIT \
41 do \
42 { \
43 scanner_cursor.file = current_file; \
44 scanner_cursor.line = 1; \
45 scanner_cursor.column = 1; \
379f0ac8 46 code_start = scanner_cursor; \
3f2d73f1
PE
47 } \
48 while (0)
8efe435c 49
3f2d73f1
PE
50/* Location of scanner cursor. */
51boundary scanner_cursor;
41141c56 52
223ff46e 53static void adjust_location (location *, char const *, size_t);
3f2d73f1 54#define YY_USER_ACTION adjust_location (loc, yytext, yyleng);
d8d3f94a 55
6c30d641 56static size_t no_cr_read (FILE *, char *, size_t);
d8d3f94a
PE
57#define YY_INPUT(buf, result, size) ((result) = no_cr_read (yyin, buf, size))
58
59
223ff46e 60/* OBSTACK_FOR_STRING -- Used to store all the characters that we need to
44995b2e
AD
61 keep (to construct ID, STRINGS etc.). Use the following macros to
62 use it.
63
41141c56
PE
64 Use STRING_GROW to append what has just been matched, and
65 STRING_FINISH to end the string (it puts the ending 0).
66 STRING_FINISH also stores this string in LAST_STRING, which can be
67 used, and which is used by STRING_FREE to free the last string. */
44995b2e 68
223ff46e 69static struct obstack obstack_for_string;
44995b2e 70
7ec2d4cd
AD
71/* A string representing the most recently saved token. */
72static char *last_string;
73
74
41141c56 75#define STRING_GROW \
223ff46e 76 obstack_grow (&obstack_for_string, yytext, yyleng)
44995b2e 77
41141c56 78#define STRING_FINISH \
44995b2e 79 do { \
223ff46e
PE
80 obstack_1grow (&obstack_for_string, '\0'); \
81 last_string = obstack_finish (&obstack_for_string); \
44995b2e
AD
82 } while (0)
83
41141c56 84#define STRING_FREE \
223ff46e 85 obstack_free (&obstack_for_string, last_string)
e9955c83 86
7ec2d4cd
AD
87void
88scanner_last_string_free (void)
89{
41141c56 90 STRING_FREE;
7ec2d4cd 91}
e9955c83 92
efcb44dd
PE
93/* Within well-formed rules, RULE_LENGTH is the number of values in
94 the current rule so far, which says where to find `$0' with respect
95 to the top of the stack. It is not the same as the rule->length in
96 the case of mid rule actions.
97
98 Outside of well-formed rules, RULE_LENGTH has an undefined value. */
99static int rule_length;
100
624a35e2
PE
101static void handle_dollar (int token_type, char *cp, location loc);
102static void handle_at (int token_type, char *cp, location loc);
3f2d73f1 103static void handle_syncline (char *args);
1452af69 104static unsigned long int scan_integer (char const *p, int base, location loc);
d8d3f94a 105static int convert_ucn_to_byte (char const *hex_text);
aa418041 106static void unexpected_eof (boundary, char const *);
4febdd96 107static void unexpected_newline (boundary, char const *);
e9955c83
AD
108
109%}
d8d3f94a 110%x SC_COMMENT SC_LINE_COMMENT SC_YACC_COMMENT
e9955c83 111%x SC_STRING SC_CHARACTER
3f2d73f1 112%x SC_AFTER_IDENTIFIER
e9955c83 113%x SC_ESCAPED_STRING SC_ESCAPED_CHARACTER
624a35e2 114%x SC_PRE_CODE SC_BRACED_CODE SC_PROLOGUE SC_EPILOGUE
e9955c83 115
29c01725
AD
116letter [.abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ_]
117id {letter}({letter}|[0-9])*
118directive %{letter}({letter}|[0-9]|-)*
624a35e2 119int [0-9]+
d8d3f94a
PE
120
121/* POSIX says that a tag must be both an id and a C union member, but
122 historically almost any character is allowed in a tag. We disallow
123 NUL and newline, as this simplifies our implementation. */
124tag [^\0\n>]+
125
126/* Zero or more instances of backslash-newline. Following GCC, allow
127 white space between the backslash and the newline. */
128splice (\\[ \f\t\v]*\n)*
e9955c83
AD
129
130%%
131%{
a706a1cc 132 /* Nesting level of the current code in braces. */
1a9e39f1
PE
133 int braces_level IF_LINT (= 0);
134
3f2d73f1
PE
135 /* Parent context state, when applicable. */
136 int context_state IF_LINT (= 0);
a706a1cc 137
624a35e2
PE
138 /* Token type to return, when applicable. */
139 int token_type IF_LINT (= 0);
140
3f2d73f1 141 /* Location of most recent identifier, when applicable. */
a2bc9dbc 142 location id_loc IF_LINT (= empty_location);
3f2d73f1 143
a2bc9dbc
PE
144 /* Where containing code started, when applicable. Its initial
145 value is relevant only when yylex is invoked in the SC_EPILOGUE
146 start condition. */
147 boundary code_start = scanner_cursor;
3f2d73f1 148
223ff46e
PE
149 /* Where containing comment or string or character literal started,
150 when applicable. */
a2bc9dbc 151 boundary token_start IF_LINT (= scanner_cursor);
e9955c83
AD
152%}
153
154
3f2d73f1
PE
155 /*-----------------------.
156 | Scanning white space. |
157 `-----------------------*/
158
624a35e2 159<INITIAL,SC_AFTER_IDENTIFIER,SC_PRE_CODE>
3f2d73f1 160{
4febdd96 161 /* Comments and white space. */
83adb046 162 "," warn_at (*loc, _("stray `,' treated as white space"));
4febdd96 163 [ \f\n\t\v] |
3f2d73f1 164 "//".* ;
83adb046
PE
165 "/*" {
166 token_start = loc->start;
167 context_state = YY_START;
168 BEGIN SC_YACC_COMMENT;
169 }
3f2d73f1
PE
170
171 /* #line directives are not documented, and may be withdrawn or
172 modified in future versions of Bison. */
173 ^"#line "{int}" \"".*"\"\n" {
174 handle_syncline (yytext + sizeof "#line " - 1);
175 }
176}
177
178
e9955c83
AD
179 /*----------------------------.
180 | Scanning Bison directives. |
181 `----------------------------*/
182<INITIAL>
183{
184 "%binary" return PERCENT_NONASSOC;
185 "%debug" return PERCENT_DEBUG;
39a06c25 186 "%default"[-_]"prec" return PERCENT_DEFAULT_PREC;
e9955c83
AD
187 "%define" return PERCENT_DEFINE;
188 "%defines" return PERCENT_DEFINES;
624a35e2 189 "%destructor" token_type = PERCENT_DESTRUCTOR; BEGIN SC_PRE_CODE;
676385e2 190 "%dprec" return PERCENT_DPREC;
e9955c83
AD
191 "%error"[-_]"verbose" return PERCENT_ERROR_VERBOSE;
192 "%expect" return PERCENT_EXPECT;
d6328241 193 "%expect"[-_]"rr" return PERCENT_EXPECT_RR;
e9955c83
AD
194 "%file-prefix" return PERCENT_FILE_PREFIX;
195 "%fixed"[-_]"output"[-_]"files" return PERCENT_YACC;
cd3684cf 196 "%initial-action" token_type = PERCENT_INITIAL_ACTION; BEGIN SC_PRE_CODE;
ae7453f2 197 "%glr-parser" return PERCENT_GLR_PARSER;
e9955c83 198 "%left" return PERCENT_LEFT;
624a35e2 199 "%lex-param" token_type = PERCENT_LEX_PARAM; BEGIN SC_PRE_CODE;
e9955c83 200 "%locations" return PERCENT_LOCATIONS;
676385e2 201 "%merge" return PERCENT_MERGE;
e9955c83 202 "%name"[-_]"prefix" return PERCENT_NAME_PREFIX;
22fccf95 203 "%no"[-_]"default"[-_]"prec" return PERCENT_NO_DEFAULT_PREC;
e9955c83
AD
204 "%no"[-_]"lines" return PERCENT_NO_LINES;
205 "%nonassoc" return PERCENT_NONASSOC;
916708d5 206 "%nondeterministic-parser" return PERCENT_NONDETERMINISTIC_PARSER;
e9955c83
AD
207 "%nterm" return PERCENT_NTERM;
208 "%output" return PERCENT_OUTPUT;
624a35e2 209 "%parse-param" token_type = PERCENT_PARSE_PARAM; BEGIN SC_PRE_CODE;
d8d3f94a 210 "%prec" rule_length--; return PERCENT_PREC;
624a35e2 211 "%printer" token_type = PERCENT_PRINTER; BEGIN SC_PRE_CODE;
e9955c83
AD
212 "%pure"[-_]"parser" return PERCENT_PURE_PARSER;
213 "%right" return PERCENT_RIGHT;
214 "%skeleton" return PERCENT_SKELETON;
215 "%start" return PERCENT_START;
216 "%term" return PERCENT_TOKEN;
217 "%token" return PERCENT_TOKEN;
218 "%token"[-_]"table" return PERCENT_TOKEN_TABLE;
219 "%type" return PERCENT_TYPE;
624a35e2 220 "%union" token_type = PERCENT_UNION; BEGIN SC_PRE_CODE;
e9955c83
AD
221 "%verbose" return PERCENT_VERBOSE;
222 "%yacc" return PERCENT_YACC;
223
3f2d73f1 224 {directive} {
41141c56 225 complain_at (*loc, _("invalid directive: %s"), quote (yytext));
412f8a59 226 }
900c5db5 227
e9955c83 228 "=" return EQUAL;
d8d3f94a 229 "|" rule_length = 0; return PIPE;
e9955c83
AD
230 ";" return SEMICOLON;
231
3f2d73f1 232 {id} {
41141c56 233 val->symbol = symbol_get (yytext, *loc);
3f2d73f1 234 id_loc = *loc;
efcb44dd 235 rule_length++;
3f2d73f1 236 BEGIN SC_AFTER_IDENTIFIER;
e9955c83
AD
237 }
238
d8d3f94a 239 {int} {
1452af69
PE
240 val->integer = scan_integer (yytext, 10, *loc);
241 return INT;
242 }
243 0[xX][0-9abcdefABCDEF]+ {
244 val->integer = scan_integer (yytext, 16, *loc);
d8d3f94a
PE
245 return INT;
246 }
e9955c83
AD
247
248 /* Characters. We don't check there is only one. */
3f2d73f1 249 "'" STRING_GROW; token_start = loc->start; BEGIN SC_ESCAPED_CHARACTER;
e9955c83
AD
250
251 /* Strings. */
3f2d73f1 252 "\"" STRING_GROW; token_start = loc->start; BEGIN SC_ESCAPED_STRING;
e9955c83
AD
253
254 /* Prologue. */
3f2d73f1 255 "%{" code_start = loc->start; BEGIN SC_PROLOGUE;
e9955c83
AD
256
257 /* Code in between braces. */
3f2d73f1
PE
258 "{" {
259 STRING_GROW;
624a35e2 260 token_type = BRACED_CODE;
3f2d73f1
PE
261 braces_level = 0;
262 code_start = loc->start;
263 BEGIN SC_BRACED_CODE;
264 }
e9955c83
AD
265
266 /* A type. */
d8d3f94a 267 "<"{tag}">" {
223ff46e 268 obstack_grow (&obstack_for_string, yytext + 1, yyleng - 2);
41141c56 269 STRING_FINISH;
223ff46e 270 val->uniqstr = uniqstr_new (last_string);
41141c56 271 STRING_FREE;
4cdb01db
AD
272 return TYPE;
273 }
274
a706a1cc
PE
275 "%%" {
276 static int percent_percent_count;
e9955c83 277 if (++percent_percent_count == 2)
a2bc9dbc 278 BEGIN SC_EPILOGUE;
e9955c83
AD
279 return PERCENT_PERCENT;
280 }
281
a706a1cc 282 . {
41141c56 283 complain_at (*loc, _("invalid character: %s"), quote (yytext));
3f2d73f1 284 }
379f0ac8
PE
285
286 <<EOF>> {
287 loc->start = loc->end = scanner_cursor;
288 yyterminate ();
289 }
3f2d73f1
PE
290}
291
292
293 /*-----------------------------------------------------------------.
294 | Scanning after an identifier, checking whether a colon is next. |
295 `-----------------------------------------------------------------*/
296
297<SC_AFTER_IDENTIFIER>
298{
299 ":" {
300 rule_length = 0;
301 *loc = id_loc;
302 BEGIN INITIAL;
303 return ID_COLON;
304 }
305 . {
306 scanner_cursor.column -= mbsnwidth (yytext, yyleng, 0);
307 yyless (0);
308 *loc = id_loc;
309 BEGIN INITIAL;
310 return ID;
311 }
312 <<EOF>> {
313 *loc = id_loc;
314 BEGIN INITIAL;
315 return ID;
e9955c83
AD
316 }
317}
318
319
d8d3f94a
PE
320 /*---------------------------------------------------------------.
321 | Scanning a Yacc comment. The initial `/ *' is already eaten. |
322 `---------------------------------------------------------------*/
e9955c83 323
d8d3f94a 324<SC_YACC_COMMENT>
e9955c83 325{
3f2d73f1 326 "*/" BEGIN context_state;
a706a1cc 327 .|\n ;
aa418041 328 <<EOF>> unexpected_eof (token_start, "*/"); BEGIN context_state;
d8d3f94a
PE
329}
330
331
332 /*------------------------------------------------------------.
333 | Scanning a C comment. The initial `/ *' is already eaten. |
334 `------------------------------------------------------------*/
335
336<SC_COMMENT>
337{
3f2d73f1 338 "*"{splice}"/" STRING_GROW; BEGIN context_state;
aa418041 339 <<EOF>> unexpected_eof (token_start, "*/"); BEGIN context_state;
e9955c83
AD
340}
341
342
d8d3f94a
PE
343 /*--------------------------------------------------------------.
344 | Scanning a line comment. The initial `//' is already eaten. |
345 `--------------------------------------------------------------*/
346
347<SC_LINE_COMMENT>
348{
3f2d73f1 349 "\n" STRING_GROW; BEGIN context_state;
41141c56 350 {splice} STRING_GROW;
3f2d73f1 351 <<EOF>> BEGIN context_state;
d8d3f94a
PE
352}
353
354
4febdd96
PE
355 /*------------------------------------------------.
356 | Scanning a Bison string, including its escapes. |
357 | The initial quote is already eaten. |
358 `------------------------------------------------*/
e9955c83
AD
359
360<SC_ESCAPED_STRING>
361{
db2cc12f 362 "\"" {
41141c56
PE
363 STRING_GROW;
364 STRING_FINISH;
3f2d73f1 365 loc->start = token_start;
223ff46e 366 val->chars = last_string;
efcb44dd 367 rule_length++;
a706a1cc 368 BEGIN INITIAL;
e9955c83
AD
369 return STRING;
370 }
4febdd96
PE
371 \n unexpected_newline (token_start, "\""); BEGIN INITIAL;
372 <<EOF>> unexpected_eof (token_start, "\""); BEGIN INITIAL;
e9955c83
AD
373}
374
4febdd96
PE
375 /*----------------------------------------------------------.
376 | Scanning a Bison character literal, decoding its escapes. |
377 | The initial quote is already eaten. |
378 `----------------------------------------------------------*/
e9955c83
AD
379
380<SC_ESCAPED_CHARACTER>
381{
db2cc12f 382 "'" {
3b1e470c 383 unsigned char last_string_1;
41141c56
PE
384 STRING_GROW;
385 STRING_FINISH;
3f2d73f1 386 loc->start = token_start;
41141c56
PE
387 val->symbol = symbol_get (last_string, *loc);
388 symbol_class_set (val->symbol, token_sym, *loc);
3b1e470c
PE
389 last_string_1 = last_string[1];
390 symbol_user_token_number_set (val->symbol, last_string_1, *loc);
41141c56 391 STRING_FREE;
a706a1cc
PE
392 rule_length++;
393 BEGIN INITIAL;
394 return ID;
e9955c83 395 }
4febdd96
PE
396 \n unexpected_newline (token_start, "'"); BEGIN INITIAL;
397 <<EOF>> unexpected_eof (token_start, "'"); BEGIN INITIAL;
398}
a706a1cc 399
4febdd96
PE
400<SC_ESCAPED_CHARACTER,SC_ESCAPED_STRING>
401{
92ac3705 402 \0 complain_at (*loc, _("invalid null character"));
e9955c83
AD
403}
404
405
406 /*----------------------------.
407 | Decode escaped characters. |
408 `----------------------------*/
409
410<SC_ESCAPED_STRING,SC_ESCAPED_CHARACTER>
411{
d8d3f94a 412 \\[0-7]{1,3} {
1452af69 413 unsigned long int c = strtoul (yytext + 1, 0, 8);
d8d3f94a 414 if (UCHAR_MAX < c)
3f2d73f1 415 complain_at (*loc, _("invalid escape sequence: %s"), quote (yytext));
92ac3705
PE
416 else if (! c)
417 complain_at (*loc, _("invalid null character: %s"), quote (yytext));
e9955c83 418 else
223ff46e 419 obstack_1grow (&obstack_for_string, c);
e9955c83
AD
420 }
421
6b0d38ab 422 \\x[0-9abcdefABCDEF]+ {
1452af69 423 unsigned long int c;
223ff46e 424 set_errno (0);
d8d3f94a 425 c = strtoul (yytext + 2, 0, 16);
223ff46e 426 if (UCHAR_MAX < c || get_errno ())
3f2d73f1 427 complain_at (*loc, _("invalid escape sequence: %s"), quote (yytext));
92ac3705
PE
428 else if (! c)
429 complain_at (*loc, _("invalid null character: %s"), quote (yytext));
d8d3f94a 430 else
223ff46e 431 obstack_1grow (&obstack_for_string, c);
e9955c83
AD
432 }
433
223ff46e
PE
434 \\a obstack_1grow (&obstack_for_string, '\a');
435 \\b obstack_1grow (&obstack_for_string, '\b');
436 \\f obstack_1grow (&obstack_for_string, '\f');
437 \\n obstack_1grow (&obstack_for_string, '\n');
438 \\r obstack_1grow (&obstack_for_string, '\r');
439 \\t obstack_1grow (&obstack_for_string, '\t');
440 \\v obstack_1grow (&obstack_for_string, '\v');
412f8a59
PE
441
442 /* \\[\"\'?\\] would be shorter, but it confuses xgettext. */
223ff46e 443 \\("\""|"'"|"?"|"\\") obstack_1grow (&obstack_for_string, yytext[1]);
412f8a59 444
6b0d38ab 445 \\(u|U[0-9abcdefABCDEF]{4})[0-9abcdefABCDEF]{4} {
d8d3f94a
PE
446 int c = convert_ucn_to_byte (yytext);
447 if (c < 0)
3f2d73f1 448 complain_at (*loc, _("invalid escape sequence: %s"), quote (yytext));
92ac3705
PE
449 else if (! c)
450 complain_at (*loc, _("invalid null character: %s"), quote (yytext));
d8d3f94a 451 else
223ff46e 452 obstack_1grow (&obstack_for_string, c);
d8d3f94a 453 }
4f25ebb0 454 \\(.|\n) {
3f2d73f1 455 complain_at (*loc, _("unrecognized escape sequence: %s"), quote (yytext));
41141c56 456 STRING_GROW;
e9955c83
AD
457 }
458}
459
4febdd96
PE
460 /*--------------------------------------------.
461 | Scanning user-code characters and strings. |
462 `--------------------------------------------*/
e9955c83 463
4febdd96
PE
464<SC_CHARACTER,SC_STRING>
465{
466 {splice}|\\{splice}[^\n$@\[\]] STRING_GROW;
467}
e9955c83
AD
468
469<SC_CHARACTER>
470{
4febdd96
PE
471 "'" STRING_GROW; BEGIN context_state;
472 \n unexpected_newline (token_start, "'"); BEGIN context_state;
473 <<EOF>> unexpected_eof (token_start, "'"); BEGIN context_state;
e9955c83
AD
474}
475
e9955c83
AD
476<SC_STRING>
477{
4febdd96
PE
478 "\"" STRING_GROW; BEGIN context_state;
479 \n unexpected_newline (token_start, "\""); BEGIN context_state;
480 <<EOF>> unexpected_eof (token_start, "\""); BEGIN context_state;
e9955c83
AD
481}
482
483
484 /*---------------------------------------------------.
485 | Strings, comments etc. can be found in user code. |
486 `---------------------------------------------------*/
487
488<SC_BRACED_CODE,SC_PROLOGUE,SC_EPILOGUE>
489{
3f2d73f1
PE
490 "'" {
491 STRING_GROW;
492 context_state = YY_START;
493 token_start = loc->start;
494 BEGIN SC_CHARACTER;
495 }
496 "\"" {
497 STRING_GROW;
498 context_state = YY_START;
499 token_start = loc->start;
500 BEGIN SC_STRING;
501 }
502 "/"{splice}"*" {
503 STRING_GROW;
504 context_state = YY_START;
505 token_start = loc->start;
506 BEGIN SC_COMMENT;
507 }
508 "/"{splice}"/" {
509 STRING_GROW;
510 context_state = YY_START;
511 BEGIN SC_LINE_COMMENT;
512 }
e9955c83
AD
513}
514
515
624a35e2
PE
516 /*---------------------------------------------------------------.
517 | Scanning after %union etc., possibly followed by white space. |
518 | For %union only, allow arbitrary C code to appear before the |
519 | following brace, as an extension to POSIX. |
520 `---------------------------------------------------------------*/
521
522<SC_PRE_CODE>
523{
524 . {
525 bool valid = yytext[0] == '{' || token_type == PERCENT_UNION;
526 scanner_cursor.column -= mbsnwidth (yytext, yyleng, 0);
527 yyless (0);
528
529 if (valid)
530 {
531 braces_level = -1;
532 code_start = loc->start;
533 BEGIN SC_BRACED_CODE;
534 }
535 else
536 {
537 complain_at (*loc, _("missing `{' in `%s'"),
538 token_name (token_type));
539 obstack_sgrow (&obstack_for_string, "{}");
540 STRING_FINISH;
541 val->chars = last_string;
542 BEGIN INITIAL;
543 return token_type;
544 }
545 }
379f0ac8 546
aa418041 547 <<EOF>> unexpected_eof (scanner_cursor, "{}"); BEGIN INITIAL;
624a35e2
PE
548}
549
550
e9955c83
AD
551 /*---------------------------------------------------------------.
552 | Scanning some code in braces (%union and actions). The initial |
553 | "{" is already eaten. |
554 `---------------------------------------------------------------*/
555
556<SC_BRACED_CODE>
557{
41141c56
PE
558 "{"|"<"{splice}"%" STRING_GROW; braces_level++;
559 "%"{splice}">" STRING_GROW; braces_level--;
e9955c83 560 "}" {
25522739
PE
561 bool outer_brace = --braces_level < 0;
562
563 /* As an undocumented Bison extension, append `;' before the last
564 brace in braced code, so that the user code can omit trailing
565 `;'. But do not append `;' if emulating Yacc, since Yacc does
566 not append one.
567
568 FIXME: Bison should warn if a semicolon seems to be necessary
569 here, and should omit the semicolon if it seems unnecessary
570 (e.g., after ';', '{', or '}', each followed by comments or
571 white space). Such a warning shouldn't depend on --yacc; it
572 should depend on a new --pedantic option, which would cause
573 Bison to warn if it detects an extension to POSIX. --pedantic
574 should also diagnose other Bison extensions like %yacc.
575 Perhaps there should also be a GCC-style --pedantic-errors
576 option, so that such warnings are diagnosed as errors. */
1deb9bdc 577 if (outer_brace && token_type == BRACED_CODE && ! yacc_flag)
25522739
PE
578 obstack_1grow (&obstack_for_string, ';');
579
580 obstack_1grow (&obstack_for_string, '}');
581
582 if (outer_brace)
e9955c83 583 {
41141c56 584 STRING_FINISH;
624a35e2 585 rule_length++;
3f2d73f1 586 loc->start = code_start;
223ff46e 587 val->chars = last_string;
a706a1cc 588 BEGIN INITIAL;
624a35e2 589 return token_type;
e9955c83
AD
590 }
591 }
592
a706a1cc
PE
593 /* Tokenize `<<%' correctly (as `<<' `%') rather than incorrectly
594 (as `<' `<%'). */
41141c56 595 "<"{splice}"<" STRING_GROW;
a706a1cc 596
624a35e2
PE
597 "$"("<"{tag}">")?(-?[0-9]+|"$") handle_dollar (token_type, yytext, *loc);
598 "@"(-?[0-9]+|"$") handle_at (token_type, yytext, *loc);
e9955c83 599
aa418041 600 <<EOF>> unexpected_eof (code_start, "}"); BEGIN INITIAL;
e9955c83
AD
601}
602
603
604 /*--------------------------------------------------------------.
605 | Scanning some prologue: from "%{" (already scanned) to "%}". |
606 `--------------------------------------------------------------*/
607
608<SC_PROLOGUE>
609{
610 "%}" {
41141c56 611 STRING_FINISH;
3f2d73f1 612 loc->start = code_start;
223ff46e 613 val->chars = last_string;
a706a1cc 614 BEGIN INITIAL;
e9955c83
AD
615 return PROLOGUE;
616 }
617
aa418041 618 <<EOF>> unexpected_eof (code_start, "%}"); BEGIN INITIAL;
e9955c83
AD
619}
620
621
622 /*---------------------------------------------------------------.
623 | Scanning the epilogue (everything after the second "%%", which |
d8d3f94a 624 | has already been eaten). |
e9955c83
AD
625 `---------------------------------------------------------------*/
626
627<SC_EPILOGUE>
628{
e9955c83 629 <<EOF>> {
41141c56 630 STRING_FINISH;
3f2d73f1 631 loc->start = code_start;
223ff46e 632 val->chars = last_string;
a706a1cc 633 BEGIN INITIAL;
e9955c83
AD
634 return EPILOGUE;
635 }
636}
637
638
4febdd96
PE
639 /*-----------------------------------------.
640 | Escape M4 quoting characters in C code. |
641 `-----------------------------------------*/
a706a1cc
PE
642
643<SC_COMMENT,SC_LINE_COMMENT,SC_STRING,SC_CHARACTER,SC_BRACED_CODE,SC_PROLOGUE,SC_EPILOGUE>
644{
223ff46e
PE
645 \$ obstack_sgrow (&obstack_for_string, "$][");
646 \@ obstack_sgrow (&obstack_for_string, "@@");
647 \[ obstack_sgrow (&obstack_for_string, "@{");
648 \] obstack_sgrow (&obstack_for_string, "@}");
a706a1cc
PE
649}
650
651
4febdd96
PE
652 /*-----------------------------------------------------.
653 | By default, grow the string obstack with the input. |
654 `-----------------------------------------------------*/
655
656<SC_COMMENT,SC_LINE_COMMENT,SC_BRACED_CODE,SC_PROLOGUE,SC_EPILOGUE,SC_STRING,SC_CHARACTER,SC_ESCAPED_STRING,SC_ESCAPED_CHARACTER>. |
657<SC_COMMENT,SC_LINE_COMMENT,SC_BRACED_CODE,SC_PROLOGUE,SC_EPILOGUE>\n STRING_GROW;
658
e9955c83
AD
659%%
660
cd3684cf
AD
661/* Keeps track of the maximum number of semantic values to the left of
662 a handle (those referenced by $0, $-1, etc.) that are required by the
25005f6a
PH
663 semantic actions of this grammar. */
664int max_left_semantic_context = 0;
665
3f2d73f1
PE
666/* Set *LOC and adjust scanner cursor to account for token TOKEN of
667 size SIZE. */
6c30d641
PE
668
669static void
223ff46e 670adjust_location (location *loc, char const *token, size_t size)
6c30d641 671{
3f2d73f1
PE
672 int line = scanner_cursor.line;
673 int column = scanner_cursor.column;
6c30d641
PE
674 char const *p0 = token;
675 char const *p = token;
676 char const *lim = token + size;
677
3f2d73f1
PE
678 loc->start = scanner_cursor;
679
6c30d641
PE
680 for (p = token; p < lim; p++)
681 switch (*p)
682 {
6c30d641
PE
683 case '\n':
684 line++;
685 column = 1;
686 p0 = p + 1;
687 break;
688
689 case '\t':
690 column += mbsnwidth (p0, p - p0, 0);
691 column += 8 - ((column - 1) & 7);
692 p0 = p + 1;
693 break;
694 }
695
3f2d73f1
PE
696 scanner_cursor.line = line;
697 scanner_cursor.column = column + mbsnwidth (p0, p - p0, 0);
698
699 loc->end = scanner_cursor;
6c30d641
PE
700}
701
702
/* Read up to SIZE bytes from FP into BUF and return the number of
   bytes stored.  Carriage returns are normalized away: both "\r\n"
   and a lone '\r' are stored as a single '\n', so the count returned
   may be smaller than the number of bytes consumed from FP.  */

static size_t
no_cr_read (FILE *fp, char *buf, size_t size)
{
  size_t nread = fread (buf, 1, size, fp);
  char *dst;
  char const *src;
  char const *end;

  if (nread == 0)
    return nread;

  dst = memchr (buf, '\r', nread);
  if (dst == NULL)
    return nread;

  /* DST is the slot of the first '\r'; SRC is the next unread byte.
     Everything before DST is already in its final place.  */
  end = buf + nread;
  src = dst + 1;

  for (;;)
    {
      /* Found an '\r'.  Store '\n' in its place, and ignore any '\n'
         that immediately follows.  */
      *dst++ = '\n';
      if (src == end)
        {
          /* The '\r' was the last byte read, so the '\n' to ignore
             (if any) is still in the stream.  Peek at it, pushing
             back anything else.  */
          int ch = getc (fp);
          if (ch != '\n' && ungetc (ch, fp) != ch)
            break;
        }
      else if (*src == '\n')
        src++;

      /* Copy until the next '\r'.  */
      for (;;)
        {
          char c;
          if (src == end)
            return dst - buf;
          c = *src++;
          if (c == '\r')
            break;
          *dst++ = c;
        }
    }

  return dst - buf;
}
748
749
e9955c83 750/*------------------------------------------------------------------.
366eea36 751| TEXT is pointing to a wannabee semantic value (i.e., a `$'). |
e9955c83
AD
752| |
753| Possible inputs: $[<TYPENAME>]($|integer) |
754| |
223ff46e 755| Output to OBSTACK_FOR_STRING a reference to this semantic value. |
e9955c83
AD
756`------------------------------------------------------------------*/
757
624a35e2 758static inline bool
223ff46e 759handle_action_dollar (char *text, location loc)
e9955c83
AD
760{
761 const char *type_name = NULL;
366eea36 762 char *cp = text + 1;
e9955c83 763
624a35e2
PE
764 if (! current_rule)
765 return false;
766
e9955c83
AD
767 /* Get the type name if explicit. */
768 if (*cp == '<')
769 {
770 type_name = ++cp;
771 while (*cp != '>')
772 ++cp;
773 *cp = '\0';
774 ++cp;
775 }
776
777 if (*cp == '$')
778 {
779 if (!type_name)
223ff46e 780 type_name = symbol_list_n_type_name_get (current_rule, loc, 0);
e9955c83 781 if (!type_name && typed)
223ff46e 782 complain_at (loc, _("$$ of `%s' has no declared type"),
97650f4e 783 current_rule->sym->tag);
e9955c83
AD
784 if (!type_name)
785 type_name = "";
223ff46e 786 obstack_fgrow1 (&obstack_for_string,
e9955c83
AD
787 "]b4_lhs_value([%s])[", type_name);
788 }
d8d3f94a 789 else
e9955c83 790 {
1452af69 791 long int num;
223ff46e 792 set_errno (0);
d8d3f94a 793 num = strtol (cp, 0, 10);
e9955c83 794
223ff46e 795 if (INT_MIN <= num && num <= rule_length && ! get_errno ())
e9955c83 796 {
d8d3f94a 797 int n = num;
25005f6a
PH
798 if (1-n > max_left_semantic_context)
799 max_left_semantic_context = 1-n;
e9955c83 800 if (!type_name && n > 0)
223ff46e 801 type_name = symbol_list_n_type_name_get (current_rule, loc, n);
e9955c83 802 if (!type_name && typed)
223ff46e
PE
803 complain_at (loc, _("$%d of `%s' has no declared type"),
804 n, current_rule->sym->tag);
e9955c83
AD
805 if (!type_name)
806 type_name = "";
223ff46e 807 obstack_fgrow3 (&obstack_for_string,
e9955c83
AD
808 "]b4_rhs_value([%d], [%d], [%s])[",
809 rule_length, n, type_name);
810 }
d8d3f94a 811 else
223ff46e 812 complain_at (loc, _("integer out of range: %s"), quote (text));
9280d3ef 813 }
9280d3ef 814
624a35e2 815 return true;
e9955c83
AD
816}
817
f25bfb75 818
cd3684cf
AD
819/*----------------------------------------------------------------.
820| Map `$?' onto the proper M4 symbol, depending on its TOKEN_TYPE |
821| (are we in an action?). |
822`----------------------------------------------------------------*/
e9955c83
AD
823
824static void
624a35e2 825handle_dollar (int token_type, char *text, location loc)
f25bfb75 826{
624a35e2 827 switch (token_type)
f25bfb75 828 {
624a35e2
PE
829 case BRACED_CODE:
830 if (handle_action_dollar (text, loc))
831 return;
f25bfb75
AD
832 break;
833
624a35e2 834 case PERCENT_DESTRUCTOR:
cd3684cf 835 case PERCENT_INITIAL_ACTION:
624a35e2
PE
836 case PERCENT_PRINTER:
837 if (text[1] == '$')
838 {
839 obstack_sgrow (&obstack_for_string, "]b4_dollar_dollar[");
840 return;
841 }
842 break;
843
844 default:
f25bfb75
AD
845 break;
846 }
624a35e2
PE
847
848 complain_at (loc, _("invalid value: %s"), quote (text));
f25bfb75
AD
849}
850
851
852/*------------------------------------------------------.
853| TEXT is a location token (i.e., a `@...'). Output to |
223ff46e 854| OBSTACK_FOR_STRING a reference to this location. |
f25bfb75
AD
855`------------------------------------------------------*/
856
624a35e2 857static inline bool
223ff46e 858handle_action_at (char *text, location loc)
e9955c83 859{
366eea36 860 char *cp = text + 1;
d0829076 861 locations_flag = true;
e9955c83 862
624a35e2
PE
863 if (! current_rule)
864 return false;
865
366eea36 866 if (*cp == '$')
624a35e2 867 obstack_sgrow (&obstack_for_string, "]b4_lhs_location[");
d8d3f94a 868 else
e9955c83 869 {
1452af69 870 long int num;
223ff46e 871 set_errno (0);
d8d3f94a 872 num = strtol (cp, 0, 10);
dafdc66f 873
223ff46e 874 if (INT_MIN <= num && num <= rule_length && ! get_errno ())
d8d3f94a
PE
875 {
876 int n = num;
223ff46e 877 obstack_fgrow2 (&obstack_for_string, "]b4_rhs_location([%d], [%d])[",
d8d3f94a
PE
878 rule_length, n);
879 }
e9955c83 880 else
223ff46e 881 complain_at (loc, _("integer out of range: %s"), quote (text));
f25bfb75 882 }
f25bfb75 883
624a35e2 884 return true;
e9955c83 885}
4cdb01db 886
f25bfb75 887
cd3684cf
AD
888/*----------------------------------------------------------------.
889| Map `@?' onto the proper M4 symbol, depending on its TOKEN_TYPE |
890| (are we in an action?). |
891`----------------------------------------------------------------*/
f25bfb75
AD
892
893static void
624a35e2 894handle_at (int token_type, char *text, location loc)
f25bfb75 895{
624a35e2 896 switch (token_type)
f25bfb75 897 {
624a35e2 898 case BRACED_CODE:
223ff46e 899 handle_action_at (text, loc);
624a35e2
PE
900 return;
901
cd3684cf 902 case PERCENT_INITIAL_ACTION:
624a35e2
PE
903 case PERCENT_DESTRUCTOR:
904 case PERCENT_PRINTER:
905 if (text[1] == '$')
906 {
907 obstack_sgrow (&obstack_for_string, "]b4_at_dollar[");
908 return;
909 }
f25bfb75
AD
910 break;
911
624a35e2 912 default:
f25bfb75
AD
913 break;
914 }
624a35e2
PE
915
916 complain_at (loc, _("invalid value: %s"), quote (text));
f25bfb75
AD
917}
918
919
1452af69
PE
920/*------------------------------------------------------.
921| Scan NUMBER for a base-BASE integer at location LOC. |
922`------------------------------------------------------*/
923
924static unsigned long int
925scan_integer (char const *number, int base, location loc)
926{
927 unsigned long int num;
928 set_errno (0);
929 num = strtoul (number, 0, base);
930 if (INT_MAX < num || get_errno ())
931 {
932 complain_at (loc, _("integer out of range: %s"), quote (number));
933 num = INT_MAX;
934 }
935 return num;
936}
937
938
d8d3f94a
PE
/*------------------------------------------------------------------.
| Convert universal character name UCN to a single-byte character,  |
| and return that character.  Return -1 if UCN does not correspond  |
| to a single-byte character.  The first two bytes of UCN (the      |
| backslash and the `u' or `U') are skipped; the rest is read as a  |
| hexadecimal number.                                               |
`------------------------------------------------------------------*/

static int
convert_ucn_to_byte (char const *ucn)
{
  unsigned long int value = strtoul (ucn + 2, 0, 16);

  /* FIXME: Currently we assume Unicode-compatible unibyte characters
     on ASCII hosts (i.e., Latin-1 on hosts with 8-bit bytes).  On
     non-ASCII hosts we support only the portable C character set.
     These limitations should be removed once we add support for
     multibyte characters.  */

  if (value <= UCHAR_MAX)
    {
#if ! ('$' == 0x24 && '@' == 0x40 && '`' == 0x60 && '~' == 0x7e)
      {
        /* A non-ASCII host.  Use VALUE to index into a table of the C
           basic execution character set, which is guaranteed to exist
           on all Standard C platforms.  This table also includes '$',
           '@', and '`', which are not in the basic execution character
           set but which are unibyte characters on all the platforms
           that we know about.  */
        static signed char const table[] =
          {
            '\0',   -1,   -1,   -1,   -1,   -1,   -1, '\a',
            '\b', '\t', '\n', '\v', '\f', '\r',   -1,   -1,
              -1,   -1,   -1,   -1,   -1,   -1,   -1,   -1,
              -1,   -1,   -1,   -1,   -1,   -1,   -1,   -1,
             ' ',  '!',  '"',  '#',  '$',  '%',  '&', '\'',
             '(',  ')',  '*',  '+',  ',',  '-',  '.',  '/',
             '0',  '1',  '2',  '3',  '4',  '5',  '6',  '7',
             '8',  '9',  ':',  ';',  '<',  '=',  '>',  '?',
             '@',  'A',  'B',  'C',  'D',  'E',  'F',  'G',
             'H',  'I',  'J',  'K',  'L',  'M',  'N',  'O',
             'P',  'Q',  'R',  'S',  'T',  'U',  'V',  'W',
             'X',  'Y',  'Z',  '[', '\\',  ']',  '^',  '_',
             '`',  'a',  'b',  'c',  'd',  'e',  'f',  'g',
             'h',  'i',  'j',  'k',  'l',  'm',  'n',  'o',
             'p',  'q',  'r',  's',  't',  'u',  'v',  'w',
             'x',  'y',  'z',  '{',  '|',  '}',  '~'
          };

        value = value < sizeof table ? table[value] : -1;
      }
#endif

      return value;
    }

  /* The code point does not fit in a single byte.  */
  return -1;
}
993
994
900c5db5
AD
995/*----------------------------------------------------------------.
996| Handle `#line INT "FILE"'. ARGS has already skipped `#line '. |
997`----------------------------------------------------------------*/
998
999static void
3f2d73f1 1000handle_syncline (char *args)
900c5db5
AD
1001{
1002 int lineno = strtol (args, &args, 10);
1003 const char *file = NULL;
1004 file = strchr (args, '"') + 1;
1005 *strchr (file, '"') = 0;
dca81a78 1006 scanner_cursor.file = current_file = uniqstr_new (file);
3f2d73f1
PE
1007 scanner_cursor.line = lineno;
1008 scanner_cursor.column = 1;
900c5db5
AD
1009}
1010
a706a1cc 1011
4febdd96
PE
1012/*----------------------------------------------------------------.
1013| For a token or comment starting at START, report message MSGID, |
1014| which should say that an end marker was found before |
1015| the expected TOKEN_END. |
1016`----------------------------------------------------------------*/
1017
1018static void
1019unexpected_end (boundary start, char const *msgid, char const *token_end)
1020{
1021 location loc;
1022 loc.start = start;
1023 loc.end = scanner_cursor;
1024 complain_at (loc, _(msgid), token_end);
1025}
1026
1027
3f2d73f1
PE
1028/*------------------------------------------------------------------------.
1029| Report an unexpected EOF in a token or comment starting at START. |
1030| An end of file was encountered and the expected TOKEN_END was missing. |
3f2d73f1 1031`------------------------------------------------------------------------*/
a706a1cc
PE
1032
1033static void
aa418041 1034unexpected_eof (boundary start, char const *token_end)
a706a1cc 1035{
4febdd96
PE
1036 unexpected_end (start, N_("missing `%s' at end of file"), token_end);
1037}
1038
1039
1040/*----------------------------------------.
1041| Likewise, but for unexpected newlines. |
1042`----------------------------------------*/
1043
1044static void
1045unexpected_newline (boundary start, char const *token_end)
1046{
1047 unexpected_end (start, N_("missing `%s' at end of line"), token_end);
a706a1cc
PE
1048}
1049
1050
f25bfb75
AD
1051/*-------------------------.
1052| Initialize the scanner. |
1053`-------------------------*/
1054
1d6412ad
AD
1055void
1056scanner_initialize (void)
1057{
223ff46e 1058 obstack_init (&obstack_for_string);
1d6412ad
AD
1059}
1060
1061
f25bfb75
AD
1062/*-----------------------------------------------.
1063| Free all the memory allocated to the scanner. |
1064`-----------------------------------------------*/
1065
4cdb01db
AD
1066void
1067scanner_free (void)
1068{
223ff46e 1069 obstack_free (&obstack_for_string, 0);
536545f3
AD
1070 /* Reclaim Flex's buffers. */
1071 yy_delete_buffer (YY_CURRENT_BUFFER);
4cdb01db 1072}