]> git.saurik.com Git - bison.git/blame - src/scan-gram.l
Bison-generated C parser -> Bison-generated parser
[bison.git] / src / scan-gram.l
CommitLineData
e9955c83 1/* Bison Grammar Scanner -*- C -*-
3b1e470c 2
05ac60f3 3 Copyright (C) 2002, 2003, 2004, 2005 Free Software Foundation, Inc.
e9955c83
AD
4
5 This file is part of Bison, the GNU Compiler Compiler.
6
7 This program is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 2 of the License, or
10 (at your option) any later version.
11
12 This program is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
16
17 You should have received a copy of the GNU General Public License
18 along with this program; if not, write to the Free Software
0fb669f9
PE
19 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
20 02110-1301 USA
e9955c83
AD
21*/
22
aa418041 23%option debug nodefault nounput noyywrap never-interactive
e9955c83
AD
24%option prefix="gram_" outfile="lex.yy.c"
25
26%{
27#include "system.h"
223ff46e
PE
28
29#include <mbswidth.h>
30#include <get-errno.h>
31#include <quote.h>
32
e9955c83 33#include "complain.h"
3f2d73f1 34#include "files.h"
e9955c83
AD
35#include "getargs.h"
36#include "gram.h"
ca407bdf 37#include "quotearg.h"
e9955c83 38#include "reader.h"
223ff46e 39#include "uniqstr.h"
e9955c83 40
3f2d73f1
PE
41#define YY_USER_INIT \
42 do \
43 { \
44 scanner_cursor.file = current_file; \
45 scanner_cursor.line = 1; \
46 scanner_cursor.column = 1; \
379f0ac8 47 code_start = scanner_cursor; \
3f2d73f1
PE
48 } \
49 while (0)
8efe435c 50
3f2d73f1
PE
51/* Location of scanner cursor. */
52boundary scanner_cursor;
41141c56 53
223ff46e 54static void adjust_location (location *, char const *, size_t);
3f2d73f1 55#define YY_USER_ACTION adjust_location (loc, yytext, yyleng);
d8d3f94a 56
6c30d641 57static size_t no_cr_read (FILE *, char *, size_t);
d8d3f94a
PE
58#define YY_INPUT(buf, result, size) ((result) = no_cr_read (yyin, buf, size))
59
60
223ff46e 61/* OBSTACK_FOR_STRING -- Used to store all the characters that we need to
44995b2e
AD
62 keep (to construct ID, STRINGS etc.). Use the following macros to
63 use it.
64
41141c56
PE
65 Use STRING_GROW to append what has just been matched, and
66 STRING_FINISH to end the string (it puts the ending 0).
67 STRING_FINISH also stores this string in LAST_STRING, which can be
68 used, and which is used by STRING_FREE to free the last string. */
44995b2e 69
223ff46e 70static struct obstack obstack_for_string;
44995b2e 71
7ec2d4cd
AD
72/* A string representing the most recently saved token. */
73static char *last_string;
74
75
41141c56 76#define STRING_GROW \
223ff46e 77 obstack_grow (&obstack_for_string, yytext, yyleng)
44995b2e 78
41141c56 79#define STRING_FINISH \
44995b2e 80 do { \
223ff46e
PE
81 obstack_1grow (&obstack_for_string, '\0'); \
82 last_string = obstack_finish (&obstack_for_string); \
44995b2e
AD
83 } while (0)
84
41141c56 85#define STRING_FREE \
223ff46e 86 obstack_free (&obstack_for_string, last_string)
e9955c83 87
7ec2d4cd
AD
88void
89scanner_last_string_free (void)
90{
41141c56 91 STRING_FREE;
7ec2d4cd 92}
e9955c83 93
efcb44dd
PE
94/* Within well-formed rules, RULE_LENGTH is the number of values in
95 the current rule so far, which says where to find `$0' with respect
96 to the top of the stack. It is not the same as the rule->length in
97 the case of mid rule actions.
98
99 Outside of well-formed rules, RULE_LENGTH has an undefined value. */
100static int rule_length;
101
624a35e2
PE
102static void handle_dollar (int token_type, char *cp, location loc);
103static void handle_at (int token_type, char *cp, location loc);
3f2d73f1 104static void handle_syncline (char *args);
1452af69 105static unsigned long int scan_integer (char const *p, int base, location loc);
d8d3f94a 106static int convert_ucn_to_byte (char const *hex_text);
aa418041 107static void unexpected_eof (boundary, char const *);
4febdd96 108static void unexpected_newline (boundary, char const *);
e9955c83
AD
109
110%}
d8d3f94a 111%x SC_COMMENT SC_LINE_COMMENT SC_YACC_COMMENT
e9955c83 112%x SC_STRING SC_CHARACTER
3f2d73f1 113%x SC_AFTER_IDENTIFIER
e9955c83 114%x SC_ESCAPED_STRING SC_ESCAPED_CHARACTER
624a35e2 115%x SC_PRE_CODE SC_BRACED_CODE SC_PROLOGUE SC_EPILOGUE
e9955c83 116
29c01725
AD
117letter [.abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ_]
118id {letter}({letter}|[0-9])*
119directive %{letter}({letter}|[0-9]|-)*
624a35e2 120int [0-9]+
d8d3f94a
PE
121
122/* POSIX says that a tag must be both an id and a C union member, but
123 historically almost any character is allowed in a tag. We disallow
124 NUL and newline, as this simplifies our implementation. */
125tag [^\0\n>]+
126
127/* Zero or more instances of backslash-newline. Following GCC, allow
128 white space between the backslash and the newline. */
129splice (\\[ \f\t\v]*\n)*
e9955c83
AD
130
131%%
132%{
a706a1cc 133 /* Nesting level of the current code in braces. */
1a9e39f1
PE
134 int braces_level IF_LINT (= 0);
135
3f2d73f1
PE
136 /* Parent context state, when applicable. */
137 int context_state IF_LINT (= 0);
a706a1cc 138
624a35e2
PE
139 /* Token type to return, when applicable. */
140 int token_type IF_LINT (= 0);
141
3f2d73f1 142 /* Location of most recent identifier, when applicable. */
a2bc9dbc 143 location id_loc IF_LINT (= empty_location);
3f2d73f1 144
a2bc9dbc
PE
145 /* Where containing code started, when applicable. Its initial
146 value is relevant only when yylex is invoked in the SC_EPILOGUE
147 start condition. */
148 boundary code_start = scanner_cursor;
3f2d73f1 149
223ff46e
PE
150 /* Where containing comment or string or character literal started,
151 when applicable. */
a2bc9dbc 152 boundary token_start IF_LINT (= scanner_cursor);
e9955c83
AD
153%}
154
155
3f2d73f1
PE
156 /*-----------------------.
157 | Scanning white space. |
158 `-----------------------*/
159
624a35e2 160<INITIAL,SC_AFTER_IDENTIFIER,SC_PRE_CODE>
3f2d73f1 161{
4febdd96 162 /* Comments and white space. */
83adb046 163 "," warn_at (*loc, _("stray `,' treated as white space"));
4febdd96 164 [ \f\n\t\v] |
3f2d73f1 165 "//".* ;
83adb046
PE
166 "/*" {
167 token_start = loc->start;
168 context_state = YY_START;
169 BEGIN SC_YACC_COMMENT;
170 }
3f2d73f1
PE
171
172 /* #line directives are not documented, and may be withdrawn or
173 modified in future versions of Bison. */
174 ^"#line "{int}" \"".*"\"\n" {
175 handle_syncline (yytext + sizeof "#line " - 1);
176 }
177}
178
179
e9955c83
AD
180 /*----------------------------.
181 | Scanning Bison directives. |
182 `----------------------------*/
183<INITIAL>
184{
185 "%binary" return PERCENT_NONASSOC;
186 "%debug" return PERCENT_DEBUG;
39a06c25 187 "%default"[-_]"prec" return PERCENT_DEFAULT_PREC;
e9955c83
AD
188 "%define" return PERCENT_DEFINE;
189 "%defines" return PERCENT_DEFINES;
624a35e2 190 "%destructor" token_type = PERCENT_DESTRUCTOR; BEGIN SC_PRE_CODE;
676385e2 191 "%dprec" return PERCENT_DPREC;
e9955c83
AD
192 "%error"[-_]"verbose" return PERCENT_ERROR_VERBOSE;
193 "%expect" return PERCENT_EXPECT;
d6328241 194 "%expect"[-_]"rr" return PERCENT_EXPECT_RR;
e9955c83
AD
195 "%file-prefix" return PERCENT_FILE_PREFIX;
196 "%fixed"[-_]"output"[-_]"files" return PERCENT_YACC;
cd3684cf 197 "%initial-action" token_type = PERCENT_INITIAL_ACTION; BEGIN SC_PRE_CODE;
ae7453f2 198 "%glr-parser" return PERCENT_GLR_PARSER;
e9955c83 199 "%left" return PERCENT_LEFT;
624a35e2 200 "%lex-param" token_type = PERCENT_LEX_PARAM; BEGIN SC_PRE_CODE;
e9955c83 201 "%locations" return PERCENT_LOCATIONS;
676385e2 202 "%merge" return PERCENT_MERGE;
e9955c83 203 "%name"[-_]"prefix" return PERCENT_NAME_PREFIX;
22fccf95 204 "%no"[-_]"default"[-_]"prec" return PERCENT_NO_DEFAULT_PREC;
e9955c83
AD
205 "%no"[-_]"lines" return PERCENT_NO_LINES;
206 "%nonassoc" return PERCENT_NONASSOC;
916708d5 207 "%nondeterministic-parser" return PERCENT_NONDETERMINISTIC_PARSER;
e9955c83
AD
208 "%nterm" return PERCENT_NTERM;
209 "%output" return PERCENT_OUTPUT;
624a35e2 210 "%parse-param" token_type = PERCENT_PARSE_PARAM; BEGIN SC_PRE_CODE;
d8d3f94a 211 "%prec" rule_length--; return PERCENT_PREC;
624a35e2 212 "%printer" token_type = PERCENT_PRINTER; BEGIN SC_PRE_CODE;
e9955c83
AD
213 "%pure"[-_]"parser" return PERCENT_PURE_PARSER;
214 "%right" return PERCENT_RIGHT;
215 "%skeleton" return PERCENT_SKELETON;
216 "%start" return PERCENT_START;
217 "%term" return PERCENT_TOKEN;
218 "%token" return PERCENT_TOKEN;
219 "%token"[-_]"table" return PERCENT_TOKEN_TABLE;
220 "%type" return PERCENT_TYPE;
624a35e2 221 "%union" token_type = PERCENT_UNION; BEGIN SC_PRE_CODE;
e9955c83
AD
222 "%verbose" return PERCENT_VERBOSE;
223 "%yacc" return PERCENT_YACC;
224
3f2d73f1 225 {directive} {
41141c56 226 complain_at (*loc, _("invalid directive: %s"), quote (yytext));
412f8a59 227 }
900c5db5 228
e9955c83 229 "=" return EQUAL;
d8d3f94a 230 "|" rule_length = 0; return PIPE;
e9955c83
AD
231 ";" return SEMICOLON;
232
3f2d73f1 233 {id} {
41141c56 234 val->symbol = symbol_get (yytext, *loc);
3f2d73f1 235 id_loc = *loc;
efcb44dd 236 rule_length++;
3f2d73f1 237 BEGIN SC_AFTER_IDENTIFIER;
e9955c83
AD
238 }
239
d8d3f94a 240 {int} {
1452af69
PE
241 val->integer = scan_integer (yytext, 10, *loc);
242 return INT;
243 }
244 0[xX][0-9abcdefABCDEF]+ {
245 val->integer = scan_integer (yytext, 16, *loc);
d8d3f94a
PE
246 return INT;
247 }
e9955c83
AD
248
249 /* Characters. We don't check there is only one. */
3f2d73f1 250 "'" STRING_GROW; token_start = loc->start; BEGIN SC_ESCAPED_CHARACTER;
e9955c83
AD
251
252 /* Strings. */
ca407bdf 253 "\"" token_start = loc->start; BEGIN SC_ESCAPED_STRING;
e9955c83
AD
254
255 /* Prologue. */
3f2d73f1 256 "%{" code_start = loc->start; BEGIN SC_PROLOGUE;
e9955c83
AD
257
258 /* Code in between braces. */
3f2d73f1
PE
259 "{" {
260 STRING_GROW;
624a35e2 261 token_type = BRACED_CODE;
3f2d73f1
PE
262 braces_level = 0;
263 code_start = loc->start;
264 BEGIN SC_BRACED_CODE;
265 }
e9955c83
AD
266
267 /* A type. */
d8d3f94a 268 "<"{tag}">" {
223ff46e 269 obstack_grow (&obstack_for_string, yytext + 1, yyleng - 2);
41141c56 270 STRING_FINISH;
223ff46e 271 val->uniqstr = uniqstr_new (last_string);
41141c56 272 STRING_FREE;
4cdb01db
AD
273 return TYPE;
274 }
275
a706a1cc
PE
276 "%%" {
277 static int percent_percent_count;
e9955c83 278 if (++percent_percent_count == 2)
a2bc9dbc 279 BEGIN SC_EPILOGUE;
e9955c83
AD
280 return PERCENT_PERCENT;
281 }
282
a706a1cc 283 . {
41141c56 284 complain_at (*loc, _("invalid character: %s"), quote (yytext));
3f2d73f1 285 }
379f0ac8
PE
286
287 <<EOF>> {
288 loc->start = loc->end = scanner_cursor;
289 yyterminate ();
290 }
3f2d73f1
PE
291}
292
293
294 /*-----------------------------------------------------------------.
295 | Scanning after an identifier, checking whether a colon is next. |
296 `-----------------------------------------------------------------*/
297
298<SC_AFTER_IDENTIFIER>
299{
300 ":" {
301 rule_length = 0;
302 *loc = id_loc;
303 BEGIN INITIAL;
304 return ID_COLON;
305 }
306 . {
307 scanner_cursor.column -= mbsnwidth (yytext, yyleng, 0);
308 yyless (0);
309 *loc = id_loc;
310 BEGIN INITIAL;
311 return ID;
312 }
313 <<EOF>> {
314 *loc = id_loc;
315 BEGIN INITIAL;
316 return ID;
e9955c83
AD
317 }
318}
319
320
d8d3f94a
PE
321 /*---------------------------------------------------------------.
322 | Scanning a Yacc comment. The initial `/ *' is already eaten. |
323 `---------------------------------------------------------------*/
e9955c83 324
d8d3f94a 325<SC_YACC_COMMENT>
e9955c83 326{
3f2d73f1 327 "*/" BEGIN context_state;
a706a1cc 328 .|\n ;
aa418041 329 <<EOF>> unexpected_eof (token_start, "*/"); BEGIN context_state;
d8d3f94a
PE
330}
331
332
333 /*------------------------------------------------------------.
334 | Scanning a C comment. The initial `/ *' is already eaten. |
335 `------------------------------------------------------------*/
336
337<SC_COMMENT>
338{
3f2d73f1 339 "*"{splice}"/" STRING_GROW; BEGIN context_state;
aa418041 340 <<EOF>> unexpected_eof (token_start, "*/"); BEGIN context_state;
e9955c83
AD
341}
342
343
d8d3f94a
PE
344 /*--------------------------------------------------------------.
345 | Scanning a line comment. The initial `//' is already eaten. |
346 `--------------------------------------------------------------*/
347
348<SC_LINE_COMMENT>
349{
3f2d73f1 350 "\n" STRING_GROW; BEGIN context_state;
41141c56 351 {splice} STRING_GROW;
3f2d73f1 352 <<EOF>> BEGIN context_state;
d8d3f94a
PE
353}
354
355
4febdd96
PE
356 /*------------------------------------------------.
357 | Scanning a Bison string, including its escapes. |
358 | The initial quote is already eaten. |
359 `------------------------------------------------*/
e9955c83
AD
360
361<SC_ESCAPED_STRING>
362{
db2cc12f 363 "\"" {
41141c56 364 STRING_FINISH;
3f2d73f1 365 loc->start = token_start;
223ff46e 366 val->chars = last_string;
efcb44dd 367 rule_length++;
a706a1cc 368 BEGIN INITIAL;
e9955c83
AD
369 return STRING;
370 }
4febdd96
PE
371 \n unexpected_newline (token_start, "\""); BEGIN INITIAL;
372 <<EOF>> unexpected_eof (token_start, "\""); BEGIN INITIAL;
e9955c83
AD
373}
374
4febdd96
PE
375 /*----------------------------------------------------------.
376 | Scanning a Bison character literal, decoding its escapes. |
377 | The initial quote is already eaten. |
378 `----------------------------------------------------------*/
e9955c83
AD
379
380<SC_ESCAPED_CHARACTER>
381{
db2cc12f 382 "'" {
3b1e470c 383 unsigned char last_string_1;
41141c56
PE
384 STRING_GROW;
385 STRING_FINISH;
3f2d73f1 386 loc->start = token_start;
ca407bdf
PE
387 val->symbol = symbol_get (quotearg_style (escape_quoting_style,
388 last_string),
389 *loc);
41141c56 390 symbol_class_set (val->symbol, token_sym, *loc);
3b1e470c
PE
391 last_string_1 = last_string[1];
392 symbol_user_token_number_set (val->symbol, last_string_1, *loc);
41141c56 393 STRING_FREE;
a706a1cc
PE
394 rule_length++;
395 BEGIN INITIAL;
396 return ID;
e9955c83 397 }
4febdd96
PE
398 \n unexpected_newline (token_start, "'"); BEGIN INITIAL;
399 <<EOF>> unexpected_eof (token_start, "'"); BEGIN INITIAL;
400}
a706a1cc 401
4febdd96
PE
402<SC_ESCAPED_CHARACTER,SC_ESCAPED_STRING>
403{
92ac3705 404 \0 complain_at (*loc, _("invalid null character"));
e9955c83
AD
405}
406
407
408 /*----------------------------.
409 | Decode escaped characters. |
410 `----------------------------*/
411
412<SC_ESCAPED_STRING,SC_ESCAPED_CHARACTER>
413{
d8d3f94a 414 \\[0-7]{1,3} {
1452af69 415 unsigned long int c = strtoul (yytext + 1, 0, 8);
d8d3f94a 416 if (UCHAR_MAX < c)
3f2d73f1 417 complain_at (*loc, _("invalid escape sequence: %s"), quote (yytext));
05ac60f3 418 else if (! c)
92ac3705 419 complain_at (*loc, _("invalid null character: %s"), quote (yytext));
e9955c83 420 else
223ff46e 421 obstack_1grow (&obstack_for_string, c);
e9955c83
AD
422 }
423
6b0d38ab 424 \\x[0-9abcdefABCDEF]+ {
1452af69 425 unsigned long int c;
223ff46e 426 set_errno (0);
d8d3f94a 427 c = strtoul (yytext + 2, 0, 16);
223ff46e 428 if (UCHAR_MAX < c || get_errno ())
3f2d73f1 429 complain_at (*loc, _("invalid escape sequence: %s"), quote (yytext));
92ac3705
PE
430 else if (! c)
431 complain_at (*loc, _("invalid null character: %s"), quote (yytext));
d8d3f94a 432 else
223ff46e 433 obstack_1grow (&obstack_for_string, c);
e9955c83
AD
434 }
435
223ff46e
PE
436 \\a obstack_1grow (&obstack_for_string, '\a');
437 \\b obstack_1grow (&obstack_for_string, '\b');
438 \\f obstack_1grow (&obstack_for_string, '\f');
439 \\n obstack_1grow (&obstack_for_string, '\n');
440 \\r obstack_1grow (&obstack_for_string, '\r');
441 \\t obstack_1grow (&obstack_for_string, '\t');
442 \\v obstack_1grow (&obstack_for_string, '\v');
412f8a59
PE
443
444 /* \\[\"\'?\\] would be shorter, but it confuses xgettext. */
223ff46e 445 \\("\""|"'"|"?"|"\\") obstack_1grow (&obstack_for_string, yytext[1]);
412f8a59 446
6b0d38ab 447 \\(u|U[0-9abcdefABCDEF]{4})[0-9abcdefABCDEF]{4} {
d8d3f94a
PE
448 int c = convert_ucn_to_byte (yytext);
449 if (c < 0)
3f2d73f1 450 complain_at (*loc, _("invalid escape sequence: %s"), quote (yytext));
92ac3705
PE
451 else if (! c)
452 complain_at (*loc, _("invalid null character: %s"), quote (yytext));
d8d3f94a 453 else
223ff46e 454 obstack_1grow (&obstack_for_string, c);
d8d3f94a 455 }
4f25ebb0 456 \\(.|\n) {
3f2d73f1 457 complain_at (*loc, _("unrecognized escape sequence: %s"), quote (yytext));
41141c56 458 STRING_GROW;
e9955c83
AD
459 }
460}
461
4febdd96
PE
462 /*--------------------------------------------.
463 | Scanning user-code characters and strings. |
464 `--------------------------------------------*/
e9955c83 465
4febdd96
PE
466<SC_CHARACTER,SC_STRING>
467{
468 {splice}|\\{splice}[^\n$@\[\]] STRING_GROW;
469}
e9955c83
AD
470
471<SC_CHARACTER>
472{
4febdd96
PE
473 "'" STRING_GROW; BEGIN context_state;
474 \n unexpected_newline (token_start, "'"); BEGIN context_state;
475 <<EOF>> unexpected_eof (token_start, "'"); BEGIN context_state;
e9955c83
AD
476}
477
e9955c83
AD
478<SC_STRING>
479{
4febdd96
PE
480 "\"" STRING_GROW; BEGIN context_state;
481 \n unexpected_newline (token_start, "\""); BEGIN context_state;
482 <<EOF>> unexpected_eof (token_start, "\""); BEGIN context_state;
e9955c83
AD
483}
484
485
486 /*---------------------------------------------------.
487 | Strings, comments etc. can be found in user code. |
488 `---------------------------------------------------*/
489
490<SC_BRACED_CODE,SC_PROLOGUE,SC_EPILOGUE>
491{
3f2d73f1
PE
492 "'" {
493 STRING_GROW;
494 context_state = YY_START;
495 token_start = loc->start;
496 BEGIN SC_CHARACTER;
497 }
498 "\"" {
499 STRING_GROW;
500 context_state = YY_START;
501 token_start = loc->start;
502 BEGIN SC_STRING;
503 }
504 "/"{splice}"*" {
505 STRING_GROW;
506 context_state = YY_START;
507 token_start = loc->start;
508 BEGIN SC_COMMENT;
509 }
510 "/"{splice}"/" {
511 STRING_GROW;
512 context_state = YY_START;
513 BEGIN SC_LINE_COMMENT;
514 }
e9955c83
AD
515}
516
517
624a35e2
PE
518 /*---------------------------------------------------------------.
519 | Scanning after %union etc., possibly followed by white space. |
520 | For %union only, allow arbitrary C code to appear before the |
521 | following brace, as an extension to POSIX. |
522 `---------------------------------------------------------------*/
523
524<SC_PRE_CODE>
525{
526 . {
527 bool valid = yytext[0] == '{' || token_type == PERCENT_UNION;
528 scanner_cursor.column -= mbsnwidth (yytext, yyleng, 0);
529 yyless (0);
530
531 if (valid)
532 {
533 braces_level = -1;
534 code_start = loc->start;
535 BEGIN SC_BRACED_CODE;
536 }
537 else
538 {
539 complain_at (*loc, _("missing `{' in `%s'"),
540 token_name (token_type));
541 obstack_sgrow (&obstack_for_string, "{}");
542 STRING_FINISH;
543 val->chars = last_string;
544 BEGIN INITIAL;
545 return token_type;
546 }
547 }
379f0ac8 548
aa418041 549 <<EOF>> unexpected_eof (scanner_cursor, "{}"); BEGIN INITIAL;
624a35e2
PE
550}
551
552
e9955c83
AD
553 /*---------------------------------------------------------------.
554 | Scanning some code in braces (%union and actions). The initial |
555 | "{" is already eaten. |
556 `---------------------------------------------------------------*/
557
558<SC_BRACED_CODE>
559{
41141c56
PE
560 "{"|"<"{splice}"%" STRING_GROW; braces_level++;
561 "%"{splice}">" STRING_GROW; braces_level--;
e9955c83 562 "}" {
25522739
PE
563 bool outer_brace = --braces_level < 0;
564
565 /* As an undocumented Bison extension, append `;' before the last
566 brace in braced code, so that the user code can omit trailing
567 `;'. But do not append `;' if emulating Yacc, since Yacc does
568 not append one.
569
570 FIXME: Bison should warn if a semicolon seems to be necessary
571 here, and should omit the semicolon if it seems unnecessary
572 (e.g., after ';', '{', or '}', each followed by comments or
573 white space). Such a warning shouldn't depend on --yacc; it
574 should depend on a new --pedantic option, which would cause
575 Bison to warn if it detects an extension to POSIX. --pedantic
576 should also diagnose other Bison extensions like %yacc.
577 Perhaps there should also be a GCC-style --pedantic-errors
578 option, so that such warnings are diagnosed as errors. */
1deb9bdc 579 if (outer_brace && token_type == BRACED_CODE && ! yacc_flag)
25522739
PE
580 obstack_1grow (&obstack_for_string, ';');
581
582 obstack_1grow (&obstack_for_string, '}');
583
584 if (outer_brace)
e9955c83 585 {
41141c56 586 STRING_FINISH;
624a35e2 587 rule_length++;
3f2d73f1 588 loc->start = code_start;
223ff46e 589 val->chars = last_string;
a706a1cc 590 BEGIN INITIAL;
624a35e2 591 return token_type;
e9955c83
AD
592 }
593 }
594
a706a1cc
PE
595 /* Tokenize `<<%' correctly (as `<<' `%') rather than incorrectly
596 (as `<' `<%'). */
41141c56 597 "<"{splice}"<" STRING_GROW;
a706a1cc 598
624a35e2
PE
599 "$"("<"{tag}">")?(-?[0-9]+|"$") handle_dollar (token_type, yytext, *loc);
600 "@"(-?[0-9]+|"$") handle_at (token_type, yytext, *loc);
e9955c83 601
aa418041 602 <<EOF>> unexpected_eof (code_start, "}"); BEGIN INITIAL;
e9955c83
AD
603}
604
605
606 /*--------------------------------------------------------------.
607 | Scanning some prologue: from "%{" (already scanned) to "%}". |
608 `--------------------------------------------------------------*/
609
610<SC_PROLOGUE>
611{
612 "%}" {
41141c56 613 STRING_FINISH;
3f2d73f1 614 loc->start = code_start;
223ff46e 615 val->chars = last_string;
a706a1cc 616 BEGIN INITIAL;
e9955c83
AD
617 return PROLOGUE;
618 }
619
aa418041 620 <<EOF>> unexpected_eof (code_start, "%}"); BEGIN INITIAL;
e9955c83
AD
621}
622
623
624 /*---------------------------------------------------------------.
625 | Scanning the epilogue (everything after the second "%%", which |
d8d3f94a 626 | has already been eaten). |
e9955c83
AD
627 `---------------------------------------------------------------*/
628
629<SC_EPILOGUE>
630{
e9955c83 631 <<EOF>> {
41141c56 632 STRING_FINISH;
3f2d73f1 633 loc->start = code_start;
223ff46e 634 val->chars = last_string;
a706a1cc 635 BEGIN INITIAL;
e9955c83
AD
636 return EPILOGUE;
637 }
638}
639
640
4febdd96
PE
641 /*-----------------------------------------.
642 | Escape M4 quoting characters in C code. |
643 `-----------------------------------------*/
a706a1cc
PE
644
645<SC_COMMENT,SC_LINE_COMMENT,SC_STRING,SC_CHARACTER,SC_BRACED_CODE,SC_PROLOGUE,SC_EPILOGUE>
646{
223ff46e
PE
647 \$ obstack_sgrow (&obstack_for_string, "$][");
648 \@ obstack_sgrow (&obstack_for_string, "@@");
649 \[ obstack_sgrow (&obstack_for_string, "@{");
650 \] obstack_sgrow (&obstack_for_string, "@}");
a706a1cc
PE
651}
652
653
4febdd96
PE
654 /*-----------------------------------------------------.
655 | By default, grow the string obstack with the input. |
656 `-----------------------------------------------------*/
657
658<SC_COMMENT,SC_LINE_COMMENT,SC_BRACED_CODE,SC_PROLOGUE,SC_EPILOGUE,SC_STRING,SC_CHARACTER,SC_ESCAPED_STRING,SC_ESCAPED_CHARACTER>. |
659<SC_COMMENT,SC_LINE_COMMENT,SC_BRACED_CODE,SC_PROLOGUE,SC_EPILOGUE>\n STRING_GROW;
660
e9955c83
AD
661%%
662
cd3684cf
AD
663/* Keeps track of the maximum number of semantic values to the left of
664 a handle (those referenced by $0, $-1, etc.) that are required by the
25005f6a
PH
665 semantic actions of this grammar. */
666int max_left_semantic_context = 0;
667
3f2d73f1
PE
668/* Set *LOC and adjust scanner cursor to account for token TOKEN of
669 size SIZE. */
6c30d641
PE
670
671static void
223ff46e 672adjust_location (location *loc, char const *token, size_t size)
6c30d641 673{
3f2d73f1
PE
674 int line = scanner_cursor.line;
675 int column = scanner_cursor.column;
6c30d641
PE
676 char const *p0 = token;
677 char const *p = token;
678 char const *lim = token + size;
679
3f2d73f1
PE
680 loc->start = scanner_cursor;
681
6c30d641
PE
682 for (p = token; p < lim; p++)
683 switch (*p)
684 {
6c30d641
PE
685 case '\n':
686 line++;
687 column = 1;
688 p0 = p + 1;
689 break;
690
691 case '\t':
692 column += mbsnwidth (p0, p - p0, 0);
693 column += 8 - ((column - 1) & 7);
694 p0 = p + 1;
695 break;
696 }
697
3f2d73f1
PE
698 scanner_cursor.line = line;
699 scanner_cursor.column = column + mbsnwidth (p0, p - p0, 0);
700
701 loc->end = scanner_cursor;
6c30d641
PE
702}
703
704
705/* Read bytes from FP into buffer BUF of size SIZE. Return the
706 number of bytes read. Remove '\r' from input, treating \r\n
707 and isolated \r as \n. */
708
/* Fill BUF with up to SIZE bytes from FP and return how many bytes
   BUF holds afterwards.  Each "\r\n" pair and each lone '\r' is
   collapsed to a single '\n', so the result may be shorter than what
   fread delivered.  When the data read ends in '\r', one more
   character is fetched from FP: a '\n' is swallowed, anything else is
   pushed back with ungetc.  */
static size_t
no_cr_read (FILE *fp, char *buf, size_t size)
{
  size_t count = fread (buf, 1, size, fp);
  char *dst;
  char const *src;
  char const *end;

  if (count == 0)
    return count;

  dst = memchr (buf, '\r', count);
  if (dst == NULL)
    return count;

  /* From here on DST is the write position and SRC the read position;
     both start just past the first '\r'.  */
  end = buf + count;
  src = ++dst;

  for (;;)
    {
      /* The byte just before DST is a '\r': turn it into '\n' and
         skip a '\n' that immediately follows it.  */
      dst[-1] = '\n';

      if (src != end)
        {
          if (*src == '\n')
            src++;
        }
      else
        {
          int next = getc (fp);
          if (next != '\n' && ungetc (next, fp) != next)
            return dst - buf;
        }

      /* Slide the remaining bytes down until another '\r' has been
         copied or the data is exhausted.  */
      for (;;)
        {
          char c;

          if (src == end)
            return dst - buf;
          c = *src++;
          *dst++ = c;
          if (c == '\r')
            break;
        }
    }
}
750
751
e9955c83 752/*------------------------------------------------------------------.
366eea36 753| TEXT is pointing to a wannabee semantic value (i.e., a `$'). |
e9955c83
AD
754| |
755| Possible inputs: $[<TYPENAME>]($|integer) |
756| |
223ff46e 757| Output to OBSTACK_FOR_STRING a reference to this semantic value. |
e9955c83
AD
758`------------------------------------------------------------------*/
759
624a35e2 760static inline bool
223ff46e 761handle_action_dollar (char *text, location loc)
e9955c83
AD
762{
763 const char *type_name = NULL;
366eea36 764 char *cp = text + 1;
e9955c83 765
624a35e2
PE
766 if (! current_rule)
767 return false;
768
e9955c83
AD
769 /* Get the type name if explicit. */
770 if (*cp == '<')
771 {
772 type_name = ++cp;
773 while (*cp != '>')
774 ++cp;
775 *cp = '\0';
776 ++cp;
777 }
778
779 if (*cp == '$')
780 {
781 if (!type_name)
223ff46e 782 type_name = symbol_list_n_type_name_get (current_rule, loc, 0);
e9955c83 783 if (!type_name && typed)
223ff46e 784 complain_at (loc, _("$$ of `%s' has no declared type"),
97650f4e 785 current_rule->sym->tag);
e9955c83
AD
786 if (!type_name)
787 type_name = "";
223ff46e 788 obstack_fgrow1 (&obstack_for_string,
e9955c83
AD
789 "]b4_lhs_value([%s])[", type_name);
790 }
d8d3f94a 791 else
e9955c83 792 {
1452af69 793 long int num;
223ff46e 794 set_errno (0);
d8d3f94a 795 num = strtol (cp, 0, 10);
e9955c83 796
223ff46e 797 if (INT_MIN <= num && num <= rule_length && ! get_errno ())
e9955c83 798 {
d8d3f94a 799 int n = num;
25005f6a
PH
800 if (1-n > max_left_semantic_context)
801 max_left_semantic_context = 1-n;
e9955c83 802 if (!type_name && n > 0)
223ff46e 803 type_name = symbol_list_n_type_name_get (current_rule, loc, n);
e9955c83 804 if (!type_name && typed)
223ff46e
PE
805 complain_at (loc, _("$%d of `%s' has no declared type"),
806 n, current_rule->sym->tag);
e9955c83
AD
807 if (!type_name)
808 type_name = "";
223ff46e 809 obstack_fgrow3 (&obstack_for_string,
05ac60f3 810 "]b4_rhs_value(%d, %d, [%s])[",
e9955c83
AD
811 rule_length, n, type_name);
812 }
d8d3f94a 813 else
223ff46e 814 complain_at (loc, _("integer out of range: %s"), quote (text));
9280d3ef 815 }
9280d3ef 816
624a35e2 817 return true;
e9955c83
AD
818}
819
f25bfb75 820
cd3684cf
AD
821/*----------------------------------------------------------------.
822| Map `$?' onto the proper M4 symbol, depending on its TOKEN_TYPE |
823| (are we in an action?). |
824`----------------------------------------------------------------*/
e9955c83
AD
825
826static void
624a35e2 827handle_dollar (int token_type, char *text, location loc)
f25bfb75 828{
624a35e2 829 switch (token_type)
f25bfb75 830 {
624a35e2
PE
831 case BRACED_CODE:
832 if (handle_action_dollar (text, loc))
833 return;
f25bfb75
AD
834 break;
835
624a35e2 836 case PERCENT_DESTRUCTOR:
cd3684cf 837 case PERCENT_INITIAL_ACTION:
624a35e2
PE
838 case PERCENT_PRINTER:
839 if (text[1] == '$')
840 {
841 obstack_sgrow (&obstack_for_string, "]b4_dollar_dollar[");
842 return;
843 }
844 break;
845
846 default:
f25bfb75
AD
847 break;
848 }
624a35e2
PE
849
850 complain_at (loc, _("invalid value: %s"), quote (text));
f25bfb75
AD
851}
852
853
854/*------------------------------------------------------.
855| TEXT is a location token (i.e., a `@...'). Output to |
223ff46e 856| OBSTACK_FOR_STRING a reference to this location. |
f25bfb75
AD
857`------------------------------------------------------*/
858
624a35e2 859static inline bool
223ff46e 860handle_action_at (char *text, location loc)
e9955c83 861{
366eea36 862 char *cp = text + 1;
d0829076 863 locations_flag = true;
e9955c83 864
624a35e2
PE
865 if (! current_rule)
866 return false;
867
366eea36 868 if (*cp == '$')
624a35e2 869 obstack_sgrow (&obstack_for_string, "]b4_lhs_location[");
d8d3f94a 870 else
e9955c83 871 {
1452af69 872 long int num;
223ff46e 873 set_errno (0);
d8d3f94a 874 num = strtol (cp, 0, 10);
dafdc66f 875
223ff46e 876 if (INT_MIN <= num && num <= rule_length && ! get_errno ())
d8d3f94a
PE
877 {
878 int n = num;
05ac60f3 879 obstack_fgrow2 (&obstack_for_string, "]b4_rhs_location(%d, %d)[",
d8d3f94a
PE
880 rule_length, n);
881 }
e9955c83 882 else
223ff46e 883 complain_at (loc, _("integer out of range: %s"), quote (text));
f25bfb75 884 }
f25bfb75 885
624a35e2 886 return true;
e9955c83 887}
4cdb01db 888
f25bfb75 889
cd3684cf
AD
890/*----------------------------------------------------------------.
891| Map `@?' onto the proper M4 symbol, depending on its TOKEN_TYPE |
892| (are we in an action?). |
893`----------------------------------------------------------------*/
f25bfb75
AD
894
895static void
624a35e2 896handle_at (int token_type, char *text, location loc)
f25bfb75 897{
624a35e2 898 switch (token_type)
f25bfb75 899 {
624a35e2 900 case BRACED_CODE:
223ff46e 901 handle_action_at (text, loc);
624a35e2
PE
902 return;
903
cd3684cf 904 case PERCENT_INITIAL_ACTION:
624a35e2
PE
905 case PERCENT_DESTRUCTOR:
906 case PERCENT_PRINTER:
907 if (text[1] == '$')
908 {
909 obstack_sgrow (&obstack_for_string, "]b4_at_dollar[");
910 return;
911 }
f25bfb75
AD
912 break;
913
624a35e2 914 default:
f25bfb75
AD
915 break;
916 }
624a35e2
PE
917
918 complain_at (loc, _("invalid value: %s"), quote (text));
f25bfb75
AD
919}
920
921
1452af69
PE
922/*------------------------------------------------------.
923| Scan NUMBER for a base-BASE integer at location LOC. |
924`------------------------------------------------------*/
925
926static unsigned long int
927scan_integer (char const *number, int base, location loc)
928{
929 unsigned long int num;
930 set_errno (0);
931 num = strtoul (number, 0, base);
932 if (INT_MAX < num || get_errno ())
933 {
934 complain_at (loc, _("integer out of range: %s"), quote (number));
935 num = INT_MAX;
936 }
937 return num;
938}
939
940
d8d3f94a
PE
941/*------------------------------------------------------------------.
942| Convert universal character name UCN to a single-byte character, |
943| and return that character. Return -1 if UCN does not correspond |
944| to a single-byte character. |
945`------------------------------------------------------------------*/
946
/* UCN points at a universal character name as matched by the scanner:
   a backslash, `u' or `U', then hexadecimal digits.  The digits are
   read starting at UCN + 2.  Return the corresponding single-byte
   character, or -1 if there is none.  */
static int
convert_ucn_to_byte (char const *ucn)
{
  unsigned long int code = strtoul (ucn + 2, 0, 16);

  /* FIXME: Currently we assume Unicode-compatible unibyte characters
     on ASCII hosts (i.e., Latin-1 on hosts with 8-bit bytes).  On
     non-ASCII hosts we support only the portable C character set.
     These limitations should be removed once we add support for
     multibyte characters.  */

  if (UCHAR_MAX < code)
    return -1;

#if ! ('$' == 0x24 && '@' == 0x40 && '`' == 0x60 && '~' == 0x7e)
  {
    /* A non-ASCII host.  Use CODE to index into a table of the C
       basic execution character set, which is guaranteed to exist on
       all Standard C platforms.  This table also includes '$', '@',
       and '`', which are not in the basic execution character set but
       which are unibyte characters on all the platforms that we know
       about.

       The elements are wider than a char so that execution-set
       values above SCHAR_MAX are stored unchanged and cannot be
       mistaken for the -1 "no such character" marker.  */
    static short int const table[] =
      {
        '\0',   -1,   -1,   -1,   -1,   -1,   -1, '\a',
        '\b', '\t', '\n', '\v', '\f', '\r',   -1,   -1,
          -1,   -1,   -1,   -1,   -1,   -1,   -1,   -1,
          -1,   -1,   -1,   -1,   -1,   -1,   -1,   -1,
         ' ',  '!',  '"',  '#',  '$',  '%',  '&', '\'',
         '(',  ')',  '*',  '+',  ',',  '-',  '.',  '/',
         '0',  '1',  '2',  '3',  '4',  '5',  '6',  '7',
         '8',  '9',  ':',  ';',  '<',  '=',  '>',  '?',
         '@',  'A',  'B',  'C',  'D',  'E',  'F',  'G',
         'H',  'I',  'J',  'K',  'L',  'M',  'N',  'O',
         'P',  'Q',  'R',  'S',  'T',  'U',  'V',  'W',
         'X',  'Y',  'Z',  '[', '\\',  ']',  '^',  '_',
         '`',  'a',  'b',  'c',  'd',  'e',  'f',  'g',
         'h',  'i',  'j',  'k',  'l',  'm',  'n',  'o',
         'p',  'q',  'r',  's',  't',  'u',  'v',  'w',
         'x',  'y',  'z',  '{',  '|',  '}',  '~'
      };

    /* Return the int result directly instead of routing -1 through
       the unsigned CODE variable.  */
    return code < sizeof table / sizeof table[0] ? table[code] : -1;
  }
#else
  /* CODE is at most UCHAR_MAX here, so it fits in an int.  */
  return (int) code;
#endif
}
995
996
900c5db5
AD
997/*----------------------------------------------------------------.
998| Handle `#line INT "FILE"'. ARGS has already skipped `#line '. |
999`----------------------------------------------------------------*/
1000
1001static void
3f2d73f1 1002handle_syncline (char *args)
900c5db5
AD
1003{
1004 int lineno = strtol (args, &args, 10);
1005 const char *file = NULL;
1006 file = strchr (args, '"') + 1;
1007 *strchr (file, '"') = 0;
dca81a78 1008 scanner_cursor.file = current_file = uniqstr_new (file);
3f2d73f1
PE
1009 scanner_cursor.line = lineno;
1010 scanner_cursor.column = 1;
900c5db5
AD
1011}
1012
a706a1cc 1013
4febdd96
PE
1014/*----------------------------------------------------------------.
1015| For a token or comment starting at START, report message MSGID, |
1016| which should say that an end marker was found before |
1017| the expected TOKEN_END. |
1018`----------------------------------------------------------------*/
1019
1020static void
1021unexpected_end (boundary start, char const *msgid, char const *token_end)
1022{
1023 location loc;
1024 loc.start = start;
1025 loc.end = scanner_cursor;
1026 complain_at (loc, _(msgid), token_end);
1027}
1028
1029
3f2d73f1
PE
1030/*------------------------------------------------------------------------.
1031| Report an unexpected EOF in a token or comment starting at START. |
1032| An end of file was encountered and the expected TOKEN_END was missing. |
3f2d73f1 1033`------------------------------------------------------------------------*/
a706a1cc
PE
1034
1035static void
aa418041 1036unexpected_eof (boundary start, char const *token_end)
a706a1cc 1037{
4febdd96
PE
1038 unexpected_end (start, N_("missing `%s' at end of file"), token_end);
1039}
1040
1041
1042/*----------------------------------------.
1043| Likewise, but for unexpected newlines. |
1044`----------------------------------------*/
1045
1046static void
1047unexpected_newline (boundary start, char const *token_end)
1048{
1049 unexpected_end (start, N_("missing `%s' at end of line"), token_end);
a706a1cc
PE
1050}
1051
1052
f25bfb75
AD
1053/*-------------------------.
1054| Initialize the scanner. |
1055`-------------------------*/
1056
1d6412ad
AD
1057void
1058scanner_initialize (void)
1059{
223ff46e 1060 obstack_init (&obstack_for_string);
1d6412ad
AD
1061}
1062
1063
f25bfb75
AD
1064/*-----------------------------------------------.
1065| Free all the memory allocated to the scanner. |
1066`-----------------------------------------------*/
1067
4cdb01db
AD
1068void
1069scanner_free (void)
1070{
223ff46e 1071 obstack_free (&obstack_for_string, 0);
536545f3
AD
1072 /* Reclaim Flex's buffers. */
1073 yy_delete_buffer (YY_CURRENT_BUFFER);
4cdb01db 1074}