]> git.saurik.com Git - bison.git/blame - src/scan-gram.l
Add Makevars.template, stamp-po.
[bison.git] / src / scan-gram.l
CommitLineData
e9955c83 1/* Bison Grammar Scanner -*- C -*-
3b1e470c 2
a737b216 3 Copyright (C) 2002, 2003 Free Software Foundation, Inc.
e9955c83
AD
4
5 This file is part of Bison, the GNU Compiler Compiler.
6
7 This program is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 2 of the License, or
10 (at your option) any later version.
11
12 This program is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
16
17 You should have received a copy of the GNU General Public License
18 along with this program; if not, write to the Free Software
19 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
20 02111-1307 USA
21*/
22
aa418041 23%option debug nodefault nounput noyywrap never-interactive
e9955c83
AD
24%option prefix="gram_" outfile="lex.yy.c"
25
26%{
27#include "system.h"
223ff46e
PE
28
29#include <mbswidth.h>
30#include <get-errno.h>
31#include <quote.h>
32
e9955c83 33#include "complain.h"
3f2d73f1 34#include "files.h"
e9955c83
AD
35#include "getargs.h"
36#include "gram.h"
37#include "reader.h"
223ff46e 38#include "uniqstr.h"
e9955c83 39
3f2d73f1
PE
40#define YY_USER_INIT \
41 do \
42 { \
43 scanner_cursor.file = current_file; \
44 scanner_cursor.line = 1; \
45 scanner_cursor.column = 1; \
379f0ac8 46 code_start = scanner_cursor; \
3f2d73f1
PE
47 } \
48 while (0)
8efe435c 49
3f2d73f1
PE
50/* Location of scanner cursor. */
51boundary scanner_cursor;
41141c56 52
223ff46e 53static void adjust_location (location *, char const *, size_t);
3f2d73f1 54#define YY_USER_ACTION adjust_location (loc, yytext, yyleng);
d8d3f94a 55
6c30d641 56static size_t no_cr_read (FILE *, char *, size_t);
d8d3f94a
PE
57#define YY_INPUT(buf, result, size) ((result) = no_cr_read (yyin, buf, size))
58
59
223ff46e 60/* OBSTACK_FOR_STRING -- Used to store all the characters that we need to
44995b2e
AD
61 keep (to construct ID, STRINGS etc.). Use the following macros to
62 use it.
63
41141c56
PE
64 Use STRING_GROW to append what has just been matched, and
65 STRING_FINISH to end the string (it puts the ending 0).
66 STRING_FINISH also stores this string in LAST_STRING, which can be
67 used, and which is used by STRING_FREE to free the last string. */
44995b2e 68
223ff46e 69static struct obstack obstack_for_string;
44995b2e 70
7ec2d4cd
AD
71/* A string representing the most recently saved token. */
72static char *last_string;
73
74
41141c56 75#define STRING_GROW \
223ff46e 76 obstack_grow (&obstack_for_string, yytext, yyleng)
44995b2e 77
41141c56 78#define STRING_FINISH \
44995b2e 79 do { \
223ff46e
PE
80 obstack_1grow (&obstack_for_string, '\0'); \
81 last_string = obstack_finish (&obstack_for_string); \
44995b2e
AD
82 } while (0)
83
41141c56 84#define STRING_FREE \
223ff46e 85 obstack_free (&obstack_for_string, last_string)
e9955c83 86
7ec2d4cd
AD
87void
88scanner_last_string_free (void)
89{
41141c56 90 STRING_FREE;
7ec2d4cd 91}
e9955c83 92
efcb44dd
PE
93/* Within well-formed rules, RULE_LENGTH is the number of values in
94 the current rule so far, which says where to find `$0' with respect
95 to the top of the stack. It is not the same as the rule->length in
96 the case of mid rule actions.
97
98 Outside of well-formed rules, RULE_LENGTH has an undefined value. */
99static int rule_length;
100
624a35e2
PE
101static void handle_dollar (int token_type, char *cp, location loc);
102static void handle_at (int token_type, char *cp, location loc);
3f2d73f1 103static void handle_syncline (char *args);
d8d3f94a 104static int convert_ucn_to_byte (char const *hex_text);
aa418041 105static void unexpected_eof (boundary, char const *);
e9955c83
AD
106
107%}
d8d3f94a 108%x SC_COMMENT SC_LINE_COMMENT SC_YACC_COMMENT
e9955c83 109%x SC_STRING SC_CHARACTER
3f2d73f1 110%x SC_AFTER_IDENTIFIER
e9955c83 111%x SC_ESCAPED_STRING SC_ESCAPED_CHARACTER
624a35e2 112%x SC_PRE_CODE SC_BRACED_CODE SC_PROLOGUE SC_EPILOGUE
e9955c83 113
29c01725
AD
114letter [.abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ_]
115id {letter}({letter}|[0-9])*
116directive %{letter}({letter}|[0-9]|-)*
624a35e2 117int [0-9]+
d8d3f94a
PE
118
119/* POSIX says that a tag must be both an id and a C union member, but
120 historically almost any character is allowed in a tag. We disallow
121 NUL and newline, as this simplifies our implementation. */
122tag [^\0\n>]+
123
124/* Zero or more instances of backslash-newline. Following GCC, allow
125 white space between the backslash and the newline. */
126splice (\\[ \f\t\v]*\n)*
e9955c83
AD
127
128%%
129%{
a706a1cc 130 /* Nesting level of the current code in braces. */
1a9e39f1
PE
131 int braces_level IF_LINT (= 0);
132
3f2d73f1
PE
133 /* Parent context state, when applicable. */
134 int context_state IF_LINT (= 0);
a706a1cc 135
624a35e2
PE
136 /* Token type to return, when applicable. */
137 int token_type IF_LINT (= 0);
138
3f2d73f1 139 /* Location of most recent identifier, when applicable. */
a2bc9dbc 140 location id_loc IF_LINT (= empty_location);
3f2d73f1 141
a2bc9dbc
PE
142 /* Where containing code started, when applicable. Its initial
143 value is relevant only when yylex is invoked in the SC_EPILOGUE
144 start condition. */
145 boundary code_start = scanner_cursor;
3f2d73f1 146
223ff46e
PE
147 /* Where containing comment or string or character literal started,
148 when applicable. */
a2bc9dbc 149 boundary token_start IF_LINT (= scanner_cursor);
e9955c83
AD
150%}
151
152
3f2d73f1
PE
153 /*-----------------------.
154 | Scanning white space. |
155 `-----------------------*/
156
624a35e2 157<INITIAL,SC_AFTER_IDENTIFIER,SC_PRE_CODE>
3f2d73f1
PE
158{
159 [ \f\n\t\v] ;
83adb046 160 "," warn_at (*loc, _("stray `,' treated as white space"));
3f2d73f1
PE
161
162 /* Comments. */
3f2d73f1 163 "//".* ;
83adb046
PE
164 "/*" {
165 token_start = loc->start;
166 context_state = YY_START;
167 BEGIN SC_YACC_COMMENT;
168 }
3f2d73f1
PE
169
170 /* #line directives are not documented, and may be withdrawn or
171 modified in future versions of Bison. */
172 ^"#line "{int}" \"".*"\"\n" {
173 handle_syncline (yytext + sizeof "#line " - 1);
174 }
175}
176
177
e9955c83
AD
178 /*----------------------------.
179 | Scanning Bison directives. |
180 `----------------------------*/
181<INITIAL>
182{
183 "%binary" return PERCENT_NONASSOC;
184 "%debug" return PERCENT_DEBUG;
39a06c25 185 "%default"[-_]"prec" return PERCENT_DEFAULT_PREC;
e9955c83
AD
186 "%define" return PERCENT_DEFINE;
187 "%defines" return PERCENT_DEFINES;
624a35e2 188 "%destructor" token_type = PERCENT_DESTRUCTOR; BEGIN SC_PRE_CODE;
676385e2 189 "%dprec" return PERCENT_DPREC;
e9955c83
AD
190 "%error"[-_]"verbose" return PERCENT_ERROR_VERBOSE;
191 "%expect" return PERCENT_EXPECT;
192 "%file-prefix" return PERCENT_FILE_PREFIX;
193 "%fixed"[-_]"output"[-_]"files" return PERCENT_YACC;
cd3684cf 194 "%initial-action" token_type = PERCENT_INITIAL_ACTION; BEGIN SC_PRE_CODE;
ae7453f2 195 "%glr-parser" return PERCENT_GLR_PARSER;
e9955c83 196 "%left" return PERCENT_LEFT;
624a35e2 197 "%lex-param" token_type = PERCENT_LEX_PARAM; BEGIN SC_PRE_CODE;
e9955c83 198 "%locations" return PERCENT_LOCATIONS;
676385e2 199 "%merge" return PERCENT_MERGE;
e9955c83 200 "%name"[-_]"prefix" return PERCENT_NAME_PREFIX;
22fccf95 201 "%no"[-_]"default"[-_]"prec" return PERCENT_NO_DEFAULT_PREC;
e9955c83
AD
202 "%no"[-_]"lines" return PERCENT_NO_LINES;
203 "%nonassoc" return PERCENT_NONASSOC;
916708d5 204 "%nondeterministic-parser" return PERCENT_NONDETERMINISTIC_PARSER;
e9955c83
AD
205 "%nterm" return PERCENT_NTERM;
206 "%output" return PERCENT_OUTPUT;
624a35e2 207 "%parse-param" token_type = PERCENT_PARSE_PARAM; BEGIN SC_PRE_CODE;
d8d3f94a 208 "%prec" rule_length--; return PERCENT_PREC;
624a35e2 209 "%printer" token_type = PERCENT_PRINTER; BEGIN SC_PRE_CODE;
e9955c83
AD
210 "%pure"[-_]"parser" return PERCENT_PURE_PARSER;
211 "%right" return PERCENT_RIGHT;
212 "%skeleton" return PERCENT_SKELETON;
213 "%start" return PERCENT_START;
214 "%term" return PERCENT_TOKEN;
215 "%token" return PERCENT_TOKEN;
216 "%token"[-_]"table" return PERCENT_TOKEN_TABLE;
217 "%type" return PERCENT_TYPE;
624a35e2 218 "%union" token_type = PERCENT_UNION; BEGIN SC_PRE_CODE;
e9955c83
AD
219 "%verbose" return PERCENT_VERBOSE;
220 "%yacc" return PERCENT_YACC;
221
3f2d73f1 222 {directive} {
41141c56 223 complain_at (*loc, _("invalid directive: %s"), quote (yytext));
412f8a59 224 }
900c5db5 225
e9955c83 226 "=" return EQUAL;
d8d3f94a 227 "|" rule_length = 0; return PIPE;
e9955c83
AD
228 ";" return SEMICOLON;
229
3f2d73f1 230 {id} {
41141c56 231 val->symbol = symbol_get (yytext, *loc);
3f2d73f1 232 id_loc = *loc;
efcb44dd 233 rule_length++;
3f2d73f1 234 BEGIN SC_AFTER_IDENTIFIER;
e9955c83
AD
235 }
236
d8d3f94a
PE
237 {int} {
238 unsigned long num;
223ff46e 239 set_errno (0);
d8d3f94a 240 num = strtoul (yytext, 0, 10);
223ff46e 241 if (INT_MAX < num || get_errno ())
d8d3f94a 242 {
41141c56 243 complain_at (*loc, _("integer out of range: %s"), quote (yytext));
d8d3f94a
PE
244 num = INT_MAX;
245 }
41141c56 246 val->integer = num;
d8d3f94a
PE
247 return INT;
248 }
e9955c83
AD
249
250 /* Characters. We don't check there is only one. */
3f2d73f1 251 "'" STRING_GROW; token_start = loc->start; BEGIN SC_ESCAPED_CHARACTER;
e9955c83
AD
252
253 /* Strings. */
3f2d73f1 254 "\"" STRING_GROW; token_start = loc->start; BEGIN SC_ESCAPED_STRING;
e9955c83
AD
255
256 /* Prologue. */
3f2d73f1 257 "%{" code_start = loc->start; BEGIN SC_PROLOGUE;
e9955c83
AD
258
259 /* Code in between braces. */
3f2d73f1
PE
260 "{" {
261 STRING_GROW;
624a35e2 262 token_type = BRACED_CODE;
3f2d73f1
PE
263 braces_level = 0;
264 code_start = loc->start;
265 BEGIN SC_BRACED_CODE;
266 }
e9955c83
AD
267
268 /* A type. */
d8d3f94a 269 "<"{tag}">" {
223ff46e 270 obstack_grow (&obstack_for_string, yytext + 1, yyleng - 2);
41141c56 271 STRING_FINISH;
223ff46e 272 val->uniqstr = uniqstr_new (last_string);
41141c56 273 STRING_FREE;
4cdb01db
AD
274 return TYPE;
275 }
276
a706a1cc
PE
277 "%%" {
278 static int percent_percent_count;
e9955c83 279 if (++percent_percent_count == 2)
a2bc9dbc 280 BEGIN SC_EPILOGUE;
e9955c83
AD
281 return PERCENT_PERCENT;
282 }
283
a706a1cc 284 . {
41141c56 285 complain_at (*loc, _("invalid character: %s"), quote (yytext));
3f2d73f1 286 }
379f0ac8
PE
287
288 <<EOF>> {
289 loc->start = loc->end = scanner_cursor;
290 yyterminate ();
291 }
3f2d73f1
PE
292}
293
294
295 /*-----------------------------------------------------------------.
296 | Scanning after an identifier, checking whether a colon is next. |
297 `-----------------------------------------------------------------*/
298
299<SC_AFTER_IDENTIFIER>
300{
301 ":" {
302 rule_length = 0;
303 *loc = id_loc;
304 BEGIN INITIAL;
305 return ID_COLON;
306 }
307 . {
308 scanner_cursor.column -= mbsnwidth (yytext, yyleng, 0);
309 yyless (0);
310 *loc = id_loc;
311 BEGIN INITIAL;
312 return ID;
313 }
314 <<EOF>> {
315 *loc = id_loc;
316 BEGIN INITIAL;
317 return ID;
e9955c83
AD
318 }
319}
320
321
d8d3f94a
PE
322 /*---------------------------------------------------------------.
323 | Scanning a Yacc comment. The initial `/ *' is already eaten. |
324 `---------------------------------------------------------------*/
e9955c83 325
d8d3f94a 326<SC_YACC_COMMENT>
e9955c83 327{
3f2d73f1 328 "*/" BEGIN context_state;
a706a1cc 329 .|\n ;
aa418041 330 <<EOF>> unexpected_eof (token_start, "*/"); BEGIN context_state;
d8d3f94a
PE
331}
332
333
334 /*------------------------------------------------------------.
335 | Scanning a C comment. The initial `/ *' is already eaten. |
336 `------------------------------------------------------------*/
337
338<SC_COMMENT>
339{
3f2d73f1 340 "*"{splice}"/" STRING_GROW; BEGIN context_state;
aa418041 341 <<EOF>> unexpected_eof (token_start, "*/"); BEGIN context_state;
e9955c83
AD
342}
343
344
d8d3f94a
PE
345 /*--------------------------------------------------------------.
346 | Scanning a line comment. The initial `//' is already eaten. |
347 `--------------------------------------------------------------*/
348
349<SC_LINE_COMMENT>
350{
3f2d73f1 351 "\n" STRING_GROW; BEGIN context_state;
41141c56 352 {splice} STRING_GROW;
3f2d73f1 353 <<EOF>> BEGIN context_state;
d8d3f94a
PE
354}
355
356
e9955c83
AD
357 /*----------------------------------------------------------------.
358 | Scanning a C string, including its escapes. The initial `"' is |
359 | already eaten. |
360 `----------------------------------------------------------------*/
361
362<SC_ESCAPED_STRING>
363{
db2cc12f 364 "\"" {
41141c56
PE
365 STRING_GROW;
366 STRING_FINISH;
3f2d73f1 367 loc->start = token_start;
223ff46e 368 val->chars = last_string;
efcb44dd 369 rule_length++;
a706a1cc 370 BEGIN INITIAL;
e9955c83
AD
371 return STRING;
372 }
373
92ac3705 374 \0 complain_at (*loc, _("invalid null character"));
41141c56 375 .|\n STRING_GROW;
aa418041 376 <<EOF>> unexpected_eof (token_start, "\""); BEGIN INITIAL;
e9955c83
AD
377}
378
379 /*---------------------------------------------------------------.
380 | Scanning a C character, decoding its escapes. The initial "'" |
381 | is already eaten. |
382 `---------------------------------------------------------------*/
383
384<SC_ESCAPED_CHARACTER>
385{
db2cc12f 386 "'" {
3b1e470c 387 unsigned char last_string_1;
41141c56
PE
388 STRING_GROW;
389 STRING_FINISH;
3f2d73f1 390 loc->start = token_start;
41141c56
PE
391 val->symbol = symbol_get (last_string, *loc);
392 symbol_class_set (val->symbol, token_sym, *loc);
3b1e470c
PE
393 last_string_1 = last_string[1];
394 symbol_user_token_number_set (val->symbol, last_string_1, *loc);
41141c56 395 STRING_FREE;
a706a1cc
PE
396 rule_length++;
397 BEGIN INITIAL;
398 return ID;
e9955c83 399 }
a706a1cc 400
92ac3705 401 \0 complain_at (*loc, _("invalid null character"));
41141c56 402 .|\n STRING_GROW;
aa418041 403 <<EOF>> unexpected_eof (token_start, "'"); BEGIN INITIAL;
e9955c83
AD
404}
405
406
407 /*----------------------------.
408 | Decode escaped characters. |
409 `----------------------------*/
410
411<SC_ESCAPED_STRING,SC_ESCAPED_CHARACTER>
412{
d8d3f94a
PE
413 \\[0-7]{1,3} {
414 unsigned long c = strtoul (yytext + 1, 0, 8);
415 if (UCHAR_MAX < c)
3f2d73f1 416 complain_at (*loc, _("invalid escape sequence: %s"), quote (yytext));
92ac3705
PE
417 else if (! c)
418 complain_at (*loc, _("invalid null character: %s"), quote (yytext));
e9955c83 419 else
223ff46e 420 obstack_1grow (&obstack_for_string, c);
e9955c83
AD
421 }
422
6b0d38ab 423 \\x[0-9abcdefABCDEF]+ {
d8d3f94a 424 unsigned long c;
223ff46e 425 set_errno (0);
d8d3f94a 426 c = strtoul (yytext + 2, 0, 16);
223ff46e 427 if (UCHAR_MAX < c || get_errno ())
3f2d73f1 428 complain_at (*loc, _("invalid escape sequence: %s"), quote (yytext));
92ac3705
PE
429 else if (! c)
430 complain_at (*loc, _("invalid null character: %s"), quote (yytext));
d8d3f94a 431 else
223ff46e 432 obstack_1grow (&obstack_for_string, c);
e9955c83
AD
433 }
434
223ff46e
PE
435 \\a obstack_1grow (&obstack_for_string, '\a');
436 \\b obstack_1grow (&obstack_for_string, '\b');
437 \\f obstack_1grow (&obstack_for_string, '\f');
438 \\n obstack_1grow (&obstack_for_string, '\n');
439 \\r obstack_1grow (&obstack_for_string, '\r');
440 \\t obstack_1grow (&obstack_for_string, '\t');
441 \\v obstack_1grow (&obstack_for_string, '\v');
412f8a59
PE
442
443 /* \\[\"\'?\\] would be shorter, but it confuses xgettext. */
223ff46e 444 \\("\""|"'"|"?"|"\\") obstack_1grow (&obstack_for_string, yytext[1]);
412f8a59 445
6b0d38ab 446 \\(u|U[0-9abcdefABCDEF]{4})[0-9abcdefABCDEF]{4} {
d8d3f94a
PE
447 int c = convert_ucn_to_byte (yytext);
448 if (c < 0)
3f2d73f1 449 complain_at (*loc, _("invalid escape sequence: %s"), quote (yytext));
92ac3705
PE
450 else if (! c)
451 complain_at (*loc, _("invalid null character: %s"), quote (yytext));
d8d3f94a 452 else
223ff46e 453 obstack_1grow (&obstack_for_string, c);
d8d3f94a 454 }
4f25ebb0 455 \\(.|\n) {
3f2d73f1 456 complain_at (*loc, _("unrecognized escape sequence: %s"), quote (yytext));
41141c56 457 STRING_GROW;
e9955c83
AD
458 }
459}
460
461
462 /*----------------------------------------------------------.
463 | Scanning a C character without decoding its escapes. The |
464 | initial "'" is already eaten. |
465 `----------------------------------------------------------*/
466
467<SC_CHARACTER>
468{
3f2d73f1 469 "'" STRING_GROW; BEGIN context_state;
41141c56 470 \\{splice}[^$@\[\]] STRING_GROW;
aa418041 471 <<EOF>> unexpected_eof (token_start, "'"); BEGIN context_state;
e9955c83
AD
472}
473
474
475 /*----------------------------------------------------------------.
476 | Scanning a C string, without decoding its escapes. The initial |
477 | `"' is already eaten. |
478 `----------------------------------------------------------------*/
479
480<SC_STRING>
481{
3f2d73f1 482 "\"" STRING_GROW; BEGIN context_state;
41141c56 483 \\{splice}[^$@\[\]] STRING_GROW;
aa418041
PE
484 <<EOF>> {
485 unexpected_eof (token_start, "\"");
486 BEGIN context_state;
487 }
e9955c83
AD
488}
489
490
491 /*---------------------------------------------------.
492 | Strings, comments etc. can be found in user code. |
493 `---------------------------------------------------*/
494
495<SC_BRACED_CODE,SC_PROLOGUE,SC_EPILOGUE>
496{
3f2d73f1
PE
497 "'" {
498 STRING_GROW;
499 context_state = YY_START;
500 token_start = loc->start;
501 BEGIN SC_CHARACTER;
502 }
503 "\"" {
504 STRING_GROW;
505 context_state = YY_START;
506 token_start = loc->start;
507 BEGIN SC_STRING;
508 }
509 "/"{splice}"*" {
510 STRING_GROW;
511 context_state = YY_START;
512 token_start = loc->start;
513 BEGIN SC_COMMENT;
514 }
515 "/"{splice}"/" {
516 STRING_GROW;
517 context_state = YY_START;
518 BEGIN SC_LINE_COMMENT;
519 }
e9955c83
AD
520}
521
522
624a35e2
PE
523 /*---------------------------------------------------------------.
524 | Scanning after %union etc., possibly followed by white space. |
525 | For %union only, allow arbitrary C code to appear before the |
526 | following brace, as an extension to POSIX. |
527 `---------------------------------------------------------------*/
528
529<SC_PRE_CODE>
530{
531 . {
532 bool valid = yytext[0] == '{' || token_type == PERCENT_UNION;
533 scanner_cursor.column -= mbsnwidth (yytext, yyleng, 0);
534 yyless (0);
535
536 if (valid)
537 {
538 braces_level = -1;
539 code_start = loc->start;
540 BEGIN SC_BRACED_CODE;
541 }
542 else
543 {
544 complain_at (*loc, _("missing `{' in `%s'"),
545 token_name (token_type));
546 obstack_sgrow (&obstack_for_string, "{}");
547 STRING_FINISH;
548 val->chars = last_string;
549 BEGIN INITIAL;
550 return token_type;
551 }
552 }
379f0ac8 553
aa418041 554 <<EOF>> unexpected_eof (scanner_cursor, "{}"); BEGIN INITIAL;
624a35e2
PE
555}
556
557
e9955c83
AD
558 /*---------------------------------------------------------------.
559 | Scanning some code in braces (%union and actions). The initial |
560 | "{" is already eaten. |
561 `---------------------------------------------------------------*/
562
563<SC_BRACED_CODE>
564{
41141c56
PE
565 "{"|"<"{splice}"%" STRING_GROW; braces_level++;
566 "%"{splice}">" STRING_GROW; braces_level--;
e9955c83 567 "}" {
25522739
PE
568 bool outer_brace = --braces_level < 0;
569
570 /* As an undocumented Bison extension, append `;' before the last
571 brace in braced code, so that the user code can omit trailing
572 `;'. But do not append `;' if emulating Yacc, since Yacc does
573 not append one.
574
575 FIXME: Bison should warn if a semicolon seems to be necessary
576 here, and should omit the semicolon if it seems unnecessary
577 (e.g., after ';', '{', or '}', each followed by comments or
578 white space). Such a warning shouldn't depend on --yacc; it
579 should depend on a new --pedantic option, which would cause
580 Bison to warn if it detects an extension to POSIX. --pedantic
581 should also diagnose other Bison extensions like %yacc.
582 Perhaps there should also be a GCC-style --pedantic-errors
583 option, so that such warnings are diagnosed as errors. */
1deb9bdc 584 if (outer_brace && token_type == BRACED_CODE && ! yacc_flag)
25522739
PE
585 obstack_1grow (&obstack_for_string, ';');
586
587 obstack_1grow (&obstack_for_string, '}');
588
589 if (outer_brace)
e9955c83 590 {
41141c56 591 STRING_FINISH;
624a35e2 592 rule_length++;
3f2d73f1 593 loc->start = code_start;
223ff46e 594 val->chars = last_string;
a706a1cc 595 BEGIN INITIAL;
624a35e2 596 return token_type;
e9955c83
AD
597 }
598 }
599
a706a1cc
PE
600 /* Tokenize `<<%' correctly (as `<<' `%') rather than incorrectly
601 (as `<' `<%'). */
41141c56 602 "<"{splice}"<" STRING_GROW;
a706a1cc 603
624a35e2
PE
604 "$"("<"{tag}">")?(-?[0-9]+|"$") handle_dollar (token_type, yytext, *loc);
605 "@"(-?[0-9]+|"$") handle_at (token_type, yytext, *loc);
e9955c83 606
aa418041 607 <<EOF>> unexpected_eof (code_start, "}"); BEGIN INITIAL;
e9955c83
AD
608}
609
610
611 /*--------------------------------------------------------------.
612 | Scanning some prologue: from "%{" (already scanned) to "%}". |
613 `--------------------------------------------------------------*/
614
615<SC_PROLOGUE>
616{
617 "%}" {
41141c56 618 STRING_FINISH;
3f2d73f1 619 loc->start = code_start;
223ff46e 620 val->chars = last_string;
a706a1cc 621 BEGIN INITIAL;
e9955c83
AD
622 return PROLOGUE;
623 }
624
aa418041 625 <<EOF>> unexpected_eof (code_start, "%}"); BEGIN INITIAL;
e9955c83
AD
626}
627
628
629 /*---------------------------------------------------------------.
630 | Scanning the epilogue (everything after the second "%%", which |
d8d3f94a 631 | has already been eaten). |
e9955c83
AD
632 `---------------------------------------------------------------*/
633
634<SC_EPILOGUE>
635{
e9955c83 636 <<EOF>> {
41141c56 637 STRING_FINISH;
3f2d73f1 638 loc->start = code_start;
223ff46e 639 val->chars = last_string;
a706a1cc 640 BEGIN INITIAL;
e9955c83
AD
641 return EPILOGUE;
642 }
643}
644
645
a706a1cc
PE
646 /*----------------------------------------------------------------.
647 | By default, grow the string obstack with the input, escaping M4 |
648 | quoting characters. |
649 `----------------------------------------------------------------*/
650
651<SC_COMMENT,SC_LINE_COMMENT,SC_STRING,SC_CHARACTER,SC_BRACED_CODE,SC_PROLOGUE,SC_EPILOGUE>
652{
223ff46e
PE
653 \$ obstack_sgrow (&obstack_for_string, "$][");
654 \@ obstack_sgrow (&obstack_for_string, "@@");
655 \[ obstack_sgrow (&obstack_for_string, "@{");
656 \] obstack_sgrow (&obstack_for_string, "@}");
41141c56 657 .|\n STRING_GROW;
a706a1cc
PE
658}
659
660
e9955c83
AD
661%%
662
cd3684cf
AD
663/* Keeps track of the maximum number of semantic values to the left of
664 a handle (those referenced by $0, $-1, etc.) that are required by the
25005f6a
PH
665 semantic actions of this grammar. */
666int max_left_semantic_context = 0;
667
3f2d73f1
PE
668/* Set *LOC and adjust scanner cursor to account for token TOKEN of
669 size SIZE. */
6c30d641
PE
670
671static void
223ff46e 672adjust_location (location *loc, char const *token, size_t size)
6c30d641 673{
3f2d73f1
PE
674 int line = scanner_cursor.line;
675 int column = scanner_cursor.column;
6c30d641
PE
676 char const *p0 = token;
677 char const *p = token;
678 char const *lim = token + size;
679
3f2d73f1
PE
680 loc->start = scanner_cursor;
681
6c30d641
PE
682 for (p = token; p < lim; p++)
683 switch (*p)
684 {
6c30d641
PE
685 case '\n':
686 line++;
687 column = 1;
688 p0 = p + 1;
689 break;
690
691 case '\t':
692 column += mbsnwidth (p0, p - p0, 0);
693 column += 8 - ((column - 1) & 7);
694 p0 = p + 1;
695 break;
696 }
697
3f2d73f1
PE
698 scanner_cursor.line = line;
699 scanner_cursor.column = column + mbsnwidth (p0, p - p0, 0);
700
701 loc->end = scanner_cursor;
6c30d641
PE
702}
703
704
/* Read at most SIZE bytes from FP into BUF and return how many bytes
   BUF holds afterwards.  Carriage returns are normalized on the way:
   both "\r\n" and a lone '\r' come out as a single '\n', so the
   result may be shorter than what was read from FP.  */

static size_t
no_cr_read (FILE *fp, char *buf, size_t size)
{
  size_t nread = fread (buf, 1, size, fp);
  /* Nothing before the first '\r' needs to be touched.  */
  char *dst = nread ? memchr (buf, '\r', nread) : NULL;

  if (dst)
    {
      char const *end = buf + nread;
      char const *src = dst;

      /* Compact the buffer in place: SRC scans, DST writes.  */
      while (src != end)
	{
	  char c = *src++;

	  if (c == '\r')
	    {
	      c = '\n';
	      if (src != end)
		{
		  /* "\r\n" inside the buffer: swallow the '\n'.  */
		  if (*src == '\n')
		    src++;
		}
	      else
		{
		  /* The '\r' was the last byte read, so the matching
		     '\n' may still be in the stream.  Consume it if it
		     is there; otherwise push the byte back.  The
		     result is the same whether or not the push-back
		     succeeds.  */
		  int next = getc (fp);
		  if (next != '\n')
		    ungetc (next, fp);
		}
	    }

	  *dst++ = c;
	}

      return dst - buf;
    }

  return nread;
}
750
751
e9955c83 752/*------------------------------------------------------------------.
366eea36 753| TEXT is pointing to a wannabee semantic value (i.e., a `$'). |
e9955c83
AD
754| |
755| Possible inputs: $[<TYPENAME>]($|integer) |
756| |
223ff46e 757| Output to OBSTACK_FOR_STRING a reference to this semantic value. |
e9955c83
AD
758`------------------------------------------------------------------*/
759
624a35e2 760static inline bool
223ff46e 761handle_action_dollar (char *text, location loc)
e9955c83
AD
762{
763 const char *type_name = NULL;
366eea36 764 char *cp = text + 1;
e9955c83 765
624a35e2
PE
766 if (! current_rule)
767 return false;
768
e9955c83
AD
769 /* Get the type name if explicit. */
770 if (*cp == '<')
771 {
772 type_name = ++cp;
773 while (*cp != '>')
774 ++cp;
775 *cp = '\0';
776 ++cp;
777 }
778
779 if (*cp == '$')
780 {
781 if (!type_name)
223ff46e 782 type_name = symbol_list_n_type_name_get (current_rule, loc, 0);
e9955c83 783 if (!type_name && typed)
223ff46e 784 complain_at (loc, _("$$ of `%s' has no declared type"),
97650f4e 785 current_rule->sym->tag);
e9955c83
AD
786 if (!type_name)
787 type_name = "";
223ff46e 788 obstack_fgrow1 (&obstack_for_string,
e9955c83
AD
789 "]b4_lhs_value([%s])[", type_name);
790 }
d8d3f94a 791 else
e9955c83 792 {
d8d3f94a 793 long num;
223ff46e 794 set_errno (0);
d8d3f94a 795 num = strtol (cp, 0, 10);
e9955c83 796
223ff46e 797 if (INT_MIN <= num && num <= rule_length && ! get_errno ())
e9955c83 798 {
d8d3f94a 799 int n = num;
25005f6a
PH
800 if (1-n > max_left_semantic_context)
801 max_left_semantic_context = 1-n;
e9955c83 802 if (!type_name && n > 0)
223ff46e 803 type_name = symbol_list_n_type_name_get (current_rule, loc, n);
e9955c83 804 if (!type_name && typed)
223ff46e
PE
805 complain_at (loc, _("$%d of `%s' has no declared type"),
806 n, current_rule->sym->tag);
e9955c83
AD
807 if (!type_name)
808 type_name = "";
223ff46e 809 obstack_fgrow3 (&obstack_for_string,
e9955c83
AD
810 "]b4_rhs_value([%d], [%d], [%s])[",
811 rule_length, n, type_name);
812 }
d8d3f94a 813 else
223ff46e 814 complain_at (loc, _("integer out of range: %s"), quote (text));
9280d3ef 815 }
9280d3ef 816
624a35e2 817 return true;
e9955c83
AD
818}
819
f25bfb75 820
cd3684cf
AD
821/*----------------------------------------------------------------.
822| Map `$?' onto the proper M4 symbol, depending on its TOKEN_TYPE |
823| (are we in an action?). |
824`----------------------------------------------------------------*/
e9955c83
AD
825
826static void
624a35e2 827handle_dollar (int token_type, char *text, location loc)
f25bfb75 828{
624a35e2 829 switch (token_type)
f25bfb75 830 {
624a35e2
PE
831 case BRACED_CODE:
832 if (handle_action_dollar (text, loc))
833 return;
f25bfb75
AD
834 break;
835
624a35e2 836 case PERCENT_DESTRUCTOR:
cd3684cf 837 case PERCENT_INITIAL_ACTION:
624a35e2
PE
838 case PERCENT_PRINTER:
839 if (text[1] == '$')
840 {
841 obstack_sgrow (&obstack_for_string, "]b4_dollar_dollar[");
842 return;
843 }
844 break;
845
846 default:
f25bfb75
AD
847 break;
848 }
624a35e2
PE
849
850 complain_at (loc, _("invalid value: %s"), quote (text));
f25bfb75
AD
851}
852
853
854/*------------------------------------------------------.
855| TEXT is a location token (i.e., a `@...'). Output to |
223ff46e 856| OBSTACK_FOR_STRING a reference to this location. |
f25bfb75
AD
857`------------------------------------------------------*/
858
624a35e2 859static inline bool
223ff46e 860handle_action_at (char *text, location loc)
e9955c83 861{
366eea36 862 char *cp = text + 1;
d0829076 863 locations_flag = true;
e9955c83 864
624a35e2
PE
865 if (! current_rule)
866 return false;
867
366eea36 868 if (*cp == '$')
624a35e2 869 obstack_sgrow (&obstack_for_string, "]b4_lhs_location[");
d8d3f94a 870 else
e9955c83 871 {
d8d3f94a 872 long num;
223ff46e 873 set_errno (0);
d8d3f94a 874 num = strtol (cp, 0, 10);
dafdc66f 875
223ff46e 876 if (INT_MIN <= num && num <= rule_length && ! get_errno ())
d8d3f94a
PE
877 {
878 int n = num;
223ff46e 879 obstack_fgrow2 (&obstack_for_string, "]b4_rhs_location([%d], [%d])[",
d8d3f94a
PE
880 rule_length, n);
881 }
e9955c83 882 else
223ff46e 883 complain_at (loc, _("integer out of range: %s"), quote (text));
f25bfb75 884 }
f25bfb75 885
624a35e2 886 return true;
e9955c83 887}
4cdb01db 888
f25bfb75 889
cd3684cf
AD
890/*----------------------------------------------------------------.
891| Map `@?' onto the proper M4 symbol, depending on its TOKEN_TYPE |
892| (are we in an action?). |
893`----------------------------------------------------------------*/
f25bfb75
AD
894
895static void
624a35e2 896handle_at (int token_type, char *text, location loc)
f25bfb75 897{
624a35e2 898 switch (token_type)
f25bfb75 899 {
624a35e2 900 case BRACED_CODE:
223ff46e 901 handle_action_at (text, loc);
624a35e2
PE
902 return;
903
cd3684cf 904 case PERCENT_INITIAL_ACTION:
624a35e2
PE
905 case PERCENT_DESTRUCTOR:
906 case PERCENT_PRINTER:
907 if (text[1] == '$')
908 {
909 obstack_sgrow (&obstack_for_string, "]b4_at_dollar[");
910 return;
911 }
f25bfb75
AD
912 break;
913
624a35e2 914 default:
f25bfb75
AD
915 break;
916 }
624a35e2
PE
917
918 complain_at (loc, _("invalid value: %s"), quote (text));
f25bfb75
AD
919}
920
921
d8d3f94a
PE
/*------------------------------------------------------------------.
| Convert universal character name UCN to a single-byte character, |
| and return that character.  Return -1 if UCN does not correspond |
| to a single-byte character.                                       |
|                                                                   |
| UCN is the text of the escape, starting at its backslash: the     |
| hexadecimal digits are read from UCN + 2.                         |
`------------------------------------------------------------------*/

static int
convert_ucn_to_byte (char const *ucn)
{
  unsigned long code = strtoul (ucn + 2, 0, 16);
  /* The result is kept in an int so that the -1 sentinel never has to
     go through an unsigned type and back.  */
  int byte;

  /* FIXME: Currently we assume Unicode-compatible unibyte characters
     on ASCII hosts (i.e., Latin-1 on hosts with 8-bit bytes).  On
     non-ASCII hosts we support only the portable C character set.
     These limitations should be removed once we add support for
     multibyte characters.  */

  if (UCHAR_MAX < code)
    return -1;

  byte = (int) code;

#if ! ('$' == 0x24 && '@' == 0x40 && '`' == 0x60 && '~' == 0x7e)
  {
    /* A non-ASCII host.  Use CODE to index into a table of the C
       basic execution character set, which is guaranteed to exist on
       all Standard C platforms.  This table also includes '$', '@',
       and '`', which are not in the basic execution character set but
       which are unibyte characters on all the platforms that we know
       about.  */
    static signed char const table[] =
      {
	'\0',   -1,   -1,   -1,   -1,   -1,   -1, '\a',
	'\b', '\t', '\n', '\v', '\f', '\r',   -1,   -1,
	  -1,   -1,   -1,   -1,   -1,   -1,   -1,   -1,
	  -1,   -1,   -1,   -1,   -1,   -1,   -1,   -1,
	 ' ',  '!',  '"',  '#',  '$',  '%',  '&', '\'',
	 '(',  ')',  '*',  '+',  ',',  '-',  '.',  '/',
	 '0',  '1',  '2',  '3',  '4',  '5',  '6',  '7',
	 '8',  '9',  ':',  ';',  '<',  '=',  '>',  '?',
	 '@',  'A',  'B',  'C',  'D',  'E',  'F',  'G',
	 'H',  'I',  'J',  'K',  'L',  'M',  'N',  'O',
	 'P',  'Q',  'R',  'S',  'T',  'U',  'V',  'W',
	 'X',  'Y',  'Z',  '[', '\\',  ']',  '^',  '_',
	 '`',  'a',  'b',  'c',  'd',  'e',  'f',  'g',
	 'h',  'i',  'j',  'k',  'l',  'm',  'n',  'o',
	 'p',  'q',  'r',  's',  't',  'u',  'v',  'w',
	 'x',  'y',  'z',  '{',  '|',  '}',  '~'
      };

    byte = code < sizeof table ? table[code] : -1;
  }
#endif

  return byte;
}
976
977
900c5db5
AD
978/*----------------------------------------------------------------.
979| Handle `#line INT "FILE"'. ARGS has already skipped `#line '. |
980`----------------------------------------------------------------*/
981
982static void
3f2d73f1 983handle_syncline (char *args)
900c5db5
AD
984{
985 int lineno = strtol (args, &args, 10);
986 const char *file = NULL;
987 file = strchr (args, '"') + 1;
988 *strchr (file, '"') = 0;
3f2d73f1
PE
989 scanner_cursor.file = current_file = xstrdup (file);
990 scanner_cursor.line = lineno;
991 scanner_cursor.column = 1;
900c5db5
AD
992}
993
a706a1cc 994
3f2d73f1
PE
995/*------------------------------------------------------------------------.
996| Report an unexpected EOF in a token or comment starting at START. |
997| An end of file was encountered and the expected TOKEN_END was missing. |
3f2d73f1 998`------------------------------------------------------------------------*/
a706a1cc
PE
999
1000static void
aa418041 1001unexpected_eof (boundary start, char const *token_end)
a706a1cc 1002{
223ff46e
PE
1003 location loc;
1004 loc.start = start;
1005 loc.end = scanner_cursor;
1006 complain_at (loc, _("missing `%s' at end of file"), token_end);
a706a1cc
PE
1007}
1008
1009
f25bfb75
AD
1010/*-------------------------.
1011| Initialize the scanner. |
1012`-------------------------*/
1013
1d6412ad
AD
1014void
1015scanner_initialize (void)
1016{
223ff46e 1017 obstack_init (&obstack_for_string);
1d6412ad
AD
1018}
1019
1020
f25bfb75
AD
1021/*-----------------------------------------------.
1022| Free all the memory allocated to the scanner. |
1023`-----------------------------------------------*/
1024
4cdb01db
AD
1025void
1026scanner_free (void)
1027{
223ff46e 1028 obstack_free (&obstack_for_string, 0);
536545f3
AD
1029 /* Reclaim Flex's buffers. */
1030 yy_delete_buffer (YY_CURRENT_BUFFER);
4cdb01db 1031}