]> git.saurik.com Git - bison.git/blame - src/scan-gram.l
Regenerate.
[bison.git] / src / scan-gram.l
CommitLineData
e9955c83 1/* Bison Grammar Scanner -*- C -*-
3b1e470c 2
a737b216 3 Copyright (C) 2002, 2003 Free Software Foundation, Inc.
e9955c83
AD
4
5 This file is part of Bison, the GNU Compiler Compiler.
6
7 This program is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 2 of the License, or
10 (at your option) any later version.
11
12 This program is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
16
17 You should have received a copy of the GNU General Public License
18 along with this program; if not, write to the Free Software
19 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
20 02111-1307 USA
21*/
22
aa418041 23%option debug nodefault nounput noyywrap never-interactive
e9955c83
AD
24%option prefix="gram_" outfile="lex.yy.c"
25
26%{
27#include "system.h"
223ff46e
PE
28
29#include <mbswidth.h>
30#include <get-errno.h>
31#include <quote.h>
32
e9955c83 33#include "complain.h"
3f2d73f1 34#include "files.h"
e9955c83
AD
35#include "getargs.h"
36#include "gram.h"
37#include "reader.h"
223ff46e 38#include "uniqstr.h"
e9955c83 39
3f2d73f1
PE
40#define YY_USER_INIT \
41 do \
42 { \
43 scanner_cursor.file = current_file; \
44 scanner_cursor.line = 1; \
45 scanner_cursor.column = 1; \
379f0ac8 46 code_start = scanner_cursor; \
3f2d73f1
PE
47 } \
48 while (0)
8efe435c 49
3f2d73f1
PE
50/* Location of scanner cursor. */
51boundary scanner_cursor;
41141c56 52
223ff46e 53static void adjust_location (location *, char const *, size_t);
3f2d73f1 54#define YY_USER_ACTION adjust_location (loc, yytext, yyleng);
d8d3f94a 55
6c30d641 56static size_t no_cr_read (FILE *, char *, size_t);
d8d3f94a
PE
57#define YY_INPUT(buf, result, size) ((result) = no_cr_read (yyin, buf, size))
58
59
223ff46e 60/* OBSTACK_FOR_STRING -- Used to store all the characters that we need to
44995b2e
AD
61 keep (to construct ID, STRINGS etc.). Use the following macros to
62 use it.
63
41141c56
PE
64 Use STRING_GROW to append what has just been matched, and
65 STRING_FINISH to end the string (it puts the ending 0).
66 STRING_FINISH also stores this string in LAST_STRING, which can be
67 used, and which is used by STRING_FREE to free the last string. */
44995b2e 68
223ff46e 69static struct obstack obstack_for_string;
44995b2e 70
7ec2d4cd
AD
71/* A string representing the most recently saved token. */
72static char *last_string;
73
74
41141c56 75#define STRING_GROW \
223ff46e 76 obstack_grow (&obstack_for_string, yytext, yyleng)
44995b2e 77
41141c56 78#define STRING_FINISH \
44995b2e 79 do { \
223ff46e
PE
80 obstack_1grow (&obstack_for_string, '\0'); \
81 last_string = obstack_finish (&obstack_for_string); \
44995b2e
AD
82 } while (0)
83
41141c56 84#define STRING_FREE \
223ff46e 85 obstack_free (&obstack_for_string, last_string)
e9955c83 86
7ec2d4cd
AD
87void
88scanner_last_string_free (void)
89{
41141c56 90 STRING_FREE;
7ec2d4cd 91}
e9955c83 92
efcb44dd
PE
93/* Within well-formed rules, RULE_LENGTH is the number of values in
94 the current rule so far, which says where to find `$0' with respect
95 to the top of the stack. It is not the same as the rule->length in
96 the case of mid rule actions.
97
98 Outside of well-formed rules, RULE_LENGTH has an undefined value. */
99static int rule_length;
100
624a35e2
PE
101static void handle_dollar (int token_type, char *cp, location loc);
102static void handle_at (int token_type, char *cp, location loc);
3f2d73f1 103static void handle_syncline (char *args);
d8d3f94a 104static int convert_ucn_to_byte (char const *hex_text);
aa418041 105static void unexpected_eof (boundary, char const *);
e9955c83
AD
106
107%}
d8d3f94a 108%x SC_COMMENT SC_LINE_COMMENT SC_YACC_COMMENT
e9955c83 109%x SC_STRING SC_CHARACTER
3f2d73f1 110%x SC_AFTER_IDENTIFIER
e9955c83 111%x SC_ESCAPED_STRING SC_ESCAPED_CHARACTER
624a35e2 112%x SC_PRE_CODE SC_BRACED_CODE SC_PROLOGUE SC_EPILOGUE
e9955c83 113
29c01725
AD
114letter [.abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ_]
115id {letter}({letter}|[0-9])*
116directive %{letter}({letter}|[0-9]|-)*
624a35e2 117int [0-9]+
d8d3f94a
PE
118
119/* POSIX says that a tag must be both an id and a C union member, but
120 historically almost any character is allowed in a tag. We disallow
121 NUL and newline, as this simplifies our implementation. */
122tag [^\0\n>]+
123
124/* Zero or more instances of backslash-newline. Following GCC, allow
125 white space between the backslash and the newline. */
126splice (\\[ \f\t\v]*\n)*
e9955c83
AD
127
128%%
129%{
a706a1cc 130 /* Nesting level of the current code in braces. */
1a9e39f1
PE
131 int braces_level IF_LINT (= 0);
132
3f2d73f1
PE
133 /* Parent context state, when applicable. */
134 int context_state IF_LINT (= 0);
a706a1cc 135
624a35e2
PE
136 /* Token type to return, when applicable. */
137 int token_type IF_LINT (= 0);
138
3f2d73f1 139 /* Location of most recent identifier, when applicable. */
a2bc9dbc 140 location id_loc IF_LINT (= empty_location);
3f2d73f1 141
a2bc9dbc
PE
142 /* Where containing code started, when applicable. Its initial
143 value is relevant only when yylex is invoked in the SC_EPILOGUE
144 start condition. */
145 boundary code_start = scanner_cursor;
3f2d73f1 146
223ff46e
PE
147 /* Where containing comment or string or character literal started,
148 when applicable. */
a2bc9dbc 149 boundary token_start IF_LINT (= scanner_cursor);
e9955c83
AD
150%}
151
152
3f2d73f1
PE
153 /*-----------------------.
154 | Scanning white space. |
155 `-----------------------*/
156
624a35e2 157<INITIAL,SC_AFTER_IDENTIFIER,SC_PRE_CODE>
3f2d73f1
PE
158{
159 [ \f\n\t\v] ;
83adb046 160 "," warn_at (*loc, _("stray `,' treated as white space"));
3f2d73f1
PE
161
162 /* Comments. */
3f2d73f1 163 "//".* ;
83adb046
PE
164 "/*" {
165 token_start = loc->start;
166 context_state = YY_START;
167 BEGIN SC_YACC_COMMENT;
168 }
3f2d73f1
PE
169
170 /* #line directives are not documented, and may be withdrawn or
171 modified in future versions of Bison. */
172 ^"#line "{int}" \"".*"\"\n" {
173 handle_syncline (yytext + sizeof "#line " - 1);
174 }
175}
176
177
e9955c83
AD
178 /*----------------------------.
179 | Scanning Bison directives. |
180 `----------------------------*/
181<INITIAL>
182{
183 "%binary" return PERCENT_NONASSOC;
184 "%debug" return PERCENT_DEBUG;
39a06c25 185 "%default"[-_]"prec" return PERCENT_DEFAULT_PREC;
e9955c83
AD
186 "%define" return PERCENT_DEFINE;
187 "%defines" return PERCENT_DEFINES;
624a35e2 188 "%destructor" token_type = PERCENT_DESTRUCTOR; BEGIN SC_PRE_CODE;
676385e2 189 "%dprec" return PERCENT_DPREC;
e9955c83
AD
190 "%error"[-_]"verbose" return PERCENT_ERROR_VERBOSE;
191 "%expect" return PERCENT_EXPECT;
192 "%file-prefix" return PERCENT_FILE_PREFIX;
193 "%fixed"[-_]"output"[-_]"files" return PERCENT_YACC;
cd3684cf 194 "%initial-action" token_type = PERCENT_INITIAL_ACTION; BEGIN SC_PRE_CODE;
ae7453f2 195 "%glr-parser" return PERCENT_GLR_PARSER;
e9955c83 196 "%left" return PERCENT_LEFT;
624a35e2 197 "%lex-param" token_type = PERCENT_LEX_PARAM; BEGIN SC_PRE_CODE;
e9955c83 198 "%locations" return PERCENT_LOCATIONS;
676385e2 199 "%merge" return PERCENT_MERGE;
e9955c83 200 "%name"[-_]"prefix" return PERCENT_NAME_PREFIX;
22fccf95 201 "%no"[-_]"default"[-_]"prec" return PERCENT_NO_DEFAULT_PREC;
e9955c83
AD
202 "%no"[-_]"lines" return PERCENT_NO_LINES;
203 "%nonassoc" return PERCENT_NONASSOC;
916708d5 204 "%nondeterministic-parser" return PERCENT_NONDETERMINISTIC_PARSER;
e9955c83
AD
205 "%nterm" return PERCENT_NTERM;
206 "%output" return PERCENT_OUTPUT;
624a35e2 207 "%parse-param" token_type = PERCENT_PARSE_PARAM; BEGIN SC_PRE_CODE;
d8d3f94a 208 "%prec" rule_length--; return PERCENT_PREC;
624a35e2 209 "%printer" token_type = PERCENT_PRINTER; BEGIN SC_PRE_CODE;
e9955c83
AD
210 "%pure"[-_]"parser" return PERCENT_PURE_PARSER;
211 "%right" return PERCENT_RIGHT;
212 "%skeleton" return PERCENT_SKELETON;
213 "%start" return PERCENT_START;
214 "%term" return PERCENT_TOKEN;
215 "%token" return PERCENT_TOKEN;
216 "%token"[-_]"table" return PERCENT_TOKEN_TABLE;
217 "%type" return PERCENT_TYPE;
624a35e2 218 "%union" token_type = PERCENT_UNION; BEGIN SC_PRE_CODE;
e9955c83
AD
219 "%verbose" return PERCENT_VERBOSE;
220 "%yacc" return PERCENT_YACC;
221
3f2d73f1 222 {directive} {
41141c56 223 complain_at (*loc, _("invalid directive: %s"), quote (yytext));
412f8a59 224 }
900c5db5 225
e9955c83 226 "=" return EQUAL;
d8d3f94a 227 "|" rule_length = 0; return PIPE;
e9955c83
AD
228 ";" return SEMICOLON;
229
3f2d73f1 230 {id} {
41141c56 231 val->symbol = symbol_get (yytext, *loc);
3f2d73f1 232 id_loc = *loc;
efcb44dd 233 rule_length++;
3f2d73f1 234 BEGIN SC_AFTER_IDENTIFIER;
e9955c83
AD
235 }
236
d8d3f94a
PE
237 {int} {
238 unsigned long num;
223ff46e 239 set_errno (0);
d8d3f94a 240 num = strtoul (yytext, 0, 10);
223ff46e 241 if (INT_MAX < num || get_errno ())
d8d3f94a 242 {
41141c56 243 complain_at (*loc, _("integer out of range: %s"), quote (yytext));
d8d3f94a
PE
244 num = INT_MAX;
245 }
41141c56 246 val->integer = num;
d8d3f94a
PE
247 return INT;
248 }
e9955c83
AD
249
250 /* Characters. We don't check there is only one. */
3f2d73f1 251 "'" STRING_GROW; token_start = loc->start; BEGIN SC_ESCAPED_CHARACTER;
e9955c83
AD
252
253 /* Strings. */
3f2d73f1 254 "\"" STRING_GROW; token_start = loc->start; BEGIN SC_ESCAPED_STRING;
e9955c83
AD
255
256 /* Prologue. */
3f2d73f1 257 "%{" code_start = loc->start; BEGIN SC_PROLOGUE;
e9955c83
AD
258
259 /* Code in between braces. */
3f2d73f1
PE
260 "{" {
261 STRING_GROW;
624a35e2 262 token_type = BRACED_CODE;
3f2d73f1
PE
263 braces_level = 0;
264 code_start = loc->start;
265 BEGIN SC_BRACED_CODE;
266 }
e9955c83
AD
267
268 /* A type. */
d8d3f94a 269 "<"{tag}">" {
223ff46e 270 obstack_grow (&obstack_for_string, yytext + 1, yyleng - 2);
41141c56 271 STRING_FINISH;
223ff46e 272 val->uniqstr = uniqstr_new (last_string);
41141c56 273 STRING_FREE;
4cdb01db
AD
274 return TYPE;
275 }
276
a706a1cc
PE
277 "%%" {
278 static int percent_percent_count;
e9955c83 279 if (++percent_percent_count == 2)
a2bc9dbc 280 BEGIN SC_EPILOGUE;
e9955c83
AD
281 return PERCENT_PERCENT;
282 }
283
a706a1cc 284 . {
41141c56 285 complain_at (*loc, _("invalid character: %s"), quote (yytext));
3f2d73f1 286 }
379f0ac8
PE
287
288 <<EOF>> {
289 loc->start = loc->end = scanner_cursor;
290 yyterminate ();
291 }
3f2d73f1
PE
292}
293
294
295 /*-----------------------------------------------------------------.
296 | Scanning after an identifier, checking whether a colon is next. |
297 `-----------------------------------------------------------------*/
298
299<SC_AFTER_IDENTIFIER>
300{
301 ":" {
302 rule_length = 0;
303 *loc = id_loc;
304 BEGIN INITIAL;
305 return ID_COLON;
306 }
307 . {
308 scanner_cursor.column -= mbsnwidth (yytext, yyleng, 0);
309 yyless (0);
310 *loc = id_loc;
311 BEGIN INITIAL;
312 return ID;
313 }
314 <<EOF>> {
315 *loc = id_loc;
316 BEGIN INITIAL;
317 return ID;
e9955c83
AD
318 }
319}
320
321
d8d3f94a
PE
322 /*---------------------------------------------------------------.
323 | Scanning a Yacc comment. The initial `/ *' is already eaten. |
324 `---------------------------------------------------------------*/
e9955c83 325
d8d3f94a 326<SC_YACC_COMMENT>
e9955c83 327{
3f2d73f1 328 "*/" BEGIN context_state;
a706a1cc 329 .|\n ;
aa418041 330 <<EOF>> unexpected_eof (token_start, "*/"); BEGIN context_state;
d8d3f94a
PE
331}
332
333
334 /*------------------------------------------------------------.
335 | Scanning a C comment. The initial `/ *' is already eaten. |
336 `------------------------------------------------------------*/
337
338<SC_COMMENT>
339{
3f2d73f1 340 "*"{splice}"/" STRING_GROW; BEGIN context_state;
aa418041 341 <<EOF>> unexpected_eof (token_start, "*/"); BEGIN context_state;
e9955c83
AD
342}
343
344
d8d3f94a
PE
345 /*--------------------------------------------------------------.
346 | Scanning a line comment. The initial `//' is already eaten. |
347 `--------------------------------------------------------------*/
348
349<SC_LINE_COMMENT>
350{
3f2d73f1 351 "\n" STRING_GROW; BEGIN context_state;
41141c56 352 {splice} STRING_GROW;
3f2d73f1 353 <<EOF>> BEGIN context_state;
d8d3f94a
PE
354}
355
356
e9955c83
AD
357 /*----------------------------------------------------------------.
358 | Scanning a C string, including its escapes. The initial `"' is |
359 | already eaten. |
360 `----------------------------------------------------------------*/
361
362<SC_ESCAPED_STRING>
363{
db2cc12f 364 "\"" {
41141c56
PE
365 STRING_GROW;
366 STRING_FINISH;
3f2d73f1 367 loc->start = token_start;
223ff46e 368 val->chars = last_string;
efcb44dd 369 rule_length++;
a706a1cc 370 BEGIN INITIAL;
e9955c83
AD
371 return STRING;
372 }
373
41141c56 374 .|\n STRING_GROW;
aa418041 375 <<EOF>> unexpected_eof (token_start, "\""); BEGIN INITIAL;
e9955c83
AD
376}
377
378 /*---------------------------------------------------------------.
379 | Scanning a C character, decoding its escapes. The initial "'" |
380 | is already eaten. |
381 `---------------------------------------------------------------*/
382
383<SC_ESCAPED_CHARACTER>
384{
db2cc12f 385 "'" {
3b1e470c 386 unsigned char last_string_1;
41141c56
PE
387 STRING_GROW;
388 STRING_FINISH;
3f2d73f1 389 loc->start = token_start;
41141c56
PE
390 val->symbol = symbol_get (last_string, *loc);
391 symbol_class_set (val->symbol, token_sym, *loc);
3b1e470c
PE
392 last_string_1 = last_string[1];
393 symbol_user_token_number_set (val->symbol, last_string_1, *loc);
41141c56 394 STRING_FREE;
a706a1cc
PE
395 rule_length++;
396 BEGIN INITIAL;
397 return ID;
e9955c83 398 }
a706a1cc 399
41141c56 400 .|\n STRING_GROW;
aa418041 401 <<EOF>> unexpected_eof (token_start, "'"); BEGIN INITIAL;
e9955c83
AD
402}
403
404
405 /*----------------------------.
406 | Decode escaped characters. |
407 `----------------------------*/
408
409<SC_ESCAPED_STRING,SC_ESCAPED_CHARACTER>
410{
d8d3f94a
PE
411 \\[0-7]{1,3} {
412 unsigned long c = strtoul (yytext + 1, 0, 8);
413 if (UCHAR_MAX < c)
3f2d73f1 414 complain_at (*loc, _("invalid escape sequence: %s"), quote (yytext));
e9955c83 415 else
223ff46e 416 obstack_1grow (&obstack_for_string, c);
e9955c83
AD
417 }
418
6b0d38ab 419 \\x[0-9abcdefABCDEF]+ {
d8d3f94a 420 unsigned long c;
223ff46e 421 set_errno (0);
d8d3f94a 422 c = strtoul (yytext + 2, 0, 16);
223ff46e 423 if (UCHAR_MAX < c || get_errno ())
3f2d73f1 424 complain_at (*loc, _("invalid escape sequence: %s"), quote (yytext));
d8d3f94a 425 else
223ff46e 426 obstack_1grow (&obstack_for_string, c);
e9955c83
AD
427 }
428
223ff46e
PE
429 \\a obstack_1grow (&obstack_for_string, '\a');
430 \\b obstack_1grow (&obstack_for_string, '\b');
431 \\f obstack_1grow (&obstack_for_string, '\f');
432 \\n obstack_1grow (&obstack_for_string, '\n');
433 \\r obstack_1grow (&obstack_for_string, '\r');
434 \\t obstack_1grow (&obstack_for_string, '\t');
435 \\v obstack_1grow (&obstack_for_string, '\v');
412f8a59
PE
436
437 /* \\[\"\'?\\] would be shorter, but it confuses xgettext. */
223ff46e 438 \\("\""|"'"|"?"|"\\") obstack_1grow (&obstack_for_string, yytext[1]);
412f8a59 439
6b0d38ab 440 \\(u|U[0-9abcdefABCDEF]{4})[0-9abcdefABCDEF]{4} {
d8d3f94a
PE
441 int c = convert_ucn_to_byte (yytext);
442 if (c < 0)
3f2d73f1 443 complain_at (*loc, _("invalid escape sequence: %s"), quote (yytext));
d8d3f94a 444 else
223ff46e 445 obstack_1grow (&obstack_for_string, c);
d8d3f94a 446 }
4f25ebb0 447 \\(.|\n) {
3f2d73f1 448 complain_at (*loc, _("unrecognized escape sequence: %s"), quote (yytext));
41141c56 449 STRING_GROW;
e9955c83
AD
450 }
451}
452
453
454 /*----------------------------------------------------------.
455 | Scanning a C character without decoding its escapes. The |
456 | initial "'" is already eaten. |
457 `----------------------------------------------------------*/
458
459<SC_CHARACTER>
460{
3f2d73f1 461 "'" STRING_GROW; BEGIN context_state;
41141c56 462 \\{splice}[^$@\[\]] STRING_GROW;
aa418041 463 <<EOF>> unexpected_eof (token_start, "'"); BEGIN context_state;
e9955c83
AD
464}
465
466
467 /*----------------------------------------------------------------.
468 | Scanning a C string, without decoding its escapes. The initial |
469 | `"' is already eaten. |
470 `----------------------------------------------------------------*/
471
472<SC_STRING>
473{
3f2d73f1 474 "\"" STRING_GROW; BEGIN context_state;
41141c56 475 \\{splice}[^$@\[\]] STRING_GROW;
aa418041
PE
476 <<EOF>> {
477 unexpected_eof (token_start, "\"");
478 BEGIN context_state;
479 }
e9955c83
AD
480}
481
482
483 /*---------------------------------------------------.
484 | Strings, comments etc. can be found in user code. |
485 `---------------------------------------------------*/
486
487<SC_BRACED_CODE,SC_PROLOGUE,SC_EPILOGUE>
488{
3f2d73f1
PE
489 "'" {
490 STRING_GROW;
491 context_state = YY_START;
492 token_start = loc->start;
493 BEGIN SC_CHARACTER;
494 }
495 "\"" {
496 STRING_GROW;
497 context_state = YY_START;
498 token_start = loc->start;
499 BEGIN SC_STRING;
500 }
501 "/"{splice}"*" {
502 STRING_GROW;
503 context_state = YY_START;
504 token_start = loc->start;
505 BEGIN SC_COMMENT;
506 }
507 "/"{splice}"/" {
508 STRING_GROW;
509 context_state = YY_START;
510 BEGIN SC_LINE_COMMENT;
511 }
e9955c83
AD
512}
513
514
624a35e2
PE
515 /*---------------------------------------------------------------.
516 | Scanning after %union etc., possibly followed by white space. |
517 | For %union only, allow arbitrary C code to appear before the |
518 | following brace, as an extension to POSIX. |
519 `---------------------------------------------------------------*/
520
521<SC_PRE_CODE>
522{
523 . {
524 bool valid = yytext[0] == '{' || token_type == PERCENT_UNION;
525 scanner_cursor.column -= mbsnwidth (yytext, yyleng, 0);
526 yyless (0);
527
528 if (valid)
529 {
530 braces_level = -1;
531 code_start = loc->start;
532 BEGIN SC_BRACED_CODE;
533 }
534 else
535 {
536 complain_at (*loc, _("missing `{' in `%s'"),
537 token_name (token_type));
538 obstack_sgrow (&obstack_for_string, "{}");
539 STRING_FINISH;
540 val->chars = last_string;
541 BEGIN INITIAL;
542 return token_type;
543 }
544 }
379f0ac8 545
aa418041 546 <<EOF>> unexpected_eof (scanner_cursor, "{}"); BEGIN INITIAL;
624a35e2
PE
547}
548
549
e9955c83
AD
550 /*---------------------------------------------------------------.
551 | Scanning some code in braces (%union and actions). The initial |
552 | "{" is already eaten. |
553 `---------------------------------------------------------------*/
554
555<SC_BRACED_CODE>
556{
41141c56
PE
557 "{"|"<"{splice}"%" STRING_GROW; braces_level++;
558 "%"{splice}">" STRING_GROW; braces_level--;
e9955c83 559 "}" {
25522739
PE
560 bool outer_brace = --braces_level < 0;
561
562 /* As an undocumented Bison extension, append `;' before the last
563 brace in braced code, so that the user code can omit trailing
564 `;'. But do not append `;' if emulating Yacc, since Yacc does
565 not append one.
566
567 FIXME: Bison should warn if a semicolon seems to be necessary
568 here, and should omit the semicolon if it seems unnecessary
569 (e.g., after ';', '{', or '}', each followed by comments or
570 white space). Such a warning shouldn't depend on --yacc; it
571 should depend on a new --pedantic option, which would cause
572 Bison to warn if it detects an extension to POSIX. --pedantic
573 should also diagnose other Bison extensions like %yacc.
574 Perhaps there should also be a GCC-style --pedantic-errors
575 option, so that such warnings are diagnosed as errors. */
1deb9bdc 576 if (outer_brace && token_type == BRACED_CODE && ! yacc_flag)
25522739
PE
577 obstack_1grow (&obstack_for_string, ';');
578
579 obstack_1grow (&obstack_for_string, '}');
580
581 if (outer_brace)
e9955c83 582 {
41141c56 583 STRING_FINISH;
624a35e2 584 rule_length++;
3f2d73f1 585 loc->start = code_start;
223ff46e 586 val->chars = last_string;
a706a1cc 587 BEGIN INITIAL;
624a35e2 588 return token_type;
e9955c83
AD
589 }
590 }
591
a706a1cc
PE
592 /* Tokenize `<<%' correctly (as `<<' `%') rather than incorrectly
593 (as `<' `<%'). */
41141c56 594 "<"{splice}"<" STRING_GROW;
a706a1cc 595
624a35e2
PE
596 "$"("<"{tag}">")?(-?[0-9]+|"$") handle_dollar (token_type, yytext, *loc);
597 "@"(-?[0-9]+|"$") handle_at (token_type, yytext, *loc);
e9955c83 598
aa418041 599 <<EOF>> unexpected_eof (code_start, "}"); BEGIN INITIAL;
e9955c83
AD
600}
601
602
603 /*--------------------------------------------------------------.
604 | Scanning some prologue: from "%{" (already scanned) to "%}". |
605 `--------------------------------------------------------------*/
606
607<SC_PROLOGUE>
608{
609 "%}" {
41141c56 610 STRING_FINISH;
3f2d73f1 611 loc->start = code_start;
223ff46e 612 val->chars = last_string;
a706a1cc 613 BEGIN INITIAL;
e9955c83
AD
614 return PROLOGUE;
615 }
616
aa418041 617 <<EOF>> unexpected_eof (code_start, "%}"); BEGIN INITIAL;
e9955c83
AD
618}
619
620
621 /*---------------------------------------------------------------.
622 | Scanning the epilogue (everything after the second "%%", which |
d8d3f94a 623 | has already been eaten). |
e9955c83
AD
624 `---------------------------------------------------------------*/
625
626<SC_EPILOGUE>
627{
e9955c83 628 <<EOF>> {
41141c56 629 STRING_FINISH;
3f2d73f1 630 loc->start = code_start;
223ff46e 631 val->chars = last_string;
a706a1cc 632 BEGIN INITIAL;
e9955c83
AD
633 return EPILOGUE;
634 }
635}
636
637
a706a1cc
PE
638 /*----------------------------------------------------------------.
639 | By default, grow the string obstack with the input, escaping M4 |
640 | quoting characters. |
641 `----------------------------------------------------------------*/
642
643<SC_COMMENT,SC_LINE_COMMENT,SC_STRING,SC_CHARACTER,SC_BRACED_CODE,SC_PROLOGUE,SC_EPILOGUE>
644{
223ff46e
PE
645 \$ obstack_sgrow (&obstack_for_string, "$][");
646 \@ obstack_sgrow (&obstack_for_string, "@@");
647 \[ obstack_sgrow (&obstack_for_string, "@{");
648 \] obstack_sgrow (&obstack_for_string, "@}");
41141c56 649 .|\n STRING_GROW;
a706a1cc
PE
650}
651
652
e9955c83
AD
653%%
654
cd3684cf
AD
655/* Keeps track of the maximum number of semantic values to the left of
656 a handle (those referenced by $0, $-1, etc.) that are required by the
25005f6a
PH
657 semantic actions of this grammar. */
658int max_left_semantic_context = 0;
659
3f2d73f1
PE
660/* Set *LOC and adjust scanner cursor to account for token TOKEN of
661 size SIZE. */
6c30d641
PE
662
663static void
223ff46e 664adjust_location (location *loc, char const *token, size_t size)
6c30d641 665{
3f2d73f1
PE
666 int line = scanner_cursor.line;
667 int column = scanner_cursor.column;
6c30d641
PE
668 char const *p0 = token;
669 char const *p = token;
670 char const *lim = token + size;
671
3f2d73f1
PE
672 loc->start = scanner_cursor;
673
6c30d641
PE
674 for (p = token; p < lim; p++)
675 switch (*p)
676 {
6c30d641
PE
677 case '\n':
678 line++;
679 column = 1;
680 p0 = p + 1;
681 break;
682
683 case '\t':
684 column += mbsnwidth (p0, p - p0, 0);
685 column += 8 - ((column - 1) & 7);
686 p0 = p + 1;
687 break;
688 }
689
3f2d73f1
PE
690 scanner_cursor.line = line;
691 scanner_cursor.column = column + mbsnwidth (p0, p - p0, 0);
692
693 loc->end = scanner_cursor;
6c30d641
PE
694}
695
696
/* Fill BUF with at most SIZE bytes read from FP and return how many
   bytes BUF holds afterwards.  Carriage returns never reach the
   caller: both "\r\n" and a lone '\r' are delivered as a single '\n'.
   Because of that, the count returned can be smaller than the number
   of bytes actually consumed from FP.  */

static size_t
no_cr_read (FILE *fp, char *buf, size_t size)
{
  size_t const nread = fread (buf, 1, size, fp);
  char const *src;
  char const *end;
  char *dst;

  if (nread == 0)
    return nread;

  dst = memchr (buf, '\r', nread);
  if (dst == NULL)
    /* No carriage return at all: the buffer is already clean.  */
    return nread;

  src = dst + 1;
  end = buf + nread;

  /* At the top of each iteration DST is the slot of a '\r' that was
     just found, and SRC is the first unread input byte after it.  */
  for (;;)
    {
      *dst++ = '\n';

      if (src == end)
	{
	  /* The '\r' was the last byte read; peek at the stream to
	     swallow a directly following '\n'.  Anything else is
	     pushed back, and we give up if the push-back fails.  */
	  int next = getc (fp);
	  if (next != '\n' && ungetc (next, fp) != next)
	    break;
	}
      else if (*src == '\n')
	src++;

      /* Compact the input leftwards until the next '\r'.  */
      for (;;)
	{
	  char c;
	  if (src == end)
	    return dst - buf;
	  c = *src++;
	  if (c == '\r')
	    break;
	  *dst++ = c;
	}
    }

  return dst - buf;
}
742
743
e9955c83 744/*------------------------------------------------------------------.
366eea36 745| TEXT is pointing to a wannabee semantic value (i.e., a `$'). |
e9955c83
AD
746| |
747| Possible inputs: $[<TYPENAME>]($|integer) |
748| |
223ff46e 749| Output to OBSTACK_FOR_STRING a reference to this semantic value. |
e9955c83
AD
750`------------------------------------------------------------------*/
751
624a35e2 752static inline bool
223ff46e 753handle_action_dollar (char *text, location loc)
e9955c83
AD
754{
755 const char *type_name = NULL;
366eea36 756 char *cp = text + 1;
e9955c83 757
624a35e2
PE
758 if (! current_rule)
759 return false;
760
e9955c83
AD
761 /* Get the type name if explicit. */
762 if (*cp == '<')
763 {
764 type_name = ++cp;
765 while (*cp != '>')
766 ++cp;
767 *cp = '\0';
768 ++cp;
769 }
770
771 if (*cp == '$')
772 {
773 if (!type_name)
223ff46e 774 type_name = symbol_list_n_type_name_get (current_rule, loc, 0);
e9955c83 775 if (!type_name && typed)
223ff46e 776 complain_at (loc, _("$$ of `%s' has no declared type"),
97650f4e 777 current_rule->sym->tag);
e9955c83
AD
778 if (!type_name)
779 type_name = "";
223ff46e 780 obstack_fgrow1 (&obstack_for_string,
e9955c83
AD
781 "]b4_lhs_value([%s])[", type_name);
782 }
d8d3f94a 783 else
e9955c83 784 {
d8d3f94a 785 long num;
223ff46e 786 set_errno (0);
d8d3f94a 787 num = strtol (cp, 0, 10);
e9955c83 788
223ff46e 789 if (INT_MIN <= num && num <= rule_length && ! get_errno ())
e9955c83 790 {
d8d3f94a 791 int n = num;
25005f6a
PH
792 if (1-n > max_left_semantic_context)
793 max_left_semantic_context = 1-n;
e9955c83 794 if (!type_name && n > 0)
223ff46e 795 type_name = symbol_list_n_type_name_get (current_rule, loc, n);
e9955c83 796 if (!type_name && typed)
223ff46e
PE
797 complain_at (loc, _("$%d of `%s' has no declared type"),
798 n, current_rule->sym->tag);
e9955c83
AD
799 if (!type_name)
800 type_name = "";
223ff46e 801 obstack_fgrow3 (&obstack_for_string,
e9955c83
AD
802 "]b4_rhs_value([%d], [%d], [%s])[",
803 rule_length, n, type_name);
804 }
d8d3f94a 805 else
223ff46e 806 complain_at (loc, _("integer out of range: %s"), quote (text));
9280d3ef 807 }
9280d3ef 808
624a35e2 809 return true;
e9955c83
AD
810}
811
f25bfb75 812
cd3684cf
AD
813/*----------------------------------------------------------------.
814| Map `$?' onto the proper M4 symbol, depending on its TOKEN_TYPE |
815| (are we in an action?). |
816`----------------------------------------------------------------*/
e9955c83
AD
817
818static void
624a35e2 819handle_dollar (int token_type, char *text, location loc)
f25bfb75 820{
624a35e2 821 switch (token_type)
f25bfb75 822 {
624a35e2
PE
823 case BRACED_CODE:
824 if (handle_action_dollar (text, loc))
825 return;
f25bfb75
AD
826 break;
827
624a35e2 828 case PERCENT_DESTRUCTOR:
cd3684cf 829 case PERCENT_INITIAL_ACTION:
624a35e2
PE
830 case PERCENT_PRINTER:
831 if (text[1] == '$')
832 {
833 obstack_sgrow (&obstack_for_string, "]b4_dollar_dollar[");
834 return;
835 }
836 break;
837
838 default:
f25bfb75
AD
839 break;
840 }
624a35e2
PE
841
842 complain_at (loc, _("invalid value: %s"), quote (text));
f25bfb75
AD
843}
844
845
846/*------------------------------------------------------.
847| TEXT is a location token (i.e., a `@...'). Output to |
223ff46e 848| OBSTACK_FOR_STRING a reference to this location. |
f25bfb75
AD
849`------------------------------------------------------*/
850
624a35e2 851static inline bool
223ff46e 852handle_action_at (char *text, location loc)
e9955c83 853{
366eea36 854 char *cp = text + 1;
d0829076 855 locations_flag = true;
e9955c83 856
624a35e2
PE
857 if (! current_rule)
858 return false;
859
366eea36 860 if (*cp == '$')
624a35e2 861 obstack_sgrow (&obstack_for_string, "]b4_lhs_location[");
d8d3f94a 862 else
e9955c83 863 {
d8d3f94a 864 long num;
223ff46e 865 set_errno (0);
d8d3f94a 866 num = strtol (cp, 0, 10);
dafdc66f 867
223ff46e 868 if (INT_MIN <= num && num <= rule_length && ! get_errno ())
d8d3f94a
PE
869 {
870 int n = num;
223ff46e 871 obstack_fgrow2 (&obstack_for_string, "]b4_rhs_location([%d], [%d])[",
d8d3f94a
PE
872 rule_length, n);
873 }
e9955c83 874 else
223ff46e 875 complain_at (loc, _("integer out of range: %s"), quote (text));
f25bfb75 876 }
f25bfb75 877
624a35e2 878 return true;
e9955c83 879}
4cdb01db 880
f25bfb75 881
cd3684cf
AD
882/*----------------------------------------------------------------.
883| Map `@?' onto the proper M4 symbol, depending on its TOKEN_TYPE |
884| (are we in an action?). |
885`----------------------------------------------------------------*/
f25bfb75
AD
886
887static void
624a35e2 888handle_at (int token_type, char *text, location loc)
f25bfb75 889{
624a35e2 890 switch (token_type)
f25bfb75 891 {
624a35e2 892 case BRACED_CODE:
223ff46e 893 handle_action_at (text, loc);
624a35e2
PE
894 return;
895
cd3684cf 896 case PERCENT_INITIAL_ACTION:
624a35e2
PE
897 case PERCENT_DESTRUCTOR:
898 case PERCENT_PRINTER:
899 if (text[1] == '$')
900 {
901 obstack_sgrow (&obstack_for_string, "]b4_at_dollar[");
902 return;
903 }
f25bfb75
AD
904 break;
905
624a35e2 906 default:
f25bfb75
AD
907 break;
908 }
624a35e2
PE
909
910 complain_at (loc, _("invalid value: %s"), quote (text));
f25bfb75
AD
911}
912
913
d8d3f94a
PE
/* Map the universal character name UCN onto the single byte that
   represents it, and return that byte.  Return -1 when UCN has no
   single-byte representation.

   The first two bytes of UCN are skipped without being inspected and
   the remainder is read as a hexadecimal number.  */

static int
convert_ucn_to_byte (char const *ucn)
{
  unsigned long value = strtoul (ucn + 2, 0, 16);

  /* FIXME: Currently we assume Unicode-compatible unibyte characters
     on ASCII hosts (i.e., Latin-1 on hosts with 8-bit bytes).  On
     non-ASCII hosts we support only the portable C character set.
     These limitations should be removed once we add support for
     multibyte characters.  */

  if (value > UCHAR_MAX)
    return -1;

#if ! ('$' == 0x24 && '@' == 0x40 && '`' == 0x60 && '~' == 0x7e)
  {
    /* A non-ASCII host.  Translate VALUE through a table, indexed by
       ASCII code, of the C basic execution character set, which is
       guaranteed to exist on all Standard C platforms.  The table
       also holds '$', '@', and '`', which are not in the basic
       execution character set but which are unibyte characters on
       all the platforms that we know about.  Codes beyond the table,
       and entries equal to -1, have no translation.  */
    static signed char const table[] =
      {
	'\0',   -1,   -1,   -1,   -1,   -1,   -1, '\a',
	'\b', '\t', '\n', '\v', '\f', '\r',   -1,   -1,
	  -1,   -1,   -1,   -1,   -1,   -1,   -1,   -1,
	  -1,   -1,   -1,   -1,   -1,   -1,   -1,   -1,
	 ' ',  '!',  '"',  '#',  '$',  '%',  '&', '\'',
	 '(',  ')',  '*',  '+',  ',',  '-',  '.',  '/',
	 '0',  '1',  '2',  '3',  '4',  '5',  '6',  '7',
	 '8',  '9',  ':',  ';',  '<',  '=',  '>',  '?',
	 '@',  'A',  'B',  'C',  'D',  'E',  'F',  'G',
	 'H',  'I',  'J',  'K',  'L',  'M',  'N',  'O',
	 'P',  'Q',  'R',  'S',  'T',  'U',  'V',  'W',
	 'X',  'Y',  'Z',  '[', '\\',  ']',  '^',  '_',
	 '`',  'a',  'b',  'c',  'd',  'e',  'f',  'g',
	 'h',  'i',  'j',  'k',  'l',  'm',  'n',  'o',
	 'p',  'q',  'r',  's',  't',  'u',  'v',  'w',
	 'x',  'y',  'z',  '{',  '|',  '}',  '~'
      };

    value = value < sizeof table ? table[value] : -1;
  }
#endif

  return value;
}
968
969
900c5db5
AD
970/*----------------------------------------------------------------.
971| Handle `#line INT "FILE"'. ARGS has already skipped `#line '. |
972`----------------------------------------------------------------*/
973
974static void
3f2d73f1 975handle_syncline (char *args)
900c5db5
AD
976{
977 int lineno = strtol (args, &args, 10);
978 const char *file = NULL;
979 file = strchr (args, '"') + 1;
980 *strchr (file, '"') = 0;
3f2d73f1
PE
981 scanner_cursor.file = current_file = xstrdup (file);
982 scanner_cursor.line = lineno;
983 scanner_cursor.column = 1;
900c5db5
AD
984}
985
a706a1cc 986
3f2d73f1
PE
987/*------------------------------------------------------------------------.
988| Report an unexpected EOF in a token or comment starting at START. |
989| An end of file was encountered and the expected TOKEN_END was missing. |
3f2d73f1 990`------------------------------------------------------------------------*/
a706a1cc
PE
991
992static void
aa418041 993unexpected_eof (boundary start, char const *token_end)
a706a1cc 994{
223ff46e
PE
995 location loc;
996 loc.start = start;
997 loc.end = scanner_cursor;
998 complain_at (loc, _("missing `%s' at end of file"), token_end);
a706a1cc
PE
999}
1000
1001
f25bfb75
AD
1002/*-------------------------.
1003| Initialize the scanner. |
1004`-------------------------*/
1005
1d6412ad
AD
1006void
1007scanner_initialize (void)
1008{
223ff46e 1009 obstack_init (&obstack_for_string);
1d6412ad
AD
1010}
1011
1012
f25bfb75
AD
1013/*-----------------------------------------------.
1014| Free all the memory allocated to the scanner. |
1015`-----------------------------------------------*/
1016
4cdb01db
AD
1017void
1018scanner_free (void)
1019{
223ff46e 1020 obstack_free (&obstack_for_string, 0);
536545f3
AD
1021 /* Reclaim Flex's buffers. */
1022 yy_delete_buffer (YY_CURRENT_BUFFER);
4cdb01db 1023}