]> git.saurik.com Git - bison.git/blame - src/scan-gram.l
When reducing initial empty rules, Bison parser read an initial
[bison.git] / src / scan-gram.l
CommitLineData
e9955c83 1/* Bison Grammar Scanner -*- C -*-
3b1e470c 2
a737b216 3 Copyright (C) 2002, 2003 Free Software Foundation, Inc.
e9955c83
AD
4
5 This file is part of Bison, the GNU Compiler Compiler.
6
7 This program is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 2 of the License, or
10 (at your option) any later version.
11
12 This program is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
16
17 You should have received a copy of the GNU General Public License
18 along with this program; if not, write to the Free Software
19 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
20 02111-1307 USA
21*/
22
aa418041 23%option debug nodefault nounput noyywrap never-interactive
e9955c83
AD
24%option prefix="gram_" outfile="lex.yy.c"
25
26%{
27#include "system.h"
223ff46e
PE
28
29#include <mbswidth.h>
30#include <get-errno.h>
31#include <quote.h>
32
e9955c83 33#include "complain.h"
3f2d73f1 34#include "files.h"
e9955c83
AD
35#include "getargs.h"
36#include "gram.h"
37#include "reader.h"
223ff46e 38#include "uniqstr.h"
e9955c83 39
3f2d73f1
PE
40#define YY_USER_INIT \
41 do \
42 { \
43 scanner_cursor.file = current_file; \
44 scanner_cursor.line = 1; \
45 scanner_cursor.column = 1; \
379f0ac8 46 code_start = scanner_cursor; \
3f2d73f1
PE
47 } \
48 while (0)
8efe435c 49
3f2d73f1
PE
50/* Location of scanner cursor. */
51boundary scanner_cursor;
41141c56 52
223ff46e 53static void adjust_location (location *, char const *, size_t);
3f2d73f1 54#define YY_USER_ACTION adjust_location (loc, yytext, yyleng);
d8d3f94a 55
6c30d641 56static size_t no_cr_read (FILE *, char *, size_t);
d8d3f94a
PE
57#define YY_INPUT(buf, result, size) ((result) = no_cr_read (yyin, buf, size))
58
59
223ff46e 60/* OBSTACK_FOR_STRING -- Used to store all the characters that we need to
44995b2e
AD
61 keep (to construct ID, STRINGS etc.). Use the following macros to
62 use it.
63
41141c56
PE
64 Use STRING_GROW to append what has just been matched, and
65 STRING_FINISH to end the string (it puts the ending 0).
66 STRING_FINISH also stores this string in LAST_STRING, which can be
67 used, and which is used by STRING_FREE to free the last string. */
44995b2e 68
223ff46e 69static struct obstack obstack_for_string;
44995b2e 70
7ec2d4cd
AD
71/* A string representing the most recently saved token. */
72static char *last_string;
73
74
41141c56 75#define STRING_GROW \
223ff46e 76 obstack_grow (&obstack_for_string, yytext, yyleng)
44995b2e 77
41141c56 78#define STRING_FINISH \
44995b2e 79 do { \
223ff46e
PE
80 obstack_1grow (&obstack_for_string, '\0'); \
81 last_string = obstack_finish (&obstack_for_string); \
44995b2e
AD
82 } while (0)
83
41141c56 84#define STRING_FREE \
223ff46e 85 obstack_free (&obstack_for_string, last_string)
e9955c83 86
7ec2d4cd
AD
87void
88scanner_last_string_free (void)
89{
41141c56 90 STRING_FREE;
7ec2d4cd 91}
e9955c83 92
efcb44dd
PE
93/* Within well-formed rules, RULE_LENGTH is the number of values in
94 the current rule so far, which says where to find `$0' with respect
95 to the top of the stack. It is not the same as the rule->length in
96 the case of mid rule actions.
97
98 Outside of well-formed rules, RULE_LENGTH has an undefined value. */
99static int rule_length;
100
624a35e2
PE
101static void handle_dollar (int token_type, char *cp, location loc);
102static void handle_at (int token_type, char *cp, location loc);
3f2d73f1 103static void handle_syncline (char *args);
d8d3f94a 104static int convert_ucn_to_byte (char const *hex_text);
aa418041 105static void unexpected_eof (boundary, char const *);
e9955c83
AD
106
107%}
d8d3f94a 108%x SC_COMMENT SC_LINE_COMMENT SC_YACC_COMMENT
e9955c83 109%x SC_STRING SC_CHARACTER
3f2d73f1 110%x SC_AFTER_IDENTIFIER
e9955c83 111%x SC_ESCAPED_STRING SC_ESCAPED_CHARACTER
624a35e2 112%x SC_PRE_CODE SC_BRACED_CODE SC_PROLOGUE SC_EPILOGUE
e9955c83 113
29c01725
AD
114letter [.abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ_]
115id {letter}({letter}|[0-9])*
116directive %{letter}({letter}|[0-9]|-)*
624a35e2 117int [0-9]+
d8d3f94a
PE
118
119/* POSIX says that a tag must be both an id and a C union member, but
120 historically almost any character is allowed in a tag. We disallow
121 NUL and newline, as this simplifies our implementation. */
122tag [^\0\n>]+
123
124/* Zero or more instances of backslash-newline. Following GCC, allow
125 white space between the backslash and the newline. */
126splice (\\[ \f\t\v]*\n)*
e9955c83
AD
127
128%%
129%{
a706a1cc 130 /* Nesting level of the current code in braces. */
1a9e39f1
PE
131 int braces_level IF_LINT (= 0);
132
3f2d73f1
PE
133 /* Parent context state, when applicable. */
134 int context_state IF_LINT (= 0);
a706a1cc 135
624a35e2
PE
136 /* Token type to return, when applicable. */
137 int token_type IF_LINT (= 0);
138
3f2d73f1 139 /* Location of most recent identifier, when applicable. */
a2bc9dbc 140 location id_loc IF_LINT (= empty_location);
3f2d73f1 141
a2bc9dbc
PE
142 /* Where containing code started, when applicable. Its initial
143 value is relevant only when yylex is invoked in the SC_EPILOGUE
144 start condition. */
145 boundary code_start = scanner_cursor;
3f2d73f1 146
223ff46e
PE
147 /* Where containing comment or string or character literal started,
148 when applicable. */
a2bc9dbc 149 boundary token_start IF_LINT (= scanner_cursor);
e9955c83
AD
150%}
151
152
3f2d73f1
PE
153 /*-----------------------.
154 | Scanning white space. |
155 `-----------------------*/
156
624a35e2 157<INITIAL,SC_AFTER_IDENTIFIER,SC_PRE_CODE>
3f2d73f1
PE
158{
159 [ \f\n\t\v] ;
83adb046 160 "," warn_at (*loc, _("stray `,' treated as white space"));
3f2d73f1
PE
161
162 /* Comments. */
3f2d73f1 163 "//".* ;
83adb046
PE
164 "/*" {
165 token_start = loc->start;
166 context_state = YY_START;
167 BEGIN SC_YACC_COMMENT;
168 }
3f2d73f1
PE
169
170 /* #line directives are not documented, and may be withdrawn or
171 modified in future versions of Bison. */
172 ^"#line "{int}" \"".*"\"\n" {
173 handle_syncline (yytext + sizeof "#line " - 1);
174 }
175}
176
177
e9955c83
AD
178 /*----------------------------.
179 | Scanning Bison directives. |
180 `----------------------------*/
181<INITIAL>
182{
183 "%binary" return PERCENT_NONASSOC;
184 "%debug" return PERCENT_DEBUG;
185 "%define" return PERCENT_DEFINE;
186 "%defines" return PERCENT_DEFINES;
624a35e2 187 "%destructor" token_type = PERCENT_DESTRUCTOR; BEGIN SC_PRE_CODE;
676385e2 188 "%dprec" return PERCENT_DPREC;
e9955c83
AD
189 "%error"[-_]"verbose" return PERCENT_ERROR_VERBOSE;
190 "%expect" return PERCENT_EXPECT;
191 "%file-prefix" return PERCENT_FILE_PREFIX;
192 "%fixed"[-_]"output"[-_]"files" return PERCENT_YACC;
cd3684cf 193 "%initial-action" token_type = PERCENT_INITIAL_ACTION; BEGIN SC_PRE_CODE;
ae7453f2 194 "%glr-parser" return PERCENT_GLR_PARSER;
e9955c83 195 "%left" return PERCENT_LEFT;
624a35e2 196 "%lex-param" token_type = PERCENT_LEX_PARAM; BEGIN SC_PRE_CODE;
e9955c83 197 "%locations" return PERCENT_LOCATIONS;
676385e2 198 "%merge" return PERCENT_MERGE;
e9955c83
AD
199 "%name"[-_]"prefix" return PERCENT_NAME_PREFIX;
200 "%no"[-_]"lines" return PERCENT_NO_LINES;
201 "%nonassoc" return PERCENT_NONASSOC;
916708d5 202 "%nondeterministic-parser" return PERCENT_NONDETERMINISTIC_PARSER;
e9955c83
AD
203 "%nterm" return PERCENT_NTERM;
204 "%output" return PERCENT_OUTPUT;
624a35e2 205 "%parse-param" token_type = PERCENT_PARSE_PARAM; BEGIN SC_PRE_CODE;
d8d3f94a 206 "%prec" rule_length--; return PERCENT_PREC;
624a35e2 207 "%printer" token_type = PERCENT_PRINTER; BEGIN SC_PRE_CODE;
e9955c83
AD
208 "%pure"[-_]"parser" return PERCENT_PURE_PARSER;
209 "%right" return PERCENT_RIGHT;
210 "%skeleton" return PERCENT_SKELETON;
211 "%start" return PERCENT_START;
212 "%term" return PERCENT_TOKEN;
213 "%token" return PERCENT_TOKEN;
214 "%token"[-_]"table" return PERCENT_TOKEN_TABLE;
215 "%type" return PERCENT_TYPE;
624a35e2 216 "%union" token_type = PERCENT_UNION; BEGIN SC_PRE_CODE;
e9955c83
AD
217 "%verbose" return PERCENT_VERBOSE;
218 "%yacc" return PERCENT_YACC;
219
3f2d73f1 220 {directive} {
41141c56 221 complain_at (*loc, _("invalid directive: %s"), quote (yytext));
412f8a59 222 }
900c5db5 223
e9955c83 224 "=" return EQUAL;
d8d3f94a 225 "|" rule_length = 0; return PIPE;
e9955c83
AD
226 ";" return SEMICOLON;
227
3f2d73f1 228 {id} {
41141c56 229 val->symbol = symbol_get (yytext, *loc);
3f2d73f1 230 id_loc = *loc;
efcb44dd 231 rule_length++;
3f2d73f1 232 BEGIN SC_AFTER_IDENTIFIER;
e9955c83
AD
233 }
234
d8d3f94a
PE
235 {int} {
236 unsigned long num;
223ff46e 237 set_errno (0);
d8d3f94a 238 num = strtoul (yytext, 0, 10);
223ff46e 239 if (INT_MAX < num || get_errno ())
d8d3f94a 240 {
41141c56 241 complain_at (*loc, _("integer out of range: %s"), quote (yytext));
d8d3f94a
PE
242 num = INT_MAX;
243 }
41141c56 244 val->integer = num;
d8d3f94a
PE
245 return INT;
246 }
e9955c83
AD
247
248 /* Characters. We don't check there is only one. */
3f2d73f1 249 "'" STRING_GROW; token_start = loc->start; BEGIN SC_ESCAPED_CHARACTER;
e9955c83
AD
250
251 /* Strings. */
3f2d73f1 252 "\"" STRING_GROW; token_start = loc->start; BEGIN SC_ESCAPED_STRING;
e9955c83
AD
253
254 /* Prologue. */
3f2d73f1 255 "%{" code_start = loc->start; BEGIN SC_PROLOGUE;
e9955c83
AD
256
257 /* Code in between braces. */
3f2d73f1
PE
258 "{" {
259 STRING_GROW;
624a35e2 260 token_type = BRACED_CODE;
3f2d73f1
PE
261 braces_level = 0;
262 code_start = loc->start;
263 BEGIN SC_BRACED_CODE;
264 }
e9955c83
AD
265
266 /* A type. */
d8d3f94a 267 "<"{tag}">" {
223ff46e 268 obstack_grow (&obstack_for_string, yytext + 1, yyleng - 2);
41141c56 269 STRING_FINISH;
223ff46e 270 val->uniqstr = uniqstr_new (last_string);
41141c56 271 STRING_FREE;
4cdb01db
AD
272 return TYPE;
273 }
274
a706a1cc
PE
275 "%%" {
276 static int percent_percent_count;
e9955c83 277 if (++percent_percent_count == 2)
a2bc9dbc 278 BEGIN SC_EPILOGUE;
e9955c83
AD
279 return PERCENT_PERCENT;
280 }
281
a706a1cc 282 . {
41141c56 283 complain_at (*loc, _("invalid character: %s"), quote (yytext));
3f2d73f1 284 }
379f0ac8
PE
285
286 <<EOF>> {
287 loc->start = loc->end = scanner_cursor;
288 yyterminate ();
289 }
3f2d73f1
PE
290}
291
292
293 /*-----------------------------------------------------------------.
294 | Scanning after an identifier, checking whether a colon is next. |
295 `-----------------------------------------------------------------*/
296
297<SC_AFTER_IDENTIFIER>
298{
299 ":" {
300 rule_length = 0;
301 *loc = id_loc;
302 BEGIN INITIAL;
303 return ID_COLON;
304 }
305 . {
306 scanner_cursor.column -= mbsnwidth (yytext, yyleng, 0);
307 yyless (0);
308 *loc = id_loc;
309 BEGIN INITIAL;
310 return ID;
311 }
312 <<EOF>> {
313 *loc = id_loc;
314 BEGIN INITIAL;
315 return ID;
e9955c83
AD
316 }
317}
318
319
d8d3f94a
PE
320 /*---------------------------------------------------------------.
321 | Scanning a Yacc comment. The initial `/ *' is already eaten. |
322 `---------------------------------------------------------------*/
e9955c83 323
d8d3f94a 324<SC_YACC_COMMENT>
e9955c83 325{
3f2d73f1 326 "*/" BEGIN context_state;
a706a1cc 327 .|\n ;
aa418041 328 <<EOF>> unexpected_eof (token_start, "*/"); BEGIN context_state;
d8d3f94a
PE
329}
330
331
332 /*------------------------------------------------------------.
333 | Scanning a C comment. The initial `/ *' is already eaten. |
334 `------------------------------------------------------------*/
335
336<SC_COMMENT>
337{
3f2d73f1 338 "*"{splice}"/" STRING_GROW; BEGIN context_state;
aa418041 339 <<EOF>> unexpected_eof (token_start, "*/"); BEGIN context_state;
e9955c83
AD
340}
341
342
d8d3f94a
PE
343 /*--------------------------------------------------------------.
344 | Scanning a line comment. The initial `//' is already eaten. |
345 `--------------------------------------------------------------*/
346
347<SC_LINE_COMMENT>
348{
3f2d73f1 349 "\n" STRING_GROW; BEGIN context_state;
41141c56 350 {splice} STRING_GROW;
3f2d73f1 351 <<EOF>> BEGIN context_state;
d8d3f94a
PE
352}
353
354
e9955c83
AD
355 /*----------------------------------------------------------------.
356 | Scanning a C string, including its escapes. The initial `"' is |
357 | already eaten. |
358 `----------------------------------------------------------------*/
359
360<SC_ESCAPED_STRING>
361{
db2cc12f 362 "\"" {
41141c56
PE
363 STRING_GROW;
364 STRING_FINISH;
3f2d73f1 365 loc->start = token_start;
223ff46e 366 val->chars = last_string;
efcb44dd 367 rule_length++;
a706a1cc 368 BEGIN INITIAL;
e9955c83
AD
369 return STRING;
370 }
371
41141c56 372 .|\n STRING_GROW;
aa418041 373 <<EOF>> unexpected_eof (token_start, "\""); BEGIN INITIAL;
e9955c83
AD
374}
375
376 /*---------------------------------------------------------------.
377 | Scanning a C character, decoding its escapes. The initial "'" |
378 | is already eaten. |
379 `---------------------------------------------------------------*/
380
381<SC_ESCAPED_CHARACTER>
382{
db2cc12f 383 "'" {
3b1e470c 384 unsigned char last_string_1;
41141c56
PE
385 STRING_GROW;
386 STRING_FINISH;
3f2d73f1 387 loc->start = token_start;
41141c56
PE
388 val->symbol = symbol_get (last_string, *loc);
389 symbol_class_set (val->symbol, token_sym, *loc);
3b1e470c
PE
390 last_string_1 = last_string[1];
391 symbol_user_token_number_set (val->symbol, last_string_1, *loc);
41141c56 392 STRING_FREE;
a706a1cc
PE
393 rule_length++;
394 BEGIN INITIAL;
395 return ID;
e9955c83 396 }
a706a1cc 397
41141c56 398 .|\n STRING_GROW;
aa418041 399 <<EOF>> unexpected_eof (token_start, "'"); BEGIN INITIAL;
e9955c83
AD
400}
401
402
403 /*----------------------------.
404 | Decode escaped characters. |
405 `----------------------------*/
406
<SC_ESCAPED_STRING,SC_ESCAPED_CHARACTER>
{
  /* Each rule below appends the decoded byte to the string obstack.
     A decoded value of zero is diagnosed instead of appended: the
     finished token is handed on as a NUL-terminated string, so an
     embedded null byte would silently truncate it.  */

  \\[0-7]{1,3} {
    unsigned long c = strtoul (yytext + 1, 0, 8);
    if (UCHAR_MAX < c)
      complain_at (*loc, _("invalid escape sequence: %s"), quote (yytext));
    else if (! c)
      complain_at (*loc, _("invalid null character: %s"), quote (yytext));
    else
      obstack_1grow (&obstack_for_string, c);
  }

  \\x[0-9abcdefABCDEF]+ {
    unsigned long c;
    set_errno (0);
    c = strtoul (yytext + 2, 0, 16);
    if (UCHAR_MAX < c || get_errno ())
      complain_at (*loc, _("invalid escape sequence: %s"), quote (yytext));
    else if (! c)
      complain_at (*loc, _("invalid null character: %s"), quote (yytext));
    else
      obstack_1grow (&obstack_for_string, c);
  }

  \\a   obstack_1grow (&obstack_for_string, '\a');
  \\b   obstack_1grow (&obstack_for_string, '\b');
  \\f   obstack_1grow (&obstack_for_string, '\f');
  \\n   obstack_1grow (&obstack_for_string, '\n');
  \\r   obstack_1grow (&obstack_for_string, '\r');
  \\t   obstack_1grow (&obstack_for_string, '\t');
  \\v   obstack_1grow (&obstack_for_string, '\v');

  /* \\[\"\'?\\] would be shorter, but it confuses xgettext.  */
  \\("\""|"'"|"?"|"\\")  obstack_1grow (&obstack_for_string, yytext[1]);

  \\(u|U[0-9abcdefABCDEF]{4})[0-9abcdefABCDEF]{4} {
    int c = convert_ucn_to_byte (yytext);
    if (c < 0)
      complain_at (*loc, _("invalid escape sequence: %s"), quote (yytext));
    else if (! c)
      complain_at (*loc, _("invalid null character: %s"), quote (yytext));
    else
      obstack_1grow (&obstack_for_string, c);
  }

  /* Any other backslash sequence is diagnosed and kept verbatim.  */
  \\(.|\n) {
    complain_at (*loc, _("unrecognized escape sequence: %s"), quote (yytext));
    STRING_GROW;
  }
}
450
451
452 /*----------------------------------------------------------.
453 | Scanning a C character without decoding its escapes. The |
454 | initial "'" is already eaten. |
455 `----------------------------------------------------------*/
456
457<SC_CHARACTER>
458{
3f2d73f1 459 "'" STRING_GROW; BEGIN context_state;
41141c56 460 \\{splice}[^$@\[\]] STRING_GROW;
aa418041 461 <<EOF>> unexpected_eof (token_start, "'"); BEGIN context_state;
e9955c83
AD
462}
463
464
465 /*----------------------------------------------------------------.
466 | Scanning a C string, without decoding its escapes. The initial |
467 | `"' is already eaten. |
468 `----------------------------------------------------------------*/
469
470<SC_STRING>
471{
3f2d73f1 472 "\"" STRING_GROW; BEGIN context_state;
41141c56 473 \\{splice}[^$@\[\]] STRING_GROW;
aa418041
PE
474 <<EOF>> {
475 unexpected_eof (token_start, "\"");
476 BEGIN context_state;
477 }
e9955c83
AD
478}
479
480
481 /*---------------------------------------------------.
482 | Strings, comments etc. can be found in user code. |
483 `---------------------------------------------------*/
484
485<SC_BRACED_CODE,SC_PROLOGUE,SC_EPILOGUE>
486{
3f2d73f1
PE
487 "'" {
488 STRING_GROW;
489 context_state = YY_START;
490 token_start = loc->start;
491 BEGIN SC_CHARACTER;
492 }
493 "\"" {
494 STRING_GROW;
495 context_state = YY_START;
496 token_start = loc->start;
497 BEGIN SC_STRING;
498 }
499 "/"{splice}"*" {
500 STRING_GROW;
501 context_state = YY_START;
502 token_start = loc->start;
503 BEGIN SC_COMMENT;
504 }
505 "/"{splice}"/" {
506 STRING_GROW;
507 context_state = YY_START;
508 BEGIN SC_LINE_COMMENT;
509 }
e9955c83
AD
510}
511
512
624a35e2
PE
513 /*---------------------------------------------------------------.
514 | Scanning after %union etc., possibly followed by white space. |
515 | For %union only, allow arbitrary C code to appear before the |
516 | following brace, as an extension to POSIX. |
517 `---------------------------------------------------------------*/
518
519<SC_PRE_CODE>
520{
521 . {
522 bool valid = yytext[0] == '{' || token_type == PERCENT_UNION;
523 scanner_cursor.column -= mbsnwidth (yytext, yyleng, 0);
524 yyless (0);
525
526 if (valid)
527 {
528 braces_level = -1;
529 code_start = loc->start;
530 BEGIN SC_BRACED_CODE;
531 }
532 else
533 {
534 complain_at (*loc, _("missing `{' in `%s'"),
535 token_name (token_type));
536 obstack_sgrow (&obstack_for_string, "{}");
537 STRING_FINISH;
538 val->chars = last_string;
539 BEGIN INITIAL;
540 return token_type;
541 }
542 }
379f0ac8 543
aa418041 544 <<EOF>> unexpected_eof (scanner_cursor, "{}"); BEGIN INITIAL;
624a35e2
PE
545}
546
547
e9955c83
AD
548 /*---------------------------------------------------------------.
549 | Scanning some code in braces (%union and actions). The initial |
550 | "{" is already eaten. |
551 `---------------------------------------------------------------*/
552
553<SC_BRACED_CODE>
554{
41141c56
PE
555 "{"|"<"{splice}"%" STRING_GROW; braces_level++;
556 "%"{splice}">" STRING_GROW; braces_level--;
e9955c83 557 "}" {
25522739
PE
558 bool outer_brace = --braces_level < 0;
559
560 /* As an undocumented Bison extension, append `;' before the last
561 brace in braced code, so that the user code can omit trailing
562 `;'. But do not append `;' if emulating Yacc, since Yacc does
563 not append one.
564
565 FIXME: Bison should warn if a semicolon seems to be necessary
566 here, and should omit the semicolon if it seems unnecessary
567 (e.g., after ';', '{', or '}', each followed by comments or
568 white space). Such a warning shouldn't depend on --yacc; it
569 should depend on a new --pedantic option, which would cause
570 Bison to warn if it detects an extension to POSIX. --pedantic
571 should also diagnose other Bison extensions like %yacc.
572 Perhaps there should also be a GCC-style --pedantic-errors
573 option, so that such warnings are diagnosed as errors. */
1deb9bdc 574 if (outer_brace && token_type == BRACED_CODE && ! yacc_flag)
25522739
PE
575 obstack_1grow (&obstack_for_string, ';');
576
577 obstack_1grow (&obstack_for_string, '}');
578
579 if (outer_brace)
e9955c83 580 {
41141c56 581 STRING_FINISH;
624a35e2 582 rule_length++;
3f2d73f1 583 loc->start = code_start;
223ff46e 584 val->chars = last_string;
a706a1cc 585 BEGIN INITIAL;
624a35e2 586 return token_type;
e9955c83
AD
587 }
588 }
589
a706a1cc
PE
  /* Tokenize `<<%' correctly (as `<<' `%') rather than incorrectly
     (as `<' `<%').  */
41141c56 592 "<"{splice}"<" STRING_GROW;
a706a1cc 593
624a35e2
PE
594 "$"("<"{tag}">")?(-?[0-9]+|"$") handle_dollar (token_type, yytext, *loc);
595 "@"(-?[0-9]+|"$") handle_at (token_type, yytext, *loc);
e9955c83 596
aa418041 597 <<EOF>> unexpected_eof (code_start, "}"); BEGIN INITIAL;
e9955c83
AD
598}
599
600
601 /*--------------------------------------------------------------.
602 | Scanning some prologue: from "%{" (already scanned) to "%}". |
603 `--------------------------------------------------------------*/
604
605<SC_PROLOGUE>
606{
607 "%}" {
41141c56 608 STRING_FINISH;
3f2d73f1 609 loc->start = code_start;
223ff46e 610 val->chars = last_string;
a706a1cc 611 BEGIN INITIAL;
e9955c83
AD
612 return PROLOGUE;
613 }
614
aa418041 615 <<EOF>> unexpected_eof (code_start, "%}"); BEGIN INITIAL;
e9955c83
AD
616}
617
618
619 /*---------------------------------------------------------------.
620 | Scanning the epilogue (everything after the second "%%", which |
d8d3f94a 621 | has already been eaten). |
e9955c83
AD
622 `---------------------------------------------------------------*/
623
624<SC_EPILOGUE>
625{
e9955c83 626 <<EOF>> {
41141c56 627 STRING_FINISH;
3f2d73f1 628 loc->start = code_start;
223ff46e 629 val->chars = last_string;
a706a1cc 630 BEGIN INITIAL;
e9955c83
AD
631 return EPILOGUE;
632 }
633}
634
635
a706a1cc
PE
636 /*----------------------------------------------------------------.
637 | By default, grow the string obstack with the input, escaping M4 |
638 | quoting characters. |
639 `----------------------------------------------------------------*/
640
641<SC_COMMENT,SC_LINE_COMMENT,SC_STRING,SC_CHARACTER,SC_BRACED_CODE,SC_PROLOGUE,SC_EPILOGUE>
642{
223ff46e
PE
643 \$ obstack_sgrow (&obstack_for_string, "$][");
644 \@ obstack_sgrow (&obstack_for_string, "@@");
645 \[ obstack_sgrow (&obstack_for_string, "@{");
646 \] obstack_sgrow (&obstack_for_string, "@}");
41141c56 647 .|\n STRING_GROW;
a706a1cc
PE
648}
649
650
e9955c83
AD
651%%
652
cd3684cf
AD
/* The maximum number of semantic values to the left of a handle
   (those referenced by $0, $-1, etc.) that are required by the
   semantic actions of this grammar.  */
656int max_left_semantic_context = 0;
657
3f2d73f1
PE
658/* Set *LOC and adjust scanner cursor to account for token TOKEN of
659 size SIZE. */
6c30d641
PE
660
661static void
223ff46e 662adjust_location (location *loc, char const *token, size_t size)
6c30d641 663{
3f2d73f1
PE
664 int line = scanner_cursor.line;
665 int column = scanner_cursor.column;
6c30d641
PE
666 char const *p0 = token;
667 char const *p = token;
668 char const *lim = token + size;
669
3f2d73f1
PE
670 loc->start = scanner_cursor;
671
6c30d641
PE
672 for (p = token; p < lim; p++)
673 switch (*p)
674 {
6c30d641
PE
675 case '\n':
676 line++;
677 column = 1;
678 p0 = p + 1;
679 break;
680
681 case '\t':
682 column += mbsnwidth (p0, p - p0, 0);
683 column += 8 - ((column - 1) & 7);
684 p0 = p + 1;
685 break;
686 }
687
3f2d73f1
PE
688 scanner_cursor.line = line;
689 scanner_cursor.column = column + mbsnwidth (p0, p - p0, 0);
690
691 loc->end = scanner_cursor;
6c30d641
PE
692}
693
694
/* Read bytes from FP into buffer BUF of size SIZE.  Return the
   number of bytes read.  Remove '\r' from input, treating \r\n
   and isolated \r as \n.

   The conversion is done in place, so the value returned may be
   smaller than the number of bytes consumed from FP.  When the data
   read ends with '\r', one more character is read from FP so that a
   '\n' split across two reads is still swallowed; any other character
   is pushed back with ungetc.  */

static size_t
no_cr_read (FILE *fp, char *buf, size_t size)
{
  size_t bytes_read = fread (buf, 1, size, fp);
  char *w;                      /* Next byte to write in BUF.  */
  char const *r;                /* Next byte to read in BUF.  */
  char const *lim;              /* End of the data read.  */

  if (bytes_read == 0)
    return 0;

  w = memchr (buf, '\r', bytes_read);
  if (!w)
    return bytes_read;          /* No '\r': nothing to rewrite.  */

  r = ++w;
  lim = buf + bytes_read;

  for (;;)
    {
      /* Found an '\r'.  Treat it like '\n', but ignore any
         '\n' that immediately follows.  */
      w[-1] = '\n';
      if (r == lim)
        {
          int ch = getc (fp);
          /* For EOF, ungetc (EOF, fp) returns EOF, so this only
             breaks out when a real push-back fails.  */
          if (ch != '\n' && ungetc (ch, fp) != ch)
            break;
        }
      else if (*r == '\n')
        r++;

      /* Copy until the next '\r'.  */
      do
        {
          if (r == lim)
            return w - buf;
        }
      while ((*w++ = *r++) != '\r');
    }

  return w - buf;
}
740
741
e9955c83 742/*------------------------------------------------------------------.
366eea36 743| TEXT is pointing to a wannabee semantic value (i.e., a `$'). |
e9955c83
AD
744| |
745| Possible inputs: $[<TYPENAME>]($|integer) |
746| |
223ff46e 747| Output to OBSTACK_FOR_STRING a reference to this semantic value. |
e9955c83
AD
748`------------------------------------------------------------------*/
749
624a35e2 750static inline bool
223ff46e 751handle_action_dollar (char *text, location loc)
e9955c83
AD
752{
753 const char *type_name = NULL;
366eea36 754 char *cp = text + 1;
e9955c83 755
624a35e2
PE
756 if (! current_rule)
757 return false;
758
e9955c83
AD
759 /* Get the type name if explicit. */
760 if (*cp == '<')
761 {
762 type_name = ++cp;
763 while (*cp != '>')
764 ++cp;
765 *cp = '\0';
766 ++cp;
767 }
768
769 if (*cp == '$')
770 {
771 if (!type_name)
223ff46e 772 type_name = symbol_list_n_type_name_get (current_rule, loc, 0);
e9955c83 773 if (!type_name && typed)
223ff46e 774 complain_at (loc, _("$$ of `%s' has no declared type"),
97650f4e 775 current_rule->sym->tag);
e9955c83
AD
776 if (!type_name)
777 type_name = "";
223ff46e 778 obstack_fgrow1 (&obstack_for_string,
e9955c83
AD
779 "]b4_lhs_value([%s])[", type_name);
780 }
d8d3f94a 781 else
e9955c83 782 {
d8d3f94a 783 long num;
223ff46e 784 set_errno (0);
d8d3f94a 785 num = strtol (cp, 0, 10);
e9955c83 786
223ff46e 787 if (INT_MIN <= num && num <= rule_length && ! get_errno ())
e9955c83 788 {
d8d3f94a 789 int n = num;
25005f6a
PH
790 if (1-n > max_left_semantic_context)
791 max_left_semantic_context = 1-n;
e9955c83 792 if (!type_name && n > 0)
223ff46e 793 type_name = symbol_list_n_type_name_get (current_rule, loc, n);
e9955c83 794 if (!type_name && typed)
223ff46e
PE
795 complain_at (loc, _("$%d of `%s' has no declared type"),
796 n, current_rule->sym->tag);
e9955c83
AD
797 if (!type_name)
798 type_name = "";
223ff46e 799 obstack_fgrow3 (&obstack_for_string,
e9955c83
AD
800 "]b4_rhs_value([%d], [%d], [%s])[",
801 rule_length, n, type_name);
802 }
d8d3f94a 803 else
223ff46e 804 complain_at (loc, _("integer out of range: %s"), quote (text));
9280d3ef 805 }
9280d3ef 806
624a35e2 807 return true;
e9955c83
AD
808}
809
f25bfb75 810
cd3684cf
AD
811/*----------------------------------------------------------------.
812| Map `$?' onto the proper M4 symbol, depending on its TOKEN_TYPE |
813| (are we in an action?). |
814`----------------------------------------------------------------*/
e9955c83
AD
815
816static void
624a35e2 817handle_dollar (int token_type, char *text, location loc)
f25bfb75 818{
624a35e2 819 switch (token_type)
f25bfb75 820 {
624a35e2
PE
821 case BRACED_CODE:
822 if (handle_action_dollar (text, loc))
823 return;
f25bfb75
AD
824 break;
825
624a35e2 826 case PERCENT_DESTRUCTOR:
cd3684cf 827 case PERCENT_INITIAL_ACTION:
624a35e2
PE
828 case PERCENT_PRINTER:
829 if (text[1] == '$')
830 {
831 obstack_sgrow (&obstack_for_string, "]b4_dollar_dollar[");
832 return;
833 }
834 break;
835
836 default:
f25bfb75
AD
837 break;
838 }
624a35e2
PE
839
840 complain_at (loc, _("invalid value: %s"), quote (text));
f25bfb75
AD
841}
842
843
844/*------------------------------------------------------.
845| TEXT is a location token (i.e., a `@...'). Output to |
223ff46e 846| OBSTACK_FOR_STRING a reference to this location. |
f25bfb75
AD
847`------------------------------------------------------*/
848
624a35e2 849static inline bool
223ff46e 850handle_action_at (char *text, location loc)
e9955c83 851{
366eea36 852 char *cp = text + 1;
d0829076 853 locations_flag = true;
e9955c83 854
624a35e2
PE
855 if (! current_rule)
856 return false;
857
366eea36 858 if (*cp == '$')
624a35e2 859 obstack_sgrow (&obstack_for_string, "]b4_lhs_location[");
d8d3f94a 860 else
e9955c83 861 {
d8d3f94a 862 long num;
223ff46e 863 set_errno (0);
d8d3f94a 864 num = strtol (cp, 0, 10);
dafdc66f 865
223ff46e 866 if (INT_MIN <= num && num <= rule_length && ! get_errno ())
d8d3f94a
PE
867 {
868 int n = num;
223ff46e 869 obstack_fgrow2 (&obstack_for_string, "]b4_rhs_location([%d], [%d])[",
d8d3f94a
PE
870 rule_length, n);
871 }
e9955c83 872 else
223ff46e 873 complain_at (loc, _("integer out of range: %s"), quote (text));
f25bfb75 874 }
f25bfb75 875
624a35e2 876 return true;
e9955c83 877}
4cdb01db 878
f25bfb75 879
cd3684cf
AD
880/*----------------------------------------------------------------.
881| Map `@?' onto the proper M4 symbol, depending on its TOKEN_TYPE |
882| (are we in an action?). |
883`----------------------------------------------------------------*/
f25bfb75
AD
884
885static void
624a35e2 886handle_at (int token_type, char *text, location loc)
f25bfb75 887{
624a35e2 888 switch (token_type)
f25bfb75 889 {
624a35e2 890 case BRACED_CODE:
223ff46e 891 handle_action_at (text, loc);
624a35e2
PE
892 return;
893
cd3684cf 894 case PERCENT_INITIAL_ACTION:
624a35e2
PE
895 case PERCENT_DESTRUCTOR:
896 case PERCENT_PRINTER:
897 if (text[1] == '$')
898 {
899 obstack_sgrow (&obstack_for_string, "]b4_at_dollar[");
900 return;
901 }
f25bfb75
AD
902 break;
903
624a35e2 904 default:
f25bfb75
AD
905 break;
906 }
624a35e2
PE
907
908 complain_at (loc, _("invalid value: %s"), quote (text));
f25bfb75
AD
909}
910
911
d8d3f94a
PE
/* Convert universal character name UCN to a single-byte character,
   and return that character.  Return -1 if UCN does not correspond
   to a single-byte character.

   UCN points at the backslash of a `\uXXXX' or `\UXXXXXXXX' sequence;
   the hexadecimal digits are read starting at UCN + 2.  */

static int
convert_ucn_to_byte (char const *ucn)
{
  unsigned long code = strtoul (ucn + 2, 0, 16);

  /* FIXME: Currently we assume Unicode-compatible unibyte characters
     on ASCII hosts (i.e., Latin-1 on hosts with 8-bit bytes).  On
     non-ASCII hosts we support only the portable C character set.
     These limitations should be removed once we add support for
     multibyte characters.  */

  if (UCHAR_MAX < code)
    return -1;

#if ! ('$' == 0x24 && '@' == 0x40 && '`' == 0x60 && '~' == 0x7e)
  {
    /* A non-ASCII host.  Use CODE to index into a table of the C
       basic execution character set, which is guaranteed to exist on
       all Standard C platforms.  This table also includes '$', '@',
       and '`', which are not in the basic execution character set but
       which are unibyte characters on all the platforms that we know
       about.  */
    static signed char const table[] =
      {
        '\0',   -1,   -1,   -1,   -1,   -1,   -1, '\a',
        '\b', '\t', '\n', '\v', '\f', '\r',   -1,   -1,
          -1,   -1,   -1,   -1,   -1,   -1,   -1,   -1,
          -1,   -1,   -1,   -1,   -1,   -1,   -1,   -1,
         ' ',  '!',  '"',  '#',  '$',  '%',  '&', '\'',
         '(',  ')',  '*',  '+',  ',',  '-',  '.',  '/',
         '0',  '1',  '2',  '3',  '4',  '5',  '6',  '7',
         '8',  '9',  ':',  ';',  '<',  '=',  '>',  '?',
         '@',  'A',  'B',  'C',  'D',  'E',  'F',  'G',
         'H',  'I',  'J',  'K',  'L',  'M',  'N',  'O',
         'P',  'Q',  'R',  'S',  'T',  'U',  'V',  'W',
         'X',  'Y',  'Z',  '[', '\\',  ']',  '^',  '_',
         '`',  'a',  'b',  'c',  'd',  'e',  'f',  'g',
         'h',  'i',  'j',  'k',  'l',  'm',  'n',  'o',
         'p',  'q',  'r',  's',  't',  'u',  'v',  'w',
         'x',  'y',  'z',  '{',  '|',  '}',  '~'
      };

    /* Return the table entry as an int directly, so that -1 never
       passes through an unsigned type on its way out.  */
    return code < sizeof table ? table[code] : -1;
  }
#else
  /* CODE is at most UCHAR_MAX here, so it fits in an int.  */
  return (int) code;
#endif
}
966
967
900c5db5
AD
968/*----------------------------------------------------------------.
969| Handle `#line INT "FILE"'. ARGS has already skipped `#line '. |
970`----------------------------------------------------------------*/
971
972static void
3f2d73f1 973handle_syncline (char *args)
900c5db5
AD
974{
975 int lineno = strtol (args, &args, 10);
976 const char *file = NULL;
977 file = strchr (args, '"') + 1;
978 *strchr (file, '"') = 0;
3f2d73f1
PE
979 scanner_cursor.file = current_file = xstrdup (file);
980 scanner_cursor.line = lineno;
981 scanner_cursor.column = 1;
900c5db5
AD
982}
983
a706a1cc 984
3f2d73f1
PE
985/*------------------------------------------------------------------------.
986| Report an unexpected EOF in a token or comment starting at START. |
987| An end of file was encountered and the expected TOKEN_END was missing. |
3f2d73f1 988`------------------------------------------------------------------------*/
a706a1cc
PE
989
990static void
aa418041 991unexpected_eof (boundary start, char const *token_end)
a706a1cc 992{
223ff46e
PE
993 location loc;
994 loc.start = start;
995 loc.end = scanner_cursor;
996 complain_at (loc, _("missing `%s' at end of file"), token_end);
a706a1cc
PE
997}
998
999
f25bfb75
AD
1000/*-------------------------.
1001| Initialize the scanner. |
1002`-------------------------*/
1003
1d6412ad
AD
1004void
1005scanner_initialize (void)
1006{
223ff46e 1007 obstack_init (&obstack_for_string);
1d6412ad
AD
1008}
1009
1010
f25bfb75
AD
1011/*-----------------------------------------------.
1012| Free all the memory allocated to the scanner. |
1013`-----------------------------------------------*/
1014
4cdb01db
AD
1015void
1016scanner_free (void)
1017{
223ff46e 1018 obstack_free (&obstack_for_string, 0);
536545f3
AD
1019 /* Reclaim Flex's buffers. */
1020 yy_delete_buffer (YY_CURRENT_BUFFER);
4cdb01db 1021}