]> git.saurik.com Git - bison.git/blame - src/scan-gram.l
VCG no longer supports long_straight_phase.
[bison.git] / src / scan-gram.l
CommitLineData
e9955c83 1/* Bison Grammar Scanner -*- C -*-
3b1e470c 2
a737b216 3 Copyright (C) 2002, 2003 Free Software Foundation, Inc.
e9955c83
AD
4
5 This file is part of Bison, the GNU Compiler Compiler.
6
7 This program is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 2 of the License, or
10 (at your option) any later version.
11
12 This program is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
16
17 You should have received a copy of the GNU General Public License
18 along with this program; if not, write to the Free Software
19 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
20 02111-1307 USA
21*/
22
aa418041 23%option debug nodefault nounput noyywrap never-interactive
e9955c83
AD
24%option prefix="gram_" outfile="lex.yy.c"
25
26%{
27#include "system.h"
223ff46e
PE
28
29#include <mbswidth.h>
30#include <get-errno.h>
31#include <quote.h>
32
e9955c83 33#include "complain.h"
3f2d73f1 34#include "files.h"
e9955c83
AD
35#include "getargs.h"
36#include "gram.h"
37#include "reader.h"
223ff46e 38#include "uniqstr.h"
e9955c83 39
3f2d73f1
PE
40#define YY_USER_INIT \
41 do \
42 { \
43 scanner_cursor.file = current_file; \
44 scanner_cursor.line = 1; \
45 scanner_cursor.column = 1; \
379f0ac8 46 code_start = scanner_cursor; \
3f2d73f1
PE
47 } \
48 while (0)
8efe435c 49
3f2d73f1
PE
50/* Location of scanner cursor. */
51boundary scanner_cursor;
41141c56 52
223ff46e 53static void adjust_location (location *, char const *, size_t);
3f2d73f1 54#define YY_USER_ACTION adjust_location (loc, yytext, yyleng);
d8d3f94a 55
6c30d641 56static size_t no_cr_read (FILE *, char *, size_t);
d8d3f94a
PE
57#define YY_INPUT(buf, result, size) ((result) = no_cr_read (yyin, buf, size))
58
59
223ff46e 60/* OBSTACK_FOR_STRING -- Used to store all the characters that we need to
44995b2e
AD
61 keep (to construct ID, STRINGS etc.). Use the following macros to
62 use it.
63
41141c56
PE
64 Use STRING_GROW to append what has just been matched, and
65 STRING_FINISH to end the string (it puts the ending 0).
66 STRING_FINISH also stores this string in LAST_STRING, which can be
67 used, and which is used by STRING_FREE to free the last string. */
44995b2e 68
223ff46e 69static struct obstack obstack_for_string;
44995b2e 70
7ec2d4cd
AD
71/* A string representing the most recently saved token. */
72static char *last_string;
73
74
41141c56 75#define STRING_GROW \
223ff46e 76 obstack_grow (&obstack_for_string, yytext, yyleng)
44995b2e 77
41141c56 78#define STRING_FINISH \
44995b2e 79 do { \
223ff46e
PE
80 obstack_1grow (&obstack_for_string, '\0'); \
81 last_string = obstack_finish (&obstack_for_string); \
44995b2e
AD
82 } while (0)
83
41141c56 84#define STRING_FREE \
223ff46e 85 obstack_free (&obstack_for_string, last_string)
e9955c83 86
7ec2d4cd
AD
87void
88scanner_last_string_free (void)
89{
41141c56 90 STRING_FREE;
7ec2d4cd 91}
e9955c83 92
efcb44dd
PE
93/* Within well-formed rules, RULE_LENGTH is the number of values in
94 the current rule so far, which says where to find `$0' with respect
95 to the top of the stack. It is not the same as the rule->length in
96 the case of mid rule actions.
97
98 Outside of well-formed rules, RULE_LENGTH has an undefined value. */
99static int rule_length;
100
624a35e2
PE
101static void handle_dollar (int token_type, char *cp, location loc);
102static void handle_at (int token_type, char *cp, location loc);
3f2d73f1 103static void handle_syncline (char *args);
d8d3f94a 104static int convert_ucn_to_byte (char const *hex_text);
aa418041 105static void unexpected_eof (boundary, char const *);
e9955c83
AD
106
107%}
d8d3f94a 108%x SC_COMMENT SC_LINE_COMMENT SC_YACC_COMMENT
e9955c83 109%x SC_STRING SC_CHARACTER
3f2d73f1 110%x SC_AFTER_IDENTIFIER
e9955c83 111%x SC_ESCAPED_STRING SC_ESCAPED_CHARACTER
624a35e2 112%x SC_PRE_CODE SC_BRACED_CODE SC_PROLOGUE SC_EPILOGUE
e9955c83 113
29c01725
AD
114letter [.abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ_]
115id {letter}({letter}|[0-9])*
116directive %{letter}({letter}|[0-9]|-)*
624a35e2 117int [0-9]+
d8d3f94a
PE
118
119/* POSIX says that a tag must be both an id and a C union member, but
120 historically almost any character is allowed in a tag. We disallow
121 NUL and newline, as this simplifies our implementation. */
122tag [^\0\n>]+
123
124/* Zero or more instances of backslash-newline. Following GCC, allow
125 white space between the backslash and the newline. */
126splice (\\[ \f\t\v]*\n)*
e9955c83
AD
127
128%%
129%{
a706a1cc 130 /* Nesting level of the current code in braces. */
1a9e39f1
PE
131 int braces_level IF_LINT (= 0);
132
3f2d73f1
PE
133 /* Parent context state, when applicable. */
134 int context_state IF_LINT (= 0);
a706a1cc 135
624a35e2
PE
136 /* Token type to return, when applicable. */
137 int token_type IF_LINT (= 0);
138
3f2d73f1 139 /* Location of most recent identifier, when applicable. */
a2bc9dbc 140 location id_loc IF_LINT (= empty_location);
3f2d73f1 141
a2bc9dbc
PE
142 /* Where containing code started, when applicable. Its initial
143 value is relevant only when yylex is invoked in the SC_EPILOGUE
144 start condition. */
145 boundary code_start = scanner_cursor;
3f2d73f1 146
223ff46e
PE
147 /* Where containing comment or string or character literal started,
148 when applicable. */
a2bc9dbc 149 boundary token_start IF_LINT (= scanner_cursor);
e9955c83
AD
150%}
151
152
3f2d73f1
PE
153 /*-----------------------.
154 | Scanning white space. |
155 `-----------------------*/
156
624a35e2 157<INITIAL,SC_AFTER_IDENTIFIER,SC_PRE_CODE>
3f2d73f1
PE
158{
159 [ \f\n\t\v] ;
83adb046 160 "," warn_at (*loc, _("stray `,' treated as white space"));
3f2d73f1
PE
161
162 /* Comments. */
3f2d73f1 163 "//".* ;
83adb046
PE
164 "/*" {
165 token_start = loc->start;
166 context_state = YY_START;
167 BEGIN SC_YACC_COMMENT;
168 }
3f2d73f1
PE
169
170 /* #line directives are not documented, and may be withdrawn or
171 modified in future versions of Bison. */
172 ^"#line "{int}" \"".*"\"\n" {
173 handle_syncline (yytext + sizeof "#line " - 1);
174 }
175}
176
177
e9955c83
AD
178 /*----------------------------.
179 | Scanning Bison directives. |
180 `----------------------------*/
181<INITIAL>
182{
183 "%binary" return PERCENT_NONASSOC;
184 "%debug" return PERCENT_DEBUG;
39a06c25 185 "%default"[-_]"prec" return PERCENT_DEFAULT_PREC;
e9955c83
AD
186 "%define" return PERCENT_DEFINE;
187 "%defines" return PERCENT_DEFINES;
624a35e2 188 "%destructor" token_type = PERCENT_DESTRUCTOR; BEGIN SC_PRE_CODE;
676385e2 189 "%dprec" return PERCENT_DPREC;
e9955c83
AD
190 "%error"[-_]"verbose" return PERCENT_ERROR_VERBOSE;
191 "%expect" return PERCENT_EXPECT;
192 "%file-prefix" return PERCENT_FILE_PREFIX;
193 "%fixed"[-_]"output"[-_]"files" return PERCENT_YACC;
cd3684cf 194 "%initial-action" token_type = PERCENT_INITIAL_ACTION; BEGIN SC_PRE_CODE;
ae7453f2 195 "%glr-parser" return PERCENT_GLR_PARSER;
e9955c83 196 "%left" return PERCENT_LEFT;
624a35e2 197 "%lex-param" token_type = PERCENT_LEX_PARAM; BEGIN SC_PRE_CODE;
e9955c83 198 "%locations" return PERCENT_LOCATIONS;
676385e2 199 "%merge" return PERCENT_MERGE;
e9955c83
AD
200 "%name"[-_]"prefix" return PERCENT_NAME_PREFIX;
201 "%no"[-_]"lines" return PERCENT_NO_LINES;
202 "%nonassoc" return PERCENT_NONASSOC;
916708d5 203 "%nondeterministic-parser" return PERCENT_NONDETERMINISTIC_PARSER;
e9955c83
AD
204 "%nterm" return PERCENT_NTERM;
205 "%output" return PERCENT_OUTPUT;
624a35e2 206 "%parse-param" token_type = PERCENT_PARSE_PARAM; BEGIN SC_PRE_CODE;
d8d3f94a 207 "%prec" rule_length--; return PERCENT_PREC;
624a35e2 208 "%printer" token_type = PERCENT_PRINTER; BEGIN SC_PRE_CODE;
e9955c83
AD
209 "%pure"[-_]"parser" return PERCENT_PURE_PARSER;
210 "%right" return PERCENT_RIGHT;
211 "%skeleton" return PERCENT_SKELETON;
212 "%start" return PERCENT_START;
213 "%term" return PERCENT_TOKEN;
214 "%token" return PERCENT_TOKEN;
215 "%token"[-_]"table" return PERCENT_TOKEN_TABLE;
216 "%type" return PERCENT_TYPE;
624a35e2 217 "%union" token_type = PERCENT_UNION; BEGIN SC_PRE_CODE;
e9955c83
AD
218 "%verbose" return PERCENT_VERBOSE;
219 "%yacc" return PERCENT_YACC;
220
3f2d73f1 221 {directive} {
41141c56 222 complain_at (*loc, _("invalid directive: %s"), quote (yytext));
412f8a59 223 }
900c5db5 224
e9955c83 225 "=" return EQUAL;
d8d3f94a 226 "|" rule_length = 0; return PIPE;
e9955c83
AD
227 ";" return SEMICOLON;
228
3f2d73f1 229 {id} {
41141c56 230 val->symbol = symbol_get (yytext, *loc);
3f2d73f1 231 id_loc = *loc;
efcb44dd 232 rule_length++;
3f2d73f1 233 BEGIN SC_AFTER_IDENTIFIER;
e9955c83
AD
234 }
235
d8d3f94a
PE
236 {int} {
237 unsigned long num;
223ff46e 238 set_errno (0);
d8d3f94a 239 num = strtoul (yytext, 0, 10);
223ff46e 240 if (INT_MAX < num || get_errno ())
d8d3f94a 241 {
41141c56 242 complain_at (*loc, _("integer out of range: %s"), quote (yytext));
d8d3f94a
PE
243 num = INT_MAX;
244 }
41141c56 245 val->integer = num;
d8d3f94a
PE
246 return INT;
247 }
e9955c83
AD
248
249 /* Characters. We don't check there is only one. */
3f2d73f1 250 "'" STRING_GROW; token_start = loc->start; BEGIN SC_ESCAPED_CHARACTER;
e9955c83
AD
251
252 /* Strings. */
3f2d73f1 253 "\"" STRING_GROW; token_start = loc->start; BEGIN SC_ESCAPED_STRING;
e9955c83
AD
254
255 /* Prologue. */
3f2d73f1 256 "%{" code_start = loc->start; BEGIN SC_PROLOGUE;
e9955c83
AD
257
258 /* Code in between braces. */
3f2d73f1
PE
259 "{" {
260 STRING_GROW;
624a35e2 261 token_type = BRACED_CODE;
3f2d73f1
PE
262 braces_level = 0;
263 code_start = loc->start;
264 BEGIN SC_BRACED_CODE;
265 }
e9955c83
AD
266
267 /* A type. */
d8d3f94a 268 "<"{tag}">" {
223ff46e 269 obstack_grow (&obstack_for_string, yytext + 1, yyleng - 2);
41141c56 270 STRING_FINISH;
223ff46e 271 val->uniqstr = uniqstr_new (last_string);
41141c56 272 STRING_FREE;
4cdb01db
AD
273 return TYPE;
274 }
275
a706a1cc
PE
276 "%%" {
277 static int percent_percent_count;
e9955c83 278 if (++percent_percent_count == 2)
a2bc9dbc 279 BEGIN SC_EPILOGUE;
e9955c83
AD
280 return PERCENT_PERCENT;
281 }
282
a706a1cc 283 . {
41141c56 284 complain_at (*loc, _("invalid character: %s"), quote (yytext));
3f2d73f1 285 }
379f0ac8
PE
286
287 <<EOF>> {
288 loc->start = loc->end = scanner_cursor;
289 yyterminate ();
290 }
3f2d73f1
PE
291}
292
293
294 /*-----------------------------------------------------------------.
295 | Scanning after an identifier, checking whether a colon is next. |
296 `-----------------------------------------------------------------*/
297
298<SC_AFTER_IDENTIFIER>
299{
300 ":" {
301 rule_length = 0;
302 *loc = id_loc;
303 BEGIN INITIAL;
304 return ID_COLON;
305 }
306 . {
307 scanner_cursor.column -= mbsnwidth (yytext, yyleng, 0);
308 yyless (0);
309 *loc = id_loc;
310 BEGIN INITIAL;
311 return ID;
312 }
313 <<EOF>> {
314 *loc = id_loc;
315 BEGIN INITIAL;
316 return ID;
e9955c83
AD
317 }
318}
319
320
d8d3f94a
PE
321 /*---------------------------------------------------------------.
322 | Scanning a Yacc comment. The initial `/ *' is already eaten. |
323 `---------------------------------------------------------------*/
e9955c83 324
d8d3f94a 325<SC_YACC_COMMENT>
e9955c83 326{
3f2d73f1 327 "*/" BEGIN context_state;
a706a1cc 328 .|\n ;
aa418041 329 <<EOF>> unexpected_eof (token_start, "*/"); BEGIN context_state;
d8d3f94a
PE
330}
331
332
333 /*------------------------------------------------------------.
334 | Scanning a C comment. The initial `/ *' is already eaten. |
335 `------------------------------------------------------------*/
336
337<SC_COMMENT>
338{
3f2d73f1 339 "*"{splice}"/" STRING_GROW; BEGIN context_state;
aa418041 340 <<EOF>> unexpected_eof (token_start, "*/"); BEGIN context_state;
e9955c83
AD
341}
342
343
d8d3f94a
PE
344 /*--------------------------------------------------------------.
345 | Scanning a line comment. The initial `//' is already eaten. |
346 `--------------------------------------------------------------*/
347
348<SC_LINE_COMMENT>
349{
3f2d73f1 350 "\n" STRING_GROW; BEGIN context_state;
41141c56 351 {splice} STRING_GROW;
3f2d73f1 352 <<EOF>> BEGIN context_state;
d8d3f94a
PE
353}
354
355
e9955c83
AD
356 /*----------------------------------------------------------------.
357 | Scanning a C string, including its escapes. The initial `"' is |
358 | already eaten. |
359 `----------------------------------------------------------------*/
360
361<SC_ESCAPED_STRING>
362{
db2cc12f 363 "\"" {
41141c56
PE
364 STRING_GROW;
365 STRING_FINISH;
3f2d73f1 366 loc->start = token_start;
223ff46e 367 val->chars = last_string;
efcb44dd 368 rule_length++;
a706a1cc 369 BEGIN INITIAL;
e9955c83
AD
370 return STRING;
371 }
372
41141c56 373 .|\n STRING_GROW;
aa418041 374 <<EOF>> unexpected_eof (token_start, "\""); BEGIN INITIAL;
e9955c83
AD
375}
376
377 /*---------------------------------------------------------------.
378 | Scanning a C character, decoding its escapes. The initial "'" |
379 | is already eaten. |
380 `---------------------------------------------------------------*/
381
382<SC_ESCAPED_CHARACTER>
383{
db2cc12f 384 "'" {
3b1e470c 385 unsigned char last_string_1;
41141c56
PE
386 STRING_GROW;
387 STRING_FINISH;
3f2d73f1 388 loc->start = token_start;
41141c56
PE
389 val->symbol = symbol_get (last_string, *loc);
390 symbol_class_set (val->symbol, token_sym, *loc);
3b1e470c
PE
391 last_string_1 = last_string[1];
392 symbol_user_token_number_set (val->symbol, last_string_1, *loc);
41141c56 393 STRING_FREE;
a706a1cc
PE
394 rule_length++;
395 BEGIN INITIAL;
396 return ID;
e9955c83 397 }
a706a1cc 398
41141c56 399 .|\n STRING_GROW;
aa418041 400 <<EOF>> unexpected_eof (token_start, "'"); BEGIN INITIAL;
e9955c83
AD
401}
402
403
404 /*----------------------------.
405 | Decode escaped characters. |
406 `----------------------------*/
407
408<SC_ESCAPED_STRING,SC_ESCAPED_CHARACTER>
409{
d8d3f94a
PE
410 \\[0-7]{1,3} {
411 unsigned long c = strtoul (yytext + 1, 0, 8);
412 if (UCHAR_MAX < c)
3f2d73f1 413 complain_at (*loc, _("invalid escape sequence: %s"), quote (yytext));
e9955c83 414 else
223ff46e 415 obstack_1grow (&obstack_for_string, c);
e9955c83
AD
416 }
417
6b0d38ab 418 \\x[0-9abcdefABCDEF]+ {
d8d3f94a 419 unsigned long c;
223ff46e 420 set_errno (0);
d8d3f94a 421 c = strtoul (yytext + 2, 0, 16);
223ff46e 422 if (UCHAR_MAX < c || get_errno ())
3f2d73f1 423 complain_at (*loc, _("invalid escape sequence: %s"), quote (yytext));
d8d3f94a 424 else
223ff46e 425 obstack_1grow (&obstack_for_string, c);
e9955c83
AD
426 }
427
223ff46e
PE
428 \\a obstack_1grow (&obstack_for_string, '\a');
429 \\b obstack_1grow (&obstack_for_string, '\b');
430 \\f obstack_1grow (&obstack_for_string, '\f');
431 \\n obstack_1grow (&obstack_for_string, '\n');
432 \\r obstack_1grow (&obstack_for_string, '\r');
433 \\t obstack_1grow (&obstack_for_string, '\t');
434 \\v obstack_1grow (&obstack_for_string, '\v');
412f8a59
PE
435
436 /* \\[\"\'?\\] would be shorter, but it confuses xgettext. */
223ff46e 437 \\("\""|"'"|"?"|"\\") obstack_1grow (&obstack_for_string, yytext[1]);
412f8a59 438
6b0d38ab 439 \\(u|U[0-9abcdefABCDEF]{4})[0-9abcdefABCDEF]{4} {
d8d3f94a
PE
440 int c = convert_ucn_to_byte (yytext);
441 if (c < 0)
3f2d73f1 442 complain_at (*loc, _("invalid escape sequence: %s"), quote (yytext));
d8d3f94a 443 else
223ff46e 444 obstack_1grow (&obstack_for_string, c);
d8d3f94a 445 }
4f25ebb0 446 \\(.|\n) {
3f2d73f1 447 complain_at (*loc, _("unrecognized escape sequence: %s"), quote (yytext));
41141c56 448 STRING_GROW;
e9955c83
AD
449 }
450}
451
452
453 /*----------------------------------------------------------.
454 | Scanning a C character without decoding its escapes. The |
455 | initial "'" is already eaten. |
456 `----------------------------------------------------------*/
457
458<SC_CHARACTER>
459{
3f2d73f1 460 "'" STRING_GROW; BEGIN context_state;
41141c56 461 \\{splice}[^$@\[\]] STRING_GROW;
aa418041 462 <<EOF>> unexpected_eof (token_start, "'"); BEGIN context_state;
e9955c83
AD
463}
464
465
466 /*----------------------------------------------------------------.
467 | Scanning a C string, without decoding its escapes. The initial |
468 | `"' is already eaten. |
469 `----------------------------------------------------------------*/
470
471<SC_STRING>
472{
3f2d73f1 473 "\"" STRING_GROW; BEGIN context_state;
41141c56 474 \\{splice}[^$@\[\]] STRING_GROW;
aa418041
PE
475 <<EOF>> {
476 unexpected_eof (token_start, "\"");
477 BEGIN context_state;
478 }
e9955c83
AD
479}
480
481
482 /*---------------------------------------------------.
483 | Strings, comments etc. can be found in user code. |
484 `---------------------------------------------------*/
485
486<SC_BRACED_CODE,SC_PROLOGUE,SC_EPILOGUE>
487{
3f2d73f1
PE
488 "'" {
489 STRING_GROW;
490 context_state = YY_START;
491 token_start = loc->start;
492 BEGIN SC_CHARACTER;
493 }
494 "\"" {
495 STRING_GROW;
496 context_state = YY_START;
497 token_start = loc->start;
498 BEGIN SC_STRING;
499 }
500 "/"{splice}"*" {
501 STRING_GROW;
502 context_state = YY_START;
503 token_start = loc->start;
504 BEGIN SC_COMMENT;
505 }
506 "/"{splice}"/" {
507 STRING_GROW;
508 context_state = YY_START;
509 BEGIN SC_LINE_COMMENT;
510 }
e9955c83
AD
511}
512
513
624a35e2
PE
514 /*---------------------------------------------------------------.
515 | Scanning after %union etc., possibly followed by white space. |
516 | For %union only, allow arbitrary C code to appear before the |
517 | following brace, as an extension to POSIX. |
518 `---------------------------------------------------------------*/
519
520<SC_PRE_CODE>
521{
522 . {
523 bool valid = yytext[0] == '{' || token_type == PERCENT_UNION;
524 scanner_cursor.column -= mbsnwidth (yytext, yyleng, 0);
525 yyless (0);
526
527 if (valid)
528 {
529 braces_level = -1;
530 code_start = loc->start;
531 BEGIN SC_BRACED_CODE;
532 }
533 else
534 {
535 complain_at (*loc, _("missing `{' in `%s'"),
536 token_name (token_type));
537 obstack_sgrow (&obstack_for_string, "{}");
538 STRING_FINISH;
539 val->chars = last_string;
540 BEGIN INITIAL;
541 return token_type;
542 }
543 }
379f0ac8 544
aa418041 545 <<EOF>> unexpected_eof (scanner_cursor, "{}"); BEGIN INITIAL;
624a35e2
PE
546}
547
548
e9955c83
AD
549 /*---------------------------------------------------------------.
550 | Scanning some code in braces (%union and actions). The initial |
551 | "{" is already eaten. |
552 `---------------------------------------------------------------*/
553
554<SC_BRACED_CODE>
555{
41141c56
PE
556 "{"|"<"{splice}"%" STRING_GROW; braces_level++;
557 "%"{splice}">" STRING_GROW; braces_level--;
e9955c83 558 "}" {
25522739
PE
559 bool outer_brace = --braces_level < 0;
560
561 /* As an undocumented Bison extension, append `;' before the last
562 brace in braced code, so that the user code can omit trailing
563 `;'. But do not append `;' if emulating Yacc, since Yacc does
564 not append one.
565
566 FIXME: Bison should warn if a semicolon seems to be necessary
567 here, and should omit the semicolon if it seems unnecessary
568 (e.g., after ';', '{', or '}', each followed by comments or
569 white space). Such a warning shouldn't depend on --yacc; it
570 should depend on a new --pedantic option, which would cause
571 Bison to warn if it detects an extension to POSIX. --pedantic
572 should also diagnose other Bison extensions like %yacc.
573 Perhaps there should also be a GCC-style --pedantic-errors
574 option, so that such warnings are diagnosed as errors. */
1deb9bdc 575 if (outer_brace && token_type == BRACED_CODE && ! yacc_flag)
25522739
PE
576 obstack_1grow (&obstack_for_string, ';');
577
578 obstack_1grow (&obstack_for_string, '}');
579
580 if (outer_brace)
e9955c83 581 {
41141c56 582 STRING_FINISH;
624a35e2 583 rule_length++;
3f2d73f1 584 loc->start = code_start;
223ff46e 585 val->chars = last_string;
a706a1cc 586 BEGIN INITIAL;
624a35e2 587 return token_type;
e9955c83
AD
588 }
589 }
590
a706a1cc
PE
591 /* Tokenize `<<%' correctly (as `<<' `%') rather than incorrectly
592 (as `<' `<%'). */
41141c56 593 "<"{splice}"<" STRING_GROW;
a706a1cc 594
624a35e2
PE
595 "$"("<"{tag}">")?(-?[0-9]+|"$") handle_dollar (token_type, yytext, *loc);
596 "@"(-?[0-9]+|"$") handle_at (token_type, yytext, *loc);
e9955c83 597
aa418041 598 <<EOF>> unexpected_eof (code_start, "}"); BEGIN INITIAL;
e9955c83
AD
599}
600
601
602 /*--------------------------------------------------------------.
603 | Scanning some prologue: from "%{" (already scanned) to "%}". |
604 `--------------------------------------------------------------*/
605
606<SC_PROLOGUE>
607{
608 "%}" {
41141c56 609 STRING_FINISH;
3f2d73f1 610 loc->start = code_start;
223ff46e 611 val->chars = last_string;
a706a1cc 612 BEGIN INITIAL;
e9955c83
AD
613 return PROLOGUE;
614 }
615
aa418041 616 <<EOF>> unexpected_eof (code_start, "%}"); BEGIN INITIAL;
e9955c83
AD
617}
618
619
620 /*---------------------------------------------------------------.
621 | Scanning the epilogue (everything after the second "%%", which |
d8d3f94a 622 | has already been eaten). |
e9955c83
AD
623 `---------------------------------------------------------------*/
624
625<SC_EPILOGUE>
626{
e9955c83 627 <<EOF>> {
41141c56 628 STRING_FINISH;
3f2d73f1 629 loc->start = code_start;
223ff46e 630 val->chars = last_string;
a706a1cc 631 BEGIN INITIAL;
e9955c83
AD
632 return EPILOGUE;
633 }
634}
635
636
a706a1cc
PE
637 /*----------------------------------------------------------------.
638 | By default, grow the string obstack with the input, escaping M4 |
639 | quoting characters. |
640 `----------------------------------------------------------------*/
641
642<SC_COMMENT,SC_LINE_COMMENT,SC_STRING,SC_CHARACTER,SC_BRACED_CODE,SC_PROLOGUE,SC_EPILOGUE>
643{
223ff46e
PE
644 \$ obstack_sgrow (&obstack_for_string, "$][");
645 \@ obstack_sgrow (&obstack_for_string, "@@");
646 \[ obstack_sgrow (&obstack_for_string, "@{");
647 \] obstack_sgrow (&obstack_for_string, "@}");
41141c56 648 .|\n STRING_GROW;
a706a1cc
PE
649}
650
651
e9955c83
AD
652%%
653
cd3684cf
AD
654/* Keeps track of the maximum number of semantic values to the left of
655 a handle (those referenced by $0, $-1, etc.) that are required by the
25005f6a
PH
656 semantic actions of this grammar. */
657int max_left_semantic_context = 0;
658
3f2d73f1
PE
659/* Set *LOC and adjust scanner cursor to account for token TOKEN of
660 size SIZE. */
6c30d641
PE
661
662static void
223ff46e 663adjust_location (location *loc, char const *token, size_t size)
6c30d641 664{
3f2d73f1
PE
665 int line = scanner_cursor.line;
666 int column = scanner_cursor.column;
6c30d641
PE
667 char const *p0 = token;
668 char const *p = token;
669 char const *lim = token + size;
670
3f2d73f1
PE
671 loc->start = scanner_cursor;
672
6c30d641
PE
673 for (p = token; p < lim; p++)
674 switch (*p)
675 {
6c30d641
PE
676 case '\n':
677 line++;
678 column = 1;
679 p0 = p + 1;
680 break;
681
682 case '\t':
683 column += mbsnwidth (p0, p - p0, 0);
684 column += 8 - ((column - 1) & 7);
685 p0 = p + 1;
686 break;
687 }
688
3f2d73f1
PE
689 scanner_cursor.line = line;
690 scanner_cursor.column = column + mbsnwidth (p0, p - p0, 0);
691
692 loc->end = scanner_cursor;
6c30d641
PE
693}
694
695
/* Read up to SIZE bytes from FP into BUF and return the number of
   bytes left in BUF after carriage returns have been normalized:
   each "\r\n" pair and each isolated '\r' becomes a single '\n'.

   When a '\r' is the last byte obtained by fread, one more character
   is read from FP: a '\n' is discarded (it belongs to the same line
   ending), anything else is pushed back with ungetc.  */

static size_t
no_cr_read (FILE *fp, char *buf, size_t size)
{
  size_t const nread = fread (buf, 1, size, fp);
  char *dst = nread ? memchr (buf, '\r', nread) : NULL;
  char const *src;
  char const *end;

  /* Nothing read, or no carriage return: the buffer is already fine.  */
  if (!dst)
    return nread;

  /* Compact the buffer in place, starting at the first '\r'.  */
  end = buf + nread;
  src = dst;
  while (src != end)
    {
      char c = *src++;

      if (c == '\r')
        {
          c = '\n';
          if (src == end)
            {
              /* The '\r' ended the buffer; look one character ahead
                 in FP for the '\n' of a split "\r\n" pair.  */
              int next = getc (fp);
              if (next != '\n')
                ungetc (next, fp);
            }
          else if (*src == '\n')
            src++;
        }

      *dst++ = c;
    }

  return dst - buf;
}
741
742
e9955c83 743/*------------------------------------------------------------------.
366eea36 744| TEXT is pointing to a wannabee semantic value (i.e., a `$'). |
e9955c83
AD
745| |
746| Possible inputs: $[<TYPENAME>]($|integer) |
747| |
223ff46e 748| Output to OBSTACK_FOR_STRING a reference to this semantic value. |
e9955c83
AD
749`------------------------------------------------------------------*/
750
624a35e2 751static inline bool
223ff46e 752handle_action_dollar (char *text, location loc)
e9955c83
AD
753{
754 const char *type_name = NULL;
366eea36 755 char *cp = text + 1;
e9955c83 756
624a35e2
PE
757 if (! current_rule)
758 return false;
759
e9955c83
AD
760 /* Get the type name if explicit. */
761 if (*cp == '<')
762 {
763 type_name = ++cp;
764 while (*cp != '>')
765 ++cp;
766 *cp = '\0';
767 ++cp;
768 }
769
770 if (*cp == '$')
771 {
772 if (!type_name)
223ff46e 773 type_name = symbol_list_n_type_name_get (current_rule, loc, 0);
e9955c83 774 if (!type_name && typed)
223ff46e 775 complain_at (loc, _("$$ of `%s' has no declared type"),
97650f4e 776 current_rule->sym->tag);
e9955c83
AD
777 if (!type_name)
778 type_name = "";
223ff46e 779 obstack_fgrow1 (&obstack_for_string,
e9955c83
AD
780 "]b4_lhs_value([%s])[", type_name);
781 }
d8d3f94a 782 else
e9955c83 783 {
d8d3f94a 784 long num;
223ff46e 785 set_errno (0);
d8d3f94a 786 num = strtol (cp, 0, 10);
e9955c83 787
223ff46e 788 if (INT_MIN <= num && num <= rule_length && ! get_errno ())
e9955c83 789 {
d8d3f94a 790 int n = num;
25005f6a
PH
791 if (1-n > max_left_semantic_context)
792 max_left_semantic_context = 1-n;
e9955c83 793 if (!type_name && n > 0)
223ff46e 794 type_name = symbol_list_n_type_name_get (current_rule, loc, n);
e9955c83 795 if (!type_name && typed)
223ff46e
PE
796 complain_at (loc, _("$%d of `%s' has no declared type"),
797 n, current_rule->sym->tag);
e9955c83
AD
798 if (!type_name)
799 type_name = "";
223ff46e 800 obstack_fgrow3 (&obstack_for_string,
e9955c83
AD
801 "]b4_rhs_value([%d], [%d], [%s])[",
802 rule_length, n, type_name);
803 }
d8d3f94a 804 else
223ff46e 805 complain_at (loc, _("integer out of range: %s"), quote (text));
9280d3ef 806 }
9280d3ef 807
624a35e2 808 return true;
e9955c83
AD
809}
810
f25bfb75 811
cd3684cf
AD
812/*----------------------------------------------------------------.
813| Map `$?' onto the proper M4 symbol, depending on its TOKEN_TYPE |
814| (are we in an action?). |
815`----------------------------------------------------------------*/
e9955c83
AD
816
817static void
624a35e2 818handle_dollar (int token_type, char *text, location loc)
f25bfb75 819{
624a35e2 820 switch (token_type)
f25bfb75 821 {
624a35e2
PE
822 case BRACED_CODE:
823 if (handle_action_dollar (text, loc))
824 return;
f25bfb75
AD
825 break;
826
624a35e2 827 case PERCENT_DESTRUCTOR:
cd3684cf 828 case PERCENT_INITIAL_ACTION:
624a35e2
PE
829 case PERCENT_PRINTER:
830 if (text[1] == '$')
831 {
832 obstack_sgrow (&obstack_for_string, "]b4_dollar_dollar[");
833 return;
834 }
835 break;
836
837 default:
f25bfb75
AD
838 break;
839 }
624a35e2
PE
840
841 complain_at (loc, _("invalid value: %s"), quote (text));
f25bfb75
AD
842}
843
844
845/*------------------------------------------------------.
846| TEXT is a location token (i.e., a `@...'). Output to |
223ff46e 847| OBSTACK_FOR_STRING a reference to this location. |
f25bfb75
AD
848`------------------------------------------------------*/
849
624a35e2 850static inline bool
223ff46e 851handle_action_at (char *text, location loc)
e9955c83 852{
366eea36 853 char *cp = text + 1;
d0829076 854 locations_flag = true;
e9955c83 855
624a35e2
PE
856 if (! current_rule)
857 return false;
858
366eea36 859 if (*cp == '$')
624a35e2 860 obstack_sgrow (&obstack_for_string, "]b4_lhs_location[");
d8d3f94a 861 else
e9955c83 862 {
d8d3f94a 863 long num;
223ff46e 864 set_errno (0);
d8d3f94a 865 num = strtol (cp, 0, 10);
dafdc66f 866
223ff46e 867 if (INT_MIN <= num && num <= rule_length && ! get_errno ())
d8d3f94a
PE
868 {
869 int n = num;
223ff46e 870 obstack_fgrow2 (&obstack_for_string, "]b4_rhs_location([%d], [%d])[",
d8d3f94a
PE
871 rule_length, n);
872 }
e9955c83 873 else
223ff46e 874 complain_at (loc, _("integer out of range: %s"), quote (text));
f25bfb75 875 }
f25bfb75 876
624a35e2 877 return true;
e9955c83 878}
4cdb01db 879
f25bfb75 880
cd3684cf
AD
881/*----------------------------------------------------------------.
882| Map `@?' onto the proper M4 symbol, depending on its TOKEN_TYPE |
883| (are we in an action?). |
884`----------------------------------------------------------------*/
f25bfb75
AD
885
886static void
624a35e2 887handle_at (int token_type, char *text, location loc)
f25bfb75 888{
624a35e2 889 switch (token_type)
f25bfb75 890 {
624a35e2 891 case BRACED_CODE:
223ff46e 892 handle_action_at (text, loc);
624a35e2
PE
893 return;
894
cd3684cf 895 case PERCENT_INITIAL_ACTION:
624a35e2
PE
896 case PERCENT_DESTRUCTOR:
897 case PERCENT_PRINTER:
898 if (text[1] == '$')
899 {
900 obstack_sgrow (&obstack_for_string, "]b4_at_dollar[");
901 return;
902 }
f25bfb75
AD
903 break;
904
624a35e2 905 default:
f25bfb75
AD
906 break;
907 }
624a35e2
PE
908
909 complain_at (loc, _("invalid value: %s"), quote (text));
f25bfb75
AD
910}
911
912
d8d3f94a
PE
/*------------------------------------------------------------------.
| Convert universal character name UCN to a single-byte character,  |
| and return that character.  Return -1 if UCN does not correspond  |
| to a single-byte character.                                       |
|                                                                   |
| UCN points at the backslash; the hexadecimal digits are expected  |
| to start two bytes later (after `\u' or `\U').                    |
`------------------------------------------------------------------*/

static int
convert_ucn_to_byte (char const *ucn)
{
  unsigned long code = strtoul (ucn + 2, 0, 16);

  /* FIXME: Currently we assume Unicode-compatible unibyte characters
     on ASCII hosts (i.e., Latin-1 on hosts with 8-bit bytes).  On
     non-ASCII hosts we support only the portable C character set.
     These limitations should be removed once we add support for
     multibyte characters.  */

  if (UCHAR_MAX < code)
    return -1;

#if ! ('$' == 0x24 && '@' == 0x40 && '`' == 0x60 && '~' == 0x7e)
  {
    /* A non-ASCII host.  Use CODE to index into a table of the C
       basic execution character set, which is guaranteed to exist on
       all Standard C platforms.  This table also includes '$', '@',
       and '`', which are not in the basic execution character set but
       which are unibyte characters on all the platforms that we know
       about.  */
    static signed char const table[] =
      {
        '\0',   -1,   -1,   -1,   -1,   -1,   -1, '\a',
        '\b', '\t', '\n', '\v', '\f', '\r',   -1,   -1,
          -1,   -1,   -1,   -1,   -1,   -1,   -1,   -1,
          -1,   -1,   -1,   -1,   -1,   -1,   -1,   -1,
         ' ',  '!',  '"',  '#',  '$',  '%',  '&', '\'',
         '(',  ')',  '*',  '+',  ',',  '-',  '.',  '/',
         '0',  '1',  '2',  '3',  '4',  '5',  '6',  '7',
         '8',  '9',  ':',  ';',  '<',  '=',  '>',  '?',
         '@',  'A',  'B',  'C',  'D',  'E',  'F',  'G',
         'H',  'I',  'J',  'K',  'L',  'M',  'N',  'O',
         'P',  'Q',  'R',  'S',  'T',  'U',  'V',  'W',
         'X',  'Y',  'Z',  '[', '\\',  ']',  '^',  '_',
         '`',  'a',  'b',  'c',  'd',  'e',  'f',  'g',
         'h',  'i',  'j',  'k',  'l',  'm',  'n',  'o',
         'p',  'q',  'r',  's',  't',  'u',  'v',  'w',
         'x',  'y',  'z',  '{',  '|',  '}',  '~'
      };

    /* Return the int directly: routing -1 through the unsigned CODE
       and back to int would rely on an implementation-defined
       conversion.  */
    return code < sizeof table ? table[code] : -1;
  }
#else
  /* CODE is at most UCHAR_MAX here, so it fits in an int.  */
  return (int) code;
#endif
}
967
968
900c5db5
AD
969/*----------------------------------------------------------------.
970| Handle `#line INT "FILE"'. ARGS has already skipped `#line '. |
971`----------------------------------------------------------------*/
972
973static void
3f2d73f1 974handle_syncline (char *args)
900c5db5
AD
975{
976 int lineno = strtol (args, &args, 10);
977 const char *file = NULL;
978 file = strchr (args, '"') + 1;
979 *strchr (file, '"') = 0;
3f2d73f1
PE
980 scanner_cursor.file = current_file = xstrdup (file);
981 scanner_cursor.line = lineno;
982 scanner_cursor.column = 1;
900c5db5
AD
983}
984
a706a1cc 985
3f2d73f1
PE
986/*------------------------------------------------------------------------.
987| Report an unexpected EOF in a token or comment starting at START. |
988| An end of file was encountered and the expected TOKEN_END was missing. |
3f2d73f1 989`------------------------------------------------------------------------*/
a706a1cc
PE
990
991static void
aa418041 992unexpected_eof (boundary start, char const *token_end)
a706a1cc 993{
223ff46e
PE
994 location loc;
995 loc.start = start;
996 loc.end = scanner_cursor;
997 complain_at (loc, _("missing `%s' at end of file"), token_end);
a706a1cc
PE
998}
999
1000
f25bfb75
AD
1001/*-------------------------.
1002| Initialize the scanner. |
1003`-------------------------*/
1004
1d6412ad
AD
1005void
1006scanner_initialize (void)
1007{
223ff46e 1008 obstack_init (&obstack_for_string);
1d6412ad
AD
1009}
1010
1011
f25bfb75
AD
1012/*-----------------------------------------------.
1013| Free all the memory allocated to the scanner. |
1014`-----------------------------------------------*/
1015
4cdb01db
AD
1016void
1017scanner_free (void)
1018{
223ff46e 1019 obstack_free (&obstack_for_string, 0);
536545f3
AD
1020 /* Reclaim Flex's buffers. */
1021 yy_delete_buffer (YY_CURRENT_BUFFER);
4cdb01db 1022}