]> git.saurik.com Git - bison.git/blame - src/scan-gram.l
Do not use @output_header_name@ unless
[bison.git] / src / scan-gram.l
CommitLineData
e9955c83 1/* Bison Grammar Scanner -*- C -*-
3b1e470c 2
e9955c83
AD
3 Copyright (C) 2002 Free Software Foundation, Inc.
4
5 This file is part of Bison, the GNU Compiler Compiler.
6
7 This program is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 2 of the License, or
10 (at your option) any later version.
11
12 This program is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
16
17 You should have received a copy of the GNU General Public License
18 along with this program; if not, write to the Free Software
19 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
20 02111-1307 USA
21*/
22
a706a1cc 23%option debug nodefault noyywrap never-interactive
e9955c83
AD
24%option prefix="gram_" outfile="lex.yy.c"
25
26%{
27#include "system.h"
223ff46e
PE
28
29#include <mbswidth.h>
30#include <get-errno.h>
31#include <quote.h>
32
e9955c83 33#include "complain.h"
3f2d73f1 34#include "files.h"
e9955c83
AD
35#include "getargs.h"
36#include "gram.h"
37#include "reader.h"
223ff46e 38#include "uniqstr.h"
e9955c83 39
3f2d73f1
PE
40#define YY_USER_INIT \
41 do \
42 { \
43 scanner_cursor.file = current_file; \
44 scanner_cursor.line = 1; \
45 scanner_cursor.column = 1; \
46 } \
47 while (0)
8efe435c 48
3f2d73f1
PE
49/* Location of scanner cursor. */
50boundary scanner_cursor;
41141c56 51
223ff46e 52static void adjust_location (location *, char const *, size_t);
3f2d73f1 53#define YY_USER_ACTION adjust_location (loc, yytext, yyleng);
d8d3f94a 54
6c30d641 55static size_t no_cr_read (FILE *, char *, size_t);
d8d3f94a
PE
56#define YY_INPUT(buf, result, size) ((result) = no_cr_read (yyin, buf, size))
57
58
223ff46e 59/* OBSTACK_FOR_STRING -- Used to store all the characters that we need to
44995b2e
AD
60 keep (to construct ID, STRINGS etc.). Use the following macros to
61 use it.
62
41141c56
PE
63 Use STRING_GROW to append what has just been matched, and
64 STRING_FINISH to end the string (it puts the ending 0).
65 STRING_FINISH also stores this string in LAST_STRING, which can be
66 used, and which is used by STRING_FREE to free the last string. */
44995b2e 67
223ff46e 68static struct obstack obstack_for_string;
44995b2e 69
7ec2d4cd
AD
70/* A string representing the most recently saved token. */
71static char *last_string;
72
73
41141c56 74#define STRING_GROW \
223ff46e 75 obstack_grow (&obstack_for_string, yytext, yyleng)
44995b2e 76
41141c56 77#define STRING_FINISH \
44995b2e 78 do { \
223ff46e
PE
79 obstack_1grow (&obstack_for_string, '\0'); \
80 last_string = obstack_finish (&obstack_for_string); \
44995b2e
AD
81 } while (0)
82
41141c56 83#define STRING_FREE \
223ff46e 84 obstack_free (&obstack_for_string, last_string)
e9955c83 85
7ec2d4cd
AD
86void
87scanner_last_string_free (void)
88{
41141c56 89 STRING_FREE;
7ec2d4cd 90}
e9955c83 91
efcb44dd
PE
92/* Within well-formed rules, RULE_LENGTH is the number of values in
93 the current rule so far, which says where to find `$0' with respect
94 to the top of the stack. It is not the same as the rule->length in
95 the case of mid rule actions.
96
97 Outside of well-formed rules, RULE_LENGTH has an undefined value. */
98static int rule_length;
99
624a35e2
PE
100static void handle_dollar (int token_type, char *cp, location loc);
101static void handle_at (int token_type, char *cp, location loc);
3f2d73f1 102static void handle_syncline (char *args);
d8d3f94a 103static int convert_ucn_to_byte (char const *hex_text);
3f2d73f1 104static void unexpected_end_of_file (boundary, char const *);
e9955c83
AD
105
106%}
d8d3f94a 107%x SC_COMMENT SC_LINE_COMMENT SC_YACC_COMMENT
e9955c83 108%x SC_STRING SC_CHARACTER
3f2d73f1 109%x SC_AFTER_IDENTIFIER
e9955c83 110%x SC_ESCAPED_STRING SC_ESCAPED_CHARACTER
624a35e2 111%x SC_PRE_CODE SC_BRACED_CODE SC_PROLOGUE SC_EPILOGUE
e9955c83 112
29c01725
AD
113letter [.abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ_]
114id {letter}({letter}|[0-9])*
115directive %{letter}({letter}|[0-9]|-)*
624a35e2 116int [0-9]+
d8d3f94a
PE
117
118/* POSIX says that a tag must be both an id and a C union member, but
119 historically almost any character is allowed in a tag. We disallow
120 NUL and newline, as this simplifies our implementation. */
121tag [^\0\n>]+
122
123/* Zero or more instances of backslash-newline. Following GCC, allow
124 white space between the backslash and the newline. */
125splice (\\[ \f\t\v]*\n)*
e9955c83
AD
126
127%%
128%{
a706a1cc 129 /* Nesting level of the current code in braces. */
1a9e39f1
PE
130 int braces_level IF_LINT (= 0);
131
3f2d73f1
PE
132 /* Parent context state, when applicable. */
133 int context_state IF_LINT (= 0);
a706a1cc 134
624a35e2
PE
135 /* Token type to return, when applicable. */
136 int token_type IF_LINT (= 0);
137
3f2d73f1 138 /* Location of most recent identifier, when applicable. */
223ff46e 139 location id_loc IF_LINT (= *loc);
3f2d73f1 140
223ff46e 141 /* Where containing code started, when applicable. */
3f2d73f1
PE
142 boundary code_start IF_LINT (= loc->start);
143
223ff46e
PE
144 /* Where containing comment or string or character literal started,
145 when applicable. */
3f2d73f1 146 boundary token_start IF_LINT (= loc->start);
e9955c83
AD
147%}
148
149
3f2d73f1
PE
150 /*-----------------------.
151 | Scanning white space. |
152 `-----------------------*/
153
624a35e2 154<INITIAL,SC_AFTER_IDENTIFIER,SC_PRE_CODE>
3f2d73f1
PE
155{
156 [ \f\n\t\v] ;
83adb046 157 "," warn_at (*loc, _("stray `,' treated as white space"));
3f2d73f1
PE
158
159 /* Comments. */
3f2d73f1 160 "//".* ;
83adb046
PE
161 "/*" {
162 token_start = loc->start;
163 context_state = YY_START;
164 BEGIN SC_YACC_COMMENT;
165 }
3f2d73f1
PE
166
167 /* #line directives are not documented, and may be withdrawn or
168 modified in future versions of Bison. */
169 ^"#line "{int}" \"".*"\"\n" {
170 handle_syncline (yytext + sizeof "#line " - 1);
171 }
172}
173
174
e9955c83
AD
175 /*----------------------------.
176 | Scanning Bison directives. |
177 `----------------------------*/
178<INITIAL>
179{
180 "%binary" return PERCENT_NONASSOC;
181 "%debug" return PERCENT_DEBUG;
182 "%define" return PERCENT_DEFINE;
183 "%defines" return PERCENT_DEFINES;
624a35e2 184 "%destructor" token_type = PERCENT_DESTRUCTOR; BEGIN SC_PRE_CODE;
676385e2 185 "%dprec" return PERCENT_DPREC;
e9955c83
AD
186 "%error"[-_]"verbose" return PERCENT_ERROR_VERBOSE;
187 "%expect" return PERCENT_EXPECT;
188 "%file-prefix" return PERCENT_FILE_PREFIX;
189 "%fixed"[-_]"output"[-_]"files" return PERCENT_YACC;
ae7453f2 190 "%glr-parser" return PERCENT_GLR_PARSER;
e9955c83 191 "%left" return PERCENT_LEFT;
624a35e2 192 "%lex-param" token_type = PERCENT_LEX_PARAM; BEGIN SC_PRE_CODE;
e9955c83 193 "%locations" return PERCENT_LOCATIONS;
676385e2 194 "%merge" return PERCENT_MERGE;
e9955c83
AD
195 "%name"[-_]"prefix" return PERCENT_NAME_PREFIX;
196 "%no"[-_]"lines" return PERCENT_NO_LINES;
197 "%nonassoc" return PERCENT_NONASSOC;
198 "%nterm" return PERCENT_NTERM;
199 "%output" return PERCENT_OUTPUT;
624a35e2 200 "%parse-param" token_type = PERCENT_PARSE_PARAM; BEGIN SC_PRE_CODE;
d8d3f94a 201 "%prec" rule_length--; return PERCENT_PREC;
624a35e2 202 "%printer" token_type = PERCENT_PRINTER; BEGIN SC_PRE_CODE;
e9955c83
AD
203 "%pure"[-_]"parser" return PERCENT_PURE_PARSER;
204 "%right" return PERCENT_RIGHT;
205 "%skeleton" return PERCENT_SKELETON;
206 "%start" return PERCENT_START;
207 "%term" return PERCENT_TOKEN;
208 "%token" return PERCENT_TOKEN;
209 "%token"[-_]"table" return PERCENT_TOKEN_TABLE;
210 "%type" return PERCENT_TYPE;
624a35e2 211 "%union" token_type = PERCENT_UNION; BEGIN SC_PRE_CODE;
e9955c83
AD
212 "%verbose" return PERCENT_VERBOSE;
213 "%yacc" return PERCENT_YACC;
214
3f2d73f1 215 {directive} {
41141c56 216 complain_at (*loc, _("invalid directive: %s"), quote (yytext));
412f8a59 217 }
900c5db5 218
e9955c83 219 "=" return EQUAL;
d8d3f94a 220 "|" rule_length = 0; return PIPE;
e9955c83
AD
221 ";" return SEMICOLON;
222
3f2d73f1 223 {id} {
41141c56 224 val->symbol = symbol_get (yytext, *loc);
3f2d73f1 225 id_loc = *loc;
efcb44dd 226 rule_length++;
3f2d73f1 227 BEGIN SC_AFTER_IDENTIFIER;
e9955c83
AD
228 }
229
d8d3f94a
PE
230 {int} {
231 unsigned long num;
223ff46e 232 set_errno (0);
d8d3f94a 233 num = strtoul (yytext, 0, 10);
223ff46e 234 if (INT_MAX < num || get_errno ())
d8d3f94a 235 {
41141c56 236 complain_at (*loc, _("integer out of range: %s"), quote (yytext));
d8d3f94a
PE
237 num = INT_MAX;
238 }
41141c56 239 val->integer = num;
d8d3f94a
PE
240 return INT;
241 }
e9955c83
AD
242
243 /* Characters. We don't check there is only one. */
3f2d73f1 244 "'" STRING_GROW; token_start = loc->start; BEGIN SC_ESCAPED_CHARACTER;
e9955c83
AD
245
246 /* Strings. */
3f2d73f1 247 "\"" STRING_GROW; token_start = loc->start; BEGIN SC_ESCAPED_STRING;
e9955c83
AD
248
249 /* Prologue. */
3f2d73f1 250 "%{" code_start = loc->start; BEGIN SC_PROLOGUE;
e9955c83
AD
251
252 /* Code in between braces. */
3f2d73f1
PE
253 "{" {
254 STRING_GROW;
624a35e2 255 token_type = BRACED_CODE;
3f2d73f1
PE
256 braces_level = 0;
257 code_start = loc->start;
258 BEGIN SC_BRACED_CODE;
259 }
e9955c83
AD
260
261 /* A type. */
d8d3f94a 262 "<"{tag}">" {
223ff46e 263 obstack_grow (&obstack_for_string, yytext + 1, yyleng - 2);
41141c56 264 STRING_FINISH;
223ff46e 265 val->uniqstr = uniqstr_new (last_string);
41141c56 266 STRING_FREE;
4cdb01db
AD
267 return TYPE;
268 }
269
a706a1cc
PE
270 "%%" {
271 static int percent_percent_count;
e9955c83 272 if (++percent_percent_count == 2)
3f2d73f1
PE
273 {
274 code_start = loc->start;
275 BEGIN SC_EPILOGUE;
276 }
e9955c83
AD
277 return PERCENT_PERCENT;
278 }
279
a706a1cc 280 . {
41141c56 281 complain_at (*loc, _("invalid character: %s"), quote (yytext));
3f2d73f1
PE
282 }
283}
284
285
286 /*-----------------------------------------------------------------.
287 | Scanning after an identifier, checking whether a colon is next. |
288 `-----------------------------------------------------------------*/
289
290<SC_AFTER_IDENTIFIER>
291{
292 ":" {
293 rule_length = 0;
294 *loc = id_loc;
295 BEGIN INITIAL;
296 return ID_COLON;
297 }
298 . {
299 scanner_cursor.column -= mbsnwidth (yytext, yyleng, 0);
300 yyless (0);
301 *loc = id_loc;
302 BEGIN INITIAL;
303 return ID;
304 }
305 <<EOF>> {
306 *loc = id_loc;
307 BEGIN INITIAL;
308 return ID;
e9955c83
AD
309 }
310}
311
312
d8d3f94a
PE
313 /*---------------------------------------------------------------.
314 | Scanning a Yacc comment. The initial `/ *' is already eaten. |
315 `---------------------------------------------------------------*/
e9955c83 316
d8d3f94a 317<SC_YACC_COMMENT>
e9955c83 318{
3f2d73f1 319 "*/" BEGIN context_state;
a706a1cc 320 .|\n ;
3f2d73f1 321 <<EOF>> unexpected_end_of_file (token_start, "*/");
d8d3f94a
PE
322}
323
324
325 /*------------------------------------------------------------.
326 | Scanning a C comment. The initial `/ *' is already eaten. |
327 `------------------------------------------------------------*/
328
329<SC_COMMENT>
330{
3f2d73f1
PE
331 "*"{splice}"/" STRING_GROW; BEGIN context_state;
332 <<EOF>> unexpected_end_of_file (token_start, "*/");
e9955c83
AD
333}
334
335
d8d3f94a
PE
336 /*--------------------------------------------------------------.
337 | Scanning a line comment. The initial `//' is already eaten. |
338 `--------------------------------------------------------------*/
339
340<SC_LINE_COMMENT>
341{
3f2d73f1 342 "\n" STRING_GROW; BEGIN context_state;
41141c56 343 {splice} STRING_GROW;
3f2d73f1 344 <<EOF>> BEGIN context_state;
d8d3f94a
PE
345}
346
347
e9955c83
AD
348 /*----------------------------------------------------------------.
349 | Scanning a C string, including its escapes. The initial `"' is |
350 | already eaten. |
351 `----------------------------------------------------------------*/
352
353<SC_ESCAPED_STRING>
354{
db2cc12f 355 "\"" {
41141c56
PE
356 STRING_GROW;
357 STRING_FINISH;
3f2d73f1 358 loc->start = token_start;
223ff46e 359 val->chars = last_string;
efcb44dd 360 rule_length++;
a706a1cc 361 BEGIN INITIAL;
e9955c83
AD
362 return STRING;
363 }
364
41141c56 365 .|\n STRING_GROW;
3f2d73f1 366 <<EOF>> unexpected_end_of_file (token_start, "\"");
e9955c83
AD
367}
368
369 /*---------------------------------------------------------------.
370 | Scanning a C character, decoding its escapes. The initial "'" |
371 | is already eaten. |
372 `---------------------------------------------------------------*/
373
374<SC_ESCAPED_CHARACTER>
375{
db2cc12f 376 "'" {
3b1e470c 377 unsigned char last_string_1;
41141c56
PE
378 STRING_GROW;
379 STRING_FINISH;
3f2d73f1 380 loc->start = token_start;
41141c56
PE
381 val->symbol = symbol_get (last_string, *loc);
382 symbol_class_set (val->symbol, token_sym, *loc);
3b1e470c
PE
383 last_string_1 = last_string[1];
384 symbol_user_token_number_set (val->symbol, last_string_1, *loc);
41141c56 385 STRING_FREE;
a706a1cc
PE
386 rule_length++;
387 BEGIN INITIAL;
388 return ID;
e9955c83 389 }
a706a1cc 390
41141c56 391 .|\n STRING_GROW;
3f2d73f1 392 <<EOF>> unexpected_end_of_file (token_start, "'");
e9955c83
AD
393}
394
395
396 /*----------------------------.
397 | Decode escaped characters. |
398 `----------------------------*/
399
400<SC_ESCAPED_STRING,SC_ESCAPED_CHARACTER>
401{
d8d3f94a
PE
402 \\[0-7]{1,3} {
403 unsigned long c = strtoul (yytext + 1, 0, 8);
404 if (UCHAR_MAX < c)
3f2d73f1 405 complain_at (*loc, _("invalid escape sequence: %s"), quote (yytext));
e9955c83 406 else
223ff46e 407 obstack_1grow (&obstack_for_string, c);
e9955c83
AD
408 }
409
6b0d38ab 410 \\x[0-9abcdefABCDEF]+ {
d8d3f94a 411 unsigned long c;
223ff46e 412 set_errno (0);
d8d3f94a 413 c = strtoul (yytext + 2, 0, 16);
223ff46e 414 if (UCHAR_MAX < c || get_errno ())
3f2d73f1 415 complain_at (*loc, _("invalid escape sequence: %s"), quote (yytext));
d8d3f94a 416 else
223ff46e 417 obstack_1grow (&obstack_for_string, c);
e9955c83
AD
418 }
419
223ff46e
PE
420 \\a obstack_1grow (&obstack_for_string, '\a');
421 \\b obstack_1grow (&obstack_for_string, '\b');
422 \\f obstack_1grow (&obstack_for_string, '\f');
423 \\n obstack_1grow (&obstack_for_string, '\n');
424 \\r obstack_1grow (&obstack_for_string, '\r');
425 \\t obstack_1grow (&obstack_for_string, '\t');
426 \\v obstack_1grow (&obstack_for_string, '\v');
412f8a59
PE
427
428 /* \\[\"\'?\\] would be shorter, but it confuses xgettext. */
223ff46e 429 \\("\""|"'"|"?"|"\\") obstack_1grow (&obstack_for_string, yytext[1]);
412f8a59 430
6b0d38ab 431 \\(u|U[0-9abcdefABCDEF]{4})[0-9abcdefABCDEF]{4} {
d8d3f94a
PE
432 int c = convert_ucn_to_byte (yytext);
433 if (c < 0)
3f2d73f1 434 complain_at (*loc, _("invalid escape sequence: %s"), quote (yytext));
d8d3f94a 435 else
223ff46e 436 obstack_1grow (&obstack_for_string, c);
d8d3f94a 437 }
4f25ebb0 438 \\(.|\n) {
3f2d73f1 439 complain_at (*loc, _("unrecognized escape sequence: %s"), quote (yytext));
41141c56 440 STRING_GROW;
e9955c83
AD
441 }
442}
443
444
445 /*----------------------------------------------------------.
446 | Scanning a C character without decoding its escapes. The |
447 | initial "'" is already eaten. |
448 `----------------------------------------------------------*/
449
450<SC_CHARACTER>
451{
3f2d73f1 452 "'" STRING_GROW; BEGIN context_state;
41141c56 453 \\{splice}[^$@\[\]] STRING_GROW;
3f2d73f1 454 <<EOF>> unexpected_end_of_file (token_start, "'");
e9955c83
AD
455}
456
457
458 /*----------------------------------------------------------------.
459 | Scanning a C string, without decoding its escapes. The initial |
460 | `"' is already eaten. |
461 `----------------------------------------------------------------*/
462
463<SC_STRING>
464{
3f2d73f1 465 "\"" STRING_GROW; BEGIN context_state;
41141c56 466 \\{splice}[^$@\[\]] STRING_GROW;
3f2d73f1 467 <<EOF>> unexpected_end_of_file (token_start, "\"");
e9955c83
AD
468}
469
470
471 /*---------------------------------------------------.
472 | Strings, comments etc. can be found in user code. |
473 `---------------------------------------------------*/
474
475<SC_BRACED_CODE,SC_PROLOGUE,SC_EPILOGUE>
476{
3f2d73f1
PE
477 "'" {
478 STRING_GROW;
479 context_state = YY_START;
480 token_start = loc->start;
481 BEGIN SC_CHARACTER;
482 }
483 "\"" {
484 STRING_GROW;
485 context_state = YY_START;
486 token_start = loc->start;
487 BEGIN SC_STRING;
488 }
489 "/"{splice}"*" {
490 STRING_GROW;
491 context_state = YY_START;
492 token_start = loc->start;
493 BEGIN SC_COMMENT;
494 }
495 "/"{splice}"/" {
496 STRING_GROW;
497 context_state = YY_START;
498 BEGIN SC_LINE_COMMENT;
499 }
e9955c83
AD
500}
501
502
624a35e2
PE
503 /*---------------------------------------------------------------.
504 | Scanning after %union etc., possibly followed by white space. |
505 | For %union only, allow arbitrary C code to appear before the |
506 | following brace, as an extension to POSIX. |
507 `---------------------------------------------------------------*/
508
509<SC_PRE_CODE>
510{
511 . {
512 bool valid = yytext[0] == '{' || token_type == PERCENT_UNION;
513 scanner_cursor.column -= mbsnwidth (yytext, yyleng, 0);
514 yyless (0);
515
516 if (valid)
517 {
518 braces_level = -1;
519 code_start = loc->start;
520 BEGIN SC_BRACED_CODE;
521 }
522 else
523 {
524 complain_at (*loc, _("missing `{' in `%s'"),
525 token_name (token_type));
526 obstack_sgrow (&obstack_for_string, "{}");
527 STRING_FINISH;
528 val->chars = last_string;
529 BEGIN INITIAL;
530 return token_type;
531 }
532 }
533}
534
535
e9955c83
AD
536 /*---------------------------------------------------------------.
537 | Scanning some code in braces (%union and actions). The initial |
538 | "{" is already eaten. |
539 `---------------------------------------------------------------*/
540
541<SC_BRACED_CODE>
542{
41141c56
PE
543 "{"|"<"{splice}"%" STRING_GROW; braces_level++;
544 "%"{splice}">" STRING_GROW; braces_level--;
e9955c83 545 "}" {
25522739
PE
546 bool outer_brace = --braces_level < 0;
547
548 /* As an undocumented Bison extension, append `;' before the last
549 brace in braced code, so that the user code can omit trailing
550 `;'. But do not append `;' if emulating Yacc, since Yacc does
551 not append one.
552
553 FIXME: Bison should warn if a semicolon seems to be necessary
554 here, and should omit the semicolon if it seems unnecessary
555 (e.g., after ';', '{', or '}', each followed by comments or
556 white space). Such a warning shouldn't depend on --yacc; it
557 should depend on a new --pedantic option, which would cause
558 Bison to warn if it detects an extension to POSIX. --pedantic
559 should also diagnose other Bison extensions like %yacc.
560 Perhaps there should also be a GCC-style --pedantic-errors
561 option, so that such warnings are diagnosed as errors. */
1deb9bdc 562 if (outer_brace && token_type == BRACED_CODE && ! yacc_flag)
25522739
PE
563 obstack_1grow (&obstack_for_string, ';');
564
565 obstack_1grow (&obstack_for_string, '}');
566
567 if (outer_brace)
e9955c83 568 {
41141c56 569 STRING_FINISH;
624a35e2 570 rule_length++;
3f2d73f1 571 loc->start = code_start;
223ff46e 572 val->chars = last_string;
a706a1cc 573 BEGIN INITIAL;
624a35e2 574 return token_type;
e9955c83
AD
575 }
576 }
577
a706a1cc
PE
578 /* Tokenize `<<%' correctly (as `<<' `%') rather than incorrectly
579 (as `<' `<%'). */
41141c56 580 "<"{splice}"<" STRING_GROW;
a706a1cc 581
624a35e2
PE
582 "$"("<"{tag}">")?(-?[0-9]+|"$") handle_dollar (token_type, yytext, *loc);
583 "@"(-?[0-9]+|"$") handle_at (token_type, yytext, *loc);
e9955c83 584
3f2d73f1 585 <<EOF>> unexpected_end_of_file (code_start, "}");
e9955c83
AD
586}
587
588
589 /*--------------------------------------------------------------.
590 | Scanning some prologue: from "%{" (already scanned) to "%}". |
591 `--------------------------------------------------------------*/
592
593<SC_PROLOGUE>
594{
595 "%}" {
41141c56 596 STRING_FINISH;
3f2d73f1 597 loc->start = code_start;
223ff46e 598 val->chars = last_string;
a706a1cc 599 BEGIN INITIAL;
e9955c83
AD
600 return PROLOGUE;
601 }
602
3f2d73f1 603 <<EOF>> unexpected_end_of_file (code_start, "%}");
e9955c83
AD
604}
605
606
607 /*---------------------------------------------------------------.
608 | Scanning the epilogue (everything after the second "%%", which |
d8d3f94a 609 | has already been eaten). |
e9955c83
AD
610 `---------------------------------------------------------------*/
611
612<SC_EPILOGUE>
613{
e9955c83 614 <<EOF>> {
41141c56 615 STRING_FINISH;
3f2d73f1 616 loc->start = code_start;
223ff46e 617 val->chars = last_string;
a706a1cc 618 BEGIN INITIAL;
e9955c83
AD
619 return EPILOGUE;
620 }
621}
622
623
a706a1cc
PE
624 /*----------------------------------------------------------------.
625 | By default, grow the string obstack with the input, escaping M4 |
626 | quoting characters. |
627 `----------------------------------------------------------------*/
628
629<SC_COMMENT,SC_LINE_COMMENT,SC_STRING,SC_CHARACTER,SC_BRACED_CODE,SC_PROLOGUE,SC_EPILOGUE>
630{
223ff46e
PE
631 \$ obstack_sgrow (&obstack_for_string, "$][");
632 \@ obstack_sgrow (&obstack_for_string, "@@");
633 \[ obstack_sgrow (&obstack_for_string, "@{");
634 \] obstack_sgrow (&obstack_for_string, "@}");
41141c56 635 .|\n STRING_GROW;
a706a1cc
PE
636}
637
638
e9955c83
AD
639%%
640
3f2d73f1
PE
641/* Set *LOC and adjust scanner cursor to account for token TOKEN of
642 size SIZE. */
6c30d641
PE
643
644static void
223ff46e 645adjust_location (location *loc, char const *token, size_t size)
6c30d641 646{
3f2d73f1
PE
647 int line = scanner_cursor.line;
648 int column = scanner_cursor.column;
6c30d641
PE
649 char const *p0 = token;
650 char const *p = token;
651 char const *lim = token + size;
652
3f2d73f1
PE
653 loc->start = scanner_cursor;
654
6c30d641
PE
655 for (p = token; p < lim; p++)
656 switch (*p)
657 {
6c30d641
PE
658 case '\n':
659 line++;
660 column = 1;
661 p0 = p + 1;
662 break;
663
664 case '\t':
665 column += mbsnwidth (p0, p - p0, 0);
666 column += 8 - ((column - 1) & 7);
667 p0 = p + 1;
668 break;
669 }
670
3f2d73f1
PE
671 scanner_cursor.line = line;
672 scanner_cursor.column = column + mbsnwidth (p0, p - p0, 0);
673
674 loc->end = scanner_cursor;
6c30d641
PE
675}
676
677
/* Read up to SIZE bytes from FP into BUF and return how many bytes
   BUF holds afterwards.  Carriage returns never reach the caller:
   both "\r\n" and a lone '\r' are delivered as a single '\n'.  */

static size_t
no_cr_read (FILE *fp, char *buf, size_t size)
{
  size_t nread = fread (buf, 1, size, fp);
  char *dst;
  char const *src;
  char const *end;

  if (nread == 0)
    return nread;

  dst = memchr (buf, '\r', nread);
  if (dst == NULL)
    return nread;

  /* DST and SRC both start just after the first '\r'.  From here on
     the buffer is compacted in place: SRC reads, DST writes.  */
  dst++;
  src = dst;
  end = buf + nread;

  while (1)
    {
      /* The byte just before DST is an '\r'.  Turn it into '\n' and
         swallow a '\n' that immediately follows it, if any.  */
      dst[-1] = '\n';
      if (src == end)
        {
          /* The '\r' was the last byte read, so the '\n' to swallow
             may still be in the stream.  Peek at the next byte and
             push it back unless it is that '\n'.  */
          int next = getc (fp);
          if (next != '\n' && ungetc (next, fp) != next)
            break;
        }
      else if (*src == '\n')
        src++;

      /* Copy bytes down until the next '\r' has been copied.  */
      for (;;)
        {
          char c;
          if (src == end)
            return dst - buf;
          c = *src++;
          *dst++ = c;
          if (c == '\r')
            break;
        }
    }

  return dst - buf;
}
723
724
e9955c83 725/*------------------------------------------------------------------.
366eea36 726| TEXT is pointing to a wannabee semantic value (i.e., a `$'). |
e9955c83
AD
727| |
728| Possible inputs: $[<TYPENAME>]($|integer) |
729| |
223ff46e 730| Output to OBSTACK_FOR_STRING a reference to this semantic value. |
e9955c83
AD
731`------------------------------------------------------------------*/
732
624a35e2 733static inline bool
223ff46e 734handle_action_dollar (char *text, location loc)
e9955c83
AD
735{
736 const char *type_name = NULL;
366eea36 737 char *cp = text + 1;
e9955c83 738
624a35e2
PE
739 if (! current_rule)
740 return false;
741
e9955c83
AD
742 /* Get the type name if explicit. */
743 if (*cp == '<')
744 {
745 type_name = ++cp;
746 while (*cp != '>')
747 ++cp;
748 *cp = '\0';
749 ++cp;
750 }
751
752 if (*cp == '$')
753 {
754 if (!type_name)
223ff46e 755 type_name = symbol_list_n_type_name_get (current_rule, loc, 0);
e9955c83 756 if (!type_name && typed)
223ff46e 757 complain_at (loc, _("$$ of `%s' has no declared type"),
97650f4e 758 current_rule->sym->tag);
e9955c83
AD
759 if (!type_name)
760 type_name = "";
223ff46e 761 obstack_fgrow1 (&obstack_for_string,
e9955c83
AD
762 "]b4_lhs_value([%s])[", type_name);
763 }
d8d3f94a 764 else
e9955c83 765 {
d8d3f94a 766 long num;
223ff46e 767 set_errno (0);
d8d3f94a 768 num = strtol (cp, 0, 10);
e9955c83 769
223ff46e 770 if (INT_MIN <= num && num <= rule_length && ! get_errno ())
e9955c83 771 {
d8d3f94a 772 int n = num;
e9955c83 773 if (!type_name && n > 0)
223ff46e 774 type_name = symbol_list_n_type_name_get (current_rule, loc, n);
e9955c83 775 if (!type_name && typed)
223ff46e
PE
776 complain_at (loc, _("$%d of `%s' has no declared type"),
777 n, current_rule->sym->tag);
e9955c83
AD
778 if (!type_name)
779 type_name = "";
223ff46e 780 obstack_fgrow3 (&obstack_for_string,
e9955c83
AD
781 "]b4_rhs_value([%d], [%d], [%s])[",
782 rule_length, n, type_name);
783 }
d8d3f94a 784 else
223ff46e 785 complain_at (loc, _("integer out of range: %s"), quote (text));
9280d3ef 786 }
9280d3ef 787
624a35e2 788 return true;
e9955c83
AD
789}
790
f25bfb75
AD
791
792/*-----------------------------------------------------------------.
793| Dispatch onto handle_action_dollar, or handle_destructor_dollar, |
624a35e2 794| depending upon TOKEN_TYPE. |
f25bfb75 795`-----------------------------------------------------------------*/
e9955c83
AD
796
797static void
624a35e2 798handle_dollar (int token_type, char *text, location loc)
f25bfb75 799{
624a35e2 800 switch (token_type)
f25bfb75 801 {
624a35e2
PE
802 case BRACED_CODE:
803 if (handle_action_dollar (text, loc))
804 return;
f25bfb75
AD
805 break;
806
624a35e2
PE
807 case PERCENT_DESTRUCTOR:
808 case PERCENT_PRINTER:
809 if (text[1] == '$')
810 {
811 obstack_sgrow (&obstack_for_string, "]b4_dollar_dollar[");
812 return;
813 }
814 break;
815
816 default:
f25bfb75
AD
817 break;
818 }
624a35e2
PE
819
820 complain_at (loc, _("invalid value: %s"), quote (text));
f25bfb75
AD
821}
822
823
824/*------------------------------------------------------.
825| TEXT is a location token (i.e., a `@...'). Output to |
223ff46e 826| OBSTACK_FOR_STRING a reference to this location. |
f25bfb75
AD
827`------------------------------------------------------*/
828
624a35e2 829static inline bool
223ff46e 830handle_action_at (char *text, location loc)
e9955c83 831{
366eea36 832 char *cp = text + 1;
e9955c83 833 locations_flag = 1;
e9955c83 834
624a35e2
PE
835 if (! current_rule)
836 return false;
837
366eea36 838 if (*cp == '$')
624a35e2 839 obstack_sgrow (&obstack_for_string, "]b4_lhs_location[");
d8d3f94a 840 else
e9955c83 841 {
d8d3f94a 842 long num;
223ff46e 843 set_errno (0);
d8d3f94a 844 num = strtol (cp, 0, 10);
dafdc66f 845
223ff46e 846 if (INT_MIN <= num && num <= rule_length && ! get_errno ())
d8d3f94a
PE
847 {
848 int n = num;
223ff46e 849 obstack_fgrow2 (&obstack_for_string, "]b4_rhs_location([%d], [%d])[",
d8d3f94a
PE
850 rule_length, n);
851 }
e9955c83 852 else
223ff46e 853 complain_at (loc, _("integer out of range: %s"), quote (text));
f25bfb75 854 }
f25bfb75 855
624a35e2 856 return true;
e9955c83 857}
4cdb01db 858
f25bfb75
AD
859
860/*-------------------------------------------------------------------.
861| Dispatch onto handle_action_at, or handle_destructor_at, depending |
862| upon CODE_KIND. |
863`-------------------------------------------------------------------*/
864
865static void
624a35e2 866handle_at (int token_type, char *text, location loc)
f25bfb75 867{
624a35e2 868 switch (token_type)
f25bfb75 869 {
624a35e2 870 case BRACED_CODE:
223ff46e 871 handle_action_at (text, loc);
624a35e2
PE
872 return;
873
874 case PERCENT_DESTRUCTOR:
875 case PERCENT_PRINTER:
876 if (text[1] == '$')
877 {
878 obstack_sgrow (&obstack_for_string, "]b4_at_dollar[");
879 return;
880 }
f25bfb75
AD
881 break;
882
624a35e2 883 default:
f25bfb75
AD
884 break;
885 }
624a35e2
PE
886
887 complain_at (loc, _("invalid value: %s"), quote (text));
f25bfb75
AD
888}
889
890
d8d3f94a
PE
/* Convert the universal character name UCN, spelled `\uXXXX' or
   `\UXXXXXXXX', to a single-byte character and return that
   character.  Return -1 if UCN does not correspond to a single-byte
   character.  */

static int
convert_ucn_to_byte (char const *ucn)
{
  /* Skip the backslash and the `u' or `U'; the rest is hex digits.  */
  unsigned long code = strtoul (ucn + 2, 0, 16);
  int byte;

  /* FIXME: Currently we assume Unicode-compatible unibyte characters
     on ASCII hosts (i.e., Latin-1 on hosts with 8-bit bytes).  On
     non-ASCII hosts we support only the portable C character set.
     These limitations should be removed once we add support for
     multibyte characters.  */

  if (UCHAR_MAX < code)
    return -1;

  /* CODE is at most UCHAR_MAX here, so it fits in an int.  */
  byte = code;

#if ! ('$' == 0x24 && '@' == 0x40 && '`' == 0x60 && '~' == 0x7e)
  {
    /* A non-ASCII host.  Use CODE to index into a table of the C
       basic execution character set, which is guaranteed to exist on
       all Standard C platforms.  This table also includes '$', '@',
       and '`', which are not in the basic execution character set but
       which are unibyte characters on all the platforms that we know
       about.  */
    static signed char const table[] =
      {
        '\0',   -1,   -1,   -1,   -1,   -1,   -1, '\a',
        '\b', '\t', '\n', '\v', '\f', '\r',   -1,   -1,
          -1,   -1,   -1,   -1,   -1,   -1,   -1,   -1,
          -1,   -1,   -1,   -1,   -1,   -1,   -1,   -1,
         ' ',  '!',  '"',  '#',  '$',  '%',  '&', '\'',
         '(',  ')',  '*',  '+',  ',',  '-',  '.',  '/',
         '0',  '1',  '2',  '3',  '4',  '5',  '6',  '7',
         '8',  '9',  ':',  ';',  '<',  '=',  '>',  '?',
         '@',  'A',  'B',  'C',  'D',  'E',  'F',  'G',
         'H',  'I',  'J',  'K',  'L',  'M',  'N',  'O',
         'P',  'Q',  'R',  'S',  'T',  'U',  'V',  'W',
         'X',  'Y',  'Z',  '[', '\\',  ']',  '^',  '_',
         '`',  'a',  'b',  'c',  'd',  'e',  'f',  'g',
         'h',  'i',  'j',  'k',  'l',  'm',  'n',  'o',
         'p',  'q',  'r',  's',  't',  'u',  'v',  'w',
         'x',  'y',  'z',  '{',  '|',  '}',  '~'
      };

    /* Keep the result in an int: the table's -1 entries must not be
       routed through the unsigned CODE and converted back.  */
    byte = code < sizeof table ? table[code] : -1;
  }
#endif

  return byte;
}
945
946
900c5db5
AD
947/*----------------------------------------------------------------.
948| Handle `#line INT "FILE"'. ARGS has already skipped `#line '. |
949`----------------------------------------------------------------*/
950
951static void
3f2d73f1 952handle_syncline (char *args)
900c5db5
AD
953{
954 int lineno = strtol (args, &args, 10);
955 const char *file = NULL;
956 file = strchr (args, '"') + 1;
957 *strchr (file, '"') = 0;
3f2d73f1
PE
958 scanner_cursor.file = current_file = xstrdup (file);
959 scanner_cursor.line = lineno;
960 scanner_cursor.column = 1;
900c5db5
AD
961}
962
a706a1cc 963
3f2d73f1
PE
964/*------------------------------------------------------------------------.
965| Report an unexpected EOF in a token or comment starting at START. |
966| An end of file was encountered and the expected TOKEN_END was missing. |
967| After reporting the problem, pretend that TOKEN_END was found. |
968`------------------------------------------------------------------------*/
a706a1cc
PE
969
970static void
3f2d73f1 971unexpected_end_of_file (boundary start, char const *token_end)
a706a1cc 972{
345532d7 973 size_t i = strlen (token_end);
a706a1cc 974
223ff46e
PE
975 location loc;
976 loc.start = start;
977 loc.end = scanner_cursor;
978 complain_at (loc, _("missing `%s' at end of file"), token_end);
345532d7 979
3f2d73f1
PE
980 /* Adjust scanner cursor so that any later message does not count
981 the characters about to be inserted. */
982 scanner_cursor.column -= i;
345532d7
PE
983
984 while (i != 0)
985 unput (token_end[--i]);
a706a1cc
PE
986}
987
988
f25bfb75
AD
989/*-------------------------.
990| Initialize the scanner. |
991`-------------------------*/
992
1d6412ad
AD
993void
994scanner_initialize (void)
995{
223ff46e 996 obstack_init (&obstack_for_string);
1d6412ad
AD
997}
998
999
f25bfb75
AD
1000/*-----------------------------------------------.
1001| Free all the memory allocated to the scanner. |
1002`-----------------------------------------------*/
1003
4cdb01db
AD
1004void
1005scanner_free (void)
1006{
223ff46e 1007 obstack_free (&obstack_for_string, 0);
536545f3
AD
1008 /* Reclaim Flex's buffers. */
1009 yy_delete_buffer (YY_CURRENT_BUFFER);
4cdb01db 1010}