]> git.saurik.com Git - bison.git/blame - src/scan-gram.l
* src/scan-gram.l (gram_get_lineno, gram_get_in, gram_get_out):
[bison.git] / src / scan-gram.l
CommitLineData
e9955c83 1/* Bison Grammar Scanner -*- C -*-
3b1e470c 2
05ac60f3 3 Copyright (C) 2002, 2003, 2004, 2005 Free Software Foundation, Inc.
e9955c83
AD
4
5 This file is part of Bison, the GNU Compiler Compiler.
6
7 This program is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 2 of the License, or
10 (at your option) any later version.
11
12 This program is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
16
17 You should have received a copy of the GNU General Public License
18 along with this program; if not, write to the Free Software
0fb669f9
PE
19 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
20 02110-1301 USA
e9955c83
AD
21*/
22
aa418041 23%option debug nodefault nounput noyywrap never-interactive
e9955c83
AD
24%option prefix="gram_" outfile="lex.yy.c"
25
26%{
27#include "system.h"
223ff46e
PE
28
29#include <mbswidth.h>
30#include <get-errno.h>
31#include <quote.h>
32
e9955c83 33#include "complain.h"
3f2d73f1 34#include "files.h"
e9955c83
AD
35#include "getargs.h"
36#include "gram.h"
ca407bdf 37#include "quotearg.h"
e9955c83 38#include "reader.h"
223ff46e 39#include "uniqstr.h"
e9955c83 40
3f2d73f1
PE
41#define YY_USER_INIT \
42 do \
43 { \
44 scanner_cursor.file = current_file; \
45 scanner_cursor.line = 1; \
46 scanner_cursor.column = 1; \
379f0ac8 47 code_start = scanner_cursor; \
3f2d73f1
PE
48 } \
49 while (0)
8efe435c 50
dc9701e8
PE
51/* Pacify "gcc -Wmissing-prototypes" when flex 2.5.31 is used. */
52int gram_get_lineno (void);
53FILE *gram_get_in (void);
54FILE *gram_get_out (void);
55int gram_get_leng (void);
56char *gram_get_text (void);
57void gram_set_lineno (int);
58void gram_set_in (FILE *);
59void gram_set_out (FILE *);
60int gram_get_debug (void);
61void gram_set_debug (int);
62int gram_lex_destroy (void);
63
3f2d73f1
PE
64/* Location of scanner cursor. */
65boundary scanner_cursor;
41141c56 66
223ff46e 67static void adjust_location (location *, char const *, size_t);
3f2d73f1 68#define YY_USER_ACTION adjust_location (loc, yytext, yyleng);
d8d3f94a 69
6c30d641 70static size_t no_cr_read (FILE *, char *, size_t);
d8d3f94a
PE
71#define YY_INPUT(buf, result, size) ((result) = no_cr_read (yyin, buf, size))
72
73
223ff46e 74/* OBSTACK_FOR_STRING -- Used to store all the characters that we need to
44995b2e
AD
75 keep (to construct ID, STRINGS etc.). Use the following macros to
76 use it.
77
41141c56
PE
78 Use STRING_GROW to append what has just been matched, and
79 STRING_FINISH to end the string (it puts the ending 0).
80 STRING_FINISH also stores this string in LAST_STRING, which can be
81 used, and which is used by STRING_FREE to free the last string. */
44995b2e 82
223ff46e 83static struct obstack obstack_for_string;
44995b2e 84
7ec2d4cd
AD
85/* A string representing the most recently saved token. */
86static char *last_string;
87
88
41141c56 89#define STRING_GROW \
223ff46e 90 obstack_grow (&obstack_for_string, yytext, yyleng)
44995b2e 91
41141c56 92#define STRING_FINISH \
44995b2e 93 do { \
223ff46e
PE
94 obstack_1grow (&obstack_for_string, '\0'); \
95 last_string = obstack_finish (&obstack_for_string); \
44995b2e
AD
96 } while (0)
97
41141c56 98#define STRING_FREE \
223ff46e 99 obstack_free (&obstack_for_string, last_string)
e9955c83 100
7ec2d4cd
AD
101void
102scanner_last_string_free (void)
103{
41141c56 104 STRING_FREE;
7ec2d4cd 105}
e9955c83 106
efcb44dd
PE
107/* Within well-formed rules, RULE_LENGTH is the number of values in
108 the current rule so far, which says where to find `$0' with respect
109 to the top of the stack. It is not the same as the rule->length in
110 the case of mid rule actions.
111
112 Outside of well-formed rules, RULE_LENGTH has an undefined value. */
113static int rule_length;
114
624a35e2
PE
115static void handle_dollar (int token_type, char *cp, location loc);
116static void handle_at (int token_type, char *cp, location loc);
3f2d73f1 117static void handle_syncline (char *args);
1452af69 118static unsigned long int scan_integer (char const *p, int base, location loc);
d8d3f94a 119static int convert_ucn_to_byte (char const *hex_text);
aa418041 120static void unexpected_eof (boundary, char const *);
4febdd96 121static void unexpected_newline (boundary, char const *);
e9955c83
AD
122
123%}
d8d3f94a 124%x SC_COMMENT SC_LINE_COMMENT SC_YACC_COMMENT
e9955c83 125%x SC_STRING SC_CHARACTER
3f2d73f1 126%x SC_AFTER_IDENTIFIER
e9955c83 127%x SC_ESCAPED_STRING SC_ESCAPED_CHARACTER
624a35e2 128%x SC_PRE_CODE SC_BRACED_CODE SC_PROLOGUE SC_EPILOGUE
e9955c83 129
29c01725
AD
130letter [.abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ_]
131id {letter}({letter}|[0-9])*
132directive %{letter}({letter}|[0-9]|-)*
624a35e2 133int [0-9]+
d8d3f94a
PE
134
135/* POSIX says that a tag must be both an id and a C union member, but
136 historically almost any character is allowed in a tag. We disallow
137 NUL and newline, as this simplifies our implementation. */
138tag [^\0\n>]+
139
140/* Zero or more instances of backslash-newline. Following GCC, allow
141 white space between the backslash and the newline. */
142splice (\\[ \f\t\v]*\n)*
e9955c83
AD
143
144%%
145%{
a706a1cc 146 /* Nesting level of the current code in braces. */
1a9e39f1
PE
147 int braces_level IF_LINT (= 0);
148
3f2d73f1
PE
149 /* Parent context state, when applicable. */
150 int context_state IF_LINT (= 0);
a706a1cc 151
624a35e2
PE
152 /* Token type to return, when applicable. */
153 int token_type IF_LINT (= 0);
154
3f2d73f1 155 /* Location of most recent identifier, when applicable. */
a2bc9dbc 156 location id_loc IF_LINT (= empty_location);
3f2d73f1 157
a2bc9dbc
PE
158 /* Where containing code started, when applicable. Its initial
159 value is relevant only when yylex is invoked in the SC_EPILOGUE
160 start condition. */
161 boundary code_start = scanner_cursor;
3f2d73f1 162
223ff46e
PE
163 /* Where containing comment or string or character literal started,
164 when applicable. */
a2bc9dbc 165 boundary token_start IF_LINT (= scanner_cursor);
e9955c83
AD
166%}
167
168
3f2d73f1
PE
169 /*-----------------------.
170 | Scanning white space. |
171 `-----------------------*/
172
624a35e2 173<INITIAL,SC_AFTER_IDENTIFIER,SC_PRE_CODE>
3f2d73f1 174{
4febdd96 175 /* Comments and white space. */
83adb046 176 "," warn_at (*loc, _("stray `,' treated as white space"));
4febdd96 177 [ \f\n\t\v] |
3f2d73f1 178 "//".* ;
83adb046
PE
179 "/*" {
180 token_start = loc->start;
181 context_state = YY_START;
182 BEGIN SC_YACC_COMMENT;
183 }
3f2d73f1
PE
184
185 /* #line directives are not documented, and may be withdrawn or
186 modified in future versions of Bison. */
187 ^"#line "{int}" \"".*"\"\n" {
188 handle_syncline (yytext + sizeof "#line " - 1);
189 }
190}
191
192
e9955c83
AD
193 /*----------------------------.
194 | Scanning Bison directives. |
195 `----------------------------*/
196<INITIAL>
197{
198 "%binary" return PERCENT_NONASSOC;
199 "%debug" return PERCENT_DEBUG;
39a06c25 200 "%default"[-_]"prec" return PERCENT_DEFAULT_PREC;
e9955c83
AD
201 "%define" return PERCENT_DEFINE;
202 "%defines" return PERCENT_DEFINES;
624a35e2 203 "%destructor" token_type = PERCENT_DESTRUCTOR; BEGIN SC_PRE_CODE;
676385e2 204 "%dprec" return PERCENT_DPREC;
e9955c83
AD
205 "%error"[-_]"verbose" return PERCENT_ERROR_VERBOSE;
206 "%expect" return PERCENT_EXPECT;
d6328241 207 "%expect"[-_]"rr" return PERCENT_EXPECT_RR;
e9955c83
AD
208 "%file-prefix" return PERCENT_FILE_PREFIX;
209 "%fixed"[-_]"output"[-_]"files" return PERCENT_YACC;
cd3684cf 210 "%initial-action" token_type = PERCENT_INITIAL_ACTION; BEGIN SC_PRE_CODE;
ae7453f2 211 "%glr-parser" return PERCENT_GLR_PARSER;
e9955c83 212 "%left" return PERCENT_LEFT;
624a35e2 213 "%lex-param" token_type = PERCENT_LEX_PARAM; BEGIN SC_PRE_CODE;
e9955c83 214 "%locations" return PERCENT_LOCATIONS;
676385e2 215 "%merge" return PERCENT_MERGE;
e9955c83 216 "%name"[-_]"prefix" return PERCENT_NAME_PREFIX;
22fccf95 217 "%no"[-_]"default"[-_]"prec" return PERCENT_NO_DEFAULT_PREC;
e9955c83
AD
218 "%no"[-_]"lines" return PERCENT_NO_LINES;
219 "%nonassoc" return PERCENT_NONASSOC;
916708d5 220 "%nondeterministic-parser" return PERCENT_NONDETERMINISTIC_PARSER;
e9955c83
AD
221 "%nterm" return PERCENT_NTERM;
222 "%output" return PERCENT_OUTPUT;
624a35e2 223 "%parse-param" token_type = PERCENT_PARSE_PARAM; BEGIN SC_PRE_CODE;
d8d3f94a 224 "%prec" rule_length--; return PERCENT_PREC;
624a35e2 225 "%printer" token_type = PERCENT_PRINTER; BEGIN SC_PRE_CODE;
e9955c83
AD
226 "%pure"[-_]"parser" return PERCENT_PURE_PARSER;
227 "%right" return PERCENT_RIGHT;
228 "%skeleton" return PERCENT_SKELETON;
229 "%start" return PERCENT_START;
230 "%term" return PERCENT_TOKEN;
231 "%token" return PERCENT_TOKEN;
232 "%token"[-_]"table" return PERCENT_TOKEN_TABLE;
233 "%type" return PERCENT_TYPE;
624a35e2 234 "%union" token_type = PERCENT_UNION; BEGIN SC_PRE_CODE;
e9955c83
AD
235 "%verbose" return PERCENT_VERBOSE;
236 "%yacc" return PERCENT_YACC;
237
3f2d73f1 238 {directive} {
41141c56 239 complain_at (*loc, _("invalid directive: %s"), quote (yytext));
412f8a59 240 }
900c5db5 241
e9955c83 242 "=" return EQUAL;
d8d3f94a 243 "|" rule_length = 0; return PIPE;
e9955c83
AD
244 ";" return SEMICOLON;
245
3f2d73f1 246 {id} {
41141c56 247 val->symbol = symbol_get (yytext, *loc);
3f2d73f1 248 id_loc = *loc;
efcb44dd 249 rule_length++;
3f2d73f1 250 BEGIN SC_AFTER_IDENTIFIER;
e9955c83
AD
251 }
252
d8d3f94a 253 {int} {
1452af69
PE
254 val->integer = scan_integer (yytext, 10, *loc);
255 return INT;
256 }
257 0[xX][0-9abcdefABCDEF]+ {
258 val->integer = scan_integer (yytext, 16, *loc);
d8d3f94a
PE
259 return INT;
260 }
e9955c83
AD
261
262 /* Characters. We don't check there is only one. */
3f2d73f1 263 "'" STRING_GROW; token_start = loc->start; BEGIN SC_ESCAPED_CHARACTER;
e9955c83
AD
264
265 /* Strings. */
ca407bdf 266 "\"" token_start = loc->start; BEGIN SC_ESCAPED_STRING;
e9955c83
AD
267
268 /* Prologue. */
3f2d73f1 269 "%{" code_start = loc->start; BEGIN SC_PROLOGUE;
e9955c83
AD
270
271 /* Code in between braces. */
3f2d73f1
PE
272 "{" {
273 STRING_GROW;
624a35e2 274 token_type = BRACED_CODE;
3f2d73f1
PE
275 braces_level = 0;
276 code_start = loc->start;
277 BEGIN SC_BRACED_CODE;
278 }
e9955c83
AD
279
280 /* A type. */
d8d3f94a 281 "<"{tag}">" {
223ff46e 282 obstack_grow (&obstack_for_string, yytext + 1, yyleng - 2);
41141c56 283 STRING_FINISH;
223ff46e 284 val->uniqstr = uniqstr_new (last_string);
41141c56 285 STRING_FREE;
4cdb01db
AD
286 return TYPE;
287 }
288
a706a1cc
PE
289 "%%" {
290 static int percent_percent_count;
e9955c83 291 if (++percent_percent_count == 2)
a2bc9dbc 292 BEGIN SC_EPILOGUE;
e9955c83
AD
293 return PERCENT_PERCENT;
294 }
295
a706a1cc 296 . {
41141c56 297 complain_at (*loc, _("invalid character: %s"), quote (yytext));
3f2d73f1 298 }
379f0ac8
PE
299
300 <<EOF>> {
301 loc->start = loc->end = scanner_cursor;
302 yyterminate ();
303 }
3f2d73f1
PE
304}
305
306
307 /*-----------------------------------------------------------------.
308 | Scanning after an identifier, checking whether a colon is next. |
309 `-----------------------------------------------------------------*/
310
311<SC_AFTER_IDENTIFIER>
312{
313 ":" {
314 rule_length = 0;
315 *loc = id_loc;
316 BEGIN INITIAL;
317 return ID_COLON;
318 }
319 . {
320 scanner_cursor.column -= mbsnwidth (yytext, yyleng, 0);
321 yyless (0);
322 *loc = id_loc;
323 BEGIN INITIAL;
324 return ID;
325 }
326 <<EOF>> {
327 *loc = id_loc;
328 BEGIN INITIAL;
329 return ID;
e9955c83
AD
330 }
331}
332
333
d8d3f94a
PE
334 /*---------------------------------------------------------------.
335 | Scanning a Yacc comment. The initial `/ *' is already eaten. |
336 `---------------------------------------------------------------*/
e9955c83 337
d8d3f94a 338<SC_YACC_COMMENT>
e9955c83 339{
3f2d73f1 340 "*/" BEGIN context_state;
a706a1cc 341 .|\n ;
aa418041 342 <<EOF>> unexpected_eof (token_start, "*/"); BEGIN context_state;
d8d3f94a
PE
343}
344
345
346 /*------------------------------------------------------------.
347 | Scanning a C comment. The initial `/ *' is already eaten. |
348 `------------------------------------------------------------*/
349
350<SC_COMMENT>
351{
3f2d73f1 352 "*"{splice}"/" STRING_GROW; BEGIN context_state;
aa418041 353 <<EOF>> unexpected_eof (token_start, "*/"); BEGIN context_state;
e9955c83
AD
354}
355
356
d8d3f94a
PE
357 /*--------------------------------------------------------------.
358 | Scanning a line comment. The initial `//' is already eaten. |
359 `--------------------------------------------------------------*/
360
361<SC_LINE_COMMENT>
362{
3f2d73f1 363 "\n" STRING_GROW; BEGIN context_state;
41141c56 364 {splice} STRING_GROW;
3f2d73f1 365 <<EOF>> BEGIN context_state;
d8d3f94a
PE
366}
367
368
4febdd96
PE
369 /*------------------------------------------------.
370 | Scanning a Bison string, including its escapes. |
371 | The initial quote is already eaten. |
372 `------------------------------------------------*/
e9955c83
AD
373
374<SC_ESCAPED_STRING>
375{
db2cc12f 376 "\"" {
41141c56 377 STRING_FINISH;
3f2d73f1 378 loc->start = token_start;
223ff46e 379 val->chars = last_string;
efcb44dd 380 rule_length++;
a706a1cc 381 BEGIN INITIAL;
e9955c83
AD
382 return STRING;
383 }
4febdd96
PE
384 \n unexpected_newline (token_start, "\""); BEGIN INITIAL;
385 <<EOF>> unexpected_eof (token_start, "\""); BEGIN INITIAL;
e9955c83
AD
386}
387
4febdd96
PE
388 /*----------------------------------------------------------.
389 | Scanning a Bison character literal, decoding its escapes. |
390 | The initial quote is already eaten. |
391 `----------------------------------------------------------*/
e9955c83
AD
392
393<SC_ESCAPED_CHARACTER>
394{
db2cc12f 395 "'" {
3b1e470c 396 unsigned char last_string_1;
41141c56
PE
397 STRING_GROW;
398 STRING_FINISH;
3f2d73f1 399 loc->start = token_start;
ca407bdf
PE
400 val->symbol = symbol_get (quotearg_style (escape_quoting_style,
401 last_string),
402 *loc);
41141c56 403 symbol_class_set (val->symbol, token_sym, *loc);
3b1e470c
PE
404 last_string_1 = last_string[1];
405 symbol_user_token_number_set (val->symbol, last_string_1, *loc);
41141c56 406 STRING_FREE;
a706a1cc
PE
407 rule_length++;
408 BEGIN INITIAL;
409 return ID;
e9955c83 410 }
4febdd96
PE
411 \n unexpected_newline (token_start, "'"); BEGIN INITIAL;
412 <<EOF>> unexpected_eof (token_start, "'"); BEGIN INITIAL;
413}
a706a1cc 414
4febdd96
PE
415<SC_ESCAPED_CHARACTER,SC_ESCAPED_STRING>
416{
92ac3705 417 \0 complain_at (*loc, _("invalid null character"));
e9955c83
AD
418}
419
420
421 /*----------------------------.
422 | Decode escaped characters. |
423 `----------------------------*/
424
425<SC_ESCAPED_STRING,SC_ESCAPED_CHARACTER>
426{
d8d3f94a 427 \\[0-7]{1,3} {
1452af69 428 unsigned long int c = strtoul (yytext + 1, 0, 8);
d8d3f94a 429 if (UCHAR_MAX < c)
3f2d73f1 430 complain_at (*loc, _("invalid escape sequence: %s"), quote (yytext));
05ac60f3 431 else if (! c)
92ac3705 432 complain_at (*loc, _("invalid null character: %s"), quote (yytext));
e9955c83 433 else
223ff46e 434 obstack_1grow (&obstack_for_string, c);
e9955c83
AD
435 }
436
6b0d38ab 437 \\x[0-9abcdefABCDEF]+ {
1452af69 438 unsigned long int c;
223ff46e 439 set_errno (0);
d8d3f94a 440 c = strtoul (yytext + 2, 0, 16);
223ff46e 441 if (UCHAR_MAX < c || get_errno ())
3f2d73f1 442 complain_at (*loc, _("invalid escape sequence: %s"), quote (yytext));
92ac3705
PE
443 else if (! c)
444 complain_at (*loc, _("invalid null character: %s"), quote (yytext));
d8d3f94a 445 else
223ff46e 446 obstack_1grow (&obstack_for_string, c);
e9955c83
AD
447 }
448
223ff46e
PE
449 \\a obstack_1grow (&obstack_for_string, '\a');
450 \\b obstack_1grow (&obstack_for_string, '\b');
451 \\f obstack_1grow (&obstack_for_string, '\f');
452 \\n obstack_1grow (&obstack_for_string, '\n');
453 \\r obstack_1grow (&obstack_for_string, '\r');
454 \\t obstack_1grow (&obstack_for_string, '\t');
455 \\v obstack_1grow (&obstack_for_string, '\v');
412f8a59
PE
456
457 /* \\[\"\'?\\] would be shorter, but it confuses xgettext. */
223ff46e 458 \\("\""|"'"|"?"|"\\") obstack_1grow (&obstack_for_string, yytext[1]);
412f8a59 459
6b0d38ab 460 \\(u|U[0-9abcdefABCDEF]{4})[0-9abcdefABCDEF]{4} {
d8d3f94a
PE
461 int c = convert_ucn_to_byte (yytext);
462 if (c < 0)
3f2d73f1 463 complain_at (*loc, _("invalid escape sequence: %s"), quote (yytext));
92ac3705
PE
464 else if (! c)
465 complain_at (*loc, _("invalid null character: %s"), quote (yytext));
d8d3f94a 466 else
223ff46e 467 obstack_1grow (&obstack_for_string, c);
d8d3f94a 468 }
4f25ebb0 469 \\(.|\n) {
3f2d73f1 470 complain_at (*loc, _("unrecognized escape sequence: %s"), quote (yytext));
41141c56 471 STRING_GROW;
e9955c83
AD
472 }
473}
474
4febdd96
PE
475 /*--------------------------------------------.
476 | Scanning user-code characters and strings. |
477 `--------------------------------------------*/
e9955c83 478
4febdd96
PE
479<SC_CHARACTER,SC_STRING>
480{
481 {splice}|\\{splice}[^\n$@\[\]] STRING_GROW;
482}
e9955c83
AD
483
484<SC_CHARACTER>
485{
4febdd96
PE
486 "'" STRING_GROW; BEGIN context_state;
487 \n unexpected_newline (token_start, "'"); BEGIN context_state;
488 <<EOF>> unexpected_eof (token_start, "'"); BEGIN context_state;
e9955c83
AD
489}
490
e9955c83
AD
491<SC_STRING>
492{
4febdd96
PE
493 "\"" STRING_GROW; BEGIN context_state;
494 \n unexpected_newline (token_start, "\""); BEGIN context_state;
495 <<EOF>> unexpected_eof (token_start, "\""); BEGIN context_state;
e9955c83
AD
496}
497
498
499 /*---------------------------------------------------.
500 | Strings, comments etc. can be found in user code. |
501 `---------------------------------------------------*/
502
503<SC_BRACED_CODE,SC_PROLOGUE,SC_EPILOGUE>
504{
3f2d73f1
PE
505 "'" {
506 STRING_GROW;
507 context_state = YY_START;
508 token_start = loc->start;
509 BEGIN SC_CHARACTER;
510 }
511 "\"" {
512 STRING_GROW;
513 context_state = YY_START;
514 token_start = loc->start;
515 BEGIN SC_STRING;
516 }
517 "/"{splice}"*" {
518 STRING_GROW;
519 context_state = YY_START;
520 token_start = loc->start;
521 BEGIN SC_COMMENT;
522 }
523 "/"{splice}"/" {
524 STRING_GROW;
525 context_state = YY_START;
526 BEGIN SC_LINE_COMMENT;
527 }
e9955c83
AD
528}
529
530
624a35e2
PE
531 /*---------------------------------------------------------------.
532 | Scanning after %union etc., possibly followed by white space. |
533 | For %union only, allow arbitrary C code to appear before the |
534 | following brace, as an extension to POSIX. |
535 `---------------------------------------------------------------*/
536
537<SC_PRE_CODE>
538{
539 . {
540 bool valid = yytext[0] == '{' || token_type == PERCENT_UNION;
541 scanner_cursor.column -= mbsnwidth (yytext, yyleng, 0);
542 yyless (0);
543
544 if (valid)
545 {
546 braces_level = -1;
547 code_start = loc->start;
548 BEGIN SC_BRACED_CODE;
549 }
550 else
551 {
552 complain_at (*loc, _("missing `{' in `%s'"),
553 token_name (token_type));
554 obstack_sgrow (&obstack_for_string, "{}");
555 STRING_FINISH;
556 val->chars = last_string;
557 BEGIN INITIAL;
558 return token_type;
559 }
560 }
379f0ac8 561
aa418041 562 <<EOF>> unexpected_eof (scanner_cursor, "{}"); BEGIN INITIAL;
624a35e2
PE
563}
564
565
e9955c83
AD
566 /*---------------------------------------------------------------.
567 | Scanning some code in braces (%union and actions). The initial |
568 | "{" is already eaten. |
569 `---------------------------------------------------------------*/
570
571<SC_BRACED_CODE>
572{
41141c56
PE
573 "{"|"<"{splice}"%" STRING_GROW; braces_level++;
574 "%"{splice}">" STRING_GROW; braces_level--;
e9955c83 575 "}" {
25522739
PE
576 bool outer_brace = --braces_level < 0;
577
578 /* As an undocumented Bison extension, append `;' before the last
579 brace in braced code, so that the user code can omit trailing
580 `;'. But do not append `;' if emulating Yacc, since Yacc does
581 not append one.
582
583 FIXME: Bison should warn if a semicolon seems to be necessary
584 here, and should omit the semicolon if it seems unnecessary
585 (e.g., after ';', '{', or '}', each followed by comments or
586 white space). Such a warning shouldn't depend on --yacc; it
587 should depend on a new --pedantic option, which would cause
588 Bison to warn if it detects an extension to POSIX. --pedantic
589 should also diagnose other Bison extensions like %yacc.
590 Perhaps there should also be a GCC-style --pedantic-errors
591 option, so that such warnings are diagnosed as errors. */
1deb9bdc 592 if (outer_brace && token_type == BRACED_CODE && ! yacc_flag)
25522739
PE
593 obstack_1grow (&obstack_for_string, ';');
594
595 obstack_1grow (&obstack_for_string, '}');
596
597 if (outer_brace)
e9955c83 598 {
41141c56 599 STRING_FINISH;
624a35e2 600 rule_length++;
3f2d73f1 601 loc->start = code_start;
223ff46e 602 val->chars = last_string;
a706a1cc 603 BEGIN INITIAL;
624a35e2 604 return token_type;
e9955c83
AD
605 }
606 }
607
a706a1cc
PE
608 /* Tokenize `<<%' correctly (as `<<' `%') rather than incorrectly
609 (as `<' `<%'). */
41141c56 610 "<"{splice}"<" STRING_GROW;
a706a1cc 611
624a35e2
PE
612 "$"("<"{tag}">")?(-?[0-9]+|"$") handle_dollar (token_type, yytext, *loc);
613 "@"(-?[0-9]+|"$") handle_at (token_type, yytext, *loc);
e9955c83 614
aa418041 615 <<EOF>> unexpected_eof (code_start, "}"); BEGIN INITIAL;
e9955c83
AD
616}
617
618
619 /*--------------------------------------------------------------.
620 | Scanning some prologue: from "%{" (already scanned) to "%}". |
621 `--------------------------------------------------------------*/
622
623<SC_PROLOGUE>
624{
625 "%}" {
41141c56 626 STRING_FINISH;
3f2d73f1 627 loc->start = code_start;
223ff46e 628 val->chars = last_string;
a706a1cc 629 BEGIN INITIAL;
e9955c83
AD
630 return PROLOGUE;
631 }
632
aa418041 633 <<EOF>> unexpected_eof (code_start, "%}"); BEGIN INITIAL;
e9955c83
AD
634}
635
636
637 /*---------------------------------------------------------------.
638 | Scanning the epilogue (everything after the second "%%", which |
d8d3f94a 639 | has already been eaten). |
e9955c83
AD
640 `---------------------------------------------------------------*/
641
642<SC_EPILOGUE>
643{
e9955c83 644 <<EOF>> {
41141c56 645 STRING_FINISH;
3f2d73f1 646 loc->start = code_start;
223ff46e 647 val->chars = last_string;
a706a1cc 648 BEGIN INITIAL;
e9955c83
AD
649 return EPILOGUE;
650 }
651}
652
653
4febdd96
PE
654 /*-----------------------------------------.
655 | Escape M4 quoting characters in C code. |
656 `-----------------------------------------*/
a706a1cc
PE
657
658<SC_COMMENT,SC_LINE_COMMENT,SC_STRING,SC_CHARACTER,SC_BRACED_CODE,SC_PROLOGUE,SC_EPILOGUE>
659{
223ff46e
PE
660 \$ obstack_sgrow (&obstack_for_string, "$][");
661 \@ obstack_sgrow (&obstack_for_string, "@@");
662 \[ obstack_sgrow (&obstack_for_string, "@{");
663 \] obstack_sgrow (&obstack_for_string, "@}");
a706a1cc
PE
664}
665
666
4febdd96
PE
667 /*-----------------------------------------------------.
668 | By default, grow the string obstack with the input. |
669 `-----------------------------------------------------*/
670
671<SC_COMMENT,SC_LINE_COMMENT,SC_BRACED_CODE,SC_PROLOGUE,SC_EPILOGUE,SC_STRING,SC_CHARACTER,SC_ESCAPED_STRING,SC_ESCAPED_CHARACTER>. |
672<SC_COMMENT,SC_LINE_COMMENT,SC_BRACED_CODE,SC_PROLOGUE,SC_EPILOGUE>\n STRING_GROW;
673
e9955c83
AD
674%%
675
cd3684cf
AD
676/* Keeps track of the maximum number of semantic values to the left of
677 a handle (those referenced by $0, $-1, etc.) that are required by the
25005f6a
PH
678 semantic actions of this grammar. */
679int max_left_semantic_context = 0;
680
3f2d73f1
PE
681/* Set *LOC and adjust scanner cursor to account for token TOKEN of
682 size SIZE. */
6c30d641
PE
683
684static void
223ff46e 685adjust_location (location *loc, char const *token, size_t size)
6c30d641 686{
3f2d73f1
PE
687 int line = scanner_cursor.line;
688 int column = scanner_cursor.column;
6c30d641
PE
689 char const *p0 = token;
690 char const *p = token;
691 char const *lim = token + size;
692
3f2d73f1
PE
693 loc->start = scanner_cursor;
694
6c30d641
PE
695 for (p = token; p < lim; p++)
696 switch (*p)
697 {
6c30d641
PE
698 case '\n':
699 line++;
700 column = 1;
701 p0 = p + 1;
702 break;
703
704 case '\t':
705 column += mbsnwidth (p0, p - p0, 0);
706 column += 8 - ((column - 1) & 7);
707 p0 = p + 1;
708 break;
709 }
710
3f2d73f1
PE
711 scanner_cursor.line = line;
712 scanner_cursor.column = column + mbsnwidth (p0, p - p0, 0);
713
714 loc->end = scanner_cursor;
6c30d641
PE
715}
716
717
/* Read up to SIZE bytes from FP into BUF and return the number of
   bytes stored.  Every '\r' is stored as '\n', and a '\n' that
   immediately follows a '\r' is dropped, so that both "\r\n" and an
   isolated '\r' come out as a single '\n'.  */

static size_t
no_cr_read (FILE *fp, char *buf, size_t size)
{
  size_t nread = fread (buf, 1, size, fp);
  char const *src;
  char const *end;
  char *dst;

  if (nread == 0)
    return nread;

  /* Nothing needs rewriting before the first '\r'.  */
  dst = memchr (buf, '\r', nread);
  if (dst == NULL)
    return nread;

  end = buf + nread;
  src = dst;

  /* Compact the rest of the buffer in place.  DST never gets ahead
     of SRC.  */
  while (src < end)
    {
      char c = *src++;

      if (c != '\r')
        {
          *dst++ = c;
          continue;
        }

      *dst++ = '\n';

      if (src < end)
        {
          if (*src == '\n')
            src++;
        }
      else
        {
          /* The '\r' was the last byte read, so peek at the stream:
             swallow a following '\n', push anything else back.  */
          int next = getc (fp);
          if (next != '\n')
            ungetc (next, fp);
        }
    }

  return dst - buf;
}
763
764
e9955c83 765/*------------------------------------------------------------------.
366eea36 766| TEXT is pointing to a wannabee semantic value (i.e., a `$'). |
e9955c83
AD
767| |
768| Possible inputs: $[<TYPENAME>]($|integer) |
769| |
223ff46e 770| Output to OBSTACK_FOR_STRING a reference to this semantic value. |
e9955c83
AD
771`------------------------------------------------------------------*/
772
624a35e2 773static inline bool
223ff46e 774handle_action_dollar (char *text, location loc)
e9955c83
AD
775{
776 const char *type_name = NULL;
366eea36 777 char *cp = text + 1;
e9955c83 778
624a35e2
PE
779 if (! current_rule)
780 return false;
781
e9955c83
AD
782 /* Get the type name if explicit. */
783 if (*cp == '<')
784 {
785 type_name = ++cp;
786 while (*cp != '>')
787 ++cp;
788 *cp = '\0';
789 ++cp;
790 }
791
792 if (*cp == '$')
793 {
794 if (!type_name)
223ff46e 795 type_name = symbol_list_n_type_name_get (current_rule, loc, 0);
e9955c83 796 if (!type_name && typed)
223ff46e 797 complain_at (loc, _("$$ of `%s' has no declared type"),
97650f4e 798 current_rule->sym->tag);
e9955c83
AD
799 if (!type_name)
800 type_name = "";
223ff46e 801 obstack_fgrow1 (&obstack_for_string,
e9955c83
AD
802 "]b4_lhs_value([%s])[", type_name);
803 }
d8d3f94a 804 else
e9955c83 805 {
1452af69 806 long int num;
223ff46e 807 set_errno (0);
d8d3f94a 808 num = strtol (cp, 0, 10);
e9955c83 809
223ff46e 810 if (INT_MIN <= num && num <= rule_length && ! get_errno ())
e9955c83 811 {
d8d3f94a 812 int n = num;
25005f6a
PH
813 if (1-n > max_left_semantic_context)
814 max_left_semantic_context = 1-n;
e9955c83 815 if (!type_name && n > 0)
223ff46e 816 type_name = symbol_list_n_type_name_get (current_rule, loc, n);
e9955c83 817 if (!type_name && typed)
223ff46e
PE
818 complain_at (loc, _("$%d of `%s' has no declared type"),
819 n, current_rule->sym->tag);
e9955c83
AD
820 if (!type_name)
821 type_name = "";
223ff46e 822 obstack_fgrow3 (&obstack_for_string,
05ac60f3 823 "]b4_rhs_value(%d, %d, [%s])[",
e9955c83
AD
824 rule_length, n, type_name);
825 }
d8d3f94a 826 else
223ff46e 827 complain_at (loc, _("integer out of range: %s"), quote (text));
9280d3ef 828 }
9280d3ef 829
624a35e2 830 return true;
e9955c83
AD
831}
832
f25bfb75 833
cd3684cf
AD
834/*----------------------------------------------------------------.
835| Map `$?' onto the proper M4 symbol, depending on its TOKEN_TYPE |
836| (are we in an action?). |
837`----------------------------------------------------------------*/
e9955c83
AD
838
839static void
624a35e2 840handle_dollar (int token_type, char *text, location loc)
f25bfb75 841{
624a35e2 842 switch (token_type)
f25bfb75 843 {
624a35e2
PE
844 case BRACED_CODE:
845 if (handle_action_dollar (text, loc))
846 return;
f25bfb75
AD
847 break;
848
624a35e2 849 case PERCENT_DESTRUCTOR:
cd3684cf 850 case PERCENT_INITIAL_ACTION:
624a35e2
PE
851 case PERCENT_PRINTER:
852 if (text[1] == '$')
853 {
854 obstack_sgrow (&obstack_for_string, "]b4_dollar_dollar[");
855 return;
856 }
857 break;
858
859 default:
f25bfb75
AD
860 break;
861 }
624a35e2
PE
862
863 complain_at (loc, _("invalid value: %s"), quote (text));
f25bfb75
AD
864}
865
866
867/*------------------------------------------------------.
868| TEXT is a location token (i.e., a `@...'). Output to |
223ff46e 869| OBSTACK_FOR_STRING a reference to this location. |
f25bfb75
AD
870`------------------------------------------------------*/
871
624a35e2 872static inline bool
223ff46e 873handle_action_at (char *text, location loc)
e9955c83 874{
366eea36 875 char *cp = text + 1;
d0829076 876 locations_flag = true;
e9955c83 877
624a35e2
PE
878 if (! current_rule)
879 return false;
880
366eea36 881 if (*cp == '$')
624a35e2 882 obstack_sgrow (&obstack_for_string, "]b4_lhs_location[");
d8d3f94a 883 else
e9955c83 884 {
1452af69 885 long int num;
223ff46e 886 set_errno (0);
d8d3f94a 887 num = strtol (cp, 0, 10);
dafdc66f 888
223ff46e 889 if (INT_MIN <= num && num <= rule_length && ! get_errno ())
d8d3f94a
PE
890 {
891 int n = num;
05ac60f3 892 obstack_fgrow2 (&obstack_for_string, "]b4_rhs_location(%d, %d)[",
d8d3f94a
PE
893 rule_length, n);
894 }
e9955c83 895 else
223ff46e 896 complain_at (loc, _("integer out of range: %s"), quote (text));
f25bfb75 897 }
f25bfb75 898
624a35e2 899 return true;
e9955c83 900}
4cdb01db 901
f25bfb75 902
cd3684cf
AD
903/*----------------------------------------------------------------.
904| Map `@?' onto the proper M4 symbol, depending on its TOKEN_TYPE |
905| (are we in an action?). |
906`----------------------------------------------------------------*/
f25bfb75
AD
907
908static void
624a35e2 909handle_at (int token_type, char *text, location loc)
f25bfb75 910{
624a35e2 911 switch (token_type)
f25bfb75 912 {
624a35e2 913 case BRACED_CODE:
223ff46e 914 handle_action_at (text, loc);
624a35e2
PE
915 return;
916
cd3684cf 917 case PERCENT_INITIAL_ACTION:
624a35e2
PE
918 case PERCENT_DESTRUCTOR:
919 case PERCENT_PRINTER:
920 if (text[1] == '$')
921 {
922 obstack_sgrow (&obstack_for_string, "]b4_at_dollar[");
923 return;
924 }
f25bfb75
AD
925 break;
926
624a35e2 927 default:
f25bfb75
AD
928 break;
929 }
624a35e2
PE
930
931 complain_at (loc, _("invalid value: %s"), quote (text));
f25bfb75
AD
932}
933
934
1452af69
PE
935/*------------------------------------------------------.
936| Scan NUMBER for a base-BASE integer at location LOC. |
937`------------------------------------------------------*/
938
939static unsigned long int
940scan_integer (char const *number, int base, location loc)
941{
942 unsigned long int num;
943 set_errno (0);
944 num = strtoul (number, 0, base);
945 if (INT_MAX < num || get_errno ())
946 {
947 complain_at (loc, _("integer out of range: %s"), quote (number));
948 num = INT_MAX;
949 }
950 return num;
951}
952
953
d8d3f94a
PE
/* Convert the universal character name UCN to a single-byte
   character and return that character.  The hexadecimal digits are
   taken to start at UCN + 2.  Return -1 if UCN does not correspond to
   a single-byte character.  */

static int
convert_ucn_to_byte (char const *ucn)
{
  unsigned long int code_point = strtoul (&ucn[2], NULL, 16);

  /* FIXME: Currently we assume Unicode-compatible unibyte characters
     on ASCII hosts (i.e., Latin-1 on hosts with 8-bit bytes).  On
     non-ASCII hosts we support only the portable C character set.
     These limitations should be removed once we add support for
     multibyte characters.  */

  if (code_point > UCHAR_MAX)
    return -1;

#if ! ('$' == 0x24 && '@' == 0x40 && '`' == 0x60 && '~' == 0x7e)
  {
    /* A non-ASCII host.  Use the code point to index into a table of
       the C basic execution character set, which is guaranteed to
       exist on all Standard C platforms.  This table also includes
       '$', '@', and '`', which are not in the basic execution
       character set but which are unibyte characters on all the
       platforms that we know about.  */
    static signed char const table[] =
      {
        '\0',   -1,   -1,   -1,   -1,   -1,   -1, '\a',
        '\b', '\t', '\n', '\v', '\f', '\r',   -1,   -1,
          -1,   -1,   -1,   -1,   -1,   -1,   -1,   -1,
          -1,   -1,   -1,   -1,   -1,   -1,   -1,   -1,
         ' ',  '!',  '"',  '#',  '$',  '%',  '&', '\'',
         '(',  ')',  '*',  '+',  ',',  '-',  '.',  '/',
         '0',  '1',  '2',  '3',  '4',  '5',  '6',  '7',
         '8',  '9',  ':',  ';',  '<',  '=',  '>',  '?',
         '@',  'A',  'B',  'C',  'D',  'E',  'F',  'G',
         'H',  'I',  'J',  'K',  'L',  'M',  'N',  'O',
         'P',  'Q',  'R',  'S',  'T',  'U',  'V',  'W',
         'X',  'Y',  'Z',  '[', '\\',  ']',  '^',  '_',
         '`',  'a',  'b',  'c',  'd',  'e',  'f',  'g',
         'h',  'i',  'j',  'k',  'l',  'm',  'n',  'o',
         'p',  'q',  'r',  's',  't',  'u',  'v',  'w',
         'x',  'y',  'z',  '{',  '|',  '}',  '~'
      };

    if (sizeof table <= code_point)
      return -1;
    return table[code_point];
  }
#else
  return code_point;
#endif
}
1008
1009
900c5db5
AD
1010/*----------------------------------------------------------------.
1011| Handle `#line INT "FILE"'. ARGS has already skipped `#line '. |
1012`----------------------------------------------------------------*/
1013
1014static void
3f2d73f1 1015handle_syncline (char *args)
900c5db5
AD
1016{
1017 int lineno = strtol (args, &args, 10);
1018 const char *file = NULL;
1019 file = strchr (args, '"') + 1;
1020 *strchr (file, '"') = 0;
dca81a78 1021 scanner_cursor.file = current_file = uniqstr_new (file);
3f2d73f1
PE
1022 scanner_cursor.line = lineno;
1023 scanner_cursor.column = 1;
900c5db5
AD
1024}
1025
a706a1cc 1026
4febdd96
PE
1027/*----------------------------------------------------------------.
1028| For a token or comment starting at START, report message MSGID, |
1029| which should say that an end marker was found before |
1030| the expected TOKEN_END. |
1031`----------------------------------------------------------------*/
1032
1033static void
1034unexpected_end (boundary start, char const *msgid, char const *token_end)
1035{
1036 location loc;
1037 loc.start = start;
1038 loc.end = scanner_cursor;
1039 complain_at (loc, _(msgid), token_end);
1040}
1041
1042
3f2d73f1
PE
1043/*------------------------------------------------------------------------.
1044| Report an unexpected EOF in a token or comment starting at START. |
1045| An end of file was encountered and the expected TOKEN_END was missing. |
3f2d73f1 1046`------------------------------------------------------------------------*/
a706a1cc
PE
1047
1048static void
aa418041 1049unexpected_eof (boundary start, char const *token_end)
a706a1cc 1050{
4febdd96
PE
1051 unexpected_end (start, N_("missing `%s' at end of file"), token_end);
1052}
1053
1054
1055/*----------------------------------------.
1056| Likewise, but for unexpected newlines. |
1057`----------------------------------------*/
1058
1059static void
1060unexpected_newline (boundary start, char const *token_end)
1061{
1062 unexpected_end (start, N_("missing `%s' at end of line"), token_end);
a706a1cc
PE
1063}
1064
1065
f25bfb75
AD
1066/*-------------------------.
1067| Initialize the scanner. |
1068`-------------------------*/
1069
1d6412ad
AD
1070void
1071scanner_initialize (void)
1072{
223ff46e 1073 obstack_init (&obstack_for_string);
1d6412ad
AD
1074}
1075
1076
f25bfb75
AD
1077/*-----------------------------------------------.
1078| Free all the memory allocated to the scanner. |
1079`-----------------------------------------------*/
1080
4cdb01db
AD
1081void
1082scanner_free (void)
1083{
223ff46e 1084 obstack_free (&obstack_for_string, 0);
536545f3
AD
1085 /* Reclaim Flex's buffers. */
1086 yy_delete_buffer (YY_CURRENT_BUFFER);
4cdb01db 1087}