]> git.saurik.com Git - bison.git/blame - src/scan-gram.l
* bootstrap (gnulib_modules): Add strverscmp.
[bison.git] / src / scan-gram.l
CommitLineData
e9955c83 1/* Bison Grammar Scanner -*- C -*-
3b1e470c 2
05ac60f3 3 Copyright (C) 2002, 2003, 2004, 2005 Free Software Foundation, Inc.
e9955c83
AD
4
5 This file is part of Bison, the GNU Compiler Compiler.
6
7 This program is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 2 of the License, or
10 (at your option) any later version.
11
12 This program is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
16
17 You should have received a copy of the GNU General Public License
18 along with this program; if not, write to the Free Software
0fb669f9
PE
19 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
20 02110-1301 USA
e9955c83
AD
21*/
22
aa418041 23%option debug nodefault nounput noyywrap never-interactive
e9955c83
AD
24%option prefix="gram_" outfile="lex.yy.c"
25
26%{
27#include "system.h"
223ff46e
PE
28
29#include <mbswidth.h>
30#include <get-errno.h>
31#include <quote.h>
32
e9955c83 33#include "complain.h"
3f2d73f1 34#include "files.h"
e9955c83
AD
35#include "getargs.h"
36#include "gram.h"
ca407bdf 37#include "quotearg.h"
e9955c83 38#include "reader.h"
223ff46e 39#include "uniqstr.h"
e9955c83 40
3f2d73f1
PE
41#define YY_USER_INIT \
42 do \
43 { \
44 scanner_cursor.file = current_file; \
45 scanner_cursor.line = 1; \
46 scanner_cursor.column = 1; \
379f0ac8 47 code_start = scanner_cursor; \
3f2d73f1
PE
48 } \
49 while (0)
8efe435c 50
dc9701e8
PE
51/* Pacify "gcc -Wmissing-prototypes" when flex 2.5.31 is used. */
52int gram_get_lineno (void);
53FILE *gram_get_in (void);
54FILE *gram_get_out (void);
55int gram_get_leng (void);
56char *gram_get_text (void);
57void gram_set_lineno (int);
58void gram_set_in (FILE *);
59void gram_set_out (FILE *);
60int gram_get_debug (void);
61void gram_set_debug (int);
62int gram_lex_destroy (void);
63
3f2d73f1
PE
64/* Location of scanner cursor. */
65boundary scanner_cursor;
41141c56 66
223ff46e 67static void adjust_location (location *, char const *, size_t);
3f2d73f1 68#define YY_USER_ACTION adjust_location (loc, yytext, yyleng);
d8d3f94a 69
6c30d641 70static size_t no_cr_read (FILE *, char *, size_t);
d8d3f94a
PE
71#define YY_INPUT(buf, result, size) ((result) = no_cr_read (yyin, buf, size))
72
73
223ff46e 74/* OBSTACK_FOR_STRING -- Used to store all the characters that we need to
44995b2e
AD
75 keep (to construct ID, STRINGS etc.). Use the following macros to
76 use it.
77
41141c56
PE
78 Use STRING_GROW to append what has just been matched, and
79 STRING_FINISH to end the string (it puts the ending 0).
80 STRING_FINISH also stores this string in LAST_STRING, which can be
81 used, and which is used by STRING_FREE to free the last string. */
44995b2e 82
223ff46e 83static struct obstack obstack_for_string;
44995b2e 84
7ec2d4cd
AD
85/* A string representing the most recently saved token. */
86static char *last_string;
87
88
41141c56 89#define STRING_GROW \
223ff46e 90 obstack_grow (&obstack_for_string, yytext, yyleng)
44995b2e 91
41141c56 92#define STRING_FINISH \
44995b2e 93 do { \
223ff46e
PE
94 obstack_1grow (&obstack_for_string, '\0'); \
95 last_string = obstack_finish (&obstack_for_string); \
44995b2e
AD
96 } while (0)
97
41141c56 98#define STRING_FREE \
223ff46e 99 obstack_free (&obstack_for_string, last_string)
e9955c83 100
7ec2d4cd
AD
101void
102scanner_last_string_free (void)
103{
41141c56 104 STRING_FREE;
7ec2d4cd 105}
e9955c83 106
efcb44dd
PE
107/* Within well-formed rules, RULE_LENGTH is the number of values in
108 the current rule so far, which says where to find `$0' with respect
109 to the top of the stack. It is not the same as the rule->length in
110 the case of mid rule actions.
111
112 Outside of well-formed rules, RULE_LENGTH has an undefined value. */
113static int rule_length;
114
624a35e2
PE
115static void handle_dollar (int token_type, char *cp, location loc);
116static void handle_at (int token_type, char *cp, location loc);
3f2d73f1 117static void handle_syncline (char *args);
1452af69 118static unsigned long int scan_integer (char const *p, int base, location loc);
d8d3f94a 119static int convert_ucn_to_byte (char const *hex_text);
aa418041 120static void unexpected_eof (boundary, char const *);
4febdd96 121static void unexpected_newline (boundary, char const *);
e9955c83
AD
122
123%}
d8d3f94a 124%x SC_COMMENT SC_LINE_COMMENT SC_YACC_COMMENT
e9955c83 125%x SC_STRING SC_CHARACTER
3f2d73f1 126%x SC_AFTER_IDENTIFIER
e9955c83 127%x SC_ESCAPED_STRING SC_ESCAPED_CHARACTER
624a35e2 128%x SC_PRE_CODE SC_BRACED_CODE SC_PROLOGUE SC_EPILOGUE
e9955c83 129
29c01725
AD
130letter [.abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ_]
131id {letter}({letter}|[0-9])*
132directive %{letter}({letter}|[0-9]|-)*
624a35e2 133int [0-9]+
d8d3f94a
PE
134
135/* POSIX says that a tag must be both an id and a C union member, but
136 historically almost any character is allowed in a tag. We disallow
137 NUL and newline, as this simplifies our implementation. */
138tag [^\0\n>]+
139
140/* Zero or more instances of backslash-newline. Following GCC, allow
141 white space between the backslash and the newline. */
142splice (\\[ \f\t\v]*\n)*
e9955c83
AD
143
144%%
145%{
a706a1cc 146 /* Nesting level of the current code in braces. */
1a9e39f1
PE
147 int braces_level IF_LINT (= 0);
148
3f2d73f1
PE
149 /* Parent context state, when applicable. */
150 int context_state IF_LINT (= 0);
a706a1cc 151
624a35e2
PE
152 /* Token type to return, when applicable. */
153 int token_type IF_LINT (= 0);
154
3f2d73f1 155 /* Location of most recent identifier, when applicable. */
a2bc9dbc 156 location id_loc IF_LINT (= empty_location);
3f2d73f1 157
a2bc9dbc
PE
158 /* Where containing code started, when applicable. Its initial
159 value is relevant only when yylex is invoked in the SC_EPILOGUE
160 start condition. */
161 boundary code_start = scanner_cursor;
3f2d73f1 162
223ff46e
PE
163 /* Where containing comment or string or character literal started,
164 when applicable. */
a2bc9dbc 165 boundary token_start IF_LINT (= scanner_cursor);
e9955c83
AD
166%}
167
168
3f2d73f1
PE
169 /*-----------------------.
170 | Scanning white space. |
171 `-----------------------*/
172
624a35e2 173<INITIAL,SC_AFTER_IDENTIFIER,SC_PRE_CODE>
3f2d73f1 174{
4febdd96 175 /* Comments and white space. */
83adb046 176 "," warn_at (*loc, _("stray `,' treated as white space"));
4febdd96 177 [ \f\n\t\v] |
3f2d73f1 178 "//".* ;
83adb046
PE
179 "/*" {
180 token_start = loc->start;
181 context_state = YY_START;
182 BEGIN SC_YACC_COMMENT;
183 }
3f2d73f1
PE
184
185 /* #line directives are not documented, and may be withdrawn or
186 modified in future versions of Bison. */
187 ^"#line "{int}" \"".*"\"\n" {
188 handle_syncline (yytext + sizeof "#line " - 1);
189 }
190}
191
192
e9955c83
AD
193 /*----------------------------.
194 | Scanning Bison directives. |
195 `----------------------------*/
196<INITIAL>
197{
198 "%binary" return PERCENT_NONASSOC;
199 "%debug" return PERCENT_DEBUG;
39a06c25 200 "%default"[-_]"prec" return PERCENT_DEFAULT_PREC;
e9955c83
AD
201 "%define" return PERCENT_DEFINE;
202 "%defines" return PERCENT_DEFINES;
624a35e2 203 "%destructor" token_type = PERCENT_DESTRUCTOR; BEGIN SC_PRE_CODE;
676385e2 204 "%dprec" return PERCENT_DPREC;
e9955c83
AD
205 "%error"[-_]"verbose" return PERCENT_ERROR_VERBOSE;
206 "%expect" return PERCENT_EXPECT;
d6328241 207 "%expect"[-_]"rr" return PERCENT_EXPECT_RR;
e9955c83
AD
208 "%file-prefix" return PERCENT_FILE_PREFIX;
209 "%fixed"[-_]"output"[-_]"files" return PERCENT_YACC;
cd3684cf 210 "%initial-action" token_type = PERCENT_INITIAL_ACTION; BEGIN SC_PRE_CODE;
ae7453f2 211 "%glr-parser" return PERCENT_GLR_PARSER;
e9955c83 212 "%left" return PERCENT_LEFT;
624a35e2 213 "%lex-param" token_type = PERCENT_LEX_PARAM; BEGIN SC_PRE_CODE;
e9955c83 214 "%locations" return PERCENT_LOCATIONS;
676385e2 215 "%merge" return PERCENT_MERGE;
e9955c83 216 "%name"[-_]"prefix" return PERCENT_NAME_PREFIX;
22fccf95 217 "%no"[-_]"default"[-_]"prec" return PERCENT_NO_DEFAULT_PREC;
e9955c83
AD
218 "%no"[-_]"lines" return PERCENT_NO_LINES;
219 "%nonassoc" return PERCENT_NONASSOC;
916708d5 220 "%nondeterministic-parser" return PERCENT_NONDETERMINISTIC_PARSER;
e9955c83
AD
221 "%nterm" return PERCENT_NTERM;
222 "%output" return PERCENT_OUTPUT;
624a35e2 223 "%parse-param" token_type = PERCENT_PARSE_PARAM; BEGIN SC_PRE_CODE;
d8d3f94a 224 "%prec" rule_length--; return PERCENT_PREC;
624a35e2 225 "%printer" token_type = PERCENT_PRINTER; BEGIN SC_PRE_CODE;
e9955c83 226 "%pure"[-_]"parser" return PERCENT_PURE_PARSER;
b50d2359 227 "%require" return PERCENT_REQUIRE;
e9955c83
AD
228 "%right" return PERCENT_RIGHT;
229 "%skeleton" return PERCENT_SKELETON;
230 "%start" return PERCENT_START;
231 "%term" return PERCENT_TOKEN;
232 "%token" return PERCENT_TOKEN;
233 "%token"[-_]"table" return PERCENT_TOKEN_TABLE;
234 "%type" return PERCENT_TYPE;
624a35e2 235 "%union" token_type = PERCENT_UNION; BEGIN SC_PRE_CODE;
e9955c83
AD
236 "%verbose" return PERCENT_VERBOSE;
237 "%yacc" return PERCENT_YACC;
238
3f2d73f1 239 {directive} {
41141c56 240 complain_at (*loc, _("invalid directive: %s"), quote (yytext));
412f8a59 241 }
900c5db5 242
e9955c83 243 "=" return EQUAL;
d8d3f94a 244 "|" rule_length = 0; return PIPE;
e9955c83
AD
245 ";" return SEMICOLON;
246
3f2d73f1 247 {id} {
41141c56 248 val->symbol = symbol_get (yytext, *loc);
3f2d73f1 249 id_loc = *loc;
efcb44dd 250 rule_length++;
3f2d73f1 251 BEGIN SC_AFTER_IDENTIFIER;
e9955c83
AD
252 }
253
d8d3f94a 254 {int} {
1452af69
PE
255 val->integer = scan_integer (yytext, 10, *loc);
256 return INT;
257 }
258 0[xX][0-9abcdefABCDEF]+ {
259 val->integer = scan_integer (yytext, 16, *loc);
d8d3f94a
PE
260 return INT;
261 }
e9955c83
AD
262
263 /* Characters. We don't check there is only one. */
3f2d73f1 264 "'" STRING_GROW; token_start = loc->start; BEGIN SC_ESCAPED_CHARACTER;
e9955c83
AD
265
266 /* Strings. */
ca407bdf 267 "\"" token_start = loc->start; BEGIN SC_ESCAPED_STRING;
e9955c83
AD
268
269 /* Prologue. */
3f2d73f1 270 "%{" code_start = loc->start; BEGIN SC_PROLOGUE;
e9955c83
AD
271
272 /* Code in between braces. */
3f2d73f1
PE
273 "{" {
274 STRING_GROW;
624a35e2 275 token_type = BRACED_CODE;
3f2d73f1
PE
276 braces_level = 0;
277 code_start = loc->start;
278 BEGIN SC_BRACED_CODE;
279 }
e9955c83
AD
280
281 /* A type. */
d8d3f94a 282 "<"{tag}">" {
223ff46e 283 obstack_grow (&obstack_for_string, yytext + 1, yyleng - 2);
41141c56 284 STRING_FINISH;
223ff46e 285 val->uniqstr = uniqstr_new (last_string);
41141c56 286 STRING_FREE;
4cdb01db
AD
287 return TYPE;
288 }
289
a706a1cc
PE
290 "%%" {
291 static int percent_percent_count;
e9955c83 292 if (++percent_percent_count == 2)
a2bc9dbc 293 BEGIN SC_EPILOGUE;
e9955c83
AD
294 return PERCENT_PERCENT;
295 }
296
a706a1cc 297 . {
41141c56 298 complain_at (*loc, _("invalid character: %s"), quote (yytext));
3f2d73f1 299 }
379f0ac8
PE
300
301 <<EOF>> {
302 loc->start = loc->end = scanner_cursor;
303 yyterminate ();
304 }
3f2d73f1
PE
305}
306
307
308 /*-----------------------------------------------------------------.
309 | Scanning after an identifier, checking whether a colon is next. |
310 `-----------------------------------------------------------------*/
311
312<SC_AFTER_IDENTIFIER>
313{
314 ":" {
315 rule_length = 0;
316 *loc = id_loc;
317 BEGIN INITIAL;
318 return ID_COLON;
319 }
320 . {
321 scanner_cursor.column -= mbsnwidth (yytext, yyleng, 0);
322 yyless (0);
323 *loc = id_loc;
324 BEGIN INITIAL;
325 return ID;
326 }
327 <<EOF>> {
328 *loc = id_loc;
329 BEGIN INITIAL;
330 return ID;
e9955c83
AD
331 }
332}
333
334
d8d3f94a
PE
335 /*---------------------------------------------------------------.
336 | Scanning a Yacc comment. The initial `/ *' is already eaten. |
337 `---------------------------------------------------------------*/
e9955c83 338
d8d3f94a 339<SC_YACC_COMMENT>
e9955c83 340{
3f2d73f1 341 "*/" BEGIN context_state;
a706a1cc 342 .|\n ;
aa418041 343 <<EOF>> unexpected_eof (token_start, "*/"); BEGIN context_state;
d8d3f94a
PE
344}
345
346
347 /*------------------------------------------------------------.
348 | Scanning a C comment. The initial `/ *' is already eaten. |
349 `------------------------------------------------------------*/
350
351<SC_COMMENT>
352{
3f2d73f1 353 "*"{splice}"/" STRING_GROW; BEGIN context_state;
aa418041 354 <<EOF>> unexpected_eof (token_start, "*/"); BEGIN context_state;
e9955c83
AD
355}
356
357
d8d3f94a
PE
358 /*--------------------------------------------------------------.
359 | Scanning a line comment. The initial `//' is already eaten. |
360 `--------------------------------------------------------------*/
361
362<SC_LINE_COMMENT>
363{
3f2d73f1 364 "\n" STRING_GROW; BEGIN context_state;
41141c56 365 {splice} STRING_GROW;
3f2d73f1 366 <<EOF>> BEGIN context_state;
d8d3f94a
PE
367}
368
369
4febdd96
PE
370 /*------------------------------------------------.
371 | Scanning a Bison string, including its escapes. |
372 | The initial quote is already eaten. |
373 `------------------------------------------------*/
e9955c83
AD
374
375<SC_ESCAPED_STRING>
376{
db2cc12f 377 "\"" {
41141c56 378 STRING_FINISH;
3f2d73f1 379 loc->start = token_start;
223ff46e 380 val->chars = last_string;
efcb44dd 381 rule_length++;
a706a1cc 382 BEGIN INITIAL;
e9955c83
AD
383 return STRING;
384 }
4febdd96
PE
385 \n unexpected_newline (token_start, "\""); BEGIN INITIAL;
386 <<EOF>> unexpected_eof (token_start, "\""); BEGIN INITIAL;
e9955c83
AD
387}
388
4febdd96
PE
389 /*----------------------------------------------------------.
390 | Scanning a Bison character literal, decoding its escapes. |
391 | The initial quote is already eaten. |
392 `----------------------------------------------------------*/
e9955c83
AD
393
394<SC_ESCAPED_CHARACTER>
395{
db2cc12f 396 "'" {
3b1e470c 397 unsigned char last_string_1;
41141c56
PE
398 STRING_GROW;
399 STRING_FINISH;
3f2d73f1 400 loc->start = token_start;
ca407bdf
PE
401 val->symbol = symbol_get (quotearg_style (escape_quoting_style,
402 last_string),
403 *loc);
41141c56 404 symbol_class_set (val->symbol, token_sym, *loc);
3b1e470c
PE
405 last_string_1 = last_string[1];
406 symbol_user_token_number_set (val->symbol, last_string_1, *loc);
41141c56 407 STRING_FREE;
a706a1cc
PE
408 rule_length++;
409 BEGIN INITIAL;
410 return ID;
e9955c83 411 }
4febdd96
PE
412 \n unexpected_newline (token_start, "'"); BEGIN INITIAL;
413 <<EOF>> unexpected_eof (token_start, "'"); BEGIN INITIAL;
414}
a706a1cc 415
4febdd96
PE
416<SC_ESCAPED_CHARACTER,SC_ESCAPED_STRING>
417{
92ac3705 418 \0 complain_at (*loc, _("invalid null character"));
e9955c83
AD
419}
420
421
422 /*----------------------------.
423 | Decode escaped characters. |
424 `----------------------------*/
425
426<SC_ESCAPED_STRING,SC_ESCAPED_CHARACTER>
427{
d8d3f94a 428 \\[0-7]{1,3} {
1452af69 429 unsigned long int c = strtoul (yytext + 1, 0, 8);
d8d3f94a 430 if (UCHAR_MAX < c)
3f2d73f1 431 complain_at (*loc, _("invalid escape sequence: %s"), quote (yytext));
05ac60f3 432 else if (! c)
92ac3705 433 complain_at (*loc, _("invalid null character: %s"), quote (yytext));
e9955c83 434 else
223ff46e 435 obstack_1grow (&obstack_for_string, c);
e9955c83
AD
436 }
437
6b0d38ab 438 \\x[0-9abcdefABCDEF]+ {
1452af69 439 unsigned long int c;
223ff46e 440 set_errno (0);
d8d3f94a 441 c = strtoul (yytext + 2, 0, 16);
223ff46e 442 if (UCHAR_MAX < c || get_errno ())
3f2d73f1 443 complain_at (*loc, _("invalid escape sequence: %s"), quote (yytext));
92ac3705
PE
444 else if (! c)
445 complain_at (*loc, _("invalid null character: %s"), quote (yytext));
d8d3f94a 446 else
223ff46e 447 obstack_1grow (&obstack_for_string, c);
e9955c83
AD
448 }
449
223ff46e
PE
450 \\a obstack_1grow (&obstack_for_string, '\a');
451 \\b obstack_1grow (&obstack_for_string, '\b');
452 \\f obstack_1grow (&obstack_for_string, '\f');
453 \\n obstack_1grow (&obstack_for_string, '\n');
454 \\r obstack_1grow (&obstack_for_string, '\r');
455 \\t obstack_1grow (&obstack_for_string, '\t');
456 \\v obstack_1grow (&obstack_for_string, '\v');
412f8a59
PE
457
458 /* \\[\"\'?\\] would be shorter, but it confuses xgettext. */
223ff46e 459 \\("\""|"'"|"?"|"\\") obstack_1grow (&obstack_for_string, yytext[1]);
412f8a59 460
6b0d38ab 461 \\(u|U[0-9abcdefABCDEF]{4})[0-9abcdefABCDEF]{4} {
d8d3f94a
PE
462 int c = convert_ucn_to_byte (yytext);
463 if (c < 0)
3f2d73f1 464 complain_at (*loc, _("invalid escape sequence: %s"), quote (yytext));
92ac3705
PE
465 else if (! c)
466 complain_at (*loc, _("invalid null character: %s"), quote (yytext));
d8d3f94a 467 else
223ff46e 468 obstack_1grow (&obstack_for_string, c);
d8d3f94a 469 }
4f25ebb0 470 \\(.|\n) {
3f2d73f1 471 complain_at (*loc, _("unrecognized escape sequence: %s"), quote (yytext));
41141c56 472 STRING_GROW;
e9955c83
AD
473 }
474}
475
4febdd96
PE
476 /*--------------------------------------------.
477 | Scanning user-code characters and strings. |
478 `--------------------------------------------*/
e9955c83 479
4febdd96
PE
480<SC_CHARACTER,SC_STRING>
481{
482 {splice}|\\{splice}[^\n$@\[\]] STRING_GROW;
483}
e9955c83
AD
484
485<SC_CHARACTER>
486{
4febdd96
PE
487 "'" STRING_GROW; BEGIN context_state;
488 \n unexpected_newline (token_start, "'"); BEGIN context_state;
489 <<EOF>> unexpected_eof (token_start, "'"); BEGIN context_state;
e9955c83
AD
490}
491
e9955c83
AD
492<SC_STRING>
493{
4febdd96
PE
494 "\"" STRING_GROW; BEGIN context_state;
495 \n unexpected_newline (token_start, "\""); BEGIN context_state;
496 <<EOF>> unexpected_eof (token_start, "\""); BEGIN context_state;
e9955c83
AD
497}
498
499
500 /*---------------------------------------------------.
501 | Strings, comments etc. can be found in user code. |
502 `---------------------------------------------------*/
503
504<SC_BRACED_CODE,SC_PROLOGUE,SC_EPILOGUE>
505{
3f2d73f1
PE
506 "'" {
507 STRING_GROW;
508 context_state = YY_START;
509 token_start = loc->start;
510 BEGIN SC_CHARACTER;
511 }
512 "\"" {
513 STRING_GROW;
514 context_state = YY_START;
515 token_start = loc->start;
516 BEGIN SC_STRING;
517 }
518 "/"{splice}"*" {
519 STRING_GROW;
520 context_state = YY_START;
521 token_start = loc->start;
522 BEGIN SC_COMMENT;
523 }
524 "/"{splice}"/" {
525 STRING_GROW;
526 context_state = YY_START;
527 BEGIN SC_LINE_COMMENT;
528 }
e9955c83
AD
529}
530
531
624a35e2
PE
532 /*---------------------------------------------------------------.
533 | Scanning after %union etc., possibly followed by white space. |
534 | For %union only, allow arbitrary C code to appear before the |
535 | following brace, as an extension to POSIX. |
536 `---------------------------------------------------------------*/
537
538<SC_PRE_CODE>
539{
540 . {
541 bool valid = yytext[0] == '{' || token_type == PERCENT_UNION;
542 scanner_cursor.column -= mbsnwidth (yytext, yyleng, 0);
543 yyless (0);
544
545 if (valid)
546 {
547 braces_level = -1;
548 code_start = loc->start;
549 BEGIN SC_BRACED_CODE;
550 }
551 else
552 {
6d07bacf 553 complain_at (*loc, _("missing `{' in %s"),
624a35e2
PE
554 token_name (token_type));
555 obstack_sgrow (&obstack_for_string, "{}");
556 STRING_FINISH;
557 val->chars = last_string;
558 BEGIN INITIAL;
559 return token_type;
560 }
561 }
379f0ac8 562
aa418041 563 <<EOF>> unexpected_eof (scanner_cursor, "{}"); BEGIN INITIAL;
624a35e2
PE
564}
565
566
e9955c83
AD
567 /*---------------------------------------------------------------.
568 | Scanning some code in braces (%union and actions). The initial |
569 | "{" is already eaten. |
570 `---------------------------------------------------------------*/
571
572<SC_BRACED_CODE>
573{
41141c56
PE
574 "{"|"<"{splice}"%" STRING_GROW; braces_level++;
575 "%"{splice}">" STRING_GROW; braces_level--;
e9955c83 576 "}" {
25522739
PE
577 bool outer_brace = --braces_level < 0;
578
579 /* As an undocumented Bison extension, append `;' before the last
580 brace in braced code, so that the user code can omit trailing
581 `;'. But do not append `;' if emulating Yacc, since Yacc does
582 not append one.
583
584 FIXME: Bison should warn if a semicolon seems to be necessary
585 here, and should omit the semicolon if it seems unnecessary
586 (e.g., after ';', '{', or '}', each followed by comments or
587 white space). Such a warning shouldn't depend on --yacc; it
588 should depend on a new --pedantic option, which would cause
589 Bison to warn if it detects an extension to POSIX. --pedantic
590 should also diagnose other Bison extensions like %yacc.
591 Perhaps there should also be a GCC-style --pedantic-errors
592 option, so that such warnings are diagnosed as errors. */
1deb9bdc 593 if (outer_brace && token_type == BRACED_CODE && ! yacc_flag)
25522739
PE
594 obstack_1grow (&obstack_for_string, ';');
595
596 obstack_1grow (&obstack_for_string, '}');
597
598 if (outer_brace)
e9955c83 599 {
41141c56 600 STRING_FINISH;
624a35e2 601 rule_length++;
3f2d73f1 602 loc->start = code_start;
223ff46e 603 val->chars = last_string;
a706a1cc 604 BEGIN INITIAL;
624a35e2 605 return token_type;
e9955c83
AD
606 }
607 }
608
a706a1cc
PE
609 /* Tokenize `<<%' correctly (as `<<' `%') rather than incorrectly
610 (as `<' `<%'). */
41141c56 611 "<"{splice}"<" STRING_GROW;
a706a1cc 612
624a35e2
PE
613 "$"("<"{tag}">")?(-?[0-9]+|"$") handle_dollar (token_type, yytext, *loc);
614 "@"(-?[0-9]+|"$") handle_at (token_type, yytext, *loc);
e9955c83 615
aa418041 616 <<EOF>> unexpected_eof (code_start, "}"); BEGIN INITIAL;
e9955c83
AD
617}
618
619
620 /*--------------------------------------------------------------.
621 | Scanning some prologue: from "%{" (already scanned) to "%}". |
622 `--------------------------------------------------------------*/
623
624<SC_PROLOGUE>
625{
626 "%}" {
41141c56 627 STRING_FINISH;
3f2d73f1 628 loc->start = code_start;
223ff46e 629 val->chars = last_string;
a706a1cc 630 BEGIN INITIAL;
e9955c83
AD
631 return PROLOGUE;
632 }
633
aa418041 634 <<EOF>> unexpected_eof (code_start, "%}"); BEGIN INITIAL;
e9955c83
AD
635}
636
637
638 /*---------------------------------------------------------------.
639 | Scanning the epilogue (everything after the second "%%", which |
d8d3f94a 640 | has already been eaten). |
e9955c83
AD
641 `---------------------------------------------------------------*/
642
643<SC_EPILOGUE>
644{
e9955c83 645 <<EOF>> {
41141c56 646 STRING_FINISH;
3f2d73f1 647 loc->start = code_start;
223ff46e 648 val->chars = last_string;
a706a1cc 649 BEGIN INITIAL;
e9955c83
AD
650 return EPILOGUE;
651 }
652}
653
654
4febdd96
PE
655 /*-----------------------------------------.
656 | Escape M4 quoting characters in C code. |
657 `-----------------------------------------*/
a706a1cc
PE
658
659<SC_COMMENT,SC_LINE_COMMENT,SC_STRING,SC_CHARACTER,SC_BRACED_CODE,SC_PROLOGUE,SC_EPILOGUE>
660{
223ff46e
PE
661 \$ obstack_sgrow (&obstack_for_string, "$][");
662 \@ obstack_sgrow (&obstack_for_string, "@@");
663 \[ obstack_sgrow (&obstack_for_string, "@{");
664 \] obstack_sgrow (&obstack_for_string, "@}");
a706a1cc
PE
665}
666
667
4febdd96
PE
668 /*-----------------------------------------------------.
669 | By default, grow the string obstack with the input. |
670 `-----------------------------------------------------*/
671
672<SC_COMMENT,SC_LINE_COMMENT,SC_BRACED_CODE,SC_PROLOGUE,SC_EPILOGUE,SC_STRING,SC_CHARACTER,SC_ESCAPED_STRING,SC_ESCAPED_CHARACTER>. |
673<SC_COMMENT,SC_LINE_COMMENT,SC_BRACED_CODE,SC_PROLOGUE,SC_EPILOGUE>\n STRING_GROW;
674
e9955c83
AD
675%%
676
cd3684cf
AD
677/* Keeps track of the maximum number of semantic values to the left of
678 a handle (those referenced by $0, $-1, etc.) that are required by the
25005f6a
PH
679 semantic actions of this grammar. */
680int max_left_semantic_context = 0;
681
3f2d73f1
PE
682/* Set *LOC and adjust scanner cursor to account for token TOKEN of
683 size SIZE. */
6c30d641
PE
684
685static void
223ff46e 686adjust_location (location *loc, char const *token, size_t size)
6c30d641 687{
3f2d73f1
PE
688 int line = scanner_cursor.line;
689 int column = scanner_cursor.column;
6c30d641
PE
690 char const *p0 = token;
691 char const *p = token;
692 char const *lim = token + size;
693
3f2d73f1
PE
694 loc->start = scanner_cursor;
695
6c30d641
PE
696 for (p = token; p < lim; p++)
697 switch (*p)
698 {
6c30d641
PE
699 case '\n':
700 line++;
701 column = 1;
702 p0 = p + 1;
703 break;
704
705 case '\t':
706 column += mbsnwidth (p0, p - p0, 0);
707 column += 8 - ((column - 1) & 7);
708 p0 = p + 1;
709 break;
710 }
711
3f2d73f1
PE
712 scanner_cursor.line = line;
713 scanner_cursor.column = column + mbsnwidth (p0, p - p0, 0);
714
715 loc->end = scanner_cursor;
6c30d641
PE
716}
717
718
/* Read at most SIZE bytes from FP into BUF, converting line endings
   on the way: both "\r\n" and a lone '\r' become a single '\n'.
   Return the number of bytes left in BUF after the conversion, which
   can be smaller than the number of bytes actually read.

   If the last byte read is a '\r', one more character is fetched from
   FP to see whether it is the '\n' half of a "\r\n" pair; any other
   character is pushed back with ungetc.  */

static size_t
no_cr_read (FILE *fp, char *buf, size_t size)
{
  size_t const count = fread (buf, 1, size, fp);
  /* DST is where the next converted byte goes; it starts at the first
     carriage return, since everything before it is kept as is.  */
  char *dst = count ? memchr (buf, '\r', count) : NULL;
  char const *src;
  char const *end;

  if (! dst)
    return count;

  end = buf + count;
  src = dst + 1;

  for (;;)
    {
      /* A '\r' was just consumed: emit '\n' in its place.  */
      *dst++ = '\n';

      /* Swallow the '\n' of a "\r\n" pair, looking past the end of the
         buffer if necessary.  */
      if (src == end)
        {
          int next = getc (fp);
          if (next != '\n' && ungetc (next, fp) != next)
            return dst - buf;
        }
      else if (*src == '\n')
        src++;

      /* Copy bytes down until the next '\r' or the end of the data.  */
      for (;;)
        {
          char c;

          if (src == end)
            return dst - buf;

          c = *src++;
          if (c == '\r')
            break;
          *dst++ = c;
        }
    }
}
764
765
e9955c83 766/*------------------------------------------------------------------.
366eea36 767| TEXT is pointing to a wannabee semantic value (i.e., a `$'). |
e9955c83
AD
768| |
769| Possible inputs: $[<TYPENAME>]($|integer) |
770| |
223ff46e 771| Output to OBSTACK_FOR_STRING a reference to this semantic value. |
e9955c83
AD
772`------------------------------------------------------------------*/
773
624a35e2 774static inline bool
223ff46e 775handle_action_dollar (char *text, location loc)
e9955c83
AD
776{
777 const char *type_name = NULL;
366eea36 778 char *cp = text + 1;
e9955c83 779
624a35e2
PE
780 if (! current_rule)
781 return false;
782
e9955c83
AD
783 /* Get the type name if explicit. */
784 if (*cp == '<')
785 {
786 type_name = ++cp;
787 while (*cp != '>')
788 ++cp;
789 *cp = '\0';
790 ++cp;
791 }
792
793 if (*cp == '$')
794 {
795 if (!type_name)
223ff46e 796 type_name = symbol_list_n_type_name_get (current_rule, loc, 0);
e9955c83 797 if (!type_name && typed)
223ff46e 798 complain_at (loc, _("$$ of `%s' has no declared type"),
97650f4e 799 current_rule->sym->tag);
e9955c83
AD
800 if (!type_name)
801 type_name = "";
223ff46e 802 obstack_fgrow1 (&obstack_for_string,
e9955c83
AD
803 "]b4_lhs_value([%s])[", type_name);
804 }
d8d3f94a 805 else
e9955c83 806 {
1452af69 807 long int num;
223ff46e 808 set_errno (0);
d8d3f94a 809 num = strtol (cp, 0, 10);
e9955c83 810
223ff46e 811 if (INT_MIN <= num && num <= rule_length && ! get_errno ())
e9955c83 812 {
d8d3f94a 813 int n = num;
25005f6a
PH
814 if (1-n > max_left_semantic_context)
815 max_left_semantic_context = 1-n;
e9955c83 816 if (!type_name && n > 0)
223ff46e 817 type_name = symbol_list_n_type_name_get (current_rule, loc, n);
e9955c83 818 if (!type_name && typed)
223ff46e
PE
819 complain_at (loc, _("$%d of `%s' has no declared type"),
820 n, current_rule->sym->tag);
e9955c83
AD
821 if (!type_name)
822 type_name = "";
223ff46e 823 obstack_fgrow3 (&obstack_for_string,
05ac60f3 824 "]b4_rhs_value(%d, %d, [%s])[",
e9955c83
AD
825 rule_length, n, type_name);
826 }
d8d3f94a 827 else
223ff46e 828 complain_at (loc, _("integer out of range: %s"), quote (text));
9280d3ef 829 }
9280d3ef 830
624a35e2 831 return true;
e9955c83
AD
832}
833
f25bfb75 834
cd3684cf
AD
835/*----------------------------------------------------------------.
836| Map `$?' onto the proper M4 symbol, depending on its TOKEN_TYPE |
837| (are we in an action?). |
838`----------------------------------------------------------------*/
e9955c83
AD
839
840static void
624a35e2 841handle_dollar (int token_type, char *text, location loc)
f25bfb75 842{
624a35e2 843 switch (token_type)
f25bfb75 844 {
624a35e2
PE
845 case BRACED_CODE:
846 if (handle_action_dollar (text, loc))
847 return;
f25bfb75
AD
848 break;
849
624a35e2 850 case PERCENT_DESTRUCTOR:
cd3684cf 851 case PERCENT_INITIAL_ACTION:
624a35e2
PE
852 case PERCENT_PRINTER:
853 if (text[1] == '$')
854 {
855 obstack_sgrow (&obstack_for_string, "]b4_dollar_dollar[");
856 return;
857 }
858 break;
859
860 default:
f25bfb75
AD
861 break;
862 }
624a35e2
PE
863
864 complain_at (loc, _("invalid value: %s"), quote (text));
f25bfb75
AD
865}
866
867
868/*------------------------------------------------------.
869| TEXT is a location token (i.e., a `@...'). Output to |
223ff46e 870| OBSTACK_FOR_STRING a reference to this location. |
f25bfb75
AD
871`------------------------------------------------------*/
872
624a35e2 873static inline bool
223ff46e 874handle_action_at (char *text, location loc)
e9955c83 875{
366eea36 876 char *cp = text + 1;
d0829076 877 locations_flag = true;
e9955c83 878
624a35e2
PE
879 if (! current_rule)
880 return false;
881
366eea36 882 if (*cp == '$')
624a35e2 883 obstack_sgrow (&obstack_for_string, "]b4_lhs_location[");
d8d3f94a 884 else
e9955c83 885 {
1452af69 886 long int num;
223ff46e 887 set_errno (0);
d8d3f94a 888 num = strtol (cp, 0, 10);
dafdc66f 889
223ff46e 890 if (INT_MIN <= num && num <= rule_length && ! get_errno ())
d8d3f94a
PE
891 {
892 int n = num;
05ac60f3 893 obstack_fgrow2 (&obstack_for_string, "]b4_rhs_location(%d, %d)[",
d8d3f94a
PE
894 rule_length, n);
895 }
e9955c83 896 else
223ff46e 897 complain_at (loc, _("integer out of range: %s"), quote (text));
f25bfb75 898 }
f25bfb75 899
624a35e2 900 return true;
e9955c83 901}
4cdb01db 902
f25bfb75 903
cd3684cf
AD
904/*----------------------------------------------------------------.
905| Map `@?' onto the proper M4 symbol, depending on its TOKEN_TYPE |
906| (are we in an action?). |
907`----------------------------------------------------------------*/
f25bfb75
AD
908
909static void
624a35e2 910handle_at (int token_type, char *text, location loc)
f25bfb75 911{
624a35e2 912 switch (token_type)
f25bfb75 913 {
624a35e2 914 case BRACED_CODE:
223ff46e 915 handle_action_at (text, loc);
624a35e2
PE
916 return;
917
cd3684cf 918 case PERCENT_INITIAL_ACTION:
624a35e2
PE
919 case PERCENT_DESTRUCTOR:
920 case PERCENT_PRINTER:
921 if (text[1] == '$')
922 {
923 obstack_sgrow (&obstack_for_string, "]b4_at_dollar[");
924 return;
925 }
f25bfb75
AD
926 break;
927
624a35e2 928 default:
f25bfb75
AD
929 break;
930 }
624a35e2
PE
931
932 complain_at (loc, _("invalid value: %s"), quote (text));
f25bfb75
AD
933}
934
935
1452af69
PE
936/*------------------------------------------------------.
937| Scan NUMBER for a base-BASE integer at location LOC. |
938`------------------------------------------------------*/
939
940static unsigned long int
941scan_integer (char const *number, int base, location loc)
942{
943 unsigned long int num;
944 set_errno (0);
945 num = strtoul (number, 0, base);
946 if (INT_MAX < num || get_errno ())
947 {
948 complain_at (loc, _("integer out of range: %s"), quote (number));
949 num = INT_MAX;
950 }
951 return num;
952}
953
954
d8d3f94a
PE
/*------------------------------------------------------------------.
| Convert universal character name UCN to a single-byte character,  |
| and return that character.  Return -1 if UCN does not correspond  |
| to a single-byte character.                                       |
|                                                                   |
| UCN is the whole escape sequence: its first two characters are    |
| skipped and the remainder is read as a hexadecimal number.        |
`------------------------------------------------------------------*/

static int
convert_ucn_to_byte (char const *ucn)
{
  unsigned long int code = strtoul (ucn + 2, 0, 16);

  /* FIXME: Currently we assume Unicode-compatible unibyte characters
     on ASCII hosts (i.e., Latin-1 on hosts with 8-bit bytes).  On
     non-ASCII hosts we support only the portable C character set.
     These limitations should be removed once we add support for
     multibyte characters.  */

  if (UCHAR_MAX < code)
    return -1;

#if ! ('$' == 0x24 && '@' == 0x40 && '`' == 0x60 && '~' == 0x7e)
  {
    /* A non-ASCII host.  Use CODE to index into a table of the C
       basic execution character set, which is guaranteed to exist on
       all Standard C platforms.  This table also includes '$', '@',
       and '`', which are not in the basic execution character set but
       which are unibyte characters on all the platforms that we know
       about.

       The entries are short ints, not signed chars, so that a
       character constant greater than SCHAR_MAX keeps its value and
       cannot be confused with the -1 that marks "no mapping".  */
    static short int const table[] =
      {
        '\0',   -1,   -1,   -1,   -1,   -1,   -1, '\a',
        '\b', '\t', '\n', '\v', '\f', '\r',   -1,   -1,
          -1,   -1,   -1,   -1,   -1,   -1,   -1,   -1,
          -1,   -1,   -1,   -1,   -1,   -1,   -1,   -1,
         ' ',  '!',  '"',  '#',  '$',  '%',  '&', '\'',
         '(',  ')',  '*',  '+',  ',',  '-',  '.',  '/',
         '0',  '1',  '2',  '3',  '4',  '5',  '6',  '7',
         '8',  '9',  ':',  ';',  '<',  '=',  '>',  '?',
         '@',  'A',  'B',  'C',  'D',  'E',  'F',  'G',
         'H',  'I',  'J',  'K',  'L',  'M',  'N',  'O',
         'P',  'Q',  'R',  'S',  'T',  'U',  'V',  'W',
         'X',  'Y',  'Z',  '[', '\\',  ']',  '^',  '_',
         '`',  'a',  'b',  'c',  'd',  'e',  'f',  'g',
         'h',  'i',  'j',  'k',  'l',  'm',  'n',  'o',
         'p',  'q',  'r',  's',  't',  'u',  'v',  'w',
         'x',  'y',  'z',  '{',  '|',  '}',  '~'
      };

    /* Return the lookup result directly as an int, so that -1 never
       has to travel through the unsigned CODE.  */
    return code < sizeof table / sizeof table[0] ? table[code] : -1;
  }
#else
  /* An ASCII host: CODE is at most UCHAR_MAX and is used as is.  */
  return code;
#endif
}
1009
1010
900c5db5
AD
1011/*----------------------------------------------------------------.
1012| Handle `#line INT "FILE"'. ARGS has already skipped `#line '. |
1013`----------------------------------------------------------------*/
1014
1015static void
3f2d73f1 1016handle_syncline (char *args)
900c5db5
AD
1017{
1018 int lineno = strtol (args, &args, 10);
1019 const char *file = NULL;
1020 file = strchr (args, '"') + 1;
1021 *strchr (file, '"') = 0;
dca81a78 1022 scanner_cursor.file = current_file = uniqstr_new (file);
3f2d73f1
PE
1023 scanner_cursor.line = lineno;
1024 scanner_cursor.column = 1;
900c5db5
AD
1025}
1026
a706a1cc 1027
4febdd96
PE
1028/*----------------------------------------------------------------.
1029| For a token or comment starting at START, report message MSGID, |
1030| which should say that an end marker was found before |
1031| the expected TOKEN_END. |
1032`----------------------------------------------------------------*/
1033
1034static void
1035unexpected_end (boundary start, char const *msgid, char const *token_end)
1036{
1037 location loc;
1038 loc.start = start;
1039 loc.end = scanner_cursor;
1040 complain_at (loc, _(msgid), token_end);
1041}
1042
1043
3f2d73f1
PE
1044/*------------------------------------------------------------------------.
1045| Report an unexpected EOF in a token or comment starting at START. |
1046| An end of file was encountered and the expected TOKEN_END was missing. |
3f2d73f1 1047`------------------------------------------------------------------------*/
a706a1cc
PE
1048
1049static void
aa418041 1050unexpected_eof (boundary start, char const *token_end)
a706a1cc 1051{
4febdd96
PE
1052 unexpected_end (start, N_("missing `%s' at end of file"), token_end);
1053}
1054
1055
1056/*----------------------------------------.
1057| Likewise, but for unexpected newlines. |
1058`----------------------------------------*/
1059
1060static void
1061unexpected_newline (boundary start, char const *token_end)
1062{
1063 unexpected_end (start, N_("missing `%s' at end of line"), token_end);
a706a1cc
PE
1064}
1065
1066
f25bfb75
AD
1067/*-------------------------.
1068| Initialize the scanner. |
1069`-------------------------*/
1070
1d6412ad
AD
1071void
1072scanner_initialize (void)
1073{
223ff46e 1074 obstack_init (&obstack_for_string);
1d6412ad
AD
1075}
1076
1077
f25bfb75
AD
1078/*-----------------------------------------------.
1079| Free all the memory allocated to the scanner. |
1080`-----------------------------------------------*/
1081
4cdb01db
AD
1082void
1083scanner_free (void)
1084{
223ff46e 1085 obstack_free (&obstack_for_string, 0);
536545f3
AD
1086 /* Reclaim Flex's buffers. */
1087 yy_delete_buffer (YY_CURRENT_BUFFER);
4cdb01db 1088}