]> git.saurik.com Git - bison.git/blob - src/scan-gram.l
Warn about dubious constructions like "%token T T".
[bison.git] / src / scan-gram.l
1 /* Bison Grammar Scanner -*- C -*-
2
3 Copyright (C) 2002, 2003, 2004, 2005, 2006 Free Software Foundation, Inc.
4
5 This file is part of Bison, the GNU Compiler Compiler.
6
7 This program is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 2 of the License, or
10 (at your option) any later version.
11
12 This program is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
16
17 You should have received a copy of the GNU General Public License
18 along with this program; if not, write to the Free Software
19 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
20 02110-1301 USA
21 */
22
23 %option debug nodefault nounput noyywrap never-interactive
24 %option prefix="gram_" outfile="lex.yy.c"
25
26 %{
27 /* Work around a bug in flex 2.5.31. See Debian bug 333231
28 <http://bugs.debian.org/cgi-bin/bugreport.cgi?bug=333231>. */
29 #undef gram_wrap
30 #define gram_wrap() 1
31
32 #include "system.h"
33
34 #include <mbswidth.h>
35 #include <quote.h>
36
37 #include "complain.h"
38 #include "files.h"
39 #include "getargs.h"
40 #include "gram.h"
41 #include "quotearg.h"
42 #include "reader.h"
43 #include "verify.h"
44 #include "uniqstr.h"
45
46 #define YY_USER_INIT \
47 do \
48 { \
49 scanner_cursor.file = current_file; \
50 scanner_cursor.line = 1; \
51 scanner_cursor.column = 1; \
52 code_start = scanner_cursor; \
53 } \
54 while (0)
55
56 /* Pacify "gcc -Wmissing-prototypes" when flex 2.5.31 is used. */
57 int gram_get_lineno (void);
58 FILE *gram_get_in (void);
59 FILE *gram_get_out (void);
60 int gram_get_leng (void);
61 char *gram_get_text (void);
62 void gram_set_lineno (int);
63 void gram_set_in (FILE *);
64 void gram_set_out (FILE *);
65 int gram_get_debug (void);
66 void gram_set_debug (int);
67 int gram_lex_destroy (void);
68
69 /* Location of scanner cursor. */
70 boundary scanner_cursor;
71
72 static void adjust_location (location *, char const *, size_t);
73 #define YY_USER_ACTION adjust_location (loc, yytext, yyleng);
74
75 static size_t no_cr_read (FILE *, char *, size_t);
76 #define YY_INPUT(buf, result, size) ((result) = no_cr_read (yyin, buf, size))
77
78
79 /* OBSTACK_FOR_STRING -- Used to store all the characters that we need to
80 keep (to construct ID, STRINGS etc.). Use the following macros to
81 use it.
82
83 Use STRING_GROW to append what has just been matched, and
84 STRING_FINISH to end the string (it puts the ending 0).
85 STRING_FINISH also stores this string in LAST_STRING, which can be
86 used, and which is used by STRING_FREE to free the last string. */
87
88 static struct obstack obstack_for_string;
89
90 /* A string representing the most recently saved token. */
91 static char *last_string;
92
93
94 #define STRING_GROW \
95 obstack_grow (&obstack_for_string, yytext, yyleng)
96
97 #define STRING_FINISH \
98 do { \
99 obstack_1grow (&obstack_for_string, '\0'); \
100 last_string = obstack_finish (&obstack_for_string); \
101 } while (0)
102
103 #define STRING_FREE \
104 obstack_free (&obstack_for_string, last_string)
105
106 void
107 scanner_last_string_free (void)
108 {
109 STRING_FREE;
110 }
111
112 /* Within well-formed rules, RULE_LENGTH is the number of values in
113 the current rule so far, which says where to find `$0' with respect
114 to the top of the stack. It is not the same as the rule->length in
115 the case of mid rule actions.
116
117 Outside of well-formed rules, RULE_LENGTH has an undefined value. */
118 static int rule_length;
119
120 static void rule_length_overflow (location) __attribute__ ((__noreturn__));
121
122 /* Increment the rule length by one, checking for overflow. */
123 static inline void
124 increment_rule_length (location loc)
125 {
126 rule_length++;
127
128 /* Don't allow rule_length == INT_MAX, since that might cause
129 confusion with strtol if INT_MAX == LONG_MAX. */
130 if (rule_length == INT_MAX)
131 rule_length_overflow (loc);
132 }
133
134 static void handle_dollar (int token_type, char *cp, location loc);
135 static void handle_at (int token_type, char *cp, location loc);
136 static void handle_syncline (char *, location);
137 static unsigned long int scan_integer (char const *p, int base, location loc);
138 static int convert_ucn_to_byte (char const *hex_text);
139 static void unexpected_eof (boundary, char const *);
140 static void unexpected_newline (boundary, char const *);
141
142 %}
143 %x SC_COMMENT SC_LINE_COMMENT SC_YACC_COMMENT
144 %x SC_STRING SC_CHARACTER
145 %x SC_AFTER_IDENTIFIER
146 %x SC_ESCAPED_STRING SC_ESCAPED_CHARACTER
147 %x SC_PRE_CODE SC_BRACED_CODE SC_PROLOGUE SC_EPILOGUE
148
149 letter [.abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ_]
150 id {letter}({letter}|[0-9])*
151 directive %{letter}({letter}|[0-9]|-)*
152 int [0-9]+
153
154 /* POSIX says that a tag must be both an id and a C union member, but
155 historically almost any character is allowed in a tag. We disallow
156 NUL and newline, as this simplifies our implementation. */
157 tag [^\0\n>]+
158
159 /* Zero or more instances of backslash-newline. Following GCC, allow
160 white space between the backslash and the newline. */
161 splice (\\[ \f\t\v]*\n)*
162
163 %%
164 %{
165 /* Nesting level of the current code in braces. */
166 int braces_level IF_LINT (= 0);
167
168 /* Parent context state, when applicable. */
169 int context_state IF_LINT (= 0);
170
171 /* Token type to return, when applicable. */
172 int token_type IF_LINT (= 0);
173
174 /* Location of most recent identifier, when applicable. */
175 location id_loc IF_LINT (= empty_location);
176
177 /* Where containing code started, when applicable. Its initial
178 value is relevant only when yylex is invoked in the SC_EPILOGUE
179 start condition. */
180 boundary code_start = scanner_cursor;
181
182 /* Where containing comment or string or character literal started,
183 when applicable. */
184 boundary token_start IF_LINT (= scanner_cursor);
185 %}
186
187
188 /*-----------------------.
189 | Scanning white space. |
190 `-----------------------*/
191
192 <INITIAL,SC_AFTER_IDENTIFIER,SC_PRE_CODE>
193 {
194 /* Comments and white space. */
195 "," warn_at (*loc, _("stray `,' treated as white space"));
196 [ \f\n\t\v] |
197 "//".* ;
198 "/*" {
199 token_start = loc->start;
200 context_state = YY_START;
201 BEGIN SC_YACC_COMMENT;
202 }
203
204 /* #line directives are not documented, and may be withdrawn or
205 modified in future versions of Bison. */
206 ^"#line "{int}" \"".*"\"\n" {
207 handle_syncline (yytext + sizeof "#line " - 1, *loc);
208 }
209 }
210
211
212 /*----------------------------.
213 | Scanning Bison directives. |
214 `----------------------------*/
215 <INITIAL>
216 {
217 "%binary" return PERCENT_NONASSOC;
218 "%debug" return PERCENT_DEBUG;
219 "%default"[-_]"prec" return PERCENT_DEFAULT_PREC;
220 "%define" return PERCENT_DEFINE;
221 "%defines" return PERCENT_DEFINES;
222 "%destructor" token_type = PERCENT_DESTRUCTOR; BEGIN SC_PRE_CODE;
223 "%dprec" return PERCENT_DPREC;
224 "%error"[-_]"verbose" return PERCENT_ERROR_VERBOSE;
225 "%expect" return PERCENT_EXPECT;
226 "%expect"[-_]"rr" return PERCENT_EXPECT_RR;
227 "%file-prefix" return PERCENT_FILE_PREFIX;
228 "%fixed"[-_]"output"[-_]"files" return PERCENT_YACC;
229 "%initial-action" token_type = PERCENT_INITIAL_ACTION; BEGIN SC_PRE_CODE;
230 "%glr-parser" return PERCENT_GLR_PARSER;
231 "%left" return PERCENT_LEFT;
232 "%lex-param" token_type = PERCENT_LEX_PARAM; BEGIN SC_PRE_CODE;
233 "%locations" return PERCENT_LOCATIONS;
234 "%merge" return PERCENT_MERGE;
235 "%name"[-_]"prefix" return PERCENT_NAME_PREFIX;
236 "%no"[-_]"default"[-_]"prec" return PERCENT_NO_DEFAULT_PREC;
237 "%no"[-_]"lines" return PERCENT_NO_LINES;
238 "%nonassoc" return PERCENT_NONASSOC;
239 "%nondeterministic-parser" return PERCENT_NONDETERMINISTIC_PARSER;
240 "%nterm" return PERCENT_NTERM;
241 "%output" return PERCENT_OUTPUT;
242 "%parse-param" token_type = PERCENT_PARSE_PARAM; BEGIN SC_PRE_CODE;
243 "%prec" rule_length--; return PERCENT_PREC;
244 "%printer" token_type = PERCENT_PRINTER; BEGIN SC_PRE_CODE;
245 "%pure"[-_]"parser" return PERCENT_PURE_PARSER;
246 "%require" return PERCENT_REQUIRE;
247 "%right" return PERCENT_RIGHT;
248 "%skeleton" return PERCENT_SKELETON;
249 "%start" return PERCENT_START;
250 "%term" return PERCENT_TOKEN;
251 "%token" return PERCENT_TOKEN;
252 "%token"[-_]"table" return PERCENT_TOKEN_TABLE;
253 "%type" return PERCENT_TYPE;
254 "%union" token_type = PERCENT_UNION; BEGIN SC_PRE_CODE;
255 "%verbose" return PERCENT_VERBOSE;
256 "%yacc" return PERCENT_YACC;
257
258 {directive} {
259 complain_at (*loc, _("invalid directive: %s"), quote (yytext));
260 }
261
262 "=" return EQUAL;
263 "|" rule_length = 0; return PIPE;
264 ";" return SEMICOLON;
265
266 {id} {
267 val->symbol = symbol_get (yytext, *loc);
268 id_loc = *loc;
269 increment_rule_length (*loc);
270 BEGIN SC_AFTER_IDENTIFIER;
271 }
272
273 {int} {
274 val->integer = scan_integer (yytext, 10, *loc);
275 return INT;
276 }
277 0[xX][0-9abcdefABCDEF]+ {
278 val->integer = scan_integer (yytext, 16, *loc);
279 return INT;
280 }
281
282 /* Characters. We don't check there is only one. */
283 "'" STRING_GROW; token_start = loc->start; BEGIN SC_ESCAPED_CHARACTER;
284
285 /* Strings. */
286 "\"" token_start = loc->start; BEGIN SC_ESCAPED_STRING;
287
288 /* Prologue. */
289 "%{" code_start = loc->start; BEGIN SC_PROLOGUE;
290
291 /* Code in between braces. */
292 "{" {
293 STRING_GROW;
294 token_type = BRACED_CODE;
295 braces_level = 0;
296 code_start = loc->start;
297 BEGIN SC_BRACED_CODE;
298 }
299
300 /* A type. */
301 "<"{tag}">" {
302 obstack_grow (&obstack_for_string, yytext + 1, yyleng - 2);
303 STRING_FINISH;
304 val->uniqstr = uniqstr_new (last_string);
305 STRING_FREE;
306 return TYPE;
307 }
308
309 "%%" {
310 static int percent_percent_count;
311 if (++percent_percent_count == 2)
312 BEGIN SC_EPILOGUE;
313 return PERCENT_PERCENT;
314 }
315
316 . {
317 complain_at (*loc, _("invalid character: %s"), quote (yytext));
318 }
319
320 <<EOF>> {
321 loc->start = loc->end = scanner_cursor;
322 yyterminate ();
323 }
324 }
325
326
327 /*-----------------------------------------------------------------.
328 | Scanning after an identifier, checking whether a colon is next. |
329 `-----------------------------------------------------------------*/
330
331 <SC_AFTER_IDENTIFIER>
332 {
333 ":" {
/* A colon follows the identifier: reset RULE_LENGTH, report the
   identifier's own location (saved in ID_LOC) rather than the
   colon's, and return ID_COLON.  */
334 rule_length = 0;
335 *loc = id_loc;
336 BEGIN INITIAL;
337 return ID_COLON;
338 }
339 . {
/* Any other character: the identifier stands alone.  Take back the
   columns this character added to the cursor and push the character
   back with yyless (0) so that it is rescanned in INITIAL.  */
340 scanner_cursor.column -= mbsnwidth (yytext, yyleng, 0);
341 yyless (0);
342 *loc = id_loc;
343 BEGIN INITIAL;
344 return ID;
345 }
346 <<EOF>> {
/* End of input right after the identifier: still return the pending
   ID, located at ID_LOC.  */
347 *loc = id_loc;
348 BEGIN INITIAL;
349 return ID;
350 }
351 }
352
353
354 /*---------------------------------------------------------------.
355 | Scanning a Yacc comment. The initial `/ *' is already eaten. |
356 `---------------------------------------------------------------*/
357
358 <SC_YACC_COMMENT>
359 {
360 "*/" BEGIN context_state;
361 .|\n ;
362 <<EOF>> unexpected_eof (token_start, "*/"); BEGIN context_state;
363 }
364
365
366 /*------------------------------------------------------------.
367 | Scanning a C comment. The initial `/ *' is already eaten. |
368 `------------------------------------------------------------*/
369
370 <SC_COMMENT>
371 {
372 "*"{splice}"/" STRING_GROW; BEGIN context_state;
373 <<EOF>> unexpected_eof (token_start, "*/"); BEGIN context_state;
374 }
375
376
377 /*--------------------------------------------------------------.
378 | Scanning a line comment. The initial `//' is already eaten. |
379 `--------------------------------------------------------------*/
380
381 <SC_LINE_COMMENT>
382 {
383 "\n" STRING_GROW; BEGIN context_state;
384 {splice} STRING_GROW;
385 <<EOF>> BEGIN context_state;
386 }
387
388
389 /*------------------------------------------------.
390 | Scanning a Bison string, including its escapes. |
391 | The initial quote is already eaten. |
392 `------------------------------------------------*/
393
394 <SC_ESCAPED_STRING>
395 {
396 "\"" {
397 STRING_FINISH;
398 loc->start = token_start;
399 val->chars = last_string;
400 increment_rule_length (*loc);
401 BEGIN INITIAL;
402 return STRING;
403 }
404 \n unexpected_newline (token_start, "\""); BEGIN INITIAL;
405 <<EOF>> unexpected_eof (token_start, "\""); BEGIN INITIAL;
406 }
407
408 /*----------------------------------------------------------.
409 | Scanning a Bison character literal, decoding its escapes. |
410 | The initial quote is already eaten. |
411 `----------------------------------------------------------*/
412
413 <SC_ESCAPED_CHARACTER>
414 {
415 "'" {
416 unsigned char last_string_1;
417 STRING_GROW;
418 STRING_FINISH;
419 loc->start = token_start;
420 val->symbol = symbol_get (quotearg_style (escape_quoting_style,
421 last_string),
422 *loc);
423 symbol_class_set (val->symbol, token_sym, *loc, false);
424 last_string_1 = last_string[1];
425 symbol_user_token_number_set (val->symbol, last_string_1, *loc);
426 STRING_FREE;
427 increment_rule_length (*loc);
428 BEGIN INITIAL;
429 return ID;
430 }
431 \n unexpected_newline (token_start, "'"); BEGIN INITIAL;
432 <<EOF>> unexpected_eof (token_start, "'"); BEGIN INITIAL;
433 }
434
435 <SC_ESCAPED_CHARACTER,SC_ESCAPED_STRING>
436 {
437 \0 complain_at (*loc, _("invalid null character"));
438 }
439
440
441 /*----------------------------.
442 | Decode escaped characters. |
443 `----------------------------*/
444
445 <SC_ESCAPED_STRING,SC_ESCAPED_CHARACTER>
446 {
447 \\[0-7]{1,3} {
448 unsigned long int c = strtoul (yytext + 1, NULL, 8);
449 if (UCHAR_MAX < c)
450 complain_at (*loc, _("invalid escape sequence: %s"), quote (yytext));
451 else if (! c)
452 complain_at (*loc, _("invalid null character: %s"), quote (yytext));
453 else
454 obstack_1grow (&obstack_for_string, c);
455 }
456
457 \\x[0-9abcdefABCDEF]+ {
458 verify (UCHAR_MAX < ULONG_MAX);
459 unsigned long int c = strtoul (yytext + 2, NULL, 16);
460 if (UCHAR_MAX < c)
461 complain_at (*loc, _("invalid escape sequence: %s"), quote (yytext));
462 else if (! c)
463 complain_at (*loc, _("invalid null character: %s"), quote (yytext));
464 else
465 obstack_1grow (&obstack_for_string, c);
466 }
467
468 \\a obstack_1grow (&obstack_for_string, '\a');
469 \\b obstack_1grow (&obstack_for_string, '\b');
470 \\f obstack_1grow (&obstack_for_string, '\f');
471 \\n obstack_1grow (&obstack_for_string, '\n');
472 \\r obstack_1grow (&obstack_for_string, '\r');
473 \\t obstack_1grow (&obstack_for_string, '\t');
474 \\v obstack_1grow (&obstack_for_string, '\v');
475
476 /* \\[\"\'?\\] would be shorter, but it confuses xgettext. */
477 \\("\""|"'"|"?"|"\\") obstack_1grow (&obstack_for_string, yytext[1]);
478
479 \\(u|U[0-9abcdefABCDEF]{4})[0-9abcdefABCDEF]{4} {
480 int c = convert_ucn_to_byte (yytext);
481 if (c < 0)
482 complain_at (*loc, _("invalid escape sequence: %s"), quote (yytext));
483 else if (! c)
484 complain_at (*loc, _("invalid null character: %s"), quote (yytext));
485 else
486 obstack_1grow (&obstack_for_string, c);
487 }
488 \\(.|\n) {
489 complain_at (*loc, _("unrecognized escape sequence: %s"), quote (yytext));
490 STRING_GROW;
491 }
492 }
493
494 /*--------------------------------------------.
495 | Scanning user-code characters and strings. |
496 `--------------------------------------------*/
497
498 <SC_CHARACTER,SC_STRING>
499 {
500 {splice}|\\{splice}[^\n$@\[\]] STRING_GROW;
501 }
502
503 <SC_CHARACTER>
504 {
505 "'" STRING_GROW; BEGIN context_state;
506 \n unexpected_newline (token_start, "'"); BEGIN context_state;
507 <<EOF>> unexpected_eof (token_start, "'"); BEGIN context_state;
508 }
509
510 <SC_STRING>
511 {
512 "\"" STRING_GROW; BEGIN context_state;
513 \n unexpected_newline (token_start, "\""); BEGIN context_state;
514 <<EOF>> unexpected_eof (token_start, "\""); BEGIN context_state;
515 }
516
517
518 /*---------------------------------------------------.
519 | Strings, comments etc. can be found in user code. |
520 `---------------------------------------------------*/
521
522 <SC_BRACED_CODE,SC_PROLOGUE,SC_EPILOGUE>
523 {
524 "'" {
525 STRING_GROW;
526 context_state = YY_START;
527 token_start = loc->start;
528 BEGIN SC_CHARACTER;
529 }
530 "\"" {
531 STRING_GROW;
532 context_state = YY_START;
533 token_start = loc->start;
534 BEGIN SC_STRING;
535 }
536 "/"{splice}"*" {
537 STRING_GROW;
538 context_state = YY_START;
539 token_start = loc->start;
540 BEGIN SC_COMMENT;
541 }
542 "/"{splice}"/" {
543 STRING_GROW;
544 context_state = YY_START;
545 BEGIN SC_LINE_COMMENT;
546 }
547 }
548
549
550 /*---------------------------------------------------------------.
551 | Scanning after %union etc., possibly followed by white space. |
552 | For %union only, allow arbitrary C code to appear before the |
553 | following brace, as an extension to POSIX. |
554 `---------------------------------------------------------------*/
555
556 <SC_PRE_CODE>
557 {
558 . {
/* The first character after the directive decides what happens.  It
   is acceptable if it is `{', or unconditionally when the directive
   is %union.  */
559 bool valid = yytext[0] == '{' || token_type == PERCENT_UNION;
/* Either way the character is not consumed here: undo its effect on
   the cursor column and push it back for rescanning.  */
560 scanner_cursor.column -= mbsnwidth (yytext, yyleng, 0);
561 yyless (0);
562
563 if (valid)
564 {
/* Start at -1 because the pushed-back `{' will be rescanned by the
   SC_BRACED_CODE "{" rule, which increments BRACES_LEVEL.  */
565 braces_level = -1;
566 code_start = loc->start;
567 BEGIN SC_BRACED_CODE;
568 }
569 else
570 {
/* No brace where one is required: complain, then return TOKEN_TYPE
   with an empty "{}" body as its value.  */
571 complain_at (*loc, _("missing `{' in %s"),
572 token_name (token_type));
573 obstack_sgrow (&obstack_for_string, "{}");
574 STRING_FINISH;
575 val->chars = last_string;
576 BEGIN INITIAL;
577 return token_type;
578 }
579 }
580
581 <<EOF>> unexpected_eof (scanner_cursor, "{}"); BEGIN INITIAL;
582 }
583
584
585 /*---------------------------------------------------------------.
586 | Scanning some code in braces (%union and actions). The initial |
587 | "{" is already eaten. |
588 `---------------------------------------------------------------*/
589
590 <SC_BRACED_CODE>
591 {
592 "{"|"<"{splice}"%" STRING_GROW; braces_level++;
593 "%"{splice}">" STRING_GROW; braces_level--;
594 "}" {
595 bool outer_brace = --braces_level < 0;
596
597 /* As an undocumented Bison extension, append `;' before the last
598 brace in braced code, so that the user code can omit trailing
599 `;'. But do not append `;' if emulating Yacc, since Yacc does
600 not append one.
601
602 FIXME: Bison should warn if a semicolon seems to be necessary
603 here, and should omit the semicolon if it seems unnecessary
604 (e.g., after ';', '{', or '}', each followed by comments or
605 white space). Such a warning shouldn't depend on --yacc; it
606 should depend on a new --pedantic option, which would cause
607 Bison to warn if it detects an extension to POSIX. --pedantic
608 should also diagnose other Bison extensions like %yacc.
609 Perhaps there should also be a GCC-style --pedantic-errors
610 option, so that such warnings are diagnosed as errors. */
611 if (outer_brace && token_type == BRACED_CODE && ! yacc_flag)
612 obstack_1grow (&obstack_for_string, ';');
613
614 obstack_1grow (&obstack_for_string, '}');
615
616 if (outer_brace)
617 {
618 STRING_FINISH;
619 loc->start = code_start;
620 val->chars = last_string;
621 increment_rule_length (*loc);
622 BEGIN INITIAL;
623 return token_type;
624 }
625 }
626
627 /* Tokenize `<<%' correctly (as `<<' `%') rather than incorrectly
628 (as `<' `<%'). */
629 "<"{splice}"<" STRING_GROW;
630
631 "$"("<"{tag}">")?(-?[0-9]+|"$") handle_dollar (token_type, yytext, *loc);
632 "@"(-?[0-9]+|"$") handle_at (token_type, yytext, *loc);
633
634 "$" {
635 warn_at (*loc, _("stray `$'"));
636 obstack_sgrow (&obstack_for_string, "$][");
637 }
638 "@" {
639 warn_at (*loc, _("stray `@'"));
640 obstack_sgrow (&obstack_for_string, "@@");
641 }
642
643 <<EOF>> unexpected_eof (code_start, "}"); BEGIN INITIAL;
644 }
645
646
647 /*--------------------------------------------------------------.
648 | Scanning some prologue: from "%{" (already scanned) to "%}". |
649 `--------------------------------------------------------------*/
650
651 <SC_PROLOGUE>
652 {
653 "%}" {
654 STRING_FINISH;
655 loc->start = code_start;
656 val->chars = last_string;
657 BEGIN INITIAL;
658 return PROLOGUE;
659 }
660
661 <<EOF>> unexpected_eof (code_start, "%}"); BEGIN INITIAL;
662 }
663
664
665 /*---------------------------------------------------------------.
666 | Scanning the epilogue (everything after the second "%%", which |
667 | has already been eaten). |
668 `---------------------------------------------------------------*/
669
670 <SC_EPILOGUE>
671 {
672 <<EOF>> {
673 STRING_FINISH;
674 loc->start = code_start;
675 val->chars = last_string;
676 BEGIN INITIAL;
677 return EPILOGUE;
678 }
679 }
680
681
682 /*-----------------------------------------.
683 | Escape M4 quoting characters in C code. |
684 `-----------------------------------------*/
685
686 <SC_COMMENT,SC_LINE_COMMENT,SC_STRING,SC_CHARACTER,SC_BRACED_CODE,SC_PROLOGUE,SC_EPILOGUE>
687 {
688 \$ obstack_sgrow (&obstack_for_string, "$][");
689 \@ obstack_sgrow (&obstack_for_string, "@@");
690 \[ obstack_sgrow (&obstack_for_string, "@{");
691 \] obstack_sgrow (&obstack_for_string, "@}");
692 }
693
694
695 /*-----------------------------------------------------.
696 | By default, grow the string obstack with the input. |
697 `-----------------------------------------------------*/
698
699 <SC_COMMENT,SC_LINE_COMMENT,SC_BRACED_CODE,SC_PROLOGUE,SC_EPILOGUE,SC_STRING,SC_CHARACTER,SC_ESCAPED_STRING,SC_ESCAPED_CHARACTER>. |
700 <SC_COMMENT,SC_LINE_COMMENT,SC_BRACED_CODE,SC_PROLOGUE,SC_EPILOGUE>\n STRING_GROW;
701
702 %%
703
704 /* The maximum number of semantic values to the left of a handle
705 (those referenced by $0, $-1, etc.) that are required by the
706 semantic actions of this grammar. */
707 int max_left_semantic_context = 0;
708
/* Return COLUMN advanced by a width, saturating at INT_MAX.

   When BUF is null the width is BUFSIZE itself (which must then be
   less than INT_MAX).  Otherwise the width is
   mbsnwidth (BUF, BUFSIZE, 0).  INT_MAX is returned whenever the sum
   overflows, or might overflow undetectably.  COLUMN is assumed to
   be nonnegative.  */

static inline int
add_column_width (int column, char const *buf, size_t bufsize)
{
  unsigned int const room = INT_MAX - column;
  size_t width = bufsize;

  if (buf != NULL)
    {
      /* A buffer this large could make the width computation itself
         overflow, so give up before measuring it.  */
      if (bufsize >= INT_MAX / 2)
        return INT_MAX;
      width = mbsnwidth (buf, bufsize, 0);
    }

  if (room < width)
    return INT_MAX;
  return column + width;
}
731
732 /* Set *LOC and adjust scanner cursor to account for token TOKEN of
733 size SIZE. */
734
735 static void
736 adjust_location (location *loc, char const *token, size_t size)
737 {
738 int line = scanner_cursor.line;
739 int column = scanner_cursor.column;
740 char const *p0 = token;
741 char const *p = token;
742 char const *lim = token + size;
743
744 loc->start = scanner_cursor;
745
746 for (p = token; p < lim; p++)
747 switch (*p)
748 {
749 case '\n':
750 line += line < INT_MAX;
751 column = 1;
752 p0 = p + 1;
753 break;
754
755 case '\t':
756 {
757 column = add_column_width (column, p0, p - p0);
758 column = add_column_width (column, NULL, 8 - ((column - 1) & 7));
759 p0 = p + 1;
760 break;
761 }
762 }
763
764 scanner_cursor.line = line;
765 scanner_cursor.column = column = add_column_width (column, p0, p - p0);
766
767 loc->end = scanner_cursor;
768
769 if (line == INT_MAX && loc->start.line != INT_MAX)
770 warn_at (*loc, _("line number overflow"));
771 if (column == INT_MAX && loc->start.column != INT_MAX)
772 warn_at (*loc, _("column number overflow"));
773 }
774
775
/* Read up to SIZE bytes from FP into BUF and return the number of
   bytes stored.  Carriage returns are removed from the data: both
   "\r\n" and a lone '\r' become a single '\n'.  When a '\r' is the
   last byte read, one more byte is fetched from FP so that a
   following '\n' can be swallowed.  */

static size_t
no_cr_read (FILE *fp, char *buf, size_t size)
{
  size_t const nread = fread (buf, 1, size, fp);
  char *dst;
  char const *src;
  char const *end;

  if (nread == 0)
    return 0;

  dst = memchr (buf, '\r', nread);
  if (dst == NULL)
    return nread;

  /* DST is where the next kept byte goes, SRC the next byte to
     examine; both start just past the first '\r'.  */
  dst++;
  src = dst;
  end = buf + nread;

  for (;;)
    {
      /* The byte just stored was a '\r': turn it into '\n' and skip
         a '\n' that immediately follows it.  */
      dst[-1] = '\n';
      if (src != end)
        {
          if (*src == '\n')
            src++;
        }
      else
        {
          /* The '\r' ended the buffer; peek at the stream.  Anything
             other than '\n' is pushed back.  */
          int next = getc (fp);
          if (next != '\n' && ungetc (next, fp) != next)
            break;
        }

      /* Copy bytes down until the next '\r' or the end of input.  */
      for (;;)
        {
          char c;
          if (src == end)
            return dst - buf;
          c = *src++;
          *dst++ = c;
          if (c == '\r')
            break;
        }
    }

  return dst - buf;
}
821
822
823 /*------------------------------------------------------------------.
824 | TEXT is pointing to a wannabee semantic value (i.e., a `$'). |
825 | |
826 | Possible inputs: $[<TYPENAME>]($|integer) |
827 | |
828 | Output to OBSTACK_FOR_STRING a reference to this semantic value. |
829 `------------------------------------------------------------------*/
830
831 static inline bool
832 handle_action_dollar (char *text, location loc)
833 {
834 const char *type_name = NULL;
835 char *cp = text + 1;
836
837 if (! current_rule)
838 return false;
839
840 /* Get the type name if explicit. */
841 if (*cp == '<')
842 {
843 type_name = ++cp;
844 while (*cp != '>')
845 ++cp;
846 *cp = '\0';
847 ++cp;
848 }
849
850 if (*cp == '$')
851 {
852 if (!type_name)
853 type_name = symbol_list_n_type_name_get (current_rule, loc, 0);
854 if (!type_name && typed)
855 complain_at (loc, _("$$ of `%s' has no declared type"),
856 current_rule->sym->tag);
857 if (!type_name)
858 type_name = "";
859 obstack_fgrow1 (&obstack_for_string,
860 "]b4_lhs_value([%s])[", type_name);
861 current_rule->used = true;
862 }
863 else
864 {
865 long int num = strtol (cp, NULL, 10);
866
867 if (1 - INT_MAX + rule_length <= num && num <= rule_length)
868 {
869 int n = num;
870 if (max_left_semantic_context < 1 - n)
871 max_left_semantic_context = 1 - n;
872 if (!type_name && 0 < n)
873 type_name = symbol_list_n_type_name_get (current_rule, loc, n);
874 if (!type_name && typed)
875 complain_at (loc, _("$%d of `%s' has no declared type"),
876 n, current_rule->sym->tag);
877 if (!type_name)
878 type_name = "";
879 obstack_fgrow3 (&obstack_for_string,
880 "]b4_rhs_value(%d, %d, [%s])[",
881 rule_length, n, type_name);
882 symbol_list_n_used_set (current_rule, n, true);
883 }
884 else
885 complain_at (loc, _("integer out of range: %s"), quote (text));
886 }
887
888 return true;
889 }
890
891
892 /*----------------------------------------------------------------.
893 | Map `$?' onto the proper M4 symbol, depending on its TOKEN_TYPE |
894 | (are we in an action?). |
895 `----------------------------------------------------------------*/
896
897 static void
898 handle_dollar (int token_type, char *text, location loc)
899 {
900 switch (token_type)
901 {
902 case BRACED_CODE:
903 if (handle_action_dollar (text, loc))
904 return;
905 break;
906
907 case PERCENT_DESTRUCTOR:
908 case PERCENT_INITIAL_ACTION:
909 case PERCENT_PRINTER:
910 if (text[1] == '$')
911 {
912 obstack_sgrow (&obstack_for_string, "]b4_dollar_dollar[");
913 return;
914 }
915 break;
916
917 default:
918 break;
919 }
920
921 complain_at (loc, _("invalid value: %s"), quote (text));
922 }
923
924
925 /*------------------------------------------------------.
926 | TEXT is a location token (i.e., a `@...'). Output to |
927 | OBSTACK_FOR_STRING a reference to this location. |
928 `------------------------------------------------------*/
929
930 static inline bool
931 handle_action_at (char *text, location loc)
932 {
933 char *cp = text + 1;
934 locations_flag = true;
935
936 if (! current_rule)
937 return false;
938
939 if (*cp == '$')
940 obstack_sgrow (&obstack_for_string, "]b4_lhs_location[");
941 else
942 {
943 long int num = strtol (cp, NULL, 10);
944
945 if (1 - INT_MAX + rule_length <= num && num <= rule_length)
946 {
947 int n = num;
948 obstack_fgrow2 (&obstack_for_string, "]b4_rhs_location(%d, %d)[",
949 rule_length, n);
950 }
951 else
952 complain_at (loc, _("integer out of range: %s"), quote (text));
953 }
954
955 return true;
956 }
957
958
959 /*----------------------------------------------------------------.
960 | Map `@?' onto the proper M4 symbol, depending on its TOKEN_TYPE |
961 | (are we in an action?). |
962 `----------------------------------------------------------------*/
963
964 static void
965 handle_at (int token_type, char *text, location loc)
966 {
967 switch (token_type)
968 {
969 case BRACED_CODE:
970 handle_action_at (text, loc);
971 return;
972
973 case PERCENT_INITIAL_ACTION:
974 case PERCENT_DESTRUCTOR:
975 case PERCENT_PRINTER:
976 if (text[1] == '$')
977 {
978 obstack_sgrow (&obstack_for_string, "]b4_at_dollar[");
979 return;
980 }
981 break;
982
983 default:
984 break;
985 }
986
987 complain_at (loc, _("invalid value: %s"), quote (text));
988 }
989
990
991 /*------------------------------------------------------.
992 | Scan NUMBER for a base-BASE integer at location LOC. |
993 `------------------------------------------------------*/
994
995 static unsigned long int
996 scan_integer (char const *number, int base, location loc)
997 {
998 verify (INT_MAX < ULONG_MAX);
999 unsigned long int num = strtoul (number, NULL, base);
1000
1001 if (INT_MAX < num)
1002 {
1003 complain_at (loc, _("integer out of range: %s"), quote (number));
1004 num = INT_MAX;
1005 }
1006
1007 return num;
1008 }
1009
1010
/*------------------------------------------------------------------.
| Convert universal character name UCN to a single-byte character,  |
| and return that character.  Return -1 if UCN does not correspond  |
| to a single-byte character.  UCN starts with a two-character      |
| prefix (backslash, then `u' or `U') followed by hex digits.       |
`------------------------------------------------------------------*/

static int
convert_ucn_to_byte (char const *ucn)
{
  verify (UCHAR_MAX <= INT_MAX);
  unsigned long int code = strtoul (ucn + 2, NULL, 16);

  /* FIXME: Currently we assume Unicode-compatible unibyte characters
     on ASCII hosts (i.e., Latin-1 on hosts with 8-bit bytes).  On
     non-ASCII hosts we support only the portable C character set.
     These limitations should be removed once we add support for
     multibyte characters.  */

  if (UCHAR_MAX < code)
    return -1;

#if ! ('$' == 0x24 && '@' == 0x40 && '`' == 0x60 && '~' == 0x7e)
  {
    /* A non-ASCII host.  Use CODE to index into a table of the C
       basic execution character set, which is guaranteed to exist on
       all Standard C platforms.  This table also includes '$', '@',
       and '`', which are not in the basic execution character set but
       which are unibyte characters on all the platforms that we know
       about.  */
    static signed char const table[] =
      {
        '\0',   -1,   -1,   -1,   -1,   -1,   -1, '\a',
        '\b', '\t', '\n', '\v', '\f', '\r',   -1,   -1,
          -1,   -1,   -1,   -1,   -1,   -1,   -1,   -1,
          -1,   -1,   -1,   -1,   -1,   -1,   -1,   -1,
         ' ',  '!',  '"',  '#',  '$',  '%',  '&', '\'',
         '(',  ')',  '*',  '+',  ',',  '-',  '.',  '/',
         '0',  '1',  '2',  '3',  '4',  '5',  '6',  '7',
         '8',  '9',  ':',  ';',  '<',  '=',  '>',  '?',
         '@',  'A',  'B',  'C',  'D',  'E',  'F',  'G',
         'H',  'I',  'J',  'K',  'L',  'M',  'N',  'O',
         'P',  'Q',  'R',  'S',  'T',  'U',  'V',  'W',
         'X',  'Y',  'Z',  '[', '\\',  ']',  '^',  '_',
         '`',  'a',  'b',  'c',  'd',  'e',  'f',  'g',
         'h',  'i',  'j',  'k',  'l',  'm',  'n',  'o',
         'p',  'q',  'r',  's',  't',  'u',  'v',  'w',
         'x',  'y',  'z',  '{',  '|',  '}',  '~'
      };

    /* Return the table entry as an int directly.  Storing -1 into the
       unsigned CODE and narrowing it back to int on return would
       depend on implementation-defined conversion.  */
    return code < sizeof table ? table[code] : -1;
  }
#else
  /* An ASCII host: CODE is at most UCHAR_MAX, so it fits in an int.  */
  return code;
#endif
}
1066
1067
1068 /*----------------------------------------------------------------.
1069 | Handle `#line INT "FILE"'. ARGS has already skipped `#line '. |
1070 `----------------------------------------------------------------*/
1071
1072 static void
1073 handle_syncline (char *args, location loc)
1074 {
1075 char *after_num;
1076 unsigned long int lineno = strtoul (args, &after_num, 10);
1077 char *file = strchr (after_num, '"') + 1;
1078 *strchr (file, '"') = '\0';
1079 if (INT_MAX <= lineno)
1080 {
1081 warn_at (loc, _("line number overflow"));
1082 lineno = INT_MAX;
1083 }
1084 scanner_cursor.file = current_file = uniqstr_new (file);
1085 scanner_cursor.line = lineno;
1086 scanner_cursor.column = 1;
1087 }
1088
1089
1090 /*---------------------------------.
1091 | Report a rule that is too long. |
1092 `---------------------------------*/
1093
1094 static void
1095 rule_length_overflow (location loc)
1096 {
1097 fatal_at (loc, _("rule is too long"));
1098 }
1099
1100
1101 /*----------------------------------------------------------------.
1102 | For a token or comment starting at START, report message MSGID, |
1103 | which should say that an end marker was found before |
1104 | the expected TOKEN_END. |
1105 `----------------------------------------------------------------*/
1106
1107 static void
1108 unexpected_end (boundary start, char const *msgid, char const *token_end)
1109 {
1110 location loc;
1111 loc.start = start;
1112 loc.end = scanner_cursor;
1113 complain_at (loc, _(msgid), token_end);
1114 }
1115
1116
1117 /*------------------------------------------------------------------------.
1118 | Report an unexpected EOF in a token or comment starting at START. |
1119 | An end of file was encountered and the expected TOKEN_END was missing. |
1120 `------------------------------------------------------------------------*/
1121
1122 static void
1123 unexpected_eof (boundary start, char const *token_end)
1124 {
1125 unexpected_end (start, N_("missing `%s' at end of file"), token_end);
1126 }
1127
1128
1129 /*----------------------------------------.
1130 | Likewise, but for unexpected newlines. |
1131 `----------------------------------------*/
1132
1133 static void
1134 unexpected_newline (boundary start, char const *token_end)
1135 {
1136 unexpected_end (start, N_("missing `%s' at end of line"), token_end);
1137 }
1138
1139
1140 /*-------------------------.
1141 | Initialize the scanner. |
1142 `-------------------------*/
1143
1144 void
1145 scanner_initialize (void)
1146 {
1147 obstack_init (&obstack_for_string);
1148 }
1149
1150
1151 /*-----------------------------------------------.
1152 | Free all the memory allocated to the scanner. |
1153 `-----------------------------------------------*/
1154
1155 void
1156 scanner_free (void)
1157 {
1158 obstack_free (&obstack_for_string, 0);
1159 /* Reclaim Flex's buffers. */
1160 yy_delete_buffer (YY_CURRENT_BUFFER);
1161 }