]> git.saurik.com Git - bison.git/blob - src/scan-gram.l
parser: factor the handling of code_props
[bison.git] / src / scan-gram.l
1 /* Bison Grammar Scanner -*- C -*-
2
3 Copyright (C) 2002-2012 Free Software Foundation, Inc.
4
5 This file is part of Bison, the GNU Compiler Compiler.
6
7 This program is free software: you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation, either version 3 of the License, or
10 (at your option) any later version.
11
12 This program is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
16
17 You should have received a copy of the GNU General Public License
18 along with this program. If not, see <http://www.gnu.org/licenses/>. */
19
20 %option debug nodefault noinput nounput noyywrap never-interactive
21 %option prefix="gram_" outfile="lex.yy.c"
22
23 %{
24 /* Work around a bug in flex 2.5.31. See Debian bug 333231
25 <http://bugs.debian.org/cgi-bin/bugreport.cgi?bug=333231>. */
26 #undef gram_wrap
27 #define gram_wrap() 1
28
29 #define FLEX_PREFIX(Id) gram_ ## Id
30 #include <src/flex-scanner.h>
31
32 #include <src/complain.h>
33 #include <src/files.h>
34 #include <src/gram.h>
35 #include <quotearg.h>
36 #include <src/reader.h>
37 #include <src/uniqstr.h>
38
39 #include <ctype.h>
40 #include <mbswidth.h>
41 #include <quote.h>
42
43 #include <src/scan-gram.h>
44
45 #define YY_DECL GRAM_LEX_DECL
46
47 #define YY_USER_INIT \
48 code_start = scanner_cursor = loc->start; \
49
50 /* Location of scanner cursor. */
51 static boundary scanner_cursor;
52
53 #define YY_USER_ACTION location_compute (loc, &scanner_cursor, yytext, yyleng);
54
55 static size_t no_cr_read (FILE *, char *, size_t);
56 #define YY_INPUT(buf, result, size) ((result) = no_cr_read (yyin, buf, size))
57
58 #define RETURN_PERCENT_PARAM(Value) \
59 RETURN_VALUE(PERCENT_PARAM, param, param_ ## Value)
60
61 #define RETURN_PERCENT_FLAG(Value) \
62 RETURN_VALUE(PERCENT_FLAG, uniqstr, uniqstr_new (Value))
63
64 #define RETURN_VALUE(Token, Field, Value) \
65 do { \
66 val->Field = Value; \
67 return Token; \
68 } while (0)
69
70 #define ROLLBACK_CURRENT_TOKEN \
71 do { \
72 scanner_cursor.column -= mbsnwidth (yytext, yyleng, 0); \
73 yyless (0); \
74 } while (0)
75
76 /* A string representing the most recently saved token. */
77 static char *last_string;
78
79 /* Bracketed identifier. */
80 static uniqstr bracketed_id_str = 0;
81 static location bracketed_id_loc;
82 static boundary bracketed_id_start;
83 static int bracketed_id_context_state = 0;
84
85 void
86 gram_scanner_last_string_free (void)
87 {
88 STRING_FREE;
89 }
90
91 static void handle_syncline (char *, location);
92 static unsigned long int scan_integer (char const *p, int base, location loc);
93 static int convert_ucn_to_byte (char const *hex_text);
94 static void unexpected_eof (boundary, char const *);
95 static void unexpected_newline (boundary, char const *);
96
97 %}
98 /* A C-like comment in directives/rules. */
99 %x SC_YACC_COMMENT
100 /* Strings and characters in directives/rules. */
101 %x SC_ESCAPED_STRING SC_ESCAPED_CHARACTER
102 /* An identifier was just read in directives/rules. Special state
103 to capture the sequence 'identifier :'. */
104 %x SC_AFTER_IDENTIFIER
105 /* A complex tag, with nested angle brackets. */
106 %x SC_TAG
107
108 /* Four types of user code:
109 - prologue (code between '%{' '%}' in the first section, before %%);
110 - actions, printers, union, etc. (between braces in the middle section);
111 - epilogue (everything after the second %%).
112 - predicate (code between '%?{' and '}' in middle section); */
113 %x SC_PROLOGUE SC_BRACED_CODE SC_EPILOGUE SC_PREDICATE
114 /* C and C++ comments in code. */
115 %x SC_COMMENT SC_LINE_COMMENT
116 /* Strings and characters in code. */
117 %x SC_STRING SC_CHARACTER
118 /* Bracketed identifiers support. */
119 %x SC_BRACKETED_ID SC_RETURN_BRACKETED_ID
120
121 letter [.abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ_]
122 id {letter}({letter}|[-0-9])*
123 directive %{id}
124 int [0-9]+
125
126 /* POSIX says that a tag must be both an id and a C union member, but
127 historically almost any character is allowed in a tag. We disallow
128 NUL, as this simplifies our implementation. We disallow angle
129 bracket to match them in nested pairs: several languages use them
130 for generics/template types. */
131 tag [^\0<>]+
132
133 /* Zero or more instances of backslash-newline. Following GCC, allow
134 white space between the backslash and the newline. */
135 splice (\\[ \f\t\v]*\n)*
136
137 %%
138 %{
139 /* Nesting level. Either for nested braces, or nested angle brackets
140 (but not mixed). */
141 int nesting PACIFY_CC (= 0);
142
143 /* Parent context state, when applicable. */
144 int context_state PACIFY_CC (= 0);
145
146 /* Location of most recent identifier, when applicable. */
147 location id_loc PACIFY_CC (= empty_location);
148
149 /* Where containing code started, when applicable. Its initial
150 value is relevant only when yylex is invoked in the SC_EPILOGUE
151 start condition. */
152 boundary code_start = scanner_cursor;
153
154 /* Where containing comment or string or character literal started,
155 when applicable. */
156 boundary token_start PACIFY_CC (= scanner_cursor);
157 %}
158
159
160 /*-----------------------.
161 | Scanning white space. |
162 `-----------------------*/
163
164 <INITIAL,SC_AFTER_IDENTIFIER,SC_BRACKETED_ID,SC_RETURN_BRACKETED_ID>
165 {
166 /* Comments and white space. */
167 "," {
168 complain_at (*loc, Wother, _("stray ',' treated as white space"));
169 }
170 [ \f\n\t\v] |
171 "//".* ;
172 "/*" {
173 token_start = loc->start;
174 context_state = YY_START;
175 BEGIN SC_YACC_COMMENT;
176 }
177
178 /* #line directives are not documented, and may be withdrawn or
179 modified in future versions of Bison. */
180 ^"#line "{int}" \"".*"\"\n" {
181 handle_syncline (yytext + sizeof "#line " - 1, *loc);
182 }
183 }
184
185
186 /*----------------------------.
187 | Scanning Bison directives. |
188 `----------------------------*/
189
190 /* For directives that are also command line options, the regex must be
191 "%..."
192 after "[-_]"s are removed, and the directive must match the --long
193 option name, with a single string argument. Otherwise, add exceptions
194 to ../build-aux/cross-options.pl. */
195
196 <INITIAL>
197 {
198 "%binary" return PERCENT_NONASSOC;
199 "%code" return PERCENT_CODE;
200 "%debug" RETURN_PERCENT_FLAG("parse.trace");
201 "%default"[-_]"prec" return PERCENT_DEFAULT_PREC;
202 "%define" return PERCENT_DEFINE;
203 "%defines" return PERCENT_DEFINES;
204 "%destructor" return PERCENT_DESTRUCTOR;
205 "%dprec" return PERCENT_DPREC;
206 "%error"[-_]"verbose" return PERCENT_ERROR_VERBOSE;
207 "%expect" return PERCENT_EXPECT;
208 "%expect"[-_]"rr" return PERCENT_EXPECT_RR;
209 "%file-prefix" return PERCENT_FILE_PREFIX;
210 "%fixed"[-_]"output"[-_]"files" return PERCENT_YACC;
211 "%initial-action" return PERCENT_INITIAL_ACTION;
212 "%glr-parser" return PERCENT_GLR_PARSER;
213 "%language" return PERCENT_LANGUAGE;
214 "%left" return PERCENT_LEFT;
215 "%lex-param" RETURN_PERCENT_PARAM(lex);
216 "%locations" RETURN_PERCENT_FLAG("locations");
217 "%merge" return PERCENT_MERGE;
218 "%name"[-_]"prefix" return PERCENT_NAME_PREFIX;
219 "%no"[-_]"default"[-_]"prec" return PERCENT_NO_DEFAULT_PREC;
220 "%no"[-_]"lines" return PERCENT_NO_LINES;
221 "%nonassoc" return PERCENT_NONASSOC;
222 "%nondeterministic-parser" return PERCENT_NONDETERMINISTIC_PARSER;
223 "%nterm" return PERCENT_NTERM;
224 "%output" return PERCENT_OUTPUT;
225 "%param" RETURN_PERCENT_PARAM(both);
226 "%parse-param" RETURN_PERCENT_PARAM(parse);
227 "%prec" return PERCENT_PREC;
228 "%precedence" return PERCENT_PRECEDENCE;
229 "%printer" return PERCENT_PRINTER;
230 "%pure"[-_]"parser" RETURN_PERCENT_FLAG("api.pure");
231 "%require" return PERCENT_REQUIRE;
232 "%right" return PERCENT_RIGHT;
233 "%skeleton" return PERCENT_SKELETON;
234 "%start" return PERCENT_START;
235 "%term" return PERCENT_TOKEN;
236 "%token" return PERCENT_TOKEN;
237 "%token"[-_]"table" return PERCENT_TOKEN_TABLE;
238 "%type" return PERCENT_TYPE;
239 "%union" return PERCENT_UNION;
240 "%verbose" return PERCENT_VERBOSE;
241 "%yacc" return PERCENT_YACC;
242
243 {directive} {
244 complain_at (*loc, complaint, _("invalid directive: %s"), quote (yytext));
245 }
246
247 "=" return EQUAL;
248 "|" return PIPE;
249 ";" return SEMICOLON;
250
251 {id} {
252 val->uniqstr = uniqstr_new (yytext);
253 id_loc = *loc;
254 bracketed_id_str = NULL;
255 BEGIN SC_AFTER_IDENTIFIER;
256 }
257
258 {int} {
259 val->integer = scan_integer (yytext, 10, *loc);
260 return INT;
261 }
262 0[xX][0-9abcdefABCDEF]+ {
263 val->integer = scan_integer (yytext, 16, *loc);
264 return INT;
265 }
266
267 /* Identifiers may not start with a digit. Yet, don't silently
268 accept "1FOO" as "1 FOO". */
269 {int}{id} {
270 complain_at (*loc, complaint, _("invalid identifier: %s"), quote (yytext));
271 }
272
273 /* Characters. */
274 "'" token_start = loc->start; BEGIN SC_ESCAPED_CHARACTER;
275
276 /* Strings. */
277 "\"" token_start = loc->start; BEGIN SC_ESCAPED_STRING;
278
279 /* Prologue. */
280 "%{" code_start = loc->start; BEGIN SC_PROLOGUE;
281
282 /* Code in between braces. */
283 "{" {
284 STRING_GROW;
285 nesting = 0;
286 code_start = loc->start;
287 BEGIN SC_BRACED_CODE;
288 }
289
290 /* Semantic predicate. */
291 "%?"[ \f\n\t\v]*"{" {
292 nesting = 0;
293 code_start = loc->start;
294 BEGIN SC_PREDICATE;
295 }
296
297 /* A type. */
298 "<*>" return TAG_ANY;
299 "<>" return TAG_NONE;
300 "<"{tag}">" {
301 obstack_grow (&obstack_for_string, yytext + 1, yyleng - 2);
302 STRING_FINISH;
303 val->uniqstr = uniqstr_new (last_string);
304 STRING_FREE;
305 return TAG;
306 }
307 "<" {
308 nesting = 0;
309 token_start = loc->start;
310 BEGIN SC_TAG;
311 }
312
313 "%%" {
314 static int percent_percent_count;
315 if (++percent_percent_count == 2)
316 BEGIN SC_EPILOGUE;
317 return PERCENT_PERCENT;
318 }
319
320 "[" {
321 bracketed_id_str = NULL;
322 bracketed_id_start = loc->start;
323 bracketed_id_context_state = YY_START;
324 BEGIN SC_BRACKETED_ID;
325 }
326
327 . {
328 complain_at (*loc, complaint, _("invalid character: %s"), quote (yytext));
329 }
330
331 <<EOF>> {
332 loc->start = loc->end = scanner_cursor;
333 yyterminate ();
334 }
335 }
336
337
338 /*--------------------------------------------------------------.
339 | Supporting \0 complexifies our implementation for no expected |
340 | added value. |
341 `--------------------------------------------------------------*/
342
343 <SC_ESCAPED_CHARACTER,SC_ESCAPED_STRING,SC_TAG>
344 {
345 \0 complain_at (*loc, complaint, _("invalid null character"));
346 }
347
348
349 /*-----------------------------------------------------------------.
350 | Scanning after an identifier, checking whether a colon is next. |
351 `-----------------------------------------------------------------*/
352
353 <SC_AFTER_IDENTIFIER>
354 {
355 "[" {
356 if (bracketed_id_str)
357 {
358 ROLLBACK_CURRENT_TOKEN;
359 BEGIN SC_RETURN_BRACKETED_ID;
360 *loc = id_loc;
361 return ID;
362 }
363 else
364 {
365 bracketed_id_start = loc->start;
366 bracketed_id_context_state = YY_START;
367 BEGIN SC_BRACKETED_ID;
368 }
369 }
370 ":" {
371 BEGIN (bracketed_id_str ? SC_RETURN_BRACKETED_ID : INITIAL);
372 *loc = id_loc;
373 return ID_COLON;
374 }
375 . {
376 ROLLBACK_CURRENT_TOKEN;
377 BEGIN (bracketed_id_str ? SC_RETURN_BRACKETED_ID : INITIAL);
378 *loc = id_loc;
379 return ID;
380 }
381 <<EOF>> {
382 BEGIN (bracketed_id_str ? SC_RETURN_BRACKETED_ID : INITIAL);
383 *loc = id_loc;
384 return ID;
385 }
386 }
387
388 /*--------------------------------.
389 | Scanning bracketed identifiers. |
390 `--------------------------------*/
391
392 <SC_BRACKETED_ID>
393 {
394 {id} {
395 if (bracketed_id_str)
396 {
397 complain_at (*loc, complaint,
398 _("unexpected identifier in bracketed name: %s"),
399 quote (yytext));
400 }
401 else
402 {
403 bracketed_id_str = uniqstr_new (yytext);
404 bracketed_id_loc = *loc;
405 }
406 }
407 "]" {
408 BEGIN bracketed_id_context_state;
409 if (bracketed_id_str)
410 {
411 if (INITIAL == bracketed_id_context_state)
412 {
413 val->uniqstr = bracketed_id_str;
414 bracketed_id_str = 0;
415 *loc = bracketed_id_loc;
416 return BRACKETED_ID;
417 }
418 }
419 else
420 complain_at (*loc, complaint, _("an identifier expected"));
421 }
422 . {
423 complain_at (*loc, complaint, _("invalid character in bracketed name: %s"),
424 quote (yytext));
425 }
426 <<EOF>> {
427 BEGIN bracketed_id_context_state;
428 unexpected_eof (bracketed_id_start, "]");
429 }
430 }
431
432 <SC_RETURN_BRACKETED_ID>
433 {
434 . {
435 ROLLBACK_CURRENT_TOKEN;
436 val->uniqstr = bracketed_id_str;
437 bracketed_id_str = 0;
438 *loc = bracketed_id_loc;
439 BEGIN INITIAL;
440 return BRACKETED_ID;
441 }
442 }
443
444
445 /*---------------------------------------------------------------.
446 | Scanning a Yacc comment. The initial '/ *' is already eaten. |
447 `---------------------------------------------------------------*/
448
449 <SC_YACC_COMMENT>
450 {
451 "*/" BEGIN context_state;
452 .|\n ;
453 <<EOF>> unexpected_eof (token_start, "*/"); BEGIN context_state;
454 }
455
456
457 /*------------------------------------------------------------.
458 | Scanning a C comment. The initial '/ *' is already eaten. |
459 `------------------------------------------------------------*/
460
461 <SC_COMMENT>
462 {
463 "*"{splice}"/" STRING_GROW; BEGIN context_state;
464 <<EOF>> unexpected_eof (token_start, "*/"); BEGIN context_state;
465 }
466
467
468 /*--------------------------------------------------------------.
469 | Scanning a line comment. The initial '//' is already eaten. |
470 `--------------------------------------------------------------*/
471
472 <SC_LINE_COMMENT>
473 {
474 "\n" STRING_GROW; BEGIN context_state;
475 {splice} STRING_GROW;
476 <<EOF>> BEGIN context_state;
477 }
478
479
480 /*------------------------------------------------.
481 | Scanning a Bison string, including its escapes. |
482 | The initial quote is already eaten. |
483 `------------------------------------------------*/
484
485 <SC_ESCAPED_STRING>
486 {
487 "\""|"\n" {
488 if (yytext[0] == '\n')
489 unexpected_newline (token_start, "\"");
490 STRING_FINISH;
491 loc->start = token_start;
492 val->chars = last_string;
493 BEGIN INITIAL;
494 return STRING;
495 }
496 <<EOF>> {
497 unexpected_eof (token_start, "\"");
498 STRING_FINISH;
499 loc->start = token_start;
500 val->chars = last_string;
501 BEGIN INITIAL;
502 return STRING;
503 }
504 }
505
506 /*----------------------------------------------------------.
507 | Scanning a Bison character literal, decoding its escapes. |
508 | The initial quote is already eaten. |
509 `----------------------------------------------------------*/
510
511 <SC_ESCAPED_CHARACTER>
512 {
513 "'"|"\n" {
514 STRING_FINISH;
515 loc->start = token_start;
516 val->character = last_string[0];
517 {
518 /* FIXME: Eventually, make these errors. */
519 if (last_string[0] == '\0')
520 {
521 complain_at (*loc, Wother, _("empty character literal"));
522 /* '\0' seems dangerous even if we are about to complain. */
523 val->character = '\'';
524 }
525 else if (last_string[1] != '\0')
526 complain_at (*loc, Wother,
527 _("extra characters in character literal"));
528 }
529 if (yytext[0] == '\n')
530 unexpected_newline (token_start, "'");
531 STRING_FREE;
532 BEGIN INITIAL;
533 return CHAR;
534 }
535 <<EOF>> {
536 STRING_FINISH;
537 loc->start = token_start;
538 val->character = last_string[0];
539 {
540 /* FIXME: Eventually, make these errors. */
541 if (last_string[0] == '\0')
542 {
543 complain_at (*loc, Wother, _("empty character literal"));
544 /* '\0' seems dangerous even if we are about to complain. */
545 val->character = '\'';
546 }
547 else if (last_string[1] != '\0')
548 complain_at (*loc, Wother,
549 _("extra characters in character literal"));
550 }
551 unexpected_eof (token_start, "'");
552 STRING_FREE;
553 BEGIN INITIAL;
554 return CHAR;
555 }
556 }
557
558 /*-----------------------------------------------------------.
559 | Scanning a Bison nested tag. The initial angle bracket is |
560 | already eaten. |
561 `-----------------------------------------------------------*/
562
563 <SC_TAG>
564 {
565 ">" {
566 --nesting;
567 if (nesting < 0)
568 {
569 STRING_FINISH;
570 loc->start = token_start;
571 val->uniqstr = uniqstr_new (last_string);
572 STRING_FREE;
573 BEGIN INITIAL;
574 return TAG;
575 }
576 STRING_GROW;
577 }
578
579 [^<>]+ STRING_GROW;
580 "<"+ STRING_GROW; nesting += yyleng;
581
582 <<EOF>> {
583 unexpected_eof (token_start, ">");
584 STRING_FINISH;
585 loc->start = token_start;
586 val->uniqstr = uniqstr_new (last_string);
587 STRING_FREE;
588 BEGIN INITIAL;
589 return TAG;
590 }
591 }
592
593 /*----------------------------.
594 | Decode escaped characters. |
595 `----------------------------*/
596
597 <SC_ESCAPED_STRING,SC_ESCAPED_CHARACTER>
598 {
599 \\[0-7]{1,3} {
600 unsigned long int c = strtoul (yytext + 1, NULL, 8);
601 if (!c || UCHAR_MAX < c)
602 complain_at (*loc, complaint, _("invalid number after \\-escape: %s"),
603 yytext+1);
604 else
605 obstack_1grow (&obstack_for_string, c);
606 }
607
608 \\x[0-9abcdefABCDEF]+ {
609 verify (UCHAR_MAX < ULONG_MAX);
610 unsigned long int c = strtoul (yytext + 2, NULL, 16);
611 if (!c || UCHAR_MAX < c)
612 complain_at (*loc, complaint, _("invalid number after \\-escape: %s"),
613 yytext+1);
614 else
615 obstack_1grow (&obstack_for_string, c);
616 }
617
618 \\a obstack_1grow (&obstack_for_string, '\a');
619 \\b obstack_1grow (&obstack_for_string, '\b');
620 \\f obstack_1grow (&obstack_for_string, '\f');
621 \\n obstack_1grow (&obstack_for_string, '\n');
622 \\r obstack_1grow (&obstack_for_string, '\r');
623 \\t obstack_1grow (&obstack_for_string, '\t');
624 \\v obstack_1grow (&obstack_for_string, '\v');
625
626 /* \\[\"\'?\\] would be shorter, but it confuses xgettext. */
627 \\("\""|"'"|"?"|"\\") obstack_1grow (&obstack_for_string, yytext[1]);
628
629 \\(u|U[0-9abcdefABCDEF]{4})[0-9abcdefABCDEF]{4} {
630 int c = convert_ucn_to_byte (yytext);
631 if (c <= 0)
632 complain_at (*loc, complaint, _("invalid number after \\-escape: %s"),
633 yytext+1);
634 else
635 obstack_1grow (&obstack_for_string, c);
636 }
637 \\(.|\n) {
638 char const *p = yytext + 1;
639 /* Quote only if escaping won't make the character visible. */
640 if (isspace ((unsigned char) *p) && isprint ((unsigned char) *p))
641 p = quote (p);
642 else
643 p = quotearg_style_mem (escape_quoting_style, p, 1);
644 complain_at (*loc, complaint, _("invalid character after \\-escape: %s"),
645 p);
646 }
647 }
648
649 /*--------------------------------------------.
650 | Scanning user-code characters and strings. |
651 `--------------------------------------------*/
652
653 <SC_CHARACTER,SC_STRING>
654 {
655 {splice}|\\{splice}[^\n\[\]] STRING_GROW;
656 }
657
658 <SC_CHARACTER>
659 {
660 "'" STRING_GROW; BEGIN context_state;
661 \n unexpected_newline (token_start, "'"); BEGIN context_state;
662 <<EOF>> unexpected_eof (token_start, "'"); BEGIN context_state;
663 }
664
665 <SC_STRING>
666 {
667 "\"" STRING_GROW; BEGIN context_state;
668 \n unexpected_newline (token_start, "\""); BEGIN context_state;
669 <<EOF>> unexpected_eof (token_start, "\""); BEGIN context_state;
670 }
671
672
673 /*---------------------------------------------------.
674 | Strings, comments etc. can be found in user code. |
675 `---------------------------------------------------*/
676
677 <SC_BRACED_CODE,SC_PROLOGUE,SC_EPILOGUE,SC_PREDICATE>
678 {
679 "'" {
680 STRING_GROW;
681 context_state = YY_START;
682 token_start = loc->start;
683 BEGIN SC_CHARACTER;
684 }
685 "\"" {
686 STRING_GROW;
687 context_state = YY_START;
688 token_start = loc->start;
689 BEGIN SC_STRING;
690 }
691 "/"{splice}"*" {
692 STRING_GROW;
693 context_state = YY_START;
694 token_start = loc->start;
695 BEGIN SC_COMMENT;
696 }
697 "/"{splice}"/" {
698 STRING_GROW;
699 context_state = YY_START;
700 BEGIN SC_LINE_COMMENT;
701 }
702 }
703
704
705
706 /*-----------------------------------------------------------.
707 | Scanning some code in braces (actions, predicates). The |
708 | initial "{" is already eaten. |
709 `-----------------------------------------------------------*/
710
711 <SC_BRACED_CODE,SC_PREDICATE>
712 {
713 "{"|"<"{splice}"%" STRING_GROW; nesting++;
714 "%"{splice}">" STRING_GROW; nesting--;
715
716 /* Tokenize '<<%' correctly (as '<<' '%') rather than incorrectly
717 (as '<' '<%'). */
718 "<"{splice}"<" STRING_GROW;
719
720 <<EOF>> {
721 int token = (YY_START == SC_BRACED_CODE) ? BRACED_CODE : BRACED_PREDICATE;
722 unexpected_eof (code_start, "}");
723 STRING_FINISH;
724 loc->start = code_start;
725 val->code = last_string;
726 BEGIN INITIAL;
727 return token;
728 }
729 }
730
731 <SC_BRACED_CODE>
732 {
733 "}" {
734 obstack_1grow (&obstack_for_string, '}');
735
736 --nesting;
737 if (nesting < 0)
738 {
739 STRING_FINISH;
740 loc->start = code_start;
741 val->code = last_string;
742 BEGIN INITIAL;
743 return BRACED_CODE;
744 }
745 }
746 }
747
748 <SC_PREDICATE>
749 {
750 "}" {
751 --nesting;
752 if (nesting < 0)
753 {
754 STRING_FINISH;
755 loc->start = code_start;
756 val->code = last_string;
757 BEGIN INITIAL;
758 return BRACED_PREDICATE;
759 }
760 else
761 obstack_1grow (&obstack_for_string, '}');
762 }
763 }
764
765 /*--------------------------------------------------------------.
766 | Scanning some prologue: from "%{" (already scanned) to "%}". |
767 `--------------------------------------------------------------*/
768
769 <SC_PROLOGUE>
770 {
771 "%}" {
772 STRING_FINISH;
773 loc->start = code_start;
774 val->chars = last_string;
775 BEGIN INITIAL;
776 return PROLOGUE;
777 }
778
779 <<EOF>> {
780 unexpected_eof (code_start, "%}");
781 STRING_FINISH;
782 loc->start = code_start;
783 val->chars = last_string;
784 BEGIN INITIAL;
785 return PROLOGUE;
786 }
787 }
788
789
790 /*---------------------------------------------------------------.
791 | Scanning the epilogue (everything after the second "%%", which |
792 | has already been eaten). |
793 `---------------------------------------------------------------*/
794
795 <SC_EPILOGUE>
796 {
797 <<EOF>> {
798 STRING_FINISH;
799 loc->start = code_start;
800 val->chars = last_string;
801 BEGIN INITIAL;
802 return EPILOGUE;
803 }
804 }
805
806
807 /*-----------------------------------------------------.
808 | By default, grow the string obstack with the input. |
809 `-----------------------------------------------------*/
810
811 <SC_COMMENT,SC_LINE_COMMENT,SC_BRACED_CODE,SC_PREDICATE,SC_PROLOGUE,SC_EPILOGUE,SC_STRING,SC_CHARACTER,SC_ESCAPED_STRING,SC_ESCAPED_CHARACTER>. |
812 <SC_COMMENT,SC_LINE_COMMENT,SC_BRACED_CODE,SC_PREDICATE,SC_PROLOGUE,SC_EPILOGUE>\n STRING_GROW;
813
814 %%
815
/* Fill BUF with at most SIZE bytes read from FP and return how many
   bytes BUF holds afterwards.  Carriage returns are normalized on the
   fly: both "\r\n" and a lone '\r' come out as a single '\n', so the
   result may be shorter than what was actually read.  */

static size_t
no_cr_read (FILE *fp, char *buf, size_t size)
{
  size_t const nread = fread (buf, 1, size, fp);
  char *dst = nread ? memchr (buf, '\r', nread) : NULL;

  /* Nothing read, or nothing to translate.  */
  if (!dst)
    return nread;

  /* Compact the buffer in place.  DST is where the next byte is
     stored, SRC is the next byte to examine; DST never overtakes
     SRC.  */
  char const *const end = buf + nread;
  char const *src = dst + 1;

  for (;;)
    {
      /* DST designates a '\r': store '\n' in its place, and swallow
         the '\n' that may come right after it.  */
      *dst++ = '\n';
      if (src != end)
        {
          if (*src == '\n')
            src++;
        }
      else
        {
          /* The '\r' was the last byte read: look one byte ahead in
             FP.  A '\n' is consumed; anything else is pushed back,
             and we stop if the push-back fails.  */
          int next = getc (fp);
          if (next != '\n' && ungetc (next, fp) != next)
            break;
        }

      /* Move bytes down until the next '\r' has been copied.  */
      for (;;)
        {
          if (src == end)
            return dst - buf;
          *dst = *src++;
          if (*dst == '\r')
            break;
          dst++;
        }
    }

  return dst - buf;
}
861
862
863
864 /*------------------------------------------------------.
865 | Scan NUMBER for a base-BASE integer at location LOC. |
866 `------------------------------------------------------*/
867
868 static unsigned long int
869 scan_integer (char const *number, int base, location loc)
870 {
871 verify (INT_MAX < ULONG_MAX);
872 unsigned long int num = strtoul (number, NULL, base);
873
874 if (INT_MAX < num)
875 {
876 complain_at (loc, complaint, _("integer out of range: %s"),
877 quote (number));
878 num = INT_MAX;
879 }
880
881 return num;
882 }
883
884
/* Translate the universal character name UCN into the single byte
   that stands for it, and return that byte.  UCN starts with a
   two-character introducer (backslash, then 'u' or 'U') followed by
   hexadecimal digits.  Return -1 when no single-byte character
   corresponds to UCN.  */

static int
convert_ucn_to_byte (char const *ucn)
{
  verify (UCHAR_MAX <= INT_MAX);

  /* Skip the introducer and read the hexadecimal code.  */
  unsigned long int value = strtoul (ucn + 2, NULL, 16);

  /* FIXME: Currently we assume Unicode-compatible unibyte characters
     on ASCII hosts (i.e., Latin-1 on hosts with 8-bit bytes).  On
     non-ASCII hosts we support only the portable C character set.
     These limitations should be removed once we add support for
     multibyte characters.  */

  if (value > UCHAR_MAX)
    return -1;

#if ! ('$' == 0x24 && '@' == 0x40 && '`' == 0x60 && '~' == 0x7e)
  {
    /* A non-ASCII host.  Map the code through a table of the C basic
       execution character set, which is guaranteed to exist on all
       Standard C platforms.  The table also holds '$', '@', and '`',
       which are not in the basic execution character set but which
       are unibyte characters on all the platforms that we know
       about.  */
    static signed char const table[] =
      {
        '\0',   -1,   -1,   -1,   -1,   -1,   -1, '\a',
        '\b', '\t', '\n', '\v', '\f', '\r',   -1,   -1,
          -1,   -1,   -1,   -1,   -1,   -1,   -1,   -1,
          -1,   -1,   -1,   -1,   -1,   -1,   -1,   -1,
         ' ',  '!',  '"',  '#',  '$',  '%',  '&', '\'',
         '(',  ')',  '*',  '+',  ',',  '-',  '.',  '/',
         '0',  '1',  '2',  '3',  '4',  '5',  '6',  '7',
         '8',  '9',  ':',  ';',  '<',  '=',  '>',  '?',
         '@',  'A',  'B',  'C',  'D',  'E',  'F',  'G',
         'H',  'I',  'J',  'K',  'L',  'M',  'N',  'O',
         'P',  'Q',  'R',  'S',  'T',  'U',  'V',  'W',
         'X',  'Y',  'Z',  '[', '\\',  ']',  '^',  '_',
         '`',  'a',  'b',  'c',  'd',  'e',  'f',  'g',
         'h',  'i',  'j',  'k',  'l',  'm',  'n',  'o',
         'p',  'q',  'r',  's',  't',  'u',  'v',  'w',
         'x',  'y',  'z',  '{',  '|',  '}',  '~'
      };

    /* Codes past the end of the table have no unibyte equivalent.  */
    if (value < sizeof table)
      value = table[value];
    else
      value = -1;
  }
#endif

  return value;
}
940
941
942 /*----------------------------------------------------------------.
943 | Handle '#line INT "FILE"'. ARGS has already skipped '#line '. |
944 `----------------------------------------------------------------*/
945
946 static void
947 handle_syncline (char *args, location loc)
948 {
949 char *after_num;
950 unsigned long int lineno = strtoul (args, &after_num, 10);
951 char *file = strchr (after_num, '"') + 1;
952 *strchr (file, '"') = '\0';
953 if (INT_MAX <= lineno)
954 {
955 complain_at (loc, Wother, _("line number overflow"));
956 lineno = INT_MAX;
957 }
958 current_file = uniqstr_new (file);
959 boundary_set (&scanner_cursor, current_file, lineno, 1);
960 }
961
962
963 /*----------------------------------------------------------------.
964 | For a token or comment starting at START, report message MSGID, |
965 | which should say that an end marker was found before |
966 | the expected TOKEN_END. |
967 `----------------------------------------------------------------*/
968
969 static void
970 unexpected_end (boundary start, char const *msgid, char const *token_end)
971 {
972 location loc;
973 loc.start = start;
974 loc.end = scanner_cursor;
975 token_end = quote (token_end);
976 // Instead of '\'', display "'".
977 if (STREQ (token_end, "'\\''"))
978 token_end = "\"'\"";
979 complain_at (loc, complaint, _(msgid), token_end);
980 }
981
982
983 /*------------------------------------------------------------------------.
984 | Report an unexpected EOF in a token or comment starting at START. |
985 | An end of file was encountered and the expected TOKEN_END was missing. |
986 `------------------------------------------------------------------------*/
987
988 static void
989 unexpected_eof (boundary start, char const *token_end)
990 {
991 unexpected_end (start, N_("missing %s at end of file"), token_end);
992 }
993
994
995 /*----------------------------------------.
996 | Likewise, but for unexpected newlines. |
997 `----------------------------------------*/
998
999 static void
1000 unexpected_newline (boundary start, char const *token_end)
1001 {
1002 unexpected_end (start, N_("missing %s at end of line"), token_end);
1003 }
1004
1005
1006 /*-------------------------.
1007 | Initialize the scanner. |
1008 `-------------------------*/
1009
1010 void
1011 gram_scanner_initialize (void)
1012 {
1013 obstack_init (&obstack_for_string);
1014 }
1015
1016
1017 /*-----------------------------------------------.
1018 | Free all the memory allocated to the scanner. |
1019 `-----------------------------------------------*/
1020
1021 void
1022 gram_scanner_free (void)
1023 {
1024 obstack_free (&obstack_for_string, 0);
1025 /* Reclaim Flex's buffers. */
1026 yylex_destroy ();
1027 }