]> git.saurik.com Git - bison.git/blob - src/scan-gram.l
83d76506a85a5b5092b57a32a9122de2bf3a9a55
[bison.git] / src / scan-gram.l
1 /* Bison Grammar Scanner -*- C -*-
2
3 Copyright (C) 2002-2011 Free Software Foundation, Inc.
4
5 This file is part of Bison, the GNU Compiler Compiler.
6
7 This program is free software: you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation, either version 3 of the License, or
10 (at your option) any later version.
11
12 This program is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
16
17 You should have received a copy of the GNU General Public License
18 along with this program. If not, see <http://www.gnu.org/licenses/>. */
19
/* Scanner generation options.  The generated symbols use the gram_
   prefix (see FLEX_PREFIX below) and the scanner is written to
   lex.yy.c. */
20 %option debug nodefault noinput nounput noyywrap never-interactive
21 %option prefix="gram_" outfile="lex.yy.c"
22
23 %{
24 /* Work around a bug in flex 2.5.31. See Debian bug 333231
25 <http://bugs.debian.org/cgi-bin/bugreport.cgi?bug=333231>. */
26 #undef gram_wrap
27 #define gram_wrap() 1
28
29 #define FLEX_PREFIX(Id) gram_ ## Id
30 #include <src/flex-scanner.h>
31
32 #include <src/complain.h>
33 #include <src/files.h>
34 #include <src/gram.h>
35 #include <quotearg.h>
36 #include <src/reader.h>
37 #include <src/uniqstr.h>
38
39 #include <ctype.h>
40 #include <mbswidth.h>
41 #include <quote.h>
42
43 #include <src/scan-gram.h>
44
/* Have Flex declare the scanner function with the prototype given by
   GRAM_LEX_DECL (not defined in this file; presumably it comes from
   src/scan-gram.h -- confirm). */
45 #define YY_DECL GRAM_LEX_DECL
46
/* Scanner initialization hook: both the start of the current code
   block and the scanner cursor begin at LOC->start. */
47 #define YY_USER_INIT \
48 code_start = scanner_cursor = loc->start; \
49
50 /* Location of scanner cursor. It is handed to location_compute for
   every match (see YY_USER_ACTION) and repositioned by
   handle_syncline. */
51 static boundary scanner_cursor;
52
/* Hook run for each match, ahead of the rule action: compute *LOC from
   the matched text and the scanner cursor. */
53 #define YY_USER_ACTION location_compute (loc, &scanner_cursor, yytext, yyleng);
54
/* Feed the scanner through no_cr_read, so that carriage returns never
   reach the rules. */
55 static size_t no_cr_read (FILE *, char *, size_t);
56 #define YY_INPUT(buf, result, size) ((result) = no_cr_read (yyin, buf, size))
57
/* Return the PERCENT_PARAM token with the semantic value param_VALUE. */
58 #define RETURN_PERCENT_PARAM(Value) \
59 RETURN_VALUE(PERCENT_PARAM, param, param_ ## Value)
60
/* Return the PERCENT_FLAG token; its semantic value is the uniqstr
   made from the string VALUE. */
61 #define RETURN_PERCENT_FLAG(Value) \
62 RETURN_VALUE(PERCENT_FLAG, uniqstr, uniqstr_new (Value))
63
/* Store VALUE in member FIELD of the semantic value *VAL, then return
   TOKEN from the scanner. */
64 #define RETURN_VALUE(Token, Field, Value) \
65 do { \
66 val->Field = Value; \
67 return Token; \
68 } while (0)
69
/* Give the whole current match back to the input (yyless (0)) and
   move the cursor column back by the display width of that text, so
   that the text can be scanned again, typically in another start
   condition. */
70 #define ROLLBACK_CURRENT_TOKEN \
71 do { \
72 scanner_cursor.column -= mbsnwidth (yytext, yyleng, 0); \
73 yyless (0); \
74 } while (0)
75
76 /* A string representing the most recently saved token. */
77 static char *last_string;
78
79 /* Bracketed identifier state, shared by the SC_AFTER_IDENTIFIER,
   SC_BRACKETED_ID and SC_RETURN_BRACKETED_ID rules. */
/* The name read between the brackets, or 0 when none is pending. */
80 static uniqstr bracketed_id_str = 0;
/* Location of that name. */
81 static location bracketed_id_loc;
/* Where the opening bracket was read; used to report a missing
   closing bracket at end of file. */
82 static boundary bracketed_id_start;
/* Start condition to resume once the closing bracket is read. */
83 static int bracketed_id_context_state = 0;
84
85 void
86 gram_scanner_last_string_free (void)
87 {
88 STRING_FREE;
89 }
90
/* Forward declarations of the helpers defined after the rules
   section. */
91 static void handle_syncline (char *, location);
92 static unsigned long int scan_integer (char const *p, int base, location loc);
93 static int convert_ucn_to_byte (char const *hex_text);
94 static void unexpected_eof (boundary, char const *);
95 static void unexpected_newline (boundary, char const *);
96
97 %}
98 /* A C-like comment in directives/rules. */
99 %x SC_YACC_COMMENT
100 /* Strings and characters in directives/rules. */
101 %x SC_ESCAPED_STRING SC_ESCAPED_CHARACTER
102 /* An identifier was just read in directives/rules. Special state
103 to capture the sequence `identifier :'. */
104 %x SC_AFTER_IDENTIFIER
105 /* A complex tag, with nested angle brackets. */
106 %x SC_TAG
107
108 /* Four types of user code:
109 - prologue (code between `%{' `%}' in the first section, before %%);
110 - actions, printers, union, etc. (between braces in the middle section);
111 - epilogue (everything after the second %%);
112 - predicate (code between `%?{' and `}' in the middle section). */
113 %x SC_PROLOGUE SC_BRACED_CODE SC_EPILOGUE SC_PREDICATE
114 /* C and C++ comments in code. */
115 %x SC_COMMENT SC_LINE_COMMENT
116 /* Strings and characters in code. */
117 %x SC_STRING SC_CHARACTER
118 /* Bracketed identifiers support: SC_BRACKETED_ID while between the
   brackets, SC_RETURN_BRACKETED_ID when a bracketed name is still
   to be returned after the identifier it is attached to. */
119 %x SC_BRACKETED_ID SC_RETURN_BRACKETED_ID
120
/* Characters that may start an identifier: ASCII letters, underscore
   and period. */
121 letter [.abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ_]
/* An identifier; after the first character, digits and dashes are
   accepted too. */
122 id {letter}({letter}|[-0-9])*
/* A percent sign immediately followed by an identifier. */
123 directive %{id}
/* A nonempty sequence of decimal digits. */
124 int [0-9]+
125
126 /* POSIX says that a tag must be both an id and a C union member, but
127 historically almost any character is allowed in a tag. We disallow
128 NUL, as this simplifies our implementation. We disallow angle
129 brackets so that they can be matched in nested pairs: several
130 languages use them for generics/template types. */
131 tag [^\0<>]+
132
133 /* Zero or more instances of backslash-newline. Following GCC, allow
134 white space between the backslash and the newline. */
135 splice (\\[ \f\t\v]*\n)*
136
137 %%
138 %{
/* Local variables of the generated scanner function (Flex copies
   code found at this position to the start of that function).
   IF_LINT is defined elsewhere; judging by its use here it supplies
   an initializer only in some builds -- confirm. */
139 /* Nesting level. Either for nested braces, or nested angle brackets
140 (but not mixed). */
141 int nesting IF_LINT (= 0);
142
143 /* Parent context state, when applicable: the start condition to go
   back to at the end of a comment, string or character literal. */
144 int context_state IF_LINT (= 0);
145
146 /* Location of most recent identifier, when applicable. */
147 location id_loc IF_LINT (= empty_location);
148
149 /* Where containing code started, when applicable. Its initial
150 value is relevant only when yylex is invoked in the SC_EPILOGUE
151 start condition. */
152 boundary code_start = scanner_cursor;
153
154 /* Where containing comment or string or character literal started,
155 when applicable. */
156 boundary token_start IF_LINT (= scanner_cursor);
157 %}
158
159
160 /*-----------------------.
161 | Scanning white space. |
162 `-----------------------*/
163
164 <INITIAL,SC_AFTER_IDENTIFIER,SC_BRACKETED_ID,SC_RETURN_BRACKETED_ID>
165 {
166 /* Comments and white space. None of these rules returns a token.
   A stray comma only draws a warning. A C-style comment is skipped
   in SC_YACC_COMMENT, after saving its start and the start condition
   to come back to. */
167 "," warn_at (*loc, _("stray `,' treated as white space"));
168 [ \f\n\t\v] |
169 "//".* ;
170 "/*" {
171 token_start = loc->start;
172 context_state = YY_START;
173 BEGIN SC_YACC_COMMENT;
174 }
175
176 /* #line directives are not documented, and may be withdrawn or
177 modified in future versions of Bison. The pointer handed to
   handle_syncline skips the leading keyword and blank, so it starts
   at the line number. */
178 ^"#line "{int}" \"".*"\"\n" {
179 handle_syncline (yytext + sizeof "#line " - 1, *loc);
180 }
181 }
182
183
184 /*----------------------------.
185 | Scanning Bison directives. |
186 `----------------------------*/
187
188 /* For directives that are also command line options, the regex must be
189 "%..."
190 after "[-_]"s are removed, and the directive must match the --long
191 option name, with a single string argument. Otherwise, add exceptions
192 to ../build-aux/cross-options.pl. */
193
194 <INITIAL>
195 {
  /* Known directives. Several spellings share one token: %binary and
     %nonassoc, %term and %token, %fixed-output-files and %yacc.
     %debug, %locations and %pure-parser are all returned as
     PERCENT_FLAG, with the strings parse.trace, locations and
     api.pure as semantic values. %lex-param, %parse-param and %param
     are all returned as PERCENT_PARAM, distinguished by their
     semantic value. */
196 "%binary" return PERCENT_NONASSOC;
197 "%code" return PERCENT_CODE;
198 "%debug" RETURN_PERCENT_FLAG("parse.trace");
199 "%default"[-_]"prec" return PERCENT_DEFAULT_PREC;
200 "%define" return PERCENT_DEFINE;
201 "%defines" return PERCENT_DEFINES;
202 "%destructor" return PERCENT_DESTRUCTOR;
203 "%dprec" return PERCENT_DPREC;
204 "%error"[-_]"verbose" return PERCENT_ERROR_VERBOSE;
205 "%expect" return PERCENT_EXPECT;
206 "%expect"[-_]"rr" return PERCENT_EXPECT_RR;
207 "%file-prefix" return PERCENT_FILE_PREFIX;
208 "%fixed"[-_]"output"[-_]"files" return PERCENT_YACC;
209 "%initial-action" return PERCENT_INITIAL_ACTION;
210 "%glr-parser" return PERCENT_GLR_PARSER;
211 "%language" return PERCENT_LANGUAGE;
212 "%left" return PERCENT_LEFT;
213 "%lex-param" RETURN_PERCENT_PARAM(lex);
214 "%locations" RETURN_PERCENT_FLAG("locations");
215 "%merge" return PERCENT_MERGE;
216 "%name"[-_]"prefix" return PERCENT_NAME_PREFIX;
217 "%no"[-_]"default"[-_]"prec" return PERCENT_NO_DEFAULT_PREC;
218 "%no"[-_]"lines" return PERCENT_NO_LINES;
219 "%nonassoc" return PERCENT_NONASSOC;
220 "%nondeterministic-parser" return PERCENT_NONDETERMINISTIC_PARSER;
221 "%nterm" return PERCENT_NTERM;
222 "%output" return PERCENT_OUTPUT;
223 "%param" RETURN_PERCENT_PARAM(both);
224 "%parse-param" RETURN_PERCENT_PARAM(parse);
225 "%prec" return PERCENT_PREC;
226 "%precedence" return PERCENT_PRECEDENCE;
227 "%printer" return PERCENT_PRINTER;
228 "%pure"[-_]"parser" RETURN_PERCENT_FLAG("api.pure");
229 "%require" return PERCENT_REQUIRE;
230 "%right" return PERCENT_RIGHT;
231 "%skeleton" return PERCENT_SKELETON;
232 "%start" return PERCENT_START;
233 "%term" return PERCENT_TOKEN;
234 "%token" return PERCENT_TOKEN;
235 "%token"[-_]"table" return PERCENT_TOKEN_TABLE;
236 "%type" return PERCENT_TYPE;
237 "%union" return PERCENT_UNION;
238 "%verbose" return PERCENT_VERBOSE;
239 "%yacc" return PERCENT_YACC;
240
  /* Any other directive-like word is reported; no token is returned
     for it. */
241 {directive} {
242 complain_at (*loc, _("invalid directive: %s"), quote (yytext));
243 }
244
245 "=" return EQUAL;
246 "|" return PIPE;
247 ";" return SEMICOLON;
248
  /* An identifier. Its value and location are recorded but nothing is
     returned yet: SC_AFTER_IDENTIFIER decides between ID and
     ID_COLON. Any bracketed name left over is forgotten. */
249 {id} {
250 val->uniqstr = uniqstr_new (yytext);
251 id_loc = *loc;
252 bracketed_id_str = NULL;
253 BEGIN SC_AFTER_IDENTIFIER;
254 }
255
  /* Decimal and hexadecimal integers. */
256 {int} {
257 val->integer = scan_integer (yytext, 10, *loc);
258 return INT;
259 }
260 0[xX][0-9abcdefABCDEF]+ {
261 val->integer = scan_integer (yytext, 16, *loc);
262 return INT;
263 }
264
265 /* Identifiers may not start with a digit. Yet, don't silently
266 accept "1FOO" as "1 FOO". */
267 {int}{id} {
268 complain_at (*loc, _("invalid identifier: %s"), quote (yytext));
269 }
270
271 /* Characters. */
272 "'" token_start = loc->start; BEGIN SC_ESCAPED_CHARACTER;
273
274 /* Strings. */
275 "\"" token_start = loc->start; BEGIN SC_ESCAPED_STRING;
276
277 /* Prologue. */
278 "%{" code_start = loc->start; BEGIN SC_PROLOGUE;
279
280 /* Code in between braces. The opening brace is part of the saved
   text (STRING_GROW). */
281 "{" {
282 STRING_GROW;
283 nesting = 0;
284 code_start = loc->start;
285 BEGIN SC_BRACED_CODE;
286 }
287
288 /* Semantic predicate. Nothing of the opening sequence is saved. */
289 "%?"[ \f\n\t\v]*"{" {
290 nesting = 0;
291 code_start = loc->start;
292 BEGIN SC_PREDICATE;
293 }
294
295 /* A type. Tags without nested angle brackets are returned at once;
   any other opening angle bracket starts a tag handled in SC_TAG. */
296 "<*>" return TAG_ANY;
297 "<>" return TAG_NONE;
298 "<"{tag}">" {
299 obstack_grow (&obstack_for_string, yytext + 1, yyleng - 2);
300 STRING_FINISH;
301 val->uniqstr = uniqstr_new (last_string);
302 STRING_FREE;
303 return TAG;
304 }
305 "<" {
306 nesting = 0;
307 token_start = loc->start;
308 BEGIN SC_TAG;
309 }
310
  /* Section separator. The count is static, so it lasts across calls
     of the scanner; the second separator switches to the epilogue. */
311 "%%" {
312 static int percent_percent_count;
313 if (++percent_percent_count == 2)
314 BEGIN SC_EPILOGUE;
315 return PERCENT_PERCENT;
316 }
317
  /* A bracketed name that does not directly follow an identifier. */
318 "[" {
319 bracketed_id_str = NULL;
320 bracketed_id_start = loc->start;
321 bracketed_id_context_state = YY_START;
322 BEGIN SC_BRACKETED_ID;
323 }
324
  /* Anything else is reported and dropped. */
325 . {
326 complain_at (*loc, _("invalid character: %s"), quote (yytext));
327 }
328
  /* At end of input, report an empty location at the cursor. */
329 <<EOF>> {
330 loc->start = loc->end = scanner_cursor;
331 yyterminate ();
332 }
333 }
334
335
336 /*--------------------------------------------------------------.
337 | Supporting \0 complexifies our implementation for no expected |
338 | added value. |
339 `--------------------------------------------------------------*/
340
341 <SC_ESCAPED_CHARACTER,SC_ESCAPED_STRING,SC_TAG>
342 {
  /* A NUL byte is reported and not added to the token being built. */
343 \0 complain_at (*loc, _("invalid null character"));
344 }
345
346
347 /*-----------------------------------------------------------------.
348 | Scanning after an identifier, checking whether a colon is next. |
349 `-----------------------------------------------------------------*/
350
351 <SC_AFTER_IDENTIFIER>
352 {
  /* An identifier was read (value in VAL, location in id_loc) but not
     returned yet. A colon turns it into ID_COLON; anything else, or
     the end of file, makes it a plain ID. If a bracketed name was
     collected meanwhile, the next start condition is
     SC_RETURN_BRACKETED_ID so that this name is returned next. */

  /* An opening bracket: either the start of the bracketed name of
     this identifier, or, when a name was already read, the start of
     something else, in which case the identifier ends here and the
     bracket is scanned again later. */
353 "[" {
354 if (bracketed_id_str)
355 {
356 ROLLBACK_CURRENT_TOKEN;
357 BEGIN SC_RETURN_BRACKETED_ID;
358 *loc = id_loc;
359 return ID;
360 }
361 else
362 {
363 bracketed_id_start = loc->start;
364 bracketed_id_context_state = YY_START;
365 BEGIN SC_BRACKETED_ID;
366 }
367 }
368 ":" {
369 BEGIN (bracketed_id_str ? SC_RETURN_BRACKETED_ID : INITIAL);
370 *loc = id_loc;
371 return ID_COLON;
372 }
  /* Any other character belongs to the next token: push it back. */
373 . {
374 ROLLBACK_CURRENT_TOKEN;
375 BEGIN (bracketed_id_str ? SC_RETURN_BRACKETED_ID : INITIAL);
376 *loc = id_loc;
377 return ID;
378 }
379 <<EOF>> {
380 BEGIN (bracketed_id_str ? SC_RETURN_BRACKETED_ID : INITIAL);
381 *loc = id_loc;
382 return ID;
383 }
384 }
385
386 /*--------------------------------.
387 | Scanning bracketed identifiers. |
388 `--------------------------------*/
389
390 <SC_BRACKETED_ID>
391 {
  /* Between the brackets. Exactly one identifier is expected; it is
     kept in bracketed_id_str together with its location. */
392 {id} {
393 if (bracketed_id_str)
394 {
395 complain_at (*loc, _("unexpected identifier in bracketed name: %s"),
396 quote (yytext));
397 }
398 else
399 {
400 bracketed_id_str = uniqstr_new (yytext);
401 bracketed_id_loc = *loc;
402 }
403 }
  /* Closing bracket: resume the saved start condition. The name is
     returned at once only when the bracket was opened from INITIAL;
     otherwise it stays pending in bracketed_id_str until the
     identifier it follows has been returned. */
404 "]" {
405 BEGIN bracketed_id_context_state;
406 if (bracketed_id_str)
407 {
408 if (INITIAL == bracketed_id_context_state)
409 {
410 val->uniqstr = bracketed_id_str;
411 bracketed_id_str = 0;
412 *loc = bracketed_id_loc;
413 return BRACKETED_ID;
414 }
415 }
416 else
417 complain_at (*loc, _("an identifier expected"));
418 }
419 . {
420 complain_at (*loc, _("invalid character in bracketed name: %s"),
421 quote (yytext));
422 }
  /* Missing closing bracket: report it from the opening bracket and
     resume the saved start condition. */
423 <<EOF>> {
424 BEGIN bracketed_id_context_state;
425 unexpected_eof (bracketed_id_start, "]");
426 }
427 }
428
429 <SC_RETURN_BRACKETED_ID>
430 {
  /* The identifier was returned; now return its pending bracketed
     name. The character that triggered this rule is pushed back and
     scanned again in INITIAL.
     NOTE(review): this start condition has no end-of-file rule of its
     own -- confirm that a pending name cannot be lost at end of
     input. */
431 . {
432 ROLLBACK_CURRENT_TOKEN;
433 val->uniqstr = bracketed_id_str;
434 bracketed_id_str = 0;
435 *loc = bracketed_id_loc;
436 BEGIN INITIAL;
437 return BRACKETED_ID;
438 }
439 }
440
441
442 /*---------------------------------------------------------------.
443 | Scanning a Yacc comment. The initial `/ *' is already eaten. |
444 `---------------------------------------------------------------*/
445
446 <SC_YACC_COMMENT>
447 {
  /* The comment body is discarded. At the closing delimiter, or at
     end of file (which is reported), go back to the start condition
     saved in context_state when the comment was opened. */
448 "*/" BEGIN context_state;
449 .|\n ;
450 <<EOF>> unexpected_eof (token_start, "*/"); BEGIN context_state;
451 }
452
453
454 /*------------------------------------------------------------.
455 | Scanning a C comment. The initial `/ *' is already eaten. |
456 `------------------------------------------------------------*/
457
458 <SC_COMMENT>
459 {
  /* A comment inside user code is kept as part of the code. Line
     splices are accepted between the star and the slash of the
     closing delimiter. The other characters of the comment are
     copied by the default rule at the end of the rules section. */
460 "*"{splice}"/" STRING_GROW; BEGIN context_state;
461 <<EOF>> unexpected_eof (token_start, "*/"); BEGIN context_state;
462 }
463
464
465 /*--------------------------------------------------------------.
466 | Scanning a line comment. The initial `//' is already eaten. |
467 `--------------------------------------------------------------*/
468
469 <SC_LINE_COMMENT>
470 {
  /* A line comment inside user code is kept as part of the code. It
     ends at the first newline that is not part of a line splice; a
     spliced newline is copied and the comment goes on. Reaching the
     end of file inside a line comment is not reported. */
471 "\n" STRING_GROW; BEGIN context_state;
472 {splice} STRING_GROW;
473 <<EOF>> BEGIN context_state;
474 }
475
476
477 /*------------------------------------------------.
478 | Scanning a Bison string, including its escapes. |
479 | The initial quote is already eaten. |
480 `------------------------------------------------*/
481
482 <SC_ESCAPED_STRING>
483 {
  /* The string ends at the closing double quote. A newline or the
     end of file also end it, after a complaint. In all three cases
     the text collected so far is finished and handed over as the
     value of the STRING token (it is not freed here), and the token
     location starts at the opening quote. */
484 "\""|"\n" {
485 if (yytext[0] == '\n')
486 unexpected_newline (token_start, "\"");
487 STRING_FINISH;
488 loc->start = token_start;
489 val->chars = last_string;
490 BEGIN INITIAL;
491 return STRING;
492 }
493 <<EOF>> {
494 unexpected_eof (token_start, "\"");
495 STRING_FINISH;
496 loc->start = token_start;
497 val->chars = last_string;
498 BEGIN INITIAL;
499 return STRING;
500 }
501 }
502
503 /*----------------------------------------------------------.
504 | Scanning a Bison character literal, decoding its escapes. |
505 | The initial quote is already eaten. |
506 `----------------------------------------------------------*/
507
508 <SC_ESCAPED_CHARACTER>
509 {
  /* The literal ends at the closing quote; a newline or the end of
     file also end it, after a complaint. The value of the CHAR token
     is the first byte collected. An empty literal draws a warning
     and yields a single quote instead; extra bytes draw a warning
     and are dropped. The collected text is freed before returning.
     The two actions below differ only in how the missing closing
     quote is reported. */
510 "'"|"\n" {
511 STRING_FINISH;
512 loc->start = token_start;
513 val->character = last_string[0];
514 {
515 /* FIXME: Eventually, make these errors. */
516 if (last_string[0] == '\0')
517 {
518 warn_at (*loc, _("empty character literal"));
519 /* '\0' seems dangerous even if we are about to complain. */
520 val->character = '\'';
521 }
522 else if (last_string[1] != '\0')
523 warn_at (*loc, _("extra characters in character literal"));
524 }
525 if (yytext[0] == '\n')
526 unexpected_newline (token_start, "'");
527 STRING_FREE;
528 BEGIN INITIAL;
529 return CHAR;
530 }
531 <<EOF>> {
532 STRING_FINISH;
533 loc->start = token_start;
534 val->character = last_string[0];
535 {
536 /* FIXME: Eventually, make these errors. */
537 if (last_string[0] == '\0')
538 {
539 warn_at (*loc, _("empty character literal"));
540 /* '\0' seems dangerous even if we are about to complain. */
541 val->character = '\'';
542 }
543 else if (last_string[1] != '\0')
544 warn_at (*loc, _("extra characters in character literal"));
545 }
546 unexpected_eof (token_start, "'");
547 STRING_FREE;
548 BEGIN INITIAL;
549 return CHAR;
550 }
551 }
552
553 /*-----------------------------------------------------------.
554 | Scanning a Bison nested tag. The initial angle bracket is |
555 | already eaten. |
556 `-----------------------------------------------------------*/
557
558 <SC_TAG>
559 {
  /* NESTING counts the opening angle brackets seen inside the tag and
     not closed yet; it is zero on entry. A closing bracket read while
     it is zero ends the tag and is not part of its text; any other
     closing bracket is copied. */
560 ">" {
561 --nesting;
562 if (nesting < 0)
563 {
564 STRING_FINISH;
565 loc->start = token_start;
566 val->uniqstr = uniqstr_new (last_string);
567 STRING_FREE;
568 BEGIN INITIAL;
569 return TAG;
570 }
571 STRING_GROW;
572 }
573
  /* Plain text, and runs of opening brackets: each bracket of a run
     adds one nesting level. */
574 [^<>]+ STRING_GROW;
575 "<"+ STRING_GROW; nesting += yyleng;
576
  /* Unterminated tag: complain, then return what was collected. */
577 <<EOF>> {
578 unexpected_eof (token_start, ">");
579 STRING_FINISH;
580 loc->start = token_start;
581 val->uniqstr = uniqstr_new (last_string);
582 STRING_FREE;
583 BEGIN INITIAL;
584 return TAG;
585 }
586 }
587
588 /*----------------------------.
589 | Decode escaped characters. |
590 `----------------------------*/
591
592 <SC_ESCAPED_STRING,SC_ESCAPED_CHARACTER>
593 {
  /* Each valid escape sequence appends the single byte it denotes to
     the token being built; an invalid one is reported and appends
     nothing. */

  /* Octal escape, one to three digits. Zero and values above
     UCHAR_MAX are rejected. */
594 \\[0-7]{1,3} {
595 unsigned long int c = strtoul (yytext + 1, NULL, 8);
596 if (!c || UCHAR_MAX < c)
597 complain_at (*loc, _("invalid number after \\-escape: %s"),
598 yytext+1);
599 else
600 obstack_1grow (&obstack_for_string, c);
601 }
602
  /* Hexadecimal escape, any number of digits, with the same limits. */
603 \\x[0-9abcdefABCDEF]+ {
604 verify (UCHAR_MAX < ULONG_MAX);
605 unsigned long int c = strtoul (yytext + 2, NULL, 16);
606 if (!c || UCHAR_MAX < c)
607 complain_at (*loc, _("invalid number after \\-escape: %s"),
608 yytext+1);
609 else
610 obstack_1grow (&obstack_for_string, c);
611 }
612
  /* Simple escapes. */
613 \\a obstack_1grow (&obstack_for_string, '\a');
614 \\b obstack_1grow (&obstack_for_string, '\b');
615 \\f obstack_1grow (&obstack_for_string, '\f');
616 \\n obstack_1grow (&obstack_for_string, '\n');
617 \\r obstack_1grow (&obstack_for_string, '\r');
618 \\t obstack_1grow (&obstack_for_string, '\t');
619 \\v obstack_1grow (&obstack_for_string, '\v');
620
621 /* \\[\"\'?\\] would be shorter, but it confuses xgettext. */
622 \\("\""|"'"|"?"|"\\") obstack_1grow (&obstack_for_string, yytext[1]);
623
  /* Universal character name: four hexadecimal digits after a
     lowercase u, or eight after an uppercase U. It is accepted only
     when convert_ucn_to_byte maps it to a nonzero single byte. */
624 \\(u|U[0-9abcdefABCDEF]{4})[0-9abcdefABCDEF]{4} {
625 int c = convert_ucn_to_byte (yytext);
626 if (c <= 0)
627 complain_at (*loc, _("invalid number after \\-escape: %s"),
628 yytext+1);
629 else
630 obstack_1grow (&obstack_for_string, c);
631 }
  /* Any other character after a backslash is an error. */
632 \\(.|\n) {
633 char const *p = yytext + 1;
634 /* Quote only if escaping won't make the character visible. */
635 if (isspace ((unsigned char) *p) && isprint ((unsigned char) *p))
636 p = quote (p);
637 else
638 p = quotearg_style_mem (escape_quoting_style, p, 1);
639 complain_at (*loc, _("invalid character after \\-escape: %s"), p);
640 }
641 }
642
643 /*--------------------------------------------.
644 | Scanning user-code characters and strings. |
645 `--------------------------------------------*/
646
647 <SC_CHARACTER,SC_STRING>
648 {
  /* Copy line splices, and copy a backslash (possibly followed by
     splices) together with the character after it, so that an escaped
     delimiter does not end the literal.
     NOTE(review): the character after the backslash may be neither a
     newline nor a square bracket; the reason for excluding brackets
     is not evident from this file. */
649 {splice}|\\{splice}[^\n\[\]] STRING_GROW;
650 }
651
652 <SC_CHARACTER>
653 {
  /* End of a character literal in user code. The closing quote is
     copied. A newline or the end of file also end the literal, after
     a complaint; the newline itself is not copied. In every case the
     scanner returns to the enclosing user-code start condition. */
654 "'" STRING_GROW; BEGIN context_state;
655 \n unexpected_newline (token_start, "'"); BEGIN context_state;
656 <<EOF>> unexpected_eof (token_start, "'"); BEGIN context_state;
657 }
658
659 <SC_STRING>
660 {
  /* End of a string literal in user code. The closing quote is
     copied. A newline or the end of file also end the literal, after
     a complaint; the newline itself is not copied. In every case the
     scanner returns to the enclosing user-code start condition. */
661 "\"" STRING_GROW; BEGIN context_state;
662 \n unexpected_newline (token_start, "\""); BEGIN context_state;
663 <<EOF>> unexpected_eof (token_start, "\""); BEGIN context_state;
664 }
665
666
667 /*---------------------------------------------------.
668 | Strings, comments etc. can be found in user code. |
669 `---------------------------------------------------*/
670
671 <SC_BRACED_CODE,SC_PROLOGUE,SC_EPILOGUE,SC_PREDICATE>
672 {
  /* Literals and comments inside user code are copied like the rest
     of the code, but they are scanned in dedicated start conditions
     so that braces and delimiters inside them are not interpreted.
     Each rule copies the opening delimiter and saves the current
     start condition in context_state. No start boundary is saved for
     a line comment: its rules never report an error. */
673 "'" {
674 STRING_GROW;
675 context_state = YY_START;
676 token_start = loc->start;
677 BEGIN SC_CHARACTER;
678 }
679 "\"" {
680 STRING_GROW;
681 context_state = YY_START;
682 token_start = loc->start;
683 BEGIN SC_STRING;
684 }
685 "/"{splice}"*" {
686 STRING_GROW;
687 context_state = YY_START;
688 token_start = loc->start;
689 BEGIN SC_COMMENT;
690 }
691 "/"{splice}"/" {
692 STRING_GROW;
693 context_state = YY_START;
694 BEGIN SC_LINE_COMMENT;
695 }
696 }
697
698
699
700 /*-----------------------------------------------------------.
701 | Scanning some code in braces (actions, predicates). The |
702 | initial "{" is already eaten. |
703 `-----------------------------------------------------------*/
704
705 <SC_BRACED_CODE,SC_PREDICATE>
706 {
  /* An opening brace, or the two-character sequence made of an
     opening angle bracket and a percent sign, adds one nesting
     level; the sequence made of a percent sign and a closing angle
     bracket removes one. Line splices are accepted inside these
     sequences. */
707 "{"|"<"{splice}"%" STRING_GROW; nesting++;
708 "%"{splice}">" STRING_GROW; nesting--;
709
710 /* Tokenize `<<%' correctly (as `<<' `%') rather than incorrectly
711 (as `<' `<%'). */
712 "<"{splice}"<" STRING_GROW;
713
  /* Unterminated code: complain, then return what was collected as
     the token that matches the current start condition. */
714 <<EOF>> {
715 int token = (YY_START == SC_BRACED_CODE) ? BRACED_CODE : BRACED_PREDICATE;
716 unexpected_eof (code_start, "}");
717 STRING_FINISH;
718 loc->start = code_start;
719 val->code = last_string;
720 BEGIN INITIAL;
721 return token;
722 }
723 }
724
725 <SC_BRACED_CODE>
726 {
  /* Every closing brace is appended to the code, including the
     outermost one, which ends the BRACED_CODE token. */
727 "}" {
728 obstack_1grow (&obstack_for_string, '}');
729
730 --nesting;
731 if (nesting < 0)
732 {
733 STRING_FINISH;
734 loc->start = code_start;
735 val->code = last_string;
736 BEGIN INITIAL;
737 return BRACED_CODE;
738 }
739 }
740 }
741
742 <SC_PREDICATE>
743 {
  /* Unlike in SC_BRACED_CODE, the outermost closing brace is not
     appended to the code (the opening sequence was not saved either);
     inner closing braces are. */
744 "}" {
745 --nesting;
746 if (nesting < 0)
747 {
748 STRING_FINISH;
749 loc->start = code_start;
750 val->code = last_string;
751 BEGIN INITIAL;
752 return BRACED_PREDICATE;
753 }
754 else
755 obstack_1grow (&obstack_for_string, '}');
756 }
757 }
758
759 /*--------------------------------------------------------------.
760 | Scanning some prologue: from "%{" (already scanned) to "%}". |
761 `--------------------------------------------------------------*/
762
763 <SC_PROLOGUE>
764 {
  /* The closing delimiter ends the prologue and is not part of its
     text. The collected code becomes the value of the PROLOGUE
     token, whose location starts where the prologue was opened. */
765 "%}" {
766 STRING_FINISH;
767 loc->start = code_start;
768 val->chars = last_string;
769 BEGIN INITIAL;
770 return PROLOGUE;
771 }
772
  /* Unterminated prologue: complain, then return what was collected. */
773 <<EOF>> {
774 unexpected_eof (code_start, "%}");
775 STRING_FINISH;
776 loc->start = code_start;
777 val->chars = last_string;
778 BEGIN INITIAL;
779 return PROLOGUE;
780 }
781 }
782
783
784 /*---------------------------------------------------------------.
785 | Scanning the epilogue (everything after the second "%%", which |
786 | has already been eaten). |
787 `---------------------------------------------------------------*/
788
789 <SC_EPILOGUE>
790 {
  /* The epilogue runs to the end of the file, which is therefore its
     normal terminator: no complaint is issued here. */
791 <<EOF>> {
792 STRING_FINISH;
793 loc->start = code_start;
794 val->chars = last_string;
795 BEGIN INITIAL;
796 return EPILOGUE;
797 }
798 }
799
800
801 /*-----------------------------------------------------.
802 | By default, grow the string obstack with the input. |
803 `-----------------------------------------------------*/
804
  /* Fallback for the text-collecting start conditions: any character
     not matched by a more specific rule is appended to the string
     being built. Newlines are appended only in comments and user
     code: the string and character start conditions have their own
     newline rules. */
805 <SC_COMMENT,SC_LINE_COMMENT,SC_BRACED_CODE,SC_PREDICATE,SC_PROLOGUE,SC_EPILOGUE,SC_STRING,SC_CHARACTER,SC_ESCAPED_STRING,SC_ESCAPED_CHARACTER>. |
806 <SC_COMMENT,SC_LINE_COMMENT,SC_BRACED_CODE,SC_PREDICATE,SC_PROLOGUE,SC_EPILOGUE>\n STRING_GROW;
807
808 %%
809
/* Fill BUF with at most SIZE bytes read from FP and return how many
   bytes BUF holds afterwards.  Carriage returns never reach the
   caller: a "\r\n" pair and a lone '\r' both become a single '\n',
   so the result may be shorter than what was read.  */

static size_t
no_cr_read (FILE *fp, char *buf, size_t size)
{
  size_t const nread = fread (buf, 1, size, fp);
  char *dst;
  char const *src;
  char const *end;

  if (nread == 0)
    return nread;

  dst = memchr (buf, '\r', nread);
  if (dst == NULL)
    /* No carriage return at all: the buffer is fine as is.  */
    return nread;

  /* From here on the buffer is compacted in place.  Both cursors
     start just after the first '\r'; SRC reads, DST writes.  */
  dst++;
  src = dst;
  end = buf + nread;

  for (;;)
    {
      /* The byte just before DST is an '\r': turn it into a newline
         and drop the '\n' that may follow it.  */
      dst[-1] = '\n';
      if (src == end)
        {
          /* The '\r' was the last byte read, so its '\n' (if any) is
             still in the stream.  Peek at it, and stop if a
             different character cannot be pushed back.  */
          int next = getc (fp);
          if (next != '\n' && ungetc (next, fp) != next)
            break;
        }
      else if (*src == '\n')
        src++;

      /* Move bytes down until another '\r' has been copied or the
         input is exhausted.  */
      for (;;)
        {
          char c;
          if (src == end)
            return dst - buf;
          c = *src++;
          *dst++ = c;
          if (c == '\r')
            break;
        }
    }

  return dst - buf;
}
855
856
857
858 /*------------------------------------------------------.
859 | Scan NUMBER for a base-BASE integer at location LOC. |
860 `------------------------------------------------------*/
861
862 static unsigned long int
863 scan_integer (char const *number, int base, location loc)
864 {
865 verify (INT_MAX < ULONG_MAX);
866 unsigned long int num = strtoul (number, NULL, base);
867
868 if (INT_MAX < num)
869 {
870 complain_at (loc, _("integer out of range: %s"), quote (number));
871 num = INT_MAX;
872 }
873
874 return num;
875 }
876
877
/*------------------------------------------------------------------.
| Convert universal character name UCN to a single-byte character,  |
| and return that character.  Return -1 if UCN does not correspond  |
| to a single-byte character.                                       |
|                                                                   |
| UCN points to the backslash that starts the name, so the          |
| hexadecimal digits begin at UCN + 2.  A code of zero is returned  |
| as 0; the scanner rule that calls this function rejects it.       |
`------------------------------------------------------------------*/

static int
convert_ucn_to_byte (char const *ucn)
{
  verify (UCHAR_MAX <= INT_MAX);
  unsigned long int code = strtoul (ucn + 2, NULL, 16);

  /* FIXME: Currently we assume Unicode-compatible unibyte characters
     on ASCII hosts (i.e., Latin-1 on hosts with 8-bit bytes).  On
     non-ASCII hosts we support only the portable C character set.
     These limitations should be removed once we add support for
     multibyte characters.  */

  if (UCHAR_MAX < code)
    return -1;

#if ! ('$' == 0x24 && '@' == 0x40 && '`' == 0x60 && '~' == 0x7e)
  {
    /* A non-ASCII host.  Use CODE to index into a table of the C
       basic execution character set, which is guaranteed to exist on
       all Standard C platforms.  This table also includes '$', '@',
       and '`', which are not in the basic execution character set but
       which are unibyte characters on all the platforms that we know
       about.  */
    static signed char const table[] =
      {
        '\0',   -1,   -1,   -1,   -1,   -1,   -1, '\a',
        '\b', '\t', '\n', '\v', '\f', '\r',   -1,   -1,
          -1,   -1,   -1,   -1,   -1,   -1,   -1,   -1,
          -1,   -1,   -1,   -1,   -1,   -1,   -1,   -1,
         ' ',  '!',  '"',  '#',  '$',  '%',  '&', '\'',
         '(',  ')',  '*',  '+',  ',',  '-',  '.',  '/',
         '0',  '1',  '2',  '3',  '4',  '5',  '6',  '7',
         '8',  '9',  ':',  ';',  '<',  '=',  '>',  '?',
         '@',  'A',  'B',  'C',  'D',  'E',  'F',  'G',
         'H',  'I',  'J',  'K',  'L',  'M',  'N',  'O',
         'P',  'Q',  'R',  'S',  'T',  'U',  'V',  'W',
         'X',  'Y',  'Z',  '[', '\\',  ']',  '^',  '_',
         '`',  'a',  'b',  'c',  'd',  'e',  'f',  'g',
         'h',  'i',  'j',  'k',  'l',  'm',  'n',  'o',
         'p',  'q',  'r',  's',  't',  'u',  'v',  'w',
         'x',  'y',  'z',  '{',  '|',  '}',  '~'
      };

    /* Produce the int result directly.  Storing -1 in the unsigned
       CODE and converting it back to int on return would rely on an
       implementation-defined out-of-range conversion.  */
    return code < sizeof table ? table[code] : -1;
  }
#else
  /* CODE is at most UCHAR_MAX here, which fits in an int (see the
     verify above).  */
  return (int) code;
#endif
}
933
934
935 /*----------------------------------------------------------------.
936 | Handle `#line INT "FILE"'. ARGS has already skipped `#line '. |
937 `----------------------------------------------------------------*/
938
939 static void
940 handle_syncline (char *args, location loc)
941 {
942 char *after_num;
943 unsigned long int lineno = strtoul (args, &after_num, 10);
944 char *file = strchr (after_num, '"') + 1;
945 *strchr (file, '"') = '\0';
946 if (INT_MAX <= lineno)
947 {
948 warn_at (loc, _("line number overflow"));
949 lineno = INT_MAX;
950 }
951 current_file = uniqstr_new (file);
952 boundary_set (&scanner_cursor, current_file, lineno, 1);
953 }
954
955
956 /*----------------------------------------------------------------.
957 | For a token or comment starting at START, report message MSGID, |
958 | which should say that an end marker was found before |
959 | the expected TOKEN_END. |
960 `----------------------------------------------------------------*/
961
962 static void
963 unexpected_end (boundary start, char const *msgid, char const *token_end)
964 {
965 location loc;
966 loc.start = start;
967 loc.end = scanner_cursor;
968 complain_at (loc, _(msgid), token_end);
969 }
970
971
972 /*------------------------------------------------------------------------.
973 | Report an unexpected EOF in a token or comment starting at START. |
974 | An end of file was encountered and the expected TOKEN_END was missing. |
975 `------------------------------------------------------------------------*/
976
977 static void
978 unexpected_eof (boundary start, char const *token_end)
979 {
980 unexpected_end (start, N_("missing `%s' at end of file"), token_end);
981 }
982
983
984 /*----------------------------------------.
985 | Likewise, but for unexpected newlines. |
986 `----------------------------------------*/
987
988 static void
989 unexpected_newline (boundary start, char const *token_end)
990 {
991 unexpected_end (start, N_("missing `%s' at end of line"), token_end);
992 }
993
994
995 /*-------------------------.
996 | Initialize the scanner. |
997 `-------------------------*/
998
999 void
1000 gram_scanner_initialize (void)
1001 {
1002 obstack_init (&obstack_for_string);
1003 }
1004
1005
1006 /*-----------------------------------------------.
1007 | Free all the memory allocated to the scanner. |
1008 `-----------------------------------------------*/
1009
1010 void
1011 gram_scanner_free (void)
1012 {
1013 obstack_free (&obstack_for_string, 0);
1014 /* Reclaim Flex's buffers. */
1015 yylex_destroy ();
1016 }