]> git.saurik.com Git - bison.git/blob - src/scan-gram.l
52762508cd57bf053ee617b33c1d990e4cc61b70
[bison.git] / src / scan-gram.l
1 /* Bison Grammar Scanner -*- C -*-
2
3 Copyright (C) 2002-2010 Free Software Foundation, Inc.
4
5 This file is part of Bison, the GNU Compiler Compiler.
6
7 This program is free software: you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation, either version 3 of the License, or
10 (at your option) any later version.
11
12 This program is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
16
17 You should have received a copy of the GNU General Public License
18 along with this program. If not, see <http://www.gnu.org/licenses/>. */
19
20 %option debug nodefault noinput nounput noyywrap never-interactive
21 %option prefix="gram_" outfile="lex.yy.c"
22
23 %{
24 /* Work around a bug in flex 2.5.31. See Debian bug 333231
25 <http://bugs.debian.org/cgi-bin/bugreport.cgi?bug=333231>. */
26 #undef gram_wrap
27 #define gram_wrap() 1
28
29 #define FLEX_PREFIX(Id) gram_ ## Id
30 #include <src/flex-scanner.h>
31
32 #include <src/complain.h>
33 #include <src/files.h>
34 #include <src/gram.h>
35 #include <quotearg.h>
36 #include <src/reader.h>
37 #include <src/uniqstr.h>
38
39 #include <ctype.h>
40 #include <mbswidth.h>
41 #include <quote.h>
42
43 #include <src/scan-gram.h>
44
45 #define YY_DECL GRAM_LEX_DECL
46
47 #define YY_USER_INIT \
48 code_start = scanner_cursor = loc->start; \
49
50 /* Location of scanner cursor. */
51 static boundary scanner_cursor;
52
53 #define YY_USER_ACTION location_compute (loc, &scanner_cursor, yytext, yyleng);
54
55 static size_t no_cr_read (FILE *, char *, size_t);
56 #define YY_INPUT(buf, result, size) ((result) = no_cr_read (yyin, buf, size))
57
58 #define RETURN_PERCENT_PARAM(Value) \
59 RETURN_VALUE(PERCENT_PARAM, param, param_ ## Value)
60
61 #define RETURN_PERCENT_FLAG(Value) \
62 RETURN_VALUE(PERCENT_FLAG, uniqstr, uniqstr_new (Value))
63
64 #define RETURN_VALUE(Token, Field, Value) \
65 do { \
66 val->Field = Value; \
67 return Token; \
68 } while (0)
69
70 #define ROLLBACK_CURRENT_TOKEN \
71 do { \
72 scanner_cursor.column -= mbsnwidth (yytext, yyleng, 0); \
73 yyless (0); \
74 } while (0)
75
76 /* A string representing the most recently saved token. */
77 static char *last_string;
78
79 /* Bracketed identifier. */
80 static uniqstr bracketed_id_str = 0;
81 static location bracketed_id_loc;
82 static boundary bracketed_id_start;
83 static int bracketed_id_context_state = 0;
84
85 void
86 gram_scanner_last_string_free (void)
87 {
88 STRING_FREE;
89 }
90
91 static void handle_syncline (char *, location);
92 static unsigned long int scan_integer (char const *p, int base, location loc);
93 static int convert_ucn_to_byte (char const *hex_text);
94 static void unexpected_eof (boundary, char const *);
95 static void unexpected_newline (boundary, char const *);
96
97 %}
98 /* A C-like comment in directives/rules. */
99 %x SC_YACC_COMMENT
100 /* Strings and characters in directives/rules. */
101 %x SC_ESCAPED_STRING SC_ESCAPED_CHARACTER
102 /* An identifier was just read in directives/rules. Special state
103 to capture the sequence `identifier :'. */
104 %x SC_AFTER_IDENTIFIER
105 /* A complex tag, with nested angle brackets. */
106 %x SC_TAG
107
108 /* Three types of user code:
109 - prologue (code between `%{' `%}' in the first section, before %%);
110 - actions, printers, union, etc. (between braces in the middle section);
111 - epilogue (everything after the second %%). */
112 %x SC_PROLOGUE SC_BRACED_CODE SC_EPILOGUE
113 /* C and C++ comments in code. */
114 %x SC_COMMENT SC_LINE_COMMENT
115 /* Strings and characters in code. */
116 %x SC_STRING SC_CHARACTER
117 /* Bracketed identifiers support. */
118 %x SC_BRACKETED_ID SC_RETURN_BRACKETED_ID
119
120 letter [-.abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ_]
121 id {letter}({letter}|[0-9])*
122 directive %{id}
123 int [0-9]+
124
125 /* POSIX says that a tag must be both an id and a C union member, but
126 historically almost any character is allowed in a tag. We disallow
127 NUL, as this simplifies our implementation. We disallow angle
128 brackets so that nested pairs can be matched: several languages use them
129 for generics/template types. */
130 tag [^\0<>]+
131
132 /* Zero or more instances of backslash-newline. Following GCC, allow
133 white space between the backslash and the newline. */
134 splice (\\[ \f\t\v]*\n)*
135
136 %%
137 %{
138 /* Nesting level. Either for nested braces, or nested angle brackets
139 (but not mixed). */
140 int nesting IF_LINT (= 0);
141
142 /* Parent context state, when applicable. */
143 int context_state IF_LINT (= 0);
144
145 /* Location of most recent identifier, when applicable. */
146 location id_loc IF_LINT (= empty_location);
147
148 /* Where containing code started, when applicable. Its initial
149 value is relevant only when yylex is invoked in the SC_EPILOGUE
150 start condition. */
151 boundary code_start = scanner_cursor;
152
153 /* Where containing comment or string or character literal started,
154 when applicable. */
155 boundary token_start IF_LINT (= scanner_cursor);
156 %}
157
158
159 /*-----------------------.
160 | Scanning white space. |
161 `-----------------------*/
162
163 <INITIAL,SC_AFTER_IDENTIFIER,SC_BRACKETED_ID,SC_RETURN_BRACKETED_ID>
164 {
165 /* Comments and white space. */
166 "," warn_at (*loc, _("stray `,' treated as white space"));
167 [ \f\n\t\v] |
168 "//".* ;
169 "/*" {
170 token_start = loc->start;
171 context_state = YY_START;
172 BEGIN SC_YACC_COMMENT;
173 }
174
175 /* #line directives are not documented, and may be withdrawn or
176 modified in future versions of Bison. */
177 ^"#line "{int}" \"".*"\"\n" {
178 handle_syncline (yytext + sizeof "#line " - 1, *loc);
179 }
180 }
181
182
183 /*----------------------------.
184 | Scanning Bison directives. |
185 `----------------------------*/
186
187 /* For directives that are also command line options, the regex must be
188 "%..."
189 after "[-_]"s are removed, and the directive must match the --long
190 option name, with a single string argument. Otherwise, add exceptions
191 to ../build-aux/cross-options.pl. */
192
193 <INITIAL>
194 {
195 "%binary" return PERCENT_NONASSOC;
196 "%code" return PERCENT_CODE;
197 "%debug" RETURN_PERCENT_FLAG("parse.trace");
198 "%default"[-_]"prec" return PERCENT_DEFAULT_PREC;
199 "%define" return PERCENT_DEFINE;
200 "%defines" return PERCENT_DEFINES;
201 "%destructor" return PERCENT_DESTRUCTOR;
202 "%dprec" return PERCENT_DPREC;
203 "%error"[-_]"verbose" return PERCENT_ERROR_VERBOSE;
204 "%expect" return PERCENT_EXPECT;
205 "%expect"[-_]"rr" return PERCENT_EXPECT_RR;
206 "%file-prefix" return PERCENT_FILE_PREFIX;
207 "%fixed"[-_]"output"[-_]"files" return PERCENT_YACC;
208 "%initial-action" return PERCENT_INITIAL_ACTION;
209 "%glr-parser" return PERCENT_GLR_PARSER;
210 "%language" return PERCENT_LANGUAGE;
211 "%left" return PERCENT_LEFT;
212 "%lex-param" RETURN_PERCENT_PARAM(lex);
213 "%locations" RETURN_PERCENT_FLAG("locations");
214 "%merge" return PERCENT_MERGE;
215 "%name"[-_]"prefix" return PERCENT_NAME_PREFIX;
216 "%no"[-_]"default"[-_]"prec" return PERCENT_NO_DEFAULT_PREC;
217 "%no"[-_]"lines" return PERCENT_NO_LINES;
218 "%nonassoc" return PERCENT_NONASSOC;
219 "%nondeterministic-parser" return PERCENT_NONDETERMINISTIC_PARSER;
220 "%nterm" return PERCENT_NTERM;
221 "%output" return PERCENT_OUTPUT;
222 "%param" RETURN_PERCENT_PARAM(both);
223 "%parse-param" RETURN_PERCENT_PARAM(parse);
224 "%prec" return PERCENT_PREC;
225 "%precedence" return PERCENT_PRECEDENCE;
226 "%printer" return PERCENT_PRINTER;
227 "%pure"[-_]"parser" RETURN_PERCENT_FLAG("api.pure");
228 "%require" return PERCENT_REQUIRE;
229 "%right" return PERCENT_RIGHT;
230 "%skeleton" return PERCENT_SKELETON;
231 "%start" return PERCENT_START;
232 "%term" return PERCENT_TOKEN;
233 "%token" return PERCENT_TOKEN;
234 "%token"[-_]"table" return PERCENT_TOKEN_TABLE;
235 "%type" return PERCENT_TYPE;
236 "%union" return PERCENT_UNION;
237 "%verbose" return PERCENT_VERBOSE;
238 "%yacc" return PERCENT_YACC;
239
240 {directive} {
241 complain_at (*loc, _("invalid directive: %s"), quote (yytext));
242 }
243
244 "=" return EQUAL;
245 "|" return PIPE;
246 ";" return SEMICOLON;
247
248 {id} {
249 val->uniqstr = uniqstr_new (yytext);
250 id_loc = *loc;
251 bracketed_id_str = NULL;
252 BEGIN SC_AFTER_IDENTIFIER;
253 }
254
255 {int} {
256 val->integer = scan_integer (yytext, 10, *loc);
257 return INT;
258 }
259 0[xX][0-9abcdefABCDEF]+ {
260 val->integer = scan_integer (yytext, 16, *loc);
261 return INT;
262 }
263
264 /* Identifiers may not start with a digit. Yet, don't silently
265 accept "1FOO" as "1 FOO". */
266 {int}{id} {
267 complain_at (*loc, _("invalid identifier: %s"), quote (yytext));
268 }
269
270 /* Characters. */
271 "'" token_start = loc->start; BEGIN SC_ESCAPED_CHARACTER;
272
273 /* Strings. */
274 "\"" token_start = loc->start; BEGIN SC_ESCAPED_STRING;
275
276 /* Prologue. */
277 "%{" code_start = loc->start; BEGIN SC_PROLOGUE;
278
279 /* Code in between braces. */
280 "{" {
281 STRING_GROW;
282 nesting = 0;
283 code_start = loc->start;
284 BEGIN SC_BRACED_CODE;
285 }
286
287 /* A type. */
288 "<*>" return TAG_ANY;
289 "<>" return TAG_NONE;
290 "<"{tag}">" {
291 obstack_grow (&obstack_for_string, yytext + 1, yyleng - 2);
292 STRING_FINISH;
293 val->uniqstr = uniqstr_new (last_string);
294 STRING_FREE;
295 return TAG;
296 }
297 "<" {
298 nesting = 0;
299 token_start = loc->start;
300 BEGIN SC_TAG;
301 }
302
303 "%%" {
304 static int percent_percent_count;
305 if (++percent_percent_count == 2)
306 BEGIN SC_EPILOGUE;
307 return PERCENT_PERCENT;
308 }
309
310 "[" {
311 bracketed_id_str = NULL;
312 bracketed_id_start = loc->start;
313 bracketed_id_context_state = YY_START;
314 BEGIN SC_BRACKETED_ID;
315 }
316
317 . {
318 complain_at (*loc, _("invalid character: %s"), quote (yytext));
319 }
320
321 <<EOF>> {
322 loc->start = loc->end = scanner_cursor;
323 yyterminate ();
324 }
325 }
326
327
328 /*--------------------------------------------------------------.
329 | Supporting \0 complexifies our implementation for no expected |
330 | added value. |
331 `--------------------------------------------------------------*/
332
333 <SC_ESCAPED_CHARACTER,SC_ESCAPED_STRING,SC_TAG>
334 {
335 \0 complain_at (*loc, _("invalid null character"));
336 }
337
338
339 /*-----------------------------------------------------------------.
340 | Scanning after an identifier, checking whether a colon is next. |
341 `-----------------------------------------------------------------*/
342
343 <SC_AFTER_IDENTIFIER>
344 {
345 "[" {
  /* A "[" after the identifier.  If a bracketed name has already been
     recorded, this "[" cannot belong to the identifier: push it back,
     report the identifier as ID, and let SC_RETURN_BRACKETED_ID hand
     out the recorded name on the next call.  Otherwise start scanning
     a bracketed name, remembering where it began and which start
     condition to come back to.  */
346 if (bracketed_id_str)
347 {
348 ROLLBACK_CURRENT_TOKEN;
349 BEGIN SC_RETURN_BRACKETED_ID;
350 *loc = id_loc;
351 return ID;
352 }
353 else
354 {
355 bracketed_id_start = loc->start;
356 bracketed_id_context_state = YY_START;
357 BEGIN SC_BRACKETED_ID;
358 }
359 }
360 ":" {
  /* The identifier is followed by a colon: return ID_COLON located at
     the identifier (id_loc), not at the colon.  A recorded bracketed
     name, if any, is returned afterwards via SC_RETURN_BRACKETED_ID.  */
361 BEGIN (bracketed_id_str ? SC_RETURN_BRACKETED_ID : INITIAL);
362 *loc = id_loc;
363 return ID_COLON;
364 }
365 . {
  /* Any other character ends the look-ahead: un-read it (the macro
     also moves scanner_cursor back) and return a plain ID.  */
366 ROLLBACK_CURRENT_TOKEN;
367 BEGIN (bracketed_id_str ? SC_RETURN_BRACKETED_ID : INITIAL);
368 *loc = id_loc;
369 return ID;
370 }
371 <<EOF>> {
  /* End of input right after the identifier: nothing to push back,
     return a plain ID.  */
372 BEGIN (bracketed_id_str ? SC_RETURN_BRACKETED_ID : INITIAL);
373 *loc = id_loc;
374 return ID;
375 }
376 }
377
378 /*--------------------------------.
379 | Scanning bracketed identifiers. |
380 `--------------------------------*/
381
382 <SC_BRACKETED_ID>
383 {
384 {id} {
  /* Only the first identifier between the brackets is recorded (with
     its location); any further identifier is diagnosed and ignored.  */
385 if (bracketed_id_str)
386 {
387 complain_at (*loc, _("unexpected identifier in bracketed name: %s"),
388 quote (yytext));
389 }
390 else
391 {
392 bracketed_id_str = uniqstr_new (yytext);
393 bracketed_id_loc = *loc;
394 }
395 }
396 "]" {
  /* Closing bracket: go back to the start condition saved when "[" was
     seen.  If that was INITIAL, the name is returned right away as
     BRACKETED_ID.  Otherwise bracketed_id_str is left set and no token
     is returned here; the saved start condition decides when to hand
     it out.  Empty brackets are diagnosed.  */
397 BEGIN bracketed_id_context_state;
398 if (bracketed_id_str)
399 {
400 if (INITIAL == bracketed_id_context_state)
401 {
402 val->uniqstr = bracketed_id_str;
403 bracketed_id_str = 0;
404 *loc = bracketed_id_loc;
405 return BRACKETED_ID;
406 }
407 }
408 else
409 complain_at (*loc, _("an identifier expected"));
410 }
411 . {
  /* Anything else inside the brackets is diagnosed and skipped; the
     scanner stays in this start condition.  */
412 complain_at (*loc, _("invalid character in bracketed name: %s"),
413 quote (yytext));
414 }
415 <<EOF>> {
  /* Input ended before "]": restore the saved start condition and
     report the missing bracket, starting from the opening "[".  */
416 BEGIN bracketed_id_context_state;
417 unexpected_eof (bracketed_id_start, "]");
418 }
419 }
420
421 <SC_RETURN_BRACKETED_ID>
422 {
423 . {
  /* Deferred delivery of a bracketed name recorded earlier.  The
     character matched here is only a trigger: it is pushed back
     unread, the saved name and its location are returned as
     BRACKETED_ID, the saved name is cleared, and scanning resumes in
     INITIAL.  */
424 ROLLBACK_CURRENT_TOKEN;
425 val->uniqstr = bracketed_id_str;
426 bracketed_id_str = 0;
427 *loc = bracketed_id_loc;
428 BEGIN INITIAL;
429 return BRACKETED_ID;
430 }
431 }
432
433
434 /*---------------------------------------------------------------.
435 | Scanning a Yacc comment. The initial `/ *' is already eaten. |
436 `---------------------------------------------------------------*/
437
438 <SC_YACC_COMMENT>
439 {
440 "*/" BEGIN context_state;
441 .|\n ;
442 <<EOF>> unexpected_eof (token_start, "*/"); BEGIN context_state;
443 }
444
445
446 /*------------------------------------------------------------.
447 | Scanning a C comment. The initial `/ *' is already eaten. |
448 `------------------------------------------------------------*/
449
450 <SC_COMMENT>
451 {
452 "*"{splice}"/" STRING_GROW; BEGIN context_state;
453 <<EOF>> unexpected_eof (token_start, "*/"); BEGIN context_state;
454 }
455
456
457 /*--------------------------------------------------------------.
458 | Scanning a line comment. The initial `//' is already eaten. |
459 `--------------------------------------------------------------*/
460
461 <SC_LINE_COMMENT>
462 {
463 "\n" STRING_GROW; BEGIN context_state;
464 {splice} STRING_GROW;
465 <<EOF>> BEGIN context_state;
466 }
467
468
469 /*------------------------------------------------.
470 | Scanning a Bison string, including its escapes. |
471 | The initial quote is already eaten. |
472 `------------------------------------------------*/
473
474 <SC_ESCAPED_STRING>
475 {
476 "\""|"\n" {
477 if (yytext[0] == '\n')
478 unexpected_newline (token_start, "\"");
479 STRING_FINISH;
480 loc->start = token_start;
481 val->chars = last_string;
482 BEGIN INITIAL;
483 return STRING;
484 }
485 <<EOF>> {
486 unexpected_eof (token_start, "\"");
487 STRING_FINISH;
488 loc->start = token_start;
489 val->chars = last_string;
490 BEGIN INITIAL;
491 return STRING;
492 }
493 }
494
495 /*----------------------------------------------------------.
496 | Scanning a Bison character literal, decoding its escapes. |
497 | The initial quote is already eaten. |
498 `----------------------------------------------------------*/
499
500 <SC_ESCAPED_CHARACTER>
501 {
502 "'"|"\n" {
503 STRING_FINISH;
504 loc->start = token_start;
505 val->character = last_string[0];
506 {
507 /* FIXME: Eventually, make these errors. */
508 if (last_string[0] == '\0')
509 {
510 warn_at (*loc, _("empty character literal"));
511 /* '\0' seems dangerous even if we are about to complain. */
512 val->character = '\'';
513 }
514 else if (last_string[1] != '\0')
515 warn_at (*loc, _("extra characters in character literal"));
516 }
517 if (yytext[0] == '\n')
518 unexpected_newline (token_start, "'");
519 STRING_FREE;
520 BEGIN INITIAL;
521 return CHAR;
522 }
523 <<EOF>> {
524 STRING_FINISH;
525 loc->start = token_start;
526 val->character = last_string[0];
527 {
528 /* FIXME: Eventually, make these errors. */
529 if (last_string[0] == '\0')
530 {
531 warn_at (*loc, _("empty character literal"));
532 /* '\0' seems dangerous even if we are about to complain. */
533 val->character = '\'';
534 }
535 else if (last_string[1] != '\0')
536 warn_at (*loc, _("extra characters in character literal"));
537 }
538 unexpected_eof (token_start, "'");
539 STRING_FREE;
540 BEGIN INITIAL;
541 return CHAR;
542 }
543 }
544
545 /*-----------------------------------------------------------.
546 | Scanning a Bison nested tag. The initial angle bracket is |
547 | already eaten. |
548 `-----------------------------------------------------------*/
549
550 <SC_TAG>
551 {
552 ">" {
553 --nesting;
554 if (nesting < 0)
555 {
556 STRING_FINISH;
557 loc->start = token_start;
558 val->uniqstr = uniqstr_new (last_string);
559 STRING_FREE;
560 BEGIN INITIAL;
561 return TAG;
562 }
563 STRING_GROW;
564 }
565
566 [^<>]+ STRING_GROW;
567 "<"+ STRING_GROW; nesting += yyleng;
568
569 <<EOF>> {
570 unexpected_eof (token_start, ">");
571 STRING_FINISH;
572 loc->start = token_start;
573 val->uniqstr = uniqstr_new (last_string);
574 STRING_FREE;
575 BEGIN INITIAL;
576 return TAG;
577 }
578 }
579
580 /*----------------------------.
581 | Decode escaped characters. |
582 `----------------------------*/
583
584 <SC_ESCAPED_STRING,SC_ESCAPED_CHARACTER>
585 {
586 \\[0-7]{1,3} {
587 unsigned long int c = strtoul (yytext + 1, NULL, 8);
588 if (!c || UCHAR_MAX < c)
589 complain_at (*loc, _("invalid number after \\-escape: %s"),
590 yytext+1);
591 else
592 obstack_1grow (&obstack_for_string, c);
593 }
594
595 \\x[0-9abcdefABCDEF]+ {
596 verify (UCHAR_MAX < ULONG_MAX);
597 unsigned long int c = strtoul (yytext + 2, NULL, 16);
598 if (!c || UCHAR_MAX < c)
599 complain_at (*loc, _("invalid number after \\-escape: %s"),
600 yytext+1);
601 else
602 obstack_1grow (&obstack_for_string, c);
603 }
604
605 \\a obstack_1grow (&obstack_for_string, '\a');
606 \\b obstack_1grow (&obstack_for_string, '\b');
607 \\f obstack_1grow (&obstack_for_string, '\f');
608 \\n obstack_1grow (&obstack_for_string, '\n');
609 \\r obstack_1grow (&obstack_for_string, '\r');
610 \\t obstack_1grow (&obstack_for_string, '\t');
611 \\v obstack_1grow (&obstack_for_string, '\v');
612
613 /* \\[\"\'?\\] would be shorter, but it confuses xgettext. */
614 \\("\""|"'"|"?"|"\\") obstack_1grow (&obstack_for_string, yytext[1]);
615
616 \\(u|U[0-9abcdefABCDEF]{4})[0-9abcdefABCDEF]{4} {
617 int c = convert_ucn_to_byte (yytext);
618 if (c <= 0)
619 complain_at (*loc, _("invalid number after \\-escape: %s"),
620 yytext+1);
621 else
622 obstack_1grow (&obstack_for_string, c);
623 }
624 \\(.|\n) {
625 char const *p = yytext + 1;
626 /* Quote only if escaping won't make the character visible. */
627 if (isspace ((unsigned char) *p) && isprint ((unsigned char) *p))
628 p = quote (p);
629 else
630 p = quotearg_style_mem (escape_quoting_style, p, 1);
631 complain_at (*loc, _("invalid character after \\-escape: %s"), p);
632 }
633 }
634
635 /*--------------------------------------------.
636 | Scanning user-code characters and strings. |
637 `--------------------------------------------*/
638
639 <SC_CHARACTER,SC_STRING>
640 {
641 {splice}|\\{splice}[^\n\[\]] STRING_GROW;
642 }
643
644 <SC_CHARACTER>
645 {
646 "'" STRING_GROW; BEGIN context_state;
647 \n unexpected_newline (token_start, "'"); BEGIN context_state;
648 <<EOF>> unexpected_eof (token_start, "'"); BEGIN context_state;
649 }
650
651 <SC_STRING>
652 {
653 "\"" STRING_GROW; BEGIN context_state;
654 \n unexpected_newline (token_start, "\""); BEGIN context_state;
655 <<EOF>> unexpected_eof (token_start, "\""); BEGIN context_state;
656 }
657
658
659 /*---------------------------------------------------.
660 | Strings, comments etc. can be found in user code. |
661 `---------------------------------------------------*/
662
663 <SC_BRACED_CODE,SC_PROLOGUE,SC_EPILOGUE>
664 {
665 "'" {
666 STRING_GROW;
667 context_state = YY_START;
668 token_start = loc->start;
669 BEGIN SC_CHARACTER;
670 }
671 "\"" {
672 STRING_GROW;
673 context_state = YY_START;
674 token_start = loc->start;
675 BEGIN SC_STRING;
676 }
677 "/"{splice}"*" {
678 STRING_GROW;
679 context_state = YY_START;
680 token_start = loc->start;
681 BEGIN SC_COMMENT;
682 }
683 "/"{splice}"/" {
684 STRING_GROW;
685 context_state = YY_START;
686 BEGIN SC_LINE_COMMENT;
687 }
688 }
689
690
691
692 /*-----------------------------------------------------------.
693 | Scanning some code in braces (actions). The initial "{" is |
694 | already eaten. |
695 `-----------------------------------------------------------*/
696
697 <SC_BRACED_CODE>
698 {
699 "{"|"<"{splice}"%" STRING_GROW; nesting++;
700 "%"{splice}">" STRING_GROW; nesting--;
701 "}" {
702 obstack_1grow (&obstack_for_string, '}');
703
704 --nesting;
705 if (nesting < 0)
706 {
707 STRING_FINISH;
708 loc->start = code_start;
709 val->code = last_string;
710 BEGIN INITIAL;
711 return BRACED_CODE;
712 }
713 }
714
715 /* Tokenize `<<%' correctly (as `<<' `%') rather than incorrectly
716 (as `<' `<%'). */
717 "<"{splice}"<" STRING_GROW;
718
719 <<EOF>> {
720 unexpected_eof (code_start, "}");
721 STRING_FINISH;
722 loc->start = code_start;
723 val->code = last_string;
724 BEGIN INITIAL;
725 return BRACED_CODE;
726 }
727 }
728
729
730 /*--------------------------------------------------------------.
731 | Scanning some prologue: from "%{" (already scanned) to "%}". |
732 `--------------------------------------------------------------*/
733
734 <SC_PROLOGUE>
735 {
736 "%}" {
737 STRING_FINISH;
738 loc->start = code_start;
739 val->chars = last_string;
740 BEGIN INITIAL;
741 return PROLOGUE;
742 }
743
744 <<EOF>> {
745 unexpected_eof (code_start, "%}");
746 STRING_FINISH;
747 loc->start = code_start;
748 val->chars = last_string;
749 BEGIN INITIAL;
750 return PROLOGUE;
751 }
752 }
753
754
755 /*---------------------------------------------------------------.
756 | Scanning the epilogue (everything after the second "%%", which |
757 | has already been eaten). |
758 `---------------------------------------------------------------*/
759
760 <SC_EPILOGUE>
761 {
762 <<EOF>> {
763 STRING_FINISH;
764 loc->start = code_start;
765 val->chars = last_string;
766 BEGIN INITIAL;
767 return EPILOGUE;
768 }
769 }
770
771
772 /*-----------------------------------------------------.
773 | By default, grow the string obstack with the input. |
774 `-----------------------------------------------------*/
775
776 <SC_COMMENT,SC_LINE_COMMENT,SC_BRACED_CODE,SC_PROLOGUE,SC_EPILOGUE,SC_STRING,SC_CHARACTER,SC_ESCAPED_STRING,SC_ESCAPED_CHARACTER>. |
777 <SC_COMMENT,SC_LINE_COMMENT,SC_BRACED_CODE,SC_PROLOGUE,SC_EPILOGUE>\n STRING_GROW;
778
779 %%
780
/* Fill BUF with up to SIZE bytes read from FP and return how many
   bytes BUF holds afterwards.  Carriage returns are normalized in
   place: a lone '\r' becomes '\n' and a "\r\n" pair collapses to a
   single '\n', so the result may be shorter than what was read.  */

static size_t
no_cr_read (FILE *fp, char *buf, size_t size)
{
  size_t const count = fread (buf, 1, size, fp);
  /* OUT is the write position; it starts on the first '\r', if any.  */
  char *out = count ? memchr (buf, '\r', count) : NULL;
  char const *in;
  char const *end;

  /* Nothing read, or no '\r' at all: the buffer is already final.  */
  if (!out)
    return count;

  end = buf + count;
  in = out + 1;

  for (;;)
    {
      int found_cr = 0;

      /* OUT sits on the slot of a '\r': store '\n' there instead.  */
      *out++ = '\n';

      if (in == end)
        {
          /* The '\r' was the last byte read.  Peek at the stream so
             that a '\n' completing a "\r\n" pair is swallowed; any
             other character is pushed back.  Stop if the push-back
             fails.  */
          int next = getc (fp);
          if (next != '\n' && ungetc (next, fp) != next)
            break;
        }
      else if (*in == '\n')
        in++;

      /* Compact the following bytes down until the next '\r' or the
         end of the data.  */
      while (in != end)
        {
          char c = *in++;
          if (c == '\r')
            {
              found_cr = 1;
              break;
            }
          *out++ = c;
        }

      if (!found_cr)
        break;
    }

  return out - buf;
}
826
827
828
829 /*------------------------------------------------------.
830 | Scan NUMBER for a base-BASE integer at location LOC. |
831 `------------------------------------------------------*/
832
833 static unsigned long int
834 scan_integer (char const *number, int base, location loc)
835 {
836 verify (INT_MAX < ULONG_MAX);
837 unsigned long int num = strtoul (number, NULL, base);
838
839 if (INT_MAX < num)
840 {
841 complain_at (loc, _("integer out of range: %s"), quote (number));
842 num = INT_MAX;
843 }
844
845 return num;
846 }
847
848
/*------------------------------------------------------------------.
| Convert universal character name UCN to a single-byte character,  |
| and return that character.  Return -1 if UCN does not correspond  |
| to a single-byte character.                                       |
|                                                                   |
| UCN points at the backslash of the escape; the hexadecimal digits |
| are read starting two characters in.  The result is always        |
| produced as an int, so the -1 failure value never passes through  |
| an unsigned type.                                                 |
`------------------------------------------------------------------*/

static int
convert_ucn_to_byte (char const *ucn)
{
  verify (UCHAR_MAX <= INT_MAX);
  unsigned long int code = strtoul (ucn + 2, NULL, 16);

  /* FIXME: Currently we assume Unicode-compatible unibyte characters
     on ASCII hosts (i.e., Latin-1 on hosts with 8-bit bytes).  On
     non-ASCII hosts we support only the portable C character set.
     These limitations should be removed once we add support for
     multibyte characters.  */

  if (UCHAR_MAX < code)
    return -1;

#if ! ('$' == 0x24 && '@' == 0x40 && '`' == 0x60 && '~' == 0x7e)
  {
    /* A non-ASCII host.  Use CODE to index into a table of the C
       basic execution character set, which is guaranteed to exist on
       all Standard C platforms.  This table also includes '$', '@',
       and '`', which are not in the basic execution character set but
       which are unibyte characters on all the platforms that we know
       about.  */
    static signed char const table[] =
      {
        '\0',   -1,   -1,   -1,   -1,   -1,   -1, '\a',
        '\b', '\t', '\n', '\v', '\f', '\r',   -1,   -1,
          -1,   -1,   -1,   -1,   -1,   -1,   -1,   -1,
          -1,   -1,   -1,   -1,   -1,   -1,   -1,   -1,
         ' ',  '!',  '"',  '#',  '$',  '%',  '&', '\'',
         '(',  ')',  '*',  '+',  ',',  '-',  '.',  '/',
         '0',  '1',  '2',  '3',  '4',  '5',  '6',  '7',
         '8',  '9',  ':',  ';',  '<',  '=',  '>',  '?',
         '@',  'A',  'B',  'C',  'D',  'E',  'F',  'G',
         'H',  'I',  'J',  'K',  'L',  'M',  'N',  'O',
         'P',  'Q',  'R',  'S',  'T',  'U',  'V',  'W',
         'X',  'Y',  'Z',  '[', '\\',  ']',  '^',  '_',
         '`',  'a',  'b',  'c',  'd',  'e',  'f',  'g',
         'h',  'i',  'j',  'k',  'l',  'm',  'n',  'o',
         'p',  'q',  'r',  's',  't',  'u',  'v',  'w',
         'x',  'y',  'z',  '{',  '|',  '}',  '~'
      };

    /* Return the signed table entry (or -1) directly as an int rather
       than storing it back into the unsigned CODE first.  */
    return code < sizeof table ? table[code] : -1;
  }
#else
  /* CODE is at most UCHAR_MAX here, hence representable as an int.  */
  return code;
#endif
}
904
905
906 /*----------------------------------------------------------------.
907 | Handle `#line INT "FILE"'. ARGS has already skipped `#line '. |
908 `----------------------------------------------------------------*/
909
910 static void
911 handle_syncline (char *args, location loc)
912 {
913 char *after_num;
914 unsigned long int lineno = strtoul (args, &after_num, 10);
915 char *file = strchr (after_num, '"') + 1;
916 *strchr (file, '"') = '\0';
917 if (INT_MAX <= lineno)
918 {
919 warn_at (loc, _("line number overflow"));
920 lineno = INT_MAX;
921 }
922 current_file = uniqstr_new (file);
923 boundary_set (&scanner_cursor, current_file, lineno, 1);
924 }
925
926
927 /*----------------------------------------------------------------.
928 | For a token or comment starting at START, report message MSGID, |
929 | which should say that an end marker was found before |
930 | the expected TOKEN_END. |
931 `----------------------------------------------------------------*/
932
933 static void
934 unexpected_end (boundary start, char const *msgid, char const *token_end)
935 {
936 location loc;
937 loc.start = start;
938 loc.end = scanner_cursor;
939 complain_at (loc, _(msgid), token_end);
940 }
941
942
943 /*------------------------------------------------------------------------.
944 | Report an unexpected EOF in a token or comment starting at START. |
945 | An end of file was encountered and the expected TOKEN_END was missing. |
946 `------------------------------------------------------------------------*/
947
948 static void
949 unexpected_eof (boundary start, char const *token_end)
950 {
951 unexpected_end (start, N_("missing `%s' at end of file"), token_end);
952 }
953
954
955 /*----------------------------------------.
956 | Likewise, but for unexpected newlines. |
957 `----------------------------------------*/
958
959 static void
960 unexpected_newline (boundary start, char const *token_end)
961 {
962 unexpected_end (start, N_("missing `%s' at end of line"), token_end);
963 }
964
965
966 /*-------------------------.
967 | Initialize the scanner. |
968 `-------------------------*/
969
970 void
971 gram_scanner_initialize (void)
972 {
973 obstack_init (&obstack_for_string);
974 }
975
976
977 /*-----------------------------------------------.
978 | Free all the memory allocated to the scanner. |
979 `-----------------------------------------------*/
980
981 void
982 gram_scanner_free (void)
983 {
984 obstack_free (&obstack_for_string, 0);
985 /* Reclaim Flex's buffers. */
986 yylex_destroy ();
987 }