]> git.saurik.com Git - bison.git/blob - src/scan-gram.l
tests: check token numbers.
[bison.git] / src / scan-gram.l
1 /* Bison Grammar Scanner -*- C -*-
2
3 Copyright (C) 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009
4 Free Software Foundation, Inc.
5
6 This file is part of Bison, the GNU Compiler Compiler.
7
8 This program is free software: you can redistribute it and/or modify
9 it under the terms of the GNU General Public License as published by
10 the Free Software Foundation, either version 3 of the License, or
11 (at your option) any later version.
12
13 This program is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU General Public License for more details.
17
18 You should have received a copy of the GNU General Public License
19 along with this program. If not, see <http://www.gnu.org/licenses/>. */
20
21 %option debug nodefault nounput noyywrap never-interactive
22 %option prefix="gram_" outfile="lex.yy.c"
23
24 %{
25 /* Work around a bug in flex 2.5.31. See Debian bug 333231
26 <http://bugs.debian.org/cgi-bin/bugreport.cgi?bug=333231>. */
27 #undef gram_wrap
28 #define gram_wrap() 1
29
30 #define FLEX_PREFIX(Id) gram_ ## Id
31 #include <src/flex-scanner.h>
32
33 #include <src/complain.h>
34 #include <src/files.h>
35 #include <src/gram.h>
36 #include <quotearg.h>
37 #include <src/reader.h>
38 #include <src/uniqstr.h>
39
40 #include <mbswidth.h>
41 #include <quote.h>
42
43 #include <src/scan-gram.h>
44
/* Give the generated scanner function the prototype GRAM_LEX_DECL,
   which is defined outside this file.  */
#define YY_DECL GRAM_LEX_DECL

/* Initialization hook: start both CODE_START and the scanner cursor at
   the beginning of the location supplied through LOC.  The trailing
   backslash continues the macro onto the empty line that follows.  */
#define YY_USER_INIT					\
   code_start = scanner_cursor = loc->start;		\

/* Location of scanner cursor.  */
static boundary scanner_cursor;

/* Per-match hook: hand the matched text to location_compute together
   with LOC and the scanner cursor (presumably to advance both; see
   location_compute, defined elsewhere).  */
#define YY_USER_ACTION  location_compute (loc, &scanner_cursor, yytext, yyleng);

/* Route scanner input through no_cr_read so that '\r' never reaches
   the rules.  */
static size_t no_cr_read (FILE *, char *, size_t);
#define YY_INPUT(buf, result, size) ((result) = no_cr_read (yyin, buf, size))

/* Store the uniqstr made from VALUE as the semantic value and return
   PERCENT_FLAG from the scanner.  */
#define RETURN_PERCENT_FLAG(Value)                      \
  do {                                                  \
    val->uniqstr = uniqstr_new (Value);                 \
    return PERCENT_FLAG;                                \
  } while (0)


/* A string representing the most recently saved token.  */
static char *last_string;
67
/* Release the most recently saved token string.  All the work is done
   by the STRING_FREE macro, which is defined outside this file.  */
void
gram_scanner_last_string_free (void)
{
  STRING_FREE;
}
73
74 static void handle_syncline (char *, location);
75 static unsigned long int scan_integer (char const *p, int base, location loc);
76 static int convert_ucn_to_byte (char const *hex_text);
77 static void unexpected_eof (boundary, char const *);
78 static void unexpected_newline (boundary, char const *);
79
80 %}
 /* A C-like comment in directives/rules. */
%x SC_YACC_COMMENT
 /* Strings and characters in directives/rules. */
%x SC_ESCAPED_STRING SC_ESCAPED_CHARACTER
 /* An identifier was just read in directives/rules.  Special state
    to capture the sequence `identifier :'. */
%x SC_AFTER_IDENTIFIER
 /* A complex tag, with nested angle brackets. */
%x SC_TAG

 /* Three types of user code:
    - prologue (code between `%{' `%}' in the first section, before %%);
    - actions, printers, union, etc. (between braces in the middle section);
    - epilogue (everything after the second %%).  */
%x SC_PROLOGUE SC_BRACED_CODE SC_EPILOGUE
 /* C and C++ comments in code. */
%x SC_COMMENT SC_LINE_COMMENT
 /* Strings and characters in code. */
%x SC_STRING SC_CHARACTER

letter	  [-.abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ_]
id	  {letter}({letter}|[0-9])*
directive %{id}
int	  [0-9]+

/* POSIX says that a tag must be both an id and a C union member, but
   historically almost any character is allowed in a tag.  We disallow
   NUL, as this simplifies our implementation.  We disallow angle
   brackets so that they can be matched in nested pairs: several
   languages use them for generics/template types.  */
tag	 [^\0<>]+

/* Zero or more instances of backslash-newline.  Following GCC, allow
   white space between the backslash and the newline.  */
splice	 (\\[ \f\t\v]*\n)*
116
117 %%
%{
  /* Local state of the scanner function.  IF_LINT (defined outside
     this file) wraps initializers that are presumably only needed to
     silence static checkers -- confirm against its definition.  */

  /* Nesting level.  Either for nested braces, or nested angle brackets
     (but not mixed).  */
  int nesting IF_LINT (= 0);

  /* Parent context state, when applicable.  */
  int context_state IF_LINT (= 0);

  /* Location of most recent identifier, when applicable.  */
  location id_loc IF_LINT (= empty_location);

  /* Where containing code started, when applicable.  Its initial
     value is relevant only when yylex is invoked in the SC_EPILOGUE
     start condition.  */
  boundary code_start = scanner_cursor;

  /* Where containing comment or string or character literal started,
     when applicable.  */
  boundary token_start IF_LINT (= scanner_cursor);
%}
138
139
  /*-----------------------------------------------------------------.
  | Scanning white space.  These rules are active both in INITIAL    |
  | and right after an identifier (SC_AFTER_IDENTIFIER), so blanks   |
  | and comments may separate an identifier from a following colon.  |
  `-----------------------------------------------------------------*/

<INITIAL,SC_AFTER_IDENTIFIER>
{
  /* Comments and white space.  A comma only triggers a warning; the
     white-space rule shares (via `|') the empty action of the `//'
     comment rule.  */
  ","	       warn_at (*loc, _("stray `,' treated as white space"));
  [ \f\n\t\v]  |
  "//".*       ;
  "/*" {
    /* Remember where the comment starts and which start condition to
       resume once it ends.  */
    token_start = loc->start;
    context_state = YY_START;
    BEGIN SC_YACC_COMMENT;
  }

  /* #line directives are not documented, and may be withdrawn or
     modified in future versions of Bison.  */
  ^"#line "{int}" \"".*"\"\n" {
    /* Skip the leading `#line ' before handing over the arguments.  */
    handle_syncline (yytext + sizeof "#line " - 1, *loc);
  }
}
162
163
  /*----------------------------.
  | Scanning Bison directives.  |
  `----------------------------*/

  /* For directives that are also command line options, the regex must be
	"%..."
     after "[-_]"s are removed, and the directive must match the --long
     option name, with a single string argument.  Otherwise, add exceptions
     to ../build-aux/cross-options.pl.  */

<INITIAL>
{
  /* Known directives.  Several spellings map to the same token
     (%binary/%nonassoc, %term/%token, %fixed-output-files/%yacc), and
     some are returned as PERCENT_FLAG with a variable name.  */
  "%binary"                        return PERCENT_NONASSOC;
  "%code"                          return PERCENT_CODE;
  "%debug"                         RETURN_PERCENT_FLAG("parse.trace");
  "%default"[-_]"prec"             return PERCENT_DEFAULT_PREC;
  "%define"                        return PERCENT_DEFINE;
  "%defines"                       return PERCENT_DEFINES;
  "%destructor"                    return PERCENT_DESTRUCTOR;
  "%dprec"                         return PERCENT_DPREC;
  "%error"[-_]"verbose"            RETURN_PERCENT_FLAG("error-verbose");
  "%expect"                        return PERCENT_EXPECT;
  "%expect"[-_]"rr"                return PERCENT_EXPECT_RR;
  "%file-prefix"                   return PERCENT_FILE_PREFIX;
  "%fixed"[-_]"output"[-_]"files"  return PERCENT_YACC;
  "%initial-action"                return PERCENT_INITIAL_ACTION;
  "%glr-parser"                    return PERCENT_GLR_PARSER;
  "%language"                      return PERCENT_LANGUAGE;
  "%left"                          return PERCENT_LEFT;
  "%lex-param"                     return PERCENT_LEX_PARAM;
  "%locations"                     RETURN_PERCENT_FLAG("locations");
  "%merge"                         return PERCENT_MERGE;
  "%name"[-_]"prefix"              return PERCENT_NAME_PREFIX;
  "%no"[-_]"default"[-_]"prec"     return PERCENT_NO_DEFAULT_PREC;
  "%no"[-_]"lines"                 return PERCENT_NO_LINES;
  "%nonassoc"                      return PERCENT_NONASSOC;
  "%nondeterministic-parser"       return PERCENT_NONDETERMINISTIC_PARSER;
  "%nterm"                         return PERCENT_NTERM;
  "%output"                        return PERCENT_OUTPUT;
  "%parse-param"                   return PERCENT_PARSE_PARAM;
  "%prec"                          return PERCENT_PREC;
  "%precedence"                    return PERCENT_PRECEDENCE;
  "%printer"                       return PERCENT_PRINTER;
  "%pure"[-_]"parser"              RETURN_PERCENT_FLAG("api.pure");
  "%require"                       return PERCENT_REQUIRE;
  "%right"                         return PERCENT_RIGHT;
  "%skeleton"                      return PERCENT_SKELETON;
  "%start"                         return PERCENT_START;
  "%term"                          return PERCENT_TOKEN;
  "%token"                         return PERCENT_TOKEN;
  "%token"[-_]"table"              return PERCENT_TOKEN_TABLE;
  "%type"                          return PERCENT_TYPE;
  "%union"                         return PERCENT_UNION;
  "%verbose"                       return PERCENT_VERBOSE;
  "%yacc"                          return PERCENT_YACC;

  /* Any other `%identifier' is reported and then skipped: the action
     does not return a token.  */
  {directive} {
    complain_at (*loc, _("invalid directive: %s"), quote (yytext));
  }

  "="                     return EQUAL;
  "|"                     return PIPE;
  ";"                     return SEMICOLON;

  /* An identifier is not returned right away: its value and location
     are saved, and SC_AFTER_IDENTIFIER decides between ID and
     ID_COLON.  */
  {id} {
    val->uniqstr = uniqstr_new (yytext);
    id_loc = *loc;
    BEGIN SC_AFTER_IDENTIFIER;
  }

  {int} {
    val->integer = scan_integer (yytext, 10, *loc);
    return INT;
  }
  0[xX][0-9abcdefABCDEF]+ {
    val->integer = scan_integer (yytext, 16, *loc);
    return INT;
  }

  /* Identifiers may not start with a digit.  Yet, don't silently
     accept "1FOO" as "1 FOO".  */
  {int}{id} {
    complain_at (*loc, _("invalid identifier: %s"), quote (yytext));
  }

  /* Characters.  We don't check there is only one.  The opening quote
     is kept in the string being built.  */
  "'"	      STRING_GROW; token_start = loc->start; BEGIN SC_ESCAPED_CHARACTER;

  /* Strings.  The opening quote is not kept.  */
  "\""	      token_start = loc->start; BEGIN SC_ESCAPED_STRING;

  /* Prologue. */
  "%{"        code_start = loc->start; BEGIN SC_PROLOGUE;

  /* Code in between braces.  */
  "{" {
    STRING_GROW;
    nesting = 0;
    code_start = loc->start;
    BEGIN SC_BRACED_CODE;
  }

  /* A type.  A tag without NUL or nested angle brackets is handled in
     one go; anything else starting with `<' goes through SC_TAG.  */
  "<*>"       return TAG_ANY;
  "<>"        return TAG_NONE;
  "<"{tag}">" {
    /* Keep the text between the angle brackets only.  */
    obstack_grow (&obstack_for_string, yytext + 1, yyleng - 2);
    STRING_FINISH;
    val->uniqstr = uniqstr_new (last_string);
    STRING_FREE;
    return TAG;
  }
  "<"         {
    nesting = 0;
    token_start = loc->start;
    BEGIN SC_TAG;
  }

  "%%" {
    /* The second `%%' of the file starts the epilogue.  */
    static int percent_percent_count;
    if (++percent_percent_count == 2)
      BEGIN SC_EPILOGUE;
    return PERCENT_PERCENT;
  }

  . {
    complain_at (*loc, _("invalid character: %s"), quote (yytext));
  }

  <<EOF>> {
    /* Report an empty location at the current cursor.  */
    loc->start = loc->end = scanner_cursor;
    yyterminate ();
  }
}
298
299
  /*--------------------------------------------------------------.
  | Supporting \0 complexifies our implementation for no expected |
  | added value.  A NUL inside a Bison character literal, string  |
  | or nested tag is therefore reported; the action does not add  |
  | it to the string being built.                                 |
  `--------------------------------------------------------------*/

<SC_ESCAPED_CHARACTER,SC_ESCAPED_STRING,SC_TAG>
{
  \0	    complain_at (*loc, _("invalid null character"));
}
309
310
  /*-----------------------------------------------------------------.
  | Scanning after an identifier, checking whether a colon is next.  |
  | In every case the token location is that of the identifier       |
  | saved in ID_LOC, and scanning resumes in INITIAL.                |
  `-----------------------------------------------------------------*/

<SC_AFTER_IDENTIFIER>
{
  ":" {
    *loc = id_loc;
    BEGIN INITIAL;
    return ID_COLON;
  }
  . {
    /* Not a colon: give the character back to the input (yyless) and
       move the cursor column back by its width so that it is not
       counted twice when it is scanned again.  */
    scanner_cursor.column -= mbsnwidth (yytext, yyleng, 0);
    yyless (0);
    *loc = id_loc;
    BEGIN INITIAL;
    return ID;
  }
  <<EOF>> {
    *loc = id_loc;
    BEGIN INITIAL;
    return ID;
  }
}
335
336
  /*---------------------------------------------------------------.
  | Scanning a Yacc comment.  The initial `/ *' is already eaten.  |
  | The comment text is discarded; at its end (or at end of file,  |
  | after a diagnostic) the start condition saved in CONTEXT_STATE |
  | is restored.                                                   |
  `---------------------------------------------------------------*/

<SC_YACC_COMMENT>
{
  "*/"     BEGIN context_state;
  .|\n	   ;
  <<EOF>>  unexpected_eof (token_start, "*/"); BEGIN context_state;
}
347
348
  /*------------------------------------------------------------.
  | Scanning a C comment.  The initial `/ *' is already eaten.  |
  | The terminator may be split by backslash-newline.  Only the |
  | terminator and end of file are handled here; other input is |
  | left to the default rule at the end of the rules section.   |
  `------------------------------------------------------------*/

<SC_COMMENT>
{
  "*"{splice}"/"  STRING_GROW; BEGIN context_state;
  <<EOF>>	  unexpected_eof (token_start, "*/"); BEGIN context_state;
}
358
359
  /*--------------------------------------------------------------.
  | Scanning a line comment.  The initial `//' is already eaten.  |
  | A newline ends the comment; backslash-newline sequences are   |
  | kept and the comment goes on.  End of file ends the comment   |
  | without a diagnostic.                                         |
  `--------------------------------------------------------------*/

<SC_LINE_COMMENT>
{
  "\n"		 STRING_GROW; BEGIN context_state;
  {splice}	 STRING_GROW;
  <<EOF>>	 BEGIN context_state;
}
370
371
  /*------------------------------------------------.
  | Scanning a Bison string, including its escapes. |
  | The initial quote is already eaten.             |
  `------------------------------------------------*/

<SC_ESCAPED_STRING>
{
  "\""|"\n" {
    /* A newline also ends the string, after a diagnostic.  Neither
       the closing quote nor the newline is added to the value.  */
    if (yytext[0] == '\n')
      unexpected_newline (token_start, "\"");
    STRING_FINISH;
    loc->start = token_start;
    val->chars = last_string;
    BEGIN INITIAL;
    return STRING;
  }
  <<EOF>> {
    /* Still return what was collected so far as a STRING.  */
    unexpected_eof (token_start, "\"");
    STRING_FINISH;
    loc->start = token_start;
    val->chars = last_string;
    BEGIN INITIAL;
    return STRING;
  }
}
397
  /*----------------------------------------------------------.
  | Scanning a Bison character literal, decoding its escapes. |
  | The initial quote is already eaten, and was added to the  |
  | string being built by the rule that entered this state,   |
  | so LAST_STRING[0] is that quote and LAST_STRING[1] is the |
  | first character that follows it.                          |
  `----------------------------------------------------------*/

<SC_ESCAPED_CHARACTER>
{
  "'"|"\n" {
    /* A newline also ends the literal, after a diagnostic.  The
       terminator is added to the string before it is finished.  */
    if (yytext[0] == '\n')
      unexpected_newline (token_start, "'");
    STRING_GROW;
    STRING_FINISH;
    loc->start = token_start;
    val->character = last_string[1];
    STRING_FREE;
    BEGIN INITIAL;
    return CHAR;
  }
  <<EOF>> {
    /* No terminator is added here, so the string may hold the opening
       quote only: fall back to it in that case.  */
    unexpected_eof (token_start, "'");
    STRING_FINISH;
    loc->start = token_start;
    if (strlen (last_string) > 1)
      val->character = last_string[1];
    else
      val->character = last_string[0];
    STRING_FREE;
    BEGIN INITIAL;
    return CHAR;
  }
}
429
430 /*-----------------------------------------------------------.
431 | Scanning a Bison nested tag. The initial angle bracket is |
432 | already eaten. |
433 `-----------------------------------------------------------*/
434
435 <SC_TAG>
436 {
437 ">" {
438 --nesting;
439 if (nesting < 0)
440 {
441 STRING_FINISH;
442 loc->start = token_start;
443 val->uniqstr = uniqstr_new (last_string);
444 STRING_FREE;
445 BEGIN INITIAL;
446 return TAG;
447 }
448 STRING_GROW;
449 }
450
451 [^<>]+ STRING_GROW;
452 "<"+ STRING_GROW; nesting += yyleng;
453
454 <<EOF>> {
455 unexpected_eof (token_start, ">");
456 STRING_FINISH;
457 loc->start = token_start;
458 val->uniqstr = uniqstr_new (last_string);
459 STRING_FREE;
460 BEGIN INITIAL;
461 return TAG;
462 }
463 }
464
  /*--------------------------------------------------------------.
  | Decode escaped characters in Bison strings and character      |
  | literals.  A valid escape adds the single decoded byte to the |
  | string being built; an invalid one is reported and adds       |
  | nothing, except for unrecognized escapes, which are reported  |
  | and kept verbatim.                                            |
  `--------------------------------------------------------------*/

<SC_ESCAPED_STRING,SC_ESCAPED_CHARACTER>
{
  \\[0-7]{1,3} {
    /* Octal escape: must fit in a byte and must not be NUL.  */
    unsigned long int c = strtoul (yytext + 1, NULL, 8);
    if (UCHAR_MAX < c)
      complain_at (*loc, _("invalid escape sequence: %s"), quote (yytext));
    else if (! c)
      complain_at (*loc, _("invalid null character: %s"), quote (yytext));
    else
      obstack_1grow (&obstack_for_string, c);
  }

  \\x[0-9abcdefABCDEF]+ {
    /* Hexadecimal escape: same constraints as the octal one.  */
    verify (UCHAR_MAX < ULONG_MAX);
    unsigned long int c = strtoul (yytext + 2, NULL, 16);
    if (UCHAR_MAX < c)
      complain_at (*loc, _("invalid escape sequence: %s"), quote (yytext));
    else if (! c)
      complain_at (*loc, _("invalid null character: %s"), quote (yytext));
    else
      obstack_1grow (&obstack_for_string, c);
  }

  \\a	obstack_1grow (&obstack_for_string, '\a');
  \\b	obstack_1grow (&obstack_for_string, '\b');
  \\f	obstack_1grow (&obstack_for_string, '\f');
  \\n	obstack_1grow (&obstack_for_string, '\n');
  \\r	obstack_1grow (&obstack_for_string, '\r');
  \\t	obstack_1grow (&obstack_for_string, '\t');
  \\v	obstack_1grow (&obstack_for_string, '\v');

  /* \\[\"\'?\\] would be shorter, but it confuses xgettext.  */
  \\("\""|"'"|"?"|"\\")  obstack_1grow (&obstack_for_string, yytext[1]);

  \\(u|U[0-9abcdefABCDEF]{4})[0-9abcdefABCDEF]{4} {
    /* Universal character name, with 4 (\u) or 8 (\U) hexadecimal
       digits.  A negative result means it has no single-byte form.  */
    int c = convert_ucn_to_byte (yytext);
    if (c < 0)
      complain_at (*loc, _("invalid escape sequence: %s"), quote (yytext));
    else if (! c)
      complain_at (*loc, _("invalid null character: %s"), quote (yytext));
    else
      obstack_1grow (&obstack_for_string, c);
  }
  \\(.|\n)	{
    /* Any other escape: report it, but keep it as written.  */
    complain_at (*loc, _("unrecognized escape sequence: %s"), quote (yytext));
    STRING_GROW;
  }
}
517
  /*---------------------------------------------------------------.
  | Scanning user-code characters and strings.  Their contents are |
  | copied as they are; escapes are not decoded.  The closing      |
  | quote, a newline or end of file restores the start condition   |
  | saved in CONTEXT_STATE; the last two are reported first.       |
  `---------------------------------------------------------------*/

<SC_CHARACTER,SC_STRING>
{
  {splice}|\\{splice}[^\n\[\]]	STRING_GROW;
}

<SC_CHARACTER>
{
  "'"		STRING_GROW; BEGIN context_state;
  \n		unexpected_newline (token_start, "'"); BEGIN context_state;
  <<EOF>>	unexpected_eof (token_start, "'"); BEGIN context_state;
}

<SC_STRING>
{
  "\""		STRING_GROW; BEGIN context_state;
  \n		unexpected_newline (token_start, "\""); BEGIN context_state;
  <<EOF>>	unexpected_eof (token_start, "\""); BEGIN context_state;
}
540
541
  /*-----------------------------------------------------------------.
  | Strings, comments etc. can be found in user code.  Each opener   |
  | is copied, the current start condition is saved in CONTEXT_STATE |
  | so that it can be resumed afterwards, and the matching nested    |
  | start condition is entered.  The `/' and the following `*' or    |
  | `/' of a comment opener may be split by backslash-newline.       |
  `-----------------------------------------------------------------*/

<SC_BRACED_CODE,SC_PROLOGUE,SC_EPILOGUE>
{
  "'" {
    STRING_GROW;
    context_state = YY_START;
    token_start = loc->start;
    BEGIN SC_CHARACTER;
  }
  "\"" {
    STRING_GROW;
    context_state = YY_START;
    token_start = loc->start;
    BEGIN SC_STRING;
  }
  "/"{splice}"*" {
    STRING_GROW;
    context_state = YY_START;
    token_start = loc->start;
    BEGIN SC_COMMENT;
  }
  "/"{splice}"/" {
    /* TOKEN_START is not needed: line comments report no error.  */
    STRING_GROW;
    context_state = YY_START;
    BEGIN SC_LINE_COMMENT;
  }
}
572
573
574
  /*-----------------------------------------------------------.
  | Scanning some code in braces (actions).  The initial "{" is |
  | already eaten.  NESTING counts the inner braces (and the   |
  | `<%' / `%>' spellings matched below) that are still open.  |
  `-----------------------------------------------------------*/

<SC_BRACED_CODE>
{
  "{"|"<"{splice}"%"  STRING_GROW; nesting++;
  "%"{splice}">"      STRING_GROW; nesting--;
  "}" {
    /* Always store a plain closing brace.  */
    obstack_1grow (&obstack_for_string, '}');

    --nesting;
    if (nesting < 0)
      {
        /* This brace closes the code block itself.  */
        STRING_FINISH;
        loc->start = code_start;
        val->code = last_string;
        BEGIN INITIAL;
        return BRACED_CODE;
      }
  }

  /* Tokenize `<<%' correctly (as `<<' `%') rather than incorrectly
     (as `<' `<%').  */
  "<"{splice}"<"  STRING_GROW;

  <<EOF>> {
    /* Still return what was collected so far as BRACED_CODE.  */
    unexpected_eof (code_start, "}");
    STRING_FINISH;
    loc->start = code_start;
    val->code = last_string;
    BEGIN INITIAL;
    return BRACED_CODE;
  }
}
611
612
  /*--------------------------------------------------------------.
  | Scanning some prologue: from "%{" (already scanned) to "%}".  |
  | The closing "%}" is not added to the value.                   |
  `--------------------------------------------------------------*/

<SC_PROLOGUE>
{
  "%}" {
    STRING_FINISH;
    loc->start = code_start;
    val->chars = last_string;
    BEGIN INITIAL;
    return PROLOGUE;
  }

  <<EOF>> {
    /* Still return what was collected so far as a PROLOGUE.  */
    unexpected_eof (code_start, "%}");
    STRING_FINISH;
    loc->start = code_start;
    val->chars = last_string;
    BEGIN INITIAL;
    return PROLOGUE;
  }
}
636
637
  /*---------------------------------------------------------------.
  | Scanning the epilogue (everything after the second "%%", which |
  | has already been eaten).  End of file is its normal end, so no |
  | diagnostic is issued here.                                     |
  `---------------------------------------------------------------*/

<SC_EPILOGUE>
{
  <<EOF>> {
    STRING_FINISH;
    loc->start = code_start;
    val->chars = last_string;
    BEGIN INITIAL;
    return EPILOGUE;
  }
}
653
654
  /*-------------------------------------------------------------.
  | By default, grow the string obstack with the input.  Newline |
  | is copied this way in code and comment states only: the      |
  | string and character states have their own newline rules.    |
  `-------------------------------------------------------------*/

<SC_COMMENT,SC_LINE_COMMENT,SC_BRACED_CODE,SC_PROLOGUE,SC_EPILOGUE,SC_STRING,SC_CHARACTER,SC_ESCAPED_STRING,SC_ESCAPED_CHARACTER>.	|
<SC_COMMENT,SC_LINE_COMMENT,SC_BRACED_CODE,SC_PROLOGUE,SC_EPILOGUE>\n	STRING_GROW;
661
662 %%
663
/* Fill BUF with at most SIZE bytes read from FP and return how many
   bytes BUF holds afterwards.  Carriage returns never reach the
   caller: both "\r\n" and a lone '\r' come out as a single '\n'.  */

static size_t
no_cr_read (FILE *fp, char *buf, size_t size)
{
  size_t const count = fread (buf, 1, size, fp);
  char *first_cr = count ? memchr (buf, '\r', count) : NULL;
  char const *src;
  char const *end;
  char *dst;

  /* Nothing read, or nothing to translate.  */
  if (!first_cr)
    return count;

  /* Compact the buffer in place, starting at the first '\r'.  DST
     never gets ahead of SRC.  */
  end = buf + count;
  src = first_cr;
  dst = first_cr;

  while (src != end)
    {
      char c = *src++;

      if (c == '\r')
        {
          c = '\n';
          if (src != end)
            {
              /* "\r\n" within the buffer: drop the '\n'.  */
              if (*src == '\n')
                src++;
            }
          else
            {
              /* The '\r' was the last byte read: peek into the stream
                 and swallow a '\n' that immediately follows.  Anything
                 else is pushed back; either way the buffer is done.  */
              int next = getc (fp);
              if (next != '\n')
                ungetc (next, fp);
            }
        }

      *dst++ = c;
    }

  return dst - buf;
}
709
710
711
712 /*------------------------------------------------------.
713 | Scan NUMBER for a base-BASE integer at location LOC. |
714 `------------------------------------------------------*/
715
716 static unsigned long int
717 scan_integer (char const *number, int base, location loc)
718 {
719 verify (INT_MAX < ULONG_MAX);
720 unsigned long int num = strtoul (number, NULL, base);
721
722 if (INT_MAX < num)
723 {
724 complain_at (loc, _("integer out of range: %s"), quote (number));
725 num = INT_MAX;
726 }
727
728 return num;
729 }
730
731
/*------------------------------------------------------------------.
| Convert universal character name UCN to a single-byte character,  |
| and return that character.  Return -1 if UCN does not correspond  |
| to a single-byte character.                                       |
|                                                                   |
| UCN is the text of the escape, starting with its backslash: the   |
| hexadecimal digits are read from UCN + 2.                         |
`------------------------------------------------------------------*/

static int
convert_ucn_to_byte (char const *ucn)
{
  verify (UCHAR_MAX <= INT_MAX);
  unsigned long int code = strtoul (ucn + 2, NULL, 16);
  int byte;

  /* FIXME: Currently we assume Unicode-compatible unibyte characters
     on ASCII hosts (i.e., Latin-1 on hosts with 8-bit bytes).  On
     non-ASCII hosts we support only the portable C character set.
     These limitations should be removed once we add support for
     multibyte characters.  */

  if (UCHAR_MAX < code)
    return -1;

  /* CODE is at most UCHAR_MAX, hence at most INT_MAX (verified above):
     this conversion cannot change the value.  */
  byte = code;

#if ! ('$' == 0x24 && '@' == 0x40 && '`' == 0x60 && '~' == 0x7e)
  {
    /* A non-ASCII host.  Use CODE to index into a table of the C
       basic execution character set, which is guaranteed to exist on
       all Standard C platforms.  This table also includes '$', '@',
       and '`', which are not in the basic execution character set but
       which are unibyte characters on all the platforms that we know
       about.  */
    static signed char const table[] =
      {
	'\0',   -1,   -1,   -1,   -1,   -1,   -1, '\a',
	'\b', '\t', '\n', '\v', '\f', '\r',   -1,   -1,
	  -1,   -1,   -1,   -1,   -1,   -1,   -1,   -1,
	  -1,   -1,   -1,   -1,   -1,   -1,   -1,   -1,
	 ' ',  '!',  '"',  '#',  '$',  '%',  '&', '\'',
	 '(',  ')',  '*',  '+',  ',',  '-',  '.',  '/',
	 '0',  '1',  '2',  '3',  '4',  '5',  '6',  '7',
	 '8',  '9',  ':',  ';',  '<',  '=',  '>',  '?',
	 '@',  'A',  'B',  'C',  'D',  'E',  'F',  'G',
	 'H',  'I',  'J',  'K',  'L',  'M',  'N',  'O',
	 'P',  'Q',  'R',  'S',  'T',  'U',  'V',  'W',
	 'X',  'Y',  'Z',  '[', '\\',  ']',  '^',  '_',
	 '`',  'a',  'b',  'c',  'd',  'e',  'f',  'g',
	 'h',  'i',  'j',  'k',  'l',  'm',  'n',  'o',
	 'p',  'q',  'r',  's',  't',  'u',  'v',  'w',
	 'x',  'y',  'z',  '{',  '|',  '}',  '~'
      };

    /* Keep the result in an int: storing -1 in the unsigned CODE and
       converting it back to int on return would be an
       implementation-defined conversion.  */
    byte = code < sizeof table ? table[code] : -1;
  }
#endif

  return byte;
}
787
788
789 /*----------------------------------------------------------------.
790 | Handle `#line INT "FILE"'. ARGS has already skipped `#line '. |
791 `----------------------------------------------------------------*/
792
793 static void
794 handle_syncline (char *args, location loc)
795 {
796 char *after_num;
797 unsigned long int lineno = strtoul (args, &after_num, 10);
798 char *file = strchr (after_num, '"') + 1;
799 *strchr (file, '"') = '\0';
800 if (INT_MAX <= lineno)
801 {
802 warn_at (loc, _("line number overflow"));
803 lineno = INT_MAX;
804 }
805 current_file = uniqstr_new (file);
806 boundary_set (&scanner_cursor, current_file, lineno, 1);
807 }
808
809
810 /*----------------------------------------------------------------.
811 | For a token or comment starting at START, report message MSGID, |
812 | which should say that an end marker was found before |
813 | the expected TOKEN_END. |
814 `----------------------------------------------------------------*/
815
816 static void
817 unexpected_end (boundary start, char const *msgid, char const *token_end)
818 {
819 location loc;
820 loc.start = start;
821 loc.end = scanner_cursor;
822 complain_at (loc, _(msgid), token_end);
823 }
824
825
826 /*------------------------------------------------------------------------.
827 | Report an unexpected EOF in a token or comment starting at START. |
828 | An end of file was encountered and the expected TOKEN_END was missing. |
829 `------------------------------------------------------------------------*/
830
831 static void
832 unexpected_eof (boundary start, char const *token_end)
833 {
834 unexpected_end (start, N_("missing `%s' at end of file"), token_end);
835 }
836
837
838 /*----------------------------------------.
839 | Likewise, but for unexpected newlines. |
840 `----------------------------------------*/
841
842 static void
843 unexpected_newline (boundary start, char const *token_end)
844 {
845 unexpected_end (start, N_("missing `%s' at end of line"), token_end);
846 }
847
848
/*-------------------------------------------------------------.
| Initialize the scanner: set up the obstack in which token    |
| strings are accumulated (obstack_for_string, declared        |
| outside this file).                                          |
`-------------------------------------------------------------*/

void
gram_scanner_initialize (void)
{
  obstack_init (&obstack_for_string);
}
858
859
/*-----------------------------------------------.
| Free all the memory allocated to the scanner.  |
`-----------------------------------------------*/

void
gram_scanner_free (void)
{
  /* Release the string obstack set up by gram_scanner_initialize.  */
  obstack_free (&obstack_for_string, 0);
  /* Reclaim Flex's buffers.  */
  yylex_destroy ();
}