]> git.saurik.com Git - bison.git/blame - src/scan-gram.l
tests: check token numbers.
[bison.git] / src / scan-gram.l
CommitLineData
e9955c83 1/* Bison Grammar Scanner -*- C -*-
3b1e470c 2
ba061fa6
AD
3 Copyright (C) 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009
4 Free Software Foundation, Inc.
e9955c83
AD
5
6 This file is part of Bison, the GNU Compiler Compiler.
7
f16b0819 8 This program is free software: you can redistribute it and/or modify
e9955c83 9 it under the terms of the GNU General Public License as published by
f16b0819 10 the Free Software Foundation, either version 3 of the License, or
e9955c83
AD
11 (at your option) any later version.
12
13 This program is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU General Public License for more details.
17
18 You should have received a copy of the GNU General Public License
f16b0819 19 along with this program. If not, see <http://www.gnu.org/licenses/>. */
e9955c83 20
aa418041 21%option debug nodefault nounput noyywrap never-interactive
e9955c83
AD
22%option prefix="gram_" outfile="lex.yy.c"
23
24%{
4f6e011e
PE
25/* Work around a bug in flex 2.5.31. See Debian bug 333231
26 <http://bugs.debian.org/cgi-bin/bugreport.cgi?bug=333231>. */
27#undef gram_wrap
28#define gram_wrap() 1
29
e9071366 30#define FLEX_PREFIX(Id) gram_ ## Id
0305d25e 31#include <src/flex-scanner.h>
223ff46e 32
0305d25e
AD
33#include <src/complain.h>
34#include <src/files.h>
35#include <src/gram.h>
36#include <quotearg.h>
37#include <src/reader.h>
38#include <src/uniqstr.h>
e9955c83 39
e9071366
AD
40#include <mbswidth.h>
41#include <quote.h>
42
0305d25e 43#include <src/scan-gram.h>
e9071366
AD
44
45#define YY_DECL GRAM_LEX_DECL
2346344a 46
3f2d73f1 47#define YY_USER_INIT \
e9071366 48 code_start = scanner_cursor = loc->start; \
dc9701e8 49
3f2d73f1 50/* Location of scanner cursor. */
4a678af8 51static boundary scanner_cursor;
41141c56 52
e9071366 53#define YY_USER_ACTION location_compute (loc, &scanner_cursor, yytext, yyleng);
d8d3f94a 54
6c30d641 55static size_t no_cr_read (FILE *, char *, size_t);
d8d3f94a
PE
56#define YY_INPUT(buf, result, size) ((result) = no_cr_read (yyin, buf, size))
57
ba061fa6
AD
58#define RETURN_PERCENT_FLAG(Value) \
59 do { \
60 val->uniqstr = uniqstr_new (Value); \
61 return PERCENT_FLAG; \
62 } while (0)
63
64
7ec2d4cd 65/* A string representing the most recently saved token. */
7c0c6181 66static char *last_string;
7ec2d4cd 67
7ec2d4cd 68void
e9071366 69gram_scanner_last_string_free (void)
7ec2d4cd 70{
41141c56 71 STRING_FREE;
7ec2d4cd 72}
e9955c83 73
4517da37 74static void handle_syncline (char *, location);
1452af69 75static unsigned long int scan_integer (char const *p, int base, location loc);
d8d3f94a 76static int convert_ucn_to_byte (char const *hex_text);
aa418041 77static void unexpected_eof (boundary, char const *);
4febdd96 78static void unexpected_newline (boundary, char const *);
e9955c83
AD
79
80%}
e9071366
AD
81 /* A C-like comment in directives/rules. */
82%x SC_YACC_COMMENT
83 /* Strings and characters in directives/rules. */
e9955c83 84%x SC_ESCAPED_STRING SC_ESCAPED_CHARACTER
e9071366
AD
85 /* An identifier was just read in directives/rules. Special state
86 to capture the sequence `identifier :'. */
87%x SC_AFTER_IDENTIFIER
cb823b6f
AD
88 /* A complex tag, with nested angle brackets. */
89%x SC_TAG
e9071366
AD
90
91 /* Three types of user code:
92 - prologue (code between `%{' `%}' in the first section, before %%);
93 - actions, printers, union, etc, (between braces in the middle section);
94 - epilogue (everything after the second %%). */
95%x SC_PROLOGUE SC_BRACED_CODE SC_EPILOGUE
96 /* C and C++ comments in code. */
97%x SC_COMMENT SC_LINE_COMMENT
98 /* Strings and characters in code. */
99%x SC_STRING SC_CHARACTER
e9955c83 100
cdf3f113
AD
101letter [-.abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ_]
102id {letter}({letter}|[0-9])*
4f646c37 103directive %{id}
624a35e2 104int [0-9]+
d8d3f94a
PE
105
106/* POSIX says that a tag must be both an id and a C union member, but
107 historically almost any character is allowed in a tag. We disallow
cb823b6f
AD
108 NUL, as this simplifies our implementation. We disallow angle
109 bracket to match them in nested pairs: several languages use them
110 for generics/template types. */
111tag [^\0<>]+
d8d3f94a
PE
112
113/* Zero or more instances of backslash-newline. Following GCC, allow
114 white space between the backslash and the newline. */
115splice (\\[ \f\t\v]*\n)*
e9955c83
AD
116
117%%
118%{
cb823b6f
AD
119 /* Nesting level. Either for nested braces, or nested angle brackets
120 (but not mixed). */
121 int nesting IF_LINT (= 0);
1a9e39f1 122
3f2d73f1 123 /* Parent context state, when applicable. */
5362ed19 124 int context_state IF_LINT (= 0);
a706a1cc 125
3f2d73f1 126 /* Location of most recent identifier, when applicable. */
a2bc9dbc 127 location id_loc IF_LINT (= empty_location);
3f2d73f1 128
a2bc9dbc
PE
129 /* Where containing code started, when applicable. Its initial
130 value is relevant only when yylex is invoked in the SC_EPILOGUE
131 start condition. */
132 boundary code_start = scanner_cursor;
3f2d73f1 133
223ff46e
PE
134 /* Where containing comment or string or character literal started,
135 when applicable. */
a2bc9dbc 136 boundary token_start IF_LINT (= scanner_cursor);
e9955c83
AD
137%}
138
139
3f2d73f1
PE
140 /*-----------------------.
141 | Scanning white space. |
142 `-----------------------*/
143
58d7a1a1 144<INITIAL,SC_AFTER_IDENTIFIER>
3f2d73f1 145{
4febdd96 146 /* Comments and white space. */
83adb046 147 "," warn_at (*loc, _("stray `,' treated as white space"));
4febdd96 148 [ \f\n\t\v] |
3f2d73f1 149 "//".* ;
83adb046
PE
150 "/*" {
151 token_start = loc->start;
152 context_state = YY_START;
153 BEGIN SC_YACC_COMMENT;
154 }
3f2d73f1
PE
155
156 /* #line directives are not documented, and may be withdrawn or
157 modified in future versions of Bison. */
158 ^"#line "{int}" \"".*"\"\n" {
4517da37 159 handle_syncline (yytext + sizeof "#line " - 1, *loc);
3f2d73f1
PE
160 }
161}
162
163
e9955c83
AD
164 /*----------------------------.
165 | Scanning Bison directives. |
166 `----------------------------*/
a7c09cba
DJ
167
168 /* For directives that are also command line options, the regex must be
169 "%..."
170 after "[-_]"s are removed, and the directive must match the --long
171 option name, with a single string argument. Otherwise, add exceptions
172 to ../build-aux/cross-options.pl. */
173
e9955c83
AD
174<INITIAL>
175{
deef2a0a 176 "%binary" return PERCENT_NONASSOC;
136a0f76 177 "%code" return PERCENT_CODE;
fa819509 178 "%debug" RETURN_PERCENT_FLAG("parse.trace");
deef2a0a
AD
179 "%default"[-_]"prec" return PERCENT_DEFAULT_PREC;
180 "%define" return PERCENT_DEFINE;
181 "%defines" return PERCENT_DEFINES;
182 "%destructor" return PERCENT_DESTRUCTOR;
183 "%dprec" return PERCENT_DPREC;
71b00ed8 184 "%error"[-_]"verbose" RETURN_PERCENT_FLAG("error-verbose");
deef2a0a
AD
185 "%expect" return PERCENT_EXPECT;
186 "%expect"[-_]"rr" return PERCENT_EXPECT_RR;
187 "%file-prefix" return PERCENT_FILE_PREFIX;
e9955c83 188 "%fixed"[-_]"output"[-_]"files" return PERCENT_YACC;
deef2a0a
AD
189 "%initial-action" return PERCENT_INITIAL_ACTION;
190 "%glr-parser" return PERCENT_GLR_PARSER;
191 "%language" return PERCENT_LANGUAGE;
192 "%left" return PERCENT_LEFT;
193 "%lex-param" return PERCENT_LEX_PARAM;
bc0f5737 194 "%locations" RETURN_PERCENT_FLAG("locations");
deef2a0a
AD
195 "%merge" return PERCENT_MERGE;
196 "%name"[-_]"prefix" return PERCENT_NAME_PREFIX;
197 "%no"[-_]"default"[-_]"prec" return PERCENT_NO_DEFAULT_PREC;
198 "%no"[-_]"lines" return PERCENT_NO_LINES;
199 "%nonassoc" return PERCENT_NONASSOC;
200 "%nondeterministic-parser" return PERCENT_NONDETERMINISTIC_PARSER;
201 "%nterm" return PERCENT_NTERM;
202 "%output" return PERCENT_OUTPUT;
203 "%parse-param" return PERCENT_PARSE_PARAM;
204 "%prec" return PERCENT_PREC;
d78f0ac9 205 "%precedence" return PERCENT_PRECEDENCE;
deef2a0a 206 "%printer" return PERCENT_PRINTER;
4920ae8b 207 "%pure"[-_]"parser" RETURN_PERCENT_FLAG("api.pure");
deef2a0a
AD
208 "%require" return PERCENT_REQUIRE;
209 "%right" return PERCENT_RIGHT;
210 "%skeleton" return PERCENT_SKELETON;
211 "%start" return PERCENT_START;
212 "%term" return PERCENT_TOKEN;
213 "%token" return PERCENT_TOKEN;
214 "%token"[-_]"table" return PERCENT_TOKEN_TABLE;
215 "%type" return PERCENT_TYPE;
216 "%union" return PERCENT_UNION;
217 "%verbose" return PERCENT_VERBOSE;
218 "%yacc" return PERCENT_YACC;
e9955c83 219
3f2d73f1 220 {directive} {
41141c56 221 complain_at (*loc, _("invalid directive: %s"), quote (yytext));
412f8a59 222 }
900c5db5 223
e9955c83 224 "=" return EQUAL;
e9071366 225 "|" return PIPE;
e9955c83
AD
226 ";" return SEMICOLON;
227
3f2d73f1 228 {id} {
58d7a1a1 229 val->uniqstr = uniqstr_new (yytext);
3f2d73f1 230 id_loc = *loc;
3f2d73f1 231 BEGIN SC_AFTER_IDENTIFIER;
e9955c83
AD
232 }
233
d8d3f94a 234 {int} {
1452af69
PE
235 val->integer = scan_integer (yytext, 10, *loc);
236 return INT;
237 }
238 0[xX][0-9abcdefABCDEF]+ {
239 val->integer = scan_integer (yytext, 16, *loc);
d8d3f94a
PE
240 return INT;
241 }
e9955c83 242
84a1cb5a
AD
243 /* Identifiers may not start with a digit. Yet, don't silently
244 accept "1FOO" as "1 FOO". */
245 {int}{id} {
246 complain_at (*loc, _("invalid identifier: %s"), quote (yytext));
247 }
248
e9955c83 249 /* Characters. We don't check there is only one. */
3f2d73f1 250 "'" STRING_GROW; token_start = loc->start; BEGIN SC_ESCAPED_CHARACTER;
e9955c83
AD
251
252 /* Strings. */
ca407bdf 253 "\"" token_start = loc->start; BEGIN SC_ESCAPED_STRING;
e9955c83
AD
254
255 /* Prologue. */
3f2d73f1 256 "%{" code_start = loc->start; BEGIN SC_PROLOGUE;
e9955c83
AD
257
258 /* Code in between braces. */
3f2d73f1
PE
259 "{" {
260 STRING_GROW;
cb823b6f 261 nesting = 0;
3f2d73f1
PE
262 code_start = loc->start;
263 BEGIN SC_BRACED_CODE;
264 }
e9955c83
AD
265
266 /* A type. */
cb823b6f
AD
267 "<*>" return TAG_ANY;
268 "<>" return TAG_NONE;
d8d3f94a 269 "<"{tag}">" {
223ff46e 270 obstack_grow (&obstack_for_string, yytext + 1, yyleng - 2);
41141c56 271 STRING_FINISH;
223ff46e 272 val->uniqstr = uniqstr_new (last_string);
41141c56 273 STRING_FREE;
cb823b6f
AD
274 return TAG;
275 }
276 "<" {
277 nesting = 0;
278 token_start = loc->start;
279 BEGIN SC_TAG;
4cdb01db
AD
280 }
281
a706a1cc
PE
282 "%%" {
283 static int percent_percent_count;
e9955c83 284 if (++percent_percent_count == 2)
a2bc9dbc 285 BEGIN SC_EPILOGUE;
e9955c83
AD
286 return PERCENT_PERCENT;
287 }
288
a706a1cc 289 . {
41141c56 290 complain_at (*loc, _("invalid character: %s"), quote (yytext));
3f2d73f1 291 }
379f0ac8
PE
292
293 <<EOF>> {
294 loc->start = loc->end = scanner_cursor;
295 yyterminate ();
296 }
3f2d73f1
PE
297}
298
299
cb823b6f
AD
300 /*--------------------------------------------------------------.
301 | Supporting \0 complexifies our implementation for no expected |
302 | added value. |
303 `--------------------------------------------------------------*/
304
305<SC_ESCAPED_CHARACTER,SC_ESCAPED_STRING,SC_TAG>
306{
307 \0 complain_at (*loc, _("invalid null character"));
308}
309
310
3f2d73f1
PE
311 /*-----------------------------------------------------------------.
312 | Scanning after an identifier, checking whether a colon is next. |
313 `-----------------------------------------------------------------*/
314
315<SC_AFTER_IDENTIFIER>
316{
317 ":" {
3f2d73f1
PE
318 *loc = id_loc;
319 BEGIN INITIAL;
320 return ID_COLON;
321 }
322 . {
323 scanner_cursor.column -= mbsnwidth (yytext, yyleng, 0);
324 yyless (0);
325 *loc = id_loc;
326 BEGIN INITIAL;
327 return ID;
328 }
329 <<EOF>> {
330 *loc = id_loc;
331 BEGIN INITIAL;
332 return ID;
e9955c83
AD
333 }
334}
335
336
d8d3f94a
PE
337 /*---------------------------------------------------------------.
338 | Scanning a Yacc comment. The initial `/ *' is already eaten. |
339 `---------------------------------------------------------------*/
e9955c83 340
d8d3f94a 341<SC_YACC_COMMENT>
e9955c83 342{
3f2d73f1 343 "*/" BEGIN context_state;
a706a1cc 344 .|\n ;
aa418041 345 <<EOF>> unexpected_eof (token_start, "*/"); BEGIN context_state;
d8d3f94a
PE
346}
347
348
349 /*------------------------------------------------------------.
350 | Scanning a C comment. The initial `/ *' is already eaten. |
351 `------------------------------------------------------------*/
352
353<SC_COMMENT>
354{
3f2d73f1 355 "*"{splice}"/" STRING_GROW; BEGIN context_state;
aa418041 356 <<EOF>> unexpected_eof (token_start, "*/"); BEGIN context_state;
e9955c83
AD
357}
358
359
d8d3f94a
PE
360 /*--------------------------------------------------------------.
361 | Scanning a line comment. The initial `//' is already eaten. |
362 `--------------------------------------------------------------*/
363
364<SC_LINE_COMMENT>
365{
3f2d73f1 366 "\n" STRING_GROW; BEGIN context_state;
41141c56 367 {splice} STRING_GROW;
3f2d73f1 368 <<EOF>> BEGIN context_state;
d8d3f94a
PE
369}
370
371
4febdd96
PE
372 /*------------------------------------------------.
373 | Scanning a Bison string, including its escapes. |
374 | The initial quote is already eaten. |
375 `------------------------------------------------*/
e9955c83
AD
376
377<SC_ESCAPED_STRING>
378{
47aee066
JD
379 "\""|"\n" {
380 if (yytext[0] == '\n')
381 unexpected_newline (token_start, "\"");
382 STRING_FINISH;
383 loc->start = token_start;
384 val->chars = last_string;
385 BEGIN INITIAL;
386 return STRING;
387 }
388 <<EOF>> {
389 unexpected_eof (token_start, "\"");
41141c56 390 STRING_FINISH;
3f2d73f1 391 loc->start = token_start;
223ff46e 392 val->chars = last_string;
a706a1cc 393 BEGIN INITIAL;
e9955c83
AD
394 return STRING;
395 }
e9955c83
AD
396}
397
4febdd96
PE
398 /*----------------------------------------------------------.
399 | Scanning a Bison character literal, decoding its escapes. |
400 | The initial quote is already eaten. |
401 `----------------------------------------------------------*/
e9955c83
AD
402
403<SC_ESCAPED_CHARACTER>
404{
47aee066
JD
405 "'"|"\n" {
406 if (yytext[0] == '\n')
407 unexpected_newline (token_start, "'");
41141c56
PE
408 STRING_GROW;
409 STRING_FINISH;
3f2d73f1 410 loc->start = token_start;
58d7a1a1 411 val->character = last_string[1];
41141c56 412 STRING_FREE;
a706a1cc 413 BEGIN INITIAL;
58d7a1a1 414 return CHAR;
e9955c83 415 }
47aee066
JD
416 <<EOF>> {
417 unexpected_eof (token_start, "'");
418 STRING_FINISH;
419 loc->start = token_start;
4f646c37 420 if (strlen (last_string) > 1)
47aee066
JD
421 val->character = last_string[1];
422 else
423 val->character = last_string[0];
424 STRING_FREE;
425 BEGIN INITIAL;
426 return CHAR;
427 }
4febdd96 428}
a706a1cc 429
cb823b6f
AD
430 /*-----------------------------------------------------------.
431 | Scanning a Bison nested tag. The initial angle bracket is |
432 | already eaten. |
433 `-----------------------------------------------------------*/
434
435<SC_TAG>
4febdd96 436{
cb823b6f
AD
437 ">" {
438 --nesting;
439 if (nesting < 0)
440 {
441 STRING_FINISH;
442 loc->start = token_start;
443 val->uniqstr = uniqstr_new (last_string);
444 STRING_FREE;
445 BEGIN INITIAL;
446 return TAG;
447 }
448 STRING_GROW;
449 }
450
451 [^<>]+ STRING_GROW;
452 "<"+ STRING_GROW; nesting += yyleng;
e9955c83 453
cb823b6f
AD
454 <<EOF>> {
455 unexpected_eof (token_start, ">");
456 STRING_FINISH;
457 loc->start = token_start;
458 val->uniqstr = uniqstr_new (last_string);
459 STRING_FREE;
460 BEGIN INITIAL;
461 return TAG;
462 }
463}
e9955c83
AD
464
465 /*----------------------------.
466 | Decode escaped characters. |
467 `----------------------------*/
468
469<SC_ESCAPED_STRING,SC_ESCAPED_CHARACTER>
470{
d8d3f94a 471 \\[0-7]{1,3} {
4517da37 472 unsigned long int c = strtoul (yytext + 1, NULL, 8);
d8d3f94a 473 if (UCHAR_MAX < c)
3f2d73f1 474 complain_at (*loc, _("invalid escape sequence: %s"), quote (yytext));
05ac60f3 475 else if (! c)
92ac3705 476 complain_at (*loc, _("invalid null character: %s"), quote (yytext));
e9955c83 477 else
223ff46e 478 obstack_1grow (&obstack_for_string, c);
e9955c83
AD
479 }
480
6b0d38ab 481 \\x[0-9abcdefABCDEF]+ {
4517da37
PE
482 verify (UCHAR_MAX < ULONG_MAX);
483 unsigned long int c = strtoul (yytext + 2, NULL, 16);
484 if (UCHAR_MAX < c)
3f2d73f1 485 complain_at (*loc, _("invalid escape sequence: %s"), quote (yytext));
92ac3705
PE
486 else if (! c)
487 complain_at (*loc, _("invalid null character: %s"), quote (yytext));
d8d3f94a 488 else
223ff46e 489 obstack_1grow (&obstack_for_string, c);
e9955c83
AD
490 }
491
223ff46e
PE
492 \\a obstack_1grow (&obstack_for_string, '\a');
493 \\b obstack_1grow (&obstack_for_string, '\b');
494 \\f obstack_1grow (&obstack_for_string, '\f');
495 \\n obstack_1grow (&obstack_for_string, '\n');
496 \\r obstack_1grow (&obstack_for_string, '\r');
497 \\t obstack_1grow (&obstack_for_string, '\t');
498 \\v obstack_1grow (&obstack_for_string, '\v');
412f8a59
PE
499
500 /* \\[\"\'?\\] would be shorter, but it confuses xgettext. */
223ff46e 501 \\("\""|"'"|"?"|"\\") obstack_1grow (&obstack_for_string, yytext[1]);
412f8a59 502
6b0d38ab 503 \\(u|U[0-9abcdefABCDEF]{4})[0-9abcdefABCDEF]{4} {
d8d3f94a
PE
504 int c = convert_ucn_to_byte (yytext);
505 if (c < 0)
3f2d73f1 506 complain_at (*loc, _("invalid escape sequence: %s"), quote (yytext));
92ac3705
PE
507 else if (! c)
508 complain_at (*loc, _("invalid null character: %s"), quote (yytext));
d8d3f94a 509 else
223ff46e 510 obstack_1grow (&obstack_for_string, c);
d8d3f94a 511 }
4f25ebb0 512 \\(.|\n) {
3f2d73f1 513 complain_at (*loc, _("unrecognized escape sequence: %s"), quote (yytext));
41141c56 514 STRING_GROW;
e9955c83
AD
515 }
516}
517
4febdd96
PE
518 /*--------------------------------------------.
519 | Scanning user-code characters and strings. |
520 `--------------------------------------------*/
e9955c83 521
4febdd96
PE
522<SC_CHARACTER,SC_STRING>
523{
e9071366 524 {splice}|\\{splice}[^\n\[\]] STRING_GROW;
4febdd96 525}
e9955c83
AD
526
527<SC_CHARACTER>
528{
4febdd96
PE
529 "'" STRING_GROW; BEGIN context_state;
530 \n unexpected_newline (token_start, "'"); BEGIN context_state;
531 <<EOF>> unexpected_eof (token_start, "'"); BEGIN context_state;
e9955c83
AD
532}
533
e9955c83
AD
534<SC_STRING>
535{
4febdd96
PE
536 "\"" STRING_GROW; BEGIN context_state;
537 \n unexpected_newline (token_start, "\""); BEGIN context_state;
538 <<EOF>> unexpected_eof (token_start, "\""); BEGIN context_state;
e9955c83
AD
539}
540
541
542 /*---------------------------------------------------.
543 | Strings, comments etc. can be found in user code. |
544 `---------------------------------------------------*/
545
546<SC_BRACED_CODE,SC_PROLOGUE,SC_EPILOGUE>
547{
3f2d73f1
PE
548 "'" {
549 STRING_GROW;
550 context_state = YY_START;
551 token_start = loc->start;
552 BEGIN SC_CHARACTER;
553 }
554 "\"" {
555 STRING_GROW;
556 context_state = YY_START;
557 token_start = loc->start;
558 BEGIN SC_STRING;
559 }
560 "/"{splice}"*" {
561 STRING_GROW;
562 context_state = YY_START;
563 token_start = loc->start;
564 BEGIN SC_COMMENT;
565 }
566 "/"{splice}"/" {
567 STRING_GROW;
568 context_state = YY_START;
569 BEGIN SC_LINE_COMMENT;
570 }
e9955c83
AD
571}
572
573
624a35e2 574
58d7a1a1
AD
575 /*-----------------------------------------------------------.
576 | Scanning some code in braces (actions). The initial "{" is |
577 | already eaten. |
578 `-----------------------------------------------------------*/
e9955c83
AD
579
580<SC_BRACED_CODE>
581{
cb823b6f
AD
582 "{"|"<"{splice}"%" STRING_GROW; nesting++;
583 "%"{splice}">" STRING_GROW; nesting--;
e9955c83 584 "}" {
25522739
PE
585 obstack_1grow (&obstack_for_string, '}');
586
cb823b6f
AD
587 --nesting;
588 if (nesting < 0)
e9955c83 589 {
41141c56 590 STRING_FINISH;
3f2d73f1 591 loc->start = code_start;
eb095650 592 val->code = last_string;
a706a1cc 593 BEGIN INITIAL;
58d7a1a1 594 return BRACED_CODE;
e9955c83
AD
595 }
596 }
597
a706a1cc
PE
598 /* Tokenize `<<%' correctly (as `<<' `%') rather than incorrectly
599 (as `<' `<%'). */
41141c56 600 "<"{splice}"<" STRING_GROW;
a706a1cc 601
47aee066
JD
602 <<EOF>> {
603 unexpected_eof (code_start, "}");
604 STRING_FINISH;
605 loc->start = code_start;
eb095650 606 val->code = last_string;
47aee066
JD
607 BEGIN INITIAL;
608 return BRACED_CODE;
609 }
e9955c83
AD
610}
611
612
613 /*--------------------------------------------------------------.
614 | Scanning some prologue: from "%{" (already scanned) to "%}". |
615 `--------------------------------------------------------------*/
616
617<SC_PROLOGUE>
618{
619 "%}" {
41141c56 620 STRING_FINISH;
3f2d73f1 621 loc->start = code_start;
223ff46e 622 val->chars = last_string;
a706a1cc 623 BEGIN INITIAL;
e9955c83
AD
624 return PROLOGUE;
625 }
626
47aee066
JD
627 <<EOF>> {
628 unexpected_eof (code_start, "%}");
629 STRING_FINISH;
630 loc->start = code_start;
631 val->chars = last_string;
632 BEGIN INITIAL;
633 return PROLOGUE;
634 }
e9955c83
AD
635}
636
637
638 /*---------------------------------------------------------------.
639 | Scanning the epilogue (everything after the second "%%", which |
d8d3f94a 640 | has already been eaten). |
e9955c83
AD
641 `---------------------------------------------------------------*/
642
643<SC_EPILOGUE>
644{
e9955c83 645 <<EOF>> {
41141c56 646 STRING_FINISH;
3f2d73f1 647 loc->start = code_start;
223ff46e 648 val->chars = last_string;
a706a1cc 649 BEGIN INITIAL;
e9955c83
AD
650 return EPILOGUE;
651 }
652}
653
654
4febdd96
PE
655 /*-----------------------------------------------------.
656 | By default, grow the string obstack with the input. |
657 `-----------------------------------------------------*/
658
659<SC_COMMENT,SC_LINE_COMMENT,SC_BRACED_CODE,SC_PROLOGUE,SC_EPILOGUE,SC_STRING,SC_CHARACTER,SC_ESCAPED_STRING,SC_ESCAPED_CHARACTER>. |
660<SC_COMMENT,SC_LINE_COMMENT,SC_BRACED_CODE,SC_PROLOGUE,SC_EPILOGUE>\n STRING_GROW;
661
e9955c83
AD
662%%
663
/* Read up to SIZE bytes from FP into buffer BUF.  Return the number
   of bytes left in BUF after carriage-return removal, which may be
   smaller than the number of bytes consumed from FP.  Remove '\r'
   from input, treating \r\n and isolated \r as \n.  A return value
   of 0 means fread delivered no bytes.  */

static size_t
no_cr_read (FILE *fp, char *buf, size_t size)
{
  size_t bytes_read = fread (buf, 1, size, fp);
  if (bytes_read)
    {
      char *w = memchr (buf, '\r', bytes_read);
      if (w)
        {
          /* W is the write cursor and R the read cursor; both start
             just past the first '\r'.  LIM is the end of the data.  */
          char const *r = ++w;
          char const *lim = buf + bytes_read;

          for (;;)
            {
              /* Found an '\r'.  Treat it like '\n', but ignore any
                 '\n' that immediately follows.  */
              w[-1] = '\n';
              if (r == lim)
                {
                  /* The '\r' was the last byte read, so the '\n' to
                     ignore, if any, is still in the stream.  Peek at
                     the next byte and push it back unless it is that
                     '\n'.  At end of file, ungetc (EOF, fp) returns
                     EOF, which equals CH, so the loop is not left
                     here; it is left only if the push-back fails.  */
                  int ch = getc (fp);
                  if (ch != '\n' && ungetc (ch, fp) != ch)
                    break;
                }
              else if (*r == '\n')
                r++;

              /* Copy until the next '\r'.  */
              do
                {
                  if (r == lim)
                    return w - buf;
                }
              while ((*w++ = *r++) != '\r');
            }

          return w - buf;
        }
    }

  return bytes_read;
}
709
710
f25bfb75 711
1452af69
PE
712/*------------------------------------------------------.
713| Scan NUMBER for a base-BASE integer at location LOC. |
714`------------------------------------------------------*/
715
716static unsigned long int
717scan_integer (char const *number, int base, location loc)
718{
4517da37
PE
719 verify (INT_MAX < ULONG_MAX);
720 unsigned long int num = strtoul (number, NULL, base);
721
722 if (INT_MAX < num)
1452af69
PE
723 {
724 complain_at (loc, _("integer out of range: %s"), quote (number));
725 num = INT_MAX;
726 }
4517da37 727
1452af69
PE
728 return num;
729}
730
731
/*------------------------------------------------------------------.
| Convert universal character name UCN to a single-byte character,  |
| and return that character.  Return -1 if UCN does not correspond  |
| to a single-byte character.  UCN points at the backslash; the     |
| hexadecimal digits start two characters later.                    |
`------------------------------------------------------------------*/

static int
convert_ucn_to_byte (char const *ucn)
{
  /* Every single-byte value must be returnable as a nonnegative int.  */
  verify (UCHAR_MAX <= INT_MAX);
  unsigned long int code = strtoul (ucn + 2, NULL, 16);

  /* FIXME: Currently we assume Unicode-compatible unibyte characters
     on ASCII hosts (i.e., Latin-1 on hosts with 8-bit bytes).  On
     non-ASCII hosts we support only the portable C character set.
     These limitations should be removed once we add support for
     multibyte characters.  */

  if (UCHAR_MAX < code)
    return -1;

#if ! ('$' == 0x24 && '@' == 0x40 && '`' == 0x60 && '~' == 0x7e)
  {
    /* A non-ASCII host.  Use CODE to index into a table of the C
       basic execution character set, which is guaranteed to exist on
       all Standard C platforms.  This table also includes '$', '@',
       and '`', which are not in the basic execution character set but
       which are unibyte characters on all the platforms that we know
       about.  */
    static signed char const table[] =
      {
        '\0',   -1,   -1,   -1,   -1,   -1,   -1, '\a',
        '\b', '\t', '\n', '\v', '\f', '\r',   -1,   -1,
          -1,   -1,   -1,   -1,   -1,   -1,   -1,   -1,
          -1,   -1,   -1,   -1,   -1,   -1,   -1,   -1,
         ' ',  '!',  '"',  '#',  '$',  '%',  '&', '\'',
         '(',  ')',  '*',  '+',  ',',  '-',  '.',  '/',
         '0',  '1',  '2',  '3',  '4',  '5',  '6',  '7',
         '8',  '9',  ':',  ';',  '<',  '=',  '>',  '?',
         '@',  'A',  'B',  'C',  'D',  'E',  'F',  'G',
         'H',  'I',  'J',  'K',  'L',  'M',  'N',  'O',
         'P',  'Q',  'R',  'S',  'T',  'U',  'V',  'W',
         'X',  'Y',  'Z',  '[', '\\',  ']',  '^',  '_',
         '`',  'a',  'b',  'c',  'd',  'e',  'f',  'g',
         'h',  'i',  'j',  'k',  'l',  'm',  'n',  'o',
         'p',  'q',  'r',  's',  't',  'u',  'v',  'w',
         'x',  'y',  'z',  '{',  '|',  '}',  '~'
      };

    /* Return the table entry directly as an int.  Storing a -1 entry
       in the unsigned CODE and converting it back to int on return
       would rely on an implementation-defined conversion.  */
    return code < sizeof table ? table[code] : -1;
  }
#else
  /* CODE is at most UCHAR_MAX here, so it fits in int.  */
  return code;
#endif
}
787
788
900c5db5
AD
789/*----------------------------------------------------------------.
790| Handle `#line INT "FILE"'. ARGS has already skipped `#line '. |
791`----------------------------------------------------------------*/
792
793static void
4517da37 794handle_syncline (char *args, location loc)
900c5db5 795{
4517da37
PE
796 char *after_num;
797 unsigned long int lineno = strtoul (args, &after_num, 10);
798 char *file = strchr (after_num, '"') + 1;
799 *strchr (file, '"') = '\0';
800 if (INT_MAX <= lineno)
801 {
802 warn_at (loc, _("line number overflow"));
803 lineno = INT_MAX;
804 }
e9071366 805 current_file = uniqstr_new (file);
0c8e079f 806 boundary_set (&scanner_cursor, current_file, lineno, 1);
4517da37
PE
807}
808
809
4febdd96
PE
810/*----------------------------------------------------------------.
811| For a token or comment starting at START, report message MSGID, |
812| which should say that an end marker was found before |
813| the expected TOKEN_END. |
814`----------------------------------------------------------------*/
815
816static void
817unexpected_end (boundary start, char const *msgid, char const *token_end)
818{
819 location loc;
820 loc.start = start;
821 loc.end = scanner_cursor;
822 complain_at (loc, _(msgid), token_end);
823}
824
825
3f2d73f1
PE
826/*------------------------------------------------------------------------.
827| Report an unexpected EOF in a token or comment starting at START. |
828| An end of file was encountered and the expected TOKEN_END was missing. |
3f2d73f1 829`------------------------------------------------------------------------*/
a706a1cc
PE
830
831static void
aa418041 832unexpected_eof (boundary start, char const *token_end)
a706a1cc 833{
4febdd96
PE
834 unexpected_end (start, N_("missing `%s' at end of file"), token_end);
835}
836
837
838/*----------------------------------------.
839| Likewise, but for unexpected newlines. |
840`----------------------------------------*/
841
842static void
843unexpected_newline (boundary start, char const *token_end)
844{
845 unexpected_end (start, N_("missing `%s' at end of line"), token_end);
a706a1cc
PE
846}
847
848
f25bfb75
AD
849/*-------------------------.
850| Initialize the scanner. |
851`-------------------------*/
852
1d6412ad 853void
e9071366 854gram_scanner_initialize (void)
1d6412ad 855{
223ff46e 856 obstack_init (&obstack_for_string);
1d6412ad
AD
857}
858
859
f25bfb75
AD
860/*-----------------------------------------------.
861| Free all the memory allocated to the scanner. |
862`-----------------------------------------------*/
863
4cdb01db 864void
e9071366 865gram_scanner_free (void)
4cdb01db 866{
223ff46e 867 obstack_free (&obstack_for_string, 0);
536545f3 868 /* Reclaim Flex's buffers. */
580b8926 869 yylex_destroy ();
4cdb01db 870}