]> git.saurik.com Git - bison.git/blame - src/scan-gram.l
Regen.
[bison.git] / src / scan-gram.l
CommitLineData
e9955c83 1/* Bison Grammar Scanner -*- C -*-
3b1e470c 2
deef2a0a 3 Copyright (C) 2002, 2003, 2004, 2005, 2006, 2007, 2008 Free Software Foundation,
7c0c6181 4 Inc.
e9955c83
AD
5
6 This file is part of Bison, the GNU Compiler Compiler.
7
f16b0819 8 This program is free software: you can redistribute it and/or modify
e9955c83 9 it under the terms of the GNU General Public License as published by
f16b0819 10 the Free Software Foundation, either version 3 of the License, or
e9955c83
AD
11 (at your option) any later version.
12
13 This program is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU General Public License for more details.
17
18 You should have received a copy of the GNU General Public License
f16b0819 19 along with this program. If not, see <http://www.gnu.org/licenses/>. */
e9955c83 20
aa418041 21%option debug nodefault nounput noyywrap never-interactive
e9955c83
AD
22%option prefix="gram_" outfile="lex.yy.c"
23
24%{
4f6e011e
PE
25/* Work around a bug in flex 2.5.31. See Debian bug 333231
26 <http://bugs.debian.org/cgi-bin/bugreport.cgi?bug=333231>. */
27#undef gram_wrap
28#define gram_wrap() 1
29
e9071366 30#define FLEX_PREFIX(Id) gram_ ## Id
0305d25e 31#include <src/flex-scanner.h>
223ff46e 32
0305d25e
AD
33#include <src/complain.h>
34#include <src/files.h>
35#include <src/gram.h>
36#include <quotearg.h>
37#include <src/reader.h>
38#include <src/uniqstr.h>
e9955c83 39
e9071366
AD
40#include <mbswidth.h>
41#include <quote.h>
42
0305d25e 43#include <src/scan-gram.h>
e9071366
AD
44
45#define YY_DECL GRAM_LEX_DECL
2346344a 46
3f2d73f1 47#define YY_USER_INIT \
e9071366 48 code_start = scanner_cursor = loc->start; \
dc9701e8 49
3f2d73f1 50/* Location of scanner cursor. */
4a678af8 51static boundary scanner_cursor;
41141c56 52
e9071366 53#define YY_USER_ACTION location_compute (loc, &scanner_cursor, yytext, yyleng);
d8d3f94a 54
6c30d641 55static size_t no_cr_read (FILE *, char *, size_t);
d8d3f94a
PE
56#define YY_INPUT(buf, result, size) ((result) = no_cr_read (yyin, buf, size))
57
7ec2d4cd 58/* A string representing the most recently saved token. */
7c0c6181 59static char *last_string;
7ec2d4cd 60
7ec2d4cd 61void
e9071366 62gram_scanner_last_string_free (void)
7ec2d4cd 63{
41141c56 64 STRING_FREE;
7ec2d4cd 65}
e9955c83 66
4517da37 67static void handle_syncline (char *, location);
1452af69 68static unsigned long int scan_integer (char const *p, int base, location loc);
d8d3f94a 69static int convert_ucn_to_byte (char const *hex_text);
aa418041 70static void unexpected_eof (boundary, char const *);
4febdd96 71static void unexpected_newline (boundary, char const *);
e9955c83
AD
72
73%}
e9071366
AD
74 /* A C-like comment in directives/rules. */
75%x SC_YACC_COMMENT
76 /* Strings and characters in directives/rules. */
e9955c83 77%x SC_ESCAPED_STRING SC_ESCAPED_CHARACTER
e9071366
AD
78 /* An identifier was just read in directives/rules. Special state
79 to capture the sequence `identifier :'. */
80%x SC_AFTER_IDENTIFIER
cb823b6f
AD
81 /* A complex tag, with nested angle brackets. */
82%x SC_TAG
e9071366
AD
83
84 /* Three types of user code:
85 - prologue (code between `%{' `%}' in the first section, before %%);
86 - actions, printers, union, etc. (between braces in the middle section);
87 - epilogue (everything after the second %%). */
88%x SC_PROLOGUE SC_BRACED_CODE SC_EPILOGUE
89 /* C and C++ comments in code. */
90%x SC_COMMENT SC_LINE_COMMENT
91 /* Strings and characters in code. */
92%x SC_STRING SC_CHARACTER
e9955c83 93
29c01725
AD
94letter [.abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ_]
95id {letter}({letter}|[0-9])*
96directive %{letter}({letter}|[0-9]|-)*
624a35e2 97int [0-9]+
d8d3f94a
PE
98
99/* POSIX says that a tag must be both an id and a C union member, but
100 historically almost any character is allowed in a tag. We disallow
cb823b6f
AD
101 NUL, as this simplifies our implementation. We disallow angle
102 brackets to match them in nested pairs: several languages use them
103 for generics/template types. */
104tag [^\0<>]+
d8d3f94a
PE
105
106/* Zero or more instances of backslash-newline. Following GCC, allow
107 white space between the backslash and the newline. */
108splice (\\[ \f\t\v]*\n)*
e9955c83
AD
109
110%%
111%{
cb823b6f
AD
112 /* Nesting level. Either for nested braces, or nested angle brackets
113 (but not mixed). */
114 int nesting IF_LINT (= 0);
1a9e39f1 115
3f2d73f1 116 /* Parent context state, when applicable. */
5362ed19 117 int context_state IF_LINT (= 0);
a706a1cc 118
3f2d73f1 119 /* Location of most recent identifier, when applicable. */
a2bc9dbc 120 location id_loc IF_LINT (= empty_location);
3f2d73f1 121
a2bc9dbc
PE
122 /* Where containing code started, when applicable. Its initial
123 value is relevant only when yylex is invoked in the SC_EPILOGUE
124 start condition. */
125 boundary code_start = scanner_cursor;
3f2d73f1 126
223ff46e
PE
127 /* Where containing comment or string or character literal started,
128 when applicable. */
a2bc9dbc 129 boundary token_start IF_LINT (= scanner_cursor);
e9955c83
AD
130%}
131
132
3f2d73f1
PE
133 /*-----------------------.
134 | Scanning white space. |
135 `-----------------------*/
136
58d7a1a1 137<INITIAL,SC_AFTER_IDENTIFIER>
3f2d73f1 138{
4febdd96 139 /* Comments and white space. */
83adb046 140 "," warn_at (*loc, _("stray `,' treated as white space"));
4febdd96 141 [ \f\n\t\v] |
3f2d73f1 142 "//".* ;
83adb046
PE
143 "/*" {
144 token_start = loc->start;
145 context_state = YY_START;
146 BEGIN SC_YACC_COMMENT;
147 }
3f2d73f1
PE
148
149 /* #line directives are not documented, and may be withdrawn or
150 modified in future versions of Bison. */
151 ^"#line "{int}" \"".*"\"\n" {
4517da37 152 handle_syncline (yytext + sizeof "#line " - 1, *loc);
3f2d73f1
PE
153 }
154}
155
156
e9955c83
AD
157 /*----------------------------.
158 | Scanning Bison directives. |
159 `----------------------------*/
160<INITIAL>
161{
deef2a0a 162 "%binary" return PERCENT_NONASSOC;
136a0f76 163 "%code" return PERCENT_CODE;
deef2a0a
AD
164 "%debug" return PERCENT_DEBUG;
165 "%default"[-_]"prec" return PERCENT_DEFAULT_PREC;
166 "%define" return PERCENT_DEFINE;
167 "%defines" return PERCENT_DEFINES;
168 "%destructor" return PERCENT_DESTRUCTOR;
169 "%dprec" return PERCENT_DPREC;
170 "%error"[-_]"verbose" return PERCENT_ERROR_VERBOSE;
171 "%expect" return PERCENT_EXPECT;
172 "%expect"[-_]"rr" return PERCENT_EXPECT_RR;
173 "%file-prefix" return PERCENT_FILE_PREFIX;
e9955c83 174 "%fixed"[-_]"output"[-_]"files" return PERCENT_YACC;
deef2a0a
AD
175 "%initial-action" return PERCENT_INITIAL_ACTION;
176 "%glr-parser" return PERCENT_GLR_PARSER;
177 "%language" return PERCENT_LANGUAGE;
178 "%left" return PERCENT_LEFT;
179 "%lex-param" return PERCENT_LEX_PARAM;
180 "%locations" return PERCENT_LOCATIONS;
181 "%merge" return PERCENT_MERGE;
182 "%name"[-_]"prefix" return PERCENT_NAME_PREFIX;
183 "%no"[-_]"default"[-_]"prec" return PERCENT_NO_DEFAULT_PREC;
184 "%no"[-_]"lines" return PERCENT_NO_LINES;
185 "%nonassoc" return PERCENT_NONASSOC;
186 "%nondeterministic-parser" return PERCENT_NONDETERMINISTIC_PARSER;
187 "%nterm" return PERCENT_NTERM;
188 "%output" return PERCENT_OUTPUT;
189 "%parse-param" return PERCENT_PARSE_PARAM;
190 "%prec" return PERCENT_PREC;
d78f0ac9 191 "%precedence" return PERCENT_PRECEDENCE;
deef2a0a
AD
192 "%printer" return PERCENT_PRINTER;
193 "%pure"[-_]"parser" return PERCENT_PURE_PARSER;
194 "%require" return PERCENT_REQUIRE;
195 "%right" return PERCENT_RIGHT;
196 "%skeleton" return PERCENT_SKELETON;
197 "%start" return PERCENT_START;
198 "%term" return PERCENT_TOKEN;
199 "%token" return PERCENT_TOKEN;
200 "%token"[-_]"table" return PERCENT_TOKEN_TABLE;
201 "%type" return PERCENT_TYPE;
202 "%union" return PERCENT_UNION;
203 "%verbose" return PERCENT_VERBOSE;
204 "%yacc" return PERCENT_YACC;
e9955c83 205
3f2d73f1 206 {directive} {
41141c56 207 complain_at (*loc, _("invalid directive: %s"), quote (yytext));
412f8a59 208 }
900c5db5 209
e9955c83 210 "=" return EQUAL;
e9071366 211 "|" return PIPE;
e9955c83
AD
212 ";" return SEMICOLON;
213
3f2d73f1 214 {id} {
58d7a1a1 215 val->uniqstr = uniqstr_new (yytext);
3f2d73f1 216 id_loc = *loc;
3f2d73f1 217 BEGIN SC_AFTER_IDENTIFIER;
e9955c83
AD
218 }
219
d8d3f94a 220 {int} {
1452af69
PE
221 val->integer = scan_integer (yytext, 10, *loc);
222 return INT;
223 }
224 0[xX][0-9abcdefABCDEF]+ {
225 val->integer = scan_integer (yytext, 16, *loc);
d8d3f94a
PE
226 return INT;
227 }
e9955c83
AD
228
229 /* Characters. We don't check there is only one. */
3f2d73f1 230 "'" STRING_GROW; token_start = loc->start; BEGIN SC_ESCAPED_CHARACTER;
e9955c83
AD
231
232 /* Strings. */
ca407bdf 233 "\"" token_start = loc->start; BEGIN SC_ESCAPED_STRING;
e9955c83
AD
234
235 /* Prologue. */
3f2d73f1 236 "%{" code_start = loc->start; BEGIN SC_PROLOGUE;
e9955c83
AD
237
238 /* Code in between braces. */
3f2d73f1
PE
239 "{" {
240 STRING_GROW;
cb823b6f 241 nesting = 0;
3f2d73f1
PE
242 code_start = loc->start;
243 BEGIN SC_BRACED_CODE;
244 }
e9955c83
AD
245
246 /* A type. */
cb823b6f
AD
247 "<*>" return TAG_ANY;
248 "<>" return TAG_NONE;
d8d3f94a 249 "<"{tag}">" {
223ff46e 250 obstack_grow (&obstack_for_string, yytext + 1, yyleng - 2);
41141c56 251 STRING_FINISH;
223ff46e 252 val->uniqstr = uniqstr_new (last_string);
41141c56 253 STRING_FREE;
cb823b6f
AD
254 return TAG;
255 }
256 "<" {
257 nesting = 0;
258 token_start = loc->start;
259 BEGIN SC_TAG;
4cdb01db
AD
260 }
261
a706a1cc
PE
262 "%%" {
263 static int percent_percent_count;
e9955c83 264 if (++percent_percent_count == 2)
a2bc9dbc 265 BEGIN SC_EPILOGUE;
e9955c83
AD
266 return PERCENT_PERCENT;
267 }
268
a706a1cc 269 . {
41141c56 270 complain_at (*loc, _("invalid character: %s"), quote (yytext));
3f2d73f1 271 }
379f0ac8
PE
272
273 <<EOF>> {
274 loc->start = loc->end = scanner_cursor;
275 yyterminate ();
276 }
3f2d73f1
PE
277}
278
279
cb823b6f
AD
280 /*--------------------------------------------------------------.
281 | Supporting \0 complexifies our implementation for no expected |
282 | added value. |
283 `--------------------------------------------------------------*/
284
285<SC_ESCAPED_CHARACTER,SC_ESCAPED_STRING,SC_TAG>
286{
287 \0 complain_at (*loc, _("invalid null character"));
288}
289
290
3f2d73f1
PE
291 /*-----------------------------------------------------------------.
292 | Scanning after an identifier, checking whether a colon is next. |
293 `-----------------------------------------------------------------*/
294
295<SC_AFTER_IDENTIFIER>
296{
297 ":" {
3f2d73f1
PE
298 *loc = id_loc;
299 BEGIN INITIAL;
300 return ID_COLON;
301 }
302 . {
303 scanner_cursor.column -= mbsnwidth (yytext, yyleng, 0);
304 yyless (0);
305 *loc = id_loc;
306 BEGIN INITIAL;
307 return ID;
308 }
309 <<EOF>> {
310 *loc = id_loc;
311 BEGIN INITIAL;
312 return ID;
e9955c83
AD
313 }
314}
315
316
d8d3f94a
PE
317 /*---------------------------------------------------------------.
318 | Scanning a Yacc comment. The initial `/ *' is already eaten. |
319 `---------------------------------------------------------------*/
e9955c83 320
d8d3f94a 321<SC_YACC_COMMENT>
e9955c83 322{
3f2d73f1 323 "*/" BEGIN context_state;
a706a1cc 324 .|\n ;
aa418041 325 <<EOF>> unexpected_eof (token_start, "*/"); BEGIN context_state;
d8d3f94a
PE
326}
327
328
329 /*------------------------------------------------------------.
330 | Scanning a C comment. The initial `/ *' is already eaten. |
331 `------------------------------------------------------------*/
332
333<SC_COMMENT>
334{
3f2d73f1 335 "*"{splice}"/" STRING_GROW; BEGIN context_state;
aa418041 336 <<EOF>> unexpected_eof (token_start, "*/"); BEGIN context_state;
e9955c83
AD
337}
338
339
d8d3f94a
PE
340 /*--------------------------------------------------------------.
341 | Scanning a line comment. The initial `//' is already eaten. |
342 `--------------------------------------------------------------*/
343
344<SC_LINE_COMMENT>
345{
3f2d73f1 346 "\n" STRING_GROW; BEGIN context_state;
41141c56 347 {splice} STRING_GROW;
3f2d73f1 348 <<EOF>> BEGIN context_state;
d8d3f94a
PE
349}
350
351
4febdd96
PE
352 /*------------------------------------------------.
353 | Scanning a Bison string, including its escapes. |
354 | The initial quote is already eaten. |
355 `------------------------------------------------*/
e9955c83
AD
356
357<SC_ESCAPED_STRING>
358{
47aee066
JD
359 "\""|"\n" {
360 if (yytext[0] == '\n')
361 unexpected_newline (token_start, "\"");
362 STRING_FINISH;
363 loc->start = token_start;
364 val->chars = last_string;
365 BEGIN INITIAL;
366 return STRING;
367 }
368 <<EOF>> {
369 unexpected_eof (token_start, "\"");
41141c56 370 STRING_FINISH;
3f2d73f1 371 loc->start = token_start;
223ff46e 372 val->chars = last_string;
a706a1cc 373 BEGIN INITIAL;
e9955c83
AD
374 return STRING;
375 }
e9955c83
AD
376}
377
4febdd96
PE
378 /*----------------------------------------------------------.
379 | Scanning a Bison character literal, decoding its escapes. |
380 | The initial quote is already eaten. |
381 `----------------------------------------------------------*/
e9955c83
AD
382
<SC_ESCAPED_CHARACTER>
{
  /* The literal ends at its closing quote, or prematurely at a
     newline.  The opening quote was already grown into the string
     when this start condition was entered, and the terminator is
     grown here, so last_string[1] is the first character of the
     literal's contents.  */
  "'"|"\n" {
    if (yytext[0] == '\n')
      unexpected_newline (token_start, "'");
    STRING_GROW;
    STRING_FINISH;
    loc->start = token_start;
    /* With no contents, last_string[1] is the closing quote itself,
       which would otherwise silently become the token's value.  An
       unterminated literal has already been diagnosed above, so only
       the properly closed `''' case needs a message here.  */
    if (yytext[0] == '\'' && last_string[2] == '\0')
      warn_at (*loc, _("empty character literal"));
    val->character = last_string[1];
    STRING_FREE;
    BEGIN INITIAL;
    return CHAR;
  }

  /* End of file inside the literal: no terminator is grown, so the
     string may consist of the opening quote alone.  */
  <<EOF>> {
    unexpected_eof (token_start, "'");
    STRING_FINISH;
    loc->start = token_start;
    if (strlen (last_string) > 1)
      val->character = last_string[1];
    else
      val->character = last_string[0];
    STRING_FREE;
    BEGIN INITIAL;
    return CHAR;
  }
}
a706a1cc 409
cb823b6f
AD
410 /*-----------------------------------------------------------.
411 | Scanning a Bison nested tag. The initial angle bracket is |
412 | already eaten. |
413 `-----------------------------------------------------------*/
414
415<SC_TAG>
4febdd96 416{
cb823b6f
AD
417 ">" {
418 --nesting;
419 if (nesting < 0)
420 {
421 STRING_FINISH;
422 loc->start = token_start;
423 val->uniqstr = uniqstr_new (last_string);
424 STRING_FREE;
425 BEGIN INITIAL;
426 return TAG;
427 }
428 STRING_GROW;
429 }
430
431 [^<>]+ STRING_GROW;
432 "<"+ STRING_GROW; nesting += yyleng;
e9955c83 433
cb823b6f
AD
434 <<EOF>> {
435 unexpected_eof (token_start, ">");
436 STRING_FINISH;
437 loc->start = token_start;
438 val->uniqstr = uniqstr_new (last_string);
439 STRING_FREE;
440 BEGIN INITIAL;
441 return TAG;
442 }
443}
e9955c83
AD
444
445 /*----------------------------.
446 | Decode escaped characters. |
447 `----------------------------*/
448
449<SC_ESCAPED_STRING,SC_ESCAPED_CHARACTER>
450{
d8d3f94a 451 \\[0-7]{1,3} {
4517da37 452 unsigned long int c = strtoul (yytext + 1, NULL, 8);
d8d3f94a 453 if (UCHAR_MAX < c)
3f2d73f1 454 complain_at (*loc, _("invalid escape sequence: %s"), quote (yytext));
05ac60f3 455 else if (! c)
92ac3705 456 complain_at (*loc, _("invalid null character: %s"), quote (yytext));
e9955c83 457 else
223ff46e 458 obstack_1grow (&obstack_for_string, c);
e9955c83
AD
459 }
460
6b0d38ab 461 \\x[0-9abcdefABCDEF]+ {
4517da37
PE
462 verify (UCHAR_MAX < ULONG_MAX);
463 unsigned long int c = strtoul (yytext + 2, NULL, 16);
464 if (UCHAR_MAX < c)
3f2d73f1 465 complain_at (*loc, _("invalid escape sequence: %s"), quote (yytext));
92ac3705
PE
466 else if (! c)
467 complain_at (*loc, _("invalid null character: %s"), quote (yytext));
d8d3f94a 468 else
223ff46e 469 obstack_1grow (&obstack_for_string, c);
e9955c83
AD
470 }
471
223ff46e
PE
472 \\a obstack_1grow (&obstack_for_string, '\a');
473 \\b obstack_1grow (&obstack_for_string, '\b');
474 \\f obstack_1grow (&obstack_for_string, '\f');
475 \\n obstack_1grow (&obstack_for_string, '\n');
476 \\r obstack_1grow (&obstack_for_string, '\r');
477 \\t obstack_1grow (&obstack_for_string, '\t');
478 \\v obstack_1grow (&obstack_for_string, '\v');
412f8a59
PE
479
480 /* \\[\"\'?\\] would be shorter, but it confuses xgettext. */
223ff46e 481 \\("\""|"'"|"?"|"\\") obstack_1grow (&obstack_for_string, yytext[1]);
412f8a59 482
6b0d38ab 483 \\(u|U[0-9abcdefABCDEF]{4})[0-9abcdefABCDEF]{4} {
d8d3f94a
PE
484 int c = convert_ucn_to_byte (yytext);
485 if (c < 0)
3f2d73f1 486 complain_at (*loc, _("invalid escape sequence: %s"), quote (yytext));
92ac3705
PE
487 else if (! c)
488 complain_at (*loc, _("invalid null character: %s"), quote (yytext));
d8d3f94a 489 else
223ff46e 490 obstack_1grow (&obstack_for_string, c);
d8d3f94a 491 }
4f25ebb0 492 \\(.|\n) {
3f2d73f1 493 complain_at (*loc, _("unrecognized escape sequence: %s"), quote (yytext));
41141c56 494 STRING_GROW;
e9955c83
AD
495 }
496}
497
4febdd96
PE
498 /*--------------------------------------------.
499 | Scanning user-code characters and strings. |
500 `--------------------------------------------*/
e9955c83 501
4febdd96
PE
502<SC_CHARACTER,SC_STRING>
503{
e9071366 504 {splice}|\\{splice}[^\n\[\]] STRING_GROW;
4febdd96 505}
e9955c83
AD
506
507<SC_CHARACTER>
508{
4febdd96
PE
509 "'" STRING_GROW; BEGIN context_state;
510 \n unexpected_newline (token_start, "'"); BEGIN context_state;
511 <<EOF>> unexpected_eof (token_start, "'"); BEGIN context_state;
e9955c83
AD
512}
513
e9955c83
AD
514<SC_STRING>
515{
4febdd96
PE
516 "\"" STRING_GROW; BEGIN context_state;
517 \n unexpected_newline (token_start, "\""); BEGIN context_state;
518 <<EOF>> unexpected_eof (token_start, "\""); BEGIN context_state;
e9955c83
AD
519}
520
521
522 /*---------------------------------------------------.
523 | Strings, comments etc. can be found in user code. |
524 `---------------------------------------------------*/
525
526<SC_BRACED_CODE,SC_PROLOGUE,SC_EPILOGUE>
527{
3f2d73f1
PE
528 "'" {
529 STRING_GROW;
530 context_state = YY_START;
531 token_start = loc->start;
532 BEGIN SC_CHARACTER;
533 }
534 "\"" {
535 STRING_GROW;
536 context_state = YY_START;
537 token_start = loc->start;
538 BEGIN SC_STRING;
539 }
540 "/"{splice}"*" {
541 STRING_GROW;
542 context_state = YY_START;
543 token_start = loc->start;
544 BEGIN SC_COMMENT;
545 }
546 "/"{splice}"/" {
547 STRING_GROW;
548 context_state = YY_START;
549 BEGIN SC_LINE_COMMENT;
550 }
e9955c83
AD
551}
552
553
624a35e2 554
58d7a1a1
AD
555 /*-----------------------------------------------------------.
556 | Scanning some code in braces (actions). The initial "{" is |
557 | already eaten. |
558 `-----------------------------------------------------------*/
e9955c83
AD
559
560<SC_BRACED_CODE>
561{
cb823b6f
AD
562 "{"|"<"{splice}"%" STRING_GROW; nesting++;
563 "%"{splice}">" STRING_GROW; nesting--;
e9955c83 564 "}" {
25522739
PE
565 obstack_1grow (&obstack_for_string, '}');
566
cb823b6f
AD
567 --nesting;
568 if (nesting < 0)
e9955c83 569 {
41141c56 570 STRING_FINISH;
3f2d73f1 571 loc->start = code_start;
eb095650 572 val->code = last_string;
a706a1cc 573 BEGIN INITIAL;
58d7a1a1 574 return BRACED_CODE;
e9955c83
AD
575 }
576 }
577
a706a1cc
PE
578 /* Tokenize `<<%' correctly (as `<<' `%') rather than incorrectly
579 (as `<' `<%'). */
41141c56 580 "<"{splice}"<" STRING_GROW;
a706a1cc 581
47aee066
JD
582 <<EOF>> {
583 unexpected_eof (code_start, "}");
584 STRING_FINISH;
585 loc->start = code_start;
eb095650 586 val->code = last_string;
47aee066
JD
587 BEGIN INITIAL;
588 return BRACED_CODE;
589 }
e9955c83
AD
590}
591
592
593 /*--------------------------------------------------------------.
594 | Scanning some prologue: from "%{" (already scanned) to "%}". |
595 `--------------------------------------------------------------*/
596
597<SC_PROLOGUE>
598{
599 "%}" {
41141c56 600 STRING_FINISH;
3f2d73f1 601 loc->start = code_start;
223ff46e 602 val->chars = last_string;
a706a1cc 603 BEGIN INITIAL;
e9955c83
AD
604 return PROLOGUE;
605 }
606
47aee066
JD
607 <<EOF>> {
608 unexpected_eof (code_start, "%}");
609 STRING_FINISH;
610 loc->start = code_start;
611 val->chars = last_string;
612 BEGIN INITIAL;
613 return PROLOGUE;
614 }
e9955c83
AD
615}
616
617
618 /*---------------------------------------------------------------.
619 | Scanning the epilogue (everything after the second "%%", which |
d8d3f94a 620 | has already been eaten). |
e9955c83
AD
621 `---------------------------------------------------------------*/
622
623<SC_EPILOGUE>
624{
e9955c83 625 <<EOF>> {
41141c56 626 STRING_FINISH;
3f2d73f1 627 loc->start = code_start;
223ff46e 628 val->chars = last_string;
a706a1cc 629 BEGIN INITIAL;
e9955c83
AD
630 return EPILOGUE;
631 }
632}
633
634
4febdd96
PE
635 /*-----------------------------------------------------.
636 | By default, grow the string obstack with the input. |
637 `-----------------------------------------------------*/
638
639<SC_COMMENT,SC_LINE_COMMENT,SC_BRACED_CODE,SC_PROLOGUE,SC_EPILOGUE,SC_STRING,SC_CHARACTER,SC_ESCAPED_STRING,SC_ESCAPED_CHARACTER>. |
640<SC_COMMENT,SC_LINE_COMMENT,SC_BRACED_CODE,SC_PROLOGUE,SC_EPILOGUE>\n STRING_GROW;
641
e9955c83
AD
642%%
643
6c30d641
PE
/* Read up to SIZE bytes from FP into BUF and return the number of
   bytes stored there.  Carriage returns are normalized away: a
   "\r\n" pair and a lone '\r' both reach the caller as a single
   '\n'.  */

static size_t
no_cr_read (FILE *fp, char *buf, size_t size)
{
  size_t nread = fread (buf, 1, size, fp);
  char const *in;
  char const *end;
  char *out = nread ? memchr (buf, '\r', nread) : NULL;

  /* Nothing was read, or there is no '\r' to rewrite.  */
  if (! out)
    return nread;

  /* OUT is the next byte to write and IN the next byte to examine.
     OUT never overtakes IN, so the buffer is compacted in place.  */
  in = out + 1;
  end = buf + nread;

  for (;;)
    {
      /* OUT sits on a '\r': emit '\n' in its place.  */
      *out++ = '\n';

      if (in == end)
	{
	  /* The '\r' was the last byte read, so a '\n' paired with it
	     would still be in the stream.  Swallow it if it is there;
	     otherwise push the byte back.  */
	  int next = getc (fp);
	  if (next != '\n' && ungetc (next, fp) != next)
	    return out - buf;
	}
      else if (*in == '\n')
	in++;

      /* Shift bytes down until another '\r' has been copied.  */
      for (;;)
	{
	  char c;
	  if (in == end)
	    return out - buf;
	  c = *in++;
	  *out++ = c;
	  if (c == '\r')
	    break;
	}

      /* Step back onto that '\r' so the next pass overwrites it.  */
      out--;
    }
}
689
690
f25bfb75 691
1452af69
PE
692/*------------------------------------------------------.
693| Scan NUMBER for a base-BASE integer at location LOC. |
694`------------------------------------------------------*/
695
696static unsigned long int
697scan_integer (char const *number, int base, location loc)
698{
4517da37
PE
699 verify (INT_MAX < ULONG_MAX);
700 unsigned long int num = strtoul (number, NULL, base);
701
702 if (INT_MAX < num)
1452af69
PE
703 {
704 complain_at (loc, _("integer out of range: %s"), quote (number));
705 num = INT_MAX;
706 }
4517da37 707
1452af69
PE
708 return num;
709}
710
711
d8d3f94a
PE
/*-------------------------------------------------------------------.
| Convert the universal character name UCN to a single-byte          |
| character, and return that character.  UCN starts with a           |
| two-character prefix (backslash, then `u' or `U') which is         |
| skipped; the rest is parsed as hexadecimal digits.  Return -1 if   |
| UCN does not correspond to a single-byte character.                |
`-------------------------------------------------------------------*/

static int
convert_ucn_to_byte (char const *ucn)
{
  verify (UCHAR_MAX <= INT_MAX);
  unsigned long int code = strtoul (ucn + 2, NULL, 16);

  /* FIXME: Currently we assume Unicode-compatible unibyte characters
     on ASCII hosts (i.e., Latin-1 on hosts with 8-bit bytes).  On
     non-ASCII hosts we support only the portable C character set.
     These limitations should be removed once we add support for
     multibyte characters.  */

  if (UCHAR_MAX < code)
    return -1;

#if ! ('$' == 0x24 && '@' == 0x40 && '`' == 0x60 && '~' == 0x7e)
  {
    /* A non-ASCII host.  Use CODE to index into a table of the C
       basic execution character set, which is guaranteed to exist on
       all Standard C platforms.  This table also includes '$', '@',
       and '`', which are not in the basic execution character set but
       which are unibyte characters on all the platforms that we know
       about.  */
    static signed char const table[] =
      {
	'\0',   -1,   -1,   -1,   -1,   -1,   -1, '\a',
	'\b', '\t', '\n', '\v', '\f', '\r',   -1,   -1,
	  -1,   -1,   -1,   -1,   -1,   -1,   -1,   -1,
	  -1,   -1,   -1,   -1,   -1,   -1,   -1,   -1,
	 ' ',  '!',  '"',  '#',  '$',  '%',  '&', '\'',
	 '(',  ')',  '*',  '+',  ',',  '-',  '.',  '/',
	 '0',  '1',  '2',  '3',  '4',  '5',  '6',  '7',
	 '8',  '9',  ':',  ';',  '<',  '=',  '>',  '?',
	 '@',  'A',  'B',  'C',  'D',  'E',  'F',  'G',
	 'H',  'I',  'J',  'K',  'L',  'M',  'N',  'O',
	 'P',  'Q',  'R',  'S',  'T',  'U',  'V',  'W',
	 'X',  'Y',  'Z',  '[', '\\',  ']',  '^',  '_',
	 '`',  'a',  'b',  'c',  'd',  'e',  'f',  'g',
	 'h',  'i',  'j',  'k',  'l',  'm',  'n',  'o',
	 'p',  'q',  'r',  's',  't',  'u',  'v',  'w',
	 'x',  'y',  'z',  '{',  '|',  '}',  '~'
      };

    /* Return the looked-up value as an int directly.  Storing the -1
       sentinel back into the unsigned CODE and narrowing it again on
       return would make the result implementation-defined.  */
    return code < sizeof table ? table[code] : -1;
  }
#else
  /* CODE <= UCHAR_MAX <= INT_MAX here, so the conversion is exact.  */
  return (int) code;
#endif
}
767
768
900c5db5
AD
769/*----------------------------------------------------------------.
770| Handle `#line INT "FILE"'. ARGS has already skipped `#line '. |
771`----------------------------------------------------------------*/
772
773static void
4517da37 774handle_syncline (char *args, location loc)
900c5db5 775{
4517da37
PE
776 char *after_num;
777 unsigned long int lineno = strtoul (args, &after_num, 10);
778 char *file = strchr (after_num, '"') + 1;
779 *strchr (file, '"') = '\0';
780 if (INT_MAX <= lineno)
781 {
782 warn_at (loc, _("line number overflow"));
783 lineno = INT_MAX;
784 }
e9071366 785 current_file = uniqstr_new (file);
0c8e079f 786 boundary_set (&scanner_cursor, current_file, lineno, 1);
4517da37
PE
787}
788
789
4febdd96
PE
790/*----------------------------------------------------------------.
791| For a token or comment starting at START, report message MSGID, |
792| which should say that an end marker was found before |
793| the expected TOKEN_END. |
794`----------------------------------------------------------------*/
795
796static void
797unexpected_end (boundary start, char const *msgid, char const *token_end)
798{
799 location loc;
800 loc.start = start;
801 loc.end = scanner_cursor;
802 complain_at (loc, _(msgid), token_end);
803}
804
805
3f2d73f1
PE
806/*------------------------------------------------------------------------.
807| Report an unexpected EOF in a token or comment starting at START. |
808| An end of file was encountered and the expected TOKEN_END was missing. |
3f2d73f1 809`------------------------------------------------------------------------*/
a706a1cc
PE
810
811static void
aa418041 812unexpected_eof (boundary start, char const *token_end)
a706a1cc 813{
4febdd96
PE
814 unexpected_end (start, N_("missing `%s' at end of file"), token_end);
815}
816
817
818/*----------------------------------------.
819| Likewise, but for unexpected newlines. |
820`----------------------------------------*/
821
822static void
823unexpected_newline (boundary start, char const *token_end)
824{
825 unexpected_end (start, N_("missing `%s' at end of line"), token_end);
a706a1cc
PE
826}
827
828
f25bfb75
AD
829/*-------------------------.
830| Initialize the scanner. |
831`-------------------------*/
832
1d6412ad 833void
e9071366 834gram_scanner_initialize (void)
1d6412ad 835{
223ff46e 836 obstack_init (&obstack_for_string);
1d6412ad
AD
837}
838
839
f25bfb75
AD
840/*-----------------------------------------------.
841| Free all the memory allocated to the scanner. |
842`-----------------------------------------------*/
843
4cdb01db 844void
e9071366 845gram_scanner_free (void)
4cdb01db 846{
223ff46e 847 obstack_free (&obstack_for_string, 0);
536545f3 848 /* Reclaim Flex's buffers. */
580b8926 849 yylex_destroy ();
4cdb01db 850}