]> git.saurik.com Git - bison.git/blame - src/scan-gram.l
Regen.
[bison.git] / src / scan-gram.l
CommitLineData
e9955c83 1/* Bison Grammar Scanner -*- C -*-
3b1e470c 2
deef2a0a 3 Copyright (C) 2002, 2003, 2004, 2005, 2006, 2007, 2008 Free Software Foundation,
7c0c6181 4 Inc.
e9955c83
AD
5
6 This file is part of Bison, the GNU Compiler Compiler.
7
f16b0819 8 This program is free software: you can redistribute it and/or modify
e9955c83 9 it under the terms of the GNU General Public License as published by
f16b0819 10 the Free Software Foundation, either version 3 of the License, or
e9955c83
AD
11 (at your option) any later version.
12
13 This program is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU General Public License for more details.
17
18 You should have received a copy of the GNU General Public License
f16b0819 19 along with this program. If not, see <http://www.gnu.org/licenses/>. */
e9955c83 20
aa418041 21%option debug nodefault nounput noyywrap never-interactive
e9955c83
AD
22%option prefix="gram_" outfile="lex.yy.c"
23
24%{
4f6e011e
PE
25/* Work around a bug in flex 2.5.31. See Debian bug 333231
26 <http://bugs.debian.org/cgi-bin/bugreport.cgi?bug=333231>. */
27#undef gram_wrap
28#define gram_wrap() 1
29
e9071366 30#define FLEX_PREFIX(Id) gram_ ## Id
0305d25e 31#include <src/flex-scanner.h>
223ff46e 32
0305d25e
AD
33#include <src/complain.h>
34#include <src/files.h>
35#include <src/gram.h>
36#include <quotearg.h>
37#include <src/reader.h>
38#include <src/uniqstr.h>
e9955c83 39
e9071366
AD
40#include <mbswidth.h>
41#include <quote.h>
42
0305d25e 43#include <src/scan-gram.h>
e9071366
AD
44
45#define YY_DECL GRAM_LEX_DECL
2346344a 46
3f2d73f1 47#define YY_USER_INIT \
e9071366 48 code_start = scanner_cursor = loc->start; \
dc9701e8 49
3f2d73f1 50/* Location of scanner cursor. */
4a678af8 51static boundary scanner_cursor;
41141c56 52
e9071366 53#define YY_USER_ACTION location_compute (loc, &scanner_cursor, yytext, yyleng);
d8d3f94a 54
6c30d641 55static size_t no_cr_read (FILE *, char *, size_t);
d8d3f94a
PE
56#define YY_INPUT(buf, result, size) ((result) = no_cr_read (yyin, buf, size))
57
7ec2d4cd 58/* A string representing the most recently saved token. */
7c0c6181 59static char *last_string;
7ec2d4cd 60
7ec2d4cd 61void
e9071366 62gram_scanner_last_string_free (void)
7ec2d4cd 63{
41141c56 64 STRING_FREE;
7ec2d4cd 65}
e9955c83 66
4517da37 67static void handle_syncline (char *, location);
1452af69 68static unsigned long int scan_integer (char const *p, int base, location loc);
d8d3f94a 69static int convert_ucn_to_byte (char const *hex_text);
aa418041 70static void unexpected_eof (boundary, char const *);
4febdd96 71static void unexpected_newline (boundary, char const *);
e9955c83
AD
72
73%}
e9071366
AD
74 /* A C-like comment in directives/rules. */
75%x SC_YACC_COMMENT
76 /* Strings and characters in directives/rules. */
e9955c83 77%x SC_ESCAPED_STRING SC_ESCAPED_CHARACTER
e9071366
AD
78 /* An identifier was just read in directives/rules. Special state
79 to capture the sequence `identifier :'. */
80%x SC_AFTER_IDENTIFIER
cb823b6f
AD
81 /* A complex tag, with nested angle brackets. */
82%x SC_TAG
e9071366
AD
83
84 /* Three types of user code:
85 - prologue (code between `%{' `%}' in the first section, before %%);
86 - actions, printers, union, etc, (between braces in the middle section);
87 - epilogue (everything after the second %%). */
88%x SC_PROLOGUE SC_BRACED_CODE SC_EPILOGUE
89 /* C and C++ comments in code. */
90%x SC_COMMENT SC_LINE_COMMENT
91 /* Strings and characters in code. */
92%x SC_STRING SC_CHARACTER
e9955c83 93
29c01725
AD
94letter [.abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ_]
95id {letter}({letter}|[0-9])*
96directive %{letter}({letter}|[0-9]|-)*
624a35e2 97int [0-9]+
d8d3f94a
PE
98
99/* POSIX says that a tag must be both an id and a C union member, but
100 historically almost any character is allowed in a tag. We disallow
cb823b6f
AD
101 NUL, as this simplifies our implementation. We disallow angle
102 bracket to match them in nested pairs: several languages use them
103 for generics/template types. */
104tag [^\0<>]+
d8d3f94a
PE
105
106/* Zero or more instances of backslash-newline. Following GCC, allow
107 white space between the backslash and the newline. */
108splice (\\[ \f\t\v]*\n)*
e9955c83
AD
109
110%%
111%{
cb823b6f
AD
112 /* Nesting level. Either for nested braces, or nested angle brackets
113 (but not mixed). */
114 int nesting IF_LINT (= 0);
1a9e39f1 115
3f2d73f1 116 /* Parent context state, when applicable. */
5362ed19 117 int context_state IF_LINT (= 0);
a706a1cc 118
3f2d73f1 119 /* Location of most recent identifier, when applicable. */
a2bc9dbc 120 location id_loc IF_LINT (= empty_location);
3f2d73f1 121
a2bc9dbc
PE
122 /* Where containing code started, when applicable. Its initial
123 value is relevant only when yylex is invoked in the SC_EPILOGUE
124 start condition. */
125 boundary code_start = scanner_cursor;
3f2d73f1 126
223ff46e
PE
127 /* Where containing comment or string or character literal started,
128 when applicable. */
a2bc9dbc 129 boundary token_start IF_LINT (= scanner_cursor);
e9955c83
AD
130%}
131
132
3f2d73f1
PE
133 /*-----------------------.
134 | Scanning white space. |
135 `-----------------------*/
136
58d7a1a1 137<INITIAL,SC_AFTER_IDENTIFIER>
3f2d73f1 138{
4febdd96 139 /* Comments and white space. */
83adb046 140 "," warn_at (*loc, _("stray `,' treated as white space"));
4febdd96 141 [ \f\n\t\v] |
3f2d73f1 142 "//".* ;
83adb046
PE
143 "/*" {
144 token_start = loc->start;
145 context_state = YY_START;
146 BEGIN SC_YACC_COMMENT;
147 }
3f2d73f1
PE
148
149 /* #line directives are not documented, and may be withdrawn or
150 modified in future versions of Bison. */
151 ^"#line "{int}" \"".*"\"\n" {
4517da37 152 handle_syncline (yytext + sizeof "#line " - 1, *loc);
3f2d73f1
PE
153 }
154}
155
156
e9955c83
AD
157 /*----------------------------.
158 | Scanning Bison directives. |
159 `----------------------------*/
a7c09cba
DJ
160
161 /* For directives that are also command line options, the regex must be
162 "%..."
163 after "[-_]"s are removed, and the directive must match the --long
164 option name, with a single string argument. Otherwise, add exceptions
165 to ../build-aux/cross-options.pl. */
166
e9955c83
AD
167<INITIAL>
168{
deef2a0a 169 "%binary" return PERCENT_NONASSOC;
136a0f76 170 "%code" return PERCENT_CODE;
deef2a0a
AD
171 "%debug" return PERCENT_DEBUG;
172 "%default"[-_]"prec" return PERCENT_DEFAULT_PREC;
173 "%define" return PERCENT_DEFINE;
174 "%defines" return PERCENT_DEFINES;
175 "%destructor" return PERCENT_DESTRUCTOR;
176 "%dprec" return PERCENT_DPREC;
177 "%error"[-_]"verbose" return PERCENT_ERROR_VERBOSE;
178 "%expect" return PERCENT_EXPECT;
179 "%expect"[-_]"rr" return PERCENT_EXPECT_RR;
180 "%file-prefix" return PERCENT_FILE_PREFIX;
e9955c83 181 "%fixed"[-_]"output"[-_]"files" return PERCENT_YACC;
deef2a0a
AD
182 "%initial-action" return PERCENT_INITIAL_ACTION;
183 "%glr-parser" return PERCENT_GLR_PARSER;
184 "%language" return PERCENT_LANGUAGE;
185 "%left" return PERCENT_LEFT;
186 "%lex-param" return PERCENT_LEX_PARAM;
187 "%locations" return PERCENT_LOCATIONS;
188 "%merge" return PERCENT_MERGE;
189 "%name"[-_]"prefix" return PERCENT_NAME_PREFIX;
190 "%no"[-_]"default"[-_]"prec" return PERCENT_NO_DEFAULT_PREC;
191 "%no"[-_]"lines" return PERCENT_NO_LINES;
192 "%nonassoc" return PERCENT_NONASSOC;
193 "%nondeterministic-parser" return PERCENT_NONDETERMINISTIC_PARSER;
194 "%nterm" return PERCENT_NTERM;
195 "%output" return PERCENT_OUTPUT;
196 "%parse-param" return PERCENT_PARSE_PARAM;
197 "%prec" return PERCENT_PREC;
d78f0ac9 198 "%precedence" return PERCENT_PRECEDENCE;
deef2a0a
AD
199 "%printer" return PERCENT_PRINTER;
200 "%pure"[-_]"parser" return PERCENT_PURE_PARSER;
201 "%require" return PERCENT_REQUIRE;
202 "%right" return PERCENT_RIGHT;
203 "%skeleton" return PERCENT_SKELETON;
204 "%start" return PERCENT_START;
205 "%term" return PERCENT_TOKEN;
206 "%token" return PERCENT_TOKEN;
207 "%token"[-_]"table" return PERCENT_TOKEN_TABLE;
208 "%type" return PERCENT_TYPE;
209 "%union" return PERCENT_UNION;
210 "%verbose" return PERCENT_VERBOSE;
211 "%yacc" return PERCENT_YACC;
e9955c83 212
3f2d73f1 213 {directive} {
41141c56 214 complain_at (*loc, _("invalid directive: %s"), quote (yytext));
412f8a59 215 }
900c5db5 216
e9955c83 217 "=" return EQUAL;
e9071366 218 "|" return PIPE;
e9955c83
AD
219 ";" return SEMICOLON;
220
3f2d73f1 221 {id} {
58d7a1a1 222 val->uniqstr = uniqstr_new (yytext);
3f2d73f1 223 id_loc = *loc;
3f2d73f1 224 BEGIN SC_AFTER_IDENTIFIER;
e9955c83
AD
225 }
226
d8d3f94a 227 {int} {
1452af69
PE
228 val->integer = scan_integer (yytext, 10, *loc);
229 return INT;
230 }
231 0[xX][0-9abcdefABCDEF]+ {
232 val->integer = scan_integer (yytext, 16, *loc);
d8d3f94a
PE
233 return INT;
234 }
e9955c83
AD
235
236 /* Characters. We don't check there is only one. */
3f2d73f1 237 "'" STRING_GROW; token_start = loc->start; BEGIN SC_ESCAPED_CHARACTER;
e9955c83
AD
238
239 /* Strings. */
ca407bdf 240 "\"" token_start = loc->start; BEGIN SC_ESCAPED_STRING;
e9955c83
AD
241
242 /* Prologue. */
3f2d73f1 243 "%{" code_start = loc->start; BEGIN SC_PROLOGUE;
e9955c83
AD
244
245 /* Code in between braces. */
3f2d73f1
PE
246 "{" {
247 STRING_GROW;
cb823b6f 248 nesting = 0;
3f2d73f1
PE
249 code_start = loc->start;
250 BEGIN SC_BRACED_CODE;
251 }
e9955c83
AD
252
253 /* A type. */
cb823b6f
AD
254 "<*>" return TAG_ANY;
255 "<>" return TAG_NONE;
d8d3f94a 256 "<"{tag}">" {
223ff46e 257 obstack_grow (&obstack_for_string, yytext + 1, yyleng - 2);
41141c56 258 STRING_FINISH;
223ff46e 259 val->uniqstr = uniqstr_new (last_string);
41141c56 260 STRING_FREE;
cb823b6f
AD
261 return TAG;
262 }
263 "<" {
264 nesting = 0;
265 token_start = loc->start;
266 BEGIN SC_TAG;
4cdb01db
AD
267 }
268
a706a1cc
PE
269 "%%" {
270 static int percent_percent_count;
e9955c83 271 if (++percent_percent_count == 2)
a2bc9dbc 272 BEGIN SC_EPILOGUE;
e9955c83
AD
273 return PERCENT_PERCENT;
274 }
275
a706a1cc 276 . {
41141c56 277 complain_at (*loc, _("invalid character: %s"), quote (yytext));
3f2d73f1 278 }
379f0ac8
PE
279
280 <<EOF>> {
281 loc->start = loc->end = scanner_cursor;
282 yyterminate ();
283 }
3f2d73f1
PE
284}
285
286
cb823b6f
AD
287 /*--------------------------------------------------------------.
288 | Supporting \0 complexifies our implementation for no expected |
289 | added value. |
290 `--------------------------------------------------------------*/
291
292<SC_ESCAPED_CHARACTER,SC_ESCAPED_STRING,SC_TAG>
293{
294 \0 complain_at (*loc, _("invalid null character"));
295}
296
297
3f2d73f1
PE
298 /*-----------------------------------------------------------------.
299 | Scanning after an identifier, checking whether a colon is next. |
300 `-----------------------------------------------------------------*/
301
302<SC_AFTER_IDENTIFIER>
303{
304 ":" {
3f2d73f1
PE
305 *loc = id_loc;
306 BEGIN INITIAL;
307 return ID_COLON;
308 }
309 . {
310 scanner_cursor.column -= mbsnwidth (yytext, yyleng, 0);
311 yyless (0);
312 *loc = id_loc;
313 BEGIN INITIAL;
314 return ID;
315 }
316 <<EOF>> {
317 *loc = id_loc;
318 BEGIN INITIAL;
319 return ID;
e9955c83
AD
320 }
321}
322
323
d8d3f94a
PE
324 /*---------------------------------------------------------------.
325 | Scanning a Yacc comment. The initial `/ *' is already eaten. |
326 `---------------------------------------------------------------*/
e9955c83 327
d8d3f94a 328<SC_YACC_COMMENT>
e9955c83 329{
3f2d73f1 330 "*/" BEGIN context_state;
a706a1cc 331 .|\n ;
aa418041 332 <<EOF>> unexpected_eof (token_start, "*/"); BEGIN context_state;
d8d3f94a
PE
333}
334
335
336 /*------------------------------------------------------------.
337 | Scanning a C comment. The initial `/ *' is already eaten. |
338 `------------------------------------------------------------*/
339
340<SC_COMMENT>
341{
3f2d73f1 342 "*"{splice}"/" STRING_GROW; BEGIN context_state;
aa418041 343 <<EOF>> unexpected_eof (token_start, "*/"); BEGIN context_state;
e9955c83
AD
344}
345
346
d8d3f94a
PE
347 /*--------------------------------------------------------------.
348 | Scanning a line comment. The initial `//' is already eaten. |
349 `--------------------------------------------------------------*/
350
351<SC_LINE_COMMENT>
352{
3f2d73f1 353 "\n" STRING_GROW; BEGIN context_state;
41141c56 354 {splice} STRING_GROW;
3f2d73f1 355 <<EOF>> BEGIN context_state;
d8d3f94a
PE
356}
357
358
4febdd96
PE
359 /*------------------------------------------------.
360 | Scanning a Bison string, including its escapes. |
361 | The initial quote is already eaten. |
362 `------------------------------------------------*/
e9955c83
AD
363
364<SC_ESCAPED_STRING>
365{
47aee066
JD
366 "\""|"\n" {
367 if (yytext[0] == '\n')
368 unexpected_newline (token_start, "\"");
369 STRING_FINISH;
370 loc->start = token_start;
371 val->chars = last_string;
372 BEGIN INITIAL;
373 return STRING;
374 }
375 <<EOF>> {
376 unexpected_eof (token_start, "\"");
41141c56 377 STRING_FINISH;
3f2d73f1 378 loc->start = token_start;
223ff46e 379 val->chars = last_string;
a706a1cc 380 BEGIN INITIAL;
e9955c83
AD
381 return STRING;
382 }
e9955c83
AD
383}
384
4febdd96
PE
385 /*----------------------------------------------------------.
386 | Scanning a Bison character literal, decoding its escapes. |
387 | The initial quote is already eaten. |
388 `----------------------------------------------------------*/
e9955c83
AD
389
390<SC_ESCAPED_CHARACTER>
391{
47aee066
JD
392 "'"|"\n" {
393 if (yytext[0] == '\n')
394 unexpected_newline (token_start, "'");
41141c56
PE
395 STRING_GROW;
396 STRING_FINISH;
3f2d73f1 397 loc->start = token_start;
58d7a1a1 398 val->character = last_string[1];
41141c56 399 STRING_FREE;
a706a1cc 400 BEGIN INITIAL;
58d7a1a1 401 return CHAR;
e9955c83 402 }
47aee066
JD
403 <<EOF>> {
404 unexpected_eof (token_start, "'");
405 STRING_FINISH;
406 loc->start = token_start;
407 if (strlen(last_string) > 1)
408 val->character = last_string[1];
409 else
410 val->character = last_string[0];
411 STRING_FREE;
412 BEGIN INITIAL;
413 return CHAR;
414 }
4febdd96 415}
a706a1cc 416
cb823b6f
AD
417 /*-----------------------------------------------------------.
418 | Scanning a Bison nested tag. The initial angle bracket is |
419 | already eaten. |
420 `-----------------------------------------------------------*/
421
422<SC_TAG>
4febdd96 423{
cb823b6f
AD
424 ">" {
425 --nesting;
426 if (nesting < 0)
427 {
428 STRING_FINISH;
429 loc->start = token_start;
430 val->uniqstr = uniqstr_new (last_string);
431 STRING_FREE;
432 BEGIN INITIAL;
433 return TAG;
434 }
435 STRING_GROW;
436 }
437
438 [^<>]+ STRING_GROW;
439 "<"+ STRING_GROW; nesting += yyleng;
e9955c83 440
cb823b6f
AD
441 <<EOF>> {
442 unexpected_eof (token_start, ">");
443 STRING_FINISH;
444 loc->start = token_start;
445 val->uniqstr = uniqstr_new (last_string);
446 STRING_FREE;
447 BEGIN INITIAL;
448 return TAG;
449 }
450}
e9955c83
AD
451
452 /*----------------------------.
453 | Decode escaped characters. |
454 `----------------------------*/
455
456<SC_ESCAPED_STRING,SC_ESCAPED_CHARACTER>
457{
d8d3f94a 458 \\[0-7]{1,3} {
4517da37 459 unsigned long int c = strtoul (yytext + 1, NULL, 8);
d8d3f94a 460 if (UCHAR_MAX < c)
3f2d73f1 461 complain_at (*loc, _("invalid escape sequence: %s"), quote (yytext));
05ac60f3 462 else if (! c)
92ac3705 463 complain_at (*loc, _("invalid null character: %s"), quote (yytext));
e9955c83 464 else
223ff46e 465 obstack_1grow (&obstack_for_string, c);
e9955c83
AD
466 }
467
6b0d38ab 468 \\x[0-9abcdefABCDEF]+ {
4517da37
PE
469 verify (UCHAR_MAX < ULONG_MAX);
470 unsigned long int c = strtoul (yytext + 2, NULL, 16);
471 if (UCHAR_MAX < c)
3f2d73f1 472 complain_at (*loc, _("invalid escape sequence: %s"), quote (yytext));
92ac3705
PE
473 else if (! c)
474 complain_at (*loc, _("invalid null character: %s"), quote (yytext));
d8d3f94a 475 else
223ff46e 476 obstack_1grow (&obstack_for_string, c);
e9955c83
AD
477 }
478
223ff46e
PE
479 \\a obstack_1grow (&obstack_for_string, '\a');
480 \\b obstack_1grow (&obstack_for_string, '\b');
481 \\f obstack_1grow (&obstack_for_string, '\f');
482 \\n obstack_1grow (&obstack_for_string, '\n');
483 \\r obstack_1grow (&obstack_for_string, '\r');
484 \\t obstack_1grow (&obstack_for_string, '\t');
485 \\v obstack_1grow (&obstack_for_string, '\v');
412f8a59
PE
486
487 /* \\[\"\'?\\] would be shorter, but it confuses xgettext. */
223ff46e 488 \\("\""|"'"|"?"|"\\") obstack_1grow (&obstack_for_string, yytext[1]);
412f8a59 489
6b0d38ab 490 \\(u|U[0-9abcdefABCDEF]{4})[0-9abcdefABCDEF]{4} {
d8d3f94a
PE
491 int c = convert_ucn_to_byte (yytext);
492 if (c < 0)
3f2d73f1 493 complain_at (*loc, _("invalid escape sequence: %s"), quote (yytext));
92ac3705
PE
494 else if (! c)
495 complain_at (*loc, _("invalid null character: %s"), quote (yytext));
d8d3f94a 496 else
223ff46e 497 obstack_1grow (&obstack_for_string, c);
d8d3f94a 498 }
4f25ebb0 499 \\(.|\n) {
3f2d73f1 500 complain_at (*loc, _("unrecognized escape sequence: %s"), quote (yytext));
41141c56 501 STRING_GROW;
e9955c83
AD
502 }
503}
504
4febdd96
PE
505 /*--------------------------------------------.
506 | Scanning user-code characters and strings. |
507 `--------------------------------------------*/
e9955c83 508
4febdd96
PE
509<SC_CHARACTER,SC_STRING>
510{
e9071366 511 {splice}|\\{splice}[^\n\[\]] STRING_GROW;
4febdd96 512}
e9955c83
AD
513
514<SC_CHARACTER>
515{
4febdd96
PE
516 "'" STRING_GROW; BEGIN context_state;
517 \n unexpected_newline (token_start, "'"); BEGIN context_state;
518 <<EOF>> unexpected_eof (token_start, "'"); BEGIN context_state;
e9955c83
AD
519}
520
e9955c83
AD
521<SC_STRING>
522{
4febdd96
PE
523 "\"" STRING_GROW; BEGIN context_state;
524 \n unexpected_newline (token_start, "\""); BEGIN context_state;
525 <<EOF>> unexpected_eof (token_start, "\""); BEGIN context_state;
e9955c83
AD
526}
527
528
529 /*---------------------------------------------------.
530 | Strings, comments etc. can be found in user code. |
531 `---------------------------------------------------*/
532
533<SC_BRACED_CODE,SC_PROLOGUE,SC_EPILOGUE>
534{
3f2d73f1
PE
535 "'" {
536 STRING_GROW;
537 context_state = YY_START;
538 token_start = loc->start;
539 BEGIN SC_CHARACTER;
540 }
541 "\"" {
542 STRING_GROW;
543 context_state = YY_START;
544 token_start = loc->start;
545 BEGIN SC_STRING;
546 }
547 "/"{splice}"*" {
548 STRING_GROW;
549 context_state = YY_START;
550 token_start = loc->start;
551 BEGIN SC_COMMENT;
552 }
553 "/"{splice}"/" {
554 STRING_GROW;
555 context_state = YY_START;
556 BEGIN SC_LINE_COMMENT;
557 }
e9955c83
AD
558}
559
560
624a35e2 561
58d7a1a1
AD
562 /*-----------------------------------------------------------.
563 | Scanning some code in braces (actions). The initial "{" is |
564 | already eaten. |
565 `-----------------------------------------------------------*/
e9955c83
AD
566
567<SC_BRACED_CODE>
568{
cb823b6f
AD
569 "{"|"<"{splice}"%" STRING_GROW; nesting++;
570 "%"{splice}">" STRING_GROW; nesting--;
e9955c83 571 "}" {
25522739
PE
572 obstack_1grow (&obstack_for_string, '}');
573
cb823b6f
AD
574 --nesting;
575 if (nesting < 0)
e9955c83 576 {
41141c56 577 STRING_FINISH;
3f2d73f1 578 loc->start = code_start;
eb095650 579 val->code = last_string;
a706a1cc 580 BEGIN INITIAL;
58d7a1a1 581 return BRACED_CODE;
e9955c83
AD
582 }
583 }
584
a706a1cc
PE
585 /* Tokenize `<<%' correctly (as `<<' `%') rather than incorrectly
586 (as `<' `<%'). */
41141c56 587 "<"{splice}"<" STRING_GROW;
a706a1cc 588
47aee066
JD
589 <<EOF>> {
590 unexpected_eof (code_start, "}");
591 STRING_FINISH;
592 loc->start = code_start;
eb095650 593 val->code = last_string;
47aee066
JD
594 BEGIN INITIAL;
595 return BRACED_CODE;
596 }
e9955c83
AD
597}
598
599
600 /*--------------------------------------------------------------.
601 | Scanning some prologue: from "%{" (already scanned) to "%}". |
602 `--------------------------------------------------------------*/
603
604<SC_PROLOGUE>
605{
606 "%}" {
41141c56 607 STRING_FINISH;
3f2d73f1 608 loc->start = code_start;
223ff46e 609 val->chars = last_string;
a706a1cc 610 BEGIN INITIAL;
e9955c83
AD
611 return PROLOGUE;
612 }
613
47aee066
JD
614 <<EOF>> {
615 unexpected_eof (code_start, "%}");
616 STRING_FINISH;
617 loc->start = code_start;
618 val->chars = last_string;
619 BEGIN INITIAL;
620 return PROLOGUE;
621 }
e9955c83
AD
622}
623
624
625 /*---------------------------------------------------------------.
626 | Scanning the epilogue (everything after the second "%%", which |
d8d3f94a 627 | has already been eaten). |
e9955c83
AD
628 `---------------------------------------------------------------*/
629
630<SC_EPILOGUE>
631{
e9955c83 632 <<EOF>> {
41141c56 633 STRING_FINISH;
3f2d73f1 634 loc->start = code_start;
223ff46e 635 val->chars = last_string;
a706a1cc 636 BEGIN INITIAL;
e9955c83
AD
637 return EPILOGUE;
638 }
639}
640
641
4febdd96
PE
642 /*-----------------------------------------------------.
643 | By default, grow the string obstack with the input. |
644 `-----------------------------------------------------*/
645
646<SC_COMMENT,SC_LINE_COMMENT,SC_BRACED_CODE,SC_PROLOGUE,SC_EPILOGUE,SC_STRING,SC_CHARACTER,SC_ESCAPED_STRING,SC_ESCAPED_CHARACTER>. |
647<SC_COMMENT,SC_LINE_COMMENT,SC_BRACED_CODE,SC_PROLOGUE,SC_EPILOGUE>\n STRING_GROW;
648
e9955c83
AD
649%%
650
6c30d641
PE
/* Fill BUF with at most SIZE bytes read from FP and return how many
   bytes BUF holds afterwards.  Carriage returns never reach the
   caller: both "\r\n" and a lone '\r' are delivered as a single
   '\n'.  The buffer is compacted in place, so the result can be
   smaller than the number of bytes actually read.  */

static size_t
no_cr_read (FILE *fp, char *buf, size_t size)
{
  size_t const nread = fread (buf, 1, size, fp);
  char const *end;
  char const *src;
  char *dst;

  if (nread == 0)
    return nread;

  dst = memchr (buf, '\r', nread);
  if (dst == NULL)
    return nread;

  /* DST is the next byte to write (currently the first '\r');
     SRC is the next unread byte; END is one past the data read.  */
  end = buf + nread;
  src = dst + 1;

  for (;;)
    {
      /* A '\r' was just consumed: emit '\n' in its place.  */
      *dst++ = '\n';

      if (src == end)
        {
          /* The '\r' was the last byte of this chunk, so its '\n'
             partner, if any, is still in the stream.  Swallow it;
             push back anything else.  Stop if the push-back fails.  */
          int next = getc (fp);
          if (next != '\n' && ungetc (next, fp) != next)
            break;
        }
      else if (*src == '\n')
        {
          /* "\r\n" within the chunk: drop the '\n'.  */
          src++;
        }

      /* Slide ordinary bytes down until the next '\r' or the end.  */
      while (src != end && *src != '\r')
        *dst++ = *src++;

      if (src == end)
        break;

      /* Step over the '\r'; the top of the loop writes its '\n'.  */
      src++;
    }

  return dst - buf;
}
696
697
f25bfb75 698
1452af69
PE
699/*------------------------------------------------------.
700| Scan NUMBER for a base-BASE integer at location LOC. |
701`------------------------------------------------------*/
702
703static unsigned long int
704scan_integer (char const *number, int base, location loc)
705{
4517da37
PE
706 verify (INT_MAX < ULONG_MAX);
707 unsigned long int num = strtoul (number, NULL, base);
708
709 if (INT_MAX < num)
1452af69
PE
710 {
711 complain_at (loc, _("integer out of range: %s"), quote (number));
712 num = INT_MAX;
713 }
4517da37 714
1452af69
PE
715 return num;
716}
717
718
d8d3f94a
PE
/*------------------------------------------------------------------.
| Convert universal character name UCN to a single-byte character, |
| and return that character.  Return -1 if UCN does not correspond |
| to a single-byte character.                                       |
|                                                                   |
| UCN is the full escape text; its first two characters are        |
| skipped and the rest is parsed as hexadecimal digits.            |
`------------------------------------------------------------------*/

static int
convert_ucn_to_byte (char const *ucn)
{
  verify (UCHAR_MAX <= INT_MAX);
  unsigned long int code = strtoul (ucn + 2, NULL, 16);

  /* FIXME: Currently we assume Unicode-compatible unibyte characters
     on ASCII hosts (i.e., Latin-1 on hosts with 8-bit bytes).  On
     non-ASCII hosts we support only the portable C character set.
     These limitations should be removed once we add support for
     multibyte characters.  */

  if (UCHAR_MAX < code)
    return -1;

#if ! ('$' == 0x24 && '@' == 0x40 && '`' == 0x60 && '~' == 0x7e)
  {
    /* A non-ASCII host.  Use CODE to index into a table of the C
       basic execution character set, which is guaranteed to exist on
       all Standard C platforms.  This table also includes '$', '@',
       and '`', which are not in the basic execution character set but
       which are unibyte characters on all the platforms that we know
       about.  */
    static signed char const table[] =
      {
        '\0',   -1,   -1,   -1,   -1,   -1,   -1, '\a',
        '\b', '\t', '\n', '\v', '\f', '\r',   -1,   -1,
          -1,   -1,   -1,   -1,   -1,   -1,   -1,   -1,
          -1,   -1,   -1,   -1,   -1,   -1,   -1,   -1,
         ' ',  '!',  '"',  '#',  '$',  '%',  '&', '\'',
         '(',  ')',  '*',  '+',  ',',  '-',  '.',  '/',
         '0',  '1',  '2',  '3',  '4',  '5',  '6',  '7',
         '8',  '9',  ':',  ';',  '<',  '=',  '>',  '?',
         '@',  'A',  'B',  'C',  'D',  'E',  'F',  'G',
         'H',  'I',  'J',  'K',  'L',  'M',  'N',  'O',
         'P',  'Q',  'R',  'S',  'T',  'U',  'V',  'W',
         'X',  'Y',  'Z',  '[', '\\',  ']',  '^',  '_',
         '`',  'a',  'b',  'c',  'd',  'e',  'f',  'g',
         'h',  'i',  'j',  'k',  'l',  'm',  'n',  'o',
         'p',  'q',  'r',  's',  't',  'u',  'v',  'w',
         'x',  'y',  'z',  '{',  '|',  '}',  '~'
      };

    /* Return the table entry as an int directly.  Storing -1 back
       into the unsigned CODE and then converting that to int would
       be an implementation-defined out-of-range conversion.  */
    return code < sizeof table ? table[code] : -1;
  }
#else
  /* CODE is at most UCHAR_MAX here, hence representable as int.  */
  return code;
#endif
}
774
775
900c5db5
AD
776/*----------------------------------------------------------------.
777| Handle `#line INT "FILE"'. ARGS has already skipped `#line '. |
778`----------------------------------------------------------------*/
779
780static void
4517da37 781handle_syncline (char *args, location loc)
900c5db5 782{
4517da37
PE
783 char *after_num;
784 unsigned long int lineno = strtoul (args, &after_num, 10);
785 char *file = strchr (after_num, '"') + 1;
786 *strchr (file, '"') = '\0';
787 if (INT_MAX <= lineno)
788 {
789 warn_at (loc, _("line number overflow"));
790 lineno = INT_MAX;
791 }
e9071366 792 current_file = uniqstr_new (file);
0c8e079f 793 boundary_set (&scanner_cursor, current_file, lineno, 1);
4517da37
PE
794}
795
796
4febdd96
PE
797/*----------------------------------------------------------------.
798| For a token or comment starting at START, report message MSGID, |
799| which should say that an end marker was found before |
800| the expected TOKEN_END. |
801`----------------------------------------------------------------*/
802
803static void
804unexpected_end (boundary start, char const *msgid, char const *token_end)
805{
806 location loc;
807 loc.start = start;
808 loc.end = scanner_cursor;
809 complain_at (loc, _(msgid), token_end);
810}
811
812
3f2d73f1
PE
813/*------------------------------------------------------------------------.
814| Report an unexpected EOF in a token or comment starting at START. |
815| An end of file was encountered and the expected TOKEN_END was missing. |
3f2d73f1 816`------------------------------------------------------------------------*/
a706a1cc
PE
817
818static void
aa418041 819unexpected_eof (boundary start, char const *token_end)
a706a1cc 820{
4febdd96
PE
821 unexpected_end (start, N_("missing `%s' at end of file"), token_end);
822}
823
824
825/*----------------------------------------.
826| Likewise, but for unexpected newlines. |
827`----------------------------------------*/
828
829static void
830unexpected_newline (boundary start, char const *token_end)
831{
832 unexpected_end (start, N_("missing `%s' at end of line"), token_end);
a706a1cc
PE
833}
834
835
f25bfb75
AD
836/*-------------------------.
837| Initialize the scanner. |
838`-------------------------*/
839
1d6412ad 840void
e9071366 841gram_scanner_initialize (void)
1d6412ad 842{
223ff46e 843 obstack_init (&obstack_for_string);
1d6412ad
AD
844}
845
846
f25bfb75
AD
847/*-----------------------------------------------.
848| Free all the memory allocated to the scanner. |
849`-----------------------------------------------*/
850
4cdb01db 851void
e9071366 852gram_scanner_free (void)
4cdb01db 853{
223ff46e 854 obstack_free (&obstack_for_string, 0);
536545f3 855 /* Reclaim Flex's buffers. */
580b8926 856 yylex_destroy ();
4cdb01db 857}