]> git.saurik.com Git - bison.git/blame - src/scan-gram.l
Document how `%define "var" "value"' is not M4-friendly.
[bison.git] / src / scan-gram.l
CommitLineData
e9955c83 1/* Bison Grammar Scanner -*- C -*-
3b1e470c 2
ba061fa6
AD
3 Copyright (C) 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009
4 Free Software Foundation, Inc.
e9955c83
AD
5
6 This file is part of Bison, the GNU Compiler Compiler.
7
f16b0819 8 This program is free software: you can redistribute it and/or modify
e9955c83 9 it under the terms of the GNU General Public License as published by
f16b0819 10 the Free Software Foundation, either version 3 of the License, or
e9955c83
AD
11 (at your option) any later version.
12
13 This program is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU General Public License for more details.
17
18 You should have received a copy of the GNU General Public License
f16b0819 19 along with this program. If not, see <http://www.gnu.org/licenses/>. */
e9955c83 20
aa418041 21%option debug nodefault nounput noyywrap never-interactive
e9955c83
AD
22%option prefix="gram_" outfile="lex.yy.c"
23
24%{
4f6e011e
PE
25/* Work around a bug in flex 2.5.31. See Debian bug 333231
26 <http://bugs.debian.org/cgi-bin/bugreport.cgi?bug=333231>. */
27#undef gram_wrap
28#define gram_wrap() 1
29
e9071366 30#define FLEX_PREFIX(Id) gram_ ## Id
0305d25e 31#include <src/flex-scanner.h>
223ff46e 32
0305d25e
AD
33#include <src/complain.h>
34#include <src/files.h>
35#include <src/gram.h>
36#include <quotearg.h>
37#include <src/reader.h>
38#include <src/uniqstr.h>
e9955c83 39
e9071366
AD
40#include <mbswidth.h>
41#include <quote.h>
42
0305d25e 43#include <src/scan-gram.h>
e9071366
AD
44
45#define YY_DECL GRAM_LEX_DECL
2346344a 46
3f2d73f1 47#define YY_USER_INIT \
e9071366 48 code_start = scanner_cursor = loc->start; \
dc9701e8 49
3f2d73f1 50/* Location of scanner cursor. */
4a678af8 51static boundary scanner_cursor;
41141c56 52
e9071366 53#define YY_USER_ACTION location_compute (loc, &scanner_cursor, yytext, yyleng);
d8d3f94a 54
6c30d641 55static size_t no_cr_read (FILE *, char *, size_t);
d8d3f94a
PE
56#define YY_INPUT(buf, result, size) ((result) = no_cr_read (yyin, buf, size))
57
ba061fa6
AD
58#define RETURN_PERCENT_FLAG(Value) \
59 do { \
60 val->uniqstr = uniqstr_new (Value); \
61 return PERCENT_FLAG; \
62 } while (0)
63
64
7ec2d4cd 65/* A string representing the most recently saved token. */
7c0c6181 66static char *last_string;
7ec2d4cd 67
7ec2d4cd 68void
e9071366 69gram_scanner_last_string_free (void)
7ec2d4cd 70{
41141c56 71 STRING_FREE;
7ec2d4cd 72}
e9955c83 73
4517da37 74static void handle_syncline (char *, location);
1452af69 75static unsigned long int scan_integer (char const *p, int base, location loc);
d8d3f94a 76static int convert_ucn_to_byte (char const *hex_text);
aa418041 77static void unexpected_eof (boundary, char const *);
4febdd96 78static void unexpected_newline (boundary, char const *);
e9955c83
AD
79
80%}
e9071366
AD
81 /* A C-like comment in directives/rules. */
82%x SC_YACC_COMMENT
83 /* Strings and characters in directives/rules. */
e9955c83 84%x SC_ESCAPED_STRING SC_ESCAPED_CHARACTER
e9071366
AD
85 /* An identifier was just read in directives/rules. Special state
86 to capture the sequence `identifier :'. */
87%x SC_AFTER_IDENTIFIER
cb823b6f
AD
88 /* A complex tag, with nested angle brackets. */
89%x SC_TAG
e9071366
AD
90
91 /* Three types of user code:
92 - prologue (code between `%{' `%}' in the first section, before %%);
93 - actions, printers, union, etc, (between braces in the middle section);
94 - epilogue (everything after the second %%). */
95%x SC_PROLOGUE SC_BRACED_CODE SC_EPILOGUE
96 /* C and C++ comments in code. */
97%x SC_COMMENT SC_LINE_COMMENT
98 /* Strings and characters in code. */
99%x SC_STRING SC_CHARACTER
e9955c83 100
29c01725 101letter [.abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ_]
4f646c37
AD
102id {letter}({letter}|[0-9]|-)*
103directive %{id}
624a35e2 104int [0-9]+
d8d3f94a
PE
105
106/* POSIX says that a tag must be both an id and a C union member, but
107 historically almost any character is allowed in a tag. We disallow
cb823b6f
AD
108 NUL, as this simplifies our implementation. We disallow angle
109 bracket to match them in nested pairs: several languages use them
110 for generics/template types. */
111tag [^\0<>]+
d8d3f94a
PE
112
113/* Zero or more instances of backslash-newline. Following GCC, allow
114 white space between the backslash and the newline. */
115splice (\\[ \f\t\v]*\n)*
e9955c83
AD
116
117%%
118%{
cb823b6f
AD
119 /* Nesting level. Either for nested braces, or nested angle brackets
120 (but not mixed). */
121 int nesting IF_LINT (= 0);
1a9e39f1 122
3f2d73f1 123 /* Parent context state, when applicable. */
5362ed19 124 int context_state IF_LINT (= 0);
a706a1cc 125
3f2d73f1 126 /* Location of most recent identifier, when applicable. */
a2bc9dbc 127 location id_loc IF_LINT (= empty_location);
3f2d73f1 128
a2bc9dbc
PE
129 /* Where containing code started, when applicable. Its initial
130 value is relevant only when yylex is invoked in the SC_EPILOGUE
131 start condition. */
132 boundary code_start = scanner_cursor;
3f2d73f1 133
223ff46e
PE
134 /* Where containing comment or string or character literal started,
135 when applicable. */
a2bc9dbc 136 boundary token_start IF_LINT (= scanner_cursor);
e9955c83
AD
137%}
138
139
3f2d73f1
PE
140 /*-----------------------.
141 | Scanning white space. |
142 `-----------------------*/
143
58d7a1a1 144<INITIAL,SC_AFTER_IDENTIFIER>
3f2d73f1 145{
4febdd96 146 /* Comments and white space. */
83adb046 147 "," warn_at (*loc, _("stray `,' treated as white space"));
4febdd96 148 [ \f\n\t\v] |
3f2d73f1 149 "//".* ;
83adb046
PE
150 "/*" {
151 token_start = loc->start;
152 context_state = YY_START;
153 BEGIN SC_YACC_COMMENT;
154 }
3f2d73f1
PE
155
156 /* #line directives are not documented, and may be withdrawn or
157 modified in future versions of Bison. */
158 ^"#line "{int}" \"".*"\"\n" {
4517da37 159 handle_syncline (yytext + sizeof "#line " - 1, *loc);
3f2d73f1
PE
160 }
161}
162
163
e9955c83
AD
164 /*----------------------------.
165 | Scanning Bison directives. |
166 `----------------------------*/
a7c09cba
DJ
167
168 /* For directives that are also command line options, the regex must be
169 "%..."
170 after "[-_]"s are removed, and the directive must match the --long
171 option name, with a single string argument. Otherwise, add exceptions
172 to ../build-aux/cross-options.pl. */
173
e9955c83
AD
174<INITIAL>
175{
deef2a0a 176 "%binary" return PERCENT_NONASSOC;
136a0f76 177 "%code" return PERCENT_CODE;
fa819509 178 "%debug" RETURN_PERCENT_FLAG("parse.trace");
deef2a0a
AD
179 "%default"[-_]"prec" return PERCENT_DEFAULT_PREC;
180 "%define" return PERCENT_DEFINE;
181 "%defines" return PERCENT_DEFINES;
182 "%destructor" return PERCENT_DESTRUCTOR;
183 "%dprec" return PERCENT_DPREC;
71b00ed8 184 "%error"[-_]"verbose" RETURN_PERCENT_FLAG("error-verbose");
deef2a0a
AD
185 "%expect" return PERCENT_EXPECT;
186 "%expect"[-_]"rr" return PERCENT_EXPECT_RR;
187 "%file-prefix" return PERCENT_FILE_PREFIX;
e9955c83 188 "%fixed"[-_]"output"[-_]"files" return PERCENT_YACC;
deef2a0a
AD
189 "%initial-action" return PERCENT_INITIAL_ACTION;
190 "%glr-parser" return PERCENT_GLR_PARSER;
191 "%language" return PERCENT_LANGUAGE;
192 "%left" return PERCENT_LEFT;
193 "%lex-param" return PERCENT_LEX_PARAM;
bc0f5737 194 "%locations" RETURN_PERCENT_FLAG("locations");
deef2a0a
AD
195 "%merge" return PERCENT_MERGE;
196 "%name"[-_]"prefix" return PERCENT_NAME_PREFIX;
197 "%no"[-_]"default"[-_]"prec" return PERCENT_NO_DEFAULT_PREC;
198 "%no"[-_]"lines" return PERCENT_NO_LINES;
199 "%nonassoc" return PERCENT_NONASSOC;
200 "%nondeterministic-parser" return PERCENT_NONDETERMINISTIC_PARSER;
201 "%nterm" return PERCENT_NTERM;
202 "%output" return PERCENT_OUTPUT;
203 "%parse-param" return PERCENT_PARSE_PARAM;
204 "%prec" return PERCENT_PREC;
d78f0ac9 205 "%precedence" return PERCENT_PRECEDENCE;
deef2a0a 206 "%printer" return PERCENT_PRINTER;
4920ae8b 207 "%pure"[-_]"parser" RETURN_PERCENT_FLAG("api.pure");
deef2a0a
AD
208 "%require" return PERCENT_REQUIRE;
209 "%right" return PERCENT_RIGHT;
210 "%skeleton" return PERCENT_SKELETON;
211 "%start" return PERCENT_START;
212 "%term" return PERCENT_TOKEN;
213 "%token" return PERCENT_TOKEN;
214 "%token"[-_]"table" return PERCENT_TOKEN_TABLE;
215 "%type" return PERCENT_TYPE;
216 "%union" return PERCENT_UNION;
217 "%verbose" return PERCENT_VERBOSE;
218 "%yacc" return PERCENT_YACC;
e9955c83 219
3f2d73f1 220 {directive} {
41141c56 221 complain_at (*loc, _("invalid directive: %s"), quote (yytext));
412f8a59 222 }
900c5db5 223
e9955c83 224 "=" return EQUAL;
e9071366 225 "|" return PIPE;
e9955c83
AD
226 ";" return SEMICOLON;
227
3f2d73f1 228 {id} {
58d7a1a1 229 val->uniqstr = uniqstr_new (yytext);
3f2d73f1 230 id_loc = *loc;
3f2d73f1 231 BEGIN SC_AFTER_IDENTIFIER;
e9955c83
AD
232 }
233
d8d3f94a 234 {int} {
1452af69
PE
235 val->integer = scan_integer (yytext, 10, *loc);
236 return INT;
237 }
238 0[xX][0-9abcdefABCDEF]+ {
239 val->integer = scan_integer (yytext, 16, *loc);
d8d3f94a
PE
240 return INT;
241 }
e9955c83
AD
242
243 /* Characters. We don't check there is only one. */
3f2d73f1 244 "'" STRING_GROW; token_start = loc->start; BEGIN SC_ESCAPED_CHARACTER;
e9955c83
AD
245
246 /* Strings. */
ca407bdf 247 "\"" token_start = loc->start; BEGIN SC_ESCAPED_STRING;
e9955c83
AD
248
249 /* Prologue. */
3f2d73f1 250 "%{" code_start = loc->start; BEGIN SC_PROLOGUE;
e9955c83
AD
251
252 /* Code in between braces. */
3f2d73f1
PE
253 "{" {
254 STRING_GROW;
cb823b6f 255 nesting = 0;
3f2d73f1
PE
256 code_start = loc->start;
257 BEGIN SC_BRACED_CODE;
258 }
e9955c83
AD
259
260 /* A type. */
cb823b6f
AD
261 "<*>" return TAG_ANY;
262 "<>" return TAG_NONE;
d8d3f94a 263 "<"{tag}">" {
223ff46e 264 obstack_grow (&obstack_for_string, yytext + 1, yyleng - 2);
41141c56 265 STRING_FINISH;
223ff46e 266 val->uniqstr = uniqstr_new (last_string);
41141c56 267 STRING_FREE;
cb823b6f
AD
268 return TAG;
269 }
270 "<" {
271 nesting = 0;
272 token_start = loc->start;
273 BEGIN SC_TAG;
4cdb01db
AD
274 }
275
a706a1cc
PE
276 "%%" {
277 static int percent_percent_count;
e9955c83 278 if (++percent_percent_count == 2)
a2bc9dbc 279 BEGIN SC_EPILOGUE;
e9955c83
AD
280 return PERCENT_PERCENT;
281 }
282
a706a1cc 283 . {
41141c56 284 complain_at (*loc, _("invalid character: %s"), quote (yytext));
3f2d73f1 285 }
379f0ac8
PE
286
287 <<EOF>> {
288 loc->start = loc->end = scanner_cursor;
289 yyterminate ();
290 }
3f2d73f1
PE
291}
292
293
cb823b6f
AD
294 /*--------------------------------------------------------------.
295 | Supporting \0 complexifies our implementation for no expected |
296 | added value. |
297 `--------------------------------------------------------------*/
298
299<SC_ESCAPED_CHARACTER,SC_ESCAPED_STRING,SC_TAG>
300{
301 \0 complain_at (*loc, _("invalid null character"));
302}
303
304
3f2d73f1
PE
305 /*-----------------------------------------------------------------.
306 | Scanning after an identifier, checking whether a colon is next. |
307 `-----------------------------------------------------------------*/
308
309<SC_AFTER_IDENTIFIER>
310{
311 ":" {
3f2d73f1
PE
312 *loc = id_loc;
313 BEGIN INITIAL;
314 return ID_COLON;
315 }
316 . {
317 scanner_cursor.column -= mbsnwidth (yytext, yyleng, 0);
318 yyless (0);
319 *loc = id_loc;
320 BEGIN INITIAL;
321 return ID;
322 }
323 <<EOF>> {
324 *loc = id_loc;
325 BEGIN INITIAL;
326 return ID;
e9955c83
AD
327 }
328}
329
330
d8d3f94a
PE
331 /*---------------------------------------------------------------.
332 | Scanning a Yacc comment. The initial `/ *' is already eaten. |
333 `---------------------------------------------------------------*/
e9955c83 334
d8d3f94a 335<SC_YACC_COMMENT>
e9955c83 336{
3f2d73f1 337 "*/" BEGIN context_state;
a706a1cc 338 .|\n ;
aa418041 339 <<EOF>> unexpected_eof (token_start, "*/"); BEGIN context_state;
d8d3f94a
PE
340}
341
342
343 /*------------------------------------------------------------.
344 | Scanning a C comment. The initial `/ *' is already eaten. |
345 `------------------------------------------------------------*/
346
347<SC_COMMENT>
348{
3f2d73f1 349 "*"{splice}"/" STRING_GROW; BEGIN context_state;
aa418041 350 <<EOF>> unexpected_eof (token_start, "*/"); BEGIN context_state;
e9955c83
AD
351}
352
353
d8d3f94a
PE
354 /*--------------------------------------------------------------.
355 | Scanning a line comment. The initial `//' is already eaten. |
356 `--------------------------------------------------------------*/
357
358<SC_LINE_COMMENT>
359{
3f2d73f1 360 "\n" STRING_GROW; BEGIN context_state;
41141c56 361 {splice} STRING_GROW;
3f2d73f1 362 <<EOF>> BEGIN context_state;
d8d3f94a
PE
363}
364
365
4febdd96
PE
366 /*------------------------------------------------.
367 | Scanning a Bison string, including its escapes. |
368 | The initial quote is already eaten. |
369 `------------------------------------------------*/
e9955c83
AD
370
371<SC_ESCAPED_STRING>
372{
47aee066
JD
373 "\""|"\n" {
374 if (yytext[0] == '\n')
375 unexpected_newline (token_start, "\"");
376 STRING_FINISH;
377 loc->start = token_start;
378 val->chars = last_string;
379 BEGIN INITIAL;
380 return STRING;
381 }
382 <<EOF>> {
383 unexpected_eof (token_start, "\"");
41141c56 384 STRING_FINISH;
3f2d73f1 385 loc->start = token_start;
223ff46e 386 val->chars = last_string;
a706a1cc 387 BEGIN INITIAL;
e9955c83
AD
388 return STRING;
389 }
e9955c83
AD
390}
391
4febdd96
PE
392 /*----------------------------------------------------------.
393 | Scanning a Bison character literal, decoding its escapes. |
394 | The initial quote is already eaten. |
395 `----------------------------------------------------------*/
e9955c83
AD
396
397<SC_ESCAPED_CHARACTER>
398{
47aee066
JD
399 "'"|"\n" {
400 if (yytext[0] == '\n')
401 unexpected_newline (token_start, "'");
41141c56
PE
402 STRING_GROW;
403 STRING_FINISH;
3f2d73f1 404 loc->start = token_start;
58d7a1a1 405 val->character = last_string[1];
41141c56 406 STRING_FREE;
a706a1cc 407 BEGIN INITIAL;
58d7a1a1 408 return CHAR;
e9955c83 409 }
47aee066
JD
410 <<EOF>> {
411 unexpected_eof (token_start, "'");
412 STRING_FINISH;
413 loc->start = token_start;
4f646c37 414 if (strlen (last_string) > 1)
47aee066
JD
415 val->character = last_string[1];
416 else
417 val->character = last_string[0];
418 STRING_FREE;
419 BEGIN INITIAL;
420 return CHAR;
421 }
4febdd96 422}
a706a1cc 423
cb823b6f
AD
424 /*-----------------------------------------------------------.
425 | Scanning a Bison nested tag. The initial angle bracket is |
426 | already eaten. |
427 `-----------------------------------------------------------*/
428
<SC_TAG>
{
  /* A closing bracket either ends one level of nesting or, when no
     level is open, ends the tag itself.  The outermost brackets are
     not part of the tag text.  */
  ">" {
    --nesting;
    if (nesting < 0)
      {
        STRING_FINISH;
        loc->start = token_start;
        val->uniqstr = uniqstr_new (last_string);
        STRING_FREE;
        BEGIN INITIAL;
        return TAG;
      }
    STRING_GROW;
  }

  /* Ordinary tag text.  NUL is excluded on purpose: Flex prefers the
     longest match, so a class that accepted NUL would swallow it
     together with its neighbours and the dedicated `\0' rule for this
     start condition would almost never run.  The NUL would then be
     buffered silently and truncate the tag once it is handed to
     uniqstr_new as a C string.  */
  [^\0<>]+ STRING_GROW;

  /* Each opening bracket opens one more nesting level.  */
  "<"+   STRING_GROW; nesting += yyleng;

  /* End of file inside a tag: complain, then return what was
     gathered so far as the tag.  */
  <<EOF>> {
    unexpected_eof (token_start, ">");
    STRING_FINISH;
    loc->start = token_start;
    val->uniqstr = uniqstr_new (last_string);
    STRING_FREE;
    BEGIN INITIAL;
    return TAG;
  }
}
e9955c83
AD
458
459 /*----------------------------.
460 | Decode escaped characters. |
461 `----------------------------*/
462
463<SC_ESCAPED_STRING,SC_ESCAPED_CHARACTER>
464{
d8d3f94a 465 \\[0-7]{1,3} {
4517da37 466 unsigned long int c = strtoul (yytext + 1, NULL, 8);
d8d3f94a 467 if (UCHAR_MAX < c)
3f2d73f1 468 complain_at (*loc, _("invalid escape sequence: %s"), quote (yytext));
05ac60f3 469 else if (! c)
92ac3705 470 complain_at (*loc, _("invalid null character: %s"), quote (yytext));
e9955c83 471 else
223ff46e 472 obstack_1grow (&obstack_for_string, c);
e9955c83
AD
473 }
474
6b0d38ab 475 \\x[0-9abcdefABCDEF]+ {
4517da37
PE
476 verify (UCHAR_MAX < ULONG_MAX);
477 unsigned long int c = strtoul (yytext + 2, NULL, 16);
478 if (UCHAR_MAX < c)
3f2d73f1 479 complain_at (*loc, _("invalid escape sequence: %s"), quote (yytext));
92ac3705
PE
480 else if (! c)
481 complain_at (*loc, _("invalid null character: %s"), quote (yytext));
d8d3f94a 482 else
223ff46e 483 obstack_1grow (&obstack_for_string, c);
e9955c83
AD
484 }
485
223ff46e
PE
486 \\a obstack_1grow (&obstack_for_string, '\a');
487 \\b obstack_1grow (&obstack_for_string, '\b');
488 \\f obstack_1grow (&obstack_for_string, '\f');
489 \\n obstack_1grow (&obstack_for_string, '\n');
490 \\r obstack_1grow (&obstack_for_string, '\r');
491 \\t obstack_1grow (&obstack_for_string, '\t');
492 \\v obstack_1grow (&obstack_for_string, '\v');
412f8a59
PE
493
494 /* \\[\"\'?\\] would be shorter, but it confuses xgettext. */
223ff46e 495 \\("\""|"'"|"?"|"\\") obstack_1grow (&obstack_for_string, yytext[1]);
412f8a59 496
6b0d38ab 497 \\(u|U[0-9abcdefABCDEF]{4})[0-9abcdefABCDEF]{4} {
d8d3f94a
PE
498 int c = convert_ucn_to_byte (yytext);
499 if (c < 0)
3f2d73f1 500 complain_at (*loc, _("invalid escape sequence: %s"), quote (yytext));
92ac3705
PE
501 else if (! c)
502 complain_at (*loc, _("invalid null character: %s"), quote (yytext));
d8d3f94a 503 else
223ff46e 504 obstack_1grow (&obstack_for_string, c);
d8d3f94a 505 }
4f25ebb0 506 \\(.|\n) {
3f2d73f1 507 complain_at (*loc, _("unrecognized escape sequence: %s"), quote (yytext));
41141c56 508 STRING_GROW;
e9955c83
AD
509 }
510}
511
4febdd96
PE
512 /*--------------------------------------------.
513 | Scanning user-code characters and strings. |
514 `--------------------------------------------*/
e9955c83 515
4febdd96
PE
516<SC_CHARACTER,SC_STRING>
517{
e9071366 518 {splice}|\\{splice}[^\n\[\]] STRING_GROW;
4febdd96 519}
e9955c83
AD
520
521<SC_CHARACTER>
522{
4febdd96
PE
523 "'" STRING_GROW; BEGIN context_state;
524 \n unexpected_newline (token_start, "'"); BEGIN context_state;
525 <<EOF>> unexpected_eof (token_start, "'"); BEGIN context_state;
e9955c83
AD
526}
527
e9955c83
AD
528<SC_STRING>
529{
4febdd96
PE
530 "\"" STRING_GROW; BEGIN context_state;
531 \n unexpected_newline (token_start, "\""); BEGIN context_state;
532 <<EOF>> unexpected_eof (token_start, "\""); BEGIN context_state;
e9955c83
AD
533}
534
535
536 /*---------------------------------------------------.
537 | Strings, comments etc. can be found in user code. |
538 `---------------------------------------------------*/
539
540<SC_BRACED_CODE,SC_PROLOGUE,SC_EPILOGUE>
541{
3f2d73f1
PE
542 "'" {
543 STRING_GROW;
544 context_state = YY_START;
545 token_start = loc->start;
546 BEGIN SC_CHARACTER;
547 }
548 "\"" {
549 STRING_GROW;
550 context_state = YY_START;
551 token_start = loc->start;
552 BEGIN SC_STRING;
553 }
554 "/"{splice}"*" {
555 STRING_GROW;
556 context_state = YY_START;
557 token_start = loc->start;
558 BEGIN SC_COMMENT;
559 }
560 "/"{splice}"/" {
561 STRING_GROW;
562 context_state = YY_START;
563 BEGIN SC_LINE_COMMENT;
564 }
e9955c83
AD
565}
566
567
624a35e2 568
58d7a1a1
AD
569 /*-----------------------------------------------------------.
570 | Scanning some code in braces (actions). The initial "{" is |
571 | already eaten. |
572 `-----------------------------------------------------------*/
e9955c83
AD
573
574<SC_BRACED_CODE>
575{
cb823b6f
AD
576 "{"|"<"{splice}"%" STRING_GROW; nesting++;
577 "%"{splice}">" STRING_GROW; nesting--;
e9955c83 578 "}" {
25522739
PE
579 obstack_1grow (&obstack_for_string, '}');
580
cb823b6f
AD
581 --nesting;
582 if (nesting < 0)
e9955c83 583 {
41141c56 584 STRING_FINISH;
3f2d73f1 585 loc->start = code_start;
eb095650 586 val->code = last_string;
a706a1cc 587 BEGIN INITIAL;
58d7a1a1 588 return BRACED_CODE;
e9955c83
AD
589 }
590 }
591
a706a1cc
PE
592 /* Tokenize `<<%' correctly (as `<<' `%') rather than incorrrectly
593 (as `<' `<%'). */
41141c56 594 "<"{splice}"<" STRING_GROW;
a706a1cc 595
47aee066
JD
596 <<EOF>> {
597 unexpected_eof (code_start, "}");
598 STRING_FINISH;
599 loc->start = code_start;
eb095650 600 val->code = last_string;
47aee066
JD
601 BEGIN INITIAL;
602 return BRACED_CODE;
603 }
e9955c83
AD
604}
605
606
607 /*--------------------------------------------------------------.
608 | Scanning some prologue: from "%{" (already scanned) to "%}". |
609 `--------------------------------------------------------------*/
610
611<SC_PROLOGUE>
612{
613 "%}" {
41141c56 614 STRING_FINISH;
3f2d73f1 615 loc->start = code_start;
223ff46e 616 val->chars = last_string;
a706a1cc 617 BEGIN INITIAL;
e9955c83
AD
618 return PROLOGUE;
619 }
620
47aee066
JD
621 <<EOF>> {
622 unexpected_eof (code_start, "%}");
623 STRING_FINISH;
624 loc->start = code_start;
625 val->chars = last_string;
626 BEGIN INITIAL;
627 return PROLOGUE;
628 }
e9955c83
AD
629}
630
631
632 /*---------------------------------------------------------------.
633 | Scanning the epilogue (everything after the second "%%", which |
d8d3f94a 634 | has already been eaten). |
e9955c83
AD
635 `---------------------------------------------------------------*/
636
637<SC_EPILOGUE>
638{
e9955c83 639 <<EOF>> {
41141c56 640 STRING_FINISH;
3f2d73f1 641 loc->start = code_start;
223ff46e 642 val->chars = last_string;
a706a1cc 643 BEGIN INITIAL;
e9955c83
AD
644 return EPILOGUE;
645 }
646}
647
648
4febdd96
PE
649 /*-----------------------------------------------------.
650 | By default, grow the string obstack with the input. |
651 `-----------------------------------------------------*/
652
653<SC_COMMENT,SC_LINE_COMMENT,SC_BRACED_CODE,SC_PROLOGUE,SC_EPILOGUE,SC_STRING,SC_CHARACTER,SC_ESCAPED_STRING,SC_ESCAPED_CHARACTER>. |
654<SC_COMMENT,SC_LINE_COMMENT,SC_BRACED_CODE,SC_PROLOGUE,SC_EPILOGUE>\n STRING_GROW;
655
e9955c83
AD
656%%
657
6c30d641
PE
658/* Read bytes from FP into buffer BUF of size SIZE. Return the
659 number of bytes read. Remove '\r' from input, treating \r\n
660 and isolated \r as \n. */
661
static size_t
no_cr_read (FILE *fp, char *buf, size_t size)
{
  size_t const count = fread (buf, 1, size, fp);
  char const *end;
  char const *src;
  char *dst;

  /* Nothing was read: report that unchanged.  */
  if (count == 0)
    return 0;

  /* Fast path: the chunk holds no carriage return at all.  */
  dst = memchr (buf, '\r', count);
  if (dst == NULL)
    return count;

  /* DST is where the next output byte goes (currently the first
     '\r'); SRC is the next unread input byte; END is one past the
     last byte obtained from fread.  The buffer is compacted in
     place, so DST never overtakes SRC.  */
  end = buf + count;
  src = dst + 1;

  for (;;)
    {
      /* DST addresses a '\r'.  Turn it into '\n' and drop one
         directly following '\n', if there is one.  */
      *dst++ = '\n';
      if (src != end)
        {
          if (*src == '\n')
            src++;
        }
      else
        {
          /* The '\r' was the last byte of this chunk, so its
             possible '\n' partner is still in the stream.  Consume it
             if present; otherwise push the byte back.  */
          int next = getc (fp);
          if (next != '\n' && ungetc (next, fp) != next)
            break;
        }

      /* Move bytes down until another '\r' has been copied, or the
         input is exhausted.  */
      for (;;)
        {
          char c;
          if (src == end)
            return dst - buf;
          c = *src++;
          *dst = c;
          if (c == '\r')
            break;
          dst++;
        }
    }

  return dst - buf;
}
703
704
f25bfb75 705
1452af69
PE
706/*------------------------------------------------------.
707| Scan NUMBER for a base-BASE integer at location LOC. |
708`------------------------------------------------------*/
709
710static unsigned long int
711scan_integer (char const *number, int base, location loc)
712{
4517da37
PE
713 verify (INT_MAX < ULONG_MAX);
714 unsigned long int num = strtoul (number, NULL, base);
715
716 if (INT_MAX < num)
1452af69
PE
717 {
718 complain_at (loc, _("integer out of range: %s"), quote (number));
719 num = INT_MAX;
720 }
4517da37 721
1452af69
PE
722 return num;
723}
724
725
d8d3f94a
PE
726/*------------------------------------------------------------------.
727| Convert universal character name UCN to a single-byte character, |
728| and return that character. Return -1 if UCN does not correspond |
729| to a single-byte character. |
730`------------------------------------------------------------------*/
731
static int
convert_ucn_to_byte (char const *ucn)
{
  /* Every byte value must be representable in the int result.  */
  verify (UCHAR_MAX <= INT_MAX);
  /* UCN starts with a backslash and `u' or `U'; the hexadecimal
     digits begin two characters in.  */
  unsigned long int code = strtoul (ucn + 2, NULL, 16);
  /* The result is kept in a signed int so that the -1 "no such byte"
     marker is never stored in, and converted back from, an unsigned
     type (an implementation-defined conversion).  */
  int byte;

  /* FIXME: Currently we assume Unicode-compatible unibyte characters
     on ASCII hosts (i.e., Latin-1 on hosts with 8-bit bytes).  On
     non-ASCII hosts we support only the portable C character set.
     These limitations should be removed once we add support for
     multibyte characters.  */

  if (UCHAR_MAX < code)
    return -1;

  /* Here CODE <= UCHAR_MAX <= INT_MAX, so this conversion is exact.  */
  byte = code;

#if ! ('$' == 0x24 && '@' == 0x40 && '`' == 0x60 && '~' == 0x7e)
  {
    /* A non-ASCII host.  Use CODE to index into a table of the C
       basic execution character set, which is guaranteed to exist on
       all Standard C platforms.  This table also includes '$', '@',
       and '`', which are not in the basic execution character set but
       which are unibyte characters on all the platforms that we know
       about.  */
    static signed char const table[] =
      {
        '\0',   -1,   -1,   -1,   -1,   -1,   -1, '\a',
        '\b', '\t', '\n', '\v', '\f', '\r',   -1,   -1,
          -1,   -1,   -1,   -1,   -1,   -1,   -1,   -1,
          -1,   -1,   -1,   -1,   -1,   -1,   -1,   -1,
         ' ',  '!',  '"',  '#',  '$',  '%',  '&', '\'',
         '(',  ')',  '*',  '+',  ',',  '-',  '.',  '/',
         '0',  '1',  '2',  '3',  '4',  '5',  '6',  '7',
         '8',  '9',  ':',  ';',  '<',  '=',  '>',  '?',
         '@',  'A',  'B',  'C',  'D',  'E',  'F',  'G',
         'H',  'I',  'J',  'K',  'L',  'M',  'N',  'O',
         'P',  'Q',  'R',  'S',  'T',  'U',  'V',  'W',
         'X',  'Y',  'Z',  '[', '\\',  ']',  '^',  '_',
         '`',  'a',  'b',  'c',  'd',  'e',  'f',  'g',
         'h',  'i',  'j',  'k',  'l',  'm',  'n',  'o',
         'p',  'q',  'r',  's',  't',  'u',  'v',  'w',
         'x',  'y',  'z',  '{',  '|',  '}',  '~'
      };

    /* Codes beyond the table have no known single-byte form.  */
    byte = code < sizeof table ? table[code] : -1;
  }
#endif

  return byte;
}
781
782
900c5db5
AD
783/*----------------------------------------------------------------.
784| Handle `#line INT "FILE"'. ARGS has already skipped `#line '. |
785`----------------------------------------------------------------*/
786
787static void
4517da37 788handle_syncline (char *args, location loc)
900c5db5 789{
4517da37
PE
790 char *after_num;
791 unsigned long int lineno = strtoul (args, &after_num, 10);
792 char *file = strchr (after_num, '"') + 1;
793 *strchr (file, '"') = '\0';
794 if (INT_MAX <= lineno)
795 {
796 warn_at (loc, _("line number overflow"));
797 lineno = INT_MAX;
798 }
e9071366 799 current_file = uniqstr_new (file);
0c8e079f 800 boundary_set (&scanner_cursor, current_file, lineno, 1);
4517da37
PE
801}
802
803
4febdd96
PE
804/*----------------------------------------------------------------.
805| For a token or comment starting at START, report message MSGID, |
806| which should say that an end marker was found before |
807| the expected TOKEN_END. |
808`----------------------------------------------------------------*/
809
810static void
811unexpected_end (boundary start, char const *msgid, char const *token_end)
812{
813 location loc;
814 loc.start = start;
815 loc.end = scanner_cursor;
816 complain_at (loc, _(msgid), token_end);
817}
818
819
3f2d73f1
PE
820/*------------------------------------------------------------------------.
821| Report an unexpected EOF in a token or comment starting at START. |
822| An end of file was encountered and the expected TOKEN_END was missing. |
3f2d73f1 823`------------------------------------------------------------------------*/
a706a1cc
PE
824
825static void
aa418041 826unexpected_eof (boundary start, char const *token_end)
a706a1cc 827{
4febdd96
PE
828 unexpected_end (start, N_("missing `%s' at end of file"), token_end);
829}
830
831
832/*----------------------------------------.
833| Likewise, but for unexpected newlines. |
834`----------------------------------------*/
835
836static void
837unexpected_newline (boundary start, char const *token_end)
838{
839 unexpected_end (start, N_("missing `%s' at end of line"), token_end);
a706a1cc
PE
840}
841
842
f25bfb75
AD
843/*-------------------------.
844| Initialize the scanner. |
845`-------------------------*/
846
1d6412ad 847void
e9071366 848gram_scanner_initialize (void)
1d6412ad 849{
223ff46e 850 obstack_init (&obstack_for_string);
1d6412ad
AD
851}
852
853
f25bfb75
AD
854/*-----------------------------------------------.
855| Free all the memory allocated to the scanner. |
856`-----------------------------------------------*/
857
4cdb01db 858void
e9071366 859gram_scanner_free (void)
4cdb01db 860{
223ff46e 861 obstack_free (&obstack_for_string, 0);
536545f3 862 /* Reclaim Flex's buffers. */
580b8926 863 yylex_destroy ();
4cdb01db 864}