]> git.saurik.com Git - bison.git/blame - src/scan-gram.l
doc: explain how mid-rule actions are translated
[bison.git] / src / scan-gram.l
CommitLineData
e9955c83 1/* Bison Grammar Scanner -*- C -*-
3b1e470c 2
c932d613 3 Copyright (C) 2002-2012 Free Software Foundation, Inc.
e9955c83
AD
4
5 This file is part of Bison, the GNU Compiler Compiler.
6
f16b0819 7 This program is free software: you can redistribute it and/or modify
e9955c83 8 it under the terms of the GNU General Public License as published by
f16b0819 9 the Free Software Foundation, either version 3 of the License, or
e9955c83
AD
10 (at your option) any later version.
11
12 This program is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
16
17 You should have received a copy of the GNU General Public License
f16b0819 18 along with this program. If not, see <http://www.gnu.org/licenses/>. */
e9955c83 19
4521fcdf 20%option debug nodefault noinput nounput noyywrap never-interactive
e9955c83
AD
21%option prefix="gram_" outfile="lex.yy.c"
22
23%{
4f6e011e
PE
24/* Work around a bug in flex 2.5.31. See Debian bug 333231
25 <http://bugs.debian.org/cgi-bin/bugreport.cgi?bug=333231>. */
26#undef gram_wrap
27#define gram_wrap() 1
28
e9071366
AD
29#define FLEX_PREFIX(Id) gram_ ## Id
30#include "flex-scanner.h"
223ff46e 31
e9955c83 32#include "complain.h"
3f2d73f1 33#include "files.h"
e9955c83 34#include "gram.h"
ca407bdf 35#include "quotearg.h"
e9955c83 36#include "reader.h"
223ff46e 37#include "uniqstr.h"
e9955c83 38
457bf919 39#include <c-ctype.h>
e9071366
AD
40#include <mbswidth.h>
41#include <quote.h>
42
43#include "scan-gram.h"
44
45#define YY_DECL GRAM_LEX_DECL
2346344a 46
3f2d73f1 47#define YY_USER_INIT \
e9071366 48 code_start = scanner_cursor = loc->start; \
dc9701e8 49
3f2d73f1 50/* Location of scanner cursor. */
4a678af8 51static boundary scanner_cursor;
41141c56 52
e9071366 53#define YY_USER_ACTION location_compute (loc, &scanner_cursor, yytext, yyleng);
d8d3f94a 54
6c30d641 55static size_t no_cr_read (FILE *, char *, size_t);
d8d3f94a
PE
56#define YY_INPUT(buf, result, size) ((result) = no_cr_read (yyin, buf, size))
57
7685e2f7
AR
58#define ROLLBACK_CURRENT_TOKEN \
59 do { \
60 scanner_cursor.column -= mbsnwidth (yytext, yyleng, 0); \
61 yyless (0); \
62 } while (0)
63
7ec2d4cd 64/* A string representing the most recently saved token. */
7c0c6181 65static char *last_string;
7ec2d4cd 66
d5e8574b 67/* Bracketed identifier. */
7685e2f7
AR
68static uniqstr bracketed_id_str = 0;
69static location bracketed_id_loc;
70static boundary bracketed_id_start;
71static int bracketed_id_context_state = 0;
72
/* Public entry point that expands the STRING_FREE macro.  The macro is
   not defined in this file (NOTE(review): presumably it comes from
   flex-scanner.h and releases last_string -- confirm there).  */
7ec2d4cd 73void
e9071366 74gram_scanner_last_string_free (void)
7ec2d4cd 75{
41141c56 76 STRING_FREE;
7ec2d4cd 77}
e9955c83 78
4517da37 79static void handle_syncline (char *, location);
1452af69 80static unsigned long int scan_integer (char const *p, int base, location loc);
d8d3f94a 81static int convert_ucn_to_byte (char const *hex_text);
aa418041 82static void unexpected_eof (boundary, char const *);
4febdd96 83static void unexpected_newline (boundary, char const *);
e9955c83
AD
84
85%}
e9071366
AD
86 /* A C-like comment in directives/rules. */
87%x SC_YACC_COMMENT
88 /* Strings and characters in directives/rules. */
e9955c83 89%x SC_ESCAPED_STRING SC_ESCAPED_CHARACTER
e9071366 90 /* An identifier was just read in directives/rules. Special state
9874f80b 91 to capture the sequence 'identifier :'. */
e9071366 92%x SC_AFTER_IDENTIFIER
e9071366
AD
93
94 /* Three types of user code:
9874f80b 95 - prologue (code between '%{' '%}' in the first section, before %%);
e9071366
AD
96 - actions, printers, union, etc. (between braces in the middle section);
97 - epilogue (everything after the second %%). */
98%x SC_PROLOGUE SC_BRACED_CODE SC_EPILOGUE
99 /* C and C++ comments in code. */
100%x SC_COMMENT SC_LINE_COMMENT
101 /* Strings and characters in code. */
102%x SC_STRING SC_CHARACTER
d5e8574b 103 /* Bracketed identifiers support. */
7685e2f7 104%x SC_BRACKETED_ID SC_RETURN_BRACKETED_ID
e9955c83 105
68ac70bc
AD
106letter [.abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ_]
107notletter [^.abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ_]{-}[%\{]
eb8c66bb 108id {letter}({letter}|[-0-9])*
624a35e2 109int [0-9]+
d8d3f94a
PE
110
111/* POSIX says that a tag must be both an id and a C union member, but
112 historically almost any character is allowed in a tag. We disallow
113 NUL and newline, as this simplifies our implementation. */
114tag [^\0\n>]+
115
116/* Zero or more instances of backslash-newline. Following GCC, allow
117 white space between the backslash and the newline. */
118splice (\\[ \f\t\v]*\n)*
e9955c83
AD
119
120%%
121%{
a706a1cc 122 /* Nesting level of the current code in braces. */
77bb73e7 123 int braces_level PACIFY_CC (= 0);
1a9e39f1 124
3f2d73f1 125 /* Parent context state, when applicable. */
77bb73e7 126 int context_state PACIFY_CC (= 0);
a706a1cc 127
3f2d73f1 128 /* Location of most recent identifier, when applicable. */
77bb73e7 129 location id_loc PACIFY_CC (= empty_location);
3f2d73f1 130
a2bc9dbc
PE
131 /* Where containing code started, when applicable. Its initial
132 value is relevant only when yylex is invoked in the SC_EPILOGUE
133 start condition. */
134 boundary code_start = scanner_cursor;
3f2d73f1 135
223ff46e
PE
136 /* Where containing comment or string or character literal started,
137 when applicable. */
77bb73e7 138 boundary token_start PACIFY_CC (= scanner_cursor);
e9955c83
AD
139%}
140
141
3f2d73f1
PE
142 /*-----------------------.
143 | Scanning white space. |
144 `-----------------------*/
145
7685e2f7 146<INITIAL,SC_AFTER_IDENTIFIER,SC_BRACKETED_ID,SC_RETURN_BRACKETED_ID>
3f2d73f1 147{
4febdd96 148 /* Comments and white space. */
9874f80b 149 "," warn_at (*loc, _("stray ',' treated as white space"));
4febdd96 150 [ \f\n\t\v] |
96029914 151 "//".* continue;
83adb046
PE
152 "/*" {
153 token_start = loc->start;
154 context_state = YY_START;
155 BEGIN SC_YACC_COMMENT;
156 }
3f2d73f1
PE
157
158 /* #line directives are not documented, and may be withdrawn or
159 modified in future versions of Bison. */
03dbf629 160 ^"#line "{int}(" \"".*"\"")?"\n" {
4517da37 161 handle_syncline (yytext + sizeof "#line " - 1, *loc);
3f2d73f1
PE
162 }
163}
164
165
e9955c83
AD
166 /*----------------------------.
167 | Scanning Bison directives. |
168 `----------------------------*/
72183df4
DJ
169
170 /* For directives that are also command line options, the regex must be
171 "%..."
172 after "[-_]"s are removed, and the directive must match the --long
173 option name, with a single string argument. Otherwise, add exceptions
174 to ../build-aux/cross-options.pl. */
175
e9955c83
AD
176<INITIAL>
177{
43e6aea5 178 "%binary" return PERCENT_NONASSOC;
136a0f76 179 "%code" return PERCENT_CODE;
43e6aea5
AD
180 "%debug" return PERCENT_DEBUG;
181 "%default"[-_]"prec" return PERCENT_DEFAULT_PREC;
182 "%define" return PERCENT_DEFINE;
183 "%defines" return PERCENT_DEFINES;
184 "%destructor" return PERCENT_DESTRUCTOR;
185 "%dprec" return PERCENT_DPREC;
186 "%error"[-_]"verbose" return PERCENT_ERROR_VERBOSE;
187 "%expect" return PERCENT_EXPECT;
188 "%expect"[-_]"rr" return PERCENT_EXPECT_RR;
189 "%file-prefix" return PERCENT_FILE_PREFIX;
e9955c83 190 "%fixed"[-_]"output"[-_]"files" return PERCENT_YACC;
43e6aea5
AD
191 "%initial-action" return PERCENT_INITIAL_ACTION;
192 "%glr-parser" return PERCENT_GLR_PARSER;
193 "%language" return PERCENT_LANGUAGE;
194 "%left" return PERCENT_LEFT;
195 "%lex-param" return PERCENT_LEX_PARAM;
196 "%locations" return PERCENT_LOCATIONS;
197 "%merge" return PERCENT_MERGE;
198 "%name"[-_]"prefix" return PERCENT_NAME_PREFIX;
199 "%no"[-_]"default"[-_]"prec" return PERCENT_NO_DEFAULT_PREC;
200 "%no"[-_]"lines" return PERCENT_NO_LINES;
201 "%nonassoc" return PERCENT_NONASSOC;
202 "%nondeterministic-parser" return PERCENT_NONDETERMINISTIC_PARSER;
203 "%nterm" return PERCENT_NTERM;
204 "%output" return PERCENT_OUTPUT;
205 "%parse-param" return PERCENT_PARSE_PARAM;
206 "%prec" return PERCENT_PREC;
207 "%printer" return PERCENT_PRINTER;
208 "%pure"[-_]"parser" return PERCENT_PURE_PARSER;
209 "%require" return PERCENT_REQUIRE;
210 "%right" return PERCENT_RIGHT;
211 "%skeleton" return PERCENT_SKELETON;
212 "%start" return PERCENT_START;
213 "%term" return PERCENT_TOKEN;
214 "%token" return PERCENT_TOKEN;
215 "%token"[-_]"table" return PERCENT_TOKEN_TABLE;
216 "%type" return PERCENT_TYPE;
217 "%union" return PERCENT_UNION;
218 "%verbose" return PERCENT_VERBOSE;
219 "%yacc" return PERCENT_YACC;
e9955c83 220
68ac70bc 221 "%"{id}|"%"{notletter}([[:graph:]])+ {
41141c56 222 complain_at (*loc, _("invalid directive: %s"), quote (yytext));
412f8a59 223 }
900c5db5 224
e9955c83 225 "=" return EQUAL;
e9071366 226 "|" return PIPE;
e9955c83 227 ";" return SEMICOLON;
12e35840 228 "<*>" return TYPE_TAG_ANY;
3ebecc24 229 "<>" return TYPE_TAG_NONE;
e9955c83 230
3f2d73f1 231 {id} {
58d7a1a1 232 val->uniqstr = uniqstr_new (yytext);
3f2d73f1 233 id_loc = *loc;
7685e2f7 234 bracketed_id_str = NULL;
3f2d73f1 235 BEGIN SC_AFTER_IDENTIFIER;
e9955c83
AD
236 }
237
d8d3f94a 238 {int} {
1452af69
PE
239 val->integer = scan_integer (yytext, 10, *loc);
240 return INT;
241 }
242 0[xX][0-9abcdefABCDEF]+ {
243 val->integer = scan_integer (yytext, 16, *loc);
d8d3f94a
PE
244 return INT;
245 }
e9955c83 246
601bdfab
AD
247 /* Identifiers may not start with a digit. Yet, don't silently
248 accept "1FOO" as "1 FOO". */
249 {int}{id} {
250 complain_at (*loc, _("invalid identifier: %s"), quote (yytext));
251 }
252
ac9b0e95 253 /* Characters. */
07c0db18 254 "'" token_start = loc->start; BEGIN SC_ESCAPED_CHARACTER;
e9955c83
AD
255
256 /* Strings. */
ca407bdf 257 "\"" token_start = loc->start; BEGIN SC_ESCAPED_STRING;
e9955c83
AD
258
259 /* Prologue. */
3f2d73f1 260 "%{" code_start = loc->start; BEGIN SC_PROLOGUE;
e9955c83
AD
261
262 /* Code in between braces. */
3f2d73f1
PE
263 "{" {
264 STRING_GROW;
265 braces_level = 0;
266 code_start = loc->start;
267 BEGIN SC_BRACED_CODE;
268 }
e9955c83
AD
269
270 /* A type. */
d8d3f94a 271 "<"{tag}">" {
223ff46e 272 obstack_grow (&obstack_for_string, yytext + 1, yyleng - 2);
41141c56 273 STRING_FINISH;
223ff46e 274 val->uniqstr = uniqstr_new (last_string);
41141c56 275 STRING_FREE;
4cdb01db
AD
276 return TYPE;
277 }
278
a706a1cc
PE
279 "%%" {
280 static int percent_percent_count;
e9955c83 281 if (++percent_percent_count == 2)
a2bc9dbc 282 BEGIN SC_EPILOGUE;
e9955c83
AD
283 return PERCENT_PERCENT;
284 }
285
7685e2f7
AR
286 "[" {
287 bracketed_id_str = NULL;
288 bracketed_id_start = loc->start;
289 bracketed_id_context_state = YY_START;
290 BEGIN SC_BRACKETED_ID;
291 }
292
68ac70bc
AD
293 [^\[%A-Za-z0-9_<>{}\"\'*;|=/, \f\n\t\v]+|. {
294 complain_at (*loc, "%s: %s",
295 ngettext ("invalid character", "invalid characters", yyleng),
296 quote_mem (yytext, yyleng));
3f2d73f1 297 }
379f0ac8
PE
298
299 <<EOF>> {
300 loc->start = loc->end = scanner_cursor;
301 yyterminate ();
302 }
3f2d73f1
PE
303}
304
305
306 /*-----------------------------------------------------------------.
307 | Scanning after an identifier, checking whether a colon is next. |
308 `-----------------------------------------------------------------*/
309
310<SC_AFTER_IDENTIFIER>
311{
7685e2f7 312 "[" {
d5e8574b 313 if (bracketed_id_str)
7685e2f7
AR
314 {
315 ROLLBACK_CURRENT_TOKEN;
316 BEGIN SC_RETURN_BRACKETED_ID;
317 *loc = id_loc;
318 return ID;
319 }
d5e8574b
AR
320 else
321 {
322 bracketed_id_start = loc->start;
323 bracketed_id_context_state = YY_START;
324 BEGIN SC_BRACKETED_ID;
325 }
7685e2f7 326 }
3f2d73f1 327 ":" {
7685e2f7 328 BEGIN (bracketed_id_str ? SC_RETURN_BRACKETED_ID : INITIAL);
3f2d73f1 329 *loc = id_loc;
3f2d73f1
PE
330 return ID_COLON;
331 }
332 . {
7685e2f7
AR
333 ROLLBACK_CURRENT_TOKEN;
334 BEGIN (bracketed_id_str ? SC_RETURN_BRACKETED_ID : INITIAL);
3f2d73f1 335 *loc = id_loc;
3f2d73f1
PE
336 return ID;
337 }
338 <<EOF>> {
7685e2f7 339 BEGIN (bracketed_id_str ? SC_RETURN_BRACKETED_ID : INITIAL);
3f2d73f1 340 *loc = id_loc;
3f2d73f1 341 return ID;
e9955c83
AD
342 }
343}
344
7685e2f7
AR
345 /*--------------------------------.
346 | Scanning bracketed identifiers. |
347 `--------------------------------*/
348
349<SC_BRACKETED_ID>
350{
351 {id} {
d5e8574b 352 if (bracketed_id_str)
7685e2f7 353 {
d5e8574b
AR
354 complain_at (*loc, _("unexpected identifier in bracketed name: %s"),
355 quote (yytext));
7685e2f7
AR
356 }
357 else
358 {
d5e8574b
AR
359 bracketed_id_str = uniqstr_new (yytext);
360 bracketed_id_loc = *loc;
7685e2f7
AR
361 }
362 }
363 "]" {
364 BEGIN bracketed_id_context_state;
365 if (bracketed_id_str)
366 {
367 if (INITIAL == bracketed_id_context_state)
368 {
369 val->uniqstr = bracketed_id_str;
370 bracketed_id_str = 0;
371 *loc = bracketed_id_loc;
372 return BRACKETED_ID;
373 }
374 }
375 else
d5e8574b 376 complain_at (*loc, _("an identifier expected"));
7685e2f7 377 }
68ac70bc
AD
378
379 [^\].A-Za-z0-9_/ \f\n\t\v]+|. {
380 complain_at (*loc, "%s: %s",
381 ngettext ("invalid character in bracketed name",
382 "invalid characters in bracketed name", yyleng),
383 quote_mem (yytext, yyleng));
7685e2f7 384 }
68ac70bc 385
7685e2f7
AR
386 <<EOF>> {
387 BEGIN bracketed_id_context_state;
388 unexpected_eof (bracketed_id_start, "]");
389 }
390}
391
392<SC_RETURN_BRACKETED_ID>
393{
394 . {
395 ROLLBACK_CURRENT_TOKEN;
396 val->uniqstr = bracketed_id_str;
397 bracketed_id_str = 0;
398 *loc = bracketed_id_loc;
399 BEGIN INITIAL;
400 return BRACKETED_ID;
401 }
402}
403
e9955c83 404
d8d3f94a 405 /*---------------------------------------------------------------.
9874f80b 406 | Scanning a Yacc comment. The initial '/ *' is already eaten. |
d8d3f94a 407 `---------------------------------------------------------------*/
e9955c83 408
d8d3f94a 409<SC_YACC_COMMENT>
e9955c83 410{
3f2d73f1 411 "*/" BEGIN context_state;
96029914 412 .|\n continue;
aa418041 413 <<EOF>> unexpected_eof (token_start, "*/"); BEGIN context_state;
d8d3f94a
PE
414}
415
416
417 /*------------------------------------------------------------.
9874f80b 418 | Scanning a C comment. The initial '/ *' is already eaten. |
d8d3f94a
PE
419 `------------------------------------------------------------*/
420
421<SC_COMMENT>
422{
3f2d73f1 423 "*"{splice}"/" STRING_GROW; BEGIN context_state;
aa418041 424 <<EOF>> unexpected_eof (token_start, "*/"); BEGIN context_state;
e9955c83
AD
425}
426
427
d8d3f94a 428 /*--------------------------------------------------------------.
9874f80b 429 | Scanning a line comment. The initial '//' is already eaten. |
d8d3f94a
PE
430 `--------------------------------------------------------------*/
431
432<SC_LINE_COMMENT>
433{
3f2d73f1 434 "\n" STRING_GROW; BEGIN context_state;
41141c56 435 {splice} STRING_GROW;
3f2d73f1 436 <<EOF>> BEGIN context_state;
d8d3f94a
PE
437}
438
439
4febdd96
PE
440 /*------------------------------------------------.
441 | Scanning a Bison string, including its escapes. |
442 | The initial quote is already eaten. |
443 `------------------------------------------------*/
e9955c83
AD
444
445<SC_ESCAPED_STRING>
446{
47aee066
JD
447 "\""|"\n" {
448 if (yytext[0] == '\n')
449 unexpected_newline (token_start, "\"");
450 STRING_FINISH;
451 loc->start = token_start;
452 val->chars = last_string;
453 BEGIN INITIAL;
454 return STRING;
455 }
456 <<EOF>> {
457 unexpected_eof (token_start, "\"");
41141c56 458 STRING_FINISH;
3f2d73f1 459 loc->start = token_start;
223ff46e 460 val->chars = last_string;
a706a1cc 461 BEGIN INITIAL;
e9955c83
AD
462 return STRING;
463 }
e9955c83
AD
464}
465
4febdd96
PE
466 /*----------------------------------------------------------.
467 | Scanning a Bison character literal, decoding its escapes. |
468 | The initial quote is already eaten. |
469 `----------------------------------------------------------*/
e9955c83
AD
470
471<SC_ESCAPED_CHARACTER>
472{
47aee066 473 "'"|"\n" {
41141c56 474 STRING_FINISH;
3f2d73f1 475 loc->start = token_start;
07c0db18 476 val->character = last_string[0];
ac9b0e95
JD
477 {
478 /* FIXME: Eventually, make these errors. */
07c0db18
JD
479 if (last_string[0] == '\0')
480 {
481 warn_at (*loc, _("empty character literal"));
482 /* '\0' seems dangerous even if we are about to complain. */
483 val->character = '\'';
484 }
485 else if (last_string[1] != '\0')
ac9b0e95
JD
486 warn_at (*loc, _("extra characters in character literal"));
487 }
488 if (yytext[0] == '\n')
489 unexpected_newline (token_start, "'");
41141c56 490 STRING_FREE;
a706a1cc 491 BEGIN INITIAL;
58d7a1a1 492 return CHAR;
e9955c83 493 }
47aee066 494 <<EOF>> {
47aee066
JD
495 STRING_FINISH;
496 loc->start = token_start;
07c0db18 497 val->character = last_string[0];
ac9b0e95 498 {
ac9b0e95 499 /* FIXME: Eventually, make these errors. */
07c0db18
JD
500 if (last_string[0] == '\0')
501 {
502 warn_at (*loc, _("empty character literal"));
503 /* '\0' seems dangerous even if we are about to complain. */
504 val->character = '\'';
505 }
506 else if (last_string[1] != '\0')
ac9b0e95 507 warn_at (*loc, _("extra characters in character literal"));
ac9b0e95
JD
508 }
509 unexpected_eof (token_start, "'");
47aee066
JD
510 STRING_FREE;
511 BEGIN INITIAL;
512 return CHAR;
513 }
4febdd96 514}
a706a1cc 515
4febdd96
PE
516<SC_ESCAPED_CHARACTER,SC_ESCAPED_STRING>
517{
92ac3705 518 \0 complain_at (*loc, _("invalid null character"));
e9955c83
AD
519}
520
521
522 /*----------------------------.
523 | Decode escaped characters. |
524 `----------------------------*/
525
526<SC_ESCAPED_STRING,SC_ESCAPED_CHARACTER>
527{
d8d3f94a 528 \\[0-7]{1,3} {
4517da37 529 unsigned long int c = strtoul (yytext + 1, NULL, 8);
39fb7e62
JD
530 if (!c || UCHAR_MAX < c)
531 complain_at (*loc, _("invalid number after \\-escape: %s"),
532 yytext+1);
e9955c83 533 else
223ff46e 534 obstack_1grow (&obstack_for_string, c);
e9955c83
AD
535 }
536
6b0d38ab 537 \\x[0-9abcdefABCDEF]+ {
4517da37
PE
538 verify (UCHAR_MAX < ULONG_MAX);
539 unsigned long int c = strtoul (yytext + 2, NULL, 16);
39fb7e62
JD
540 if (!c || UCHAR_MAX < c)
541 complain_at (*loc, _("invalid number after \\-escape: %s"),
542 yytext+1);
d8d3f94a 543 else
223ff46e 544 obstack_1grow (&obstack_for_string, c);
e9955c83
AD
545 }
546
223ff46e
PE
547 \\a obstack_1grow (&obstack_for_string, '\a');
548 \\b obstack_1grow (&obstack_for_string, '\b');
549 \\f obstack_1grow (&obstack_for_string, '\f');
550 \\n obstack_1grow (&obstack_for_string, '\n');
551 \\r obstack_1grow (&obstack_for_string, '\r');
552 \\t obstack_1grow (&obstack_for_string, '\t');
553 \\v obstack_1grow (&obstack_for_string, '\v');
412f8a59
PE
554
555 /* \\[\"\'?\\] would be shorter, but it confuses xgettext. */
223ff46e 556 \\("\""|"'"|"?"|"\\") obstack_1grow (&obstack_for_string, yytext[1]);
412f8a59 557
6b0d38ab 558 \\(u|U[0-9abcdefABCDEF]{4})[0-9abcdefABCDEF]{4} {
d8d3f94a 559 int c = convert_ucn_to_byte (yytext);
39fb7e62
JD
560 if (c <= 0)
561 complain_at (*loc, _("invalid number after \\-escape: %s"),
562 yytext+1);
d8d3f94a 563 else
223ff46e 564 obstack_1grow (&obstack_for_string, c);
d8d3f94a 565 }
4f25ebb0 566 \\(.|\n) {
39fb7e62 567 char const *p = yytext + 1;
890ab17c 568 /* Quote only if escaping won't make the character visible. */
457bf919 569 if (c_isspace ((unsigned char) *p) && c_isprint ((unsigned char) *p))
890ab17c 570 p = quote (p);
39fb7e62
JD
571 else
572 p = quotearg_style_mem (escape_quoting_style, p, 1);
573 complain_at (*loc, _("invalid character after \\-escape: %s"), p);
e9955c83
AD
574 }
575}
576
4febdd96
PE
577 /*--------------------------------------------.
578 | Scanning user-code characters and strings. |
579 `--------------------------------------------*/
e9955c83 580
4febdd96
PE
581<SC_CHARACTER,SC_STRING>
582{
e9071366 583 {splice}|\\{splice}[^\n\[\]] STRING_GROW;
4febdd96 584}
e9955c83
AD
585
586<SC_CHARACTER>
587{
4febdd96
PE
588 "'" STRING_GROW; BEGIN context_state;
589 \n unexpected_newline (token_start, "'"); BEGIN context_state;
590 <<EOF>> unexpected_eof (token_start, "'"); BEGIN context_state;
e9955c83
AD
591}
592
e9955c83
AD
593<SC_STRING>
594{
4febdd96
PE
595 "\"" STRING_GROW; BEGIN context_state;
596 \n unexpected_newline (token_start, "\""); BEGIN context_state;
597 <<EOF>> unexpected_eof (token_start, "\""); BEGIN context_state;
e9955c83
AD
598}
599
600
601 /*---------------------------------------------------.
602 | Strings, comments etc. can be found in user code. |
603 `---------------------------------------------------*/
604
605<SC_BRACED_CODE,SC_PROLOGUE,SC_EPILOGUE>
606{
3f2d73f1
PE
607 "'" {
608 STRING_GROW;
609 context_state = YY_START;
610 token_start = loc->start;
611 BEGIN SC_CHARACTER;
612 }
613 "\"" {
614 STRING_GROW;
615 context_state = YY_START;
616 token_start = loc->start;
617 BEGIN SC_STRING;
618 }
619 "/"{splice}"*" {
620 STRING_GROW;
621 context_state = YY_START;
622 token_start = loc->start;
623 BEGIN SC_COMMENT;
624 }
625 "/"{splice}"/" {
626 STRING_GROW;
627 context_state = YY_START;
628 BEGIN SC_LINE_COMMENT;
629 }
e9955c83
AD
630}
631
632
624a35e2 633
58d7a1a1
AD
634 /*-----------------------------------------------------------.
635 | Scanning some code in braces (actions). The initial "{" is |
636 | already eaten. |
637 `-----------------------------------------------------------*/
e9955c83
AD
638
639<SC_BRACED_CODE>
640{
41141c56
PE
641 "{"|"<"{splice}"%" STRING_GROW; braces_level++;
642 "%"{splice}">" STRING_GROW; braces_level--;
e9955c83 643 "}" {
25522739
PE
644 obstack_1grow (&obstack_for_string, '}');
645
2346344a
AD
646 --braces_level;
647 if (braces_level < 0)
e9955c83 648 {
41141c56 649 STRING_FINISH;
3f2d73f1 650 loc->start = code_start;
eb095650 651 val->code = last_string;
a706a1cc 652 BEGIN INITIAL;
58d7a1a1 653 return BRACED_CODE;
e9955c83
AD
654 }
655 }
656
9874f80b
JM
657 /* Tokenize '<<%' correctly (as '<<' '%') rather than incorrectly
658 (as '<' '<%'). */
41141c56 659 "<"{splice}"<" STRING_GROW;
a706a1cc 660
47aee066
JD
661 <<EOF>> {
662 unexpected_eof (code_start, "}");
663 STRING_FINISH;
664 loc->start = code_start;
eb095650 665 val->code = last_string;
47aee066
JD
666 BEGIN INITIAL;
667 return BRACED_CODE;
668 }
e9955c83
AD
669}
670
671
672 /*--------------------------------------------------------------.
673 | Scanning some prologue: from "%{" (already scanned) to "%}". |
674 `--------------------------------------------------------------*/
675
676<SC_PROLOGUE>
677{
678 "%}" {
41141c56 679 STRING_FINISH;
3f2d73f1 680 loc->start = code_start;
223ff46e 681 val->chars = last_string;
a706a1cc 682 BEGIN INITIAL;
e9955c83
AD
683 return PROLOGUE;
684 }
685
47aee066
JD
686 <<EOF>> {
687 unexpected_eof (code_start, "%}");
688 STRING_FINISH;
689 loc->start = code_start;
690 val->chars = last_string;
691 BEGIN INITIAL;
692 return PROLOGUE;
693 }
e9955c83
AD
694}
695
696
697 /*---------------------------------------------------------------.
698 | Scanning the epilogue (everything after the second "%%", which |
d8d3f94a 699 | has already been eaten). |
e9955c83
AD
700 `---------------------------------------------------------------*/
701
702<SC_EPILOGUE>
703{
e9955c83 704 <<EOF>> {
41141c56 705 STRING_FINISH;
3f2d73f1 706 loc->start = code_start;
223ff46e 707 val->chars = last_string;
a706a1cc 708 BEGIN INITIAL;
e9955c83
AD
709 return EPILOGUE;
710 }
711}
712
713
4febdd96
PE
714 /*-----------------------------------------------------.
715 | By default, grow the string obstack with the input. |
716 `-----------------------------------------------------*/
717
718<SC_COMMENT,SC_LINE_COMMENT,SC_BRACED_CODE,SC_PROLOGUE,SC_EPILOGUE,SC_STRING,SC_CHARACTER,SC_ESCAPED_STRING,SC_ESCAPED_CHARACTER>. |
719<SC_COMMENT,SC_LINE_COMMENT,SC_BRACED_CODE,SC_PROLOGUE,SC_EPILOGUE>\n STRING_GROW;
720
e9955c83
AD
721%%
722
6c30d641
PE
723/* Read bytes from FP into buffer BUF of size SIZE. Return the
724 number of bytes read. Remove '\r' from input, treating \r\n
725 and isolated \r as \n. */
726
/* Fill BUF with at most SIZE bytes read from FP and normalize line
   endings in place: every "\r\n" pair and every lone '\r' becomes a
   single '\n'.  Return the number of bytes left in BUF after that
   compaction (0 only when nothing could be read).  */
static size_t
no_cr_read (FILE *fp, char *buf, size_t size)
{
  size_t const nread = fread (buf, 1, size, fp);
  /* DST is the write cursor; it starts on the first '\r', if any.  */
  char *dst = nread ? memchr (buf, '\r', nread) : NULL;

  if (dst)
    {
      char const *src = dst + 1;
      char const *const end = buf + nread;

      for (;;)
        {
          /* DST sits on the slot of a '\r' that was just consumed:
             store the '\n' that stands in for it.  */
          *dst++ = '\n';

          if (src != end)
            {
              /* "\r\n": drop the '\n' that follows the '\r'.  */
              if (*src == '\n')
                src++;
            }
          else
            {
              /* The '\r' was the last byte of this chunk, so its
                 partner '\n' (if any) is still in the stream.  Swallow
                 it, or push back whatever was read instead.  */
              int next = getc (fp);
              if (next != '\n' && ungetc (next, fp) != next)
                return dst - buf;
            }

          /* Slide the following bytes down until the next '\r'.  */
          for (;;)
            {
              if (src == end)
                return dst - buf;
              if (*src == '\r')
                {
                  src++;
                  break;
                }
              *dst++ = *src++;
            }
        }
    }

  return nread;
}
768
769
f25bfb75 770
1452af69
PE
771/*------------------------------------------------------.
772| Scan NUMBER for a base-BASE integer at location LOC. |
773`------------------------------------------------------*/
774
775static unsigned long int
776scan_integer (char const *number, int base, location loc)
777{
4517da37
PE
778 verify (INT_MAX < ULONG_MAX);
779 unsigned long int num = strtoul (number, NULL, base);
780
781 if (INT_MAX < num)
1452af69
PE
782 {
783 complain_at (loc, _("integer out of range: %s"), quote (number));
784 num = INT_MAX;
785 }
4517da37 786
1452af69
PE
787 return num;
788}
789
790
d8d3f94a
PE
791/*------------------------------------------------------------------.
792| Convert universal character name UCN to a single-byte character, |
793| and return that character. Return -1 if UCN does not correspond |
794| to a single-byte character. |
795`------------------------------------------------------------------*/
796
/* Convert the universal character name UCN -- the full escape text,
   i.e. a backslash, 'u' or 'U', then hexadecimal digits -- to the
   single byte that represents it.  Return that byte's value, or -1 if
   UCN does not correspond to a single-byte character.  */
static int
convert_ucn_to_byte (char const *ucn)
{
  verify (UCHAR_MAX <= INT_MAX);
  /* Skip the backslash and the 'u'/'U' marker before parsing.  */
  unsigned long int code = strtoul (ucn + 2, NULL, 16);

  /* FIXME: Currently we assume Unicode-compatible unibyte characters
     on ASCII hosts (i.e., Latin-1 on hosts with 8-bit bytes).  On
     non-ASCII hosts we support only the portable C character set.
     These limitations should be removed once we add support for
     multibyte characters.  */

  if (UCHAR_MAX < code)
    return -1;

#if ! ('$' == 0x24 && '@' == 0x40 && '`' == 0x60 && '~' == 0x7e)
  {
    /* A non-ASCII host.  Use CODE to index into a table of the C
       basic execution character set, which is guaranteed to exist on
       all Standard C platforms.  This table also includes '$', '@',
       and '`', which are not in the basic execution character set but
       which are unibyte characters on all the platforms that we know
       about.  */
    static signed char const table[] =
      {
        '\0',   -1,   -1,   -1,   -1,   -1,   -1, '\a',
        '\b', '\t', '\n', '\v', '\f', '\r',   -1,   -1,
          -1,   -1,   -1,   -1,   -1,   -1,   -1,   -1,
          -1,   -1,   -1,   -1,   -1,   -1,   -1,   -1,
         ' ',  '!',  '"',  '#',  '$',  '%',  '&', '\'',
         '(',  ')',  '*',  '+',  ',',  '-',  '.',  '/',
         '0',  '1',  '2',  '3',  '4',  '5',  '6',  '7',
         '8',  '9',  ':',  ';',  '<',  '=',  '>',  '?',
         '@',  'A',  'B',  'C',  'D',  'E',  'F',  'G',
         'H',  'I',  'J',  'K',  'L',  'M',  'N',  'O',
         'P',  'Q',  'R',  'S',  'T',  'U',  'V',  'W',
         'X',  'Y',  'Z',  '[', '\\',  ']',  '^',  '_',
         '`',  'a',  'b',  'c',  'd',  'e',  'f',  'g',
         'h',  'i',  'j',  'k',  'l',  'm',  'n',  'o',
         'p',  'q',  'r',  's',  't',  'u',  'v',  'w',
         'x',  'y',  'z',  '{',  '|',  '}',  '~'
      };

    /* Return the lookup directly as an int.  Storing a -1 entry back
       into the unsigned long CODE and then returning it would rely on
       an implementation-defined out-of-range conversion to int.  */
    return code < sizeof table ? table[code] : -1;
  }
#else
  /* ASCII host: CODE is at most UCHAR_MAX here, which the verify above
     guarantees fits in an int.  */
  return code;
#endif
}
846
847
03dbf629
AD
848/*---------------------------------------------------------------------.
849| Handle '#line INT( "FILE")?\n'. ARGS has already skipped '#line '. |
850`---------------------------------------------------------------------*/
900c5db5
AD
851
852static void
4517da37 853handle_syncline (char *args, location loc)
900c5db5 854{
03dbf629
AD
855 char *file;
856 unsigned long int lineno = strtoul (args, &file, 10);
4517da37
PE
857 if (INT_MAX <= lineno)
858 {
859 warn_at (loc, _("line number overflow"));
860 lineno = INT_MAX;
861 }
03dbf629
AD
862
863 file = mbschr (file, '"');
864 if (file)
865 {
866 *mbschr (file + 1, '"') = '\0';
867 current_file = uniqstr_new (file + 1);
868 }
0c8e079f 869 boundary_set (&scanner_cursor, current_file, lineno, 1);
4517da37
PE
870}
871
872
4febdd96
PE
873/*----------------------------------------------------------------.
874| For a token or comment starting at START, report message MSGID, |
875| which should say that an end marker was found before |
876| the expected TOKEN_END. |
877`----------------------------------------------------------------*/
878
879static void
880unexpected_end (boundary start, char const *msgid, char const *token_end)
881{
882 location loc;
883 loc.start = start;
884 loc.end = scanner_cursor;
4c787a31
AD
885 token_end = quote (token_end);
886 // Instead of '\'', display "'".
f0f62c8c 887 if (!strcmp (token_end, "'\\''"))
4c787a31 888 token_end = "\"'\"";
4febdd96
PE
889 complain_at (loc, _(msgid), token_end);
890}
891
892
3f2d73f1
PE
893/*------------------------------------------------------------------------.
894| Report an unexpected EOF in a token or comment starting at START. |
895| An end of file was encountered and the expected TOKEN_END was missing. |
3f2d73f1 896`------------------------------------------------------------------------*/
a706a1cc
PE
897
/* Thin wrapper over unexpected_end for the end-of-file case.  The
   message is wrapped in N_ here and passed on untranslated;
   unexpected_end applies _() to it before complaining.  */
898static void
aa418041 899unexpected_eof (boundary start, char const *token_end)
a706a1cc 900{
4c787a31 901 unexpected_end (start, N_("missing %s at end of file"), token_end);
4febdd96
PE
902}
903
904
905/*----------------------------------------.
906| Likewise, but for unexpected newlines. |
907`----------------------------------------*/
908
/* Thin wrapper over unexpected_end for the end-of-line case.  The
   message is wrapped in N_ here and passed on untranslated;
   unexpected_end applies _() to it before complaining.  */
909static void
910unexpected_newline (boundary start, char const *token_end)
911{
4c787a31 912 unexpected_end (start, N_("missing %s at end of line"), token_end);
a706a1cc
PE
913}
914
915
f25bfb75
AD
916/*-------------------------.
917| Initialize the scanner. |
918`-------------------------*/
919
/* Set up obstack_for_string, the obstack that the scanner actions in
   this file grow token text into with obstack_grow and obstack_1grow.  */
1d6412ad 920void
e9071366 921gram_scanner_initialize (void)
1d6412ad 922{
223ff46e 923 obstack_init (&obstack_for_string);
1d6412ad
AD
924}
925
926
f25bfb75
AD
927/*-----------------------------------------------.
928| Free all the memory allocated to the scanner. |
929`-----------------------------------------------*/
930
/* Tear down the scanner: release obstack_for_string, then let Flex
   destroy its own state.  */
4cdb01db 931void
e9071366 932gram_scanner_free (void)
4cdb01db 933{
/* NOTE(review): per the GNU obstack documentation, a null object
   argument frees everything in the obstack -- confirm.  */
223ff46e 934 obstack_free (&obstack_for_string, 0);
536545f3 935 /* Reclaim Flex's buffers. */
580b8926 936 yylex_destroy ();
4cdb01db 937}