]> git.saurik.com Git - bison.git/blame - src/scan-gram.l
tables: scope reduction
[bison.git] / src / scan-gram.l
CommitLineData
e9955c83 1/* Bison Grammar Scanner -*- C -*-
3b1e470c 2
34136e65 3 Copyright (C) 2002-2012 Free Software Foundation, Inc.
e9955c83
AD
4
5 This file is part of Bison, the GNU Compiler Compiler.
6
f16b0819 7 This program is free software: you can redistribute it and/or modify
e9955c83 8 it under the terms of the GNU General Public License as published by
f16b0819 9 the Free Software Foundation, either version 3 of the License, or
e9955c83
AD
10 (at your option) any later version.
11
12 This program is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
16
17 You should have received a copy of the GNU General Public License
f16b0819 18 along with this program. If not, see <http://www.gnu.org/licenses/>. */
e9955c83 19
2062d72d 20%option debug nodefault noinput noyywrap never-interactive
e9955c83
AD
21%option prefix="gram_" outfile="lex.yy.c"
22
23%{
4f6e011e
PE
24/* Work around a bug in flex 2.5.31. See Debian bug 333231
25 <http://bugs.debian.org/cgi-bin/bugreport.cgi?bug=333231>. */
26#undef gram_wrap
27#define gram_wrap() 1
28
e9071366 29#define FLEX_PREFIX(Id) gram_ ## Id
0305d25e 30#include <src/flex-scanner.h>
223ff46e 31
0305d25e
AD
32#include <src/complain.h>
33#include <src/files.h>
2062d72d 34#include <src/getargs.h>
0305d25e
AD
35#include <src/gram.h>
36#include <quotearg.h>
37#include <src/reader.h>
38#include <src/uniqstr.h>
e9955c83 39
457bf919 40#include <c-ctype.h>
e9071366
AD
41#include <mbswidth.h>
42#include <quote.h>
43
0305d25e 44#include <src/scan-gram.h>
e9071366
AD
45
46#define YY_DECL GRAM_LEX_DECL
2346344a 47
e9690142
JD
48#define YY_USER_INIT \
49 code_start = scanner_cursor = loc->start; \
dc9701e8 50
3f2d73f1 51/* Location of scanner cursor. */
4a678af8 52static boundary scanner_cursor;
41141c56 53
e9071366 54#define YY_USER_ACTION location_compute (loc, &scanner_cursor, yytext, yyleng);
d8d3f94a 55
6c30d641 56static size_t no_cr_read (FILE *, char *, size_t);
d8d3f94a
PE
57#define YY_INPUT(buf, result, size) ((result) = no_cr_read (yyin, buf, size))
58
a7706735
AD
59#define RETURN_PERCENT_PARAM(Value) \
60 RETURN_VALUE(PERCENT_PARAM, param, param_ ## Value)
61
62#define RETURN_PERCENT_FLAG(Value) \
63 RETURN_VALUE(PERCENT_FLAG, uniqstr, uniqstr_new (Value))
64
65#define RETURN_VALUE(Token, Field, Value) \
ba061fa6 66 do { \
a7706735
AD
67 val->Field = Value; \
68 return Token; \
ba061fa6
AD
69 } while (0)
70
b9f1d9a4
AR
71#define ROLLBACK_CURRENT_TOKEN \
72 do { \
e9690142 73 scanner_cursor.column -= mbsnwidth (yytext, yyleng, 0); \
b9f1d9a4
AR
74 yyless (0); \
75 } while (0)
ba061fa6 76
2062d72d
TR
77#define DEPRECATED(Msg) \
78 do { \
79 size_t i; \
1dc927a7 80 deprecated_directive (loc, yytext, Msg); \
2062d72d
TR
81 scanner_cursor.column -= mbsnwidth (Msg, strlen (Msg), 0); \
82 for (i = strlen (Msg); i != 0; --i) \
83 unput (Msg[i - 1]); \
84 } while (0)
85
7ec2d4cd 86/* A string representing the most recently saved token. */
7c0c6181 87static char *last_string;
7ec2d4cd 88
872b52bc 89/* Bracketed identifier. */
b9f1d9a4
AR
90static uniqstr bracketed_id_str = 0;
91static location bracketed_id_loc;
92static boundary bracketed_id_start;
93static int bracketed_id_context_state = 0;
94
7ec2d4cd 95void
e9071366 96gram_scanner_last_string_free (void)
7ec2d4cd 97{
41141c56 98 STRING_FREE;
7ec2d4cd 99}
e9955c83 100
4517da37 101static void handle_syncline (char *, location);
1452af69 102static unsigned long int scan_integer (char const *p, int base, location loc);
d8d3f94a 103static int convert_ucn_to_byte (char const *hex_text);
aa418041 104static void unexpected_eof (boundary, char const *);
4febdd96 105static void unexpected_newline (boundary, char const *);
e9955c83
AD
106
107%}
e9071366
AD
108 /* A C-like comment in directives/rules. */
109%x SC_YACC_COMMENT
110 /* Strings and characters in directives/rules. */
e9955c83 111%x SC_ESCAPED_STRING SC_ESCAPED_CHARACTER
e9071366 112 /* An identifier was just read in directives/rules. Special state
ae93e4e4 113 to capture the sequence 'identifier :'. */
e9071366 114%x SC_AFTER_IDENTIFIER
cb823b6f
AD
115 /* A complex tag, with nested angle brackets. */
116%x SC_TAG
e9071366 117
ca2a6d15 118 /* Four types of user code:
ae93e4e4 119 - prologue (code between '%{' '%}' in the first section, before %%);
e9071366 120 - actions, printers, union, etc. (between braces in the middle section);
da5462d4 121 - epilogue (everything after the second %%);
ae93e4e4 122 - predicate (code between '%?{' and '}' in the middle section). */
ca2a6d15 123%x SC_PROLOGUE SC_BRACED_CODE SC_EPILOGUE SC_PREDICATE
e9071366
AD
124 /* C and C++ comments in code. */
125%x SC_COMMENT SC_LINE_COMMENT
126 /* Strings and characters in code. */
127%x SC_STRING SC_CHARACTER
872b52bc 128 /* Bracketed identifiers support. */
b9f1d9a4 129%x SC_BRACKETED_ID SC_RETURN_BRACKETED_ID
e9955c83 130
e9690142 131letter [.abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ_]
68ac70bc 132notletter [^.abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ_]{-}[%\{]
e9690142 133id {letter}({letter}|[-0-9])*
e9690142 134int [0-9]+
d8d3f94a
PE
135
136/* POSIX says that a tag must be both an id and a C union member, but
137 historically almost any character is allowed in a tag. We disallow
cb823b6f
AD
138 NUL, as this simplifies our implementation. We disallow angle
139 bracket to match them in nested pairs: several languages use them
140 for generics/template types. */
e9690142 141tag [^\0<>]+
d8d3f94a
PE
142
143/* Zero or more instances of backslash-newline. Following GCC, allow
144 white space between the backslash and the newline. */
e9690142 145splice (\\[ \f\t\v]*\n)*
e9955c83 146
2062d72d
TR
147/* An equal sign, with optional leading whitespace. This is used in some
148 deprecated constructs. */
149eqopt ([[:space:]]*=)?
150
e9955c83
AD
151%%
152%{
cb823b6f
AD
153 /* Nesting level. Either for nested braces, or nested angle brackets
154 (but not mixed). */
84f6a6ca 155 int nesting PACIFY_CC (= 0);
1a9e39f1 156
3f2d73f1 157 /* Parent context state, when applicable. */
84f6a6ca 158 int context_state PACIFY_CC (= 0);
a706a1cc 159
3f2d73f1 160 /* Location of most recent identifier, when applicable. */
84f6a6ca 161 location id_loc PACIFY_CC (= empty_location);
3f2d73f1 162
a2bc9dbc
PE
163 /* Where containing code started, when applicable. Its initial
164 value is relevant only when yylex is invoked in the SC_EPILOGUE
165 start condition. */
166 boundary code_start = scanner_cursor;
3f2d73f1 167
223ff46e
PE
168 /* Where containing comment or string or character literal started,
169 when applicable. */
84f6a6ca 170 boundary token_start PACIFY_CC (= scanner_cursor);
e9955c83
AD
171%}
172
173
3f2d73f1
PE
174 /*-----------------------.
175 | Scanning white space. |
176 `-----------------------*/
177
b9f1d9a4 178<INITIAL,SC_AFTER_IDENTIFIER,SC_BRACKETED_ID,SC_RETURN_BRACKETED_ID>
3f2d73f1 179{
4febdd96 180 /* Comments and white space. */
6fb8b256 181 "," {
bb8e56ff 182 complain (loc, Wother, _("stray ',' treated as white space"));
6fb8b256 183 }
4febdd96 184 [ \f\n\t\v] |
96029914 185 "//".* continue;
83adb046
PE
186 "/*" {
187 token_start = loc->start;
188 context_state = YY_START;
189 BEGIN SC_YACC_COMMENT;
190 }
3f2d73f1
PE
191
192 /* #line directives are not documented, and may be withdrawn or
193 modified in future versions of Bison. */
03dbf629 194 ^"#line "{int}(" \"".*"\"")?"\n" {
4517da37 195 handle_syncline (yytext + sizeof "#line " - 1, *loc);
3f2d73f1
PE
196 }
197}
198
199
e9955c83
AD
200 /*----------------------------.
201 | Scanning Bison directives. |
202 `----------------------------*/
a7c09cba
DJ
203
204 /* For directives that are also command line options, the regex must be
e9690142 205 "%..."
a7c09cba
DJ
206 after "[-_]"s are removed, and the directive must match the --long
207 option name, with a single string argument. Otherwise, add exceptions
208 to ../build-aux/cross-options.pl. */
209
e9955c83
AD
210<INITIAL>
211{
deef2a0a 212 "%binary" return PERCENT_NONASSOC;
136a0f76 213 "%code" return PERCENT_CODE;
fa819509 214 "%debug" RETURN_PERCENT_FLAG("parse.trace");
2062d72d 215 "%default-prec" return PERCENT_DEFAULT_PREC;
deef2a0a
AD
216 "%define" return PERCENT_DEFINE;
217 "%defines" return PERCENT_DEFINES;
218 "%destructor" return PERCENT_DESTRUCTOR;
219 "%dprec" return PERCENT_DPREC;
2062d72d 220 "%error-verbose" return PERCENT_ERROR_VERBOSE;
deef2a0a 221 "%expect" return PERCENT_EXPECT;
2062d72d 222 "%expect-rr" return PERCENT_EXPECT_RR;
deef2a0a 223 "%file-prefix" return PERCENT_FILE_PREFIX;
2062d72d 224 "%fixed-output-files" return PERCENT_YACC;
deef2a0a
AD
225 "%initial-action" return PERCENT_INITIAL_ACTION;
226 "%glr-parser" return PERCENT_GLR_PARSER;
227 "%language" return PERCENT_LANGUAGE;
228 "%left" return PERCENT_LEFT;
a7706735 229 "%lex-param" RETURN_PERCENT_PARAM(lex);
bc0f5737 230 "%locations" RETURN_PERCENT_FLAG("locations");
deef2a0a 231 "%merge" return PERCENT_MERGE;
2062d72d
TR
232 "%name-prefix" return PERCENT_NAME_PREFIX;
233 "%no-default-prec" return PERCENT_NO_DEFAULT_PREC;
234 "%no-lines" return PERCENT_NO_LINES;
deef2a0a
AD
235 "%nonassoc" return PERCENT_NONASSOC;
236 "%nondeterministic-parser" return PERCENT_NONDETERMINISTIC_PARSER;
237 "%nterm" return PERCENT_NTERM;
238 "%output" return PERCENT_OUTPUT;
a7706735
AD
239 "%param" RETURN_PERCENT_PARAM(both);
240 "%parse-param" RETURN_PERCENT_PARAM(parse);
deef2a0a 241 "%prec" return PERCENT_PREC;
d78f0ac9 242 "%precedence" return PERCENT_PRECEDENCE;
deef2a0a 243 "%printer" return PERCENT_PRINTER;
2062d72d 244 "%pure-parser" RETURN_PERCENT_FLAG("api.pure");
deef2a0a
AD
245 "%require" return PERCENT_REQUIRE;
246 "%right" return PERCENT_RIGHT;
247 "%skeleton" return PERCENT_SKELETON;
248 "%start" return PERCENT_START;
249 "%term" return PERCENT_TOKEN;
250 "%token" return PERCENT_TOKEN;
2062d72d 251 "%token-table" return PERCENT_TOKEN_TABLE;
deef2a0a
AD
252 "%type" return PERCENT_TYPE;
253 "%union" return PERCENT_UNION;
254 "%verbose" return PERCENT_VERBOSE;
255 "%yacc" return PERCENT_YACC;
e9955c83 256
2062d72d
TR
257 /* deprecated */
258 "%default"[-_]"prec" DEPRECATED("%default-prec");
259 "%error"[-_]"verbose" DEPRECATED("%define parse.error verbose");
260 "%expect"[-_]"rr" DEPRECATED("%expect-rr");
261 "%file-prefix"{eqopt} DEPRECATED("%file-prefix");
262 "%fixed"[-_]"output"[-_]"files" DEPRECATED("%fixed-output-files");
263 "%name"[-_]"prefix"{eqopt} DEPRECATED("%name-prefix");
264 "%no"[-_]"default"[-_]"prec" DEPRECATED("%no-default-prec");
265 "%no"[-_]"lines" DEPRECATED("%no-lines");
266 "%output"{eqopt} DEPRECATED("%output");
267 "%pure"[-_]"parser" DEPRECATED("%pure-parser");
268 "%token"[-_]"table" DEPRECATED("%token-table");
269
68ac70bc 270 "%"{id}|"%"{notletter}([[:graph:]])+ {
bb8e56ff 271 complain (loc, complaint, _("invalid directive: %s"), quote (yytext));
412f8a59 272 }
900c5db5 273
e9955c83 274 "=" return EQUAL;
e9071366 275 "|" return PIPE;
e9955c83
AD
276 ";" return SEMICOLON;
277
3f2d73f1 278 {id} {
58d7a1a1 279 val->uniqstr = uniqstr_new (yytext);
3f2d73f1 280 id_loc = *loc;
b9f1d9a4 281 bracketed_id_str = NULL;
3f2d73f1 282 BEGIN SC_AFTER_IDENTIFIER;
e9955c83
AD
283 }
284
d8d3f94a 285 {int} {
1452af69
PE
286 val->integer = scan_integer (yytext, 10, *loc);
287 return INT;
288 }
289 0[xX][0-9abcdefABCDEF]+ {
290 val->integer = scan_integer (yytext, 16, *loc);
d8d3f94a
PE
291 return INT;
292 }
e9955c83 293
84a1cb5a
AD
294 /* Identifiers may not start with a digit. Yet, don't silently
295 accept "1FOO" as "1 FOO". */
296 {int}{id} {
bb8e56ff 297 complain (loc, complaint, _("invalid identifier: %s"), quote (yytext));
84a1cb5a
AD
298 }
299
3208e3f4 300 /* Characters. */
e9690142 301 "'" token_start = loc->start; BEGIN SC_ESCAPED_CHARACTER;
e9955c83
AD
302
303 /* Strings. */
e9690142 304 "\"" token_start = loc->start; BEGIN SC_ESCAPED_STRING;
e9955c83
AD
305
306 /* Prologue. */
3f2d73f1 307 "%{" code_start = loc->start; BEGIN SC_PROLOGUE;
e9955c83
AD
308
309 /* Code in between braces. */
3f2d73f1
PE
310 "{" {
311 STRING_GROW;
cb823b6f 312 nesting = 0;
3f2d73f1
PE
313 code_start = loc->start;
314 BEGIN SC_BRACED_CODE;
315 }
e9955c83 316
ca2a6d15
PH
317 /* Semantic predicate. */
318 "%?"[ \f\n\t\v]*"{" {
319 nesting = 0;
320 code_start = loc->start;
321 BEGIN SC_PREDICATE;
322 }
323
e9955c83 324 /* A type. */
cb823b6f
AD
325 "<*>" return TAG_ANY;
326 "<>" return TAG_NONE;
d8d3f94a 327 "<"{tag}">" {
223ff46e 328 obstack_grow (&obstack_for_string, yytext + 1, yyleng - 2);
41141c56 329 STRING_FINISH;
223ff46e 330 val->uniqstr = uniqstr_new (last_string);
41141c56 331 STRING_FREE;
cb823b6f
AD
332 return TAG;
333 }
334 "<" {
335 nesting = 0;
336 token_start = loc->start;
337 BEGIN SC_TAG;
4cdb01db
AD
338 }
339
a706a1cc
PE
340 "%%" {
341 static int percent_percent_count;
e9955c83 342 if (++percent_percent_count == 2)
a2bc9dbc 343 BEGIN SC_EPILOGUE;
e9955c83
AD
344 return PERCENT_PERCENT;
345 }
346
b9f1d9a4
AR
347 "[" {
348 bracketed_id_str = NULL;
349 bracketed_id_start = loc->start;
350 bracketed_id_context_state = YY_START;
351 BEGIN SC_BRACKETED_ID;
352 }
353
68ac70bc 354 [^\[%A-Za-z0-9_<>{}\"\'*;|=/, \f\n\t\v]+|. {
c6b17724
AD
355 complain (loc, complaint, "%s: %s",
356 ngettext ("invalid character", "invalid characters", yyleng),
e42906f7 357 quote_mem (yytext, yyleng));
3f2d73f1 358 }
379f0ac8
PE
359
360 <<EOF>> {
361 loc->start = loc->end = scanner_cursor;
362 yyterminate ();
363 }
3f2d73f1
PE
364}
365
366
cb823b6f
AD
367 /*--------------------------------------------------------------.
368 | Supporting \0 complexifies our implementation for no expected |
369 | added value. |
370 `--------------------------------------------------------------*/
371
372<SC_ESCAPED_CHARACTER,SC_ESCAPED_STRING,SC_TAG>
373{
bb8e56ff 374 \0 complain (loc, complaint, _("invalid null character"));
cb823b6f
AD
375}
376
377
3f2d73f1
PE
378 /*-----------------------------------------------------------------.
379 | Scanning after an identifier, checking whether a colon is next. |
380 `-----------------------------------------------------------------*/
381
382<SC_AFTER_IDENTIFIER>
383{
b9f1d9a4 384 "[" {
872b52bc 385 if (bracketed_id_str)
b9f1d9a4 386 {
e9690142
JD
387 ROLLBACK_CURRENT_TOKEN;
388 BEGIN SC_RETURN_BRACKETED_ID;
389 *loc = id_loc;
390 return ID;
b9f1d9a4 391 }
872b52bc
AR
392 else
393 {
e9690142
JD
394 bracketed_id_start = loc->start;
395 bracketed_id_context_state = YY_START;
396 BEGIN SC_BRACKETED_ID;
872b52bc 397 }
b9f1d9a4 398 }
3f2d73f1 399 ":" {
b9f1d9a4 400 BEGIN (bracketed_id_str ? SC_RETURN_BRACKETED_ID : INITIAL);
3f2d73f1 401 *loc = id_loc;
3f2d73f1
PE
402 return ID_COLON;
403 }
404 . {
b9f1d9a4
AR
405 ROLLBACK_CURRENT_TOKEN;
406 BEGIN (bracketed_id_str ? SC_RETURN_BRACKETED_ID : INITIAL);
3f2d73f1 407 *loc = id_loc;
3f2d73f1
PE
408 return ID;
409 }
410 <<EOF>> {
b9f1d9a4 411 BEGIN (bracketed_id_str ? SC_RETURN_BRACKETED_ID : INITIAL);
3f2d73f1 412 *loc = id_loc;
3f2d73f1 413 return ID;
e9955c83
AD
414 }
415}
416
b9f1d9a4
AR
417 /*--------------------------------.
418 | Scanning bracketed identifiers. |
419 `--------------------------------*/
420
421<SC_BRACKETED_ID>
422{
423 {id} {
872b52bc 424 if (bracketed_id_str)
b9f1d9a4 425 {
bb8e56ff
TR
426 complain (loc, complaint,
427 _("unexpected identifier in bracketed name: %s"),
428 quote (yytext));
b9f1d9a4
AR
429 }
430 else
431 {
e9690142
JD
432 bracketed_id_str = uniqstr_new (yytext);
433 bracketed_id_loc = *loc;
b9f1d9a4
AR
434 }
435 }
436 "]" {
437 BEGIN bracketed_id_context_state;
438 if (bracketed_id_str)
439 {
e9690142
JD
440 if (INITIAL == bracketed_id_context_state)
441 {
442 val->uniqstr = bracketed_id_str;
443 bracketed_id_str = 0;
444 *loc = bracketed_id_loc;
445 return BRACKETED_ID;
446 }
b9f1d9a4
AR
447 }
448 else
bb8e56ff 449 complain (loc, complaint, _("an identifier expected"));
b9f1d9a4 450 }
68ac70bc
AD
451
452 [^\].A-Za-z0-9_/ \f\n\t\v]+|. {
c6b17724
AD
453 complain (loc, complaint, "%s: %s",
454 ngettext ("invalid character in bracketed name",
455 "invalid characters in bracketed name", yyleng),
e42906f7 456 quote_mem (yytext, yyleng));
b9f1d9a4 457 }
68ac70bc 458
b9f1d9a4
AR
459 <<EOF>> {
460 BEGIN bracketed_id_context_state;
461 unexpected_eof (bracketed_id_start, "]");
462 }
463}
464
465<SC_RETURN_BRACKETED_ID>
466{
467 . {
468 ROLLBACK_CURRENT_TOKEN;
469 val->uniqstr = bracketed_id_str;
470 bracketed_id_str = 0;
471 *loc = bracketed_id_loc;
472 BEGIN INITIAL;
473 return BRACKETED_ID;
474 }
475}
476
e9955c83 477
d8d3f94a 478 /*---------------------------------------------------------------.
ae93e4e4 479 | Scanning a Yacc comment. The initial '/ *' is already eaten. |
d8d3f94a 480 `---------------------------------------------------------------*/
e9955c83 481
d8d3f94a 482<SC_YACC_COMMENT>
e9955c83 483{
3f2d73f1 484 "*/" BEGIN context_state;
c6b17724 485 .|\n continue;
aa418041 486 <<EOF>> unexpected_eof (token_start, "*/"); BEGIN context_state;
d8d3f94a
PE
487}
488
489
490 /*------------------------------------------------------------.
ae93e4e4 491 | Scanning a C comment. The initial '/ *' is already eaten. |
d8d3f94a
PE
492 `------------------------------------------------------------*/
493
494<SC_COMMENT>
495{
3f2d73f1 496 "*"{splice}"/" STRING_GROW; BEGIN context_state;
e9690142 497 <<EOF>> unexpected_eof (token_start, "*/"); BEGIN context_state;
e9955c83
AD
498}
499
500
d8d3f94a 501 /*--------------------------------------------------------------.
ae93e4e4 502 | Scanning a line comment. The initial '//' is already eaten. |
d8d3f94a
PE
503 `--------------------------------------------------------------*/
504
505<SC_LINE_COMMENT>
506{
e9690142
JD
507 "\n" STRING_GROW; BEGIN context_state;
508 {splice} STRING_GROW;
509 <<EOF>> BEGIN context_state;
d8d3f94a
PE
510}
511
512
4febdd96
PE
513 /*------------------------------------------------.
514 | Scanning a Bison string, including its escapes. |
515 | The initial quote is already eaten. |
516 `------------------------------------------------*/
e9955c83
AD
517
518<SC_ESCAPED_STRING>
519{
47aee066
JD
520 "\""|"\n" {
521 if (yytext[0] == '\n')
522 unexpected_newline (token_start, "\"");
523 STRING_FINISH;
524 loc->start = token_start;
525 val->chars = last_string;
526 BEGIN INITIAL;
527 return STRING;
528 }
529 <<EOF>> {
530 unexpected_eof (token_start, "\"");
41141c56 531 STRING_FINISH;
3f2d73f1 532 loc->start = token_start;
223ff46e 533 val->chars = last_string;
a706a1cc 534 BEGIN INITIAL;
e9955c83
AD
535 return STRING;
536 }
e9955c83
AD
537}
538
4febdd96
PE
539 /*----------------------------------------------------------.
540 | Scanning a Bison character literal, decoding its escapes. |
e9690142 541 | The initial quote is already eaten. |
4febdd96 542 `----------------------------------------------------------*/
e9955c83
AD
543
544<SC_ESCAPED_CHARACTER>
545{
47aee066 546 "'"|"\n" {
41141c56 547 STRING_FINISH;
3f2d73f1 548 loc->start = token_start;
dfaa4860 549 val->character = last_string[0];
3208e3f4
JD
550 {
551 /* FIXME: Eventually, make these errors. */
dfaa4860
JD
552 if (last_string[0] == '\0')
553 {
bb8e56ff 554 complain (loc, Wother, _("empty character literal"));
dfaa4860
JD
555 /* '\0' seems dangerous even if we are about to complain. */
556 val->character = '\'';
557 }
558 else if (last_string[1] != '\0')
bb8e56ff 559 complain (loc, Wother,
6fb8b256 560 _("extra characters in character literal"));
3208e3f4
JD
561 }
562 if (yytext[0] == '\n')
563 unexpected_newline (token_start, "'");
41141c56 564 STRING_FREE;
a706a1cc 565 BEGIN INITIAL;
58d7a1a1 566 return CHAR;
e9955c83 567 }
47aee066 568 <<EOF>> {
47aee066
JD
569 STRING_FINISH;
570 loc->start = token_start;
dfaa4860 571 val->character = last_string[0];
3208e3f4 572 {
3208e3f4 573 /* FIXME: Eventually, make these errors. */
dfaa4860
JD
574 if (last_string[0] == '\0')
575 {
bb8e56ff 576 complain (loc, Wother, _("empty character literal"));
dfaa4860
JD
577 /* '\0' seems dangerous even if we are about to complain. */
578 val->character = '\'';
579 }
580 else if (last_string[1] != '\0')
bb8e56ff 581 complain (loc, Wother,
6fb8b256 582 _("extra characters in character literal"));
3208e3f4
JD
583 }
584 unexpected_eof (token_start, "'");
47aee066
JD
585 STRING_FREE;
586 BEGIN INITIAL;
587 return CHAR;
588 }
4febdd96 589}
a706a1cc 590
cb823b6f
AD
591 /*-----------------------------------------------------------.
592 | Scanning a Bison nested tag. The initial angle bracket is |
593 | already eaten. |
594 `-----------------------------------------------------------*/
595
596<SC_TAG>
4febdd96 597{
cb823b6f
AD
598 ">" {
599 --nesting;
600 if (nesting < 0)
601 {
602 STRING_FINISH;
603 loc->start = token_start;
604 val->uniqstr = uniqstr_new (last_string);
605 STRING_FREE;
606 BEGIN INITIAL;
607 return TAG;
608 }
609 STRING_GROW;
610 }
611
612 [^<>]+ STRING_GROW;
613 "<"+ STRING_GROW; nesting += yyleng;
e9955c83 614
cb823b6f
AD
615 <<EOF>> {
616 unexpected_eof (token_start, ">");
617 STRING_FINISH;
618 loc->start = token_start;
619 val->uniqstr = uniqstr_new (last_string);
620 STRING_FREE;
621 BEGIN INITIAL;
622 return TAG;
623 }
624}
e9955c83
AD
625
626 /*----------------------------.
627 | Decode escaped characters. |
628 `----------------------------*/
629
630<SC_ESCAPED_STRING,SC_ESCAPED_CHARACTER>
631{
d8d3f94a 632 \\[0-7]{1,3} {
4517da37 633 unsigned long int c = strtoul (yytext + 1, NULL, 8);
c2724603 634 if (!c || UCHAR_MAX < c)
bb8e56ff 635 complain (loc, complaint, _("invalid number after \\-escape: %s"),
c2724603 636 yytext+1);
e9955c83 637 else
223ff46e 638 obstack_1grow (&obstack_for_string, c);
e9955c83
AD
639 }
640
6b0d38ab 641 \\x[0-9abcdefABCDEF]+ {
4517da37
PE
642 verify (UCHAR_MAX < ULONG_MAX);
643 unsigned long int c = strtoul (yytext + 2, NULL, 16);
c2724603 644 if (!c || UCHAR_MAX < c)
bb8e56ff 645 complain (loc, complaint, _("invalid number after \\-escape: %s"),
c2724603 646 yytext+1);
d8d3f94a 647 else
223ff46e 648 obstack_1grow (&obstack_for_string, c);
e9955c83
AD
649 }
650
e9690142
JD
651 \\a obstack_1grow (&obstack_for_string, '\a');
652 \\b obstack_1grow (&obstack_for_string, '\b');
653 \\f obstack_1grow (&obstack_for_string, '\f');
654 \\n obstack_1grow (&obstack_for_string, '\n');
655 \\r obstack_1grow (&obstack_for_string, '\r');
656 \\t obstack_1grow (&obstack_for_string, '\t');
657 \\v obstack_1grow (&obstack_for_string, '\v');
412f8a59
PE
658
659 /* \\[\"\'?\\] would be shorter, but it confuses xgettext. */
223ff46e 660 \\("\""|"'"|"?"|"\\") obstack_1grow (&obstack_for_string, yytext[1]);
412f8a59 661
6b0d38ab 662 \\(u|U[0-9abcdefABCDEF]{4})[0-9abcdefABCDEF]{4} {
d8d3f94a 663 int c = convert_ucn_to_byte (yytext);
c2724603 664 if (c <= 0)
bb8e56ff 665 complain (loc, complaint, _("invalid number after \\-escape: %s"),
c2724603 666 yytext+1);
d8d3f94a 667 else
223ff46e 668 obstack_1grow (&obstack_for_string, c);
d8d3f94a 669 }
e9690142 670 \\(.|\n) {
c2724603 671 char const *p = yytext + 1;
e6c849d8 672 /* Quote only if escaping won't make the character visible. */
457bf919 673 if (c_isspace ((unsigned char) *p) && c_isprint ((unsigned char) *p))
e6c849d8 674 p = quote (p);
c2724603
JD
675 else
676 p = quotearg_style_mem (escape_quoting_style, p, 1);
bb8e56ff 677 complain (loc, complaint, _("invalid character after \\-escape: %s"),
6fb8b256 678 p);
e9955c83
AD
679 }
680}
681
4febdd96
PE
682 /*--------------------------------------------.
683 | Scanning user-code characters and strings. |
684 `--------------------------------------------*/
e9955c83 685
4febdd96
PE
686<SC_CHARACTER,SC_STRING>
687{
e9690142 688 {splice}|\\{splice}[^\n\[\]] STRING_GROW;
4febdd96 689}
e9955c83
AD
690
691<SC_CHARACTER>
692{
e9690142
JD
693 "'" STRING_GROW; BEGIN context_state;
694 \n unexpected_newline (token_start, "'"); BEGIN context_state;
695 <<EOF>> unexpected_eof (token_start, "'"); BEGIN context_state;
e9955c83
AD
696}
697
e9955c83
AD
698<SC_STRING>
699{
e9690142
JD
700 "\"" STRING_GROW; BEGIN context_state;
701 \n unexpected_newline (token_start, "\""); BEGIN context_state;
702 <<EOF>> unexpected_eof (token_start, "\""); BEGIN context_state;
e9955c83
AD
703}
704
705
706 /*---------------------------------------------------.
707 | Strings, comments etc. can be found in user code. |
708 `---------------------------------------------------*/
709
ca2a6d15 710<SC_BRACED_CODE,SC_PROLOGUE,SC_EPILOGUE,SC_PREDICATE>
e9955c83 711{
3f2d73f1
PE
712 "'" {
713 STRING_GROW;
714 context_state = YY_START;
715 token_start = loc->start;
716 BEGIN SC_CHARACTER;
717 }
718 "\"" {
719 STRING_GROW;
720 context_state = YY_START;
721 token_start = loc->start;
722 BEGIN SC_STRING;
723 }
724 "/"{splice}"*" {
725 STRING_GROW;
726 context_state = YY_START;
727 token_start = loc->start;
728 BEGIN SC_COMMENT;
729 }
730 "/"{splice}"/" {
731 STRING_GROW;
732 context_state = YY_START;
733 BEGIN SC_LINE_COMMENT;
734 }
e9955c83
AD
735}
736
737
624a35e2 738
58d7a1a1 739 /*-----------------------------------------------------------.
ca2a6d15
PH
740 | Scanning some code in braces (actions, predicates). The |
741 | initial "{" is already eaten. |
58d7a1a1 742 `-----------------------------------------------------------*/
e9955c83 743
ca2a6d15 744<SC_BRACED_CODE,SC_PREDICATE>
e9955c83 745{
cb823b6f
AD
746 "{"|"<"{splice}"%" STRING_GROW; nesting++;
747 "%"{splice}">" STRING_GROW; nesting--;
ca2a6d15 748
ae93e4e4
JM
749 /* Tokenize '<<%' correctly (as '<<' '%') rather than incorrectly
750 (as '<' '<%'). */
ca2a6d15
PH
751 "<"{splice}"<" STRING_GROW;
752
753 <<EOF>> {
754 int token = (YY_START == SC_BRACED_CODE) ? BRACED_CODE : BRACED_PREDICATE;
755 unexpected_eof (code_start, "}");
756 STRING_FINISH;
757 loc->start = code_start;
758 val->code = last_string;
759 BEGIN INITIAL;
760 return token;
761 }
762}
763
764<SC_BRACED_CODE>
765{
e9955c83 766 "}" {
25522739
PE
767 obstack_1grow (&obstack_for_string, '}');
768
cb823b6f
AD
769 --nesting;
770 if (nesting < 0)
e9955c83 771 {
e9690142
JD
772 STRING_FINISH;
773 loc->start = code_start;
774 val->code = last_string;
775 BEGIN INITIAL;
776 return BRACED_CODE;
e9955c83
AD
777 }
778 }
ca2a6d15 779}
e9955c83 780
ca2a6d15
PH
781<SC_PREDICATE>
782{
783 "}" {
784 --nesting;
785 if (nesting < 0)
786 {
e9690142
JD
787 STRING_FINISH;
788 loc->start = code_start;
789 val->code = last_string;
790 BEGIN INITIAL;
791 return BRACED_PREDICATE;
ca2a6d15
PH
792 }
793 else
794 obstack_1grow (&obstack_for_string, '}');
47aee066 795 }
e9955c83
AD
796}
797
e9955c83
AD
798 /*--------------------------------------------------------------.
799 | Scanning some prologue: from "%{" (already scanned) to "%}". |
800 `--------------------------------------------------------------*/
801
802<SC_PROLOGUE>
803{
804 "%}" {
41141c56 805 STRING_FINISH;
3f2d73f1 806 loc->start = code_start;
223ff46e 807 val->chars = last_string;
a706a1cc 808 BEGIN INITIAL;
e9955c83
AD
809 return PROLOGUE;
810 }
811
47aee066
JD
812 <<EOF>> {
813 unexpected_eof (code_start, "%}");
814 STRING_FINISH;
815 loc->start = code_start;
816 val->chars = last_string;
817 BEGIN INITIAL;
818 return PROLOGUE;
819 }
e9955c83
AD
820}
821
822
823 /*---------------------------------------------------------------.
824 | Scanning the epilogue (everything after the second "%%", which |
d8d3f94a 825 | has already been eaten). |
e9955c83
AD
826 `---------------------------------------------------------------*/
827
828<SC_EPILOGUE>
829{
e9955c83 830 <<EOF>> {
41141c56 831 STRING_FINISH;
3f2d73f1 832 loc->start = code_start;
223ff46e 833 val->chars = last_string;
a706a1cc 834 BEGIN INITIAL;
e9955c83
AD
835 return EPILOGUE;
836 }
837}
838
839
4febdd96
PE
840 /*-----------------------------------------------------.
841 | By default, grow the string obstack with the input. |
842 `-----------------------------------------------------*/
843
e9690142
JD
844<SC_COMMENT,SC_LINE_COMMENT,SC_BRACED_CODE,SC_PREDICATE,SC_PROLOGUE,SC_EPILOGUE,SC_STRING,SC_CHARACTER,SC_ESCAPED_STRING,SC_ESCAPED_CHARACTER>. |
845 <SC_COMMENT,SC_LINE_COMMENT,SC_BRACED_CODE,SC_PREDICATE,SC_PROLOGUE,SC_EPILOGUE>\n STRING_GROW;
4febdd96 846
e9955c83
AD
847%%
848
6c30d641
PE
/* Fill BUF with up to SIZE bytes read from FP and return how many
   bytes BUF holds afterwards.  Carriage returns never reach the
   caller: both "\r\n" and a lone '\r' are delivered as a single
   '\n', so the count returned may be smaller than the number of
   bytes actually consumed from FP.  */

static size_t
no_cr_read (FILE *fp, char *buf, size_t size)
{
  size_t const nread = fread (buf, 1, size, fp);
  char *out = nread ? memchr (buf, '\r', nread) : NULL;

  /* Nothing was read, or the chunk holds no '\r': BUF is already in
     its final form.  */
  if (!out)
    return nread;

  /* OUT is the write position of the in-place compaction, IN the
     read position; both start just past the first '\r'.  */
  char const *const end = buf + nread;
  char const *in = ++out;

  for (;;)
    {
      /* OUT[-1] holds a '\r' that was just seen or copied.  Turn it
         into '\n' and swallow a '\n' that directly follows it.  */
      out[-1] = '\n';
      if (in == end)
        {
          /* The '\r' was the last byte of the chunk, so the '\n' to
             swallow, if any, is still in FP.  Read one character and
             push back anything that is not '\n'; give up if the
             push-back fails.  */
          int next = getc (fp);
          if (next != '\n' && ungetc (next, fp) != next)
            break;
        }
      else if (*in == '\n')
        in++;

      /* Slide bytes down until another '\r' has been copied or the
         chunk is exhausted.  */
      for (;;)
        {
          if (in == end)
            return out - buf;
          char const c = *in++;
          *out++ = c;
          if (c == '\r')
            break;
        }
    }

  return out - buf;
}
894
895
f25bfb75 896
1452af69
PE
897/*------------------------------------------------------.
898| Scan NUMBER for a base-BASE integer at location LOC. |
899`------------------------------------------------------*/
900
901static unsigned long int
902scan_integer (char const *number, int base, location loc)
903{
4517da37
PE
904 verify (INT_MAX < ULONG_MAX);
905 unsigned long int num = strtoul (number, NULL, base);
906
907 if (INT_MAX < num)
1452af69 908 {
bb8e56ff 909 complain (&loc, complaint, _("integer out of range: %s"),
6fb8b256 910 quote (number));
1452af69
PE
911 num = INT_MAX;
912 }
4517da37 913
1452af69
PE
914 return num;
915}
916
917
d8d3f94a
PE
/*------------------------------------------------------------------.
| Convert universal character name UCN to a single-byte character, |
| and return that character.  Return -1 if UCN does not correspond |
| to a single-byte character.                                      |
|                                                                  |
| The hexadecimal digits are read starting at UCN + 2, i.e. the    |
| first two characters of UCN are skipped.                         |
`------------------------------------------------------------------*/

static int
convert_ucn_to_byte (char const *ucn)
{
  verify (UCHAR_MAX <= INT_MAX);
  unsigned long int code = strtoul (ucn + 2, NULL, 16);

  /* FIXME: Currently we assume Unicode-compatible unibyte characters
     on ASCII hosts (i.e., Latin-1 on hosts with 8-bit bytes).  On
     non-ASCII hosts we support only the portable C character set.
     These limitations should be removed once we add support for
     multibyte characters.  */

  if (UCHAR_MAX < code)
    return -1;

  /* CODE is now at most UCHAR_MAX, hence (see the verify above) it is
     representable as an int.  The result is kept in an int from here
     on, so that the -1 "no such byte" marker is never stored in an
     unsigned variable and converted back.  */
  int byte = code;

#if ! ('$' == 0x24 && '@' == 0x40 && '`' == 0x60 && '~' == 0x7e)
  {
    /* A non-ASCII host.  Use CODE to index into a table of the C
       basic execution character set, which is guaranteed to exist on
       all Standard C platforms.  This table also includes '$', '@',
       and '`', which are not in the basic execution character set but
       which are unibyte characters on all the platforms that we know
       about.  */
    static signed char const table[] =
      {
        '\0',   -1,   -1,   -1,   -1,   -1,   -1, '\a',
        '\b', '\t', '\n', '\v', '\f', '\r',   -1,   -1,
          -1,   -1,   -1,   -1,   -1,   -1,   -1,   -1,
          -1,   -1,   -1,   -1,   -1,   -1,   -1,   -1,
         ' ',  '!',  '"',  '#',  '$',  '%',  '&', '\'',
         '(',  ')',  '*',  '+',  ',',  '-',  '.',  '/',
         '0',  '1',  '2',  '3',  '4',  '5',  '6',  '7',
         '8',  '9',  ':',  ';',  '<',  '=',  '>',  '?',
         '@',  'A',  'B',  'C',  'D',  'E',  'F',  'G',
         'H',  'I',  'J',  'K',  'L',  'M',  'N',  'O',
         'P',  'Q',  'R',  'S',  'T',  'U',  'V',  'W',
         'X',  'Y',  'Z',  '[', '\\',  ']',  '^',  '_',
         '`',  'a',  'b',  'c',  'd',  'e',  'f',  'g',
         'h',  'i',  'j',  'k',  'l',  'm',  'n',  'o',
         'p',  'q',  'r',  's',  't',  'u',  'v',  'w',
         'x',  'y',  'z',  '{',  '|',  '}',  '~'
      };

    /* Codes past the end of the table have no unibyte equivalent.  */
    byte = code < sizeof table ? table[code] : -1;
  }
#endif

  return byte;
}
973
974
03dbf629
AD
975/*---------------------------------------------------------------------.
976| Handle '#line INT( "FILE")?\n'. ARGS has already skipped '#line '. |
977`---------------------------------------------------------------------*/
900c5db5
AD
978
979static void
4517da37 980handle_syncline (char *args, location loc)
900c5db5 981{
03dbf629
AD
982 char *file;
983 unsigned long int lineno = strtoul (args, &file, 10);
4517da37
PE
984 if (INT_MAX <= lineno)
985 {
bb8e56ff 986 complain (&loc, Wother, _("line number overflow"));
4517da37
PE
987 lineno = INT_MAX;
988 }
03dbf629 989
064e42b0 990 file = strchr (file, '"');
03dbf629
AD
991 if (file)
992 {
064e42b0 993 *strchr (file + 1, '"') = '\0';
03dbf629
AD
994 current_file = uniqstr_new (file + 1);
995 }
0c8e079f 996 boundary_set (&scanner_cursor, current_file, lineno, 1);
4517da37
PE
997}
998
999
4febdd96
PE
1000/*----------------------------------------------------------------.
1001| For a token or comment starting at START, report message MSGID, |
e9690142
JD
1002| which should say that an end marker was found before |
1003| the expected TOKEN_END. |
4febdd96
PE
1004`----------------------------------------------------------------*/
1005
1006static void
1007unexpected_end (boundary start, char const *msgid, char const *token_end)
1008{
1009 location loc;
1010 loc.start = start;
1011 loc.end = scanner_cursor;
4a9cd8f2 1012 token_end = quote (token_end);
1127a75a 1013 /* Instead of '\'', display "'". */
f518dbaf 1014 if (STREQ (token_end, "'\\''"))
4a9cd8f2 1015 token_end = "\"'\"";
bb8e56ff 1016 complain (&loc, complaint, _(msgid), token_end);
4febdd96
PE
1017}
1018
1019
3f2d73f1
PE
1020/*------------------------------------------------------------------------.
1021| Report an unexpected EOF in a token or comment starting at START. |
1022| An end of file was encountered and the expected TOKEN_END was missing. |
3f2d73f1 1023`------------------------------------------------------------------------*/
a706a1cc
PE
1024
1025static void
aa418041 1026unexpected_eof (boundary start, char const *token_end)
a706a1cc 1027{
4a9cd8f2 1028 unexpected_end (start, N_("missing %s at end of file"), token_end);
4febdd96
PE
1029}
1030
1031
1032/*----------------------------------------.
1033| Likewise, but for unexpected newlines. |
1034`----------------------------------------*/
1035
1036static void
1037unexpected_newline (boundary start, char const *token_end)
1038{
4a9cd8f2 1039 unexpected_end (start, N_("missing %s at end of line"), token_end);
a706a1cc
PE
1040}
1041
1042
f25bfb75
AD
1043/*-------------------------.
1044| Initialize the scanner. |
1045`-------------------------*/
1046
1d6412ad 1047void
e9071366 1048gram_scanner_initialize (void)
1d6412ad 1049{
223ff46e 1050 obstack_init (&obstack_for_string);
1d6412ad
AD
1051}
1052
1053
f25bfb75
AD
1054/*-----------------------------------------------.
1055| Free all the memory allocated to the scanner. |
1056`-----------------------------------------------*/
1057
4cdb01db 1058void
e9071366 1059gram_scanner_free (void)
4cdb01db 1060{
223ff46e 1061 obstack_free (&obstack_for_string, 0);
536545f3 1062 /* Reclaim Flex's buffers. */
580b8926 1063 yylex_destroy ();
4cdb01db 1064}