]> git.saurik.com Git - bison.git/blame - src/scan-gram.l
c: no longer require stdio.h when locations are enabled
[bison.git] / src / scan-gram.l
CommitLineData
e9955c83 1/* Bison Grammar Scanner -*- C -*-
3b1e470c 2
7d6bad19 3 Copyright (C) 2002-2013 Free Software Foundation, Inc.
e9955c83
AD
4
5 This file is part of Bison, the GNU Compiler Compiler.
6
f16b0819 7 This program is free software: you can redistribute it and/or modify
e9955c83 8 it under the terms of the GNU General Public License as published by
f16b0819 9 the Free Software Foundation, either version 3 of the License, or
e9955c83
AD
10 (at your option) any later version.
11
12 This program is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
16
17 You should have received a copy of the GNU General Public License
f16b0819 18 along with this program. If not, see <http://www.gnu.org/licenses/>. */
e9955c83 19
2062d72d 20%option debug nodefault noinput noyywrap never-interactive
e9955c83
AD
21%option prefix="gram_" outfile="lex.yy.c"
22
23%{
4f6e011e
PE
24/* Work around a bug in flex 2.5.31. See Debian bug 333231
25 <http://bugs.debian.org/cgi-bin/bugreport.cgi?bug=333231>. */
26#undef gram_wrap
27#define gram_wrap() 1
28
e9071366 29#define FLEX_PREFIX(Id) gram_ ## Id
0305d25e 30#include <src/flex-scanner.h>
223ff46e 31
0305d25e
AD
32#include <src/complain.h>
33#include <src/files.h>
2062d72d 34#include <src/getargs.h>
0305d25e
AD
35#include <src/gram.h>
36#include <quotearg.h>
37#include <src/reader.h>
38#include <src/uniqstr.h>
e9955c83 39
457bf919 40#include <c-ctype.h>
e9071366
AD
41#include <mbswidth.h>
42#include <quote.h>
43
0305d25e 44#include <src/scan-gram.h>
e9071366
AD
45
46#define YY_DECL GRAM_LEX_DECL
2346344a 47
e9690142
JD
48#define YY_USER_INIT \
49 code_start = scanner_cursor = loc->start; \
dc9701e8 50
3f2d73f1 51/* Location of scanner cursor. */
4a678af8 52static boundary scanner_cursor;
41141c56 53
e9071366 54#define YY_USER_ACTION location_compute (loc, &scanner_cursor, yytext, yyleng);
d8d3f94a 55
6c30d641 56static size_t no_cr_read (FILE *, char *, size_t);
d8d3f94a
PE
57#define YY_INPUT(buf, result, size) ((result) = no_cr_read (yyin, buf, size))
58
a7706735
AD
59#define RETURN_PERCENT_PARAM(Value) \
60 RETURN_VALUE(PERCENT_PARAM, param, param_ ## Value)
61
62#define RETURN_PERCENT_FLAG(Value) \
63 RETURN_VALUE(PERCENT_FLAG, uniqstr, uniqstr_new (Value))
64
65#define RETURN_VALUE(Token, Field, Value) \
ba061fa6 66 do { \
a7706735
AD
67 val->Field = Value; \
68 return Token; \
ba061fa6
AD
69 } while (0)
70
b9f1d9a4
AR
71#define ROLLBACK_CURRENT_TOKEN \
72 do { \
e9690142 73 scanner_cursor.column -= mbsnwidth (yytext, yyleng, 0); \
b9f1d9a4
AR
74 yyless (0); \
75 } while (0)
ba061fa6 76
2062d72d
TR
77#define DEPRECATED(Msg) \
78 do { \
79 size_t i; \
1dc927a7 80 deprecated_directive (loc, yytext, Msg); \
2062d72d
TR
81 scanner_cursor.column -= mbsnwidth (Msg, strlen (Msg), 0); \
82 for (i = strlen (Msg); i != 0; --i) \
83 unput (Msg[i - 1]); \
84 } while (0)
85
7ec2d4cd 86/* A string representing the most recently saved token. */
7c0c6181 87static char *last_string;
7ec2d4cd 88
872b52bc 89/* Bracketed identifier. */
b9f1d9a4
AR
90static uniqstr bracketed_id_str = 0;
91static location bracketed_id_loc;
92static boundary bracketed_id_start;
93static int bracketed_id_context_state = 0;
94
7ec2d4cd 95void
e9071366 96gram_scanner_last_string_free (void)
7ec2d4cd 97{
/* Public entry point whose whole body is the STRING_FREE macro.  The
   macro is not defined in this file (presumably it comes from
   src/flex-scanner.h and releases the string held in last_string --
   confirm there).  */
41141c56 98 STRING_FREE;
7ec2d4cd 99}
e9955c83 100
4517da37 101static void handle_syncline (char *, location);
1452af69 102static unsigned long int scan_integer (char const *p, int base, location loc);
d8d3f94a 103static int convert_ucn_to_byte (char const *hex_text);
aa418041 104static void unexpected_eof (boundary, char const *);
4febdd96 105static void unexpected_newline (boundary, char const *);
e9955c83
AD
106
107%}
e9071366
AD
108 /* A C-like comment in directives/rules. */
109%x SC_YACC_COMMENT
110 /* Strings and characters in directives/rules. */
e9955c83 111%x SC_ESCAPED_STRING SC_ESCAPED_CHARACTER
e9071366 112 /* An identifier was just read in directives/rules. Special state
ae93e4e4 113 to capture the sequence 'identifier :'. */
e9071366 114%x SC_AFTER_IDENTIFIER
cb823b6f
AD
115 /* A complex tag, with nested angle brackets. */
116%x SC_TAG
e9071366 117
ca2a6d15 118 /* Four types of user code:
ae93e4e4 119 - prologue (code between '%{' '%}' in the first section, before %%);
e9071366 120 - actions, printers, union, etc, (between braces in the middle section);
da5462d4 121 - epilogue (everything after the second %%).
ae93e4e4 122 - predicate (code between '%?{' and '}' in middle section); */
ca2a6d15 123%x SC_PROLOGUE SC_BRACED_CODE SC_EPILOGUE SC_PREDICATE
e9071366
AD
124 /* C and C++ comments in code. */
125%x SC_COMMENT SC_LINE_COMMENT
126 /* Strings and characters in code. */
127%x SC_STRING SC_CHARACTER
872b52bc 128 /* Bracketed identifiers support. */
b9f1d9a4 129%x SC_BRACKETED_ID SC_RETURN_BRACKETED_ID
e9955c83 130
e9690142 131letter [.abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ_]
68ac70bc 132notletter [^.abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ_]{-}[%\{]
e9690142 133id {letter}({letter}|[-0-9])*
e9690142 134int [0-9]+
d8d3f94a
PE
135
136/* POSIX says that a tag must be both an id and a C union member, but
137 historically almost any character is allowed in a tag. We disallow
cb823b6f
AD
138 NUL, as this simplifies our implementation. We disallow angle
139 bracket to match them in nested pairs: several languages use them
140 for generics/template types. */
e9690142 141tag [^\0<>]+
d8d3f94a
PE
142
143/* Zero or more instances of backslash-newline. Following GCC, allow
144 white space between the backslash and the newline. */
e9690142 145splice (\\[ \f\t\v]*\n)*
e9955c83 146
2062d72d
TR
147/* An equal sign, with optional leading whitespaces. This is used in some
148 deprecated constructs. */
149eqopt ([[:space:]]*=)?
150
e9955c83
AD
151%%
152%{
cb823b6f
AD
153 /* Nesting level. Either for nested braces, or nested angle brackets
154 (but not mixed). */
84f6a6ca 155 int nesting PACIFY_CC (= 0);
1a9e39f1 156
3f2d73f1 157 /* Parent context state, when applicable. */
84f6a6ca 158 int context_state PACIFY_CC (= 0);
a706a1cc 159
3f2d73f1 160 /* Location of most recent identifier, when applicable. */
84f6a6ca 161 location id_loc PACIFY_CC (= empty_location);
3f2d73f1 162
a2bc9dbc
PE
163 /* Where containing code started, when applicable. Its initial
164 value is relevant only when yylex is invoked in the SC_EPILOGUE
165 start condition. */
166 boundary code_start = scanner_cursor;
3f2d73f1 167
223ff46e
PE
168 /* Where containing comment or string or character literal started,
169 when applicable. */
84f6a6ca 170 boundary token_start PACIFY_CC (= scanner_cursor);
e9955c83
AD
171%}
172
173
3f2d73f1
PE
174 /*-----------------------.
175 | Scanning white space. |
176 `-----------------------*/
177
b9f1d9a4 178<INITIAL,SC_AFTER_IDENTIFIER,SC_BRACKETED_ID,SC_RETURN_BRACKETED_ID>
3f2d73f1 179{
4febdd96 180 /* Comments and white space. */
6fb8b256 181 "," {
bb8e56ff 182 complain (loc, Wother, _("stray ',' treated as white space"));
6fb8b256 183 }
4febdd96 184 [ \f\n\t\v] |
96029914 185 "//".* continue;
83adb046
PE
186 "/*" {
187 token_start = loc->start;
188 context_state = YY_START;
189 BEGIN SC_YACC_COMMENT;
190 }
3f2d73f1
PE
191
192 /* #line directives are not documented, and may be withdrawn or
193 modified in future versions of Bison. */
03dbf629 194 ^"#line "{int}(" \"".*"\"")?"\n" {
4517da37 195 handle_syncline (yytext + sizeof "#line " - 1, *loc);
3f2d73f1
PE
196 }
197}
198
199
e9955c83
AD
200 /*----------------------------.
201 | Scanning Bison directives. |
202 `----------------------------*/
a7c09cba
DJ
203
204 /* For directives that are also command line options, the regex must be
e9690142 205 "%..."
a7c09cba
DJ
206 after "[-_]"s are removed, and the directive must match the --long
207 option name, with a single string argument. Otherwise, add exceptions
208 to ../build-aux/cross-options.pl. */
209
e9955c83
AD
210<INITIAL>
211{
deef2a0a 212 "%binary" return PERCENT_NONASSOC;
136a0f76 213 "%code" return PERCENT_CODE;
fa819509 214 "%debug" RETURN_PERCENT_FLAG("parse.trace");
2062d72d 215 "%default-prec" return PERCENT_DEFAULT_PREC;
deef2a0a
AD
216 "%define" return PERCENT_DEFINE;
217 "%defines" return PERCENT_DEFINES;
218 "%destructor" return PERCENT_DESTRUCTOR;
219 "%dprec" return PERCENT_DPREC;
2062d72d 220 "%error-verbose" return PERCENT_ERROR_VERBOSE;
deef2a0a 221 "%expect" return PERCENT_EXPECT;
2062d72d 222 "%expect-rr" return PERCENT_EXPECT_RR;
deef2a0a 223 "%file-prefix" return PERCENT_FILE_PREFIX;
2062d72d 224 "%fixed-output-files" return PERCENT_YACC;
deef2a0a
AD
225 "%initial-action" return PERCENT_INITIAL_ACTION;
226 "%glr-parser" return PERCENT_GLR_PARSER;
227 "%language" return PERCENT_LANGUAGE;
228 "%left" return PERCENT_LEFT;
a7706735 229 "%lex-param" RETURN_PERCENT_PARAM(lex);
bc0f5737 230 "%locations" RETURN_PERCENT_FLAG("locations");
deef2a0a 231 "%merge" return PERCENT_MERGE;
2062d72d
TR
232 "%name-prefix" return PERCENT_NAME_PREFIX;
233 "%no-default-prec" return PERCENT_NO_DEFAULT_PREC;
234 "%no-lines" return PERCENT_NO_LINES;
deef2a0a
AD
235 "%nonassoc" return PERCENT_NONASSOC;
236 "%nondeterministic-parser" return PERCENT_NONDETERMINISTIC_PARSER;
237 "%nterm" return PERCENT_NTERM;
238 "%output" return PERCENT_OUTPUT;
a7706735
AD
239 "%param" RETURN_PERCENT_PARAM(both);
240 "%parse-param" RETURN_PERCENT_PARAM(parse);
deef2a0a 241 "%prec" return PERCENT_PREC;
d78f0ac9 242 "%precedence" return PERCENT_PRECEDENCE;
deef2a0a 243 "%printer" return PERCENT_PRINTER;
2062d72d 244 "%pure-parser" RETURN_PERCENT_FLAG("api.pure");
deef2a0a
AD
245 "%require" return PERCENT_REQUIRE;
246 "%right" return PERCENT_RIGHT;
247 "%skeleton" return PERCENT_SKELETON;
248 "%start" return PERCENT_START;
249 "%term" return PERCENT_TOKEN;
250 "%token" return PERCENT_TOKEN;
2062d72d 251 "%token-table" return PERCENT_TOKEN_TABLE;
deef2a0a
AD
252 "%type" return PERCENT_TYPE;
253 "%union" return PERCENT_UNION;
254 "%verbose" return PERCENT_VERBOSE;
255 "%yacc" return PERCENT_YACC;
e9955c83 256
2062d72d
TR
257 /* deprecated */
258 "%default"[-_]"prec" DEPRECATED("%default-prec");
259 "%error"[-_]"verbose" DEPRECATED("%define parse.error verbose");
260 "%expect"[-_]"rr" DEPRECATED("%expect-rr");
261 "%file-prefix"{eqopt} DEPRECATED("%file-prefix");
262 "%fixed"[-_]"output"[-_]"files" DEPRECATED("%fixed-output-files");
263 "%name"[-_]"prefix"{eqopt} DEPRECATED("%name-prefix");
264 "%no"[-_]"default"[-_]"prec" DEPRECATED("%no-default-prec");
265 "%no"[-_]"lines" DEPRECATED("%no-lines");
266 "%output"{eqopt} DEPRECATED("%output");
267 "%pure"[-_]"parser" DEPRECATED("%pure-parser");
268 "%token"[-_]"table" DEPRECATED("%token-table");
269
68ac70bc 270 "%"{id}|"%"{notletter}([[:graph:]])+ {
bb8e56ff 271 complain (loc, complaint, _("invalid directive: %s"), quote (yytext));
412f8a59 272 }
900c5db5 273
e9955c83 274 "=" return EQUAL;
e9071366 275 "|" return PIPE;
e9955c83
AD
276 ";" return SEMICOLON;
277
3f2d73f1 278 {id} {
58d7a1a1 279 val->uniqstr = uniqstr_new (yytext);
3f2d73f1 280 id_loc = *loc;
b9f1d9a4 281 bracketed_id_str = NULL;
3f2d73f1 282 BEGIN SC_AFTER_IDENTIFIER;
e9955c83
AD
283 }
284
d8d3f94a 285 {int} {
1452af69
PE
286 val->integer = scan_integer (yytext, 10, *loc);
287 return INT;
288 }
289 0[xX][0-9abcdefABCDEF]+ {
290 val->integer = scan_integer (yytext, 16, *loc);
d8d3f94a
PE
291 return INT;
292 }
e9955c83 293
84a1cb5a
AD
294 /* Identifiers may not start with a digit. Yet, don't silently
295 accept "1FOO" as "1 FOO". */
296 {int}{id} {
bb8e56ff 297 complain (loc, complaint, _("invalid identifier: %s"), quote (yytext));
84a1cb5a
AD
298 }
299
3208e3f4 300 /* Characters. */
e9690142 301 "'" token_start = loc->start; BEGIN SC_ESCAPED_CHARACTER;
e9955c83
AD
302
303 /* Strings. */
e9690142 304 "\"" token_start = loc->start; BEGIN SC_ESCAPED_STRING;
e9955c83
AD
305
306 /* Prologue. */
3f2d73f1 307 "%{" code_start = loc->start; BEGIN SC_PROLOGUE;
e9955c83
AD
308
309 /* Code in between braces. */
3f2d73f1
PE
310 "{" {
311 STRING_GROW;
cb823b6f 312 nesting = 0;
3f2d73f1
PE
313 code_start = loc->start;
314 BEGIN SC_BRACED_CODE;
315 }
e9955c83 316
ca2a6d15
PH
317 /* Semantic predicate. */
318 "%?"[ \f\n\t\v]*"{" {
319 nesting = 0;
320 code_start = loc->start;
321 BEGIN SC_PREDICATE;
322 }
323
e9955c83 324 /* A type. */
cb823b6f
AD
325 "<*>" return TAG_ANY;
326 "<>" return TAG_NONE;
d8d3f94a 327 "<"{tag}">" {
223ff46e 328 obstack_grow (&obstack_for_string, yytext + 1, yyleng - 2);
41141c56 329 STRING_FINISH;
223ff46e 330 val->uniqstr = uniqstr_new (last_string);
41141c56 331 STRING_FREE;
cb823b6f
AD
332 return TAG;
333 }
334 "<" {
335 nesting = 0;
336 token_start = loc->start;
337 BEGIN SC_TAG;
4cdb01db
AD
338 }
339
a706a1cc
PE
340 "%%" {
341 static int percent_percent_count;
e9955c83 342 if (++percent_percent_count == 2)
a2bc9dbc 343 BEGIN SC_EPILOGUE;
e9955c83
AD
344 return PERCENT_PERCENT;
345 }
346
b9f1d9a4
AR
347 "[" {
348 bracketed_id_str = NULL;
349 bracketed_id_start = loc->start;
350 bracketed_id_context_state = YY_START;
351 BEGIN SC_BRACKETED_ID;
352 }
353
68ac70bc 354 [^\[%A-Za-z0-9_<>{}\"\'*;|=/, \f\n\t\v]+|. {
c6b17724
AD
355 complain (loc, complaint, "%s: %s",
356 ngettext ("invalid character", "invalid characters", yyleng),
e42906f7 357 quote_mem (yytext, yyleng));
3f2d73f1 358 }
379f0ac8
PE
359
360 <<EOF>> {
361 loc->start = loc->end = scanner_cursor;
362 yyterminate ();
363 }
3f2d73f1
PE
364}
365
366
cb823b6f
AD
367 /*--------------------------------------------------------------.
368 | Supporting \0 complexifies our implementation for no expected |
369 | added value. |
370 `--------------------------------------------------------------*/
371
372<SC_ESCAPED_CHARACTER,SC_ESCAPED_STRING,SC_TAG>
373{
bb8e56ff 374 \0 complain (loc, complaint, _("invalid null character"));
cb823b6f
AD
375}
376
377
3f2d73f1
PE
378 /*-----------------------------------------------------------------.
379 | Scanning after an identifier, checking whether a colon is next. |
380 `-----------------------------------------------------------------*/
381
382<SC_AFTER_IDENTIFIER>
383{
b9f1d9a4 384 "[" {
872b52bc 385 if (bracketed_id_str)
b9f1d9a4 386 {
e9690142
JD
387 ROLLBACK_CURRENT_TOKEN;
388 BEGIN SC_RETURN_BRACKETED_ID;
389 *loc = id_loc;
390 return ID;
b9f1d9a4 391 }
872b52bc
AR
392 else
393 {
e9690142
JD
394 bracketed_id_start = loc->start;
395 bracketed_id_context_state = YY_START;
396 BEGIN SC_BRACKETED_ID;
872b52bc 397 }
b9f1d9a4 398 }
3f2d73f1 399 ":" {
b9f1d9a4 400 BEGIN (bracketed_id_str ? SC_RETURN_BRACKETED_ID : INITIAL);
3f2d73f1 401 *loc = id_loc;
3f2d73f1
PE
402 return ID_COLON;
403 }
404 . {
b9f1d9a4
AR
405 ROLLBACK_CURRENT_TOKEN;
406 BEGIN (bracketed_id_str ? SC_RETURN_BRACKETED_ID : INITIAL);
3f2d73f1 407 *loc = id_loc;
3f2d73f1
PE
408 return ID;
409 }
410 <<EOF>> {
b9f1d9a4 411 BEGIN (bracketed_id_str ? SC_RETURN_BRACKETED_ID : INITIAL);
3f2d73f1 412 *loc = id_loc;
3f2d73f1 413 return ID;
e9955c83
AD
414 }
415}
416
b9f1d9a4
AR
417 /*--------------------------------.
418 | Scanning bracketed identifiers. |
419 `--------------------------------*/
420
421<SC_BRACKETED_ID>
422{
423 {id} {
872b52bc 424 if (bracketed_id_str)
b9f1d9a4 425 {
bb8e56ff
TR
426 complain (loc, complaint,
427 _("unexpected identifier in bracketed name: %s"),
428 quote (yytext));
b9f1d9a4
AR
429 }
430 else
431 {
e9690142
JD
432 bracketed_id_str = uniqstr_new (yytext);
433 bracketed_id_loc = *loc;
b9f1d9a4
AR
434 }
435 }
436 "]" {
437 BEGIN bracketed_id_context_state;
438 if (bracketed_id_str)
439 {
e9690142
JD
440 if (INITIAL == bracketed_id_context_state)
441 {
442 val->uniqstr = bracketed_id_str;
443 bracketed_id_str = 0;
444 *loc = bracketed_id_loc;
445 return BRACKETED_ID;
446 }
b9f1d9a4
AR
447 }
448 else
bb8e56ff 449 complain (loc, complaint, _("an identifier expected"));
b9f1d9a4 450 }
68ac70bc
AD
451
452 [^\].A-Za-z0-9_/ \f\n\t\v]+|. {
c6b17724
AD
453 complain (loc, complaint, "%s: %s",
454 ngettext ("invalid character in bracketed name",
455 "invalid characters in bracketed name", yyleng),
e42906f7 456 quote_mem (yytext, yyleng));
b9f1d9a4 457 }
68ac70bc 458
b9f1d9a4
AR
459 <<EOF>> {
460 BEGIN bracketed_id_context_state;
461 unexpected_eof (bracketed_id_start, "]");
462 }
463}
464
465<SC_RETURN_BRACKETED_ID>
466{
467 . {
468 ROLLBACK_CURRENT_TOKEN;
469 val->uniqstr = bracketed_id_str;
470 bracketed_id_str = 0;
471 *loc = bracketed_id_loc;
472 BEGIN INITIAL;
473 return BRACKETED_ID;
474 }
475}
476
e9955c83 477
d8d3f94a 478 /*---------------------------------------------------------------.
ae93e4e4 479 | Scanning a Yacc comment. The initial '/ *' is already eaten. |
d8d3f94a 480 `---------------------------------------------------------------*/
e9955c83 481
d8d3f94a 482<SC_YACC_COMMENT>
e9955c83 483{
3f2d73f1 484 "*/" BEGIN context_state;
c6b17724 485 .|\n continue;
aa418041 486 <<EOF>> unexpected_eof (token_start, "*/"); BEGIN context_state;
d8d3f94a
PE
487}
488
489
490 /*------------------------------------------------------------.
ae93e4e4 491 | Scanning a C comment. The initial '/ *' is already eaten. |
d8d3f94a
PE
492 `------------------------------------------------------------*/
493
494<SC_COMMENT>
495{
3f2d73f1 496 "*"{splice}"/" STRING_GROW; BEGIN context_state;
e9690142 497 <<EOF>> unexpected_eof (token_start, "*/"); BEGIN context_state;
e9955c83
AD
498}
499
500
d8d3f94a 501 /*--------------------------------------------------------------.
ae93e4e4 502 | Scanning a line comment. The initial '//' is already eaten. |
d8d3f94a
PE
503 `--------------------------------------------------------------*/
504
505<SC_LINE_COMMENT>
506{
e9690142
JD
507 "\n" STRING_GROW; BEGIN context_state;
508 {splice} STRING_GROW;
509 <<EOF>> BEGIN context_state;
d8d3f94a
PE
510}
511
512
4febdd96
PE
513 /*------------------------------------------------.
514 | Scanning a Bison string, including its escapes. |
515 | The initial quote is already eaten. |
516 `------------------------------------------------*/
e9955c83
AD
517
518<SC_ESCAPED_STRING>
519{
c1b2677a 520 "\"" {
41141c56 521 STRING_FINISH;
3f2d73f1 522 loc->start = token_start;
223ff46e 523 val->chars = last_string;
a706a1cc 524 BEGIN INITIAL;
e9955c83
AD
525 return STRING;
526 }
c1b2677a
TR
527 <<EOF>> unexpected_eof (token_start, "\"");
528 "\n" unexpected_newline (token_start, "\"");
e9955c83
AD
529}
530
4febdd96
PE
531 /*----------------------------------------------------------.
532 | Scanning a Bison character literal, decoding its escapes. |
e9690142 533 | The initial quote is already eaten. |
4febdd96 534 `----------------------------------------------------------*/
e9955c83
AD
535
536<SC_ESCAPED_CHARACTER>
537{
c1b2677a 538 "'" {
47aee066
JD
539 STRING_FINISH;
540 loc->start = token_start;
dfaa4860 541 val->character = last_string[0];
c1b2677a
TR
542
543 /* FIXME: Eventually, make these errors. */
544 if (last_string[0] == '\0')
3208e3f4 545 {
c1b2677a
TR
546 complain (loc, Wother, _("empty character literal"));
547 /* '\0' seems dangerous even if we are about to complain. */
548 val->character = '\'';
3208e3f4 549 }
c1b2677a
TR
550 else if (last_string[1] != '\0')
551 complain (loc, Wother,
552 _("extra characters in character literal"));
47aee066
JD
553 STRING_FREE;
554 BEGIN INITIAL;
555 return CHAR;
556 }
c1b2677a
TR
557 "\n" unexpected_newline (token_start, "'");
558 <<EOF>> unexpected_eof (token_start, "'");
4febdd96 559}
a706a1cc 560
cb823b6f
AD
561 /*-----------------------------------------------------------.
562 | Scanning a Bison nested tag. The initial angle bracket is |
563 | already eaten. |
564 `-----------------------------------------------------------*/
565
566<SC_TAG>
4febdd96 567{
cb823b6f
AD
568 ">" {
569 --nesting;
570 if (nesting < 0)
571 {
572 STRING_FINISH;
573 loc->start = token_start;
574 val->uniqstr = uniqstr_new (last_string);
575 STRING_FREE;
576 BEGIN INITIAL;
577 return TAG;
578 }
579 STRING_GROW;
580 }
581
582 [^<>]+ STRING_GROW;
583 "<"+ STRING_GROW; nesting += yyleng;
e9955c83 584
c1b2677a 585 <<EOF>> unexpected_eof (token_start, ">");
cb823b6f 586}
e9955c83
AD
587
588 /*----------------------------.
589 | Decode escaped characters. |
590 `----------------------------*/
591
592<SC_ESCAPED_STRING,SC_ESCAPED_CHARACTER>
593{
d8d3f94a 594 \\[0-7]{1,3} {
4517da37 595 unsigned long int c = strtoul (yytext + 1, NULL, 8);
c2724603 596 if (!c || UCHAR_MAX < c)
bb8e56ff 597 complain (loc, complaint, _("invalid number after \\-escape: %s"),
c2724603 598 yytext+1);
e9955c83 599 else
223ff46e 600 obstack_1grow (&obstack_for_string, c);
e9955c83
AD
601 }
602
6b0d38ab 603 \\x[0-9abcdefABCDEF]+ {
4517da37
PE
604 verify (UCHAR_MAX < ULONG_MAX);
605 unsigned long int c = strtoul (yytext + 2, NULL, 16);
c2724603 606 if (!c || UCHAR_MAX < c)
bb8e56ff 607 complain (loc, complaint, _("invalid number after \\-escape: %s"),
c2724603 608 yytext+1);
d8d3f94a 609 else
223ff46e 610 obstack_1grow (&obstack_for_string, c);
e9955c83
AD
611 }
612
e9690142
JD
613 \\a obstack_1grow (&obstack_for_string, '\a');
614 \\b obstack_1grow (&obstack_for_string, '\b');
615 \\f obstack_1grow (&obstack_for_string, '\f');
616 \\n obstack_1grow (&obstack_for_string, '\n');
617 \\r obstack_1grow (&obstack_for_string, '\r');
618 \\t obstack_1grow (&obstack_for_string, '\t');
619 \\v obstack_1grow (&obstack_for_string, '\v');
412f8a59
PE
620
621 /* \\[\"\'?\\] would be shorter, but it confuses xgettext. */
223ff46e 622 \\("\""|"'"|"?"|"\\") obstack_1grow (&obstack_for_string, yytext[1]);
412f8a59 623
6b0d38ab 624 \\(u|U[0-9abcdefABCDEF]{4})[0-9abcdefABCDEF]{4} {
d8d3f94a 625 int c = convert_ucn_to_byte (yytext);
c2724603 626 if (c <= 0)
bb8e56ff 627 complain (loc, complaint, _("invalid number after \\-escape: %s"),
c2724603 628 yytext+1);
d8d3f94a 629 else
223ff46e 630 obstack_1grow (&obstack_for_string, c);
d8d3f94a 631 }
e9690142 632 \\(.|\n) {
c2724603 633 char const *p = yytext + 1;
e6c849d8 634 /* Quote only if escaping won't make the character visible. */
457bf919 635 if (c_isspace ((unsigned char) *p) && c_isprint ((unsigned char) *p))
e6c849d8 636 p = quote (p);
c2724603
JD
637 else
638 p = quotearg_style_mem (escape_quoting_style, p, 1);
bb8e56ff 639 complain (loc, complaint, _("invalid character after \\-escape: %s"),
6fb8b256 640 p);
e9955c83
AD
641 }
642}
643
4febdd96
PE
644 /*--------------------------------------------.
645 | Scanning user-code characters and strings. |
646 `--------------------------------------------*/
e9955c83 647
4febdd96
PE
648<SC_CHARACTER,SC_STRING>
649{
e9690142 650 {splice}|\\{splice}[^\n\[\]] STRING_GROW;
4febdd96 651}
e9955c83
AD
652
653<SC_CHARACTER>
654{
e9690142 655 "'" STRING_GROW; BEGIN context_state;
c1b2677a
TR
656 \n unexpected_newline (token_start, "'");
657 <<EOF>> unexpected_eof (token_start, "'");
e9955c83
AD
658}
659
e9955c83
AD
660<SC_STRING>
661{
e9690142 662 "\"" STRING_GROW; BEGIN context_state;
c1b2677a
TR
663 \n unexpected_newline (token_start, "\"");
664 <<EOF>> unexpected_eof (token_start, "\"");
e9955c83
AD
665}
666
667
668 /*---------------------------------------------------.
669 | Strings, comments etc. can be found in user code. |
670 `---------------------------------------------------*/
671
ca2a6d15 672<SC_BRACED_CODE,SC_PROLOGUE,SC_EPILOGUE,SC_PREDICATE>
e9955c83 673{
3f2d73f1
PE
674 "'" {
675 STRING_GROW;
676 context_state = YY_START;
677 token_start = loc->start;
678 BEGIN SC_CHARACTER;
679 }
680 "\"" {
681 STRING_GROW;
682 context_state = YY_START;
683 token_start = loc->start;
684 BEGIN SC_STRING;
685 }
686 "/"{splice}"*" {
687 STRING_GROW;
688 context_state = YY_START;
689 token_start = loc->start;
690 BEGIN SC_COMMENT;
691 }
692 "/"{splice}"/" {
693 STRING_GROW;
694 context_state = YY_START;
695 BEGIN SC_LINE_COMMENT;
696 }
e9955c83
AD
697}
698
699
624a35e2 700
58d7a1a1 701 /*-----------------------------------------------------------.
ca2a6d15
PH
702 | Scanning some code in braces (actions, predicates). The |
703 | initial "{" is already eaten. |
58d7a1a1 704 `-----------------------------------------------------------*/
e9955c83 705
ca2a6d15 706<SC_BRACED_CODE,SC_PREDICATE>
e9955c83 707{
cb823b6f
AD
708 "{"|"<"{splice}"%" STRING_GROW; nesting++;
709 "%"{splice}">" STRING_GROW; nesting--;
ca2a6d15 710
ae93e4e4
JM
711 /* Tokenize '<<%' correctly (as '<<' '%') rather than incorrectly
712 (as '<' '<%'). */
ca2a6d15
PH
713 "<"{splice}"<" STRING_GROW;
714
c1b2677a 715 <<EOF>> unexpected_eof (code_start, "}");
ca2a6d15
PH
716}
717
718<SC_BRACED_CODE>
719{
e9955c83 720 "}" {
25522739
PE
721 obstack_1grow (&obstack_for_string, '}');
722
cb823b6f
AD
723 --nesting;
724 if (nesting < 0)
e9955c83 725 {
e9690142
JD
726 STRING_FINISH;
727 loc->start = code_start;
728 val->code = last_string;
729 BEGIN INITIAL;
730 return BRACED_CODE;
e9955c83
AD
731 }
732 }
ca2a6d15 733}
e9955c83 734
ca2a6d15
PH
735<SC_PREDICATE>
736{
737 "}" {
738 --nesting;
739 if (nesting < 0)
740 {
e9690142
JD
741 STRING_FINISH;
742 loc->start = code_start;
743 val->code = last_string;
744 BEGIN INITIAL;
745 return BRACED_PREDICATE;
ca2a6d15
PH
746 }
747 else
748 obstack_1grow (&obstack_for_string, '}');
47aee066 749 }
e9955c83
AD
750}
751
e9955c83
AD
752 /*--------------------------------------------------------------.
753 | Scanning some prologue: from "%{" (already scanned) to "%}". |
754 `--------------------------------------------------------------*/
755
756<SC_PROLOGUE>
757{
758 "%}" {
41141c56 759 STRING_FINISH;
3f2d73f1 760 loc->start = code_start;
223ff46e 761 val->chars = last_string;
a706a1cc 762 BEGIN INITIAL;
e9955c83
AD
763 return PROLOGUE;
764 }
765
c1b2677a 766 <<EOF>> unexpected_eof (code_start, "%}");
e9955c83
AD
767}
768
769
770 /*---------------------------------------------------------------.
771 | Scanning the epilogue (everything after the second "%%", which |
d8d3f94a 772 | has already been eaten). |
e9955c83
AD
773 `---------------------------------------------------------------*/
774
775<SC_EPILOGUE>
776{
e9955c83 777 <<EOF>> {
41141c56 778 STRING_FINISH;
3f2d73f1 779 loc->start = code_start;
223ff46e 780 val->chars = last_string;
a706a1cc 781 BEGIN INITIAL;
e9955c83
AD
782 return EPILOGUE;
783 }
784}
785
786
4febdd96
PE
787 /*-----------------------------------------------------.
788 | By default, grow the string obstack with the input. |
789 `-----------------------------------------------------*/
790
e9690142
JD
791<SC_COMMENT,SC_LINE_COMMENT,SC_BRACED_CODE,SC_PREDICATE,SC_PROLOGUE,SC_EPILOGUE,SC_STRING,SC_CHARACTER,SC_ESCAPED_STRING,SC_ESCAPED_CHARACTER>. |
792 <SC_COMMENT,SC_LINE_COMMENT,SC_BRACED_CODE,SC_PREDICATE,SC_PROLOGUE,SC_EPILOGUE>\n STRING_GROW;
4febdd96 793
e9955c83
AD
794%%
795
6c30d641
PE
796/* Read bytes from FP into buffer BUF of size SIZE. Return the
797 number of bytes read. Remove '\r' from input, treating \r\n
798 and isolated \r as \n. */
799
800static size_t
801no_cr_read (FILE *fp, char *buf, size_t size)
802{
/* Read up to SIZE bytes.  If none of them is a '\r' the data is
   returned untouched (see the final return).  */
a737b216
PE
803 size_t bytes_read = fread (buf, 1, size, fp);
804 if (bytes_read)
6c30d641 805 {
/* W starts at the first '\r'; everything before it is already clean.  */
a737b216 806 char *w = memchr (buf, '\r', bytes_read);
6c30d641 807 if (w)
e9690142
JD
808 {
/* The buffer is compacted in place: R is the read cursor, W the write
   cursor (one past the last byte kept), LIM the end of the bytes
   read.  W never gets ahead of R.  */
809 char const *r = ++w;
810 char const *lim = buf + bytes_read;
811
812 for (;;)
813 {
814 /* Found an '\r'. Treat it like '\n', but ignore any
815 '\n' that immediately follows. */
816 w[-1] = '\n';
817 if (r == lim)
818 {
/* The '\r' was the last byte of this read, so its possible '\n'
   partner is still in FP: peek one character.  A '\n' is consumed
   and dropped; anything else is pushed back.  If the push-back
   fails, leave the loop and return what has been compacted.  */
819 int ch = getc (fp);
820 if (ch != '\n' && ungetc (ch, fp) != ch)
821 break;
822 }
823 else if (*r == '\n')
824 r++;
825
826 /* Copy until the next '\r'. */
827 do
828 {
/* Input exhausted: W - BUF is the number of bytes kept.  */
829 if (r == lim)
830 return w - buf;
831 }
832 while ((*w++ = *r++) != '\r');
833 }
834
835 return w - buf;
836 }
6c30d641
PE
837 }
838
a737b216 839 return bytes_read;
6c30d641
PE
840}
841
842
f25bfb75 843
1452af69
PE
844/*------------------------------------------------------.
845| Scan NUMBER for a base-BASE integer at location LOC. |
846`------------------------------------------------------*/
847
static unsigned long int
scan_integer (char const *number, int base, location loc)
{
  /* The clamp below relies on INT_MAX being representable (and not the
     maximum) in the type strtoul returns.  */
  verify (INT_MAX < ULONG_MAX);
  unsigned long int value = strtoul (number, NULL, base);

  /* Common case: the value fits in an int, hand it back as is.  */
  if (value <= INT_MAX)
    return value;

  /* Too large (this also covers strtoul saturating at ULONG_MAX):
     complain at LOC and clamp to INT_MAX.  */
  complain (&loc, complaint, _("integer out of range: %s"),
            quote (number));
  return INT_MAX;
}
863
864
d8d3f94a
PE
865/*------------------------------------------------------------------.
866| Convert universal character name UCN to a single-byte character, |
867| and return that character. Return -1 if UCN does not correspond |
e9690142 868| to a single-byte character. |
d8d3f94a
PE
869`------------------------------------------------------------------*/
870
static int
convert_ucn_to_byte (char const *ucn)
{
  verify (UCHAR_MAX <= INT_MAX);
  /* UCN is the full escape text as matched by the scanner rule
     (backslash, 'u' or 'U', then hex digits); skip the two-character
     introducer and decode the digits.  */
  unsigned long int code = strtoul (ucn + 2, NULL, 16);

  /* FIXME: Currently we assume Unicode-compatible unibyte characters
     on ASCII hosts (i.e., Latin-1 on hosts with 8-bit bytes).  On
     non-ASCII hosts we support only the portable C character set.
     These limitations should be removed once we add support for
     multibyte characters.  */

  if (UCHAR_MAX < code)
    return -1;

#if ! ('$' == 0x24 && '@' == 0x40 && '`' == 0x60 && '~' == 0x7e)
  {
    /* A non-ASCII host.  Use CODE to index into a table of the C
       basic execution character set, which is guaranteed to exist on
       all Standard C platforms.  This table also includes '$', '@',
       and '`', which are not in the basic execution character set but
       which are unibyte characters on all the platforms that we know
       about.  */
    static signed char const table[] =
      {
        '\0',   -1,   -1,   -1,   -1,   -1,   -1, '\a',
        '\b', '\t', '\n', '\v', '\f', '\r',   -1,   -1,
          -1,   -1,   -1,   -1,   -1,   -1,   -1,   -1,
          -1,   -1,   -1,   -1,   -1,   -1,   -1,   -1,
         ' ',  '!',  '"',  '#',  '$',  '%',  '&', '\'',
         '(',  ')',  '*',  '+',  ',',  '-',  '.',  '/',
         '0',  '1',  '2',  '3',  '4',  '5',  '6',  '7',
         '8',  '9',  ':',  ';',  '<',  '=',  '>',  '?',
         '@',  'A',  'B',  'C',  'D',  'E',  'F',  'G',
         'H',  'I',  'J',  'K',  'L',  'M',  'N',  'O',
         'P',  'Q',  'R',  'S',  'T',  'U',  'V',  'W',
         'X',  'Y',  'Z',  '[', '\\',  ']',  '^',  '_',
         '`',  'a',  'b',  'c',  'd',  'e',  'f',  'g',
         'h',  'i',  'j',  'k',  'l',  'm',  'n',  'o',
         'p',  'q',  'r',  's',  't',  'u',  'v',  'w',
         'x',  'y',  'z',  '{',  '|',  '}',  '~'
      };

    /* Return the signed table entry (or the -1 sentinel) directly,
       rather than storing it in the unsigned CODE first: converting
       the resulting huge unsigned value back to int would be
       implementation-defined.  */
    if (sizeof table <= code)
      return -1;
    return table[code];
  }
#else
  /* ASCII host: CODE is at most UCHAR_MAX here, hence fits in an int.  */
  return (int) code;
#endif
}
920
921
03dbf629
AD
922/*---------------------------------------------------------------------.
923| Handle '#line INT( "FILE")?\n'. ARGS has already skipped '#line '. |
924`---------------------------------------------------------------------*/
900c5db5
AD
925
static void
handle_syncline (char *args, location loc)
{
  /* ARGS points just past "#line "; parse the decimal line number and
     remember where the digits stop.  */
  char *after_number;
  unsigned long int lineno = strtoul (args, &after_number, 10);
  if (INT_MAX <= lineno)
    {
      complain (&loc, Wother, _("line number overflow"));
      lineno = INT_MAX;
    }

  /* The file name is optional.  When present it is delimited by double
     quotes; the scanner pattern only matches a quoted name that has
     both its opening and closing quote, so the second strchr cannot
     fail.  ARGS is modified in place to terminate the name.  */
  char *open_quote = strchr (after_number, '"');
  if (open_quote)
    {
      char *name = open_quote + 1;
      *strchr (name, '"') = '\0';
      current_file = uniqstr_new (name);
    }

  /* Move the scanner cursor to column 1 of the requested line in the
     (possibly just updated) current file.  */
  boundary_set (&scanner_cursor, current_file, lineno, 1);
}
945
946
4febdd96
PE
947/*----------------------------------------------------------------.
948| For a token or comment starting at START, report message MSGID, |
c1b2677a
TR
949| which should say that an end marker was found before the |
950| expected TOKEN_END. Then, pretend that TOKEN_END was found. |
4febdd96
PE
951`----------------------------------------------------------------*/
952
static void
unexpected_end (boundary start, char const *msgid, char const *token_end)
{
  size_t const end_len = strlen (token_end);
  char const *shown;
  location loc;
  size_t k;

  /* The complaint covers START up to the scanner's current position,
     captured before the cursor is moved back below.  */
  loc.start = start;
  loc.end = scanner_cursor;

  /* Adjust scanner cursor so that any later message does not count
     the characters about to be inserted.  */
  scanner_cursor.column -= end_len;

  /* Push TOKEN_END back onto the input, last character first.  */
  for (k = end_len; k != 0; --k)
    unput (token_end[k - 1]);

  shown = quote (token_end);
  /* Instead of '\'', display "'".  */
  if (STREQ (shown, "'\\''"))
    shown = "\"'\"";

  /* MSGID arrives untranslated; translate it here, at the point of use.  */
  complain (&loc, complaint, _(msgid), shown);
}
974
975
3f2d73f1
PE
976/*------------------------------------------------------------------------.
977| Report an unexpected EOF in a token or comment starting at START. |
978| An end of file was encountered and the expected TOKEN_END was missing. |
c1b2677a 979| After reporting the problem, pretend that TOKEN_END was found. |
3f2d73f1 980`------------------------------------------------------------------------*/
a706a1cc
PE
981
982static void
aa418041 983unexpected_eof (boundary start, char const *token_end)
a706a1cc 984{
/* Delegate to unexpected_end with the end-of-file wording.  The format
   is wrapped in N_ here; unexpected_end applies _() to it when it
   reports the complaint.  */
4a9cd8f2 985 unexpected_end (start, N_("missing %s at end of file"), token_end);
4febdd96
PE
986}
987
988
989/*----------------------------------------.
990| Likewise, but for unexpected newlines. |
991`----------------------------------------*/
992
993static void
994unexpected_newline (boundary start, char const *token_end)
995{
/* Delegate to unexpected_end with the end-of-line wording.  The format
   is wrapped in N_ here; unexpected_end applies _() to it when it
   reports the complaint.  */
4a9cd8f2 996 unexpected_end (start, N_("missing %s at end of line"), token_end);
a706a1cc
PE
997}
998
999
f25bfb75
AD
1000/*-------------------------.
1001| Initialize the scanner. |
1002`-------------------------*/
1003
1d6412ad 1004void
e9071366 1005gram_scanner_initialize (void)
1d6412ad 1006{
/* Set up obstack_for_string, the obstack the scanner rules grow
   (obstack_grow / obstack_1grow) while collecting strings, tags and
   code.  Presumably this must run before the first call to the
   scanner -- confirm against the caller.  */
223ff46e 1007 obstack_init (&obstack_for_string);
1d6412ad
AD
1008}
1009
1010
f25bfb75
AD
1011/*-----------------------------------------------.
1012| Free all the memory allocated to the scanner. |
1013`-----------------------------------------------*/
1014
4cdb01db 1015void
e9071366 1016gram_scanner_free (void)
4cdb01db 1017{
/* Release the string obstack set up by gram_scanner_initialize.
   Passing 0 as the object asks obstack_free to free everything
   held in the obstack.  */
223ff46e 1018 obstack_free (&obstack_for_string, 0);
536545f3 1019 /* Reclaim Flex's buffers. */
580b8926 1020 yylex_destroy ();
4cdb01db 1021}