]> git.saurik.com Git - bison.git/blame - src/scan-gram.l
lalr1.cc: rename lex_symbol as api.token.constructor
[bison.git] / src / scan-gram.l
CommitLineData
e9955c83 1/* Bison Grammar Scanner -*- C -*-
3b1e470c 2
34136e65 3 Copyright (C) 2002-2012 Free Software Foundation, Inc.
e9955c83
AD
4
5 This file is part of Bison, the GNU Compiler Compiler.
6
f16b0819 7 This program is free software: you can redistribute it and/or modify
e9955c83 8 it under the terms of the GNU General Public License as published by
f16b0819 9 the Free Software Foundation, either version 3 of the License, or
e9955c83
AD
10 (at your option) any later version.
11
12 This program is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
16
17 You should have received a copy of the GNU General Public License
f16b0819 18 along with this program. If not, see <http://www.gnu.org/licenses/>. */
e9955c83 19
2062d72d 20%option debug nodefault noinput noyywrap never-interactive
e9955c83
AD
21%option prefix="gram_" outfile="lex.yy.c"
22
23%{
4f6e011e
PE
24/* Work around a bug in flex 2.5.31. See Debian bug 333231
25 <http://bugs.debian.org/cgi-bin/bugreport.cgi?bug=333231>. */
26#undef gram_wrap
27#define gram_wrap() 1
28
e9071366 29#define FLEX_PREFIX(Id) gram_ ## Id
0305d25e 30#include <src/flex-scanner.h>
223ff46e 31
0305d25e
AD
32#include <src/complain.h>
33#include <src/files.h>
2062d72d 34#include <src/getargs.h>
0305d25e
AD
35#include <src/gram.h>
36#include <quotearg.h>
37#include <src/reader.h>
38#include <src/uniqstr.h>
e9955c83 39
457bf919 40#include <c-ctype.h>
e9071366
AD
41#include <mbswidth.h>
42#include <quote.h>
43
0305d25e 44#include <src/scan-gram.h>
e9071366
AD
45
46#define YY_DECL GRAM_LEX_DECL
2346344a 47
e9690142
JD
48#define YY_USER_INIT \
49 code_start = scanner_cursor = loc->start; \
dc9701e8 50
3f2d73f1 51/* Location of scanner cursor. */
4a678af8 52static boundary scanner_cursor;
41141c56 53
e9071366 54#define YY_USER_ACTION location_compute (loc, &scanner_cursor, yytext, yyleng);
d8d3f94a 55
6c30d641 56static size_t no_cr_read (FILE *, char *, size_t);
d8d3f94a
PE
57#define YY_INPUT(buf, result, size) ((result) = no_cr_read (yyin, buf, size))
58
a7706735
AD
59#define RETURN_PERCENT_PARAM(Value) \
60 RETURN_VALUE(PERCENT_PARAM, param, param_ ## Value)
61
62#define RETURN_PERCENT_FLAG(Value) \
63 RETURN_VALUE(PERCENT_FLAG, uniqstr, uniqstr_new (Value))
64
65#define RETURN_VALUE(Token, Field, Value) \
ba061fa6 66 do { \
a7706735
AD
67 val->Field = Value; \
68 return Token; \
ba061fa6
AD
69 } while (0)
70
b9f1d9a4
AR
71#define ROLLBACK_CURRENT_TOKEN \
72 do { \
e9690142 73 scanner_cursor.column -= mbsnwidth (yytext, yyleng, 0); \
b9f1d9a4
AR
74 yyless (0); \
75 } while (0)
ba061fa6 76
2062d72d
TR
77#define DEPRECATED(Msg) \
78 do { \
79 size_t i; \
80 complain (loc, Wdeprecated, \
81 _("deprecated directive: %s, use %s"), \
82 quote (yytext), quote_n (1, Msg)); \
83 scanner_cursor.column -= mbsnwidth (Msg, strlen (Msg), 0); \
84 for (i = strlen (Msg); i != 0; --i) \
85 unput (Msg[i - 1]); \
86 } while (0)
87
7ec2d4cd 88/* A string representing the most recently saved token. */
7c0c6181 89static char *last_string;
7ec2d4cd 90
872b52bc 91/* Bracketed identifier. */
b9f1d9a4
AR
92static uniqstr bracketed_id_str = 0;
93static location bracketed_id_loc;
94static boundary bracketed_id_start;
95static int bracketed_id_context_state = 0;
96
7ec2d4cd 97void
e9071366 98gram_scanner_last_string_free (void)
7ec2d4cd 99{
41141c56 100 STRING_FREE;
7ec2d4cd 101}
e9955c83 102
4517da37 103static void handle_syncline (char *, location);
1452af69 104static unsigned long int scan_integer (char const *p, int base, location loc);
d8d3f94a 105static int convert_ucn_to_byte (char const *hex_text);
aa418041 106static void unexpected_eof (boundary, char const *);
4febdd96 107static void unexpected_newline (boundary, char const *);
e9955c83
AD
108
109%}
e9071366
AD
110 /* A C-like comment in directives/rules. */
111%x SC_YACC_COMMENT
112 /* Strings and characters in directives/rules. */
e9955c83 113%x SC_ESCAPED_STRING SC_ESCAPED_CHARACTER
e9071366 114 /* An identifier was just read in directives/rules. Special state
ae93e4e4 115 to capture the sequence 'identifier :'. */
e9071366 116%x SC_AFTER_IDENTIFIER
cb823b6f
AD
117 /* A complex tag, with nested angle brackets. */
118%x SC_TAG
e9071366 119
ca2a6d15 120 /* Four types of user code:
ae93e4e4 121 - prologue (code between '%{' '%}' in the first section, before %%);
e9071366 122 - actions, printers, union, etc. (between braces in the middle section);
da5462d4 123 - epilogue (everything after the second %%).
ae93e4e4 124 - predicate (code between '%?{' and '}' in middle section); */
ca2a6d15 125%x SC_PROLOGUE SC_BRACED_CODE SC_EPILOGUE SC_PREDICATE
e9071366
AD
126 /* C and C++ comments in code. */
127%x SC_COMMENT SC_LINE_COMMENT
128 /* Strings and characters in code. */
129%x SC_STRING SC_CHARACTER
872b52bc 130 /* Bracketed identifiers support. */
b9f1d9a4 131%x SC_BRACKETED_ID SC_RETURN_BRACKETED_ID
e9955c83 132
e9690142
JD
133letter [.abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ_]
134id {letter}({letter}|[-0-9])*
4f646c37 135directive %{id}
e9690142 136int [0-9]+
d8d3f94a
PE
137
138/* POSIX says that a tag must be both an id and a C union member, but
139 historically almost any character is allowed in a tag. We disallow
cb823b6f
AD
140 NUL, as this simplifies our implementation. We disallow angle
141 bracket to match them in nested pairs: several languages use them
142 for generics/template types. */
e9690142 143tag [^\0<>]+
d8d3f94a
PE
144
145/* Zero or more instances of backslash-newline. Following GCC, allow
146 white space between the backslash and the newline. */
e9690142 147splice (\\[ \f\t\v]*\n)*
e9955c83 148
2062d72d
TR
149/* An equal sign, with optional leading whitespace. This is used in some
150 deprecated constructs. */
151eqopt ([[:space:]]*=)?
152
e9955c83
AD
153%%
154%{
cb823b6f
AD
155 /* Nesting level. Either for nested braces, or nested angle brackets
156 (but not mixed). */
84f6a6ca 157 int nesting PACIFY_CC (= 0);
1a9e39f1 158
3f2d73f1 159 /* Parent context state, when applicable. */
84f6a6ca 160 int context_state PACIFY_CC (= 0);
a706a1cc 161
3f2d73f1 162 /* Location of most recent identifier, when applicable. */
84f6a6ca 163 location id_loc PACIFY_CC (= empty_location);
3f2d73f1 164
a2bc9dbc
PE
165 /* Where containing code started, when applicable. Its initial
166 value is relevant only when yylex is invoked in the SC_EPILOGUE
167 start condition. */
168 boundary code_start = scanner_cursor;
3f2d73f1 169
223ff46e
PE
170 /* Where containing comment or string or character literal started,
171 when applicable. */
84f6a6ca 172 boundary token_start PACIFY_CC (= scanner_cursor);
e9955c83
AD
173%}
174
175
3f2d73f1
PE
176 /*-----------------------.
177 | Scanning white space. |
178 `-----------------------*/
179
b9f1d9a4 180<INITIAL,SC_AFTER_IDENTIFIER,SC_BRACKETED_ID,SC_RETURN_BRACKETED_ID>
3f2d73f1 181{
4febdd96 182 /* Comments and white space. */
6fb8b256 183 "," {
bb8e56ff 184 complain (loc, Wother, _("stray ',' treated as white space"));
6fb8b256 185 }
4febdd96 186 [ \f\n\t\v] |
3f2d73f1 187 "//".* ;
83adb046
PE
188 "/*" {
189 token_start = loc->start;
190 context_state = YY_START;
191 BEGIN SC_YACC_COMMENT;
192 }
3f2d73f1
PE
193
194 /* #line directives are not documented, and may be withdrawn or
195 modified in future versions of Bison. */
196 ^"#line "{int}" \"".*"\"\n" {
4517da37 197 handle_syncline (yytext + sizeof "#line " - 1, *loc);
3f2d73f1
PE
198 }
199}
200
201
e9955c83
AD
202 /*----------------------------.
203 | Scanning Bison directives. |
204 `----------------------------*/
a7c09cba
DJ
205
206 /* For directives that are also command line options, the regex must be
e9690142 207 "%..."
a7c09cba
DJ
208 after "[-_]"s are removed, and the directive must match the --long
209 option name, with a single string argument. Otherwise, add exceptions
210 to ../build-aux/cross-options.pl. */
211
e9955c83
AD
212<INITIAL>
213{
deef2a0a 214 "%binary" return PERCENT_NONASSOC;
136a0f76 215 "%code" return PERCENT_CODE;
fa819509 216 "%debug" RETURN_PERCENT_FLAG("parse.trace");
2062d72d 217 "%default-prec" return PERCENT_DEFAULT_PREC;
deef2a0a
AD
218 "%define" return PERCENT_DEFINE;
219 "%defines" return PERCENT_DEFINES;
220 "%destructor" return PERCENT_DESTRUCTOR;
221 "%dprec" return PERCENT_DPREC;
2062d72d 222 "%error-verbose" return PERCENT_ERROR_VERBOSE;
deef2a0a 223 "%expect" return PERCENT_EXPECT;
2062d72d 224 "%expect-rr" return PERCENT_EXPECT_RR;
deef2a0a 225 "%file-prefix" return PERCENT_FILE_PREFIX;
2062d72d 226 "%fixed-output-files" return PERCENT_YACC;
deef2a0a
AD
227 "%initial-action" return PERCENT_INITIAL_ACTION;
228 "%glr-parser" return PERCENT_GLR_PARSER;
229 "%language" return PERCENT_LANGUAGE;
230 "%left" return PERCENT_LEFT;
a7706735 231 "%lex-param" RETURN_PERCENT_PARAM(lex);
bc0f5737 232 "%locations" RETURN_PERCENT_FLAG("locations");
deef2a0a 233 "%merge" return PERCENT_MERGE;
2062d72d
TR
234 "%name-prefix" return PERCENT_NAME_PREFIX;
235 "%no-default-prec" return PERCENT_NO_DEFAULT_PREC;
236 "%no-lines" return PERCENT_NO_LINES;
deef2a0a
AD
237 "%nonassoc" return PERCENT_NONASSOC;
238 "%nondeterministic-parser" return PERCENT_NONDETERMINISTIC_PARSER;
239 "%nterm" return PERCENT_NTERM;
240 "%output" return PERCENT_OUTPUT;
a7706735
AD
241 "%param" RETURN_PERCENT_PARAM(both);
242 "%parse-param" RETURN_PERCENT_PARAM(parse);
deef2a0a 243 "%prec" return PERCENT_PREC;
d78f0ac9 244 "%precedence" return PERCENT_PRECEDENCE;
deef2a0a 245 "%printer" return PERCENT_PRINTER;
2062d72d 246 "%pure-parser" RETURN_PERCENT_FLAG("api.pure");
deef2a0a
AD
247 "%require" return PERCENT_REQUIRE;
248 "%right" return PERCENT_RIGHT;
249 "%skeleton" return PERCENT_SKELETON;
250 "%start" return PERCENT_START;
251 "%term" return PERCENT_TOKEN;
252 "%token" return PERCENT_TOKEN;
2062d72d 253 "%token-table" return PERCENT_TOKEN_TABLE;
deef2a0a
AD
254 "%type" return PERCENT_TYPE;
255 "%union" return PERCENT_UNION;
256 "%verbose" return PERCENT_VERBOSE;
257 "%yacc" return PERCENT_YACC;
e9955c83 258
2062d72d
TR
259 /* deprecated */
260 "%default"[-_]"prec" DEPRECATED("%default-prec");
261 "%error"[-_]"verbose" DEPRECATED("%define parse.error verbose");
262 "%expect"[-_]"rr" DEPRECATED("%expect-rr");
263 "%file-prefix"{eqopt} DEPRECATED("%file-prefix");
264 "%fixed"[-_]"output"[-_]"files" DEPRECATED("%fixed-output-files");
265 "%name"[-_]"prefix"{eqopt} DEPRECATED("%name-prefix");
266 "%no"[-_]"default"[-_]"prec" DEPRECATED("%no-default-prec");
267 "%no"[-_]"lines" DEPRECATED("%no-lines");
268 "%output"{eqopt} DEPRECATED("%output");
269 "%pure"[-_]"parser" DEPRECATED("%pure-parser");
270 "%token"[-_]"table" DEPRECATED("%token-table");
271
3f2d73f1 272 {directive} {
bb8e56ff 273 complain (loc, complaint, _("invalid directive: %s"), quote (yytext));
412f8a59 274 }
900c5db5 275
e9955c83 276 "=" return EQUAL;
e9071366 277 "|" return PIPE;
e9955c83
AD
278 ";" return SEMICOLON;
279
3f2d73f1 280 {id} {
58d7a1a1 281 val->uniqstr = uniqstr_new (yytext);
3f2d73f1 282 id_loc = *loc;
b9f1d9a4 283 bracketed_id_str = NULL;
3f2d73f1 284 BEGIN SC_AFTER_IDENTIFIER;
e9955c83
AD
285 }
286
d8d3f94a 287 {int} {
1452af69
PE
288 val->integer = scan_integer (yytext, 10, *loc);
289 return INT;
290 }
291 0[xX][0-9abcdefABCDEF]+ {
292 val->integer = scan_integer (yytext, 16, *loc);
d8d3f94a
PE
293 return INT;
294 }
e9955c83 295
84a1cb5a
AD
296 /* Identifiers may not start with a digit. Yet, don't silently
297 accept "1FOO" as "1 FOO". */
298 {int}{id} {
bb8e56ff 299 complain (loc, complaint, _("invalid identifier: %s"), quote (yytext));
84a1cb5a
AD
300 }
301
3208e3f4 302 /* Characters. */
e9690142 303 "'" token_start = loc->start; BEGIN SC_ESCAPED_CHARACTER;
e9955c83
AD
304
305 /* Strings. */
e9690142 306 "\"" token_start = loc->start; BEGIN SC_ESCAPED_STRING;
e9955c83
AD
307
308 /* Prologue. */
3f2d73f1 309 "%{" code_start = loc->start; BEGIN SC_PROLOGUE;
e9955c83
AD
310
311 /* Code in between braces. */
3f2d73f1
PE
312 "{" {
313 STRING_GROW;
cb823b6f 314 nesting = 0;
3f2d73f1
PE
315 code_start = loc->start;
316 BEGIN SC_BRACED_CODE;
317 }
e9955c83 318
ca2a6d15
PH
319 /* Semantic predicate. */
320 "%?"[ \f\n\t\v]*"{" {
321 nesting = 0;
322 code_start = loc->start;
323 BEGIN SC_PREDICATE;
324 }
325
e9955c83 326 /* A type. */
cb823b6f
AD
327 "<*>" return TAG_ANY;
328 "<>" return TAG_NONE;
d8d3f94a 329 "<"{tag}">" {
223ff46e 330 obstack_grow (&obstack_for_string, yytext + 1, yyleng - 2);
41141c56 331 STRING_FINISH;
223ff46e 332 val->uniqstr = uniqstr_new (last_string);
41141c56 333 STRING_FREE;
cb823b6f
AD
334 return TAG;
335 }
336 "<" {
337 nesting = 0;
338 token_start = loc->start;
339 BEGIN SC_TAG;
4cdb01db
AD
340 }
341
a706a1cc
PE
342 "%%" {
343 static int percent_percent_count;
e9955c83 344 if (++percent_percent_count == 2)
a2bc9dbc 345 BEGIN SC_EPILOGUE;
e9955c83
AD
346 return PERCENT_PERCENT;
347 }
348
b9f1d9a4
AR
349 "[" {
350 bracketed_id_str = NULL;
351 bracketed_id_start = loc->start;
352 bracketed_id_context_state = YY_START;
353 BEGIN SC_BRACKETED_ID;
354 }
355
a706a1cc 356 . {
bb8e56ff 357 complain (loc, complaint, _("invalid character: %s"), quote (yytext));
3f2d73f1 358 }
379f0ac8
PE
359
360 <<EOF>> {
361 loc->start = loc->end = scanner_cursor;
362 yyterminate ();
363 }
3f2d73f1
PE
364}
365
366
cb823b6f
AD
367 /*--------------------------------------------------------------.
368 | Supporting \0 complexifies our implementation for no expected |
369 | added value. |
370 `--------------------------------------------------------------*/
371
372<SC_ESCAPED_CHARACTER,SC_ESCAPED_STRING,SC_TAG>
373{
bb8e56ff 374 \0 complain (loc, complaint, _("invalid null character"));
cb823b6f
AD
375}
376
377
3f2d73f1
PE
378 /*-----------------------------------------------------------------.
379 | Scanning after an identifier, checking whether a colon is next. |
380 `-----------------------------------------------------------------*/
381
382<SC_AFTER_IDENTIFIER>
383{
b9f1d9a4 384 "[" {
872b52bc 385 if (bracketed_id_str)
b9f1d9a4 386 {
e9690142
JD
387 ROLLBACK_CURRENT_TOKEN;
388 BEGIN SC_RETURN_BRACKETED_ID;
389 *loc = id_loc;
390 return ID;
b9f1d9a4 391 }
872b52bc
AR
392 else
393 {
e9690142
JD
394 bracketed_id_start = loc->start;
395 bracketed_id_context_state = YY_START;
396 BEGIN SC_BRACKETED_ID;
872b52bc 397 }
b9f1d9a4 398 }
3f2d73f1 399 ":" {
b9f1d9a4 400 BEGIN (bracketed_id_str ? SC_RETURN_BRACKETED_ID : INITIAL);
3f2d73f1 401 *loc = id_loc;
3f2d73f1
PE
402 return ID_COLON;
403 }
404 . {
b9f1d9a4
AR
405 ROLLBACK_CURRENT_TOKEN;
406 BEGIN (bracketed_id_str ? SC_RETURN_BRACKETED_ID : INITIAL);
3f2d73f1 407 *loc = id_loc;
3f2d73f1
PE
408 return ID;
409 }
410 <<EOF>> {
b9f1d9a4 411 BEGIN (bracketed_id_str ? SC_RETURN_BRACKETED_ID : INITIAL);
3f2d73f1 412 *loc = id_loc;
3f2d73f1 413 return ID;
e9955c83
AD
414 }
415}
416
b9f1d9a4
AR
417 /*--------------------------------.
418 | Scanning bracketed identifiers. |
419 `--------------------------------*/
420
421<SC_BRACKETED_ID>
422{
423 {id} {
872b52bc 424 if (bracketed_id_str)
b9f1d9a4 425 {
bb8e56ff
TR
426 complain (loc, complaint,
427 _("unexpected identifier in bracketed name: %s"),
428 quote (yytext));
b9f1d9a4
AR
429 }
430 else
431 {
e9690142
JD
432 bracketed_id_str = uniqstr_new (yytext);
433 bracketed_id_loc = *loc;
b9f1d9a4
AR
434 }
435 }
436 "]" {
437 BEGIN bracketed_id_context_state;
438 if (bracketed_id_str)
439 {
e9690142
JD
440 if (INITIAL == bracketed_id_context_state)
441 {
442 val->uniqstr = bracketed_id_str;
443 bracketed_id_str = 0;
444 *loc = bracketed_id_loc;
445 return BRACKETED_ID;
446 }
b9f1d9a4
AR
447 }
448 else
bb8e56ff 449 complain (loc, complaint, _("an identifier expected"));
b9f1d9a4
AR
450 }
451 . {
bb8e56ff 452 complain (loc, complaint, _("invalid character in bracketed name: %s"),
e9690142 453 quote (yytext));
b9f1d9a4
AR
454 }
455 <<EOF>> {
456 BEGIN bracketed_id_context_state;
457 unexpected_eof (bracketed_id_start, "]");
458 }
459}
460
461<SC_RETURN_BRACKETED_ID>
462{
463 . {
464 ROLLBACK_CURRENT_TOKEN;
465 val->uniqstr = bracketed_id_str;
466 bracketed_id_str = 0;
467 *loc = bracketed_id_loc;
468 BEGIN INITIAL;
469 return BRACKETED_ID;
470 }
471}
472
e9955c83 473
d8d3f94a 474 /*---------------------------------------------------------------.
ae93e4e4 475 | Scanning a Yacc comment. The initial '/ *' is already eaten. |
d8d3f94a 476 `---------------------------------------------------------------*/
e9955c83 477
d8d3f94a 478<SC_YACC_COMMENT>
e9955c83 479{
3f2d73f1 480 "*/" BEGIN context_state;
e9690142 481 .|\n ;
aa418041 482 <<EOF>> unexpected_eof (token_start, "*/"); BEGIN context_state;
d8d3f94a
PE
483}
484
485
486 /*------------------------------------------------------------.
ae93e4e4 487 | Scanning a C comment. The initial '/ *' is already eaten. |
d8d3f94a
PE
488 `------------------------------------------------------------*/
489
490<SC_COMMENT>
491{
3f2d73f1 492 "*"{splice}"/" STRING_GROW; BEGIN context_state;
e9690142 493 <<EOF>> unexpected_eof (token_start, "*/"); BEGIN context_state;
e9955c83
AD
494}
495
496
d8d3f94a 497 /*--------------------------------------------------------------.
ae93e4e4 498 | Scanning a line comment. The initial '//' is already eaten. |
d8d3f94a
PE
499 `--------------------------------------------------------------*/
500
501<SC_LINE_COMMENT>
502{
e9690142
JD
503 "\n" STRING_GROW; BEGIN context_state;
504 {splice} STRING_GROW;
505 <<EOF>> BEGIN context_state;
d8d3f94a
PE
506}
507
508
4febdd96
PE
509 /*------------------------------------------------.
510 | Scanning a Bison string, including its escapes. |
511 | The initial quote is already eaten. |
512 `------------------------------------------------*/
e9955c83
AD
513
514<SC_ESCAPED_STRING>
515{
47aee066
JD
516 "\""|"\n" {
517 if (yytext[0] == '\n')
518 unexpected_newline (token_start, "\"");
519 STRING_FINISH;
520 loc->start = token_start;
521 val->chars = last_string;
522 BEGIN INITIAL;
523 return STRING;
524 }
525 <<EOF>> {
526 unexpected_eof (token_start, "\"");
41141c56 527 STRING_FINISH;
3f2d73f1 528 loc->start = token_start;
223ff46e 529 val->chars = last_string;
a706a1cc 530 BEGIN INITIAL;
e9955c83
AD
531 return STRING;
532 }
e9955c83
AD
533}
534
4febdd96
PE
535 /*----------------------------------------------------------.
536 | Scanning a Bison character literal, decoding its escapes. |
e9690142 537 | The initial quote is already eaten. |
4febdd96 538 `----------------------------------------------------------*/
e9955c83
AD
539
540<SC_ESCAPED_CHARACTER>
541{
47aee066 542 "'"|"\n" {
41141c56 543 STRING_FINISH;
3f2d73f1 544 loc->start = token_start;
dfaa4860 545 val->character = last_string[0];
3208e3f4
JD
546 {
547 /* FIXME: Eventually, make these errors. */
dfaa4860
JD
548 if (last_string[0] == '\0')
549 {
bb8e56ff 550 complain (loc, Wother, _("empty character literal"));
dfaa4860
JD
551 /* '\0' seems dangerous even if we are about to complain. */
552 val->character = '\'';
553 }
554 else if (last_string[1] != '\0')
bb8e56ff 555 complain (loc, Wother,
6fb8b256 556 _("extra characters in character literal"));
3208e3f4
JD
557 }
558 if (yytext[0] == '\n')
559 unexpected_newline (token_start, "'");
41141c56 560 STRING_FREE;
a706a1cc 561 BEGIN INITIAL;
58d7a1a1 562 return CHAR;
e9955c83 563 }
47aee066 564 <<EOF>> {
47aee066
JD
565 STRING_FINISH;
566 loc->start = token_start;
dfaa4860 567 val->character = last_string[0];
3208e3f4 568 {
3208e3f4 569 /* FIXME: Eventually, make these errors. */
dfaa4860
JD
570 if (last_string[0] == '\0')
571 {
bb8e56ff 572 complain (loc, Wother, _("empty character literal"));
dfaa4860
JD
573 /* '\0' seems dangerous even if we are about to complain. */
574 val->character = '\'';
575 }
576 else if (last_string[1] != '\0')
bb8e56ff 577 complain (loc, Wother,
6fb8b256 578 _("extra characters in character literal"));
3208e3f4
JD
579 }
580 unexpected_eof (token_start, "'");
47aee066
JD
581 STRING_FREE;
582 BEGIN INITIAL;
583 return CHAR;
584 }
4febdd96 585}
a706a1cc 586
cb823b6f
AD
587 /*-----------------------------------------------------------.
588 | Scanning a Bison nested tag. The initial angle bracket is |
589 | already eaten. |
590 `-----------------------------------------------------------*/
591
592<SC_TAG>
4febdd96 593{
cb823b6f
AD
594 ">" {
595 --nesting;
596 if (nesting < 0)
597 {
598 STRING_FINISH;
599 loc->start = token_start;
600 val->uniqstr = uniqstr_new (last_string);
601 STRING_FREE;
602 BEGIN INITIAL;
603 return TAG;
604 }
605 STRING_GROW;
606 }
607
608 [^<>]+ STRING_GROW;
609 "<"+ STRING_GROW; nesting += yyleng;
e9955c83 610
cb823b6f
AD
611 <<EOF>> {
612 unexpected_eof (token_start, ">");
613 STRING_FINISH;
614 loc->start = token_start;
615 val->uniqstr = uniqstr_new (last_string);
616 STRING_FREE;
617 BEGIN INITIAL;
618 return TAG;
619 }
620}
e9955c83
AD
621
622 /*----------------------------.
623 | Decode escaped characters. |
624 `----------------------------*/
625
626<SC_ESCAPED_STRING,SC_ESCAPED_CHARACTER>
627{
d8d3f94a 628 \\[0-7]{1,3} {
4517da37 629 unsigned long int c = strtoul (yytext + 1, NULL, 8);
c2724603 630 if (!c || UCHAR_MAX < c)
bb8e56ff 631 complain (loc, complaint, _("invalid number after \\-escape: %s"),
c2724603 632 yytext+1);
e9955c83 633 else
223ff46e 634 obstack_1grow (&obstack_for_string, c);
e9955c83
AD
635 }
636
6b0d38ab 637 \\x[0-9abcdefABCDEF]+ {
4517da37
PE
638 verify (UCHAR_MAX < ULONG_MAX);
639 unsigned long int c = strtoul (yytext + 2, NULL, 16);
c2724603 640 if (!c || UCHAR_MAX < c)
bb8e56ff 641 complain (loc, complaint, _("invalid number after \\-escape: %s"),
c2724603 642 yytext+1);
d8d3f94a 643 else
223ff46e 644 obstack_1grow (&obstack_for_string, c);
e9955c83
AD
645 }
646
e9690142
JD
647 \\a obstack_1grow (&obstack_for_string, '\a');
648 \\b obstack_1grow (&obstack_for_string, '\b');
649 \\f obstack_1grow (&obstack_for_string, '\f');
650 \\n obstack_1grow (&obstack_for_string, '\n');
651 \\r obstack_1grow (&obstack_for_string, '\r');
652 \\t obstack_1grow (&obstack_for_string, '\t');
653 \\v obstack_1grow (&obstack_for_string, '\v');
412f8a59
PE
654
655 /* \\[\"\'?\\] would be shorter, but it confuses xgettext. */
223ff46e 656 \\("\""|"'"|"?"|"\\") obstack_1grow (&obstack_for_string, yytext[1]);
412f8a59 657
6b0d38ab 658 \\(u|U[0-9abcdefABCDEF]{4})[0-9abcdefABCDEF]{4} {
d8d3f94a 659 int c = convert_ucn_to_byte (yytext);
c2724603 660 if (c <= 0)
bb8e56ff 661 complain (loc, complaint, _("invalid number after \\-escape: %s"),
c2724603 662 yytext+1);
d8d3f94a 663 else
223ff46e 664 obstack_1grow (&obstack_for_string, c);
d8d3f94a 665 }
e9690142 666 \\(.|\n) {
c2724603 667 char const *p = yytext + 1;
e6c849d8 668 /* Quote only if escaping won't make the character visible. */
457bf919 669 if (c_isspace ((unsigned char) *p) && c_isprint ((unsigned char) *p))
e6c849d8 670 p = quote (p);
c2724603
JD
671 else
672 p = quotearg_style_mem (escape_quoting_style, p, 1);
bb8e56ff 673 complain (loc, complaint, _("invalid character after \\-escape: %s"),
6fb8b256 674 p);
e9955c83
AD
675 }
676}
677
4febdd96
PE
678 /*--------------------------------------------.
679 | Scanning user-code characters and strings. |
680 `--------------------------------------------*/
e9955c83 681
4febdd96
PE
682<SC_CHARACTER,SC_STRING>
683{
e9690142 684 {splice}|\\{splice}[^\n\[\]] STRING_GROW;
4febdd96 685}
e9955c83
AD
686
687<SC_CHARACTER>
688{
e9690142
JD
689 "'" STRING_GROW; BEGIN context_state;
690 \n unexpected_newline (token_start, "'"); BEGIN context_state;
691 <<EOF>> unexpected_eof (token_start, "'"); BEGIN context_state;
e9955c83
AD
692}
693
e9955c83
AD
694<SC_STRING>
695{
e9690142
JD
696 "\"" STRING_GROW; BEGIN context_state;
697 \n unexpected_newline (token_start, "\""); BEGIN context_state;
698 <<EOF>> unexpected_eof (token_start, "\""); BEGIN context_state;
e9955c83
AD
699}
700
701
702 /*---------------------------------------------------.
703 | Strings, comments etc. can be found in user code. |
704 `---------------------------------------------------*/
705
ca2a6d15 706<SC_BRACED_CODE,SC_PROLOGUE,SC_EPILOGUE,SC_PREDICATE>
e9955c83 707{
3f2d73f1
PE
708 "'" {
709 STRING_GROW;
710 context_state = YY_START;
711 token_start = loc->start;
712 BEGIN SC_CHARACTER;
713 }
714 "\"" {
715 STRING_GROW;
716 context_state = YY_START;
717 token_start = loc->start;
718 BEGIN SC_STRING;
719 }
720 "/"{splice}"*" {
721 STRING_GROW;
722 context_state = YY_START;
723 token_start = loc->start;
724 BEGIN SC_COMMENT;
725 }
726 "/"{splice}"/" {
727 STRING_GROW;
728 context_state = YY_START;
729 BEGIN SC_LINE_COMMENT;
730 }
e9955c83
AD
731}
732
733
624a35e2 734
58d7a1a1 735 /*-----------------------------------------------------------.
ca2a6d15
PH
736 | Scanning some code in braces (actions, predicates). The |
737 | initial "{" is already eaten. |
58d7a1a1 738 `-----------------------------------------------------------*/
e9955c83 739
ca2a6d15 740<SC_BRACED_CODE,SC_PREDICATE>
e9955c83 741{
cb823b6f
AD
742 "{"|"<"{splice}"%" STRING_GROW; nesting++;
743 "%"{splice}">" STRING_GROW; nesting--;
ca2a6d15 744
ae93e4e4
JM
745 /* Tokenize '<<%' correctly (as '<<' '%') rather than incorrectly
746 (as '<' '<%'). */
ca2a6d15
PH
747 "<"{splice}"<" STRING_GROW;
748
749 <<EOF>> {
750 int token = (YY_START == SC_BRACED_CODE) ? BRACED_CODE : BRACED_PREDICATE;
751 unexpected_eof (code_start, "}");
752 STRING_FINISH;
753 loc->start = code_start;
754 val->code = last_string;
755 BEGIN INITIAL;
756 return token;
757 }
758}
759
760<SC_BRACED_CODE>
761{
e9955c83 762 "}" {
25522739
PE
763 obstack_1grow (&obstack_for_string, '}');
764
cb823b6f
AD
765 --nesting;
766 if (nesting < 0)
e9955c83 767 {
e9690142
JD
768 STRING_FINISH;
769 loc->start = code_start;
770 val->code = last_string;
771 BEGIN INITIAL;
772 return BRACED_CODE;
e9955c83
AD
773 }
774 }
ca2a6d15 775}
e9955c83 776
ca2a6d15
PH
777<SC_PREDICATE>
778{
779 "}" {
780 --nesting;
781 if (nesting < 0)
782 {
e9690142
JD
783 STRING_FINISH;
784 loc->start = code_start;
785 val->code = last_string;
786 BEGIN INITIAL;
787 return BRACED_PREDICATE;
ca2a6d15
PH
788 }
789 else
790 obstack_1grow (&obstack_for_string, '}');
47aee066 791 }
e9955c83
AD
792}
793
e9955c83
AD
794 /*--------------------------------------------------------------.
795 | Scanning some prologue: from "%{" (already scanned) to "%}". |
796 `--------------------------------------------------------------*/
797
798<SC_PROLOGUE>
799{
800 "%}" {
41141c56 801 STRING_FINISH;
3f2d73f1 802 loc->start = code_start;
223ff46e 803 val->chars = last_string;
a706a1cc 804 BEGIN INITIAL;
e9955c83
AD
805 return PROLOGUE;
806 }
807
47aee066
JD
808 <<EOF>> {
809 unexpected_eof (code_start, "%}");
810 STRING_FINISH;
811 loc->start = code_start;
812 val->chars = last_string;
813 BEGIN INITIAL;
814 return PROLOGUE;
815 }
e9955c83
AD
816}
817
818
819 /*---------------------------------------------------------------.
820 | Scanning the epilogue (everything after the second "%%", which |
d8d3f94a 821 | has already been eaten). |
e9955c83
AD
822 `---------------------------------------------------------------*/
823
824<SC_EPILOGUE>
825{
e9955c83 826 <<EOF>> {
41141c56 827 STRING_FINISH;
3f2d73f1 828 loc->start = code_start;
223ff46e 829 val->chars = last_string;
a706a1cc 830 BEGIN INITIAL;
e9955c83
AD
831 return EPILOGUE;
832 }
833}
834
835
4febdd96
PE
836 /*-----------------------------------------------------.
837 | By default, grow the string obstack with the input. |
838 `-----------------------------------------------------*/
839
e9690142
JD
840<SC_COMMENT,SC_LINE_COMMENT,SC_BRACED_CODE,SC_PREDICATE,SC_PROLOGUE,SC_EPILOGUE,SC_STRING,SC_CHARACTER,SC_ESCAPED_STRING,SC_ESCAPED_CHARACTER>. |
841 <SC_COMMENT,SC_LINE_COMMENT,SC_BRACED_CODE,SC_PREDICATE,SC_PROLOGUE,SC_EPILOGUE>\n STRING_GROW;
4febdd96 842
e9955c83
AD
843%%
844
6c30d641
PE
/* Fill BUF with at most SIZE bytes read from FP and return how many
   bytes BUF holds afterwards.  Carriage returns never reach the
   caller: a "\r\n" pair and a lone '\r' both become a single '\n'.
   The buffer is compacted in place, so the returned count may be
   smaller than the number of bytes actually read.  */

static size_t
no_cr_read (FILE *fp, char *buf, size_t size)
{
  size_t const nread = fread (buf, 1, size, fp);
  char *cr = nread ? memchr (buf, '\r', nread) : NULL;

  /* Nothing read, or nothing to translate.  */
  if (!cr)
    return nread;

  char const *const end = buf + nread;
  char const *src = cr + 1;     /* Next byte to examine.  */
  char *dst = cr + 1;           /* Next byte to store.  */

  for (;;)
    {
      /* DST[-1] is a '\r': rewrite it as '\n', then swallow a '\n'
         that directly follows it.  */
      dst[-1] = '\n';
      if (src != end)
        {
          if (*src == '\n')
            src++;
        }
      else
        {
          /* The '\r' was the last byte of this chunk, so its
             partner '\n' (if any) is still in the stream.  Peek at
             it; push it back when it is something else.  */
          int next = getc (fp);
          if (next != '\n' && ungetc (next, fp) != next)
            break;
        }

      /* Shift the following bytes down, stopping right after the
         next '\r' has been copied.  */
      for (;;)
        {
          if (src == end)
            return dst - buf;
          char c = *src++;
          *dst++ = c;
          if (c == '\r')
            break;
        }
    }

  return dst - buf;
}
890
891
f25bfb75 892
1452af69
PE
893/*------------------------------------------------------.
894| Scan NUMBER for a base-BASE integer at location LOC. |
895`------------------------------------------------------*/
896
897static unsigned long int
898scan_integer (char const *number, int base, location loc)
899{
4517da37
PE
900 verify (INT_MAX < ULONG_MAX);
901 unsigned long int num = strtoul (number, NULL, base);
902
903 if (INT_MAX < num)
1452af69 904 {
bb8e56ff 905 complain (&loc, complaint, _("integer out of range: %s"),
6fb8b256 906 quote (number));
1452af69
PE
907 num = INT_MAX;
908 }
4517da37 909
1452af69
PE
910 return num;
911}
912
913
d8d3f94a
PE
/*------------------------------------------------------------------.
| Convert universal character name UCN to a single-byte character,  |
| and return that character.  Return -1 if UCN does not correspond  |
| to a single-byte character.                                       |
|                                                                   |
| The hexadecimal digits are read starting at UCN + 2, i.e. after   |
| the two characters that introduce the escape.                     |
`------------------------------------------------------------------*/

static int
convert_ucn_to_byte (char const *ucn)
{
  verify (UCHAR_MAX <= INT_MAX);
  unsigned long int code = strtoul (ucn + 2, NULL, 16);

  /* FIXME: Currently we assume Unicode-compatible unibyte characters
     on ASCII hosts (i.e., Latin-1 on hosts with 8-bit bytes).  On
     non-ASCII hosts we support only the portable C character set.
     These limitations should be removed once we add support for
     multibyte characters.  */

  if (UCHAR_MAX < code)
    return -1;

#if ! ('$' == 0x24 && '@' == 0x40 && '`' == 0x60 && '~' == 0x7e)
  {
    /* A non-ASCII host.  Use CODE to index into a table of the C
       basic execution character set, which is guaranteed to exist on
       all Standard C platforms.  This table also includes '$', '@',
       and '`', which are not in the basic execution character set but
       which are unibyte characters on all the platforms that we know
       about.  */
    static signed char const table[] =
      {
        '\0',   -1,   -1,   -1,   -1,   -1,   -1, '\a',
        '\b', '\t', '\n', '\v', '\f', '\r',   -1,   -1,
          -1,   -1,   -1,   -1,   -1,   -1,   -1,   -1,
          -1,   -1,   -1,   -1,   -1,   -1,   -1,   -1,
         ' ',  '!',  '"',  '#',  '$',  '%',  '&', '\'',
         '(',  ')',  '*',  '+',  ',',  '-',  '.',  '/',
         '0',  '1',  '2',  '3',  '4',  '5',  '6',  '7',
         '8',  '9',  ':',  ';',  '<',  '=',  '>',  '?',
         '@',  'A',  'B',  'C',  'D',  'E',  'F',  'G',
         'H',  'I',  'J',  'K',  'L',  'M',  'N',  'O',
         'P',  'Q',  'R',  'S',  'T',  'U',  'V',  'W',
         'X',  'Y',  'Z',  '[', '\\',  ']',  '^',  '_',
         '`',  'a',  'b',  'c',  'd',  'e',  'f',  'g',
         'h',  'i',  'j',  'k',  'l',  'm',  'n',  'o',
         'p',  'q',  'r',  's',  't',  'u',  'v',  'w',
         'x',  'y',  'z',  '{',  '|',  '}',  '~'
      };

    /* Return the table entry (or the -1 sentinel) as a signed value.
       Storing -1 back into the unsigned CODE and returning that would
       yield ULONG_MAX, whose conversion to int is
       implementation-defined.  */
    return code < sizeof table ? table[code] : -1;
  }
#else
  /* Here CODE <= UCHAR_MAX <= INT_MAX, so the conversion is exact.  */
  return (int) code;
#endif
}
969
970
900c5db5 971/*----------------------------------------------------------------.
ae93e4e4 972| Handle '#line INT "FILE"'. ARGS has already skipped '#line '. |
900c5db5
AD
973`----------------------------------------------------------------*/
974
975static void
4517da37 976handle_syncline (char *args, location loc)
900c5db5 977{
4517da37
PE
978 char *after_num;
979 unsigned long int lineno = strtoul (args, &after_num, 10);
84526bf3
AD
980 char *file = strchr (after_num, '"') + 1;
981 *strchr (file, '"') = '\0';
4517da37
PE
982 if (INT_MAX <= lineno)
983 {
bb8e56ff 984 complain (&loc, Wother, _("line number overflow"));
4517da37
PE
985 lineno = INT_MAX;
986 }
e9071366 987 current_file = uniqstr_new (file);
0c8e079f 988 boundary_set (&scanner_cursor, current_file, lineno, 1);
4517da37
PE
989}
990
991
4febdd96
PE
992/*----------------------------------------------------------------.
993| For a token or comment starting at START, report message MSGID, |
e9690142
JD
994| which should say that an end marker was found before |
995| the expected TOKEN_END. |
4febdd96
PE
996`----------------------------------------------------------------*/
997
998static void
999unexpected_end (boundary start, char const *msgid, char const *token_end)
1000{
1001 location loc;
1002 loc.start = start;
1003 loc.end = scanner_cursor;
4a9cd8f2
AD
1004 token_end = quote (token_end);
1005 // Instead of '\'', display "'".
f518dbaf 1006 if (STREQ (token_end, "'\\''"))
4a9cd8f2 1007 token_end = "\"'\"";
bb8e56ff 1008 complain (&loc, complaint, _(msgid), token_end);
4febdd96
PE
1009}
1010
1011
3f2d73f1
PE
1012/*------------------------------------------------------------------------.
1013| Report an unexpected EOF in a token or comment starting at START. |
1014| An end of file was encountered and the expected TOKEN_END was missing. |
3f2d73f1 1015`------------------------------------------------------------------------*/
a706a1cc
PE
1016
1017static void
aa418041 1018unexpected_eof (boundary start, char const *token_end)
a706a1cc 1019{
4a9cd8f2 1020 unexpected_end (start, N_("missing %s at end of file"), token_end);
4febdd96
PE
1021}
1022
1023
1024/*----------------------------------------.
1025| Likewise, but for unexpected newlines. |
1026`----------------------------------------*/
1027
1028static void
1029unexpected_newline (boundary start, char const *token_end)
1030{
4a9cd8f2 1031 unexpected_end (start, N_("missing %s at end of line"), token_end);
a706a1cc
PE
1032}
1033
1034
f25bfb75
AD
1035/*-------------------------.
1036| Initialize the scanner. |
1037`-------------------------*/
1038
1d6412ad 1039void
e9071366 1040gram_scanner_initialize (void)
1d6412ad 1041{
223ff46e 1042 obstack_init (&obstack_for_string);
1d6412ad
AD
1043}
1044
1045
f25bfb75
AD
1046/*-----------------------------------------------.
1047| Free all the memory allocated to the scanner. |
1048`-----------------------------------------------*/
1049
4cdb01db 1050void
e9071366 1051gram_scanner_free (void)
4cdb01db 1052{
223ff46e 1053 obstack_free (&obstack_for_string, 0);
536545f3 1054 /* Reclaim Flex's buffers. */
580b8926 1055 yylex_destroy ();
4cdb01db 1056}