]> git.saurik.com Git - bison.git/blame - src/scan-gram.l
regen
[bison.git] / src / scan-gram.l
CommitLineData
e9955c83 1/* Bison Grammar Scanner -*- C -*-
3b1e470c 2
7d6bad19 3 Copyright (C) 2002-2013 Free Software Foundation, Inc.
e9955c83
AD
4
5 This file is part of Bison, the GNU Compiler Compiler.
6
f16b0819 7 This program is free software: you can redistribute it and/or modify
e9955c83 8 it under the terms of the GNU General Public License as published by
f16b0819 9 the Free Software Foundation, either version 3 of the License, or
e9955c83
AD
10 (at your option) any later version.
11
12 This program is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
16
17 You should have received a copy of the GNU General Public License
f16b0819 18 along with this program. If not, see <http://www.gnu.org/licenses/>. */
e9955c83 19
2062d72d 20%option debug nodefault noinput noyywrap never-interactive
e9955c83
AD
21%option prefix="gram_" outfile="lex.yy.c"
22
23%{
4f6e011e
PE
24/* Work around a bug in flex 2.5.31. See Debian bug 333231
25 <http://bugs.debian.org/cgi-bin/bugreport.cgi?bug=333231>. */
26#undef gram_wrap
27#define gram_wrap() 1
28
e9071366 29#define FLEX_PREFIX(Id) gram_ ## Id
0305d25e 30#include <src/flex-scanner.h>
223ff46e 31
0305d25e
AD
32#include <src/complain.h>
33#include <src/files.h>
2062d72d 34#include <src/getargs.h>
0305d25e
AD
35#include <src/gram.h>
36#include <quotearg.h>
37#include <src/reader.h>
38#include <src/uniqstr.h>
e9955c83 39
457bf919 40#include <c-ctype.h>
e9071366
AD
41#include <mbswidth.h>
42#include <quote.h>
43
0305d25e 44#include <src/scan-gram.h>
e9071366
AD
45
46#define YY_DECL GRAM_LEX_DECL
2346344a 47
e9690142
JD
48#define YY_USER_INIT \
49 code_start = scanner_cursor = loc->start; \
dc9701e8 50
3f2d73f1 51/* Location of scanner cursor. */
4a678af8 52static boundary scanner_cursor;
41141c56 53
e9071366 54#define YY_USER_ACTION location_compute (loc, &scanner_cursor, yytext, yyleng);
d8d3f94a 55
6c30d641 56static size_t no_cr_read (FILE *, char *, size_t);
d8d3f94a
PE
57#define YY_INPUT(buf, result, size) ((result) = no_cr_read (yyin, buf, size))
58
a7706735
AD
59#define RETURN_PERCENT_PARAM(Value) \
60 RETURN_VALUE(PERCENT_PARAM, param, param_ ## Value)
61
62#define RETURN_PERCENT_FLAG(Value) \
63 RETURN_VALUE(PERCENT_FLAG, uniqstr, uniqstr_new (Value))
64
65#define RETURN_VALUE(Token, Field, Value) \
ba061fa6 66 do { \
a7706735
AD
67 val->Field = Value; \
68 return Token; \
ba061fa6
AD
69 } while (0)
70
b9f1d9a4
AR
71#define ROLLBACK_CURRENT_TOKEN \
72 do { \
e9690142 73 scanner_cursor.column -= mbsnwidth (yytext, yyleng, 0); \
b9f1d9a4
AR
74 yyless (0); \
75 } while (0)
ba061fa6 76
2062d72d
TR
77#define DEPRECATED(Msg) \
78 do { \
79 size_t i; \
1dc927a7 80 deprecated_directive (loc, yytext, Msg); \
2062d72d
TR
81 scanner_cursor.column -= mbsnwidth (Msg, strlen (Msg), 0); \
82 for (i = strlen (Msg); i != 0; --i) \
83 unput (Msg[i - 1]); \
84 } while (0)
85
7ec2d4cd 86/* A string representing the most recently saved token. */
7c0c6181 87static char *last_string;
7ec2d4cd 88
872b52bc 89/* Bracketed identifier. */
b9f1d9a4
AR
90static uniqstr bracketed_id_str = 0;
91static location bracketed_id_loc;
92static boundary bracketed_id_start;
93static int bracketed_id_context_state = 0;
94
7ec2d4cd 95void
e9071366 96gram_scanner_last_string_free (void)
7ec2d4cd 97{
41141c56 98 STRING_FREE;
7ec2d4cd 99}
e9955c83 100
4517da37 101static void handle_syncline (char *, location);
1452af69 102static unsigned long int scan_integer (char const *p, int base, location loc);
d8d3f94a 103static int convert_ucn_to_byte (char const *hex_text);
aa418041 104static void unexpected_eof (boundary, char const *);
4febdd96 105static void unexpected_newline (boundary, char const *);
e9955c83
AD
106
107%}
e9071366
AD
108 /* A C-like comment in directives/rules. */
109%x SC_YACC_COMMENT
110 /* Strings and characters in directives/rules. */
e9955c83 111%x SC_ESCAPED_STRING SC_ESCAPED_CHARACTER
e9071366 112 /* An identifier was just read in directives/rules. Special state
ae93e4e4 113 to capture the sequence 'identifier :'. */
e9071366 114%x SC_AFTER_IDENTIFIER
cb823b6f
AD
115 /* A complex tag, with nested angle brackets. */
116%x SC_TAG
e9071366 117
ca2a6d15 118 /* Four types of user code:
ae93e4e4 119 - prologue (code between '%{' '%}' in the first section, before %%);
e9071366 120 - actions, printers, union, etc. (between braces in the middle section);
da5462d4 121 - epilogue (everything after the second %%).
ae93e4e4 122 - predicate (code between '%?{' and '}' in middle section); */
ca2a6d15 123%x SC_PROLOGUE SC_BRACED_CODE SC_EPILOGUE SC_PREDICATE
e9071366
AD
124 /* C and C++ comments in code. */
125%x SC_COMMENT SC_LINE_COMMENT
126 /* Strings and characters in code. */
127%x SC_STRING SC_CHARACTER
872b52bc 128 /* Bracketed identifiers support. */
b9f1d9a4 129%x SC_BRACKETED_ID SC_RETURN_BRACKETED_ID
e9955c83 130
e9690142 131letter [.abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ_]
68ac70bc 132notletter [^.abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ_]{-}[%\{]
e9690142 133id {letter}({letter}|[-0-9])*
e9690142 134int [0-9]+
d8d3f94a
PE
135
136/* POSIX says that a tag must be both an id and a C union member, but
137 historically almost any character is allowed in a tag. We disallow
cb823b6f
AD
138 NUL, as this simplifies our implementation. We disallow angle
139 bracket to match them in nested pairs: several languages use them
140 for generics/template types. */
e9690142 141tag [^\0<>]+
d8d3f94a
PE
142
143/* Zero or more instances of backslash-newline. Following GCC, allow
144 white space between the backslash and the newline. */
e9690142 145splice (\\[ \f\t\v]*\n)*
e9955c83 146
2062d72d
TR
147/* An equal sign, with optional leading whitespaces. This is used in some
148 deprecated constructs. */
149eqopt ([[:space:]]*=)?
150
e9955c83
AD
151%%
152%{
cb823b6f
AD
153 /* Nesting level. Either for nested braces, or nested angle brackets
154 (but not mixed). */
84f6a6ca 155 int nesting PACIFY_CC (= 0);
1a9e39f1 156
3f2d73f1 157 /* Parent context state, when applicable. */
84f6a6ca 158 int context_state PACIFY_CC (= 0);
a706a1cc 159
3f2d73f1 160 /* Location of most recent identifier, when applicable. */
84f6a6ca 161 location id_loc PACIFY_CC (= empty_location);
3f2d73f1 162
a2bc9dbc
PE
163 /* Where containing code started, when applicable. Its initial
164 value is relevant only when yylex is invoked in the SC_EPILOGUE
165 start condition. */
166 boundary code_start = scanner_cursor;
3f2d73f1 167
223ff46e
PE
168 /* Where containing comment or string or character literal started,
169 when applicable. */
84f6a6ca 170 boundary token_start PACIFY_CC (= scanner_cursor);
e9955c83
AD
171%}
172
173
3f2d73f1
PE
174 /*-----------------------.
175 | Scanning white space. |
176 `-----------------------*/
177
b9f1d9a4 178<INITIAL,SC_AFTER_IDENTIFIER,SC_BRACKETED_ID,SC_RETURN_BRACKETED_ID>
3f2d73f1 179{
4febdd96 180 /* Comments and white space. */
6fb8b256 181 "," {
bb8e56ff 182 complain (loc, Wother, _("stray ',' treated as white space"));
6fb8b256 183 }
4febdd96 184 [ \f\n\t\v] |
96029914 185 "//".* continue;
83adb046
PE
186 "/*" {
187 token_start = loc->start;
188 context_state = YY_START;
189 BEGIN SC_YACC_COMMENT;
190 }
3f2d73f1
PE
191
192 /* #line directives are not documented, and may be withdrawn or
193 modified in future versions of Bison. */
03dbf629 194 ^"#line "{int}(" \"".*"\"")?"\n" {
4517da37 195 handle_syncline (yytext + sizeof "#line " - 1, *loc);
3f2d73f1
PE
196 }
197}
198
199
e9955c83
AD
200 /*----------------------------.
201 | Scanning Bison directives. |
202 `----------------------------*/
a7c09cba
DJ
203
204 /* For directives that are also command line options, the regex must be
e9690142 205 "%..."
a7c09cba
DJ
206 after "[-_]"s are removed, and the directive must match the --long
207 option name, with a single string argument. Otherwise, add exceptions
208 to ../build-aux/cross-options.pl. */
209
e9955c83
AD
210<INITIAL>
211{
deef2a0a 212 "%binary" return PERCENT_NONASSOC;
136a0f76 213 "%code" return PERCENT_CODE;
fa819509 214 "%debug" RETURN_PERCENT_FLAG("parse.trace");
2062d72d 215 "%default-prec" return PERCENT_DEFAULT_PREC;
deef2a0a
AD
216 "%define" return PERCENT_DEFINE;
217 "%defines" return PERCENT_DEFINES;
218 "%destructor" return PERCENT_DESTRUCTOR;
219 "%dprec" return PERCENT_DPREC;
ae2b48f5 220 "%empty" return PERCENT_EMPTY;
2062d72d 221 "%error-verbose" return PERCENT_ERROR_VERBOSE;
deef2a0a 222 "%expect" return PERCENT_EXPECT;
2062d72d 223 "%expect-rr" return PERCENT_EXPECT_RR;
deef2a0a 224 "%file-prefix" return PERCENT_FILE_PREFIX;
2062d72d 225 "%fixed-output-files" return PERCENT_YACC;
deef2a0a
AD
226 "%initial-action" return PERCENT_INITIAL_ACTION;
227 "%glr-parser" return PERCENT_GLR_PARSER;
228 "%language" return PERCENT_LANGUAGE;
229 "%left" return PERCENT_LEFT;
a7706735 230 "%lex-param" RETURN_PERCENT_PARAM(lex);
bc0f5737 231 "%locations" RETURN_PERCENT_FLAG("locations");
deef2a0a 232 "%merge" return PERCENT_MERGE;
2062d72d
TR
233 "%name-prefix" return PERCENT_NAME_PREFIX;
234 "%no-default-prec" return PERCENT_NO_DEFAULT_PREC;
235 "%no-lines" return PERCENT_NO_LINES;
deef2a0a
AD
236 "%nonassoc" return PERCENT_NONASSOC;
237 "%nondeterministic-parser" return PERCENT_NONDETERMINISTIC_PARSER;
238 "%nterm" return PERCENT_NTERM;
239 "%output" return PERCENT_OUTPUT;
a7706735
AD
240 "%param" RETURN_PERCENT_PARAM(both);
241 "%parse-param" RETURN_PERCENT_PARAM(parse);
deef2a0a 242 "%prec" return PERCENT_PREC;
d78f0ac9 243 "%precedence" return PERCENT_PRECEDENCE;
deef2a0a 244 "%printer" return PERCENT_PRINTER;
2062d72d 245 "%pure-parser" RETURN_PERCENT_FLAG("api.pure");
deef2a0a
AD
246 "%require" return PERCENT_REQUIRE;
247 "%right" return PERCENT_RIGHT;
248 "%skeleton" return PERCENT_SKELETON;
249 "%start" return PERCENT_START;
250 "%term" return PERCENT_TOKEN;
251 "%token" return PERCENT_TOKEN;
2062d72d 252 "%token-table" return PERCENT_TOKEN_TABLE;
deef2a0a
AD
253 "%type" return PERCENT_TYPE;
254 "%union" return PERCENT_UNION;
255 "%verbose" return PERCENT_VERBOSE;
256 "%yacc" return PERCENT_YACC;
e9955c83 257
2062d72d
TR
258 /* deprecated */
259 "%default"[-_]"prec" DEPRECATED("%default-prec");
260 "%error"[-_]"verbose" DEPRECATED("%define parse.error verbose");
261 "%expect"[-_]"rr" DEPRECATED("%expect-rr");
262 "%file-prefix"{eqopt} DEPRECATED("%file-prefix");
263 "%fixed"[-_]"output"[-_]"files" DEPRECATED("%fixed-output-files");
264 "%name"[-_]"prefix"{eqopt} DEPRECATED("%name-prefix");
265 "%no"[-_]"default"[-_]"prec" DEPRECATED("%no-default-prec");
266 "%no"[-_]"lines" DEPRECATED("%no-lines");
267 "%output"{eqopt} DEPRECATED("%output");
268 "%pure"[-_]"parser" DEPRECATED("%pure-parser");
269 "%token"[-_]"table" DEPRECATED("%token-table");
270
68ac70bc 271 "%"{id}|"%"{notletter}([[:graph:]])+ {
bb8e56ff 272 complain (loc, complaint, _("invalid directive: %s"), quote (yytext));
412f8a59 273 }
900c5db5 274
e9955c83 275 "=" return EQUAL;
e9071366 276 "|" return PIPE;
e9955c83
AD
277 ";" return SEMICOLON;
278
3f2d73f1 279 {id} {
58d7a1a1 280 val->uniqstr = uniqstr_new (yytext);
3f2d73f1 281 id_loc = *loc;
b9f1d9a4 282 bracketed_id_str = NULL;
3f2d73f1 283 BEGIN SC_AFTER_IDENTIFIER;
e9955c83
AD
284 }
285
d8d3f94a 286 {int} {
1452af69
PE
287 val->integer = scan_integer (yytext, 10, *loc);
288 return INT;
289 }
290 0[xX][0-9abcdefABCDEF]+ {
291 val->integer = scan_integer (yytext, 16, *loc);
d8d3f94a
PE
292 return INT;
293 }
e9955c83 294
84a1cb5a
AD
295 /* Identifiers may not start with a digit. Yet, don't silently
296 accept "1FOO" as "1 FOO". */
297 {int}{id} {
bb8e56ff 298 complain (loc, complaint, _("invalid identifier: %s"), quote (yytext));
84a1cb5a
AD
299 }
300
3208e3f4 301 /* Characters. */
e9690142 302 "'" token_start = loc->start; BEGIN SC_ESCAPED_CHARACTER;
e9955c83
AD
303
304 /* Strings. */
e9690142 305 "\"" token_start = loc->start; BEGIN SC_ESCAPED_STRING;
e9955c83
AD
306
307 /* Prologue. */
3f2d73f1 308 "%{" code_start = loc->start; BEGIN SC_PROLOGUE;
e9955c83
AD
309
310 /* Code in between braces. */
3f2d73f1
PE
311 "{" {
312 STRING_GROW;
cb823b6f 313 nesting = 0;
3f2d73f1
PE
314 code_start = loc->start;
315 BEGIN SC_BRACED_CODE;
316 }
e9955c83 317
ca2a6d15
PH
318 /* Semantic predicate. */
319 "%?"[ \f\n\t\v]*"{" {
320 nesting = 0;
321 code_start = loc->start;
322 BEGIN SC_PREDICATE;
323 }
324
e9955c83 325 /* A type. */
cb823b6f
AD
326 "<*>" return TAG_ANY;
327 "<>" return TAG_NONE;
d8d3f94a 328 "<"{tag}">" {
223ff46e 329 obstack_grow (&obstack_for_string, yytext + 1, yyleng - 2);
41141c56 330 STRING_FINISH;
223ff46e 331 val->uniqstr = uniqstr_new (last_string);
41141c56 332 STRING_FREE;
cb823b6f
AD
333 return TAG;
334 }
335 "<" {
336 nesting = 0;
337 token_start = loc->start;
338 BEGIN SC_TAG;
4cdb01db
AD
339 }
340
a706a1cc
PE
341 "%%" {
342 static int percent_percent_count;
e9955c83 343 if (++percent_percent_count == 2)
a2bc9dbc 344 BEGIN SC_EPILOGUE;
e9955c83
AD
345 return PERCENT_PERCENT;
346 }
347
b9f1d9a4
AR
348 "[" {
349 bracketed_id_str = NULL;
350 bracketed_id_start = loc->start;
351 bracketed_id_context_state = YY_START;
352 BEGIN SC_BRACKETED_ID;
353 }
354
68ac70bc 355 [^\[%A-Za-z0-9_<>{}\"\'*;|=/, \f\n\t\v]+|. {
c6b17724
AD
356 complain (loc, complaint, "%s: %s",
357 ngettext ("invalid character", "invalid characters", yyleng),
e42906f7 358 quote_mem (yytext, yyleng));
3f2d73f1 359 }
379f0ac8
PE
360
361 <<EOF>> {
362 loc->start = loc->end = scanner_cursor;
363 yyterminate ();
364 }
3f2d73f1
PE
365}
366
367
cb823b6f
AD
368 /*--------------------------------------------------------------.
369 | Supporting \0 complexifies our implementation for no expected |
370 | added value. |
371 `--------------------------------------------------------------*/
372
373<SC_ESCAPED_CHARACTER,SC_ESCAPED_STRING,SC_TAG>
374{
bb8e56ff 375 \0 complain (loc, complaint, _("invalid null character"));
cb823b6f
AD
376}
377
378
3f2d73f1
PE
379 /*-----------------------------------------------------------------.
380 | Scanning after an identifier, checking whether a colon is next. |
381 `-----------------------------------------------------------------*/
382
383<SC_AFTER_IDENTIFIER>
384{
b9f1d9a4 385 "[" {
872b52bc 386 if (bracketed_id_str)
b9f1d9a4 387 {
e9690142
JD
388 ROLLBACK_CURRENT_TOKEN;
389 BEGIN SC_RETURN_BRACKETED_ID;
390 *loc = id_loc;
391 return ID;
b9f1d9a4 392 }
872b52bc
AR
393 else
394 {
e9690142
JD
395 bracketed_id_start = loc->start;
396 bracketed_id_context_state = YY_START;
397 BEGIN SC_BRACKETED_ID;
872b52bc 398 }
b9f1d9a4 399 }
3f2d73f1 400 ":" {
b9f1d9a4 401 BEGIN (bracketed_id_str ? SC_RETURN_BRACKETED_ID : INITIAL);
3f2d73f1 402 *loc = id_loc;
3f2d73f1
PE
403 return ID_COLON;
404 }
405 . {
b9f1d9a4
AR
406 ROLLBACK_CURRENT_TOKEN;
407 BEGIN (bracketed_id_str ? SC_RETURN_BRACKETED_ID : INITIAL);
3f2d73f1 408 *loc = id_loc;
3f2d73f1
PE
409 return ID;
410 }
411 <<EOF>> {
b9f1d9a4 412 BEGIN (bracketed_id_str ? SC_RETURN_BRACKETED_ID : INITIAL);
3f2d73f1 413 *loc = id_loc;
3f2d73f1 414 return ID;
e9955c83
AD
415 }
416}
417
b9f1d9a4
AR
418 /*--------------------------------.
419 | Scanning bracketed identifiers. |
420 `--------------------------------*/
421
422<SC_BRACKETED_ID>
423{
424 {id} {
872b52bc 425 if (bracketed_id_str)
b9f1d9a4 426 {
bb8e56ff
TR
427 complain (loc, complaint,
428 _("unexpected identifier in bracketed name: %s"),
429 quote (yytext));
b9f1d9a4
AR
430 }
431 else
432 {
e9690142
JD
433 bracketed_id_str = uniqstr_new (yytext);
434 bracketed_id_loc = *loc;
b9f1d9a4
AR
435 }
436 }
437 "]" {
438 BEGIN bracketed_id_context_state;
439 if (bracketed_id_str)
440 {
e9690142
JD
441 if (INITIAL == bracketed_id_context_state)
442 {
443 val->uniqstr = bracketed_id_str;
444 bracketed_id_str = 0;
445 *loc = bracketed_id_loc;
446 return BRACKETED_ID;
447 }
b9f1d9a4
AR
448 }
449 else
bb8e56ff 450 complain (loc, complaint, _("an identifier expected"));
b9f1d9a4 451 }
68ac70bc
AD
452
453 [^\].A-Za-z0-9_/ \f\n\t\v]+|. {
c6b17724
AD
454 complain (loc, complaint, "%s: %s",
455 ngettext ("invalid character in bracketed name",
456 "invalid characters in bracketed name", yyleng),
e42906f7 457 quote_mem (yytext, yyleng));
b9f1d9a4 458 }
68ac70bc 459
b9f1d9a4
AR
460 <<EOF>> {
461 BEGIN bracketed_id_context_state;
462 unexpected_eof (bracketed_id_start, "]");
463 }
464}
465
466<SC_RETURN_BRACKETED_ID>
467{
468 . {
469 ROLLBACK_CURRENT_TOKEN;
470 val->uniqstr = bracketed_id_str;
471 bracketed_id_str = 0;
472 *loc = bracketed_id_loc;
473 BEGIN INITIAL;
474 return BRACKETED_ID;
475 }
476}
477
e9955c83 478
d8d3f94a 479 /*---------------------------------------------------------------.
ae93e4e4 480 | Scanning a Yacc comment. The initial '/ *' is already eaten. |
d8d3f94a 481 `---------------------------------------------------------------*/
e9955c83 482
d8d3f94a 483<SC_YACC_COMMENT>
e9955c83 484{
3f2d73f1 485 "*/" BEGIN context_state;
c6b17724 486 .|\n continue;
aa418041 487 <<EOF>> unexpected_eof (token_start, "*/"); BEGIN context_state;
d8d3f94a
PE
488}
489
490
491 /*------------------------------------------------------------.
ae93e4e4 492 | Scanning a C comment. The initial '/ *' is already eaten. |
d8d3f94a
PE
493 `------------------------------------------------------------*/
494
495<SC_COMMENT>
496{
3f2d73f1 497 "*"{splice}"/" STRING_GROW; BEGIN context_state;
e9690142 498 <<EOF>> unexpected_eof (token_start, "*/"); BEGIN context_state;
e9955c83
AD
499}
500
501
d8d3f94a 502 /*--------------------------------------------------------------.
ae93e4e4 503 | Scanning a line comment. The initial '//' is already eaten. |
d8d3f94a
PE
504 `--------------------------------------------------------------*/
505
506<SC_LINE_COMMENT>
507{
e9690142
JD
508 "\n" STRING_GROW; BEGIN context_state;
509 {splice} STRING_GROW;
510 <<EOF>> BEGIN context_state;
d8d3f94a
PE
511}
512
513
4febdd96
PE
514 /*------------------------------------------------.
515 | Scanning a Bison string, including its escapes. |
516 | The initial quote is already eaten. |
517 `------------------------------------------------*/
e9955c83
AD
518
519<SC_ESCAPED_STRING>
520{
c1b2677a 521 "\"" {
41141c56 522 STRING_FINISH;
3f2d73f1 523 loc->start = token_start;
223ff46e 524 val->chars = last_string;
a706a1cc 525 BEGIN INITIAL;
e9955c83
AD
526 return STRING;
527 }
c1b2677a
TR
528 <<EOF>> unexpected_eof (token_start, "\"");
529 "\n" unexpected_newline (token_start, "\"");
e9955c83
AD
530}
531
4febdd96
PE
532 /*----------------------------------------------------------.
533 | Scanning a Bison character literal, decoding its escapes. |
e9690142 534 | The initial quote is already eaten. |
4febdd96 535 `----------------------------------------------------------*/
e9955c83
AD
536
537<SC_ESCAPED_CHARACTER>
538{
c1b2677a 539 "'" {
47aee066
JD
540 STRING_FINISH;
541 loc->start = token_start;
dfaa4860 542 val->character = last_string[0];
c1b2677a
TR
543
544 /* FIXME: Eventually, make these errors. */
545 if (last_string[0] == '\0')
3208e3f4 546 {
c1b2677a
TR
547 complain (loc, Wother, _("empty character literal"));
548 /* '\0' seems dangerous even if we are about to complain. */
549 val->character = '\'';
3208e3f4 550 }
c1b2677a
TR
551 else if (last_string[1] != '\0')
552 complain (loc, Wother,
553 _("extra characters in character literal"));
47aee066
JD
554 STRING_FREE;
555 BEGIN INITIAL;
556 return CHAR;
557 }
c1b2677a
TR
558 "\n" unexpected_newline (token_start, "'");
559 <<EOF>> unexpected_eof (token_start, "'");
4febdd96 560}
a706a1cc 561
cb823b6f
AD
562 /*-----------------------------------------------------------.
563 | Scanning a Bison nested tag. The initial angle bracket is |
564 | already eaten. |
565 `-----------------------------------------------------------*/
566
567<SC_TAG>
4febdd96 568{
cb823b6f
AD
569 ">" {
570 --nesting;
571 if (nesting < 0)
572 {
573 STRING_FINISH;
574 loc->start = token_start;
575 val->uniqstr = uniqstr_new (last_string);
576 STRING_FREE;
577 BEGIN INITIAL;
578 return TAG;
579 }
580 STRING_GROW;
581 }
582
583 [^<>]+ STRING_GROW;
584 "<"+ STRING_GROW; nesting += yyleng;
e9955c83 585
c1b2677a 586 <<EOF>> unexpected_eof (token_start, ">");
cb823b6f 587}
e9955c83
AD
588
589 /*----------------------------.
590 | Decode escaped characters. |
591 `----------------------------*/
592
593<SC_ESCAPED_STRING,SC_ESCAPED_CHARACTER>
594{
d8d3f94a 595 \\[0-7]{1,3} {
4517da37 596 unsigned long int c = strtoul (yytext + 1, NULL, 8);
c2724603 597 if (!c || UCHAR_MAX < c)
bb8e56ff 598 complain (loc, complaint, _("invalid number after \\-escape: %s"),
c2724603 599 yytext+1);
e9955c83 600 else
223ff46e 601 obstack_1grow (&obstack_for_string, c);
e9955c83
AD
602 }
603
6b0d38ab 604 \\x[0-9abcdefABCDEF]+ {
4517da37
PE
605 verify (UCHAR_MAX < ULONG_MAX);
606 unsigned long int c = strtoul (yytext + 2, NULL, 16);
c2724603 607 if (!c || UCHAR_MAX < c)
bb8e56ff 608 complain (loc, complaint, _("invalid number after \\-escape: %s"),
c2724603 609 yytext+1);
d8d3f94a 610 else
223ff46e 611 obstack_1grow (&obstack_for_string, c);
e9955c83
AD
612 }
613
e9690142
JD
614 \\a obstack_1grow (&obstack_for_string, '\a');
615 \\b obstack_1grow (&obstack_for_string, '\b');
616 \\f obstack_1grow (&obstack_for_string, '\f');
617 \\n obstack_1grow (&obstack_for_string, '\n');
618 \\r obstack_1grow (&obstack_for_string, '\r');
619 \\t obstack_1grow (&obstack_for_string, '\t');
620 \\v obstack_1grow (&obstack_for_string, '\v');
412f8a59
PE
621
622 /* \\[\"\'?\\] would be shorter, but it confuses xgettext. */
223ff46e 623 \\("\""|"'"|"?"|"\\") obstack_1grow (&obstack_for_string, yytext[1]);
412f8a59 624
6b0d38ab 625 \\(u|U[0-9abcdefABCDEF]{4})[0-9abcdefABCDEF]{4} {
d8d3f94a 626 int c = convert_ucn_to_byte (yytext);
c2724603 627 if (c <= 0)
bb8e56ff 628 complain (loc, complaint, _("invalid number after \\-escape: %s"),
c2724603 629 yytext+1);
d8d3f94a 630 else
223ff46e 631 obstack_1grow (&obstack_for_string, c);
d8d3f94a 632 }
e9690142 633 \\(.|\n) {
c2724603 634 char const *p = yytext + 1;
e6c849d8 635 /* Quote only if escaping won't make the character visible. */
457bf919 636 if (c_isspace ((unsigned char) *p) && c_isprint ((unsigned char) *p))
e6c849d8 637 p = quote (p);
c2724603
JD
638 else
639 p = quotearg_style_mem (escape_quoting_style, p, 1);
bb8e56ff 640 complain (loc, complaint, _("invalid character after \\-escape: %s"),
6fb8b256 641 p);
e9955c83
AD
642 }
643}
644
4febdd96
PE
645 /*--------------------------------------------.
646 | Scanning user-code characters and strings. |
647 `--------------------------------------------*/
e9955c83 648
4febdd96
PE
649<SC_CHARACTER,SC_STRING>
650{
e9690142 651 {splice}|\\{splice}[^\n\[\]] STRING_GROW;
4febdd96 652}
e9955c83
AD
653
654<SC_CHARACTER>
655{
e9690142 656 "'" STRING_GROW; BEGIN context_state;
c1b2677a
TR
657 \n unexpected_newline (token_start, "'");
658 <<EOF>> unexpected_eof (token_start, "'");
e9955c83
AD
659}
660
e9955c83
AD
661<SC_STRING>
662{
e9690142 663 "\"" STRING_GROW; BEGIN context_state;
c1b2677a
TR
664 \n unexpected_newline (token_start, "\"");
665 <<EOF>> unexpected_eof (token_start, "\"");
e9955c83
AD
666}
667
668
669 /*---------------------------------------------------.
670 | Strings, comments etc. can be found in user code. |
671 `---------------------------------------------------*/
672
ca2a6d15 673<SC_BRACED_CODE,SC_PROLOGUE,SC_EPILOGUE,SC_PREDICATE>
e9955c83 674{
3f2d73f1
PE
675 "'" {
676 STRING_GROW;
677 context_state = YY_START;
678 token_start = loc->start;
679 BEGIN SC_CHARACTER;
680 }
681 "\"" {
682 STRING_GROW;
683 context_state = YY_START;
684 token_start = loc->start;
685 BEGIN SC_STRING;
686 }
687 "/"{splice}"*" {
688 STRING_GROW;
689 context_state = YY_START;
690 token_start = loc->start;
691 BEGIN SC_COMMENT;
692 }
693 "/"{splice}"/" {
694 STRING_GROW;
695 context_state = YY_START;
696 BEGIN SC_LINE_COMMENT;
697 }
e9955c83
AD
698}
699
700
624a35e2 701
58d7a1a1 702 /*-----------------------------------------------------------.
ca2a6d15
PH
703 | Scanning some code in braces (actions, predicates). The |
704 | initial "{" is already eaten. |
58d7a1a1 705 `-----------------------------------------------------------*/
e9955c83 706
ca2a6d15 707<SC_BRACED_CODE,SC_PREDICATE>
e9955c83 708{
cb823b6f
AD
709 "{"|"<"{splice}"%" STRING_GROW; nesting++;
710 "%"{splice}">" STRING_GROW; nesting--;
ca2a6d15 711
ae93e4e4
JM
712 /* Tokenize '<<%' correctly (as '<<' '%') rather than incorrectly
713 (as '<' '<%'). */
ca2a6d15
PH
714 "<"{splice}"<" STRING_GROW;
715
c1b2677a 716 <<EOF>> unexpected_eof (code_start, "}");
ca2a6d15
PH
717}
718
719<SC_BRACED_CODE>
720{
e9955c83 721 "}" {
25522739
PE
722 obstack_1grow (&obstack_for_string, '}');
723
cb823b6f
AD
724 --nesting;
725 if (nesting < 0)
e9955c83 726 {
e9690142
JD
727 STRING_FINISH;
728 loc->start = code_start;
729 val->code = last_string;
730 BEGIN INITIAL;
731 return BRACED_CODE;
e9955c83
AD
732 }
733 }
ca2a6d15 734}
e9955c83 735
ca2a6d15
PH
736<SC_PREDICATE>
737{
738 "}" {
739 --nesting;
740 if (nesting < 0)
741 {
e9690142
JD
742 STRING_FINISH;
743 loc->start = code_start;
744 val->code = last_string;
745 BEGIN INITIAL;
746 return BRACED_PREDICATE;
ca2a6d15
PH
747 }
748 else
749 obstack_1grow (&obstack_for_string, '}');
47aee066 750 }
e9955c83
AD
751}
752
e9955c83
AD
753 /*--------------------------------------------------------------.
754 | Scanning some prologue: from "%{" (already scanned) to "%}". |
755 `--------------------------------------------------------------*/
756
757<SC_PROLOGUE>
758{
759 "%}" {
41141c56 760 STRING_FINISH;
3f2d73f1 761 loc->start = code_start;
223ff46e 762 val->chars = last_string;
a706a1cc 763 BEGIN INITIAL;
e9955c83
AD
764 return PROLOGUE;
765 }
766
c1b2677a 767 <<EOF>> unexpected_eof (code_start, "%}");
e9955c83
AD
768}
769
770
771 /*---------------------------------------------------------------.
772 | Scanning the epilogue (everything after the second "%%", which |
d8d3f94a 773 | has already been eaten). |
e9955c83
AD
774 `---------------------------------------------------------------*/
775
776<SC_EPILOGUE>
777{
e9955c83 778 <<EOF>> {
41141c56 779 STRING_FINISH;
3f2d73f1 780 loc->start = code_start;
223ff46e 781 val->chars = last_string;
a706a1cc 782 BEGIN INITIAL;
e9955c83
AD
783 return EPILOGUE;
784 }
785}
786
787
4febdd96
PE
788 /*-----------------------------------------------------.
789 | By default, grow the string obstack with the input. |
790 `-----------------------------------------------------*/
791
e9690142
JD
792<SC_COMMENT,SC_LINE_COMMENT,SC_BRACED_CODE,SC_PREDICATE,SC_PROLOGUE,SC_EPILOGUE,SC_STRING,SC_CHARACTER,SC_ESCAPED_STRING,SC_ESCAPED_CHARACTER>. |
793 <SC_COMMENT,SC_LINE_COMMENT,SC_BRACED_CODE,SC_PREDICATE,SC_PROLOGUE,SC_EPILOGUE>\n STRING_GROW;
4febdd96 794
e9955c83
AD
795%%
796
6c30d641
PE
797/* Read bytes from FP into buffer BUF of size SIZE. Return the
798 number of bytes read. Remove '\r' from input, treating \r\n
799 and isolated \r as \n. */
800
static size_t
no_cr_read (FILE *fp, char *buf, size_t size)
{
  size_t const nread = fread (buf, 1, size, fp);

  /* Fast path: nothing read, or no carriage return in what was read.  */
  char *dst = nread ? memchr (buf, '\r', nread) : NULL;
  if (!dst)
    return nread;

  char const *const end = buf + nread;

  /* DST is the write position, SRC the read position; both start just
     past the first '\r', which is rewritten in place as '\n'.  */
  *dst++ = '\n';
  char const *src = dst;

  for (;;)
    {
      /* A '\r' has just been turned into '\n' at dst[-1].  Drop a
         '\n' that immediately follows it, so that "\r\n" yields a
         single newline.  */
      if (src != end)
        {
          if (*src == '\n')
            src++;
        }
      else
        {
          /* The '\r' was the last byte of this read: peek at the
             stream.  A following '\n' is swallowed; anything else is
             pushed back for the next read.  */
          int ch = getc (fp);
          if (ch != '\n' && ungetc (ch, fp) != ch)
            break;
        }

      /* Compact the buffer up to and including the next '\r'.  */
      for (;;)
        {
          if (src == end)
            return dst - buf;
          char c = *src++;
          *dst++ = c;
          if (c == '\r')
            break;
        }

      /* Treat the '\r' just copied like '\n'.  */
      dst[-1] = '\n';
    }

  return dst - buf;
}
842
843
f25bfb75 844
1452af69
PE
845/*------------------------------------------------------.
846| Scan NUMBER for a base-BASE integer at location LOC. |
847`------------------------------------------------------*/
848
849static unsigned long int
850scan_integer (char const *number, int base, location loc)
851{
4517da37
PE
852 verify (INT_MAX < ULONG_MAX);
853 unsigned long int num = strtoul (number, NULL, base);
854
855 if (INT_MAX < num)
1452af69 856 {
bb8e56ff 857 complain (&loc, complaint, _("integer out of range: %s"),
6fb8b256 858 quote (number));
1452af69
PE
859 num = INT_MAX;
860 }
4517da37 861
1452af69
PE
862 return num;
863}
864
865
d8d3f94a
PE
866/*------------------------------------------------------------------.
867| Convert universal character name UCN to a single-byte character, |
868| and return that character. Return -1 if UCN does not correspond |
e9690142 869| to a single-byte character. |
d8d3f94a
PE
870`------------------------------------------------------------------*/
871
static int
convert_ucn_to_byte (char const *ucn)
{
  verify (UCHAR_MAX <= INT_MAX);

  /* UCN starts with a backslash and 'u' or 'U'; the hexadecimal
     digits begin two characters in.  */
  unsigned long int ch = strtoul (ucn + 2, NULL, 16);

  /* FIXME: Currently we assume Unicode-compatible unibyte characters
     on ASCII hosts (i.e., Latin-1 on hosts with 8-bit bytes).  On
     non-ASCII hosts we support only the portable C character set.
     These limitations should be removed once we add support for
     multibyte characters.  */

  if (ch > UCHAR_MAX)
    return -1;

#if ! ('$' == 0x24 && '@' == 0x40 && '`' == 0x60 && '~' == 0x7e)
  {
    /* A non-ASCII host.  Map the code through a table of the C basic
       execution character set, which is guaranteed to exist on all
       Standard C platforms.  The table also includes '$', '@', and
       '`', which are not in the basic execution character set but
       which are unibyte characters on all the platforms that we know
       about.  Entries of -1 mark codes with no local equivalent.  */
    static signed char const table[] =
      {
        '\0',   -1,   -1,   -1,   -1,   -1,   -1, '\a',
        '\b', '\t', '\n', '\v', '\f', '\r',   -1,   -1,
          -1,   -1,   -1,   -1,   -1,   -1,   -1,   -1,
          -1,   -1,   -1,   -1,   -1,   -1,   -1,   -1,
         ' ',  '!',  '"',  '#',  '$',  '%',  '&', '\'',
         '(',  ')',  '*',  '+',  ',',  '-',  '.',  '/',
         '0',  '1',  '2',  '3',  '4',  '5',  '6',  '7',
         '8',  '9',  ':',  ';',  '<',  '=',  '>',  '?',
         '@',  'A',  'B',  'C',  'D',  'E',  'F',  'G',
         'H',  'I',  'J',  'K',  'L',  'M',  'N',  'O',
         'P',  'Q',  'R',  'S',  'T',  'U',  'V',  'W',
         'X',  'Y',  'Z',  '[', '\\',  ']',  '^',  '_',
         '`',  'a',  'b',  'c',  'd',  'e',  'f',  'g',
         'h',  'i',  'j',  'k',  'l',  'm',  'n',  'o',
         'p',  'q',  'r',  's',  't',  'u',  'v',  'w',
         'x',  'y',  'z',  '{',  '|',  '}',  '~'
      };

    if (ch < sizeof table)
      ch = table[ch];
    else
      ch = -1;
  }
#endif

  return ch;
}
921
922
03dbf629
AD
923/*---------------------------------------------------------------------.
924| Handle '#line INT( "FILE")?\n'. ARGS has already skipped '#line '. |
925`---------------------------------------------------------------------*/
900c5db5
AD
926
927static void
4517da37 928handle_syncline (char *args, location loc)
900c5db5 929{
03dbf629
AD
930 char *file;
931 unsigned long int lineno = strtoul (args, &file, 10);
4517da37
PE
932 if (INT_MAX <= lineno)
933 {
bb8e56ff 934 complain (&loc, Wother, _("line number overflow"));
4517da37
PE
935 lineno = INT_MAX;
936 }
03dbf629 937
064e42b0 938 file = strchr (file, '"');
03dbf629
AD
939 if (file)
940 {
064e42b0 941 *strchr (file + 1, '"') = '\0';
03dbf629
AD
942 current_file = uniqstr_new (file + 1);
943 }
0c8e079f 944 boundary_set (&scanner_cursor, current_file, lineno, 1);
4517da37
PE
945}
946
947
4febdd96
PE
948/*----------------------------------------------------------------.
949| For a token or comment starting at START, report message MSGID, |
c1b2677a
TR
950| which should say that an end marker was found before the |
951| expected TOKEN_END. Then, pretend that TOKEN_END was found. |
4febdd96
PE
952`----------------------------------------------------------------*/
953
954static void
955unexpected_end (boundary start, char const *msgid, char const *token_end)
956{
957 location loc;
958 loc.start = start;
959 loc.end = scanner_cursor;
c1b2677a
TR
960 size_t i = strlen (token_end);
961
962/* Adjust scanner cursor so that any later message does not count
963 the characters about to be inserted. */
964 scanner_cursor.column -= i;
965
966 while (i != 0)
967 unput (token_end[--i]);
968
4a9cd8f2 969 token_end = quote (token_end);
1127a75a 970 /* Instead of '\'', display "'". */
f518dbaf 971 if (STREQ (token_end, "'\\''"))
4a9cd8f2 972 token_end = "\"'\"";
bb8e56ff 973 complain (&loc, complaint, _(msgid), token_end);
4febdd96
PE
974}
975
976
3f2d73f1
PE
977/*------------------------------------------------------------------------.
978| Report an unexpected EOF in a token or comment starting at START. |
979| An end of file was encountered and the expected TOKEN_END was missing. |
c1b2677a 980| After reporting the problem, pretend that TOKEN_END was found. |
3f2d73f1 981`------------------------------------------------------------------------*/
a706a1cc
PE
982
983static void
aa418041 984unexpected_eof (boundary start, char const *token_end)
a706a1cc 985{
4a9cd8f2 986 unexpected_end (start, N_("missing %s at end of file"), token_end);
4febdd96
PE
987}
988
989
990/*----------------------------------------.
991| Likewise, but for unexpected newlines. |
992`----------------------------------------*/
993
994static void
995unexpected_newline (boundary start, char const *token_end)
996{
4a9cd8f2 997 unexpected_end (start, N_("missing %s at end of line"), token_end);
a706a1cc
PE
998}
999
1000
f25bfb75
AD
1001/*-------------------------.
1002| Initialize the scanner. |
1003`-------------------------*/
1004
1d6412ad 1005void
e9071366 1006gram_scanner_initialize (void)
1d6412ad 1007{
223ff46e 1008 obstack_init (&obstack_for_string);
1d6412ad
AD
1009}
1010
1011
f25bfb75
AD
1012/*-----------------------------------------------.
1013| Free all the memory allocated to the scanner. |
1014`-----------------------------------------------*/
1015
4cdb01db 1016void
e9071366 1017gram_scanner_free (void)
4cdb01db 1018{
223ff46e 1019 obstack_free (&obstack_for_string, 0);
536545f3 1020 /* Reclaim Flex's buffers. */
580b8926 1021 yylex_destroy ();
4cdb01db 1022}