]> git.saurik.com Git - bison.git/blame - src/scan-gram.l
doc: improve html and pdf rendering
[bison.git] / src / scan-gram.l
CommitLineData
e9955c83 1/* Bison Grammar Scanner -*- C -*-
3b1e470c 2
3209eb1c 3 Copyright (C) 2002-2015 Free Software Foundation, Inc.
e9955c83
AD
4
5 This file is part of Bison, the GNU Compiler Compiler.
6
f16b0819 7 This program is free software: you can redistribute it and/or modify
e9955c83 8 it under the terms of the GNU General Public License as published by
f16b0819 9 the Free Software Foundation, either version 3 of the License, or
e9955c83
AD
10 (at your option) any later version.
11
12 This program is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
16
17 You should have received a copy of the GNU General Public License
f16b0819 18 along with this program. If not, see <http://www.gnu.org/licenses/>. */
e9955c83 19
2062d72d 20%option debug nodefault noinput noyywrap never-interactive
e9955c83
AD
21%option prefix="gram_" outfile="lex.yy.c"
22
23%{
4f6e011e
PE
24/* Work around a bug in flex 2.5.31. See Debian bug 333231
25 <http://bugs.debian.org/cgi-bin/bugreport.cgi?bug=333231>. */
26#undef gram_wrap
27#define gram_wrap() 1
28
e9071366 29#define FLEX_PREFIX(Id) gram_ ## Id
0305d25e 30#include <src/flex-scanner.h>
223ff46e 31
0305d25e
AD
32#include <src/complain.h>
33#include <src/files.h>
2062d72d 34#include <src/getargs.h>
0305d25e
AD
35#include <src/gram.h>
36#include <quotearg.h>
37#include <src/reader.h>
38#include <src/uniqstr.h>
e9955c83 39
457bf919 40#include <c-ctype.h>
e9071366
AD
41#include <mbswidth.h>
42#include <quote.h>
43
0305d25e 44#include <src/scan-gram.h>
e9071366
AD
45
46#define YY_DECL GRAM_LEX_DECL
2346344a 47
3f2d73f1 48/* Location of scanner cursor. */
4a678af8 49static boundary scanner_cursor;
41141c56 50
e9071366 51#define YY_USER_ACTION location_compute (loc, &scanner_cursor, yytext, yyleng);
d8d3f94a 52
6c30d641 53static size_t no_cr_read (FILE *, char *, size_t);
d8d3f94a
PE
54#define YY_INPUT(buf, result, size) ((result) = no_cr_read (yyin, buf, size))
55
a7706735
AD
/* Return the PERCENT_PARAM token with val->param set to the identifier
   formed by pasting Value onto "param_" (e.g. lex -> param_lex). */
56#define RETURN_PERCENT_PARAM(Value) \
57 RETURN_VALUE(PERCENT_PARAM, param, param_ ## Value)
58
/* Return the PERCENT_FLAG token with val->uniqstr set to
   uniqstr_new (Value); the rules pass the variable name as a string
   literal (e.g. "parse.trace"). */
59#define RETURN_PERCENT_FLAG(Value) \
60 RETURN_VALUE(PERCENT_FLAG, uniqstr, uniqstr_new (Value))
61
/* Store Value in member Field of the semantic value VAL, then return
   Token from the scanner function. Wrapped in do/while (0) so that it
   behaves as a single statement. */
62#define RETURN_VALUE(Token, Field, Value) \
ba061fa6 63 do { \
a7706735
AD
64 val->Field = Value; \
65 return Token; \
ba061fa6
AD
66 } while (0)
67
b9f1d9a4
AR
/* Give the whole current token back to the input: move the cursor
   column back by the display width of yytext (mbsnwidth), then
   yyless (0) so that every character of the token is scanned again. */
68#define ROLLBACK_CURRENT_TOKEN \
69 do { \
e9690142 70 scanner_cursor.column -= mbsnwidth (yytext, yyleng, 0); \
b9f1d9a4
AR
71 yyless (0); \
72 } while (0)
ba061fa6 73
2062d72d
TR
/* Pass the directive just matched (yytext) and its replacement Msg to
   deprecated_directive, then push Msg back onto the input. The
   characters are unput last-to-first so that they are read back in
   their original order. The cursor column is first moved back by the
   display width of Msg -- presumably so that the re-scanned text is
   not counted in later locations, as unexpected_end does for the
   text it inserts. */
74#define DEPRECATED(Msg) \
75 do { \
76 size_t i; \
1dc927a7 77 deprecated_directive (loc, yytext, Msg); \
2062d72d
TR
78 scanner_cursor.column -= mbsnwidth (Msg, strlen (Msg), 0); \
79 for (i = strlen (Msg); i != 0; --i) \
80 unput (Msg[i - 1]); \
81 } while (0)
82
7ec2d4cd 83/* A string representing the most recently saved token. */
7c0c6181 84static char *last_string;
7ec2d4cd 85
872b52bc 86/* Bracketed identifier. */
b9f1d9a4
AR
87static uniqstr bracketed_id_str = 0;
88static location bracketed_id_loc;
89static boundary bracketed_id_start;
90static int bracketed_id_context_state = 0;
91
/* Public entry point that expands STRING_FREE. That macro is not
   defined in this file (presumably it comes from flex-scanner.h and
   releases the storage behind last_string -- confirm there). */
7ec2d4cd 92void
e9071366 93gram_scanner_last_string_free (void)
7ec2d4cd 94{
41141c56 95 STRING_FREE;
7ec2d4cd 96}
e9955c83 97
4517da37 98static void handle_syncline (char *, location);
1452af69 99static unsigned long int scan_integer (char const *p, int base, location loc);
d8d3f94a 100static int convert_ucn_to_byte (char const *hex_text);
aa418041 101static void unexpected_eof (boundary, char const *);
4febdd96 102static void unexpected_newline (boundary, char const *);
e9955c83
AD
103
104%}
e9071366
AD
105 /* A C-like comment in directives/rules. */
106%x SC_YACC_COMMENT
107 /* Strings and characters in directives/rules. */
e9955c83 108%x SC_ESCAPED_STRING SC_ESCAPED_CHARACTER
e9071366 109 /* An identifier was just read in directives/rules. Special state
ae93e4e4 110 to capture the sequence 'identifier :'. */
e9071366 111%x SC_AFTER_IDENTIFIER
e52ddf82
AD
112
113 /* POSIX says that a tag must be both an id and a C union member, but
114 historically almost any character is allowed in a tag. We
115 disallow NUL, as this simplifies our implementation. We match
116 angle brackets in nested pairs: several languages use them for
117 generics/template types. */
cb823b6f 118%x SC_TAG
e9071366 119
ca2a6d15 120 /* Four types of user code:
ae93e4e4 121 - prologue (code between '%{' '%}' in the first section, before %%);
e9071366 122 - actions, printers, union, etc, (between braces in the middle section);
da5462d4 123 - epilogue (everything after the second %%);
ae93e4e4 124 - predicate (code between '%?{' and '}' in middle section). */
ca2a6d15 125%x SC_PROLOGUE SC_BRACED_CODE SC_EPILOGUE SC_PREDICATE
e9071366
AD
126 /* C and C++ comments in code. */
127%x SC_COMMENT SC_LINE_COMMENT
128 /* Strings and characters in code. */
129%x SC_STRING SC_CHARACTER
872b52bc 130 /* Bracketed identifiers support. */
b9f1d9a4 131%x SC_BRACKETED_ID SC_RETURN_BRACKETED_ID
e9955c83 132
e9690142 133letter [.abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ_]
68ac70bc 134notletter [^.abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ_]{-}[%\{]
e9690142 135id {letter}({letter}|[-0-9])*
e9690142 136int [0-9]+
d8d3f94a 137
d8d3f94a
PE
138/* Zero or more instances of backslash-newline. Following GCC, allow
139 white space between the backslash and the newline. */
e9690142 140splice (\\[ \f\t\v]*\n)*
e9955c83 141
2062d72d
TR
142/* An equal sign, with optional leading white space. This is used in some
143 deprecated constructs. */
144eqopt ([[:space:]]*=)?
145
e9955c83
AD
146%%
147%{
cb823b6f
AD
148 /* Nesting level. Either for nested braces, or nested angle brackets
149 (but not mixed). */
84f6a6ca 150 int nesting PACIFY_CC (= 0);
1a9e39f1 151
3f2d73f1 152 /* Parent context state, when applicable. */
84f6a6ca 153 int context_state PACIFY_CC (= 0);
a706a1cc 154
3f2d73f1 155 /* Location of most recent identifier, when applicable. */
84f6a6ca 156 location id_loc PACIFY_CC (= empty_location);
3f2d73f1 157
a2bc9dbc
PE
158 /* Where containing code started, when applicable. Its initial
159 value is relevant only when yylex is invoked in the SC_EPILOGUE
160 start condition. */
161 boundary code_start = scanner_cursor;
3f2d73f1 162
223ff46e
PE
163 /* Where containing comment or string or character literal started,
164 when applicable. */
84f6a6ca 165 boundary token_start PACIFY_CC (= scanner_cursor);
975bb564
AD
166
167 /* We cannot trust YY_USER_INIT, whose semantics changes over time
168 (it moved in Flex 2.5.38). */
169 static bool first = true;
170 if (first)
171 {
172 scanner_cursor = loc->start;
173 first = false;
174 }
e9955c83
AD
175%}
176
177
3f2d73f1
PE
178 /*-----------------------.
179 | Scanning white space. |
180 `-----------------------*/
181
b9f1d9a4 182<INITIAL,SC_AFTER_IDENTIFIER,SC_BRACKETED_ID,SC_RETURN_BRACKETED_ID>
3f2d73f1 183{
4febdd96 184 /* Comments and white space. */
6fb8b256 185 "," {
bb8e56ff 186 complain (loc, Wother, _("stray ',' treated as white space"));
6fb8b256 187 }
4febdd96 188 [ \f\n\t\v] |
96029914 189 "//".* continue;
83adb046
PE
190 "/*" {
191 token_start = loc->start;
192 context_state = YY_START;
193 BEGIN SC_YACC_COMMENT;
194 }
3f2d73f1
PE
195
196 /* #line directives are not documented, and may be withdrawn or
197 modified in future versions of Bison. */
03dbf629 198 ^"#line "{int}(" \"".*"\"")?"\n" {
4517da37 199 handle_syncline (yytext + sizeof "#line " - 1, *loc);
3f2d73f1
PE
200 }
201}
202
203
e9955c83
AD
204 /*----------------------------.
205 | Scanning Bison directives. |
206 `----------------------------*/
a7c09cba
DJ
207
208 /* For directives that are also command line options, the regex must be
e9690142 209 "%..."
a7c09cba
DJ
210 after "[-_]"s are removed, and the directive must match the --long
211 option name, with a single string argument. Otherwise, add exceptions
212 to ../build-aux/cross-options.pl. */
213
e9955c83
AD
214<INITIAL>
215{
deef2a0a 216 "%binary" return PERCENT_NONASSOC;
136a0f76 217 "%code" return PERCENT_CODE;
fa819509 218 "%debug" RETURN_PERCENT_FLAG("parse.trace");
2062d72d 219 "%default-prec" return PERCENT_DEFAULT_PREC;
deef2a0a
AD
220 "%define" return PERCENT_DEFINE;
221 "%defines" return PERCENT_DEFINES;
222 "%destructor" return PERCENT_DESTRUCTOR;
223 "%dprec" return PERCENT_DPREC;
ae2b48f5 224 "%empty" return PERCENT_EMPTY;
2062d72d 225 "%error-verbose" return PERCENT_ERROR_VERBOSE;
deef2a0a 226 "%expect" return PERCENT_EXPECT;
2062d72d 227 "%expect-rr" return PERCENT_EXPECT_RR;
deef2a0a 228 "%file-prefix" return PERCENT_FILE_PREFIX;
2062d72d 229 "%fixed-output-files" return PERCENT_YACC;
deef2a0a
AD
230 "%initial-action" return PERCENT_INITIAL_ACTION;
231 "%glr-parser" return PERCENT_GLR_PARSER;
232 "%language" return PERCENT_LANGUAGE;
233 "%left" return PERCENT_LEFT;
a7706735 234 "%lex-param" RETURN_PERCENT_PARAM(lex);
bc0f5737 235 "%locations" RETURN_PERCENT_FLAG("locations");
deef2a0a 236 "%merge" return PERCENT_MERGE;
2062d72d
TR
237 "%name-prefix" return PERCENT_NAME_PREFIX;
238 "%no-default-prec" return PERCENT_NO_DEFAULT_PREC;
239 "%no-lines" return PERCENT_NO_LINES;
deef2a0a
AD
240 "%nonassoc" return PERCENT_NONASSOC;
241 "%nondeterministic-parser" return PERCENT_NONDETERMINISTIC_PARSER;
242 "%nterm" return PERCENT_NTERM;
243 "%output" return PERCENT_OUTPUT;
a7706735
AD
244 "%param" RETURN_PERCENT_PARAM(both);
245 "%parse-param" RETURN_PERCENT_PARAM(parse);
deef2a0a 246 "%prec" return PERCENT_PREC;
d78f0ac9 247 "%precedence" return PERCENT_PRECEDENCE;
deef2a0a 248 "%printer" return PERCENT_PRINTER;
2062d72d 249 "%pure-parser" RETURN_PERCENT_FLAG("api.pure");
deef2a0a
AD
250 "%require" return PERCENT_REQUIRE;
251 "%right" return PERCENT_RIGHT;
252 "%skeleton" return PERCENT_SKELETON;
253 "%start" return PERCENT_START;
254 "%term" return PERCENT_TOKEN;
255 "%token" return PERCENT_TOKEN;
2062d72d 256 "%token-table" return PERCENT_TOKEN_TABLE;
deef2a0a
AD
257 "%type" return PERCENT_TYPE;
258 "%union" return PERCENT_UNION;
259 "%verbose" return PERCENT_VERBOSE;
260 "%yacc" return PERCENT_YACC;
e9955c83 261
2062d72d
TR
262 /* deprecated */
263 "%default"[-_]"prec" DEPRECATED("%default-prec");
264 "%error"[-_]"verbose" DEPRECATED("%define parse.error verbose");
265 "%expect"[-_]"rr" DEPRECATED("%expect-rr");
266 "%file-prefix"{eqopt} DEPRECATED("%file-prefix");
267 "%fixed"[-_]"output"[-_]"files" DEPRECATED("%fixed-output-files");
268 "%name"[-_]"prefix"{eqopt} DEPRECATED("%name-prefix");
269 "%no"[-_]"default"[-_]"prec" DEPRECATED("%no-default-prec");
270 "%no"[-_]"lines" DEPRECATED("%no-lines");
271 "%output"{eqopt} DEPRECATED("%output");
272 "%pure"[-_]"parser" DEPRECATED("%pure-parser");
273 "%token"[-_]"table" DEPRECATED("%token-table");
274
071863b3 275 "%"{id} {
bb8e56ff 276 complain (loc, complaint, _("invalid directive: %s"), quote (yytext));
412f8a59 277 }
900c5db5 278
e9955c83 279 "=" return EQUAL;
e9071366 280 "|" return PIPE;
e9955c83
AD
281 ";" return SEMICOLON;
282
3f2d73f1 283 {id} {
58d7a1a1 284 val->uniqstr = uniqstr_new (yytext);
3f2d73f1 285 id_loc = *loc;
b9f1d9a4 286 bracketed_id_str = NULL;
3f2d73f1 287 BEGIN SC_AFTER_IDENTIFIER;
e9955c83
AD
288 }
289
d8d3f94a 290 {int} {
1452af69
PE
291 val->integer = scan_integer (yytext, 10, *loc);
292 return INT;
293 }
294 0[xX][0-9abcdefABCDEF]+ {
295 val->integer = scan_integer (yytext, 16, *loc);
d8d3f94a
PE
296 return INT;
297 }
e9955c83 298
84a1cb5a
AD
299 /* Identifiers may not start with a digit. Yet, don't silently
300 accept "1FOO" as "1 FOO". */
301 {int}{id} {
bb8e56ff 302 complain (loc, complaint, _("invalid identifier: %s"), quote (yytext));
84a1cb5a
AD
303 }
304
3208e3f4 305 /* Characters. */
e9690142 306 "'" token_start = loc->start; BEGIN SC_ESCAPED_CHARACTER;
e9955c83
AD
307
308 /* Strings. */
e9690142 309 "\"" token_start = loc->start; BEGIN SC_ESCAPED_STRING;
e9955c83
AD
310
311 /* Prologue. */
3f2d73f1 312 "%{" code_start = loc->start; BEGIN SC_PROLOGUE;
e9955c83
AD
313
314 /* Code in between braces. */
3f2d73f1
PE
315 "{" {
316 STRING_GROW;
cb823b6f 317 nesting = 0;
3f2d73f1
PE
318 code_start = loc->start;
319 BEGIN SC_BRACED_CODE;
320 }
e9955c83 321
ca2a6d15
PH
322 /* Semantic predicate. */
323 "%?"[ \f\n\t\v]*"{" {
324 nesting = 0;
325 code_start = loc->start;
326 BEGIN SC_PREDICATE;
327 }
328
e9955c83 329 /* A type. */
cb823b6f
AD
330 "<*>" return TAG_ANY;
331 "<>" return TAG_NONE;
cb823b6f
AD
332 "<" {
333 nesting = 0;
334 token_start = loc->start;
335 BEGIN SC_TAG;
4cdb01db
AD
336 }
337
a706a1cc
PE
338 "%%" {
339 static int percent_percent_count;
e9955c83 340 if (++percent_percent_count == 2)
a2bc9dbc 341 BEGIN SC_EPILOGUE;
e9955c83
AD
342 return PERCENT_PERCENT;
343 }
344
b9f1d9a4
AR
345 "[" {
346 bracketed_id_str = NULL;
347 bracketed_id_start = loc->start;
348 bracketed_id_context_state = YY_START;
349 BEGIN SC_BRACKETED_ID;
350 }
351
68ac70bc 352 [^\[%A-Za-z0-9_<>{}\"\'*;|=/, \f\n\t\v]+|. {
c6b17724
AD
353 complain (loc, complaint, "%s: %s",
354 ngettext ("invalid character", "invalid characters", yyleng),
e42906f7 355 quote_mem (yytext, yyleng));
3f2d73f1 356 }
379f0ac8
PE
357
358 <<EOF>> {
359 loc->start = loc->end = scanner_cursor;
360 yyterminate ();
361 }
3f2d73f1
PE
362}
363
364
cb823b6f
AD
365 /*--------------------------------------------------------------.
366 | Supporting \0 complexifies our implementation for no expected |
367 | added value. |
368 `--------------------------------------------------------------*/
369
370<SC_ESCAPED_CHARACTER,SC_ESCAPED_STRING,SC_TAG>
371{
bb8e56ff 372 \0 complain (loc, complaint, _("invalid null character"));
cb823b6f
AD
373}
374
375
3f2d73f1
PE
376 /*-----------------------------------------------------------------.
377 | Scanning after an identifier, checking whether a colon is next. |
378 `-----------------------------------------------------------------*/
379
380<SC_AFTER_IDENTIFIER>
381{
b9f1d9a4 382 "[" {
872b52bc 383 if (bracketed_id_str)
b9f1d9a4 384 {
e9690142
JD
385 ROLLBACK_CURRENT_TOKEN;
386 BEGIN SC_RETURN_BRACKETED_ID;
387 *loc = id_loc;
388 return ID;
b9f1d9a4 389 }
872b52bc
AR
390 else
391 {
e9690142
JD
392 bracketed_id_start = loc->start;
393 bracketed_id_context_state = YY_START;
394 BEGIN SC_BRACKETED_ID;
872b52bc 395 }
b9f1d9a4 396 }
3f2d73f1 397 ":" {
b9f1d9a4 398 BEGIN (bracketed_id_str ? SC_RETURN_BRACKETED_ID : INITIAL);
3f2d73f1 399 *loc = id_loc;
3f2d73f1
PE
400 return ID_COLON;
401 }
402 . {
b9f1d9a4
AR
403 ROLLBACK_CURRENT_TOKEN;
404 BEGIN (bracketed_id_str ? SC_RETURN_BRACKETED_ID : INITIAL);
3f2d73f1 405 *loc = id_loc;
3f2d73f1
PE
406 return ID;
407 }
408 <<EOF>> {
b9f1d9a4 409 BEGIN (bracketed_id_str ? SC_RETURN_BRACKETED_ID : INITIAL);
3f2d73f1 410 *loc = id_loc;
3f2d73f1 411 return ID;
e9955c83
AD
412 }
413}
414
b9f1d9a4
AR
415 /*--------------------------------.
416 | Scanning bracketed identifiers. |
417 `--------------------------------*/
418
419<SC_BRACKETED_ID>
420{
421 {id} {
872b52bc 422 if (bracketed_id_str)
b9f1d9a4 423 {
bb8e56ff
TR
424 complain (loc, complaint,
425 _("unexpected identifier in bracketed name: %s"),
426 quote (yytext));
b9f1d9a4
AR
427 }
428 else
429 {
e9690142
JD
430 bracketed_id_str = uniqstr_new (yytext);
431 bracketed_id_loc = *loc;
b9f1d9a4
AR
432 }
433 }
434 "]" {
435 BEGIN bracketed_id_context_state;
436 if (bracketed_id_str)
437 {
e9690142
JD
438 if (INITIAL == bracketed_id_context_state)
439 {
440 val->uniqstr = bracketed_id_str;
441 bracketed_id_str = 0;
442 *loc = bracketed_id_loc;
443 return BRACKETED_ID;
444 }
b9f1d9a4
AR
445 }
446 else
bb8e56ff 447 complain (loc, complaint, _("an identifier expected"));
b9f1d9a4 448 }
68ac70bc
AD
449
450 [^\].A-Za-z0-9_/ \f\n\t\v]+|. {
c6b17724
AD
451 complain (loc, complaint, "%s: %s",
452 ngettext ("invalid character in bracketed name",
453 "invalid characters in bracketed name", yyleng),
e42906f7 454 quote_mem (yytext, yyleng));
b9f1d9a4 455 }
68ac70bc 456
b9f1d9a4
AR
457 <<EOF>> {
458 BEGIN bracketed_id_context_state;
459 unexpected_eof (bracketed_id_start, "]");
460 }
461}
462
463<SC_RETURN_BRACKETED_ID>
464{
465 . {
466 ROLLBACK_CURRENT_TOKEN;
467 val->uniqstr = bracketed_id_str;
468 bracketed_id_str = 0;
469 *loc = bracketed_id_loc;
470 BEGIN INITIAL;
471 return BRACKETED_ID;
472 }
473}
474
e9955c83 475
d8d3f94a 476 /*---------------------------------------------------------------.
ae93e4e4 477 | Scanning a Yacc comment. The initial '/ *' is already eaten. |
d8d3f94a 478 `---------------------------------------------------------------*/
e9955c83 479
d8d3f94a 480<SC_YACC_COMMENT>
e9955c83 481{
3f2d73f1 482 "*/" BEGIN context_state;
c6b17724 483 .|\n continue;
aa418041 484 <<EOF>> unexpected_eof (token_start, "*/"); BEGIN context_state;
d8d3f94a
PE
485}
486
487
488 /*------------------------------------------------------------.
ae93e4e4 489 | Scanning a C comment. The initial '/ *' is already eaten. |
d8d3f94a
PE
490 `------------------------------------------------------------*/
491
492<SC_COMMENT>
493{
3f2d73f1 494 "*"{splice}"/" STRING_GROW; BEGIN context_state;
e9690142 495 <<EOF>> unexpected_eof (token_start, "*/"); BEGIN context_state;
e9955c83
AD
496}
497
498
d8d3f94a 499 /*--------------------------------------------------------------.
ae93e4e4 500 | Scanning a line comment. The initial '//' is already eaten. |
d8d3f94a
PE
501 `--------------------------------------------------------------*/
502
503<SC_LINE_COMMENT>
504{
e9690142
JD
505 "\n" STRING_GROW; BEGIN context_state;
506 {splice} STRING_GROW;
507 <<EOF>> BEGIN context_state;
d8d3f94a
PE
508}
509
510
4febdd96
PE
511 /*------------------------------------------------.
512 | Scanning a Bison string, including its escapes. |
513 | The initial quote is already eaten. |
514 `------------------------------------------------*/
e9955c83
AD
515
516<SC_ESCAPED_STRING>
517{
c1b2677a 518 "\"" {
41141c56 519 STRING_FINISH;
3f2d73f1 520 loc->start = token_start;
985d7177 521 val->code = last_string;
a706a1cc 522 BEGIN INITIAL;
e9955c83
AD
523 return STRING;
524 }
c1b2677a
TR
525 <<EOF>> unexpected_eof (token_start, "\"");
526 "\n" unexpected_newline (token_start, "\"");
e9955c83
AD
527}
528
4febdd96
PE
529 /*----------------------------------------------------------.
530 | Scanning a Bison character literal, decoding its escapes. |
e9690142 531 | The initial quote is already eaten. |
4febdd96 532 `----------------------------------------------------------*/
e9955c83
AD
533
534<SC_ESCAPED_CHARACTER>
535{
c1b2677a 536 "'" {
47aee066
JD
537 STRING_FINISH;
538 loc->start = token_start;
dfaa4860 539 val->character = last_string[0];
c1b2677a
TR
540
541 /* FIXME: Eventually, make these errors. */
542 if (last_string[0] == '\0')
3208e3f4 543 {
c1b2677a
TR
544 complain (loc, Wother, _("empty character literal"));
545 /* '\0' seems dangerous even if we are about to complain. */
546 val->character = '\'';
3208e3f4 547 }
c1b2677a
TR
548 else if (last_string[1] != '\0')
549 complain (loc, Wother,
550 _("extra characters in character literal"));
47aee066
JD
551 STRING_FREE;
552 BEGIN INITIAL;
553 return CHAR;
554 }
c1b2677a
TR
555 "\n" unexpected_newline (token_start, "'");
556 <<EOF>> unexpected_eof (token_start, "'");
4febdd96 557}
a706a1cc 558
e52ddf82
AD
559
560
561 /*--------------------------------------------------------------.
562 | Scanning a tag. The initial angle bracket is already eaten. |
563 `--------------------------------------------------------------*/
cb823b6f
AD
564
565<SC_TAG>
4febdd96 566{
cb823b6f
AD
567 ">" {
568 --nesting;
569 if (nesting < 0)
570 {
571 STRING_FINISH;
572 loc->start = token_start;
573 val->uniqstr = uniqstr_new (last_string);
574 STRING_FREE;
575 BEGIN INITIAL;
576 return TAG;
577 }
578 STRING_GROW;
579 }
580
cb8d8bb9 581 ([^<>]|->)+ STRING_GROW;
cb823b6f 582 "<"+ STRING_GROW; nesting += yyleng;
e9955c83 583
c1b2677a 584 <<EOF>> unexpected_eof (token_start, ">");
cb823b6f 585}
e9955c83
AD
586
587 /*----------------------------.
588 | Decode escaped characters. |
589 `----------------------------*/
590
591<SC_ESCAPED_STRING,SC_ESCAPED_CHARACTER>
592{
d8d3f94a 593 \\[0-7]{1,3} {
4517da37 594 unsigned long int c = strtoul (yytext + 1, NULL, 8);
c2724603 595 if (!c || UCHAR_MAX < c)
bb8e56ff 596 complain (loc, complaint, _("invalid number after \\-escape: %s"),
c2724603 597 yytext+1);
e9955c83 598 else
223ff46e 599 obstack_1grow (&obstack_for_string, c);
e9955c83
AD
600 }
601
6b0d38ab 602 \\x[0-9abcdefABCDEF]+ {
4517da37
PE
603 verify (UCHAR_MAX < ULONG_MAX);
604 unsigned long int c = strtoul (yytext + 2, NULL, 16);
c2724603 605 if (!c || UCHAR_MAX < c)
bb8e56ff 606 complain (loc, complaint, _("invalid number after \\-escape: %s"),
c2724603 607 yytext+1);
d8d3f94a 608 else
223ff46e 609 obstack_1grow (&obstack_for_string, c);
e9955c83
AD
610 }
611
e9690142
JD
612 \\a obstack_1grow (&obstack_for_string, '\a');
613 \\b obstack_1grow (&obstack_for_string, '\b');
614 \\f obstack_1grow (&obstack_for_string, '\f');
615 \\n obstack_1grow (&obstack_for_string, '\n');
616 \\r obstack_1grow (&obstack_for_string, '\r');
617 \\t obstack_1grow (&obstack_for_string, '\t');
618 \\v obstack_1grow (&obstack_for_string, '\v');
412f8a59
PE
619
620 /* \\[\"\'?\\] would be shorter, but it confuses xgettext. */
223ff46e 621 \\("\""|"'"|"?"|"\\") obstack_1grow (&obstack_for_string, yytext[1]);
412f8a59 622
6b0d38ab 623 \\(u|U[0-9abcdefABCDEF]{4})[0-9abcdefABCDEF]{4} {
d8d3f94a 624 int c = convert_ucn_to_byte (yytext);
c2724603 625 if (c <= 0)
bb8e56ff 626 complain (loc, complaint, _("invalid number after \\-escape: %s"),
c2724603 627 yytext+1);
d8d3f94a 628 else
223ff46e 629 obstack_1grow (&obstack_for_string, c);
d8d3f94a 630 }
e9690142 631 \\(.|\n) {
c2724603 632 char const *p = yytext + 1;
e6c849d8 633 /* Quote only if escaping won't make the character visible. */
457bf919 634 if (c_isspace ((unsigned char) *p) && c_isprint ((unsigned char) *p))
e6c849d8 635 p = quote (p);
c2724603
JD
636 else
637 p = quotearg_style_mem (escape_quoting_style, p, 1);
bb8e56ff 638 complain (loc, complaint, _("invalid character after \\-escape: %s"),
6fb8b256 639 p);
e9955c83
AD
640 }
641}
642
4febdd96
PE
643 /*--------------------------------------------.
644 | Scanning user-code characters and strings. |
645 `--------------------------------------------*/
e9955c83 646
4febdd96
PE
647<SC_CHARACTER,SC_STRING>
648{
e9690142 649 {splice}|\\{splice}[^\n\[\]] STRING_GROW;
4febdd96 650}
e9955c83
AD
651
652<SC_CHARACTER>
653{
e9690142 654 "'" STRING_GROW; BEGIN context_state;
c1b2677a
TR
655 \n unexpected_newline (token_start, "'");
656 <<EOF>> unexpected_eof (token_start, "'");
e9955c83
AD
657}
658
e9955c83
AD
659<SC_STRING>
660{
e9690142 661 "\"" STRING_GROW; BEGIN context_state;
c1b2677a
TR
662 \n unexpected_newline (token_start, "\"");
663 <<EOF>> unexpected_eof (token_start, "\"");
e9955c83
AD
664}
665
666
667 /*---------------------------------------------------.
668 | Strings, comments etc. can be found in user code. |
669 `---------------------------------------------------*/
670
ca2a6d15 671<SC_BRACED_CODE,SC_PROLOGUE,SC_EPILOGUE,SC_PREDICATE>
e9955c83 672{
3f2d73f1
PE
673 "'" {
674 STRING_GROW;
675 context_state = YY_START;
676 token_start = loc->start;
677 BEGIN SC_CHARACTER;
678 }
679 "\"" {
680 STRING_GROW;
681 context_state = YY_START;
682 token_start = loc->start;
683 BEGIN SC_STRING;
684 }
685 "/"{splice}"*" {
686 STRING_GROW;
687 context_state = YY_START;
688 token_start = loc->start;
689 BEGIN SC_COMMENT;
690 }
691 "/"{splice}"/" {
692 STRING_GROW;
693 context_state = YY_START;
694 BEGIN SC_LINE_COMMENT;
695 }
e9955c83
AD
696}
697
698
624a35e2 699
58d7a1a1 700 /*-----------------------------------------------------------.
ca2a6d15
PH
701 | Scanning some code in braces (actions, predicates). The |
702 | initial "{" is already eaten. |
58d7a1a1 703 `-----------------------------------------------------------*/
e9955c83 704
ca2a6d15 705<SC_BRACED_CODE,SC_PREDICATE>
e9955c83 706{
cb823b6f
AD
707 "{"|"<"{splice}"%" STRING_GROW; nesting++;
708 "%"{splice}">" STRING_GROW; nesting--;
ca2a6d15 709
ae93e4e4
JM
710 /* Tokenize '<<%' correctly (as '<<' '%') rather than incorrectly
711 (as '<' '<%'). */
ca2a6d15
PH
712 "<"{splice}"<" STRING_GROW;
713
c1b2677a 714 <<EOF>> unexpected_eof (code_start, "}");
ca2a6d15
PH
715}
716
717<SC_BRACED_CODE>
718{
e9955c83 719 "}" {
25522739
PE
720 obstack_1grow (&obstack_for_string, '}');
721
cb823b6f
AD
722 --nesting;
723 if (nesting < 0)
e9955c83 724 {
e9690142
JD
725 STRING_FINISH;
726 loc->start = code_start;
727 val->code = last_string;
728 BEGIN INITIAL;
729 return BRACED_CODE;
e9955c83
AD
730 }
731 }
ca2a6d15 732}
e9955c83 733
ca2a6d15
PH
734<SC_PREDICATE>
735{
736 "}" {
737 --nesting;
738 if (nesting < 0)
739 {
e9690142
JD
740 STRING_FINISH;
741 loc->start = code_start;
742 val->code = last_string;
743 BEGIN INITIAL;
744 return BRACED_PREDICATE;
ca2a6d15
PH
745 }
746 else
747 obstack_1grow (&obstack_for_string, '}');
47aee066 748 }
e9955c83
AD
749}
750
e9955c83
AD
751 /*--------------------------------------------------------------.
752 | Scanning some prologue: from "%{" (already scanned) to "%}". |
753 `--------------------------------------------------------------*/
754
755<SC_PROLOGUE>
756{
757 "%}" {
41141c56 758 STRING_FINISH;
3f2d73f1 759 loc->start = code_start;
985d7177 760 val->code = last_string;
a706a1cc 761 BEGIN INITIAL;
e9955c83
AD
762 return PROLOGUE;
763 }
764
c1b2677a 765 <<EOF>> unexpected_eof (code_start, "%}");
e9955c83
AD
766}
767
768
769 /*---------------------------------------------------------------.
770 | Scanning the epilogue (everything after the second "%%", which |
d8d3f94a 771 | has already been eaten). |
e9955c83
AD
772 `---------------------------------------------------------------*/
773
774<SC_EPILOGUE>
775{
e9955c83 776 <<EOF>> {
41141c56 777 STRING_FINISH;
3f2d73f1 778 loc->start = code_start;
985d7177 779 val->code = last_string;
a706a1cc 780 BEGIN INITIAL;
e9955c83
AD
781 return EPILOGUE;
782 }
783}
784
785
4febdd96
PE
786 /*-----------------------------------------------------.
787 | By default, grow the string obstack with the input. |
788 `-----------------------------------------------------*/
789
e9690142
JD
790<SC_COMMENT,SC_LINE_COMMENT,SC_BRACED_CODE,SC_PREDICATE,SC_PROLOGUE,SC_EPILOGUE,SC_STRING,SC_CHARACTER,SC_ESCAPED_STRING,SC_ESCAPED_CHARACTER>. |
791 <SC_COMMENT,SC_LINE_COMMENT,SC_BRACED_CODE,SC_PREDICATE,SC_PROLOGUE,SC_EPILOGUE>\n STRING_GROW;
4febdd96 792
e9955c83
AD
793%%
794
6c30d641
PE
795/* Read bytes from FP into buffer BUF of size SIZE. Return the
796 number of bytes read. Remove '\r' from input, treating \r\n
797 and isolated \r as \n. */
798
/* Input hook used by YY_INPUT: read up to SIZE bytes from FP into BUF
   and rewrite every CR or CR-LF as a single LF, compacting BUF in
   place. The return value is the number of bytes left in BUF after
   compaction, which can be smaller than what fread delivered. */
799static size_t
800no_cr_read (FILE *fp, char *buf, size_t size)
801{
a737b216
PE
802 size_t bytes_read = fread (buf, 1, size, fp);
803 if (bytes_read)
6c30d641 804 {
/* Fast path: when the chunk holds no CR it is returned untouched. */
a737b216 805 char *w = memchr (buf, '\r', bytes_read);
6c30d641 806 if (w)
e9690142
JD
807 {
/* W is the write cursor and R the read cursor; both start just
   past the first CR. LIM is one past the last byte read. */
808 char const *r = ++w;
809 char const *lim = buf + bytes_read;
810
811 for (;;)
812 {
813 /* Found an '\r'. Treat it like '\n', but ignore any
814 '\n' that immediately follows. */
815 w[-1] = '\n';
816 if (r == lim)
817 {
/* The CR was the last byte of this chunk, so the LF of a CR-LF
   pair may still be unread: peek at the next byte of FP. An LF is
   swallowed; any other byte is pushed back. If the push-back does
   not return that byte, leave the loop. At end of file
   ungetc (EOF, fp) returns EOF, which equals CH, so the loop is
   not left here and the copy step below returns instead. */
818 int ch = getc (fp);
819 if (ch != '\n' && ungetc (ch, fp) != ch)
820 break;
821 }
822 else if (*r == '\n')
823 r++;
824
825 /* Copy until the next '\r'. */
826 do
827 {
/* Input exhausted: everything before W is the compacted result. */
828 if (r == lim)
829 return w - buf;
830 }
831 while ((*w++ = *r++) != '\r');
832 }
833
834 return w - buf;
835 }
6c30d641
PE
836 }
837
a737b216 838 return bytes_read;
6c30d641
PE
839}
840
841
f25bfb75 842
1452af69
PE
843/*------------------------------------------------------.
844| Scan NUMBER for a base-BASE integer at location LOC. |
845`------------------------------------------------------*/
846
/* Convert the text NUMBER with strtoul in base BASE and return the
   value. A value above INT_MAX is reported through complain at LOC
   ("integer out of range") and INT_MAX is returned instead. The
   scanner rules call this with base 10 for {int} and base 16 for the
   0x... pattern. */
847static unsigned long int
848scan_integer (char const *number, int base, location loc)
849{
4517da37
PE
/* verify is not defined in this file; presumably a compile-time
   assertion that unsigned long can represent values above INT_MAX,
   which the range test below relies on -- confirm. */
850 verify (INT_MAX < ULONG_MAX);
851 unsigned long int num = strtoul (number, NULL, base);
852
853 if (INT_MAX < num)
1452af69 854 {
bb8e56ff 855 complain (&loc, complaint, _("integer out of range: %s"),
6fb8b256 856 quote (number));
1452af69
PE
/* Clamp so that the caller still receives a value that fits in int. */
857 num = INT_MAX;
858 }
4517da37 859
1452af69
PE
860 return num;
861}
862
863
d8d3f94a
PE
864/*------------------------------------------------------------------.
865| Convert universal character name UCN to a single-byte character, |
866| and return that character. Return -1 if UCN does not correspond |
e9690142 867| to a single-byte character. |
d8d3f94a
PE
868`------------------------------------------------------------------*/
869
/* UCN is the full escape text as matched by the scanner rule, i.e. a
   backslash, 'u' or 'U', then 4 or 8 hexadecimal digits. Returns the
   byte value for that code point, or -1 when it does not fit in one
   byte. The calling rule treats any result <= 0 as an invalid
   escape, so code point 0 is rejected there as well. */
870static int
871convert_ucn_to_byte (char const *ucn)
872{
4517da37
PE
873 verify (UCHAR_MAX <= INT_MAX);
/* ucn + 2 skips the backslash and the u/U letter. */
874 unsigned long int code = strtoul (ucn + 2, NULL, 16);
d8d3f94a
PE
875
876 /* FIXME: Currently we assume Unicode-compatible unibyte characters
877 on ASCII hosts (i.e., Latin-1 on hosts with 8-bit bytes). On
878 non-ASCII hosts we support only the portable C character set.
879 These limitations should be removed once we add support for
880 multibyte characters. */
881
882 if (UCHAR_MAX < code)
883 return -1;
884
/* The preprocessor test below checks four character constants against
   their ASCII values; the table is compiled only when one differs. */
885#if ! ('$' == 0x24 && '@' == 0x40 && '`' == 0x60 && '~' == 0x7e)
886 {
887 /* A non-ASCII host. Use CODE to index into a table of the C
888 basic execution character set, which is guaranteed to exist on
889 all Standard C platforms. This table also includes '$', '@',
8e6ef483 890 and '`', which are not in the basic execution character set but
d8d3f94a
PE
891 which are unibyte characters on all the platforms that we know
892 about. */
893 static signed char const table[] =
894 {
e9690142
JD
895 '\0', -1, -1, -1, -1, -1, -1, '\a',
896 '\b', '\t', '\n', '\v', '\f', '\r', -1, -1,
897 -1, -1, -1, -1, -1, -1, -1, -1,
898 -1, -1, -1, -1, -1, -1, -1, -1,
899 ' ', '!', '"', '#', '$', '%', '&', '\'',
900 '(', ')', '*', '+', ',', '-', '.', '/',
901 '0', '1', '2', '3', '4', '5', '6', '7',
902 '8', '9', ':', ';', '<', '=', '>', '?',
903 '@', 'A', 'B', 'C', 'D', 'E', 'F', 'G',
904 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O',
905 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W',
906 'X', 'Y', 'Z', '[', '\\', ']', '^', '_',
907 '`', 'a', 'b', 'c', 'd', 'e', 'f', 'g',
908 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o',
909 'p', 'q', 'r', 's', 't', 'u', 'v', 'w',
910 'x', 'y', 'z', '{', '|', '}', '~'
d8d3f94a
PE
911 };
912
/* Code points past the end of the table, and table slots holding -1,
   both yield -1. NOTE(review): CODE is unsigned long, so that -1 is
   stored as ULONG_MAX and only becomes -1 again through the
   conversion to int at the return below. */
913 code = code < sizeof table ? table[code] : -1;
914 }
915#endif
c4d720cd 916
d8d3f94a
PE
917 return code;
918}
919
920
03dbf629
AD
921/*---------------------------------------------------------------------.
922| Handle '#line INT( "FILE")?\n'. ARGS has already skipped '#line '. |
923`---------------------------------------------------------------------*/
900c5db5
AD
924
925static void
4517da37 926handle_syncline (char *args, location loc)
900c5db5 927{
03dbf629
AD
928 char *file;
929 unsigned long int lineno = strtoul (args, &file, 10);
4517da37
PE
930 if (INT_MAX <= lineno)
931 {
bb8e56ff 932 complain (&loc, Wother, _("line number overflow"));
4517da37
PE
933 lineno = INT_MAX;
934 }
03dbf629 935
064e42b0 936 file = strchr (file, '"');
03dbf629
AD
937 if (file)
938 {
064e42b0 939 *strchr (file + 1, '"') = '\0';
03dbf629
AD
940 current_file = uniqstr_new (file + 1);
941 }
0c8e079f 942 boundary_set (&scanner_cursor, current_file, lineno, 1);
4517da37
PE
943}
944
945
4febdd96
PE
946/*----------------------------------------------------------------.
947| For a token or comment starting at START, report message MSGID, |
c1b2677a
TR
948| which should say that an end marker was found before the |
949| expected TOKEN_END. Then, pretend that TOKEN_END was found. |
4febdd96
PE
950`----------------------------------------------------------------*/
951
/* Shared worker for unexpected_eof and unexpected_newline. START is
   where the unterminated construct began, MSGID an untranslated
   format with one %s, and TOKEN_END the terminator that was expected.
   The diagnostic covers START up to the current scanner cursor. */
952static void
953unexpected_end (boundary start, char const *msgid, char const *token_end)
954{
955 location loc;
956 loc.start = start;
957 loc.end = scanner_cursor;
c1b2677a
TR
958 size_t i = strlen (token_end);
959
960/* Adjust scanner cursor so that any later message does not count
961 the characters about to be inserted. */
962 scanner_cursor.column -= i;
963
/* Push TOKEN_END back onto the input, last character first, so that
   the scanner reads it in its original order and closes the
   construct as if the terminator had been present. */
964 while (i != 0)
965 unput (token_end[--i]);
966
/* From here on TOKEN_END is only the text shown in the message. */
4a9cd8f2 967 token_end = quote (token_end);
1127a75a 968 /* Instead of '\'', display "'". */
f518dbaf 969 if (STREQ (token_end, "'\\''"))
4a9cd8f2 970 token_end = "\"'\"";
/* MSGID is translated here, at report time. */
bb8e56ff 971 complain (&loc, complaint, _(msgid), token_end);
4febdd96
PE
972}
973
974
3f2d73f1
PE
975/*------------------------------------------------------------------------.
976| Report an unexpected EOF in a token or comment starting at START. |
977| An end of file was encountered and the expected TOKEN_END was missing. |
c1b2677a 978| After reporting the problem, pretend that TOKEN_END was found. |
3f2d73f1 979`------------------------------------------------------------------------*/
a706a1cc
PE
980
/* Thin wrapper: forwards START and TOKEN_END to unexpected_end with
   the end-of-file message. The format is wrapped in N_ here and
   passed through _() inside unexpected_end (N_ presumably only marks
   the literal for extraction, per gettext convention). */
981static void
aa418041 982unexpected_eof (boundary start, char const *token_end)
a706a1cc 983{
4a9cd8f2 984 unexpected_end (start, N_("missing %s at end of file"), token_end);
4febdd96
PE
985}
986
987
988/*----------------------------------------.
989| Likewise, but for unexpected newlines. |
990`----------------------------------------*/
991
/* Thin wrapper: forwards START and TOKEN_END to unexpected_end with
   the end-of-line message. The string and character-literal rules
   call it when a newline is matched before the closing quote. */
992static void
993unexpected_newline (boundary start, char const *token_end)
994{
4a9cd8f2 995 unexpected_end (start, N_("missing %s at end of line"), token_end);
a706a1cc
PE
996}
997
998
f25bfb75
AD
999/*-------------------------.
1000| Initialize the scanner. |
1001`-------------------------*/
1002
/* Set up obstack_for_string, the obstack the scanner rules append
   token text to (see the obstack_1grow calls in the rules). The
   object itself is declared outside this file. */
1d6412ad 1003void
e9071366 1004gram_scanner_initialize (void)
1d6412ad 1005{
223ff46e 1006 obstack_init (&obstack_for_string);
1d6412ad
AD
1007}
1008
1009
f25bfb75
AD
1010/*-----------------------------------------------.
1011| Free all the memory allocated to the scanner. |
1012`-----------------------------------------------*/
1013
/* Tear down the scanner: release obstack_for_string, then let Flex
   free its own state. */
4cdb01db 1014void
e9071366 1015gram_scanner_free (void)
4cdb01db 1016{
/* NOTE(review): per the GNU obstack documentation, passing a null
   object frees everything in the obstack and leaves it
   uninitialized -- confirm before reusing the scanner afterwards. */
223ff46e 1017 obstack_free (&obstack_for_string, 0);
536545f3 1018 /* Reclaim Flex's buffers. */
580b8926 1019 yylex_destroy ();
4cdb01db 1020}