]> git.saurik.com Git - bison.git/blame - src/scan-gram.l
Merge remote-tracking branch 'origin/maint'
[bison.git] / src / scan-gram.l
CommitLineData
e9955c83 1/* Bison Grammar Scanner -*- C -*-
3b1e470c 2
34136e65 3 Copyright (C) 2002-2012 Free Software Foundation, Inc.
e9955c83
AD
4
5 This file is part of Bison, the GNU Compiler Compiler.
6
f16b0819 7 This program is free software: you can redistribute it and/or modify
e9955c83 8 it under the terms of the GNU General Public License as published by
f16b0819 9 the Free Software Foundation, either version 3 of the License, or
e9955c83
AD
10 (at your option) any later version.
11
12 This program is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
16
17 You should have received a copy of the GNU General Public License
f16b0819 18 along with this program. If not, see <http://www.gnu.org/licenses/>. */
e9955c83 19
2062d72d 20%option debug nodefault noinput noyywrap never-interactive
e9955c83
AD
21%option prefix="gram_" outfile="lex.yy.c"
22
23%{
4f6e011e
PE
24/* Work around a bug in flex 2.5.31. See Debian bug 333231
25 <http://bugs.debian.org/cgi-bin/bugreport.cgi?bug=333231>. */
26#undef gram_wrap
27#define gram_wrap() 1
28
e9071366 29#define FLEX_PREFIX(Id) gram_ ## Id
0305d25e 30#include <src/flex-scanner.h>
223ff46e 31
0305d25e
AD
32#include <src/complain.h>
33#include <src/files.h>
2062d72d 34#include <src/getargs.h>
0305d25e
AD
35#include <src/gram.h>
36#include <quotearg.h>
37#include <src/reader.h>
38#include <src/uniqstr.h>
e9955c83 39
457bf919 40#include <c-ctype.h>
e9071366
AD
41#include <mbswidth.h>
42#include <quote.h>
43
0305d25e 44#include <src/scan-gram.h>
e9071366
AD
45
46#define YY_DECL GRAM_LEX_DECL
2346344a 47
e9690142
JD
48#define YY_USER_INIT \
49 code_start = scanner_cursor = loc->start; \
dc9701e8 50
3f2d73f1 51/* Location of scanner cursor. */
4a678af8 52static boundary scanner_cursor;
41141c56 53
e9071366 54#define YY_USER_ACTION location_compute (loc, &scanner_cursor, yytext, yyleng);
d8d3f94a 55
6c30d641 56static size_t no_cr_read (FILE *, char *, size_t);
d8d3f94a
PE
57#define YY_INPUT(buf, result, size) ((result) = no_cr_read (yyin, buf, size))
58
a7706735
AD
59#define RETURN_PERCENT_PARAM(Value) \
60 RETURN_VALUE(PERCENT_PARAM, param, param_ ## Value)
61
62#define RETURN_PERCENT_FLAG(Value) \
63 RETURN_VALUE(PERCENT_FLAG, uniqstr, uniqstr_new (Value))
64
65#define RETURN_VALUE(Token, Field, Value) \
ba061fa6 66 do { \
a7706735
AD
67 val->Field = Value; \
68 return Token; \
ba061fa6
AD
69 } while (0)
70
b9f1d9a4
AR
71#define ROLLBACK_CURRENT_TOKEN \
72 do { \
e9690142 73 scanner_cursor.column -= mbsnwidth (yytext, yyleng, 0); \
b9f1d9a4
AR
74 yyless (0); \
75 } while (0)
ba061fa6 76
2062d72d
TR
77#define DEPRECATED(Msg) \
78 do { \
79 size_t i; \
80 complain (loc, Wdeprecated, \
81 _("deprecated directive: %s, use %s"), \
82 quote (yytext), quote_n (1, Msg)); \
83 scanner_cursor.column -= mbsnwidth (Msg, strlen (Msg), 0); \
84 for (i = strlen (Msg); i != 0; --i) \
85 unput (Msg[i - 1]); \
86 } while (0)
87
7ec2d4cd 88/* A string representing the most recently saved token. */
7c0c6181 89static char *last_string;
7ec2d4cd 90
872b52bc 91/* Bracketed identifier. */
b9f1d9a4
AR
92static uniqstr bracketed_id_str = 0;
93static location bracketed_id_loc;
94static boundary bracketed_id_start;
95static int bracketed_id_context_state = 0;
96
7ec2d4cd 97void
e9071366 98gram_scanner_last_string_free (void)
7ec2d4cd 99{
41141c56 100 STRING_FREE;
7ec2d4cd 101}
e9955c83 102
4517da37 103static void handle_syncline (char *, location);
1452af69 104static unsigned long int scan_integer (char const *p, int base, location loc);
d8d3f94a 105static int convert_ucn_to_byte (char const *hex_text);
aa418041 106static void unexpected_eof (boundary, char const *);
4febdd96 107static void unexpected_newline (boundary, char const *);
e9955c83
AD
108
109%}
e9071366
AD
110 /* A C-like comment in directives/rules. */
111%x SC_YACC_COMMENT
112 /* Strings and characters in directives/rules. */
e9955c83 113%x SC_ESCAPED_STRING SC_ESCAPED_CHARACTER
e9071366 114 /* An identifier was just read in directives/rules. Special state
ae93e4e4 115 to capture the sequence 'identifier :'. */
e9071366 116%x SC_AFTER_IDENTIFIER
cb823b6f
AD
117 /* A complex tag, with nested angle brackets. */
118%x SC_TAG
e9071366 119
ca2a6d15 120 /* Four types of user code:
ae93e4e4 121 - prologue (code between '%{' '%}' in the first section, before %%);
e9071366 122 - actions, printers, union, etc. (between braces in the middle section);
da5462d4 123 - epilogue (everything after the second %%);
ae93e4e4 124 - predicate (code between '%?{' and '}' in the middle section). */
ca2a6d15 125%x SC_PROLOGUE SC_BRACED_CODE SC_EPILOGUE SC_PREDICATE
e9071366
AD
126 /* C and C++ comments in code. */
127%x SC_COMMENT SC_LINE_COMMENT
128 /* Strings and characters in code. */
129%x SC_STRING SC_CHARACTER
872b52bc 130 /* Bracketed identifiers support. */
b9f1d9a4 131%x SC_BRACKETED_ID SC_RETURN_BRACKETED_ID
e9955c83 132
e9690142 133letter [.abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ_]
68ac70bc 134notletter [^.abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ_]{-}[%\{]
e9690142 135id {letter}({letter}|[-0-9])*
e9690142 136int [0-9]+
d8d3f94a
PE
137
138/* POSIX says that a tag must be both an id and a C union member, but
139 historically almost any character is allowed in a tag. We disallow
cb823b6f
AD
140 NUL, as this simplifies our implementation. We disallow angle
141 bracket to match them in nested pairs: several languages use them
142 for generics/template types. */
e9690142 143tag [^\0<>]+
d8d3f94a
PE
144
145/* Zero or more instances of backslash-newline. Following GCC, allow
146 white space between the backslash and the newline. */
e9690142 147splice (\\[ \f\t\v]*\n)*
e9955c83 148
2062d72d
TR
149/* An equal sign, with optional leading whitespace. This is used in some
150 deprecated constructs. */
151eqopt ([[:space:]]*=)?
152
e9955c83
AD
153%%
154%{
cb823b6f
AD
155 /* Nesting level. Either for nested braces, or nested angle brackets
156 (but not mixed). */
84f6a6ca 157 int nesting PACIFY_CC (= 0);
1a9e39f1 158
3f2d73f1 159 /* Parent context state, when applicable. */
84f6a6ca 160 int context_state PACIFY_CC (= 0);
a706a1cc 161
3f2d73f1 162 /* Location of most recent identifier, when applicable. */
84f6a6ca 163 location id_loc PACIFY_CC (= empty_location);
3f2d73f1 164
a2bc9dbc
PE
165 /* Where containing code started, when applicable. Its initial
166 value is relevant only when yylex is invoked in the SC_EPILOGUE
167 start condition. */
168 boundary code_start = scanner_cursor;
3f2d73f1 169
223ff46e
PE
170 /* Where containing comment or string or character literal started,
171 when applicable. */
84f6a6ca 172 boundary token_start PACIFY_CC (= scanner_cursor);
e9955c83
AD
173%}
174
175
3f2d73f1
PE
176 /*-----------------------.
177 | Scanning white space. |
178 `-----------------------*/
179
b9f1d9a4 180<INITIAL,SC_AFTER_IDENTIFIER,SC_BRACKETED_ID,SC_RETURN_BRACKETED_ID>
3f2d73f1 181{
4febdd96 182 /* Comments and white space. */
6fb8b256 183 "," {
bb8e56ff 184 complain (loc, Wother, _("stray ',' treated as white space"));
6fb8b256 185 }
4febdd96 186 [ \f\n\t\v] |
96029914 187 "//".* continue;
83adb046
PE
188 "/*" {
189 token_start = loc->start;
190 context_state = YY_START;
191 BEGIN SC_YACC_COMMENT;
192 }
3f2d73f1
PE
193
194 /* #line directives are not documented, and may be withdrawn or
195 modified in future versions of Bison. */
03dbf629 196 ^"#line "{int}(" \"".*"\"")?"\n" {
4517da37 197 handle_syncline (yytext + sizeof "#line " - 1, *loc);
3f2d73f1
PE
198 }
199}
200
201
e9955c83
AD
202 /*----------------------------.
203 | Scanning Bison directives. |
204 `----------------------------*/
a7c09cba
DJ
205
206 /* For directives that are also command line options, the regex must be
e9690142 207 "%..."
a7c09cba
DJ
208 after "[-_]"s are removed, and the directive must match the --long
209 option name, with a single string argument. Otherwise, add exceptions
210 to ../build-aux/cross-options.pl. */
211
e9955c83
AD
212<INITIAL>
213{
deef2a0a 214 "%binary" return PERCENT_NONASSOC;
136a0f76 215 "%code" return PERCENT_CODE;
fa819509 216 "%debug" RETURN_PERCENT_FLAG("parse.trace");
2062d72d 217 "%default-prec" return PERCENT_DEFAULT_PREC;
deef2a0a
AD
218 "%define" return PERCENT_DEFINE;
219 "%defines" return PERCENT_DEFINES;
220 "%destructor" return PERCENT_DESTRUCTOR;
221 "%dprec" return PERCENT_DPREC;
2062d72d 222 "%error-verbose" return PERCENT_ERROR_VERBOSE;
deef2a0a 223 "%expect" return PERCENT_EXPECT;
2062d72d 224 "%expect-rr" return PERCENT_EXPECT_RR;
deef2a0a 225 "%file-prefix" return PERCENT_FILE_PREFIX;
2062d72d 226 "%fixed-output-files" return PERCENT_YACC;
deef2a0a
AD
227 "%initial-action" return PERCENT_INITIAL_ACTION;
228 "%glr-parser" return PERCENT_GLR_PARSER;
229 "%language" return PERCENT_LANGUAGE;
230 "%left" return PERCENT_LEFT;
a7706735 231 "%lex-param" RETURN_PERCENT_PARAM(lex);
bc0f5737 232 "%locations" RETURN_PERCENT_FLAG("locations");
deef2a0a 233 "%merge" return PERCENT_MERGE;
2062d72d
TR
234 "%name-prefix" return PERCENT_NAME_PREFIX;
235 "%no-default-prec" return PERCENT_NO_DEFAULT_PREC;
236 "%no-lines" return PERCENT_NO_LINES;
deef2a0a
AD
237 "%nonassoc" return PERCENT_NONASSOC;
238 "%nondeterministic-parser" return PERCENT_NONDETERMINISTIC_PARSER;
239 "%nterm" return PERCENT_NTERM;
240 "%output" return PERCENT_OUTPUT;
a7706735
AD
241 "%param" RETURN_PERCENT_PARAM(both);
242 "%parse-param" RETURN_PERCENT_PARAM(parse);
deef2a0a 243 "%prec" return PERCENT_PREC;
d78f0ac9 244 "%precedence" return PERCENT_PRECEDENCE;
deef2a0a 245 "%printer" return PERCENT_PRINTER;
2062d72d 246 "%pure-parser" RETURN_PERCENT_FLAG("api.pure");
deef2a0a
AD
247 "%require" return PERCENT_REQUIRE;
248 "%right" return PERCENT_RIGHT;
249 "%skeleton" return PERCENT_SKELETON;
250 "%start" return PERCENT_START;
251 "%term" return PERCENT_TOKEN;
252 "%token" return PERCENT_TOKEN;
2062d72d 253 "%token-table" return PERCENT_TOKEN_TABLE;
deef2a0a
AD
254 "%type" return PERCENT_TYPE;
255 "%union" return PERCENT_UNION;
256 "%verbose" return PERCENT_VERBOSE;
257 "%yacc" return PERCENT_YACC;
e9955c83 258
2062d72d
TR
259 /* deprecated */
260 "%default"[-_]"prec" DEPRECATED("%default-prec");
261 "%error"[-_]"verbose" DEPRECATED("%define parse.error verbose");
262 "%expect"[-_]"rr" DEPRECATED("%expect-rr");
263 "%file-prefix"{eqopt} DEPRECATED("%file-prefix");
264 "%fixed"[-_]"output"[-_]"files" DEPRECATED("%fixed-output-files");
265 "%name"[-_]"prefix"{eqopt} DEPRECATED("%name-prefix");
266 "%no"[-_]"default"[-_]"prec" DEPRECATED("%no-default-prec");
267 "%no"[-_]"lines" DEPRECATED("%no-lines");
268 "%output"{eqopt} DEPRECATED("%output");
269 "%pure"[-_]"parser" DEPRECATED("%pure-parser");
270 "%token"[-_]"table" DEPRECATED("%token-table");
271
68ac70bc 272 "%"{id}|"%"{notletter}([[:graph:]])+ {
bb8e56ff 273 complain (loc, complaint, _("invalid directive: %s"), quote (yytext));
412f8a59 274 }
900c5db5 275
e9955c83 276 "=" return EQUAL;
e9071366 277 "|" return PIPE;
e9955c83
AD
278 ";" return SEMICOLON;
279
3f2d73f1 280 {id} {
58d7a1a1 281 val->uniqstr = uniqstr_new (yytext);
3f2d73f1 282 id_loc = *loc;
b9f1d9a4 283 bracketed_id_str = NULL;
3f2d73f1 284 BEGIN SC_AFTER_IDENTIFIER;
e9955c83
AD
285 }
286
d8d3f94a 287 {int} {
1452af69
PE
288 val->integer = scan_integer (yytext, 10, *loc);
289 return INT;
290 }
291 0[xX][0-9abcdefABCDEF]+ {
292 val->integer = scan_integer (yytext, 16, *loc);
d8d3f94a
PE
293 return INT;
294 }
e9955c83 295
84a1cb5a
AD
296 /* Identifiers may not start with a digit. Yet, don't silently
297 accept "1FOO" as "1 FOO". */
298 {int}{id} {
bb8e56ff 299 complain (loc, complaint, _("invalid identifier: %s"), quote (yytext));
84a1cb5a
AD
300 }
301
3208e3f4 302 /* Characters. */
e9690142 303 "'" token_start = loc->start; BEGIN SC_ESCAPED_CHARACTER;
e9955c83
AD
304
305 /* Strings. */
e9690142 306 "\"" token_start = loc->start; BEGIN SC_ESCAPED_STRING;
e9955c83
AD
307
308 /* Prologue. */
3f2d73f1 309 "%{" code_start = loc->start; BEGIN SC_PROLOGUE;
e9955c83
AD
310
311 /* Code in between braces. */
3f2d73f1
PE
312 "{" {
313 STRING_GROW;
cb823b6f 314 nesting = 0;
3f2d73f1
PE
315 code_start = loc->start;
316 BEGIN SC_BRACED_CODE;
317 }
e9955c83 318
ca2a6d15
PH
319 /* Semantic predicate. */
320 "%?"[ \f\n\t\v]*"{" {
321 nesting = 0;
322 code_start = loc->start;
323 BEGIN SC_PREDICATE;
324 }
325
e9955c83 326 /* A type. */
cb823b6f
AD
327 "<*>" return TAG_ANY;
328 "<>" return TAG_NONE;
d8d3f94a 329 "<"{tag}">" {
223ff46e 330 obstack_grow (&obstack_for_string, yytext + 1, yyleng - 2);
41141c56 331 STRING_FINISH;
223ff46e 332 val->uniqstr = uniqstr_new (last_string);
41141c56 333 STRING_FREE;
cb823b6f
AD
334 return TAG;
335 }
336 "<" {
337 nesting = 0;
338 token_start = loc->start;
339 BEGIN SC_TAG;
4cdb01db
AD
340 }
341
a706a1cc
PE
342 "%%" {
343 static int percent_percent_count;
e9955c83 344 if (++percent_percent_count == 2)
a2bc9dbc 345 BEGIN SC_EPILOGUE;
e9955c83
AD
346 return PERCENT_PERCENT;
347 }
348
b9f1d9a4
AR
349 "[" {
350 bracketed_id_str = NULL;
351 bracketed_id_start = loc->start;
352 bracketed_id_context_state = YY_START;
353 BEGIN SC_BRACKETED_ID;
354 }
355
68ac70bc 356 [^\[%A-Za-z0-9_<>{}\"\'*;|=/, \f\n\t\v]+|. {
c6b17724
AD
357 complain (loc, complaint, "%s: %s",
358 ngettext ("invalid character", "invalid characters", yyleng),
e42906f7 359 quote_mem (yytext, yyleng));
3f2d73f1 360 }
379f0ac8
PE
361
362 <<EOF>> {
363 loc->start = loc->end = scanner_cursor;
364 yyterminate ();
365 }
3f2d73f1
PE
366}
367
368
cb823b6f
AD
369 /*--------------------------------------------------------------.
370 | Supporting \0 complexifies our implementation for no expected |
371 | added value. |
372 `--------------------------------------------------------------*/
373
374<SC_ESCAPED_CHARACTER,SC_ESCAPED_STRING,SC_TAG>
375{
bb8e56ff 376 \0 complain (loc, complaint, _("invalid null character"));
cb823b6f
AD
377}
378
379
3f2d73f1
PE
380 /*-----------------------------------------------------------------.
381 | Scanning after an identifier, checking whether a colon is next. |
382 `-----------------------------------------------------------------*/
383
384<SC_AFTER_IDENTIFIER>
385{
b9f1d9a4 386 "[" {
872b52bc 387 if (bracketed_id_str)
b9f1d9a4 388 {
e9690142
JD
389 ROLLBACK_CURRENT_TOKEN;
390 BEGIN SC_RETURN_BRACKETED_ID;
391 *loc = id_loc;
392 return ID;
b9f1d9a4 393 }
872b52bc
AR
394 else
395 {
e9690142
JD
396 bracketed_id_start = loc->start;
397 bracketed_id_context_state = YY_START;
398 BEGIN SC_BRACKETED_ID;
872b52bc 399 }
b9f1d9a4 400 }
3f2d73f1 401 ":" {
b9f1d9a4 402 BEGIN (bracketed_id_str ? SC_RETURN_BRACKETED_ID : INITIAL);
3f2d73f1 403 *loc = id_loc;
3f2d73f1
PE
404 return ID_COLON;
405 }
406 . {
b9f1d9a4
AR
407 ROLLBACK_CURRENT_TOKEN;
408 BEGIN (bracketed_id_str ? SC_RETURN_BRACKETED_ID : INITIAL);
3f2d73f1 409 *loc = id_loc;
3f2d73f1
PE
410 return ID;
411 }
412 <<EOF>> {
b9f1d9a4 413 BEGIN (bracketed_id_str ? SC_RETURN_BRACKETED_ID : INITIAL);
3f2d73f1 414 *loc = id_loc;
3f2d73f1 415 return ID;
e9955c83
AD
416 }
417}
418
b9f1d9a4
AR
419 /*--------------------------------.
420 | Scanning bracketed identifiers. |
421 `--------------------------------*/
422
423<SC_BRACKETED_ID>
424{
425 {id} {
872b52bc 426 if (bracketed_id_str)
b9f1d9a4 427 {
bb8e56ff
TR
428 complain (loc, complaint,
429 _("unexpected identifier in bracketed name: %s"),
430 quote (yytext));
b9f1d9a4
AR
431 }
432 else
433 {
e9690142
JD
434 bracketed_id_str = uniqstr_new (yytext);
435 bracketed_id_loc = *loc;
b9f1d9a4
AR
436 }
437 }
438 "]" {
439 BEGIN bracketed_id_context_state;
440 if (bracketed_id_str)
441 {
e9690142
JD
442 if (INITIAL == bracketed_id_context_state)
443 {
444 val->uniqstr = bracketed_id_str;
445 bracketed_id_str = 0;
446 *loc = bracketed_id_loc;
447 return BRACKETED_ID;
448 }
b9f1d9a4
AR
449 }
450 else
bb8e56ff 451 complain (loc, complaint, _("an identifier expected"));
b9f1d9a4 452 }
68ac70bc
AD
453
454 [^\].A-Za-z0-9_/ \f\n\t\v]+|. {
c6b17724
AD
455 complain (loc, complaint, "%s: %s",
456 ngettext ("invalid character in bracketed name",
457 "invalid characters in bracketed name", yyleng),
e42906f7 458 quote_mem (yytext, yyleng));
b9f1d9a4 459 }
68ac70bc 460
b9f1d9a4
AR
461 <<EOF>> {
462 BEGIN bracketed_id_context_state;
463 unexpected_eof (bracketed_id_start, "]");
464 }
465}
466
467<SC_RETURN_BRACKETED_ID>
468{
469 . {
470 ROLLBACK_CURRENT_TOKEN;
471 val->uniqstr = bracketed_id_str;
472 bracketed_id_str = 0;
473 *loc = bracketed_id_loc;
474 BEGIN INITIAL;
475 return BRACKETED_ID;
476 }
477}
478
e9955c83 479
d8d3f94a 480 /*---------------------------------------------------------------.
ae93e4e4 481 | Scanning a Yacc comment. The initial '/ *' is already eaten. |
d8d3f94a 482 `---------------------------------------------------------------*/
e9955c83 483
d8d3f94a 484<SC_YACC_COMMENT>
e9955c83 485{
3f2d73f1 486 "*/" BEGIN context_state;
c6b17724 487 .|\n continue;
aa418041 488 <<EOF>> unexpected_eof (token_start, "*/"); BEGIN context_state;
d8d3f94a
PE
489}
490
491
492 /*------------------------------------------------------------.
ae93e4e4 493 | Scanning a C comment. The initial '/ *' is already eaten. |
d8d3f94a
PE
494 `------------------------------------------------------------*/
495
496<SC_COMMENT>
497{
3f2d73f1 498 "*"{splice}"/" STRING_GROW; BEGIN context_state;
e9690142 499 <<EOF>> unexpected_eof (token_start, "*/"); BEGIN context_state;
e9955c83
AD
500}
501
502
d8d3f94a 503 /*--------------------------------------------------------------.
ae93e4e4 504 | Scanning a line comment. The initial '//' is already eaten. |
d8d3f94a
PE
505 `--------------------------------------------------------------*/
506
507<SC_LINE_COMMENT>
508{
e9690142
JD
509 "\n" STRING_GROW; BEGIN context_state;
510 {splice} STRING_GROW;
511 <<EOF>> BEGIN context_state;
d8d3f94a
PE
512}
513
514
4febdd96
PE
515 /*------------------------------------------------.
516 | Scanning a Bison string, including its escapes. |
517 | The initial quote is already eaten. |
518 `------------------------------------------------*/
e9955c83
AD
519
520<SC_ESCAPED_STRING>
521{
47aee066
JD
522 "\""|"\n" {
523 if (yytext[0] == '\n')
524 unexpected_newline (token_start, "\"");
525 STRING_FINISH;
526 loc->start = token_start;
527 val->chars = last_string;
528 BEGIN INITIAL;
529 return STRING;
530 }
531 <<EOF>> {
532 unexpected_eof (token_start, "\"");
41141c56 533 STRING_FINISH;
3f2d73f1 534 loc->start = token_start;
223ff46e 535 val->chars = last_string;
a706a1cc 536 BEGIN INITIAL;
e9955c83
AD
537 return STRING;
538 }
e9955c83
AD
539}
540
4febdd96
PE
541 /*----------------------------------------------------------.
542 | Scanning a Bison character literal, decoding its escapes. |
e9690142 543 | The initial quote is already eaten. |
4febdd96 544 `----------------------------------------------------------*/
e9955c83
AD
545
546<SC_ESCAPED_CHARACTER>
547{
47aee066 548 "'"|"\n" {
41141c56 549 STRING_FINISH;
3f2d73f1 550 loc->start = token_start;
dfaa4860 551 val->character = last_string[0];
3208e3f4
JD
552 {
553 /* FIXME: Eventually, make these errors. */
dfaa4860
JD
554 if (last_string[0] == '\0')
555 {
bb8e56ff 556 complain (loc, Wother, _("empty character literal"));
dfaa4860
JD
557 /* '\0' seems dangerous even if we are about to complain. */
558 val->character = '\'';
559 }
560 else if (last_string[1] != '\0')
bb8e56ff 561 complain (loc, Wother,
6fb8b256 562 _("extra characters in character literal"));
3208e3f4
JD
563 }
564 if (yytext[0] == '\n')
565 unexpected_newline (token_start, "'");
41141c56 566 STRING_FREE;
a706a1cc 567 BEGIN INITIAL;
58d7a1a1 568 return CHAR;
e9955c83 569 }
47aee066 570 <<EOF>> {
47aee066
JD
571 STRING_FINISH;
572 loc->start = token_start;
dfaa4860 573 val->character = last_string[0];
3208e3f4 574 {
3208e3f4 575 /* FIXME: Eventually, make these errors. */
dfaa4860
JD
576 if (last_string[0] == '\0')
577 {
bb8e56ff 578 complain (loc, Wother, _("empty character literal"));
dfaa4860
JD
579 /* '\0' seems dangerous even if we are about to complain. */
580 val->character = '\'';
581 }
582 else if (last_string[1] != '\0')
bb8e56ff 583 complain (loc, Wother,
6fb8b256 584 _("extra characters in character literal"));
3208e3f4
JD
585 }
586 unexpected_eof (token_start, "'");
47aee066
JD
587 STRING_FREE;
588 BEGIN INITIAL;
589 return CHAR;
590 }
4febdd96 591}
a706a1cc 592
cb823b6f
AD
593 /*-----------------------------------------------------------.
594 | Scanning a Bison nested tag. The initial angle bracket is |
595 | already eaten. |
596 `-----------------------------------------------------------*/
597
598<SC_TAG>
4febdd96 599{
cb823b6f
AD
600 ">" {
601 --nesting;
602 if (nesting < 0)
603 {
604 STRING_FINISH;
605 loc->start = token_start;
606 val->uniqstr = uniqstr_new (last_string);
607 STRING_FREE;
608 BEGIN INITIAL;
609 return TAG;
610 }
611 STRING_GROW;
612 }
613
614 [^<>]+ STRING_GROW;
615 "<"+ STRING_GROW; nesting += yyleng;
e9955c83 616
cb823b6f
AD
617 <<EOF>> {
618 unexpected_eof (token_start, ">");
619 STRING_FINISH;
620 loc->start = token_start;
621 val->uniqstr = uniqstr_new (last_string);
622 STRING_FREE;
623 BEGIN INITIAL;
624 return TAG;
625 }
626}
e9955c83
AD
627
628 /*----------------------------.
629 | Decode escaped characters. |
630 `----------------------------*/
631
632<SC_ESCAPED_STRING,SC_ESCAPED_CHARACTER>
633{
d8d3f94a 634 \\[0-7]{1,3} {
4517da37 635 unsigned long int c = strtoul (yytext + 1, NULL, 8);
c2724603 636 if (!c || UCHAR_MAX < c)
bb8e56ff 637 complain (loc, complaint, _("invalid number after \\-escape: %s"),
c2724603 638 yytext+1);
e9955c83 639 else
223ff46e 640 obstack_1grow (&obstack_for_string, c);
e9955c83
AD
641 }
642
6b0d38ab 643 \\x[0-9abcdefABCDEF]+ {
4517da37
PE
644 verify (UCHAR_MAX < ULONG_MAX);
645 unsigned long int c = strtoul (yytext + 2, NULL, 16);
c2724603 646 if (!c || UCHAR_MAX < c)
bb8e56ff 647 complain (loc, complaint, _("invalid number after \\-escape: %s"),
c2724603 648 yytext+1);
d8d3f94a 649 else
223ff46e 650 obstack_1grow (&obstack_for_string, c);
e9955c83
AD
651 }
652
e9690142
JD
653 \\a obstack_1grow (&obstack_for_string, '\a');
654 \\b obstack_1grow (&obstack_for_string, '\b');
655 \\f obstack_1grow (&obstack_for_string, '\f');
656 \\n obstack_1grow (&obstack_for_string, '\n');
657 \\r obstack_1grow (&obstack_for_string, '\r');
658 \\t obstack_1grow (&obstack_for_string, '\t');
659 \\v obstack_1grow (&obstack_for_string, '\v');
412f8a59
PE
660
661 /* \\[\"\'?\\] would be shorter, but it confuses xgettext. */
223ff46e 662 \\("\""|"'"|"?"|"\\") obstack_1grow (&obstack_for_string, yytext[1]);
412f8a59 663
6b0d38ab 664 \\(u|U[0-9abcdefABCDEF]{4})[0-9abcdefABCDEF]{4} {
d8d3f94a 665 int c = convert_ucn_to_byte (yytext);
c2724603 666 if (c <= 0)
bb8e56ff 667 complain (loc, complaint, _("invalid number after \\-escape: %s"),
c2724603 668 yytext+1);
d8d3f94a 669 else
223ff46e 670 obstack_1grow (&obstack_for_string, c);
d8d3f94a 671 }
e9690142 672 \\(.|\n) {
c2724603 673 char const *p = yytext + 1;
e6c849d8 674 /* Quote only if escaping won't make the character visible. */
457bf919 675 if (c_isspace ((unsigned char) *p) && c_isprint ((unsigned char) *p))
e6c849d8 676 p = quote (p);
c2724603
JD
677 else
678 p = quotearg_style_mem (escape_quoting_style, p, 1);
bb8e56ff 679 complain (loc, complaint, _("invalid character after \\-escape: %s"),
6fb8b256 680 p);
e9955c83
AD
681 }
682}
683
4febdd96
PE
684 /*--------------------------------------------.
685 | Scanning user-code characters and strings. |
686 `--------------------------------------------*/
e9955c83 687
4febdd96
PE
688<SC_CHARACTER,SC_STRING>
689{
e9690142 690 {splice}|\\{splice}[^\n\[\]] STRING_GROW;
4febdd96 691}
e9955c83
AD
692
693<SC_CHARACTER>
694{
e9690142
JD
695 "'" STRING_GROW; BEGIN context_state;
696 \n unexpected_newline (token_start, "'"); BEGIN context_state;
697 <<EOF>> unexpected_eof (token_start, "'"); BEGIN context_state;
e9955c83
AD
698}
699
e9955c83
AD
700<SC_STRING>
701{
e9690142
JD
702 "\"" STRING_GROW; BEGIN context_state;
703 \n unexpected_newline (token_start, "\""); BEGIN context_state;
704 <<EOF>> unexpected_eof (token_start, "\""); BEGIN context_state;
e9955c83
AD
705}
706
707
708 /*---------------------------------------------------.
709 | Strings, comments etc. can be found in user code. |
710 `---------------------------------------------------*/
711
ca2a6d15 712<SC_BRACED_CODE,SC_PROLOGUE,SC_EPILOGUE,SC_PREDICATE>
e9955c83 713{
3f2d73f1
PE
714 "'" {
715 STRING_GROW;
716 context_state = YY_START;
717 token_start = loc->start;
718 BEGIN SC_CHARACTER;
719 }
720 "\"" {
721 STRING_GROW;
722 context_state = YY_START;
723 token_start = loc->start;
724 BEGIN SC_STRING;
725 }
726 "/"{splice}"*" {
727 STRING_GROW;
728 context_state = YY_START;
729 token_start = loc->start;
730 BEGIN SC_COMMENT;
731 }
732 "/"{splice}"/" {
733 STRING_GROW;
734 context_state = YY_START;
735 BEGIN SC_LINE_COMMENT;
736 }
e9955c83
AD
737}
738
739
624a35e2 740
58d7a1a1 741 /*-----------------------------------------------------------.
ca2a6d15
PH
742 | Scanning some code in braces (actions, predicates). The |
743 | initial "{" is already eaten. |
58d7a1a1 744 `-----------------------------------------------------------*/
e9955c83 745
ca2a6d15 746<SC_BRACED_CODE,SC_PREDICATE>
e9955c83 747{
cb823b6f
AD
748 "{"|"<"{splice}"%" STRING_GROW; nesting++;
749 "%"{splice}">" STRING_GROW; nesting--;
ca2a6d15 750
ae93e4e4
JM
751 /* Tokenize '<<%' correctly (as '<<' '%') rather than incorrectly
752 (as '<' '<%'). */
ca2a6d15
PH
753 "<"{splice}"<" STRING_GROW;
754
755 <<EOF>> {
756 int token = (YY_START == SC_BRACED_CODE) ? BRACED_CODE : BRACED_PREDICATE;
757 unexpected_eof (code_start, "}");
758 STRING_FINISH;
759 loc->start = code_start;
760 val->code = last_string;
761 BEGIN INITIAL;
762 return token;
763 }
764}
765
766<SC_BRACED_CODE>
767{
e9955c83 768 "}" {
25522739
PE
769 obstack_1grow (&obstack_for_string, '}');
770
cb823b6f
AD
771 --nesting;
772 if (nesting < 0)
e9955c83 773 {
e9690142
JD
774 STRING_FINISH;
775 loc->start = code_start;
776 val->code = last_string;
777 BEGIN INITIAL;
778 return BRACED_CODE;
e9955c83
AD
779 }
780 }
ca2a6d15 781}
e9955c83 782
ca2a6d15
PH
783<SC_PREDICATE>
784{
785 "}" {
786 --nesting;
787 if (nesting < 0)
788 {
e9690142
JD
789 STRING_FINISH;
790 loc->start = code_start;
791 val->code = last_string;
792 BEGIN INITIAL;
793 return BRACED_PREDICATE;
ca2a6d15
PH
794 }
795 else
796 obstack_1grow (&obstack_for_string, '}');
47aee066 797 }
e9955c83
AD
798}
799
e9955c83
AD
800 /*--------------------------------------------------------------.
801 | Scanning some prologue: from "%{" (already scanned) to "%}". |
802 `--------------------------------------------------------------*/
803
804<SC_PROLOGUE>
805{
806 "%}" {
41141c56 807 STRING_FINISH;
3f2d73f1 808 loc->start = code_start;
223ff46e 809 val->chars = last_string;
a706a1cc 810 BEGIN INITIAL;
e9955c83
AD
811 return PROLOGUE;
812 }
813
47aee066
JD
814 <<EOF>> {
815 unexpected_eof (code_start, "%}");
816 STRING_FINISH;
817 loc->start = code_start;
818 val->chars = last_string;
819 BEGIN INITIAL;
820 return PROLOGUE;
821 }
e9955c83
AD
822}
823
824
825 /*---------------------------------------------------------------.
826 | Scanning the epilogue (everything after the second "%%", which |
d8d3f94a 827 | has already been eaten). |
e9955c83
AD
828 `---------------------------------------------------------------*/
829
830<SC_EPILOGUE>
831{
e9955c83 832 <<EOF>> {
41141c56 833 STRING_FINISH;
3f2d73f1 834 loc->start = code_start;
223ff46e 835 val->chars = last_string;
a706a1cc 836 BEGIN INITIAL;
e9955c83
AD
837 return EPILOGUE;
838 }
839}
840
841
4febdd96
PE
842 /*-----------------------------------------------------.
843 | By default, grow the string obstack with the input. |
844 `-----------------------------------------------------*/
845
e9690142
JD
846<SC_COMMENT,SC_LINE_COMMENT,SC_BRACED_CODE,SC_PREDICATE,SC_PROLOGUE,SC_EPILOGUE,SC_STRING,SC_CHARACTER,SC_ESCAPED_STRING,SC_ESCAPED_CHARACTER>. |
847 <SC_COMMENT,SC_LINE_COMMENT,SC_BRACED_CODE,SC_PREDICATE,SC_PROLOGUE,SC_EPILOGUE>\n STRING_GROW;
4febdd96 848
e9955c83
AD
849%%
850
6c30d641
PE
/* Fill BUF with at most SIZE bytes read from FP and return how many
   bytes BUF holds afterwards.  Carriage returns never reach the
   caller: both "\r\n" and a lone '\r' are delivered as one '\n', so
   the returned count may be smaller than the number of bytes read.  */

static size_t
no_cr_read (FILE *fp, char *buf, size_t size)
{
  size_t const nread = fread (buf, 1, size, fp);

  /* DST is the next byte to write; it starts on the first '\r'.
     Everything before it is already in its final form.  */
  char *dst = nread ? memchr (buf, '\r', nread) : NULL;
  if (!dst)
    return nread;

  char const *const end = buf + nread;
  char const *src = dst + 1;

  for (;;)
    {
      /* SRC is just past a '\r': emit the '\n' that replaces it.  */
      *dst++ = '\n';

      if (src == end)
        {
          /* The '\r' was the last byte of this chunk, so its '\n'
             partner, if any, is still in the stream.  Swallow it;
             push anything else back for the next call.  Nothing is
             left to copy whether or not the push-back succeeds.  */
          int next = getc (fp);
          if (next != '\n')
            ungetc (next, fp);
          return dst - buf;
        }

      /* Drop the '\n' of a "\r\n" pair.  */
      if (*src == '\n')
        src++;

      /* Compact the bytes up to the next '\r', or to the end.  */
      for (;;)
        {
          if (src == end)
            return dst - buf;

          char const byte = *src++;
          if (byte == '\r')
            break;
          *dst++ = byte;
        }
    }
}
896
897
f25bfb75 898
1452af69
PE
899/*------------------------------------------------------.
900| Scan NUMBER for a base-BASE integer at location LOC. |
901`------------------------------------------------------*/
902
903static unsigned long int
904scan_integer (char const *number, int base, location loc)
905{
4517da37
PE
906 verify (INT_MAX < ULONG_MAX);
907 unsigned long int num = strtoul (number, NULL, base);
908
909 if (INT_MAX < num)
1452af69 910 {
bb8e56ff 911 complain (&loc, complaint, _("integer out of range: %s"),
6fb8b256 912 quote (number));
1452af69
PE
913 num = INT_MAX;
914 }
4517da37 915
1452af69
PE
916 return num;
917}
918
919
d8d3f94a
PE
/*------------------------------------------------------------------.
| Convert universal character name UCN to a single-byte character, |
| and return that character.  Return -1 if UCN does not correspond |
| to a single-byte character.                                      |
|                                                                  |
| UCN points at the backslash of '\uXXXX' or '\UXXXXXXXX'; the     |
| hexadecimal digits therefore start at UCN + 2.                   |
`------------------------------------------------------------------*/

static int
convert_ucn_to_byte (char const *ucn)
{
  verify (UCHAR_MAX <= INT_MAX);
  unsigned long int code = strtoul (ucn + 2, NULL, 16);

  /* FIXME: Currently we assume Unicode-compatible unibyte characters
     on ASCII hosts (i.e., Latin-1 on hosts with 8-bit bytes).  On
     non-ASCII hosts we support only the portable C character set.
     These limitations should be removed once we add support for
     multibyte characters.  */

  if (UCHAR_MAX < code)
    return -1;

  /* CODE is now known to fit in an int.  The result is kept in a
     signed variable so that the -1 "no such byte" marker is never
     stored in an unsigned object and narrowed back.  */
  int byte = (int) code;

#if ! ('$' == 0x24 && '@' == 0x40 && '`' == 0x60 && '~' == 0x7e)
  {
    /* A non-ASCII host.  Use CODE to index into a table of the C
       basic execution character set, which is guaranteed to exist on
       all Standard C platforms.  This table also includes '$', '@',
       and '`', which are not in the basic execution character set but
       which are unibyte characters on all the platforms that we know
       about.  */
    static signed char const table[] =
      {
        '\0',   -1,   -1,   -1,   -1,   -1,   -1, '\a',
        '\b', '\t', '\n', '\v', '\f', '\r',   -1,   -1,
          -1,   -1,   -1,   -1,   -1,   -1,   -1,   -1,
          -1,   -1,   -1,   -1,   -1,   -1,   -1,   -1,
         ' ',  '!',  '"',  '#',  '$',  '%',  '&', '\'',
         '(',  ')',  '*',  '+',  ',',  '-',  '.',  '/',
         '0',  '1',  '2',  '3',  '4',  '5',  '6',  '7',
         '8',  '9',  ':',  ';',  '<',  '=',  '>',  '?',
         '@',  'A',  'B',  'C',  'D',  'E',  'F',  'G',
         'H',  'I',  'J',  'K',  'L',  'M',  'N',  'O',
         'P',  'Q',  'R',  'S',  'T',  'U',  'V',  'W',
         'X',  'Y',  'Z',  '[', '\\',  ']',  '^',  '_',
         '`',  'a',  'b',  'c',  'd',  'e',  'f',  'g',
         'h',  'i',  'j',  'k',  'l',  'm',  'n',  'o',
         'p',  'q',  'r',  's',  't',  'u',  'v',  'w',
         'x',  'y',  'z',  '{',  '|',  '}',  '~'
      };

    /* Codes past the end of the table have no portable mapping.  */
    byte = code < sizeof table ? table[code] : -1;
  }
#endif

  return byte;
}
975
976
03dbf629
AD
977/*---------------------------------------------------------------------.
978| Handle '#line INT( "FILE")?\n'. ARGS has already skipped '#line '. |
979`---------------------------------------------------------------------*/
900c5db5
AD
980
981static void
4517da37 982handle_syncline (char *args, location loc)
900c5db5 983{
03dbf629
AD
984 char *file;
985 unsigned long int lineno = strtoul (args, &file, 10);
4517da37
PE
986 if (INT_MAX <= lineno)
987 {
bb8e56ff 988 complain (&loc, Wother, _("line number overflow"));
4517da37
PE
989 lineno = INT_MAX;
990 }
03dbf629 991
064e42b0 992 file = strchr (file, '"');
03dbf629
AD
993 if (file)
994 {
064e42b0 995 *strchr (file + 1, '"') = '\0';
03dbf629
AD
996 current_file = uniqstr_new (file + 1);
997 }
0c8e079f 998 boundary_set (&scanner_cursor, current_file, lineno, 1);
4517da37
PE
999}
1000
1001
4febdd96
PE
1002/*----------------------------------------------------------------.
1003| For a token or comment starting at START, report message MSGID, |
e9690142
JD
1004| which should say that an end marker was found before |
1005| the expected TOKEN_END. |
4febdd96
PE
1006`----------------------------------------------------------------*/
1007
1008static void
1009unexpected_end (boundary start, char const *msgid, char const *token_end)
1010{
1011 location loc;
1012 loc.start = start;
1013 loc.end = scanner_cursor;
4a9cd8f2
AD
1014 token_end = quote (token_end);
1015 // Instead of '\'', display "'".
f518dbaf 1016 if (STREQ (token_end, "'\\''"))
4a9cd8f2 1017 token_end = "\"'\"";
bb8e56ff 1018 complain (&loc, complaint, _(msgid), token_end);
4febdd96
PE
1019}
1020
1021
3f2d73f1
PE
1022/*------------------------------------------------------------------------.
1023| Report an unexpected EOF in a token or comment starting at START. |
1024| An end of file was encountered and the expected TOKEN_END was missing. |
3f2d73f1 1025`------------------------------------------------------------------------*/
a706a1cc
PE
1026
1027static void
aa418041 1028unexpected_eof (boundary start, char const *token_end)
a706a1cc 1029{
4a9cd8f2 1030 unexpected_end (start, N_("missing %s at end of file"), token_end);
4febdd96
PE
1031}
1032
1033
1034/*----------------------------------------.
1035| Likewise, but for unexpected newlines. |
1036`----------------------------------------*/
1037
1038static void
1039unexpected_newline (boundary start, char const *token_end)
1040{
4a9cd8f2 1041 unexpected_end (start, N_("missing %s at end of line"), token_end);
a706a1cc
PE
1042}
1043
1044
f25bfb75
AD
1045/*-------------------------.
1046| Initialize the scanner. |
1047`-------------------------*/
1048
1d6412ad 1049void
e9071366 1050gram_scanner_initialize (void)
1d6412ad 1051{
223ff46e 1052 obstack_init (&obstack_for_string);
1d6412ad
AD
1053}
1054
1055
f25bfb75
AD
1056/*-----------------------------------------------.
1057| Free all the memory allocated to the scanner. |
1058`-----------------------------------------------*/
1059
4cdb01db 1060void
e9071366 1061gram_scanner_free (void)
4cdb01db 1062{
223ff46e 1063 obstack_free (&obstack_for_string, 0);
536545f3 1064 /* Reclaim Flex's buffers. */
580b8926 1065 yylex_destroy ();
4cdb01db 1066}