]> git.saurik.com Git - bison.git/blame - src/scan-gram.l
regen
[bison.git] / src / scan-gram.l
CommitLineData
e9955c83 1/* Bison Grammar Scanner -*- C -*-
3b1e470c 2
7d6bad19 3 Copyright (C) 2002-2013 Free Software Foundation, Inc.
e9955c83
AD
4
5 This file is part of Bison, the GNU Compiler Compiler.
6
f16b0819 7 This program is free software: you can redistribute it and/or modify
e9955c83 8 it under the terms of the GNU General Public License as published by
f16b0819 9 the Free Software Foundation, either version 3 of the License, or
e9955c83
AD
10 (at your option) any later version.
11
12 This program is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
16
17 You should have received a copy of the GNU General Public License
f16b0819 18 along with this program. If not, see <http://www.gnu.org/licenses/>. */
e9955c83 19
2062d72d 20%option debug nodefault noinput noyywrap never-interactive
e9955c83
AD
21%option prefix="gram_" outfile="lex.yy.c"
22
23%{
4f6e011e
PE
24/* Work around a bug in flex 2.5.31. See Debian bug 333231
25 <http://bugs.debian.org/cgi-bin/bugreport.cgi?bug=333231>. */
26#undef gram_wrap
27#define gram_wrap() 1
28
e9071366 29#define FLEX_PREFIX(Id) gram_ ## Id
0305d25e 30#include <src/flex-scanner.h>
223ff46e 31
0305d25e
AD
32#include <src/complain.h>
33#include <src/files.h>
2062d72d 34#include <src/getargs.h>
0305d25e
AD
35#include <src/gram.h>
36#include <quotearg.h>
37#include <src/reader.h>
38#include <src/uniqstr.h>
e9955c83 39
457bf919 40#include <c-ctype.h>
e9071366
AD
41#include <mbswidth.h>
42#include <quote.h>
43
0305d25e 44#include <src/scan-gram.h>
e9071366
AD
45
46#define YY_DECL GRAM_LEX_DECL
2346344a 47
e9690142
JD
48#define YY_USER_INIT \
49 code_start = scanner_cursor = loc->start; \
dc9701e8 50
3f2d73f1 51/* Location of scanner cursor. */
4a678af8 52static boundary scanner_cursor;
41141c56 53
e9071366 54#define YY_USER_ACTION location_compute (loc, &scanner_cursor, yytext, yyleng);
d8d3f94a 55
6c30d641 56static size_t no_cr_read (FILE *, char *, size_t);
d8d3f94a
PE
57#define YY_INPUT(buf, result, size) ((result) = no_cr_read (yyin, buf, size))
58
a7706735
AD
59#define RETURN_PERCENT_PARAM(Value) \
60 RETURN_VALUE(PERCENT_PARAM, param, param_ ## Value)
61
62#define RETURN_PERCENT_FLAG(Value) \
63 RETURN_VALUE(PERCENT_FLAG, uniqstr, uniqstr_new (Value))
64
65#define RETURN_VALUE(Token, Field, Value) \
ba061fa6 66 do { \
a7706735
AD
67 val->Field = Value; \
68 return Token; \
ba061fa6
AD
69 } while (0)
70
b9f1d9a4
AR
71#define ROLLBACK_CURRENT_TOKEN \
72 do { \
e9690142 73 scanner_cursor.column -= mbsnwidth (yytext, yyleng, 0); \
b9f1d9a4
AR
74 yyless (0); \
75 } while (0)
ba061fa6 76
2062d72d
TR
77#define DEPRECATED(Msg) \
78 do { \
79 size_t i; \
1dc927a7 80 deprecated_directive (loc, yytext, Msg); \
2062d72d
TR
81 scanner_cursor.column -= mbsnwidth (Msg, strlen (Msg), 0); \
82 for (i = strlen (Msg); i != 0; --i) \
83 unput (Msg[i - 1]); \
84 } while (0)
85
7ec2d4cd 86/* A string representing the most recently saved token. */
7c0c6181 87static char *last_string;
7ec2d4cd 88
872b52bc 89/* Bracketed identifier. */
b9f1d9a4
AR
90static uniqstr bracketed_id_str = 0;
91static location bracketed_id_loc;
92static boundary bracketed_id_start;
93static int bracketed_id_context_state = 0;
94
7ec2d4cd 95void
e9071366 96gram_scanner_last_string_free (void)
7ec2d4cd 97{
41141c56 98 STRING_FREE;
7ec2d4cd 99}
e9955c83 100
4517da37 101static void handle_syncline (char *, location);
1452af69 102static unsigned long int scan_integer (char const *p, int base, location loc);
d8d3f94a 103static int convert_ucn_to_byte (char const *hex_text);
aa418041 104static void unexpected_eof (boundary, char const *);
4febdd96 105static void unexpected_newline (boundary, char const *);
e9955c83
AD
106
107%}
e9071366
AD
108 /* A C-like comment in directives/rules. */
109%x SC_YACC_COMMENT
110 /* Strings and characters in directives/rules. */
e9955c83 111%x SC_ESCAPED_STRING SC_ESCAPED_CHARACTER
e9071366 112 /* An identifier was just read in directives/rules. Special state
ae93e4e4 113 to capture the sequence 'identifier :'. */
e9071366 114%x SC_AFTER_IDENTIFIER
e52ddf82
AD
115
116 /* POSIX says that a tag must be both an id and a C union member, but
117 historically almost any character is allowed in a tag. We
118 disallow NUL, as this simplifies our implementation. We match
119 angle brackets in nested pairs: several languages use them for
120 generics/template types. */
cb823b6f 121%x SC_TAG
e9071366 122
ca2a6d15 123 /* Four types of user code:
ae93e4e4 124 - prologue (code between '%{' '%}' in the first section, before %%);
e9071366 125 - actions, printers, union, etc, (between braces in the middle section);
da5462d4 126 - epilogue (everything after the second %%).
ae93e4e4 127 - predicate (code between '%?{' and '}' in middle section); */
ca2a6d15 128%x SC_PROLOGUE SC_BRACED_CODE SC_EPILOGUE SC_PREDICATE
e9071366
AD
129 /* C and C++ comments in code. */
130%x SC_COMMENT SC_LINE_COMMENT
131 /* Strings and characters in code. */
132%x SC_STRING SC_CHARACTER
872b52bc 133 /* Bracketed identifiers support. */
b9f1d9a4 134%x SC_BRACKETED_ID SC_RETURN_BRACKETED_ID
e9955c83 135
e9690142 136letter [.abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ_]
68ac70bc 137notletter [^.abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ_]{-}[%\{]
e9690142 138id {letter}({letter}|[-0-9])*
e9690142 139int [0-9]+
d8d3f94a 140
d8d3f94a
PE
141/* Zero or more instances of backslash-newline. Following GCC, allow
142 white space between the backslash and the newline. */
e9690142 143splice (\\[ \f\t\v]*\n)*
e9955c83 144
2062d72d
TR
145/* An equal sign, with optional leading whitespace. This is used in some
146 deprecated constructs. */
147eqopt ([[:space:]]*=)?
148
e9955c83
AD
149%%
150%{
cb823b6f
AD
151 /* Nesting level. Either for nested braces, or nested angle brackets
152 (but not mixed). */
84f6a6ca 153 int nesting PACIFY_CC (= 0);
1a9e39f1 154
3f2d73f1 155 /* Parent context state, when applicable. */
84f6a6ca 156 int context_state PACIFY_CC (= 0);
a706a1cc 157
3f2d73f1 158 /* Location of most recent identifier, when applicable. */
84f6a6ca 159 location id_loc PACIFY_CC (= empty_location);
3f2d73f1 160
a2bc9dbc
PE
161 /* Where containing code started, when applicable. Its initial
162 value is relevant only when yylex is invoked in the SC_EPILOGUE
163 start condition. */
164 boundary code_start = scanner_cursor;
3f2d73f1 165
223ff46e
PE
166 /* Where containing comment or string or character literal started,
167 when applicable. */
84f6a6ca 168 boundary token_start PACIFY_CC (= scanner_cursor);
e9955c83
AD
169%}
170
171
3f2d73f1
PE
172 /*-----------------------.
173 | Scanning white space. |
174 `-----------------------*/
175
b9f1d9a4 176<INITIAL,SC_AFTER_IDENTIFIER,SC_BRACKETED_ID,SC_RETURN_BRACKETED_ID>
3f2d73f1 177{
4febdd96 178 /* Comments and white space. */
6fb8b256 179 "," {
bb8e56ff 180 complain (loc, Wother, _("stray ',' treated as white space"));
6fb8b256 181 }
4febdd96 182 [ \f\n\t\v] |
96029914 183 "//".* continue;
83adb046
PE
184 "/*" {
185 token_start = loc->start;
186 context_state = YY_START;
187 BEGIN SC_YACC_COMMENT;
188 }
3f2d73f1
PE
189
190 /* #line directives are not documented, and may be withdrawn or
191 modified in future versions of Bison. */
03dbf629 192 ^"#line "{int}(" \"".*"\"")?"\n" {
4517da37 193 handle_syncline (yytext + sizeof "#line " - 1, *loc);
3f2d73f1
PE
194 }
195}
196
197
e9955c83
AD
198 /*----------------------------.
199 | Scanning Bison directives. |
200 `----------------------------*/
a7c09cba
DJ
201
202 /* For directives that are also command line options, the regex must be
e9690142 203 "%..."
a7c09cba
DJ
204 after "[-_]"s are removed, and the directive must match the --long
205 option name, with a single string argument. Otherwise, add exceptions
206 to ../build-aux/cross-options.pl. */
207
e9955c83
AD
208<INITIAL>
209{
deef2a0a 210 "%binary" return PERCENT_NONASSOC;
136a0f76 211 "%code" return PERCENT_CODE;
fa819509 212 "%debug" RETURN_PERCENT_FLAG("parse.trace");
2062d72d 213 "%default-prec" return PERCENT_DEFAULT_PREC;
deef2a0a
AD
214 "%define" return PERCENT_DEFINE;
215 "%defines" return PERCENT_DEFINES;
216 "%destructor" return PERCENT_DESTRUCTOR;
217 "%dprec" return PERCENT_DPREC;
ae2b48f5 218 "%empty" return PERCENT_EMPTY;
2062d72d 219 "%error-verbose" return PERCENT_ERROR_VERBOSE;
deef2a0a 220 "%expect" return PERCENT_EXPECT;
2062d72d 221 "%expect-rr" return PERCENT_EXPECT_RR;
deef2a0a 222 "%file-prefix" return PERCENT_FILE_PREFIX;
2062d72d 223 "%fixed-output-files" return PERCENT_YACC;
deef2a0a
AD
224 "%initial-action" return PERCENT_INITIAL_ACTION;
225 "%glr-parser" return PERCENT_GLR_PARSER;
226 "%language" return PERCENT_LANGUAGE;
227 "%left" return PERCENT_LEFT;
a7706735 228 "%lex-param" RETURN_PERCENT_PARAM(lex);
bc0f5737 229 "%locations" RETURN_PERCENT_FLAG("locations");
deef2a0a 230 "%merge" return PERCENT_MERGE;
2062d72d
TR
231 "%name-prefix" return PERCENT_NAME_PREFIX;
232 "%no-default-prec" return PERCENT_NO_DEFAULT_PREC;
233 "%no-lines" return PERCENT_NO_LINES;
deef2a0a
AD
234 "%nonassoc" return PERCENT_NONASSOC;
235 "%nondeterministic-parser" return PERCENT_NONDETERMINISTIC_PARSER;
236 "%nterm" return PERCENT_NTERM;
237 "%output" return PERCENT_OUTPUT;
a7706735
AD
238 "%param" RETURN_PERCENT_PARAM(both);
239 "%parse-param" RETURN_PERCENT_PARAM(parse);
deef2a0a 240 "%prec" return PERCENT_PREC;
d78f0ac9 241 "%precedence" return PERCENT_PRECEDENCE;
deef2a0a 242 "%printer" return PERCENT_PRINTER;
2062d72d 243 "%pure-parser" RETURN_PERCENT_FLAG("api.pure");
deef2a0a
AD
244 "%require" return PERCENT_REQUIRE;
245 "%right" return PERCENT_RIGHT;
246 "%skeleton" return PERCENT_SKELETON;
247 "%start" return PERCENT_START;
248 "%term" return PERCENT_TOKEN;
249 "%token" return PERCENT_TOKEN;
2062d72d 250 "%token-table" return PERCENT_TOKEN_TABLE;
deef2a0a
AD
251 "%type" return PERCENT_TYPE;
252 "%union" return PERCENT_UNION;
253 "%verbose" return PERCENT_VERBOSE;
254 "%yacc" return PERCENT_YACC;
e9955c83 255
2062d72d
TR
256 /* deprecated */
257 "%default"[-_]"prec" DEPRECATED("%default-prec");
258 "%error"[-_]"verbose" DEPRECATED("%define parse.error verbose");
259 "%expect"[-_]"rr" DEPRECATED("%expect-rr");
260 "%file-prefix"{eqopt} DEPRECATED("%file-prefix");
261 "%fixed"[-_]"output"[-_]"files" DEPRECATED("%fixed-output-files");
262 "%name"[-_]"prefix"{eqopt} DEPRECATED("%name-prefix");
263 "%no"[-_]"default"[-_]"prec" DEPRECATED("%no-default-prec");
264 "%no"[-_]"lines" DEPRECATED("%no-lines");
265 "%output"{eqopt} DEPRECATED("%output");
266 "%pure"[-_]"parser" DEPRECATED("%pure-parser");
267 "%token"[-_]"table" DEPRECATED("%token-table");
268
68ac70bc 269 "%"{id}|"%"{notletter}([[:graph:]])+ {
bb8e56ff 270 complain (loc, complaint, _("invalid directive: %s"), quote (yytext));
412f8a59 271 }
900c5db5 272
e9955c83 273 "=" return EQUAL;
e9071366 274 "|" return PIPE;
e9955c83
AD
275 ";" return SEMICOLON;
276
3f2d73f1 277 {id} {
58d7a1a1 278 val->uniqstr = uniqstr_new (yytext);
3f2d73f1 279 id_loc = *loc;
b9f1d9a4 280 bracketed_id_str = NULL;
3f2d73f1 281 BEGIN SC_AFTER_IDENTIFIER;
e9955c83
AD
282 }
283
d8d3f94a 284 {int} {
1452af69
PE
285 val->integer = scan_integer (yytext, 10, *loc);
286 return INT;
287 }
288 0[xX][0-9abcdefABCDEF]+ {
289 val->integer = scan_integer (yytext, 16, *loc);
d8d3f94a
PE
290 return INT;
291 }
e9955c83 292
84a1cb5a
AD
293 /* Identifiers may not start with a digit. Yet, don't silently
294 accept "1FOO" as "1 FOO". */
295 {int}{id} {
bb8e56ff 296 complain (loc, complaint, _("invalid identifier: %s"), quote (yytext));
84a1cb5a
AD
297 }
298
3208e3f4 299 /* Characters. */
e9690142 300 "'" token_start = loc->start; BEGIN SC_ESCAPED_CHARACTER;
e9955c83
AD
301
302 /* Strings. */
e9690142 303 "\"" token_start = loc->start; BEGIN SC_ESCAPED_STRING;
e9955c83
AD
304
305 /* Prologue. */
3f2d73f1 306 "%{" code_start = loc->start; BEGIN SC_PROLOGUE;
e9955c83
AD
307
308 /* Code in between braces. */
3f2d73f1
PE
309 "{" {
310 STRING_GROW;
cb823b6f 311 nesting = 0;
3f2d73f1
PE
312 code_start = loc->start;
313 BEGIN SC_BRACED_CODE;
314 }
e9955c83 315
ca2a6d15
PH
316 /* Semantic predicate. */
317 "%?"[ \f\n\t\v]*"{" {
318 nesting = 0;
319 code_start = loc->start;
320 BEGIN SC_PREDICATE;
321 }
322
e9955c83 323 /* A type. */
cb823b6f
AD
324 "<*>" return TAG_ANY;
325 "<>" return TAG_NONE;
cb823b6f
AD
326 "<" {
327 nesting = 0;
328 token_start = loc->start;
329 BEGIN SC_TAG;
4cdb01db
AD
330 }
331
a706a1cc
PE
332 "%%" {
333 static int percent_percent_count;
e9955c83 334 if (++percent_percent_count == 2)
a2bc9dbc 335 BEGIN SC_EPILOGUE;
e9955c83
AD
336 return PERCENT_PERCENT;
337 }
338
b9f1d9a4
AR
339 "[" {
340 bracketed_id_str = NULL;
341 bracketed_id_start = loc->start;
342 bracketed_id_context_state = YY_START;
343 BEGIN SC_BRACKETED_ID;
344 }
345
68ac70bc 346 [^\[%A-Za-z0-9_<>{}\"\'*;|=/, \f\n\t\v]+|. {
c6b17724
AD
347 complain (loc, complaint, "%s: %s",
348 ngettext ("invalid character", "invalid characters", yyleng),
e42906f7 349 quote_mem (yytext, yyleng));
3f2d73f1 350 }
379f0ac8
PE
351
352 <<EOF>> {
353 loc->start = loc->end = scanner_cursor;
354 yyterminate ();
355 }
3f2d73f1
PE
356}
357
358
cb823b6f
AD
359 /*--------------------------------------------------------------.
360 | Supporting \0 complexifies our implementation for no expected |
361 | added value. |
362 `--------------------------------------------------------------*/
363
364<SC_ESCAPED_CHARACTER,SC_ESCAPED_STRING,SC_TAG>
365{
bb8e56ff 366 \0 complain (loc, complaint, _("invalid null character"));
cb823b6f
AD
367}
368
369
3f2d73f1
PE
370 /*-----------------------------------------------------------------.
371 | Scanning after an identifier, checking whether a colon is next. |
372 `-----------------------------------------------------------------*/
373
374<SC_AFTER_IDENTIFIER>
375{
b9f1d9a4 376 "[" {
872b52bc 377 if (bracketed_id_str)
b9f1d9a4 378 {
e9690142
JD
379 ROLLBACK_CURRENT_TOKEN;
380 BEGIN SC_RETURN_BRACKETED_ID;
381 *loc = id_loc;
382 return ID;
b9f1d9a4 383 }
872b52bc
AR
384 else
385 {
e9690142
JD
386 bracketed_id_start = loc->start;
387 bracketed_id_context_state = YY_START;
388 BEGIN SC_BRACKETED_ID;
872b52bc 389 }
b9f1d9a4 390 }
3f2d73f1 391 ":" {
b9f1d9a4 392 BEGIN (bracketed_id_str ? SC_RETURN_BRACKETED_ID : INITIAL);
3f2d73f1 393 *loc = id_loc;
3f2d73f1
PE
394 return ID_COLON;
395 }
396 . {
b9f1d9a4
AR
397 ROLLBACK_CURRENT_TOKEN;
398 BEGIN (bracketed_id_str ? SC_RETURN_BRACKETED_ID : INITIAL);
3f2d73f1 399 *loc = id_loc;
3f2d73f1
PE
400 return ID;
401 }
402 <<EOF>> {
b9f1d9a4 403 BEGIN (bracketed_id_str ? SC_RETURN_BRACKETED_ID : INITIAL);
3f2d73f1 404 *loc = id_loc;
3f2d73f1 405 return ID;
e9955c83
AD
406 }
407}
408
b9f1d9a4
AR
409 /*--------------------------------.
410 | Scanning bracketed identifiers. |
411 `--------------------------------*/
412
413<SC_BRACKETED_ID>
414{
415 {id} {
872b52bc 416 if (bracketed_id_str)
b9f1d9a4 417 {
bb8e56ff
TR
418 complain (loc, complaint,
419 _("unexpected identifier in bracketed name: %s"),
420 quote (yytext));
b9f1d9a4
AR
421 }
422 else
423 {
e9690142
JD
424 bracketed_id_str = uniqstr_new (yytext);
425 bracketed_id_loc = *loc;
b9f1d9a4
AR
426 }
427 }
428 "]" {
429 BEGIN bracketed_id_context_state;
430 if (bracketed_id_str)
431 {
e9690142
JD
432 if (INITIAL == bracketed_id_context_state)
433 {
434 val->uniqstr = bracketed_id_str;
435 bracketed_id_str = 0;
436 *loc = bracketed_id_loc;
437 return BRACKETED_ID;
438 }
b9f1d9a4
AR
439 }
440 else
bb8e56ff 441 complain (loc, complaint, _("an identifier expected"));
b9f1d9a4 442 }
68ac70bc
AD
443
444 [^\].A-Za-z0-9_/ \f\n\t\v]+|. {
c6b17724
AD
445 complain (loc, complaint, "%s: %s",
446 ngettext ("invalid character in bracketed name",
447 "invalid characters in bracketed name", yyleng),
e42906f7 448 quote_mem (yytext, yyleng));
b9f1d9a4 449 }
68ac70bc 450
b9f1d9a4
AR
451 <<EOF>> {
452 BEGIN bracketed_id_context_state;
453 unexpected_eof (bracketed_id_start, "]");
454 }
455}
456
457<SC_RETURN_BRACKETED_ID>
458{
459 . {
460 ROLLBACK_CURRENT_TOKEN;
461 val->uniqstr = bracketed_id_str;
462 bracketed_id_str = 0;
463 *loc = bracketed_id_loc;
464 BEGIN INITIAL;
465 return BRACKETED_ID;
466 }
467}
468
e9955c83 469
d8d3f94a 470 /*---------------------------------------------------------------.
ae93e4e4 471 | Scanning a Yacc comment. The initial '/ *' is already eaten. |
d8d3f94a 472 `---------------------------------------------------------------*/
e9955c83 473
d8d3f94a 474<SC_YACC_COMMENT>
e9955c83 475{
3f2d73f1 476 "*/" BEGIN context_state;
c6b17724 477 .|\n continue;
aa418041 478 <<EOF>> unexpected_eof (token_start, "*/"); BEGIN context_state;
d8d3f94a
PE
479}
480
481
482 /*------------------------------------------------------------.
ae93e4e4 483 | Scanning a C comment. The initial '/ *' is already eaten. |
d8d3f94a
PE
484 `------------------------------------------------------------*/
485
486<SC_COMMENT>
487{
3f2d73f1 488 "*"{splice}"/" STRING_GROW; BEGIN context_state;
e9690142 489 <<EOF>> unexpected_eof (token_start, "*/"); BEGIN context_state;
e9955c83
AD
490}
491
492
d8d3f94a 493 /*--------------------------------------------------------------.
ae93e4e4 494 | Scanning a line comment. The initial '//' is already eaten. |
d8d3f94a
PE
495 `--------------------------------------------------------------*/
496
497<SC_LINE_COMMENT>
498{
e9690142
JD
499 "\n" STRING_GROW; BEGIN context_state;
500 {splice} STRING_GROW;
501 <<EOF>> BEGIN context_state;
d8d3f94a
PE
502}
503
504
4febdd96
PE
505 /*------------------------------------------------.
506 | Scanning a Bison string, including its escapes. |
507 | The initial quote is already eaten. |
508 `------------------------------------------------*/
e9955c83
AD
509
510<SC_ESCAPED_STRING>
511{
c1b2677a 512 "\"" {
41141c56 513 STRING_FINISH;
3f2d73f1 514 loc->start = token_start;
985d7177 515 val->code = last_string;
a706a1cc 516 BEGIN INITIAL;
e9955c83
AD
517 return STRING;
518 }
c1b2677a
TR
519 <<EOF>> unexpected_eof (token_start, "\"");
520 "\n" unexpected_newline (token_start, "\"");
e9955c83
AD
521}
522
4febdd96
PE
523 /*----------------------------------------------------------.
524 | Scanning a Bison character literal, decoding its escapes. |
e9690142 525 | The initial quote is already eaten. |
4febdd96 526 `----------------------------------------------------------*/
e9955c83
AD
527
528<SC_ESCAPED_CHARACTER>
529{
c1b2677a 530 "'" {
47aee066
JD
531 STRING_FINISH;
532 loc->start = token_start;
dfaa4860 533 val->character = last_string[0];
c1b2677a
TR
534
535 /* FIXME: Eventually, make these errors. */
536 if (last_string[0] == '\0')
3208e3f4 537 {
c1b2677a
TR
538 complain (loc, Wother, _("empty character literal"));
539 /* '\0' seems dangerous even if we are about to complain. */
540 val->character = '\'';
3208e3f4 541 }
c1b2677a
TR
542 else if (last_string[1] != '\0')
543 complain (loc, Wother,
544 _("extra characters in character literal"));
47aee066
JD
545 STRING_FREE;
546 BEGIN INITIAL;
547 return CHAR;
548 }
c1b2677a
TR
549 "\n" unexpected_newline (token_start, "'");
550 <<EOF>> unexpected_eof (token_start, "'");
4febdd96 551}
a706a1cc 552
e52ddf82
AD
553
554
555 /*--------------------------------------------------------------.
556 | Scanning a tag. The initial angle bracket is already eaten. |
557 `--------------------------------------------------------------*/
cb823b6f
AD
558
559<SC_TAG>
4febdd96 560{
cb823b6f
AD
561 ">" {
562 --nesting;
563 if (nesting < 0)
564 {
565 STRING_FINISH;
566 loc->start = token_start;
567 val->uniqstr = uniqstr_new (last_string);
568 STRING_FREE;
569 BEGIN INITIAL;
570 return TAG;
571 }
572 STRING_GROW;
573 }
574
cb8d8bb9 575 ([^<>]|->)+ STRING_GROW;
cb823b6f 576 "<"+ STRING_GROW; nesting += yyleng;
e9955c83 577
c1b2677a 578 <<EOF>> unexpected_eof (token_start, ">");
cb823b6f 579}
e9955c83
AD
580
581 /*----------------------------.
582 | Decode escaped characters. |
583 `----------------------------*/
584
585<SC_ESCAPED_STRING,SC_ESCAPED_CHARACTER>
586{
d8d3f94a 587 \\[0-7]{1,3} {
4517da37 588 unsigned long int c = strtoul (yytext + 1, NULL, 8);
c2724603 589 if (!c || UCHAR_MAX < c)
bb8e56ff 590 complain (loc, complaint, _("invalid number after \\-escape: %s"),
c2724603 591 yytext+1);
e9955c83 592 else
223ff46e 593 obstack_1grow (&obstack_for_string, c);
e9955c83
AD
594 }
595
6b0d38ab 596 \\x[0-9abcdefABCDEF]+ {
4517da37
PE
597 verify (UCHAR_MAX < ULONG_MAX);
598 unsigned long int c = strtoul (yytext + 2, NULL, 16);
c2724603 599 if (!c || UCHAR_MAX < c)
bb8e56ff 600 complain (loc, complaint, _("invalid number after \\-escape: %s"),
c2724603 601 yytext+1);
d8d3f94a 602 else
223ff46e 603 obstack_1grow (&obstack_for_string, c);
e9955c83
AD
604 }
605
e9690142
JD
606 \\a obstack_1grow (&obstack_for_string, '\a');
607 \\b obstack_1grow (&obstack_for_string, '\b');
608 \\f obstack_1grow (&obstack_for_string, '\f');
609 \\n obstack_1grow (&obstack_for_string, '\n');
610 \\r obstack_1grow (&obstack_for_string, '\r');
611 \\t obstack_1grow (&obstack_for_string, '\t');
612 \\v obstack_1grow (&obstack_for_string, '\v');
412f8a59
PE
613
614 /* \\[\"\'?\\] would be shorter, but it confuses xgettext. */
223ff46e 615 \\("\""|"'"|"?"|"\\") obstack_1grow (&obstack_for_string, yytext[1]);
412f8a59 616
6b0d38ab 617 \\(u|U[0-9abcdefABCDEF]{4})[0-9abcdefABCDEF]{4} {
d8d3f94a 618 int c = convert_ucn_to_byte (yytext);
c2724603 619 if (c <= 0)
bb8e56ff 620 complain (loc, complaint, _("invalid number after \\-escape: %s"),
c2724603 621 yytext+1);
d8d3f94a 622 else
223ff46e 623 obstack_1grow (&obstack_for_string, c);
d8d3f94a 624 }
e9690142 625 \\(.|\n) {
c2724603 626 char const *p = yytext + 1;
e6c849d8 627 /* Quote only if escaping won't make the character visible. */
457bf919 628 if (c_isspace ((unsigned char) *p) && c_isprint ((unsigned char) *p))
e6c849d8 629 p = quote (p);
c2724603
JD
630 else
631 p = quotearg_style_mem (escape_quoting_style, p, 1);
bb8e56ff 632 complain (loc, complaint, _("invalid character after \\-escape: %s"),
6fb8b256 633 p);
e9955c83
AD
634 }
635}
636
4febdd96
PE
637 /*--------------------------------------------.
638 | Scanning user-code characters and strings. |
639 `--------------------------------------------*/
e9955c83 640
4febdd96
PE
641<SC_CHARACTER,SC_STRING>
642{
e9690142 643 {splice}|\\{splice}[^\n\[\]] STRING_GROW;
4febdd96 644}
e9955c83
AD
645
646<SC_CHARACTER>
647{
e9690142 648 "'" STRING_GROW; BEGIN context_state;
c1b2677a
TR
649 \n unexpected_newline (token_start, "'");
650 <<EOF>> unexpected_eof (token_start, "'");
e9955c83
AD
651}
652
e9955c83
AD
653<SC_STRING>
654{
e9690142 655 "\"" STRING_GROW; BEGIN context_state;
c1b2677a
TR
656 \n unexpected_newline (token_start, "\"");
657 <<EOF>> unexpected_eof (token_start, "\"");
e9955c83
AD
658}
659
660
661 /*---------------------------------------------------.
662 | Strings, comments etc. can be found in user code. |
663 `---------------------------------------------------*/
664
ca2a6d15 665<SC_BRACED_CODE,SC_PROLOGUE,SC_EPILOGUE,SC_PREDICATE>
e9955c83 666{
3f2d73f1
PE
667 "'" {
668 STRING_GROW;
669 context_state = YY_START;
670 token_start = loc->start;
671 BEGIN SC_CHARACTER;
672 }
673 "\"" {
674 STRING_GROW;
675 context_state = YY_START;
676 token_start = loc->start;
677 BEGIN SC_STRING;
678 }
679 "/"{splice}"*" {
680 STRING_GROW;
681 context_state = YY_START;
682 token_start = loc->start;
683 BEGIN SC_COMMENT;
684 }
685 "/"{splice}"/" {
686 STRING_GROW;
687 context_state = YY_START;
688 BEGIN SC_LINE_COMMENT;
689 }
e9955c83
AD
690}
691
692
624a35e2 693
58d7a1a1 694 /*-----------------------------------------------------------.
ca2a6d15
PH
695 | Scanning some code in braces (actions, predicates). The |
696 | initial "{" is already eaten. |
58d7a1a1 697 `-----------------------------------------------------------*/
e9955c83 698
ca2a6d15 699<SC_BRACED_CODE,SC_PREDICATE>
e9955c83 700{
cb823b6f
AD
701 "{"|"<"{splice}"%" STRING_GROW; nesting++;
702 "%"{splice}">" STRING_GROW; nesting--;
ca2a6d15 703
ae93e4e4
JM
704 /* Tokenize '<<%' correctly (as '<<' '%') rather than incorrectly
705 (as '<' '<%'). */
ca2a6d15
PH
706 "<"{splice}"<" STRING_GROW;
707
c1b2677a 708 <<EOF>> unexpected_eof (code_start, "}");
ca2a6d15
PH
709}
710
711<SC_BRACED_CODE>
712{
e9955c83 713 "}" {
25522739
PE
714 obstack_1grow (&obstack_for_string, '}');
715
cb823b6f
AD
716 --nesting;
717 if (nesting < 0)
e9955c83 718 {
e9690142
JD
719 STRING_FINISH;
720 loc->start = code_start;
721 val->code = last_string;
722 BEGIN INITIAL;
723 return BRACED_CODE;
e9955c83
AD
724 }
725 }
ca2a6d15 726}
e9955c83 727
ca2a6d15
PH
728<SC_PREDICATE>
729{
730 "}" {
731 --nesting;
732 if (nesting < 0)
733 {
e9690142
JD
734 STRING_FINISH;
735 loc->start = code_start;
736 val->code = last_string;
737 BEGIN INITIAL;
738 return BRACED_PREDICATE;
ca2a6d15
PH
739 }
740 else
741 obstack_1grow (&obstack_for_string, '}');
47aee066 742 }
e9955c83
AD
743}
744
e9955c83
AD
745 /*--------------------------------------------------------------.
746 | Scanning some prologue: from "%{" (already scanned) to "%}". |
747 `--------------------------------------------------------------*/
748
749<SC_PROLOGUE>
750{
751 "%}" {
41141c56 752 STRING_FINISH;
3f2d73f1 753 loc->start = code_start;
985d7177 754 val->code = last_string;
a706a1cc 755 BEGIN INITIAL;
e9955c83
AD
756 return PROLOGUE;
757 }
758
c1b2677a 759 <<EOF>> unexpected_eof (code_start, "%}");
e9955c83
AD
760}
761
762
763 /*---------------------------------------------------------------.
764 | Scanning the epilogue (everything after the second "%%", which |
d8d3f94a 765 | has already been eaten). |
e9955c83
AD
766 `---------------------------------------------------------------*/
767
768<SC_EPILOGUE>
769{
e9955c83 770 <<EOF>> {
41141c56 771 STRING_FINISH;
3f2d73f1 772 loc->start = code_start;
985d7177 773 val->code = last_string;
a706a1cc 774 BEGIN INITIAL;
e9955c83
AD
775 return EPILOGUE;
776 }
777}
778
779
4febdd96
PE
780 /*-----------------------------------------------------.
781 | By default, grow the string obstack with the input. |
782 `-----------------------------------------------------*/
783
e9690142
JD
784<SC_COMMENT,SC_LINE_COMMENT,SC_BRACED_CODE,SC_PREDICATE,SC_PROLOGUE,SC_EPILOGUE,SC_STRING,SC_CHARACTER,SC_ESCAPED_STRING,SC_ESCAPED_CHARACTER>. |
785 <SC_COMMENT,SC_LINE_COMMENT,SC_BRACED_CODE,SC_PREDICATE,SC_PROLOGUE,SC_EPILOGUE>\n STRING_GROW;
4febdd96 786
e9955c83
AD
787%%
788
6c30d641
PE
/* Fill BUF with up to SIZE bytes read from FP and return the number
   of bytes BUF holds afterwards.  No '\r' is ever delivered: both
   "\r\n" and a lone '\r' come out as a single '\n'.  */

static size_t
no_cr_read (FILE *fp, char *buf, size_t size)
{
  size_t const nread = fread (buf, 1, size, fp);
  char *first_cr = nread ? memchr (buf, '\r', nread) : NULL;

  /* Nothing read, or nothing to translate: BUF is already final.  */
  if (!first_cr)
    return nread;

  /* Compact the buffer in place, starting at the first '\r'.  DST
     never gets ahead of SRC, so no unread byte is overwritten.  */
  char const *const end = buf + nread;
  char const *src = first_cr;
  char *dst = first_cr;

  while (src != end)
    {
      char c = *src++;
      if (c == '\r')
        {
          c = '\n';
          if (src != end)
            {
              /* "\r\n" inside the buffer: swallow the '\n'.  */
              if (*src == '\n')
                src++;
            }
          else
            {
              /* The '\r' was the last byte read.  Peek at the stream
                 so that a "\r\n" pair split across two reads still
                 yields one '\n'; any other byte is pushed back.  */
              int next = getc (fp);
              if (next != '\n')
                ungetc (next, fp);
            }
        }
      *dst++ = c;
    }

  return dst - buf;
}
834
835
f25bfb75 836
1452af69
PE
837/*------------------------------------------------------.
838| Scan NUMBER for a base-BASE integer at location LOC. |
839`------------------------------------------------------*/
840
841static unsigned long int
842scan_integer (char const *number, int base, location loc)
843{
4517da37
PE
844 verify (INT_MAX < ULONG_MAX);
845 unsigned long int num = strtoul (number, NULL, base);
846
847 if (INT_MAX < num)
1452af69 848 {
bb8e56ff 849 complain (&loc, complaint, _("integer out of range: %s"),
6fb8b256 850 quote (number));
1452af69
PE
851 num = INT_MAX;
852 }
4517da37 853
1452af69
PE
854 return num;
855}
856
857
d8d3f94a
PE
/* Convert the universal character name UCN (the text of a "\uXXXX"
   or "\UXXXXXXXX" escape, backslash included) to a single-byte
   character and return that character.  Return -1 if UCN does not
   correspond to a single-byte character.  */

static int
convert_ucn_to_byte (char const *ucn)
{
  verify (UCHAR_MAX <= INT_MAX);
  /* Skip the two-character "\u" / "\U" introducer.  */
  unsigned long int code = strtoul (ucn + 2, NULL, 16);

  /* FIXME: Currently we assume Unicode-compatible unibyte characters
     on ASCII hosts (i.e., Latin-1 on hosts with 8-bit bytes).  On
     non-ASCII hosts we support only the portable C character set.
     These limitations should be removed once we add support for
     multibyte characters.  */

  if (UCHAR_MAX < code)
    return -1;

  /* CODE is at most UCHAR_MAX here, hence representable as an int
     (see the verify above).  The result is kept in an int so that a
     -1 never travels through an unsigned type on its way out.  */
  int byte = code;

#if ! ('$' == 0x24 && '@' == 0x40 && '`' == 0x60 && '~' == 0x7e)
  {
    /* A non-ASCII host.  Use CODE to index into a table of the C
       basic execution character set, which is guaranteed to exist on
       all Standard C platforms.  This table also includes '$', '@',
       and '`', which are not in the basic execution character set but
       which are unibyte characters on all the platforms that we know
       about.  */
    static signed char const table[] =
      {
        '\0',   -1,   -1,   -1,   -1,   -1,   -1, '\a',
        '\b', '\t', '\n', '\v', '\f', '\r',   -1,   -1,
          -1,   -1,   -1,   -1,   -1,   -1,   -1,   -1,
          -1,   -1,   -1,   -1,   -1,   -1,   -1,   -1,
         ' ',  '!',  '"',  '#',  '$',  '%',  '&', '\'',
         '(',  ')',  '*',  '+',  ',',  '-',  '.',  '/',
         '0',  '1',  '2',  '3',  '4',  '5',  '6',  '7',
         '8',  '9',  ':',  ';',  '<',  '=',  '>',  '?',
         '@',  'A',  'B',  'C',  'D',  'E',  'F',  'G',
         'H',  'I',  'J',  'K',  'L',  'M',  'N',  'O',
         'P',  'Q',  'R',  'S',  'T',  'U',  'V',  'W',
         'X',  'Y',  'Z',  '[', '\\',  ']',  '^',  '_',
         '`',  'a',  'b',  'c',  'd',  'e',  'f',  'g',
         'h',  'i',  'j',  'k',  'l',  'm',  'n',  'o',
         'p',  'q',  'r',  's',  't',  'u',  'v',  'w',
         'x',  'y',  'z',  '{',  '|',  '}',  '~'
      };

    /* Codes past the end of the table have no unibyte equivalent.  */
    byte = code < sizeof table ? table[code] : -1;
  }
#endif

  return byte;
}
913
914
03dbf629
AD
915/*---------------------------------------------------------------------.
916| Handle '#line INT( "FILE")?\n'. ARGS has already skipped '#line '. |
917`---------------------------------------------------------------------*/
900c5db5
AD
918
919static void
4517da37 920handle_syncline (char *args, location loc)
900c5db5 921{
03dbf629
AD
922 char *file;
923 unsigned long int lineno = strtoul (args, &file, 10);
4517da37
PE
924 if (INT_MAX <= lineno)
925 {
bb8e56ff 926 complain (&loc, Wother, _("line number overflow"));
4517da37
PE
927 lineno = INT_MAX;
928 }
03dbf629 929
064e42b0 930 file = strchr (file, '"');
03dbf629
AD
931 if (file)
932 {
064e42b0 933 *strchr (file + 1, '"') = '\0';
03dbf629
AD
934 current_file = uniqstr_new (file + 1);
935 }
0c8e079f 936 boundary_set (&scanner_cursor, current_file, lineno, 1);
4517da37
PE
937}
938
939
4febdd96
PE
940/*----------------------------------------------------------------.
941| For a token or comment starting at START, report message MSGID, |
c1b2677a
TR
942| which should say that an end marker was found before the |
943| expected TOKEN_END. Then, pretend that TOKEN_END was found. |
4febdd96
PE
944`----------------------------------------------------------------*/
945
946static void
947unexpected_end (boundary start, char const *msgid, char const *token_end)
948{
949 location loc;
950 loc.start = start;
951 loc.end = scanner_cursor;
c1b2677a
TR
952 size_t i = strlen (token_end);
953
954/* Adjust scanner cursor so that any later message does not count
955 the characters about to be inserted. */
956 scanner_cursor.column -= i;
957
958 while (i != 0)
959 unput (token_end[--i]);
960
4a9cd8f2 961 token_end = quote (token_end);
1127a75a 962 /* Instead of '\'', display "'". */
f518dbaf 963 if (STREQ (token_end, "'\\''"))
4a9cd8f2 964 token_end = "\"'\"";
bb8e56ff 965 complain (&loc, complaint, _(msgid), token_end);
4febdd96
PE
966}
967
968
3f2d73f1
PE
969/*------------------------------------------------------------------------.
970| Report an unexpected EOF in a token or comment starting at START. |
971| An end of file was encountered and the expected TOKEN_END was missing. |
c1b2677a 972| After reporting the problem, pretend that TOKEN_END was found. |
3f2d73f1 973`------------------------------------------------------------------------*/
a706a1cc
PE
974
975static void
aa418041 976unexpected_eof (boundary start, char const *token_end)
a706a1cc 977{
4a9cd8f2 978 unexpected_end (start, N_("missing %s at end of file"), token_end);
4febdd96
PE
979}
980
981
982/*----------------------------------------.
983| Likewise, but for unexpected newlines. |
984`----------------------------------------*/
985
986static void
987unexpected_newline (boundary start, char const *token_end)
988{
4a9cd8f2 989 unexpected_end (start, N_("missing %s at end of line"), token_end);
a706a1cc
PE
990}
991
992
f25bfb75
AD
993/*-------------------------.
994| Initialize the scanner. |
995`-------------------------*/
996
1d6412ad 997void
e9071366 998gram_scanner_initialize (void)
1d6412ad 999{
223ff46e 1000 obstack_init (&obstack_for_string);
1d6412ad
AD
1001}
1002
1003
f25bfb75
AD
1004/*-----------------------------------------------.
1005| Free all the memory allocated to the scanner. |
1006`-----------------------------------------------*/
1007
4cdb01db 1008void
e9071366 1009gram_scanner_free (void)
4cdb01db 1010{
223ff46e 1011 obstack_free (&obstack_for_string, 0);
536545f3 1012 /* Reclaim Flex's buffers. */
580b8926 1013 yylex_destroy ();
4cdb01db 1014}