]> git.saurik.com Git - bison.git/blame - src/scan-gram.l
Merge branch 'maint'
[bison.git] / src / scan-gram.l
CommitLineData
e9955c83 1/* Bison Grammar Scanner -*- C -*-
3b1e470c 2
34136e65 3 Copyright (C) 2002-2012 Free Software Foundation, Inc.
e9955c83
AD
4
5 This file is part of Bison, the GNU Compiler Compiler.
6
f16b0819 7 This program is free software: you can redistribute it and/or modify
e9955c83 8 it under the terms of the GNU General Public License as published by
f16b0819 9 the Free Software Foundation, either version 3 of the License, or
e9955c83
AD
10 (at your option) any later version.
11
12 This program is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
16
17 You should have received a copy of the GNU General Public License
f16b0819 18 along with this program. If not, see <http://www.gnu.org/licenses/>. */
e9955c83 19
8d90395d 20%option debug nodefault noinput nounput noyywrap never-interactive
e9955c83
AD
21%option prefix="gram_" outfile="lex.yy.c"
22
23%{
4f6e011e
PE
24/* Work around a bug in flex 2.5.31. See Debian bug 333231
25 <http://bugs.debian.org/cgi-bin/bugreport.cgi?bug=333231>. */
26#undef gram_wrap
27#define gram_wrap() 1
28
e9071366 29#define FLEX_PREFIX(Id) gram_ ## Id
0305d25e 30#include <src/flex-scanner.h>
223ff46e 31
0305d25e
AD
32#include <src/complain.h>
33#include <src/files.h>
34#include <src/gram.h>
35#include <quotearg.h>
36#include <src/reader.h>
37#include <src/uniqstr.h>
e9955c83 38
457bf919 39#include <c-ctype.h>
e9071366
AD
40#include <mbswidth.h>
41#include <quote.h>
42
0305d25e 43#include <src/scan-gram.h>
e9071366
AD
44
45#define YY_DECL GRAM_LEX_DECL
2346344a 46
e9690142
JD
47#define YY_USER_INIT \
48 code_start = scanner_cursor = loc->start; \
dc9701e8 49
3f2d73f1 50/* Location of scanner cursor. */
4a678af8 51static boundary scanner_cursor;
41141c56 52
e9071366 53#define YY_USER_ACTION location_compute (loc, &scanner_cursor, yytext, yyleng);
d8d3f94a 54
6c30d641 55static size_t no_cr_read (FILE *, char *, size_t);
d8d3f94a
PE
56#define YY_INPUT(buf, result, size) ((result) = no_cr_read (yyin, buf, size))
57
a7706735
AD
58#define RETURN_PERCENT_PARAM(Value) \
59 RETURN_VALUE(PERCENT_PARAM, param, param_ ## Value)
60
61#define RETURN_PERCENT_FLAG(Value) \
62 RETURN_VALUE(PERCENT_FLAG, uniqstr, uniqstr_new (Value))
63
64#define RETURN_VALUE(Token, Field, Value) \
ba061fa6 65 do { \
a7706735
AD
66 val->Field = Value; \
67 return Token; \
ba061fa6
AD
68 } while (0)
69
b9f1d9a4
AR
70#define ROLLBACK_CURRENT_TOKEN \
71 do { \
e9690142 72 scanner_cursor.column -= mbsnwidth (yytext, yyleng, 0); \
b9f1d9a4
AR
73 yyless (0); \
74 } while (0)
ba061fa6 75
7ec2d4cd 76/* A string representing the most recently saved token. */
7c0c6181 77static char *last_string;
7ec2d4cd 78
872b52bc 79/* Bracketed identifier. */
b9f1d9a4
AR
80static uniqstr bracketed_id_str = 0;
81static location bracketed_id_loc;
82static boundary bracketed_id_start;
83static int bracketed_id_context_state = 0;
84
/* Public entry point that runs STRING_FREE on behalf of code outside
   the scanner.  STRING_FREE is defined outside this file (presumably
   in src/flex-scanner.h -- confirm); the scanner actions invoke it
   once they are done with last_string, so this is assumed to discard
   the most recently saved token string.  */
void
gram_scanner_last_string_free (void)
{
  STRING_FREE;
}
e9955c83 90
4517da37 91static void handle_syncline (char *, location);
1452af69 92static unsigned long int scan_integer (char const *p, int base, location loc);
d8d3f94a 93static int convert_ucn_to_byte (char const *hex_text);
aa418041 94static void unexpected_eof (boundary, char const *);
4febdd96 95static void unexpected_newline (boundary, char const *);
e9955c83
AD
96
97%}
e9071366
AD
98 /* A C-like comment in directives/rules. */
99%x SC_YACC_COMMENT
100 /* Strings and characters in directives/rules. */
e9955c83 101%x SC_ESCAPED_STRING SC_ESCAPED_CHARACTER
e9071366 102 /* An identifier was just read in directives/rules. Special state
ae93e4e4 103 to capture the sequence 'identifier :'. */
e9071366 104%x SC_AFTER_IDENTIFIER
cb823b6f
AD
105 /* A complex tag, with nested angle brackets. */
106%x SC_TAG
e9071366 107
ca2a6d15 108 /* Four types of user code:
ae93e4e4 109 - prologue (code between '%{' '%}' in the first section, before %%);
e9071366 110 - actions, printers, union, etc, (between braces in the middle section);
da5462d4 111 - epilogue (everything after the second %%).
ae93e4e4 112 - predicate (code between '%?{' and '}' in middle section); */
ca2a6d15 113%x SC_PROLOGUE SC_BRACED_CODE SC_EPILOGUE SC_PREDICATE
e9071366
AD
114 /* C and C++ comments in code. */
115%x SC_COMMENT SC_LINE_COMMENT
116 /* Strings and characters in code. */
117%x SC_STRING SC_CHARACTER
872b52bc 118 /* Bracketed identifiers support. */
b9f1d9a4 119%x SC_BRACKETED_ID SC_RETURN_BRACKETED_ID
e9955c83 120
e9690142
JD
121letter [.abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ_]
122id {letter}({letter}|[-0-9])*
4f646c37 123directive %{id}
e9690142 124int [0-9]+
d8d3f94a
PE
125
126/* POSIX says that a tag must be both an id and a C union member, but
127 historically almost any character is allowed in a tag. We disallow
cb823b6f
AD
128 NUL, as this simplifies our implementation. We disallow angle
129 bracket to match them in nested pairs: several languages use them
130 for generics/template types. */
e9690142 131tag [^\0<>]+
d8d3f94a
PE
132
133/* Zero or more instances of backslash-newline. Following GCC, allow
134 white space between the backslash and the newline. */
e9690142 135splice (\\[ \f\t\v]*\n)*
e9955c83
AD
136
137%%
138%{
cb823b6f
AD
139 /* Nesting level. Either for nested braces, or nested angle brackets
140 (but not mixed). */
84f6a6ca 141 int nesting PACIFY_CC (= 0);
1a9e39f1 142
3f2d73f1 143 /* Parent context state, when applicable. */
84f6a6ca 144 int context_state PACIFY_CC (= 0);
a706a1cc 145
3f2d73f1 146 /* Location of most recent identifier, when applicable. */
84f6a6ca 147 location id_loc PACIFY_CC (= empty_location);
3f2d73f1 148
a2bc9dbc
PE
149 /* Where containing code started, when applicable. Its initial
150 value is relevant only when yylex is invoked in the SC_EPILOGUE
151 start condition. */
152 boundary code_start = scanner_cursor;
3f2d73f1 153
223ff46e
PE
154 /* Where containing comment or string or character literal started,
155 when applicable. */
84f6a6ca 156 boundary token_start PACIFY_CC (= scanner_cursor);
e9955c83
AD
157%}
158
159
3f2d73f1
PE
160 /*-----------------------.
161 | Scanning white space. |
162 `-----------------------*/
163
b9f1d9a4 164<INITIAL,SC_AFTER_IDENTIFIER,SC_BRACKETED_ID,SC_RETURN_BRACKETED_ID>
3f2d73f1 165{
4febdd96 166 /* Comments and white space. */
6fb8b256 167 "," {
bb8e56ff 168 complain (loc, Wother, _("stray ',' treated as white space"));
6fb8b256 169 }
4febdd96 170 [ \f\n\t\v] |
3f2d73f1 171 "//".* ;
83adb046
PE
172 "/*" {
173 token_start = loc->start;
174 context_state = YY_START;
175 BEGIN SC_YACC_COMMENT;
176 }
3f2d73f1
PE
177
178 /* #line directives are not documented, and may be withdrawn or
179 modified in future versions of Bison. */
180 ^"#line "{int}" \"".*"\"\n" {
4517da37 181 handle_syncline (yytext + sizeof "#line " - 1, *loc);
3f2d73f1
PE
182 }
183}
184
185
e9955c83
AD
186 /*----------------------------.
187 | Scanning Bison directives. |
188 `----------------------------*/
a7c09cba
DJ
189
190 /* For directives that are also command line options, the regex must be
e9690142 191 "%..."
a7c09cba
DJ
192 after "[-_]"s are removed, and the directive must match the --long
193 option name, with a single string argument. Otherwise, add exceptions
194 to ../build-aux/cross-options.pl. */
195
e9955c83
AD
196<INITIAL>
197{
deef2a0a 198 "%binary" return PERCENT_NONASSOC;
136a0f76 199 "%code" return PERCENT_CODE;
fa819509 200 "%debug" RETURN_PERCENT_FLAG("parse.trace");
deef2a0a
AD
201 "%default"[-_]"prec" return PERCENT_DEFAULT_PREC;
202 "%define" return PERCENT_DEFINE;
203 "%defines" return PERCENT_DEFINES;
204 "%destructor" return PERCENT_DESTRUCTOR;
205 "%dprec" return PERCENT_DPREC;
31b850d2 206 "%error"[-_]"verbose" return PERCENT_ERROR_VERBOSE;
deef2a0a
AD
207 "%expect" return PERCENT_EXPECT;
208 "%expect"[-_]"rr" return PERCENT_EXPECT_RR;
209 "%file-prefix" return PERCENT_FILE_PREFIX;
e9955c83 210 "%fixed"[-_]"output"[-_]"files" return PERCENT_YACC;
deef2a0a
AD
211 "%initial-action" return PERCENT_INITIAL_ACTION;
212 "%glr-parser" return PERCENT_GLR_PARSER;
213 "%language" return PERCENT_LANGUAGE;
214 "%left" return PERCENT_LEFT;
a7706735 215 "%lex-param" RETURN_PERCENT_PARAM(lex);
bc0f5737 216 "%locations" RETURN_PERCENT_FLAG("locations");
deef2a0a
AD
217 "%merge" return PERCENT_MERGE;
218 "%name"[-_]"prefix" return PERCENT_NAME_PREFIX;
219 "%no"[-_]"default"[-_]"prec" return PERCENT_NO_DEFAULT_PREC;
220 "%no"[-_]"lines" return PERCENT_NO_LINES;
221 "%nonassoc" return PERCENT_NONASSOC;
222 "%nondeterministic-parser" return PERCENT_NONDETERMINISTIC_PARSER;
223 "%nterm" return PERCENT_NTERM;
224 "%output" return PERCENT_OUTPUT;
a7706735
AD
225 "%param" RETURN_PERCENT_PARAM(both);
226 "%parse-param" RETURN_PERCENT_PARAM(parse);
deef2a0a 227 "%prec" return PERCENT_PREC;
d78f0ac9 228 "%precedence" return PERCENT_PRECEDENCE;
deef2a0a 229 "%printer" return PERCENT_PRINTER;
4920ae8b 230 "%pure"[-_]"parser" RETURN_PERCENT_FLAG("api.pure");
deef2a0a
AD
231 "%require" return PERCENT_REQUIRE;
232 "%right" return PERCENT_RIGHT;
233 "%skeleton" return PERCENT_SKELETON;
234 "%start" return PERCENT_START;
235 "%term" return PERCENT_TOKEN;
236 "%token" return PERCENT_TOKEN;
237 "%token"[-_]"table" return PERCENT_TOKEN_TABLE;
238 "%type" return PERCENT_TYPE;
239 "%union" return PERCENT_UNION;
240 "%verbose" return PERCENT_VERBOSE;
241 "%yacc" return PERCENT_YACC;
e9955c83 242
3f2d73f1 243 {directive} {
bb8e56ff 244 complain (loc, complaint, _("invalid directive: %s"), quote (yytext));
412f8a59 245 }
900c5db5 246
e9955c83 247 "=" return EQUAL;
e9071366 248 "|" return PIPE;
e9955c83
AD
249 ";" return SEMICOLON;
250
3f2d73f1 251 {id} {
58d7a1a1 252 val->uniqstr = uniqstr_new (yytext);
3f2d73f1 253 id_loc = *loc;
b9f1d9a4 254 bracketed_id_str = NULL;
3f2d73f1 255 BEGIN SC_AFTER_IDENTIFIER;
e9955c83
AD
256 }
257
d8d3f94a 258 {int} {
1452af69
PE
259 val->integer = scan_integer (yytext, 10, *loc);
260 return INT;
261 }
262 0[xX][0-9abcdefABCDEF]+ {
263 val->integer = scan_integer (yytext, 16, *loc);
d8d3f94a
PE
264 return INT;
265 }
e9955c83 266
84a1cb5a
AD
267 /* Identifiers may not start with a digit. Yet, don't silently
268 accept "1FOO" as "1 FOO". */
269 {int}{id} {
bb8e56ff 270 complain (loc, complaint, _("invalid identifier: %s"), quote (yytext));
84a1cb5a
AD
271 }
272
3208e3f4 273 /* Characters. */
e9690142 274 "'" token_start = loc->start; BEGIN SC_ESCAPED_CHARACTER;
e9955c83
AD
275
276 /* Strings. */
e9690142 277 "\"" token_start = loc->start; BEGIN SC_ESCAPED_STRING;
e9955c83
AD
278
279 /* Prologue. */
3f2d73f1 280 "%{" code_start = loc->start; BEGIN SC_PROLOGUE;
e9955c83
AD
281
282 /* Code in between braces. */
3f2d73f1
PE
283 "{" {
284 STRING_GROW;
cb823b6f 285 nesting = 0;
3f2d73f1
PE
286 code_start = loc->start;
287 BEGIN SC_BRACED_CODE;
288 }
e9955c83 289
ca2a6d15
PH
290 /* Semantic predicate. */
291 "%?"[ \f\n\t\v]*"{" {
292 nesting = 0;
293 code_start = loc->start;
294 BEGIN SC_PREDICATE;
295 }
296
e9955c83 297 /* A type. */
cb823b6f
AD
298 "<*>" return TAG_ANY;
299 "<>" return TAG_NONE;
d8d3f94a 300 "<"{tag}">" {
223ff46e 301 obstack_grow (&obstack_for_string, yytext + 1, yyleng - 2);
41141c56 302 STRING_FINISH;
223ff46e 303 val->uniqstr = uniqstr_new (last_string);
41141c56 304 STRING_FREE;
cb823b6f
AD
305 return TAG;
306 }
307 "<" {
308 nesting = 0;
309 token_start = loc->start;
310 BEGIN SC_TAG;
4cdb01db
AD
311 }
312
a706a1cc
PE
313 "%%" {
314 static int percent_percent_count;
e9955c83 315 if (++percent_percent_count == 2)
a2bc9dbc 316 BEGIN SC_EPILOGUE;
e9955c83
AD
317 return PERCENT_PERCENT;
318 }
319
b9f1d9a4
AR
320 "[" {
321 bracketed_id_str = NULL;
322 bracketed_id_start = loc->start;
323 bracketed_id_context_state = YY_START;
324 BEGIN SC_BRACKETED_ID;
325 }
326
a706a1cc 327 . {
bb8e56ff 328 complain (loc, complaint, _("invalid character: %s"), quote (yytext));
3f2d73f1 329 }
379f0ac8
PE
330
331 <<EOF>> {
332 loc->start = loc->end = scanner_cursor;
333 yyterminate ();
334 }
3f2d73f1
PE
335}
336
337
cb823b6f
AD
338 /*--------------------------------------------------------------.
339 | Supporting \0 complexifies our implementation for no expected |
340 | added value. |
341 `--------------------------------------------------------------*/
342
343<SC_ESCAPED_CHARACTER,SC_ESCAPED_STRING,SC_TAG>
344{
bb8e56ff 345 \0 complain (loc, complaint, _("invalid null character"));
cb823b6f
AD
346}
347
348
3f2d73f1
PE
349 /*-----------------------------------------------------------------.
350 | Scanning after an identifier, checking whether a colon is next. |
351 `-----------------------------------------------------------------*/
352
353<SC_AFTER_IDENTIFIER>
354{
b9f1d9a4 355 "[" {
872b52bc 356 if (bracketed_id_str)
b9f1d9a4 357 {
e9690142
JD
358 ROLLBACK_CURRENT_TOKEN;
359 BEGIN SC_RETURN_BRACKETED_ID;
360 *loc = id_loc;
361 return ID;
b9f1d9a4 362 }
872b52bc
AR
363 else
364 {
e9690142
JD
365 bracketed_id_start = loc->start;
366 bracketed_id_context_state = YY_START;
367 BEGIN SC_BRACKETED_ID;
872b52bc 368 }
b9f1d9a4 369 }
3f2d73f1 370 ":" {
b9f1d9a4 371 BEGIN (bracketed_id_str ? SC_RETURN_BRACKETED_ID : INITIAL);
3f2d73f1 372 *loc = id_loc;
3f2d73f1
PE
373 return ID_COLON;
374 }
375 . {
b9f1d9a4
AR
376 ROLLBACK_CURRENT_TOKEN;
377 BEGIN (bracketed_id_str ? SC_RETURN_BRACKETED_ID : INITIAL);
3f2d73f1 378 *loc = id_loc;
3f2d73f1
PE
379 return ID;
380 }
381 <<EOF>> {
b9f1d9a4 382 BEGIN (bracketed_id_str ? SC_RETURN_BRACKETED_ID : INITIAL);
3f2d73f1 383 *loc = id_loc;
3f2d73f1 384 return ID;
e9955c83
AD
385 }
386}
387
b9f1d9a4
AR
388 /*--------------------------------.
389 | Scanning bracketed identifiers. |
390 `--------------------------------*/
391
392<SC_BRACKETED_ID>
393{
394 {id} {
872b52bc 395 if (bracketed_id_str)
b9f1d9a4 396 {
bb8e56ff
TR
397 complain (loc, complaint,
398 _("unexpected identifier in bracketed name: %s"),
399 quote (yytext));
b9f1d9a4
AR
400 }
401 else
402 {
e9690142
JD
403 bracketed_id_str = uniqstr_new (yytext);
404 bracketed_id_loc = *loc;
b9f1d9a4
AR
405 }
406 }
407 "]" {
408 BEGIN bracketed_id_context_state;
409 if (bracketed_id_str)
410 {
e9690142
JD
411 if (INITIAL == bracketed_id_context_state)
412 {
413 val->uniqstr = bracketed_id_str;
414 bracketed_id_str = 0;
415 *loc = bracketed_id_loc;
416 return BRACKETED_ID;
417 }
b9f1d9a4
AR
418 }
419 else
bb8e56ff 420 complain (loc, complaint, _("an identifier expected"));
b9f1d9a4
AR
421 }
422 . {
bb8e56ff 423 complain (loc, complaint, _("invalid character in bracketed name: %s"),
e9690142 424 quote (yytext));
b9f1d9a4
AR
425 }
426 <<EOF>> {
427 BEGIN bracketed_id_context_state;
428 unexpected_eof (bracketed_id_start, "]");
429 }
430}
431
432<SC_RETURN_BRACKETED_ID>
433{
434 . {
435 ROLLBACK_CURRENT_TOKEN;
436 val->uniqstr = bracketed_id_str;
437 bracketed_id_str = 0;
438 *loc = bracketed_id_loc;
439 BEGIN INITIAL;
440 return BRACKETED_ID;
441 }
442}
443
e9955c83 444
d8d3f94a 445 /*---------------------------------------------------------------.
ae93e4e4 446 | Scanning a Yacc comment. The initial '/ *' is already eaten. |
d8d3f94a 447 `---------------------------------------------------------------*/
e9955c83 448
d8d3f94a 449<SC_YACC_COMMENT>
e9955c83 450{
3f2d73f1 451 "*/" BEGIN context_state;
e9690142 452 .|\n ;
aa418041 453 <<EOF>> unexpected_eof (token_start, "*/"); BEGIN context_state;
d8d3f94a
PE
454}
455
456
457 /*------------------------------------------------------------.
ae93e4e4 458 | Scanning a C comment. The initial '/ *' is already eaten. |
d8d3f94a
PE
459 `------------------------------------------------------------*/
460
461<SC_COMMENT>
462{
3f2d73f1 463 "*"{splice}"/" STRING_GROW; BEGIN context_state;
e9690142 464 <<EOF>> unexpected_eof (token_start, "*/"); BEGIN context_state;
e9955c83
AD
465}
466
467
d8d3f94a 468 /*--------------------------------------------------------------.
ae93e4e4 469 | Scanning a line comment. The initial '//' is already eaten. |
d8d3f94a
PE
470 `--------------------------------------------------------------*/
471
472<SC_LINE_COMMENT>
473{
e9690142
JD
474 "\n" STRING_GROW; BEGIN context_state;
475 {splice} STRING_GROW;
476 <<EOF>> BEGIN context_state;
d8d3f94a
PE
477}
478
479
4febdd96
PE
480 /*------------------------------------------------.
481 | Scanning a Bison string, including its escapes. |
482 | The initial quote is already eaten. |
483 `------------------------------------------------*/
e9955c83
AD
484
485<SC_ESCAPED_STRING>
486{
47aee066
JD
487 "\""|"\n" {
488 if (yytext[0] == '\n')
489 unexpected_newline (token_start, "\"");
490 STRING_FINISH;
491 loc->start = token_start;
492 val->chars = last_string;
493 BEGIN INITIAL;
494 return STRING;
495 }
496 <<EOF>> {
497 unexpected_eof (token_start, "\"");
41141c56 498 STRING_FINISH;
3f2d73f1 499 loc->start = token_start;
223ff46e 500 val->chars = last_string;
a706a1cc 501 BEGIN INITIAL;
e9955c83
AD
502 return STRING;
503 }
e9955c83
AD
504}
505
4febdd96
PE
506 /*----------------------------------------------------------.
507 | Scanning a Bison character literal, decoding its escapes. |
e9690142 508 | The initial quote is already eaten. |
4febdd96 509 `----------------------------------------------------------*/
e9955c83
AD
510
511<SC_ESCAPED_CHARACTER>
512{
47aee066 513 "'"|"\n" {
41141c56 514 STRING_FINISH;
3f2d73f1 515 loc->start = token_start;
dfaa4860 516 val->character = last_string[0];
3208e3f4
JD
517 {
518 /* FIXME: Eventually, make these errors. */
dfaa4860
JD
519 if (last_string[0] == '\0')
520 {
bb8e56ff 521 complain (loc, Wother, _("empty character literal"));
dfaa4860
JD
522 /* '\0' seems dangerous even if we are about to complain. */
523 val->character = '\'';
524 }
525 else if (last_string[1] != '\0')
bb8e56ff 526 complain (loc, Wother,
6fb8b256 527 _("extra characters in character literal"));
3208e3f4
JD
528 }
529 if (yytext[0] == '\n')
530 unexpected_newline (token_start, "'");
41141c56 531 STRING_FREE;
a706a1cc 532 BEGIN INITIAL;
58d7a1a1 533 return CHAR;
e9955c83 534 }
47aee066 535 <<EOF>> {
47aee066
JD
536 STRING_FINISH;
537 loc->start = token_start;
dfaa4860 538 val->character = last_string[0];
3208e3f4 539 {
3208e3f4 540 /* FIXME: Eventually, make these errors. */
dfaa4860
JD
541 if (last_string[0] == '\0')
542 {
bb8e56ff 543 complain (loc, Wother, _("empty character literal"));
dfaa4860
JD
544 /* '\0' seems dangerous even if we are about to complain. */
545 val->character = '\'';
546 }
547 else if (last_string[1] != '\0')
bb8e56ff 548 complain (loc, Wother,
6fb8b256 549 _("extra characters in character literal"));
3208e3f4
JD
550 }
551 unexpected_eof (token_start, "'");
47aee066
JD
552 STRING_FREE;
553 BEGIN INITIAL;
554 return CHAR;
555 }
4febdd96 556}
a706a1cc 557
cb823b6f
AD
558 /*-----------------------------------------------------------.
559 | Scanning a Bison nested tag. The initial angle bracket is |
560 | already eaten. |
561 `-----------------------------------------------------------*/
562
563<SC_TAG>
4febdd96 564{
cb823b6f
AD
565 ">" {
566 --nesting;
567 if (nesting < 0)
568 {
569 STRING_FINISH;
570 loc->start = token_start;
571 val->uniqstr = uniqstr_new (last_string);
572 STRING_FREE;
573 BEGIN INITIAL;
574 return TAG;
575 }
576 STRING_GROW;
577 }
578
579 [^<>]+ STRING_GROW;
580 "<"+ STRING_GROW; nesting += yyleng;
e9955c83 581
cb823b6f
AD
582 <<EOF>> {
583 unexpected_eof (token_start, ">");
584 STRING_FINISH;
585 loc->start = token_start;
586 val->uniqstr = uniqstr_new (last_string);
587 STRING_FREE;
588 BEGIN INITIAL;
589 return TAG;
590 }
591}
e9955c83
AD
592
593 /*----------------------------.
594 | Decode escaped characters. |
595 `----------------------------*/
596
597<SC_ESCAPED_STRING,SC_ESCAPED_CHARACTER>
598{
d8d3f94a 599 \\[0-7]{1,3} {
4517da37 600 unsigned long int c = strtoul (yytext + 1, NULL, 8);
c2724603 601 if (!c || UCHAR_MAX < c)
bb8e56ff 602 complain (loc, complaint, _("invalid number after \\-escape: %s"),
c2724603 603 yytext+1);
e9955c83 604 else
223ff46e 605 obstack_1grow (&obstack_for_string, c);
e9955c83
AD
606 }
607
6b0d38ab 608 \\x[0-9abcdefABCDEF]+ {
4517da37
PE
609 verify (UCHAR_MAX < ULONG_MAX);
610 unsigned long int c = strtoul (yytext + 2, NULL, 16);
c2724603 611 if (!c || UCHAR_MAX < c)
bb8e56ff 612 complain (loc, complaint, _("invalid number after \\-escape: %s"),
c2724603 613 yytext+1);
d8d3f94a 614 else
223ff46e 615 obstack_1grow (&obstack_for_string, c);
e9955c83
AD
616 }
617
e9690142
JD
618 \\a obstack_1grow (&obstack_for_string, '\a');
619 \\b obstack_1grow (&obstack_for_string, '\b');
620 \\f obstack_1grow (&obstack_for_string, '\f');
621 \\n obstack_1grow (&obstack_for_string, '\n');
622 \\r obstack_1grow (&obstack_for_string, '\r');
623 \\t obstack_1grow (&obstack_for_string, '\t');
624 \\v obstack_1grow (&obstack_for_string, '\v');
412f8a59
PE
625
626 /* \\[\"\'?\\] would be shorter, but it confuses xgettext. */
223ff46e 627 \\("\""|"'"|"?"|"\\") obstack_1grow (&obstack_for_string, yytext[1]);
412f8a59 628
6b0d38ab 629 \\(u|U[0-9abcdefABCDEF]{4})[0-9abcdefABCDEF]{4} {
d8d3f94a 630 int c = convert_ucn_to_byte (yytext);
c2724603 631 if (c <= 0)
bb8e56ff 632 complain (loc, complaint, _("invalid number after \\-escape: %s"),
c2724603 633 yytext+1);
d8d3f94a 634 else
223ff46e 635 obstack_1grow (&obstack_for_string, c);
d8d3f94a 636 }
e9690142 637 \\(.|\n) {
c2724603 638 char const *p = yytext + 1;
e6c849d8 639 /* Quote only if escaping won't make the character visible. */
457bf919 640 if (c_isspace ((unsigned char) *p) && c_isprint ((unsigned char) *p))
e6c849d8 641 p = quote (p);
c2724603
JD
642 else
643 p = quotearg_style_mem (escape_quoting_style, p, 1);
bb8e56ff 644 complain (loc, complaint, _("invalid character after \\-escape: %s"),
6fb8b256 645 p);
e9955c83
AD
646 }
647}
648
4febdd96
PE
649 /*--------------------------------------------.
650 | Scanning user-code characters and strings. |
651 `--------------------------------------------*/
e9955c83 652
4febdd96
PE
653<SC_CHARACTER,SC_STRING>
654{
e9690142 655 {splice}|\\{splice}[^\n\[\]] STRING_GROW;
4febdd96 656}
e9955c83
AD
657
658<SC_CHARACTER>
659{
e9690142
JD
660 "'" STRING_GROW; BEGIN context_state;
661 \n unexpected_newline (token_start, "'"); BEGIN context_state;
662 <<EOF>> unexpected_eof (token_start, "'"); BEGIN context_state;
e9955c83
AD
663}
664
e9955c83
AD
665<SC_STRING>
666{
e9690142
JD
667 "\"" STRING_GROW; BEGIN context_state;
668 \n unexpected_newline (token_start, "\""); BEGIN context_state;
669 <<EOF>> unexpected_eof (token_start, "\""); BEGIN context_state;
e9955c83
AD
670}
671
672
673 /*---------------------------------------------------.
674 | Strings, comments etc. can be found in user code. |
675 `---------------------------------------------------*/
676
ca2a6d15 677<SC_BRACED_CODE,SC_PROLOGUE,SC_EPILOGUE,SC_PREDICATE>
e9955c83 678{
3f2d73f1
PE
679 "'" {
680 STRING_GROW;
681 context_state = YY_START;
682 token_start = loc->start;
683 BEGIN SC_CHARACTER;
684 }
685 "\"" {
686 STRING_GROW;
687 context_state = YY_START;
688 token_start = loc->start;
689 BEGIN SC_STRING;
690 }
691 "/"{splice}"*" {
692 STRING_GROW;
693 context_state = YY_START;
694 token_start = loc->start;
695 BEGIN SC_COMMENT;
696 }
697 "/"{splice}"/" {
698 STRING_GROW;
699 context_state = YY_START;
700 BEGIN SC_LINE_COMMENT;
701 }
e9955c83
AD
702}
703
704
624a35e2 705
58d7a1a1 706 /*-----------------------------------------------------------.
ca2a6d15
PH
707 | Scanning some code in braces (actions, predicates). The |
708 | initial "{" is already eaten. |
58d7a1a1 709 `-----------------------------------------------------------*/
e9955c83 710
ca2a6d15 711<SC_BRACED_CODE,SC_PREDICATE>
e9955c83 712{
cb823b6f
AD
713 "{"|"<"{splice}"%" STRING_GROW; nesting++;
714 "%"{splice}">" STRING_GROW; nesting--;
ca2a6d15 715
ae93e4e4
JM
716 /* Tokenize '<<%' correctly (as '<<' '%') rather than incorrectly
717 (as '<' '<%'). */
ca2a6d15
PH
718 "<"{splice}"<" STRING_GROW;
719
720 <<EOF>> {
721 int token = (YY_START == SC_BRACED_CODE) ? BRACED_CODE : BRACED_PREDICATE;
722 unexpected_eof (code_start, "}");
723 STRING_FINISH;
724 loc->start = code_start;
725 val->code = last_string;
726 BEGIN INITIAL;
727 return token;
728 }
729}
730
731<SC_BRACED_CODE>
732{
e9955c83 733 "}" {
25522739
PE
734 obstack_1grow (&obstack_for_string, '}');
735
cb823b6f
AD
736 --nesting;
737 if (nesting < 0)
e9955c83 738 {
e9690142
JD
739 STRING_FINISH;
740 loc->start = code_start;
741 val->code = last_string;
742 BEGIN INITIAL;
743 return BRACED_CODE;
e9955c83
AD
744 }
745 }
ca2a6d15 746}
e9955c83 747
ca2a6d15
PH
748<SC_PREDICATE>
749{
750 "}" {
751 --nesting;
752 if (nesting < 0)
753 {
e9690142
JD
754 STRING_FINISH;
755 loc->start = code_start;
756 val->code = last_string;
757 BEGIN INITIAL;
758 return BRACED_PREDICATE;
ca2a6d15
PH
759 }
760 else
761 obstack_1grow (&obstack_for_string, '}');
47aee066 762 }
e9955c83
AD
763}
764
e9955c83
AD
765 /*--------------------------------------------------------------.
766 | Scanning some prologue: from "%{" (already scanned) to "%}". |
767 `--------------------------------------------------------------*/
768
769<SC_PROLOGUE>
770{
771 "%}" {
41141c56 772 STRING_FINISH;
3f2d73f1 773 loc->start = code_start;
223ff46e 774 val->chars = last_string;
a706a1cc 775 BEGIN INITIAL;
e9955c83
AD
776 return PROLOGUE;
777 }
778
47aee066
JD
779 <<EOF>> {
780 unexpected_eof (code_start, "%}");
781 STRING_FINISH;
782 loc->start = code_start;
783 val->chars = last_string;
784 BEGIN INITIAL;
785 return PROLOGUE;
786 }
e9955c83
AD
787}
788
789
790 /*---------------------------------------------------------------.
791 | Scanning the epilogue (everything after the second "%%", which |
d8d3f94a 792 | has already been eaten). |
e9955c83
AD
793 `---------------------------------------------------------------*/
794
795<SC_EPILOGUE>
796{
e9955c83 797 <<EOF>> {
41141c56 798 STRING_FINISH;
3f2d73f1 799 loc->start = code_start;
223ff46e 800 val->chars = last_string;
a706a1cc 801 BEGIN INITIAL;
e9955c83
AD
802 return EPILOGUE;
803 }
804}
805
806
4febdd96
PE
807 /*-----------------------------------------------------.
808 | By default, grow the string obstack with the input. |
809 `-----------------------------------------------------*/
810
e9690142
JD
811<SC_COMMENT,SC_LINE_COMMENT,SC_BRACED_CODE,SC_PREDICATE,SC_PROLOGUE,SC_EPILOGUE,SC_STRING,SC_CHARACTER,SC_ESCAPED_STRING,SC_ESCAPED_CHARACTER>. |
812 <SC_COMMENT,SC_LINE_COMMENT,SC_BRACED_CODE,SC_PREDICATE,SC_PROLOGUE,SC_EPILOGUE>\n STRING_GROW;
4febdd96 813
e9955c83
AD
814%%
815
6c30d641
PE
/* Fill BUF with at most SIZE bytes read from FP and return how many
   bytes BUF holds afterwards.  Carriage returns never reach the
   caller: both "\r\n" and a lone '\r' are delivered as a single
   '\n', so the returned count may be smaller than the number of
   bytes actually consumed from FP.  */

static size_t
no_cr_read (FILE *fp, char *buf, size_t size)
{
  size_t const nread = fread (buf, 1, size, fp);
  if (nread == 0)
    return 0;

  /* Fast path: nothing to rewrite when the chunk has no '\r'.  */
  char *out = memchr (buf, '\r', nread);
  if (out == NULL)
    return nread;

  char const *const end = buf + nread;
  char const *in = out + 1;

  while (1)
    {
      int found_cr = 0;

      /* OUT designates a '\r': deliver it as '\n'.  */
      *out++ = '\n';

      /* Swallow the '\n' of a "\r\n" pair.  When the '\r' was the
         last byte of the chunk, the '\n' (if any) is still in FP, so
         peek at the stream and push back anything else.  */
      if (in != end)
        {
          if (*in == '\n')
            in++;
        }
      else
        {
          int next = getc (fp);
          if (next != '\n' && ungetc (next, fp) != next)
            break;
        }

      /* Compact the remaining bytes down to OUT, stopping at the
         next '\r' (left at *OUT for the next iteration to rewrite).  */
      while (in != end)
        {
          char const c = *in++;
          *out = c;
          if (c == '\r')
            {
              found_cr = 1;
              break;
            }
          out++;
        }

      if (!found_cr)
        break;
    }

  return out - buf;
}
861
862
f25bfb75 863
1452af69
PE
864/*------------------------------------------------------.
865| Scan NUMBER for a base-BASE integer at location LOC. |
866`------------------------------------------------------*/
867
868static unsigned long int
869scan_integer (char const *number, int base, location loc)
870{
4517da37
PE
871 verify (INT_MAX < ULONG_MAX);
872 unsigned long int num = strtoul (number, NULL, base);
873
874 if (INT_MAX < num)
1452af69 875 {
bb8e56ff 876 complain (&loc, complaint, _("integer out of range: %s"),
6fb8b256 877 quote (number));
1452af69
PE
878 num = INT_MAX;
879 }
4517da37 880
1452af69
PE
881 return num;
882}
883
884
d8d3f94a
PE
/*------------------------------------------------------------------.
| Convert universal character name UCN to a single-byte character,  |
| and return that character.  Return -1 if UCN does not correspond  |
| to a single-byte character.                                       |
|                                                                   |
| UCN points at the backslash of the escape; the hexadecimal digits |
| are read starting at UCN + 2, i.e., after the 'u' or 'U'.         |
`------------------------------------------------------------------*/

static int
convert_ucn_to_byte (char const *ucn)
{
  verify (UCHAR_MAX <= INT_MAX);
  unsigned long int code = strtoul (ucn + 2, NULL, 16);

  /* FIXME: Currently we assume Unicode-compatible unibyte characters
     on ASCII hosts (i.e., Latin-1 on hosts with 8-bit bytes).  On
     non-ASCII hosts we support only the portable C character set.
     These limitations should be removed once we add support for
     multibyte characters.  */

  if (UCHAR_MAX < code)
    return -1;

  /* CODE <= UCHAR_MAX <= INT_MAX here, so this conversion is exact.
     The result is carried in an int from now on so that the -1
     sentinel is never stored in an unsigned object and converted
     back, which would be implementation-defined.  */
  int byte = code;

#if ! ('$' == 0x24 && '@' == 0x40 && '`' == 0x60 && '~' == 0x7e)
  {
    /* A non-ASCII host.  Use CODE to index into a table of the C
       basic execution character set, which is guaranteed to exist on
       all Standard C platforms.  This table also includes '$', '@',
       and '`', which are not in the basic execution character set but
       which are unibyte characters on all the platforms that we know
       about.  */
    static signed char const table[] =
      {
        '\0',   -1,   -1,   -1,   -1,   -1,   -1, '\a',
        '\b', '\t', '\n', '\v', '\f', '\r',   -1,   -1,
          -1,   -1,   -1,   -1,   -1,   -1,   -1,   -1,
          -1,   -1,   -1,   -1,   -1,   -1,   -1,   -1,
         ' ',  '!',  '"',  '#',  '$',  '%',  '&', '\'',
         '(',  ')',  '*',  '+',  ',',  '-',  '.',  '/',
         '0',  '1',  '2',  '3',  '4',  '5',  '6',  '7',
         '8',  '9',  ':',  ';',  '<',  '=',  '>',  '?',
         '@',  'A',  'B',  'C',  'D',  'E',  'F',  'G',
         'H',  'I',  'J',  'K',  'L',  'M',  'N',  'O',
         'P',  'Q',  'R',  'S',  'T',  'U',  'V',  'W',
         'X',  'Y',  'Z',  '[', '\\',  ']',  '^',  '_',
         '`',  'a',  'b',  'c',  'd',  'e',  'f',  'g',
         'h',  'i',  'j',  'k',  'l',  'm',  'n',  'o',
         'p',  'q',  'r',  's',  't',  'u',  'v',  'w',
         'x',  'y',  'z',  '{',  '|',  '}',  '~'
      };

    /* Codes past the end of the table have no portable byte.  */
    byte = code < sizeof table ? table[code] : -1;
  }
#endif

  return byte;
}
940
941
900c5db5 942/*----------------------------------------------------------------.
ae93e4e4 943| Handle '#line INT "FILE"'. ARGS has already skipped '#line '. |
900c5db5
AD
944`----------------------------------------------------------------*/
945
946static void
4517da37 947handle_syncline (char *args, location loc)
900c5db5 948{
4517da37
PE
949 char *after_num;
950 unsigned long int lineno = strtoul (args, &after_num, 10);
84526bf3
AD
951 char *file = strchr (after_num, '"') + 1;
952 *strchr (file, '"') = '\0';
4517da37
PE
953 if (INT_MAX <= lineno)
954 {
bb8e56ff 955 complain (&loc, Wother, _("line number overflow"));
4517da37
PE
956 lineno = INT_MAX;
957 }
e9071366 958 current_file = uniqstr_new (file);
0c8e079f 959 boundary_set (&scanner_cursor, current_file, lineno, 1);
4517da37
PE
960}
961
962
4febdd96
PE
963/*----------------------------------------------------------------.
964| For a token or comment starting at START, report message MSGID, |
e9690142
JD
965| which should say that an end marker was found before |
966| the expected TOKEN_END. |
4febdd96
PE
967`----------------------------------------------------------------*/
968
969static void
970unexpected_end (boundary start, char const *msgid, char const *token_end)
971{
972 location loc;
973 loc.start = start;
974 loc.end = scanner_cursor;
4a9cd8f2
AD
975 token_end = quote (token_end);
976 // Instead of '\'', display "'".
f518dbaf 977 if (STREQ (token_end, "'\\''"))
4a9cd8f2 978 token_end = "\"'\"";
bb8e56ff 979 complain (&loc, complaint, _(msgid), token_end);
4febdd96
PE
980}
981
982
3f2d73f1
PE
/*------------------------------------------------------------------------.
| Report an unexpected EOF in a token or comment starting at START.       |
| An end of file was encountered and the expected TOKEN_END was missing.  |
|                                                                         |
| This only selects the "end of file" wording; unexpected_end builds the  |
| location, translates the message and issues the complaint.              |
`------------------------------------------------------------------------*/

static void
unexpected_eof (boundary start, char const *token_end)
{
  /* The format is passed untranslated; unexpected_end applies _().  */
  unexpected_end (start, N_("missing %s at end of file"), token_end);
}
993
994
/*------------------------------------------------------------------.
| Likewise, but for unexpected newlines: report that the token      |
| starting at START reached an end of line before the expected      |
| TOKEN_END.  The complaint itself is issued by unexpected_end.     |
`------------------------------------------------------------------*/

static void
unexpected_newline (boundary start, char const *token_end)
{
  /* The format is passed untranslated; unexpected_end applies _().  */
  unexpected_end (start, N_("missing %s at end of line"), token_end);
}
1004
1005
f25bfb75
AD
/*---------------------------------------------------------------.
| Initialize the scanner: set up obstack_for_string, the obstack |
| into which the scanner actions grow token text.                |
`---------------------------------------------------------------*/

void
gram_scanner_initialize (void)
{
  obstack_init (&obstack_for_string);
}
1015
1016
f25bfb75
AD
/*-----------------------------------------------------------------.
| Free all the memory allocated to the scanner: first the contents |
| of obstack_for_string, then the buffers owned by Flex.           |
`-----------------------------------------------------------------*/

void
gram_scanner_free (void)
{
  /* Passing 0 asks obstack_free to release everything held by the
     obstack.  */
  obstack_free (&obstack_for_string, 0);
  /* Reclaim Flex's buffers.  */
  yylex_destroy ();
}