]> git.saurik.com Git - bison.git/blame - src/scan-gram.l
maint: de-recurse the handling of examples
[bison.git] / src / scan-gram.l
CommitLineData
e9955c83 1/* Bison Grammar Scanner -*- C -*-
3b1e470c 2
34136e65 3 Copyright (C) 2002-2012 Free Software Foundation, Inc.
e9955c83
AD
4
5 This file is part of Bison, the GNU Compiler Compiler.
6
f16b0819 7 This program is free software: you can redistribute it and/or modify
e9955c83 8 it under the terms of the GNU General Public License as published by
f16b0819 9 the Free Software Foundation, either version 3 of the License, or
e9955c83
AD
10 (at your option) any later version.
11
12 This program is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
16
17 You should have received a copy of the GNU General Public License
f16b0819 18 along with this program. If not, see <http://www.gnu.org/licenses/>. */
e9955c83 19
8d90395d 20%option debug nodefault noinput nounput noyywrap never-interactive
e9955c83
AD
21%option prefix="gram_" outfile="lex.yy.c"
22
23%{
4f6e011e
PE
24/* Work around a bug in flex 2.5.31. See Debian bug 333231
25 <http://bugs.debian.org/cgi-bin/bugreport.cgi?bug=333231>. */
26#undef gram_wrap
27#define gram_wrap() 1
28
e9071366 29#define FLEX_PREFIX(Id) gram_ ## Id
0305d25e 30#include <src/flex-scanner.h>
223ff46e 31
0305d25e
AD
32#include <src/complain.h>
33#include <src/files.h>
34#include <src/gram.h>
35#include <quotearg.h>
36#include <src/reader.h>
37#include <src/uniqstr.h>
e9955c83 38
c2724603 39#include <ctype.h>
e9071366
AD
40#include <mbswidth.h>
41#include <quote.h>
4a9cd8f2 42#include <streq.h>
e9071366 43
0305d25e 44#include <src/scan-gram.h>
e9071366
AD
45
46#define YY_DECL GRAM_LEX_DECL
2346344a 47
e9690142
JD
48#define YY_USER_INIT \
49 code_start = scanner_cursor = loc->start; \
dc9701e8 50
3f2d73f1 51/* Location of scanner cursor. */
4a678af8 52static boundary scanner_cursor;
41141c56 53
e9071366 54#define YY_USER_ACTION location_compute (loc, &scanner_cursor, yytext, yyleng);
d8d3f94a 55
6c30d641 56static size_t no_cr_read (FILE *, char *, size_t);
d8d3f94a
PE
57#define YY_INPUT(buf, result, size) ((result) = no_cr_read (yyin, buf, size))
58
a7706735
AD
59#define RETURN_PERCENT_PARAM(Value) \
60 RETURN_VALUE(PERCENT_PARAM, param, param_ ## Value)
61
62#define RETURN_PERCENT_FLAG(Value) \
63 RETURN_VALUE(PERCENT_FLAG, uniqstr, uniqstr_new (Value))
64
65#define RETURN_VALUE(Token, Field, Value) \
ba061fa6 66 do { \
a7706735
AD
67 val->Field = Value; \
68 return Token; \
ba061fa6
AD
69 } while (0)
70
b9f1d9a4
AR
71#define ROLLBACK_CURRENT_TOKEN \
72 do { \
e9690142 73 scanner_cursor.column -= mbsnwidth (yytext, yyleng, 0); \
b9f1d9a4
AR
74 yyless (0); \
75 } while (0)
ba061fa6 76
7ec2d4cd 77/* A string representing the most recently saved token. */
7c0c6181 78static char *last_string;
7ec2d4cd 79
872b52bc 80/* Bracketed identifier. */
b9f1d9a4
AR
81static uniqstr bracketed_id_str = 0;
82static location bracketed_id_loc;
83static boundary bracketed_id_start;
84static int bracketed_id_context_state = 0;
85
7ec2d4cd 86void
e9071366 87gram_scanner_last_string_free (void)
7ec2d4cd 88{
41141c56 89 STRING_FREE;
7ec2d4cd 90}
e9955c83 91
4517da37 92static void handle_syncline (char *, location);
1452af69 93static unsigned long int scan_integer (char const *p, int base, location loc);
d8d3f94a 94static int convert_ucn_to_byte (char const *hex_text);
aa418041 95static void unexpected_eof (boundary, char const *);
4febdd96 96static void unexpected_newline (boundary, char const *);
e9955c83
AD
97
98%}
/* A C-like comment in directives/rules.  */
%x SC_YACC_COMMENT
/* Strings and characters in directives/rules.  */
%x SC_ESCAPED_STRING SC_ESCAPED_CHARACTER
/* An identifier was just read in directives/rules.  Special state
   to capture the sequence 'identifier :'.  */
%x SC_AFTER_IDENTIFIER
/* A complex tag, with nested angle brackets.  */
%x SC_TAG

/* Four types of user code:
    - prologue (code between '%{' '%}' in the first section, before %%);
    - actions, printers, union, etc, (between braces in the middle section);
    - epilogue (everything after the second %%);
    - predicate (code between '%?{' and '}' in middle section).  */
%x SC_PROLOGUE SC_BRACED_CODE SC_EPILOGUE SC_PREDICATE
/* C and C++ comments in code.  */
%x SC_COMMENT SC_LINE_COMMENT
/* Strings and characters in code.  */
%x SC_STRING SC_CHARACTER
/* Bracketed identifiers support.  */
%x SC_BRACKETED_ID SC_RETURN_BRACKETED_ID

letter    [.abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ_]
id        {letter}({letter}|[-0-9])*
directive %{id}
int       [0-9]+

/* POSIX says that a tag must be both an id and a C union member, but
   historically almost any character is allowed in a tag.  We disallow
   NUL, as this simplifies our implementation.  We disallow angle
   brackets so that they can be matched in nested pairs: several
   languages use them for generics/template types.  */
tag       [^\0<>]+

/* Zero or more instances of backslash-newline.  Following GCC, allow
   white space between the backslash and the newline.  */
splice    (\\[ \f\t\v]*\n)*
e9955c83
AD
137
138%%
%{
  /* Depth of nesting, counted either in braces or in angle brackets
     (never both at once).  */
  int nesting PACIFY_CC (= 0);

  /* Start condition to return to once a nested construct (comment,
     string, character) is finished, when applicable.  */
  int context_state PACIFY_CC (= 0);

  /* Location of the most recent identifier, when applicable.  */
  location id_loc PACIFY_CC (= empty_location);

  /* Where the containing code started, when applicable.  The initial
     value only matters when yylex is entered in the SC_EPILOGUE
     start condition.  */
  boundary code_start = scanner_cursor;

  /* Where the containing comment, string or character literal
     started, when applicable.  */
  boundary token_start PACIFY_CC (= scanner_cursor);
%}
159
160
3f2d73f1
PE
161 /*-----------------------.
162 | Scanning white space. |
163 `-----------------------*/
164
<INITIAL,SC_AFTER_IDENTIFIER,SC_BRACKETED_ID,SC_RETURN_BRACKETED_ID>
{
  /* A comma is tolerated, with a warning.  */
  "," {
    warn_at (*loc, _("stray ',' treated as white space"));
  }

  /* White space and line comments are skipped.  */
  [ \f\n\t\v]  |
  "//".*       ;

  /* Block comment: remember where it began and which start condition
     to come back to.  */
  "/*" {
    context_state = YY_START;
    token_start = loc->start;
    BEGIN SC_YACC_COMMENT;
  }

  /* #line directives are not documented, and may be withdrawn or
     modified in future versions of Bison.  */
  ^"#line "{int}" \"".*"\"\n" {
    handle_syncline (yytext + sizeof "#line " - 1, *loc);
  }
}
183
184
e9955c83
AD
185 /*----------------------------.
186 | Scanning Bison directives. |
187 `----------------------------*/
a7c09cba
DJ
188
189 /* For directives that are also command line options, the regex must be
e9690142 190 "%..."
a7c09cba
DJ
191 after "[-_]"s are removed, and the directive must match the --long
192 option name, with a single string argument. Otherwise, add exceptions
193 to ../build-aux/cross-options.pl. */
194
e9955c83
AD
<INITIAL>
{
  "%binary"                         return PERCENT_NONASSOC;
  "%code"                           return PERCENT_CODE;
  "%debug"                          RETURN_PERCENT_FLAG("parse.trace");
  "%default"[-_]"prec"              return PERCENT_DEFAULT_PREC;
  "%define"                         return PERCENT_DEFINE;
  "%defines"                        return PERCENT_DEFINES;
  "%destructor"                     return PERCENT_DESTRUCTOR;
  "%dprec"                          return PERCENT_DPREC;
  "%error"[-_]"verbose"             return PERCENT_ERROR_VERBOSE;
  "%expect"                         return PERCENT_EXPECT;
  "%expect"[-_]"rr"                 return PERCENT_EXPECT_RR;
  "%file-prefix"                    return PERCENT_FILE_PREFIX;
  "%fixed"[-_]"output"[-_]"files"   return PERCENT_YACC;
  "%initial-action"                 return PERCENT_INITIAL_ACTION;
  "%glr-parser"                     return PERCENT_GLR_PARSER;
  "%language"                       return PERCENT_LANGUAGE;
  "%left"                           return PERCENT_LEFT;
  "%lex-param"                      RETURN_PERCENT_PARAM(lex);
  "%locations"                      RETURN_PERCENT_FLAG("locations");
  "%merge"                          return PERCENT_MERGE;
  "%name"[-_]"prefix"               return PERCENT_NAME_PREFIX;
  "%no"[-_]"default"[-_]"prec"      return PERCENT_NO_DEFAULT_PREC;
  "%no"[-_]"lines"                  return PERCENT_NO_LINES;
  "%nonassoc"                       return PERCENT_NONASSOC;
  "%nondeterministic-parser"        return PERCENT_NONDETERMINISTIC_PARSER;
  "%nterm"                          return PERCENT_NTERM;
  "%output"                         return PERCENT_OUTPUT;
  "%param"                          RETURN_PERCENT_PARAM(both);
  "%parse-param"                    RETURN_PERCENT_PARAM(parse);
  "%prec"                           return PERCENT_PREC;
  "%precedence"                     return PERCENT_PRECEDENCE;
  "%printer"                        return PERCENT_PRINTER;
  "%pure"[-_]"parser"               RETURN_PERCENT_FLAG("api.pure");
  "%require"                        return PERCENT_REQUIRE;
  "%right"                          return PERCENT_RIGHT;
  "%skeleton"                       return PERCENT_SKELETON;
  "%start"                          return PERCENT_START;
  "%term"                           return PERCENT_TOKEN;
  "%token"                          return PERCENT_TOKEN;
  "%token"[-_]"table"               return PERCENT_TOKEN_TABLE;
  "%type"                           return PERCENT_TYPE;
  "%union"                          return PERCENT_UNION;
  "%verbose"                        return PERCENT_VERBOSE;
  "%yacc"                           return PERCENT_YACC;

  /* Any other directive-looking word is reported, and no token is
     returned for it.  */
  {directive} {
    complain_at (*loc, _("invalid directive: %s"), quote (yytext));
  }

  "="                     return EQUAL;
  "|"                     return PIPE;
  ";"                     return SEMICOLON;

  /* An identifier is not returned right away: SC_AFTER_IDENTIFIER
     decides between ID and ID_COLON.  */
  {id} {
    id_loc = *loc;
    val->uniqstr = uniqstr_new (yytext);
    bracketed_id_str = NULL;
    BEGIN SC_AFTER_IDENTIFIER;
  }

  /* Decimal and hexadecimal integers.  */
  {int} {
    val->integer = scan_integer (yytext, 10, *loc);
    return INT;
  }
  0[xX][0-9abcdefABCDEF]+ {
    val->integer = scan_integer (yytext, 16, *loc);
    return INT;
  }

  /* Identifiers may not start with a digit.  Yet, don't silently
     accept "1FOO" as "1 FOO".  */
  {int}{id} {
    complain_at (*loc, _("invalid identifier: %s"), quote (yytext));
  }

  /* Characters.  */
  "'" {
    token_start = loc->start;
    BEGIN SC_ESCAPED_CHARACTER;
  }

  /* Strings.  */
  "\"" {
    token_start = loc->start;
    BEGIN SC_ESCAPED_STRING;
  }

  /* Prologue.  */
  "%{" {
    code_start = loc->start;
    BEGIN SC_PROLOGUE;
  }

  /* Code in between braces.  The opening brace is saved as part of
     the code.  */
  "{" {
    STRING_GROW;
    code_start = loc->start;
    nesting = 0;
    BEGIN SC_BRACED_CODE;
  }

  /* Semantic predicate.  Nothing of the opening is saved.  */
  "%?"[ \f\n\t\v]*"{" {
    code_start = loc->start;
    nesting = 0;
    BEGIN SC_PREDICATE;
  }

  /* A type.  */
  "<*>"                   return TAG_ANY;
  "<>"                    return TAG_NONE;
  "<"{tag}">" {
    /* Keep the tag without its enclosing angle brackets.  */
    obstack_grow (&obstack_for_string, yytext + 1, yyleng - 2);
    STRING_FINISH;
    val->uniqstr = uniqstr_new (last_string);
    STRING_FREE;
    return TAG;
  }
  "<" {
    /* Tag with nested angle brackets: handled in SC_TAG.  */
    token_start = loc->start;
    nesting = 0;
    BEGIN SC_TAG;
  }

  "%%" {
    /* The second "%%" opens the epilogue.  */
    static int percent_percent_count;
    if (++percent_percent_count == 2)
      BEGIN SC_EPILOGUE;
    return PERCENT_PERCENT;
  }

  "[" {
    bracketed_id_str = NULL;
    bracketed_id_start = loc->start;
    bracketed_id_context_state = YY_START;
    BEGIN SC_BRACKETED_ID;
  }

  . {
    complain_at (*loc, _("invalid character: %s"), quote (yytext));
  }

  <<EOF>> {
    loc->start = loc->end = scanner_cursor;
    yyterminate ();
  }
}
335
336
cb823b6f
AD
337 /*--------------------------------------------------------------.
338 | Supporting \0 complexifies our implementation for no expected |
339 | added value. |
340 `--------------------------------------------------------------*/
341
<SC_ESCAPED_CHARACTER,SC_ESCAPED_STRING,SC_TAG>
{
  /* A NUL byte is reported and left out of the saved string.  */
  \0 {
    complain_at (*loc, _("invalid null character"));
  }
}
346
347
3f2d73f1
PE
348 /*-----------------------------------------------------------------.
349 | Scanning after an identifier, checking whether a colon is next. |
350 `-----------------------------------------------------------------*/
351
<SC_AFTER_IDENTIFIER>
{
  "[" {
    if (!bracketed_id_str)
      {
        /* First bracket after the identifier: scan the bracketed
           name, then come back here.  */
        bracketed_id_start = loc->start;
        bracketed_id_context_state = YY_START;
        BEGIN SC_BRACKETED_ID;
      }
    else
      {
        /* A bracketed name is already pending: give the bracket
           back and return the identifier first.  */
        ROLLBACK_CURRENT_TOKEN;
        BEGIN SC_RETURN_BRACKETED_ID;
        *loc = id_loc;
        return ID;
      }
  }

  /* The identifier is followed by a colon.  */
  ":" {
    BEGIN (bracketed_id_str ? SC_RETURN_BRACKETED_ID : INITIAL);
    *loc = id_loc;
    return ID_COLON;
  }

  /* Anything else is given back; the identifier is a plain ID.  */
  . {
    ROLLBACK_CURRENT_TOKEN;
    BEGIN (bracketed_id_str ? SC_RETURN_BRACKETED_ID : INITIAL);
    *loc = id_loc;
    return ID;
  }

  <<EOF>> {
    BEGIN (bracketed_id_str ? SC_RETURN_BRACKETED_ID : INITIAL);
    *loc = id_loc;
    return ID;
  }
}
386
b9f1d9a4
AR
387 /*--------------------------------.
388 | Scanning bracketed identifiers. |
389 `--------------------------------*/
390
<SC_BRACKETED_ID>
{
  /* Only the first identifier between the brackets is kept.  */
  {id} {
    if (!bracketed_id_str)
      {
        bracketed_id_str = uniqstr_new (yytext);
        bracketed_id_loc = *loc;
      }
    else
      {
        complain_at (*loc, _("unexpected identifier in bracketed name: %s"),
                     quote (yytext));
      }
  }

  "]" {
    BEGIN bracketed_id_context_state;
    if (!bracketed_id_str)
      complain_at (*loc, _("an identifier expected"));
    else if (INITIAL == bracketed_id_context_state)
      {
        /* Coming from INITIAL, the name is returned at once; in the
           other case it stays pending in bracketed_id_str.  */
        val->uniqstr = bracketed_id_str;
        bracketed_id_str = 0;
        *loc = bracketed_id_loc;
        return BRACKETED_ID;
      }
  }

  . {
    complain_at (*loc, _("invalid character in bracketed name: %s"),
                 quote (yytext));
  }

  <<EOF>> {
    BEGIN bracketed_id_context_state;
    unexpected_eof (bracketed_id_start, "]");
  }
}
429
<SC_RETURN_BRACKETED_ID>
{
  /* Give the character back, and return the pending bracketed
     name.  */
  . {
    ROLLBACK_CURRENT_TOKEN;
    *loc = bracketed_id_loc;
    val->uniqstr = bracketed_id_str;
    bracketed_id_str = 0;
    BEGIN INITIAL;
    return BRACKETED_ID;
  }
}
441
e9955c83 442
d8d3f94a 443 /*---------------------------------------------------------------.
ae93e4e4 444 | Scanning a Yacc comment. The initial '/ *' is already eaten. |
d8d3f94a 445 `---------------------------------------------------------------*/
e9955c83 446
<SC_YACC_COMMENT>
{
  /* End of comment: resume the start condition it was found in.  */
  "*/" {
    BEGIN context_state;
  }

  /* The comment text is discarded.  */
  .|\n    ;

  <<EOF>> {
    unexpected_eof (token_start, "*/");
    BEGIN context_state;
  }
}
453
454
455 /*------------------------------------------------------------.
ae93e4e4 456 | Scanning a C comment. The initial '/ *' is already eaten. |
d8d3f94a
PE
457 `------------------------------------------------------------*/
458
<SC_COMMENT>
{
  /* End of comment; the star and the slash may be separated by
     backslash-newlines.  The terminator is saved with the code.  */
  "*"{splice}"/" {
    STRING_GROW;
    BEGIN context_state;
  }

  <<EOF>> {
    unexpected_eof (token_start, "*/");
    BEGIN context_state;
  }
}
464
465
d8d3f94a 466 /*--------------------------------------------------------------.
ae93e4e4 467 | Scanning a line comment. The initial '//' is already eaten. |
d8d3f94a
PE
468 `--------------------------------------------------------------*/
469
<SC_LINE_COMMENT>
{
  /* The newline ends the comment and is saved with the code.  */
  "\n" {
    STRING_GROW;
    BEGIN context_state;
  }

  /* Backslash-newlines are saved too, and do not end the comment.  */
  {splice}    STRING_GROW;

  <<EOF>> {
    BEGIN context_state;
  }
}
476
477
4febdd96
PE
478 /*------------------------------------------------.
479 | Scanning a Bison string, including its escapes. |
480 | The initial quote is already eaten. |
481 `------------------------------------------------*/
e9955c83
AD
482
<SC_ESCAPED_STRING>
{
  /* The closing quote ends the string; so does a newline, which is
     reported first.  */
  "\""|"\n" {
    if (yytext[0] == '\n')
      unexpected_newline (token_start, "\"");
    STRING_FINISH;
    val->chars = last_string;
    loc->start = token_start;
    BEGIN INITIAL;
    return STRING;
  }

  /* At end of file, complain and return what was gathered so far.  */
  <<EOF>> {
    unexpected_eof (token_start, "\"");
    STRING_FINISH;
    val->chars = last_string;
    loc->start = token_start;
    BEGIN INITIAL;
    return STRING;
  }
}
503
4febdd96
PE
504 /*----------------------------------------------------------.
505 | Scanning a Bison character literal, decoding its escapes. |
e9690142 506 | The initial quote is already eaten. |
4febdd96 507 `----------------------------------------------------------*/
e9955c83
AD
508
<SC_ESCAPED_CHARACTER>
{
  /* The closing quote ends the literal; so does a newline, which is
     reported after the warnings about the literal itself.  */
  "'"|"\n" {
    STRING_FINISH;
    loc->start = token_start;
    val->character = last_string[0];
    /* FIXME: Eventually, make these errors.  */
    if (last_string[0] == '\0')
      {
        warn_at (*loc, _("empty character literal"));
        /* '\0' seems dangerous even if we are about to complain.  */
        val->character = '\'';
      }
    else if (last_string[1] != '\0')
      {
        warn_at (*loc, _("extra characters in character literal"));
      }
    if (yytext[0] == '\n')
      unexpected_newline (token_start, "'");
    STRING_FREE;
    BEGIN INITIAL;
    return CHAR;
  }

  /* Same treatment at end of file, with the end-of-file complaint
     issued last.  */
  <<EOF>> {
    STRING_FINISH;
    loc->start = token_start;
    val->character = last_string[0];
    /* FIXME: Eventually, make these errors.  */
    if (last_string[0] == '\0')
      {
        warn_at (*loc, _("empty character literal"));
        /* '\0' seems dangerous even if we are about to complain.  */
        val->character = '\'';
      }
    else if (last_string[1] != '\0')
      {
        warn_at (*loc, _("extra characters in character literal"));
      }
    unexpected_eof (token_start, "'");
    STRING_FREE;
    BEGIN INITIAL;
    return CHAR;
  }
}
a706a1cc 553
cb823b6f
AD
554 /*-----------------------------------------------------------.
555 | Scanning a Bison nested tag. The initial angle bracket is |
556 | already eaten. |
557 `-----------------------------------------------------------*/
558
<SC_TAG>
{
  ">" {
    --nesting;
    if (0 <= nesting)
      {
        /* An inner closing bracket is part of the tag.  */
        STRING_GROW;
      }
    else
      {
        /* The outermost bracket closes the tag; it is not saved.  */
        STRING_FINISH;
        loc->start = token_start;
        val->uniqstr = uniqstr_new (last_string);
        STRING_FREE;
        BEGIN INITIAL;
        return TAG;
      }
  }

  [^<>]+  STRING_GROW;

  /* Each opening bracket adds one level of nesting.  */
  "<"+ {
    STRING_GROW;
    nesting += yyleng;
  }

  <<EOF>> {
    unexpected_eof (token_start, ">");
    STRING_FINISH;
    loc->start = token_start;
    val->uniqstr = uniqstr_new (last_string);
    STRING_FREE;
    BEGIN INITIAL;
    return TAG;
  }
}
e9955c83
AD
588
589 /*----------------------------.
590 | Decode escaped characters. |
591 `----------------------------*/
592
<SC_ESCAPED_STRING,SC_ESCAPED_CHARACTER>
{
  /* Octal escape.  Zero and values above UCHAR_MAX are rejected.  */
  \\[0-7]{1,3} {
    unsigned long int c = strtoul (yytext + 1, NULL, 8);
    if (c && c <= UCHAR_MAX)
      obstack_1grow (&obstack_for_string, c);
    else
      complain_at (*loc, _("invalid number after \\-escape: %s"),
                   yytext+1);
  }

  /* Hexadecimal escape, with the same restrictions.  */
  \\x[0-9abcdefABCDEF]+ {
    verify (UCHAR_MAX < ULONG_MAX);
    unsigned long int c = strtoul (yytext + 2, NULL, 16);
    if (c && c <= UCHAR_MAX)
      obstack_1grow (&obstack_for_string, c);
    else
      complain_at (*loc, _("invalid number after \\-escape: %s"),
                   yytext+1);
  }

  /* Simple escapes.  */
  \\a     obstack_1grow (&obstack_for_string, '\a');
  \\b     obstack_1grow (&obstack_for_string, '\b');
  \\f     obstack_1grow (&obstack_for_string, '\f');
  \\n     obstack_1grow (&obstack_for_string, '\n');
  \\r     obstack_1grow (&obstack_for_string, '\r');
  \\t     obstack_1grow (&obstack_for_string, '\t');
  \\v     obstack_1grow (&obstack_for_string, '\v');

  /* \\[\"\'?\\] would be shorter, but it confuses xgettext.  */
  \\("\""|"'"|"?"|"\\")   obstack_1grow (&obstack_for_string, yytext[1]);

  /* Universal character name: \u with four hexadecimal digits, or \U
     with eight.  */
  \\(u|U[0-9abcdefABCDEF]{4})[0-9abcdefABCDEF]{4} {
    int c = convert_ucn_to_byte (yytext);
    if (0 < c)
      obstack_1grow (&obstack_for_string, c);
    else
      complain_at (*loc, _("invalid number after \\-escape: %s"),
                   yytext+1);
  }

  /* Any other escape is an error.  */
  \\(.|\n) {
    char const *p = yytext + 1;
    /* Quote only if escaping won't make the character visible.  */
    if (isspace ((unsigned char) *p) && isprint ((unsigned char) *p))
      p = quote (p);
    else
      p = quotearg_style_mem (escape_quoting_style, p, 1);
    complain_at (*loc, _("invalid character after \\-escape: %s"), p);
  }
}
643
4febdd96
PE
644 /*--------------------------------------------.
645 | Scanning user-code characters and strings. |
646 `--------------------------------------------*/
e9955c83 647
4febdd96
PE
<SC_CHARACTER,SC_STRING>
{
  /* Backslash-newlines, and a backslash followed (possibly across
     backslash-newlines) by a character other than newline, '[' or
     ']', are saved as they are.  */
  {splice}|\\{splice}[^\n\[\]]    STRING_GROW;
}
e9955c83
AD
652
<SC_CHARACTER>
{
  /* The closing quote is saved with the code.  */
  "'" {
    STRING_GROW;
    BEGIN context_state;
  }

  /* A newline or the end of file inside the literal is reported;
     scanning resumes in the enclosing start condition.  */
  \n {
    unexpected_newline (token_start, "'");
    BEGIN context_state;
  }
  <<EOF>> {
    unexpected_eof (token_start, "'");
    BEGIN context_state;
  }
}
659
e9955c83
AD
<SC_STRING>
{
  /* The closing quote is saved with the code.  */
  "\"" {
    STRING_GROW;
    BEGIN context_state;
  }

  /* A newline or the end of file inside the string is reported;
     scanning resumes in the enclosing start condition.  */
  \n {
    unexpected_newline (token_start, "\"");
    BEGIN context_state;
  }
  <<EOF>> {
    unexpected_eof (token_start, "\"");
    BEGIN context_state;
  }
}
666
667
668 /*---------------------------------------------------.
669 | Strings, comments etc. can be found in user code. |
670 `---------------------------------------------------*/
671
<SC_BRACED_CODE,SC_PROLOGUE,SC_EPILOGUE,SC_PREDICATE>
{
  /* Each opener is saved with the code, and the current start
     condition is remembered so that it can be resumed.  */
  "'" {
    STRING_GROW;
    token_start = loc->start;
    context_state = YY_START;
    BEGIN SC_CHARACTER;
  }
  "\"" {
    STRING_GROW;
    token_start = loc->start;
    context_state = YY_START;
    BEGIN SC_STRING;
  }
  "/"{splice}"*" {
    STRING_GROW;
    token_start = loc->start;
    context_state = YY_START;
    BEGIN SC_COMMENT;
  }
  "/"{splice}"/" {
    STRING_GROW;
    context_state = YY_START;
    BEGIN SC_LINE_COMMENT;
  }
}
698
699
624a35e2 700
58d7a1a1 701 /*-----------------------------------------------------------.
ca2a6d15
PH
702 | Scanning some code in braces (actions, predicates). The |
703 | initial "{" is already eaten. |
58d7a1a1 704 `-----------------------------------------------------------*/
e9955c83 705
<SC_BRACED_CODE,SC_PREDICATE>
{
  /* '{' and the digraph '<%' open a nesting level; the digraph '%>'
     closes one.  */
  "{"|"<"{splice}"%" {
    STRING_GROW;
    nesting++;
  }
  "%"{splice}">" {
    STRING_GROW;
    nesting--;
  }

  /* Tokenize '<<%' correctly (as '<<' '%') rather than incorrectly
     (as '<' '<%').  */
  "<"{splice}"<"  STRING_GROW;

  /* At end of file, complain and return the code gathered so far as
     the token of the current start condition.  */
  <<EOF>> {
    int token = BRACED_PREDICATE;
    if (YY_START == SC_BRACED_CODE)
      token = BRACED_CODE;
    unexpected_eof (code_start, "}");
    STRING_FINISH;
    loc->start = code_start;
    val->code = last_string;
    BEGIN INITIAL;
    return token;
  }
}
725
<SC_BRACED_CODE>
{
  "}" {
    /* Every closing brace is saved, the final one included.  */
    obstack_1grow (&obstack_for_string, '}');

    if (--nesting < 0)
      {
        STRING_FINISH;
        val->code = last_string;
        loc->start = code_start;
        BEGIN INITIAL;
        return BRACED_CODE;
      }
  }
}
e9955c83 742
ca2a6d15
PH
<SC_PREDICATE>
{
  "}" {
    if (--nesting < 0)
      {
        /* The brace closing the predicate is not saved.  */
        STRING_FINISH;
        val->code = last_string;
        loc->start = code_start;
        BEGIN INITIAL;
        return BRACED_PREDICATE;
      }
    else
      {
        obstack_1grow (&obstack_for_string, '}');
      }
  }
}
759
e9955c83
AD
760 /*--------------------------------------------------------------.
761 | Scanning some prologue: from "%{" (already scanned) to "%}". |
762 `--------------------------------------------------------------*/
763
<SC_PROLOGUE>
{
  /* End of the prologue; the "%}" itself is not saved.  */
  "%}" {
    STRING_FINISH;
    val->chars = last_string;
    loc->start = code_start;
    BEGIN INITIAL;
    return PROLOGUE;
  }

  /* At end of file, complain and return what was gathered so far.  */
  <<EOF>> {
    unexpected_eof (code_start, "%}");
    STRING_FINISH;
    val->chars = last_string;
    loc->start = code_start;
    BEGIN INITIAL;
    return PROLOGUE;
  }
}
783
784
785 /*---------------------------------------------------------------.
786 | Scanning the epilogue (everything after the second "%%", which |
d8d3f94a 787 | has already been eaten). |
e9955c83
AD
788 `---------------------------------------------------------------*/
789
<SC_EPILOGUE>
{
  /* The epilogue runs up to the end of the file.  */
  <<EOF>> {
    STRING_FINISH;
    val->chars = last_string;
    loc->start = code_start;
    BEGIN INITIAL;
    return EPILOGUE;
  }
}
800
801
4febdd96
PE
802 /*-----------------------------------------------------.
803 | By default, grow the string obstack with the input. |
804 `-----------------------------------------------------*/
805
e9690142
JD
<SC_COMMENT,SC_LINE_COMMENT,SC_BRACED_CODE,SC_PREDICATE,SC_PROLOGUE,SC_EPILOGUE,SC_STRING,SC_CHARACTER,SC_ESCAPED_STRING,SC_ESCAPED_CHARACTER>. |
  <SC_COMMENT,SC_LINE_COMMENT,SC_BRACED_CODE,SC_PREDICATE,SC_PROLOGUE,SC_EPILOGUE>\n STRING_GROW;
4febdd96 808
e9955c83
AD
809%%
810
6c30d641
PE
/* Read up to SIZE bytes from FP into BUF, and return the number of
   bytes stored in BUF.  Carriage returns are removed from the input:
   both "\r\n" and an isolated '\r' are delivered as a single '\n'.
   When the chunk ends on a '\r', one more character is read from FP
   to see whether it is the matching '\n'; if it is not, it is pushed
   back with ungetc.  */

static size_t
no_cr_read (FILE *fp, char *buf, size_t size)
{
  size_t nread = fread (buf, 1, size, fp);
  char *dst = nread ? memchr (buf, '\r', nread) : NULL;

  /* Fast path: nothing was read, or the chunk holds no '\r'.  */
  if (!dst)
    return nread;

  /* DST is where the next byte is written, SRC where the next byte
     is read; DST never gets ahead of SRC.  */
  char const *end = buf + nread;
  char const *src = dst + 1;

  for (;;)
    {
      /* A '\r' was found.  Deliver it as '\n', and ignore a '\n'
         that immediately follows.  */
      *dst++ = '\n';
      if (src == end)
        {
          int ch = getc (fp);
          if (ch != '\n' && ungetc (ch, fp) != ch)
            break;
        }
      else if (*src == '\n')
        src++;

      /* Copy until the next '\r', or the end of the chunk.  */
      for (;;)
        {
          if (src == end)
            return dst - buf;
          char c = *src++;
          if (c == '\r')
            break;
          *dst++ = c;
        }
    }

  return dst - buf;
}
856
857
f25bfb75 858
1452af69
PE
859/*------------------------------------------------------.
860| Scan NUMBER for a base-BASE integer at location LOC. |
861`------------------------------------------------------*/
862
863static unsigned long int
864scan_integer (char const *number, int base, location loc)
865{
4517da37
PE
866 verify (INT_MAX < ULONG_MAX);
867 unsigned long int num = strtoul (number, NULL, base);
868
869 if (INT_MAX < num)
1452af69
PE
870 {
871 complain_at (loc, _("integer out of range: %s"), quote (number));
872 num = INT_MAX;
873 }
4517da37 874
1452af69
PE
875 return num;
876}
877
878
d8d3f94a
PE
/*------------------------------------------------------------------.
| UCN is the text of a universal character name: a backslash, 'u'   |
| or 'U', then hexadecimal digits.  Return the single-byte          |
| character it designates, or -1 if there is none.                  |
`------------------------------------------------------------------*/

static int
convert_ucn_to_byte (char const *ucn)
{
  verify (UCHAR_MAX <= INT_MAX);

  /* Skip the backslash and the 'u' or 'U'.  */
  unsigned long int code = strtoul (ucn + 2, NULL, 16);

  /* FIXME: Currently we assume Unicode-compatible unibyte characters
     on ASCII hosts (i.e., Latin-1 on hosts with 8-bit bytes).  On
     non-ASCII hosts we support only the portable C character set.
     These limitations should be removed once we add support for
     multibyte characters.  */

  if (code > UCHAR_MAX)
    return -1;

#if ! ('$' == 0x24 && '@' == 0x40 && '`' == 0x60 && '~' == 0x7e)
  {
    /* A non-ASCII host.  Use CODE to index into a table of the C
       basic execution character set, which is guaranteed to exist on
       all Standard C platforms.  This table also includes '$', '@',
       and '`', which are not in the basic execution character set but
       which are unibyte characters on all the platforms that we know
       about.  */
    static signed char const table[] =
      {
        '\0',   -1,   -1,   -1,   -1,   -1,   -1, '\a',
        '\b', '\t', '\n', '\v', '\f', '\r',   -1,   -1,
          -1,   -1,   -1,   -1,   -1,   -1,   -1,   -1,
          -1,   -1,   -1,   -1,   -1,   -1,   -1,   -1,
         ' ',  '!',  '"',  '#',  '$',  '%',  '&', '\'',
         '(',  ')',  '*',  '+',  ',',  '-',  '.',  '/',
         '0',  '1',  '2',  '3',  '4',  '5',  '6',  '7',
         '8',  '9',  ':',  ';',  '<',  '=',  '>',  '?',
         '@',  'A',  'B',  'C',  'D',  'E',  'F',  'G',
         'H',  'I',  'J',  'K',  'L',  'M',  'N',  'O',
         'P',  'Q',  'R',  'S',  'T',  'U',  'V',  'W',
         'X',  'Y',  'Z',  '[', '\\',  ']',  '^',  '_',
         '`',  'a',  'b',  'c',  'd',  'e',  'f',  'g',
         'h',  'i',  'j',  'k',  'l',  'm',  'n',  'o',
         'p',  'q',  'r',  's',  't',  'u',  'v',  'w',
         'x',  'y',  'z',  '{',  '|',  '}',  '~'
      };

    /* Codes beyond the end of the table have no translation.  */
    code = code < sizeof table ? table[code] : -1;
  }
#endif

  return code;
}
934
935
900c5db5 936/*----------------------------------------------------------------.
ae93e4e4 937| Handle '#line INT "FILE"'. ARGS has already skipped '#line '. |
900c5db5
AD
938`----------------------------------------------------------------*/
939
940static void
4517da37 941handle_syncline (char *args, location loc)
900c5db5 942{
4517da37
PE
943 char *after_num;
944 unsigned long int lineno = strtoul (args, &after_num, 10);
ba60c395
JD
945 char *file = mbschr (after_num, '"') + 1;
946 *mbschr (file, '"') = '\0';
4517da37
PE
947 if (INT_MAX <= lineno)
948 {
949 warn_at (loc, _("line number overflow"));
950 lineno = INT_MAX;
951 }
e9071366 952 current_file = uniqstr_new (file);
0c8e079f 953 boundary_set (&scanner_cursor, current_file, lineno, 1);
4517da37
PE
954}
955
956
4febdd96
PE
957/*----------------------------------------------------------------.
958| For a token or comment starting at START, report message MSGID, |
e9690142
JD
959| which should say that an end marker was found before |
960| the expected TOKEN_END. |
4febdd96
PE
961`----------------------------------------------------------------*/
962
963static void
964unexpected_end (boundary start, char const *msgid, char const *token_end)
965{
966 location loc;
967 loc.start = start;
968 loc.end = scanner_cursor;
4a9cd8f2
AD
969 token_end = quote (token_end);
970 // Instead of '\'', display "'".
971 if (STREQ (token_end, "'\\''", '\'', '\\', '\'', '\'', 0,0,0,0,0))
972 token_end = "\"'\"";
4febdd96
PE
973 complain_at (loc, _(msgid), token_end);
974}
975
976
3f2d73f1
PE
977/*------------------------------------------------------------------------.
978| Report an unexpected EOF in a token or comment starting at START. |
979| An end of file was encountered and the expected TOKEN_END was missing. |
3f2d73f1 980`------------------------------------------------------------------------*/
a706a1cc
PE
981
982static void
aa418041 983unexpected_eof (boundary start, char const *token_end)
a706a1cc 984{
4a9cd8f2 985 unexpected_end (start, N_("missing %s at end of file"), token_end);
4febdd96
PE
986}
987
988
989/*----------------------------------------.
990| Likewise, but for unexpected newlines. |
991`----------------------------------------*/
992
993static void
994unexpected_newline (boundary start, char const *token_end)
995{
4a9cd8f2 996 unexpected_end (start, N_("missing %s at end of line"), token_end);
a706a1cc
PE
997}
998
999
f25bfb75
AD
1000/*-------------------------.
1001| Initialize the scanner. |
1002`-------------------------*/
1003
1d6412ad 1004void
e9071366 1005gram_scanner_initialize (void)
1d6412ad 1006{
223ff46e 1007 obstack_init (&obstack_for_string);
1d6412ad
AD
1008}
1009
1010
f25bfb75
AD
1011/*-----------------------------------------------.
1012| Free all the memory allocated to the scanner. |
1013`-----------------------------------------------*/
1014
4cdb01db 1015void
e9071366 1016gram_scanner_free (void)
4cdb01db 1017{
223ff46e 1018 obstack_free (&obstack_for_string, 0);
536545f3 1019 /* Reclaim Flex's buffers. */
580b8926 1020 yylex_destroy ();
4cdb01db 1021}