]> git.saurik.com Git - bison.git/blame - src/scan-gram.l
maint: run "make update-copyright".
[bison.git] / src / scan-gram.l
CommitLineData
e9955c83 1/* Bison Grammar Scanner -*- C -*-
3b1e470c 2
575619af 3 Copyright (C) 2002-2011 Free Software Foundation, Inc.
e9955c83
AD
4
5 This file is part of Bison, the GNU Compiler Compiler.
6
f16b0819 7 This program is free software: you can redistribute it and/or modify
e9955c83 8 it under the terms of the GNU General Public License as published by
f16b0819 9 the Free Software Foundation, either version 3 of the License, or
e9955c83
AD
10 (at your option) any later version.
11
12 This program is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
16
17 You should have received a copy of the GNU General Public License
f16b0819 18 along with this program. If not, see <http://www.gnu.org/licenses/>. */
e9955c83 19
8d90395d 20%option debug nodefault noinput nounput noyywrap never-interactive
e9955c83
AD
21%option prefix="gram_" outfile="lex.yy.c"
22
23%{
4f6e011e
PE
24/* Work around a bug in flex 2.5.31. See Debian bug 333231
25 <http://bugs.debian.org/cgi-bin/bugreport.cgi?bug=333231>. */
26#undef gram_wrap
27#define gram_wrap() 1
28
e9071366 29#define FLEX_PREFIX(Id) gram_ ## Id
0305d25e 30#include <src/flex-scanner.h>
223ff46e 31
0305d25e
AD
32#include <src/complain.h>
33#include <src/files.h>
34#include <src/gram.h>
35#include <quotearg.h>
36#include <src/reader.h>
37#include <src/uniqstr.h>
e9955c83 38
c2724603 39#include <ctype.h>
e9071366
AD
40#include <mbswidth.h>
41#include <quote.h>
42
0305d25e 43#include <src/scan-gram.h>
e9071366
AD
44
45#define YY_DECL GRAM_LEX_DECL
2346344a 46
3f2d73f1 47#define YY_USER_INIT \
e9071366 48 code_start = scanner_cursor = loc->start; \
dc9701e8 49
3f2d73f1 50/* Location of scanner cursor. */
4a678af8 51static boundary scanner_cursor;
41141c56 52
e9071366 53#define YY_USER_ACTION location_compute (loc, &scanner_cursor, yytext, yyleng);
d8d3f94a 54
6c30d641 55static size_t no_cr_read (FILE *, char *, size_t);
d8d3f94a
PE
56#define YY_INPUT(buf, result, size) ((result) = no_cr_read (yyin, buf, size))
57
a7706735
AD
58#define RETURN_PERCENT_PARAM(Value) \
59 RETURN_VALUE(PERCENT_PARAM, param, param_ ## Value)
60
61#define RETURN_PERCENT_FLAG(Value) \
62 RETURN_VALUE(PERCENT_FLAG, uniqstr, uniqstr_new (Value))
63
64#define RETURN_VALUE(Token, Field, Value) \
ba061fa6 65 do { \
a7706735
AD
66 val->Field = Value; \
67 return Token; \
ba061fa6
AD
68 } while (0)
69
b9f1d9a4
AR
70#define ROLLBACK_CURRENT_TOKEN \
71 do { \
72 scanner_cursor.column -= mbsnwidth (yytext, yyleng, 0); \
73 yyless (0); \
74 } while (0)
ba061fa6 75
7ec2d4cd 76/* A string representing the most recently saved token. */
7c0c6181 77static char *last_string;
7ec2d4cd 78
872b52bc 79/* Bracketed identifier. */
b9f1d9a4
AR
80static uniqstr bracketed_id_str = 0;
81static location bracketed_id_loc;
82static boundary bracketed_id_start;
83static int bracketed_id_context_state = 0;
84
7ec2d4cd 85void
e9071366 86gram_scanner_last_string_free (void)
7ec2d4cd 87{
41141c56 88 STRING_FREE;
7ec2d4cd 89}
e9955c83 90
4517da37 91static void handle_syncline (char *, location);
1452af69 92static unsigned long int scan_integer (char const *p, int base, location loc);
d8d3f94a 93static int convert_ucn_to_byte (char const *hex_text);
aa418041 94static void unexpected_eof (boundary, char const *);
4febdd96 95static void unexpected_newline (boundary, char const *);
e9955c83
AD
96
97%}
e9071366
AD
98 /* A C-like comment in directives/rules. */
99%x SC_YACC_COMMENT
100 /* Strings and characters in directives/rules. */
e9955c83 101%x SC_ESCAPED_STRING SC_ESCAPED_CHARACTER
e9071366
AD
102 /* An identifier was just read in directives/rules. Special state
103 to capture the sequence `identifier :'. */
104%x SC_AFTER_IDENTIFIER
cb823b6f
AD
105 /* A complex tag, with nested angle brackets. */
106%x SC_TAG
e9071366 107
ca2a6d15 108 /* Four types of user code:
e9071366
AD
109 - prologue (code between `%{' `%}' in the first section, before %%);
110 - actions, printers, union, etc. (between braces in the middle section);
ca2a6d15
PH
111 - epilogue (everything after the second %%).
112 - predicate (code between `%?{' and `}' in middle section); */
113%x SC_PROLOGUE SC_BRACED_CODE SC_EPILOGUE SC_PREDICATE
e9071366
AD
114 /* C and C++ comments in code. */
115%x SC_COMMENT SC_LINE_COMMENT
116 /* Strings and characters in code. */
117%x SC_STRING SC_CHARACTER
872b52bc 118 /* Bracketed identifiers support. */
b9f1d9a4 119%x SC_BRACKETED_ID SC_RETURN_BRACKETED_ID
e9955c83 120
cdf3f113
AD
121letter [-.abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ_]
122id {letter}({letter}|[0-9])*
4f646c37 123directive %{id}
624a35e2 124int [0-9]+
d8d3f94a
PE
125
126/* POSIX says that a tag must be both an id and a C union member, but
127 historically almost any character is allowed in a tag. We disallow
cb823b6f
AD
128 NUL, as this simplifies our implementation. We disallow angle
129 bracket to match them in nested pairs: several languages use them
130 for generics/template types. */
131tag [^\0<>]+
d8d3f94a
PE
132
133/* Zero or more instances of backslash-newline. Following GCC, allow
134 white space between the backslash and the newline. */
135splice (\\[ \f\t\v]*\n)*
e9955c83
AD
136
137%%
138%{
cb823b6f
AD
139 /* Nesting level. Either for nested braces, or nested angle brackets
140 (but not mixed). */
141 int nesting IF_LINT (= 0);
1a9e39f1 142
3f2d73f1 143 /* Parent context state, when applicable. */
5362ed19 144 int context_state IF_LINT (= 0);
a706a1cc 145
3f2d73f1 146 /* Location of most recent identifier, when applicable. */
a2bc9dbc 147 location id_loc IF_LINT (= empty_location);
3f2d73f1 148
a2bc9dbc
PE
149 /* Where containing code started, when applicable. Its initial
150 value is relevant only when yylex is invoked in the SC_EPILOGUE
151 start condition. */
152 boundary code_start = scanner_cursor;
3f2d73f1 153
223ff46e
PE
154 /* Where containing comment or string or character literal started,
155 when applicable. */
a2bc9dbc 156 boundary token_start IF_LINT (= scanner_cursor);
e9955c83
AD
157%}
158
159
3f2d73f1
PE
160 /*-----------------------.
161 | Scanning white space. |
162 `-----------------------*/
163
b9f1d9a4 164<INITIAL,SC_AFTER_IDENTIFIER,SC_BRACKETED_ID,SC_RETURN_BRACKETED_ID>
3f2d73f1 165{
4febdd96 166 /* Comments and white space. */
83adb046 167 "," warn_at (*loc, _("stray `,' treated as white space"));
4febdd96 168 [ \f\n\t\v] |
3f2d73f1 169 "//".* ;
83adb046
PE
170 "/*" {
171 token_start = loc->start;
172 context_state = YY_START;
173 BEGIN SC_YACC_COMMENT;
174 }
3f2d73f1
PE
175
176 /* #line directives are not documented, and may be withdrawn or
177 modified in future versions of Bison. */
178 ^"#line "{int}" \"".*"\"\n" {
4517da37 179 handle_syncline (yytext + sizeof "#line " - 1, *loc);
3f2d73f1
PE
180 }
181}
182
183
e9955c83
AD
184 /*----------------------------.
185 | Scanning Bison directives. |
186 `----------------------------*/
a7c09cba
DJ
187
188 /* For directives that are also command line options, the regex must be
189 "%..."
190 after "[-_]"s are removed, and the directive must match the --long
191 option name, with a single string argument. Otherwise, add exceptions
192 to ../build-aux/cross-options.pl. */
193
e9955c83
AD
194<INITIAL>
195{
deef2a0a 196 "%binary" return PERCENT_NONASSOC;
136a0f76 197 "%code" return PERCENT_CODE;
fa819509 198 "%debug" RETURN_PERCENT_FLAG("parse.trace");
deef2a0a
AD
199 "%default"[-_]"prec" return PERCENT_DEFAULT_PREC;
200 "%define" return PERCENT_DEFINE;
201 "%defines" return PERCENT_DEFINES;
202 "%destructor" return PERCENT_DESTRUCTOR;
203 "%dprec" return PERCENT_DPREC;
31b850d2 204 "%error"[-_]"verbose" return PERCENT_ERROR_VERBOSE;
deef2a0a
AD
205 "%expect" return PERCENT_EXPECT;
206 "%expect"[-_]"rr" return PERCENT_EXPECT_RR;
207 "%file-prefix" return PERCENT_FILE_PREFIX;
e9955c83 208 "%fixed"[-_]"output"[-_]"files" return PERCENT_YACC;
deef2a0a
AD
209 "%initial-action" return PERCENT_INITIAL_ACTION;
210 "%glr-parser" return PERCENT_GLR_PARSER;
211 "%language" return PERCENT_LANGUAGE;
212 "%left" return PERCENT_LEFT;
a7706735 213 "%lex-param" RETURN_PERCENT_PARAM(lex);
bc0f5737 214 "%locations" RETURN_PERCENT_FLAG("locations");
deef2a0a
AD
215 "%merge" return PERCENT_MERGE;
216 "%name"[-_]"prefix" return PERCENT_NAME_PREFIX;
217 "%no"[-_]"default"[-_]"prec" return PERCENT_NO_DEFAULT_PREC;
218 "%no"[-_]"lines" return PERCENT_NO_LINES;
219 "%nonassoc" return PERCENT_NONASSOC;
220 "%nondeterministic-parser" return PERCENT_NONDETERMINISTIC_PARSER;
221 "%nterm" return PERCENT_NTERM;
222 "%output" return PERCENT_OUTPUT;
a7706735
AD
223 "%param" RETURN_PERCENT_PARAM(both);
224 "%parse-param" RETURN_PERCENT_PARAM(parse);
deef2a0a 225 "%prec" return PERCENT_PREC;
d78f0ac9 226 "%precedence" return PERCENT_PRECEDENCE;
deef2a0a 227 "%printer" return PERCENT_PRINTER;
4920ae8b 228 "%pure"[-_]"parser" RETURN_PERCENT_FLAG("api.pure");
deef2a0a
AD
229 "%require" return PERCENT_REQUIRE;
230 "%right" return PERCENT_RIGHT;
231 "%skeleton" return PERCENT_SKELETON;
232 "%start" return PERCENT_START;
233 "%term" return PERCENT_TOKEN;
234 "%token" return PERCENT_TOKEN;
235 "%token"[-_]"table" return PERCENT_TOKEN_TABLE;
236 "%type" return PERCENT_TYPE;
237 "%union" return PERCENT_UNION;
238 "%verbose" return PERCENT_VERBOSE;
239 "%yacc" return PERCENT_YACC;
e9955c83 240
3f2d73f1 241 {directive} {
41141c56 242 complain_at (*loc, _("invalid directive: %s"), quote (yytext));
412f8a59 243 }
900c5db5 244
e9955c83 245 "=" return EQUAL;
e9071366 246 "|" return PIPE;
e9955c83
AD
247 ";" return SEMICOLON;
248
3f2d73f1 249 {id} {
58d7a1a1 250 val->uniqstr = uniqstr_new (yytext);
3f2d73f1 251 id_loc = *loc;
b9f1d9a4 252 bracketed_id_str = NULL;
3f2d73f1 253 BEGIN SC_AFTER_IDENTIFIER;
e9955c83
AD
254 }
255
d8d3f94a 256 {int} {
1452af69
PE
257 val->integer = scan_integer (yytext, 10, *loc);
258 return INT;
259 }
260 0[xX][0-9abcdefABCDEF]+ {
261 val->integer = scan_integer (yytext, 16, *loc);
d8d3f94a
PE
262 return INT;
263 }
e9955c83 264
84a1cb5a
AD
265 /* Identifiers may not start with a digit. Yet, don't silently
266 accept "1FOO" as "1 FOO". */
267 {int}{id} {
268 complain_at (*loc, _("invalid identifier: %s"), quote (yytext));
269 }
270
3208e3f4 271 /* Characters. */
dfaa4860 272 "'" token_start = loc->start; BEGIN SC_ESCAPED_CHARACTER;
e9955c83
AD
273
274 /* Strings. */
ca407bdf 275 "\"" token_start = loc->start; BEGIN SC_ESCAPED_STRING;
e9955c83
AD
276
277 /* Prologue. */
3f2d73f1 278 "%{" code_start = loc->start; BEGIN SC_PROLOGUE;
e9955c83
AD
279
280 /* Code in between braces. */
3f2d73f1
PE
281 "{" {
282 STRING_GROW;
cb823b6f 283 nesting = 0;
3f2d73f1
PE
284 code_start = loc->start;
285 BEGIN SC_BRACED_CODE;
286 }
e9955c83 287
ca2a6d15
PH
288 /* Semantic predicate. */
289 "%?"[ \f\n\t\v]*"{" {
290 nesting = 0;
291 code_start = loc->start;
292 BEGIN SC_PREDICATE;
293 }
294
e9955c83 295 /* A type. */
cb823b6f
AD
296 "<*>" return TAG_ANY;
297 "<>" return TAG_NONE;
d8d3f94a 298 "<"{tag}">" {
223ff46e 299 obstack_grow (&obstack_for_string, yytext + 1, yyleng - 2);
41141c56 300 STRING_FINISH;
223ff46e 301 val->uniqstr = uniqstr_new (last_string);
41141c56 302 STRING_FREE;
cb823b6f
AD
303 return TAG;
304 }
305 "<" {
306 nesting = 0;
307 token_start = loc->start;
308 BEGIN SC_TAG;
4cdb01db
AD
309 }
310
a706a1cc
PE
311 "%%" {
312 static int percent_percent_count;
e9955c83 313 if (++percent_percent_count == 2)
a2bc9dbc 314 BEGIN SC_EPILOGUE;
e9955c83
AD
315 return PERCENT_PERCENT;
316 }
317
b9f1d9a4
AR
318 "[" {
319 bracketed_id_str = NULL;
320 bracketed_id_start = loc->start;
321 bracketed_id_context_state = YY_START;
322 BEGIN SC_BRACKETED_ID;
323 }
324
a706a1cc 325 . {
41141c56 326 complain_at (*loc, _("invalid character: %s"), quote (yytext));
3f2d73f1 327 }
379f0ac8
PE
328
329 <<EOF>> {
330 loc->start = loc->end = scanner_cursor;
331 yyterminate ();
332 }
3f2d73f1
PE
333}
334
335
cb823b6f
AD
336 /*--------------------------------------------------------------.
337 | Supporting \0 complexifies our implementation for no expected |
338 | added value. |
339 `--------------------------------------------------------------*/
340
341<SC_ESCAPED_CHARACTER,SC_ESCAPED_STRING,SC_TAG>
342{
343 \0 complain_at (*loc, _("invalid null character"));
344}
345
346
3f2d73f1
PE
347 /*-----------------------------------------------------------------.
348 | Scanning after an identifier, checking whether a colon is next. |
349 `-----------------------------------------------------------------*/
350
351<SC_AFTER_IDENTIFIER>
352{
b9f1d9a4 353 "[" {
872b52bc 354 if (bracketed_id_str)
b9f1d9a4
AR
355 {
356 ROLLBACK_CURRENT_TOKEN;
357 BEGIN SC_RETURN_BRACKETED_ID;
358 *loc = id_loc;
359 return ID;
360 }
872b52bc
AR
361 else
362 {
363 bracketed_id_start = loc->start;
364 bracketed_id_context_state = YY_START;
365 BEGIN SC_BRACKETED_ID;
366 }
b9f1d9a4 367 }
3f2d73f1 368 ":" {
b9f1d9a4 369 BEGIN (bracketed_id_str ? SC_RETURN_BRACKETED_ID : INITIAL);
3f2d73f1 370 *loc = id_loc;
3f2d73f1
PE
371 return ID_COLON;
372 }
373 . {
b9f1d9a4
AR
374 ROLLBACK_CURRENT_TOKEN;
375 BEGIN (bracketed_id_str ? SC_RETURN_BRACKETED_ID : INITIAL);
3f2d73f1 376 *loc = id_loc;
3f2d73f1
PE
377 return ID;
378 }
379 <<EOF>> {
b9f1d9a4 380 BEGIN (bracketed_id_str ? SC_RETURN_BRACKETED_ID : INITIAL);
3f2d73f1 381 *loc = id_loc;
3f2d73f1 382 return ID;
e9955c83
AD
383 }
384}
385
b9f1d9a4
AR
386 /*--------------------------------.
387 | Scanning bracketed identifiers. |
388 `--------------------------------*/
389
390<SC_BRACKETED_ID>
391{
392 {id} {
872b52bc 393 if (bracketed_id_str)
b9f1d9a4 394 {
872b52bc
AR
395 complain_at (*loc, _("unexpected identifier in bracketed name: %s"),
396 quote (yytext));
b9f1d9a4
AR
397 }
398 else
399 {
872b52bc
AR
400 bracketed_id_str = uniqstr_new (yytext);
401 bracketed_id_loc = *loc;
b9f1d9a4
AR
402 }
403 }
404 "]" {
405 BEGIN bracketed_id_context_state;
406 if (bracketed_id_str)
407 {
408 if (INITIAL == bracketed_id_context_state)
409 {
410 val->uniqstr = bracketed_id_str;
411 bracketed_id_str = 0;
412 *loc = bracketed_id_loc;
413 return BRACKETED_ID;
414 }
415 }
416 else
872b52bc 417 complain_at (*loc, _("an identifier expected"));
b9f1d9a4
AR
418 }
419 . {
420 complain_at (*loc, _("invalid character in bracketed name: %s"),
421 quote (yytext));
422 }
423 <<EOF>> {
424 BEGIN bracketed_id_context_state;
425 unexpected_eof (bracketed_id_start, "]");
426 }
427}
428
429<SC_RETURN_BRACKETED_ID>
430{
431 . {
432 ROLLBACK_CURRENT_TOKEN;
433 val->uniqstr = bracketed_id_str;
434 bracketed_id_str = 0;
435 *loc = bracketed_id_loc;
436 BEGIN INITIAL;
437 return BRACKETED_ID;
438 }
439}
440
e9955c83 441
d8d3f94a
PE
442 /*---------------------------------------------------------------.
443 | Scanning a Yacc comment. The initial `/ *' is already eaten. |
444 `---------------------------------------------------------------*/
e9955c83 445
d8d3f94a 446<SC_YACC_COMMENT>
e9955c83 447{
3f2d73f1 448 "*/" BEGIN context_state;
a706a1cc 449 .|\n ;
aa418041 450 <<EOF>> unexpected_eof (token_start, "*/"); BEGIN context_state;
d8d3f94a
PE
451}
452
453
454 /*------------------------------------------------------------.
455 | Scanning a C comment. The initial `/ *' is already eaten. |
456 `------------------------------------------------------------*/
457
458<SC_COMMENT>
459{
3f2d73f1 460 "*"{splice}"/" STRING_GROW; BEGIN context_state;
aa418041 461 <<EOF>> unexpected_eof (token_start, "*/"); BEGIN context_state;
e9955c83
AD
462}
463
464
d8d3f94a
PE
465 /*--------------------------------------------------------------.
466 | Scanning a line comment. The initial `//' is already eaten. |
467 `--------------------------------------------------------------*/
468
469<SC_LINE_COMMENT>
470{
3f2d73f1 471 "\n" STRING_GROW; BEGIN context_state;
41141c56 472 {splice} STRING_GROW;
3f2d73f1 473 <<EOF>> BEGIN context_state;
d8d3f94a
PE
474}
475
476
4febdd96
PE
477 /*------------------------------------------------.
478 | Scanning a Bison string, including its escapes. |
479 | The initial quote is already eaten. |
480 `------------------------------------------------*/
e9955c83
AD
481
482<SC_ESCAPED_STRING>
483{
47aee066
JD
484 "\""|"\n" {
485 if (yytext[0] == '\n')
486 unexpected_newline (token_start, "\"");
487 STRING_FINISH;
488 loc->start = token_start;
489 val->chars = last_string;
490 BEGIN INITIAL;
491 return STRING;
492 }
493 <<EOF>> {
494 unexpected_eof (token_start, "\"");
41141c56 495 STRING_FINISH;
3f2d73f1 496 loc->start = token_start;
223ff46e 497 val->chars = last_string;
a706a1cc 498 BEGIN INITIAL;
e9955c83
AD
499 return STRING;
500 }
e9955c83
AD
501}
502
4febdd96
PE
503 /*----------------------------------------------------------.
504 | Scanning a Bison character literal, decoding its escapes. |
505 | The initial quote is already eaten. |
506 `----------------------------------------------------------*/
e9955c83
AD
507
508<SC_ESCAPED_CHARACTER>
509{
47aee066 510 "'"|"\n" {
41141c56 511 STRING_FINISH;
3f2d73f1 512 loc->start = token_start;
dfaa4860 513 val->character = last_string[0];
3208e3f4
JD
514 {
515 /* FIXME: Eventually, make these errors. */
dfaa4860
JD
516 if (last_string[0] == '\0')
517 {
518 warn_at (*loc, _("empty character literal"));
519 /* '\0' seems dangerous even if we are about to complain. */
520 val->character = '\'';
521 }
522 else if (last_string[1] != '\0')
3208e3f4
JD
523 warn_at (*loc, _("extra characters in character literal"));
524 }
525 if (yytext[0] == '\n')
526 unexpected_newline (token_start, "'");
41141c56 527 STRING_FREE;
a706a1cc 528 BEGIN INITIAL;
58d7a1a1 529 return CHAR;
e9955c83 530 }
47aee066 531 <<EOF>> {
47aee066
JD
532 STRING_FINISH;
533 loc->start = token_start;
dfaa4860 534 val->character = last_string[0];
3208e3f4 535 {
3208e3f4 536 /* FIXME: Eventually, make these errors. */
dfaa4860
JD
537 if (last_string[0] == '\0')
538 {
539 warn_at (*loc, _("empty character literal"));
540 /* '\0' seems dangerous even if we are about to complain. */
541 val->character = '\'';
542 }
543 else if (last_string[1] != '\0')
3208e3f4 544 warn_at (*loc, _("extra characters in character literal"));
3208e3f4
JD
545 }
546 unexpected_eof (token_start, "'");
47aee066
JD
547 STRING_FREE;
548 BEGIN INITIAL;
549 return CHAR;
550 }
4febdd96 551}
a706a1cc 552
cb823b6f
AD
553 /*-----------------------------------------------------------.
554 | Scanning a Bison nested tag. The initial angle bracket is |
555 | already eaten. |
556 `-----------------------------------------------------------*/
557
558<SC_TAG>
4febdd96 559{
cb823b6f
AD
560 ">" {
561 --nesting;
562 if (nesting < 0)
563 {
564 STRING_FINISH;
565 loc->start = token_start;
566 val->uniqstr = uniqstr_new (last_string);
567 STRING_FREE;
568 BEGIN INITIAL;
569 return TAG;
570 }
571 STRING_GROW;
572 }
573
574 [^<>]+ STRING_GROW;
575 "<"+ STRING_GROW; nesting += yyleng;
e9955c83 576
cb823b6f
AD
577 <<EOF>> {
578 unexpected_eof (token_start, ">");
579 STRING_FINISH;
580 loc->start = token_start;
581 val->uniqstr = uniqstr_new (last_string);
582 STRING_FREE;
583 BEGIN INITIAL;
584 return TAG;
585 }
586}
e9955c83
AD
587
588 /*----------------------------.
589 | Decode escaped characters. |
590 `----------------------------*/
591
592<SC_ESCAPED_STRING,SC_ESCAPED_CHARACTER>
593{
d8d3f94a 594 \\[0-7]{1,3} {
4517da37 595 unsigned long int c = strtoul (yytext + 1, NULL, 8);
c2724603
JD
596 if (!c || UCHAR_MAX < c)
597 complain_at (*loc, _("invalid number after \\-escape: %s"),
598 yytext+1);
e9955c83 599 else
223ff46e 600 obstack_1grow (&obstack_for_string, c);
e9955c83
AD
601 }
602
6b0d38ab 603 \\x[0-9abcdefABCDEF]+ {
4517da37
PE
604 verify (UCHAR_MAX < ULONG_MAX);
605 unsigned long int c = strtoul (yytext + 2, NULL, 16);
c2724603
JD
606 if (!c || UCHAR_MAX < c)
607 complain_at (*loc, _("invalid number after \\-escape: %s"),
608 yytext+1);
d8d3f94a 609 else
223ff46e 610 obstack_1grow (&obstack_for_string, c);
e9955c83
AD
611 }
612
223ff46e
PE
613 \\a obstack_1grow (&obstack_for_string, '\a');
614 \\b obstack_1grow (&obstack_for_string, '\b');
615 \\f obstack_1grow (&obstack_for_string, '\f');
616 \\n obstack_1grow (&obstack_for_string, '\n');
617 \\r obstack_1grow (&obstack_for_string, '\r');
618 \\t obstack_1grow (&obstack_for_string, '\t');
619 \\v obstack_1grow (&obstack_for_string, '\v');
412f8a59
PE
620
621 /* \\[\"\'?\\] would be shorter, but it confuses xgettext. */
223ff46e 622 \\("\""|"'"|"?"|"\\") obstack_1grow (&obstack_for_string, yytext[1]);
412f8a59 623
6b0d38ab 624 \\(u|U[0-9abcdefABCDEF]{4})[0-9abcdefABCDEF]{4} {
d8d3f94a 625 int c = convert_ucn_to_byte (yytext);
c2724603
JD
626 if (c <= 0)
627 complain_at (*loc, _("invalid number after \\-escape: %s"),
628 yytext+1);
d8d3f94a 629 else
223ff46e 630 obstack_1grow (&obstack_for_string, c);
d8d3f94a 631 }
4f25ebb0 632 \\(.|\n) {
c2724603 633 char const *p = yytext + 1;
e6c849d8 634 /* Quote only if escaping won't make the character visible. */
4413bbd3 635 if (isspace ((unsigned char) *p) && isprint ((unsigned char) *p))
e6c849d8 636 p = quote (p);
c2724603
JD
637 else
638 p = quotearg_style_mem (escape_quoting_style, p, 1);
639 complain_at (*loc, _("invalid character after \\-escape: %s"), p);
e9955c83
AD
640 }
641}
642
4febdd96
PE
643 /*--------------------------------------------.
644 | Scanning user-code characters and strings. |
645 `--------------------------------------------*/
e9955c83 646
4febdd96
PE
647<SC_CHARACTER,SC_STRING>
648{
e9071366 649 {splice}|\\{splice}[^\n\[\]] STRING_GROW;
4febdd96 650}
e9955c83
AD
651
652<SC_CHARACTER>
653{
4febdd96
PE
654 "'" STRING_GROW; BEGIN context_state;
655 \n unexpected_newline (token_start, "'"); BEGIN context_state;
656 <<EOF>> unexpected_eof (token_start, "'"); BEGIN context_state;
e9955c83
AD
657}
658
e9955c83
AD
659<SC_STRING>
660{
4febdd96
PE
661 "\"" STRING_GROW; BEGIN context_state;
662 \n unexpected_newline (token_start, "\""); BEGIN context_state;
663 <<EOF>> unexpected_eof (token_start, "\""); BEGIN context_state;
e9955c83
AD
664}
665
666
667 /*---------------------------------------------------.
668 | Strings, comments etc. can be found in user code. |
669 `---------------------------------------------------*/
670
ca2a6d15 671<SC_BRACED_CODE,SC_PROLOGUE,SC_EPILOGUE,SC_PREDICATE>
e9955c83 672{
3f2d73f1
PE
673 "'" {
674 STRING_GROW;
675 context_state = YY_START;
676 token_start = loc->start;
677 BEGIN SC_CHARACTER;
678 }
679 "\"" {
680 STRING_GROW;
681 context_state = YY_START;
682 token_start = loc->start;
683 BEGIN SC_STRING;
684 }
685 "/"{splice}"*" {
686 STRING_GROW;
687 context_state = YY_START;
688 token_start = loc->start;
689 BEGIN SC_COMMENT;
690 }
691 "/"{splice}"/" {
692 STRING_GROW;
693 context_state = YY_START;
694 BEGIN SC_LINE_COMMENT;
695 }
e9955c83
AD
696}
697
698
624a35e2 699
58d7a1a1 700 /*-----------------------------------------------------------.
ca2a6d15
PH
701 | Scanning some code in braces (actions, predicates). The |
702 | initial "{" is already eaten. |
58d7a1a1 703 `-----------------------------------------------------------*/
e9955c83 704
ca2a6d15 705<SC_BRACED_CODE,SC_PREDICATE>
e9955c83 706{
cb823b6f
AD
707 "{"|"<"{splice}"%" STRING_GROW; nesting++;
708 "%"{splice}">" STRING_GROW; nesting--;
ca2a6d15
PH
709
710 /* Tokenize `<<%' correctly (as `<<' `%') rather than incorrectly
711 (as `<' `<%'). */
712 "<"{splice}"<" STRING_GROW;
713
714 <<EOF>> {
715 int token = (YY_START == SC_BRACED_CODE) ? BRACED_CODE : BRACED_PREDICATE;
716 unexpected_eof (code_start, "}");
717 STRING_FINISH;
718 loc->start = code_start;
719 val->code = last_string;
720 BEGIN INITIAL;
721 return token;
722 }
723}
724
725<SC_BRACED_CODE>
726{
e9955c83 727 "}" {
25522739
PE
728 obstack_1grow (&obstack_for_string, '}');
729
cb823b6f
AD
730 --nesting;
731 if (nesting < 0)
e9955c83 732 {
41141c56 733 STRING_FINISH;
3f2d73f1 734 loc->start = code_start;
eb095650 735 val->code = last_string;
a706a1cc 736 BEGIN INITIAL;
58d7a1a1 737 return BRACED_CODE;
e9955c83
AD
738 }
739 }
ca2a6d15 740}
e9955c83 741
ca2a6d15
PH
742<SC_PREDICATE>
743{
744 "}" {
745 --nesting;
746 if (nesting < 0)
747 {
748 STRING_FINISH;
749 loc->start = code_start;
750 val->code = last_string;
751 BEGIN INITIAL;
752 return BRACED_PREDICATE;
753 }
754 else
755 obstack_1grow (&obstack_for_string, '}');
47aee066 756 }
e9955c83
AD
757}
758
e9955c83
AD
759 /*--------------------------------------------------------------.
760 | Scanning some prologue: from "%{" (already scanned) to "%}". |
761 `--------------------------------------------------------------*/
762
763<SC_PROLOGUE>
764{
765 "%}" {
41141c56 766 STRING_FINISH;
3f2d73f1 767 loc->start = code_start;
223ff46e 768 val->chars = last_string;
a706a1cc 769 BEGIN INITIAL;
e9955c83
AD
770 return PROLOGUE;
771 }
772
47aee066
JD
773 <<EOF>> {
774 unexpected_eof (code_start, "%}");
775 STRING_FINISH;
776 loc->start = code_start;
777 val->chars = last_string;
778 BEGIN INITIAL;
779 return PROLOGUE;
780 }
e9955c83
AD
781}
782
783
784 /*---------------------------------------------------------------.
785 | Scanning the epilogue (everything after the second "%%", which |
d8d3f94a 786 | has already been eaten). |
e9955c83
AD
787 `---------------------------------------------------------------*/
788
789<SC_EPILOGUE>
790{
e9955c83 791 <<EOF>> {
41141c56 792 STRING_FINISH;
3f2d73f1 793 loc->start = code_start;
223ff46e 794 val->chars = last_string;
a706a1cc 795 BEGIN INITIAL;
e9955c83
AD
796 return EPILOGUE;
797 }
798}
799
800
4febdd96
PE
801 /*-----------------------------------------------------.
802 | By default, grow the string obstack with the input. |
803 `-----------------------------------------------------*/
804
ca2a6d15
PH
805<SC_COMMENT,SC_LINE_COMMENT,SC_BRACED_CODE,SC_PREDICATE,SC_PROLOGUE,SC_EPILOGUE,SC_STRING,SC_CHARACTER,SC_ESCAPED_STRING,SC_ESCAPED_CHARACTER>. |
806 <SC_COMMENT,SC_LINE_COMMENT,SC_BRACED_CODE,SC_PREDICATE,SC_PROLOGUE,SC_EPILOGUE>\n STRING_GROW;
4febdd96 807
e9955c83
AD
808%%
809
6c30d641
PE
/* Fill BUF with up to SIZE bytes read from FP and return how many
   bytes BUF holds afterwards.  Carriage returns never reach the
   caller: a "\r\n" pair collapses into a single '\n', and an
   isolated '\r' is turned into '\n'.  */

static size_t
no_cr_read (FILE *fp, char *buf, size_t size)
{
  size_t const nread = fread (buf, 1, size, fp);
  char *out = nread ? memchr (buf, '\r', nread) : NULL;

  /* Nothing was read, or the chunk holds no '\r': BUF is already in
     its final form.  */
  if (!out)
    return nread;

  /* Compact the tail of BUF in place, starting at the first '\r'.
     IN never falls behind OUT, so no unread byte is overwritten.  */
  char const *in = out;
  char const *const end = buf + nread;

  while (in != end)
    {
      char c = *in++;

      if (c == '\r')
        {
          c = '\n';
          if (in != end)
            {
              /* Swallow the '\n' half of a "\r\n" pair.  */
              if (*in == '\n')
                in++;
            }
          else
            {
              /* The '\r' was the last byte of this chunk, so its
                 '\n' partner, if any, is still in the stream.  Drop
                 it; push back anything else (pushing back EOF is a
                 harmless no-op).  */
              int next = getc (fp);
              if (next != '\n')
                ungetc (next, fp);
            }
        }

      *out++ = c;
    }

  return out - buf;
}
855
856
f25bfb75 857
1452af69
PE
858/*------------------------------------------------------.
859| Scan NUMBER for a base-BASE integer at location LOC. |
860`------------------------------------------------------*/
861
862static unsigned long int
863scan_integer (char const *number, int base, location loc)
864{
4517da37
PE
865 verify (INT_MAX < ULONG_MAX);
866 unsigned long int num = strtoul (number, NULL, base);
867
868 if (INT_MAX < num)
1452af69
PE
869 {
870 complain_at (loc, _("integer out of range: %s"), quote (number));
871 num = INT_MAX;
872 }
4517da37 873
1452af69
PE
874 return num;
875}
876
877
d8d3f94a
PE
/*------------------------------------------------------------------.
| Convert universal character name UCN to a single-byte character,  |
| and return that character.  Return -1 if UCN does not correspond  |
| to a single-byte character.                                       |
|                                                                   |
| The hexadecimal digits are read starting at UCN + 2, i.e. after   |
| the two-byte `\u' or `\U' introducer.                             |
`------------------------------------------------------------------*/

static int
convert_ucn_to_byte (char const *ucn)
{
  /* Every unsigned char value must fit in the int return type.  */
  verify (UCHAR_MAX <= INT_MAX);
  unsigned long int code = strtoul (ucn + 2, NULL, 16);

  /* FIXME: Currently we assume Unicode-compatible unibyte characters
     on ASCII hosts (i.e., Latin-1 on hosts with 8-bit bytes).  On
     non-ASCII hosts we support only the portable C character set.
     These limitations should be removed once we add support for
     multibyte characters.  */

  if (UCHAR_MAX < code)
    return -1;

#if ! ('$' == 0x24 && '@' == 0x40 && '`' == 0x60 && '~' == 0x7e)
  {
    /* A non-ASCII host.  Use CODE to index into a table of the C
       basic execution character set, which is guaranteed to exist on
       all Standard C platforms.  This table also includes '$', '@',
       and '`', which are not in the basic execution character set but
       which are unibyte characters on all the platforms that we know
       about.  */
    static signed char const table[] =
      {
        '\0',   -1,   -1,   -1,   -1,   -1,   -1, '\a',
        '\b', '\t', '\n', '\v', '\f', '\r',   -1,   -1,
          -1,   -1,   -1,   -1,   -1,   -1,   -1,   -1,
          -1,   -1,   -1,   -1,   -1,   -1,   -1,   -1,
         ' ',  '!',  '"',  '#',  '$',  '%',  '&', '\'',
         '(',  ')',  '*',  '+',  ',',  '-',  '.',  '/',
         '0',  '1',  '2',  '3',  '4',  '5',  '6',  '7',
         '8',  '9',  ':',  ';',  '<',  '=',  '>',  '?',
         '@',  'A',  'B',  'C',  'D',  'E',  'F',  'G',
         'H',  'I',  'J',  'K',  'L',  'M',  'N',  'O',
         'P',  'Q',  'R',  'S',  'T',  'U',  'V',  'W',
         'X',  'Y',  'Z',  '[', '\\',  ']',  '^',  '_',
         '`',  'a',  'b',  'c',  'd',  'e',  'f',  'g',
         'h',  'i',  'j',  'k',  'l',  'm',  'n',  'o',
         'p',  'q',  'r',  's',  't',  'u',  'v',  'w',
         'x',  'y',  'z',  '{',  '|',  '}',  '~'
      };

    /* Return the lookup result as an int right away.  Storing the -1
       sentinel back into the unsigned CODE and converting it to int
       on return would rely on an implementation-defined
       conversion.  */
    return code < sizeof table ? table[code] : -1;
  }
#else
  /* ASCII host: CODE <= UCHAR_MAX <= INT_MAX here, so the conversion
     to int is exact.  */
  return code;
#endif
}
933
934
900c5db5
AD
935/*----------------------------------------------------------------.
936| Handle `#line INT "FILE"'. ARGS has already skipped `#line '. |
937`----------------------------------------------------------------*/
938
939static void
4517da37 940handle_syncline (char *args, location loc)
900c5db5 941{
4517da37
PE
942 char *after_num;
943 unsigned long int lineno = strtoul (args, &after_num, 10);
944 char *file = strchr (after_num, '"') + 1;
945 *strchr (file, '"') = '\0';
946 if (INT_MAX <= lineno)
947 {
948 warn_at (loc, _("line number overflow"));
949 lineno = INT_MAX;
950 }
e9071366 951 current_file = uniqstr_new (file);
0c8e079f 952 boundary_set (&scanner_cursor, current_file, lineno, 1);
4517da37
PE
953}
954
955
4febdd96
PE
956/*----------------------------------------------------------------.
957| For a token or comment starting at START, report message MSGID, |
958| which should say that an end marker was found before |
959| the expected TOKEN_END. |
960`----------------------------------------------------------------*/
961
962static void
963unexpected_end (boundary start, char const *msgid, char const *token_end)
964{
965 location loc;
966 loc.start = start;
967 loc.end = scanner_cursor;
968 complain_at (loc, _(msgid), token_end);
969}
970
971
3f2d73f1
PE
972/*------------------------------------------------------------------------.
973| Report an unexpected EOF in a token or comment starting at START. |
974| An end of file was encountered and the expected TOKEN_END was missing. |
3f2d73f1 975`------------------------------------------------------------------------*/
a706a1cc
PE
976
977static void
aa418041 978unexpected_eof (boundary start, char const *token_end)
a706a1cc 979{
4febdd96
PE
980 unexpected_end (start, N_("missing `%s' at end of file"), token_end);
981}
982
983
984/*----------------------------------------.
985| Likewise, but for unexpected newlines. |
986`----------------------------------------*/
987
988static void
989unexpected_newline (boundary start, char const *token_end)
990{
991 unexpected_end (start, N_("missing `%s' at end of line"), token_end);
a706a1cc
PE
992}
993
994
f25bfb75
AD
995/*-------------------------.
996| Initialize the scanner. |
997`-------------------------*/
998
1d6412ad 999void
e9071366 1000gram_scanner_initialize (void)
1d6412ad 1001{
223ff46e 1002 obstack_init (&obstack_for_string);
1d6412ad
AD
1003}
1004
1005
f25bfb75
AD
1006/*-----------------------------------------------.
1007| Free all the memory allocated to the scanner. |
1008`-----------------------------------------------*/
1009
4cdb01db 1010void
e9071366 1011gram_scanner_free (void)
4cdb01db 1012{
223ff46e 1013 obstack_free (&obstack_for_string, 0);
536545f3 1014 /* Reclaim Flex's buffers. */
580b8926 1015 yylex_destroy ();
4cdb01db 1016}