]> git.saurik.com Git - bison.git/blame - src/scan-gram.l
doc: fixes.
[bison.git] / src / scan-gram.l
CommitLineData
e9955c83 1/* Bison Grammar Scanner -*- C -*-
3b1e470c 2
ba061fa6
AD
3 Copyright (C) 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009
4 Free Software Foundation, Inc.
e9955c83
AD
5
6 This file is part of Bison, the GNU Compiler Compiler.
7
f16b0819 8 This program is free software: you can redistribute it and/or modify
e9955c83 9 it under the terms of the GNU General Public License as published by
f16b0819 10 the Free Software Foundation, either version 3 of the License, or
e9955c83
AD
11 (at your option) any later version.
12
13 This program is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU General Public License for more details.
17
18 You should have received a copy of the GNU General Public License
f16b0819 19 along with this program. If not, see <http://www.gnu.org/licenses/>. */
e9955c83 20
8d90395d 21%option debug nodefault noinput nounput noyywrap never-interactive
e9955c83
AD
22%option prefix="gram_" outfile="lex.yy.c"
23
24%{
4f6e011e
PE
25/* Work around a bug in flex 2.5.31. See Debian bug 333231
26 <http://bugs.debian.org/cgi-bin/bugreport.cgi?bug=333231>. */
27#undef gram_wrap
28#define gram_wrap() 1
29
e9071366 30#define FLEX_PREFIX(Id) gram_ ## Id
0305d25e 31#include <src/flex-scanner.h>
223ff46e 32
0305d25e
AD
33#include <src/complain.h>
34#include <src/files.h>
35#include <src/gram.h>
36#include <quotearg.h>
37#include <src/reader.h>
38#include <src/uniqstr.h>
e9955c83 39
e9071366
AD
40#include <mbswidth.h>
41#include <quote.h>
42
0305d25e 43#include <src/scan-gram.h>
e9071366
AD
44
45#define YY_DECL GRAM_LEX_DECL
2346344a 46
3f2d73f1 47#define YY_USER_INIT \
e9071366 48 code_start = scanner_cursor = loc->start; \
dc9701e8 49
3f2d73f1 50/* Location of scanner cursor. */
4a678af8 51static boundary scanner_cursor;
41141c56 52
e9071366 53#define YY_USER_ACTION location_compute (loc, &scanner_cursor, yytext, yyleng);
d8d3f94a 54
6c30d641 55static size_t no_cr_read (FILE *, char *, size_t);
d8d3f94a
PE
56#define YY_INPUT(buf, result, size) ((result) = no_cr_read (yyin, buf, size))
57
ba061fa6
AD
58#define RETURN_PERCENT_FLAG(Value) \
59 do { \
60 val->uniqstr = uniqstr_new (Value); \
61 return PERCENT_FLAG; \
62 } while (0)
63
b9f1d9a4
AR
64#define ROLLBACK_CURRENT_TOKEN \
65 do { \
66 scanner_cursor.column -= mbsnwidth (yytext, yyleng, 0); \
67 yyless (0); \
68 } while (0)
ba061fa6 69
7ec2d4cd 70/* A string representing the most recently saved token. */
7c0c6181 71static char *last_string;
7ec2d4cd 72
872b52bc 73/* Bracketed identifier. */
b9f1d9a4
AR
74static uniqstr bracketed_id_str = 0;
75static location bracketed_id_loc;
76static boundary bracketed_id_start;
77static int bracketed_id_context_state = 0;
78
7ec2d4cd 79void
e9071366 80gram_scanner_last_string_free (void)
7ec2d4cd 81{
41141c56 82 STRING_FREE;
7ec2d4cd 83}
e9955c83 84
4517da37 85static void handle_syncline (char *, location);
1452af69 86static unsigned long int scan_integer (char const *p, int base, location loc);
d8d3f94a 87static int convert_ucn_to_byte (char const *hex_text);
aa418041 88static void unexpected_eof (boundary, char const *);
4febdd96 89static void unexpected_newline (boundary, char const *);
e9955c83
AD
90
91%}
e9071366
AD
92 /* A C-like comment in directives/rules. */
93%x SC_YACC_COMMENT
94 /* Strings and characters in directives/rules. */
e9955c83 95%x SC_ESCAPED_STRING SC_ESCAPED_CHARACTER
e9071366
AD
96 /* An identifier was just read in directives/rules. Special state
97 to capture the sequence `identifier :'. */
98%x SC_AFTER_IDENTIFIER
cb823b6f
AD
99 /* A complex tag, with nested angle brackets. */
100%x SC_TAG
e9071366
AD
101
102 /* Three types of user code:
103 - prologue (code between `%{' `%}' in the first section, before %%);
104 - actions, printers, union, etc. (between braces in the middle section);
105 - epilogue (everything after the second %%). */
106%x SC_PROLOGUE SC_BRACED_CODE SC_EPILOGUE
107 /* C and C++ comments in code. */
108%x SC_COMMENT SC_LINE_COMMENT
109 /* Strings and characters in code. */
110%x SC_STRING SC_CHARACTER
872b52bc 111 /* Bracketed identifiers support. */
b9f1d9a4 112%x SC_BRACKETED_ID SC_RETURN_BRACKETED_ID
e9955c83 113
cdf3f113
AD
114letter [-.abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ_]
115id {letter}({letter}|[0-9])*
4f646c37 116directive %{id}
624a35e2 117int [0-9]+
d8d3f94a
PE
118
119/* POSIX says that a tag must be both an id and a C union member, but
120 historically almost any character is allowed in a tag. We disallow
cb823b6f
AD
121 NUL, as this simplifies our implementation. We disallow angle
122 bracket to match them in nested pairs: several languages use them
123 for generics/template types. */
124tag [^\0<>]+
d8d3f94a
PE
125
126/* Zero or more instances of backslash-newline. Following GCC, allow
127 white space between the backslash and the newline. */
128splice (\\[ \f\t\v]*\n)*
e9955c83
AD
129
130%%
131%{
cb823b6f
AD
132 /* Nesting level. Either for nested braces, or nested angle brackets
133 (but not mixed). */
134 int nesting IF_LINT (= 0);
1a9e39f1 135
3f2d73f1 136 /* Parent context state, when applicable. */
5362ed19 137 int context_state IF_LINT (= 0);
a706a1cc 138
3f2d73f1 139 /* Location of most recent identifier, when applicable. */
a2bc9dbc 140 location id_loc IF_LINT (= empty_location);
3f2d73f1 141
a2bc9dbc
PE
142 /* Where containing code started, when applicable. Its initial
143 value is relevant only when yylex is invoked in the SC_EPILOGUE
144 start condition. */
145 boundary code_start = scanner_cursor;
3f2d73f1 146
223ff46e
PE
147 /* Where containing comment or string or character literal started,
148 when applicable. */
a2bc9dbc 149 boundary token_start IF_LINT (= scanner_cursor);
e9955c83
AD
150%}
151
152
3f2d73f1
PE
153 /*-----------------------.
154 | Scanning white space. |
155 `-----------------------*/
156
b9f1d9a4 157<INITIAL,SC_AFTER_IDENTIFIER,SC_BRACKETED_ID,SC_RETURN_BRACKETED_ID>
3f2d73f1 158{
4febdd96 159 /* Comments and white space. */
83adb046 160 "," warn_at (*loc, _("stray `,' treated as white space"));
4febdd96 161 [ \f\n\t\v] |
3f2d73f1 162 "//".* ;
83adb046
PE
163 "/*" {
164 token_start = loc->start;
165 context_state = YY_START;
166 BEGIN SC_YACC_COMMENT;
167 }
3f2d73f1
PE
168
169 /* #line directives are not documented, and may be withdrawn or
170 modified in future versions of Bison. */
171 ^"#line "{int}" \"".*"\"\n" {
4517da37 172 handle_syncline (yytext + sizeof "#line " - 1, *loc);
3f2d73f1
PE
173 }
174}
175
176
e9955c83
AD
177 /*----------------------------.
178 | Scanning Bison directives. |
179 `----------------------------*/
a7c09cba
DJ
180
181 /* For directives that are also command line options, the regex must be
182 "%..."
183 after "[-_]"s are removed, and the directive must match the --long
184 option name, with a single string argument. Otherwise, add exceptions
185 to ../build-aux/cross-options.pl. */
186
e9955c83
AD
187<INITIAL>
188{
deef2a0a 189 "%binary" return PERCENT_NONASSOC;
136a0f76 190 "%code" return PERCENT_CODE;
fa819509 191 "%debug" RETURN_PERCENT_FLAG("parse.trace");
deef2a0a
AD
192 "%default"[-_]"prec" return PERCENT_DEFAULT_PREC;
193 "%define" return PERCENT_DEFINE;
194 "%defines" return PERCENT_DEFINES;
195 "%destructor" return PERCENT_DESTRUCTOR;
196 "%dprec" return PERCENT_DPREC;
31b850d2 197 "%error"[-_]"verbose" return PERCENT_ERROR_VERBOSE;
deef2a0a
AD
198 "%expect" return PERCENT_EXPECT;
199 "%expect"[-_]"rr" return PERCENT_EXPECT_RR;
200 "%file-prefix" return PERCENT_FILE_PREFIX;
e9955c83 201 "%fixed"[-_]"output"[-_]"files" return PERCENT_YACC;
deef2a0a
AD
202 "%initial-action" return PERCENT_INITIAL_ACTION;
203 "%glr-parser" return PERCENT_GLR_PARSER;
204 "%language" return PERCENT_LANGUAGE;
205 "%left" return PERCENT_LEFT;
206 "%lex-param" return PERCENT_LEX_PARAM;
bc0f5737 207 "%locations" RETURN_PERCENT_FLAG("locations");
deef2a0a
AD
208 "%merge" return PERCENT_MERGE;
209 "%name"[-_]"prefix" return PERCENT_NAME_PREFIX;
210 "%no"[-_]"default"[-_]"prec" return PERCENT_NO_DEFAULT_PREC;
211 "%no"[-_]"lines" return PERCENT_NO_LINES;
212 "%nonassoc" return PERCENT_NONASSOC;
213 "%nondeterministic-parser" return PERCENT_NONDETERMINISTIC_PARSER;
214 "%nterm" return PERCENT_NTERM;
215 "%output" return PERCENT_OUTPUT;
216 "%parse-param" return PERCENT_PARSE_PARAM;
217 "%prec" return PERCENT_PREC;
d78f0ac9 218 "%precedence" return PERCENT_PRECEDENCE;
deef2a0a 219 "%printer" return PERCENT_PRINTER;
4920ae8b 220 "%pure"[-_]"parser" RETURN_PERCENT_FLAG("api.pure");
deef2a0a
AD
221 "%require" return PERCENT_REQUIRE;
222 "%right" return PERCENT_RIGHT;
223 "%skeleton" return PERCENT_SKELETON;
224 "%start" return PERCENT_START;
225 "%term" return PERCENT_TOKEN;
226 "%token" return PERCENT_TOKEN;
227 "%token"[-_]"table" return PERCENT_TOKEN_TABLE;
228 "%type" return PERCENT_TYPE;
229 "%union" return PERCENT_UNION;
230 "%verbose" return PERCENT_VERBOSE;
231 "%yacc" return PERCENT_YACC;
e9955c83 232
3f2d73f1 233 {directive} {
41141c56 234 complain_at (*loc, _("invalid directive: %s"), quote (yytext));
412f8a59 235 }
900c5db5 236
e9955c83 237 "=" return EQUAL;
e9071366 238 "|" return PIPE;
e9955c83
AD
239 ";" return SEMICOLON;
240
3f2d73f1 241 {id} {
58d7a1a1 242 val->uniqstr = uniqstr_new (yytext);
3f2d73f1 243 id_loc = *loc;
b9f1d9a4 244 bracketed_id_str = NULL;
3f2d73f1 245 BEGIN SC_AFTER_IDENTIFIER;
e9955c83
AD
246 }
247
d8d3f94a 248 {int} {
1452af69
PE
249 val->integer = scan_integer (yytext, 10, *loc);
250 return INT;
251 }
252 0[xX][0-9abcdefABCDEF]+ {
253 val->integer = scan_integer (yytext, 16, *loc);
d8d3f94a
PE
254 return INT;
255 }
e9955c83 256
84a1cb5a
AD
257 /* Identifiers may not start with a digit. Yet, don't silently
258 accept "1FOO" as "1 FOO". */
259 {int}{id} {
260 complain_at (*loc, _("invalid identifier: %s"), quote (yytext));
261 }
262
3208e3f4 263 /* Characters. */
dfaa4860 264 "'" token_start = loc->start; BEGIN SC_ESCAPED_CHARACTER;
e9955c83
AD
265
266 /* Strings. */
ca407bdf 267 "\"" token_start = loc->start; BEGIN SC_ESCAPED_STRING;
e9955c83
AD
268
269 /* Prologue. */
3f2d73f1 270 "%{" code_start = loc->start; BEGIN SC_PROLOGUE;
e9955c83
AD
271
272 /* Code in between braces. */
3f2d73f1
PE
273 "{" {
274 STRING_GROW;
cb823b6f 275 nesting = 0;
3f2d73f1
PE
276 code_start = loc->start;
277 BEGIN SC_BRACED_CODE;
278 }
e9955c83
AD
279
280 /* A type. */
cb823b6f
AD
281 "<*>" return TAG_ANY;
282 "<>" return TAG_NONE;
d8d3f94a 283 "<"{tag}">" {
223ff46e 284 obstack_grow (&obstack_for_string, yytext + 1, yyleng - 2);
41141c56 285 STRING_FINISH;
223ff46e 286 val->uniqstr = uniqstr_new (last_string);
41141c56 287 STRING_FREE;
cb823b6f
AD
288 return TAG;
289 }
290 "<" {
291 nesting = 0;
292 token_start = loc->start;
293 BEGIN SC_TAG;
4cdb01db
AD
294 }
295
a706a1cc
PE
296 "%%" {
297 static int percent_percent_count;
e9955c83 298 if (++percent_percent_count == 2)
a2bc9dbc 299 BEGIN SC_EPILOGUE;
e9955c83
AD
300 return PERCENT_PERCENT;
301 }
302
b9f1d9a4
AR
303 "[" {
304 bracketed_id_str = NULL;
305 bracketed_id_start = loc->start;
306 bracketed_id_context_state = YY_START;
307 BEGIN SC_BRACKETED_ID;
308 }
309
a706a1cc 310 . {
41141c56 311 complain_at (*loc, _("invalid character: %s"), quote (yytext));
3f2d73f1 312 }
379f0ac8
PE
313
314 <<EOF>> {
315 loc->start = loc->end = scanner_cursor;
316 yyterminate ();
317 }
3f2d73f1
PE
318}
319
320
cb823b6f
AD
321 /*--------------------------------------------------------------.
322 | Supporting \0 complexifies our implementation for no expected |
323 | added value. |
324 `--------------------------------------------------------------*/
325
326<SC_ESCAPED_CHARACTER,SC_ESCAPED_STRING,SC_TAG>
327{
328 \0 complain_at (*loc, _("invalid null character"));
329}
330
331
3f2d73f1
PE
332 /*-----------------------------------------------------------------.
333 | Scanning after an identifier, checking whether a colon is next. |
334 `-----------------------------------------------------------------*/
335
336<SC_AFTER_IDENTIFIER>
337{
b9f1d9a4 338 "[" {
872b52bc 339 if (bracketed_id_str)
b9f1d9a4
AR
340 {
341 ROLLBACK_CURRENT_TOKEN;
342 BEGIN SC_RETURN_BRACKETED_ID;
343 *loc = id_loc;
344 return ID;
345 }
872b52bc
AR
346 else
347 {
348 bracketed_id_start = loc->start;
349 bracketed_id_context_state = YY_START;
350 BEGIN SC_BRACKETED_ID;
351 }
b9f1d9a4 352 }
3f2d73f1 353 ":" {
b9f1d9a4 354 BEGIN (bracketed_id_str ? SC_RETURN_BRACKETED_ID : INITIAL);
3f2d73f1 355 *loc = id_loc;
3f2d73f1
PE
356 return ID_COLON;
357 }
358 . {
b9f1d9a4
AR
359 ROLLBACK_CURRENT_TOKEN;
360 BEGIN (bracketed_id_str ? SC_RETURN_BRACKETED_ID : INITIAL);
3f2d73f1 361 *loc = id_loc;
3f2d73f1
PE
362 return ID;
363 }
364 <<EOF>> {
b9f1d9a4 365 BEGIN (bracketed_id_str ? SC_RETURN_BRACKETED_ID : INITIAL);
3f2d73f1 366 *loc = id_loc;
3f2d73f1 367 return ID;
e9955c83
AD
368 }
369}
370
b9f1d9a4
AR
371 /*--------------------------------.
372 | Scanning bracketed identifiers. |
373 `--------------------------------*/
374
375<SC_BRACKETED_ID>
376{
377 {id} {
872b52bc 378 if (bracketed_id_str)
b9f1d9a4 379 {
872b52bc
AR
380 complain_at (*loc, _("unexpected identifier in bracketed name: %s"),
381 quote (yytext));
b9f1d9a4
AR
382 }
383 else
384 {
872b52bc
AR
385 bracketed_id_str = uniqstr_new (yytext);
386 bracketed_id_loc = *loc;
b9f1d9a4
AR
387 }
388 }
389 "]" {
390 BEGIN bracketed_id_context_state;
391 if (bracketed_id_str)
392 {
393 if (INITIAL == bracketed_id_context_state)
394 {
395 val->uniqstr = bracketed_id_str;
396 bracketed_id_str = 0;
397 *loc = bracketed_id_loc;
398 return BRACKETED_ID;
399 }
400 }
401 else
872b52bc 402 complain_at (*loc, _("an identifier expected"));
b9f1d9a4
AR
403 }
404 . {
405 complain_at (*loc, _("invalid character in bracketed name: %s"),
406 quote (yytext));
407 }
408 <<EOF>> {
409 BEGIN bracketed_id_context_state;
410 unexpected_eof (bracketed_id_start, "]");
411 }
412}
413
414<SC_RETURN_BRACKETED_ID>
415{
416 . {
417 ROLLBACK_CURRENT_TOKEN;
418 val->uniqstr = bracketed_id_str;
419 bracketed_id_str = 0;
420 *loc = bracketed_id_loc;
421 BEGIN INITIAL;
422 return BRACKETED_ID;
423 }
424}
425
e9955c83 426
d8d3f94a
PE
427 /*---------------------------------------------------------------.
428 | Scanning a Yacc comment. The initial `/ *' is already eaten. |
429 `---------------------------------------------------------------*/
e9955c83 430
d8d3f94a 431<SC_YACC_COMMENT>
e9955c83 432{
3f2d73f1 433 "*/" BEGIN context_state;
a706a1cc 434 .|\n ;
aa418041 435 <<EOF>> unexpected_eof (token_start, "*/"); BEGIN context_state;
d8d3f94a
PE
436}
437
438
439 /*------------------------------------------------------------.
440 | Scanning a C comment. The initial `/ *' is already eaten. |
441 `------------------------------------------------------------*/
442
443<SC_COMMENT>
444{
3f2d73f1 445 "*"{splice}"/" STRING_GROW; BEGIN context_state;
aa418041 446 <<EOF>> unexpected_eof (token_start, "*/"); BEGIN context_state;
e9955c83
AD
447}
448
449
d8d3f94a
PE
450 /*--------------------------------------------------------------.
451 | Scanning a line comment. The initial `//' is already eaten. |
452 `--------------------------------------------------------------*/
453
454<SC_LINE_COMMENT>
455{
3f2d73f1 456 "\n" STRING_GROW; BEGIN context_state;
41141c56 457 {splice} STRING_GROW;
3f2d73f1 458 <<EOF>> BEGIN context_state;
d8d3f94a
PE
459}
460
461
4febdd96
PE
462 /*------------------------------------------------.
463 | Scanning a Bison string, including its escapes. |
464 | The initial quote is already eaten. |
465 `------------------------------------------------*/
e9955c83
AD
466
467<SC_ESCAPED_STRING>
468{
47aee066
JD
469 "\""|"\n" {
470 if (yytext[0] == '\n')
471 unexpected_newline (token_start, "\"");
472 STRING_FINISH;
473 loc->start = token_start;
474 val->chars = last_string;
475 BEGIN INITIAL;
476 return STRING;
477 }
478 <<EOF>> {
479 unexpected_eof (token_start, "\"");
41141c56 480 STRING_FINISH;
3f2d73f1 481 loc->start = token_start;
223ff46e 482 val->chars = last_string;
a706a1cc 483 BEGIN INITIAL;
e9955c83
AD
484 return STRING;
485 }
e9955c83
AD
486}
487
4febdd96
PE
488 /*----------------------------------------------------------.
489 | Scanning a Bison character literal, decoding its escapes. |
490 | The initial quote is already eaten. |
491 `----------------------------------------------------------*/
e9955c83
AD
492
493<SC_ESCAPED_CHARACTER>
494{
47aee066 495 "'"|"\n" {
41141c56 496 STRING_FINISH;
3f2d73f1 497 loc->start = token_start;
dfaa4860 498 val->character = last_string[0];
3208e3f4
JD
499 {
500 /* FIXME: Eventually, make these errors. */
dfaa4860
JD
501 if (last_string[0] == '\0')
502 {
503 warn_at (*loc, _("empty character literal"));
504 /* '\0' seems dangerous even if we are about to complain. */
505 val->character = '\'';
506 }
507 else if (last_string[1] != '\0')
3208e3f4
JD
508 warn_at (*loc, _("extra characters in character literal"));
509 }
510 if (yytext[0] == '\n')
511 unexpected_newline (token_start, "'");
41141c56 512 STRING_FREE;
a706a1cc 513 BEGIN INITIAL;
58d7a1a1 514 return CHAR;
e9955c83 515 }
47aee066 516 <<EOF>> {
47aee066
JD
517 STRING_FINISH;
518 loc->start = token_start;
dfaa4860 519 val->character = last_string[0];
3208e3f4 520 {
3208e3f4 521 /* FIXME: Eventually, make these errors. */
dfaa4860
JD
522 if (last_string[0] == '\0')
523 {
524 warn_at (*loc, _("empty character literal"));
525 /* '\0' seems dangerous even if we are about to complain. */
526 val->character = '\'';
527 }
528 else if (last_string[1] != '\0')
3208e3f4 529 warn_at (*loc, _("extra characters in character literal"));
3208e3f4
JD
530 }
531 unexpected_eof (token_start, "'");
47aee066
JD
532 STRING_FREE;
533 BEGIN INITIAL;
534 return CHAR;
535 }
4febdd96 536}
a706a1cc 537
cb823b6f
AD
538 /*-----------------------------------------------------------.
539 | Scanning a Bison nested tag. The initial angle bracket is |
540 | already eaten. |
541 `-----------------------------------------------------------*/
542
543<SC_TAG>
4febdd96 544{
cb823b6f
AD
545 ">" {
546 --nesting;
547 if (nesting < 0)
548 {
549 STRING_FINISH;
550 loc->start = token_start;
551 val->uniqstr = uniqstr_new (last_string);
552 STRING_FREE;
553 BEGIN INITIAL;
554 return TAG;
555 }
556 STRING_GROW;
557 }
558
559 [^<>]+ STRING_GROW;
560 "<"+ STRING_GROW; nesting += yyleng;
e9955c83 561
cb823b6f
AD
562 <<EOF>> {
563 unexpected_eof (token_start, ">");
564 STRING_FINISH;
565 loc->start = token_start;
566 val->uniqstr = uniqstr_new (last_string);
567 STRING_FREE;
568 BEGIN INITIAL;
569 return TAG;
570 }
571}
e9955c83
AD
572
573 /*----------------------------.
574 | Decode escaped characters. |
575 `----------------------------*/
576
577<SC_ESCAPED_STRING,SC_ESCAPED_CHARACTER>
578{
d8d3f94a 579 \\[0-7]{1,3} {
4517da37 580 unsigned long int c = strtoul (yytext + 1, NULL, 8);
d8d3f94a 581 if (UCHAR_MAX < c)
3f2d73f1 582 complain_at (*loc, _("invalid escape sequence: %s"), quote (yytext));
05ac60f3 583 else if (! c)
92ac3705 584 complain_at (*loc, _("invalid null character: %s"), quote (yytext));
e9955c83 585 else
223ff46e 586 obstack_1grow (&obstack_for_string, c);
e9955c83
AD
587 }
588
6b0d38ab 589 \\x[0-9abcdefABCDEF]+ {
4517da37
PE
590 verify (UCHAR_MAX < ULONG_MAX);
591 unsigned long int c = strtoul (yytext + 2, NULL, 16);
592 if (UCHAR_MAX < c)
3f2d73f1 593 complain_at (*loc, _("invalid escape sequence: %s"), quote (yytext));
92ac3705
PE
594 else if (! c)
595 complain_at (*loc, _("invalid null character: %s"), quote (yytext));
d8d3f94a 596 else
223ff46e 597 obstack_1grow (&obstack_for_string, c);
e9955c83
AD
598 }
599
223ff46e
PE
600 \\a obstack_1grow (&obstack_for_string, '\a');
601 \\b obstack_1grow (&obstack_for_string, '\b');
602 \\f obstack_1grow (&obstack_for_string, '\f');
603 \\n obstack_1grow (&obstack_for_string, '\n');
604 \\r obstack_1grow (&obstack_for_string, '\r');
605 \\t obstack_1grow (&obstack_for_string, '\t');
606 \\v obstack_1grow (&obstack_for_string, '\v');
412f8a59
PE
607
608 /* \\[\"\'?\\] would be shorter, but it confuses xgettext. */
223ff46e 609 \\("\""|"'"|"?"|"\\") obstack_1grow (&obstack_for_string, yytext[1]);
412f8a59 610
6b0d38ab 611 \\(u|U[0-9abcdefABCDEF]{4})[0-9abcdefABCDEF]{4} {
d8d3f94a
PE
612 int c = convert_ucn_to_byte (yytext);
613 if (c < 0)
3f2d73f1 614 complain_at (*loc, _("invalid escape sequence: %s"), quote (yytext));
92ac3705
PE
615 else if (! c)
616 complain_at (*loc, _("invalid null character: %s"), quote (yytext));
d8d3f94a 617 else
223ff46e 618 obstack_1grow (&obstack_for_string, c);
d8d3f94a 619 }
4f25ebb0 620 \\(.|\n) {
3f2d73f1 621 complain_at (*loc, _("unrecognized escape sequence: %s"), quote (yytext));
41141c56 622 STRING_GROW;
e9955c83
AD
623 }
624}
625
4febdd96
PE
626 /*--------------------------------------------.
627 | Scanning user-code characters and strings. |
628 `--------------------------------------------*/
e9955c83 629
4febdd96
PE
630<SC_CHARACTER,SC_STRING>
631{
e9071366 632 {splice}|\\{splice}[^\n\[\]] STRING_GROW;
4febdd96 633}
e9955c83
AD
634
635<SC_CHARACTER>
636{
4febdd96
PE
637 "'" STRING_GROW; BEGIN context_state;
638 \n unexpected_newline (token_start, "'"); BEGIN context_state;
639 <<EOF>> unexpected_eof (token_start, "'"); BEGIN context_state;
e9955c83
AD
640}
641
e9955c83
AD
642<SC_STRING>
643{
4febdd96
PE
644 "\"" STRING_GROW; BEGIN context_state;
645 \n unexpected_newline (token_start, "\""); BEGIN context_state;
646 <<EOF>> unexpected_eof (token_start, "\""); BEGIN context_state;
e9955c83
AD
647}
648
649
650 /*---------------------------------------------------.
651 | Strings, comments etc. can be found in user code. |
652 `---------------------------------------------------*/
653
654<SC_BRACED_CODE,SC_PROLOGUE,SC_EPILOGUE>
655{
3f2d73f1
PE
656 "'" {
657 STRING_GROW;
658 context_state = YY_START;
659 token_start = loc->start;
660 BEGIN SC_CHARACTER;
661 }
662 "\"" {
663 STRING_GROW;
664 context_state = YY_START;
665 token_start = loc->start;
666 BEGIN SC_STRING;
667 }
668 "/"{splice}"*" {
669 STRING_GROW;
670 context_state = YY_START;
671 token_start = loc->start;
672 BEGIN SC_COMMENT;
673 }
674 "/"{splice}"/" {
675 STRING_GROW;
676 context_state = YY_START;
677 BEGIN SC_LINE_COMMENT;
678 }
e9955c83
AD
679}
680
681
624a35e2 682
58d7a1a1
AD
683 /*-----------------------------------------------------------.
684 | Scanning some code in braces (actions). The initial "{" is |
685 | already eaten. |
686 `-----------------------------------------------------------*/
e9955c83
AD
687
688<SC_BRACED_CODE>
689{
cb823b6f
AD
690 "{"|"<"{splice}"%" STRING_GROW; nesting++;
691 "%"{splice}">" STRING_GROW; nesting--;
e9955c83 692 "}" {
25522739
PE
693 obstack_1grow (&obstack_for_string, '}');
694
cb823b6f
AD
695 --nesting;
696 if (nesting < 0)
e9955c83 697 {
41141c56 698 STRING_FINISH;
3f2d73f1 699 loc->start = code_start;
eb095650 700 val->code = last_string;
a706a1cc 701 BEGIN INITIAL;
58d7a1a1 702 return BRACED_CODE;
e9955c83
AD
703 }
704 }
705
a706a1cc
PE
706 /* Tokenize `<<%' correctly (as `<<' `%') rather than incorrectly
707 (as `<' `<%'). */
41141c56 708 "<"{splice}"<" STRING_GROW;
a706a1cc 709
47aee066
JD
710 <<EOF>> {
711 unexpected_eof (code_start, "}");
712 STRING_FINISH;
713 loc->start = code_start;
eb095650 714 val->code = last_string;
47aee066
JD
715 BEGIN INITIAL;
716 return BRACED_CODE;
717 }
e9955c83
AD
718}
719
720
721 /*--------------------------------------------------------------.
722 | Scanning some prologue: from "%{" (already scanned) to "%}". |
723 `--------------------------------------------------------------*/
724
725<SC_PROLOGUE>
726{
727 "%}" {
41141c56 728 STRING_FINISH;
3f2d73f1 729 loc->start = code_start;
223ff46e 730 val->chars = last_string;
a706a1cc 731 BEGIN INITIAL;
e9955c83
AD
732 return PROLOGUE;
733 }
734
47aee066
JD
735 <<EOF>> {
736 unexpected_eof (code_start, "%}");
737 STRING_FINISH;
738 loc->start = code_start;
739 val->chars = last_string;
740 BEGIN INITIAL;
741 return PROLOGUE;
742 }
e9955c83
AD
743}
744
745
746 /*---------------------------------------------------------------.
747 | Scanning the epilogue (everything after the second "%%", which |
d8d3f94a 748 | has already been eaten). |
e9955c83
AD
749 `---------------------------------------------------------------*/
750
751<SC_EPILOGUE>
752{
e9955c83 753 <<EOF>> {
41141c56 754 STRING_FINISH;
3f2d73f1 755 loc->start = code_start;
223ff46e 756 val->chars = last_string;
a706a1cc 757 BEGIN INITIAL;
e9955c83
AD
758 return EPILOGUE;
759 }
760}
761
762
4febdd96
PE
763 /*-----------------------------------------------------.
764 | By default, grow the string obstack with the input. |
765 `-----------------------------------------------------*/
766
767<SC_COMMENT,SC_LINE_COMMENT,SC_BRACED_CODE,SC_PROLOGUE,SC_EPILOGUE,SC_STRING,SC_CHARACTER,SC_ESCAPED_STRING,SC_ESCAPED_CHARACTER>. |
768<SC_COMMENT,SC_LINE_COMMENT,SC_BRACED_CODE,SC_PROLOGUE,SC_EPILOGUE>\n STRING_GROW;
769
e9955c83
AD
770%%
771
6c30d641
PE
/* Fill BUF with at most SIZE bytes read from FP and normalize line
   endings in place: each "\r\n" pair and each lone '\r' becomes a
   single '\n'.  Return the number of bytes left in BUF after that
   rewriting, which may be smaller than the number of bytes read.

   When the data read ends with '\r', one more character is fetched
   from FP to see whether it is the '\n' of a "\r\n" pair; if it is
   not, it is pushed back with ungetc so the next call reads it.  */

static size_t
no_cr_read (FILE *fp, char *buf, size_t size)
{
  size_t const nread = fread (buf, 1, size, fp);
  char *first_cr = nread ? memchr (buf, '\r', nread) : NULL;

  /* Nothing read, or no carriage return: BUF is already final.  */
  if (!first_cr)
    return nread;

  char const *const end = buf + nread;  /* One past the data read.  */
  char const *src = first_cr + 1;       /* Next byte to examine.  */
  char *dst = first_cr;                 /* Next byte to write.  */

  for (;;)
    {
      /* DST sits where a '\r' was found.  Emit '\n' in its place and
         swallow a '\n' that immediately follows, if any.  */
      *dst++ = '\n';
      if (src < end)
        {
          if (*src == '\n')
            src++;
        }
      else
        {
          /* The '\r' was the last byte read: its '\n' partner, if
             any, is still in the stream.  */
          int next = getc (fp);
          if (next != '\n' && ungetc (next, fp) != next)
            return dst - buf;
        }

      /* Slide the following bytes down until the next '\r'.  */
      for (;;)
        {
          if (src == end)
            return dst - buf;
          char c = *src++;
          if (c == '\r')
            break;
          *dst++ = c;
        }
    }
}
817
818
f25bfb75 819
1452af69
PE
820/*------------------------------------------------------.
821| Scan NUMBER for a base-BASE integer at location LOC. |
822`------------------------------------------------------*/
823
824static unsigned long int
825scan_integer (char const *number, int base, location loc)
826{
4517da37
PE
827 verify (INT_MAX < ULONG_MAX);
828 unsigned long int num = strtoul (number, NULL, base);
829
830 if (INT_MAX < num)
1452af69
PE
831 {
832 complain_at (loc, _("integer out of range: %s"), quote (number));
833 num = INT_MAX;
834 }
4517da37 835
1452af69
PE
836 return num;
837}
838
839
d8d3f94a
PE
/*------------------------------------------------------------------.
| Convert universal character name UCN to a single-byte character,  |
| and return that character.  Return -1 if UCN does not correspond  |
| to a single-byte character.                                       |
|                                                                   |
| UCN is the full escape text, starting with a backslash and `u' or |
| `U'; the hexadecimal digits begin two characters in.  A result of |
| 0 means the UCN named the null character; callers check for that  |
| themselves.                                                       |
`------------------------------------------------------------------*/

static int
convert_ucn_to_byte (char const *ucn)
{
  /* Every unsigned char value must be representable in the int
     result.  */
  verify (UCHAR_MAX <= INT_MAX);
  unsigned long int code = strtoul (ucn + 2, NULL, 16);

  /* FIXME: Currently we assume Unicode-compatible unibyte characters
     on ASCII hosts (i.e., Latin-1 on hosts with 8-bit bytes).  On
     non-ASCII hosts we support only the portable C character set.
     These limitations should be removed once we add support for
     multibyte characters.  */

  if (UCHAR_MAX < code)
    return -1;

  /* CODE is now at most UCHAR_MAX, hence representable as an int.
     Carry the result in an int from here on, so that the -1 sentinel
     is never stored in an unsigned object and converted back (an
     out-of-range conversion whose result is implementation-defined).  */
  int byte = code;

#if ! ('$' == 0x24 && '@' == 0x40 && '`' == 0x60 && '~' == 0x7e)
  {
    /* A non-ASCII host.  Use CODE to index into a table of the C
       basic execution character set, which is guaranteed to exist on
       all Standard C platforms.  This table also includes '$', '@',
       and '`', which are not in the basic execution character set but
       which are unibyte characters on all the platforms that we know
       about.  */
    static signed char const table[] =
      {
        '\0',   -1,   -1,   -1,   -1,   -1,   -1, '\a',
        '\b', '\t', '\n', '\v', '\f', '\r',   -1,   -1,
          -1,   -1,   -1,   -1,   -1,   -1,   -1,   -1,
          -1,   -1,   -1,   -1,   -1,   -1,   -1,   -1,
         ' ',  '!',  '"',  '#',  '$',  '%',  '&', '\'',
         '(',  ')',  '*',  '+',  ',',  '-',  '.',  '/',
         '0',  '1',  '2',  '3',  '4',  '5',  '6',  '7',
         '8',  '9',  ':',  ';',  '<',  '=',  '>',  '?',
         '@',  'A',  'B',  'C',  'D',  'E',  'F',  'G',
         'H',  'I',  'J',  'K',  'L',  'M',  'N',  'O',
         'P',  'Q',  'R',  'S',  'T',  'U',  'V',  'W',
         'X',  'Y',  'Z',  '[', '\\',  ']',  '^',  '_',
         '`',  'a',  'b',  'c',  'd',  'e',  'f',  'g',
         'h',  'i',  'j',  'k',  'l',  'm',  'n',  'o',
         'p',  'q',  'r',  's',  't',  'u',  'v',  'w',
         'x',  'y',  'z',  '{',  '|',  '}',  '~'
      };

    /* Codes past the end of the table have no single-byte form.  */
    byte = code < sizeof table ? table[code] : -1;
  }
#endif

  return byte;
}
895
896
900c5db5
AD
897/*----------------------------------------------------------------.
898| Handle `#line INT "FILE"'. ARGS has already skipped `#line '. |
899`----------------------------------------------------------------*/
900
901static void
4517da37 902handle_syncline (char *args, location loc)
900c5db5 903{
4517da37
PE
904 char *after_num;
905 unsigned long int lineno = strtoul (args, &after_num, 10);
906 char *file = strchr (after_num, '"') + 1;
907 *strchr (file, '"') = '\0';
908 if (INT_MAX <= lineno)
909 {
910 warn_at (loc, _("line number overflow"));
911 lineno = INT_MAX;
912 }
e9071366 913 current_file = uniqstr_new (file);
0c8e079f 914 boundary_set (&scanner_cursor, current_file, lineno, 1);
4517da37
PE
915}
916
917
4febdd96
PE
918/*----------------------------------------------------------------.
919| For a token or comment starting at START, report message MSGID, |
920| which should say that an end marker was found before |
921| the expected TOKEN_END. |
922`----------------------------------------------------------------*/
923
924static void
925unexpected_end (boundary start, char const *msgid, char const *token_end)
926{
927 location loc;
928 loc.start = start;
929 loc.end = scanner_cursor;
930 complain_at (loc, _(msgid), token_end);
931}
932
933
3f2d73f1
PE
934/*------------------------------------------------------------------------.
935| Report an unexpected EOF in a token or comment starting at START. |
936| An end of file was encountered and the expected TOKEN_END was missing. |
3f2d73f1 937`------------------------------------------------------------------------*/
a706a1cc
PE
938
939static void
aa418041 940unexpected_eof (boundary start, char const *token_end)
a706a1cc 941{
4febdd96
PE
942 unexpected_end (start, N_("missing `%s' at end of file"), token_end);
943}
944
945
946/*----------------------------------------.
947| Likewise, but for unexpected newlines. |
948`----------------------------------------*/
949
950static void
951unexpected_newline (boundary start, char const *token_end)
952{
953 unexpected_end (start, N_("missing `%s' at end of line"), token_end);
a706a1cc
PE
954}
955
956
f25bfb75
AD
957/*-------------------------.
958| Initialize the scanner. |
959`-------------------------*/
960
1d6412ad 961void
e9071366 962gram_scanner_initialize (void)
1d6412ad 963{
223ff46e 964 obstack_init (&obstack_for_string);
1d6412ad
AD
965}
966
967
f25bfb75
AD
968/*-----------------------------------------------.
969| Free all the memory allocated to the scanner. |
970`-----------------------------------------------*/
971
4cdb01db 972void
e9071366 973gram_scanner_free (void)
4cdb01db 974{
223ff46e 975 obstack_free (&obstack_for_string, 0);
536545f3 976 /* Reclaim Flex's buffers. */
580b8926 977 yylex_destroy ();
4cdb01db 978}