]> git.saurik.com Git - bison.git/blame - src/scan-gram.l
Named symbol references.
[bison.git] / src / scan-gram.l
CommitLineData
e9955c83 1/* Bison Grammar Scanner -*- C -*-
3b1e470c 2
ba061fa6
AD
3 Copyright (C) 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009
4 Free Software Foundation, Inc.
e9955c83
AD
5
6 This file is part of Bison, the GNU Compiler Compiler.
7
f16b0819 8 This program is free software: you can redistribute it and/or modify
e9955c83 9 it under the terms of the GNU General Public License as published by
f16b0819 10 the Free Software Foundation, either version 3 of the License, or
e9955c83
AD
11 (at your option) any later version.
12
13 This program is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU General Public License for more details.
17
18 You should have received a copy of the GNU General Public License
f16b0819 19 along with this program. If not, see <http://www.gnu.org/licenses/>. */
e9955c83 20
aa418041 21%option debug nodefault nounput noyywrap never-interactive
e9955c83
AD
22%option prefix="gram_" outfile="lex.yy.c"
23
24%{
4f6e011e
PE
25/* Work around a bug in flex 2.5.31. See Debian bug 333231
26 <http://bugs.debian.org/cgi-bin/bugreport.cgi?bug=333231>. */
27#undef gram_wrap
28#define gram_wrap() 1
29
e9071366 30#define FLEX_PREFIX(Id) gram_ ## Id
0305d25e 31#include <src/flex-scanner.h>
223ff46e 32
0305d25e
AD
33#include <src/complain.h>
34#include <src/files.h>
35#include <src/gram.h>
36#include <quotearg.h>
37#include <src/reader.h>
38#include <src/uniqstr.h>
e9955c83 39
e9071366
AD
40#include <mbswidth.h>
41#include <quote.h>
42
0305d25e 43#include <src/scan-gram.h>
e9071366
AD
44
45#define YY_DECL GRAM_LEX_DECL
2346344a 46
3f2d73f1 47#define YY_USER_INIT \
e9071366 48 code_start = scanner_cursor = loc->start; \
dc9701e8 49
3f2d73f1 50/* Location of scanner cursor. */
4a678af8 51static boundary scanner_cursor;
41141c56 52
e9071366 53#define YY_USER_ACTION location_compute (loc, &scanner_cursor, yytext, yyleng);
d8d3f94a 54
6c30d641 55static size_t no_cr_read (FILE *, char *, size_t);
d8d3f94a
PE
56#define YY_INPUT(buf, result, size) ((result) = no_cr_read (yyin, buf, size))
57
ba061fa6
AD
/* Store the interned variable name VALUE as the token's semantic
   value and make the scanner return PERCENT_FLAG.  Wrapped in
   do/while (0) so it can be used as a single statement.  */
#define RETURN_PERCENT_FLAG(Value)                      \
  do {                                                  \
    val->uniqstr = uniqstr_new (Value);                 \
    return PERCENT_FLAG;                                \
  } while (0)
63
b9f1d9a4
AR
/* Give the current token back to the input: move the scanner cursor's
   column back by the displayed width of yytext, then push all of
   yytext back with yyless (0) so that it is scanned again.  */
#define ROLLBACK_CURRENT_TOKEN                                  \
  do {                                                          \
    scanner_cursor.column -= mbsnwidth (yytext, yyleng, 0);     \
    yyless (0);                                                 \
  } while (0)
ba061fa6 69
7ec2d4cd 70/* A string representing the most recently saved token. */
7c0c6181 71static char *last_string;
7ec2d4cd 72
b9f1d9a4
AR
73/* Bracketed identifier */
74static uniqstr bracketed_id_str = 0;
75static location bracketed_id_loc;
76static boundary bracketed_id_start;
77static int bracketed_id_context_state = 0;
78
7ec2d4cd 79void
e9071366 80gram_scanner_last_string_free (void)
7ec2d4cd 81{
41141c56 82 STRING_FREE;
7ec2d4cd 83}
e9955c83 84
4517da37 85static void handle_syncline (char *, location);
1452af69 86static unsigned long int scan_integer (char const *p, int base, location loc);
d8d3f94a 87static int convert_ucn_to_byte (char const *hex_text);
aa418041 88static void unexpected_eof (boundary, char const *);
4febdd96 89static void unexpected_newline (boundary, char const *);
e9955c83
AD
90
91%}
e9071366
AD
92 /* A C-like comment in directives/rules. */
93%x SC_YACC_COMMENT
94 /* Strings and characters in directives/rules. */
e9955c83 95%x SC_ESCAPED_STRING SC_ESCAPED_CHARACTER
e9071366
AD
96 /* A identifier was just read in directives/rules. Special state
97 to capture the sequence `identifier :'. */
98%x SC_AFTER_IDENTIFIER
cb823b6f
AD
99 /* A complex tag, with nested angles brackets. */
100%x SC_TAG
e9071366
AD
101
102 /* Three types of user code:
103 - prologue (code between `%{' `%}' in the first section, before %%);
104 - actions, printers, union, etc, (between braced in the middle section);
105 - epilogue (everything after the second %%). */
106%x SC_PROLOGUE SC_BRACED_CODE SC_EPILOGUE
107 /* C and C++ comments in code. */
108%x SC_COMMENT SC_LINE_COMMENT
109 /* Strings and characters in code. */
110%x SC_STRING SC_CHARACTER
b9f1d9a4
AR
111 /* Bracketed identifiers support */
112%x SC_BRACKETED_ID SC_RETURN_BRACKETED_ID
e9955c83 113
cdf3f113
AD
114letter [-.abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ_]
115id {letter}({letter}|[0-9])*
4f646c37 116directive %{id}
624a35e2 117int [0-9]+
d8d3f94a
PE
118
119/* POSIX says that a tag must be both an id and a C union member, but
120 historically almost any character is allowed in a tag. We disallow
cb823b6f
AD
121 NUL, as this simplifies our implementation. We disallow angle
122 bracket to match them in nested pairs: several languages use them
123 for generics/template types. */
124tag [^\0<>]+
d8d3f94a
PE
125
126/* Zero or more instances of backslash-newline. Following GCC, allow
127 white space between the backslash and the newline. */
128splice (\\[ \f\t\v]*\n)*
e9955c83
AD
129
130%%
%{
  /* Nesting level.  Either for nested braces, or nested angle brackets
     (but not mixed).  */
  int nesting IF_LINT (= 0);

  /* Parent context state, when applicable.  */
  int context_state IF_LINT (= 0);

  /* Location of most recent identifier, when applicable.  */
  location id_loc IF_LINT (= empty_location);

  /* Where containing code started, when applicable.  Its initial
     value is relevant only when yylex is invoked in the SC_EPILOGUE
     start condition.  */
  boundary code_start = scanner_cursor;

  /* Where containing comment or string or character literal started,
     when applicable.  */
  boundary token_start IF_LINT (= scanner_cursor);
%}
151
152
3f2d73f1
PE
153 /*-----------------------.
154 | Scanning white space. |
155 `-----------------------*/
156
b9f1d9a4 157<INITIAL,SC_AFTER_IDENTIFIER,SC_BRACKETED_ID,SC_RETURN_BRACKETED_ID>
3f2d73f1 158{
4febdd96 159 /* Comments and white space. */
83adb046 160 "," warn_at (*loc, _("stray `,' treated as white space"));
4febdd96 161 [ \f\n\t\v] |
3f2d73f1 162 "//".* ;
83adb046
PE
163 "/*" {
164 token_start = loc->start;
165 context_state = YY_START;
166 BEGIN SC_YACC_COMMENT;
167 }
3f2d73f1
PE
168
169 /* #line directives are not documented, and may be withdrawn or
170 modified in future versions of Bison. */
171 ^"#line "{int}" \"".*"\"\n" {
4517da37 172 handle_syncline (yytext + sizeof "#line " - 1, *loc);
3f2d73f1
PE
173 }
174}
175
176
e9955c83
AD
177 /*----------------------------.
178 | Scanning Bison directives. |
179 `----------------------------*/
a7c09cba
DJ
180
181 /* For directives that are also command line options, the regex must be
182 "%..."
183 after "[-_]"s are removed, and the directive must match the --long
184 option name, with a single string argument. Otherwise, add exceptions
185 to ../build-aux/cross-options.pl. */
186
e9955c83
AD
187<INITIAL>
188{
deef2a0a 189 "%binary" return PERCENT_NONASSOC;
136a0f76 190 "%code" return PERCENT_CODE;
fa819509 191 "%debug" RETURN_PERCENT_FLAG("parse.trace");
deef2a0a
AD
192 "%default"[-_]"prec" return PERCENT_DEFAULT_PREC;
193 "%define" return PERCENT_DEFINE;
194 "%defines" return PERCENT_DEFINES;
195 "%destructor" return PERCENT_DESTRUCTOR;
196 "%dprec" return PERCENT_DPREC;
71b00ed8 197 "%error"[-_]"verbose" RETURN_PERCENT_FLAG("error-verbose");
deef2a0a
AD
198 "%expect" return PERCENT_EXPECT;
199 "%expect"[-_]"rr" return PERCENT_EXPECT_RR;
200 "%file-prefix" return PERCENT_FILE_PREFIX;
e9955c83 201 "%fixed"[-_]"output"[-_]"files" return PERCENT_YACC;
deef2a0a
AD
202 "%initial-action" return PERCENT_INITIAL_ACTION;
203 "%glr-parser" return PERCENT_GLR_PARSER;
204 "%language" return PERCENT_LANGUAGE;
205 "%left" return PERCENT_LEFT;
206 "%lex-param" return PERCENT_LEX_PARAM;
bc0f5737 207 "%locations" RETURN_PERCENT_FLAG("locations");
deef2a0a
AD
208 "%merge" return PERCENT_MERGE;
209 "%name"[-_]"prefix" return PERCENT_NAME_PREFIX;
210 "%no"[-_]"default"[-_]"prec" return PERCENT_NO_DEFAULT_PREC;
211 "%no"[-_]"lines" return PERCENT_NO_LINES;
212 "%nonassoc" return PERCENT_NONASSOC;
213 "%nondeterministic-parser" return PERCENT_NONDETERMINISTIC_PARSER;
214 "%nterm" return PERCENT_NTERM;
215 "%output" return PERCENT_OUTPUT;
216 "%parse-param" return PERCENT_PARSE_PARAM;
217 "%prec" return PERCENT_PREC;
d78f0ac9 218 "%precedence" return PERCENT_PRECEDENCE;
deef2a0a 219 "%printer" return PERCENT_PRINTER;
4920ae8b 220 "%pure"[-_]"parser" RETURN_PERCENT_FLAG("api.pure");
deef2a0a
AD
221 "%require" return PERCENT_REQUIRE;
222 "%right" return PERCENT_RIGHT;
223 "%skeleton" return PERCENT_SKELETON;
224 "%start" return PERCENT_START;
225 "%term" return PERCENT_TOKEN;
226 "%token" return PERCENT_TOKEN;
227 "%token"[-_]"table" return PERCENT_TOKEN_TABLE;
228 "%type" return PERCENT_TYPE;
229 "%union" return PERCENT_UNION;
230 "%verbose" return PERCENT_VERBOSE;
231 "%yacc" return PERCENT_YACC;
e9955c83 232
3f2d73f1 233 {directive} {
41141c56 234 complain_at (*loc, _("invalid directive: %s"), quote (yytext));
412f8a59 235 }
900c5db5 236
e9955c83 237 "=" return EQUAL;
e9071366 238 "|" return PIPE;
e9955c83
AD
239 ";" return SEMICOLON;
240
3f2d73f1 241 {id} {
58d7a1a1 242 val->uniqstr = uniqstr_new (yytext);
3f2d73f1 243 id_loc = *loc;
b9f1d9a4 244 bracketed_id_str = NULL;
3f2d73f1 245 BEGIN SC_AFTER_IDENTIFIER;
e9955c83
AD
246 }
247
d8d3f94a 248 {int} {
1452af69
PE
249 val->integer = scan_integer (yytext, 10, *loc);
250 return INT;
251 }
252 0[xX][0-9abcdefABCDEF]+ {
253 val->integer = scan_integer (yytext, 16, *loc);
d8d3f94a
PE
254 return INT;
255 }
e9955c83 256
84a1cb5a
AD
257 /* Identifiers may not start with a digit. Yet, don't silently
258 accept "1FOO" as "1 FOO". */
259 {int}{id} {
260 complain_at (*loc, _("invalid identifier: %s"), quote (yytext));
261 }
262
e9955c83 263 /* Characters. We don't check there is only one. */
3f2d73f1 264 "'" STRING_GROW; token_start = loc->start; BEGIN SC_ESCAPED_CHARACTER;
e9955c83
AD
265
266 /* Strings. */
ca407bdf 267 "\"" token_start = loc->start; BEGIN SC_ESCAPED_STRING;
e9955c83
AD
268
269 /* Prologue. */
3f2d73f1 270 "%{" code_start = loc->start; BEGIN SC_PROLOGUE;
e9955c83
AD
271
272 /* Code in between braces. */
3f2d73f1
PE
273 "{" {
274 STRING_GROW;
cb823b6f 275 nesting = 0;
3f2d73f1
PE
276 code_start = loc->start;
277 BEGIN SC_BRACED_CODE;
278 }
e9955c83
AD
279
280 /* A type. */
cb823b6f
AD
281 "<*>" return TAG_ANY;
282 "<>" return TAG_NONE;
d8d3f94a 283 "<"{tag}">" {
223ff46e 284 obstack_grow (&obstack_for_string, yytext + 1, yyleng - 2);
41141c56 285 STRING_FINISH;
223ff46e 286 val->uniqstr = uniqstr_new (last_string);
41141c56 287 STRING_FREE;
cb823b6f
AD
288 return TAG;
289 }
290 "<" {
291 nesting = 0;
292 token_start = loc->start;
293 BEGIN SC_TAG;
4cdb01db
AD
294 }
295
a706a1cc
PE
296 "%%" {
297 static int percent_percent_count;
e9955c83 298 if (++percent_percent_count == 2)
a2bc9dbc 299 BEGIN SC_EPILOGUE;
e9955c83
AD
300 return PERCENT_PERCENT;
301 }
302
b9f1d9a4
AR
303 "[" {
304 bracketed_id_str = NULL;
305 bracketed_id_start = loc->start;
306 bracketed_id_context_state = YY_START;
307 BEGIN SC_BRACKETED_ID;
308 }
309
a706a1cc 310 . {
41141c56 311 complain_at (*loc, _("invalid character: %s"), quote (yytext));
3f2d73f1 312 }
379f0ac8
PE
313
314 <<EOF>> {
315 loc->start = loc->end = scanner_cursor;
316 yyterminate ();
317 }
3f2d73f1
PE
318}
319
320
cb823b6f
AD
321 /*--------------------------------------------------------------.
322 | Supporting \0 complexifies our implementation for no expected |
323 | added value. |
324 `--------------------------------------------------------------*/
325
326<SC_ESCAPED_CHARACTER,SC_ESCAPED_STRING,SC_TAG>
327{
328 \0 complain_at (*loc, _("invalid null character"));
329}
330
331
3f2d73f1
PE
332 /*-----------------------------------------------------------------.
333 | Scanning after an identifier, checking whether a colon is next. |
334 `-----------------------------------------------------------------*/
335
336<SC_AFTER_IDENTIFIER>
337{
b9f1d9a4
AR
338 "[" {
339 if (!bracketed_id_str)
340 {
341 bracketed_id_start = loc->start;
342 bracketed_id_context_state = YY_START;
343 BEGIN SC_BRACKETED_ID;
344 }
345 else
346 {
347 ROLLBACK_CURRENT_TOKEN;
348 BEGIN SC_RETURN_BRACKETED_ID;
349 *loc = id_loc;
350 return ID;
351 }
352 }
3f2d73f1 353 ":" {
b9f1d9a4 354 BEGIN (bracketed_id_str ? SC_RETURN_BRACKETED_ID : INITIAL);
3f2d73f1 355 *loc = id_loc;
3f2d73f1
PE
356 return ID_COLON;
357 }
358 . {
b9f1d9a4
AR
359 ROLLBACK_CURRENT_TOKEN;
360 BEGIN (bracketed_id_str ? SC_RETURN_BRACKETED_ID : INITIAL);
3f2d73f1 361 *loc = id_loc;
3f2d73f1
PE
362 return ID;
363 }
364 <<EOF>> {
b9f1d9a4 365 BEGIN (bracketed_id_str ? SC_RETURN_BRACKETED_ID : INITIAL);
3f2d73f1 366 *loc = id_loc;
3f2d73f1 367 return ID;
e9955c83
AD
368 }
369}
370
b9f1d9a4
AR
371 /*--------------------------------.
372 | Scanning bracketed identifiers. |
373 `--------------------------------*/
374
375<SC_BRACKETED_ID>
376{
377 {id} {
378 if (!bracketed_id_str)
379 {
380 bracketed_id_str = uniqstr_new (yytext);
381 bracketed_id_loc = *loc;
382 }
383 else
384 {
385 complain_at (*loc, _("redundant identifier in bracketed name: %s"),
386 quote (yytext));
387 }
388 }
389 "]" {
390 BEGIN bracketed_id_context_state;
391 if (bracketed_id_str)
392 {
393 if (INITIAL == bracketed_id_context_state)
394 {
395 val->uniqstr = bracketed_id_str;
396 bracketed_id_str = 0;
397 *loc = bracketed_id_loc;
398 return BRACKETED_ID;
399 }
400 }
401 else
402 complain_at (*loc, _("a non empty identifier expected"));
403 }
404 . {
405 complain_at (*loc, _("invalid character in bracketed name: %s"),
406 quote (yytext));
407 }
408 <<EOF>> {
409 BEGIN bracketed_id_context_state;
410 unexpected_eof (bracketed_id_start, "]");
411 }
412}
413
<SC_RETURN_BRACKETED_ID>
{
  . {
    /* Push the character back, then deliver the pending bracketed
       name before resuming normal scanning.  */
    ROLLBACK_CURRENT_TOKEN;
    val->uniqstr = bracketed_id_str;
    bracketed_id_str = 0;
    *loc = bracketed_id_loc;
    BEGIN INITIAL;
    return BRACKETED_ID;
  }
}
425
e9955c83 426
d8d3f94a
PE
427 /*---------------------------------------------------------------.
428 | Scanning a Yacc comment. The initial `/ *' is already eaten. |
429 `---------------------------------------------------------------*/
e9955c83 430
d8d3f94a 431<SC_YACC_COMMENT>
e9955c83 432{
3f2d73f1 433 "*/" BEGIN context_state;
a706a1cc 434 .|\n ;
aa418041 435 <<EOF>> unexpected_eof (token_start, "*/"); BEGIN context_state;
d8d3f94a
PE
436}
437
438
439 /*------------------------------------------------------------.
440 | Scanning a C comment. The initial `/ *' is already eaten. |
441 `------------------------------------------------------------*/
442
443<SC_COMMENT>
444{
3f2d73f1 445 "*"{splice}"/" STRING_GROW; BEGIN context_state;
aa418041 446 <<EOF>> unexpected_eof (token_start, "*/"); BEGIN context_state;
e9955c83
AD
447}
448
449
d8d3f94a
PE
450 /*--------------------------------------------------------------.
451 | Scanning a line comment. The initial `//' is already eaten. |
452 `--------------------------------------------------------------*/
453
454<SC_LINE_COMMENT>
455{
3f2d73f1 456 "\n" STRING_GROW; BEGIN context_state;
41141c56 457 {splice} STRING_GROW;
3f2d73f1 458 <<EOF>> BEGIN context_state;
d8d3f94a
PE
459}
460
461
4febdd96
PE
462 /*------------------------------------------------.
463 | Scanning a Bison string, including its escapes. |
464 | The initial quote is already eaten. |
465 `------------------------------------------------*/
e9955c83
AD
466
467<SC_ESCAPED_STRING>
468{
47aee066
JD
469 "\""|"\n" {
470 if (yytext[0] == '\n')
471 unexpected_newline (token_start, "\"");
472 STRING_FINISH;
473 loc->start = token_start;
474 val->chars = last_string;
475 BEGIN INITIAL;
476 return STRING;
477 }
478 <<EOF>> {
479 unexpected_eof (token_start, "\"");
41141c56 480 STRING_FINISH;
3f2d73f1 481 loc->start = token_start;
223ff46e 482 val->chars = last_string;
a706a1cc 483 BEGIN INITIAL;
e9955c83
AD
484 return STRING;
485 }
e9955c83
AD
486}
487
4febdd96
PE
488 /*----------------------------------------------------------.
489 | Scanning a Bison character literal, decoding its escapes. |
490 | The initial quote is already eaten. |
491 `----------------------------------------------------------*/
e9955c83
AD
492
493<SC_ESCAPED_CHARACTER>
494{
47aee066
JD
495 "'"|"\n" {
496 if (yytext[0] == '\n')
497 unexpected_newline (token_start, "'");
41141c56
PE
498 STRING_GROW;
499 STRING_FINISH;
3f2d73f1 500 loc->start = token_start;
58d7a1a1 501 val->character = last_string[1];
41141c56 502 STRING_FREE;
a706a1cc 503 BEGIN INITIAL;
58d7a1a1 504 return CHAR;
e9955c83 505 }
47aee066
JD
506 <<EOF>> {
507 unexpected_eof (token_start, "'");
508 STRING_FINISH;
509 loc->start = token_start;
4f646c37 510 if (strlen (last_string) > 1)
47aee066
JD
511 val->character = last_string[1];
512 else
513 val->character = last_string[0];
514 STRING_FREE;
515 BEGIN INITIAL;
516 return CHAR;
517 }
4febdd96 518}
a706a1cc 519
cb823b6f
AD
520 /*-----------------------------------------------------------.
521 | Scanning a Bison nested tag. The initial angle bracket is |
522 | already eaten. |
523 `-----------------------------------------------------------*/
524
525<SC_TAG>
4febdd96 526{
cb823b6f
AD
527 ">" {
528 --nesting;
529 if (nesting < 0)
530 {
531 STRING_FINISH;
532 loc->start = token_start;
533 val->uniqstr = uniqstr_new (last_string);
534 STRING_FREE;
535 BEGIN INITIAL;
536 return TAG;
537 }
538 STRING_GROW;
539 }
540
541 [^<>]+ STRING_GROW;
542 "<"+ STRING_GROW; nesting += yyleng;
e9955c83 543
cb823b6f
AD
544 <<EOF>> {
545 unexpected_eof (token_start, ">");
546 STRING_FINISH;
547 loc->start = token_start;
548 val->uniqstr = uniqstr_new (last_string);
549 STRING_FREE;
550 BEGIN INITIAL;
551 return TAG;
552 }
553}
e9955c83
AD
554
555 /*----------------------------.
556 | Decode escaped characters. |
557 `----------------------------*/
558
559<SC_ESCAPED_STRING,SC_ESCAPED_CHARACTER>
560{
d8d3f94a 561 \\[0-7]{1,3} {
4517da37 562 unsigned long int c = strtoul (yytext + 1, NULL, 8);
d8d3f94a 563 if (UCHAR_MAX < c)
3f2d73f1 564 complain_at (*loc, _("invalid escape sequence: %s"), quote (yytext));
05ac60f3 565 else if (! c)
92ac3705 566 complain_at (*loc, _("invalid null character: %s"), quote (yytext));
e9955c83 567 else
223ff46e 568 obstack_1grow (&obstack_for_string, c);
e9955c83
AD
569 }
570
6b0d38ab 571 \\x[0-9abcdefABCDEF]+ {
4517da37
PE
572 verify (UCHAR_MAX < ULONG_MAX);
573 unsigned long int c = strtoul (yytext + 2, NULL, 16);
574 if (UCHAR_MAX < c)
3f2d73f1 575 complain_at (*loc, _("invalid escape sequence: %s"), quote (yytext));
92ac3705
PE
576 else if (! c)
577 complain_at (*loc, _("invalid null character: %s"), quote (yytext));
d8d3f94a 578 else
223ff46e 579 obstack_1grow (&obstack_for_string, c);
e9955c83
AD
580 }
581
223ff46e
PE
582 \\a obstack_1grow (&obstack_for_string, '\a');
583 \\b obstack_1grow (&obstack_for_string, '\b');
584 \\f obstack_1grow (&obstack_for_string, '\f');
585 \\n obstack_1grow (&obstack_for_string, '\n');
586 \\r obstack_1grow (&obstack_for_string, '\r');
587 \\t obstack_1grow (&obstack_for_string, '\t');
588 \\v obstack_1grow (&obstack_for_string, '\v');
412f8a59
PE
589
590 /* \\[\"\'?\\] would be shorter, but it confuses xgettext. */
223ff46e 591 \\("\""|"'"|"?"|"\\") obstack_1grow (&obstack_for_string, yytext[1]);
412f8a59 592
6b0d38ab 593 \\(u|U[0-9abcdefABCDEF]{4})[0-9abcdefABCDEF]{4} {
d8d3f94a
PE
594 int c = convert_ucn_to_byte (yytext);
595 if (c < 0)
3f2d73f1 596 complain_at (*loc, _("invalid escape sequence: %s"), quote (yytext));
92ac3705
PE
597 else if (! c)
598 complain_at (*loc, _("invalid null character: %s"), quote (yytext));
d8d3f94a 599 else
223ff46e 600 obstack_1grow (&obstack_for_string, c);
d8d3f94a 601 }
4f25ebb0 602 \\(.|\n) {
3f2d73f1 603 complain_at (*loc, _("unrecognized escape sequence: %s"), quote (yytext));
41141c56 604 STRING_GROW;
e9955c83
AD
605 }
606}
607
4febdd96
PE
608 /*--------------------------------------------.
609 | Scanning user-code characters and strings. |
610 `--------------------------------------------*/
e9955c83 611
4febdd96
PE
612<SC_CHARACTER,SC_STRING>
613{
e9071366 614 {splice}|\\{splice}[^\n\[\]] STRING_GROW;
4febdd96 615}
e9955c83
AD
616
617<SC_CHARACTER>
618{
4febdd96
PE
619 "'" STRING_GROW; BEGIN context_state;
620 \n unexpected_newline (token_start, "'"); BEGIN context_state;
621 <<EOF>> unexpected_eof (token_start, "'"); BEGIN context_state;
e9955c83
AD
622}
623
e9955c83
AD
624<SC_STRING>
625{
4febdd96
PE
626 "\"" STRING_GROW; BEGIN context_state;
627 \n unexpected_newline (token_start, "\""); BEGIN context_state;
628 <<EOF>> unexpected_eof (token_start, "\""); BEGIN context_state;
e9955c83
AD
629}
630
631
632 /*---------------------------------------------------.
633 | Strings, comments etc. can be found in user code. |
634 `---------------------------------------------------*/
635
636<SC_BRACED_CODE,SC_PROLOGUE,SC_EPILOGUE>
637{
3f2d73f1
PE
638 "'" {
639 STRING_GROW;
640 context_state = YY_START;
641 token_start = loc->start;
642 BEGIN SC_CHARACTER;
643 }
644 "\"" {
645 STRING_GROW;
646 context_state = YY_START;
647 token_start = loc->start;
648 BEGIN SC_STRING;
649 }
650 "/"{splice}"*" {
651 STRING_GROW;
652 context_state = YY_START;
653 token_start = loc->start;
654 BEGIN SC_COMMENT;
655 }
656 "/"{splice}"/" {
657 STRING_GROW;
658 context_state = YY_START;
659 BEGIN SC_LINE_COMMENT;
660 }
e9955c83
AD
661}
662
663
624a35e2 664
58d7a1a1
AD
665 /*-----------------------------------------------------------.
666 | Scanning some code in braces (actions). The initial "{" is |
667 | already eaten. |
668 `-----------------------------------------------------------*/
e9955c83
AD
669
670<SC_BRACED_CODE>
671{
cb823b6f
AD
672 "{"|"<"{splice}"%" STRING_GROW; nesting++;
673 "%"{splice}">" STRING_GROW; nesting--;
e9955c83 674 "}" {
25522739
PE
675 obstack_1grow (&obstack_for_string, '}');
676
cb823b6f
AD
677 --nesting;
678 if (nesting < 0)
e9955c83 679 {
41141c56 680 STRING_FINISH;
3f2d73f1 681 loc->start = code_start;
eb095650 682 val->code = last_string;
a706a1cc 683 BEGIN INITIAL;
58d7a1a1 684 return BRACED_CODE;
e9955c83
AD
685 }
686 }
687
a706a1cc
PE
688 /* Tokenize `<<%' correctly (as `<<' `%') rather than incorrrectly
689 (as `<' `<%'). */
41141c56 690 "<"{splice}"<" STRING_GROW;
a706a1cc 691
47aee066
JD
692 <<EOF>> {
693 unexpected_eof (code_start, "}");
694 STRING_FINISH;
695 loc->start = code_start;
eb095650 696 val->code = last_string;
47aee066
JD
697 BEGIN INITIAL;
698 return BRACED_CODE;
699 }
e9955c83
AD
700}
701
702
703 /*--------------------------------------------------------------.
704 | Scanning some prologue: from "%{" (already scanned) to "%}". |
705 `--------------------------------------------------------------*/
706
707<SC_PROLOGUE>
708{
709 "%}" {
41141c56 710 STRING_FINISH;
3f2d73f1 711 loc->start = code_start;
223ff46e 712 val->chars = last_string;
a706a1cc 713 BEGIN INITIAL;
e9955c83
AD
714 return PROLOGUE;
715 }
716
47aee066
JD
717 <<EOF>> {
718 unexpected_eof (code_start, "%}");
719 STRING_FINISH;
720 loc->start = code_start;
721 val->chars = last_string;
722 BEGIN INITIAL;
723 return PROLOGUE;
724 }
e9955c83
AD
725}
726
727
728 /*---------------------------------------------------------------.
729 | Scanning the epilogue (everything after the second "%%", which |
d8d3f94a 730 | has already been eaten). |
e9955c83
AD
731 `---------------------------------------------------------------*/
732
733<SC_EPILOGUE>
734{
e9955c83 735 <<EOF>> {
41141c56 736 STRING_FINISH;
3f2d73f1 737 loc->start = code_start;
223ff46e 738 val->chars = last_string;
a706a1cc 739 BEGIN INITIAL;
e9955c83
AD
740 return EPILOGUE;
741 }
742}
743
744
4febdd96
PE
745 /*-----------------------------------------------------.
746 | By default, grow the string obstack with the input. |
747 `-----------------------------------------------------*/
748
749<SC_COMMENT,SC_LINE_COMMENT,SC_BRACED_CODE,SC_PROLOGUE,SC_EPILOGUE,SC_STRING,SC_CHARACTER,SC_ESCAPED_STRING,SC_ESCAPED_CHARACTER>. |
750<SC_COMMENT,SC_LINE_COMMENT,SC_BRACED_CODE,SC_PROLOGUE,SC_EPILOGUE>\n STRING_GROW;
751
e9955c83
AD
752%%
753
6c30d641
PE
/* Read bytes from FP into buffer BUF of size SIZE.  Return the
   number of bytes stored in BUF.  Remove '\r' from input, treating
   \r\n and isolated \r as \n.  The rewrite is done in place, so the
   returned count may be smaller than the number of bytes read.  */

static size_t
no_cr_read (FILE *fp, char *buf, size_t size)
{
  size_t bytes_read = fread (buf, 1, size, fp);
  if (bytes_read)
    {
      char *w = memchr (buf, '\r', bytes_read);
      if (w)
        {
          /* W is the write position and R the read position; both
             start just past the first '\r'.  LIM is the end of the
             data that was read.  */
          char const *r = ++w;
          char const *lim = buf + bytes_read;

          for (;;)
            {
              /* Found an '\r'.  Treat it like '\n', but ignore any
                 '\n' that immediately follows.  */
              w[-1] = '\n';
              if (r == lim)
                {
                  /* The '\r' was the last byte read: peek at the
                     stream to drop a '\n' that follows it.  Any other
                     byte is pushed back; stop if that fails.  */
                  int ch = getc (fp);
                  if (ch != '\n' && ungetc (ch, fp) != ch)
                    break;
                }
              else if (*r == '\n')
                r++;

              /* Copy until the next '\r'.  */
              do
                {
                  if (r == lim)
                    return w - buf;
                }
              while ((*w++ = *r++) != '\r');
            }

          return w - buf;
        }
    }

  return bytes_read;
}
799
800
f25bfb75 801
1452af69
PE
802/*------------------------------------------------------.
803| Scan NUMBER for a base-BASE integer at location LOC. |
804`------------------------------------------------------*/
805
806static unsigned long int
807scan_integer (char const *number, int base, location loc)
808{
4517da37
PE
809 verify (INT_MAX < ULONG_MAX);
810 unsigned long int num = strtoul (number, NULL, base);
811
812 if (INT_MAX < num)
1452af69
PE
813 {
814 complain_at (loc, _("integer out of range: %s"), quote (number));
815 num = INT_MAX;
816 }
4517da37 817
1452af69
PE
818 return num;
819}
820
821
d8d3f94a
PE
/*------------------------------------------------------------------.
| Convert universal character name UCN to a single-byte character,  |
| and return that character.  Return -1 if UCN does not correspond  |
| to a single-byte character.  UCN is the full escape text: its     |
| first two characters are skipped and the rest is parsed as        |
| hexadecimal digits.                                               |
`------------------------------------------------------------------*/

static int
convert_ucn_to_byte (char const *ucn)
{
  verify (UCHAR_MAX <= INT_MAX);
  unsigned long int code = strtoul (ucn + 2, NULL, 16);

  /* FIXME: Currently we assume Unicode-compatible unibyte characters
     on ASCII hosts (i.e., Latin-1 on hosts with 8-bit bytes).  On
     non-ASCII hosts we support only the portable C character set.
     These limitations should be removed once we add support for
     multibyte characters.  */

  if (UCHAR_MAX < code)
    return -1;

#if ! ('$' == 0x24 && '@' == 0x40 && '`' == 0x60 && '~' == 0x7e)
  {
    /* A non-ASCII host.  Use CODE to index into a table of the C
       basic execution character set, which is guaranteed to exist on
       all Standard C platforms.  This table also includes '$', '@',
       and '`', which are not in the basic execution character set but
       which are unibyte characters on all the platforms that we know
       about.  */
    static signed char const table[] =
      {
        '\0',   -1,   -1,   -1,   -1,   -1,   -1, '\a',
        '\b', '\t', '\n', '\v', '\f', '\r',   -1,   -1,
          -1,   -1,   -1,   -1,   -1,   -1,   -1,   -1,
          -1,   -1,   -1,   -1,   -1,   -1,   -1,   -1,
         ' ',  '!',  '"',  '#',  '$',  '%',  '&', '\'',
         '(',  ')',  '*',  '+',  ',',  '-',  '.',  '/',
         '0',  '1',  '2',  '3',  '4',  '5',  '6',  '7',
         '8',  '9',  ':',  ';',  '<',  '=',  '>',  '?',
         '@',  'A',  'B',  'C',  'D',  'E',  'F',  'G',
         'H',  'I',  'J',  'K',  'L',  'M',  'N',  'O',
         'P',  'Q',  'R',  'S',  'T',  'U',  'V',  'W',
         'X',  'Y',  'Z',  '[', '\\',  ']',  '^',  '_',
         '`',  'a',  'b',  'c',  'd',  'e',  'f',  'g',
         'h',  'i',  'j',  'k',  'l',  'm',  'n',  'o',
         'p',  'q',  'r',  's',  't',  'u',  'v',  'w',
         'x',  'y',  'z',  '{',  '|',  '}',  '~'
      };

    /* Return the table entry directly as an int, so that a -1 entry
       is never stored in the unsigned CODE and narrowed back.  */
    return code < sizeof table ? table[code] : -1;
  }
#else
  /* An ASCII host: CODE is at most UCHAR_MAX here, so it fits in an
     int (see the verify above).  */
  return (int) code;
#endif
}
877
878
900c5db5
AD
879/*----------------------------------------------------------------.
880| Handle `#line INT "FILE"'. ARGS has already skipped `#line '. |
881`----------------------------------------------------------------*/
882
883static void
4517da37 884handle_syncline (char *args, location loc)
900c5db5 885{
4517da37
PE
886 char *after_num;
887 unsigned long int lineno = strtoul (args, &after_num, 10);
888 char *file = strchr (after_num, '"') + 1;
889 *strchr (file, '"') = '\0';
890 if (INT_MAX <= lineno)
891 {
892 warn_at (loc, _("line number overflow"));
893 lineno = INT_MAX;
894 }
e9071366 895 current_file = uniqstr_new (file);
0c8e079f 896 boundary_set (&scanner_cursor, current_file, lineno, 1);
4517da37
PE
897}
898
899
4febdd96
PE
900/*----------------------------------------------------------------.
901| For a token or comment starting at START, report message MSGID, |
902| which should say that an end marker was found before |
903| the expected TOKEN_END. |
904`----------------------------------------------------------------*/
905
906static void
907unexpected_end (boundary start, char const *msgid, char const *token_end)
908{
909 location loc;
910 loc.start = start;
911 loc.end = scanner_cursor;
912 complain_at (loc, _(msgid), token_end);
913}
914
915
3f2d73f1
PE
916/*------------------------------------------------------------------------.
917| Report an unexpected EOF in a token or comment starting at START. |
918| An end of file was encountered and the expected TOKEN_END was missing. |
3f2d73f1 919`------------------------------------------------------------------------*/
a706a1cc
PE
920
921static void
aa418041 922unexpected_eof (boundary start, char const *token_end)
a706a1cc 923{
4febdd96
PE
924 unexpected_end (start, N_("missing `%s' at end of file"), token_end);
925}
926
927
928/*----------------------------------------.
929| Likewise, but for unexpected newlines. |
930`----------------------------------------*/
931
932static void
933unexpected_newline (boundary start, char const *token_end)
934{
935 unexpected_end (start, N_("missing `%s' at end of line"), token_end);
a706a1cc
PE
936}
937
938
f25bfb75
AD
939/*-------------------------.
940| Initialize the scanner. |
941`-------------------------*/
942
1d6412ad 943void
e9071366 944gram_scanner_initialize (void)
1d6412ad 945{
223ff46e 946 obstack_init (&obstack_for_string);
1d6412ad
AD
947}
948
949
f25bfb75
AD
950/*-----------------------------------------------.
951| Free all the memory allocated to the scanner. |
952`-----------------------------------------------*/
953
4cdb01db 954void
e9071366 955gram_scanner_free (void)
4cdb01db 956{
223ff46e 957 obstack_free (&obstack_for_string, 0);
536545f3 958 /* Reclaim Flex's buffers. */
580b8926 959 yylex_destroy ();
4cdb01db 960}