]> git.saurik.com Git - bison.git/blame - src/scan-gram.l
Fix %error-verbose for conflicts resolved by %nonassoc.
[bison.git] / src / scan-gram.l
CommitLineData
e9955c83 1/* Bison Grammar Scanner -*- C -*-
3b1e470c 2
ba061fa6
AD
3 Copyright (C) 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009
4 Free Software Foundation, Inc.
e9955c83
AD
5
6 This file is part of Bison, the GNU Compiler Compiler.
7
f16b0819 8 This program is free software: you can redistribute it and/or modify
e9955c83 9 it under the terms of the GNU General Public License as published by
f16b0819 10 the Free Software Foundation, either version 3 of the License, or
e9955c83
AD
11 (at your option) any later version.
12
13 This program is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU General Public License for more details.
17
18 You should have received a copy of the GNU General Public License
f16b0819 19 along with this program. If not, see <http://www.gnu.org/licenses/>. */
e9955c83 20
8d90395d 21%option debug nodefault noinput nounput noyywrap never-interactive
e9955c83
AD
22%option prefix="gram_" outfile="lex.yy.c"
23
24%{
4f6e011e
PE
25/* Work around a bug in flex 2.5.31. See Debian bug 333231
26 <http://bugs.debian.org/cgi-bin/bugreport.cgi?bug=333231>. */
27#undef gram_wrap
28#define gram_wrap() 1
29
e9071366 30#define FLEX_PREFIX(Id) gram_ ## Id
0305d25e 31#include <src/flex-scanner.h>
223ff46e 32
0305d25e
AD
33#include <src/complain.h>
34#include <src/files.h>
35#include <src/gram.h>
36#include <quotearg.h>
37#include <src/reader.h>
38#include <src/uniqstr.h>
e9955c83 39
c2724603 40#include <ctype.h>
e9071366
AD
41#include <mbswidth.h>
42#include <quote.h>
43
0305d25e 44#include <src/scan-gram.h>
e9071366
AD
45
46#define YY_DECL GRAM_LEX_DECL
2346344a 47
3f2d73f1 48#define YY_USER_INIT \
e9071366 49 code_start = scanner_cursor = loc->start; \
dc9701e8 50
3f2d73f1 51/* Location of scanner cursor. */
4a678af8 52static boundary scanner_cursor;
41141c56 53
e9071366 54#define YY_USER_ACTION location_compute (loc, &scanner_cursor, yytext, yyleng);
d8d3f94a 55
6c30d641 56static size_t no_cr_read (FILE *, char *, size_t);
d8d3f94a
PE
57#define YY_INPUT(buf, result, size) ((result) = no_cr_read (yyin, buf, size))
58
ba061fa6
AD
59#define RETURN_PERCENT_FLAG(Value) \
60 do { \
61 val->uniqstr = uniqstr_new (Value); \
62 return PERCENT_FLAG; \
63 } while (0)
64
b9f1d9a4
AR
65#define ROLLBACK_CURRENT_TOKEN \
66 do { \
67 scanner_cursor.column -= mbsnwidth (yytext, yyleng, 0); \
68 yyless (0); \
69 } while (0)
ba061fa6 70
7ec2d4cd 71/* A string representing the most recently saved token. */
7c0c6181 72static char *last_string;
7ec2d4cd 73
872b52bc 74/* Bracketed identifier. */
b9f1d9a4
AR
75static uniqstr bracketed_id_str = 0;
76static location bracketed_id_loc;
77static boundary bracketed_id_start;
78static int bracketed_id_context_state = 0;
79
7ec2d4cd 80void
e9071366 81gram_scanner_last_string_free (void)
7ec2d4cd 82{
41141c56 83 STRING_FREE;
7ec2d4cd 84}
e9955c83 85
4517da37 86static void handle_syncline (char *, location);
1452af69 87static unsigned long int scan_integer (char const *p, int base, location loc);
d8d3f94a 88static int convert_ucn_to_byte (char const *hex_text);
aa418041 89static void unexpected_eof (boundary, char const *);
4febdd96 90static void unexpected_newline (boundary, char const *);
e9955c83
AD
91
92%}
e9071366
AD
93 /* A C-like comment in directives/rules. */
94%x SC_YACC_COMMENT
95 /* Strings and characters in directives/rules. */
e9955c83 96%x SC_ESCAPED_STRING SC_ESCAPED_CHARACTER
e9071366
AD
97 /* An identifier was just read in directives/rules. Special state
98 to capture the sequence `identifier :'. */
99%x SC_AFTER_IDENTIFIER
cb823b6f
AD
100 /* A complex tag, with nested angle brackets. */
101%x SC_TAG
e9071366
AD
102
103 /* Three types of user code:
104 - prologue (code between `%{' `%}' in the first section, before %%);
105 - actions, printers, union, etc. (between braces in the middle section);
106 - epilogue (everything after the second %%). */
107%x SC_PROLOGUE SC_BRACED_CODE SC_EPILOGUE
108 /* C and C++ comments in code. */
109%x SC_COMMENT SC_LINE_COMMENT
110 /* Strings and characters in code. */
111%x SC_STRING SC_CHARACTER
872b52bc 112 /* Bracketed identifiers support. */
b9f1d9a4 113%x SC_BRACKETED_ID SC_RETURN_BRACKETED_ID
e9955c83 114
cdf3f113
AD
115letter [-.abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ_]
116id {letter}({letter}|[0-9])*
4f646c37 117directive %{id}
624a35e2 118int [0-9]+
d8d3f94a
PE
119
120/* POSIX says that a tag must be both an id and a C union member, but
121 historically almost any character is allowed in a tag. We disallow
cb823b6f
AD
122 NUL, as this simplifies our implementation. We disallow angle
123 brackets so that they can be matched in nested pairs: several languages use them
124 for generics/template types. */
125tag [^\0<>]+
d8d3f94a
PE
126
127/* Zero or more instances of backslash-newline. Following GCC, allow
128 white space between the backslash and the newline. */
129splice (\\[ \f\t\v]*\n)*
e9955c83
AD
130
131%%
132%{
cb823b6f
AD
133 /* Nesting level. Either for nested braces, or nested angle brackets
134 (but not mixed). */
135 int nesting IF_LINT (= 0);
1a9e39f1 136
3f2d73f1 137 /* Parent context state, when applicable. */
5362ed19 138 int context_state IF_LINT (= 0);
a706a1cc 139
3f2d73f1 140 /* Location of most recent identifier, when applicable. */
a2bc9dbc 141 location id_loc IF_LINT (= empty_location);
3f2d73f1 142
a2bc9dbc
PE
143 /* Where containing code started, when applicable. Its initial
144 value is relevant only when yylex is invoked in the SC_EPILOGUE
145 start condition. */
146 boundary code_start = scanner_cursor;
3f2d73f1 147
223ff46e
PE
148 /* Where containing comment or string or character literal started,
149 when applicable. */
a2bc9dbc 150 boundary token_start IF_LINT (= scanner_cursor);
e9955c83
AD
151%}
152
153
3f2d73f1
PE
154 /*-----------------------.
155 | Scanning white space. |
156 `-----------------------*/
157
b9f1d9a4 158<INITIAL,SC_AFTER_IDENTIFIER,SC_BRACKETED_ID,SC_RETURN_BRACKETED_ID>
3f2d73f1 159{
4febdd96 160 /* Comments and white space. */
83adb046 161 "," warn_at (*loc, _("stray `,' treated as white space"));
4febdd96 162 [ \f\n\t\v] |
3f2d73f1 163 "//".* ;
83adb046
PE
164 "/*" {
165 token_start = loc->start;
166 context_state = YY_START;
167 BEGIN SC_YACC_COMMENT;
168 }
3f2d73f1
PE
169
170 /* #line directives are not documented, and may be withdrawn or
171 modified in future versions of Bison. */
172 ^"#line "{int}" \"".*"\"\n" {
4517da37 173 handle_syncline (yytext + sizeof "#line " - 1, *loc);
3f2d73f1
PE
174 }
175}
176
177
e9955c83
AD
178 /*----------------------------.
179 | Scanning Bison directives. |
180 `----------------------------*/
a7c09cba
DJ
181
182 /* For directives that are also command line options, the regex must be
183 "%..."
184 after "[-_]"s are removed, and the directive must match the --long
185 option name, with a single string argument. Otherwise, add exceptions
186 to ../build-aux/cross-options.pl. */
187
e9955c83
AD
188<INITIAL>
189{
deef2a0a 190 "%binary" return PERCENT_NONASSOC;
136a0f76 191 "%code" return PERCENT_CODE;
fa819509 192 "%debug" RETURN_PERCENT_FLAG("parse.trace");
deef2a0a
AD
193 "%default"[-_]"prec" return PERCENT_DEFAULT_PREC;
194 "%define" return PERCENT_DEFINE;
195 "%defines" return PERCENT_DEFINES;
196 "%destructor" return PERCENT_DESTRUCTOR;
197 "%dprec" return PERCENT_DPREC;
31b850d2 198 "%error"[-_]"verbose" return PERCENT_ERROR_VERBOSE;
deef2a0a
AD
199 "%expect" return PERCENT_EXPECT;
200 "%expect"[-_]"rr" return PERCENT_EXPECT_RR;
201 "%file-prefix" return PERCENT_FILE_PREFIX;
e9955c83 202 "%fixed"[-_]"output"[-_]"files" return PERCENT_YACC;
deef2a0a
AD
203 "%initial-action" return PERCENT_INITIAL_ACTION;
204 "%glr-parser" return PERCENT_GLR_PARSER;
205 "%language" return PERCENT_LANGUAGE;
206 "%left" return PERCENT_LEFT;
207 "%lex-param" return PERCENT_LEX_PARAM;
bc0f5737 208 "%locations" RETURN_PERCENT_FLAG("locations");
deef2a0a
AD
209 "%merge" return PERCENT_MERGE;
210 "%name"[-_]"prefix" return PERCENT_NAME_PREFIX;
211 "%no"[-_]"default"[-_]"prec" return PERCENT_NO_DEFAULT_PREC;
212 "%no"[-_]"lines" return PERCENT_NO_LINES;
213 "%nonassoc" return PERCENT_NONASSOC;
214 "%nondeterministic-parser" return PERCENT_NONDETERMINISTIC_PARSER;
215 "%nterm" return PERCENT_NTERM;
216 "%output" return PERCENT_OUTPUT;
217 "%parse-param" return PERCENT_PARSE_PARAM;
218 "%prec" return PERCENT_PREC;
d78f0ac9 219 "%precedence" return PERCENT_PRECEDENCE;
deef2a0a 220 "%printer" return PERCENT_PRINTER;
4920ae8b 221 "%pure"[-_]"parser" RETURN_PERCENT_FLAG("api.pure");
deef2a0a
AD
222 "%require" return PERCENT_REQUIRE;
223 "%right" return PERCENT_RIGHT;
224 "%skeleton" return PERCENT_SKELETON;
225 "%start" return PERCENT_START;
226 "%term" return PERCENT_TOKEN;
227 "%token" return PERCENT_TOKEN;
228 "%token"[-_]"table" return PERCENT_TOKEN_TABLE;
229 "%type" return PERCENT_TYPE;
230 "%union" return PERCENT_UNION;
231 "%verbose" return PERCENT_VERBOSE;
232 "%yacc" return PERCENT_YACC;
e9955c83 233
3f2d73f1 234 {directive} {
41141c56 235 complain_at (*loc, _("invalid directive: %s"), quote (yytext));
412f8a59 236 }
900c5db5 237
e9955c83 238 "=" return EQUAL;
e9071366 239 "|" return PIPE;
e9955c83
AD
240 ";" return SEMICOLON;
241
3f2d73f1 242 {id} {
58d7a1a1 243 val->uniqstr = uniqstr_new (yytext);
3f2d73f1 244 id_loc = *loc;
b9f1d9a4 245 bracketed_id_str = NULL;
3f2d73f1 246 BEGIN SC_AFTER_IDENTIFIER;
e9955c83
AD
247 }
248
d8d3f94a 249 {int} {
1452af69
PE
250 val->integer = scan_integer (yytext, 10, *loc);
251 return INT;
252 }
253 0[xX][0-9abcdefABCDEF]+ {
254 val->integer = scan_integer (yytext, 16, *loc);
d8d3f94a
PE
255 return INT;
256 }
e9955c83 257
84a1cb5a
AD
258 /* Identifiers may not start with a digit. Yet, don't silently
259 accept "1FOO" as "1 FOO". */
260 {int}{id} {
261 complain_at (*loc, _("invalid identifier: %s"), quote (yytext));
262 }
263
3208e3f4 264 /* Characters. */
dfaa4860 265 "'" token_start = loc->start; BEGIN SC_ESCAPED_CHARACTER;
e9955c83
AD
266
267 /* Strings. */
ca407bdf 268 "\"" token_start = loc->start; BEGIN SC_ESCAPED_STRING;
e9955c83
AD
269
270 /* Prologue. */
3f2d73f1 271 "%{" code_start = loc->start; BEGIN SC_PROLOGUE;
e9955c83
AD
272
273 /* Code in between braces. */
3f2d73f1
PE
274 "{" {
275 STRING_GROW;
cb823b6f 276 nesting = 0;
3f2d73f1
PE
277 code_start = loc->start;
278 BEGIN SC_BRACED_CODE;
279 }
e9955c83
AD
280
281 /* A type. */
cb823b6f
AD
282 "<*>" return TAG_ANY;
283 "<>" return TAG_NONE;
d8d3f94a 284 "<"{tag}">" {
223ff46e 285 obstack_grow (&obstack_for_string, yytext + 1, yyleng - 2);
41141c56 286 STRING_FINISH;
223ff46e 287 val->uniqstr = uniqstr_new (last_string);
41141c56 288 STRING_FREE;
cb823b6f
AD
289 return TAG;
290 }
291 "<" {
292 nesting = 0;
293 token_start = loc->start;
294 BEGIN SC_TAG;
4cdb01db
AD
295 }
296
a706a1cc
PE
297 "%%" {
298 static int percent_percent_count;
e9955c83 299 if (++percent_percent_count == 2)
a2bc9dbc 300 BEGIN SC_EPILOGUE;
e9955c83
AD
301 return PERCENT_PERCENT;
302 }
303
b9f1d9a4
AR
304 "[" {
305 bracketed_id_str = NULL;
306 bracketed_id_start = loc->start;
307 bracketed_id_context_state = YY_START;
308 BEGIN SC_BRACKETED_ID;
309 }
310
a706a1cc 311 . {
41141c56 312 complain_at (*loc, _("invalid character: %s"), quote (yytext));
3f2d73f1 313 }
379f0ac8
PE
314
315 <<EOF>> {
316 loc->start = loc->end = scanner_cursor;
317 yyterminate ();
318 }
3f2d73f1
PE
319}
320
321
cb823b6f
AD
322 /*--------------------------------------------------------------.
323 | Supporting \0 complexifies our implementation for no expected |
324 | added value. |
325 `--------------------------------------------------------------*/
326
327<SC_ESCAPED_CHARACTER,SC_ESCAPED_STRING,SC_TAG>
328{
329 \0 complain_at (*loc, _("invalid null character"));
330}
331
332
3f2d73f1
PE
333 /*-----------------------------------------------------------------.
334 | Scanning after an identifier, checking whether a colon is next. |
335 `-----------------------------------------------------------------*/
336
337<SC_AFTER_IDENTIFIER>
338{
b9f1d9a4 339 "[" {
872b52bc 340 if (bracketed_id_str)
b9f1d9a4
AR
341 {
342 ROLLBACK_CURRENT_TOKEN;
343 BEGIN SC_RETURN_BRACKETED_ID;
344 *loc = id_loc;
345 return ID;
346 }
872b52bc
AR
347 else
348 {
349 bracketed_id_start = loc->start;
350 bracketed_id_context_state = YY_START;
351 BEGIN SC_BRACKETED_ID;
352 }
b9f1d9a4 353 }
3f2d73f1 354 ":" {
b9f1d9a4 355 BEGIN (bracketed_id_str ? SC_RETURN_BRACKETED_ID : INITIAL);
3f2d73f1 356 *loc = id_loc;
3f2d73f1
PE
357 return ID_COLON;
358 }
359 . {
b9f1d9a4
AR
360 ROLLBACK_CURRENT_TOKEN;
361 BEGIN (bracketed_id_str ? SC_RETURN_BRACKETED_ID : INITIAL);
3f2d73f1 362 *loc = id_loc;
3f2d73f1
PE
363 return ID;
364 }
365 <<EOF>> {
b9f1d9a4 366 BEGIN (bracketed_id_str ? SC_RETURN_BRACKETED_ID : INITIAL);
3f2d73f1 367 *loc = id_loc;
3f2d73f1 368 return ID;
e9955c83
AD
369 }
370}
371
b9f1d9a4
AR
372 /*--------------------------------.
373 | Scanning bracketed identifiers. |
374 `--------------------------------*/
375
376<SC_BRACKETED_ID>
377{
378 {id} {
872b52bc 379 if (bracketed_id_str)
b9f1d9a4 380 {
872b52bc
AR
381 complain_at (*loc, _("unexpected identifier in bracketed name: %s"),
382 quote (yytext));
b9f1d9a4
AR
383 }
384 else
385 {
872b52bc
AR
386 bracketed_id_str = uniqstr_new (yytext);
387 bracketed_id_loc = *loc;
b9f1d9a4
AR
388 }
389 }
390 "]" {
391 BEGIN bracketed_id_context_state;
392 if (bracketed_id_str)
393 {
394 if (INITIAL == bracketed_id_context_state)
395 {
396 val->uniqstr = bracketed_id_str;
397 bracketed_id_str = 0;
398 *loc = bracketed_id_loc;
399 return BRACKETED_ID;
400 }
401 }
402 else
872b52bc 403 complain_at (*loc, _("an identifier expected"));
b9f1d9a4
AR
404 }
405 . {
406 complain_at (*loc, _("invalid character in bracketed name: %s"),
407 quote (yytext));
408 }
409 <<EOF>> {
410 BEGIN bracketed_id_context_state;
411 unexpected_eof (bracketed_id_start, "]");
412 }
413}
414
415<SC_RETURN_BRACKETED_ID>
416{
417 . {
418 ROLLBACK_CURRENT_TOKEN;
419 val->uniqstr = bracketed_id_str;
420 bracketed_id_str = 0;
421 *loc = bracketed_id_loc;
422 BEGIN INITIAL;
423 return BRACKETED_ID;
424 }
425}
426
e9955c83 427
d8d3f94a
PE
428 /*---------------------------------------------------------------.
429 | Scanning a Yacc comment. The initial `/ *' is already eaten. |
430 `---------------------------------------------------------------*/
e9955c83 431
d8d3f94a 432<SC_YACC_COMMENT>
e9955c83 433{
3f2d73f1 434 "*/" BEGIN context_state;
a706a1cc 435 .|\n ;
aa418041 436 <<EOF>> unexpected_eof (token_start, "*/"); BEGIN context_state;
d8d3f94a
PE
437}
438
439
440 /*------------------------------------------------------------.
441 | Scanning a C comment. The initial `/ *' is already eaten. |
442 `------------------------------------------------------------*/
443
444<SC_COMMENT>
445{
3f2d73f1 446 "*"{splice}"/" STRING_GROW; BEGIN context_state;
aa418041 447 <<EOF>> unexpected_eof (token_start, "*/"); BEGIN context_state;
e9955c83
AD
448}
449
450
d8d3f94a
PE
451 /*--------------------------------------------------------------.
452 | Scanning a line comment. The initial `//' is already eaten. |
453 `--------------------------------------------------------------*/
454
455<SC_LINE_COMMENT>
456{
3f2d73f1 457 "\n" STRING_GROW; BEGIN context_state;
41141c56 458 {splice} STRING_GROW;
3f2d73f1 459 <<EOF>> BEGIN context_state;
d8d3f94a
PE
460}
461
462
4febdd96
PE
463 /*------------------------------------------------.
464 | Scanning a Bison string, including its escapes. |
465 | The initial quote is already eaten. |
466 `------------------------------------------------*/
e9955c83
AD
467
468<SC_ESCAPED_STRING>
469{
47aee066
JD
470 "\""|"\n" {
471 if (yytext[0] == '\n')
472 unexpected_newline (token_start, "\"");
473 STRING_FINISH;
474 loc->start = token_start;
475 val->chars = last_string;
476 BEGIN INITIAL;
477 return STRING;
478 }
479 <<EOF>> {
480 unexpected_eof (token_start, "\"");
41141c56 481 STRING_FINISH;
3f2d73f1 482 loc->start = token_start;
223ff46e 483 val->chars = last_string;
a706a1cc 484 BEGIN INITIAL;
e9955c83
AD
485 return STRING;
486 }
e9955c83
AD
487}
488
4febdd96
PE
489 /*----------------------------------------------------------.
490 | Scanning a Bison character literal, decoding its escapes. |
491 | The initial quote is already eaten. |
492 `----------------------------------------------------------*/
e9955c83
AD
493
494<SC_ESCAPED_CHARACTER>
495{
47aee066 496 "'"|"\n" {
41141c56 497 STRING_FINISH;
3f2d73f1 498 loc->start = token_start;
dfaa4860 499 val->character = last_string[0];
3208e3f4
JD
500 {
501 /* FIXME: Eventually, make these errors. */
dfaa4860
JD
502 if (last_string[0] == '\0')
503 {
504 warn_at (*loc, _("empty character literal"));
505 /* '\0' seems dangerous even if we are about to complain. */
506 val->character = '\'';
507 }
508 else if (last_string[1] != '\0')
3208e3f4
JD
509 warn_at (*loc, _("extra characters in character literal"));
510 }
511 if (yytext[0] == '\n')
512 unexpected_newline (token_start, "'");
41141c56 513 STRING_FREE;
a706a1cc 514 BEGIN INITIAL;
58d7a1a1 515 return CHAR;
e9955c83 516 }
47aee066 517 <<EOF>> {
47aee066
JD
518 STRING_FINISH;
519 loc->start = token_start;
dfaa4860 520 val->character = last_string[0];
3208e3f4 521 {
3208e3f4 522 /* FIXME: Eventually, make these errors. */
dfaa4860
JD
523 if (last_string[0] == '\0')
524 {
525 warn_at (*loc, _("empty character literal"));
526 /* '\0' seems dangerous even if we are about to complain. */
527 val->character = '\'';
528 }
529 else if (last_string[1] != '\0')
3208e3f4 530 warn_at (*loc, _("extra characters in character literal"));
3208e3f4
JD
531 }
532 unexpected_eof (token_start, "'");
47aee066
JD
533 STRING_FREE;
534 BEGIN INITIAL;
535 return CHAR;
536 }
4febdd96 537}
a706a1cc 538
cb823b6f
AD
539 /*-----------------------------------------------------------.
540 | Scanning a Bison nested tag. The initial angle bracket is |
541 | already eaten. |
542 `-----------------------------------------------------------*/
543
544<SC_TAG>
4febdd96 545{
cb823b6f
AD
546 ">" {
547 --nesting;
548 if (nesting < 0)
549 {
550 STRING_FINISH;
551 loc->start = token_start;
552 val->uniqstr = uniqstr_new (last_string);
553 STRING_FREE;
554 BEGIN INITIAL;
555 return TAG;
556 }
557 STRING_GROW;
558 }
559
560 [^<>]+ STRING_GROW;
561 "<"+ STRING_GROW; nesting += yyleng;
e9955c83 562
cb823b6f
AD
563 <<EOF>> {
564 unexpected_eof (token_start, ">");
565 STRING_FINISH;
566 loc->start = token_start;
567 val->uniqstr = uniqstr_new (last_string);
568 STRING_FREE;
569 BEGIN INITIAL;
570 return TAG;
571 }
572}
e9955c83
AD
573
574 /*----------------------------.
575 | Decode escaped characters. |
576 `----------------------------*/
577
<SC_ESCAPED_STRING,SC_ESCAPED_CHARACTER>
{
  /* Octal escape: one to three octal digits.  A value of zero or one
     that does not fit in a byte is rejected.  */
  \\[0-7]{1,3} {
    unsigned long int c = strtoul (yytext + 1, NULL, 8);
    if (!c || UCHAR_MAX < c)
      complain_at (*loc, _("invalid number after \\-escape: %s"),
                   yytext+1);
    else
      obstack_1grow (&obstack_for_string, c);
  }

  /* Hexadecimal escape: any number of hex digits, with the same
     range check as for octal escapes.  */
  \\x[0-9abcdefABCDEF]+ {
    verify (UCHAR_MAX < ULONG_MAX);
    unsigned long int c = strtoul (yytext + 2, NULL, 16);
    if (!c || UCHAR_MAX < c)
      complain_at (*loc, _("invalid number after \\-escape: %s"),
                   yytext+1);
    else
      obstack_1grow (&obstack_for_string, c);
  }

  /* Simple C escapes.  */
  \\a	obstack_1grow (&obstack_for_string, '\a');
  \\b	obstack_1grow (&obstack_for_string, '\b');
  \\f	obstack_1grow (&obstack_for_string, '\f');
  \\n	obstack_1grow (&obstack_for_string, '\n');
  \\r	obstack_1grow (&obstack_for_string, '\r');
  \\t	obstack_1grow (&obstack_for_string, '\t');
  \\v	obstack_1grow (&obstack_for_string, '\v');

  /* \\[\"\'?\\] would be shorter, but it confuses xgettext.  */
  \\("\""|"'"|"?"|"\\")	obstack_1grow (&obstack_for_string, yytext[1]);

  /* Universal character names: \uXXXX or \UXXXXXXXX.  Only those
     that convert_ucn_to_byte maps to a positive value are accepted.  */
  \\(u|U[0-9abcdefABCDEF]{4})[0-9abcdefABCDEF]{4} {
    int c = convert_ucn_to_byte (yytext);
    if (c <= 0)
      complain_at (*loc, _("invalid number after \\-escape: %s"),
                   yytext+1);
    else
      obstack_1grow (&obstack_for_string, c);
  }

  /* Any other backslash sequence is an error.  */
  \\(.|\n)	{
    char const *p = yytext + 1;
    /* The <ctype.h> classifiers require a value representable as
       unsigned char (or EOF); passing a plain char with the high bit
       set is undefined behavior where char is signed.  */
    unsigned char c = *p;
    /* Quote only if escaping won't make the character visible.  */
    if (isspace (c) && isprint (c))
      p = quote (p);
    else
      p = quotearg_style_mem (escape_quoting_style, p, 1);
    complain_at (*loc, _("invalid character after \\-escape: %s"), p);
  }
}
628
4febdd96
PE
629 /*--------------------------------------------.
630 | Scanning user-code characters and strings. |
631 `--------------------------------------------*/
e9955c83 632
4febdd96
PE
633<SC_CHARACTER,SC_STRING>
634{
e9071366 635 {splice}|\\{splice}[^\n\[\]] STRING_GROW;
4febdd96 636}
e9955c83
AD
637
638<SC_CHARACTER>
639{
4febdd96
PE
640 "'" STRING_GROW; BEGIN context_state;
641 \n unexpected_newline (token_start, "'"); BEGIN context_state;
642 <<EOF>> unexpected_eof (token_start, "'"); BEGIN context_state;
e9955c83
AD
643}
644
e9955c83
AD
645<SC_STRING>
646{
4febdd96
PE
647 "\"" STRING_GROW; BEGIN context_state;
648 \n unexpected_newline (token_start, "\""); BEGIN context_state;
649 <<EOF>> unexpected_eof (token_start, "\""); BEGIN context_state;
e9955c83
AD
650}
651
652
653 /*---------------------------------------------------.
654 | Strings, comments etc. can be found in user code. |
655 `---------------------------------------------------*/
656
657<SC_BRACED_CODE,SC_PROLOGUE,SC_EPILOGUE>
658{
3f2d73f1
PE
659 "'" {
660 STRING_GROW;
661 context_state = YY_START;
662 token_start = loc->start;
663 BEGIN SC_CHARACTER;
664 }
665 "\"" {
666 STRING_GROW;
667 context_state = YY_START;
668 token_start = loc->start;
669 BEGIN SC_STRING;
670 }
671 "/"{splice}"*" {
672 STRING_GROW;
673 context_state = YY_START;
674 token_start = loc->start;
675 BEGIN SC_COMMENT;
676 }
677 "/"{splice}"/" {
678 STRING_GROW;
679 context_state = YY_START;
680 BEGIN SC_LINE_COMMENT;
681 }
e9955c83
AD
682}
683
684
624a35e2 685
58d7a1a1
AD
686 /*-----------------------------------------------------------.
687 | Scanning some code in braces (actions). The initial "{" is |
688 | already eaten. |
689 `-----------------------------------------------------------*/
e9955c83
AD
690
691<SC_BRACED_CODE>
692{
cb823b6f
AD
693 "{"|"<"{splice}"%" STRING_GROW; nesting++;
694 "%"{splice}">" STRING_GROW; nesting--;
e9955c83 695 "}" {
25522739
PE
696 obstack_1grow (&obstack_for_string, '}');
697
cb823b6f
AD
698 --nesting;
699 if (nesting < 0)
e9955c83 700 {
41141c56 701 STRING_FINISH;
3f2d73f1 702 loc->start = code_start;
eb095650 703 val->code = last_string;
a706a1cc 704 BEGIN INITIAL;
58d7a1a1 705 return BRACED_CODE;
e9955c83
AD
706 }
707 }
708
a706a1cc
PE
709 /* Tokenize `<<%' correctly (as `<<' `%') rather than incorrectly
710 (as `<' `<%'). */
41141c56 711 "<"{splice}"<" STRING_GROW;
a706a1cc 712
47aee066
JD
713 <<EOF>> {
714 unexpected_eof (code_start, "}");
715 STRING_FINISH;
716 loc->start = code_start;
eb095650 717 val->code = last_string;
47aee066
JD
718 BEGIN INITIAL;
719 return BRACED_CODE;
720 }
e9955c83
AD
721}
722
723
724 /*--------------------------------------------------------------.
725 | Scanning some prologue: from "%{" (already scanned) to "%}". |
726 `--------------------------------------------------------------*/
727
728<SC_PROLOGUE>
729{
730 "%}" {
41141c56 731 STRING_FINISH;
3f2d73f1 732 loc->start = code_start;
223ff46e 733 val->chars = last_string;
a706a1cc 734 BEGIN INITIAL;
e9955c83
AD
735 return PROLOGUE;
736 }
737
47aee066
JD
738 <<EOF>> {
739 unexpected_eof (code_start, "%}");
740 STRING_FINISH;
741 loc->start = code_start;
742 val->chars = last_string;
743 BEGIN INITIAL;
744 return PROLOGUE;
745 }
e9955c83
AD
746}
747
748
749 /*---------------------------------------------------------------.
750 | Scanning the epilogue (everything after the second "%%", which |
d8d3f94a 751 | has already been eaten). |
e9955c83
AD
752 `---------------------------------------------------------------*/
753
754<SC_EPILOGUE>
755{
e9955c83 756 <<EOF>> {
41141c56 757 STRING_FINISH;
3f2d73f1 758 loc->start = code_start;
223ff46e 759 val->chars = last_string;
a706a1cc 760 BEGIN INITIAL;
e9955c83
AD
761 return EPILOGUE;
762 }
763}
764
765
4febdd96
PE
766 /*-----------------------------------------------------.
767 | By default, grow the string obstack with the input. |
768 `-----------------------------------------------------*/
769
770<SC_COMMENT,SC_LINE_COMMENT,SC_BRACED_CODE,SC_PROLOGUE,SC_EPILOGUE,SC_STRING,SC_CHARACTER,SC_ESCAPED_STRING,SC_ESCAPED_CHARACTER>. |
771<SC_COMMENT,SC_LINE_COMMENT,SC_BRACED_CODE,SC_PROLOGUE,SC_EPILOGUE>\n STRING_GROW;
772
e9955c83
AD
773%%
774
6c30d641
PE
/* Fill BUF with at most SIZE bytes read from FP and return how many
   bytes BUF holds afterwards.  Carriage returns never reach the
   caller: both "\r\n" and a lone '\r' are delivered as one '\n'.  */

static size_t
no_cr_read (FILE *fp, char *buf, size_t size)
{
  size_t const nread = fread (buf, 1, size, fp);

  /* DST is where the next output byte goes; it starts on the first
     '\r', since everything before it is already in place.  */
  char *dst = nread ? memchr (buf, '\r', nread) : NULL;
  if (!dst)
    return nread;

  char const *const end = buf + nread;
  char const *src = dst + 1;

  for (;;)
    {
      /* SRC has just stepped over an '\r': emit '\n' in its place.  */
      *dst++ = '\n';

      if (src == end)
        {
          /* The '\r' was the last byte read; peek at the stream so a
             "\r\n" pair split across reads still yields one '\n'.  */
          int next = getc (fp);
          if (next != '\n' && ungetc (next, fp) != next)
            break;
        }
      else if (*src == '\n')
        src++;

      /* Compact the bytes up to the next '\r' (or the end of data).  */
      for (;;)
        {
          if (src == end)
            return dst - buf;
          if (*src == '\r')
            break;
          *dst++ = *src++;
        }
      src++;
    }

  return dst - buf;
}
820
821
f25bfb75 822
1452af69
PE
823/*------------------------------------------------------.
824| Scan NUMBER for a base-BASE integer at location LOC. |
825`------------------------------------------------------*/
826
827static unsigned long int
828scan_integer (char const *number, int base, location loc)
829{
4517da37
PE
830 verify (INT_MAX < ULONG_MAX);
831 unsigned long int num = strtoul (number, NULL, base);
832
833 if (INT_MAX < num)
1452af69
PE
834 {
835 complain_at (loc, _("integer out of range: %s"), quote (number));
836 num = INT_MAX;
837 }
4517da37 838
1452af69
PE
839 return num;
840}
841
842
d8d3f94a
PE
/*------------------------------------------------------------------.
| Convert universal character name UCN to a single-byte character,  |
| and return that character.  Return -1 if UCN does not correspond  |
| to a single-byte character.                                       |
|                                                                   |
| UCN points at the backslash of a "\uXXXX" or "\UXXXXXXXX"         |
| sequence; the hexadecimal digits start two characters in.         |
`------------------------------------------------------------------*/

static int
convert_ucn_to_byte (char const *ucn)
{
  verify (UCHAR_MAX <= INT_MAX);
  unsigned long int code = strtoul (ucn + 2, NULL, 16);

  /* FIXME: Currently we assume Unicode-compatible unibyte characters
     on ASCII hosts (i.e., Latin-1 on hosts with 8-bit bytes).  On
     non-ASCII hosts we support only the portable C character set.
     These limitations should be removed once we add support for
     multibyte characters.  */

  if (UCHAR_MAX < code)
    return -1;

  /* CODE fits in an unsigned char here, hence in an int (see the
     verify above).  Keeping the result in an int lets the -1
     sentinel be stored directly rather than through an unsigned
     type.  */
  int byte = (int) code;

#if ! ('$' == 0x24 && '@' == 0x40 && '`' == 0x60 && '~' == 0x7e)
  {
    /* A non-ASCII host.  Use CODE to index into a table of the C
       basic execution character set, which is guaranteed to exist on
       all Standard C platforms.  This table also includes '$', '@',
       and '`', which are not in the basic execution character set but
       which are unibyte characters on all the platforms that we know
       about.  */
    static signed char const table[] =
      {
	'\0',   -1,   -1,   -1,   -1,   -1,   -1, '\a',
	'\b', '\t', '\n', '\v', '\f', '\r',   -1,   -1,
	  -1,   -1,   -1,   -1,   -1,   -1,   -1,   -1,
	  -1,   -1,   -1,   -1,   -1,   -1,   -1,   -1,
	 ' ',  '!',  '"',  '#',  '$',  '%',  '&', '\'',
	 '(',  ')',  '*',  '+',  ',',  '-',  '.',  '/',
	 '0',  '1',  '2',  '3',  '4',  '5',  '6',  '7',
	 '8',  '9',  ':',  ';',  '<',  '=',  '>',  '?',
	 '@',  'A',  'B',  'C',  'D',  'E',  'F',  'G',
	 'H',  'I',  'J',  'K',  'L',  'M',  'N',  'O',
	 'P',  'Q',  'R',  'S',  'T',  'U',  'V',  'W',
	 'X',  'Y',  'Z',  '[', '\\',  ']',  '^',  '_',
	 '`',  'a',  'b',  'c',  'd',  'e',  'f',  'g',
	 'h',  'i',  'j',  'k',  'l',  'm',  'n',  'o',
	 'p',  'q',  'r',  's',  't',  'u',  'v',  'w',
	 'x',  'y',  'z',  '{',  '|',  '}',  '~'
      };

    byte = code < sizeof table ? table[code] : -1;
  }
#endif

  return byte;
}
898
899
900c5db5
AD
900/*----------------------------------------------------------------.
901| Handle `#line INT "FILE"'. ARGS has already skipped `#line '. |
902`----------------------------------------------------------------*/
903
904static void
4517da37 905handle_syncline (char *args, location loc)
900c5db5 906{
4517da37
PE
907 char *after_num;
908 unsigned long int lineno = strtoul (args, &after_num, 10);
909 char *file = strchr (after_num, '"') + 1;
910 *strchr (file, '"') = '\0';
911 if (INT_MAX <= lineno)
912 {
913 warn_at (loc, _("line number overflow"));
914 lineno = INT_MAX;
915 }
e9071366 916 current_file = uniqstr_new (file);
0c8e079f 917 boundary_set (&scanner_cursor, current_file, lineno, 1);
4517da37
PE
918}
919
920
4febdd96
PE
921/*----------------------------------------------------------------.
922| For a token or comment starting at START, report message MSGID, |
923| which should say that an end marker was found before |
924| the expected TOKEN_END. |
925`----------------------------------------------------------------*/
926
927static void
928unexpected_end (boundary start, char const *msgid, char const *token_end)
929{
930 location loc;
931 loc.start = start;
932 loc.end = scanner_cursor;
933 complain_at (loc, _(msgid), token_end);
934}
935
936
3f2d73f1
PE
937/*------------------------------------------------------------------------.
938| Report an unexpected EOF in a token or comment starting at START. |
939| An end of file was encountered and the expected TOKEN_END was missing. |
3f2d73f1 940`------------------------------------------------------------------------*/
a706a1cc
PE
941
942static void
aa418041 943unexpected_eof (boundary start, char const *token_end)
a706a1cc 944{
4febdd96
PE
945 unexpected_end (start, N_("missing `%s' at end of file"), token_end);
946}
947
948
949/*----------------------------------------.
950| Likewise, but for unexpected newlines. |
951`----------------------------------------*/
952
953static void
954unexpected_newline (boundary start, char const *token_end)
955{
956 unexpected_end (start, N_("missing `%s' at end of line"), token_end);
a706a1cc
PE
957}
958
959
f25bfb75
AD
960/*-------------------------.
961| Initialize the scanner. |
962`-------------------------*/
963
1d6412ad 964void
e9071366 965gram_scanner_initialize (void)
1d6412ad 966{
223ff46e 967 obstack_init (&obstack_for_string);
1d6412ad
AD
968}
969
970
f25bfb75
AD
971/*-----------------------------------------------.
972| Free all the memory allocated to the scanner. |
973`-----------------------------------------------*/
974
4cdb01db 975void
e9071366 976gram_scanner_free (void)
4cdb01db 977{
223ff46e 978 obstack_free (&obstack_for_string, 0);
536545f3 979 /* Reclaim Flex's buffers. */
580b8926 980 yylex_destroy ();
4cdb01db 981}