]> git.saurik.com Git - bison.git/blame - src/scan-gram.l
Expand GLR acronym in summary of Bison.
[bison.git] / src / scan-gram.l
CommitLineData
e9955c83 1/* Bison Grammar Scanner -*- C -*-
3b1e470c 2
ba061fa6
AD
3 Copyright (C) 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009
4 Free Software Foundation, Inc.
e9955c83
AD
5
6 This file is part of Bison, the GNU Compiler Compiler.
7
f16b0819 8 This program is free software: you can redistribute it and/or modify
e9955c83 9 it under the terms of the GNU General Public License as published by
f16b0819 10 the Free Software Foundation, either version 3 of the License, or
e9955c83
AD
11 (at your option) any later version.
12
13 This program is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU General Public License for more details.
17
18 You should have received a copy of the GNU General Public License
f16b0819 19 along with this program. If not, see <http://www.gnu.org/licenses/>. */
e9955c83 20
8d90395d 21%option debug nodefault noinput nounput noyywrap never-interactive
e9955c83
AD
22%option prefix="gram_" outfile="lex.yy.c"
23
24%{
4f6e011e
PE
25/* Work around a bug in flex 2.5.31. See Debian bug 333231
26 <http://bugs.debian.org/cgi-bin/bugreport.cgi?bug=333231>. */
27#undef gram_wrap
28#define gram_wrap() 1
29
e9071366 30#define FLEX_PREFIX(Id) gram_ ## Id
0305d25e 31#include <src/flex-scanner.h>
223ff46e 32
0305d25e
AD
33#include <src/complain.h>
34#include <src/files.h>
35#include <src/gram.h>
36#include <quotearg.h>
37#include <src/reader.h>
38#include <src/uniqstr.h>
e9955c83 39
c2724603 40#include <ctype.h>
e9071366
AD
41#include <mbswidth.h>
42#include <quote.h>
43
0305d25e 44#include <src/scan-gram.h>
e9071366
AD
45
46#define YY_DECL GRAM_LEX_DECL
2346344a 47
3f2d73f1 48#define YY_USER_INIT \
e9071366 49 code_start = scanner_cursor = loc->start; \
dc9701e8 50
3f2d73f1 51/* Location of scanner cursor. */
4a678af8 52static boundary scanner_cursor;
41141c56 53
e9071366 54#define YY_USER_ACTION location_compute (loc, &scanner_cursor, yytext, yyleng);
d8d3f94a 55
6c30d641 56static size_t no_cr_read (FILE *, char *, size_t);
d8d3f94a
PE
57#define YY_INPUT(buf, result, size) ((result) = no_cr_read (yyin, buf, size))
58
a7706735
AD
59#define RETURN_PERCENT_PARAM(Value) \
60 RETURN_VALUE(PERCENT_PARAM, param, param_ ## Value)
61
62#define RETURN_PERCENT_FLAG(Value) \
63 RETURN_VALUE(PERCENT_FLAG, uniqstr, uniqstr_new (Value))
64
65#define RETURN_VALUE(Token, Field, Value) \
ba061fa6 66 do { \
a7706735
AD
67 val->Field = Value; \
68 return Token; \
ba061fa6
AD
69 } while (0)
70
b9f1d9a4
AR
71#define ROLLBACK_CURRENT_TOKEN \
72 do { \
73 scanner_cursor.column -= mbsnwidth (yytext, yyleng, 0); \
74 yyless (0); \
75 } while (0)
ba061fa6 76
7ec2d4cd 77/* A string representing the most recently saved token. */
7c0c6181 78static char *last_string;
7ec2d4cd 79
872b52bc 80/* Bracketed identifier. */
b9f1d9a4
AR
81static uniqstr bracketed_id_str = 0;
82static location bracketed_id_loc;
83static boundary bracketed_id_start;
84static int bracketed_id_context_state = 0;
85
7ec2d4cd 86void
e9071366 87gram_scanner_last_string_free (void)
7ec2d4cd 88{
41141c56 89 STRING_FREE;
7ec2d4cd 90}
e9955c83 91
4517da37 92static void handle_syncline (char *, location);
1452af69 93static unsigned long int scan_integer (char const *p, int base, location loc);
d8d3f94a 94static int convert_ucn_to_byte (char const *hex_text);
aa418041 95static void unexpected_eof (boundary, char const *);
4febdd96 96static void unexpected_newline (boundary, char const *);
e9955c83
AD
97
98%}
e9071366
AD
99 /* A C-like comment in directives/rules. */
100%x SC_YACC_COMMENT
101 /* Strings and characters in directives/rules. */
e9955c83 102%x SC_ESCAPED_STRING SC_ESCAPED_CHARACTER
e9071366
AD
103 /* An identifier was just read in directives/rules. Special state
104 to capture the sequence `identifier :'. */
105%x SC_AFTER_IDENTIFIER
cb823b6f
AD
106 /* A complex tag, with nested angle brackets. */
107%x SC_TAG
e9071366
AD
108
109 /* Three types of user code:
110 - prologue (code between `%{' `%}' in the first section, before %%);
111 - actions, printers, union, etc, (between braces in the middle section);
112 - epilogue (everything after the second %%). */
113%x SC_PROLOGUE SC_BRACED_CODE SC_EPILOGUE
114 /* C and C++ comments in code. */
115%x SC_COMMENT SC_LINE_COMMENT
116 /* Strings and characters in code. */
117%x SC_STRING SC_CHARACTER
872b52bc 118 /* Bracketed identifiers support. */
b9f1d9a4 119%x SC_BRACKETED_ID SC_RETURN_BRACKETED_ID
e9955c83 120
cdf3f113
AD
121letter [-.abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ_]
122id {letter}({letter}|[0-9])*
4f646c37 123directive %{id}
624a35e2 124int [0-9]+
d8d3f94a
PE
125
126/* POSIX says that a tag must be both an id and a C union member, but
127 historically almost any character is allowed in a tag. We disallow
cb823b6f
AD
128 NUL, as this simplifies our implementation. We disallow angle
129 bracket to match them in nested pairs: several languages use them
130 for generics/template types. */
131tag [^\0<>]+
d8d3f94a
PE
132
133/* Zero or more instances of backslash-newline. Following GCC, allow
134 white space between the backslash and the newline. */
135splice (\\[ \f\t\v]*\n)*
e9955c83
AD
136
137%%
138%{
cb823b6f
AD
139 /* Nesting level. Either for nested braces, or nested angle brackets
140 (but not mixed). */
141 int nesting IF_LINT (= 0);
1a9e39f1 142
3f2d73f1 143 /* Parent context state, when applicable. */
5362ed19 144 int context_state IF_LINT (= 0);
a706a1cc 145
3f2d73f1 146 /* Location of most recent identifier, when applicable. */
a2bc9dbc 147 location id_loc IF_LINT (= empty_location);
3f2d73f1 148
a2bc9dbc
PE
149 /* Where containing code started, when applicable. Its initial
150 value is relevant only when yylex is invoked in the SC_EPILOGUE
151 start condition. */
152 boundary code_start = scanner_cursor;
3f2d73f1 153
223ff46e
PE
154 /* Where containing comment or string or character literal started,
155 when applicable. */
a2bc9dbc 156 boundary token_start IF_LINT (= scanner_cursor);
e9955c83
AD
157%}
158
159
3f2d73f1
PE
160 /*-----------------------.
161 | Scanning white space. |
162 `-----------------------*/
163
b9f1d9a4 164<INITIAL,SC_AFTER_IDENTIFIER,SC_BRACKETED_ID,SC_RETURN_BRACKETED_ID>
3f2d73f1 165{
4febdd96 166 /* Comments and white space. */
83adb046 167 "," warn_at (*loc, _("stray `,' treated as white space"));
4febdd96 168 [ \f\n\t\v] |
3f2d73f1 169 "//".* ;
83adb046
PE
170 "/*" {
171 token_start = loc->start;
172 context_state = YY_START;
173 BEGIN SC_YACC_COMMENT;
174 }
3f2d73f1
PE
175
176 /* #line directives are not documented, and may be withdrawn or
177 modified in future versions of Bison. */
178 ^"#line "{int}" \"".*"\"\n" {
4517da37 179 handle_syncline (yytext + sizeof "#line " - 1, *loc);
3f2d73f1
PE
180 }
181}
182
183
e9955c83
AD
184 /*----------------------------.
185 | Scanning Bison directives. |
186 `----------------------------*/
a7c09cba
DJ
187
188 /* For directives that are also command line options, the regex must be
189 "%..."
190 after "[-_]"s are removed, and the directive must match the --long
191 option name, with a single string argument. Otherwise, add exceptions
192 to ../build-aux/cross-options.pl. */
193
e9955c83
AD
194<INITIAL>
195{
deef2a0a 196 "%binary" return PERCENT_NONASSOC;
136a0f76 197 "%code" return PERCENT_CODE;
fa819509 198 "%debug" RETURN_PERCENT_FLAG("parse.trace");
deef2a0a
AD
199 "%default"[-_]"prec" return PERCENT_DEFAULT_PREC;
200 "%define" return PERCENT_DEFINE;
201 "%defines" return PERCENT_DEFINES;
202 "%destructor" return PERCENT_DESTRUCTOR;
203 "%dprec" return PERCENT_DPREC;
31b850d2 204 "%error"[-_]"verbose" return PERCENT_ERROR_VERBOSE;
deef2a0a
AD
205 "%expect" return PERCENT_EXPECT;
206 "%expect"[-_]"rr" return PERCENT_EXPECT_RR;
207 "%file-prefix" return PERCENT_FILE_PREFIX;
e9955c83 208 "%fixed"[-_]"output"[-_]"files" return PERCENT_YACC;
deef2a0a
AD
209 "%initial-action" return PERCENT_INITIAL_ACTION;
210 "%glr-parser" return PERCENT_GLR_PARSER;
211 "%language" return PERCENT_LANGUAGE;
212 "%left" return PERCENT_LEFT;
a7706735 213 "%lex-param" RETURN_PERCENT_PARAM(lex);
bc0f5737 214 "%locations" RETURN_PERCENT_FLAG("locations");
deef2a0a
AD
215 "%merge" return PERCENT_MERGE;
216 "%name"[-_]"prefix" return PERCENT_NAME_PREFIX;
217 "%no"[-_]"default"[-_]"prec" return PERCENT_NO_DEFAULT_PREC;
218 "%no"[-_]"lines" return PERCENT_NO_LINES;
219 "%nonassoc" return PERCENT_NONASSOC;
220 "%nondeterministic-parser" return PERCENT_NONDETERMINISTIC_PARSER;
221 "%nterm" return PERCENT_NTERM;
222 "%output" return PERCENT_OUTPUT;
a7706735
AD
223 "%param" RETURN_PERCENT_PARAM(both);
224 "%parse-param" RETURN_PERCENT_PARAM(parse);
deef2a0a 225 "%prec" return PERCENT_PREC;
d78f0ac9 226 "%precedence" return PERCENT_PRECEDENCE;
deef2a0a 227 "%printer" return PERCENT_PRINTER;
4920ae8b 228 "%pure"[-_]"parser" RETURN_PERCENT_FLAG("api.pure");
deef2a0a
AD
229 "%require" return PERCENT_REQUIRE;
230 "%right" return PERCENT_RIGHT;
231 "%skeleton" return PERCENT_SKELETON;
232 "%start" return PERCENT_START;
233 "%term" return PERCENT_TOKEN;
234 "%token" return PERCENT_TOKEN;
235 "%token"[-_]"table" return PERCENT_TOKEN_TABLE;
236 "%type" return PERCENT_TYPE;
237 "%union" return PERCENT_UNION;
238 "%verbose" return PERCENT_VERBOSE;
239 "%yacc" return PERCENT_YACC;
e9955c83 240
3f2d73f1 241 {directive} {
41141c56 242 complain_at (*loc, _("invalid directive: %s"), quote (yytext));
412f8a59 243 }
900c5db5 244
e9955c83 245 "=" return EQUAL;
e9071366 246 "|" return PIPE;
e9955c83
AD
247 ";" return SEMICOLON;
248
3f2d73f1 249 {id} {
58d7a1a1 250 val->uniqstr = uniqstr_new (yytext);
3f2d73f1 251 id_loc = *loc;
b9f1d9a4 252 bracketed_id_str = NULL;
3f2d73f1 253 BEGIN SC_AFTER_IDENTIFIER;
e9955c83
AD
254 }
255
d8d3f94a 256 {int} {
1452af69
PE
257 val->integer = scan_integer (yytext, 10, *loc);
258 return INT;
259 }
260 0[xX][0-9abcdefABCDEF]+ {
261 val->integer = scan_integer (yytext, 16, *loc);
d8d3f94a
PE
262 return INT;
263 }
e9955c83 264
84a1cb5a
AD
265 /* Identifiers may not start with a digit. Yet, don't silently
266 accept "1FOO" as "1 FOO". */
267 {int}{id} {
268 complain_at (*loc, _("invalid identifier: %s"), quote (yytext));
269 }
270
3208e3f4 271 /* Characters. */
dfaa4860 272 "'" token_start = loc->start; BEGIN SC_ESCAPED_CHARACTER;
e9955c83
AD
273
274 /* Strings. */
ca407bdf 275 "\"" token_start = loc->start; BEGIN SC_ESCAPED_STRING;
e9955c83
AD
276
277 /* Prologue. */
3f2d73f1 278 "%{" code_start = loc->start; BEGIN SC_PROLOGUE;
e9955c83
AD
279
280 /* Code in between braces. */
3f2d73f1
PE
281 "{" {
282 STRING_GROW;
cb823b6f 283 nesting = 0;
3f2d73f1
PE
284 code_start = loc->start;
285 BEGIN SC_BRACED_CODE;
286 }
e9955c83
AD
287
288 /* A type. */
cb823b6f
AD
289 "<*>" return TAG_ANY;
290 "<>" return TAG_NONE;
d8d3f94a 291 "<"{tag}">" {
223ff46e 292 obstack_grow (&obstack_for_string, yytext + 1, yyleng - 2);
41141c56 293 STRING_FINISH;
223ff46e 294 val->uniqstr = uniqstr_new (last_string);
41141c56 295 STRING_FREE;
cb823b6f
AD
296 return TAG;
297 }
298 "<" {
299 nesting = 0;
300 token_start = loc->start;
301 BEGIN SC_TAG;
4cdb01db
AD
302 }
303
a706a1cc
PE
304 "%%" {
305 static int percent_percent_count;
e9955c83 306 if (++percent_percent_count == 2)
a2bc9dbc 307 BEGIN SC_EPILOGUE;
e9955c83
AD
308 return PERCENT_PERCENT;
309 }
310
b9f1d9a4
AR
311 "[" {
312 bracketed_id_str = NULL;
313 bracketed_id_start = loc->start;
314 bracketed_id_context_state = YY_START;
315 BEGIN SC_BRACKETED_ID;
316 }
317
a706a1cc 318 . {
41141c56 319 complain_at (*loc, _("invalid character: %s"), quote (yytext));
3f2d73f1 320 }
379f0ac8
PE
321
322 <<EOF>> {
323 loc->start = loc->end = scanner_cursor;
324 yyterminate ();
325 }
3f2d73f1
PE
326}
327
328
cb823b6f
AD
329 /*--------------------------------------------------------------.
330 | Supporting \0 complexifies our implementation for no expected |
331 | added value. |
332 `--------------------------------------------------------------*/
333
334<SC_ESCAPED_CHARACTER,SC_ESCAPED_STRING,SC_TAG>
335{
336 \0 complain_at (*loc, _("invalid null character"));
337}
338
339
3f2d73f1
PE
340 /*-----------------------------------------------------------------.
341 | Scanning after an identifier, checking whether a colon is next. |
342 `-----------------------------------------------------------------*/
343
344<SC_AFTER_IDENTIFIER>
345{
b9f1d9a4 346 "[" {
872b52bc 347 if (bracketed_id_str)
b9f1d9a4
AR
348 {
349 ROLLBACK_CURRENT_TOKEN;
350 BEGIN SC_RETURN_BRACKETED_ID;
351 *loc = id_loc;
352 return ID;
353 }
872b52bc
AR
354 else
355 {
356 bracketed_id_start = loc->start;
357 bracketed_id_context_state = YY_START;
358 BEGIN SC_BRACKETED_ID;
359 }
b9f1d9a4 360 }
3f2d73f1 361 ":" {
b9f1d9a4 362 BEGIN (bracketed_id_str ? SC_RETURN_BRACKETED_ID : INITIAL);
3f2d73f1 363 *loc = id_loc;
3f2d73f1
PE
364 return ID_COLON;
365 }
366 . {
b9f1d9a4
AR
367 ROLLBACK_CURRENT_TOKEN;
368 BEGIN (bracketed_id_str ? SC_RETURN_BRACKETED_ID : INITIAL);
3f2d73f1 369 *loc = id_loc;
3f2d73f1
PE
370 return ID;
371 }
372 <<EOF>> {
b9f1d9a4 373 BEGIN (bracketed_id_str ? SC_RETURN_BRACKETED_ID : INITIAL);
3f2d73f1 374 *loc = id_loc;
3f2d73f1 375 return ID;
e9955c83
AD
376 }
377}
378
b9f1d9a4
AR
379 /*--------------------------------.
380 | Scanning bracketed identifiers. |
381 `--------------------------------*/
382
383<SC_BRACKETED_ID>
384{
385 {id} {
872b52bc 386 if (bracketed_id_str)
b9f1d9a4 387 {
872b52bc
AR
388 complain_at (*loc, _("unexpected identifier in bracketed name: %s"),
389 quote (yytext));
b9f1d9a4
AR
390 }
391 else
392 {
872b52bc
AR
393 bracketed_id_str = uniqstr_new (yytext);
394 bracketed_id_loc = *loc;
b9f1d9a4
AR
395 }
396 }
397 "]" {
398 BEGIN bracketed_id_context_state;
399 if (bracketed_id_str)
400 {
401 if (INITIAL == bracketed_id_context_state)
402 {
403 val->uniqstr = bracketed_id_str;
404 bracketed_id_str = 0;
405 *loc = bracketed_id_loc;
406 return BRACKETED_ID;
407 }
408 }
409 else
872b52bc 410 complain_at (*loc, _("an identifier expected"));
b9f1d9a4
AR
411 }
412 . {
413 complain_at (*loc, _("invalid character in bracketed name: %s"),
414 quote (yytext));
415 }
416 <<EOF>> {
417 BEGIN bracketed_id_context_state;
418 unexpected_eof (bracketed_id_start, "]");
419 }
420}
421
422<SC_RETURN_BRACKETED_ID>
423{
424 . {
425 ROLLBACK_CURRENT_TOKEN;
426 val->uniqstr = bracketed_id_str;
427 bracketed_id_str = 0;
428 *loc = bracketed_id_loc;
429 BEGIN INITIAL;
430 return BRACKETED_ID;
431 }
432}
433
e9955c83 434
d8d3f94a
PE
435 /*---------------------------------------------------------------.
436 | Scanning a Yacc comment. The initial `/ *' is already eaten. |
437 `---------------------------------------------------------------*/
e9955c83 438
d8d3f94a 439<SC_YACC_COMMENT>
e9955c83 440{
3f2d73f1 441 "*/" BEGIN context_state;
a706a1cc 442 .|\n ;
aa418041 443 <<EOF>> unexpected_eof (token_start, "*/"); BEGIN context_state;
d8d3f94a
PE
444}
445
446
447 /*------------------------------------------------------------.
448 | Scanning a C comment. The initial `/ *' is already eaten. |
449 `------------------------------------------------------------*/
450
451<SC_COMMENT>
452{
3f2d73f1 453 "*"{splice}"/" STRING_GROW; BEGIN context_state;
aa418041 454 <<EOF>> unexpected_eof (token_start, "*/"); BEGIN context_state;
e9955c83
AD
455}
456
457
d8d3f94a
PE
458 /*--------------------------------------------------------------.
459 | Scanning a line comment. The initial `//' is already eaten. |
460 `--------------------------------------------------------------*/
461
462<SC_LINE_COMMENT>
463{
3f2d73f1 464 "\n" STRING_GROW; BEGIN context_state;
41141c56 465 {splice} STRING_GROW;
3f2d73f1 466 <<EOF>> BEGIN context_state;
d8d3f94a
PE
467}
468
469
4febdd96
PE
470 /*------------------------------------------------.
471 | Scanning a Bison string, including its escapes. |
472 | The initial quote is already eaten. |
473 `------------------------------------------------*/
e9955c83
AD
474
475<SC_ESCAPED_STRING>
476{
47aee066
JD
477 "\""|"\n" {
478 if (yytext[0] == '\n')
479 unexpected_newline (token_start, "\"");
480 STRING_FINISH;
481 loc->start = token_start;
482 val->chars = last_string;
483 BEGIN INITIAL;
484 return STRING;
485 }
486 <<EOF>> {
487 unexpected_eof (token_start, "\"");
41141c56 488 STRING_FINISH;
3f2d73f1 489 loc->start = token_start;
223ff46e 490 val->chars = last_string;
a706a1cc 491 BEGIN INITIAL;
e9955c83
AD
492 return STRING;
493 }
e9955c83
AD
494}
495
4febdd96
PE
496 /*----------------------------------------------------------.
497 | Scanning a Bison character literal, decoding its escapes. |
498 | The initial quote is already eaten. |
499 `----------------------------------------------------------*/
e9955c83
AD
500
501<SC_ESCAPED_CHARACTER>
502{
47aee066 503 "'"|"\n" {
41141c56 504 STRING_FINISH;
3f2d73f1 505 loc->start = token_start;
dfaa4860 506 val->character = last_string[0];
3208e3f4
JD
507 {
508 /* FIXME: Eventually, make these errors. */
dfaa4860
JD
509 if (last_string[0] == '\0')
510 {
511 warn_at (*loc, _("empty character literal"));
512 /* '\0' seems dangerous even if we are about to complain. */
513 val->character = '\'';
514 }
515 else if (last_string[1] != '\0')
3208e3f4
JD
516 warn_at (*loc, _("extra characters in character literal"));
517 }
518 if (yytext[0] == '\n')
519 unexpected_newline (token_start, "'");
41141c56 520 STRING_FREE;
a706a1cc 521 BEGIN INITIAL;
58d7a1a1 522 return CHAR;
e9955c83 523 }
47aee066 524 <<EOF>> {
47aee066
JD
525 STRING_FINISH;
526 loc->start = token_start;
dfaa4860 527 val->character = last_string[0];
3208e3f4 528 {
3208e3f4 529 /* FIXME: Eventually, make these errors. */
dfaa4860
JD
530 if (last_string[0] == '\0')
531 {
532 warn_at (*loc, _("empty character literal"));
533 /* '\0' seems dangerous even if we are about to complain. */
534 val->character = '\'';
535 }
536 else if (last_string[1] != '\0')
3208e3f4 537 warn_at (*loc, _("extra characters in character literal"));
3208e3f4
JD
538 }
539 unexpected_eof (token_start, "'");
47aee066
JD
540 STRING_FREE;
541 BEGIN INITIAL;
542 return CHAR;
543 }
4febdd96 544}
a706a1cc 545
cb823b6f
AD
546 /*-----------------------------------------------------------.
547 | Scanning a Bison nested tag. The initial angle bracket is |
548 | already eaten. |
549 `-----------------------------------------------------------*/
550
551<SC_TAG>
4febdd96 552{
cb823b6f
AD
553 ">" {
554 --nesting;
555 if (nesting < 0)
556 {
557 STRING_FINISH;
558 loc->start = token_start;
559 val->uniqstr = uniqstr_new (last_string);
560 STRING_FREE;
561 BEGIN INITIAL;
562 return TAG;
563 }
564 STRING_GROW;
565 }
566
567 [^<>]+ STRING_GROW;
568 "<"+ STRING_GROW; nesting += yyleng;
e9955c83 569
cb823b6f
AD
570 <<EOF>> {
571 unexpected_eof (token_start, ">");
572 STRING_FINISH;
573 loc->start = token_start;
574 val->uniqstr = uniqstr_new (last_string);
575 STRING_FREE;
576 BEGIN INITIAL;
577 return TAG;
578 }
579}
e9955c83
AD
580
581 /*----------------------------.
582 | Decode escaped characters. |
583 `----------------------------*/
584
585<SC_ESCAPED_STRING,SC_ESCAPED_CHARACTER>
586{
d8d3f94a 587 \\[0-7]{1,3} {
4517da37 588 unsigned long int c = strtoul (yytext + 1, NULL, 8);
c2724603
JD
589 if (!c || UCHAR_MAX < c)
590 complain_at (*loc, _("invalid number after \\-escape: %s"),
591 yytext+1);
e9955c83 592 else
223ff46e 593 obstack_1grow (&obstack_for_string, c);
e9955c83
AD
594 }
595
6b0d38ab 596 \\x[0-9abcdefABCDEF]+ {
4517da37
PE
597 verify (UCHAR_MAX < ULONG_MAX);
598 unsigned long int c = strtoul (yytext + 2, NULL, 16);
c2724603
JD
599 if (!c || UCHAR_MAX < c)
600 complain_at (*loc, _("invalid number after \\-escape: %s"),
601 yytext+1);
d8d3f94a 602 else
223ff46e 603 obstack_1grow (&obstack_for_string, c);
e9955c83
AD
604 }
605
223ff46e
PE
606 \\a obstack_1grow (&obstack_for_string, '\a');
607 \\b obstack_1grow (&obstack_for_string, '\b');
608 \\f obstack_1grow (&obstack_for_string, '\f');
609 \\n obstack_1grow (&obstack_for_string, '\n');
610 \\r obstack_1grow (&obstack_for_string, '\r');
611 \\t obstack_1grow (&obstack_for_string, '\t');
612 \\v obstack_1grow (&obstack_for_string, '\v');
412f8a59
PE
613
614 /* \\[\"\'?\\] would be shorter, but it confuses xgettext. */
223ff46e 615 \\("\""|"'"|"?"|"\\") obstack_1grow (&obstack_for_string, yytext[1]);
412f8a59 616
6b0d38ab 617 \\(u|U[0-9abcdefABCDEF]{4})[0-9abcdefABCDEF]{4} {
d8d3f94a 618 int c = convert_ucn_to_byte (yytext);
c2724603
JD
619 if (c <= 0)
620 complain_at (*loc, _("invalid number after \\-escape: %s"),
621 yytext+1);
d8d3f94a 622 else
223ff46e 623 obstack_1grow (&obstack_for_string, c);
d8d3f94a 624 }
4f25ebb0 625 \\(.|\n) {
c2724603 626 char const *p = yytext + 1;
e6c849d8 627 /* Quote only if escaping won't make the character visible. */
4413bbd3 628 if (isspace ((unsigned char) *p) && isprint ((unsigned char) *p))
e6c849d8 629 p = quote (p);
c2724603
JD
630 else
631 p = quotearg_style_mem (escape_quoting_style, p, 1);
632 complain_at (*loc, _("invalid character after \\-escape: %s"), p);
e9955c83
AD
633 }
634}
635
4febdd96
PE
636 /*--------------------------------------------.
637 | Scanning user-code characters and strings. |
638 `--------------------------------------------*/
e9955c83 639
4febdd96
PE
640<SC_CHARACTER,SC_STRING>
641{
e9071366 642 {splice}|\\{splice}[^\n\[\]] STRING_GROW;
4febdd96 643}
e9955c83
AD
644
645<SC_CHARACTER>
646{
4febdd96
PE
647 "'" STRING_GROW; BEGIN context_state;
648 \n unexpected_newline (token_start, "'"); BEGIN context_state;
649 <<EOF>> unexpected_eof (token_start, "'"); BEGIN context_state;
e9955c83
AD
650}
651
e9955c83
AD
652<SC_STRING>
653{
4febdd96
PE
654 "\"" STRING_GROW; BEGIN context_state;
655 \n unexpected_newline (token_start, "\""); BEGIN context_state;
656 <<EOF>> unexpected_eof (token_start, "\""); BEGIN context_state;
e9955c83
AD
657}
658
659
660 /*---------------------------------------------------.
661 | Strings, comments etc. can be found in user code. |
662 `---------------------------------------------------*/
663
664<SC_BRACED_CODE,SC_PROLOGUE,SC_EPILOGUE>
665{
3f2d73f1
PE
666 "'" {
667 STRING_GROW;
668 context_state = YY_START;
669 token_start = loc->start;
670 BEGIN SC_CHARACTER;
671 }
672 "\"" {
673 STRING_GROW;
674 context_state = YY_START;
675 token_start = loc->start;
676 BEGIN SC_STRING;
677 }
678 "/"{splice}"*" {
679 STRING_GROW;
680 context_state = YY_START;
681 token_start = loc->start;
682 BEGIN SC_COMMENT;
683 }
684 "/"{splice}"/" {
685 STRING_GROW;
686 context_state = YY_START;
687 BEGIN SC_LINE_COMMENT;
688 }
e9955c83
AD
689}
690
691
624a35e2 692
58d7a1a1
AD
693 /*-----------------------------------------------------------.
694 | Scanning some code in braces (actions). The initial "{" is |
695 | already eaten. |
696 `-----------------------------------------------------------*/
e9955c83
AD
697
698<SC_BRACED_CODE>
699{
cb823b6f
AD
700 "{"|"<"{splice}"%" STRING_GROW; nesting++;
701 "%"{splice}">" STRING_GROW; nesting--;
e9955c83 702 "}" {
25522739
PE
703 obstack_1grow (&obstack_for_string, '}');
704
cb823b6f
AD
705 --nesting;
706 if (nesting < 0)
e9955c83 707 {
41141c56 708 STRING_FINISH;
3f2d73f1 709 loc->start = code_start;
eb095650 710 val->code = last_string;
a706a1cc 711 BEGIN INITIAL;
58d7a1a1 712 return BRACED_CODE;
e9955c83
AD
713 }
714 }
715
a706a1cc
PE
716 /* Tokenize `<<%' correctly (as `<<' `%') rather than incorrectly
717 (as `<' `<%'). */
41141c56 718 "<"{splice}"<" STRING_GROW;
a706a1cc 719
47aee066
JD
720 <<EOF>> {
721 unexpected_eof (code_start, "}");
722 STRING_FINISH;
723 loc->start = code_start;
eb095650 724 val->code = last_string;
47aee066
JD
725 BEGIN INITIAL;
726 return BRACED_CODE;
727 }
e9955c83
AD
728}
729
730
731 /*--------------------------------------------------------------.
732 | Scanning some prologue: from "%{" (already scanned) to "%}". |
733 `--------------------------------------------------------------*/
734
735<SC_PROLOGUE>
736{
737 "%}" {
41141c56 738 STRING_FINISH;
3f2d73f1 739 loc->start = code_start;
223ff46e 740 val->chars = last_string;
a706a1cc 741 BEGIN INITIAL;
e9955c83
AD
742 return PROLOGUE;
743 }
744
47aee066
JD
745 <<EOF>> {
746 unexpected_eof (code_start, "%}");
747 STRING_FINISH;
748 loc->start = code_start;
749 val->chars = last_string;
750 BEGIN INITIAL;
751 return PROLOGUE;
752 }
e9955c83
AD
753}
754
755
756 /*---------------------------------------------------------------.
757 | Scanning the epilogue (everything after the second "%%", which |
d8d3f94a 758 | has already been eaten). |
e9955c83
AD
759 `---------------------------------------------------------------*/
760
761<SC_EPILOGUE>
762{
e9955c83 763 <<EOF>> {
41141c56 764 STRING_FINISH;
3f2d73f1 765 loc->start = code_start;
223ff46e 766 val->chars = last_string;
a706a1cc 767 BEGIN INITIAL;
e9955c83
AD
768 return EPILOGUE;
769 }
770}
771
772
4febdd96
PE
773 /*-----------------------------------------------------.
774 | By default, grow the string obstack with the input. |
775 `-----------------------------------------------------*/
776
777<SC_COMMENT,SC_LINE_COMMENT,SC_BRACED_CODE,SC_PROLOGUE,SC_EPILOGUE,SC_STRING,SC_CHARACTER,SC_ESCAPED_STRING,SC_ESCAPED_CHARACTER>. |
778<SC_COMMENT,SC_LINE_COMMENT,SC_BRACED_CODE,SC_PROLOGUE,SC_EPILOGUE>\n STRING_GROW;
779
e9955c83
AD
780%%
781
6c30d641
PE
/* Fill BUF with up to SIZE bytes read from FP and return how many
   bytes BUF holds afterwards.  Carriage returns never reach the
   caller: a "\r\n" pair collapses into a single '\n', and a lone
   '\r' is turned into '\n'.  */

static size_t
no_cr_read (FILE *fp, char *buf, size_t size)
{
  size_t const nread = fread (buf, 1, size, fp);
  char *out = nread ? memchr (buf, '\r', nread) : NULL;

  /* Nothing was read, or the chunk has no '\r': hand it over as is.  */
  if (!out)
    return nread;

  char const *const end = buf + nread;

  /* OUT is the write position and IN the read position of an
     in-place compaction; both start just past the first '\r'.  */
  char const *in = ++out;

  while (1)
    {
      /* OUT[-1] holds a '\r'.  Rewrite it as '\n' and swallow a '\n'
         that directly follows it.  */
      out[-1] = '\n';
      if (in != end)
        {
          if (*in == '\n')
            in++;
        }
      else
        {
          /* The '\r' was the last byte of the chunk, so a '\n'
             partner, if any, is still in the stream.  Read one
             character and push it back unless it is that '\n';
             give up on the chunk if the push-back fails.  */
          int next = getc (fp);
          if (next != '\n' && ungetc (next, fp) != next)
            return out - buf;
        }

      /* Slide bytes down until another '\r' has been copied or the
         chunk is exhausted.  */
      for (;;)
        {
          if (in == end)
            return out - buf;
          if ((*out++ = *in++) == '\r')
            break;
        }
    }
}
827
828
f25bfb75 829
1452af69
PE
830/*------------------------------------------------------.
831| Scan NUMBER for a base-BASE integer at location LOC. |
832`------------------------------------------------------*/
833
834static unsigned long int
835scan_integer (char const *number, int base, location loc)
836{
4517da37
PE
837 verify (INT_MAX < ULONG_MAX);
838 unsigned long int num = strtoul (number, NULL, base);
839
840 if (INT_MAX < num)
1452af69
PE
841 {
842 complain_at (loc, _("integer out of range: %s"), quote (number));
843 num = INT_MAX;
844 }
4517da37 845
1452af69
PE
846 return num;
847}
848
849
d8d3f94a
PE
/*----------------------------------------------------------------.
| Map the universal character name UCN to a single-byte character |
| and return it; the first two characters of UCN are skipped as   |
| its prefix.  Return -1 when no such single-byte character       |
| exists.                                                         |
`----------------------------------------------------------------*/

static int
convert_ucn_to_byte (char const *ucn)
{
  verify (UCHAR_MAX <= INT_MAX);
  unsigned long int code = strtoul (ucn + 2, NULL, 16);
  int byte;

  /* FIXME: Currently we assume Unicode-compatible unibyte characters
     on ASCII hosts (i.e., Latin-1 on hosts with 8-bit bytes).  On
     non-ASCII hosts we support only the portable C character set.
     These limitations should be removed once we add support for
     multibyte characters.  */

  if (UCHAR_MAX < code)
    return -1;

  /* CODE is at most UCHAR_MAX here, hence (per the verify above) it
     fits in an int.  The result is carried in an int from now on so
     that the -1 sentinel below never passes through an unsigned
     type and back.  */
  byte = code;

#if ! ('$' == 0x24 && '@' == 0x40 && '`' == 0x60 && '~' == 0x7e)
  {
    /* A non-ASCII host.  Use CODE to index into a table of the C
       basic execution character set, which is guaranteed to exist on
       all Standard C platforms.  This table also includes '$', '@',
       and '`', which are not in the basic execution character set but
       which are unibyte characters on all the platforms that we know
       about.  */
    static signed char const table[] =
      {
	'\0',   -1,   -1,   -1,   -1,   -1,   -1, '\a',
	'\b', '\t', '\n', '\v', '\f', '\r',   -1,   -1,
	  -1,   -1,   -1,   -1,   -1,   -1,   -1,   -1,
	  -1,   -1,   -1,   -1,   -1,   -1,   -1,   -1,
	 ' ',  '!',  '"',  '#',  '$',  '%',  '&', '\'',
	 '(',  ')',  '*',  '+',  ',',  '-',  '.',  '/',
	 '0',  '1',  '2',  '3',  '4',  '5',  '6',  '7',
	 '8',  '9',  ':',  ';',  '<',  '=',  '>',  '?',
	 '@',  'A',  'B',  'C',  'D',  'E',  'F',  'G',
	 'H',  'I',  'J',  'K',  'L',  'M',  'N',  'O',
	 'P',  'Q',  'R',  'S',  'T',  'U',  'V',  'W',
	 'X',  'Y',  'Z',  '[', '\\',  ']',  '^',  '_',
	 '`',  'a',  'b',  'c',  'd',  'e',  'f',  'g',
	 'h',  'i',  'j',  'k',  'l',  'm',  'n',  'o',
	 'p',  'q',  'r',  's',  't',  'u',  'v',  'w',
	 'x',  'y',  'z',  '{',  '|',  '}',  '~'
      };

    byte = code < sizeof table ? table[code] : -1;
  }
#endif

  return byte;
}
905
906
900c5db5
AD
907/*----------------------------------------------------------------.
908| Handle `#line INT "FILE"'. ARGS has already skipped `#line '. |
909`----------------------------------------------------------------*/
910
911static void
4517da37 912handle_syncline (char *args, location loc)
900c5db5 913{
4517da37
PE
914 char *after_num;
915 unsigned long int lineno = strtoul (args, &after_num, 10);
916 char *file = strchr (after_num, '"') + 1;
917 *strchr (file, '"') = '\0';
918 if (INT_MAX <= lineno)
919 {
920 warn_at (loc, _("line number overflow"));
921 lineno = INT_MAX;
922 }
e9071366 923 current_file = uniqstr_new (file);
0c8e079f 924 boundary_set (&scanner_cursor, current_file, lineno, 1);
4517da37
PE
925}
926
927
4febdd96
PE
928/*----------------------------------------------------------------.
929| For a token or comment starting at START, report message MSGID, |
930| which should say that an end marker was found before |
931| the expected TOKEN_END. |
932`----------------------------------------------------------------*/
933
934static void
935unexpected_end (boundary start, char const *msgid, char const *token_end)
936{
937 location loc;
938 loc.start = start;
939 loc.end = scanner_cursor;
940 complain_at (loc, _(msgid), token_end);
941}
942
943
3f2d73f1
PE
944/*------------------------------------------------------------------------.
945| Report an unexpected EOF in a token or comment starting at START. |
946| An end of file was encountered and the expected TOKEN_END was missing. |
3f2d73f1 947`------------------------------------------------------------------------*/
a706a1cc
PE
948
949static void
aa418041 950unexpected_eof (boundary start, char const *token_end)
a706a1cc 951{
4febdd96
PE
952 unexpected_end (start, N_("missing `%s' at end of file"), token_end);
953}
954
955
956/*----------------------------------------.
957| Likewise, but for unexpected newlines. |
958`----------------------------------------*/
959
960static void
961unexpected_newline (boundary start, char const *token_end)
962{
963 unexpected_end (start, N_("missing `%s' at end of line"), token_end);
a706a1cc
PE
964}
965
966
f25bfb75
AD
967/*-------------------------.
968| Initialize the scanner. |
969`-------------------------*/
970
1d6412ad 971void
e9071366 972gram_scanner_initialize (void)
1d6412ad 973{
223ff46e 974 obstack_init (&obstack_for_string);
1d6412ad
AD
975}
976
977
f25bfb75
AD
978/*-----------------------------------------------.
979| Free all the memory allocated to the scanner. |
980`-----------------------------------------------*/
981
4cdb01db 982void
e9071366 983gram_scanner_free (void)
4cdb01db 984{
223ff46e 985 obstack_free (&obstack_for_string, 0);
536545f3 986 /* Reclaim Flex's buffers. */
580b8926 987 yylex_destroy ();
4cdb01db 988}