]> git.saurik.com Git - bison.git/blame - src/scan-gram.l
* NEWS (2.4.3): Mention fix for Sun Studio C++.
[bison.git] / src / scan-gram.l
CommitLineData
e9955c83 1/* Bison Grammar Scanner -*- C -*-
3b1e470c 2
e141f4d4 3 Copyright (C) 2002-2010 Free Software Foundation, Inc.
e9955c83
AD
4
5 This file is part of Bison, the GNU Compiler Compiler.
6
f16b0819 7 This program is free software: you can redistribute it and/or modify
e9955c83 8 it under the terms of the GNU General Public License as published by
f16b0819 9 the Free Software Foundation, either version 3 of the License, or
e9955c83
AD
10 (at your option) any later version.
11
12 This program is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
16
17 You should have received a copy of the GNU General Public License
f16b0819 18 along with this program. If not, see <http://www.gnu.org/licenses/>. */
e9955c83 19
8d90395d 20%option debug nodefault noinput nounput noyywrap never-interactive
e9955c83
AD
21%option prefix="gram_" outfile="lex.yy.c"
22
23%{
4f6e011e
PE
24/* Work around a bug in flex 2.5.31. See Debian bug 333231
25 <http://bugs.debian.org/cgi-bin/bugreport.cgi?bug=333231>. */
26#undef gram_wrap
27#define gram_wrap() 1
28
e9071366 29#define FLEX_PREFIX(Id) gram_ ## Id
0305d25e 30#include <src/flex-scanner.h>
223ff46e 31
0305d25e
AD
32#include <src/complain.h>
33#include <src/files.h>
34#include <src/gram.h>
35#include <quotearg.h>
36#include <src/reader.h>
37#include <src/uniqstr.h>
e9955c83 38
c2724603 39#include <ctype.h>
e9071366
AD
40#include <mbswidth.h>
41#include <quote.h>
42
0305d25e 43#include <src/scan-gram.h>
e9071366
AD
44
45#define YY_DECL GRAM_LEX_DECL
2346344a 46
3f2d73f1 47#define YY_USER_INIT \
e9071366 48 code_start = scanner_cursor = loc->start; \
dc9701e8 49
3f2d73f1 50/* Location of scanner cursor. */
4a678af8 51static boundary scanner_cursor;
41141c56 52
e9071366 53#define YY_USER_ACTION location_compute (loc, &scanner_cursor, yytext, yyleng);
d8d3f94a 54
6c30d641 55static size_t no_cr_read (FILE *, char *, size_t);
d8d3f94a
PE
56#define YY_INPUT(buf, result, size) ((result) = no_cr_read (yyin, buf, size))
57
a7706735
AD
58#define RETURN_PERCENT_PARAM(Value) \
59 RETURN_VALUE(PERCENT_PARAM, param, param_ ## Value)
60
61#define RETURN_PERCENT_FLAG(Value) \
62 RETURN_VALUE(PERCENT_FLAG, uniqstr, uniqstr_new (Value))
63
64#define RETURN_VALUE(Token, Field, Value) \
ba061fa6 65 do { \
a7706735
AD
66 val->Field = Value; \
67 return Token; \
ba061fa6
AD
68 } while (0)
69
b9f1d9a4
AR
70#define ROLLBACK_CURRENT_TOKEN \
71 do { \
72 scanner_cursor.column -= mbsnwidth (yytext, yyleng, 0); \
73 yyless (0); \
74 } while (0)
ba061fa6 75
7ec2d4cd 76/* A string representing the most recently saved token. */
7c0c6181 77static char *last_string;
7ec2d4cd 78
872b52bc 79/* Bracketed identifier. */
b9f1d9a4
AR
80static uniqstr bracketed_id_str = 0;
81static location bracketed_id_loc;
82static boundary bracketed_id_start;
83static int bracketed_id_context_state = 0;
84
7ec2d4cd 85void
e9071366 86gram_scanner_last_string_free (void)
7ec2d4cd 87{
41141c56 88 STRING_FREE;
7ec2d4cd 89}
e9955c83 90
4517da37 91static void handle_syncline (char *, location);
1452af69 92static unsigned long int scan_integer (char const *p, int base, location loc);
d8d3f94a 93static int convert_ucn_to_byte (char const *hex_text);
aa418041 94static void unexpected_eof (boundary, char const *);
4febdd96 95static void unexpected_newline (boundary, char const *);
e9955c83
AD
96
97%}
e9071366
AD
98 /* A C-like comment in directives/rules. */
99%x SC_YACC_COMMENT
100 /* Strings and characters in directives/rules. */
e9955c83 101%x SC_ESCAPED_STRING SC_ESCAPED_CHARACTER
e9071366
AD
102 /* An identifier was just read in directives/rules. Special state
103 to capture the sequence `identifier :'. */
104%x SC_AFTER_IDENTIFIER
cb823b6f
AD
105 /* A complex tag, with nested angle brackets. */
106%x SC_TAG
e9071366
AD
107
108 /* Three types of user code:
109 - prologue (code between `%{' `%}' in the first section, before %%);
110 - actions, printers, union, etc. (between braces in the middle section);
111 - epilogue (everything after the second %%). */
112%x SC_PROLOGUE SC_BRACED_CODE SC_EPILOGUE
113 /* C and C++ comments in code. */
114%x SC_COMMENT SC_LINE_COMMENT
115 /* Strings and characters in code. */
116%x SC_STRING SC_CHARACTER
872b52bc 117 /* Bracketed identifiers support. */
b9f1d9a4 118%x SC_BRACKETED_ID SC_RETURN_BRACKETED_ID
e9955c83 119
cdf3f113
AD
120letter [-.abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ_]
121id {letter}({letter}|[0-9])*
4f646c37 122directive %{id}
624a35e2 123int [0-9]+
d8d3f94a
PE
124
125/* POSIX says that a tag must be both an id and a C union member, but
126 historically almost any character is allowed in a tag. We disallow
cb823b6f
AD
127 NUL, as this simplifies our implementation. We disallow angle
128 bracket to match them in nested pairs: several languages use them
129 for generics/template types. */
130tag [^\0<>]+
d8d3f94a
PE
131
132/* Zero or more instances of backslash-newline. Following GCC, allow
133 white space between the backslash and the newline. */
134splice (\\[ \f\t\v]*\n)*
e9955c83
AD
135
136%%
137%{
cb823b6f
AD
138 /* Nesting level. Either for nested braces, or nested angle brackets
139 (but not mixed). */
140 int nesting IF_LINT (= 0);
1a9e39f1 141
3f2d73f1 142 /* Parent context state, when applicable. */
5362ed19 143 int context_state IF_LINT (= 0);
a706a1cc 144
3f2d73f1 145 /* Location of most recent identifier, when applicable. */
a2bc9dbc 146 location id_loc IF_LINT (= empty_location);
3f2d73f1 147
a2bc9dbc
PE
148 /* Where containing code started, when applicable. Its initial
149 value is relevant only when yylex is invoked in the SC_EPILOGUE
150 start condition. */
151 boundary code_start = scanner_cursor;
3f2d73f1 152
223ff46e
PE
153 /* Where containing comment or string or character literal started,
154 when applicable. */
a2bc9dbc 155 boundary token_start IF_LINT (= scanner_cursor);
e9955c83
AD
156%}
157
158
3f2d73f1
PE
159 /*-----------------------.
160 | Scanning white space. |
161 `-----------------------*/
162
b9f1d9a4 163<INITIAL,SC_AFTER_IDENTIFIER,SC_BRACKETED_ID,SC_RETURN_BRACKETED_ID>
3f2d73f1 164{
4febdd96 165 /* Comments and white space. */
83adb046 166 "," warn_at (*loc, _("stray `,' treated as white space"));
4febdd96 167 [ \f\n\t\v] |
3f2d73f1 168 "//".* ;
83adb046
PE
169 "/*" {
170 token_start = loc->start;
171 context_state = YY_START;
172 BEGIN SC_YACC_COMMENT;
173 }
3f2d73f1
PE
174
175 /* #line directives are not documented, and may be withdrawn or
176 modified in future versions of Bison. */
177 ^"#line "{int}" \"".*"\"\n" {
4517da37 178 handle_syncline (yytext + sizeof "#line " - 1, *loc);
3f2d73f1
PE
179 }
180}
181
182
e9955c83
AD
183 /*----------------------------.
184 | Scanning Bison directives. |
185 `----------------------------*/
a7c09cba
DJ
186
187 /* For directives that are also command line options, the regex must be
188 "%..."
189 after "[-_]"s are removed, and the directive must match the --long
190 option name, with a single string argument. Otherwise, add exceptions
191 to ../build-aux/cross-options.pl. */
192
e9955c83
AD
193<INITIAL>
194{
deef2a0a 195 "%binary" return PERCENT_NONASSOC;
136a0f76 196 "%code" return PERCENT_CODE;
fa819509 197 "%debug" RETURN_PERCENT_FLAG("parse.trace");
deef2a0a
AD
198 "%default"[-_]"prec" return PERCENT_DEFAULT_PREC;
199 "%define" return PERCENT_DEFINE;
200 "%defines" return PERCENT_DEFINES;
201 "%destructor" return PERCENT_DESTRUCTOR;
202 "%dprec" return PERCENT_DPREC;
31b850d2 203 "%error"[-_]"verbose" return PERCENT_ERROR_VERBOSE;
deef2a0a
AD
204 "%expect" return PERCENT_EXPECT;
205 "%expect"[-_]"rr" return PERCENT_EXPECT_RR;
206 "%file-prefix" return PERCENT_FILE_PREFIX;
e9955c83 207 "%fixed"[-_]"output"[-_]"files" return PERCENT_YACC;
deef2a0a
AD
208 "%initial-action" return PERCENT_INITIAL_ACTION;
209 "%glr-parser" return PERCENT_GLR_PARSER;
210 "%language" return PERCENT_LANGUAGE;
211 "%left" return PERCENT_LEFT;
a7706735 212 "%lex-param" RETURN_PERCENT_PARAM(lex);
bc0f5737 213 "%locations" RETURN_PERCENT_FLAG("locations");
deef2a0a
AD
214 "%merge" return PERCENT_MERGE;
215 "%name"[-_]"prefix" return PERCENT_NAME_PREFIX;
216 "%no"[-_]"default"[-_]"prec" return PERCENT_NO_DEFAULT_PREC;
217 "%no"[-_]"lines" return PERCENT_NO_LINES;
218 "%nonassoc" return PERCENT_NONASSOC;
219 "%nondeterministic-parser" return PERCENT_NONDETERMINISTIC_PARSER;
220 "%nterm" return PERCENT_NTERM;
221 "%output" return PERCENT_OUTPUT;
a7706735
AD
222 "%param" RETURN_PERCENT_PARAM(both);
223 "%parse-param" RETURN_PERCENT_PARAM(parse);
deef2a0a 224 "%prec" return PERCENT_PREC;
d78f0ac9 225 "%precedence" return PERCENT_PRECEDENCE;
deef2a0a 226 "%printer" return PERCENT_PRINTER;
4920ae8b 227 "%pure"[-_]"parser" RETURN_PERCENT_FLAG("api.pure");
deef2a0a
AD
228 "%require" return PERCENT_REQUIRE;
229 "%right" return PERCENT_RIGHT;
230 "%skeleton" return PERCENT_SKELETON;
231 "%start" return PERCENT_START;
232 "%term" return PERCENT_TOKEN;
233 "%token" return PERCENT_TOKEN;
234 "%token"[-_]"table" return PERCENT_TOKEN_TABLE;
235 "%type" return PERCENT_TYPE;
236 "%union" return PERCENT_UNION;
237 "%verbose" return PERCENT_VERBOSE;
238 "%yacc" return PERCENT_YACC;
e9955c83 239
3f2d73f1 240 {directive} {
41141c56 241 complain_at (*loc, _("invalid directive: %s"), quote (yytext));
412f8a59 242 }
900c5db5 243
e9955c83 244 "=" return EQUAL;
e9071366 245 "|" return PIPE;
e9955c83
AD
246 ";" return SEMICOLON;
247
3f2d73f1 248 {id} {
58d7a1a1 249 val->uniqstr = uniqstr_new (yytext);
3f2d73f1 250 id_loc = *loc;
b9f1d9a4 251 bracketed_id_str = NULL;
3f2d73f1 252 BEGIN SC_AFTER_IDENTIFIER;
e9955c83
AD
253 }
254
d8d3f94a 255 {int} {
1452af69
PE
256 val->integer = scan_integer (yytext, 10, *loc);
257 return INT;
258 }
259 0[xX][0-9abcdefABCDEF]+ {
260 val->integer = scan_integer (yytext, 16, *loc);
d8d3f94a
PE
261 return INT;
262 }
e9955c83 263
84a1cb5a
AD
264 /* Identifiers may not start with a digit. Yet, don't silently
265 accept "1FOO" as "1 FOO". */
266 {int}{id} {
267 complain_at (*loc, _("invalid identifier: %s"), quote (yytext));
268 }
269
3208e3f4 270 /* Characters. */
dfaa4860 271 "'" token_start = loc->start; BEGIN SC_ESCAPED_CHARACTER;
e9955c83
AD
272
273 /* Strings. */
ca407bdf 274 "\"" token_start = loc->start; BEGIN SC_ESCAPED_STRING;
e9955c83
AD
275
276 /* Prologue. */
3f2d73f1 277 "%{" code_start = loc->start; BEGIN SC_PROLOGUE;
e9955c83
AD
278
279 /* Code in between braces. */
3f2d73f1
PE
280 "{" {
281 STRING_GROW;
cb823b6f 282 nesting = 0;
3f2d73f1
PE
283 code_start = loc->start;
284 BEGIN SC_BRACED_CODE;
285 }
e9955c83
AD
286
287 /* A type. */
cb823b6f
AD
288 "<*>" return TAG_ANY;
289 "<>" return TAG_NONE;
d8d3f94a 290 "<"{tag}">" {
223ff46e 291 obstack_grow (&obstack_for_string, yytext + 1, yyleng - 2);
41141c56 292 STRING_FINISH;
223ff46e 293 val->uniqstr = uniqstr_new (last_string);
41141c56 294 STRING_FREE;
cb823b6f
AD
295 return TAG;
296 }
297 "<" {
298 nesting = 0;
299 token_start = loc->start;
300 BEGIN SC_TAG;
4cdb01db
AD
301 }
302
a706a1cc
PE
303 "%%" {
304 static int percent_percent_count;
e9955c83 305 if (++percent_percent_count == 2)
a2bc9dbc 306 BEGIN SC_EPILOGUE;
e9955c83
AD
307 return PERCENT_PERCENT;
308 }
309
b9f1d9a4
AR
310 "[" {
311 bracketed_id_str = NULL;
312 bracketed_id_start = loc->start;
313 bracketed_id_context_state = YY_START;
314 BEGIN SC_BRACKETED_ID;
315 }
316
a706a1cc 317 . {
41141c56 318 complain_at (*loc, _("invalid character: %s"), quote (yytext));
3f2d73f1 319 }
379f0ac8
PE
320
321 <<EOF>> {
322 loc->start = loc->end = scanner_cursor;
323 yyterminate ();
324 }
3f2d73f1
PE
325}
326
327
cb823b6f
AD
328 /*--------------------------------------------------------------.
329 | Supporting \0 complexifies our implementation for no expected |
330 | added value. |
331 `--------------------------------------------------------------*/
332
333<SC_ESCAPED_CHARACTER,SC_ESCAPED_STRING,SC_TAG>
334{
335 \0 complain_at (*loc, _("invalid null character"));
336}
337
338
3f2d73f1
PE
339 /*-----------------------------------------------------------------.
340 | Scanning after an identifier, checking whether a colon is next. |
341 `-----------------------------------------------------------------*/
342
343<SC_AFTER_IDENTIFIER>
344{
b9f1d9a4 345 "[" {
872b52bc 346 if (bracketed_id_str)
b9f1d9a4
AR
347 {
348 ROLLBACK_CURRENT_TOKEN;
349 BEGIN SC_RETURN_BRACKETED_ID;
350 *loc = id_loc;
351 return ID;
352 }
872b52bc
AR
353 else
354 {
355 bracketed_id_start = loc->start;
356 bracketed_id_context_state = YY_START;
357 BEGIN SC_BRACKETED_ID;
358 }
b9f1d9a4 359 }
3f2d73f1 360 ":" {
b9f1d9a4 361 BEGIN (bracketed_id_str ? SC_RETURN_BRACKETED_ID : INITIAL);
3f2d73f1 362 *loc = id_loc;
3f2d73f1
PE
363 return ID_COLON;
364 }
365 . {
b9f1d9a4
AR
366 ROLLBACK_CURRENT_TOKEN;
367 BEGIN (bracketed_id_str ? SC_RETURN_BRACKETED_ID : INITIAL);
3f2d73f1 368 *loc = id_loc;
3f2d73f1
PE
369 return ID;
370 }
371 <<EOF>> {
b9f1d9a4 372 BEGIN (bracketed_id_str ? SC_RETURN_BRACKETED_ID : INITIAL);
3f2d73f1 373 *loc = id_loc;
3f2d73f1 374 return ID;
e9955c83
AD
375 }
376}
377
b9f1d9a4
AR
378 /*--------------------------------.
379 | Scanning bracketed identifiers. |
380 `--------------------------------*/
381
382<SC_BRACKETED_ID>
383{
384 {id} {
872b52bc 385 if (bracketed_id_str)
b9f1d9a4 386 {
872b52bc
AR
387 complain_at (*loc, _("unexpected identifier in bracketed name: %s"),
388 quote (yytext));
b9f1d9a4
AR
389 }
390 else
391 {
872b52bc
AR
392 bracketed_id_str = uniqstr_new (yytext);
393 bracketed_id_loc = *loc;
b9f1d9a4
AR
394 }
395 }
396 "]" {
397 BEGIN bracketed_id_context_state;
398 if (bracketed_id_str)
399 {
400 if (INITIAL == bracketed_id_context_state)
401 {
402 val->uniqstr = bracketed_id_str;
403 bracketed_id_str = 0;
404 *loc = bracketed_id_loc;
405 return BRACKETED_ID;
406 }
407 }
408 else
872b52bc 409 complain_at (*loc, _("an identifier expected"));
b9f1d9a4
AR
410 }
411 . {
412 complain_at (*loc, _("invalid character in bracketed name: %s"),
413 quote (yytext));
414 }
415 <<EOF>> {
416 BEGIN bracketed_id_context_state;
417 unexpected_eof (bracketed_id_start, "]");
418 }
419}
420
421<SC_RETURN_BRACKETED_ID>
422{
423 . {
424 ROLLBACK_CURRENT_TOKEN;
425 val->uniqstr = bracketed_id_str;
426 bracketed_id_str = 0;
427 *loc = bracketed_id_loc;
428 BEGIN INITIAL;
429 return BRACKETED_ID;
430 }
431}
432
e9955c83 433
d8d3f94a
PE
434 /*---------------------------------------------------------------.
435 | Scanning a Yacc comment. The initial `/ *' is already eaten. |
436 `---------------------------------------------------------------*/
e9955c83 437
d8d3f94a 438<SC_YACC_COMMENT>
e9955c83 439{
3f2d73f1 440 "*/" BEGIN context_state;
a706a1cc 441 .|\n ;
aa418041 442 <<EOF>> unexpected_eof (token_start, "*/"); BEGIN context_state;
d8d3f94a
PE
443}
444
445
446 /*------------------------------------------------------------.
447 | Scanning a C comment. The initial `/ *' is already eaten. |
448 `------------------------------------------------------------*/
449
450<SC_COMMENT>
451{
3f2d73f1 452 "*"{splice}"/" STRING_GROW; BEGIN context_state;
aa418041 453 <<EOF>> unexpected_eof (token_start, "*/"); BEGIN context_state;
e9955c83
AD
454}
455
456
d8d3f94a
PE
457 /*--------------------------------------------------------------.
458 | Scanning a line comment. The initial `//' is already eaten. |
459 `--------------------------------------------------------------*/
460
461<SC_LINE_COMMENT>
462{
3f2d73f1 463 "\n" STRING_GROW; BEGIN context_state;
41141c56 464 {splice} STRING_GROW;
3f2d73f1 465 <<EOF>> BEGIN context_state;
d8d3f94a
PE
466}
467
468
4febdd96
PE
469 /*------------------------------------------------.
470 | Scanning a Bison string, including its escapes. |
471 | The initial quote is already eaten. |
472 `------------------------------------------------*/
e9955c83
AD
473
474<SC_ESCAPED_STRING>
475{
47aee066
JD
476 "\""|"\n" {
477 if (yytext[0] == '\n')
478 unexpected_newline (token_start, "\"");
479 STRING_FINISH;
480 loc->start = token_start;
481 val->chars = last_string;
482 BEGIN INITIAL;
483 return STRING;
484 }
485 <<EOF>> {
486 unexpected_eof (token_start, "\"");
41141c56 487 STRING_FINISH;
3f2d73f1 488 loc->start = token_start;
223ff46e 489 val->chars = last_string;
a706a1cc 490 BEGIN INITIAL;
e9955c83
AD
491 return STRING;
492 }
e9955c83
AD
493}
494
4febdd96
PE
495 /*----------------------------------------------------------.
496 | Scanning a Bison character literal, decoding its escapes. |
497 | The initial quote is already eaten. |
498 `----------------------------------------------------------*/
e9955c83
AD
499
500<SC_ESCAPED_CHARACTER>
501{
47aee066 502 "'"|"\n" {
41141c56 503 STRING_FINISH;
3f2d73f1 504 loc->start = token_start;
dfaa4860 505 val->character = last_string[0];
3208e3f4
JD
506 {
507 /* FIXME: Eventually, make these errors. */
dfaa4860
JD
508 if (last_string[0] == '\0')
509 {
510 warn_at (*loc, _("empty character literal"));
511 /* '\0' seems dangerous even if we are about to complain. */
512 val->character = '\'';
513 }
514 else if (last_string[1] != '\0')
3208e3f4
JD
515 warn_at (*loc, _("extra characters in character literal"));
516 }
517 if (yytext[0] == '\n')
518 unexpected_newline (token_start, "'");
41141c56 519 STRING_FREE;
a706a1cc 520 BEGIN INITIAL;
58d7a1a1 521 return CHAR;
e9955c83 522 }
47aee066 523 <<EOF>> {
47aee066
JD
524 STRING_FINISH;
525 loc->start = token_start;
dfaa4860 526 val->character = last_string[0];
3208e3f4 527 {
3208e3f4 528 /* FIXME: Eventually, make these errors. */
dfaa4860
JD
529 if (last_string[0] == '\0')
530 {
531 warn_at (*loc, _("empty character literal"));
532 /* '\0' seems dangerous even if we are about to complain. */
533 val->character = '\'';
534 }
535 else if (last_string[1] != '\0')
3208e3f4 536 warn_at (*loc, _("extra characters in character literal"));
3208e3f4
JD
537 }
538 unexpected_eof (token_start, "'");
47aee066
JD
539 STRING_FREE;
540 BEGIN INITIAL;
541 return CHAR;
542 }
4febdd96 543}
a706a1cc 544
cb823b6f
AD
545 /*-----------------------------------------------------------.
546 | Scanning a Bison nested tag. The initial angle bracket is |
547 | already eaten. |
548 `-----------------------------------------------------------*/
549
550<SC_TAG>
4febdd96 551{
cb823b6f
AD
552 ">" {
553 --nesting;
554 if (nesting < 0)
555 {
556 STRING_FINISH;
557 loc->start = token_start;
558 val->uniqstr = uniqstr_new (last_string);
559 STRING_FREE;
560 BEGIN INITIAL;
561 return TAG;
562 }
563 STRING_GROW;
564 }
565
566 [^<>]+ STRING_GROW;
567 "<"+ STRING_GROW; nesting += yyleng;
e9955c83 568
cb823b6f
AD
569 <<EOF>> {
570 unexpected_eof (token_start, ">");
571 STRING_FINISH;
572 loc->start = token_start;
573 val->uniqstr = uniqstr_new (last_string);
574 STRING_FREE;
575 BEGIN INITIAL;
576 return TAG;
577 }
578}
e9955c83
AD
579
580 /*----------------------------.
581 | Decode escaped characters. |
582 `----------------------------*/
583
584<SC_ESCAPED_STRING,SC_ESCAPED_CHARACTER>
585{
d8d3f94a 586 \\[0-7]{1,3} {
4517da37 587 unsigned long int c = strtoul (yytext + 1, NULL, 8);
c2724603
JD
588 if (!c || UCHAR_MAX < c)
589 complain_at (*loc, _("invalid number after \\-escape: %s"),
590 yytext+1);
e9955c83 591 else
223ff46e 592 obstack_1grow (&obstack_for_string, c);
e9955c83
AD
593 }
594
6b0d38ab 595 \\x[0-9abcdefABCDEF]+ {
4517da37
PE
596 verify (UCHAR_MAX < ULONG_MAX);
597 unsigned long int c = strtoul (yytext + 2, NULL, 16);
c2724603
JD
598 if (!c || UCHAR_MAX < c)
599 complain_at (*loc, _("invalid number after \\-escape: %s"),
600 yytext+1);
d8d3f94a 601 else
223ff46e 602 obstack_1grow (&obstack_for_string, c);
e9955c83
AD
603 }
604
223ff46e
PE
605 \\a obstack_1grow (&obstack_for_string, '\a');
606 \\b obstack_1grow (&obstack_for_string, '\b');
607 \\f obstack_1grow (&obstack_for_string, '\f');
608 \\n obstack_1grow (&obstack_for_string, '\n');
609 \\r obstack_1grow (&obstack_for_string, '\r');
610 \\t obstack_1grow (&obstack_for_string, '\t');
611 \\v obstack_1grow (&obstack_for_string, '\v');
412f8a59
PE
612
613 /* \\[\"\'?\\] would be shorter, but it confuses xgettext. */
223ff46e 614 \\("\""|"'"|"?"|"\\") obstack_1grow (&obstack_for_string, yytext[1]);
412f8a59 615
6b0d38ab 616 \\(u|U[0-9abcdefABCDEF]{4})[0-9abcdefABCDEF]{4} {
d8d3f94a 617 int c = convert_ucn_to_byte (yytext);
c2724603
JD
618 if (c <= 0)
619 complain_at (*loc, _("invalid number after \\-escape: %s"),
620 yytext+1);
d8d3f94a 621 else
223ff46e 622 obstack_1grow (&obstack_for_string, c);
d8d3f94a 623 }
4f25ebb0 624 \\(.|\n) {
c2724603 625 char const *p = yytext + 1;
e6c849d8 626 /* Quote only if escaping won't make the character visible. */
4413bbd3 627 if (isspace ((unsigned char) *p) && isprint ((unsigned char) *p))
e6c849d8 628 p = quote (p);
c2724603
JD
629 else
630 p = quotearg_style_mem (escape_quoting_style, p, 1);
631 complain_at (*loc, _("invalid character after \\-escape: %s"), p);
e9955c83
AD
632 }
633}
634
4febdd96
PE
635 /*--------------------------------------------.
636 | Scanning user-code characters and strings. |
637 `--------------------------------------------*/
e9955c83 638
4febdd96
PE
639<SC_CHARACTER,SC_STRING>
640{
e9071366 641 {splice}|\\{splice}[^\n\[\]] STRING_GROW;
4febdd96 642}
e9955c83
AD
643
644<SC_CHARACTER>
645{
4febdd96
PE
646 "'" STRING_GROW; BEGIN context_state;
647 \n unexpected_newline (token_start, "'"); BEGIN context_state;
648 <<EOF>> unexpected_eof (token_start, "'"); BEGIN context_state;
e9955c83
AD
649}
650
e9955c83
AD
651<SC_STRING>
652{
4febdd96
PE
653 "\"" STRING_GROW; BEGIN context_state;
654 \n unexpected_newline (token_start, "\""); BEGIN context_state;
655 <<EOF>> unexpected_eof (token_start, "\""); BEGIN context_state;
e9955c83
AD
656}
657
658
659 /*---------------------------------------------------.
660 | Strings, comments etc. can be found in user code. |
661 `---------------------------------------------------*/
662
663<SC_BRACED_CODE,SC_PROLOGUE,SC_EPILOGUE>
664{
3f2d73f1
PE
665 "'" {
666 STRING_GROW;
667 context_state = YY_START;
668 token_start = loc->start;
669 BEGIN SC_CHARACTER;
670 }
671 "\"" {
672 STRING_GROW;
673 context_state = YY_START;
674 token_start = loc->start;
675 BEGIN SC_STRING;
676 }
677 "/"{splice}"*" {
678 STRING_GROW;
679 context_state = YY_START;
680 token_start = loc->start;
681 BEGIN SC_COMMENT;
682 }
683 "/"{splice}"/" {
684 STRING_GROW;
685 context_state = YY_START;
686 BEGIN SC_LINE_COMMENT;
687 }
e9955c83
AD
688}
689
690
624a35e2 691
58d7a1a1
AD
692 /*-----------------------------------------------------------.
693 | Scanning some code in braces (actions). The initial "{" is |
694 | already eaten. |
695 `-----------------------------------------------------------*/
e9955c83
AD
696
697<SC_BRACED_CODE>
698{
cb823b6f
AD
699 "{"|"<"{splice}"%" STRING_GROW; nesting++;
700 "%"{splice}">" STRING_GROW; nesting--;
e9955c83 701 "}" {
25522739
PE
702 obstack_1grow (&obstack_for_string, '}');
703
cb823b6f
AD
704 --nesting;
705 if (nesting < 0)
e9955c83 706 {
41141c56 707 STRING_FINISH;
3f2d73f1 708 loc->start = code_start;
eb095650 709 val->code = last_string;
a706a1cc 710 BEGIN INITIAL;
58d7a1a1 711 return BRACED_CODE;
e9955c83
AD
712 }
713 }
714
a706a1cc
PE
715 /* Tokenize `<<%' correctly (as `<<' `%') rather than incorrectly
716 (as `<' `<%'). */
41141c56 717 "<"{splice}"<" STRING_GROW;
a706a1cc 718
47aee066
JD
719 <<EOF>> {
720 unexpected_eof (code_start, "}");
721 STRING_FINISH;
722 loc->start = code_start;
eb095650 723 val->code = last_string;
47aee066
JD
724 BEGIN INITIAL;
725 return BRACED_CODE;
726 }
e9955c83
AD
727}
728
729
730 /*--------------------------------------------------------------.
731 | Scanning some prologue: from "%{" (already scanned) to "%}". |
732 `--------------------------------------------------------------*/
733
734<SC_PROLOGUE>
735{
736 "%}" {
41141c56 737 STRING_FINISH;
3f2d73f1 738 loc->start = code_start;
223ff46e 739 val->chars = last_string;
a706a1cc 740 BEGIN INITIAL;
e9955c83
AD
741 return PROLOGUE;
742 }
743
47aee066
JD
744 <<EOF>> {
745 unexpected_eof (code_start, "%}");
746 STRING_FINISH;
747 loc->start = code_start;
748 val->chars = last_string;
749 BEGIN INITIAL;
750 return PROLOGUE;
751 }
e9955c83
AD
752}
753
754
755 /*---------------------------------------------------------------.
756 | Scanning the epilogue (everything after the second "%%", which |
d8d3f94a 757 | has already been eaten). |
e9955c83
AD
758 `---------------------------------------------------------------*/
759
760<SC_EPILOGUE>
761{
e9955c83 762 <<EOF>> {
41141c56 763 STRING_FINISH;
3f2d73f1 764 loc->start = code_start;
223ff46e 765 val->chars = last_string;
a706a1cc 766 BEGIN INITIAL;
e9955c83
AD
767 return EPILOGUE;
768 }
769}
770
771
4febdd96
PE
772 /*-----------------------------------------------------.
773 | By default, grow the string obstack with the input. |
774 `-----------------------------------------------------*/
775
776<SC_COMMENT,SC_LINE_COMMENT,SC_BRACED_CODE,SC_PROLOGUE,SC_EPILOGUE,SC_STRING,SC_CHARACTER,SC_ESCAPED_STRING,SC_ESCAPED_CHARACTER>. |
777<SC_COMMENT,SC_LINE_COMMENT,SC_BRACED_CODE,SC_PROLOGUE,SC_EPILOGUE>\n STRING_GROW;
778
e9955c83
AD
779%%
780
6c30d641
PE
/* Read up to SIZE bytes from FP into BUF and return how many bytes
   BUF holds afterwards.  Carriage returns never reach the caller:
   both "\r\n" and a lone '\r' are delivered as a single '\n', so the
   returned count may be smaller than the number of bytes consumed
   from FP.  */

static size_t
no_cr_read (FILE *fp, char *buf, size_t size)
{
  size_t nread = fread (buf, 1, size, fp);
  char const *end = buf + nread;
  /* DST is the next slot to write; it starts on the first '\r'.  */
  char *dst = nread ? memchr (buf, '\r', nread) : NULL;
  char const *src;

  /* Nothing read, or no carriage return at all: BUF is already right.  */
  if (!dst)
    return nread;

  src = dst + 1;
  for (;;)
    {
      /* The '\r' just seen becomes a newline...  */
      *dst++ = '\n';

      /* ...and a '\n' directly after it is swallowed.  */
      if (src != end)
        {
          if (*src == '\n')
            src++;
        }
      else
        {
          /* The '\r' was the last byte of this chunk, so the '\n' to
             swallow, if any, is still in the stream.  Peek at it and
             push back anything else; give up if the push-back fails.  */
          int next = getc (fp);
          if (next != '\n' && ungetc (next, fp) != next)
            return dst - buf;
        }

      /* Slide the following bytes down until the next '\r' or the end
         of the chunk.  */
      for (;;)
        {
          char c;
          if (src == end)
            return dst - buf;
          c = *src++;
          if (c == '\r')
            break;
          *dst++ = c;
        }
    }
}
826
827
f25bfb75 828
1452af69
PE
829/*------------------------------------------------------.
830| Scan NUMBER for a base-BASE integer at location LOC. |
831`------------------------------------------------------*/
832
833static unsigned long int
834scan_integer (char const *number, int base, location loc)
835{
4517da37
PE
836 verify (INT_MAX < ULONG_MAX);
837 unsigned long int num = strtoul (number, NULL, base);
838
839 if (INT_MAX < num)
1452af69
PE
840 {
841 complain_at (loc, _("integer out of range: %s"), quote (number));
842 num = INT_MAX;
843 }
4517da37 844
1452af69
PE
845 return num;
846}
847
848
d8d3f94a
PE
/*------------------------------------------------------------------.
| Convert universal character name UCN to a single-byte character, |
| and return that character.  Return -1 if UCN does not correspond |
| to a single-byte character.                                       |
|                                                                   |
| UCN points at the backslash of a `\uXXXX' or `\UXXXXXXXX' escape; |
| the two leading characters are skipped and the rest is read as a |
| hexadecimal number.                                               |
`------------------------------------------------------------------*/

static int
convert_ucn_to_byte (char const *ucn)
{
  /* Every value that passes the UCHAR_MAX test below must fit in the
     int return type.  */
  verify (UCHAR_MAX <= INT_MAX);
  unsigned long int code = strtoul (ucn + 2, NULL, 16);

  /* FIXME: Currently we assume Unicode-compatible unibyte characters
     on ASCII hosts (i.e., Latin-1 on hosts with 8-bit bytes).  On
     non-ASCII hosts we support only the portable C character set.
     These limitations should be removed once we add support for
     multibyte characters.  */

  if (UCHAR_MAX < code)
    return -1;

#if ! ('$' == 0x24 && '@' == 0x40 && '`' == 0x60 && '~' == 0x7e)
  {
    /* A non-ASCII host.  Use CODE to index into a table of the C
       basic execution character set, which is guaranteed to exist on
       all Standard C platforms.  This table also includes '$', '@',
       and '`', which are not in the basic execution character set but
       which are unibyte characters on all the platforms that we know
       about.  */
    static signed char const table[] =
      {
	'\0',   -1,   -1,   -1,   -1,   -1,   -1, '\a',
	'\b', '\t', '\n', '\v', '\f', '\r',   -1,   -1,
	  -1,   -1,   -1,   -1,   -1,   -1,   -1,   -1,
	  -1,   -1,   -1,   -1,   -1,   -1,   -1,   -1,
	 ' ',  '!',  '"',  '#',  '$',  '%',  '&', '\'',
	 '(',  ')',  '*',  '+',  ',',  '-',  '.',  '/',
	 '0',  '1',  '2',  '3',  '4',  '5',  '6',  '7',
	 '8',  '9',  ':',  ';',  '<',  '=',  '>',  '?',
	 '@',  'A',  'B',  'C',  'D',  'E',  'F',  'G',
	 'H',  'I',  'J',  'K',  'L',  'M',  'N',  'O',
	 'P',  'Q',  'R',  'S',  'T',  'U',  'V',  'W',
	 'X',  'Y',  'Z',  '[', '\\',  ']',  '^',  '_',
	 '`',  'a',  'b',  'c',  'd',  'e',  'f',  'g',
	 'h',  'i',  'j',  'k',  'l',  'm',  'n',  'o',
	 'p',  'q',  'r',  's',  't',  'u',  'v',  'w',
	 'x',  'y',  'z',  '{',  '|',  '}',  '~'
      };

    /* Return the int result directly.  Storing -1 (or a negative
       table entry) in the unsigned CODE and converting it back to int
       on return would be an implementation-defined conversion.  */
    return code < sizeof table ? table[code] : -1;
  }
#else
  /* ASCII host: CODE is at most UCHAR_MAX here, so it fits in int.  */
  return (int) code;
#endif
}
904
905
900c5db5
AD
906/*----------------------------------------------------------------.
907| Handle `#line INT "FILE"'. ARGS has already skipped `#line '. |
908`----------------------------------------------------------------*/
909
910static void
4517da37 911handle_syncline (char *args, location loc)
900c5db5 912{
4517da37
PE
913 char *after_num;
914 unsigned long int lineno = strtoul (args, &after_num, 10);
915 char *file = strchr (after_num, '"') + 1;
916 *strchr (file, '"') = '\0';
917 if (INT_MAX <= lineno)
918 {
919 warn_at (loc, _("line number overflow"));
920 lineno = INT_MAX;
921 }
e9071366 922 current_file = uniqstr_new (file);
0c8e079f 923 boundary_set (&scanner_cursor, current_file, lineno, 1);
4517da37
PE
924}
925
926
4febdd96
PE
927/*----------------------------------------------------------------.
928| For a token or comment starting at START, report message MSGID, |
929| which should say that an end marker was found before |
930| the expected TOKEN_END. |
931`----------------------------------------------------------------*/
932
933static void
934unexpected_end (boundary start, char const *msgid, char const *token_end)
935{
936 location loc;
937 loc.start = start;
938 loc.end = scanner_cursor;
939 complain_at (loc, _(msgid), token_end);
940}
941
942
3f2d73f1
PE
943/*------------------------------------------------------------------------.
944| Report an unexpected EOF in a token or comment starting at START. |
945| An end of file was encountered and the expected TOKEN_END was missing. |
3f2d73f1 946`------------------------------------------------------------------------*/
a706a1cc
PE
947
948static void
aa418041 949unexpected_eof (boundary start, char const *token_end)
a706a1cc 950{
4febdd96
PE
951 unexpected_end (start, N_("missing `%s' at end of file"), token_end);
952}
953
954
955/*----------------------------------------.
956| Likewise, but for unexpected newlines. |
957`----------------------------------------*/
958
959static void
960unexpected_newline (boundary start, char const *token_end)
961{
962 unexpected_end (start, N_("missing `%s' at end of line"), token_end);
a706a1cc
PE
963}
964
965
f25bfb75
AD
966/*-------------------------.
967| Initialize the scanner. |
968`-------------------------*/
969
1d6412ad 970void
e9071366 971gram_scanner_initialize (void)
1d6412ad 972{
223ff46e 973 obstack_init (&obstack_for_string);
1d6412ad
AD
974}
975
976
f25bfb75
AD
977/*-----------------------------------------------.
978| Free all the memory allocated to the scanner. |
979`-----------------------------------------------*/
980
4cdb01db 981void
e9071366 982gram_scanner_free (void)
4cdb01db 983{
223ff46e 984 obstack_free (&obstack_for_string, 0);
536545f3 985 /* Reclaim Flex's buffers. */
580b8926 986 yylex_destroy ();
4cdb01db 987}