]> git.saurik.com Git - bison.git/blame - src/scan-gram.l
Fix complaints about escape sequences.
[bison.git] / src / scan-gram.l
CommitLineData
e9955c83 1/* Bison Grammar Scanner -*- C -*-
3b1e470c 2
6789b8bd
JD
3 Copyright (C) 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009 Free
4 Software Foundation, Inc.
e9955c83
AD
5
6 This file is part of Bison, the GNU Compiler Compiler.
7
f16b0819 8 This program is free software: you can redistribute it and/or modify
e9955c83 9 it under the terms of the GNU General Public License as published by
f16b0819 10 the Free Software Foundation, either version 3 of the License, or
e9955c83
AD
11 (at your option) any later version.
12
13 This program is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU General Public License for more details.
17
18 You should have received a copy of the GNU General Public License
f16b0819 19 along with this program. If not, see <http://www.gnu.org/licenses/>. */
e9955c83 20
4521fcdf 21%option debug nodefault noinput nounput noyywrap never-interactive
e9955c83
AD
22%option prefix="gram_" outfile="lex.yy.c"
23
24%{
4f6e011e
PE
25/* Work around a bug in flex 2.5.31. See Debian bug 333231
26 <http://bugs.debian.org/cgi-bin/bugreport.cgi?bug=333231>. */
27#undef gram_wrap
28#define gram_wrap() 1
29
e9071366
AD
30#define FLEX_PREFIX(Id) gram_ ## Id
31#include "flex-scanner.h"
223ff46e 32
e9955c83 33#include "complain.h"
3f2d73f1 34#include "files.h"
e9955c83 35#include "gram.h"
ca407bdf 36#include "quotearg.h"
e9955c83 37#include "reader.h"
223ff46e 38#include "uniqstr.h"
e9955c83 39
39fb7e62 40#include <ctype.h>
e9071366
AD
41#include <mbswidth.h>
42#include <quote.h>
43
44#include "scan-gram.h"
45
46#define YY_DECL GRAM_LEX_DECL
2346344a 47
3f2d73f1 48#define YY_USER_INIT \
e9071366 49 code_start = scanner_cursor = loc->start; \
dc9701e8 50
3f2d73f1 51/* Location of scanner cursor. */
4a678af8 52static boundary scanner_cursor;
41141c56 53
e9071366 54#define YY_USER_ACTION location_compute (loc, &scanner_cursor, yytext, yyleng);
d8d3f94a 55
6c30d641 56static size_t no_cr_read (FILE *, char *, size_t);
d8d3f94a
PE
57#define YY_INPUT(buf, result, size) ((result) = no_cr_read (yyin, buf, size))
58
7685e2f7
AR
59#define ROLLBACK_CURRENT_TOKEN \
60 do { \
61 scanner_cursor.column -= mbsnwidth (yytext, yyleng, 0); \
62 yyless (0); \
63 } while (0)
64
7ec2d4cd 65/* A string representing the most recently saved token. */
7c0c6181 66static char *last_string;
7ec2d4cd 67
d5e8574b 68/* Bracketed identifier. */
7685e2f7
AR
69static uniqstr bracketed_id_str = 0;
70static location bracketed_id_loc;
71static boundary bracketed_id_start;
72static int bracketed_id_context_state = 0;
73
7ec2d4cd 74void
e9071366 75gram_scanner_last_string_free (void)
7ec2d4cd 76{
41141c56 77 STRING_FREE;
7ec2d4cd 78}
e9955c83 79
4517da37 80static void handle_syncline (char *, location);
1452af69 81static unsigned long int scan_integer (char const *p, int base, location loc);
d8d3f94a 82static int convert_ucn_to_byte (char const *hex_text);
aa418041 83static void unexpected_eof (boundary, char const *);
4febdd96 84static void unexpected_newline (boundary, char const *);
e9955c83
AD
85
86%}
e9071366
AD
87 /* A C-like comment in directives/rules. */
88%x SC_YACC_COMMENT
89 /* Strings and characters in directives/rules. */
e9955c83 90%x SC_ESCAPED_STRING SC_ESCAPED_CHARACTER
e9071366
AD
91 /* An identifier was just read in directives/rules. Special state
92 to capture the sequence `identifier :'. */
93%x SC_AFTER_IDENTIFIER
e9071366
AD
94
95 /* Three types of user code:
96 - prologue (code between `%{' `%}' in the first section, before %%);
97 - actions, printers, union, etc, (between braces in the middle section);
98 - epilogue (everything after the second %%). */
99%x SC_PROLOGUE SC_BRACED_CODE SC_EPILOGUE
100 /* C and C++ comments in code. */
101%x SC_COMMENT SC_LINE_COMMENT
102 /* Strings and characters in code. */
103%x SC_STRING SC_CHARACTER
d5e8574b 104 /* Bracketed identifiers support. */
7685e2f7 105%x SC_BRACKETED_ID SC_RETURN_BRACKETED_ID
e9955c83 106
c046698e
AD
107letter [-.abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ_]
108id {letter}({letter}|[0-9])*
663ce7bb 109directive %{id}
624a35e2 110int [0-9]+
d8d3f94a
PE
111
112/* POSIX says that a tag must be both an id and a C union member, but
113 historically almost any character is allowed in a tag. We disallow
114 NUL and newline, as this simplifies our implementation. */
115tag [^\0\n>]+
116
117/* Zero or more instances of backslash-newline. Following GCC, allow
118 white space between the backslash and the newline. */
119splice (\\[ \f\t\v]*\n)*
e9955c83
AD
120
121%%
122%{
a706a1cc 123 /* Nesting level of the current code in braces. */
5362ed19 124 int braces_level IF_LINT (= 0);
1a9e39f1 125
3f2d73f1 126 /* Parent context state, when applicable. */
5362ed19 127 int context_state IF_LINT (= 0);
a706a1cc 128
3f2d73f1 129 /* Location of most recent identifier, when applicable. */
a2bc9dbc 130 location id_loc IF_LINT (= empty_location);
3f2d73f1 131
a2bc9dbc
PE
132 /* Where containing code started, when applicable. Its initial
133 value is relevant only when yylex is invoked in the SC_EPILOGUE
134 start condition. */
135 boundary code_start = scanner_cursor;
3f2d73f1 136
223ff46e
PE
137 /* Where containing comment or string or character literal started,
138 when applicable. */
a2bc9dbc 139 boundary token_start IF_LINT (= scanner_cursor);
e9955c83
AD
140%}
141
142
3f2d73f1
PE
143 /*-----------------------.
144 | Scanning white space. |
145 `-----------------------*/
146
7685e2f7 147<INITIAL,SC_AFTER_IDENTIFIER,SC_BRACKETED_ID,SC_RETURN_BRACKETED_ID>
3f2d73f1 148{
4febdd96 149 /* Comments and white space. */
83adb046 150 "," warn_at (*loc, _("stray `,' treated as white space"));
4febdd96 151 [ \f\n\t\v] |
3f2d73f1 152 "//".* ;
83adb046
PE
153 "/*" {
154 token_start = loc->start;
155 context_state = YY_START;
156 BEGIN SC_YACC_COMMENT;
157 }
3f2d73f1
PE
158
159 /* #line directives are not documented, and may be withdrawn or
160 modified in future versions of Bison. */
161 ^"#line "{int}" \"".*"\"\n" {
4517da37 162 handle_syncline (yytext + sizeof "#line " - 1, *loc);
3f2d73f1
PE
163 }
164}
165
166
e9955c83
AD
167 /*----------------------------.
168 | Scanning Bison directives. |
169 `----------------------------*/
72183df4
DJ
170
171 /* For directives that are also command line options, the regex must be
172 "%..."
173 after "[-_]"s are removed, and the directive must match the --long
174 option name, with a single string argument. Otherwise, add exceptions
175 to ../build-aux/cross-options.pl. */
176
e9955c83
AD
177<INITIAL>
178{
43e6aea5 179 "%binary" return PERCENT_NONASSOC;
136a0f76 180 "%code" return PERCENT_CODE;
43e6aea5
AD
181 "%debug" return PERCENT_DEBUG;
182 "%default"[-_]"prec" return PERCENT_DEFAULT_PREC;
183 "%define" return PERCENT_DEFINE;
184 "%defines" return PERCENT_DEFINES;
185 "%destructor" return PERCENT_DESTRUCTOR;
186 "%dprec" return PERCENT_DPREC;
187 "%error"[-_]"verbose" return PERCENT_ERROR_VERBOSE;
188 "%expect" return PERCENT_EXPECT;
189 "%expect"[-_]"rr" return PERCENT_EXPECT_RR;
190 "%file-prefix" return PERCENT_FILE_PREFIX;
e9955c83 191 "%fixed"[-_]"output"[-_]"files" return PERCENT_YACC;
43e6aea5
AD
192 "%initial-action" return PERCENT_INITIAL_ACTION;
193 "%glr-parser" return PERCENT_GLR_PARSER;
194 "%language" return PERCENT_LANGUAGE;
195 "%left" return PERCENT_LEFT;
196 "%lex-param" return PERCENT_LEX_PARAM;
197 "%locations" return PERCENT_LOCATIONS;
198 "%merge" return PERCENT_MERGE;
199 "%name"[-_]"prefix" return PERCENT_NAME_PREFIX;
200 "%no"[-_]"default"[-_]"prec" return PERCENT_NO_DEFAULT_PREC;
201 "%no"[-_]"lines" return PERCENT_NO_LINES;
202 "%nonassoc" return PERCENT_NONASSOC;
203 "%nondeterministic-parser" return PERCENT_NONDETERMINISTIC_PARSER;
204 "%nterm" return PERCENT_NTERM;
205 "%output" return PERCENT_OUTPUT;
206 "%parse-param" return PERCENT_PARSE_PARAM;
207 "%prec" return PERCENT_PREC;
208 "%printer" return PERCENT_PRINTER;
209 "%pure"[-_]"parser" return PERCENT_PURE_PARSER;
210 "%require" return PERCENT_REQUIRE;
211 "%right" return PERCENT_RIGHT;
212 "%skeleton" return PERCENT_SKELETON;
213 "%start" return PERCENT_START;
214 "%term" return PERCENT_TOKEN;
215 "%token" return PERCENT_TOKEN;
216 "%token"[-_]"table" return PERCENT_TOKEN_TABLE;
217 "%type" return PERCENT_TYPE;
218 "%union" return PERCENT_UNION;
219 "%verbose" return PERCENT_VERBOSE;
220 "%yacc" return PERCENT_YACC;
e9955c83 221
3f2d73f1 222 {directive} {
41141c56 223 complain_at (*loc, _("invalid directive: %s"), quote (yytext));
412f8a59 224 }
900c5db5 225
e9955c83 226 "=" return EQUAL;
e9071366 227 "|" return PIPE;
e9955c83 228 ";" return SEMICOLON;
12e35840 229 "<*>" return TYPE_TAG_ANY;
3ebecc24 230 "<>" return TYPE_TAG_NONE;
e9955c83 231
3f2d73f1 232 {id} {
58d7a1a1 233 val->uniqstr = uniqstr_new (yytext);
3f2d73f1 234 id_loc = *loc;
7685e2f7 235 bracketed_id_str = NULL;
3f2d73f1 236 BEGIN SC_AFTER_IDENTIFIER;
e9955c83
AD
237 }
238
d8d3f94a 239 {int} {
1452af69
PE
240 val->integer = scan_integer (yytext, 10, *loc);
241 return INT;
242 }
243 0[xX][0-9abcdefABCDEF]+ {
244 val->integer = scan_integer (yytext, 16, *loc);
d8d3f94a
PE
245 return INT;
246 }
e9955c83 247
601bdfab
AD
248 /* Identifiers may not start with a digit. Yet, don't silently
249 accept "1FOO" as "1 FOO". */
250 {int}{id} {
251 complain_at (*loc, _("invalid identifier: %s"), quote (yytext));
252 }
253
ac9b0e95 254 /* Characters. */
07c0db18 255 "'" token_start = loc->start; BEGIN SC_ESCAPED_CHARACTER;
e9955c83
AD
256
257 /* Strings. */
ca407bdf 258 "\"" token_start = loc->start; BEGIN SC_ESCAPED_STRING;
e9955c83
AD
259
260 /* Prologue. */
3f2d73f1 261 "%{" code_start = loc->start; BEGIN SC_PROLOGUE;
e9955c83
AD
262
263 /* Code in between braces. */
3f2d73f1
PE
264 "{" {
265 STRING_GROW;
266 braces_level = 0;
267 code_start = loc->start;
268 BEGIN SC_BRACED_CODE;
269 }
e9955c83
AD
270
271 /* A type. */
d8d3f94a 272 "<"{tag}">" {
223ff46e 273 obstack_grow (&obstack_for_string, yytext + 1, yyleng - 2);
41141c56 274 STRING_FINISH;
223ff46e 275 val->uniqstr = uniqstr_new (last_string);
41141c56 276 STRING_FREE;
4cdb01db
AD
277 return TYPE;
278 }
279
a706a1cc
PE
280 "%%" {
281 static int percent_percent_count;
e9955c83 282 if (++percent_percent_count == 2)
a2bc9dbc 283 BEGIN SC_EPILOGUE;
e9955c83
AD
284 return PERCENT_PERCENT;
285 }
286
7685e2f7
AR
287 "[" {
288 bracketed_id_str = NULL;
289 bracketed_id_start = loc->start;
290 bracketed_id_context_state = YY_START;
291 BEGIN SC_BRACKETED_ID;
292 }
293
a706a1cc 294 . {
41141c56 295 complain_at (*loc, _("invalid character: %s"), quote (yytext));
3f2d73f1 296 }
379f0ac8
PE
297
298 <<EOF>> {
299 loc->start = loc->end = scanner_cursor;
300 yyterminate ();
301 }
3f2d73f1
PE
302}
303
304
305 /*-----------------------------------------------------------------.
306 | Scanning after an identifier, checking whether a colon is next. |
307 `-----------------------------------------------------------------*/
308
309<SC_AFTER_IDENTIFIER>
310{
7685e2f7 311 "[" {
d5e8574b 312 if (bracketed_id_str)
7685e2f7
AR
313 {
314 ROLLBACK_CURRENT_TOKEN;
315 BEGIN SC_RETURN_BRACKETED_ID;
316 *loc = id_loc;
317 return ID;
318 }
d5e8574b
AR
319 else
320 {
321 bracketed_id_start = loc->start;
322 bracketed_id_context_state = YY_START;
323 BEGIN SC_BRACKETED_ID;
324 }
7685e2f7 325 }
3f2d73f1 326 ":" {
7685e2f7 327 BEGIN (bracketed_id_str ? SC_RETURN_BRACKETED_ID : INITIAL);
3f2d73f1 328 *loc = id_loc;
3f2d73f1
PE
329 return ID_COLON;
330 }
331 . {
7685e2f7
AR
332 ROLLBACK_CURRENT_TOKEN;
333 BEGIN (bracketed_id_str ? SC_RETURN_BRACKETED_ID : INITIAL);
3f2d73f1 334 *loc = id_loc;
3f2d73f1
PE
335 return ID;
336 }
337 <<EOF>> {
7685e2f7 338 BEGIN (bracketed_id_str ? SC_RETURN_BRACKETED_ID : INITIAL);
3f2d73f1 339 *loc = id_loc;
3f2d73f1 340 return ID;
e9955c83
AD
341 }
342}
343
7685e2f7
AR
344 /*--------------------------------.
345 | Scanning bracketed identifiers. |
346 `--------------------------------*/
347
348<SC_BRACKETED_ID>
349{
350 {id} {
d5e8574b 351 if (bracketed_id_str)
7685e2f7 352 {
d5e8574b
AR
353 complain_at (*loc, _("unexpected identifier in bracketed name: %s"),
354 quote (yytext));
7685e2f7
AR
355 }
356 else
357 {
d5e8574b
AR
358 bracketed_id_str = uniqstr_new (yytext);
359 bracketed_id_loc = *loc;
7685e2f7
AR
360 }
361 }
362 "]" {
363 BEGIN bracketed_id_context_state;
364 if (bracketed_id_str)
365 {
366 if (INITIAL == bracketed_id_context_state)
367 {
368 val->uniqstr = bracketed_id_str;
369 bracketed_id_str = 0;
370 *loc = bracketed_id_loc;
371 return BRACKETED_ID;
372 }
373 }
374 else
d5e8574b 375 complain_at (*loc, _("an identifier expected"));
7685e2f7
AR
376 }
377 . {
378 complain_at (*loc, _("invalid character in bracketed name: %s"),
379 quote (yytext));
380 }
381 <<EOF>> {
382 BEGIN bracketed_id_context_state;
383 unexpected_eof (bracketed_id_start, "]");
384 }
385}
386
387<SC_RETURN_BRACKETED_ID>
388{
389 . {
390 ROLLBACK_CURRENT_TOKEN;
391 val->uniqstr = bracketed_id_str;
392 bracketed_id_str = 0;
393 *loc = bracketed_id_loc;
394 BEGIN INITIAL;
395 return BRACKETED_ID;
396 }
397}
398
e9955c83 399
d8d3f94a
PE
400 /*---------------------------------------------------------------.
401 | Scanning a Yacc comment. The initial `/ *' is already eaten. |
402 `---------------------------------------------------------------*/
e9955c83 403
d8d3f94a 404<SC_YACC_COMMENT>
e9955c83 405{
3f2d73f1 406 "*/" BEGIN context_state;
a706a1cc 407 .|\n ;
aa418041 408 <<EOF>> unexpected_eof (token_start, "*/"); BEGIN context_state;
d8d3f94a
PE
409}
410
411
412 /*------------------------------------------------------------.
413 | Scanning a C comment. The initial `/ *' is already eaten. |
414 `------------------------------------------------------------*/
415
416<SC_COMMENT>
417{
3f2d73f1 418 "*"{splice}"/" STRING_GROW; BEGIN context_state;
aa418041 419 <<EOF>> unexpected_eof (token_start, "*/"); BEGIN context_state;
e9955c83
AD
420}
421
422
d8d3f94a
PE
423 /*--------------------------------------------------------------.
424 | Scanning a line comment. The initial `//' is already eaten. |
425 `--------------------------------------------------------------*/
426
427<SC_LINE_COMMENT>
428{
3f2d73f1 429 "\n" STRING_GROW; BEGIN context_state;
41141c56 430 {splice} STRING_GROW;
3f2d73f1 431 <<EOF>> BEGIN context_state;
d8d3f94a
PE
432}
433
434
4febdd96
PE
435 /*------------------------------------------------.
436 | Scanning a Bison string, including its escapes. |
437 | The initial quote is already eaten. |
438 `------------------------------------------------*/
e9955c83
AD
439
440<SC_ESCAPED_STRING>
441{
47aee066
JD
442 "\""|"\n" {
443 if (yytext[0] == '\n')
444 unexpected_newline (token_start, "\"");
445 STRING_FINISH;
446 loc->start = token_start;
447 val->chars = last_string;
448 BEGIN INITIAL;
449 return STRING;
450 }
451 <<EOF>> {
452 unexpected_eof (token_start, "\"");
41141c56 453 STRING_FINISH;
3f2d73f1 454 loc->start = token_start;
223ff46e 455 val->chars = last_string;
a706a1cc 456 BEGIN INITIAL;
e9955c83
AD
457 return STRING;
458 }
e9955c83
AD
459}
460
4febdd96
PE
461 /*----------------------------------------------------------.
462 | Scanning a Bison character literal, decoding its escapes. |
463 | The initial quote is already eaten. |
464 `----------------------------------------------------------*/
e9955c83
AD
465
466<SC_ESCAPED_CHARACTER>
467{
47aee066 468 "'"|"\n" {
41141c56 469 STRING_FINISH;
3f2d73f1 470 loc->start = token_start;
07c0db18 471 val->character = last_string[0];
ac9b0e95
JD
472 {
473 /* FIXME: Eventually, make these errors. */
07c0db18
JD
474 if (last_string[0] == '\0')
475 {
476 warn_at (*loc, _("empty character literal"));
477 /* '\0' seems dangerous even if we are about to complain. */
478 val->character = '\'';
479 }
480 else if (last_string[1] != '\0')
ac9b0e95
JD
481 warn_at (*loc, _("extra characters in character literal"));
482 }
483 if (yytext[0] == '\n')
484 unexpected_newline (token_start, "'");
41141c56 485 STRING_FREE;
a706a1cc 486 BEGIN INITIAL;
58d7a1a1 487 return CHAR;
e9955c83 488 }
47aee066 489 <<EOF>> {
47aee066
JD
490 STRING_FINISH;
491 loc->start = token_start;
07c0db18 492 val->character = last_string[0];
ac9b0e95 493 {
ac9b0e95 494 /* FIXME: Eventually, make these errors. */
07c0db18
JD
495 if (last_string[0] == '\0')
496 {
497 warn_at (*loc, _("empty character literal"));
498 /* '\0' seems dangerous even if we are about to complain. */
499 val->character = '\'';
500 }
501 else if (last_string[1] != '\0')
ac9b0e95 502 warn_at (*loc, _("extra characters in character literal"));
ac9b0e95
JD
503 }
504 unexpected_eof (token_start, "'");
47aee066
JD
505 STRING_FREE;
506 BEGIN INITIAL;
507 return CHAR;
508 }
4febdd96 509}
a706a1cc 510
4febdd96
PE
511<SC_ESCAPED_CHARACTER,SC_ESCAPED_STRING>
512{
92ac3705 513 \0 complain_at (*loc, _("invalid null character"));
e9955c83
AD
514}
515
516
517 /*----------------------------.
518 | Decode escaped characters. |
519 `----------------------------*/
520
<SC_ESCAPED_STRING,SC_ESCAPED_CHARACTER>
{
  /* Octal escape: a backslash followed by one to three octal digits.
     A value of zero, or one larger than UCHAR_MAX, is diagnosed and
     nothing is appended to the string being built.  */
  \\[0-7]{1,3} {
    unsigned long int c = strtoul (yytext + 1, NULL, 8);
    if (!c || UCHAR_MAX < c)
      complain_at (*loc, _("invalid number after \\-escape: %s"),
                   yytext+1);
    else
      obstack_1grow (&obstack_for_string, c);
  }

  /* Hexadecimal escape: `\x' followed by any number of hex digits.
     Same range check as for octal escapes.  */
  \\x[0-9abcdefABCDEF]+ {
    verify (UCHAR_MAX < ULONG_MAX);
    unsigned long int c = strtoul (yytext + 2, NULL, 16);
    if (!c || UCHAR_MAX < c)
      complain_at (*loc, _("invalid number after \\-escape: %s"),
                   yytext+1);
    else
      obstack_1grow (&obstack_for_string, c);
  }

  /* Simple one-letter escapes map to the corresponding C character.  */
  \\a   obstack_1grow (&obstack_for_string, '\a');
  \\b   obstack_1grow (&obstack_for_string, '\b');
  \\f   obstack_1grow (&obstack_for_string, '\f');
  \\n   obstack_1grow (&obstack_for_string, '\n');
  \\r   obstack_1grow (&obstack_for_string, '\r');
  \\t   obstack_1grow (&obstack_for_string, '\t');
  \\v   obstack_1grow (&obstack_for_string, '\v');

  /* \\[\"\'?\\] would be shorter, but it confuses xgettext.  */
  \\("\""|"'"|"?"|"\\") obstack_1grow (&obstack_for_string, yytext[1]);

  /* Universal character names: `\u' with 4 hex digits or `\U' with 8.
     convert_ucn_to_byte yields a value <= 0 when the name cannot be
     used, in which case it is diagnosed instead of appended.  */
  \\(u|U[0-9abcdefABCDEF]{4})[0-9abcdefABCDEF]{4} {
    int c = convert_ucn_to_byte (yytext);
    if (c <= 0)
      complain_at (*loc, _("invalid number after \\-escape: %s"),
                   yytext+1);
    else
      obstack_1grow (&obstack_for_string, c);
  }

  /* Any other backslash sequence is invalid.  Printable white space
     is shown between `...' quotes; everything else is rendered by
     quotearg so that the diagnostic stays readable.  */
  \\(.|\n) {
    char const *p = yytext + 1;
    /* The <ctype.h> classifiers require an argument representable as
       unsigned char (or EOF).  Plain char may be negative for bytes
       >= 0x80, so convert before classifying.  */
    unsigned char ch = *p;
    char quoted_ws[] = "` '";
    if (isspace (ch) && isprint (ch))
      {
        quoted_ws[1] = *p;
        p = quoted_ws;
      }
    else
      p = quotearg_style_mem (escape_quoting_style, p, 1);
    complain_at (*loc, _("invalid character after \\-escape: %s"), p);
  }
}
574
4febdd96
PE
575 /*--------------------------------------------.
576 | Scanning user-code characters and strings. |
577 `--------------------------------------------*/
e9955c83 578
4febdd96
PE
579<SC_CHARACTER,SC_STRING>
580{
e9071366 581 {splice}|\\{splice}[^\n\[\]] STRING_GROW;
4febdd96 582}
e9955c83
AD
583
584<SC_CHARACTER>
585{
4febdd96
PE
586 "'" STRING_GROW; BEGIN context_state;
587 \n unexpected_newline (token_start, "'"); BEGIN context_state;
588 <<EOF>> unexpected_eof (token_start, "'"); BEGIN context_state;
e9955c83
AD
589}
590
e9955c83
AD
591<SC_STRING>
592{
4febdd96
PE
593 "\"" STRING_GROW; BEGIN context_state;
594 \n unexpected_newline (token_start, "\""); BEGIN context_state;
595 <<EOF>> unexpected_eof (token_start, "\""); BEGIN context_state;
e9955c83
AD
596}
597
598
599 /*---------------------------------------------------.
600 | Strings, comments etc. can be found in user code. |
601 `---------------------------------------------------*/
602
603<SC_BRACED_CODE,SC_PROLOGUE,SC_EPILOGUE>
604{
3f2d73f1
PE
605 "'" {
606 STRING_GROW;
607 context_state = YY_START;
608 token_start = loc->start;
609 BEGIN SC_CHARACTER;
610 }
611 "\"" {
612 STRING_GROW;
613 context_state = YY_START;
614 token_start = loc->start;
615 BEGIN SC_STRING;
616 }
617 "/"{splice}"*" {
618 STRING_GROW;
619 context_state = YY_START;
620 token_start = loc->start;
621 BEGIN SC_COMMENT;
622 }
623 "/"{splice}"/" {
624 STRING_GROW;
625 context_state = YY_START;
626 BEGIN SC_LINE_COMMENT;
627 }
e9955c83
AD
628}
629
630
624a35e2 631
58d7a1a1
AD
632 /*-----------------------------------------------------------.
633 | Scanning some code in braces (actions). The initial "{" is |
634 | already eaten. |
635 `-----------------------------------------------------------*/
e9955c83
AD
636
637<SC_BRACED_CODE>
638{
41141c56
PE
639 "{"|"<"{splice}"%" STRING_GROW; braces_level++;
640 "%"{splice}">" STRING_GROW; braces_level--;
e9955c83 641 "}" {
25522739
PE
642 obstack_1grow (&obstack_for_string, '}');
643
2346344a
AD
644 --braces_level;
645 if (braces_level < 0)
e9955c83 646 {
41141c56 647 STRING_FINISH;
3f2d73f1 648 loc->start = code_start;
eb095650 649 val->code = last_string;
a706a1cc 650 BEGIN INITIAL;
58d7a1a1 651 return BRACED_CODE;
e9955c83
AD
652 }
653 }
654
a706a1cc
PE
655 /* Tokenize `<<%' correctly (as `<<' `%') rather than incorrectly
656 (as `<' `<%'). */
41141c56 657 "<"{splice}"<" STRING_GROW;
a706a1cc 658
47aee066
JD
659 <<EOF>> {
660 unexpected_eof (code_start, "}");
661 STRING_FINISH;
662 loc->start = code_start;
eb095650 663 val->code = last_string;
47aee066
JD
664 BEGIN INITIAL;
665 return BRACED_CODE;
666 }
e9955c83
AD
667}
668
669
670 /*--------------------------------------------------------------.
671 | Scanning some prologue: from "%{" (already scanned) to "%}". |
672 `--------------------------------------------------------------*/
673
674<SC_PROLOGUE>
675{
676 "%}" {
41141c56 677 STRING_FINISH;
3f2d73f1 678 loc->start = code_start;
223ff46e 679 val->chars = last_string;
a706a1cc 680 BEGIN INITIAL;
e9955c83
AD
681 return PROLOGUE;
682 }
683
47aee066
JD
684 <<EOF>> {
685 unexpected_eof (code_start, "%}");
686 STRING_FINISH;
687 loc->start = code_start;
688 val->chars = last_string;
689 BEGIN INITIAL;
690 return PROLOGUE;
691 }
e9955c83
AD
692}
693
694
695 /*---------------------------------------------------------------.
696 | Scanning the epilogue (everything after the second "%%", which |
d8d3f94a 697 | has already been eaten). |
e9955c83
AD
698 `---------------------------------------------------------------*/
699
700<SC_EPILOGUE>
701{
e9955c83 702 <<EOF>> {
41141c56 703 STRING_FINISH;
3f2d73f1 704 loc->start = code_start;
223ff46e 705 val->chars = last_string;
a706a1cc 706 BEGIN INITIAL;
e9955c83
AD
707 return EPILOGUE;
708 }
709}
710
711
4febdd96
PE
712 /*-----------------------------------------------------.
713 | By default, grow the string obstack with the input. |
714 `-----------------------------------------------------*/
715
716<SC_COMMENT,SC_LINE_COMMENT,SC_BRACED_CODE,SC_PROLOGUE,SC_EPILOGUE,SC_STRING,SC_CHARACTER,SC_ESCAPED_STRING,SC_ESCAPED_CHARACTER>. |
717<SC_COMMENT,SC_LINE_COMMENT,SC_BRACED_CODE,SC_PROLOGUE,SC_EPILOGUE>\n STRING_GROW;
718
e9955c83
AD
719%%
720
6c30d641
PE
/* Fill BUF with up to SIZE bytes read from FP and return the number
   of bytes BUF holds afterwards.  Carriage returns never reach the
   caller: both "\r\n" and an isolated '\r' are delivered as a single
   '\n', so the result can be smaller than the number of bytes that
   were consumed from FP.  */

static size_t
no_cr_read (FILE *fp, char *buf, size_t size)
{
  size_t const nread = fread (buf, 1, size, fp);
  char *dst = nread ? memchr (buf, '\r', nread) : NULL;
  char const *src;
  char const *end;

  /* Nothing read, or no '\r' in this chunk: BUF is already final.  */
  if (!dst)
    return nread;

  end = buf + nread;
  src = dst + 1;

  /* At the top of each iteration DST is the slot of a '\r' that was
     just seen and SRC is the first unread byte after it.  */
  for (;;)
    {
      *dst++ = '\n';

      if (src != end)
        {
          /* "\r\n" inside the chunk: drop the '\n'.  */
          if (*src == '\n')
            src++;
        }
      else
        {
          /* The '\r' was the last byte of the chunk, so peek at the
             stream to swallow a '\n' that completes a "\r\n" pair.
             Any other byte is pushed back; stop if that fails.  */
          int next = getc (fp);
          if (next != '\n' && ungetc (next, fp) != next)
            return dst - buf;
        }

      /* Compact the following bytes until the next '\r' or the end
         of the chunk.  */
      for (;;)
        {
          char c;
          if (src == end)
            return dst - buf;
          c = *src++;
          if (c == '\r')
            break;
          *dst++ = c;
        }
    }
}
766
767
f25bfb75 768
1452af69
PE
769/*------------------------------------------------------.
770| Scan NUMBER for a base-BASE integer at location LOC. |
771`------------------------------------------------------*/
772
773static unsigned long int
774scan_integer (char const *number, int base, location loc)
775{
4517da37
PE
776 verify (INT_MAX < ULONG_MAX);
777 unsigned long int num = strtoul (number, NULL, base);
778
779 if (INT_MAX < num)
1452af69
PE
780 {
781 complain_at (loc, _("integer out of range: %s"), quote (number));
782 num = INT_MAX;
783 }
4517da37 784
1452af69
PE
785 return num;
786}
787
788
d8d3f94a
PE
/* Convert universal character name UCN to a single-byte character,
   and return that character.  UCN points at the backslash; the two
   leading characters (`\u' or `\U') are skipped and the rest is read
   as a hexadecimal number.  Return -1 if UCN does not correspond to
   a single-byte character.  */

static int
convert_ucn_to_byte (char const *ucn)
{
  verify (UCHAR_MAX <= INT_MAX);
  unsigned long int code = strtoul (ucn + 2, NULL, 16);
  int byte;

  /* FIXME: Currently we assume Unicode-compatible unibyte characters
     on ASCII hosts (i.e., Latin-1 on hosts with 8-bit bytes).  On
     non-ASCII hosts we support only the portable C character set.
     These limitations should be removed once we add support for
     multibyte characters.  */

  if (UCHAR_MAX < code)
    return -1;

  /* CODE <= UCHAR_MAX <= INT_MAX here, so this conversion is exact.
     The result is kept in an int from now on so that the -1 sentinel
     never passes through an unsigned type.  */
  byte = code;

#if ! ('$' == 0x24 && '@' == 0x40 && '`' == 0x60 && '~' == 0x7e)
  {
    /* A non-ASCII host.  Use CODE to index into a table of the C
       basic execution character set, which is guaranteed to exist on
       all Standard C platforms.  This table also includes '$', '@',
       and '`', which are not in the basic execution character set but
       which are unibyte characters on all the platforms that we know
       about.  */
    static signed char const table[] =
      {
        '\0',   -1,   -1,   -1,   -1,   -1,   -1, '\a',
        '\b', '\t', '\n', '\v', '\f', '\r',   -1,   -1,
          -1,   -1,   -1,   -1,   -1,   -1,   -1,   -1,
          -1,   -1,   -1,   -1,   -1,   -1,   -1,   -1,
         ' ',  '!',  '"',  '#',  '$',  '%',  '&', '\'',
         '(',  ')',  '*',  '+',  ',',  '-',  '.',  '/',
         '0',  '1',  '2',  '3',  '4',  '5',  '6',  '7',
         '8',  '9',  ':',  ';',  '<',  '=',  '>',  '?',
         '@',  'A',  'B',  'C',  'D',  'E',  'F',  'G',
         'H',  'I',  'J',  'K',  'L',  'M',  'N',  'O',
         'P',  'Q',  'R',  'S',  'T',  'U',  'V',  'W',
         'X',  'Y',  'Z',  '[', '\\',  ']',  '^',  '_',
         '`',  'a',  'b',  'c',  'd',  'e',  'f',  'g',
         'h',  'i',  'j',  'k',  'l',  'm',  'n',  'o',
         'p',  'q',  'r',  's',  't',  'u',  'v',  'w',
         'x',  'y',  'z',  '{',  '|',  '}',  '~'
      };

    byte = code < sizeof table ? table[code] : -1;
  }
#endif

  return byte;
}
844
845
900c5db5
AD
846/*----------------------------------------------------------------.
847| Handle `#line INT "FILE"'. ARGS has already skipped `#line '. |
848`----------------------------------------------------------------*/
849
850static void
4517da37 851handle_syncline (char *args, location loc)
900c5db5 852{
4517da37
PE
853 char *after_num;
854 unsigned long int lineno = strtoul (args, &after_num, 10);
855 char *file = strchr (after_num, '"') + 1;
856 *strchr (file, '"') = '\0';
857 if (INT_MAX <= lineno)
858 {
859 warn_at (loc, _("line number overflow"));
860 lineno = INT_MAX;
861 }
e9071366 862 current_file = uniqstr_new (file);
0c8e079f 863 boundary_set (&scanner_cursor, current_file, lineno, 1);
4517da37
PE
864}
865
866
4febdd96
PE
867/*----------------------------------------------------------------.
868| For a token or comment starting at START, report message MSGID, |
869| which should say that an end marker was found before |
870| the expected TOKEN_END. |
871`----------------------------------------------------------------*/
872
873static void
874unexpected_end (boundary start, char const *msgid, char const *token_end)
875{
876 location loc;
877 loc.start = start;
878 loc.end = scanner_cursor;
879 complain_at (loc, _(msgid), token_end);
880}
881
882
3f2d73f1
PE
883/*------------------------------------------------------------------------.
884| Report an unexpected EOF in a token or comment starting at START. |
885| An end of file was encountered and the expected TOKEN_END was missing. |
3f2d73f1 886`------------------------------------------------------------------------*/
a706a1cc
PE
887
888static void
aa418041 889unexpected_eof (boundary start, char const *token_end)
a706a1cc 890{
4febdd96
PE
891 unexpected_end (start, N_("missing `%s' at end of file"), token_end);
892}
893
894
895/*----------------------------------------.
896| Likewise, but for unexpected newlines. |
897`----------------------------------------*/
898
899static void
900unexpected_newline (boundary start, char const *token_end)
901{
902 unexpected_end (start, N_("missing `%s' at end of line"), token_end);
a706a1cc
PE
903}
904
905
f25bfb75
AD
906/*-------------------------.
907| Initialize the scanner. |
908`-------------------------*/
909
1d6412ad 910void
e9071366 911gram_scanner_initialize (void)
1d6412ad 912{
223ff46e 913 obstack_init (&obstack_for_string);
1d6412ad
AD
914}
915
916
f25bfb75
AD
917/*-----------------------------------------------.
918| Free all the memory allocated to the scanner. |
919`-----------------------------------------------*/
920
4cdb01db 921void
e9071366 922gram_scanner_free (void)
4cdb01db 923{
223ff46e 924 obstack_free (&obstack_for_string, 0);
536545f3 925 /* Reclaim Flex's buffers. */
580b8926 926 yylex_destroy ();
4cdb01db 927}