]> git.saurik.com Git - bison.git/blame - src/scan-gram.l
tests: fix regressions.
[bison.git] / src / scan-gram.l
CommitLineData
/* Bison Grammar Scanner                             -*- C -*-

   Copyright (C) 2002-2012 Free Software Foundation, Inc.

   This file is part of Bison, the GNU Compiler Compiler.

   This program is free software: you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation, either version 3 of the License, or
   (at your option) any later version.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program.  If not, see <http://www.gnu.org/licenses/>.  */

4521fcdf 20%option debug nodefault noinput nounput noyywrap never-interactive
e9955c83
AD
21%option prefix="gram_" outfile="lex.yy.c"
22
23%{
4f6e011e
PE
24/* Work around a bug in flex 2.5.31. See Debian bug 333231
25 <http://bugs.debian.org/cgi-bin/bugreport.cgi?bug=333231>. */
26#undef gram_wrap
27#define gram_wrap() 1
28
e9071366
AD
29#define FLEX_PREFIX(Id) gram_ ## Id
30#include "flex-scanner.h"
223ff46e 31
e9955c83 32#include "complain.h"
3f2d73f1 33#include "files.h"
e9955c83 34#include "gram.h"
ca407bdf 35#include "quotearg.h"
e9955c83 36#include "reader.h"
223ff46e 37#include "uniqstr.h"
e9955c83 38
39fb7e62 39#include <ctype.h>
e9071366
AD
40#include <mbswidth.h>
41#include <quote.h>
4c787a31 42#include <streq.h>
e9071366
AD
43
44#include "scan-gram.h"
45
46#define YY_DECL GRAM_LEX_DECL
2346344a 47
3f2d73f1 48#define YY_USER_INIT \
e9071366 49 code_start = scanner_cursor = loc->start; \
dc9701e8 50
3f2d73f1 51/* Location of scanner cursor. */
4a678af8 52static boundary scanner_cursor;
41141c56 53
e9071366 54#define YY_USER_ACTION location_compute (loc, &scanner_cursor, yytext, yyleng);
d8d3f94a 55
6c30d641 56static size_t no_cr_read (FILE *, char *, size_t);
d8d3f94a
PE
57#define YY_INPUT(buf, result, size) ((result) = no_cr_read (yyin, buf, size))
58
/* Push the whole current token back onto the input and move the
   cursor column back by the token's display width, so that the token
   is scanned again (typically under another start condition).  */
#define ROLLBACK_CURRENT_TOKEN                                  \
  do {                                                          \
    scanner_cursor.column -= mbsnwidth (yytext, yyleng, 0);     \
    yyless (0);                                                 \
  } while (0)
64
7ec2d4cd 65/* A string representing the most recently saved token. */
7c0c6181 66static char *last_string;
7ec2d4cd 67
d5e8574b 68/* Bracketed identifier. */
7685e2f7
AR
69static uniqstr bracketed_id_str = 0;
70static location bracketed_id_loc;
71static boundary bracketed_id_start;
72static int bracketed_id_context_state = 0;
73
7ec2d4cd 74void
e9071366 75gram_scanner_last_string_free (void)
7ec2d4cd 76{
41141c56 77 STRING_FREE;
7ec2d4cd 78}
e9955c83 79
4517da37 80static void handle_syncline (char *, location);
1452af69 81static unsigned long int scan_integer (char const *p, int base, location loc);
d8d3f94a 82static int convert_ucn_to_byte (char const *hex_text);
aa418041 83static void unexpected_eof (boundary, char const *);
4febdd96 84static void unexpected_newline (boundary, char const *);
e9955c83
AD
85
86%}
e9071366
AD
87 /* A C-like comment in directives/rules. */
88%x SC_YACC_COMMENT
89 /* Strings and characters in directives/rules. */
e9955c83 90%x SC_ESCAPED_STRING SC_ESCAPED_CHARACTER
e9071366 91 /* A identifier was just read in directives/rules. Special state
9874f80b 92 to capture the sequence 'identifier :'. */
e9071366 93%x SC_AFTER_IDENTIFIER
e9071366
AD
94
95 /* Three types of user code:
9874f80b 96 - prologue (code between '%{' '%}' in the first section, before %%);
e9071366
AD
97 - actions, printers, union, etc, (between braced in the middle section);
98 - epilogue (everything after the second %%). */
99%x SC_PROLOGUE SC_BRACED_CODE SC_EPILOGUE
100 /* C and C++ comments in code. */
101%x SC_COMMENT SC_LINE_COMMENT
102 /* Strings and characters in code. */
103%x SC_STRING SC_CHARACTER
d5e8574b 104 /* Bracketed identifiers support. */
7685e2f7 105%x SC_BRACKETED_ID SC_RETURN_BRACKETED_ID
e9955c83 106
d236ad94 107letter [.abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ_]
eb8c66bb 108id {letter}({letter}|[-0-9])*
663ce7bb 109directive %{id}
624a35e2 110int [0-9]+
d8d3f94a
PE
111
112/* POSIX says that a tag must be both an id and a C union member, but
113 historically almost any character is allowed in a tag. We disallow
114 NUL and newline, as this simplifies our implementation. */
115tag [^\0\n>]+
116
117/* Zero or more instances of backslash-newline. Following GCC, allow
118 white space between the backslash and the newline. */
119splice (\\[ \f\t\v]*\n)*
e9955c83
AD
120
121%%
%{
  /* Nesting level of the current code in braces.  */
  int braces_level PACIFY_CC (= 0);

  /* Parent context state, when applicable.  */
  int context_state PACIFY_CC (= 0);

  /* Location of most recent identifier, when applicable.  */
  location id_loc PACIFY_CC (= empty_location);

  /* Where containing code started, when applicable.  Its initial
     value is relevant only when yylex is invoked in the SC_EPILOGUE
     start condition.  */
  boundary code_start = scanner_cursor;

  /* Where containing comment or string or character literal started,
     when applicable.  */
  boundary token_start PACIFY_CC (= scanner_cursor);
%}
141
142
3f2d73f1
PE
143 /*-----------------------.
144 | Scanning white space. |
145 `-----------------------*/
146
7685e2f7 147<INITIAL,SC_AFTER_IDENTIFIER,SC_BRACKETED_ID,SC_RETURN_BRACKETED_ID>
3f2d73f1 148{
4febdd96 149 /* Comments and white space. */
9874f80b 150 "," warn_at (*loc, _("stray ',' treated as white space"));
4febdd96 151 [ \f\n\t\v] |
3f2d73f1 152 "//".* ;
83adb046
PE
153 "/*" {
154 token_start = loc->start;
155 context_state = YY_START;
156 BEGIN SC_YACC_COMMENT;
157 }
3f2d73f1
PE
158
159 /* #line directives are not documented, and may be withdrawn or
160 modified in future versions of Bison. */
161 ^"#line "{int}" \"".*"\"\n" {
4517da37 162 handle_syncline (yytext + sizeof "#line " - 1, *loc);
3f2d73f1
PE
163 }
164}
165
166
e9955c83
AD
167 /*----------------------------.
168 | Scanning Bison directives. |
169 `----------------------------*/
72183df4
DJ
170
171 /* For directives that are also command line options, the regex must be
172 "%..."
173 after "[-_]"s are removed, and the directive must match the --long
174 option name, with a single string argument. Otherwise, add exceptions
175 to ../build-aux/cross-options.pl. */
176
e9955c83
AD
177<INITIAL>
178{
43e6aea5 179 "%binary" return PERCENT_NONASSOC;
136a0f76 180 "%code" return PERCENT_CODE;
43e6aea5
AD
181 "%debug" return PERCENT_DEBUG;
182 "%default"[-_]"prec" return PERCENT_DEFAULT_PREC;
183 "%define" return PERCENT_DEFINE;
184 "%defines" return PERCENT_DEFINES;
185 "%destructor" return PERCENT_DESTRUCTOR;
186 "%dprec" return PERCENT_DPREC;
187 "%error"[-_]"verbose" return PERCENT_ERROR_VERBOSE;
188 "%expect" return PERCENT_EXPECT;
189 "%expect"[-_]"rr" return PERCENT_EXPECT_RR;
190 "%file-prefix" return PERCENT_FILE_PREFIX;
e9955c83 191 "%fixed"[-_]"output"[-_]"files" return PERCENT_YACC;
43e6aea5
AD
192 "%initial-action" return PERCENT_INITIAL_ACTION;
193 "%glr-parser" return PERCENT_GLR_PARSER;
194 "%language" return PERCENT_LANGUAGE;
195 "%left" return PERCENT_LEFT;
196 "%lex-param" return PERCENT_LEX_PARAM;
197 "%locations" return PERCENT_LOCATIONS;
198 "%merge" return PERCENT_MERGE;
199 "%name"[-_]"prefix" return PERCENT_NAME_PREFIX;
200 "%no"[-_]"default"[-_]"prec" return PERCENT_NO_DEFAULT_PREC;
201 "%no"[-_]"lines" return PERCENT_NO_LINES;
202 "%nonassoc" return PERCENT_NONASSOC;
203 "%nondeterministic-parser" return PERCENT_NONDETERMINISTIC_PARSER;
204 "%nterm" return PERCENT_NTERM;
205 "%output" return PERCENT_OUTPUT;
206 "%parse-param" return PERCENT_PARSE_PARAM;
207 "%prec" return PERCENT_PREC;
208 "%printer" return PERCENT_PRINTER;
209 "%pure"[-_]"parser" return PERCENT_PURE_PARSER;
210 "%require" return PERCENT_REQUIRE;
211 "%right" return PERCENT_RIGHT;
212 "%skeleton" return PERCENT_SKELETON;
213 "%start" return PERCENT_START;
214 "%term" return PERCENT_TOKEN;
215 "%token" return PERCENT_TOKEN;
216 "%token"[-_]"table" return PERCENT_TOKEN_TABLE;
217 "%type" return PERCENT_TYPE;
218 "%union" return PERCENT_UNION;
219 "%verbose" return PERCENT_VERBOSE;
220 "%yacc" return PERCENT_YACC;
e9955c83 221
3f2d73f1 222 {directive} {
41141c56 223 complain_at (*loc, _("invalid directive: %s"), quote (yytext));
412f8a59 224 }
900c5db5 225
e9955c83 226 "=" return EQUAL;
e9071366 227 "|" return PIPE;
e9955c83 228 ";" return SEMICOLON;
12e35840 229 "<*>" return TYPE_TAG_ANY;
3ebecc24 230 "<>" return TYPE_TAG_NONE;
e9955c83 231
3f2d73f1 232 {id} {
58d7a1a1 233 val->uniqstr = uniqstr_new (yytext);
3f2d73f1 234 id_loc = *loc;
7685e2f7 235 bracketed_id_str = NULL;
3f2d73f1 236 BEGIN SC_AFTER_IDENTIFIER;
e9955c83
AD
237 }
238
d8d3f94a 239 {int} {
1452af69
PE
240 val->integer = scan_integer (yytext, 10, *loc);
241 return INT;
242 }
243 0[xX][0-9abcdefABCDEF]+ {
244 val->integer = scan_integer (yytext, 16, *loc);
d8d3f94a
PE
245 return INT;
246 }
e9955c83 247
601bdfab
AD
248 /* Identifiers may not start with a digit. Yet, don't silently
249 accept "1FOO" as "1 FOO". */
250 {int}{id} {
251 complain_at (*loc, _("invalid identifier: %s"), quote (yytext));
252 }
253
ac9b0e95 254 /* Characters. */
07c0db18 255 "'" token_start = loc->start; BEGIN SC_ESCAPED_CHARACTER;
e9955c83
AD
256
257 /* Strings. */
ca407bdf 258 "\"" token_start = loc->start; BEGIN SC_ESCAPED_STRING;
e9955c83
AD
259
260 /* Prologue. */
3f2d73f1 261 "%{" code_start = loc->start; BEGIN SC_PROLOGUE;
e9955c83
AD
262
263 /* Code in between braces. */
3f2d73f1
PE
264 "{" {
265 STRING_GROW;
266 braces_level = 0;
267 code_start = loc->start;
268 BEGIN SC_BRACED_CODE;
269 }
e9955c83
AD
270
271 /* A type. */
d8d3f94a 272 "<"{tag}">" {
223ff46e 273 obstack_grow (&obstack_for_string, yytext + 1, yyleng - 2);
41141c56 274 STRING_FINISH;
223ff46e 275 val->uniqstr = uniqstr_new (last_string);
41141c56 276 STRING_FREE;
4cdb01db
AD
277 return TYPE;
278 }
279
a706a1cc
PE
280 "%%" {
281 static int percent_percent_count;
e9955c83 282 if (++percent_percent_count == 2)
a2bc9dbc 283 BEGIN SC_EPILOGUE;
e9955c83
AD
284 return PERCENT_PERCENT;
285 }
286
7685e2f7
AR
287 "[" {
288 bracketed_id_str = NULL;
289 bracketed_id_start = loc->start;
290 bracketed_id_context_state = YY_START;
291 BEGIN SC_BRACKETED_ID;
292 }
293
a706a1cc 294 . {
41141c56 295 complain_at (*loc, _("invalid character: %s"), quote (yytext));
3f2d73f1 296 }
379f0ac8
PE
297
298 <<EOF>> {
299 loc->start = loc->end = scanner_cursor;
300 yyterminate ();
301 }
3f2d73f1
PE
302}
303
304
305 /*-----------------------------------------------------------------.
306 | Scanning after an identifier, checking whether a colon is next. |
307 `-----------------------------------------------------------------*/
308
309<SC_AFTER_IDENTIFIER>
310{
7685e2f7 311 "[" {
d5e8574b 312 if (bracketed_id_str)
7685e2f7
AR
313 {
314 ROLLBACK_CURRENT_TOKEN;
315 BEGIN SC_RETURN_BRACKETED_ID;
316 *loc = id_loc;
317 return ID;
318 }
d5e8574b
AR
319 else
320 {
321 bracketed_id_start = loc->start;
322 bracketed_id_context_state = YY_START;
323 BEGIN SC_BRACKETED_ID;
324 }
7685e2f7 325 }
3f2d73f1 326 ":" {
7685e2f7 327 BEGIN (bracketed_id_str ? SC_RETURN_BRACKETED_ID : INITIAL);
3f2d73f1 328 *loc = id_loc;
3f2d73f1
PE
329 return ID_COLON;
330 }
331 . {
7685e2f7
AR
332 ROLLBACK_CURRENT_TOKEN;
333 BEGIN (bracketed_id_str ? SC_RETURN_BRACKETED_ID : INITIAL);
3f2d73f1 334 *loc = id_loc;
3f2d73f1
PE
335 return ID;
336 }
337 <<EOF>> {
7685e2f7 338 BEGIN (bracketed_id_str ? SC_RETURN_BRACKETED_ID : INITIAL);
3f2d73f1 339 *loc = id_loc;
3f2d73f1 340 return ID;
e9955c83
AD
341 }
342}
343
7685e2f7
AR
344 /*--------------------------------.
345 | Scanning bracketed identifiers. |
346 `--------------------------------*/
347
348<SC_BRACKETED_ID>
349{
350 {id} {
d5e8574b 351 if (bracketed_id_str)
7685e2f7 352 {
d5e8574b
AR
353 complain_at (*loc, _("unexpected identifier in bracketed name: %s"),
354 quote (yytext));
7685e2f7
AR
355 }
356 else
357 {
d5e8574b
AR
358 bracketed_id_str = uniqstr_new (yytext);
359 bracketed_id_loc = *loc;
7685e2f7
AR
360 }
361 }
362 "]" {
363 BEGIN bracketed_id_context_state;
364 if (bracketed_id_str)
365 {
366 if (INITIAL == bracketed_id_context_state)
367 {
368 val->uniqstr = bracketed_id_str;
369 bracketed_id_str = 0;
370 *loc = bracketed_id_loc;
371 return BRACKETED_ID;
372 }
373 }
374 else
d5e8574b 375 complain_at (*loc, _("an identifier expected"));
7685e2f7
AR
376 }
377 . {
378 complain_at (*loc, _("invalid character in bracketed name: %s"),
379 quote (yytext));
380 }
381 <<EOF>> {
382 BEGIN bracketed_id_context_state;
383 unexpected_eof (bracketed_id_start, "]");
384 }
385}
386
<SC_RETURN_BRACKETED_ID>
{
  . {
    /* Give back the character just read, then return the pending
       bracketed name.  */
    ROLLBACK_CURRENT_TOKEN;
    val->uniqstr = bracketed_id_str;
    bracketed_id_str = 0;
    *loc = bracketed_id_loc;
    BEGIN INITIAL;
    return BRACKETED_ID;
  }
}
398
e9955c83 399
d8d3f94a 400 /*---------------------------------------------------------------.
9874f80b 401 | Scanning a Yacc comment. The initial '/ *' is already eaten. |
d8d3f94a 402 `---------------------------------------------------------------*/
e9955c83 403
d8d3f94a 404<SC_YACC_COMMENT>
e9955c83 405{
3f2d73f1 406 "*/" BEGIN context_state;
a706a1cc 407 .|\n ;
aa418041 408 <<EOF>> unexpected_eof (token_start, "*/"); BEGIN context_state;
d8d3f94a
PE
409}
410
411
412 /*------------------------------------------------------------.
9874f80b 413 | Scanning a C comment. The initial '/ *' is already eaten. |
d8d3f94a
PE
414 `------------------------------------------------------------*/
415
416<SC_COMMENT>
417{
3f2d73f1 418 "*"{splice}"/" STRING_GROW; BEGIN context_state;
aa418041 419 <<EOF>> unexpected_eof (token_start, "*/"); BEGIN context_state;
e9955c83
AD
420}
421
422
d8d3f94a 423 /*--------------------------------------------------------------.
9874f80b 424 | Scanning a line comment. The initial '//' is already eaten. |
d8d3f94a
PE
425 `--------------------------------------------------------------*/
426
427<SC_LINE_COMMENT>
428{
3f2d73f1 429 "\n" STRING_GROW; BEGIN context_state;
41141c56 430 {splice} STRING_GROW;
3f2d73f1 431 <<EOF>> BEGIN context_state;
d8d3f94a
PE
432}
433
434
4febdd96
PE
435 /*------------------------------------------------.
436 | Scanning a Bison string, including its escapes. |
437 | The initial quote is already eaten. |
438 `------------------------------------------------*/
e9955c83
AD
439
440<SC_ESCAPED_STRING>
441{
47aee066
JD
442 "\""|"\n" {
443 if (yytext[0] == '\n')
444 unexpected_newline (token_start, "\"");
445 STRING_FINISH;
446 loc->start = token_start;
447 val->chars = last_string;
448 BEGIN INITIAL;
449 return STRING;
450 }
451 <<EOF>> {
452 unexpected_eof (token_start, "\"");
41141c56 453 STRING_FINISH;
3f2d73f1 454 loc->start = token_start;
223ff46e 455 val->chars = last_string;
a706a1cc 456 BEGIN INITIAL;
e9955c83
AD
457 return STRING;
458 }
e9955c83
AD
459}
460
4febdd96
PE
461 /*----------------------------------------------------------.
462 | Scanning a Bison character literal, decoding its escapes. |
463 | The initial quote is already eaten. |
464 `----------------------------------------------------------*/
e9955c83
AD
465
466<SC_ESCAPED_CHARACTER>
467{
47aee066 468 "'"|"\n" {
41141c56 469 STRING_FINISH;
3f2d73f1 470 loc->start = token_start;
07c0db18 471 val->character = last_string[0];
ac9b0e95
JD
472 {
473 /* FIXME: Eventually, make these errors. */
07c0db18
JD
474 if (last_string[0] == '\0')
475 {
476 warn_at (*loc, _("empty character literal"));
477 /* '\0' seems dangerous even if we are about to complain. */
478 val->character = '\'';
479 }
480 else if (last_string[1] != '\0')
ac9b0e95
JD
481 warn_at (*loc, _("extra characters in character literal"));
482 }
483 if (yytext[0] == '\n')
484 unexpected_newline (token_start, "'");
41141c56 485 STRING_FREE;
a706a1cc 486 BEGIN INITIAL;
58d7a1a1 487 return CHAR;
e9955c83 488 }
47aee066 489 <<EOF>> {
47aee066
JD
490 STRING_FINISH;
491 loc->start = token_start;
07c0db18 492 val->character = last_string[0];
ac9b0e95 493 {
ac9b0e95 494 /* FIXME: Eventually, make these errors. */
07c0db18
JD
495 if (last_string[0] == '\0')
496 {
497 warn_at (*loc, _("empty character literal"));
498 /* '\0' seems dangerous even if we are about to complain. */
499 val->character = '\'';
500 }
501 else if (last_string[1] != '\0')
ac9b0e95 502 warn_at (*loc, _("extra characters in character literal"));
ac9b0e95
JD
503 }
504 unexpected_eof (token_start, "'");
47aee066
JD
505 STRING_FREE;
506 BEGIN INITIAL;
507 return CHAR;
508 }
4febdd96 509}
a706a1cc 510
4febdd96
PE
511<SC_ESCAPED_CHARACTER,SC_ESCAPED_STRING>
512{
92ac3705 513 \0 complain_at (*loc, _("invalid null character"));
e9955c83
AD
514}
515
516
517 /*----------------------------.
518 | Decode escaped characters. |
519 `----------------------------*/
520
521<SC_ESCAPED_STRING,SC_ESCAPED_CHARACTER>
522{
d8d3f94a 523 \\[0-7]{1,3} {
4517da37 524 unsigned long int c = strtoul (yytext + 1, NULL, 8);
39fb7e62
JD
525 if (!c || UCHAR_MAX < c)
526 complain_at (*loc, _("invalid number after \\-escape: %s"),
527 yytext+1);
e9955c83 528 else
223ff46e 529 obstack_1grow (&obstack_for_string, c);
e9955c83
AD
530 }
531
6b0d38ab 532 \\x[0-9abcdefABCDEF]+ {
4517da37
PE
533 verify (UCHAR_MAX < ULONG_MAX);
534 unsigned long int c = strtoul (yytext + 2, NULL, 16);
39fb7e62
JD
535 if (!c || UCHAR_MAX < c)
536 complain_at (*loc, _("invalid number after \\-escape: %s"),
537 yytext+1);
d8d3f94a 538 else
223ff46e 539 obstack_1grow (&obstack_for_string, c);
e9955c83
AD
540 }
541
223ff46e
PE
542 \\a obstack_1grow (&obstack_for_string, '\a');
543 \\b obstack_1grow (&obstack_for_string, '\b');
544 \\f obstack_1grow (&obstack_for_string, '\f');
545 \\n obstack_1grow (&obstack_for_string, '\n');
546 \\r obstack_1grow (&obstack_for_string, '\r');
547 \\t obstack_1grow (&obstack_for_string, '\t');
548 \\v obstack_1grow (&obstack_for_string, '\v');
412f8a59
PE
549
550 /* \\[\"\'?\\] would be shorter, but it confuses xgettext. */
223ff46e 551 \\("\""|"'"|"?"|"\\") obstack_1grow (&obstack_for_string, yytext[1]);
412f8a59 552
6b0d38ab 553 \\(u|U[0-9abcdefABCDEF]{4})[0-9abcdefABCDEF]{4} {
d8d3f94a 554 int c = convert_ucn_to_byte (yytext);
39fb7e62
JD
555 if (c <= 0)
556 complain_at (*loc, _("invalid number after \\-escape: %s"),
557 yytext+1);
d8d3f94a 558 else
223ff46e 559 obstack_1grow (&obstack_for_string, c);
d8d3f94a 560 }
4f25ebb0 561 \\(.|\n) {
39fb7e62 562 char const *p = yytext + 1;
890ab17c 563 /* Quote only if escaping won't make the character visible. */
4bb975e1 564 if (isspace ((unsigned char) *p) && isprint ((unsigned char) *p))
890ab17c 565 p = quote (p);
39fb7e62
JD
566 else
567 p = quotearg_style_mem (escape_quoting_style, p, 1);
568 complain_at (*loc, _("invalid character after \\-escape: %s"), p);
e9955c83
AD
569 }
570}
571
4febdd96
PE
572 /*--------------------------------------------.
573 | Scanning user-code characters and strings. |
574 `--------------------------------------------*/
e9955c83 575
4febdd96
PE
576<SC_CHARACTER,SC_STRING>
577{
e9071366 578 {splice}|\\{splice}[^\n\[\]] STRING_GROW;
4febdd96 579}
e9955c83
AD
580
581<SC_CHARACTER>
582{
4febdd96
PE
583 "'" STRING_GROW; BEGIN context_state;
584 \n unexpected_newline (token_start, "'"); BEGIN context_state;
585 <<EOF>> unexpected_eof (token_start, "'"); BEGIN context_state;
e9955c83
AD
586}
587
e9955c83
AD
588<SC_STRING>
589{
4febdd96
PE
590 "\"" STRING_GROW; BEGIN context_state;
591 \n unexpected_newline (token_start, "\""); BEGIN context_state;
592 <<EOF>> unexpected_eof (token_start, "\""); BEGIN context_state;
e9955c83
AD
593}
594
595
596 /*---------------------------------------------------.
597 | Strings, comments etc. can be found in user code. |
598 `---------------------------------------------------*/
599
600<SC_BRACED_CODE,SC_PROLOGUE,SC_EPILOGUE>
601{
3f2d73f1
PE
602 "'" {
603 STRING_GROW;
604 context_state = YY_START;
605 token_start = loc->start;
606 BEGIN SC_CHARACTER;
607 }
608 "\"" {
609 STRING_GROW;
610 context_state = YY_START;
611 token_start = loc->start;
612 BEGIN SC_STRING;
613 }
614 "/"{splice}"*" {
615 STRING_GROW;
616 context_state = YY_START;
617 token_start = loc->start;
618 BEGIN SC_COMMENT;
619 }
620 "/"{splice}"/" {
621 STRING_GROW;
622 context_state = YY_START;
623 BEGIN SC_LINE_COMMENT;
624 }
e9955c83
AD
625}
626
627
624a35e2 628
58d7a1a1
AD
629 /*-----------------------------------------------------------.
630 | Scanning some code in braces (actions). The initial "{" is |
631 | already eaten. |
632 `-----------------------------------------------------------*/
e9955c83
AD
633
634<SC_BRACED_CODE>
635{
41141c56
PE
636 "{"|"<"{splice}"%" STRING_GROW; braces_level++;
637 "%"{splice}">" STRING_GROW; braces_level--;
e9955c83 638 "}" {
25522739
PE
639 obstack_1grow (&obstack_for_string, '}');
640
2346344a
AD
641 --braces_level;
642 if (braces_level < 0)
e9955c83 643 {
41141c56 644 STRING_FINISH;
3f2d73f1 645 loc->start = code_start;
eb095650 646 val->code = last_string;
a706a1cc 647 BEGIN INITIAL;
58d7a1a1 648 return BRACED_CODE;
e9955c83
AD
649 }
650 }
651
9874f80b
JM
652 /* Tokenize '<<%' correctly (as '<<' '%') rather than incorrrectly
653 (as '<' '<%'). */
41141c56 654 "<"{splice}"<" STRING_GROW;
a706a1cc 655
47aee066
JD
656 <<EOF>> {
657 unexpected_eof (code_start, "}");
658 STRING_FINISH;
659 loc->start = code_start;
eb095650 660 val->code = last_string;
47aee066
JD
661 BEGIN INITIAL;
662 return BRACED_CODE;
663 }
e9955c83
AD
664}
665
666
667 /*--------------------------------------------------------------.
668 | Scanning some prologue: from "%{" (already scanned) to "%}". |
669 `--------------------------------------------------------------*/
670
671<SC_PROLOGUE>
672{
673 "%}" {
41141c56 674 STRING_FINISH;
3f2d73f1 675 loc->start = code_start;
223ff46e 676 val->chars = last_string;
a706a1cc 677 BEGIN INITIAL;
e9955c83
AD
678 return PROLOGUE;
679 }
680
47aee066
JD
681 <<EOF>> {
682 unexpected_eof (code_start, "%}");
683 STRING_FINISH;
684 loc->start = code_start;
685 val->chars = last_string;
686 BEGIN INITIAL;
687 return PROLOGUE;
688 }
e9955c83
AD
689}
690
691
692 /*---------------------------------------------------------------.
693 | Scanning the epilogue (everything after the second "%%", which |
d8d3f94a 694 | has already been eaten). |
e9955c83
AD
695 `---------------------------------------------------------------*/
696
697<SC_EPILOGUE>
698{
e9955c83 699 <<EOF>> {
41141c56 700 STRING_FINISH;
3f2d73f1 701 loc->start = code_start;
223ff46e 702 val->chars = last_string;
a706a1cc 703 BEGIN INITIAL;
e9955c83
AD
704 return EPILOGUE;
705 }
706}
707
708
4febdd96
PE
709 /*-----------------------------------------------------.
710 | By default, grow the string obstack with the input. |
711 `-----------------------------------------------------*/
712
713<SC_COMMENT,SC_LINE_COMMENT,SC_BRACED_CODE,SC_PROLOGUE,SC_EPILOGUE,SC_STRING,SC_CHARACTER,SC_ESCAPED_STRING,SC_ESCAPED_CHARACTER>. |
714<SC_COMMENT,SC_LINE_COMMENT,SC_BRACED_CODE,SC_PROLOGUE,SC_EPILOGUE>\n STRING_GROW;
715
e9955c83
AD
716%%
717
/* Read bytes from FP into buffer BUF of size SIZE.  Return the
   number of bytes stored in BUF.  Remove '\r' from input, treating
   \r\n and isolated \r as \n.  The rewrite is done in place, so the
   count returned can be smaller than the count read.  */

static size_t
no_cr_read (FILE *fp, char *buf, size_t size)
{
  size_t bytes_read = fread (buf, 1, size, fp);
  char *w = bytes_read ? memchr (buf, '\r', bytes_read) : NULL;

  /* Nothing read, or no '\r' in it: the buffer is fine as it is.  */
  if (!w)
    return bytes_read;

  /* W is the write position and R the read position; both start just
     past the first '\r'.  LIM is the end of the data read.  */
  char const *r = ++w;
  char const *lim = buf + bytes_read;

  for (;;)
    {
      /* Found an '\r'.  Treat it like '\n', but ignore any
         '\n' that immediately follows.  */
      w[-1] = '\n';
      if (r == lim)
        {
          /* The '\r' was the last byte read: peek at the stream to
             see whether it is followed by a '\n' to swallow.  */
          int ch = getc (fp);
          if (ch != '\n' && ungetc (ch, fp) != ch)
            break;
        }
      else if (*r == '\n')
        r++;

      /* Copy until the next '\r'.  */
      do
        {
          if (r == lim)
            return w - buf;
        }
      while ((*w++ = *r++) != '\r');
    }

  return w - buf;
}
763
764
f25bfb75 765
1452af69
PE
766/*------------------------------------------------------.
767| Scan NUMBER for a base-BASE integer at location LOC. |
768`------------------------------------------------------*/
769
770static unsigned long int
771scan_integer (char const *number, int base, location loc)
772{
4517da37
PE
773 verify (INT_MAX < ULONG_MAX);
774 unsigned long int num = strtoul (number, NULL, base);
775
776 if (INT_MAX < num)
1452af69
PE
777 {
778 complain_at (loc, _("integer out of range: %s"), quote (number));
779 num = INT_MAX;
780 }
4517da37 781
1452af69
PE
782 return num;
783}
784
785
/*------------------------------------------------------------------.
| Convert universal character name UCN to a single-byte character,  |
| and return that character.  Return -1 if UCN does not correspond  |
| to a single-byte character.  UCN starts with the backslash and    |
| the 'u' or 'U'; the hexadecimal digits follow.                    |
`------------------------------------------------------------------*/

static int
convert_ucn_to_byte (char const *ucn)
{
  verify (UCHAR_MAX <= INT_MAX);
  /* Skip the two-character "\u" or "\U" prefix.  */
  unsigned long int code = strtoul (ucn + 2, NULL, 16);

  /* FIXME: Currently we assume Unicode-compatible unibyte characters
     on ASCII hosts (i.e., Latin-1 on hosts with 8-bit bytes).  On
     non-ASCII hosts we support only the portable C character set.
     These limitations should be removed once we add support for
     multibyte characters.  */

  if (UCHAR_MAX < code)
    return -1;

#if ! ('$' == 0x24 && '@' == 0x40 && '`' == 0x60 && '~' == 0x7e)
  {
    /* A non-ASCII host.  Use CODE to index into a table of the C
       basic execution character set, which is guaranteed to exist on
       all Standard C platforms.  This table also includes '$', '@',
       and '`', which are not in the basic execution character set but
       which are unibyte characters on all the platforms that we know
       about.  */
    static signed char const table[] =
      {
        '\0',   -1,   -1,   -1,   -1,   -1,   -1, '\a',
        '\b', '\t', '\n', '\v', '\f', '\r',   -1,   -1,
          -1,   -1,   -1,   -1,   -1,   -1,   -1,   -1,
          -1,   -1,   -1,   -1,   -1,   -1,   -1,   -1,
         ' ',  '!',  '"',  '#',  '$',  '%',  '&', '\'',
         '(',  ')',  '*',  '+',  ',',  '-',  '.',  '/',
         '0',  '1',  '2',  '3',  '4',  '5',  '6',  '7',
         '8',  '9',  ':',  ';',  '<',  '=',  '>',  '?',
         '@',  'A',  'B',  'C',  'D',  'E',  'F',  'G',
         'H',  'I',  'J',  'K',  'L',  'M',  'N',  'O',
         'P',  'Q',  'R',  'S',  'T',  'U',  'V',  'W',
         'X',  'Y',  'Z',  '[', '\\',  ']',  '^',  '_',
         '`',  'a',  'b',  'c',  'd',  'e',  'f',  'g',
         'h',  'i',  'j',  'k',  'l',  'm',  'n',  'o',
         'p',  'q',  'r',  's',  't',  'u',  'v',  'w',
         'x',  'y',  'z',  '{',  '|',  '}',  '~'
      };

    /* Return the table entry as an int directly; storing -1 in the
       unsigned CODE and converting it back to int would rely on an
       implementation-defined conversion.  */
    return code < sizeof table ? table[code] : -1;
  }
#else
  /* CODE is at most UCHAR_MAX here, so it fits in an int.  */
  return code;
#endif
}
841
842
900c5db5 843/*----------------------------------------------------------------.
9874f80b 844| Handle '#line INT "FILE"'. ARGS has already skipped '#line '. |
900c5db5
AD
845`----------------------------------------------------------------*/
846
847static void
4517da37 848handle_syncline (char *args, location loc)
900c5db5 849{
4517da37
PE
850 char *after_num;
851 unsigned long int lineno = strtoul (args, &after_num, 10);
d143e9c3
JD
852 char *file = mbschr (after_num, '"') + 1;
853 *mbschr (file, '"') = '\0';
4517da37
PE
854 if (INT_MAX <= lineno)
855 {
856 warn_at (loc, _("line number overflow"));
857 lineno = INT_MAX;
858 }
e9071366 859 current_file = uniqstr_new (file);
0c8e079f 860 boundary_set (&scanner_cursor, current_file, lineno, 1);
4517da37
PE
861}
862
863
4febdd96
PE
864/*----------------------------------------------------------------.
865| For a token or comment starting at START, report message MSGID, |
866| which should say that an end marker was found before |
867| the expected TOKEN_END. |
868`----------------------------------------------------------------*/
869
870static void
871unexpected_end (boundary start, char const *msgid, char const *token_end)
872{
873 location loc;
874 loc.start = start;
875 loc.end = scanner_cursor;
4c787a31
AD
876 token_end = quote (token_end);
877 // Instead of '\'', display "'".
878 if (STREQ (token_end, "'\\''", '\'', '\\', '\'', '\'', 0,0,0,0,0))
879 token_end = "\"'\"";
4febdd96
PE
880 complain_at (loc, _(msgid), token_end);
881}
882
883
3f2d73f1
PE
884/*------------------------------------------------------------------------.
885| Report an unexpected EOF in a token or comment starting at START. |
886| An end of file was encountered and the expected TOKEN_END was missing. |
3f2d73f1 887`------------------------------------------------------------------------*/
a706a1cc
PE
888
889static void
aa418041 890unexpected_eof (boundary start, char const *token_end)
a706a1cc 891{
4c787a31 892 unexpected_end (start, N_("missing %s at end of file"), token_end);
4febdd96
PE
893}
894
895
896/*----------------------------------------.
897| Likewise, but for unexpected newlines. |
898`----------------------------------------*/
899
900static void
901unexpected_newline (boundary start, char const *token_end)
902{
4c787a31 903 unexpected_end (start, N_("missing %s at end of line"), token_end);
a706a1cc
PE
904}
905
906
f25bfb75
AD
907/*-------------------------.
908| Initialize the scanner. |
909`-------------------------*/
910
1d6412ad 911void
e9071366 912gram_scanner_initialize (void)
1d6412ad 913{
223ff46e 914 obstack_init (&obstack_for_string);
1d6412ad
AD
915}
916
917
f25bfb75
AD
918/*-----------------------------------------------.
919| Free all the memory allocated to the scanner. |
920`-----------------------------------------------*/
921
4cdb01db 922void
e9071366 923gram_scanner_free (void)
4cdb01db 924{
223ff46e 925 obstack_free (&obstack_for_string, 0);
536545f3 926 /* Reclaim Flex's buffers. */
580b8926 927 yylex_destroy ();
4cdb01db 928}