]> git.saurik.com Git - bison.git/blame - src/scan-gram.l
quote consistently and make tests pass with new quoting from gnulib
[bison.git] / src / scan-gram.l
CommitLineData
e9955c83 1/* Bison Grammar Scanner -*- C -*-
3b1e470c 2
c932d613 3 Copyright (C) 2002-2012 Free Software Foundation, Inc.
e9955c83
AD
4
5 This file is part of Bison, the GNU Compiler Compiler.
6
f16b0819 7 This program is free software: you can redistribute it and/or modify
e9955c83 8 it under the terms of the GNU General Public License as published by
f16b0819 9 the Free Software Foundation, either version 3 of the License, or
e9955c83
AD
10 (at your option) any later version.
11
12 This program is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
16
17 You should have received a copy of the GNU General Public License
f16b0819 18 along with this program. If not, see <http://www.gnu.org/licenses/>. */
e9955c83 19
4521fcdf 20%option debug nodefault noinput nounput noyywrap never-interactive
e9955c83
AD
21%option prefix="gram_" outfile="lex.yy.c"
22
23%{
4f6e011e
PE
24/* Work around a bug in flex 2.5.31. See Debian bug 333231
25 <http://bugs.debian.org/cgi-bin/bugreport.cgi?bug=333231>. */
26#undef gram_wrap
27#define gram_wrap() 1
28
e9071366
AD
29#define FLEX_PREFIX(Id) gram_ ## Id
30#include "flex-scanner.h"
223ff46e 31
e9955c83 32#include "complain.h"
3f2d73f1 33#include "files.h"
e9955c83 34#include "gram.h"
ca407bdf 35#include "quotearg.h"
e9955c83 36#include "reader.h"
223ff46e 37#include "uniqstr.h"
e9955c83 38
39fb7e62 39#include <ctype.h>
e9071366
AD
40#include <mbswidth.h>
41#include <quote.h>
42
43#include "scan-gram.h"
44
45#define YY_DECL GRAM_LEX_DECL
2346344a 46
3f2d73f1 47#define YY_USER_INIT \
e9071366 48 code_start = scanner_cursor = loc->start; \
dc9701e8 49
3f2d73f1 50/* Location of scanner cursor. */
4a678af8 51static boundary scanner_cursor;
41141c56 52
e9071366 53#define YY_USER_ACTION location_compute (loc, &scanner_cursor, yytext, yyleng);
d8d3f94a 54
6c30d641 55static size_t no_cr_read (FILE *, char *, size_t);
d8d3f94a
PE
56#define YY_INPUT(buf, result, size) ((result) = no_cr_read (yyin, buf, size))
57
7685e2f7
AR
/* Un-read the token that was just matched: move the column of
   scanner_cursor back by the width that mbsnwidth reports for yytext,
   then hand the whole match back to the input with yyless (0).  The
   rules that use this macro switch start condition right afterwards,
   so the same text is scanned again under the new condition.  */
58#define ROLLBACK_CURRENT_TOKEN \
59 do { \
60 scanner_cursor.column -= mbsnwidth (yytext, yyleng, 0); \
61 yyless (0); \
62 } while (0)
63
7ec2d4cd 64/* A string representing the most recently saved token. */
7c0c6181 65static char *last_string;
7ec2d4cd 66
d5e8574b 67/* Bracketed identifier. */
7685e2f7
AR
68static uniqstr bracketed_id_str = 0;
69static location bracketed_id_loc;
70static boundary bracketed_id_start;
71static int bracketed_id_context_state = 0;
72
/* Public entry point that expands STRING_FREE and does nothing else.
   NOTE(review): STRING_FREE is not defined in this file (presumably it
   comes from "flex-scanner.h" and releases last_string) -- confirm
   there exactly what is freed.  */
7ec2d4cd 73void
e9071366 74gram_scanner_last_string_free (void)
7ec2d4cd 75{
41141c56 76 STRING_FREE;
7ec2d4cd 77}
e9955c83 78
4517da37 79static void handle_syncline (char *, location);
1452af69 80static unsigned long int scan_integer (char const *p, int base, location loc);
d8d3f94a 81static int convert_ucn_to_byte (char const *hex_text);
aa418041 82static void unexpected_eof (boundary, char const *);
4febdd96 83static void unexpected_newline (boundary, char const *);
e9955c83
AD
84
85%}
e9071366
AD
86 /* A C-like comment in directives/rules. */
87%x SC_YACC_COMMENT
88 /* Strings and characters in directives/rules. */
e9955c83 89%x SC_ESCAPED_STRING SC_ESCAPED_CHARACTER
e9071366 90 /* An identifier was just read in directives/rules. Special state
9874f80b 91 to capture the sequence 'identifier :'. */
e9071366 92%x SC_AFTER_IDENTIFIER
e9071366
AD
93
94 /* Three types of user code:
9874f80b 95 - prologue (code between '%{' '%}' in the first section, before %%);
e9071366
AD
96 - actions, printers, union, etc. (between braces in the middle section);
97 - epilogue (everything after the second %%). */
98%x SC_PROLOGUE SC_BRACED_CODE SC_EPILOGUE
99 /* C and C++ comments in code. */
100%x SC_COMMENT SC_LINE_COMMENT
101 /* Strings and characters in code. */
102%x SC_STRING SC_CHARACTER
d5e8574b 103 /* Bracketed identifiers support. */
7685e2f7 104%x SC_BRACKETED_ID SC_RETURN_BRACKETED_ID
e9955c83 105
d236ad94 106letter [.abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ_]
eb8c66bb 107id {letter}({letter}|[-0-9])*
663ce7bb 108directive %{id}
624a35e2 109int [0-9]+
d8d3f94a
PE
110
111/* POSIX says that a tag must be both an id and a C union member, but
112 historically almost any character is allowed in a tag. We disallow
113 NUL and newline, as this simplifies our implementation. */
114tag [^\0\n>]+
115
116/* Zero or more instances of backslash-newline. Following GCC, allow
117 white space between the backslash and the newline. */
118splice (\\[ \f\t\v]*\n)*
e9955c83
AD
119
120%%
121%{
a706a1cc 122 /* Nesting level of the current code in braces. */
77bb73e7 123 int braces_level PACIFY_CC (= 0);
1a9e39f1 124
3f2d73f1 125 /* Parent context state, when applicable. */
77bb73e7 126 int context_state PACIFY_CC (= 0);
a706a1cc 127
3f2d73f1 128 /* Location of most recent identifier, when applicable. */
77bb73e7 129 location id_loc PACIFY_CC (= empty_location);
3f2d73f1 130
a2bc9dbc
PE
131 /* Where containing code started, when applicable. Its initial
132 value is relevant only when yylex is invoked in the SC_EPILOGUE
133 start condition. */
134 boundary code_start = scanner_cursor;
3f2d73f1 135
223ff46e
PE
136 /* Where containing comment or string or character literal started,
137 when applicable. */
77bb73e7 138 boundary token_start PACIFY_CC (= scanner_cursor);
e9955c83
AD
139%}
140
141
3f2d73f1
PE
142 /*-----------------------.
143 | Scanning white space. |
144 `-----------------------*/
145
7685e2f7 146<INITIAL,SC_AFTER_IDENTIFIER,SC_BRACKETED_ID,SC_RETURN_BRACKETED_ID>
3f2d73f1 147{
4febdd96 148 /* Comments and white space. */
  /* These rules are shared by the four start conditions listed above.
     A stray comma only draws a warning; blanks, newlines and '//'
     comments are matched and discarded.  A C-style comment opener
     records where it began (token_start) and which start condition
     was active (context_state), then switches to SC_YACC_COMMENT,
     which resumes context_state when the comment ends.  */
9874f80b 149 "," warn_at (*loc, _("stray ',' treated as white space"));
4febdd96 150 [ \f\n\t\v] |
3f2d73f1 151 "//".* ;
83adb046
PE
152 "/*" {
153 token_start = loc->start;
154 context_state = YY_START;
155 BEGIN SC_YACC_COMMENT;
156 }
3f2d73f1
PE
157
158 /* #line directives are not documented, and may be withdrawn or
159 modified in future versions of Bison. */
160 ^"#line "{int}" \"".*"\"\n" {
  /* Pass the text that follows the '#line ' prefix; sizeof counts the
     terminating NUL of the literal, hence the - 1.  */
4517da37 161 handle_syncline (yytext + sizeof "#line " - 1, *loc);
3f2d73f1
PE
162 }
163}
164
165
e9955c83
AD
166 /*----------------------------.
167 | Scanning Bison directives. |
168 `----------------------------*/
72183df4
DJ
169
170 /* For directives that are also command line options, the regex must be
171 "%..."
172 after "[-_]"s are removed, and the directive must match the --long
173 option name, with a single string argument. Otherwise, add exceptions
174 to ../build-aux/cross-options.pl. */
175
e9955c83
AD
176<INITIAL>
177{
43e6aea5 178 "%binary" return PERCENT_NONASSOC;
136a0f76 179 "%code" return PERCENT_CODE;
43e6aea5
AD
180 "%debug" return PERCENT_DEBUG;
181 "%default"[-_]"prec" return PERCENT_DEFAULT_PREC;
182 "%define" return PERCENT_DEFINE;
183 "%defines" return PERCENT_DEFINES;
184 "%destructor" return PERCENT_DESTRUCTOR;
185 "%dprec" return PERCENT_DPREC;
186 "%error"[-_]"verbose" return PERCENT_ERROR_VERBOSE;
187 "%expect" return PERCENT_EXPECT;
188 "%expect"[-_]"rr" return PERCENT_EXPECT_RR;
189 "%file-prefix" return PERCENT_FILE_PREFIX;
e9955c83 190 "%fixed"[-_]"output"[-_]"files" return PERCENT_YACC;
43e6aea5
AD
191 "%initial-action" return PERCENT_INITIAL_ACTION;
192 "%glr-parser" return PERCENT_GLR_PARSER;
193 "%language" return PERCENT_LANGUAGE;
194 "%left" return PERCENT_LEFT;
195 "%lex-param" return PERCENT_LEX_PARAM;
196 "%locations" return PERCENT_LOCATIONS;
197 "%merge" return PERCENT_MERGE;
198 "%name"[-_]"prefix" return PERCENT_NAME_PREFIX;
199 "%no"[-_]"default"[-_]"prec" return PERCENT_NO_DEFAULT_PREC;
200 "%no"[-_]"lines" return PERCENT_NO_LINES;
201 "%nonassoc" return PERCENT_NONASSOC;
202 "%nondeterministic-parser" return PERCENT_NONDETERMINISTIC_PARSER;
203 "%nterm" return PERCENT_NTERM;
204 "%output" return PERCENT_OUTPUT;
205 "%parse-param" return PERCENT_PARSE_PARAM;
206 "%prec" return PERCENT_PREC;
207 "%printer" return PERCENT_PRINTER;
208 "%pure"[-_]"parser" return PERCENT_PURE_PARSER;
209 "%require" return PERCENT_REQUIRE;
210 "%right" return PERCENT_RIGHT;
211 "%skeleton" return PERCENT_SKELETON;
212 "%start" return PERCENT_START;
213 "%term" return PERCENT_TOKEN;
214 "%token" return PERCENT_TOKEN;
215 "%token"[-_]"table" return PERCENT_TOKEN_TABLE;
216 "%type" return PERCENT_TYPE;
217 "%union" return PERCENT_UNION;
218 "%verbose" return PERCENT_VERBOSE;
219 "%yacc" return PERCENT_YACC;
e9955c83 220
3f2d73f1 221 {directive} {
41141c56 222 complain_at (*loc, _("invalid directive: %s"), quote (yytext));
412f8a59 223 }
900c5db5 224
e9955c83 225 "=" return EQUAL;
e9071366 226 "|" return PIPE;
e9955c83 227 ";" return SEMICOLON;
12e35840 228 "<*>" return TYPE_TAG_ANY;
3ebecc24 229 "<>" return TYPE_TAG_NONE;
e9955c83 230
3f2d73f1 231 {id} {
  /* No token is returned yet.  Save the identifier text and its
     location, clear any pending bracketed name, and switch to
     SC_AFTER_IDENTIFIER, whose rules return ID or ID_COLON depending
     on what follows the identifier.  */
58d7a1a1 232 val->uniqstr = uniqstr_new (yytext);
3f2d73f1 233 id_loc = *loc;
7685e2f7 234 bracketed_id_str = NULL;
3f2d73f1 235 BEGIN SC_AFTER_IDENTIFIER;
e9955c83
AD
236 }
237
d8d3f94a 238 {int} {
1452af69
PE
239 val->integer = scan_integer (yytext, 10, *loc);
240 return INT;
241 }
242 0[xX][0-9abcdefABCDEF]+ {
243 val->integer = scan_integer (yytext, 16, *loc);
d8d3f94a
PE
244 return INT;
245 }
e9955c83 246
601bdfab
AD
247 /* Identifiers may not start with a digit. Yet, don't silently
248 accept "1FOO" as "1 FOO". */
249 {int}{id} {
250 complain_at (*loc, _("invalid identifier: %s"), quote (yytext));
251 }
252
ac9b0e95 253 /* Characters. */
07c0db18 254 "'" token_start = loc->start; BEGIN SC_ESCAPED_CHARACTER;
e9955c83
AD
255
256 /* Strings. */
ca407bdf 257 "\"" token_start = loc->start; BEGIN SC_ESCAPED_STRING;
e9955c83
AD
258
259 /* Prologue. */
3f2d73f1 260 "%{" code_start = loc->start; BEGIN SC_PROLOGUE;
e9955c83
AD
261
262 /* Code in between braces. */
3f2d73f1
PE
263 "{" {
264 STRING_GROW;
265 braces_level = 0;
266 code_start = loc->start;
267 BEGIN SC_BRACED_CODE;
268 }
e9955c83
AD
269
270 /* A type. */
d8d3f94a 271 "<"{tag}">" {
223ff46e 272 obstack_grow (&obstack_for_string, yytext + 1, yyleng - 2);
41141c56 273 STRING_FINISH;
223ff46e 274 val->uniqstr = uniqstr_new (last_string);
41141c56 275 STRING_FREE;
4cdb01db
AD
276 return TYPE;
277 }
278
a706a1cc
PE
279 "%%" {
  /* Every '%%' is returned as PERCENT_PERCENT.  The counter is static,
     so it persists across calls to the scanner; on the second '%%'
     the scanner also switches to SC_EPILOGUE so that the rest of the
     input is collected as user code.
     NOTE(review): nothing in the visible code resets this counter, so
     a later '%%' does not switch state again -- confirm that is the
     intent.  */
280 static int percent_percent_count;
e9955c83 281 if (++percent_percent_count == 2)
a2bc9dbc 282 BEGIN SC_EPILOGUE;
e9955c83
AD
283 return PERCENT_PERCENT;
284 }
285
7685e2f7
AR
286 "[" {
287 bracketed_id_str = NULL;
288 bracketed_id_start = loc->start;
289 bracketed_id_context_state = YY_START;
290 BEGIN SC_BRACKETED_ID;
291 }
292
a706a1cc 293 . {
41141c56 294 complain_at (*loc, _("invalid character: %s"), quote (yytext));
3f2d73f1 295 }
379f0ac8
PE
296
297 <<EOF>> {
298 loc->start = loc->end = scanner_cursor;
299 yyterminate ();
300 }
3f2d73f1
PE
301}
302
303
304 /*-----------------------------------------------------------------.
305 | Scanning after an identifier, checking whether a colon is next. |
306 `-----------------------------------------------------------------*/
307
308<SC_AFTER_IDENTIFIER>
309{
  /* An identifier has been saved but not yet returned.  The next
     significant character decides the token: ':' yields ID_COLON, and
     anything else yields ID.  In every case *loc is overwritten with
     id_loc so the token carries the identifier's location rather than
     that of the character examined here.  If a bracketed name was
     collected in between (bracketed_id_str is set), the scanner moves
     to SC_RETURN_BRACKETED_ID so that name is returned by the next
     call; otherwise it goes back to INITIAL.  */
7685e2f7 310 "[" {
d5e8574b 311 if (bracketed_id_str)
7685e2f7
AR
312 {
  /* A bracketed name is already pending, so this '[' is not part of
     the current identifier: put it back and return ID now.  */
313 ROLLBACK_CURRENT_TOKEN;
314 BEGIN SC_RETURN_BRACKETED_ID;
315 *loc = id_loc;
316 return ID;
317 }
d5e8574b
AR
318 else
319 {
  /* First '[' after the identifier: read the bracketed name, and
     remember to come back to this start condition afterwards.  */
320 bracketed_id_start = loc->start;
321 bracketed_id_context_state = YY_START;
322 BEGIN SC_BRACKETED_ID;
323 }
7685e2f7 324 }
3f2d73f1 325 ":" {
  /* The colon is consumed as part of the ID_COLON token.  */
7685e2f7 326 BEGIN (bracketed_id_str ? SC_RETURN_BRACKETED_ID : INITIAL);
3f2d73f1 327 *loc = id_loc;
3f2d73f1
PE
328 return ID_COLON;
329 }
330 . {
  /* Any other character belongs to the next token: put it back.  */
7685e2f7
AR
331 ROLLBACK_CURRENT_TOKEN;
332 BEGIN (bracketed_id_str ? SC_RETURN_BRACKETED_ID : INITIAL);
3f2d73f1 333 *loc = id_loc;
3f2d73f1
PE
334 return ID;
335 }
336 <<EOF>> {
  /* End of input right after the identifier: still return it.  */
7685e2f7 337 BEGIN (bracketed_id_str ? SC_RETURN_BRACKETED_ID : INITIAL);
3f2d73f1 338 *loc = id_loc;
3f2d73f1 339 return ID;
e9955c83
AD
340 }
341}
342
7685e2f7
AR
343 /*--------------------------------.
344 | Scanning bracketed identifiers. |
345 `--------------------------------*/
346
347<SC_BRACKETED_ID>
348{
  /* Between '[' and ']'.  Exactly one identifier is expected; it is
     kept in bracketed_id_str together with its location.  */
349 {id} {
d5e8574b 350 if (bracketed_id_str)
7685e2f7 351 {
  /* A name was already read inside these brackets.  */
d5e8574b
AR
352 complain_at (*loc, _("unexpected identifier in bracketed name: %s"),
353 quote (yytext));
7685e2f7
AR
354 }
355 else
356 {
d5e8574b
AR
357 bracketed_id_str = uniqstr_new (yytext);
358 bracketed_id_loc = *loc;
7685e2f7
AR
359 }
360 }
361 "]" {
  /* Resume the start condition that was active at the '['.  When that
     was INITIAL, the name is returned immediately as BRACKETED_ID.
     Otherwise bracketed_id_str is left set and no token is returned
     here; the rules of the resumed start condition consult it.  */
362 BEGIN bracketed_id_context_state;
363 if (bracketed_id_str)
364 {
365 if (INITIAL == bracketed_id_context_state)
366 {
367 val->uniqstr = bracketed_id_str;
368 bracketed_id_str = 0;
369 *loc = bracketed_id_loc;
370 return BRACKETED_ID;
371 }
372 }
373 else
d5e8574b 374 complain_at (*loc, _("an identifier expected"));
7685e2f7
AR
375 }
376 . {
377 complain_at (*loc, _("invalid character in bracketed name: %s"),
378 quote (yytext));
379 }
380 <<EOF>> {
  /* Input ended before the closing bracket: report it against the
     position of the opening '[' and leave this start condition.  */
381 BEGIN bracketed_id_context_state;
382 unexpected_eof (bracketed_id_start, "]");
383 }
384}
385
386<SC_RETURN_BRACKETED_ID>
387{
  /* Entered from SC_AFTER_IDENTIFIER when a bracketed name is pending.
     The character matched here is only a trigger: it is put back,
     the saved name is returned as BRACKETED_ID at its own location,
     bracketed_id_str is cleared, and scanning resumes in INITIAL.  */
388 . {
389 ROLLBACK_CURRENT_TOKEN;
390 val->uniqstr = bracketed_id_str;
391 bracketed_id_str = 0;
392 *loc = bracketed_id_loc;
393 BEGIN INITIAL;
394 return BRACKETED_ID;
395 }
396}
397
e9955c83 398
d8d3f94a 399 /*---------------------------------------------------------------.
9874f80b 400 | Scanning a Yacc comment. The initial '/ *' is already eaten. |
d8d3f94a 401 `---------------------------------------------------------------*/
e9955c83 402
d8d3f94a 403<SC_YACC_COMMENT>
e9955c83 404{
3f2d73f1 405 "*/" BEGIN context_state;
a706a1cc 406 .|\n ;
aa418041 407 <<EOF>> unexpected_eof (token_start, "*/"); BEGIN context_state;
d8d3f94a
PE
408}
409
410
411 /*------------------------------------------------------------.
9874f80b 412 | Scanning a C comment. The initial '/ *' is already eaten. |
d8d3f94a
PE
413 `------------------------------------------------------------*/
414
415<SC_COMMENT>
416{
3f2d73f1 417 "*"{splice}"/" STRING_GROW; BEGIN context_state;
aa418041 418 <<EOF>> unexpected_eof (token_start, "*/"); BEGIN context_state;
e9955c83
AD
419}
420
421
d8d3f94a 422 /*--------------------------------------------------------------.
9874f80b 423 | Scanning a line comment. The initial '//' is already eaten. |
d8d3f94a
PE
424 `--------------------------------------------------------------*/
425
426<SC_LINE_COMMENT>
427{
3f2d73f1 428 "\n" STRING_GROW; BEGIN context_state;
41141c56 429 {splice} STRING_GROW;
3f2d73f1 430 <<EOF>> BEGIN context_state;
d8d3f94a
PE
431}
432
433
4febdd96
PE
434 /*------------------------------------------------.
435 | Scanning a Bison string, including its escapes. |
436 | The initial quote is already eaten. |
437 `------------------------------------------------*/
e9955c83
AD
438
439<SC_ESCAPED_STRING>
440{
47aee066
JD
441 "\""|"\n" {
442 if (yytext[0] == '\n')
443 unexpected_newline (token_start, "\"");
444 STRING_FINISH;
445 loc->start = token_start;
446 val->chars = last_string;
447 BEGIN INITIAL;
448 return STRING;
449 }
450 <<EOF>> {
451 unexpected_eof (token_start, "\"");
41141c56 452 STRING_FINISH;
3f2d73f1 453 loc->start = token_start;
223ff46e 454 val->chars = last_string;
a706a1cc 455 BEGIN INITIAL;
e9955c83
AD
456 return STRING;
457 }
e9955c83
AD
458}
459
4febdd96
PE
460 /*----------------------------------------------------------.
461 | Scanning a Bison character literal, decoding its escapes. |
462 | The initial quote is already eaten. |
463 `----------------------------------------------------------*/
e9955c83
AD
464
465<SC_ESCAPED_CHARACTER>
466{
47aee066 467 "'"|"\n" {
41141c56 468 STRING_FINISH;
3f2d73f1 469 loc->start = token_start;
07c0db18 470 val->character = last_string[0];
ac9b0e95
JD
471 {
472 /* FIXME: Eventually, make these errors. */
07c0db18
JD
473 if (last_string[0] == '\0')
474 {
475 warn_at (*loc, _("empty character literal"));
476 /* '\0' seems dangerous even if we are about to complain. */
477 val->character = '\'';
478 }
479 else if (last_string[1] != '\0')
ac9b0e95
JD
480 warn_at (*loc, _("extra characters in character literal"));
481 }
482 if (yytext[0] == '\n')
483 unexpected_newline (token_start, "'");
41141c56 484 STRING_FREE;
a706a1cc 485 BEGIN INITIAL;
58d7a1a1 486 return CHAR;
e9955c83 487 }
47aee066 488 <<EOF>> {
47aee066
JD
489 STRING_FINISH;
490 loc->start = token_start;
07c0db18 491 val->character = last_string[0];
ac9b0e95 492 {
ac9b0e95 493 /* FIXME: Eventually, make these errors. */
07c0db18
JD
494 if (last_string[0] == '\0')
495 {
496 warn_at (*loc, _("empty character literal"));
497 /* '\0' seems dangerous even if we are about to complain. */
498 val->character = '\'';
499 }
500 else if (last_string[1] != '\0')
ac9b0e95 501 warn_at (*loc, _("extra characters in character literal"));
ac9b0e95
JD
502 }
503 unexpected_eof (token_start, "'");
47aee066
JD
504 STRING_FREE;
505 BEGIN INITIAL;
506 return CHAR;
507 }
4febdd96 508}
a706a1cc 509
4febdd96
PE
510<SC_ESCAPED_CHARACTER,SC_ESCAPED_STRING>
511{
92ac3705 512 \0 complain_at (*loc, _("invalid null character"));
e9955c83
AD
513}
514
515
516 /*----------------------------.
517 | Decode escaped characters. |
518 `----------------------------*/
519
520<SC_ESCAPED_STRING,SC_ESCAPED_CHARACTER>
521{
  /* Each rule below decodes one backslash escape and appends the
     resulting single byte to obstack_for_string.  An escape that is
     rejected is reported with complain_at and appends nothing.  */
d8d3f94a 522 \\[0-7]{1,3} {
  /* Octal escape: the digits start right after the backslash.  A
     value of zero or one above UCHAR_MAX is rejected.  */
4517da37 523 unsigned long int c = strtoul (yytext + 1, NULL, 8);
39fb7e62
JD
524 if (!c || UCHAR_MAX < c)
525 complain_at (*loc, _("invalid number after \\-escape: %s"),
526 yytext+1);
e9955c83 527 else
223ff46e 528 obstack_1grow (&obstack_for_string, c);
e9955c83
AD
529 }
530
6b0d38ab 531 \\x[0-9abcdefABCDEF]+ {
  /* Hexadecimal escape: the digits start after the backslash and the
     x.  The same zero and UCHAR_MAX checks apply as for octal.  */
4517da37
PE
532 verify (UCHAR_MAX < ULONG_MAX);
533 unsigned long int c = strtoul (yytext + 2, NULL, 16);
39fb7e62
JD
534 if (!c || UCHAR_MAX < c)
535 complain_at (*loc, _("invalid number after \\-escape: %s"),
536 yytext+1);
d8d3f94a 537 else
223ff46e 538 obstack_1grow (&obstack_for_string, c);
e9955c83
AD
539 }
540
223ff46e
PE
541 \\a obstack_1grow (&obstack_for_string, '\a');
542 \\b obstack_1grow (&obstack_for_string, '\b');
543 \\f obstack_1grow (&obstack_for_string, '\f');
544 \\n obstack_1grow (&obstack_for_string, '\n');
545 \\r obstack_1grow (&obstack_for_string, '\r');
546 \\t obstack_1grow (&obstack_for_string, '\t');
547 \\v obstack_1grow (&obstack_for_string, '\v');
412f8a59
PE
548
549 /* \\[\"\'?\\] would be shorter, but it confuses xgettext. */
223ff46e 550 \\("\""|"'"|"?"|"\\") obstack_1grow (&obstack_for_string, yytext[1]);
412f8a59 551
6b0d38ab 552 \\(u|U[0-9abcdefABCDEF]{4})[0-9abcdefABCDEF]{4} {
  /* Universal character name with four or eight hex digits.
     convert_ucn_to_byte yields -1 when the code is not a single byte;
     zero is rejected here as well.  */
d8d3f94a 553 int c = convert_ucn_to_byte (yytext);
39fb7e62
JD
554 if (c <= 0)
555 complain_at (*loc, _("invalid number after \\-escape: %s"),
556 yytext+1);
d8d3f94a 557 else
223ff46e 558 obstack_1grow (&obstack_for_string, c);
d8d3f94a 559 }
4f25ebb0 560 \\(.|\n) {
  /* Catch-all for a backslash followed by anything not matched by a
     rule above: always an error.  Only the form in which the bad
     character is shown in the message differs.  */
39fb7e62 561 char const *p = yytext + 1;
890ab17c 562 /* Quote only if escaping won't make the character visible. */
4bb975e1 563 if (isspace ((unsigned char) *p) && isprint ((unsigned char) *p))
890ab17c 564 p = quote (p);
39fb7e62
JD
565 else
566 p = quotearg_style_mem (escape_quoting_style, p, 1);
567 complain_at (*loc, _("invalid character after \\-escape: %s"), p);
e9955c83
AD
568 }
569}
570
4febdd96
PE
571 /*--------------------------------------------.
572 | Scanning user-code characters and strings. |
573 `--------------------------------------------*/
e9955c83 574
4febdd96
PE
575<SC_CHARACTER,SC_STRING>
576{
e9071366 577 {splice}|\\{splice}[^\n\[\]] STRING_GROW;
4febdd96 578}
e9955c83
AD
579
580<SC_CHARACTER>
581{
4febdd96
PE
582 "'" STRING_GROW; BEGIN context_state;
583 \n unexpected_newline (token_start, "'"); BEGIN context_state;
584 <<EOF>> unexpected_eof (token_start, "'"); BEGIN context_state;
e9955c83
AD
585}
586
e9955c83
AD
587<SC_STRING>
588{
4febdd96
PE
589 "\"" STRING_GROW; BEGIN context_state;
590 \n unexpected_newline (token_start, "\""); BEGIN context_state;
591 <<EOF>> unexpected_eof (token_start, "\""); BEGIN context_state;
e9955c83
AD
592}
593
594
595 /*---------------------------------------------------.
596 | Strings, comments etc. can be found in user code. |
597 `---------------------------------------------------*/
598
599<SC_BRACED_CODE,SC_PROLOGUE,SC_EPILOGUE>
600{
3f2d73f1
PE
601 "'" {
602 STRING_GROW;
603 context_state = YY_START;
604 token_start = loc->start;
605 BEGIN SC_CHARACTER;
606 }
607 "\"" {
608 STRING_GROW;
609 context_state = YY_START;
610 token_start = loc->start;
611 BEGIN SC_STRING;
612 }
613 "/"{splice}"*" {
614 STRING_GROW;
615 context_state = YY_START;
616 token_start = loc->start;
617 BEGIN SC_COMMENT;
618 }
619 "/"{splice}"/" {
620 STRING_GROW;
621 context_state = YY_START;
622 BEGIN SC_LINE_COMMENT;
623 }
e9955c83
AD
624}
625
626
624a35e2 627
58d7a1a1
AD
628 /*-----------------------------------------------------------.
629 | Scanning some code in braces (actions). The initial "{" is |
630 | already eaten. |
631 `-----------------------------------------------------------*/
e9955c83
AD
632
633<SC_BRACED_CODE>
634{
  /* The opening brace was seen in INITIAL, where braces_level was set
     to 0.  An opening brace or the digraph '<%' goes one level deeper,
     and the digraph '%>' goes one level back; both are copied into
     the string being collected.  */
41141c56
PE
635 "{"|"<"{splice}"%" STRING_GROW; braces_level++;
636 "%"{splice}">" STRING_GROW; braces_level--;
e9955c83 637 "}" {
25522739
PE
638 obstack_1grow (&obstack_for_string, '}');
639
  /* When the level drops below zero this brace closes the block that
     was opened in INITIAL: finish the string and return it, with the
     token starting where the code started.  */
2346344a
AD
640 --braces_level;
641 if (braces_level < 0)
e9955c83 642 {
41141c56 643 STRING_FINISH;
3f2d73f1 644 loc->start = code_start;
eb095650 645 val->code = last_string;
a706a1cc 646 BEGIN INITIAL;
58d7a1a1 647 return BRACED_CODE;
e9955c83
AD
648 }
649 }
650
9874f80b
JM
651 /* Tokenize '<<%' correctly (as '<<' '%') rather than incorrectly
652 (as '<' '<%'). */
41141c56 653 "<"{splice}"<" STRING_GROW;
a706a1cc 654
47aee066
JD
655 <<EOF>> {
  /* Input ended inside the braces: report the missing brace, then
     return what was collected so far as BRACED_CODE.  */
656 unexpected_eof (code_start, "}");
657 STRING_FINISH;
658 loc->start = code_start;
eb095650 659 val->code = last_string;
47aee066
JD
660 BEGIN INITIAL;
661 return BRACED_CODE;
662 }
e9955c83
AD
663}
664
665
666 /*--------------------------------------------------------------.
667 | Scanning some prologue: from "%{" (already scanned) to "%}". |
668 `--------------------------------------------------------------*/
669
670<SC_PROLOGUE>
671{
672 "%}" {
41141c56 673 STRING_FINISH;
3f2d73f1 674 loc->start = code_start;
223ff46e 675 val->chars = last_string;
a706a1cc 676 BEGIN INITIAL;
e9955c83
AD
677 return PROLOGUE;
678 }
679
47aee066
JD
680 <<EOF>> {
681 unexpected_eof (code_start, "%}");
682 STRING_FINISH;
683 loc->start = code_start;
684 val->chars = last_string;
685 BEGIN INITIAL;
686 return PROLOGUE;
687 }
e9955c83
AD
688}
689
690
691 /*---------------------------------------------------------------.
692 | Scanning the epilogue (everything after the second "%%", which |
d8d3f94a 693 | has already been eaten). |
e9955c83
AD
694 `---------------------------------------------------------------*/
695
696<SC_EPILOGUE>
697{
e9955c83 698 <<EOF>> {
41141c56 699 STRING_FINISH;
3f2d73f1 700 loc->start = code_start;
223ff46e 701 val->chars = last_string;
a706a1cc 702 BEGIN INITIAL;
e9955c83
AD
703 return EPILOGUE;
704 }
705}
706
707
4febdd96
PE
708 /*-----------------------------------------------------.
709 | By default, grow the string obstack with the input. |
710 `-----------------------------------------------------*/
711
712<SC_COMMENT,SC_LINE_COMMENT,SC_BRACED_CODE,SC_PROLOGUE,SC_EPILOGUE,SC_STRING,SC_CHARACTER,SC_ESCAPED_STRING,SC_ESCAPED_CHARACTER>. |
713<SC_COMMENT,SC_LINE_COMMENT,SC_BRACED_CODE,SC_PROLOGUE,SC_EPILOGUE>\n STRING_GROW;
714
e9955c83
AD
715%%
716
6c30d641
PE
/* Fill BUF with at most SIZE bytes read from FP and return how many
   bytes BUF then holds.  Every '\r' is turned into '\n', and a '\n'
   that directly follows a '\r' is dropped, so both \r\n and an
   isolated \r come out as a single \n.  */

static size_t
no_cr_read (FILE *fp, char *buf, size_t size)
{
  size_t got = fread (buf, 1, size, fp);
  if (got == 0)
    return got;

  /* Nothing to rewrite unless the chunk contains a carriage return.  */
  char *dst = memchr (buf, '\r', got);
  if (dst == NULL)
    return got;

  /* DST is the next byte to write and SRC the next byte to examine;
     both start just past the first '\r'.  */
  char const *src = ++dst;
  char const *end = buf + got;

  for (;;)
    {
      /* The byte before DST is a '\r': store '\n' in its place.  */
      dst[-1] = '\n';

      if (src != end)
        {
          /* Swallow the '\n' that immediately follows the '\r'.  */
          if (*src == '\n')
            src++;
        }
      else
        {
          /* The '\r' was the last byte of the chunk, so a matching
             '\n' may still be waiting in FP.  Consume it if it is
             there; otherwise push the byte back, and stop if the
             push-back fails.  */
          int ch = getc (fp);
          if (ch != '\n' && ungetc (ch, fp) != ch)
            break;
        }

      /* Compact the input up to and including the next '\r'.  */
      for (;;)
        {
          if (src == end)
            return dst - buf;
          if ((*dst++ = *src++) == '\r')
            break;
        }
    }

  return dst - buf;
}
762
763
f25bfb75 764
1452af69
PE
765/*------------------------------------------------------.
766| Scan NUMBER for a base-BASE integer at location LOC. |
767`------------------------------------------------------*/
768
769static unsigned long int
770scan_integer (char const *number, int base, location loc)
771{
4517da37
PE
772 verify (INT_MAX < ULONG_MAX);
773 unsigned long int num = strtoul (number, NULL, base);
774
775 if (INT_MAX < num)
1452af69
PE
776 {
777 complain_at (loc, _("integer out of range: %s"), quote (number));
778 num = INT_MAX;
779 }
4517da37 780
1452af69
PE
781 return num;
782}
783
784
d8d3f94a
PE
/*------------------------------------------------------------------.
| Convert universal character name UCN to a single-byte character, |
| and return that character.  Return -1 if UCN does not correspond |
| to a single-byte character.  A UCN of zero yields 0; the escape  |
| rule that calls this function rejects every result <= 0.          |
`------------------------------------------------------------------*/

static int
convert_ucn_to_byte (char const *ucn)
{
  verify (UCHAR_MAX <= INT_MAX);
  /* UCN points at the backslash; the hex digits start after the
     two-character introducer (backslash plus 'u' or 'U').  */
  unsigned long int code = strtoul (ucn + 2, NULL, 16);
  /* The result is kept in an int so that the -1 "no such byte" marker
     is never stored in, and converted back from, an unsigned type.  */
  int byte;

  /* FIXME: Currently we assume Unicode-compatible unibyte characters
     on ASCII hosts (i.e., Latin-1 on hosts with 8-bit bytes).  On
     non-ASCII hosts we support only the portable C character set.
     These limitations should be removed once we add support for
     multibyte characters.  */

  if (UCHAR_MAX < code)
    return -1;

#if ! ('$' == 0x24 && '@' == 0x40 && '`' == 0x60 && '~' == 0x7e)
  {
    /* A non-ASCII host.  Use CODE to index into a table of the C
       basic execution character set, which is guaranteed to exist on
       all Standard C platforms.  This table also includes '$', '@',
       and '`', which are not in the basic execution character set but
       which are unibyte characters on all the platforms that we know
       about.  */
    static signed char const table[] =
      {
        '\0',   -1,   -1,   -1,   -1,   -1,   -1, '\a',
        '\b', '\t', '\n', '\v', '\f', '\r',   -1,   -1,
          -1,   -1,   -1,   -1,   -1,   -1,   -1,   -1,
          -1,   -1,   -1,   -1,   -1,   -1,   -1,   -1,
         ' ',  '!',  '"',  '#',  '$',  '%',  '&', '\'',
         '(',  ')',  '*',  '+',  ',',  '-',  '.',  '/',
         '0',  '1',  '2',  '3',  '4',  '5',  '6',  '7',
         '8',  '9',  ':',  ';',  '<',  '=',  '>',  '?',
         '@',  'A',  'B',  'C',  'D',  'E',  'F',  'G',
         'H',  'I',  'J',  'K',  'L',  'M',  'N',  'O',
         'P',  'Q',  'R',  'S',  'T',  'U',  'V',  'W',
         'X',  'Y',  'Z',  '[', '\\',  ']',  '^',  '_',
         '`',  'a',  'b',  'c',  'd',  'e',  'f',  'g',
         'h',  'i',  'j',  'k',  'l',  'm',  'n',  'o',
         'p',  'q',  'r',  's',  't',  'u',  'v',  'w',
         'x',  'y',  'z',  '{',  '|',  '}',  '~'
      };

    /* Codes past the end of the table have no known single-byte
       equivalent on this host.  */
    byte = code < sizeof table ? table[code] : -1;
  }
#else
  /* CODE is at most UCHAR_MAX here, which the verify above guarantees
     fits in an int.  */
  byte = code;
#endif

  return byte;
}
840
841
900c5db5 842/*----------------------------------------------------------------.
9874f80b 843| Handle '#line INT "FILE"'. ARGS has already skipped '#line '. |
900c5db5
AD
844`----------------------------------------------------------------*/
845
846static void
4517da37 847handle_syncline (char *args, location loc)
900c5db5 848{
4517da37
PE
849 char *after_num;
850 unsigned long int lineno = strtoul (args, &after_num, 10);
d143e9c3
JD
851 char *file = mbschr (after_num, '"') + 1;
852 *mbschr (file, '"') = '\0';
4517da37
PE
853 if (INT_MAX <= lineno)
854 {
855 warn_at (loc, _("line number overflow"));
856 lineno = INT_MAX;
857 }
e9071366 858 current_file = uniqstr_new (file);
0c8e079f 859 boundary_set (&scanner_cursor, current_file, lineno, 1);
4517da37
PE
860}
861
862
4febdd96
PE
863/*----------------------------------------------------------------.
864| For a token or comment starting at START, report message MSGID, |
865| which should say that an end marker was found before |
866| the expected TOKEN_END. |
867`----------------------------------------------------------------*/
868
869static void
870unexpected_end (boundary start, char const *msgid, char const *token_end)
871{
872 location loc;
873 loc.start = start;
874 loc.end = scanner_cursor;
875 complain_at (loc, _(msgid), token_end);
876}
877
878
3f2d73f1
PE
879/*------------------------------------------------------------------------.
880| Report an unexpected EOF in a token or comment starting at START. |
881| An end of file was encountered and the expected TOKEN_END was missing. |
3f2d73f1 882`------------------------------------------------------------------------*/
a706a1cc
PE
883
884static void
aa418041 885unexpected_eof (boundary start, char const *token_end)
a706a1cc 886{
9874f80b 887 unexpected_end (start, N_("missing '%s' at end of file"), token_end);
4febdd96
PE
888}
889
890
891/*----------------------------------------.
892| Likewise, but for unexpected newlines. |
893`----------------------------------------*/
894
895static void
896unexpected_newline (boundary start, char const *token_end)
897{
9874f80b 898 unexpected_end (start, N_("missing '%s' at end of line"), token_end);
a706a1cc
PE
899}
900
901
f25bfb75
AD
902/*-------------------------.
903| Initialize the scanner. |
904`-------------------------*/
905
/* Initialize obstack_for_string, the obstack that the scanner rules
   append token text to (via obstack_grow and obstack_1grow).
   NOTE(review): obstack_for_string is not declared in this file;
   presumably it comes from "flex-scanner.h" -- confirm.  */
1d6412ad 906void
e9071366 907gram_scanner_initialize (void)
1d6412ad 908{
223ff46e 909 obstack_init (&obstack_for_string);
1d6412ad
AD
910}
911
912
f25bfb75
AD
913/*-----------------------------------------------.
914| Free all the memory allocated to the scanner. |
915`-----------------------------------------------*/
916
/* Release the scanner's memory: first everything held by
   obstack_for_string, then the buffers that Flex allocated.  */
4cdb01db 917void
e9071366 918gram_scanner_free (void)
4cdb01db 919{
  /* A null object pointer asks obstack_free to free the whole
     obstack, per the GNU obstack documentation.  */
223ff46e 920 obstack_free (&obstack_for_string, 0);
536545f3 921 /* Reclaim Flex's buffers. */
580b8926 922 yylex_destroy ();
4cdb01db 923}