]> git.saurik.com Git - bison.git/blame - src/scan-gram.l
Work around Java's ``code too large'' problem for parser tables.
[bison.git] / src / scan-gram.l
CommitLineData
e9955c83 1/* Bison Grammar Scanner -*- C -*-
3b1e470c 2
deef2a0a 3 Copyright (C) 2002, 2003, 2004, 2005, 2006, 2007, 2008 Free Software Foundation,
7c0c6181 4 Inc.
e9955c83
AD
5
6 This file is part of Bison, the GNU Compiler Compiler.
7
f16b0819 8 This program is free software: you can redistribute it and/or modify
e9955c83 9 it under the terms of the GNU General Public License as published by
f16b0819 10 the Free Software Foundation, either version 3 of the License, or
e9955c83
AD
11 (at your option) any later version.
12
13 This program is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU General Public License for more details.
17
18 You should have received a copy of the GNU General Public License
f16b0819 19 along with this program. If not, see <http://www.gnu.org/licenses/>. */
e9955c83 20
aa418041 21%option debug nodefault nounput noyywrap never-interactive
e9955c83
AD
22%option prefix="gram_" outfile="lex.yy.c"
23
24%{
4f6e011e
PE
25/* Work around a bug in flex 2.5.31. See Debian bug 333231
26 <http://bugs.debian.org/cgi-bin/bugreport.cgi?bug=333231>. */
27#undef gram_wrap
28#define gram_wrap() 1
29
e9071366
AD
30#define FLEX_PREFIX(Id) gram_ ## Id
31#include "flex-scanner.h"
223ff46e 32
e9955c83 33#include "complain.h"
3f2d73f1 34#include "files.h"
e9955c83 35#include "gram.h"
ca407bdf 36#include "quotearg.h"
e9955c83 37#include "reader.h"
223ff46e 38#include "uniqstr.h"
e9955c83 39
e9071366
AD
40#include <mbswidth.h>
41#include <quote.h>
42
43#include "scan-gram.h"
44
45#define YY_DECL GRAM_LEX_DECL
2346344a 46
3f2d73f1 47#define YY_USER_INIT \
e9071366 48 code_start = scanner_cursor = loc->start; \
dc9701e8 49
3f2d73f1 50/* Location of scanner cursor. */
4a678af8 51static boundary scanner_cursor;
41141c56 52
e9071366 53#define YY_USER_ACTION location_compute (loc, &scanner_cursor, yytext, yyleng);
d8d3f94a 54
6c30d641 55static size_t no_cr_read (FILE *, char *, size_t);
d8d3f94a
PE
56#define YY_INPUT(buf, result, size) ((result) = no_cr_read (yyin, buf, size))
57
7ec2d4cd 58/* A string representing the most recently saved token. */
7c0c6181 59static char *last_string;
7ec2d4cd 60
7ec2d4cd 61void
e9071366 62gram_scanner_last_string_free (void)
7ec2d4cd 63{
41141c56 64 STRING_FREE;
7ec2d4cd 65}
e9955c83 66
4517da37 67static void handle_syncline (char *, location);
1452af69 68static unsigned long int scan_integer (char const *p, int base, location loc);
d8d3f94a 69static int convert_ucn_to_byte (char const *hex_text);
aa418041 70static void unexpected_eof (boundary, char const *);
4febdd96 71static void unexpected_newline (boundary, char const *);
e9955c83
AD
72
73%}
e9071366
AD
74 /* A C-like comment in directives/rules. */
75%x SC_YACC_COMMENT
76 /* Strings and characters in directives/rules. */
e9955c83 77%x SC_ESCAPED_STRING SC_ESCAPED_CHARACTER
e9071366
AD
78 /* An identifier was just read in directives/rules. Special state
79 to capture the sequence `identifier :'. */
80%x SC_AFTER_IDENTIFIER
e9071366
AD
81
82 /* Three types of user code:
83 - prologue (code between `%{' `%}' in the first section, before %%);
84 - actions, printers, union, etc, (between braces in the middle section);
85 - epilogue (everything after the second %%). */
86%x SC_PROLOGUE SC_BRACED_CODE SC_EPILOGUE
87 /* C and C++ comments in code. */
88%x SC_COMMENT SC_LINE_COMMENT
89 /* Strings and characters in code. */
90%x SC_STRING SC_CHARACTER
e9955c83 91
29c01725
AD
92letter [.abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ_]
93id {letter}({letter}|[0-9])*
94directive %{letter}({letter}|[0-9]|-)*
624a35e2 95int [0-9]+
d8d3f94a
PE
96
97/* POSIX says that a tag must be both an id and a C union member, but
98 historically almost any character is allowed in a tag. We disallow
99 NUL and newline, as this simplifies our implementation. */
100tag [^\0\n>]+
101
102/* Zero or more instances of backslash-newline. Following GCC, allow
103 white space between the backslash and the newline. */
104splice (\\[ \f\t\v]*\n)*
e9955c83
AD
105
106%%
107%{
a706a1cc 108 /* Nesting level of the current code in braces. */
5362ed19 109 int braces_level IF_LINT (= 0);
1a9e39f1 110
3f2d73f1 111 /* Parent context state, when applicable. */
5362ed19 112 int context_state IF_LINT (= 0);
a706a1cc 113
3f2d73f1 114 /* Location of most recent identifier, when applicable. */
a2bc9dbc 115 location id_loc IF_LINT (= empty_location);
3f2d73f1 116
a2bc9dbc
PE
117 /* Where containing code started, when applicable. Its initial
118 value is relevant only when yylex is invoked in the SC_EPILOGUE
119 start condition. */
120 boundary code_start = scanner_cursor;
3f2d73f1 121
223ff46e
PE
122 /* Where containing comment or string or character literal started,
123 when applicable. */
a2bc9dbc 124 boundary token_start IF_LINT (= scanner_cursor);
e9955c83
AD
125%}
126
127
3f2d73f1
PE
128 /*-----------------------.
129 | Scanning white space. |
130 `-----------------------*/
131
58d7a1a1 132<INITIAL,SC_AFTER_IDENTIFIER>
3f2d73f1 133{
4febdd96 134 /* Comments and white space. */
83adb046 135 "," warn_at (*loc, _("stray `,' treated as white space"));
4febdd96 136 [ \f\n\t\v] |
3f2d73f1 137 "//".* ;
83adb046
PE
138 "/*" {
139 token_start = loc->start;
140 context_state = YY_START;
141 BEGIN SC_YACC_COMMENT;
142 }
3f2d73f1
PE
143
144 /* #line directives are not documented, and may be withdrawn or
145 modified in future versions of Bison. */
146 ^"#line "{int}" \"".*"\"\n" {
4517da37 147 handle_syncline (yytext + sizeof "#line " - 1, *loc);
3f2d73f1
PE
148 }
149}
150
151
e9955c83
AD
152 /*----------------------------.
153 | Scanning Bison directives. |
154 `----------------------------*/
155<INITIAL>
156{
deef2a0a 157 "%binary" return PERCENT_NONASSOC;
136a0f76 158 "%code" return PERCENT_CODE;
deef2a0a
AD
159 "%debug" return PERCENT_DEBUG;
160 "%default"[-_]"prec" return PERCENT_DEFAULT_PREC;
161 "%define" return PERCENT_DEFINE;
162 "%defines" return PERCENT_DEFINES;
163 "%destructor" return PERCENT_DESTRUCTOR;
164 "%dprec" return PERCENT_DPREC;
165 "%error"[-_]"verbose" return PERCENT_ERROR_VERBOSE;
166 "%expect" return PERCENT_EXPECT;
167 "%expect"[-_]"rr" return PERCENT_EXPECT_RR;
168 "%file-prefix" return PERCENT_FILE_PREFIX;
e9955c83 169 "%fixed"[-_]"output"[-_]"files" return PERCENT_YACC;
deef2a0a
AD
170 "%initial-action" return PERCENT_INITIAL_ACTION;
171 "%glr-parser" return PERCENT_GLR_PARSER;
172 "%language" return PERCENT_LANGUAGE;
173 "%left" return PERCENT_LEFT;
174 "%lex-param" return PERCENT_LEX_PARAM;
175 "%locations" return PERCENT_LOCATIONS;
176 "%merge" return PERCENT_MERGE;
177 "%name"[-_]"prefix" return PERCENT_NAME_PREFIX;
178 "%no"[-_]"default"[-_]"prec" return PERCENT_NO_DEFAULT_PREC;
179 "%no"[-_]"lines" return PERCENT_NO_LINES;
180 "%nonassoc" return PERCENT_NONASSOC;
181 "%nondeterministic-parser" return PERCENT_NONDETERMINISTIC_PARSER;
182 "%nterm" return PERCENT_NTERM;
183 "%output" return PERCENT_OUTPUT;
184 "%parse-param" return PERCENT_PARSE_PARAM;
185 "%prec" return PERCENT_PREC;
d78f0ac9 186 "%precedence" return PERCENT_PRECEDENCE;
deef2a0a
AD
187 "%printer" return PERCENT_PRINTER;
188 "%pure"[-_]"parser" return PERCENT_PURE_PARSER;
189 "%require" return PERCENT_REQUIRE;
190 "%right" return PERCENT_RIGHT;
191 "%skeleton" return PERCENT_SKELETON;
192 "%start" return PERCENT_START;
193 "%term" return PERCENT_TOKEN;
194 "%token" return PERCENT_TOKEN;
195 "%token"[-_]"table" return PERCENT_TOKEN_TABLE;
196 "%type" return PERCENT_TYPE;
197 "%union" return PERCENT_UNION;
198 "%verbose" return PERCENT_VERBOSE;
199 "%yacc" return PERCENT_YACC;
e9955c83 200
3f2d73f1 201 {directive} {
41141c56 202 complain_at (*loc, _("invalid directive: %s"), quote (yytext));
412f8a59 203 }
900c5db5 204
e9955c83 205 "=" return EQUAL;
e9071366 206 "|" return PIPE;
e9955c83 207 ";" return SEMICOLON;
12e35840 208 "<*>" return TYPE_TAG_ANY;
3ebecc24 209 "<>" return TYPE_TAG_NONE;
e9955c83 210
3f2d73f1 211 {id} {
58d7a1a1 212 val->uniqstr = uniqstr_new (yytext);
3f2d73f1 213 id_loc = *loc;
3f2d73f1 214 BEGIN SC_AFTER_IDENTIFIER;
e9955c83
AD
215 }
216
d8d3f94a 217 {int} {
1452af69
PE
218 val->integer = scan_integer (yytext, 10, *loc);
219 return INT;
220 }
221 0[xX][0-9abcdefABCDEF]+ {
222 val->integer = scan_integer (yytext, 16, *loc);
d8d3f94a
PE
223 return INT;
224 }
e9955c83
AD
225
226 /* Characters. We don't check there is only one. */
3f2d73f1 227 "'" STRING_GROW; token_start = loc->start; BEGIN SC_ESCAPED_CHARACTER;
e9955c83
AD
228
229 /* Strings. */
ca407bdf 230 "\"" token_start = loc->start; BEGIN SC_ESCAPED_STRING;
e9955c83
AD
231
232 /* Prologue. */
3f2d73f1 233 "%{" code_start = loc->start; BEGIN SC_PROLOGUE;
e9955c83
AD
234
235 /* Code in between braces. */
3f2d73f1
PE
236 "{" {
237 STRING_GROW;
238 braces_level = 0;
239 code_start = loc->start;
240 BEGIN SC_BRACED_CODE;
241 }
e9955c83
AD
242
243 /* A type. */
d8d3f94a 244 "<"{tag}">" {
223ff46e 245 obstack_grow (&obstack_for_string, yytext + 1, yyleng - 2);
41141c56 246 STRING_FINISH;
223ff46e 247 val->uniqstr = uniqstr_new (last_string);
41141c56 248 STRING_FREE;
4cdb01db
AD
249 return TYPE;
250 }
251
a706a1cc
PE
252 "%%" {
253 static int percent_percent_count;
e9955c83 254 if (++percent_percent_count == 2)
a2bc9dbc 255 BEGIN SC_EPILOGUE;
e9955c83
AD
256 return PERCENT_PERCENT;
257 }
258
a706a1cc 259 . {
41141c56 260 complain_at (*loc, _("invalid character: %s"), quote (yytext));
3f2d73f1 261 }
379f0ac8
PE
262
263 <<EOF>> {
264 loc->start = loc->end = scanner_cursor;
265 yyterminate ();
266 }
3f2d73f1
PE
267}
268
269
270 /*-----------------------------------------------------------------.
271 | Scanning after an identifier, checking whether a colon is next. |
272 `-----------------------------------------------------------------*/
273
274<SC_AFTER_IDENTIFIER>
275{
276 ":" {
3f2d73f1
PE
277 *loc = id_loc;
278 BEGIN INITIAL;
279 return ID_COLON;
280 }
281 . {
282 scanner_cursor.column -= mbsnwidth (yytext, yyleng, 0);
283 yyless (0);
284 *loc = id_loc;
285 BEGIN INITIAL;
286 return ID;
287 }
288 <<EOF>> {
289 *loc = id_loc;
290 BEGIN INITIAL;
291 return ID;
e9955c83
AD
292 }
293}
294
295
d8d3f94a
PE
296 /*---------------------------------------------------------------.
297 | Scanning a Yacc comment. The initial `/ *' is already eaten. |
298 `---------------------------------------------------------------*/
e9955c83 299
d8d3f94a 300<SC_YACC_COMMENT>
e9955c83 301{
3f2d73f1 302 "*/" BEGIN context_state;
a706a1cc 303 .|\n ;
aa418041 304 <<EOF>> unexpected_eof (token_start, "*/"); BEGIN context_state;
d8d3f94a
PE
305}
306
307
308 /*------------------------------------------------------------.
309 | Scanning a C comment. The initial `/ *' is already eaten. |
310 `------------------------------------------------------------*/
311
312<SC_COMMENT>
313{
3f2d73f1 314 "*"{splice}"/" STRING_GROW; BEGIN context_state;
aa418041 315 <<EOF>> unexpected_eof (token_start, "*/"); BEGIN context_state;
e9955c83
AD
316}
317
318
d8d3f94a
PE
319 /*--------------------------------------------------------------.
320 | Scanning a line comment. The initial `//' is already eaten. |
321 `--------------------------------------------------------------*/
322
323<SC_LINE_COMMENT>
324{
3f2d73f1 325 "\n" STRING_GROW; BEGIN context_state;
41141c56 326 {splice} STRING_GROW;
3f2d73f1 327 <<EOF>> BEGIN context_state;
d8d3f94a
PE
328}
329
330
4febdd96
PE
331 /*------------------------------------------------.
332 | Scanning a Bison string, including its escapes. |
333 | The initial quote is already eaten. |
334 `------------------------------------------------*/
e9955c83
AD
335
336<SC_ESCAPED_STRING>
337{
47aee066
JD
338 "\""|"\n" {
339 if (yytext[0] == '\n')
340 unexpected_newline (token_start, "\"");
341 STRING_FINISH;
342 loc->start = token_start;
343 val->chars = last_string;
344 BEGIN INITIAL;
345 return STRING;
346 }
347 <<EOF>> {
348 unexpected_eof (token_start, "\"");
41141c56 349 STRING_FINISH;
3f2d73f1 350 loc->start = token_start;
223ff46e 351 val->chars = last_string;
a706a1cc 352 BEGIN INITIAL;
e9955c83
AD
353 return STRING;
354 }
e9955c83
AD
355}
356
4febdd96
PE
357 /*----------------------------------------------------------.
358 | Scanning a Bison character literal, decoding its escapes. |
359 | The initial quote is already eaten. |
360 `----------------------------------------------------------*/
e9955c83
AD
361
362<SC_ESCAPED_CHARACTER>
363{
47aee066
JD
364 "'"|"\n" {
365 if (yytext[0] == '\n')
366 unexpected_newline (token_start, "'");
41141c56
PE
367 STRING_GROW;
368 STRING_FINISH;
3f2d73f1 369 loc->start = token_start;
58d7a1a1 370 val->character = last_string[1];
41141c56 371 STRING_FREE;
a706a1cc 372 BEGIN INITIAL;
58d7a1a1 373 return CHAR;
e9955c83 374 }
47aee066
JD
375 <<EOF>> {
376 unexpected_eof (token_start, "'");
377 STRING_FINISH;
378 loc->start = token_start;
379 if (strlen(last_string) > 1)
380 val->character = last_string[1];
381 else
382 val->character = last_string[0];
383 STRING_FREE;
384 BEGIN INITIAL;
385 return CHAR;
386 }
4febdd96 387}
a706a1cc 388
4febdd96
PE
389<SC_ESCAPED_CHARACTER,SC_ESCAPED_STRING>
390{
92ac3705 391 \0 complain_at (*loc, _("invalid null character"));
e9955c83
AD
392}
393
394
395 /*----------------------------.
396 | Decode escaped characters. |
397 `----------------------------*/
398
399<SC_ESCAPED_STRING,SC_ESCAPED_CHARACTER>
400{
d8d3f94a 401 \\[0-7]{1,3} {
4517da37 402 unsigned long int c = strtoul (yytext + 1, NULL, 8);
d8d3f94a 403 if (UCHAR_MAX < c)
3f2d73f1 404 complain_at (*loc, _("invalid escape sequence: %s"), quote (yytext));
05ac60f3 405 else if (! c)
92ac3705 406 complain_at (*loc, _("invalid null character: %s"), quote (yytext));
e9955c83 407 else
223ff46e 408 obstack_1grow (&obstack_for_string, c);
e9955c83
AD
409 }
410
6b0d38ab 411 \\x[0-9abcdefABCDEF]+ {
4517da37
PE
412 verify (UCHAR_MAX < ULONG_MAX);
413 unsigned long int c = strtoul (yytext + 2, NULL, 16);
414 if (UCHAR_MAX < c)
3f2d73f1 415 complain_at (*loc, _("invalid escape sequence: %s"), quote (yytext));
92ac3705
PE
416 else if (! c)
417 complain_at (*loc, _("invalid null character: %s"), quote (yytext));
d8d3f94a 418 else
223ff46e 419 obstack_1grow (&obstack_for_string, c);
e9955c83
AD
420 }
421
223ff46e
PE
422 \\a obstack_1grow (&obstack_for_string, '\a');
423 \\b obstack_1grow (&obstack_for_string, '\b');
424 \\f obstack_1grow (&obstack_for_string, '\f');
425 \\n obstack_1grow (&obstack_for_string, '\n');
426 \\r obstack_1grow (&obstack_for_string, '\r');
427 \\t obstack_1grow (&obstack_for_string, '\t');
428 \\v obstack_1grow (&obstack_for_string, '\v');
412f8a59
PE
429
430 /* \\[\"\'?\\] would be shorter, but it confuses xgettext. */
223ff46e 431 \\("\""|"'"|"?"|"\\") obstack_1grow (&obstack_for_string, yytext[1]);
412f8a59 432
6b0d38ab 433 \\(u|U[0-9abcdefABCDEF]{4})[0-9abcdefABCDEF]{4} {
d8d3f94a
PE
434 int c = convert_ucn_to_byte (yytext);
435 if (c < 0)
3f2d73f1 436 complain_at (*loc, _("invalid escape sequence: %s"), quote (yytext));
92ac3705
PE
437 else if (! c)
438 complain_at (*loc, _("invalid null character: %s"), quote (yytext));
d8d3f94a 439 else
223ff46e 440 obstack_1grow (&obstack_for_string, c);
d8d3f94a 441 }
4f25ebb0 442 \\(.|\n) {
3f2d73f1 443 complain_at (*loc, _("unrecognized escape sequence: %s"), quote (yytext));
41141c56 444 STRING_GROW;
e9955c83
AD
445 }
446}
447
4febdd96
PE
448 /*--------------------------------------------.
449 | Scanning user-code characters and strings. |
450 `--------------------------------------------*/
e9955c83 451
4febdd96
PE
452<SC_CHARACTER,SC_STRING>
453{
e9071366 454 {splice}|\\{splice}[^\n\[\]] STRING_GROW;
4febdd96 455}
e9955c83
AD
456
457<SC_CHARACTER>
458{
4febdd96
PE
459 "'" STRING_GROW; BEGIN context_state;
460 \n unexpected_newline (token_start, "'"); BEGIN context_state;
461 <<EOF>> unexpected_eof (token_start, "'"); BEGIN context_state;
e9955c83
AD
462}
463
e9955c83
AD
464<SC_STRING>
465{
4febdd96
PE
466 "\"" STRING_GROW; BEGIN context_state;
467 \n unexpected_newline (token_start, "\""); BEGIN context_state;
468 <<EOF>> unexpected_eof (token_start, "\""); BEGIN context_state;
e9955c83
AD
469}
470
471
472 /*---------------------------------------------------.
473 | Strings, comments etc. can be found in user code. |
474 `---------------------------------------------------*/
475
476<SC_BRACED_CODE,SC_PROLOGUE,SC_EPILOGUE>
477{
3f2d73f1
PE
478 "'" {
479 STRING_GROW;
480 context_state = YY_START;
481 token_start = loc->start;
482 BEGIN SC_CHARACTER;
483 }
484 "\"" {
485 STRING_GROW;
486 context_state = YY_START;
487 token_start = loc->start;
488 BEGIN SC_STRING;
489 }
490 "/"{splice}"*" {
491 STRING_GROW;
492 context_state = YY_START;
493 token_start = loc->start;
494 BEGIN SC_COMMENT;
495 }
496 "/"{splice}"/" {
497 STRING_GROW;
498 context_state = YY_START;
499 BEGIN SC_LINE_COMMENT;
500 }
e9955c83
AD
501}
502
503
624a35e2 504
58d7a1a1
AD
505 /*-----------------------------------------------------------.
506 | Scanning some code in braces (actions). The initial "{" is |
507 | already eaten. |
508 `-----------------------------------------------------------*/
e9955c83
AD
509
510<SC_BRACED_CODE>
511{
41141c56
PE
512 "{"|"<"{splice}"%" STRING_GROW; braces_level++;
513 "%"{splice}">" STRING_GROW; braces_level--;
e9955c83 514 "}" {
25522739
PE
515 obstack_1grow (&obstack_for_string, '}');
516
2346344a
AD
517 --braces_level;
518 if (braces_level < 0)
e9955c83 519 {
41141c56 520 STRING_FINISH;
3f2d73f1 521 loc->start = code_start;
eb095650 522 val->code = last_string;
a706a1cc 523 BEGIN INITIAL;
58d7a1a1 524 return BRACED_CODE;
e9955c83
AD
525 }
526 }
527
a706a1cc
PE
528 /* Tokenize `<<%' correctly (as `<<' `%') rather than incorrrectly
529 (as `<' `<%'). */
41141c56 530 "<"{splice}"<" STRING_GROW;
a706a1cc 531
47aee066
JD
532 <<EOF>> {
533 unexpected_eof (code_start, "}");
534 STRING_FINISH;
535 loc->start = code_start;
eb095650 536 val->code = last_string;
47aee066
JD
537 BEGIN INITIAL;
538 return BRACED_CODE;
539 }
e9955c83
AD
540}
541
542
543 /*--------------------------------------------------------------.
544 | Scanning some prologue: from "%{" (already scanned) to "%}". |
545 `--------------------------------------------------------------*/
546
547<SC_PROLOGUE>
548{
549 "%}" {
41141c56 550 STRING_FINISH;
3f2d73f1 551 loc->start = code_start;
223ff46e 552 val->chars = last_string;
a706a1cc 553 BEGIN INITIAL;
e9955c83
AD
554 return PROLOGUE;
555 }
556
47aee066
JD
557 <<EOF>> {
558 unexpected_eof (code_start, "%}");
559 STRING_FINISH;
560 loc->start = code_start;
561 val->chars = last_string;
562 BEGIN INITIAL;
563 return PROLOGUE;
564 }
e9955c83
AD
565}
566
567
568 /*---------------------------------------------------------------.
569 | Scanning the epilogue (everything after the second "%%", which |
d8d3f94a 570 | has already been eaten). |
e9955c83
AD
571 `---------------------------------------------------------------*/
572
573<SC_EPILOGUE>
574{
e9955c83 575 <<EOF>> {
41141c56 576 STRING_FINISH;
3f2d73f1 577 loc->start = code_start;
223ff46e 578 val->chars = last_string;
a706a1cc 579 BEGIN INITIAL;
e9955c83
AD
580 return EPILOGUE;
581 }
582}
583
584
4febdd96
PE
585 /*-----------------------------------------------------.
586 | By default, grow the string obstack with the input. |
587 `-----------------------------------------------------*/
588
589<SC_COMMENT,SC_LINE_COMMENT,SC_BRACED_CODE,SC_PROLOGUE,SC_EPILOGUE,SC_STRING,SC_CHARACTER,SC_ESCAPED_STRING,SC_ESCAPED_CHARACTER>. |
590<SC_COMMENT,SC_LINE_COMMENT,SC_BRACED_CODE,SC_PROLOGUE,SC_EPILOGUE>\n STRING_GROW;
591
e9955c83
AD
592%%
593
6c30d641
PE
/* Fill BUF with at most SIZE bytes read from FP and return the number
   of bytes BUF holds afterwards.  Carriage returns never reach the
   caller: an isolated \r and a \r\n pair each come out as one \n, so
   the returned count may be smaller than what fread delivered.  */

static size_t
no_cr_read (FILE *fp, char *buf, size_t size)
{
  size_t bytes_read = fread (buf, 1, size, fp);
  char *dst = bytes_read ? memchr (buf, '\r', bytes_read) : NULL;

  /* Nothing was read, or there is nothing to translate.  */
  if (!dst)
    return bytes_read;

  /* DST is where the next output byte goes (currently the first
     '\r'); SRC is the next unread input byte; END is one past the
     last byte fread stored.  */
  char const *src = dst + 1;
  char const *end = buf + bytes_read;

  for (;;)
    {
      /* SRC has just stepped over an '\r': emit '\n' in its place.  */
      *dst++ = '\n';

      if (src != end)
        {
          /* Swallow the '\n' of a "\r\n" pair.  */
          if (*src == '\n')
            src++;
        }
      else
        {
          /* The '\r' was the last byte read, so a matching '\n', if
             any, is still in the stream.  Look at one character and
             drop it if it is '\n'; otherwise push it back, and stop
             if the push-back fails.  */
          int next = getc (fp);
          if (next != '\n' && ungetc (next, fp) != next)
            break;
        }

      /* Slide the following bytes down until the next '\r'.  */
      for (;;)
        {
          if (src == end)
            return dst - buf;
          char c = *src++;
          if (c == '\r')
            break;
          *dst++ = c;
        }
    }

  return dst - buf;
}
639
640
f25bfb75 641
1452af69
PE
642/*------------------------------------------------------.
643| Scan NUMBER for a base-BASE integer at location LOC. |
644`------------------------------------------------------*/
645
646static unsigned long int
647scan_integer (char const *number, int base, location loc)
648{
4517da37
PE
649 verify (INT_MAX < ULONG_MAX);
650 unsigned long int num = strtoul (number, NULL, base);
651
652 if (INT_MAX < num)
1452af69
PE
653 {
654 complain_at (loc, _("integer out of range: %s"), quote (number));
655 num = INT_MAX;
656 }
4517da37 657
1452af69
PE
658 return num;
659}
660
661
d8d3f94a
PE
/*------------------------------------------------------------------.
| Convert universal character name UCN to a single-byte character, |
| and return that character.  Return -1 if UCN does not correspond |
| to a single-byte character.                                       |
|                                                                   |
| The hexadecimal digits are read starting at UCN + 2, i.e., after  |
| the two-character `\u' or `\U' introducer.                        |
`------------------------------------------------------------------*/

static int
convert_ucn_to_byte (char const *ucn)
{
  verify (UCHAR_MAX <= INT_MAX);
  unsigned long int code = strtoul (ucn + 2, NULL, 16);
  int byte;

  /* FIXME: Currently we assume Unicode-compatible unibyte characters
     on ASCII hosts (i.e., Latin-1 on hosts with 8-bit bytes).  On
     non-ASCII hosts we support only the portable C character set.
     These limitations should be removed once we add support for
     multibyte characters.  */

  if (UCHAR_MAX < code)
    return -1;

  /* CODE <= UCHAR_MAX <= INT_MAX here, so this conversion is exact.
     The result is kept in an int from now on: storing the -1
     sentinel in the unsigned CODE and converting it back on return
     would be an implementation-defined out-of-range conversion.  */
  byte = code;

#if ! ('$' == 0x24 && '@' == 0x40 && '`' == 0x60 && '~' == 0x7e)
  {
    /* A non-ASCII host.  Use CODE to index into a table of the C
       basic execution character set, which is guaranteed to exist on
       all Standard C platforms.  This table also includes '$', '@',
       and '`', which are not in the basic execution character set but
       which are unibyte characters on all the platforms that we know
       about.  */
    static signed char const table[] =
      {
        '\0',   -1,   -1,   -1,   -1,   -1,   -1, '\a',
        '\b', '\t', '\n', '\v', '\f', '\r',   -1,   -1,
          -1,   -1,   -1,   -1,   -1,   -1,   -1,   -1,
          -1,   -1,   -1,   -1,   -1,   -1,   -1,   -1,
         ' ',  '!',  '"',  '#',  '$',  '%',  '&', '\'',
         '(',  ')',  '*',  '+',  ',',  '-',  '.',  '/',
         '0',  '1',  '2',  '3',  '4',  '5',  '6',  '7',
         '8',  '9',  ':',  ';',  '<',  '=',  '>',  '?',
         '@',  'A',  'B',  'C',  'D',  'E',  'F',  'G',
         'H',  'I',  'J',  'K',  'L',  'M',  'N',  'O',
         'P',  'Q',  'R',  'S',  'T',  'U',  'V',  'W',
         'X',  'Y',  'Z',  '[', '\\',  ']',  '^',  '_',
         '`',  'a',  'b',  'c',  'd',  'e',  'f',  'g',
         'h',  'i',  'j',  'k',  'l',  'm',  'n',  'o',
         'p',  'q',  'r',  's',  't',  'u',  'v',  'w',
         'x',  'y',  'z',  '{',  '|',  '}',  '~'
      };

    /* Codes past the end of the table have no portable unibyte
       representation.  */
    byte = code < sizeof table ? table[code] : -1;
  }
#endif

  return byte;
}
717
718
900c5db5
AD
719/*----------------------------------------------------------------.
720| Handle `#line INT "FILE"'. ARGS has already skipped `#line '. |
721`----------------------------------------------------------------*/
722
723static void
4517da37 724handle_syncline (char *args, location loc)
900c5db5 725{
4517da37
PE
726 char *after_num;
727 unsigned long int lineno = strtoul (args, &after_num, 10);
728 char *file = strchr (after_num, '"') + 1;
729 *strchr (file, '"') = '\0';
730 if (INT_MAX <= lineno)
731 {
732 warn_at (loc, _("line number overflow"));
733 lineno = INT_MAX;
734 }
e9071366 735 current_file = uniqstr_new (file);
0c8e079f 736 boundary_set (&scanner_cursor, current_file, lineno, 1);
4517da37
PE
737}
738
739
4febdd96
PE
740/*----------------------------------------------------------------.
741| For a token or comment starting at START, report message MSGID, |
742| which should say that an end marker was found before |
743| the expected TOKEN_END. |
744`----------------------------------------------------------------*/
745
746static void
747unexpected_end (boundary start, char const *msgid, char const *token_end)
748{
749 location loc;
750 loc.start = start;
751 loc.end = scanner_cursor;
752 complain_at (loc, _(msgid), token_end);
753}
754
755
3f2d73f1
PE
756/*------------------------------------------------------------------------.
757| Report an unexpected EOF in a token or comment starting at START. |
758| An end of file was encountered and the expected TOKEN_END was missing. |
3f2d73f1 759`------------------------------------------------------------------------*/
a706a1cc
PE
760
761static void
aa418041 762unexpected_eof (boundary start, char const *token_end)
a706a1cc 763{
4febdd96
PE
764 unexpected_end (start, N_("missing `%s' at end of file"), token_end);
765}
766
767
768/*----------------------------------------.
769| Likewise, but for unexpected newlines. |
770`----------------------------------------*/
771
772static void
773unexpected_newline (boundary start, char const *token_end)
774{
775 unexpected_end (start, N_("missing `%s' at end of line"), token_end);
a706a1cc
PE
776}
777
778
f25bfb75
AD
779/*-------------------------.
780| Initialize the scanner. |
781`-------------------------*/
782
1d6412ad 783void
e9071366 784gram_scanner_initialize (void)
1d6412ad 785{
223ff46e 786 obstack_init (&obstack_for_string);
1d6412ad
AD
787}
788
789
f25bfb75
AD
790/*-----------------------------------------------.
791| Free all the memory allocated to the scanner. |
792`-----------------------------------------------*/
793
4cdb01db 794void
e9071366 795gram_scanner_free (void)
4cdb01db 796{
223ff46e 797 obstack_free (&obstack_for_string, 0);
536545f3 798 /* Reclaim Flex's buffers. */
580b8926 799 yylex_destroy ();
4cdb01db 800}