]> git.saurik.com Git - bison.git/blame - src/scan-gram.l
Add copyright updates missed during previous cherry pick.
[bison.git] / src / scan-gram.l
CommitLineData
e9955c83 1/* Bison Grammar Scanner -*- C -*-
3b1e470c 2
6789b8bd
JD
3 Copyright (C) 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009 Free
4 Software Foundation, Inc.
e9955c83
AD
5
6 This file is part of Bison, the GNU Compiler Compiler.
7
f16b0819 8 This program is free software: you can redistribute it and/or modify
e9955c83 9 it under the terms of the GNU General Public License as published by
f16b0819 10 the Free Software Foundation, either version 3 of the License, or
e9955c83
AD
11 (at your option) any later version.
12
13 This program is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU General Public License for more details.
17
18 You should have received a copy of the GNU General Public License
f16b0819 19 along with this program. If not, see <http://www.gnu.org/licenses/>. */
e9955c83 20
aa418041 21%option debug nodefault nounput noyywrap never-interactive
e9955c83
AD
22%option prefix="gram_" outfile="lex.yy.c"
23
24%{
4f6e011e
PE
25/* Work around a bug in flex 2.5.31. See Debian bug 333231
26 <http://bugs.debian.org/cgi-bin/bugreport.cgi?bug=333231>. */
27#undef gram_wrap
28#define gram_wrap() 1
29
e9071366
AD
30#define FLEX_PREFIX(Id) gram_ ## Id
31#include "flex-scanner.h"
223ff46e 32
e9955c83 33#include "complain.h"
3f2d73f1 34#include "files.h"
e9955c83 35#include "gram.h"
ca407bdf 36#include "quotearg.h"
e9955c83 37#include "reader.h"
223ff46e 38#include "uniqstr.h"
e9955c83 39
e9071366
AD
40#include <mbswidth.h>
41#include <quote.h>
42
43#include "scan-gram.h"
44
45#define YY_DECL GRAM_LEX_DECL
2346344a 46
3f2d73f1 47#define YY_USER_INIT \
e9071366 48 code_start = scanner_cursor = loc->start; \
dc9701e8 49
3f2d73f1 50/* Location of scanner cursor. */
4a678af8 51static boundary scanner_cursor;
41141c56 52
e9071366 53#define YY_USER_ACTION location_compute (loc, &scanner_cursor, yytext, yyleng);
d8d3f94a 54
6c30d641 55static size_t no_cr_read (FILE *, char *, size_t);
d8d3f94a
PE
56#define YY_INPUT(buf, result, size) ((result) = no_cr_read (yyin, buf, size))
57
7ec2d4cd 58/* A string representing the most recently saved token. */
7c0c6181 59static char *last_string;
7ec2d4cd 60
7ec2d4cd 61void
e9071366 62gram_scanner_last_string_free (void)
7ec2d4cd 63{
41141c56 64 STRING_FREE;
7ec2d4cd 65}
e9955c83 66
4517da37 67static void handle_syncline (char *, location);
1452af69 68static unsigned long int scan_integer (char const *p, int base, location loc);
d8d3f94a 69static int convert_ucn_to_byte (char const *hex_text);
aa418041 70static void unexpected_eof (boundary, char const *);
4febdd96 71static void unexpected_newline (boundary, char const *);
e9955c83
AD
72
73%}
e9071366
AD
74 /* A C-like comment in directives/rules. */
75%x SC_YACC_COMMENT
76 /* Strings and characters in directives/rules. */
e9955c83 77%x SC_ESCAPED_STRING SC_ESCAPED_CHARACTER
e9071366
AD
78 /* An identifier was just read in directives/rules. Special state
79 to capture the sequence `identifier :'. */
80%x SC_AFTER_IDENTIFIER
e9071366
AD
81
82 /* Three types of user code:
83 - prologue (code between `%{' `%}' in the first section, before %%);
84 - actions, printers, union, etc. (between braces in the middle section);
85 - epilogue (everything after the second %%). */
86%x SC_PROLOGUE SC_BRACED_CODE SC_EPILOGUE
87 /* C and C++ comments in code. */
88%x SC_COMMENT SC_LINE_COMMENT
89 /* Strings and characters in code. */
90%x SC_STRING SC_CHARACTER
e9955c83 91
29c01725 92letter [.abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ_]
663ce7bb
AD
93id {letter}({letter}|[0-9]|-)*
94directive %{id}
624a35e2 95int [0-9]+
d8d3f94a
PE
96
97/* POSIX says that a tag must be both an id and a C union member, but
98 historically almost any character is allowed in a tag. We disallow
99 NUL and newline, as this simplifies our implementation. */
100tag [^\0\n>]+
101
102/* Zero or more instances of backslash-newline. Following GCC, allow
103 white space between the backslash and the newline. */
104splice (\\[ \f\t\v]*\n)*
e9955c83
AD
105
106%%
107%{
a706a1cc 108 /* Nesting level of the current code in braces. */
5362ed19 109 int braces_level IF_LINT (= 0);
1a9e39f1 110
3f2d73f1 111 /* Parent context state, when applicable. */
5362ed19 112 int context_state IF_LINT (= 0);
a706a1cc 113
3f2d73f1 114 /* Location of most recent identifier, when applicable. */
a2bc9dbc 115 location id_loc IF_LINT (= empty_location);
3f2d73f1 116
a2bc9dbc
PE
117 /* Where containing code started, when applicable. Its initial
118 value is relevant only when yylex is invoked in the SC_EPILOGUE
119 start condition. */
120 boundary code_start = scanner_cursor;
3f2d73f1 121
223ff46e
PE
122 /* Where containing comment or string or character literal started,
123 when applicable. */
a2bc9dbc 124 boundary token_start IF_LINT (= scanner_cursor);
e9955c83
AD
125%}
126
127
3f2d73f1
PE
128 /*-----------------------.
129 | Scanning white space. |
130 `-----------------------*/
131
58d7a1a1 132<INITIAL,SC_AFTER_IDENTIFIER>
3f2d73f1 133{
4febdd96 134 /* Comments and white space. */
83adb046 135 "," warn_at (*loc, _("stray `,' treated as white space"));
4febdd96 136 [ \f\n\t\v] |
3f2d73f1 137 "//".* ;
83adb046
PE
138 "/*" {
139 token_start = loc->start;
140 context_state = YY_START;
141 BEGIN SC_YACC_COMMENT;
142 }
3f2d73f1
PE
143
144 /* #line directives are not documented, and may be withdrawn or
145 modified in future versions of Bison. */
146 ^"#line "{int}" \"".*"\"\n" {
4517da37 147 handle_syncline (yytext + sizeof "#line " - 1, *loc);
3f2d73f1
PE
148 }
149}
150
151
e9955c83
AD
152 /*----------------------------.
153 | Scanning Bison directives. |
154 `----------------------------*/
72183df4
DJ
155
156 /* For directives that are also command line options, the regex must be
157 "%..."
158 after "[-_]"s are removed, and the directive must match the --long
159 option name, with a single string argument. Otherwise, add exceptions
160 to ../build-aux/cross-options.pl. */
161
e9955c83
AD
162<INITIAL>
163{
58d7a1a1 164 "%binary" return PERCENT_NONASSOC;
136a0f76 165 "%code" return PERCENT_CODE;
58d7a1a1
AD
166 "%debug" return PERCENT_DEBUG;
167 "%default"[-_]"prec" return PERCENT_DEFAULT_PREC;
168 "%define" return PERCENT_DEFINE;
169 "%defines" return PERCENT_DEFINES;
170 "%destructor" return PERCENT_DESTRUCTOR;
171 "%dprec" return PERCENT_DPREC;
172 "%error"[-_]"verbose" return PERCENT_ERROR_VERBOSE;
173 "%expect" return PERCENT_EXPECT;
174 "%expect"[-_]"rr" return PERCENT_EXPECT_RR;
175 "%file-prefix" return PERCENT_FILE_PREFIX;
e9955c83 176 "%fixed"[-_]"output"[-_]"files" return PERCENT_YACC;
58d7a1a1
AD
177 "%initial-action" return PERCENT_INITIAL_ACTION;
178 "%glr-parser" return PERCENT_GLR_PARSER;
0e021770 179 "%language" return PERCENT_LANGUAGE;
58d7a1a1
AD
180 "%left" return PERCENT_LEFT;
181 "%lex-param" return PERCENT_LEX_PARAM;
182 "%locations" return PERCENT_LOCATIONS;
183 "%merge" return PERCENT_MERGE;
184 "%name"[-_]"prefix" return PERCENT_NAME_PREFIX;
185 "%no"[-_]"default"[-_]"prec" return PERCENT_NO_DEFAULT_PREC;
186 "%no"[-_]"lines" return PERCENT_NO_LINES;
187 "%nonassoc" return PERCENT_NONASSOC;
188 "%nondeterministic-parser" return PERCENT_NONDETERMINISTIC_PARSER;
189 "%nterm" return PERCENT_NTERM;
190 "%output" return PERCENT_OUTPUT;
191 "%parse-param" return PERCENT_PARSE_PARAM;
192 "%prec" return PERCENT_PREC;
193 "%printer" return PERCENT_PRINTER;
194 "%pure"[-_]"parser" return PERCENT_PURE_PARSER;
195 "%require" return PERCENT_REQUIRE;
196 "%right" return PERCENT_RIGHT;
197 "%skeleton" return PERCENT_SKELETON;
198 "%start" return PERCENT_START;
199 "%term" return PERCENT_TOKEN;
200 "%token" return PERCENT_TOKEN;
201 "%token"[-_]"table" return PERCENT_TOKEN_TABLE;
202 "%type" return PERCENT_TYPE;
203 "%union" return PERCENT_UNION;
204 "%verbose" return PERCENT_VERBOSE;
205 "%yacc" return PERCENT_YACC;
e9955c83 206
3f2d73f1 207 {directive} {
41141c56 208 complain_at (*loc, _("invalid directive: %s"), quote (yytext));
412f8a59 209 }
900c5db5 210
e9955c83 211 "=" return EQUAL;
e9071366 212 "|" return PIPE;
e9955c83 213 ";" return SEMICOLON;
12e35840 214 "<*>" return TYPE_TAG_ANY;
3ebecc24 215 "<>" return TYPE_TAG_NONE;
e9955c83 216
3f2d73f1 217 {id} {
58d7a1a1 218 val->uniqstr = uniqstr_new (yytext);
3f2d73f1 219 id_loc = *loc;
3f2d73f1 220 BEGIN SC_AFTER_IDENTIFIER;
e9955c83
AD
221 }
222
d8d3f94a 223 {int} {
1452af69
PE
224 val->integer = scan_integer (yytext, 10, *loc);
225 return INT;
226 }
227 0[xX][0-9abcdefABCDEF]+ {
228 val->integer = scan_integer (yytext, 16, *loc);
d8d3f94a
PE
229 return INT;
230 }
e9955c83
AD
231
232 /* Characters. We don't check there is only one. */
3f2d73f1 233 "'" STRING_GROW; token_start = loc->start; BEGIN SC_ESCAPED_CHARACTER;
e9955c83
AD
234
235 /* Strings. */
ca407bdf 236 "\"" token_start = loc->start; BEGIN SC_ESCAPED_STRING;
e9955c83
AD
237
238 /* Prologue. */
3f2d73f1 239 "%{" code_start = loc->start; BEGIN SC_PROLOGUE;
e9955c83
AD
240
241 /* Code in between braces. */
3f2d73f1
PE
242 "{" {
243 STRING_GROW;
244 braces_level = 0;
245 code_start = loc->start;
246 BEGIN SC_BRACED_CODE;
247 }
e9955c83
AD
248
249 /* A type. */
d8d3f94a 250 "<"{tag}">" {
223ff46e 251 obstack_grow (&obstack_for_string, yytext + 1, yyleng - 2);
41141c56 252 STRING_FINISH;
223ff46e 253 val->uniqstr = uniqstr_new (last_string);
41141c56 254 STRING_FREE;
4cdb01db
AD
255 return TYPE;
256 }
257
a706a1cc
PE
258 "%%" {
259 static int percent_percent_count;
e9955c83 260 if (++percent_percent_count == 2)
a2bc9dbc 261 BEGIN SC_EPILOGUE;
e9955c83
AD
262 return PERCENT_PERCENT;
263 }
264
a706a1cc 265 . {
41141c56 266 complain_at (*loc, _("invalid character: %s"), quote (yytext));
3f2d73f1 267 }
379f0ac8
PE
268
269 <<EOF>> {
270 loc->start = loc->end = scanner_cursor;
271 yyterminate ();
272 }
3f2d73f1
PE
273}
274
275
276 /*-----------------------------------------------------------------.
277 | Scanning after an identifier, checking whether a colon is next. |
278 `-----------------------------------------------------------------*/
279
280<SC_AFTER_IDENTIFIER>
281{
282 ":" {
3f2d73f1
PE
283 *loc = id_loc;
284 BEGIN INITIAL;
285 return ID_COLON;
286 }
287 . {
288 scanner_cursor.column -= mbsnwidth (yytext, yyleng, 0);
289 yyless (0);
290 *loc = id_loc;
291 BEGIN INITIAL;
292 return ID;
293 }
294 <<EOF>> {
295 *loc = id_loc;
296 BEGIN INITIAL;
297 return ID;
e9955c83
AD
298 }
299}
300
301
d8d3f94a
PE
302 /*---------------------------------------------------------------.
303 | Scanning a Yacc comment. The initial `/ *' is already eaten. |
304 `---------------------------------------------------------------*/
e9955c83 305
d8d3f94a 306<SC_YACC_COMMENT>
e9955c83 307{
3f2d73f1 308 "*/" BEGIN context_state;
a706a1cc 309 .|\n ;
aa418041 310 <<EOF>> unexpected_eof (token_start, "*/"); BEGIN context_state;
d8d3f94a
PE
311}
312
313
314 /*------------------------------------------------------------.
315 | Scanning a C comment. The initial `/ *' is already eaten. |
316 `------------------------------------------------------------*/
317
318<SC_COMMENT>
319{
3f2d73f1 320 "*"{splice}"/" STRING_GROW; BEGIN context_state;
aa418041 321 <<EOF>> unexpected_eof (token_start, "*/"); BEGIN context_state;
e9955c83
AD
322}
323
324
d8d3f94a
PE
325 /*--------------------------------------------------------------.
326 | Scanning a line comment. The initial `//' is already eaten. |
327 `--------------------------------------------------------------*/
328
329<SC_LINE_COMMENT>
330{
3f2d73f1 331 "\n" STRING_GROW; BEGIN context_state;
41141c56 332 {splice} STRING_GROW;
3f2d73f1 333 <<EOF>> BEGIN context_state;
d8d3f94a
PE
334}
335
336
4febdd96
PE
337 /*------------------------------------------------.
338 | Scanning a Bison string, including its escapes. |
339 | The initial quote is already eaten. |
340 `------------------------------------------------*/
e9955c83
AD
341
342<SC_ESCAPED_STRING>
343{
47aee066
JD
344 "\""|"\n" {
345 if (yytext[0] == '\n')
346 unexpected_newline (token_start, "\"");
347 STRING_FINISH;
348 loc->start = token_start;
349 val->chars = last_string;
350 BEGIN INITIAL;
351 return STRING;
352 }
353 <<EOF>> {
354 unexpected_eof (token_start, "\"");
41141c56 355 STRING_FINISH;
3f2d73f1 356 loc->start = token_start;
223ff46e 357 val->chars = last_string;
a706a1cc 358 BEGIN INITIAL;
e9955c83
AD
359 return STRING;
360 }
e9955c83
AD
361}
362
4febdd96
PE
363 /*----------------------------------------------------------.
364 | Scanning a Bison character literal, decoding its escapes. |
365 | The initial quote is already eaten. |
366 `----------------------------------------------------------*/
e9955c83
AD
367
368<SC_ESCAPED_CHARACTER>
369{
47aee066
JD
370 "'"|"\n" {
371 if (yytext[0] == '\n')
372 unexpected_newline (token_start, "'");
41141c56
PE
373 STRING_GROW;
374 STRING_FINISH;
3f2d73f1 375 loc->start = token_start;
58d7a1a1 376 val->character = last_string[1];
41141c56 377 STRING_FREE;
a706a1cc 378 BEGIN INITIAL;
58d7a1a1 379 return CHAR;
e9955c83 380 }
47aee066
JD
381 <<EOF>> {
382 unexpected_eof (token_start, "'");
383 STRING_FINISH;
384 loc->start = token_start;
663ce7bb 385 if (strlen (last_string) > 1)
47aee066
JD
386 val->character = last_string[1];
387 else
388 val->character = last_string[0];
389 STRING_FREE;
390 BEGIN INITIAL;
391 return CHAR;
392 }
4febdd96 393}
a706a1cc 394
4febdd96
PE
395<SC_ESCAPED_CHARACTER,SC_ESCAPED_STRING>
396{
92ac3705 397 \0 complain_at (*loc, _("invalid null character"));
e9955c83
AD
398}
399
400
401 /*----------------------------.
402 | Decode escaped characters. |
403 `----------------------------*/
404
405<SC_ESCAPED_STRING,SC_ESCAPED_CHARACTER>
406{
d8d3f94a 407 \\[0-7]{1,3} {
4517da37 408 unsigned long int c = strtoul (yytext + 1, NULL, 8);
d8d3f94a 409 if (UCHAR_MAX < c)
3f2d73f1 410 complain_at (*loc, _("invalid escape sequence: %s"), quote (yytext));
05ac60f3 411 else if (! c)
92ac3705 412 complain_at (*loc, _("invalid null character: %s"), quote (yytext));
e9955c83 413 else
223ff46e 414 obstack_1grow (&obstack_for_string, c);
e9955c83
AD
415 }
416
6b0d38ab 417 \\x[0-9abcdefABCDEF]+ {
4517da37
PE
418 verify (UCHAR_MAX < ULONG_MAX);
419 unsigned long int c = strtoul (yytext + 2, NULL, 16);
420 if (UCHAR_MAX < c)
3f2d73f1 421 complain_at (*loc, _("invalid escape sequence: %s"), quote (yytext));
92ac3705
PE
422 else if (! c)
423 complain_at (*loc, _("invalid null character: %s"), quote (yytext));
d8d3f94a 424 else
223ff46e 425 obstack_1grow (&obstack_for_string, c);
e9955c83
AD
426 }
427
223ff46e
PE
428 \\a obstack_1grow (&obstack_for_string, '\a');
429 \\b obstack_1grow (&obstack_for_string, '\b');
430 \\f obstack_1grow (&obstack_for_string, '\f');
431 \\n obstack_1grow (&obstack_for_string, '\n');
432 \\r obstack_1grow (&obstack_for_string, '\r');
433 \\t obstack_1grow (&obstack_for_string, '\t');
434 \\v obstack_1grow (&obstack_for_string, '\v');
412f8a59
PE
435
436 /* \\[\"\'?\\] would be shorter, but it confuses xgettext. */
223ff46e 437 \\("\""|"'"|"?"|"\\") obstack_1grow (&obstack_for_string, yytext[1]);
412f8a59 438
6b0d38ab 439 \\(u|U[0-9abcdefABCDEF]{4})[0-9abcdefABCDEF]{4} {
d8d3f94a
PE
440 int c = convert_ucn_to_byte (yytext);
441 if (c < 0)
3f2d73f1 442 complain_at (*loc, _("invalid escape sequence: %s"), quote (yytext));
92ac3705
PE
443 else if (! c)
444 complain_at (*loc, _("invalid null character: %s"), quote (yytext));
d8d3f94a 445 else
223ff46e 446 obstack_1grow (&obstack_for_string, c);
d8d3f94a 447 }
4f25ebb0 448 \\(.|\n) {
3f2d73f1 449 complain_at (*loc, _("unrecognized escape sequence: %s"), quote (yytext));
41141c56 450 STRING_GROW;
e9955c83
AD
451 }
452}
453
4febdd96
PE
454 /*--------------------------------------------.
455 | Scanning user-code characters and strings. |
456 `--------------------------------------------*/
e9955c83 457
4febdd96
PE
458<SC_CHARACTER,SC_STRING>
459{
e9071366 460 {splice}|\\{splice}[^\n\[\]] STRING_GROW;
4febdd96 461}
e9955c83
AD
462
463<SC_CHARACTER>
464{
4febdd96
PE
465 "'" STRING_GROW; BEGIN context_state;
466 \n unexpected_newline (token_start, "'"); BEGIN context_state;
467 <<EOF>> unexpected_eof (token_start, "'"); BEGIN context_state;
e9955c83
AD
468}
469
e9955c83
AD
470<SC_STRING>
471{
4febdd96
PE
472 "\"" STRING_GROW; BEGIN context_state;
473 \n unexpected_newline (token_start, "\""); BEGIN context_state;
474 <<EOF>> unexpected_eof (token_start, "\""); BEGIN context_state;
e9955c83
AD
475}
476
477
478 /*---------------------------------------------------.
479 | Strings, comments etc. can be found in user code. |
480 `---------------------------------------------------*/
481
482<SC_BRACED_CODE,SC_PROLOGUE,SC_EPILOGUE>
483{
3f2d73f1
PE
484 "'" {
485 STRING_GROW;
486 context_state = YY_START;
487 token_start = loc->start;
488 BEGIN SC_CHARACTER;
489 }
490 "\"" {
491 STRING_GROW;
492 context_state = YY_START;
493 token_start = loc->start;
494 BEGIN SC_STRING;
495 }
496 "/"{splice}"*" {
497 STRING_GROW;
498 context_state = YY_START;
499 token_start = loc->start;
500 BEGIN SC_COMMENT;
501 }
502 "/"{splice}"/" {
503 STRING_GROW;
504 context_state = YY_START;
505 BEGIN SC_LINE_COMMENT;
506 }
e9955c83
AD
507}
508
509
624a35e2 510
58d7a1a1
AD
511 /*-----------------------------------------------------------.
512 | Scanning some code in braces (actions). The initial "{" is |
513 | already eaten. |
514 `-----------------------------------------------------------*/
e9955c83
AD
515
516<SC_BRACED_CODE>
517{
41141c56
PE
518 "{"|"<"{splice}"%" STRING_GROW; braces_level++;
519 "%"{splice}">" STRING_GROW; braces_level--;
e9955c83 520 "}" {
25522739
PE
521 obstack_1grow (&obstack_for_string, '}');
522
2346344a
AD
523 --braces_level;
524 if (braces_level < 0)
e9955c83 525 {
41141c56 526 STRING_FINISH;
3f2d73f1 527 loc->start = code_start;
eb095650 528 val->code = last_string;
a706a1cc 529 BEGIN INITIAL;
58d7a1a1 530 return BRACED_CODE;
e9955c83
AD
531 }
532 }
533
a706a1cc
PE
534 /* Tokenize `<<%' correctly (as `<<' `%') rather than incorrectly
535 (as `<' `<%'). */
41141c56 536 "<"{splice}"<" STRING_GROW;
a706a1cc 537
47aee066
JD
538 <<EOF>> {
539 unexpected_eof (code_start, "}");
540 STRING_FINISH;
541 loc->start = code_start;
eb095650 542 val->code = last_string;
47aee066
JD
543 BEGIN INITIAL;
544 return BRACED_CODE;
545 }
e9955c83
AD
546}
547
548
549 /*--------------------------------------------------------------.
550 | Scanning some prologue: from "%{" (already scanned) to "%}". |
551 `--------------------------------------------------------------*/
552
553<SC_PROLOGUE>
554{
555 "%}" {
41141c56 556 STRING_FINISH;
3f2d73f1 557 loc->start = code_start;
223ff46e 558 val->chars = last_string;
a706a1cc 559 BEGIN INITIAL;
e9955c83
AD
560 return PROLOGUE;
561 }
562
47aee066
JD
563 <<EOF>> {
564 unexpected_eof (code_start, "%}");
565 STRING_FINISH;
566 loc->start = code_start;
567 val->chars = last_string;
568 BEGIN INITIAL;
569 return PROLOGUE;
570 }
e9955c83
AD
571}
572
573
574 /*---------------------------------------------------------------.
575 | Scanning the epilogue (everything after the second "%%", which |
d8d3f94a 576 | has already been eaten). |
e9955c83
AD
577 `---------------------------------------------------------------*/
578
579<SC_EPILOGUE>
580{
e9955c83 581 <<EOF>> {
41141c56 582 STRING_FINISH;
3f2d73f1 583 loc->start = code_start;
223ff46e 584 val->chars = last_string;
a706a1cc 585 BEGIN INITIAL;
e9955c83
AD
586 return EPILOGUE;
587 }
588}
589
590
4febdd96
PE
591 /*-----------------------------------------------------.
592 | By default, grow the string obstack with the input. |
593 `-----------------------------------------------------*/
594
595<SC_COMMENT,SC_LINE_COMMENT,SC_BRACED_CODE,SC_PROLOGUE,SC_EPILOGUE,SC_STRING,SC_CHARACTER,SC_ESCAPED_STRING,SC_ESCAPED_CHARACTER>. |
596<SC_COMMENT,SC_LINE_COMMENT,SC_BRACED_CODE,SC_PROLOGUE,SC_EPILOGUE>\n STRING_GROW;
597
e9955c83
AD
598%%
599
6c30d641
PE
600/* Read bytes from FP into buffer BUF of size SIZE. Return the
601 number of bytes read. Remove '\r' from input, treating \r\n
602 and isolated \r as \n. */
603
604static size_t
605no_cr_read (FILE *fp, char *buf, size_t size)
606{
a737b216
PE
607 size_t bytes_read = fread (buf, 1, size, fp);
608 if (bytes_read)
6c30d641 609 {
a737b216 610 char *w = memchr (buf, '\r', bytes_read);
6c30d641
PE
611 if (w)
612 {
613 char const *r = ++w;
a737b216 614 char const *lim = buf + bytes_read;
6c30d641
PE
615
616 for (;;)
617 {
618 /* Found an '\r'. Treat it like '\n', but ignore any
619 '\n' that immediately follows. */
620 w[-1] = '\n';
621 if (r == lim)
622 {
623 int ch = getc (fp);
624 if (ch != '\n' && ungetc (ch, fp) != ch)
625 break;
626 }
627 else if (*r == '\n')
628 r++;
629
630 /* Copy until the next '\r'. */
631 do
632 {
633 if (r == lim)
634 return w - buf;
635 }
636 while ((*w++ = *r++) != '\r');
637 }
638
639 return w - buf;
640 }
641 }
642
a737b216 643 return bytes_read;
6c30d641
PE
644}
645
646
f25bfb75 647
1452af69
PE
648/*------------------------------------------------------.
649| Scan NUMBER for a base-BASE integer at location LOC. |
650`------------------------------------------------------*/
651
652static unsigned long int
653scan_integer (char const *number, int base, location loc)
654{
4517da37
PE
655 verify (INT_MAX < ULONG_MAX);
656 unsigned long int num = strtoul (number, NULL, base);
657
658 if (INT_MAX < num)
1452af69
PE
659 {
660 complain_at (loc, _("integer out of range: %s"), quote (number));
661 num = INT_MAX;
662 }
4517da37 663
1452af69
PE
664 return num;
665}
666
667
d8d3f94a
PE
668/*------------------------------------------------------------------.
669| Convert universal character name UCN to a single-byte character, |
670| and return that character. Return -1 if UCN does not correspond |
671| to a single-byte character. |
672`------------------------------------------------------------------*/
673
674static int
675convert_ucn_to_byte (char const *ucn)
676{
4517da37
PE
677 verify (UCHAR_MAX <= INT_MAX);
678 unsigned long int code = strtoul (ucn + 2, NULL, 16);
d8d3f94a
PE
679
680 /* FIXME: Currently we assume Unicode-compatible unibyte characters
681 on ASCII hosts (i.e., Latin-1 on hosts with 8-bit bytes). On
682 non-ASCII hosts we support only the portable C character set.
683 These limitations should be removed once we add support for
684 multibyte characters. */
685
686 if (UCHAR_MAX < code)
687 return -1;
688
689#if ! ('$' == 0x24 && '@' == 0x40 && '`' == 0x60 && '~' == 0x7e)
690 {
691 /* A non-ASCII host. Use CODE to index into a table of the C
692 basic execution character set, which is guaranteed to exist on
693 all Standard C platforms. This table also includes '$', '@',
8e6ef483 694 and '`', which are not in the basic execution character set but
d8d3f94a
PE
695 which are unibyte characters on all the platforms that we know
696 about. */
697 static signed char const table[] =
698 {
699 '\0', -1, -1, -1, -1, -1, -1, '\a',
700 '\b', '\t', '\n', '\v', '\f', '\r', -1, -1,
701 -1, -1, -1, -1, -1, -1, -1, -1,
702 -1, -1, -1, -1, -1, -1, -1, -1,
703 ' ', '!', '"', '#', '$', '%', '&', '\'',
704 '(', ')', '*', '+', ',', '-', '.', '/',
705 '0', '1', '2', '3', '4', '5', '6', '7',
706 '8', '9', ':', ';', '<', '=', '>', '?',
707 '@', 'A', 'B', 'C', 'D', 'E', 'F', 'G',
708 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O',
709 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W',
710 'X', 'Y', 'Z', '[', '\\', ']', '^', '_',
711 '`', 'a', 'b', 'c', 'd', 'e', 'f', 'g',
712 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o',
713 'p', 'q', 'r', 's', 't', 'u', 'v', 'w',
714 'x', 'y', 'z', '{', '|', '}', '~'
715 };
716
717 code = code < sizeof table ? table[code] : -1;
718 }
719#endif
c4d720cd 720
d8d3f94a
PE
721 return code;
722}
723
724
900c5db5
AD
725/*----------------------------------------------------------------.
726| Handle `#line INT "FILE"'. ARGS has already skipped `#line '. |
727`----------------------------------------------------------------*/
728
729static void
4517da37 730handle_syncline (char *args, location loc)
900c5db5 731{
4517da37
PE
732 char *after_num;
733 unsigned long int lineno = strtoul (args, &after_num, 10);
734 char *file = strchr (after_num, '"') + 1;
735 *strchr (file, '"') = '\0';
736 if (INT_MAX <= lineno)
737 {
738 warn_at (loc, _("line number overflow"));
739 lineno = INT_MAX;
740 }
e9071366 741 current_file = uniqstr_new (file);
0c8e079f 742 boundary_set (&scanner_cursor, current_file, lineno, 1);
4517da37
PE
743}
744
745
4febdd96
PE
746/*----------------------------------------------------------------.
747| For a token or comment starting at START, report message MSGID, |
748| which should say that an end marker was found before |
749| the expected TOKEN_END. |
750`----------------------------------------------------------------*/
751
752static void
753unexpected_end (boundary start, char const *msgid, char const *token_end)
754{
755 location loc;
756 loc.start = start;
757 loc.end = scanner_cursor;
758 complain_at (loc, _(msgid), token_end);
759}
760
761
3f2d73f1
PE
762/*------------------------------------------------------------------------.
763| Report an unexpected EOF in a token or comment starting at START. |
764| An end of file was encountered and the expected TOKEN_END was missing. |
3f2d73f1 765`------------------------------------------------------------------------*/
a706a1cc
PE
766
767static void
aa418041 768unexpected_eof (boundary start, char const *token_end)
a706a1cc 769{
4febdd96
PE
770 unexpected_end (start, N_("missing `%s' at end of file"), token_end);
771}
772
773
774/*----------------------------------------.
775| Likewise, but for unexpected newlines. |
776`----------------------------------------*/
777
778static void
779unexpected_newline (boundary start, char const *token_end)
780{
781 unexpected_end (start, N_("missing `%s' at end of line"), token_end);
a706a1cc
PE
782}
783
784
f25bfb75
AD
785/*-------------------------.
786| Initialize the scanner. |
787`-------------------------*/
788
1d6412ad 789void
e9071366 790gram_scanner_initialize (void)
1d6412ad 791{
223ff46e 792 obstack_init (&obstack_for_string);
1d6412ad
AD
793}
794
795
f25bfb75
AD
796/*-----------------------------------------------.
797| Free all the memory allocated to the scanner. |
798`-----------------------------------------------*/
799
4cdb01db 800void
e9071366 801gram_scanner_free (void)
4cdb01db 802{
223ff46e 803 obstack_free (&obstack_for_string, 0);
536545f3 804 /* Reclaim Flex's buffers. */
580b8926 805 yylex_destroy ();
4cdb01db 806}