]> git.saurik.com Git - bison.git/blame - src/scan-gram.l
2007-11-08 Paolo Bonzini <bonzini@gnu.org>
[bison.git] / src / scan-gram.l
CommitLineData
e9955c83 1/* Bison Grammar Scanner -*- C -*-
3b1e470c 2
7c0c6181
JD
3 Copyright (C) 2002, 2003, 2004, 2005, 2006, 2007 Free Software Foundation,
4 Inc.
e9955c83
AD
5
6 This file is part of Bison, the GNU Compiler Compiler.
7
f16b0819 8 This program is free software: you can redistribute it and/or modify
e9955c83 9 it under the terms of the GNU General Public License as published by
f16b0819 10 the Free Software Foundation, either version 3 of the License, or
e9955c83
AD
11 (at your option) any later version.
12
13 This program is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU General Public License for more details.
17
18 You should have received a copy of the GNU General Public License
f16b0819 19 along with this program. If not, see <http://www.gnu.org/licenses/>. */
e9955c83 20
aa418041 21%option debug nodefault nounput noyywrap never-interactive
e9955c83
AD
22%option prefix="gram_" outfile="lex.yy.c"
23
24%{
4f6e011e
PE
25/* Work around a bug in flex 2.5.31. See Debian bug 333231
26 <http://bugs.debian.org/cgi-bin/bugreport.cgi?bug=333231>. */
27#undef gram_wrap
28#define gram_wrap() 1
29
e9071366
AD
30#define FLEX_PREFIX(Id) gram_ ## Id
31#include "flex-scanner.h"
223ff46e 32
e9955c83 33#include "complain.h"
3f2d73f1 34#include "files.h"
e9955c83 35#include "gram.h"
ca407bdf 36#include "quotearg.h"
e9955c83 37#include "reader.h"
223ff46e 38#include "uniqstr.h"
e9955c83 39
e9071366
AD
40#include <mbswidth.h>
41#include <quote.h>
42
43#include "scan-gram.h"
44
45#define YY_DECL GRAM_LEX_DECL
2346344a 46
3f2d73f1 47#define YY_USER_INIT \
e9071366 48 code_start = scanner_cursor = loc->start; \
dc9701e8 49
3f2d73f1 50/* Location of scanner cursor. */
4a678af8 51static boundary scanner_cursor;
41141c56 52
e9071366 53#define YY_USER_ACTION location_compute (loc, &scanner_cursor, yytext, yyleng);
d8d3f94a 54
6c30d641 55static size_t no_cr_read (FILE *, char *, size_t);
d8d3f94a
PE
56#define YY_INPUT(buf, result, size) ((result) = no_cr_read (yyin, buf, size))
57
7ec2d4cd 58/* A string representing the most recently saved token. */
7c0c6181 59static char *last_string;
7ec2d4cd 60
7ec2d4cd 61void
e9071366 62gram_scanner_last_string_free (void)
7ec2d4cd 63{
41141c56 64 STRING_FREE;
7ec2d4cd 65}
e9955c83 66
4517da37 67static void handle_syncline (char *, location);
1452af69 68static unsigned long int scan_integer (char const *p, int base, location loc);
d8d3f94a 69static int convert_ucn_to_byte (char const *hex_text);
aa418041 70static void unexpected_eof (boundary, char const *);
4febdd96 71static void unexpected_newline (boundary, char const *);
e9955c83
AD
72
73%}
e9071366
AD
74 /* A C-like comment in directives/rules. */
75%x SC_YACC_COMMENT
76 /* Strings and characters in directives/rules. */
e9955c83 77%x SC_ESCAPED_STRING SC_ESCAPED_CHARACTER
e9071366
AD
78 /* An identifier was just read in directives/rules. Special state
79 to capture the sequence `identifier :'. */
80%x SC_AFTER_IDENTIFIER
e9071366
AD
81
82 /* Three types of user code:
83 - prologue (code between `%{' `%}' in the first section, before %%);
84 - actions, printers, union, etc, (between braces in the middle section);
85 - epilogue (everything after the second %%). */
86%x SC_PROLOGUE SC_BRACED_CODE SC_EPILOGUE
87 /* C and C++ comments in code. */
88%x SC_COMMENT SC_LINE_COMMENT
89 /* Strings and characters in code. */
90%x SC_STRING SC_CHARACTER
e9955c83 91
29c01725
AD
92letter [.abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ_]
93id {letter}({letter}|[0-9])*
94directive %{letter}({letter}|[0-9]|-)*
624a35e2 95int [0-9]+
d8d3f94a
PE
96
97/* POSIX says that a tag must be both an id and a C union member, but
98 historically almost any character is allowed in a tag. We disallow
99 NUL and newline, as this simplifies our implementation. */
100tag [^\0\n>]+
101
102/* Zero or more instances of backslash-newline. Following GCC, allow
103 white space between the backslash and the newline. */
104splice (\\[ \f\t\v]*\n)*
e9955c83
AD
105
106%%
107%{
a706a1cc 108 /* Nesting level of the current code in braces. */
5362ed19 109 int braces_level IF_LINT (= 0);
1a9e39f1 110
3f2d73f1 111 /* Parent context state, when applicable. */
5362ed19 112 int context_state IF_LINT (= 0);
a706a1cc 113
3f2d73f1 114 /* Location of most recent identifier, when applicable. */
a2bc9dbc 115 location id_loc IF_LINT (= empty_location);
3f2d73f1 116
a2bc9dbc
PE
117 /* Where containing code started, when applicable. Its initial
118 value is relevant only when yylex is invoked in the SC_EPILOGUE
119 start condition. */
120 boundary code_start = scanner_cursor;
3f2d73f1 121
223ff46e
PE
122 /* Where containing comment or string or character literal started,
123 when applicable. */
a2bc9dbc 124 boundary token_start IF_LINT (= scanner_cursor);
e9955c83
AD
125%}
126
127
3f2d73f1
PE
128 /*-----------------------.
129 | Scanning white space. |
130 `-----------------------*/
131
58d7a1a1 132<INITIAL,SC_AFTER_IDENTIFIER>
3f2d73f1 133{
4febdd96 134 /* Comments and white space. */
83adb046 135 "," warn_at (*loc, _("stray `,' treated as white space"));
4febdd96 136 [ \f\n\t\v] |
3f2d73f1 137 "//".* ;
83adb046
PE
138 "/*" {
139 token_start = loc->start;
140 context_state = YY_START;
141 BEGIN SC_YACC_COMMENT;
142 }
3f2d73f1
PE
143
144 /* #line directives are not documented, and may be withdrawn or
145 modified in future versions of Bison. */
146 ^"#line "{int}" \"".*"\"\n" {
4517da37 147 handle_syncline (yytext + sizeof "#line " - 1, *loc);
3f2d73f1
PE
148 }
149}
150
151
e9955c83
AD
152 /*----------------------------.
153 | Scanning Bison directives. |
154 `----------------------------*/
155<INITIAL>
156{
58d7a1a1 157 "%binary" return PERCENT_NONASSOC;
136a0f76 158 "%code" return PERCENT_CODE;
58d7a1a1
AD
159 "%debug" return PERCENT_DEBUG;
160 "%default"[-_]"prec" return PERCENT_DEFAULT_PREC;
161 "%define" return PERCENT_DEFINE;
162 "%defines" return PERCENT_DEFINES;
163 "%destructor" return PERCENT_DESTRUCTOR;
164 "%dprec" return PERCENT_DPREC;
165 "%error"[-_]"verbose" return PERCENT_ERROR_VERBOSE;
166 "%expect" return PERCENT_EXPECT;
167 "%expect"[-_]"rr" return PERCENT_EXPECT_RR;
168 "%file-prefix" return PERCENT_FILE_PREFIX;
e9955c83 169 "%fixed"[-_]"output"[-_]"files" return PERCENT_YACC;
58d7a1a1
AD
170 "%initial-action" return PERCENT_INITIAL_ACTION;
171 "%glr-parser" return PERCENT_GLR_PARSER;
0e021770 172 "%language" return PERCENT_LANGUAGE;
58d7a1a1
AD
173 "%left" return PERCENT_LEFT;
174 "%lex-param" return PERCENT_LEX_PARAM;
175 "%locations" return PERCENT_LOCATIONS;
176 "%merge" return PERCENT_MERGE;
177 "%name"[-_]"prefix" return PERCENT_NAME_PREFIX;
178 "%no"[-_]"default"[-_]"prec" return PERCENT_NO_DEFAULT_PREC;
179 "%no"[-_]"lines" return PERCENT_NO_LINES;
180 "%nonassoc" return PERCENT_NONASSOC;
181 "%nondeterministic-parser" return PERCENT_NONDETERMINISTIC_PARSER;
182 "%nterm" return PERCENT_NTERM;
183 "%output" return PERCENT_OUTPUT;
184 "%parse-param" return PERCENT_PARSE_PARAM;
185 "%prec" return PERCENT_PREC;
186 "%printer" return PERCENT_PRINTER;
187 "%pure"[-_]"parser" return PERCENT_PURE_PARSER;
188 "%require" return PERCENT_REQUIRE;
189 "%right" return PERCENT_RIGHT;
190 "%skeleton" return PERCENT_SKELETON;
191 "%start" return PERCENT_START;
192 "%term" return PERCENT_TOKEN;
193 "%token" return PERCENT_TOKEN;
194 "%token"[-_]"table" return PERCENT_TOKEN_TABLE;
195 "%type" return PERCENT_TYPE;
196 "%union" return PERCENT_UNION;
197 "%verbose" return PERCENT_VERBOSE;
198 "%yacc" return PERCENT_YACC;
e9955c83 199
3f2d73f1 200 {directive} {
41141c56 201 complain_at (*loc, _("invalid directive: %s"), quote (yytext));
412f8a59 202 }
900c5db5 203
e9955c83 204 "=" return EQUAL;
e9071366 205 "|" return PIPE;
e9955c83 206 ";" return SEMICOLON;
12e35840 207 "<*>" return TYPE_TAG_ANY;
3ebecc24 208 "<>" return TYPE_TAG_NONE;
e9955c83 209
3f2d73f1 210 {id} {
58d7a1a1 211 val->uniqstr = uniqstr_new (yytext);
3f2d73f1 212 id_loc = *loc;
3f2d73f1 213 BEGIN SC_AFTER_IDENTIFIER;
e9955c83
AD
214 }
215
d8d3f94a 216 {int} {
1452af69
PE
217 val->integer = scan_integer (yytext, 10, *loc);
218 return INT;
219 }
220 0[xX][0-9abcdefABCDEF]+ {
221 val->integer = scan_integer (yytext, 16, *loc);
d8d3f94a
PE
222 return INT;
223 }
e9955c83
AD
224
225 /* Characters. We don't check there is only one. */
3f2d73f1 226 "'" STRING_GROW; token_start = loc->start; BEGIN SC_ESCAPED_CHARACTER;
e9955c83
AD
227
228 /* Strings. */
ca407bdf 229 "\"" token_start = loc->start; BEGIN SC_ESCAPED_STRING;
e9955c83
AD
230
231 /* Prologue. */
3f2d73f1 232 "%{" code_start = loc->start; BEGIN SC_PROLOGUE;
e9955c83
AD
233
234 /* Code in between braces. */
3f2d73f1
PE
235 "{" {
236 STRING_GROW;
237 braces_level = 0;
238 code_start = loc->start;
239 BEGIN SC_BRACED_CODE;
240 }
e9955c83
AD
241
242 /* A type. */
d8d3f94a 243 "<"{tag}">" {
223ff46e 244 obstack_grow (&obstack_for_string, yytext + 1, yyleng - 2);
41141c56 245 STRING_FINISH;
223ff46e 246 val->uniqstr = uniqstr_new (last_string);
41141c56 247 STRING_FREE;
4cdb01db
AD
248 return TYPE;
249 }
250
a706a1cc
PE
251 "%%" {
252 static int percent_percent_count;
e9955c83 253 if (++percent_percent_count == 2)
a2bc9dbc 254 BEGIN SC_EPILOGUE;
e9955c83
AD
255 return PERCENT_PERCENT;
256 }
257
a706a1cc 258 . {
41141c56 259 complain_at (*loc, _("invalid character: %s"), quote (yytext));
3f2d73f1 260 }
379f0ac8
PE
261
262 <<EOF>> {
263 loc->start = loc->end = scanner_cursor;
264 yyterminate ();
265 }
3f2d73f1
PE
266}
267
268
269 /*-----------------------------------------------------------------.
270 | Scanning after an identifier, checking whether a colon is next. |
271 `-----------------------------------------------------------------*/
272
273<SC_AFTER_IDENTIFIER>
274{
275 ":" {
3f2d73f1
PE
276 *loc = id_loc;
277 BEGIN INITIAL;
278 return ID_COLON;
279 }
280 . {
281 scanner_cursor.column -= mbsnwidth (yytext, yyleng, 0);
282 yyless (0);
283 *loc = id_loc;
284 BEGIN INITIAL;
285 return ID;
286 }
287 <<EOF>> {
288 *loc = id_loc;
289 BEGIN INITIAL;
290 return ID;
e9955c83
AD
291 }
292}
293
294
d8d3f94a
PE
295 /*---------------------------------------------------------------.
296 | Scanning a Yacc comment. The initial `/ *' is already eaten. |
297 `---------------------------------------------------------------*/
e9955c83 298
d8d3f94a 299<SC_YACC_COMMENT>
e9955c83 300{
3f2d73f1 301 "*/" BEGIN context_state;
a706a1cc 302 .|\n ;
aa418041 303 <<EOF>> unexpected_eof (token_start, "*/"); BEGIN context_state;
d8d3f94a
PE
304}
305
306
307 /*------------------------------------------------------------.
308 | Scanning a C comment. The initial `/ *' is already eaten. |
309 `------------------------------------------------------------*/
310
311<SC_COMMENT>
312{
3f2d73f1 313 "*"{splice}"/" STRING_GROW; BEGIN context_state;
aa418041 314 <<EOF>> unexpected_eof (token_start, "*/"); BEGIN context_state;
e9955c83
AD
315}
316
317
d8d3f94a
PE
318 /*--------------------------------------------------------------.
319 | Scanning a line comment. The initial `//' is already eaten. |
320 `--------------------------------------------------------------*/
321
322<SC_LINE_COMMENT>
323{
3f2d73f1 324 "\n" STRING_GROW; BEGIN context_state;
41141c56 325 {splice} STRING_GROW;
3f2d73f1 326 <<EOF>> BEGIN context_state;
d8d3f94a
PE
327}
328
329
4febdd96
PE
330 /*------------------------------------------------.
331 | Scanning a Bison string, including its escapes. |
332 | The initial quote is already eaten. |
333 `------------------------------------------------*/
e9955c83
AD
334
335<SC_ESCAPED_STRING>
336{
47aee066
JD
337 "\""|"\n" {
338 if (yytext[0] == '\n')
339 unexpected_newline (token_start, "\"");
340 STRING_FINISH;
341 loc->start = token_start;
342 val->chars = last_string;
343 BEGIN INITIAL;
344 return STRING;
345 }
346 <<EOF>> {
347 unexpected_eof (token_start, "\"");
41141c56 348 STRING_FINISH;
3f2d73f1 349 loc->start = token_start;
223ff46e 350 val->chars = last_string;
a706a1cc 351 BEGIN INITIAL;
e9955c83
AD
352 return STRING;
353 }
e9955c83
AD
354}
355
4febdd96
PE
356 /*----------------------------------------------------------.
357 | Scanning a Bison character literal, decoding its escapes. |
358 | The initial quote is already eaten. |
359 `----------------------------------------------------------*/
e9955c83
AD
360
361<SC_ESCAPED_CHARACTER>
362{
47aee066
JD
363 "'"|"\n" {
364 if (yytext[0] == '\n')
365 unexpected_newline (token_start, "'");
41141c56
PE
366 STRING_GROW;
367 STRING_FINISH;
3f2d73f1 368 loc->start = token_start;
58d7a1a1 369 val->character = last_string[1];
41141c56 370 STRING_FREE;
a706a1cc 371 BEGIN INITIAL;
58d7a1a1 372 return CHAR;
e9955c83 373 }
47aee066
JD
374 <<EOF>> {
375 unexpected_eof (token_start, "'");
376 STRING_FINISH;
377 loc->start = token_start;
378 if (strlen(last_string) > 1)
379 val->character = last_string[1];
380 else
381 val->character = last_string[0];
382 STRING_FREE;
383 BEGIN INITIAL;
384 return CHAR;
385 }
4febdd96 386}
a706a1cc 387
4febdd96
PE
388<SC_ESCAPED_CHARACTER,SC_ESCAPED_STRING>
389{
92ac3705 390 \0 complain_at (*loc, _("invalid null character"));
e9955c83
AD
391}
392
393
394 /*----------------------------.
395 | Decode escaped characters. |
396 `----------------------------*/
397
398<SC_ESCAPED_STRING,SC_ESCAPED_CHARACTER>
399{
d8d3f94a 400 \\[0-7]{1,3} {
4517da37 401 unsigned long int c = strtoul (yytext + 1, NULL, 8);
d8d3f94a 402 if (UCHAR_MAX < c)
3f2d73f1 403 complain_at (*loc, _("invalid escape sequence: %s"), quote (yytext));
05ac60f3 404 else if (! c)
92ac3705 405 complain_at (*loc, _("invalid null character: %s"), quote (yytext));
e9955c83 406 else
223ff46e 407 obstack_1grow (&obstack_for_string, c);
e9955c83
AD
408 }
409
6b0d38ab 410 \\x[0-9abcdefABCDEF]+ {
4517da37
PE
411 verify (UCHAR_MAX < ULONG_MAX);
412 unsigned long int c = strtoul (yytext + 2, NULL, 16);
413 if (UCHAR_MAX < c)
3f2d73f1 414 complain_at (*loc, _("invalid escape sequence: %s"), quote (yytext));
92ac3705
PE
415 else if (! c)
416 complain_at (*loc, _("invalid null character: %s"), quote (yytext));
d8d3f94a 417 else
223ff46e 418 obstack_1grow (&obstack_for_string, c);
e9955c83
AD
419 }
420
223ff46e
PE
421 \\a obstack_1grow (&obstack_for_string, '\a');
422 \\b obstack_1grow (&obstack_for_string, '\b');
423 \\f obstack_1grow (&obstack_for_string, '\f');
424 \\n obstack_1grow (&obstack_for_string, '\n');
425 \\r obstack_1grow (&obstack_for_string, '\r');
426 \\t obstack_1grow (&obstack_for_string, '\t');
427 \\v obstack_1grow (&obstack_for_string, '\v');
412f8a59
PE
428
429 /* \\[\"\'?\\] would be shorter, but it confuses xgettext. */
223ff46e 430 \\("\""|"'"|"?"|"\\") obstack_1grow (&obstack_for_string, yytext[1]);
412f8a59 431
6b0d38ab 432 \\(u|U[0-9abcdefABCDEF]{4})[0-9abcdefABCDEF]{4} {
d8d3f94a
PE
433 int c = convert_ucn_to_byte (yytext);
434 if (c < 0)
3f2d73f1 435 complain_at (*loc, _("invalid escape sequence: %s"), quote (yytext));
92ac3705
PE
436 else if (! c)
437 complain_at (*loc, _("invalid null character: %s"), quote (yytext));
d8d3f94a 438 else
223ff46e 439 obstack_1grow (&obstack_for_string, c);
d8d3f94a 440 }
4f25ebb0 441 \\(.|\n) {
3f2d73f1 442 complain_at (*loc, _("unrecognized escape sequence: %s"), quote (yytext));
41141c56 443 STRING_GROW;
e9955c83
AD
444 }
445}
446
4febdd96
PE
447 /*--------------------------------------------.
448 | Scanning user-code characters and strings. |
449 `--------------------------------------------*/
e9955c83 450
4febdd96
PE
451<SC_CHARACTER,SC_STRING>
452{
e9071366 453 {splice}|\\{splice}[^\n\[\]] STRING_GROW;
4febdd96 454}
e9955c83
AD
455
456<SC_CHARACTER>
457{
4febdd96
PE
458 "'" STRING_GROW; BEGIN context_state;
459 \n unexpected_newline (token_start, "'"); BEGIN context_state;
460 <<EOF>> unexpected_eof (token_start, "'"); BEGIN context_state;
e9955c83
AD
461}
462
e9955c83
AD
463<SC_STRING>
464{
4febdd96
PE
465 "\"" STRING_GROW; BEGIN context_state;
466 \n unexpected_newline (token_start, "\""); BEGIN context_state;
467 <<EOF>> unexpected_eof (token_start, "\""); BEGIN context_state;
e9955c83
AD
468}
469
470
471 /*---------------------------------------------------.
472 | Strings, comments etc. can be found in user code. |
473 `---------------------------------------------------*/
474
475<SC_BRACED_CODE,SC_PROLOGUE,SC_EPILOGUE>
476{
3f2d73f1
PE
477 "'" {
478 STRING_GROW;
479 context_state = YY_START;
480 token_start = loc->start;
481 BEGIN SC_CHARACTER;
482 }
483 "\"" {
484 STRING_GROW;
485 context_state = YY_START;
486 token_start = loc->start;
487 BEGIN SC_STRING;
488 }
489 "/"{splice}"*" {
490 STRING_GROW;
491 context_state = YY_START;
492 token_start = loc->start;
493 BEGIN SC_COMMENT;
494 }
495 "/"{splice}"/" {
496 STRING_GROW;
497 context_state = YY_START;
498 BEGIN SC_LINE_COMMENT;
499 }
e9955c83
AD
500}
501
502
624a35e2 503
58d7a1a1
AD
504 /*-----------------------------------------------------------.
505 | Scanning some code in braces (actions). The initial "{" is |
506 | already eaten. |
507 `-----------------------------------------------------------*/
e9955c83
AD
508
509<SC_BRACED_CODE>
510{
41141c56
PE
511 "{"|"<"{splice}"%" STRING_GROW; braces_level++;
512 "%"{splice}">" STRING_GROW; braces_level--;
e9955c83 513 "}" {
25522739
PE
514 obstack_1grow (&obstack_for_string, '}');
515
2346344a
AD
516 --braces_level;
517 if (braces_level < 0)
e9955c83 518 {
41141c56 519 STRING_FINISH;
3f2d73f1 520 loc->start = code_start;
eb095650 521 val->code = last_string;
a706a1cc 522 BEGIN INITIAL;
58d7a1a1 523 return BRACED_CODE;
e9955c83
AD
524 }
525 }
526
a706a1cc
PE
527 /* Tokenize `<<%' correctly (as `<<' `%') rather than incorrectly
528 (as `<' `<%'). */
41141c56 529 "<"{splice}"<" STRING_GROW;
a706a1cc 530
47aee066
JD
531 <<EOF>> {
532 unexpected_eof (code_start, "}");
533 STRING_FINISH;
534 loc->start = code_start;
eb095650 535 val->code = last_string;
47aee066
JD
536 BEGIN INITIAL;
537 return BRACED_CODE;
538 }
e9955c83
AD
539}
540
541
542 /*--------------------------------------------------------------.
543 | Scanning some prologue: from "%{" (already scanned) to "%}". |
544 `--------------------------------------------------------------*/
545
546<SC_PROLOGUE>
547{
548 "%}" {
41141c56 549 STRING_FINISH;
3f2d73f1 550 loc->start = code_start;
223ff46e 551 val->chars = last_string;
a706a1cc 552 BEGIN INITIAL;
e9955c83
AD
553 return PROLOGUE;
554 }
555
47aee066
JD
556 <<EOF>> {
557 unexpected_eof (code_start, "%}");
558 STRING_FINISH;
559 loc->start = code_start;
560 val->chars = last_string;
561 BEGIN INITIAL;
562 return PROLOGUE;
563 }
e9955c83
AD
564}
565
566
567 /*---------------------------------------------------------------.
568 | Scanning the epilogue (everything after the second "%%", which |
d8d3f94a 569 | has already been eaten). |
e9955c83
AD
570 `---------------------------------------------------------------*/
571
572<SC_EPILOGUE>
573{
e9955c83 574 <<EOF>> {
41141c56 575 STRING_FINISH;
3f2d73f1 576 loc->start = code_start;
223ff46e 577 val->chars = last_string;
a706a1cc 578 BEGIN INITIAL;
e9955c83
AD
579 return EPILOGUE;
580 }
581}
582
583
4febdd96
PE
584 /*-----------------------------------------------------.
585 | By default, grow the string obstack with the input. |
586 `-----------------------------------------------------*/
587
588<SC_COMMENT,SC_LINE_COMMENT,SC_BRACED_CODE,SC_PROLOGUE,SC_EPILOGUE,SC_STRING,SC_CHARACTER,SC_ESCAPED_STRING,SC_ESCAPED_CHARACTER>. |
589<SC_COMMENT,SC_LINE_COMMENT,SC_BRACED_CODE,SC_PROLOGUE,SC_EPILOGUE>\n STRING_GROW;
590
e9955c83
AD
591%%
592
6c30d641
PE
/* Fill BUF with at most SIZE bytes read from FP and return the number
   of bytes BUF holds afterwards.  No '\r' is ever handed back: both
   "\r\n" and a lone '\r' come out as a single '\n', so the returned
   count may be smaller than what fread delivered.  */

static size_t
no_cr_read (FILE *fp, char *buf, size_t size)
{
  size_t nread = fread (buf, 1, size, fp);
  /* OUT is the compaction cursor; it starts at the first '\r', if any.  */
  char *out = nread ? memchr (buf, '\r', nread) : NULL;
  char const *in;
  char const *end;

  /* Nothing was read, or there is nothing to translate.  */
  if (! out)
    return nread;

  end = buf + nread;
  in = ++out;

  for (;;)
    {
      /* OUT sits just past a '\r': rewrite that byte as '\n'.  */
      out[-1] = '\n';

      if (in == end)
        {
          /* The '\r' was the last byte read.  Look one byte ahead in
             the stream: a '\n' completing the pair is swallowed,
             anything else is pushed back for the next read.  The byte
             count is the same whether or not the push-back works.  */
          int next = getc (fp);
          if (next != '\n')
            ungetc (next, fp);
          return out - buf;
        }

      /* Drop the '\n' half of a "\r\n" pair.  */
      if (*in == '\n')
        in++;

      /* Slide bytes down until the next '\r' has been copied.  */
      for (;;)
        {
          char c;

          if (in == end)
            return out - buf;
          c = *in++;
          *out++ = c;
          if (c == '\r')
            break;
        }
    }
}
638
639
f25bfb75 640
1452af69
PE
641/*------------------------------------------------------.
642| Scan NUMBER for a base-BASE integer at location LOC. |
643`------------------------------------------------------*/
644
645static unsigned long int
646scan_integer (char const *number, int base, location loc)
647{
4517da37
PE
648 verify (INT_MAX < ULONG_MAX);
649 unsigned long int num = strtoul (number, NULL, base);
650
651 if (INT_MAX < num)
1452af69
PE
652 {
653 complain_at (loc, _("integer out of range: %s"), quote (number));
654 num = INT_MAX;
655 }
4517da37 656
1452af69
PE
657 return num;
658}
659
660
d8d3f94a
PE
/*------------------------------------------------------------------.
| Convert universal character name UCN to a single-byte character,  |
| and return that character.  Return -1 if UCN does not correspond  |
| to a single-byte character.  UCN is the full escape text; its     |
| first two characters (the backslash and the `u' or `U') are       |
| skipped and the rest is read as hexadecimal.                      |
`------------------------------------------------------------------*/

static int
convert_ucn_to_byte (char const *ucn)
{
  verify (UCHAR_MAX <= INT_MAX);
  unsigned long int code = strtoul (ucn + 2, NULL, 16);
  int byte;

  /* FIXME: Currently we assume Unicode-compatible unibyte characters
     on ASCII hosts (i.e., Latin-1 on hosts with 8-bit bytes).  On
     non-ASCII hosts we support only the portable C character set.
     These limitations should be removed once we add support for
     multibyte characters.  */

  if (UCHAR_MAX < code)
    return -1;

  /* CODE is at most UCHAR_MAX here, hence representable as an int
     (see the verify above).  The result is kept in an int from now on
     so that the -1 "no such byte" marker is never routed through an
     unsigned type and converted back.  */
  byte = code;

#if ! ('$' == 0x24 && '@' == 0x40 && '`' == 0x60 && '~' == 0x7e)
  {
    /* A non-ASCII host.  Use CODE to index into a table of the C
       basic execution character set, which is guaranteed to exist on
       all Standard C platforms.  This table also includes '$', '@',
       and '`', which are not in the basic execution character set but
       which are unibyte characters on all the platforms that we know
       about.  */
    static signed char const table[] =
      {
        '\0',   -1,   -1,   -1,   -1,   -1,   -1, '\a',
        '\b', '\t', '\n', '\v', '\f', '\r',   -1,   -1,
          -1,   -1,   -1,   -1,   -1,   -1,   -1,   -1,
          -1,   -1,   -1,   -1,   -1,   -1,   -1,   -1,
         ' ',  '!',  '"',  '#',  '$',  '%',  '&', '\'',
         '(',  ')',  '*',  '+',  ',',  '-',  '.',  '/',
         '0',  '1',  '2',  '3',  '4',  '5',  '6',  '7',
         '8',  '9',  ':',  ';',  '<',  '=',  '>',  '?',
         '@',  'A',  'B',  'C',  'D',  'E',  'F',  'G',
         'H',  'I',  'J',  'K',  'L',  'M',  'N',  'O',
         'P',  'Q',  'R',  'S',  'T',  'U',  'V',  'W',
         'X',  'Y',  'Z',  '[', '\\',  ']',  '^',  '_',
         '`',  'a',  'b',  'c',  'd',  'e',  'f',  'g',
         'h',  'i',  'j',  'k',  'l',  'm',  'n',  'o',
         'p',  'q',  'r',  's',  't',  'u',  'v',  'w',
         'x',  'y',  'z',  '{',  '|',  '}',  '~'
      };

    /* Codes past the end of the table have no known unibyte form.  */
    byte = code < sizeof table ? table[code] : -1;
  }
#endif

  return byte;
}
716
717
900c5db5
AD
718/*----------------------------------------------------------------.
719| Handle `#line INT "FILE"'. ARGS has already skipped `#line '. |
720`----------------------------------------------------------------*/
721
722static void
4517da37 723handle_syncline (char *args, location loc)
900c5db5 724{
4517da37
PE
725 char *after_num;
726 unsigned long int lineno = strtoul (args, &after_num, 10);
727 char *file = strchr (after_num, '"') + 1;
728 *strchr (file, '"') = '\0';
729 if (INT_MAX <= lineno)
730 {
731 warn_at (loc, _("line number overflow"));
732 lineno = INT_MAX;
733 }
e9071366 734 current_file = uniqstr_new (file);
0c8e079f 735 boundary_set (&scanner_cursor, current_file, lineno, 1);
4517da37
PE
736}
737
738
4febdd96
PE
739/*----------------------------------------------------------------.
740| For a token or comment starting at START, report message MSGID, |
741| which should say that an end marker was found before |
742| the expected TOKEN_END. |
743`----------------------------------------------------------------*/
744
745static void
746unexpected_end (boundary start, char const *msgid, char const *token_end)
747{
748 location loc;
749 loc.start = start;
750 loc.end = scanner_cursor;
751 complain_at (loc, _(msgid), token_end);
752}
753
754
3f2d73f1
PE
755/*------------------------------------------------------------------------.
756| Report an unexpected EOF in a token or comment starting at START. |
757| An end of file was encountered and the expected TOKEN_END was missing. |
3f2d73f1 758`------------------------------------------------------------------------*/
a706a1cc
PE
759
760static void
aa418041 761unexpected_eof (boundary start, char const *token_end)
a706a1cc 762{
4febdd96
PE
763 unexpected_end (start, N_("missing `%s' at end of file"), token_end);
764}
765
766
767/*----------------------------------------.
768| Likewise, but for unexpected newlines. |
769`----------------------------------------*/
770
771static void
772unexpected_newline (boundary start, char const *token_end)
773{
774 unexpected_end (start, N_("missing `%s' at end of line"), token_end);
a706a1cc
PE
775}
776
777
f25bfb75
AD
778/*-------------------------.
779| Initialize the scanner. |
780`-------------------------*/
781
1d6412ad 782void
e9071366 783gram_scanner_initialize (void)
1d6412ad 784{
223ff46e 785 obstack_init (&obstack_for_string);
1d6412ad
AD
786}
787
788
f25bfb75
AD
789/*-----------------------------------------------.
790| Free all the memory allocated to the scanner. |
791`-----------------------------------------------*/
792
4cdb01db 793void
e9071366 794gram_scanner_free (void)
4cdb01db 795{
223ff46e 796 obstack_free (&obstack_for_string, 0);
536545f3 797 /* Reclaim Flex's buffers. */
580b8926 798 yylex_destroy ();
4cdb01db 799}