]> git.saurik.com Git - bison.git/blame - src/scan-gram.l
* NEWS (2.4.3): Mention fix for Sun Studio C++.
[bison.git] / src / scan-gram.l
CommitLineData
e9955c83 1/* Bison Grammar Scanner -*- C -*-
3b1e470c 2
219c26ea 3 Copyright (C) 2002-2007, 2009-2010 Free Software Foundation, Inc.
e9955c83
AD
4
5 This file is part of Bison, the GNU Compiler Compiler.
6
f16b0819 7 This program is free software: you can redistribute it and/or modify
e9955c83 8 it under the terms of the GNU General Public License as published by
f16b0819 9 the Free Software Foundation, either version 3 of the License, or
e9955c83
AD
10 (at your option) any later version.
11
12 This program is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
16
17 You should have received a copy of the GNU General Public License
f16b0819 18 along with this program. If not, see <http://www.gnu.org/licenses/>. */
e9955c83 19
42f8609b 20%option debug nodefault noinput nounput noyywrap never-interactive
e9955c83
AD
21%option prefix="gram_" outfile="lex.yy.c"
22
23%{
4f6e011e
PE
24/* Work around a bug in flex 2.5.31. See Debian bug 333231
25 <http://bugs.debian.org/cgi-bin/bugreport.cgi?bug=333231>. */
26#undef gram_wrap
27#define gram_wrap() 1
28
e9071366
AD
29#define FLEX_PREFIX(Id) gram_ ## Id
30#include "flex-scanner.h"
223ff46e 31
e9955c83 32#include "complain.h"
3f2d73f1 33#include "files.h"
e9955c83 34#include "gram.h"
ca407bdf 35#include "quotearg.h"
e9955c83 36#include "reader.h"
223ff46e 37#include "uniqstr.h"
e9955c83 38
b1a4261e 39#include <ctype.h>
e9071366
AD
40#include <mbswidth.h>
41#include <quote.h>
42
43#include "scan-gram.h"
44
45#define YY_DECL GRAM_LEX_DECL
2346344a 46
3f2d73f1 47#define YY_USER_INIT \
e9071366 48 code_start = scanner_cursor = loc->start; \
dc9701e8 49
3f2d73f1 50/* Location of scanner cursor. */
4a678af8 51static boundary scanner_cursor;
41141c56 52
e9071366 53#define YY_USER_ACTION location_compute (loc, &scanner_cursor, yytext, yyleng);
d8d3f94a 54
6c30d641 55static size_t no_cr_read (FILE *, char *, size_t);
d8d3f94a
PE
56#define YY_INPUT(buf, result, size) ((result) = no_cr_read (yyin, buf, size))
57
7ec2d4cd 58/* A string representing the most recently saved token. */
7c0c6181 59static char *last_string;
7ec2d4cd 60
7ec2d4cd 61void
e9071366 62gram_scanner_last_string_free (void)
7ec2d4cd 63{
41141c56 64 STRING_FREE;
7ec2d4cd 65}
e9955c83 66
4517da37 67static void handle_syncline (char *, location);
1452af69 68static unsigned long int scan_integer (char const *p, int base, location loc);
d8d3f94a 69static int convert_ucn_to_byte (char const *hex_text);
aa418041 70static void unexpected_eof (boundary, char const *);
4febdd96 71static void unexpected_newline (boundary, char const *);
e9955c83
AD
72
73%}
e9071366
AD
74 /* A C-like comment in directives/rules. */
75%x SC_YACC_COMMENT
76 /* Strings and characters in directives/rules. */
e9955c83 77%x SC_ESCAPED_STRING SC_ESCAPED_CHARACTER
e9071366
AD
78 /* An identifier was just read in directives/rules. Special state
79 to capture the sequence `identifier :'. */
80%x SC_AFTER_IDENTIFIER
e9071366
AD
81
82 /* Three types of user code:
83 - prologue (code between `%{' `%}' in the first section, before %%);
84 - actions, printers, union, etc, (between braces in the middle section);
85 - epilogue (everything after the second %%). */
86%x SC_PROLOGUE SC_BRACED_CODE SC_EPILOGUE
87 /* C and C++ comments in code. */
88%x SC_COMMENT SC_LINE_COMMENT
89 /* Strings and characters in code. */
90%x SC_STRING SC_CHARACTER
e9955c83 91
29c01725
AD
92letter [.abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ_]
93id {letter}({letter}|[0-9])*
94directive %{letter}({letter}|[0-9]|-)*
624a35e2 95int [0-9]+
d8d3f94a
PE
96
97/* POSIX says that a tag must be both an id and a C union member, but
98 historically almost any character is allowed in a tag. We disallow
99 NUL and newline, as this simplifies our implementation. */
100tag [^\0\n>]+
101
102/* Zero or more instances of backslash-newline. Following GCC, allow
103 white space between the backslash and the newline. */
104splice (\\[ \f\t\v]*\n)*
e9955c83
AD
105
106%%
107%{
a706a1cc 108 /* Nesting level of the current code in braces. */
5362ed19 109 int braces_level IF_LINT (= 0);
1a9e39f1 110
3f2d73f1 111 /* Parent context state, when applicable. */
5362ed19 112 int context_state IF_LINT (= 0);
a706a1cc 113
3f2d73f1 114 /* Location of most recent identifier, when applicable. */
a2bc9dbc 115 location id_loc IF_LINT (= empty_location);
3f2d73f1 116
a2bc9dbc
PE
117 /* Where containing code started, when applicable. Its initial
118 value is relevant only when yylex is invoked in the SC_EPILOGUE
119 start condition. */
120 boundary code_start = scanner_cursor;
3f2d73f1 121
223ff46e
PE
122 /* Where containing comment or string or character literal started,
123 when applicable. */
a2bc9dbc 124 boundary token_start IF_LINT (= scanner_cursor);
e9955c83
AD
125%}
126
127
3f2d73f1
PE
128 /*-----------------------.
129 | Scanning white space. |
130 `-----------------------*/
131
58d7a1a1 132<INITIAL,SC_AFTER_IDENTIFIER>
3f2d73f1 133{
4febdd96 134 /* Comments and white space. */
83adb046 135 "," warn_at (*loc, _("stray `,' treated as white space"));
4febdd96 136 [ \f\n\t\v] |
3f2d73f1 137 "//".* ;
83adb046
PE
138 "/*" {
139 token_start = loc->start;
140 context_state = YY_START;
141 BEGIN SC_YACC_COMMENT;
142 }
3f2d73f1
PE
143
144 /* #line directives are not documented, and may be withdrawn or
145 modified in future versions of Bison. */
146 ^"#line "{int}" \"".*"\"\n" {
4517da37 147 handle_syncline (yytext + sizeof "#line " - 1, *loc);
3f2d73f1
PE
148 }
149}
150
151
e9955c83
AD
152 /*----------------------------.
153 | Scanning Bison directives. |
154 `----------------------------*/
155<INITIAL>
156{
58d7a1a1 157 "%binary" return PERCENT_NONASSOC;
136a0f76 158 "%code" return PERCENT_CODE;
58d7a1a1
AD
159 "%debug" return PERCENT_DEBUG;
160 "%default"[-_]"prec" return PERCENT_DEFAULT_PREC;
161 "%define" return PERCENT_DEFINE;
162 "%defines" return PERCENT_DEFINES;
163 "%destructor" return PERCENT_DESTRUCTOR;
164 "%dprec" return PERCENT_DPREC;
165 "%error"[-_]"verbose" return PERCENT_ERROR_VERBOSE;
166 "%expect" return PERCENT_EXPECT;
167 "%expect"[-_]"rr" return PERCENT_EXPECT_RR;
168 "%file-prefix" return PERCENT_FILE_PREFIX;
e9955c83 169 "%fixed"[-_]"output"[-_]"files" return PERCENT_YACC;
58d7a1a1
AD
170 "%initial-action" return PERCENT_INITIAL_ACTION;
171 "%glr-parser" return PERCENT_GLR_PARSER;
0e021770 172 "%language" return PERCENT_LANGUAGE;
58d7a1a1
AD
173 "%left" return PERCENT_LEFT;
174 "%lex-param" return PERCENT_LEX_PARAM;
175 "%locations" return PERCENT_LOCATIONS;
176 "%merge" return PERCENT_MERGE;
177 "%name"[-_]"prefix" return PERCENT_NAME_PREFIX;
178 "%no"[-_]"default"[-_]"prec" return PERCENT_NO_DEFAULT_PREC;
179 "%no"[-_]"lines" return PERCENT_NO_LINES;
180 "%nonassoc" return PERCENT_NONASSOC;
181 "%nondeterministic-parser" return PERCENT_NONDETERMINISTIC_PARSER;
182 "%nterm" return PERCENT_NTERM;
183 "%output" return PERCENT_OUTPUT;
184 "%parse-param" return PERCENT_PARSE_PARAM;
185 "%prec" return PERCENT_PREC;
186 "%printer" return PERCENT_PRINTER;
187 "%pure"[-_]"parser" return PERCENT_PURE_PARSER;
188 "%require" return PERCENT_REQUIRE;
189 "%right" return PERCENT_RIGHT;
190 "%skeleton" return PERCENT_SKELETON;
191 "%start" return PERCENT_START;
192 "%term" return PERCENT_TOKEN;
193 "%token" return PERCENT_TOKEN;
194 "%token"[-_]"table" return PERCENT_TOKEN_TABLE;
195 "%type" return PERCENT_TYPE;
196 "%union" return PERCENT_UNION;
197 "%verbose" return PERCENT_VERBOSE;
198 "%yacc" return PERCENT_YACC;
e9955c83 199
3f2d73f1 200 {directive} {
41141c56 201 complain_at (*loc, _("invalid directive: %s"), quote (yytext));
412f8a59 202 }
900c5db5 203
e9955c83 204 "=" return EQUAL;
e9071366 205 "|" return PIPE;
e9955c83 206 ";" return SEMICOLON;
12e35840 207 "<*>" return TYPE_TAG_ANY;
3ebecc24 208 "<>" return TYPE_TAG_NONE;
e9955c83 209
3f2d73f1 210 {id} {
58d7a1a1 211 val->uniqstr = uniqstr_new (yytext);
3f2d73f1 212 id_loc = *loc;
3f2d73f1 213 BEGIN SC_AFTER_IDENTIFIER;
e9955c83
AD
214 }
215
d8d3f94a 216 {int} {
1452af69
PE
217 val->integer = scan_integer (yytext, 10, *loc);
218 return INT;
219 }
220 0[xX][0-9abcdefABCDEF]+ {
221 val->integer = scan_integer (yytext, 16, *loc);
d8d3f94a
PE
222 return INT;
223 }
e9955c83
AD
224
225 /* Characters. We don't check there is only one. */
3f2d73f1 226 "'" STRING_GROW; token_start = loc->start; BEGIN SC_ESCAPED_CHARACTER;
e9955c83
AD
227
228 /* Strings. */
ca407bdf 229 "\"" token_start = loc->start; BEGIN SC_ESCAPED_STRING;
e9955c83
AD
230
231 /* Prologue. */
3f2d73f1 232 "%{" code_start = loc->start; BEGIN SC_PROLOGUE;
e9955c83
AD
233
234 /* Code in between braces. */
3f2d73f1
PE
235 "{" {
236 STRING_GROW;
237 braces_level = 0;
238 code_start = loc->start;
239 BEGIN SC_BRACED_CODE;
240 }
e9955c83
AD
241
242 /* A type. */
d8d3f94a 243 "<"{tag}">" {
223ff46e 244 obstack_grow (&obstack_for_string, yytext + 1, yyleng - 2);
41141c56 245 STRING_FINISH;
223ff46e 246 val->uniqstr = uniqstr_new (last_string);
41141c56 247 STRING_FREE;
4cdb01db
AD
248 return TYPE;
249 }
250
a706a1cc
PE
251 "%%" {
252 static int percent_percent_count;
e9955c83 253 if (++percent_percent_count == 2)
a2bc9dbc 254 BEGIN SC_EPILOGUE;
e9955c83
AD
255 return PERCENT_PERCENT;
256 }
257
a706a1cc 258 . {
41141c56 259 complain_at (*loc, _("invalid character: %s"), quote (yytext));
3f2d73f1 260 }
379f0ac8
PE
261
262 <<EOF>> {
263 loc->start = loc->end = scanner_cursor;
264 yyterminate ();
265 }
3f2d73f1
PE
266}
267
268
269 /*-----------------------------------------------------------------.
270 | Scanning after an identifier, checking whether a colon is next. |
271 `-----------------------------------------------------------------*/
272
273<SC_AFTER_IDENTIFIER>
274{
275 ":" {
3f2d73f1
PE
276 *loc = id_loc;
277 BEGIN INITIAL;
278 return ID_COLON;
279 }
280 . {
281 scanner_cursor.column -= mbsnwidth (yytext, yyleng, 0);
282 yyless (0);
283 *loc = id_loc;
284 BEGIN INITIAL;
285 return ID;
286 }
287 <<EOF>> {
288 *loc = id_loc;
289 BEGIN INITIAL;
290 return ID;
e9955c83
AD
291 }
292}
293
294
d8d3f94a
PE
295 /*---------------------------------------------------------------.
296 | Scanning a Yacc comment. The initial `/ *' is already eaten. |
297 `---------------------------------------------------------------*/
e9955c83 298
d8d3f94a 299<SC_YACC_COMMENT>
e9955c83 300{
3f2d73f1 301 "*/" BEGIN context_state;
a706a1cc 302 .|\n ;
aa418041 303 <<EOF>> unexpected_eof (token_start, "*/"); BEGIN context_state;
d8d3f94a
PE
304}
305
306
307 /*------------------------------------------------------------.
308 | Scanning a C comment. The initial `/ *' is already eaten. |
309 `------------------------------------------------------------*/
310
311<SC_COMMENT>
312{
3f2d73f1 313 "*"{splice}"/" STRING_GROW; BEGIN context_state;
aa418041 314 <<EOF>> unexpected_eof (token_start, "*/"); BEGIN context_state;
e9955c83
AD
315}
316
317
d8d3f94a
PE
318 /*--------------------------------------------------------------.
319 | Scanning a line comment. The initial `//' is already eaten. |
320 `--------------------------------------------------------------*/
321
322<SC_LINE_COMMENT>
323{
3f2d73f1 324 "\n" STRING_GROW; BEGIN context_state;
41141c56 325 {splice} STRING_GROW;
3f2d73f1 326 <<EOF>> BEGIN context_state;
d8d3f94a
PE
327}
328
329
4febdd96
PE
330 /*------------------------------------------------.
331 | Scanning a Bison string, including its escapes. |
332 | The initial quote is already eaten. |
333 `------------------------------------------------*/
e9955c83
AD
334
335<SC_ESCAPED_STRING>
336{
47aee066
JD
337 "\""|"\n" {
338 if (yytext[0] == '\n')
339 unexpected_newline (token_start, "\"");
340 STRING_FINISH;
341 loc->start = token_start;
342 val->chars = last_string;
343 BEGIN INITIAL;
344 return STRING;
345 }
346 <<EOF>> {
347 unexpected_eof (token_start, "\"");
41141c56 348 STRING_FINISH;
3f2d73f1 349 loc->start = token_start;
223ff46e 350 val->chars = last_string;
a706a1cc 351 BEGIN INITIAL;
e9955c83
AD
352 return STRING;
353 }
e9955c83
AD
354}
355
4febdd96
PE
356 /*----------------------------------------------------------.
357 | Scanning a Bison character literal, decoding its escapes. |
358 | The initial quote is already eaten. |
359 `----------------------------------------------------------*/
e9955c83
AD
360
361<SC_ESCAPED_CHARACTER>
362{
47aee066
JD
363 "'"|"\n" {
364 if (yytext[0] == '\n')
365 unexpected_newline (token_start, "'");
41141c56
PE
366 STRING_GROW;
367 STRING_FINISH;
3f2d73f1 368 loc->start = token_start;
58d7a1a1 369 val->character = last_string[1];
41141c56 370 STRING_FREE;
a706a1cc 371 BEGIN INITIAL;
58d7a1a1 372 return CHAR;
e9955c83 373 }
47aee066
JD
374 <<EOF>> {
375 unexpected_eof (token_start, "'");
376 STRING_FINISH;
377 loc->start = token_start;
378 if (strlen(last_string) > 1)
379 val->character = last_string[1];
380 else
381 val->character = last_string[0];
382 STRING_FREE;
383 BEGIN INITIAL;
384 return CHAR;
385 }
4febdd96 386}
a706a1cc 387
4febdd96
PE
388<SC_ESCAPED_CHARACTER,SC_ESCAPED_STRING>
389{
92ac3705 390 \0 complain_at (*loc, _("invalid null character"));
e9955c83
AD
391}
392
393
394 /*----------------------------.
395 | Decode escaped characters. |
396 `----------------------------*/
397
398<SC_ESCAPED_STRING,SC_ESCAPED_CHARACTER>
399{
d8d3f94a 400 \\[0-7]{1,3} {
4517da37 401 unsigned long int c = strtoul (yytext + 1, NULL, 8);
b1a4261e
JD
402 if (!c || UCHAR_MAX < c)
403 complain_at (*loc, _("invalid number after \\-escape: %s"),
404 yytext+1);
e9955c83 405 else
223ff46e 406 obstack_1grow (&obstack_for_string, c);
e9955c83
AD
407 }
408
6b0d38ab 409 \\x[0-9abcdefABCDEF]+ {
4517da37
PE
410 verify (UCHAR_MAX < ULONG_MAX);
411 unsigned long int c = strtoul (yytext + 2, NULL, 16);
b1a4261e
JD
412 if (!c || UCHAR_MAX < c)
413 complain_at (*loc, _("invalid number after \\-escape: %s"),
414 yytext+1);
d8d3f94a 415 else
223ff46e 416 obstack_1grow (&obstack_for_string, c);
e9955c83
AD
417 }
418
223ff46e
PE
419 \\a obstack_1grow (&obstack_for_string, '\a');
420 \\b obstack_1grow (&obstack_for_string, '\b');
421 \\f obstack_1grow (&obstack_for_string, '\f');
422 \\n obstack_1grow (&obstack_for_string, '\n');
423 \\r obstack_1grow (&obstack_for_string, '\r');
424 \\t obstack_1grow (&obstack_for_string, '\t');
425 \\v obstack_1grow (&obstack_for_string, '\v');
412f8a59
PE
426
427 /* \\[\"\'?\\] would be shorter, but it confuses xgettext. */
223ff46e 428 \\("\""|"'"|"?"|"\\") obstack_1grow (&obstack_for_string, yytext[1]);
412f8a59 429
6b0d38ab 430 \\(u|U[0-9abcdefABCDEF]{4})[0-9abcdefABCDEF]{4} {
d8d3f94a 431 int c = convert_ucn_to_byte (yytext);
b1a4261e
JD
432 if (c <= 0)
433 complain_at (*loc, _("invalid number after \\-escape: %s"),
434 yytext+1);
d8d3f94a 435 else
223ff46e 436 obstack_1grow (&obstack_for_string, c);
d8d3f94a 437 }
4f25ebb0 438 \\(.|\n) {
b1a4261e 439 char const *p = yytext + 1;
321fe707 440 /* Quote only if escaping won't make the character visible. */
bbbbe221 441 if (isspace ((unsigned char) *p) && isprint ((unsigned char) *p))
321fe707 442 p = quote (p);
b1a4261e
JD
443 else
444 p = quotearg_style_mem (escape_quoting_style, p, 1);
445 complain_at (*loc, _("invalid character after \\-escape: %s"), p);
e9955c83
AD
446 }
447}
448
4febdd96
PE
449 /*--------------------------------------------.
450 | Scanning user-code characters and strings. |
451 `--------------------------------------------*/
e9955c83 452
4febdd96
PE
453<SC_CHARACTER,SC_STRING>
454{
e9071366 455 {splice}|\\{splice}[^\n\[\]] STRING_GROW;
4febdd96 456}
e9955c83
AD
457
458<SC_CHARACTER>
459{
4febdd96
PE
460 "'" STRING_GROW; BEGIN context_state;
461 \n unexpected_newline (token_start, "'"); BEGIN context_state;
462 <<EOF>> unexpected_eof (token_start, "'"); BEGIN context_state;
e9955c83
AD
463}
464
e9955c83
AD
465<SC_STRING>
466{
4febdd96
PE
467 "\"" STRING_GROW; BEGIN context_state;
468 \n unexpected_newline (token_start, "\""); BEGIN context_state;
469 <<EOF>> unexpected_eof (token_start, "\""); BEGIN context_state;
e9955c83
AD
470}
471
472
473 /*---------------------------------------------------.
474 | Strings, comments etc. can be found in user code. |
475 `---------------------------------------------------*/
476
477<SC_BRACED_CODE,SC_PROLOGUE,SC_EPILOGUE>
478{
3f2d73f1
PE
479 "'" {
480 STRING_GROW;
481 context_state = YY_START;
482 token_start = loc->start;
483 BEGIN SC_CHARACTER;
484 }
485 "\"" {
486 STRING_GROW;
487 context_state = YY_START;
488 token_start = loc->start;
489 BEGIN SC_STRING;
490 }
491 "/"{splice}"*" {
492 STRING_GROW;
493 context_state = YY_START;
494 token_start = loc->start;
495 BEGIN SC_COMMENT;
496 }
497 "/"{splice}"/" {
498 STRING_GROW;
499 context_state = YY_START;
500 BEGIN SC_LINE_COMMENT;
501 }
e9955c83
AD
502}
503
504
624a35e2 505
58d7a1a1
AD
506 /*-----------------------------------------------------------.
507 | Scanning some code in braces (actions). The initial "{" is |
508 | already eaten. |
509 `-----------------------------------------------------------*/
e9955c83
AD
510
511<SC_BRACED_CODE>
512{
41141c56
PE
513 "{"|"<"{splice}"%" STRING_GROW; braces_level++;
514 "%"{splice}">" STRING_GROW; braces_level--;
e9955c83 515 "}" {
25522739
PE
516 obstack_1grow (&obstack_for_string, '}');
517
2346344a
AD
518 --braces_level;
519 if (braces_level < 0)
e9955c83 520 {
41141c56 521 STRING_FINISH;
3f2d73f1 522 loc->start = code_start;
eb095650 523 val->code = last_string;
a706a1cc 524 BEGIN INITIAL;
58d7a1a1 525 return BRACED_CODE;
e9955c83
AD
526 }
527 }
528
a706a1cc
PE
529 /* Tokenize `<<%' correctly (as `<<' `%') rather than incorrectly
530 (as `<' `<%'). */
41141c56 531 "<"{splice}"<" STRING_GROW;
a706a1cc 532
47aee066
JD
533 <<EOF>> {
534 unexpected_eof (code_start, "}");
535 STRING_FINISH;
536 loc->start = code_start;
eb095650 537 val->code = last_string;
47aee066
JD
538 BEGIN INITIAL;
539 return BRACED_CODE;
540 }
e9955c83
AD
541}
542
543
544 /*--------------------------------------------------------------.
545 | Scanning some prologue: from "%{" (already scanned) to "%}". |
546 `--------------------------------------------------------------*/
547
548<SC_PROLOGUE>
549{
550 "%}" {
41141c56 551 STRING_FINISH;
3f2d73f1 552 loc->start = code_start;
223ff46e 553 val->chars = last_string;
a706a1cc 554 BEGIN INITIAL;
e9955c83
AD
555 return PROLOGUE;
556 }
557
47aee066
JD
558 <<EOF>> {
559 unexpected_eof (code_start, "%}");
560 STRING_FINISH;
561 loc->start = code_start;
562 val->chars = last_string;
563 BEGIN INITIAL;
564 return PROLOGUE;
565 }
e9955c83
AD
566}
567
568
569 /*---------------------------------------------------------------.
570 | Scanning the epilogue (everything after the second "%%", which |
d8d3f94a 571 | has already been eaten). |
e9955c83
AD
572 `---------------------------------------------------------------*/
573
574<SC_EPILOGUE>
575{
e9955c83 576 <<EOF>> {
41141c56 577 STRING_FINISH;
3f2d73f1 578 loc->start = code_start;
223ff46e 579 val->chars = last_string;
a706a1cc 580 BEGIN INITIAL;
e9955c83
AD
581 return EPILOGUE;
582 }
583}
584
585
4febdd96
PE
586 /*-----------------------------------------------------.
587 | By default, grow the string obstack with the input. |
588 `-----------------------------------------------------*/
589
590<SC_COMMENT,SC_LINE_COMMENT,SC_BRACED_CODE,SC_PROLOGUE,SC_EPILOGUE,SC_STRING,SC_CHARACTER,SC_ESCAPED_STRING,SC_ESCAPED_CHARACTER>. |
591<SC_COMMENT,SC_LINE_COMMENT,SC_BRACED_CODE,SC_PROLOGUE,SC_EPILOGUE>\n STRING_GROW;
592
e9955c83
AD
593%%
594
/* Read bytes from FP into buffer BUF of size SIZE.  Return the
   number of bytes read.  Remove '\r' from input, treating \r\n
   and isolated \r as \n.

   The returned count is the number of bytes left in BUF after the
   conversion, which can be smaller than the number of bytes consumed
   from FP.  A return value of 0 means fread delivered nothing.  */

static size_t
no_cr_read (FILE *fp, char *buf, size_t size)
{
  size_t bytes_read = fread (buf, 1, size, fp);
  if (bytes_read)
    {
      char *w = memchr (buf, '\r', bytes_read);
      if (w)
        {
          /* W is the write cursor and R the read cursor.  Both start
             just past the first '\r'; the buffer is compacted in
             place, so W never gets ahead of R.  */
          char const *r = ++w;
          char const *lim = buf + bytes_read;

          for (;;)
            {
              /* Found an '\r'.  Treat it like '\n', but ignore any
                 '\n' that immediately follows.  */
              w[-1] = '\n';
              if (r == lim)
                {
                  /* The '\r' was the last byte of this chunk, so the
                     matching '\n' (if any) is still in the stream.
                     Swallow it; push back anything else, and give up
                     if the push-back fails.  */
                  int ch = getc (fp);
                  if (ch != '\n' && ungetc (ch, fp) != ch)
                    break;
                }
              else if (*r == '\n')
                r++;

              /* Copy until the next '\r'.  */
              do
                {
                  if (r == lim)
                    return w - buf;
                }
              while ((*w++ = *r++) != '\r');
            }

          return w - buf;
        }
    }

  /* No '\r' in this chunk (or nothing read): BUF is already final.  */
  return bytes_read;
}
640
641
f25bfb75 642
1452af69
PE
643/*------------------------------------------------------.
644| Scan NUMBER for a base-BASE integer at location LOC. |
645`------------------------------------------------------*/
646
647static unsigned long int
648scan_integer (char const *number, int base, location loc)
649{
4517da37
PE
650 verify (INT_MAX < ULONG_MAX);
651 unsigned long int num = strtoul (number, NULL, base);
652
653 if (INT_MAX < num)
1452af69
PE
654 {
655 complain_at (loc, _("integer out of range: %s"), quote (number));
656 num = INT_MAX;
657 }
4517da37 658
1452af69
PE
659 return num;
660}
661
662
/*------------------------------------------------------------------.
| Convert universal character name UCN to a single-byte character, |
| and return that character.  Return -1 if UCN does not correspond |
| to a single-byte character.                                      |
`------------------------------------------------------------------*/

static int
convert_ucn_to_byte (char const *ucn)
{
  verify (UCHAR_MAX <= INT_MAX);
  /* UCN + 2 skips the two leading characters of the escape; the
     scanner rule that calls us passes text starting with a backslash
     followed by `u' or `U'.  */
  unsigned long int code = strtoul (ucn + 2, NULL, 16);
  int byte;

  /* FIXME: Currently we assume Unicode-compatible unibyte characters
     on ASCII hosts (i.e., Latin-1 on hosts with 8-bit bytes).  On
     non-ASCII hosts we support only the portable C character set.
     These limitations should be removed once we add support for
     multibyte characters.  */

  if (UCHAR_MAX < code)
    return -1;

#if ! ('$' == 0x24 && '@' == 0x40 && '`' == 0x60 && '~' == 0x7e)
  {
    /* A non-ASCII host.  Use CODE to index into a table of the C
       basic execution character set, which is guaranteed to exist on
       all Standard C platforms.  This table also includes '$', '@',
       and '`', which are not in the basic execution character set but
       which are unibyte characters on all the platforms that we know
       about.  */
    static signed char const table[] =
      {
        '\0',   -1,   -1,   -1,   -1,   -1,   -1, '\a',
        '\b', '\t', '\n', '\v', '\f', '\r',   -1,   -1,
          -1,   -1,   -1,   -1,   -1,   -1,   -1,   -1,
          -1,   -1,   -1,   -1,   -1,   -1,   -1,   -1,
         ' ',  '!',  '"',  '#',  '$',  '%',  '&', '\'',
         '(',  ')',  '*',  '+',  ',',  '-',  '.',  '/',
         '0',  '1',  '2',  '3',  '4',  '5',  '6',  '7',
         '8',  '9',  ':',  ';',  '<',  '=',  '>',  '?',
         '@',  'A',  'B',  'C',  'D',  'E',  'F',  'G',
         'H',  'I',  'J',  'K',  'L',  'M',  'N',  'O',
         'P',  'Q',  'R',  'S',  'T',  'U',  'V',  'W',
         'X',  'Y',  'Z',  '[', '\\',  ']',  '^',  '_',
         '`',  'a',  'b',  'c',  'd',  'e',  'f',  'g',
         'h',  'i',  'j',  'k',  'l',  'm',  'n',  'o',
         'p',  'q',  'r',  's',  't',  'u',  'v',  'w',
         'x',  'y',  'z',  '{',  '|',  '}',  '~'
      };

    /* Keep the -1 "no such character" marker in an int; storing it
       in the unsigned CODE and converting back on return would rely
       on an implementation-defined conversion.  */
    byte = code < sizeof table ? table[code] : -1;
  }
#else
  /* ASCII host: CODE is at most UCHAR_MAX here, so it fits in an int.  */
  byte = code;
#endif

  return byte;
}
718
719
900c5db5
AD
720/*----------------------------------------------------------------.
721| Handle `#line INT "FILE"'. ARGS has already skipped `#line '. |
722`----------------------------------------------------------------*/
723
724static void
4517da37 725handle_syncline (char *args, location loc)
900c5db5 726{
4517da37
PE
727 char *after_num;
728 unsigned long int lineno = strtoul (args, &after_num, 10);
729 char *file = strchr (after_num, '"') + 1;
730 *strchr (file, '"') = '\0';
731 if (INT_MAX <= lineno)
732 {
733 warn_at (loc, _("line number overflow"));
734 lineno = INT_MAX;
735 }
e9071366 736 current_file = uniqstr_new (file);
0c8e079f 737 boundary_set (&scanner_cursor, current_file, lineno, 1);
4517da37
PE
738}
739
740
4febdd96
PE
741/*----------------------------------------------------------------.
742| For a token or comment starting at START, report message MSGID, |
743| which should say that an end marker was found before |
744| the expected TOKEN_END. |
745`----------------------------------------------------------------*/
746
747static void
748unexpected_end (boundary start, char const *msgid, char const *token_end)
749{
750 location loc;
751 loc.start = start;
752 loc.end = scanner_cursor;
753 complain_at (loc, _(msgid), token_end);
754}
755
756
3f2d73f1
PE
757/*------------------------------------------------------------------------.
758| Report an unexpected EOF in a token or comment starting at START. |
759| An end of file was encountered and the expected TOKEN_END was missing. |
3f2d73f1 760`------------------------------------------------------------------------*/
a706a1cc
PE
761
762static void
aa418041 763unexpected_eof (boundary start, char const *token_end)
a706a1cc 764{
4febdd96
PE
765 unexpected_end (start, N_("missing `%s' at end of file"), token_end);
766}
767
768
769/*----------------------------------------.
770| Likewise, but for unexpected newlines. |
771`----------------------------------------*/
772
773static void
774unexpected_newline (boundary start, char const *token_end)
775{
776 unexpected_end (start, N_("missing `%s' at end of line"), token_end);
a706a1cc
PE
777}
778
779
f25bfb75
AD
780/*-------------------------.
781| Initialize the scanner. |
782`-------------------------*/
783
1d6412ad 784void
e9071366 785gram_scanner_initialize (void)
1d6412ad 786{
223ff46e 787 obstack_init (&obstack_for_string);
1d6412ad
AD
788}
789
790
f25bfb75
AD
791/*-----------------------------------------------.
792| Free all the memory allocated to the scanner. |
793`-----------------------------------------------*/
794
4cdb01db 795void
e9071366 796gram_scanner_free (void)
4cdb01db 797{
223ff46e 798 obstack_free (&obstack_for_string, 0);
536545f3 799 /* Reclaim Flex's buffers. */
580b8926 800 yylex_destroy ();
4cdb01db 801}