]> git.saurik.com Git - bison.git/blame - src/scan-gram.l
scan-gram: avoid portability trap with ctype usage.
[bison.git] / src / scan-gram.l
CommitLineData
e9955c83 1/* Bison Grammar Scanner -*- C -*-
3b1e470c 2
98744608
JD
3 Copyright (C) 2002, 2003, 2004, 2005, 2006, 2007, 2009 Free Software
4 Foundation, Inc.
e9955c83
AD
5
6 This file is part of Bison, the GNU Compiler Compiler.
7
f16b0819 8 This program is free software: you can redistribute it and/or modify
e9955c83 9 it under the terms of the GNU General Public License as published by
f16b0819 10 the Free Software Foundation, either version 3 of the License, or
e9955c83
AD
11 (at your option) any later version.
12
13 This program is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU General Public License for more details.
17
18 You should have received a copy of the GNU General Public License
f16b0819 19 along with this program. If not, see <http://www.gnu.org/licenses/>. */
e9955c83 20
42f8609b 21%option debug nodefault noinput nounput noyywrap never-interactive
e9955c83
AD
22%option prefix="gram_" outfile="lex.yy.c"
23
24%{
4f6e011e
PE
25/* Work around a bug in flex 2.5.31. See Debian bug 333231
26 <http://bugs.debian.org/cgi-bin/bugreport.cgi?bug=333231>. */
27#undef gram_wrap
28#define gram_wrap() 1
29
e9071366
AD
30#define FLEX_PREFIX(Id) gram_ ## Id
31#include "flex-scanner.h"
223ff46e 32
e9955c83 33#include "complain.h"
3f2d73f1 34#include "files.h"
e9955c83 35#include "gram.h"
ca407bdf 36#include "quotearg.h"
e9955c83 37#include "reader.h"
223ff46e 38#include "uniqstr.h"
e9955c83 39
b1a4261e 40#include <ctype.h>
e9071366
AD
41#include <mbswidth.h>
42#include <quote.h>
43
44#include "scan-gram.h"
45
46#define YY_DECL GRAM_LEX_DECL
2346344a 47
3f2d73f1 48#define YY_USER_INIT \
e9071366 49 code_start = scanner_cursor = loc->start; \
dc9701e8 50
3f2d73f1 51/* Location of scanner cursor. */
4a678af8 52static boundary scanner_cursor;
41141c56 53
e9071366 54#define YY_USER_ACTION location_compute (loc, &scanner_cursor, yytext, yyleng);
d8d3f94a 55
6c30d641 56static size_t no_cr_read (FILE *, char *, size_t);
d8d3f94a
PE
57#define YY_INPUT(buf, result, size) ((result) = no_cr_read (yyin, buf, size))
58
7ec2d4cd 59/* A string representing the most recently saved token. */
7c0c6181 60static char *last_string;
7ec2d4cd 61
7ec2d4cd 62void
e9071366 63gram_scanner_last_string_free (void)
7ec2d4cd 64{
41141c56 65 STRING_FREE;
7ec2d4cd 66}
e9955c83 67
4517da37 68static void handle_syncline (char *, location);
1452af69 69static unsigned long int scan_integer (char const *p, int base, location loc);
d8d3f94a 70static int convert_ucn_to_byte (char const *hex_text);
aa418041 71static void unexpected_eof (boundary, char const *);
4febdd96 72static void unexpected_newline (boundary, char const *);
e9955c83
AD
73
74%}
e9071366
AD
75 /* A C-like comment in directives/rules. */
76%x SC_YACC_COMMENT
77 /* Strings and characters in directives/rules. */
e9955c83 78%x SC_ESCAPED_STRING SC_ESCAPED_CHARACTER
e9071366
AD
79 /* An identifier was just read in directives/rules. Special state
80 to capture the sequence `identifier :'. */
81%x SC_AFTER_IDENTIFIER
e9071366
AD
82
83 /* Three types of user code:
84 - prologue (code between `%{' `%}' in the first section, before %%);
85 - actions, printers, union, etc, (between braces in the middle section);
86 - epilogue (everything after the second %%). */
87%x SC_PROLOGUE SC_BRACED_CODE SC_EPILOGUE
88 /* C and C++ comments in code. */
89%x SC_COMMENT SC_LINE_COMMENT
90 /* Strings and characters in code. */
91%x SC_STRING SC_CHARACTER
e9955c83 92
29c01725
AD
93letter [.abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ_]
94id {letter}({letter}|[0-9])*
95directive %{letter}({letter}|[0-9]|-)*
624a35e2 96int [0-9]+
d8d3f94a
PE
97
98/* POSIX says that a tag must be both an id and a C union member, but
99 historically almost any character is allowed in a tag. We disallow
100 NUL and newline, as this simplifies our implementation. */
101tag [^\0\n>]+
102
103/* Zero or more instances of backslash-newline. Following GCC, allow
104 white space between the backslash and the newline. */
105splice (\\[ \f\t\v]*\n)*
e9955c83
AD
106
107%%
108%{
a706a1cc 109 /* Nesting level of the current code in braces. */
5362ed19 110 int braces_level IF_LINT (= 0);
1a9e39f1 111
3f2d73f1 112 /* Parent context state, when applicable. */
5362ed19 113 int context_state IF_LINT (= 0);
a706a1cc 114
3f2d73f1 115 /* Location of most recent identifier, when applicable. */
a2bc9dbc 116 location id_loc IF_LINT (= empty_location);
3f2d73f1 117
a2bc9dbc
PE
118 /* Where containing code started, when applicable. Its initial
119 value is relevant only when yylex is invoked in the SC_EPILOGUE
120 start condition. */
121 boundary code_start = scanner_cursor;
3f2d73f1 122
223ff46e
PE
123 /* Where containing comment or string or character literal started,
124 when applicable. */
a2bc9dbc 125 boundary token_start IF_LINT (= scanner_cursor);
e9955c83
AD
126%}
127
128
3f2d73f1
PE
129 /*-----------------------.
130 | Scanning white space. |
131 `-----------------------*/
132
58d7a1a1 133<INITIAL,SC_AFTER_IDENTIFIER>
3f2d73f1 134{
4febdd96 135 /* Comments and white space. */
83adb046 136 "," warn_at (*loc, _("stray `,' treated as white space"));
4febdd96 137 [ \f\n\t\v] |
3f2d73f1 138 "//".* ;
83adb046
PE
139 "/*" {
140 token_start = loc->start;
141 context_state = YY_START;
142 BEGIN SC_YACC_COMMENT;
143 }
3f2d73f1
PE
144
145 /* #line directives are not documented, and may be withdrawn or
146 modified in future versions of Bison. */
147 ^"#line "{int}" \"".*"\"\n" {
4517da37 148 handle_syncline (yytext + sizeof "#line " - 1, *loc);
3f2d73f1
PE
149 }
150}
151
152
e9955c83
AD
153 /*----------------------------.
154 | Scanning Bison directives. |
155 `----------------------------*/
156<INITIAL>
157{
58d7a1a1 158 "%binary" return PERCENT_NONASSOC;
136a0f76 159 "%code" return PERCENT_CODE;
58d7a1a1
AD
160 "%debug" return PERCENT_DEBUG;
161 "%default"[-_]"prec" return PERCENT_DEFAULT_PREC;
162 "%define" return PERCENT_DEFINE;
163 "%defines" return PERCENT_DEFINES;
164 "%destructor" return PERCENT_DESTRUCTOR;
165 "%dprec" return PERCENT_DPREC;
166 "%error"[-_]"verbose" return PERCENT_ERROR_VERBOSE;
167 "%expect" return PERCENT_EXPECT;
168 "%expect"[-_]"rr" return PERCENT_EXPECT_RR;
169 "%file-prefix" return PERCENT_FILE_PREFIX;
e9955c83 170 "%fixed"[-_]"output"[-_]"files" return PERCENT_YACC;
58d7a1a1
AD
171 "%initial-action" return PERCENT_INITIAL_ACTION;
172 "%glr-parser" return PERCENT_GLR_PARSER;
0e021770 173 "%language" return PERCENT_LANGUAGE;
58d7a1a1
AD
174 "%left" return PERCENT_LEFT;
175 "%lex-param" return PERCENT_LEX_PARAM;
176 "%locations" return PERCENT_LOCATIONS;
177 "%merge" return PERCENT_MERGE;
178 "%name"[-_]"prefix" return PERCENT_NAME_PREFIX;
179 "%no"[-_]"default"[-_]"prec" return PERCENT_NO_DEFAULT_PREC;
180 "%no"[-_]"lines" return PERCENT_NO_LINES;
181 "%nonassoc" return PERCENT_NONASSOC;
182 "%nondeterministic-parser" return PERCENT_NONDETERMINISTIC_PARSER;
183 "%nterm" return PERCENT_NTERM;
184 "%output" return PERCENT_OUTPUT;
185 "%parse-param" return PERCENT_PARSE_PARAM;
186 "%prec" return PERCENT_PREC;
187 "%printer" return PERCENT_PRINTER;
188 "%pure"[-_]"parser" return PERCENT_PURE_PARSER;
189 "%require" return PERCENT_REQUIRE;
190 "%right" return PERCENT_RIGHT;
191 "%skeleton" return PERCENT_SKELETON;
192 "%start" return PERCENT_START;
193 "%term" return PERCENT_TOKEN;
194 "%token" return PERCENT_TOKEN;
195 "%token"[-_]"table" return PERCENT_TOKEN_TABLE;
196 "%type" return PERCENT_TYPE;
197 "%union" return PERCENT_UNION;
198 "%verbose" return PERCENT_VERBOSE;
199 "%yacc" return PERCENT_YACC;
e9955c83 200
3f2d73f1 201 {directive} {
41141c56 202 complain_at (*loc, _("invalid directive: %s"), quote (yytext));
412f8a59 203 }
900c5db5 204
e9955c83 205 "=" return EQUAL;
e9071366 206 "|" return PIPE;
e9955c83 207 ";" return SEMICOLON;
12e35840 208 "<*>" return TYPE_TAG_ANY;
3ebecc24 209 "<>" return TYPE_TAG_NONE;
e9955c83 210
3f2d73f1 211 {id} {
58d7a1a1 212 val->uniqstr = uniqstr_new (yytext);
3f2d73f1 213 id_loc = *loc;
3f2d73f1 214 BEGIN SC_AFTER_IDENTIFIER;
e9955c83
AD
215 }
216
d8d3f94a 217 {int} {
1452af69
PE
218 val->integer = scan_integer (yytext, 10, *loc);
219 return INT;
220 }
221 0[xX][0-9abcdefABCDEF]+ {
222 val->integer = scan_integer (yytext, 16, *loc);
d8d3f94a
PE
223 return INT;
224 }
e9955c83
AD
225
226 /* Characters. We don't check there is only one. */
3f2d73f1 227 "'" STRING_GROW; token_start = loc->start; BEGIN SC_ESCAPED_CHARACTER;
e9955c83
AD
228
229 /* Strings. */
ca407bdf 230 "\"" token_start = loc->start; BEGIN SC_ESCAPED_STRING;
e9955c83
AD
231
232 /* Prologue. */
3f2d73f1 233 "%{" code_start = loc->start; BEGIN SC_PROLOGUE;
e9955c83
AD
234
235 /* Code in between braces. */
3f2d73f1
PE
236 "{" {
237 STRING_GROW;
238 braces_level = 0;
239 code_start = loc->start;
240 BEGIN SC_BRACED_CODE;
241 }
e9955c83
AD
242
243 /* A type. */
d8d3f94a 244 "<"{tag}">" {
223ff46e 245 obstack_grow (&obstack_for_string, yytext + 1, yyleng - 2);
41141c56 246 STRING_FINISH;
223ff46e 247 val->uniqstr = uniqstr_new (last_string);
41141c56 248 STRING_FREE;
4cdb01db
AD
249 return TYPE;
250 }
251
a706a1cc
PE
252 "%%" {
253 static int percent_percent_count;
e9955c83 254 if (++percent_percent_count == 2)
a2bc9dbc 255 BEGIN SC_EPILOGUE;
e9955c83
AD
256 return PERCENT_PERCENT;
257 }
258
a706a1cc 259 . {
41141c56 260 complain_at (*loc, _("invalid character: %s"), quote (yytext));
3f2d73f1 261 }
379f0ac8
PE
262
263 <<EOF>> {
264 loc->start = loc->end = scanner_cursor;
265 yyterminate ();
266 }
3f2d73f1
PE
267}
268
269
270 /*-----------------------------------------------------------------.
271 | Scanning after an identifier, checking whether a colon is next. |
272 `-----------------------------------------------------------------*/
273
274<SC_AFTER_IDENTIFIER>
275{
276 ":" {
3f2d73f1
PE
277 *loc = id_loc;
278 BEGIN INITIAL;
279 return ID_COLON;
280 }
281 . {
282 scanner_cursor.column -= mbsnwidth (yytext, yyleng, 0);
283 yyless (0);
284 *loc = id_loc;
285 BEGIN INITIAL;
286 return ID;
287 }
288 <<EOF>> {
289 *loc = id_loc;
290 BEGIN INITIAL;
291 return ID;
e9955c83
AD
292 }
293}
294
295
d8d3f94a
PE
296 /*---------------------------------------------------------------.
297 | Scanning a Yacc comment. The initial `/ *' is already eaten. |
298 `---------------------------------------------------------------*/
e9955c83 299
d8d3f94a 300<SC_YACC_COMMENT>
e9955c83 301{
3f2d73f1 302 "*/" BEGIN context_state;
a706a1cc 303 .|\n ;
aa418041 304 <<EOF>> unexpected_eof (token_start, "*/"); BEGIN context_state;
d8d3f94a
PE
305}
306
307
308 /*------------------------------------------------------------.
309 | Scanning a C comment. The initial `/ *' is already eaten. |
310 `------------------------------------------------------------*/
311
312<SC_COMMENT>
313{
3f2d73f1 314 "*"{splice}"/" STRING_GROW; BEGIN context_state;
aa418041 315 <<EOF>> unexpected_eof (token_start, "*/"); BEGIN context_state;
e9955c83
AD
316}
317
318
d8d3f94a
PE
319 /*--------------------------------------------------------------.
320 | Scanning a line comment. The initial `//' is already eaten. |
321 `--------------------------------------------------------------*/
322
323<SC_LINE_COMMENT>
324{
3f2d73f1 325 "\n" STRING_GROW; BEGIN context_state;
41141c56 326 {splice} STRING_GROW;
3f2d73f1 327 <<EOF>> BEGIN context_state;
d8d3f94a
PE
328}
329
330
4febdd96
PE
331 /*------------------------------------------------.
332 | Scanning a Bison string, including its escapes. |
333 | The initial quote is already eaten. |
334 `------------------------------------------------*/
e9955c83
AD
335
336<SC_ESCAPED_STRING>
337{
47aee066
JD
338 "\""|"\n" {
339 if (yytext[0] == '\n')
340 unexpected_newline (token_start, "\"");
341 STRING_FINISH;
342 loc->start = token_start;
343 val->chars = last_string;
344 BEGIN INITIAL;
345 return STRING;
346 }
347 <<EOF>> {
348 unexpected_eof (token_start, "\"");
41141c56 349 STRING_FINISH;
3f2d73f1 350 loc->start = token_start;
223ff46e 351 val->chars = last_string;
a706a1cc 352 BEGIN INITIAL;
e9955c83
AD
353 return STRING;
354 }
e9955c83
AD
355}
356
4febdd96
PE
357 /*----------------------------------------------------------.
358 | Scanning a Bison character literal, decoding its escapes. |
359 | The initial quote is already eaten. |
360 `----------------------------------------------------------*/
e9955c83
AD
361
362<SC_ESCAPED_CHARACTER>
363{
47aee066
JD
364 "'"|"\n" {
365 if (yytext[0] == '\n')
366 unexpected_newline (token_start, "'");
41141c56
PE
367 STRING_GROW;
368 STRING_FINISH;
3f2d73f1 369 loc->start = token_start;
58d7a1a1 370 val->character = last_string[1];
41141c56 371 STRING_FREE;
a706a1cc 372 BEGIN INITIAL;
58d7a1a1 373 return CHAR;
e9955c83 374 }
47aee066
JD
375 <<EOF>> {
376 unexpected_eof (token_start, "'");
377 STRING_FINISH;
378 loc->start = token_start;
379 if (strlen(last_string) > 1)
380 val->character = last_string[1];
381 else
382 val->character = last_string[0];
383 STRING_FREE;
384 BEGIN INITIAL;
385 return CHAR;
386 }
4febdd96 387}
a706a1cc 388
4febdd96
PE
389<SC_ESCAPED_CHARACTER,SC_ESCAPED_STRING>
390{
92ac3705 391 \0 complain_at (*loc, _("invalid null character"));
e9955c83
AD
392}
393
394
395 /*----------------------------.
396 | Decode escaped characters. |
397 `----------------------------*/
398
399<SC_ESCAPED_STRING,SC_ESCAPED_CHARACTER>
400{
d8d3f94a 401 \\[0-7]{1,3} {
4517da37 402 unsigned long int c = strtoul (yytext + 1, NULL, 8);
b1a4261e
JD
403 if (!c || UCHAR_MAX < c)
404 complain_at (*loc, _("invalid number after \\-escape: %s"),
405 yytext+1);
e9955c83 406 else
223ff46e 407 obstack_1grow (&obstack_for_string, c);
e9955c83
AD
408 }
409
6b0d38ab 410 \\x[0-9abcdefABCDEF]+ {
4517da37
PE
411 verify (UCHAR_MAX < ULONG_MAX);
412 unsigned long int c = strtoul (yytext + 2, NULL, 16);
b1a4261e
JD
413 if (!c || UCHAR_MAX < c)
414 complain_at (*loc, _("invalid number after \\-escape: %s"),
415 yytext+1);
d8d3f94a 416 else
223ff46e 417 obstack_1grow (&obstack_for_string, c);
e9955c83
AD
418 }
419
223ff46e
PE
420 \\a obstack_1grow (&obstack_for_string, '\a');
421 \\b obstack_1grow (&obstack_for_string, '\b');
422 \\f obstack_1grow (&obstack_for_string, '\f');
423 \\n obstack_1grow (&obstack_for_string, '\n');
424 \\r obstack_1grow (&obstack_for_string, '\r');
425 \\t obstack_1grow (&obstack_for_string, '\t');
426 \\v obstack_1grow (&obstack_for_string, '\v');
412f8a59
PE
427
428 /* \\[\"\'?\\] would be shorter, but it confuses xgettext. */
223ff46e 429 \\("\""|"'"|"?"|"\\") obstack_1grow (&obstack_for_string, yytext[1]);
412f8a59 430
6b0d38ab 431 \\(u|U[0-9abcdefABCDEF]{4})[0-9abcdefABCDEF]{4} {
d8d3f94a 432 int c = convert_ucn_to_byte (yytext);
b1a4261e
JD
433 if (c <= 0)
434 complain_at (*loc, _("invalid number after \\-escape: %s"),
435 yytext+1);
d8d3f94a 436 else
223ff46e 437 obstack_1grow (&obstack_for_string, c);
d8d3f94a 438 }
4f25ebb0 439 \\(.|\n) {
b1a4261e 440 char const *p = yytext + 1;
321fe707 441 /* Quote only if escaping won't make the character visible. */
bbbbe221 442 if (isspace ((unsigned char) *p) && isprint ((unsigned char) *p))
321fe707 443 p = quote (p);
b1a4261e
JD
444 else
445 p = quotearg_style_mem (escape_quoting_style, p, 1);
446 complain_at (*loc, _("invalid character after \\-escape: %s"), p);
e9955c83
AD
447 }
448}
449
4febdd96
PE
450 /*--------------------------------------------.
451 | Scanning user-code characters and strings. |
452 `--------------------------------------------*/
e9955c83 453
4febdd96
PE
454<SC_CHARACTER,SC_STRING>
455{
e9071366 456 {splice}|\\{splice}[^\n\[\]] STRING_GROW;
4febdd96 457}
e9955c83
AD
458
459<SC_CHARACTER>
460{
4febdd96
PE
461 "'" STRING_GROW; BEGIN context_state;
462 \n unexpected_newline (token_start, "'"); BEGIN context_state;
463 <<EOF>> unexpected_eof (token_start, "'"); BEGIN context_state;
e9955c83
AD
464}
465
e9955c83
AD
466<SC_STRING>
467{
4febdd96
PE
468 "\"" STRING_GROW; BEGIN context_state;
469 \n unexpected_newline (token_start, "\""); BEGIN context_state;
470 <<EOF>> unexpected_eof (token_start, "\""); BEGIN context_state;
e9955c83
AD
471}
472
473
474 /*---------------------------------------------------.
475 | Strings, comments etc. can be found in user code. |
476 `---------------------------------------------------*/
477
478<SC_BRACED_CODE,SC_PROLOGUE,SC_EPILOGUE>
479{
3f2d73f1
PE
480 "'" {
481 STRING_GROW;
482 context_state = YY_START;
483 token_start = loc->start;
484 BEGIN SC_CHARACTER;
485 }
486 "\"" {
487 STRING_GROW;
488 context_state = YY_START;
489 token_start = loc->start;
490 BEGIN SC_STRING;
491 }
492 "/"{splice}"*" {
493 STRING_GROW;
494 context_state = YY_START;
495 token_start = loc->start;
496 BEGIN SC_COMMENT;
497 }
498 "/"{splice}"/" {
499 STRING_GROW;
500 context_state = YY_START;
501 BEGIN SC_LINE_COMMENT;
502 }
e9955c83
AD
503}
504
505
624a35e2 506
58d7a1a1
AD
507 /*-----------------------------------------------------------.
508 | Scanning some code in braces (actions). The initial "{" is |
509 | already eaten. |
510 `-----------------------------------------------------------*/
e9955c83
AD
511
512<SC_BRACED_CODE>
513{
41141c56
PE
514 "{"|"<"{splice}"%" STRING_GROW; braces_level++;
515 "%"{splice}">" STRING_GROW; braces_level--;
e9955c83 516 "}" {
25522739
PE
517 obstack_1grow (&obstack_for_string, '}');
518
2346344a
AD
519 --braces_level;
520 if (braces_level < 0)
e9955c83 521 {
41141c56 522 STRING_FINISH;
3f2d73f1 523 loc->start = code_start;
eb095650 524 val->code = last_string;
a706a1cc 525 BEGIN INITIAL;
58d7a1a1 526 return BRACED_CODE;
e9955c83
AD
527 }
528 }
529
a706a1cc
PE
530 /* Tokenize `<<%' correctly (as `<<' `%') rather than incorrectly
531 (as `<' `<%'). */
41141c56 532 "<"{splice}"<" STRING_GROW;
a706a1cc 533
47aee066
JD
534 <<EOF>> {
535 unexpected_eof (code_start, "}");
536 STRING_FINISH;
537 loc->start = code_start;
eb095650 538 val->code = last_string;
47aee066
JD
539 BEGIN INITIAL;
540 return BRACED_CODE;
541 }
e9955c83
AD
542}
543
544
545 /*--------------------------------------------------------------.
546 | Scanning some prologue: from "%{" (already scanned) to "%}". |
547 `--------------------------------------------------------------*/
548
549<SC_PROLOGUE>
550{
551 "%}" {
41141c56 552 STRING_FINISH;
3f2d73f1 553 loc->start = code_start;
223ff46e 554 val->chars = last_string;
a706a1cc 555 BEGIN INITIAL;
e9955c83
AD
556 return PROLOGUE;
557 }
558
47aee066
JD
559 <<EOF>> {
560 unexpected_eof (code_start, "%}");
561 STRING_FINISH;
562 loc->start = code_start;
563 val->chars = last_string;
564 BEGIN INITIAL;
565 return PROLOGUE;
566 }
e9955c83
AD
567}
568
569
570 /*---------------------------------------------------------------.
571 | Scanning the epilogue (everything after the second "%%", which |
d8d3f94a 572 | has already been eaten). |
e9955c83
AD
573 `---------------------------------------------------------------*/
574
575<SC_EPILOGUE>
576{
e9955c83 577 <<EOF>> {
41141c56 578 STRING_FINISH;
3f2d73f1 579 loc->start = code_start;
223ff46e 580 val->chars = last_string;
a706a1cc 581 BEGIN INITIAL;
e9955c83
AD
582 return EPILOGUE;
583 }
584}
585
586
4febdd96
PE
587 /*-----------------------------------------------------.
588 | By default, grow the string obstack with the input. |
589 `-----------------------------------------------------*/
590
591<SC_COMMENT,SC_LINE_COMMENT,SC_BRACED_CODE,SC_PROLOGUE,SC_EPILOGUE,SC_STRING,SC_CHARACTER,SC_ESCAPED_STRING,SC_ESCAPED_CHARACTER>. |
592<SC_COMMENT,SC_LINE_COMMENT,SC_BRACED_CODE,SC_PROLOGUE,SC_EPILOGUE>\n STRING_GROW;
593
e9955c83
AD
594%%
595
6c30d641
PE
/* Fill BUF with at most SIZE bytes read from FP and return how many
   bytes BUF holds afterwards.  Carriage returns never reach the
   caller: both "\r\n" and a lone '\r' are delivered as a single
   '\n', so the count returned may be smaller than the count read.  */

static size_t
no_cr_read (FILE *fp, char *buf, size_t size)
{
  size_t const got = fread (buf, 1, size, fp);
  char *out = got ? memchr (buf, '\r', got) : NULL;
  char const *in = out;
  char const *const end = buf + got;

  /* Fast path: nothing was read, or the chunk holds no '\r'.  */
  if (!out)
    return got;

  /* Compact the tail of the buffer in place, starting at the first
     '\r'.  OUT never overtakes IN, so no unread byte is clobbered.  */
  while (in != end)
    {
      char c = *in++;

      if (c == '\r')
        {
          c = '\n';
          if (in != end)
            {
              /* "\r\n" inside the chunk: drop the '\n'.  */
              if (*in == '\n')
                in++;
            }
          else
            {
              /* The '\r' is the last byte of the chunk, so a matching
                 '\n' would still be in the stream.  Swallow it if it
                 is there; otherwise push back what was read (pushing
                 back EOF is a no-op).  */
              int next = getc (fp);
              if (next != '\n')
                ungetc (next, fp);
            }
        }

      *out++ = c;
    }

  return out - buf;
}
641
642
f25bfb75 643
1452af69
PE
644/*------------------------------------------------------.
645| Scan NUMBER for a base-BASE integer at location LOC. |
646`------------------------------------------------------*/
647
648static unsigned long int
649scan_integer (char const *number, int base, location loc)
650{
4517da37
PE
651 verify (INT_MAX < ULONG_MAX);
652 unsigned long int num = strtoul (number, NULL, base);
653
654 if (INT_MAX < num)
1452af69
PE
655 {
656 complain_at (loc, _("integer out of range: %s"), quote (number));
657 num = INT_MAX;
658 }
4517da37 659
1452af69
PE
660 return num;
661}
662
663
d8d3f94a
PE
/*------------------------------------------------------------------.
| Convert universal character name UCN to a single-byte character,  |
| and return that character.  Return -1 if UCN does not correspond  |
| to a single-byte character.                                       |
|                                                                   |
| The hexadecimal digits are read starting two characters into UCN, |
| i.e., after the backslash and the `u' or `U' matched by the       |
| scanner rule that calls this function.                            |
`------------------------------------------------------------------*/

static int
convert_ucn_to_byte (char const *ucn)
{
  verify (UCHAR_MAX <= INT_MAX);
  unsigned long int code = strtoul (ucn + 2, NULL, 16);

  /* FIXME: Currently we assume Unicode-compatible unibyte characters
     on ASCII hosts (i.e., Latin-1 on hosts with 8-bit bytes).  On
     non-ASCII hosts we support only the portable C character set.
     These limitations should be removed once we add support for
     multibyte characters.  */

  if (UCHAR_MAX < code)
    return -1;

#if ! ('$' == 0x24 && '@' == 0x40 && '`' == 0x60 && '~' == 0x7e)
  {
    /* A non-ASCII host.  Use CODE to index into a table of the C
       basic execution character set, which is guaranteed to exist on
       all Standard C platforms.  This table also includes '$', '@',
       and '`', which are not in the basic execution character set but
       which are unibyte characters on all the platforms that we know
       about.  */
    static signed char const table[] =
      {
        '\0',   -1,   -1,   -1,   -1,   -1,   -1, '\a',
        '\b', '\t', '\n', '\v', '\f', '\r',   -1,   -1,
          -1,   -1,   -1,   -1,   -1,   -1,   -1,   -1,
          -1,   -1,   -1,   -1,   -1,   -1,   -1,   -1,
         ' ',  '!',  '"',  '#',  '$',  '%',  '&', '\'',
         '(',  ')',  '*',  '+',  ',',  '-',  '.',  '/',
         '0',  '1',  '2',  '3',  '4',  '5',  '6',  '7',
         '8',  '9',  ':',  ';',  '<',  '=',  '>',  '?',
         '@',  'A',  'B',  'C',  'D',  'E',  'F',  'G',
         'H',  'I',  'J',  'K',  'L',  'M',  'N',  'O',
         'P',  'Q',  'R',  'S',  'T',  'U',  'V',  'W',
         'X',  'Y',  'Z',  '[', '\\',  ']',  '^',  '_',
         '`',  'a',  'b',  'c',  'd',  'e',  'f',  'g',
         'h',  'i',  'j',  'k',  'l',  'm',  'n',  'o',
         'p',  'q',  'r',  's',  't',  'u',  'v',  'w',
         'x',  'y',  'z',  '{',  '|',  '}',  '~'
      };

    /* Return the (signed) table entry as an int right away.  Storing
       -1 into the unsigned CODE and converting it back to int on
       return would rely on an implementation-defined conversion.  */
    return code < sizeof table ? table[code] : -1;
  }
#else
  /* An ASCII host: CODE is at most UCHAR_MAX here, which the verify
     above guarantees fits in an int.  */
  return code;
#endif
}
719
720
900c5db5
AD
721/*----------------------------------------------------------------.
722| Handle `#line INT "FILE"'. ARGS has already skipped `#line '. |
723`----------------------------------------------------------------*/
724
725static void
4517da37 726handle_syncline (char *args, location loc)
900c5db5 727{
4517da37
PE
728 char *after_num;
729 unsigned long int lineno = strtoul (args, &after_num, 10);
730 char *file = strchr (after_num, '"') + 1;
731 *strchr (file, '"') = '\0';
732 if (INT_MAX <= lineno)
733 {
734 warn_at (loc, _("line number overflow"));
735 lineno = INT_MAX;
736 }
e9071366 737 current_file = uniqstr_new (file);
0c8e079f 738 boundary_set (&scanner_cursor, current_file, lineno, 1);
4517da37
PE
739}
740
741
4febdd96
PE
742/*----------------------------------------------------------------.
743| For a token or comment starting at START, report message MSGID, |
744| which should say that an end marker was found before |
745| the expected TOKEN_END. |
746`----------------------------------------------------------------*/
747
748static void
749unexpected_end (boundary start, char const *msgid, char const *token_end)
750{
751 location loc;
752 loc.start = start;
753 loc.end = scanner_cursor;
754 complain_at (loc, _(msgid), token_end);
755}
756
757
3f2d73f1
PE
758/*------------------------------------------------------------------------.
759| Report an unexpected EOF in a token or comment starting at START. |
760| An end of file was encountered and the expected TOKEN_END was missing. |
3f2d73f1 761`------------------------------------------------------------------------*/
a706a1cc
PE
762
763static void
aa418041 764unexpected_eof (boundary start, char const *token_end)
a706a1cc 765{
4febdd96
PE
766 unexpected_end (start, N_("missing `%s' at end of file"), token_end);
767}
768
769
770/*----------------------------------------.
771| Likewise, but for unexpected newlines. |
772`----------------------------------------*/
773
774static void
775unexpected_newline (boundary start, char const *token_end)
776{
777 unexpected_end (start, N_("missing `%s' at end of line"), token_end);
a706a1cc
PE
778}
779
780
f25bfb75
AD
781/*-------------------------.
782| Initialize the scanner. |
783`-------------------------*/
784
1d6412ad 785void
e9071366 786gram_scanner_initialize (void)
1d6412ad 787{
223ff46e 788 obstack_init (&obstack_for_string);
1d6412ad
AD
789}
790
791
f25bfb75
AD
792/*-----------------------------------------------.
793| Free all the memory allocated to the scanner. |
794`-----------------------------------------------*/
795
4cdb01db 796void
e9071366 797gram_scanner_free (void)
4cdb01db 798{
223ff46e 799 obstack_free (&obstack_for_string, 0);
536545f3 800 /* Reclaim Flex's buffers. */
580b8926 801 yylex_destroy ();
4cdb01db 802}