]> git.saurik.com Git - bison.git/blame - src/scan-gram.l
Regenerate.
[bison.git] / src / scan-gram.l
CommitLineData
e9955c83 1/* Bison Grammar Scanner -*- C -*-
3b1e470c 2
e9955c83
AD
3 Copyright (C) 2002 Free Software Foundation, Inc.
4
5 This file is part of Bison, the GNU Compiler Compiler.
6
7 This program is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 2 of the License, or
10 (at your option) any later version.
11
12 This program is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
16
17 You should have received a copy of the GNU General Public License
18 along with this program; if not, write to the Free Software
19 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
20 02111-1307 USA
21*/
22
a706a1cc 23%option debug nodefault noyywrap never-interactive
e9955c83
AD
24%option prefix="gram_" outfile="lex.yy.c"
25
26%{
27#include "system.h"
223ff46e
PE
28
29#include <mbswidth.h>
30#include <get-errno.h>
31#include <quote.h>
32
e9955c83 33#include "complain.h"
3f2d73f1 34#include "files.h"
e9955c83
AD
35#include "getargs.h"
36#include "gram.h"
37#include "reader.h"
223ff46e 38#include "uniqstr.h"
e9955c83 39
3f2d73f1
PE
40#define YY_USER_INIT \
41 do \
42 { \
43 scanner_cursor.file = current_file; \
44 scanner_cursor.line = 1; \
45 scanner_cursor.column = 1; \
46 } \
47 while (0)
8efe435c 48
3f2d73f1
PE
49/* Location of scanner cursor. */
50boundary scanner_cursor;
41141c56 51
223ff46e 52static void adjust_location (location *, char const *, size_t);
3f2d73f1 53#define YY_USER_ACTION adjust_location (loc, yytext, yyleng);
d8d3f94a 54
6c30d641 55static size_t no_cr_read (FILE *, char *, size_t);
d8d3f94a
PE
56#define YY_INPUT(buf, result, size) ((result) = no_cr_read (yyin, buf, size))
57
58
223ff46e 59/* OBSTACK_FOR_STRING -- Used to store all the characters that we need to
44995b2e
AD
60 keep (to construct ID, STRINGS etc.). Use the following macros to
61 use it.
62
41141c56
PE
63 Use STRING_GROW to append what has just been matched, and
64 STRING_FINISH to end the string (it puts the ending 0).
65 STRING_FINISH also stores this string in LAST_STRING, which can be
66 used, and which is used by STRING_FREE to free the last string. */
44995b2e 67
223ff46e 68static struct obstack obstack_for_string;
44995b2e 69
7ec2d4cd
AD
70/* A string representing the most recently saved token. */
71static char *last_string;
72
73
41141c56 74#define STRING_GROW \
223ff46e 75 obstack_grow (&obstack_for_string, yytext, yyleng)
44995b2e 76
41141c56 77#define STRING_FINISH \
44995b2e 78 do { \
223ff46e
PE
79 obstack_1grow (&obstack_for_string, '\0'); \
80 last_string = obstack_finish (&obstack_for_string); \
44995b2e
AD
81 } while (0)
82
41141c56 83#define STRING_FREE \
223ff46e 84 obstack_free (&obstack_for_string, last_string)
e9955c83 85
7ec2d4cd
AD
86void
87scanner_last_string_free (void)
88{
41141c56 89 STRING_FREE;
7ec2d4cd 90}
e9955c83 91
efcb44dd
PE
92/* Within well-formed rules, RULE_LENGTH is the number of values in
93 the current rule so far, which says where to find `$0' with respect
94 to the top of the stack. It is not the same as the rule->length in
95 the case of mid rule actions.
96
97 Outside of well-formed rules, RULE_LENGTH has an undefined value. */
98static int rule_length;
99
624a35e2
PE
100static void handle_dollar (int token_type, char *cp, location loc);
101static void handle_at (int token_type, char *cp, location loc);
3f2d73f1 102static void handle_syncline (char *args);
d8d3f94a 103static int convert_ucn_to_byte (char const *hex_text);
3f2d73f1 104static void unexpected_end_of_file (boundary, char const *);
e9955c83
AD
105
106%}
d8d3f94a 107%x SC_COMMENT SC_LINE_COMMENT SC_YACC_COMMENT
e9955c83 108%x SC_STRING SC_CHARACTER
3f2d73f1 109%x SC_AFTER_IDENTIFIER
e9955c83 110%x SC_ESCAPED_STRING SC_ESCAPED_CHARACTER
624a35e2 111%x SC_PRE_CODE SC_BRACED_CODE SC_PROLOGUE SC_EPILOGUE
e9955c83 112
29c01725
AD
113letter [.abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ_]
114id {letter}({letter}|[0-9])*
115directive %{letter}({letter}|[0-9]|-)*
624a35e2 116int [0-9]+
d8d3f94a
PE
117
118/* POSIX says that a tag must be both an id and a C union member, but
119 historically almost any character is allowed in a tag. We disallow
120 NUL and newline, as this simplifies our implementation. */
121tag [^\0\n>]+
122
123/* Zero or more instances of backslash-newline. Following GCC, allow
124 white space between the backslash and the newline. */
125splice (\\[ \f\t\v]*\n)*
e9955c83
AD
126
127%%
128%{
a706a1cc 129 /* Nesting level of the current code in braces. */
1a9e39f1
PE
130 int braces_level IF_LINT (= 0);
131
3f2d73f1
PE
132 /* Parent context state, when applicable. */
133 int context_state IF_LINT (= 0);
a706a1cc 134
624a35e2
PE
135 /* Token type to return, when applicable. */
136 int token_type IF_LINT (= 0);
137
3f2d73f1 138 /* Location of most recent identifier, when applicable. */
223ff46e 139 location id_loc IF_LINT (= *loc);
3f2d73f1 140
223ff46e 141 /* Where containing code started, when applicable. */
3f2d73f1
PE
142 boundary code_start IF_LINT (= loc->start);
143
223ff46e
PE
144 /* Where containing comment or string or character literal started,
145 when applicable. */
3f2d73f1 146 boundary token_start IF_LINT (= loc->start);
e9955c83
AD
147%}
148
149
3f2d73f1
PE
150 /*-----------------------.
151 | Scanning white space. |
152 `-----------------------*/
153
624a35e2 154<INITIAL,SC_AFTER_IDENTIFIER,SC_PRE_CODE>
3f2d73f1
PE
155{
156 [ \f\n\t\v] ;
157
158 /* Comments. */
159 "/*" token_start = loc->start; context_state = YY_START; BEGIN SC_YACC_COMMENT;
160 "//".* ;
161
162 /* #line directives are not documented, and may be withdrawn or
163 modified in future versions of Bison. */
164 ^"#line "{int}" \"".*"\"\n" {
165 handle_syncline (yytext + sizeof "#line " - 1);
166 }
167}
168
169
e9955c83
AD
170 /*----------------------------.
171 | Scanning Bison directives. |
172 `----------------------------*/
173<INITIAL>
174{
175 "%binary" return PERCENT_NONASSOC;
176 "%debug" return PERCENT_DEBUG;
177 "%define" return PERCENT_DEFINE;
178 "%defines" return PERCENT_DEFINES;
624a35e2 179 "%destructor" token_type = PERCENT_DESTRUCTOR; BEGIN SC_PRE_CODE;
676385e2 180 "%dprec" return PERCENT_DPREC;
e9955c83
AD
181 "%error"[-_]"verbose" return PERCENT_ERROR_VERBOSE;
182 "%expect" return PERCENT_EXPECT;
183 "%file-prefix" return PERCENT_FILE_PREFIX;
184 "%fixed"[-_]"output"[-_]"files" return PERCENT_YACC;
ae7453f2 185 "%glr-parser" return PERCENT_GLR_PARSER;
e9955c83 186 "%left" return PERCENT_LEFT;
624a35e2 187 "%lex-param" token_type = PERCENT_LEX_PARAM; BEGIN SC_PRE_CODE;
e9955c83 188 "%locations" return PERCENT_LOCATIONS;
676385e2 189 "%merge" return PERCENT_MERGE;
e9955c83
AD
190 "%name"[-_]"prefix" return PERCENT_NAME_PREFIX;
191 "%no"[-_]"lines" return PERCENT_NO_LINES;
192 "%nonassoc" return PERCENT_NONASSOC;
193 "%nterm" return PERCENT_NTERM;
194 "%output" return PERCENT_OUTPUT;
624a35e2 195 "%parse-param" token_type = PERCENT_PARSE_PARAM; BEGIN SC_PRE_CODE;
d8d3f94a 196 "%prec" rule_length--; return PERCENT_PREC;
624a35e2 197 "%printer" token_type = PERCENT_PRINTER; BEGIN SC_PRE_CODE;
e9955c83
AD
198 "%pure"[-_]"parser" return PERCENT_PURE_PARSER;
199 "%right" return PERCENT_RIGHT;
200 "%skeleton" return PERCENT_SKELETON;
201 "%start" return PERCENT_START;
202 "%term" return PERCENT_TOKEN;
203 "%token" return PERCENT_TOKEN;
204 "%token"[-_]"table" return PERCENT_TOKEN_TABLE;
205 "%type" return PERCENT_TYPE;
624a35e2 206 "%union" token_type = PERCENT_UNION; BEGIN SC_PRE_CODE;
e9955c83
AD
207 "%verbose" return PERCENT_VERBOSE;
208 "%yacc" return PERCENT_YACC;
209
3f2d73f1 210 {directive} {
41141c56 211 complain_at (*loc, _("invalid directive: %s"), quote (yytext));
412f8a59 212 }
900c5db5 213
e9955c83 214 "=" return EQUAL;
d8d3f94a 215 "|" rule_length = 0; return PIPE;
e9955c83
AD
216 ";" return SEMICOLON;
217
763ed7a6
PE
218 "," {
219 warn_at (*loc, _("stray `,' treated as white space"));
763ed7a6
PE
220 }
221
3f2d73f1 222 {id} {
41141c56 223 val->symbol = symbol_get (yytext, *loc);
3f2d73f1 224 id_loc = *loc;
efcb44dd 225 rule_length++;
3f2d73f1 226 BEGIN SC_AFTER_IDENTIFIER;
e9955c83
AD
227 }
228
d8d3f94a
PE
229 {int} {
230 unsigned long num;
223ff46e 231 set_errno (0);
d8d3f94a 232 num = strtoul (yytext, 0, 10);
223ff46e 233 if (INT_MAX < num || get_errno ())
d8d3f94a 234 {
41141c56 235 complain_at (*loc, _("integer out of range: %s"), quote (yytext));
d8d3f94a
PE
236 num = INT_MAX;
237 }
41141c56 238 val->integer = num;
d8d3f94a
PE
239 return INT;
240 }
e9955c83
AD
241
242 /* Characters. We don't check there is only one. */
3f2d73f1 243 "'" STRING_GROW; token_start = loc->start; BEGIN SC_ESCAPED_CHARACTER;
e9955c83
AD
244
245 /* Strings. */
3f2d73f1 246 "\"" STRING_GROW; token_start = loc->start; BEGIN SC_ESCAPED_STRING;
e9955c83
AD
247
248 /* Prologue. */
3f2d73f1 249 "%{" code_start = loc->start; BEGIN SC_PROLOGUE;
e9955c83
AD
250
251 /* Code in between braces. */
3f2d73f1
PE
252 "{" {
253 STRING_GROW;
624a35e2 254 token_type = BRACED_CODE;
3f2d73f1
PE
255 braces_level = 0;
256 code_start = loc->start;
257 BEGIN SC_BRACED_CODE;
258 }
e9955c83
AD
259
260 /* A type. */
d8d3f94a 261 "<"{tag}">" {
223ff46e 262 obstack_grow (&obstack_for_string, yytext + 1, yyleng - 2);
41141c56 263 STRING_FINISH;
223ff46e 264 val->uniqstr = uniqstr_new (last_string);
41141c56 265 STRING_FREE;
4cdb01db
AD
266 return TYPE;
267 }
268
a706a1cc
PE
269 "%%" {
270 static int percent_percent_count;
e9955c83 271 if (++percent_percent_count == 2)
3f2d73f1
PE
272 {
273 code_start = loc->start;
274 BEGIN SC_EPILOGUE;
275 }
e9955c83
AD
276 return PERCENT_PERCENT;
277 }
278
a706a1cc 279 . {
41141c56 280 complain_at (*loc, _("invalid character: %s"), quote (yytext));
3f2d73f1
PE
281 }
282}
283
284
285 /*-----------------------------------------------------------------.
286 | Scanning after an identifier, checking whether a colon is next. |
287 `-----------------------------------------------------------------*/
288
289<SC_AFTER_IDENTIFIER>
290{
291 ":" {
292 rule_length = 0;
293 *loc = id_loc;
294 BEGIN INITIAL;
295 return ID_COLON;
296 }
297 . {
298 scanner_cursor.column -= mbsnwidth (yytext, yyleng, 0);
299 yyless (0);
300 *loc = id_loc;
301 BEGIN INITIAL;
302 return ID;
303 }
304 <<EOF>> {
305 *loc = id_loc;
306 BEGIN INITIAL;
307 return ID;
e9955c83
AD
308 }
309}
310
311
d8d3f94a
PE
312 /*---------------------------------------------------------------.
313 | Scanning a Yacc comment. The initial `/ *' is already eaten. |
314 `---------------------------------------------------------------*/
e9955c83 315
d8d3f94a 316<SC_YACC_COMMENT>
e9955c83 317{
3f2d73f1 318 "*/" BEGIN context_state;
a706a1cc 319 .|\n ;
3f2d73f1 320 <<EOF>> unexpected_end_of_file (token_start, "*/");
d8d3f94a
PE
321}
322
323
324 /*------------------------------------------------------------.
325 | Scanning a C comment. The initial `/ *' is already eaten. |
326 `------------------------------------------------------------*/
327
328<SC_COMMENT>
329{
3f2d73f1
PE
330 "*"{splice}"/" STRING_GROW; BEGIN context_state;
331 <<EOF>> unexpected_end_of_file (token_start, "*/");
e9955c83
AD
332}
333
334
d8d3f94a
PE
335 /*--------------------------------------------------------------.
336 | Scanning a line comment. The initial `//' is already eaten. |
337 `--------------------------------------------------------------*/
338
339<SC_LINE_COMMENT>
340{
3f2d73f1 341 "\n" STRING_GROW; BEGIN context_state;
41141c56 342 {splice} STRING_GROW;
3f2d73f1 343 <<EOF>> BEGIN context_state;
d8d3f94a
PE
344}
345
346
e9955c83
AD
347 /*----------------------------------------------------------------.
348 | Scanning a C string, including its escapes. The initial `"' is |
349 | already eaten. |
350 `----------------------------------------------------------------*/
351
352<SC_ESCAPED_STRING>
353{
db2cc12f 354 "\"" {
41141c56
PE
355 STRING_GROW;
356 STRING_FINISH;
3f2d73f1 357 loc->start = token_start;
223ff46e 358 val->chars = last_string;
efcb44dd 359 rule_length++;
a706a1cc 360 BEGIN INITIAL;
e9955c83
AD
361 return STRING;
362 }
363
41141c56 364 .|\n STRING_GROW;
3f2d73f1 365 <<EOF>> unexpected_end_of_file (token_start, "\"");
e9955c83
AD
366}
367
368 /*---------------------------------------------------------------.
369 | Scanning a C character, decoding its escapes. The initial "'" |
370 | is already eaten. |
371 `---------------------------------------------------------------*/
372
373<SC_ESCAPED_CHARACTER>
374{
db2cc12f 375 "'" {
3b1e470c 376 unsigned char last_string_1;
41141c56
PE
377 STRING_GROW;
378 STRING_FINISH;
3f2d73f1 379 loc->start = token_start;
41141c56
PE
380 val->symbol = symbol_get (last_string, *loc);
381 symbol_class_set (val->symbol, token_sym, *loc);
3b1e470c
PE
382 last_string_1 = last_string[1];
383 symbol_user_token_number_set (val->symbol, last_string_1, *loc);
41141c56 384 STRING_FREE;
a706a1cc
PE
385 rule_length++;
386 BEGIN INITIAL;
387 return ID;
e9955c83 388 }
a706a1cc 389
41141c56 390 .|\n STRING_GROW;
3f2d73f1 391 <<EOF>> unexpected_end_of_file (token_start, "'");
e9955c83
AD
392}
393
394
395 /*----------------------------.
396 | Decode escaped characters. |
397 `----------------------------*/
398
399<SC_ESCAPED_STRING,SC_ESCAPED_CHARACTER>
400{
d8d3f94a
PE
401 \\[0-7]{1,3} {
402 unsigned long c = strtoul (yytext + 1, 0, 8);
403 if (UCHAR_MAX < c)
3f2d73f1 404 complain_at (*loc, _("invalid escape sequence: %s"), quote (yytext));
e9955c83 405 else
223ff46e 406 obstack_1grow (&obstack_for_string, c);
e9955c83
AD
407 }
408
6b0d38ab 409 \\x[0-9abcdefABCDEF]+ {
d8d3f94a 410 unsigned long c;
223ff46e 411 set_errno (0);
d8d3f94a 412 c = strtoul (yytext + 2, 0, 16);
223ff46e 413 if (UCHAR_MAX < c || get_errno ())
3f2d73f1 414 complain_at (*loc, _("invalid escape sequence: %s"), quote (yytext));
d8d3f94a 415 else
223ff46e 416 obstack_1grow (&obstack_for_string, c);
e9955c83
AD
417 }
418
223ff46e
PE
419 \\a obstack_1grow (&obstack_for_string, '\a');
420 \\b obstack_1grow (&obstack_for_string, '\b');
421 \\f obstack_1grow (&obstack_for_string, '\f');
422 \\n obstack_1grow (&obstack_for_string, '\n');
423 \\r obstack_1grow (&obstack_for_string, '\r');
424 \\t obstack_1grow (&obstack_for_string, '\t');
425 \\v obstack_1grow (&obstack_for_string, '\v');
412f8a59
PE
426
427 /* \\[\"\'?\\] would be shorter, but it confuses xgettext. */
223ff46e 428 \\("\""|"'"|"?"|"\\") obstack_1grow (&obstack_for_string, yytext[1]);
412f8a59 429
6b0d38ab 430 \\(u|U[0-9abcdefABCDEF]{4})[0-9abcdefABCDEF]{4} {
d8d3f94a
PE
431 int c = convert_ucn_to_byte (yytext);
432 if (c < 0)
3f2d73f1 433 complain_at (*loc, _("invalid escape sequence: %s"), quote (yytext));
d8d3f94a 434 else
223ff46e 435 obstack_1grow (&obstack_for_string, c);
d8d3f94a 436 }
4f25ebb0 437 \\(.|\n) {
3f2d73f1 438 complain_at (*loc, _("unrecognized escape sequence: %s"), quote (yytext));
41141c56 439 STRING_GROW;
e9955c83
AD
440 }
441}
442
443
444 /*----------------------------------------------------------.
445 | Scanning a C character without decoding its escapes. The |
446 | initial "'" is already eaten. |
447 `----------------------------------------------------------*/
448
449<SC_CHARACTER>
450{
3f2d73f1 451 "'" STRING_GROW; BEGIN context_state;
41141c56 452 \\{splice}[^$@\[\]] STRING_GROW;
3f2d73f1 453 <<EOF>> unexpected_end_of_file (token_start, "'");
e9955c83
AD
454}
455
456
457 /*----------------------------------------------------------------.
458 | Scanning a C string, without decoding its escapes. The initial |
459 | `"' is already eaten. |
460 `----------------------------------------------------------------*/
461
462<SC_STRING>
463{
3f2d73f1 464 "\"" STRING_GROW; BEGIN context_state;
41141c56 465 \\{splice}[^$@\[\]] STRING_GROW;
3f2d73f1 466 <<EOF>> unexpected_end_of_file (token_start, "\"");
e9955c83
AD
467}
468
469
470 /*---------------------------------------------------.
471 | Strings, comments etc. can be found in user code. |
472 `---------------------------------------------------*/
473
474<SC_BRACED_CODE,SC_PROLOGUE,SC_EPILOGUE>
475{
3f2d73f1
PE
476 "'" {
477 STRING_GROW;
478 context_state = YY_START;
479 token_start = loc->start;
480 BEGIN SC_CHARACTER;
481 }
482 "\"" {
483 STRING_GROW;
484 context_state = YY_START;
485 token_start = loc->start;
486 BEGIN SC_STRING;
487 }
488 "/"{splice}"*" {
489 STRING_GROW;
490 context_state = YY_START;
491 token_start = loc->start;
492 BEGIN SC_COMMENT;
493 }
494 "/"{splice}"/" {
495 STRING_GROW;
496 context_state = YY_START;
497 BEGIN SC_LINE_COMMENT;
498 }
e9955c83
AD
499}
500
501
624a35e2
PE
502 /*---------------------------------------------------------------.
503 | Scanning after %union etc., possibly followed by white space. |
504 | For %union only, allow arbitrary C code to appear before the |
505 | following brace, as an extension to POSIX. |
506 `---------------------------------------------------------------*/
507
508<SC_PRE_CODE>
509{
510 . {
511 bool valid = yytext[0] == '{' || token_type == PERCENT_UNION;
512 scanner_cursor.column -= mbsnwidth (yytext, yyleng, 0);
513 yyless (0);
514
515 if (valid)
516 {
517 braces_level = -1;
518 code_start = loc->start;
519 BEGIN SC_BRACED_CODE;
520 }
521 else
522 {
523 complain_at (*loc, _("missing `{' in `%s'"),
524 token_name (token_type));
525 obstack_sgrow (&obstack_for_string, "{}");
526 STRING_FINISH;
527 val->chars = last_string;
528 BEGIN INITIAL;
529 return token_type;
530 }
531 }
532}
533
534
e9955c83
AD
535 /*---------------------------------------------------------------.
536 | Scanning some code in braces (%union and actions). The initial |
537 | "{" is already eaten. |
538 `---------------------------------------------------------------*/
539
540<SC_BRACED_CODE>
541{
41141c56
PE
542 "{"|"<"{splice}"%" STRING_GROW; braces_level++;
543 "%"{splice}">" STRING_GROW; braces_level--;
e9955c83 544 "}" {
41141c56 545 STRING_GROW;
1a9e39f1
PE
546 braces_level--;
547 if (braces_level < 0)
e9955c83 548 {
41141c56 549 STRING_FINISH;
624a35e2 550 rule_length++;
3f2d73f1 551 loc->start = code_start;
223ff46e 552 val->chars = last_string;
a706a1cc 553 BEGIN INITIAL;
624a35e2 554 return token_type;
e9955c83
AD
555 }
556 }
557
a706a1cc
PE
558 /* Tokenize `<<%' correctly (as `<<' `%') rather than incorrectly
559 (as `<' `<%'). */
41141c56 560 "<"{splice}"<" STRING_GROW;
a706a1cc 561
624a35e2
PE
562 "$"("<"{tag}">")?(-?[0-9]+|"$") handle_dollar (token_type, yytext, *loc);
563 "@"(-?[0-9]+|"$") handle_at (token_type, yytext, *loc);
e9955c83 564
3f2d73f1 565 <<EOF>> unexpected_end_of_file (code_start, "}");
e9955c83
AD
566}
567
568
569 /*--------------------------------------------------------------.
570 | Scanning some prologue: from "%{" (already scanned) to "%}". |
571 `--------------------------------------------------------------*/
572
573<SC_PROLOGUE>
574{
575 "%}" {
41141c56 576 STRING_FINISH;
3f2d73f1 577 loc->start = code_start;
223ff46e 578 val->chars = last_string;
a706a1cc 579 BEGIN INITIAL;
e9955c83
AD
580 return PROLOGUE;
581 }
582
3f2d73f1 583 <<EOF>> unexpected_end_of_file (code_start, "%}");
e9955c83
AD
584}
585
586
587 /*---------------------------------------------------------------.
588 | Scanning the epilogue (everything after the second "%%", which |
d8d3f94a 589 | has already been eaten). |
e9955c83
AD
590 `---------------------------------------------------------------*/
591
592<SC_EPILOGUE>
593{
e9955c83 594 <<EOF>> {
41141c56 595 STRING_FINISH;
3f2d73f1 596 loc->start = code_start;
223ff46e 597 val->chars = last_string;
a706a1cc 598 BEGIN INITIAL;
e9955c83
AD
599 return EPILOGUE;
600 }
601}
602
603
a706a1cc
PE
604 /*----------------------------------------------------------------.
605 | By default, grow the string obstack with the input, escaping M4 |
606 | quoting characters. |
607 `----------------------------------------------------------------*/
608
609<SC_COMMENT,SC_LINE_COMMENT,SC_STRING,SC_CHARACTER,SC_BRACED_CODE,SC_PROLOGUE,SC_EPILOGUE>
610{
223ff46e
PE
611 \$ obstack_sgrow (&obstack_for_string, "$][");
612 \@ obstack_sgrow (&obstack_for_string, "@@");
613 \[ obstack_sgrow (&obstack_for_string, "@{");
614 \] obstack_sgrow (&obstack_for_string, "@}");
41141c56 615 .|\n STRING_GROW;
a706a1cc
PE
616}
617
618
e9955c83
AD
619%%
620
3f2d73f1
PE
621/* Set *LOC and adjust scanner cursor to account for token TOKEN of
622 size SIZE. */
6c30d641
PE
623
624static void
223ff46e 625adjust_location (location *loc, char const *token, size_t size)
6c30d641 626{
3f2d73f1
PE
627 int line = scanner_cursor.line;
628 int column = scanner_cursor.column;
6c30d641
PE
629 char const *p0 = token;
630 char const *p = token;
631 char const *lim = token + size;
632
3f2d73f1
PE
633 loc->start = scanner_cursor;
634
6c30d641
PE
635 for (p = token; p < lim; p++)
636 switch (*p)
637 {
6c30d641
PE
638 case '\n':
639 line++;
640 column = 1;
641 p0 = p + 1;
642 break;
643
644 case '\t':
645 column += mbsnwidth (p0, p - p0, 0);
646 column += 8 - ((column - 1) & 7);
647 p0 = p + 1;
648 break;
649 }
650
3f2d73f1
PE
651 scanner_cursor.line = line;
652 scanner_cursor.column = column + mbsnwidth (p0, p - p0, 0);
653
654 loc->end = scanner_cursor;
6c30d641
PE
655}
656
657
/* Read up to SIZE bytes from FP into BUF and return how many bytes
   BUF holds afterwards.  Carriage returns never reach the caller:
   both "\r\n" and a lone '\r' are delivered as a single '\n', so the
   returned count may be smaller than the number of bytes consumed
   from FP.  */

static size_t
no_cr_read (FILE *fp, char *buf, size_t size)
{
  size_t nread = fread (buf, 1, size, fp);
  char *dst;
  char const *src;
  char const *end;

  if (nread == 0)
    return 0;

  dst = memchr (buf, '\r', nread);
  if (dst == NULL)
    return nread;

  /* Everything before the first '\r' is already in place.  From here
     on SRC reads the raw bytes and DST writes the filtered ones.  */
  end = buf + nread;
  *dst++ = '\n';
  src = dst;

  for (;;)
    {
      /* A '\r' has just been turned into '\n'; drop one '\n' that
         immediately follows it.  */
      if (src == end)
        {
          /* The '\r' was the last byte read, so its '\n' partner, if
             any, is still in the stream: peek one character and push
             back anything that is not '\n'.  */
          int next = getc (fp);
          if (next != '\n')
            ungetc (next, fp);
          return dst - buf;
        }
      if (*src == '\n')
        src++;

      /* Copy bytes up to and including the next '\r', storing that
         '\r' as '\n'.  */
      for (;;)
        {
          char c;

          if (src == end)
            return dst - buf;

          c = *src++;
          if (c == '\r')
            {
              *dst++ = '\n';
              break;
            }
          *dst++ = c;
        }
    }
}
703
704
e9955c83 705/*------------------------------------------------------------------.
366eea36 706| TEXT is pointing to a wannabee semantic value (i.e., a `$'). |
e9955c83
AD
707| |
708| Possible inputs: $[<TYPENAME>]($|integer) |
709| |
223ff46e 710| Output to OBSTACK_FOR_STRING a reference to this semantic value. |
e9955c83
AD
711`------------------------------------------------------------------*/
712
624a35e2 713static inline bool
223ff46e 714handle_action_dollar (char *text, location loc)
e9955c83
AD
715{
716 const char *type_name = NULL;
366eea36 717 char *cp = text + 1;
e9955c83 718
624a35e2
PE
719 if (! current_rule)
720 return false;
721
e9955c83
AD
722 /* Get the type name if explicit. */
723 if (*cp == '<')
724 {
725 type_name = ++cp;
726 while (*cp != '>')
727 ++cp;
728 *cp = '\0';
729 ++cp;
730 }
731
732 if (*cp == '$')
733 {
734 if (!type_name)
223ff46e 735 type_name = symbol_list_n_type_name_get (current_rule, loc, 0);
e9955c83 736 if (!type_name && typed)
223ff46e 737 complain_at (loc, _("$$ of `%s' has no declared type"),
97650f4e 738 current_rule->sym->tag);
e9955c83
AD
739 if (!type_name)
740 type_name = "";
223ff46e 741 obstack_fgrow1 (&obstack_for_string,
e9955c83
AD
742 "]b4_lhs_value([%s])[", type_name);
743 }
d8d3f94a 744 else
e9955c83 745 {
d8d3f94a 746 long num;
223ff46e 747 set_errno (0);
d8d3f94a 748 num = strtol (cp, 0, 10);
e9955c83 749
223ff46e 750 if (INT_MIN <= num && num <= rule_length && ! get_errno ())
e9955c83 751 {
d8d3f94a 752 int n = num;
e9955c83 753 if (!type_name && n > 0)
223ff46e 754 type_name = symbol_list_n_type_name_get (current_rule, loc, n);
e9955c83 755 if (!type_name && typed)
223ff46e
PE
756 complain_at (loc, _("$%d of `%s' has no declared type"),
757 n, current_rule->sym->tag);
e9955c83
AD
758 if (!type_name)
759 type_name = "";
223ff46e 760 obstack_fgrow3 (&obstack_for_string,
e9955c83
AD
761 "]b4_rhs_value([%d], [%d], [%s])[",
762 rule_length, n, type_name);
763 }
d8d3f94a 764 else
223ff46e 765 complain_at (loc, _("integer out of range: %s"), quote (text));
9280d3ef 766 }
9280d3ef 767
624a35e2 768 return true;
e9955c83
AD
769}
770
f25bfb75
AD
771
772/*-----------------------------------------------------------------.
773| Dispatch onto handle_action_dollar, or handle_destructor_dollar, |
624a35e2 774| depending upon TOKEN_TYPE. |
f25bfb75 775`-----------------------------------------------------------------*/
e9955c83
AD
776
777static void
624a35e2 778handle_dollar (int token_type, char *text, location loc)
f25bfb75 779{
624a35e2 780 switch (token_type)
f25bfb75 781 {
624a35e2
PE
782 case BRACED_CODE:
783 if (handle_action_dollar (text, loc))
784 return;
f25bfb75
AD
785 break;
786
624a35e2
PE
787 case PERCENT_DESTRUCTOR:
788 case PERCENT_PRINTER:
789 if (text[1] == '$')
790 {
791 obstack_sgrow (&obstack_for_string, "]b4_dollar_dollar[");
792 return;
793 }
794 break;
795
796 default:
f25bfb75
AD
797 break;
798 }
624a35e2
PE
799
800 complain_at (loc, _("invalid value: %s"), quote (text));
f25bfb75
AD
801}
802
803
804/*------------------------------------------------------.
805| TEXT is a location token (i.e., a `@...'). Output to |
223ff46e 806| OBSTACK_FOR_STRING a reference to this location. |
f25bfb75
AD
807`------------------------------------------------------*/
808
624a35e2 809static inline bool
223ff46e 810handle_action_at (char *text, location loc)
e9955c83 811{
366eea36 812 char *cp = text + 1;
e9955c83 813 locations_flag = 1;
e9955c83 814
624a35e2
PE
815 if (! current_rule)
816 return false;
817
366eea36 818 if (*cp == '$')
624a35e2 819 obstack_sgrow (&obstack_for_string, "]b4_lhs_location[");
d8d3f94a 820 else
e9955c83 821 {
d8d3f94a 822 long num;
223ff46e 823 set_errno (0);
d8d3f94a 824 num = strtol (cp, 0, 10);
dafdc66f 825
223ff46e 826 if (INT_MIN <= num && num <= rule_length && ! get_errno ())
d8d3f94a
PE
827 {
828 int n = num;
223ff46e 829 obstack_fgrow2 (&obstack_for_string, "]b4_rhs_location([%d], [%d])[",
d8d3f94a
PE
830 rule_length, n);
831 }
e9955c83 832 else
223ff46e 833 complain_at (loc, _("integer out of range: %s"), quote (text));
f25bfb75 834 }
f25bfb75 835
624a35e2 836 return true;
e9955c83 837}
4cdb01db 838
f25bfb75
AD
839
840/*-------------------------------------------------------------------.
841| Dispatch onto handle_action_at, or handle_destructor_at, depending |
842| upon CODE_KIND. |
843`-------------------------------------------------------------------*/
844
845static void
624a35e2 846handle_at (int token_type, char *text, location loc)
f25bfb75 847{
624a35e2 848 switch (token_type)
f25bfb75 849 {
624a35e2 850 case BRACED_CODE:
223ff46e 851 handle_action_at (text, loc);
624a35e2
PE
852 return;
853
854 case PERCENT_DESTRUCTOR:
855 case PERCENT_PRINTER:
856 if (text[1] == '$')
857 {
858 obstack_sgrow (&obstack_for_string, "]b4_at_dollar[");
859 return;
860 }
f25bfb75
AD
861 break;
862
624a35e2 863 default:
f25bfb75
AD
864 break;
865 }
624a35e2
PE
866
867 complain_at (loc, _("invalid value: %s"), quote (text));
f25bfb75
AD
868}
869
870
d8d3f94a
PE
/* Convert the universal character name UCN to a single-byte
   character and return that character.  UCN points at the backslash
   of a `\uXXXX' or `\UXXXXXXXX' sequence; the hexadecimal digits are
   read starting at UCN + 2.  Return -1 if UCN does not correspond to
   a single-byte character.  */

static int
convert_ucn_to_byte (char const *ucn)
{
  unsigned long code = strtoul (ucn + 2, 0, 16);
  int byte;

  /* FIXME: Currently we assume Unicode-compatible unibyte characters
     on ASCII hosts (i.e., Latin-1 on hosts with 8-bit bytes).  On
     non-ASCII hosts we support only the portable C character set.
     These limitations should be removed once we add support for
     multibyte characters.  */

  if (UCHAR_MAX < code)
    return -1;

  /* CODE is at most UCHAR_MAX here, so it is kept in an int from now
     on.  This avoids storing -1 in an unsigned long and converting it
     back to int on return.  */
  byte = (int) code;

#if ! ('$' == 0x24 && '@' == 0x40 && '`' == 0x60 && '~' == 0x7e)
  {
    /* A non-ASCII host.  Use CODE to index into a table of the C
       basic execution character set, which is guaranteed to exist on
       all Standard C platforms.  This table also includes '$', '@',
       and '`', which are not in the basic execution character set but
       which are unibyte characters on all the platforms that we know
       about.  */
    static signed char const table[] =
      {
        '\0',   -1,   -1,   -1,   -1,   -1,   -1, '\a',
        '\b', '\t', '\n', '\v', '\f', '\r',   -1,   -1,
          -1,   -1,   -1,   -1,   -1,   -1,   -1,   -1,
          -1,   -1,   -1,   -1,   -1,   -1,   -1,   -1,
         ' ',  '!',  '"',  '#',  '$',  '%',  '&', '\'',
         '(',  ')',  '*',  '+',  ',',  '-',  '.',  '/',
         '0',  '1',  '2',  '3',  '4',  '5',  '6',  '7',
         '8',  '9',  ':',  ';',  '<',  '=',  '>',  '?',
         '@',  'A',  'B',  'C',  'D',  'E',  'F',  'G',
         'H',  'I',  'J',  'K',  'L',  'M',  'N',  'O',
         'P',  'Q',  'R',  'S',  'T',  'U',  'V',  'W',
         'X',  'Y',  'Z',  '[', '\\',  ']',  '^',  '_',
         '`',  'a',  'b',  'c',  'd',  'e',  'f',  'g',
         'h',  'i',  'j',  'k',  'l',  'm',  'n',  'o',
         'p',  'q',  'r',  's',  't',  'u',  'v',  'w',
         'x',  'y',  'z',  '{',  '|',  '}',  '~'
      };

    byte = code < sizeof table ? table[code] : -1;
  }
#endif

  return byte;
}
925
926
900c5db5
AD
927/*----------------------------------------------------------------.
928| Handle `#line INT "FILE"'. ARGS has already skipped `#line '. |
929`----------------------------------------------------------------*/
930
931static void
3f2d73f1 932handle_syncline (char *args)
900c5db5
AD
933{
934 int lineno = strtol (args, &args, 10);
935 const char *file = NULL;
936 file = strchr (args, '"') + 1;
937 *strchr (file, '"') = 0;
3f2d73f1
PE
938 scanner_cursor.file = current_file = xstrdup (file);
939 scanner_cursor.line = lineno;
940 scanner_cursor.column = 1;
900c5db5
AD
941}
942
a706a1cc 943
3f2d73f1
PE
944/*------------------------------------------------------------------------.
945| Report an unexpected EOF in a token or comment starting at START. |
946| An end of file was encountered and the expected TOKEN_END was missing. |
947| After reporting the problem, pretend that TOKEN_END was found. |
948`------------------------------------------------------------------------*/
a706a1cc
PE
949
950static void
3f2d73f1 951unexpected_end_of_file (boundary start, char const *token_end)
a706a1cc 952{
345532d7 953 size_t i = strlen (token_end);
a706a1cc 954
223ff46e
PE
955 location loc;
956 loc.start = start;
957 loc.end = scanner_cursor;
958 complain_at (loc, _("missing `%s' at end of file"), token_end);
345532d7 959
3f2d73f1
PE
960 /* Adjust scanner cursor so that any later message does not count
961 the characters about to be inserted. */
962 scanner_cursor.column -= i;
345532d7
PE
963
964 while (i != 0)
965 unput (token_end[--i]);
a706a1cc
PE
966}
967
968
f25bfb75
AD
969/*-------------------------.
970| Initialize the scanner. |
971`-------------------------*/
972
1d6412ad
AD
973void
974scanner_initialize (void)
975{
223ff46e 976 obstack_init (&obstack_for_string);
1d6412ad
AD
977}
978
979
f25bfb75
AD
980/*-----------------------------------------------.
981| Free all the memory allocated to the scanner. |
982`-----------------------------------------------*/
983
4cdb01db
AD
984void
985scanner_free (void)
986{
223ff46e 987 obstack_free (&obstack_for_string, 0);
536545f3
AD
988 /* Reclaim Flex's buffers. */
989 yy_delete_buffer (YY_CURRENT_BUFFER);
4cdb01db 990}