]> git.saurik.com Git - bison.git/blame - src/scan-gram.l
Update copyright.
[bison.git] / src / scan-gram.l
CommitLineData
e9955c83 1/* Bison Grammar Scanner -*- C -*-
3b1e470c 2
a737b216 3 Copyright (C) 2002, 2003 Free Software Foundation, Inc.
e9955c83
AD
4
5 This file is part of Bison, the GNU Compiler Compiler.
6
7 This program is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 2 of the License, or
10 (at your option) any later version.
11
12 This program is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
16
17 You should have received a copy of the GNU General Public License
18 along with this program; if not, write to the Free Software
19 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
20 02111-1307 USA
21*/
22
a706a1cc 23%option debug nodefault noyywrap never-interactive
e9955c83
AD
24%option prefix="gram_" outfile="lex.yy.c"
25
26%{
27#include "system.h"
223ff46e
PE
28
29#include <mbswidth.h>
30#include <get-errno.h>
31#include <quote.h>
32
e9955c83 33#include "complain.h"
3f2d73f1 34#include "files.h"
e9955c83
AD
35#include "getargs.h"
36#include "gram.h"
37#include "reader.h"
223ff46e 38#include "uniqstr.h"
e9955c83 39
3f2d73f1
PE
40#define YY_USER_INIT \
41 do \
42 { \
43 scanner_cursor.file = current_file; \
44 scanner_cursor.line = 1; \
45 scanner_cursor.column = 1; \
46 } \
47 while (0)
8efe435c 48
3f2d73f1
PE
49/* Location of scanner cursor. */
50boundary scanner_cursor;
41141c56 51
223ff46e 52static void adjust_location (location *, char const *, size_t);
3f2d73f1 53#define YY_USER_ACTION adjust_location (loc, yytext, yyleng);
d8d3f94a 54
6c30d641 55static size_t no_cr_read (FILE *, char *, size_t);
d8d3f94a
PE
56#define YY_INPUT(buf, result, size) ((result) = no_cr_read (yyin, buf, size))
57
58
223ff46e 59/* OBSTACK_FOR_STRING -- Used to store all the characters that we need to
44995b2e
AD
60 keep (to construct ID, STRINGS etc.). Use the following macros to
61 use it.
62
41141c56
PE
63 Use STRING_GROW to append what has just been matched, and
64 STRING_FINISH to end the string (it puts the ending 0).
65 STRING_FINISH also stores this string in LAST_STRING, which can be
66 used, and which is used by STRING_FREE to free the last string. */
44995b2e 67
223ff46e 68static struct obstack obstack_for_string;
44995b2e 69
7ec2d4cd
AD
70/* A string representing the most recently saved token. */
71static char *last_string;
72
73
41141c56 74#define STRING_GROW \
223ff46e 75 obstack_grow (&obstack_for_string, yytext, yyleng)
44995b2e 76
41141c56 77#define STRING_FINISH \
44995b2e 78 do { \
223ff46e
PE
79 obstack_1grow (&obstack_for_string, '\0'); \
80 last_string = obstack_finish (&obstack_for_string); \
44995b2e
AD
81 } while (0)
82
41141c56 83#define STRING_FREE \
223ff46e 84 obstack_free (&obstack_for_string, last_string)
e9955c83 85
7ec2d4cd
AD
86void
87scanner_last_string_free (void)
88{
41141c56 89 STRING_FREE;
7ec2d4cd 90}
e9955c83 91
efcb44dd
PE
92/* Within well-formed rules, RULE_LENGTH is the number of values in
93 the current rule so far, which says where to find `$0' with respect
94 to the top of the stack. It is not the same as the rule->length in
95 the case of mid rule actions.
96
97 Outside of well-formed rules, RULE_LENGTH has an undefined value. */
98static int rule_length;
99
624a35e2
PE
100static void handle_dollar (int token_type, char *cp, location loc);
101static void handle_at (int token_type, char *cp, location loc);
3f2d73f1 102static void handle_syncline (char *args);
d8d3f94a 103static int convert_ucn_to_byte (char const *hex_text);
3f2d73f1 104static void unexpected_end_of_file (boundary, char const *);
e9955c83
AD
105
106%}
d8d3f94a 107%x SC_COMMENT SC_LINE_COMMENT SC_YACC_COMMENT
e9955c83 108%x SC_STRING SC_CHARACTER
3f2d73f1 109%x SC_AFTER_IDENTIFIER
e9955c83 110%x SC_ESCAPED_STRING SC_ESCAPED_CHARACTER
624a35e2 111%x SC_PRE_CODE SC_BRACED_CODE SC_PROLOGUE SC_EPILOGUE
e9955c83 112
29c01725
AD
113letter [.abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ_]
114id {letter}({letter}|[0-9])*
115directive %{letter}({letter}|[0-9]|-)*
624a35e2 116int [0-9]+
d8d3f94a
PE
117
118/* POSIX says that a tag must be both an id and a C union member, but
119 historically almost any character is allowed in a tag. We disallow
120 NUL and newline, as this simplifies our implementation. */
121tag [^\0\n>]+
122
123/* Zero or more instances of backslash-newline. Following GCC, allow
124 white space between the backslash and the newline. */
125splice (\\[ \f\t\v]*\n)*
e9955c83
AD
126
127%%
128%{
a706a1cc 129 /* Nesting level of the current code in braces. */
1a9e39f1
PE
130 int braces_level IF_LINT (= 0);
131
3f2d73f1
PE
132 /* Parent context state, when applicable. */
133 int context_state IF_LINT (= 0);
a706a1cc 134
624a35e2
PE
135 /* Token type to return, when applicable. */
136 int token_type IF_LINT (= 0);
137
3f2d73f1 138 /* Location of most recent identifier, when applicable. */
a2bc9dbc 139 location id_loc IF_LINT (= empty_location);
3f2d73f1 140
a2bc9dbc
PE
141 /* Where containing code started, when applicable. Its initial
142 value is relevant only when yylex is invoked in the SC_EPILOGUE
143 start condition. */
144 boundary code_start = scanner_cursor;
3f2d73f1 145
223ff46e
PE
146 /* Where containing comment or string or character literal started,
147 when applicable. */
a2bc9dbc 148 boundary token_start IF_LINT (= scanner_cursor);
e9955c83
AD
149%}
150
151
3f2d73f1
PE
152 /*-----------------------.
153 | Scanning white space. |
154 `-----------------------*/
155
624a35e2 156<INITIAL,SC_AFTER_IDENTIFIER,SC_PRE_CODE>
3f2d73f1
PE
157{
158 [ \f\n\t\v] ;
83adb046 159 "," warn_at (*loc, _("stray `,' treated as white space"));
3f2d73f1
PE
160
161 /* Comments. */
3f2d73f1 162 "//".* ;
83adb046
PE
163 "/*" {
164 token_start = loc->start;
165 context_state = YY_START;
166 BEGIN SC_YACC_COMMENT;
167 }
3f2d73f1
PE
168
169 /* #line directives are not documented, and may be withdrawn or
170 modified in future versions of Bison. */
171 ^"#line "{int}" \"".*"\"\n" {
172 handle_syncline (yytext + sizeof "#line " - 1);
173 }
174}
175
176
e9955c83
AD
177 /*----------------------------.
178 | Scanning Bison directives. |
179 `----------------------------*/
180<INITIAL>
181{
182 "%binary" return PERCENT_NONASSOC;
183 "%debug" return PERCENT_DEBUG;
184 "%define" return PERCENT_DEFINE;
185 "%defines" return PERCENT_DEFINES;
624a35e2 186 "%destructor" token_type = PERCENT_DESTRUCTOR; BEGIN SC_PRE_CODE;
676385e2 187 "%dprec" return PERCENT_DPREC;
e9955c83
AD
188 "%error"[-_]"verbose" return PERCENT_ERROR_VERBOSE;
189 "%expect" return PERCENT_EXPECT;
190 "%file-prefix" return PERCENT_FILE_PREFIX;
191 "%fixed"[-_]"output"[-_]"files" return PERCENT_YACC;
ae7453f2 192 "%glr-parser" return PERCENT_GLR_PARSER;
e9955c83 193 "%left" return PERCENT_LEFT;
624a35e2 194 "%lex-param" token_type = PERCENT_LEX_PARAM; BEGIN SC_PRE_CODE;
e9955c83 195 "%locations" return PERCENT_LOCATIONS;
676385e2 196 "%merge" return PERCENT_MERGE;
e9955c83
AD
197 "%name"[-_]"prefix" return PERCENT_NAME_PREFIX;
198 "%no"[-_]"lines" return PERCENT_NO_LINES;
199 "%nonassoc" return PERCENT_NONASSOC;
200 "%nterm" return PERCENT_NTERM;
201 "%output" return PERCENT_OUTPUT;
624a35e2 202 "%parse-param" token_type = PERCENT_PARSE_PARAM; BEGIN SC_PRE_CODE;
d8d3f94a 203 "%prec" rule_length--; return PERCENT_PREC;
624a35e2 204 "%printer" token_type = PERCENT_PRINTER; BEGIN SC_PRE_CODE;
e9955c83
AD
205 "%pure"[-_]"parser" return PERCENT_PURE_PARSER;
206 "%right" return PERCENT_RIGHT;
207 "%skeleton" return PERCENT_SKELETON;
208 "%start" return PERCENT_START;
209 "%term" return PERCENT_TOKEN;
210 "%token" return PERCENT_TOKEN;
211 "%token"[-_]"table" return PERCENT_TOKEN_TABLE;
212 "%type" return PERCENT_TYPE;
624a35e2 213 "%union" token_type = PERCENT_UNION; BEGIN SC_PRE_CODE;
e9955c83
AD
214 "%verbose" return PERCENT_VERBOSE;
215 "%yacc" return PERCENT_YACC;
216
3f2d73f1 217 {directive} {
41141c56 218 complain_at (*loc, _("invalid directive: %s"), quote (yytext));
412f8a59 219 }
900c5db5 220
e9955c83 221 "=" return EQUAL;
d8d3f94a 222 "|" rule_length = 0; return PIPE;
e9955c83
AD
223 ";" return SEMICOLON;
224
3f2d73f1 225 {id} {
41141c56 226 val->symbol = symbol_get (yytext, *loc);
3f2d73f1 227 id_loc = *loc;
efcb44dd 228 rule_length++;
3f2d73f1 229 BEGIN SC_AFTER_IDENTIFIER;
e9955c83
AD
230 }
231
d8d3f94a
PE
232 {int} {
233 unsigned long num;
223ff46e 234 set_errno (0);
d8d3f94a 235 num = strtoul (yytext, 0, 10);
223ff46e 236 if (INT_MAX < num || get_errno ())
d8d3f94a 237 {
41141c56 238 complain_at (*loc, _("integer out of range: %s"), quote (yytext));
d8d3f94a
PE
239 num = INT_MAX;
240 }
41141c56 241 val->integer = num;
d8d3f94a
PE
242 return INT;
243 }
e9955c83
AD
244
245 /* Characters. We don't check there is only one. */
3f2d73f1 246 "'" STRING_GROW; token_start = loc->start; BEGIN SC_ESCAPED_CHARACTER;
e9955c83
AD
247
248 /* Strings. */
3f2d73f1 249 "\"" STRING_GROW; token_start = loc->start; BEGIN SC_ESCAPED_STRING;
e9955c83
AD
250
251 /* Prologue. */
3f2d73f1 252 "%{" code_start = loc->start; BEGIN SC_PROLOGUE;
e9955c83
AD
253
254 /* Code in between braces. */
3f2d73f1
PE
255 "{" {
256 STRING_GROW;
624a35e2 257 token_type = BRACED_CODE;
3f2d73f1
PE
258 braces_level = 0;
259 code_start = loc->start;
260 BEGIN SC_BRACED_CODE;
261 }
e9955c83
AD
262
263 /* A type. */
d8d3f94a 264 "<"{tag}">" {
223ff46e 265 obstack_grow (&obstack_for_string, yytext + 1, yyleng - 2);
41141c56 266 STRING_FINISH;
223ff46e 267 val->uniqstr = uniqstr_new (last_string);
41141c56 268 STRING_FREE;
4cdb01db
AD
269 return TYPE;
270 }
271
a706a1cc
PE
272 "%%" {
273 static int percent_percent_count;
e9955c83 274 if (++percent_percent_count == 2)
a2bc9dbc 275 BEGIN SC_EPILOGUE;
e9955c83
AD
276 return PERCENT_PERCENT;
277 }
278
a706a1cc 279 . {
41141c56 280 complain_at (*loc, _("invalid character: %s"), quote (yytext));
3f2d73f1
PE
281 }
282}
283
284
285 /*-----------------------------------------------------------------.
286 | Scanning after an identifier, checking whether a colon is next. |
287 `-----------------------------------------------------------------*/
288
289<SC_AFTER_IDENTIFIER>
290{
291 ":" {
292 rule_length = 0;
293 *loc = id_loc;
294 BEGIN INITIAL;
295 return ID_COLON;
296 }
297 . {
298 scanner_cursor.column -= mbsnwidth (yytext, yyleng, 0);
299 yyless (0);
300 *loc = id_loc;
301 BEGIN INITIAL;
302 return ID;
303 }
304 <<EOF>> {
305 *loc = id_loc;
306 BEGIN INITIAL;
307 return ID;
e9955c83
AD
308 }
309}
310
311
d8d3f94a
PE
312 /*---------------------------------------------------------------.
313 | Scanning a Yacc comment. The initial `/ *' is already eaten. |
314 `---------------------------------------------------------------*/
e9955c83 315
d8d3f94a 316<SC_YACC_COMMENT>
e9955c83 317{
3f2d73f1 318 "*/" BEGIN context_state;
a706a1cc 319 .|\n ;
3f2d73f1 320 <<EOF>> unexpected_end_of_file (token_start, "*/");
d8d3f94a
PE
321}
322
323
324 /*------------------------------------------------------------.
325 | Scanning a C comment. The initial `/ *' is already eaten. |
326 `------------------------------------------------------------*/
327
328<SC_COMMENT>
329{
3f2d73f1
PE
330 "*"{splice}"/" STRING_GROW; BEGIN context_state;
331 <<EOF>> unexpected_end_of_file (token_start, "*/");
e9955c83
AD
332}
333
334
d8d3f94a
PE
335 /*--------------------------------------------------------------.
336 | Scanning a line comment. The initial `//' is already eaten. |
337 `--------------------------------------------------------------*/
338
339<SC_LINE_COMMENT>
340{
3f2d73f1 341 "\n" STRING_GROW; BEGIN context_state;
41141c56 342 {splice} STRING_GROW;
3f2d73f1 343 <<EOF>> BEGIN context_state;
d8d3f94a
PE
344}
345
346
e9955c83
AD
347 /*----------------------------------------------------------------.
348 | Scanning a C string, including its escapes. The initial `"' is |
349 | already eaten. |
350 `----------------------------------------------------------------*/
351
352<SC_ESCAPED_STRING>
353{
db2cc12f 354 "\"" {
41141c56
PE
355 STRING_GROW;
356 STRING_FINISH;
3f2d73f1 357 loc->start = token_start;
223ff46e 358 val->chars = last_string;
efcb44dd 359 rule_length++;
a706a1cc 360 BEGIN INITIAL;
e9955c83
AD
361 return STRING;
362 }
363
41141c56 364 .|\n STRING_GROW;
3f2d73f1 365 <<EOF>> unexpected_end_of_file (token_start, "\"");
e9955c83
AD
366}
367
368 /*---------------------------------------------------------------.
369 | Scanning a C character, decoding its escapes. The initial "'" |
370 | is already eaten. |
371 `---------------------------------------------------------------*/
372
373<SC_ESCAPED_CHARACTER>
374{
db2cc12f 375 "'" {
3b1e470c 376 unsigned char last_string_1;
41141c56
PE
377 STRING_GROW;
378 STRING_FINISH;
3f2d73f1 379 loc->start = token_start;
41141c56
PE
380 val->symbol = symbol_get (last_string, *loc);
381 symbol_class_set (val->symbol, token_sym, *loc);
3b1e470c
PE
382 last_string_1 = last_string[1];
383 symbol_user_token_number_set (val->symbol, last_string_1, *loc);
41141c56 384 STRING_FREE;
a706a1cc
PE
385 rule_length++;
386 BEGIN INITIAL;
387 return ID;
e9955c83 388 }
a706a1cc 389
41141c56 390 .|\n STRING_GROW;
3f2d73f1 391 <<EOF>> unexpected_end_of_file (token_start, "'");
e9955c83
AD
392}
393
394
395 /*----------------------------.
396 | Decode escaped characters. |
397 `----------------------------*/
398
399<SC_ESCAPED_STRING,SC_ESCAPED_CHARACTER>
400{
d8d3f94a
PE
401 \\[0-7]{1,3} {
402 unsigned long c = strtoul (yytext + 1, 0, 8);
403 if (UCHAR_MAX < c)
3f2d73f1 404 complain_at (*loc, _("invalid escape sequence: %s"), quote (yytext));
e9955c83 405 else
223ff46e 406 obstack_1grow (&obstack_for_string, c);
e9955c83
AD
407 }
408
6b0d38ab 409 \\x[0-9abcdefABCDEF]+ {
d8d3f94a 410 unsigned long c;
223ff46e 411 set_errno (0);
d8d3f94a 412 c = strtoul (yytext + 2, 0, 16);
223ff46e 413 if (UCHAR_MAX < c || get_errno ())
3f2d73f1 414 complain_at (*loc, _("invalid escape sequence: %s"), quote (yytext));
d8d3f94a 415 else
223ff46e 416 obstack_1grow (&obstack_for_string, c);
e9955c83
AD
417 }
418
223ff46e
PE
419 \\a obstack_1grow (&obstack_for_string, '\a');
420 \\b obstack_1grow (&obstack_for_string, '\b');
421 \\f obstack_1grow (&obstack_for_string, '\f');
422 \\n obstack_1grow (&obstack_for_string, '\n');
423 \\r obstack_1grow (&obstack_for_string, '\r');
424 \\t obstack_1grow (&obstack_for_string, '\t');
425 \\v obstack_1grow (&obstack_for_string, '\v');
412f8a59
PE
426
427 /* \\[\"\'?\\] would be shorter, but it confuses xgettext. */
223ff46e 428 \\("\""|"'"|"?"|"\\") obstack_1grow (&obstack_for_string, yytext[1]);
412f8a59 429
6b0d38ab 430 \\(u|U[0-9abcdefABCDEF]{4})[0-9abcdefABCDEF]{4} {
d8d3f94a
PE
431 int c = convert_ucn_to_byte (yytext);
432 if (c < 0)
3f2d73f1 433 complain_at (*loc, _("invalid escape sequence: %s"), quote (yytext));
d8d3f94a 434 else
223ff46e 435 obstack_1grow (&obstack_for_string, c);
d8d3f94a 436 }
4f25ebb0 437 \\(.|\n) {
3f2d73f1 438 complain_at (*loc, _("unrecognized escape sequence: %s"), quote (yytext));
41141c56 439 STRING_GROW;
e9955c83
AD
440 }
441}
442
443
444 /*----------------------------------------------------------.
445 | Scanning a C character without decoding its escapes. The |
446 | initial "'" is already eaten. |
447 `----------------------------------------------------------*/
448
449<SC_CHARACTER>
450{
3f2d73f1 451 "'" STRING_GROW; BEGIN context_state;
41141c56 452 \\{splice}[^$@\[\]] STRING_GROW;
3f2d73f1 453 <<EOF>> unexpected_end_of_file (token_start, "'");
e9955c83
AD
454}
455
456
457 /*----------------------------------------------------------------.
458 | Scanning a C string, without decoding its escapes. The initial |
459 | `"' is already eaten. |
460 `----------------------------------------------------------------*/
461
462<SC_STRING>
463{
3f2d73f1 464 "\"" STRING_GROW; BEGIN context_state;
41141c56 465 \\{splice}[^$@\[\]] STRING_GROW;
3f2d73f1 466 <<EOF>> unexpected_end_of_file (token_start, "\"");
e9955c83
AD
467}
468
469
470 /*---------------------------------------------------.
471 | Strings, comments etc. can be found in user code. |
472 `---------------------------------------------------*/
473
474<SC_BRACED_CODE,SC_PROLOGUE,SC_EPILOGUE>
475{
3f2d73f1
PE
476 "'" {
477 STRING_GROW;
478 context_state = YY_START;
479 token_start = loc->start;
480 BEGIN SC_CHARACTER;
481 }
482 "\"" {
483 STRING_GROW;
484 context_state = YY_START;
485 token_start = loc->start;
486 BEGIN SC_STRING;
487 }
488 "/"{splice}"*" {
489 STRING_GROW;
490 context_state = YY_START;
491 token_start = loc->start;
492 BEGIN SC_COMMENT;
493 }
494 "/"{splice}"/" {
495 STRING_GROW;
496 context_state = YY_START;
497 BEGIN SC_LINE_COMMENT;
498 }
e9955c83
AD
499}
500
501
624a35e2
PE
502 /*---------------------------------------------------------------.
503 | Scanning after %union etc., possibly followed by white space. |
504 | For %union only, allow arbitrary C code to appear before the |
505 | following brace, as an extension to POSIX. |
506 `---------------------------------------------------------------*/
507
508<SC_PRE_CODE>
509{
510 . {
511 bool valid = yytext[0] == '{' || token_type == PERCENT_UNION;
512 scanner_cursor.column -= mbsnwidth (yytext, yyleng, 0);
513 yyless (0);
514
515 if (valid)
516 {
517 braces_level = -1;
518 code_start = loc->start;
519 BEGIN SC_BRACED_CODE;
520 }
521 else
522 {
523 complain_at (*loc, _("missing `{' in `%s'"),
524 token_name (token_type));
525 obstack_sgrow (&obstack_for_string, "{}");
526 STRING_FINISH;
527 val->chars = last_string;
528 BEGIN INITIAL;
529 return token_type;
530 }
531 }
532}
533
534
e9955c83
AD
535 /*---------------------------------------------------------------.
536 | Scanning some code in braces (%union and actions). The initial |
537 | "{" is already eaten. |
538 `---------------------------------------------------------------*/
539
540<SC_BRACED_CODE>
541{
41141c56
PE
542 "{"|"<"{splice}"%" STRING_GROW; braces_level++;
543 "%"{splice}">" STRING_GROW; braces_level--;
e9955c83 544 "}" {
25522739
PE
545 bool outer_brace = --braces_level < 0;
546
547 /* As an undocumented Bison extension, append `;' before the last
548 brace in braced code, so that the user code can omit trailing
549 `;'. But do not append `;' if emulating Yacc, since Yacc does
550 not append one.
551
552 FIXME: Bison should warn if a semicolon seems to be necessary
553 here, and should omit the semicolon if it seems unnecessary
554 (e.g., after ';', '{', or '}', each followed by comments or
555 white space). Such a warning shouldn't depend on --yacc; it
556 should depend on a new --pedantic option, which would cause
557 Bison to warn if it detects an extension to POSIX. --pedantic
558 should also diagnose other Bison extensions like %yacc.
559 Perhaps there should also be a GCC-style --pedantic-errors
560 option, so that such warnings are diagnosed as errors. */
1deb9bdc 561 if (outer_brace && token_type == BRACED_CODE && ! yacc_flag)
25522739
PE
562 obstack_1grow (&obstack_for_string, ';');
563
564 obstack_1grow (&obstack_for_string, '}');
565
566 if (outer_brace)
e9955c83 567 {
41141c56 568 STRING_FINISH;
624a35e2 569 rule_length++;
3f2d73f1 570 loc->start = code_start;
223ff46e 571 val->chars = last_string;
a706a1cc 572 BEGIN INITIAL;
624a35e2 573 return token_type;
e9955c83
AD
574 }
575 }
576
a706a1cc
PE
577 /* Tokenize `<<%' correctly (as `<<' `%') rather than incorrectly
578 (as `<' `<%'). */
41141c56 579 "<"{splice}"<" STRING_GROW;
a706a1cc 580
624a35e2
PE
581 "$"("<"{tag}">")?(-?[0-9]+|"$") handle_dollar (token_type, yytext, *loc);
582 "@"(-?[0-9]+|"$") handle_at (token_type, yytext, *loc);
e9955c83 583
3f2d73f1 584 <<EOF>> unexpected_end_of_file (code_start, "}");
e9955c83
AD
585}
586
587
588 /*--------------------------------------------------------------.
589 | Scanning some prologue: from "%{" (already scanned) to "%}". |
590 `--------------------------------------------------------------*/
591
592<SC_PROLOGUE>
593{
594 "%}" {
41141c56 595 STRING_FINISH;
3f2d73f1 596 loc->start = code_start;
223ff46e 597 val->chars = last_string;
a706a1cc 598 BEGIN INITIAL;
e9955c83
AD
599 return PROLOGUE;
600 }
601
3f2d73f1 602 <<EOF>> unexpected_end_of_file (code_start, "%}");
e9955c83
AD
603}
604
605
606 /*---------------------------------------------------------------.
607 | Scanning the epilogue (everything after the second "%%", which |
d8d3f94a 608 | has already been eaten). |
e9955c83
AD
609 `---------------------------------------------------------------*/
610
611<SC_EPILOGUE>
612{
e9955c83 613 <<EOF>> {
41141c56 614 STRING_FINISH;
3f2d73f1 615 loc->start = code_start;
223ff46e 616 val->chars = last_string;
a706a1cc 617 BEGIN INITIAL;
e9955c83
AD
618 return EPILOGUE;
619 }
620}
621
622
a706a1cc
PE
623 /*----------------------------------------------------------------.
624 | By default, grow the string obstack with the input, escaping M4 |
625 | quoting characters. |
626 `----------------------------------------------------------------*/
627
628<SC_COMMENT,SC_LINE_COMMENT,SC_STRING,SC_CHARACTER,SC_BRACED_CODE,SC_PROLOGUE,SC_EPILOGUE>
629{
223ff46e
PE
630 \$ obstack_sgrow (&obstack_for_string, "$][");
631 \@ obstack_sgrow (&obstack_for_string, "@@");
632 \[ obstack_sgrow (&obstack_for_string, "@{");
633 \] obstack_sgrow (&obstack_for_string, "@}");
41141c56 634 .|\n STRING_GROW;
a706a1cc
PE
635}
636
637
e9955c83
AD
638%%
639
3f2d73f1
PE
640/* Set *LOC and adjust scanner cursor to account for token TOKEN of
641 size SIZE. */
6c30d641
PE
642
643static void
223ff46e 644adjust_location (location *loc, char const *token, size_t size)
6c30d641 645{
3f2d73f1
PE
646 int line = scanner_cursor.line;
647 int column = scanner_cursor.column;
6c30d641
PE
648 char const *p0 = token;
649 char const *p = token;
650 char const *lim = token + size;
651
3f2d73f1
PE
652 loc->start = scanner_cursor;
653
6c30d641
PE
654 for (p = token; p < lim; p++)
655 switch (*p)
656 {
6c30d641
PE
657 case '\n':
658 line++;
659 column = 1;
660 p0 = p + 1;
661 break;
662
663 case '\t':
664 column += mbsnwidth (p0, p - p0, 0);
665 column += 8 - ((column - 1) & 7);
666 p0 = p + 1;
667 break;
668 }
669
3f2d73f1
PE
670 scanner_cursor.line = line;
671 scanner_cursor.column = column + mbsnwidth (p0, p - p0, 0);
672
673 loc->end = scanner_cursor;
6c30d641
PE
674}
675
676
677/* Read bytes from FP into buffer BUF of size SIZE. Return the
678 number of bytes read. Remove '\r' from input, treating \r\n
679 and isolated \r as \n. */
680
static size_t
no_cr_read (FILE *fp, char *buf, size_t size)
{
  size_t nread = fread (buf, 1, size, fp);
  char *dst;
  char const *src;
  char const *end;

  if (nread == 0)
    return nread;

  dst = memchr (buf, '\r', nread);
  if (! dst)
    return nread;		/* No '\r' at all: nothing to rewrite.  */

  /* DST always points one past the slot of the '\r' being handled;
     SRC is the next unread input byte.  Both advance through BUF, with
     DST never ahead of SRC.  */
  dst++;
  src = dst;
  end = buf + nread;

  for (;;)
    {
      /* Found an '\r'.  Treat it like '\n', but ignore any '\n' that
	 immediately follows.  */
      dst[-1] = '\n';
      if (src == end)
	{
	  /* The '\r' was the last byte read, so the '\n' to swallow,
	     if any, is still in the stream.  Push back anything else;
	     stop if the push-back fails.  */
	  int ch = getc (fp);
	  if (ch != '\n' && ungetc (ch, fp) != ch)
	    break;
	}
      else if (*src == '\n')
	src++;

      /* Copy until the next '\r' (inclusive) or the end of the data.  */
      for (;;)
	{
	  char c;
	  if (src == end)
	    return dst - buf;
	  c = *src++;
	  *dst++ = c;
	  if (c == '\r')
	    break;
	}
    }

  return dst - buf;
}
722
723
e9955c83 724/*------------------------------------------------------------------.
366eea36 725| TEXT is pointing to a wannabee semantic value (i.e., a `$'). |
e9955c83
AD
726| |
727| Possible inputs: $[<TYPENAME>]($|integer) |
728| |
223ff46e 729| Output to OBSTACK_FOR_STRING a reference to this semantic value. |
e9955c83
AD
730`------------------------------------------------------------------*/
731
624a35e2 732static inline bool
223ff46e 733handle_action_dollar (char *text, location loc)
e9955c83
AD
734{
735 const char *type_name = NULL;
366eea36 736 char *cp = text + 1;
e9955c83 737
624a35e2
PE
738 if (! current_rule)
739 return false;
740
e9955c83
AD
741 /* Get the type name if explicit. */
742 if (*cp == '<')
743 {
744 type_name = ++cp;
745 while (*cp != '>')
746 ++cp;
747 *cp = '\0';
748 ++cp;
749 }
750
751 if (*cp == '$')
752 {
753 if (!type_name)
223ff46e 754 type_name = symbol_list_n_type_name_get (current_rule, loc, 0);
e9955c83 755 if (!type_name && typed)
223ff46e 756 complain_at (loc, _("$$ of `%s' has no declared type"),
97650f4e 757 current_rule->sym->tag);
e9955c83
AD
758 if (!type_name)
759 type_name = "";
223ff46e 760 obstack_fgrow1 (&obstack_for_string,
e9955c83
AD
761 "]b4_lhs_value([%s])[", type_name);
762 }
d8d3f94a 763 else
e9955c83 764 {
d8d3f94a 765 long num;
223ff46e 766 set_errno (0);
d8d3f94a 767 num = strtol (cp, 0, 10);
e9955c83 768
223ff46e 769 if (INT_MIN <= num && num <= rule_length && ! get_errno ())
e9955c83 770 {
d8d3f94a 771 int n = num;
e9955c83 772 if (!type_name && n > 0)
223ff46e 773 type_name = symbol_list_n_type_name_get (current_rule, loc, n);
e9955c83 774 if (!type_name && typed)
223ff46e
PE
775 complain_at (loc, _("$%d of `%s' has no declared type"),
776 n, current_rule->sym->tag);
e9955c83
AD
777 if (!type_name)
778 type_name = "";
223ff46e 779 obstack_fgrow3 (&obstack_for_string,
e9955c83
AD
780 "]b4_rhs_value([%d], [%d], [%s])[",
781 rule_length, n, type_name);
782 }
d8d3f94a 783 else
223ff46e 784 complain_at (loc, _("integer out of range: %s"), quote (text));
9280d3ef 785 }
9280d3ef 786
624a35e2 787 return true;
e9955c83
AD
788}
789
f25bfb75
AD
790
791/*-----------------------------------------------------------------.
792| Dispatch onto handle_action_dollar, or handle_destructor_dollar, |
624a35e2 793| depending upon TOKEN_TYPE. |
f25bfb75 794`-----------------------------------------------------------------*/
e9955c83
AD
795
796static void
624a35e2 797handle_dollar (int token_type, char *text, location loc)
f25bfb75 798{
624a35e2 799 switch (token_type)
f25bfb75 800 {
624a35e2
PE
801 case BRACED_CODE:
802 if (handle_action_dollar (text, loc))
803 return;
f25bfb75
AD
804 break;
805
624a35e2
PE
806 case PERCENT_DESTRUCTOR:
807 case PERCENT_PRINTER:
808 if (text[1] == '$')
809 {
810 obstack_sgrow (&obstack_for_string, "]b4_dollar_dollar[");
811 return;
812 }
813 break;
814
815 default:
f25bfb75
AD
816 break;
817 }
624a35e2
PE
818
819 complain_at (loc, _("invalid value: %s"), quote (text));
f25bfb75
AD
820}
821
822
823/*------------------------------------------------------.
824| TEXT is a location token (i.e., a `@...'). Output to |
223ff46e 825| OBSTACK_FOR_STRING a reference to this location. |
f25bfb75
AD
826`------------------------------------------------------*/
827
624a35e2 828static inline bool
223ff46e 829handle_action_at (char *text, location loc)
e9955c83 830{
366eea36 831 char *cp = text + 1;
e9955c83 832 locations_flag = 1;
e9955c83 833
624a35e2
PE
834 if (! current_rule)
835 return false;
836
366eea36 837 if (*cp == '$')
624a35e2 838 obstack_sgrow (&obstack_for_string, "]b4_lhs_location[");
d8d3f94a 839 else
e9955c83 840 {
d8d3f94a 841 long num;
223ff46e 842 set_errno (0);
d8d3f94a 843 num = strtol (cp, 0, 10);
dafdc66f 844
223ff46e 845 if (INT_MIN <= num && num <= rule_length && ! get_errno ())
d8d3f94a
PE
846 {
847 int n = num;
223ff46e 848 obstack_fgrow2 (&obstack_for_string, "]b4_rhs_location([%d], [%d])[",
d8d3f94a
PE
849 rule_length, n);
850 }
e9955c83 851 else
223ff46e 852 complain_at (loc, _("integer out of range: %s"), quote (text));
f25bfb75 853 }
f25bfb75 854
624a35e2 855 return true;
e9955c83 856}
4cdb01db 857
f25bfb75
AD
858
859/*-------------------------------------------------------------------.
860| Dispatch onto handle_action_at, or handle_destructor_at, depending |
861| upon TOKEN_TYPE. |
862`-------------------------------------------------------------------*/
863
864static void
624a35e2 865handle_at (int token_type, char *text, location loc)
f25bfb75 866{
624a35e2 867 switch (token_type)
f25bfb75 868 {
624a35e2 869 case BRACED_CODE:
223ff46e 870 handle_action_at (text, loc);
624a35e2
PE
871 return;
872
873 case PERCENT_DESTRUCTOR:
874 case PERCENT_PRINTER:
875 if (text[1] == '$')
876 {
877 obstack_sgrow (&obstack_for_string, "]b4_at_dollar[");
878 return;
879 }
f25bfb75
AD
880 break;
881
624a35e2 882 default:
f25bfb75
AD
883 break;
884 }
624a35e2
PE
885
886 complain_at (loc, _("invalid value: %s"), quote (text));
f25bfb75
AD
887}
888
889
d8d3f94a
PE
890/*------------------------------------------------------------------.
891| Convert universal character name UCN to a single-byte character, |
892| and return that character. Return -1 if UCN does not correspond |
893| to a single-byte character. |
894`------------------------------------------------------------------*/
895
static int
convert_ucn_to_byte (char const *ucn)
{
  /* UCN is `\uXXXX' or `\UXXXXXXXX'; the hex digits start after the
     two-character prefix.  */
  unsigned long code = strtoul (ucn + 2, 0, 16);

  /* The value handed back to the caller.  It is kept in an int so that
     the -1 "no such byte" result never passes through an unsigned
     type.  */
  int byte;

  /* FIXME: Currently we assume Unicode-compatible unibyte characters
     on ASCII hosts (i.e., Latin-1 on hosts with 8-bit bytes).  On
     non-ASCII hosts we support only the portable C character set.
     These limitations should be removed once we add support for
     multibyte characters.  */

  if (UCHAR_MAX < code)
    return -1;

  byte = code;

#if ! ('$' == 0x24 && '@' == 0x40 && '`' == 0x60 && '~' == 0x7e)
  {
    /* A non-ASCII host.  Use CODE to index into a table of the C
       basic execution character set, which is guaranteed to exist on
       all Standard C platforms.  This table also includes '$', '@',
       and '`', which are not in the basic execution character set but
       which are unibyte characters on all the platforms that we know
       about.  */
    static signed char const table[] =
      {
	'\0',   -1,   -1,   -1,   -1,   -1,   -1, '\a',
	'\b', '\t', '\n', '\v', '\f', '\r',   -1,   -1,
	  -1,   -1,   -1,   -1,   -1,   -1,   -1,   -1,
	  -1,   -1,   -1,   -1,   -1,   -1,   -1,   -1,
	 ' ',  '!',  '"',  '#',  '$',  '%',  '&', '\'',
	 '(',  ')',  '*',  '+',  ',',  '-',  '.',  '/',
	 '0',  '1',  '2',  '3',  '4',  '5',  '6',  '7',
	 '8',  '9',  ':',  ';',  '<',  '=',  '>',  '?',
	 '@',  'A',  'B',  'C',  'D',  'E',  'F',  'G',
	 'H',  'I',  'J',  'K',  'L',  'M',  'N',  'O',
	 'P',  'Q',  'R',  'S',  'T',  'U',  'V',  'W',
	 'X',  'Y',  'Z',  '[', '\\',  ']',  '^',  '_',
	 '`',  'a',  'b',  'c',  'd',  'e',  'f',  'g',
	 'h',  'i',  'j',  'k',  'l',  'm',  'n',  'o',
	 'p',  'q',  'r',  's',  't',  'u',  'v',  'w',
	 'x',  'y',  'z',  '{',  '|',  '}',  '~'
      };

    byte = code < sizeof table ? table[code] : -1;
  }
#endif

  return byte;
}
944
945
900c5db5
AD
946/*----------------------------------------------------------------.
947| Handle `#line INT "FILE"'. ARGS has already skipped `#line '. |
948`----------------------------------------------------------------*/
949
950static void
3f2d73f1 951handle_syncline (char *args)
900c5db5
AD
952{
953 int lineno = strtol (args, &args, 10);
954 const char *file = NULL;
955 file = strchr (args, '"') + 1;
956 *strchr (file, '"') = 0;
3f2d73f1
PE
957 scanner_cursor.file = current_file = xstrdup (file);
958 scanner_cursor.line = lineno;
959 scanner_cursor.column = 1;
900c5db5
AD
960}
961
a706a1cc 962
3f2d73f1
PE
963/*------------------------------------------------------------------------.
964| Report an unexpected EOF in a token or comment starting at START. |
965| An end of file was encountered and the expected TOKEN_END was missing. |
966| After reporting the problem, pretend that TOKEN_END was found. |
967`------------------------------------------------------------------------*/
a706a1cc
PE
968
969static void
3f2d73f1 970unexpected_end_of_file (boundary start, char const *token_end)
a706a1cc 971{
345532d7 972 size_t i = strlen (token_end);
a706a1cc 973
223ff46e
PE
974 location loc;
975 loc.start = start;
976 loc.end = scanner_cursor;
977 complain_at (loc, _("missing `%s' at end of file"), token_end);
345532d7 978
3f2d73f1
PE
979 /* Adjust scanner cursor so that any later message does not count
980 the characters about to be inserted. */
981 scanner_cursor.column -= i;
345532d7
PE
982
983 while (i != 0)
984 unput (token_end[--i]);
a706a1cc
PE
985}
986
987
f25bfb75
AD
988/*-------------------------.
989| Initialize the scanner. |
990`-------------------------*/
991
1d6412ad
AD
992void
993scanner_initialize (void)
994{
223ff46e 995 obstack_init (&obstack_for_string);
1d6412ad
AD
996}
997
998
f25bfb75
AD
999/*-----------------------------------------------.
1000| Free all the memory allocated to the scanner. |
1001`-----------------------------------------------*/
1002
4cdb01db
AD
1003void
1004scanner_free (void)
1005{
223ff46e 1006 obstack_free (&obstack_for_string, 0);
536545f3
AD
1007 /* Reclaim Flex's buffers. */
1008 yy_delete_buffer (YY_CURRENT_BUFFER);
4cdb01db 1009}