]> git.saurik.com Git - bison.git/blame - src/scan-gram.l
* data/yacc.c: (b4_lex_param): Corrected for the case where
[bison.git] / src / scan-gram.l
CommitLineData
e9955c83 1/* Bison Grammar Scanner -*- C -*-
3b1e470c 2
a737b216 3 Copyright (C) 2002, 2003 Free Software Foundation, Inc.
e9955c83
AD
4
5 This file is part of Bison, the GNU Compiler Compiler.
6
7 This program is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 2 of the License, or
10 (at your option) any later version.
11
12 This program is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
16
17 You should have received a copy of the GNU General Public License
18 along with this program; if not, write to the Free Software
19 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
20 02111-1307 USA
21*/
22
aa418041 23%option debug nodefault nounput noyywrap never-interactive
e9955c83
AD
24%option prefix="gram_" outfile="lex.yy.c"
25
26%{
27#include "system.h"
223ff46e
PE
28
29#include <mbswidth.h>
30#include <get-errno.h>
31#include <quote.h>
32
e9955c83 33#include "complain.h"
3f2d73f1 34#include "files.h"
e9955c83
AD
35#include "getargs.h"
36#include "gram.h"
37#include "reader.h"
223ff46e 38#include "uniqstr.h"
e9955c83 39
3f2d73f1
PE
40#define YY_USER_INIT \
41 do \
42 { \
43 scanner_cursor.file = current_file; \
44 scanner_cursor.line = 1; \
45 scanner_cursor.column = 1; \
379f0ac8 46 code_start = scanner_cursor; \
3f2d73f1
PE
47 } \
48 while (0)
8efe435c 49
3f2d73f1
PE
50/* Location of scanner cursor. */
51boundary scanner_cursor;
41141c56 52
223ff46e 53static void adjust_location (location *, char const *, size_t);
3f2d73f1 54#define YY_USER_ACTION adjust_location (loc, yytext, yyleng);
d8d3f94a 55
6c30d641 56static size_t no_cr_read (FILE *, char *, size_t);
d8d3f94a
PE
57#define YY_INPUT(buf, result, size) ((result) = no_cr_read (yyin, buf, size))
58
59
223ff46e 60/* OBSTACK_FOR_STRING -- Used to store all the characters that we need to
44995b2e
AD
61 keep (to construct ID, STRINGS etc.). Use the following macros to
62 use it.
63
41141c56
PE
64 Use STRING_GROW to append what has just been matched, and
65 STRING_FINISH to end the string (it puts the ending 0).
66 STRING_FINISH also stores this string in LAST_STRING, which can be
67 used, and which is used by STRING_FREE to free the last string. */
44995b2e 68
223ff46e 69static struct obstack obstack_for_string;
44995b2e 70
7ec2d4cd
AD
71/* A string representing the most recently saved token. */
72static char *last_string;
73
74
41141c56 75#define STRING_GROW \
223ff46e 76 obstack_grow (&obstack_for_string, yytext, yyleng)
44995b2e 77
41141c56 78#define STRING_FINISH \
44995b2e 79 do { \
223ff46e
PE
80 obstack_1grow (&obstack_for_string, '\0'); \
81 last_string = obstack_finish (&obstack_for_string); \
44995b2e
AD
82 } while (0)
83
41141c56 84#define STRING_FREE \
223ff46e 85 obstack_free (&obstack_for_string, last_string)
e9955c83 86
7ec2d4cd
AD
87void
88scanner_last_string_free (void)
89{
41141c56 90 STRING_FREE;
7ec2d4cd 91}
e9955c83 92
efcb44dd
PE
93/* Within well-formed rules, RULE_LENGTH is the number of values in
94 the current rule so far, which says where to find `$0' with respect
95 to the top of the stack. It is not the same as the rule->length in
96 the case of mid rule actions.
97
98 Outside of well-formed rules, RULE_LENGTH has an undefined value. */
99static int rule_length;
100
624a35e2
PE
101static void handle_dollar (int token_type, char *cp, location loc);
102static void handle_at (int token_type, char *cp, location loc);
3f2d73f1 103static void handle_syncline (char *args);
d8d3f94a 104static int convert_ucn_to_byte (char const *hex_text);
aa418041 105static void unexpected_eof (boundary, char const *);
e9955c83
AD
106
107%}
d8d3f94a 108%x SC_COMMENT SC_LINE_COMMENT SC_YACC_COMMENT
e9955c83 109%x SC_STRING SC_CHARACTER
3f2d73f1 110%x SC_AFTER_IDENTIFIER
e9955c83 111%x SC_ESCAPED_STRING SC_ESCAPED_CHARACTER
624a35e2 112%x SC_PRE_CODE SC_BRACED_CODE SC_PROLOGUE SC_EPILOGUE
e9955c83 113
29c01725
AD
114letter [.abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ_]
115id {letter}({letter}|[0-9])*
116directive %{letter}({letter}|[0-9]|-)*
624a35e2 117int [0-9]+
d8d3f94a
PE
118
119/* POSIX says that a tag must be both an id and a C union member, but
120 historically almost any character is allowed in a tag. We disallow
121 NUL and newline, as this simplifies our implementation. */
122tag [^\0\n>]+
123
124/* Zero or more instances of backslash-newline. Following GCC, allow
125 white space between the backslash and the newline. */
126splice (\\[ \f\t\v]*\n)*
e9955c83
AD
127
128%%
129%{
a706a1cc 130 /* Nesting level of the current code in braces. */
1a9e39f1
PE
131 int braces_level IF_LINT (= 0);
132
3f2d73f1
PE
133 /* Parent context state, when applicable. */
134 int context_state IF_LINT (= 0);
a706a1cc 135
624a35e2
PE
136 /* Token type to return, when applicable. */
137 int token_type IF_LINT (= 0);
138
3f2d73f1 139 /* Location of most recent identifier, when applicable. */
a2bc9dbc 140 location id_loc IF_LINT (= empty_location);
3f2d73f1 141
a2bc9dbc
PE
142 /* Where containing code started, when applicable. Its initial
143 value is relevant only when yylex is invoked in the SC_EPILOGUE
144 start condition. */
145 boundary code_start = scanner_cursor;
3f2d73f1 146
223ff46e
PE
147 /* Where containing comment or string or character literal started,
148 when applicable. */
a2bc9dbc 149 boundary token_start IF_LINT (= scanner_cursor);
e9955c83
AD
150%}
151
152
3f2d73f1
PE
153 /*-----------------------.
154 | Scanning white space. |
155 `-----------------------*/
156
624a35e2 157<INITIAL,SC_AFTER_IDENTIFIER,SC_PRE_CODE>
3f2d73f1
PE
158{
159 [ \f\n\t\v] ;
83adb046 160 "," warn_at (*loc, _("stray `,' treated as white space"));
3f2d73f1
PE
161
162 /* Comments. */
3f2d73f1 163 "//".* ;
83adb046
PE
164 "/*" {
165 token_start = loc->start;
166 context_state = YY_START;
167 BEGIN SC_YACC_COMMENT;
168 }
3f2d73f1
PE
169
170 /* #line directives are not documented, and may be withdrawn or
171 modified in future versions of Bison. */
172 ^"#line "{int}" \"".*"\"\n" {
173 handle_syncline (yytext + sizeof "#line " - 1);
174 }
175}
176
177
e9955c83
AD
178 /*----------------------------.
179 | Scanning Bison directives. |
180 `----------------------------*/
181<INITIAL>
182{
183 "%binary" return PERCENT_NONASSOC;
184 "%debug" return PERCENT_DEBUG;
185 "%define" return PERCENT_DEFINE;
186 "%defines" return PERCENT_DEFINES;
624a35e2 187 "%destructor" token_type = PERCENT_DESTRUCTOR; BEGIN SC_PRE_CODE;
676385e2 188 "%dprec" return PERCENT_DPREC;
e9955c83
AD
189 "%error"[-_]"verbose" return PERCENT_ERROR_VERBOSE;
190 "%expect" return PERCENT_EXPECT;
191 "%file-prefix" return PERCENT_FILE_PREFIX;
192 "%fixed"[-_]"output"[-_]"files" return PERCENT_YACC;
ae7453f2 193 "%glr-parser" return PERCENT_GLR_PARSER;
e9955c83 194 "%left" return PERCENT_LEFT;
624a35e2 195 "%lex-param" token_type = PERCENT_LEX_PARAM; BEGIN SC_PRE_CODE;
e9955c83 196 "%locations" return PERCENT_LOCATIONS;
676385e2 197 "%merge" return PERCENT_MERGE;
e9955c83
AD
198 "%name"[-_]"prefix" return PERCENT_NAME_PREFIX;
199 "%no"[-_]"lines" return PERCENT_NO_LINES;
200 "%nonassoc" return PERCENT_NONASSOC;
201 "%nterm" return PERCENT_NTERM;
202 "%output" return PERCENT_OUTPUT;
624a35e2 203 "%parse-param" token_type = PERCENT_PARSE_PARAM; BEGIN SC_PRE_CODE;
d8d3f94a 204 "%prec" rule_length--; return PERCENT_PREC;
624a35e2 205 "%printer" token_type = PERCENT_PRINTER; BEGIN SC_PRE_CODE;
e9955c83
AD
206 "%pure"[-_]"parser" return PERCENT_PURE_PARSER;
207 "%right" return PERCENT_RIGHT;
208 "%skeleton" return PERCENT_SKELETON;
209 "%start" return PERCENT_START;
210 "%term" return PERCENT_TOKEN;
211 "%token" return PERCENT_TOKEN;
212 "%token"[-_]"table" return PERCENT_TOKEN_TABLE;
213 "%type" return PERCENT_TYPE;
624a35e2 214 "%union" token_type = PERCENT_UNION; BEGIN SC_PRE_CODE;
e9955c83
AD
215 "%verbose" return PERCENT_VERBOSE;
216 "%yacc" return PERCENT_YACC;
217
3f2d73f1 218 {directive} {
41141c56 219 complain_at (*loc, _("invalid directive: %s"), quote (yytext));
412f8a59 220 }
900c5db5 221
e9955c83 222 "=" return EQUAL;
d8d3f94a 223 "|" rule_length = 0; return PIPE;
e9955c83
AD
224 ";" return SEMICOLON;
225
3f2d73f1 226 {id} {
41141c56 227 val->symbol = symbol_get (yytext, *loc);
3f2d73f1 228 id_loc = *loc;
efcb44dd 229 rule_length++;
3f2d73f1 230 BEGIN SC_AFTER_IDENTIFIER;
e9955c83
AD
231 }
232
d8d3f94a
PE
233 {int} {
234 unsigned long num;
223ff46e 235 set_errno (0);
d8d3f94a 236 num = strtoul (yytext, 0, 10);
223ff46e 237 if (INT_MAX < num || get_errno ())
d8d3f94a 238 {
41141c56 239 complain_at (*loc, _("integer out of range: %s"), quote (yytext));
d8d3f94a
PE
240 num = INT_MAX;
241 }
41141c56 242 val->integer = num;
d8d3f94a
PE
243 return INT;
244 }
e9955c83
AD
245
246 /* Characters. We don't check there is only one. */
3f2d73f1 247 "'" STRING_GROW; token_start = loc->start; BEGIN SC_ESCAPED_CHARACTER;
e9955c83
AD
248
249 /* Strings. */
3f2d73f1 250 "\"" STRING_GROW; token_start = loc->start; BEGIN SC_ESCAPED_STRING;
e9955c83
AD
251
252 /* Prologue. */
3f2d73f1 253 "%{" code_start = loc->start; BEGIN SC_PROLOGUE;
e9955c83
AD
254
255 /* Code in between braces. */
3f2d73f1
PE
256 "{" {
257 STRING_GROW;
624a35e2 258 token_type = BRACED_CODE;
3f2d73f1
PE
259 braces_level = 0;
260 code_start = loc->start;
261 BEGIN SC_BRACED_CODE;
262 }
e9955c83
AD
263
264 /* A type. */
d8d3f94a 265 "<"{tag}">" {
223ff46e 266 obstack_grow (&obstack_for_string, yytext + 1, yyleng - 2);
41141c56 267 STRING_FINISH;
223ff46e 268 val->uniqstr = uniqstr_new (last_string);
41141c56 269 STRING_FREE;
4cdb01db
AD
270 return TYPE;
271 }
272
a706a1cc
PE
273 "%%" {
274 static int percent_percent_count;
e9955c83 275 if (++percent_percent_count == 2)
a2bc9dbc 276 BEGIN SC_EPILOGUE;
e9955c83
AD
277 return PERCENT_PERCENT;
278 }
279
a706a1cc 280 . {
41141c56 281 complain_at (*loc, _("invalid character: %s"), quote (yytext));
3f2d73f1 282 }
379f0ac8
PE
283
284 <<EOF>> {
285 loc->start = loc->end = scanner_cursor;
286 yyterminate ();
287 }
3f2d73f1
PE
288}
289
290
291 /*-----------------------------------------------------------------.
292 | Scanning after an identifier, checking whether a colon is next. |
293 `-----------------------------------------------------------------*/
294
295<SC_AFTER_IDENTIFIER>
296{
297 ":" {
298 rule_length = 0;
299 *loc = id_loc;
300 BEGIN INITIAL;
301 return ID_COLON;
302 }
303 . {
304 scanner_cursor.column -= mbsnwidth (yytext, yyleng, 0);
305 yyless (0);
306 *loc = id_loc;
307 BEGIN INITIAL;
308 return ID;
309 }
310 <<EOF>> {
311 *loc = id_loc;
312 BEGIN INITIAL;
313 return ID;
e9955c83
AD
314 }
315}
316
317
d8d3f94a
PE
318 /*---------------------------------------------------------------.
319 | Scanning a Yacc comment. The initial `/ *' is already eaten. |
320 `---------------------------------------------------------------*/
e9955c83 321
d8d3f94a 322<SC_YACC_COMMENT>
e9955c83 323{
3f2d73f1 324 "*/" BEGIN context_state;
a706a1cc 325 .|\n ;
aa418041 326 <<EOF>> unexpected_eof (token_start, "*/"); BEGIN context_state;
d8d3f94a
PE
327}
328
329
330 /*------------------------------------------------------------.
331 | Scanning a C comment. The initial `/ *' is already eaten. |
332 `------------------------------------------------------------*/
333
334<SC_COMMENT>
335{
3f2d73f1 336 "*"{splice}"/" STRING_GROW; BEGIN context_state;
aa418041 337 <<EOF>> unexpected_eof (token_start, "*/"); BEGIN context_state;
e9955c83
AD
338}
339
340
d8d3f94a
PE
341 /*--------------------------------------------------------------.
342 | Scanning a line comment. The initial `//' is already eaten. |
343 `--------------------------------------------------------------*/
344
345<SC_LINE_COMMENT>
346{
3f2d73f1 347 "\n" STRING_GROW; BEGIN context_state;
41141c56 348 {splice} STRING_GROW;
3f2d73f1 349 <<EOF>> BEGIN context_state;
d8d3f94a
PE
350}
351
352
e9955c83
AD
353 /*----------------------------------------------------------------.
354 | Scanning a C string, including its escapes. The initial `"' is |
355 | already eaten. |
356 `----------------------------------------------------------------*/
357
358<SC_ESCAPED_STRING>
359{
db2cc12f 360 "\"" {
41141c56
PE
361 STRING_GROW;
362 STRING_FINISH;
3f2d73f1 363 loc->start = token_start;
223ff46e 364 val->chars = last_string;
efcb44dd 365 rule_length++;
a706a1cc 366 BEGIN INITIAL;
e9955c83
AD
367 return STRING;
368 }
369
41141c56 370 .|\n STRING_GROW;
aa418041 371 <<EOF>> unexpected_eof (token_start, "\""); BEGIN INITIAL;
e9955c83
AD
372}
373
374 /*---------------------------------------------------------------.
375 | Scanning a C character, decoding its escapes. The initial "'" |
376 | is already eaten. |
377 `---------------------------------------------------------------*/
378
379<SC_ESCAPED_CHARACTER>
380{
db2cc12f 381 "'" {
3b1e470c 382 unsigned char last_string_1;
41141c56
PE
383 STRING_GROW;
384 STRING_FINISH;
3f2d73f1 385 loc->start = token_start;
41141c56
PE
386 val->symbol = symbol_get (last_string, *loc);
387 symbol_class_set (val->symbol, token_sym, *loc);
3b1e470c
PE
388 last_string_1 = last_string[1];
389 symbol_user_token_number_set (val->symbol, last_string_1, *loc);
41141c56 390 STRING_FREE;
a706a1cc
PE
391 rule_length++;
392 BEGIN INITIAL;
393 return ID;
e9955c83 394 }
a706a1cc 395
41141c56 396 .|\n STRING_GROW;
aa418041 397 <<EOF>> unexpected_eof (token_start, "'"); BEGIN INITIAL;
e9955c83
AD
398}
399
400
401 /*----------------------------.
402 | Decode escaped characters. |
403 `----------------------------*/
404
405<SC_ESCAPED_STRING,SC_ESCAPED_CHARACTER>
406{
d8d3f94a
PE
407 \\[0-7]{1,3} {
408 unsigned long c = strtoul (yytext + 1, 0, 8);
409 if (UCHAR_MAX < c)
3f2d73f1 410 complain_at (*loc, _("invalid escape sequence: %s"), quote (yytext));
e9955c83 411 else
223ff46e 412 obstack_1grow (&obstack_for_string, c);
e9955c83
AD
413 }
414
6b0d38ab 415 \\x[0-9abcdefABCDEF]+ {
d8d3f94a 416 unsigned long c;
223ff46e 417 set_errno (0);
d8d3f94a 418 c = strtoul (yytext + 2, 0, 16);
223ff46e 419 if (UCHAR_MAX < c || get_errno ())
3f2d73f1 420 complain_at (*loc, _("invalid escape sequence: %s"), quote (yytext));
d8d3f94a 421 else
223ff46e 422 obstack_1grow (&obstack_for_string, c);
e9955c83
AD
423 }
424
223ff46e
PE
425 \\a obstack_1grow (&obstack_for_string, '\a');
426 \\b obstack_1grow (&obstack_for_string, '\b');
427 \\f obstack_1grow (&obstack_for_string, '\f');
428 \\n obstack_1grow (&obstack_for_string, '\n');
429 \\r obstack_1grow (&obstack_for_string, '\r');
430 \\t obstack_1grow (&obstack_for_string, '\t');
431 \\v obstack_1grow (&obstack_for_string, '\v');
412f8a59
PE
432
433 /* \\[\"\'?\\] would be shorter, but it confuses xgettext. */
223ff46e 434 \\("\""|"'"|"?"|"\\") obstack_1grow (&obstack_for_string, yytext[1]);
412f8a59 435
6b0d38ab 436 \\(u|U[0-9abcdefABCDEF]{4})[0-9abcdefABCDEF]{4} {
d8d3f94a
PE
437 int c = convert_ucn_to_byte (yytext);
438 if (c < 0)
3f2d73f1 439 complain_at (*loc, _("invalid escape sequence: %s"), quote (yytext));
d8d3f94a 440 else
223ff46e 441 obstack_1grow (&obstack_for_string, c);
d8d3f94a 442 }
4f25ebb0 443 \\(.|\n) {
3f2d73f1 444 complain_at (*loc, _("unrecognized escape sequence: %s"), quote (yytext));
41141c56 445 STRING_GROW;
e9955c83
AD
446 }
447}
448
449
450 /*----------------------------------------------------------.
451 | Scanning a C character without decoding its escapes. The |
452 | initial "'" is already eaten. |
453 `----------------------------------------------------------*/
454
455<SC_CHARACTER>
456{
3f2d73f1 457 "'" STRING_GROW; BEGIN context_state;
41141c56 458 \\{splice}[^$@\[\]] STRING_GROW;
aa418041 459 <<EOF>> unexpected_eof (token_start, "'"); BEGIN context_state;
e9955c83
AD
460}
461
462
463 /*----------------------------------------------------------------.
464 | Scanning a C string, without decoding its escapes. The initial |
465 | `"' is already eaten. |
466 `----------------------------------------------------------------*/
467
468<SC_STRING>
469{
3f2d73f1 470 "\"" STRING_GROW; BEGIN context_state;
41141c56 471 \\{splice}[^$@\[\]] STRING_GROW;
aa418041
PE
472 <<EOF>> {
473 unexpected_eof (token_start, "\"");
474 BEGIN context_state;
475 }
e9955c83
AD
476}
477
478
479 /*---------------------------------------------------.
480 | Strings, comments etc. can be found in user code. |
481 `---------------------------------------------------*/
482
483<SC_BRACED_CODE,SC_PROLOGUE,SC_EPILOGUE>
484{
3f2d73f1
PE
485 "'" {
486 STRING_GROW;
487 context_state = YY_START;
488 token_start = loc->start;
489 BEGIN SC_CHARACTER;
490 }
491 "\"" {
492 STRING_GROW;
493 context_state = YY_START;
494 token_start = loc->start;
495 BEGIN SC_STRING;
496 }
497 "/"{splice}"*" {
498 STRING_GROW;
499 context_state = YY_START;
500 token_start = loc->start;
501 BEGIN SC_COMMENT;
502 }
503 "/"{splice}"/" {
504 STRING_GROW;
505 context_state = YY_START;
506 BEGIN SC_LINE_COMMENT;
507 }
e9955c83
AD
508}
509
510
624a35e2
PE
511 /*---------------------------------------------------------------.
512 | Scanning after %union etc., possibly followed by white space. |
513 | For %union only, allow arbitrary C code to appear before the |
514 | following brace, as an extension to POSIX. |
515 `---------------------------------------------------------------*/
516
517<SC_PRE_CODE>
518{
519 . {
520 bool valid = yytext[0] == '{' || token_type == PERCENT_UNION;
521 scanner_cursor.column -= mbsnwidth (yytext, yyleng, 0);
522 yyless (0);
523
524 if (valid)
525 {
526 braces_level = -1;
527 code_start = loc->start;
528 BEGIN SC_BRACED_CODE;
529 }
530 else
531 {
532 complain_at (*loc, _("missing `{' in `%s'"),
533 token_name (token_type));
534 obstack_sgrow (&obstack_for_string, "{}");
535 STRING_FINISH;
536 val->chars = last_string;
537 BEGIN INITIAL;
538 return token_type;
539 }
540 }
379f0ac8 541
aa418041 542 <<EOF>> unexpected_eof (scanner_cursor, "{}"); BEGIN INITIAL;
624a35e2
PE
543}
544
545
e9955c83
AD
546 /*---------------------------------------------------------------.
547 | Scanning some code in braces (%union and actions). The initial |
548 | "{" is already eaten. |
549 `---------------------------------------------------------------*/
550
551<SC_BRACED_CODE>
552{
41141c56
PE
553 "{"|"<"{splice}"%" STRING_GROW; braces_level++;
554 "%"{splice}">" STRING_GROW; braces_level--;
e9955c83 555 "}" {
25522739
PE
556 bool outer_brace = --braces_level < 0;
557
558 /* As an undocumented Bison extension, append `;' before the last
559 brace in braced code, so that the user code can omit trailing
560 `;'. But do not append `;' if emulating Yacc, since Yacc does
561 not append one.
562
563 FIXME: Bison should warn if a semicolon seems to be necessary
564 here, and should omit the semicolon if it seems unnecessary
565 (e.g., after ';', '{', or '}', each followed by comments or
566 white space). Such a warning shouldn't depend on --yacc; it
567 should depend on a new --pedantic option, which would cause
568 Bison to warn if it detects an extension to POSIX. --pedantic
569 should also diagnose other Bison extensions like %yacc.
570 Perhaps there should also be a GCC-style --pedantic-errors
571 option, so that such warnings are diagnosed as errors. */
1deb9bdc 572 if (outer_brace && token_type == BRACED_CODE && ! yacc_flag)
25522739
PE
573 obstack_1grow (&obstack_for_string, ';');
574
575 obstack_1grow (&obstack_for_string, '}');
576
577 if (outer_brace)
e9955c83 578 {
41141c56 579 STRING_FINISH;
624a35e2 580 rule_length++;
3f2d73f1 581 loc->start = code_start;
223ff46e 582 val->chars = last_string;
a706a1cc 583 BEGIN INITIAL;
624a35e2 584 return token_type;
e9955c83
AD
585 }
586 }
587
a706a1cc
PE
588 /* Tokenize `<<%' correctly (as `<<' `%') rather than incorrectly
589 (as `<' `<%'). */
41141c56 590 "<"{splice}"<" STRING_GROW;
a706a1cc 591
624a35e2
PE
592 "$"("<"{tag}">")?(-?[0-9]+|"$") handle_dollar (token_type, yytext, *loc);
593 "@"(-?[0-9]+|"$") handle_at (token_type, yytext, *loc);
e9955c83 594
aa418041 595 <<EOF>> unexpected_eof (code_start, "}"); BEGIN INITIAL;
e9955c83
AD
596}
597
598
599 /*--------------------------------------------------------------.
600 | Scanning some prologue: from "%{" (already scanned) to "%}". |
601 `--------------------------------------------------------------*/
602
603<SC_PROLOGUE>
604{
605 "%}" {
41141c56 606 STRING_FINISH;
3f2d73f1 607 loc->start = code_start;
223ff46e 608 val->chars = last_string;
a706a1cc 609 BEGIN INITIAL;
e9955c83
AD
610 return PROLOGUE;
611 }
612
aa418041 613 <<EOF>> unexpected_eof (code_start, "%}"); BEGIN INITIAL;
e9955c83
AD
614}
615
616
617 /*---------------------------------------------------------------.
618 | Scanning the epilogue (everything after the second "%%", which |
d8d3f94a 619 | has already been eaten). |
e9955c83
AD
620 `---------------------------------------------------------------*/
621
622<SC_EPILOGUE>
623{
e9955c83 624 <<EOF>> {
41141c56 625 STRING_FINISH;
3f2d73f1 626 loc->start = code_start;
223ff46e 627 val->chars = last_string;
a706a1cc 628 BEGIN INITIAL;
e9955c83
AD
629 return EPILOGUE;
630 }
631}
632
633
a706a1cc
PE
634 /*----------------------------------------------------------------.
635 | By default, grow the string obstack with the input, escaping M4 |
636 | quoting characters. |
637 `----------------------------------------------------------------*/
638
639<SC_COMMENT,SC_LINE_COMMENT,SC_STRING,SC_CHARACTER,SC_BRACED_CODE,SC_PROLOGUE,SC_EPILOGUE>
640{
223ff46e
PE
641 \$ obstack_sgrow (&obstack_for_string, "$][");
642 \@ obstack_sgrow (&obstack_for_string, "@@");
643 \[ obstack_sgrow (&obstack_for_string, "@{");
644 \] obstack_sgrow (&obstack_for_string, "@}");
41141c56 645 .|\n STRING_GROW;
a706a1cc
PE
646}
647
648
e9955c83
AD
649%%
650
3f2d73f1
PE
651/* Set *LOC and adjust scanner cursor to account for token TOKEN of
652 size SIZE. */
6c30d641
PE
653
654static void
223ff46e 655adjust_location (location *loc, char const *token, size_t size)
6c30d641 656{
3f2d73f1
PE
657 int line = scanner_cursor.line;
658 int column = scanner_cursor.column;
6c30d641
PE
659 char const *p0 = token;
660 char const *p = token;
661 char const *lim = token + size;
662
3f2d73f1
PE
663 loc->start = scanner_cursor;
664
6c30d641
PE
665 for (p = token; p < lim; p++)
666 switch (*p)
667 {
6c30d641
PE
668 case '\n':
669 line++;
670 column = 1;
671 p0 = p + 1;
672 break;
673
674 case '\t':
675 column += mbsnwidth (p0, p - p0, 0);
676 column += 8 - ((column - 1) & 7);
677 p0 = p + 1;
678 break;
679 }
680
3f2d73f1
PE
681 scanner_cursor.line = line;
682 scanner_cursor.column = column + mbsnwidth (p0, p - p0, 0);
683
684 loc->end = scanner_cursor;
6c30d641
PE
685}
686
687
688/* Read bytes from FP into buffer BUF of size SIZE. Return the
689 number of bytes read. Remove '\r' from input, treating \r\n
690 and isolated \r as \n. */
691
static size_t
no_cr_read (FILE *fp, char *buf, size_t size)
{
  size_t const count = fread (buf, 1, size, fp);
  char *first_cr = count ? memchr (buf, '\r', count) : NULL;
  char const *src;              /* Next byte to examine.  */
  char const *end;              /* One past the last byte read.  */
  char *dst;                    /* Next byte to store.  */

  /* Nothing read, or no carriage return at all: the buffer is
     already in its final form.  */
  if (! first_cr)
    return count;

  dst = first_cr + 1;
  src = dst;
  end = buf + count;

  for (;;)
    {
      /* DST is just past a '\r'.  Turn it into '\n', and drop a
         '\n' that immediately follows it.  */
      dst[-1] = '\n';

      if (src != end)
        {
          if (*src == '\n')
            src++;
        }
      else
        {
          /* The '\r' was the last byte read, so peek into the stream
             for its '\n'.  Anything else is pushed back; give up if
             the push-back fails.  */
          int next = getc (fp);
          if (next != '\n' && ungetc (next, fp) != next)
            break;
        }

      /* Compact the buffer up to and including the next '\r'.  */
      for (;;)
        {
          char c;
          if (src == end)
            return dst - buf;
          c = *src++;
          *dst++ = c;
          if (c == '\r')
            break;
        }
    }

  return dst - buf;
}
733
734
e9955c83 735/*------------------------------------------------------------------.
366eea36 736| TEXT is pointing to a wannabe semantic value (i.e., a `$'). |
e9955c83
AD
737| |
738| Possible inputs: $[<TYPENAME>]($|integer) |
739| |
223ff46e 740| Output to OBSTACK_FOR_STRING a reference to this semantic value. |
e9955c83
AD
741`------------------------------------------------------------------*/
742
624a35e2 743static inline bool
223ff46e 744handle_action_dollar (char *text, location loc)
e9955c83
AD
745{
746 const char *type_name = NULL;
366eea36 747 char *cp = text + 1;
e9955c83 748
624a35e2
PE
749 if (! current_rule)
750 return false;
751
e9955c83
AD
752 /* Get the type name if explicit. */
753 if (*cp == '<')
754 {
755 type_name = ++cp;
756 while (*cp != '>')
757 ++cp;
758 *cp = '\0';
759 ++cp;
760 }
761
762 if (*cp == '$')
763 {
764 if (!type_name)
223ff46e 765 type_name = symbol_list_n_type_name_get (current_rule, loc, 0);
e9955c83 766 if (!type_name && typed)
223ff46e 767 complain_at (loc, _("$$ of `%s' has no declared type"),
97650f4e 768 current_rule->sym->tag);
e9955c83
AD
769 if (!type_name)
770 type_name = "";
223ff46e 771 obstack_fgrow1 (&obstack_for_string,
e9955c83
AD
772 "]b4_lhs_value([%s])[", type_name);
773 }
d8d3f94a 774 else
e9955c83 775 {
d8d3f94a 776 long num;
223ff46e 777 set_errno (0);
d8d3f94a 778 num = strtol (cp, 0, 10);
e9955c83 779
223ff46e 780 if (INT_MIN <= num && num <= rule_length && ! get_errno ())
e9955c83 781 {
d8d3f94a 782 int n = num;
e9955c83 783 if (!type_name && n > 0)
223ff46e 784 type_name = symbol_list_n_type_name_get (current_rule, loc, n);
e9955c83 785 if (!type_name && typed)
223ff46e
PE
786 complain_at (loc, _("$%d of `%s' has no declared type"),
787 n, current_rule->sym->tag);
e9955c83
AD
788 if (!type_name)
789 type_name = "";
223ff46e 790 obstack_fgrow3 (&obstack_for_string,
e9955c83
AD
791 "]b4_rhs_value([%d], [%d], [%s])[",
792 rule_length, n, type_name);
793 }
d8d3f94a 794 else
223ff46e 795 complain_at (loc, _("integer out of range: %s"), quote (text));
9280d3ef 796 }
9280d3ef 797
624a35e2 798 return true;
e9955c83
AD
799}
800
f25bfb75
AD
801
802/*-----------------------------------------------------------------.
803| Dispatch onto handle_action_dollar, or expand `$$' for %destructor |
624a35e2 804| and %printer code, depending upon TOKEN_TYPE. |
f25bfb75 805`-----------------------------------------------------------------*/
e9955c83
AD
806
807static void
624a35e2 808handle_dollar (int token_type, char *text, location loc)
f25bfb75 809{
624a35e2 810 switch (token_type)
f25bfb75 811 {
624a35e2
PE
812 case BRACED_CODE:
813 if (handle_action_dollar (text, loc))
814 return;
f25bfb75
AD
815 break;
816
624a35e2
PE
817 case PERCENT_DESTRUCTOR:
818 case PERCENT_PRINTER:
819 if (text[1] == '$')
820 {
821 obstack_sgrow (&obstack_for_string, "]b4_dollar_dollar[");
822 return;
823 }
824 break;
825
826 default:
f25bfb75
AD
827 break;
828 }
624a35e2
PE
829
830 complain_at (loc, _("invalid value: %s"), quote (text));
f25bfb75
AD
831}
832
833
834/*------------------------------------------------------.
835| TEXT is a location token (i.e., a `@...'). Output to |
223ff46e 836| OBSTACK_FOR_STRING a reference to this location. |
f25bfb75
AD
837`------------------------------------------------------*/
838
624a35e2 839static inline bool
223ff46e 840handle_action_at (char *text, location loc)
e9955c83 841{
366eea36 842 char *cp = text + 1;
e9955c83 843 locations_flag = 1;
e9955c83 844
624a35e2
PE
845 if (! current_rule)
846 return false;
847
366eea36 848 if (*cp == '$')
624a35e2 849 obstack_sgrow (&obstack_for_string, "]b4_lhs_location[");
d8d3f94a 850 else
e9955c83 851 {
d8d3f94a 852 long num;
223ff46e 853 set_errno (0);
d8d3f94a 854 num = strtol (cp, 0, 10);
dafdc66f 855
223ff46e 856 if (INT_MIN <= num && num <= rule_length && ! get_errno ())
d8d3f94a
PE
857 {
858 int n = num;
223ff46e 859 obstack_fgrow2 (&obstack_for_string, "]b4_rhs_location([%d], [%d])[",
d8d3f94a
PE
860 rule_length, n);
861 }
e9955c83 862 else
223ff46e 863 complain_at (loc, _("integer out of range: %s"), quote (text));
f25bfb75 864 }
f25bfb75 865
624a35e2 866 return true;
e9955c83 867}
4cdb01db 868
f25bfb75
AD
869
870/*-------------------------------------------------------------------.
871| Dispatch onto handle_action_at, or expand `@$' for %destructor and |
872| %printer code, depending upon TOKEN_TYPE. |
873`-------------------------------------------------------------------*/
874
875static void
624a35e2 876handle_at (int token_type, char *text, location loc)
f25bfb75 877{
624a35e2 878 switch (token_type)
f25bfb75 879 {
624a35e2 880 case BRACED_CODE:
223ff46e 881 handle_action_at (text, loc);
624a35e2
PE
882 return;
883
884 case PERCENT_DESTRUCTOR:
885 case PERCENT_PRINTER:
886 if (text[1] == '$')
887 {
888 obstack_sgrow (&obstack_for_string, "]b4_at_dollar[");
889 return;
890 }
f25bfb75
AD
891 break;
892
624a35e2 893 default:
f25bfb75
AD
894 break;
895 }
624a35e2
PE
896
897 complain_at (loc, _("invalid value: %s"), quote (text));
f25bfb75
AD
898}
899
900
d8d3f94a
PE
901/*------------------------------------------------------------------.
902| Convert universal character name UCN to a single-byte character, |
903| and return that character. Return -1 if UCN does not correspond |
904| to a single-byte character. |
905`------------------------------------------------------------------*/
906
/* UCN points at a universal character name as matched by the
   scanner: a backslash, `u' or `U', then hexadecimal digits.  Return
   the corresponding single-byte character, or -1 if there is none.  */

static int
convert_ucn_to_byte (char const *ucn)
{
  /* Skip the leading backslash and the `u' or `U'.  */
  unsigned long code = strtoul (ucn + 2, 0, 16);

  /* FIXME: Currently we assume Unicode-compatible unibyte characters
     on ASCII hosts (i.e., Latin-1 on hosts with 8-bit bytes).  On
     non-ASCII hosts we support only the portable C character set.
     These limitations should be removed once we add support for
     multibyte characters.  */

  if (UCHAR_MAX < code)
    return -1;

#if ! ('$' == 0x24 && '@' == 0x40 && '`' == 0x60 && '~' == 0x7e)
  {
    /* A non-ASCII host.  Use CODE to index into a table of the C
       basic execution character set, which is guaranteed to exist on
       all Standard C platforms.  This table also includes '$', '@',
       and '`', which are not in the basic execution character set but
       which are unibyte characters on all the platforms that we know
       about.  */
    static signed char const table[] =
      {
        '\0',   -1,   -1,   -1,   -1,   -1,   -1, '\a',
        '\b', '\t', '\n', '\v', '\f', '\r',   -1,   -1,
          -1,   -1,   -1,   -1,   -1,   -1,   -1,   -1,
          -1,   -1,   -1,   -1,   -1,   -1,   -1,   -1,
         ' ',  '!',  '"',  '#',  '$',  '%',  '&', '\'',
         '(',  ')',  '*',  '+',  ',',  '-',  '.',  '/',
         '0',  '1',  '2',  '3',  '4',  '5',  '6',  '7',
         '8',  '9',  ':',  ';',  '<',  '=',  '>',  '?',
         '@',  'A',  'B',  'C',  'D',  'E',  'F',  'G',
         'H',  'I',  'J',  'K',  'L',  'M',  'N',  'O',
         'P',  'Q',  'R',  'S',  'T',  'U',  'V',  'W',
         'X',  'Y',  'Z',  '[', '\\',  ']',  '^',  '_',
         '`',  'a',  'b',  'c',  'd',  'e',  'f',  'g',
         'h',  'i',  'j',  'k',  'l',  'm',  'n',  'o',
         'p',  'q',  'r',  's',  't',  'u',  'v',  'w',
         'x',  'y',  'z',  '{',  '|',  '}',  '~'
      };

    /* Return the table entry as an int directly.  Storing the -1
       "no such character" marker in the unsigned CODE first would
       make the result depend on an implementation-defined
       unsigned-to-signed conversion.  */
    return code < sizeof table ? table[code] : -1;
  }
#else
  /* CODE is at most UCHAR_MAX here, so the conversion is exact.  */
  return (int) code;
#endif
}
955
956
900c5db5
AD
957/*----------------------------------------------------------------.
958| Handle `#line INT "FILE"'. ARGS has already skipped `#line '. |
959`----------------------------------------------------------------*/
960
961static void
3f2d73f1 962handle_syncline (char *args)
900c5db5
AD
963{
964 int lineno = strtol (args, &args, 10);
965 const char *file = NULL;
966 file = strchr (args, '"') + 1;
967 *strchr (file, '"') = 0;
3f2d73f1
PE
968 scanner_cursor.file = current_file = xstrdup (file);
969 scanner_cursor.line = lineno;
970 scanner_cursor.column = 1;
900c5db5
AD
971}
972
a706a1cc 973
3f2d73f1
PE
974/*------------------------------------------------------------------------.
975| Report an unexpected EOF in a token or comment starting at START. |
976| An end of file was encountered and the expected TOKEN_END was missing. |
3f2d73f1 977`------------------------------------------------------------------------*/
a706a1cc
PE
978
979static void
aa418041 980unexpected_eof (boundary start, char const *token_end)
a706a1cc 981{
223ff46e
PE
982 location loc;
983 loc.start = start;
984 loc.end = scanner_cursor;
985 complain_at (loc, _("missing `%s' at end of file"), token_end);
a706a1cc
PE
986}
987
988
f25bfb75
AD
989/*-------------------------.
990| Initialize the scanner. |
991`-------------------------*/
992
1d6412ad
AD
993void
994scanner_initialize (void)
995{
223ff46e 996 obstack_init (&obstack_for_string);
1d6412ad
AD
997}
998
999
f25bfb75
AD
1000/*-----------------------------------------------.
1001| Free all the memory allocated to the scanner. |
1002`-----------------------------------------------*/
1003
4cdb01db
AD
1004void
1005scanner_free (void)
1006{
223ff46e 1007 obstack_free (&obstack_for_string, 0);
536545f3
AD
1008 /* Reclaim Flex's buffers. */
1009 yy_delete_buffer (YY_CURRENT_BUFFER);
4cdb01db 1010}