]> git.saurik.com Git - bison.git/blame - src/scan-gram.l
(AC_INIT): Bump version number to 1.875c.
[bison.git] / src / scan-gram.l
CommitLineData
e9955c83 1/* Bison Grammar Scanner -*- C -*-
3b1e470c 2
a737b216 3 Copyright (C) 2002, 2003 Free Software Foundation, Inc.
e9955c83
AD
4
5 This file is part of Bison, the GNU Compiler Compiler.
6
7 This program is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 2 of the License, or
10 (at your option) any later version.
11
12 This program is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
16
17 You should have received a copy of the GNU General Public License
18 along with this program; if not, write to the Free Software
19 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
20 02111-1307 USA
21*/
22
aa418041 23%option debug nodefault nounput noyywrap never-interactive
e9955c83
AD
24%option prefix="gram_" outfile="lex.yy.c"
25
26%{
27#include "system.h"
223ff46e
PE
28
29#include <mbswidth.h>
30#include <get-errno.h>
31#include <quote.h>
32
e9955c83 33#include "complain.h"
3f2d73f1 34#include "files.h"
e9955c83
AD
35#include "getargs.h"
36#include "gram.h"
37#include "reader.h"
223ff46e 38#include "uniqstr.h"
e9955c83 39
3f2d73f1
PE
40#define YY_USER_INIT \
41 do \
42 { \
43 scanner_cursor.file = current_file; \
44 scanner_cursor.line = 1; \
45 scanner_cursor.column = 1; \
379f0ac8 46 code_start = scanner_cursor; \
3f2d73f1
PE
47 } \
48 while (0)
8efe435c 49
3f2d73f1
PE
50/* Location of scanner cursor. */
51boundary scanner_cursor;
41141c56 52
223ff46e 53static void adjust_location (location *, char const *, size_t);
3f2d73f1 54#define YY_USER_ACTION adjust_location (loc, yytext, yyleng);
d8d3f94a 55
6c30d641 56static size_t no_cr_read (FILE *, char *, size_t);
d8d3f94a
PE
57#define YY_INPUT(buf, result, size) ((result) = no_cr_read (yyin, buf, size))
58
59
223ff46e 60/* OBSTACK_FOR_STRING -- Used to store all the characters that we need to
44995b2e
AD
61 keep (to construct ID, STRINGS etc.). Use the following macros to
62 use it.
63
41141c56
PE
64 Use STRING_GROW to append what has just been matched, and
65 STRING_FINISH to end the string (it puts the ending 0).
66 STRING_FINISH also stores this string in LAST_STRING, which can be
67 used, and which is used by STRING_FREE to free the last string. */
44995b2e 68
223ff46e 69static struct obstack obstack_for_string;
44995b2e 70
7ec2d4cd
AD
71/* A string representing the most recently saved token. */
72static char *last_string;
73
74
41141c56 75#define STRING_GROW \
223ff46e 76 obstack_grow (&obstack_for_string, yytext, yyleng)
44995b2e 77
41141c56 78#define STRING_FINISH \
44995b2e 79 do { \
223ff46e
PE
80 obstack_1grow (&obstack_for_string, '\0'); \
81 last_string = obstack_finish (&obstack_for_string); \
44995b2e
AD
82 } while (0)
83
41141c56 84#define STRING_FREE \
223ff46e 85 obstack_free (&obstack_for_string, last_string)
e9955c83 86
7ec2d4cd
AD
87void
88scanner_last_string_free (void)
89{
41141c56 90 STRING_FREE;
7ec2d4cd 91}
e9955c83 92
efcb44dd
PE
93/* Within well-formed rules, RULE_LENGTH is the number of values in
94 the current rule so far, which says where to find `$0' with respect
95 to the top of the stack. It is not the same as the rule->length in
96 the case of mid rule actions.
97
98 Outside of well-formed rules, RULE_LENGTH has an undefined value. */
99static int rule_length;
100
624a35e2
PE
101static void handle_dollar (int token_type, char *cp, location loc);
102static void handle_at (int token_type, char *cp, location loc);
3f2d73f1 103static void handle_syncline (char *args);
d8d3f94a 104static int convert_ucn_to_byte (char const *hex_text);
aa418041 105static void unexpected_eof (boundary, char const *);
e9955c83
AD
106
107%}
d8d3f94a 108%x SC_COMMENT SC_LINE_COMMENT SC_YACC_COMMENT
e9955c83 109%x SC_STRING SC_CHARACTER
3f2d73f1 110%x SC_AFTER_IDENTIFIER
e9955c83 111%x SC_ESCAPED_STRING SC_ESCAPED_CHARACTER
624a35e2 112%x SC_PRE_CODE SC_BRACED_CODE SC_PROLOGUE SC_EPILOGUE
e9955c83 113
29c01725
AD
114letter [.abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ_]
115id {letter}({letter}|[0-9])*
116directive %{letter}({letter}|[0-9]|-)*
624a35e2 117int [0-9]+
d8d3f94a
PE
118
119/* POSIX says that a tag must be both an id and a C union member, but
120 historically almost any character is allowed in a tag. We disallow
121 NUL and newline, as this simplifies our implementation. */
122tag [^\0\n>]+
123
124/* Zero or more instances of backslash-newline. Following GCC, allow
125 white space between the backslash and the newline. */
126splice (\\[ \f\t\v]*\n)*
e9955c83
AD
127
128%%
129%{
a706a1cc 130 /* Nesting level of the current code in braces. */
1a9e39f1
PE
131 int braces_level IF_LINT (= 0);
132
3f2d73f1
PE
133 /* Parent context state, when applicable. */
134 int context_state IF_LINT (= 0);
a706a1cc 135
624a35e2
PE
136 /* Token type to return, when applicable. */
137 int token_type IF_LINT (= 0);
138
3f2d73f1 139 /* Location of most recent identifier, when applicable. */
a2bc9dbc 140 location id_loc IF_LINT (= empty_location);
3f2d73f1 141
a2bc9dbc
PE
142 /* Where containing code started, when applicable. Its initial
143 value is relevant only when yylex is invoked in the SC_EPILOGUE
144 start condition. */
145 boundary code_start = scanner_cursor;
3f2d73f1 146
223ff46e
PE
147 /* Where containing comment or string or character literal started,
148 when applicable. */
a2bc9dbc 149 boundary token_start IF_LINT (= scanner_cursor);
e9955c83
AD
150%}
151
152
3f2d73f1
PE
153 /*-----------------------.
154 | Scanning white space. |
155 `-----------------------*/
156
624a35e2 157<INITIAL,SC_AFTER_IDENTIFIER,SC_PRE_CODE>
3f2d73f1
PE
158{
159 [ \f\n\t\v] ;
83adb046 160 "," warn_at (*loc, _("stray `,' treated as white space"));
3f2d73f1
PE
161
162 /* Comments. */
3f2d73f1 163 "//".* ;
83adb046
PE
164 "/*" {
165 token_start = loc->start;
166 context_state = YY_START;
167 BEGIN SC_YACC_COMMENT;
168 }
3f2d73f1
PE
169
170 /* #line directives are not documented, and may be withdrawn or
171 modified in future versions of Bison. */
172 ^"#line "{int}" \"".*"\"\n" {
173 handle_syncline (yytext + sizeof "#line " - 1);
174 }
175}
176
177
e9955c83
AD
178 /*----------------------------.
179 | Scanning Bison directives. |
180 `----------------------------*/
181<INITIAL>
182{
183 "%binary" return PERCENT_NONASSOC;
184 "%debug" return PERCENT_DEBUG;
185 "%define" return PERCENT_DEFINE;
186 "%defines" return PERCENT_DEFINES;
624a35e2 187 "%destructor" token_type = PERCENT_DESTRUCTOR; BEGIN SC_PRE_CODE;
676385e2 188 "%dprec" return PERCENT_DPREC;
e9955c83
AD
189 "%error"[-_]"verbose" return PERCENT_ERROR_VERBOSE;
190 "%expect" return PERCENT_EXPECT;
191 "%file-prefix" return PERCENT_FILE_PREFIX;
192 "%fixed"[-_]"output"[-_]"files" return PERCENT_YACC;
ae7453f2 193 "%glr-parser" return PERCENT_GLR_PARSER;
e9955c83 194 "%left" return PERCENT_LEFT;
624a35e2 195 "%lex-param" token_type = PERCENT_LEX_PARAM; BEGIN SC_PRE_CODE;
e9955c83 196 "%locations" return PERCENT_LOCATIONS;
676385e2 197 "%merge" return PERCENT_MERGE;
e9955c83
AD
198 "%name"[-_]"prefix" return PERCENT_NAME_PREFIX;
199 "%no"[-_]"lines" return PERCENT_NO_LINES;
200 "%nonassoc" return PERCENT_NONASSOC;
916708d5 201 "%nondeterministic-parser" return PERCENT_NONDETERMINISTIC_PARSER;
e9955c83
AD
202 "%nterm" return PERCENT_NTERM;
203 "%output" return PERCENT_OUTPUT;
624a35e2 204 "%parse-param" token_type = PERCENT_PARSE_PARAM; BEGIN SC_PRE_CODE;
d8d3f94a 205 "%prec" rule_length--; return PERCENT_PREC;
624a35e2 206 "%printer" token_type = PERCENT_PRINTER; BEGIN SC_PRE_CODE;
e9955c83
AD
207 "%pure"[-_]"parser" return PERCENT_PURE_PARSER;
208 "%right" return PERCENT_RIGHT;
209 "%skeleton" return PERCENT_SKELETON;
210 "%start" return PERCENT_START;
211 "%term" return PERCENT_TOKEN;
212 "%token" return PERCENT_TOKEN;
213 "%token"[-_]"table" return PERCENT_TOKEN_TABLE;
214 "%type" return PERCENT_TYPE;
624a35e2 215 "%union" token_type = PERCENT_UNION; BEGIN SC_PRE_CODE;
e9955c83
AD
216 "%verbose" return PERCENT_VERBOSE;
217 "%yacc" return PERCENT_YACC;
218
3f2d73f1 219 {directive} {
41141c56 220 complain_at (*loc, _("invalid directive: %s"), quote (yytext));
412f8a59 221 }
900c5db5 222
e9955c83 223 "=" return EQUAL;
d8d3f94a 224 "|" rule_length = 0; return PIPE;
e9955c83
AD
225 ";" return SEMICOLON;
226
3f2d73f1 227 {id} {
41141c56 228 val->symbol = symbol_get (yytext, *loc);
3f2d73f1 229 id_loc = *loc;
efcb44dd 230 rule_length++;
3f2d73f1 231 BEGIN SC_AFTER_IDENTIFIER;
e9955c83
AD
232 }
233
d8d3f94a
PE
234 {int} {
235 unsigned long num;
223ff46e 236 set_errno (0);
d8d3f94a 237 num = strtoul (yytext, 0, 10);
223ff46e 238 if (INT_MAX < num || get_errno ())
d8d3f94a 239 {
41141c56 240 complain_at (*loc, _("integer out of range: %s"), quote (yytext));
d8d3f94a
PE
241 num = INT_MAX;
242 }
41141c56 243 val->integer = num;
d8d3f94a
PE
244 return INT;
245 }
e9955c83
AD
246
247 /* Characters. We don't check there is only one. */
3f2d73f1 248 "'" STRING_GROW; token_start = loc->start; BEGIN SC_ESCAPED_CHARACTER;
e9955c83
AD
249
250 /* Strings. */
3f2d73f1 251 "\"" STRING_GROW; token_start = loc->start; BEGIN SC_ESCAPED_STRING;
e9955c83
AD
252
253 /* Prologue. */
3f2d73f1 254 "%{" code_start = loc->start; BEGIN SC_PROLOGUE;
e9955c83
AD
255
256 /* Code in between braces. */
3f2d73f1
PE
257 "{" {
258 STRING_GROW;
624a35e2 259 token_type = BRACED_CODE;
3f2d73f1
PE
260 braces_level = 0;
261 code_start = loc->start;
262 BEGIN SC_BRACED_CODE;
263 }
e9955c83
AD
264
265 /* A type. */
d8d3f94a 266 "<"{tag}">" {
223ff46e 267 obstack_grow (&obstack_for_string, yytext + 1, yyleng - 2);
41141c56 268 STRING_FINISH;
223ff46e 269 val->uniqstr = uniqstr_new (last_string);
41141c56 270 STRING_FREE;
4cdb01db
AD
271 return TYPE;
272 }
273
a706a1cc
PE
274 "%%" {
275 static int percent_percent_count;
e9955c83 276 if (++percent_percent_count == 2)
a2bc9dbc 277 BEGIN SC_EPILOGUE;
e9955c83
AD
278 return PERCENT_PERCENT;
279 }
280
a706a1cc 281 . {
41141c56 282 complain_at (*loc, _("invalid character: %s"), quote (yytext));
3f2d73f1 283 }
379f0ac8
PE
284
285 <<EOF>> {
286 loc->start = loc->end = scanner_cursor;
287 yyterminate ();
288 }
3f2d73f1
PE
289}
290
291
292 /*-----------------------------------------------------------------.
293 | Scanning after an identifier, checking whether a colon is next. |
294 `-----------------------------------------------------------------*/
295
296<SC_AFTER_IDENTIFIER>
297{
298 ":" {
299 rule_length = 0;
300 *loc = id_loc;
301 BEGIN INITIAL;
302 return ID_COLON;
303 }
304 . {
305 scanner_cursor.column -= mbsnwidth (yytext, yyleng, 0);
306 yyless (0);
307 *loc = id_loc;
308 BEGIN INITIAL;
309 return ID;
310 }
311 <<EOF>> {
312 *loc = id_loc;
313 BEGIN INITIAL;
314 return ID;
e9955c83
AD
315 }
316}
317
318
d8d3f94a
PE
319 /*---------------------------------------------------------------.
320 | Scanning a Yacc comment. The initial `/ *' is already eaten. |
321 `---------------------------------------------------------------*/
e9955c83 322
d8d3f94a 323<SC_YACC_COMMENT>
e9955c83 324{
3f2d73f1 325 "*/" BEGIN context_state;
a706a1cc 326 .|\n ;
aa418041 327 <<EOF>> unexpected_eof (token_start, "*/"); BEGIN context_state;
d8d3f94a
PE
328}
329
330
331 /*------------------------------------------------------------.
332 | Scanning a C comment. The initial `/ *' is already eaten. |
333 `------------------------------------------------------------*/
334
335<SC_COMMENT>
336{
3f2d73f1 337 "*"{splice}"/" STRING_GROW; BEGIN context_state;
aa418041 338 <<EOF>> unexpected_eof (token_start, "*/"); BEGIN context_state;
e9955c83
AD
339}
340
341
d8d3f94a
PE
342 /*--------------------------------------------------------------.
343 | Scanning a line comment. The initial `//' is already eaten. |
344 `--------------------------------------------------------------*/
345
346<SC_LINE_COMMENT>
347{
3f2d73f1 348 "\n" STRING_GROW; BEGIN context_state;
41141c56 349 {splice} STRING_GROW;
3f2d73f1 350 <<EOF>> BEGIN context_state;
d8d3f94a
PE
351}
352
353
e9955c83
AD
354 /*----------------------------------------------------------------.
355 | Scanning a C string, including its escapes. The initial `"' is |
356 | already eaten. |
357 `----------------------------------------------------------------*/
358
359<SC_ESCAPED_STRING>
360{
db2cc12f 361 "\"" {
41141c56
PE
362 STRING_GROW;
363 STRING_FINISH;
3f2d73f1 364 loc->start = token_start;
223ff46e 365 val->chars = last_string;
efcb44dd 366 rule_length++;
a706a1cc 367 BEGIN INITIAL;
e9955c83
AD
368 return STRING;
369 }
370
41141c56 371 .|\n STRING_GROW;
aa418041 372 <<EOF>> unexpected_eof (token_start, "\""); BEGIN INITIAL;
e9955c83
AD
373}
374
375 /*---------------------------------------------------------------.
376 | Scanning a C character, decoding its escapes. The initial "'" |
377 | is already eaten. |
378 `---------------------------------------------------------------*/
379
380<SC_ESCAPED_CHARACTER>
381{
db2cc12f 382 "'" {
3b1e470c 383 unsigned char last_string_1;
41141c56
PE
384 STRING_GROW;
385 STRING_FINISH;
3f2d73f1 386 loc->start = token_start;
41141c56
PE
387 val->symbol = symbol_get (last_string, *loc);
388 symbol_class_set (val->symbol, token_sym, *loc);
3b1e470c
PE
389 last_string_1 = last_string[1];
390 symbol_user_token_number_set (val->symbol, last_string_1, *loc);
41141c56 391 STRING_FREE;
a706a1cc
PE
392 rule_length++;
393 BEGIN INITIAL;
394 return ID;
e9955c83 395 }
a706a1cc 396
41141c56 397 .|\n STRING_GROW;
aa418041 398 <<EOF>> unexpected_eof (token_start, "'"); BEGIN INITIAL;
e9955c83
AD
399}
400
401
402 /*----------------------------.
403 | Decode escaped characters. |
404 `----------------------------*/
405
406<SC_ESCAPED_STRING,SC_ESCAPED_CHARACTER>
407{
d8d3f94a
PE
408 \\[0-7]{1,3} {
409 unsigned long c = strtoul (yytext + 1, 0, 8);
410 if (UCHAR_MAX < c)
3f2d73f1 411 complain_at (*loc, _("invalid escape sequence: %s"), quote (yytext));
e9955c83 412 else
223ff46e 413 obstack_1grow (&obstack_for_string, c);
e9955c83
AD
414 }
415
6b0d38ab 416 \\x[0-9abcdefABCDEF]+ {
d8d3f94a 417 unsigned long c;
223ff46e 418 set_errno (0);
d8d3f94a 419 c = strtoul (yytext + 2, 0, 16);
223ff46e 420 if (UCHAR_MAX < c || get_errno ())
3f2d73f1 421 complain_at (*loc, _("invalid escape sequence: %s"), quote (yytext));
d8d3f94a 422 else
223ff46e 423 obstack_1grow (&obstack_for_string, c);
e9955c83
AD
424 }
425
223ff46e
PE
426 \\a obstack_1grow (&obstack_for_string, '\a');
427 \\b obstack_1grow (&obstack_for_string, '\b');
428 \\f obstack_1grow (&obstack_for_string, '\f');
429 \\n obstack_1grow (&obstack_for_string, '\n');
430 \\r obstack_1grow (&obstack_for_string, '\r');
431 \\t obstack_1grow (&obstack_for_string, '\t');
432 \\v obstack_1grow (&obstack_for_string, '\v');
412f8a59
PE
433
434 /* \\[\"\'?\\] would be shorter, but it confuses xgettext. */
223ff46e 435 \\("\""|"'"|"?"|"\\") obstack_1grow (&obstack_for_string, yytext[1]);
412f8a59 436
6b0d38ab 437 \\(u|U[0-9abcdefABCDEF]{4})[0-9abcdefABCDEF]{4} {
d8d3f94a
PE
438 int c = convert_ucn_to_byte (yytext);
439 if (c < 0)
3f2d73f1 440 complain_at (*loc, _("invalid escape sequence: %s"), quote (yytext));
d8d3f94a 441 else
223ff46e 442 obstack_1grow (&obstack_for_string, c);
d8d3f94a 443 }
4f25ebb0 444 \\(.|\n) {
3f2d73f1 445 complain_at (*loc, _("unrecognized escape sequence: %s"), quote (yytext));
41141c56 446 STRING_GROW;
e9955c83
AD
447 }
448}
449
450
451 /*----------------------------------------------------------.
452 | Scanning a C character without decoding its escapes. The |
453 | initial "'" is already eaten. |
454 `----------------------------------------------------------*/
455
456<SC_CHARACTER>
457{
3f2d73f1 458 "'" STRING_GROW; BEGIN context_state;
41141c56 459 \\{splice}[^$@\[\]] STRING_GROW;
aa418041 460 <<EOF>> unexpected_eof (token_start, "'"); BEGIN context_state;
e9955c83
AD
461}
462
463
464 /*----------------------------------------------------------------.
465 | Scanning a C string, without decoding its escapes. The initial |
466 | `"' is already eaten. |
467 `----------------------------------------------------------------*/
468
469<SC_STRING>
470{
3f2d73f1 471 "\"" STRING_GROW; BEGIN context_state;
41141c56 472 \\{splice}[^$@\[\]] STRING_GROW;
aa418041
PE
473 <<EOF>> {
474 unexpected_eof (token_start, "\"");
475 BEGIN context_state;
476 }
e9955c83
AD
477}
478
479
480 /*---------------------------------------------------.
481 | Strings, comments etc. can be found in user code. |
482 `---------------------------------------------------*/
483
484<SC_BRACED_CODE,SC_PROLOGUE,SC_EPILOGUE>
485{
3f2d73f1
PE
486 "'" {
487 STRING_GROW;
488 context_state = YY_START;
489 token_start = loc->start;
490 BEGIN SC_CHARACTER;
491 }
492 "\"" {
493 STRING_GROW;
494 context_state = YY_START;
495 token_start = loc->start;
496 BEGIN SC_STRING;
497 }
498 "/"{splice}"*" {
499 STRING_GROW;
500 context_state = YY_START;
501 token_start = loc->start;
502 BEGIN SC_COMMENT;
503 }
504 "/"{splice}"/" {
505 STRING_GROW;
506 context_state = YY_START;
507 BEGIN SC_LINE_COMMENT;
508 }
e9955c83
AD
509}
510
511
624a35e2
PE
512 /*---------------------------------------------------------------.
513 | Scanning after %union etc., possibly followed by white space. |
514 | For %union only, allow arbitrary C code to appear before the |
515 | following brace, as an extension to POSIX. |
516 `---------------------------------------------------------------*/
517
518<SC_PRE_CODE>
519{
520 . {
521 bool valid = yytext[0] == '{' || token_type == PERCENT_UNION;
522 scanner_cursor.column -= mbsnwidth (yytext, yyleng, 0);
523 yyless (0);
524
525 if (valid)
526 {
527 braces_level = -1;
528 code_start = loc->start;
529 BEGIN SC_BRACED_CODE;
530 }
531 else
532 {
533 complain_at (*loc, _("missing `{' in `%s'"),
534 token_name (token_type));
535 obstack_sgrow (&obstack_for_string, "{}");
536 STRING_FINISH;
537 val->chars = last_string;
538 BEGIN INITIAL;
539 return token_type;
540 }
541 }
379f0ac8 542
aa418041 543 <<EOF>> unexpected_eof (scanner_cursor, "{}"); BEGIN INITIAL;
624a35e2
PE
544}
545
546
e9955c83
AD
547 /*---------------------------------------------------------------.
548 | Scanning some code in braces (%union and actions). The initial |
549 | "{" is already eaten. |
550 `---------------------------------------------------------------*/
551
552<SC_BRACED_CODE>
553{
41141c56
PE
554 "{"|"<"{splice}"%" STRING_GROW; braces_level++;
555 "%"{splice}">" STRING_GROW; braces_level--;
e9955c83 556 "}" {
25522739
PE
557 bool outer_brace = --braces_level < 0;
558
559 /* As an undocumented Bison extension, append `;' before the last
560 brace in braced code, so that the user code can omit trailing
561 `;'. But do not append `;' if emulating Yacc, since Yacc does
562 not append one.
563
564 FIXME: Bison should warn if a semicolon seems to be necessary
565 here, and should omit the semicolon if it seems unnecessary
566 (e.g., after ';', '{', or '}', each followed by comments or
567 white space). Such a warning shouldn't depend on --yacc; it
568 should depend on a new --pedantic option, which would cause
569 Bison to warn if it detects an extension to POSIX. --pedantic
570 should also diagnose other Bison extensions like %yacc.
571 Perhaps there should also be a GCC-style --pedantic-errors
572 option, so that such warnings are diagnosed as errors. */
1deb9bdc 573 if (outer_brace && token_type == BRACED_CODE && ! yacc_flag)
25522739
PE
574 obstack_1grow (&obstack_for_string, ';');
575
576 obstack_1grow (&obstack_for_string, '}');
577
578 if (outer_brace)
e9955c83 579 {
41141c56 580 STRING_FINISH;
624a35e2 581 rule_length++;
3f2d73f1 582 loc->start = code_start;
223ff46e 583 val->chars = last_string;
a706a1cc 584 BEGIN INITIAL;
624a35e2 585 return token_type;
e9955c83
AD
586 }
587 }
588
a706a1cc
PE
589 /* Tokenize `<<%' correctly (as `<<' `%') rather than incorrectly
590 (as `<' `<%'). */
41141c56 591 "<"{splice}"<" STRING_GROW;
a706a1cc 592
624a35e2
PE
593 "$"("<"{tag}">")?(-?[0-9]+|"$") handle_dollar (token_type, yytext, *loc);
594 "@"(-?[0-9]+|"$") handle_at (token_type, yytext, *loc);
e9955c83 595
aa418041 596 <<EOF>> unexpected_eof (code_start, "}"); BEGIN INITIAL;
e9955c83
AD
597}
598
599
600 /*--------------------------------------------------------------.
601 | Scanning some prologue: from "%{" (already scanned) to "%}". |
602 `--------------------------------------------------------------*/
603
604<SC_PROLOGUE>
605{
606 "%}" {
41141c56 607 STRING_FINISH;
3f2d73f1 608 loc->start = code_start;
223ff46e 609 val->chars = last_string;
a706a1cc 610 BEGIN INITIAL;
e9955c83
AD
611 return PROLOGUE;
612 }
613
aa418041 614 <<EOF>> unexpected_eof (code_start, "%}"); BEGIN INITIAL;
e9955c83
AD
615}
616
617
618 /*---------------------------------------------------------------.
619 | Scanning the epilogue (everything after the second "%%", which |
d8d3f94a 620 | has already been eaten). |
e9955c83
AD
621 `---------------------------------------------------------------*/
622
623<SC_EPILOGUE>
624{
e9955c83 625 <<EOF>> {
41141c56 626 STRING_FINISH;
3f2d73f1 627 loc->start = code_start;
223ff46e 628 val->chars = last_string;
a706a1cc 629 BEGIN INITIAL;
e9955c83
AD
630 return EPILOGUE;
631 }
632}
633
634
a706a1cc
PE
635 /*----------------------------------------------------------------.
636 | By default, grow the string obstack with the input, escaping M4 |
637 | quoting characters. |
638 `----------------------------------------------------------------*/
639
640<SC_COMMENT,SC_LINE_COMMENT,SC_STRING,SC_CHARACTER,SC_BRACED_CODE,SC_PROLOGUE,SC_EPILOGUE>
641{
223ff46e
PE
642 \$ obstack_sgrow (&obstack_for_string, "$][");
643 \@ obstack_sgrow (&obstack_for_string, "@@");
644 \[ obstack_sgrow (&obstack_for_string, "@{");
645 \] obstack_sgrow (&obstack_for_string, "@}");
41141c56 646 .|\n STRING_GROW;
a706a1cc
PE
647}
648
649
e9955c83
AD
650%%
651
25005f6a
PH
652/* Keeps track of the maximum number of semantic values to the left of
653 a handle (those referenced by $0, $-1, etc.) that are required by the
654 semantic actions of this grammar. */
655int max_left_semantic_context = 0;
656
3f2d73f1
PE
657/* Set *LOC and adjust scanner cursor to account for token TOKEN of
658 size SIZE. */
6c30d641
PE
659
660static void
223ff46e 661adjust_location (location *loc, char const *token, size_t size)
6c30d641 662{
3f2d73f1
PE
663 int line = scanner_cursor.line;
664 int column = scanner_cursor.column;
6c30d641
PE
665 char const *p0 = token;
666 char const *p = token;
667 char const *lim = token + size;
668
3f2d73f1
PE
669 loc->start = scanner_cursor;
670
6c30d641
PE
671 for (p = token; p < lim; p++)
672 switch (*p)
673 {
6c30d641
PE
674 case '\n':
675 line++;
676 column = 1;
677 p0 = p + 1;
678 break;
679
680 case '\t':
681 column += mbsnwidth (p0, p - p0, 0);
682 column += 8 - ((column - 1) & 7);
683 p0 = p + 1;
684 break;
685 }
686
3f2d73f1
PE
687 scanner_cursor.line = line;
688 scanner_cursor.column = column + mbsnwidth (p0, p - p0, 0);
689
690 loc->end = scanner_cursor;
6c30d641
PE
691}
692
693
694/* Read bytes from FP into buffer BUF of size SIZE. Return the
695 number of bytes read. Remove '\r' from input, treating \r\n
696 and isolated \r as \n. */
697
static size_t
no_cr_read (FILE *fp, char *buf, size_t size)
{
  size_t bytes_read = fread (buf, 1, size, fp);
  char *dst;
  char const *src;
  char const *end;

  if (bytes_read == 0)
    return bytes_read;

  /* Nothing to rewrite unless the chunk contains a carriage return.  */
  dst = memchr (buf, '\r', bytes_read);
  if (dst == NULL)
    return bytes_read;

  end = buf + bytes_read;
  src = ++dst;

  for (;;)
    {
      /* DST[-1] holds a '\r'.  Turn it into '\n', and swallow a '\n'
         that immediately follows it.  */
      dst[-1] = '\n';
      if (src == end)
        {
          /* The '\r' was the last byte read, so the possible '\n' is
             still in the stream: peek at it.  Any other byte is pushed
             back; give up if that push-back fails.  */
          int next = getc (fp);
          if (next != '\n' && ungetc (next, fp) != next)
            return dst - buf;
        }
      else if (*src == '\n')
        src++;

      /* Compact the remaining bytes down until the next '\r' has been
         copied, or the input chunk is exhausted.  */
      for (;;)
        {
          if (src == end)
            return dst - buf;
          if ((*dst++ = *src++) == '\r')
            break;
        }
    }
}
739
740
e9955c83 741/*------------------------------------------------------------------.
366eea36 742| TEXT is pointing to a wannabee semantic value (i.e., a `$'). |
e9955c83
AD
743| |
744| Possible inputs: $[<TYPENAME>]($|integer) |
745| |
223ff46e 746| Output to OBSTACK_FOR_STRING a reference to this semantic value. |
e9955c83
AD
747`------------------------------------------------------------------*/
748
624a35e2 749static inline bool
223ff46e 750handle_action_dollar (char *text, location loc)
e9955c83
AD
751{
752 const char *type_name = NULL;
366eea36 753 char *cp = text + 1;
e9955c83 754
624a35e2
PE
755 if (! current_rule)
756 return false;
757
e9955c83
AD
758 /* Get the type name if explicit. */
759 if (*cp == '<')
760 {
761 type_name = ++cp;
762 while (*cp != '>')
763 ++cp;
764 *cp = '\0';
765 ++cp;
766 }
767
768 if (*cp == '$')
769 {
770 if (!type_name)
223ff46e 771 type_name = symbol_list_n_type_name_get (current_rule, loc, 0);
e9955c83 772 if (!type_name && typed)
223ff46e 773 complain_at (loc, _("$$ of `%s' has no declared type"),
97650f4e 774 current_rule->sym->tag);
e9955c83
AD
775 if (!type_name)
776 type_name = "";
223ff46e 777 obstack_fgrow1 (&obstack_for_string,
e9955c83
AD
778 "]b4_lhs_value([%s])[", type_name);
779 }
d8d3f94a 780 else
e9955c83 781 {
d8d3f94a 782 long num;
223ff46e 783 set_errno (0);
d8d3f94a 784 num = strtol (cp, 0, 10);
e9955c83 785
223ff46e 786 if (INT_MIN <= num && num <= rule_length && ! get_errno ())
e9955c83 787 {
d8d3f94a 788 int n = num;
25005f6a
PH
789 if (1-n > max_left_semantic_context)
790 max_left_semantic_context = 1-n;
e9955c83 791 if (!type_name && n > 0)
223ff46e 792 type_name = symbol_list_n_type_name_get (current_rule, loc, n);
e9955c83 793 if (!type_name && typed)
223ff46e
PE
794 complain_at (loc, _("$%d of `%s' has no declared type"),
795 n, current_rule->sym->tag);
e9955c83
AD
796 if (!type_name)
797 type_name = "";
223ff46e 798 obstack_fgrow3 (&obstack_for_string,
e9955c83
AD
799 "]b4_rhs_value([%d], [%d], [%s])[",
800 rule_length, n, type_name);
801 }
d8d3f94a 802 else
223ff46e 803 complain_at (loc, _("integer out of range: %s"), quote (text));
9280d3ef 804 }
9280d3ef 805
624a35e2 806 return true;
e9955c83
AD
807}
808
f25bfb75
AD
809
810/*-----------------------------------------------------------------.
811| Dispatch onto handle_action_dollar, or handle_destructor_dollar, |
624a35e2 812| depending upon TOKEN_TYPE. |
f25bfb75 813`-----------------------------------------------------------------*/
e9955c83
AD
814
815static void
624a35e2 816handle_dollar (int token_type, char *text, location loc)
f25bfb75 817{
624a35e2 818 switch (token_type)
f25bfb75 819 {
624a35e2
PE
820 case BRACED_CODE:
821 if (handle_action_dollar (text, loc))
822 return;
f25bfb75
AD
823 break;
824
624a35e2
PE
825 case PERCENT_DESTRUCTOR:
826 case PERCENT_PRINTER:
827 if (text[1] == '$')
828 {
829 obstack_sgrow (&obstack_for_string, "]b4_dollar_dollar[");
830 return;
831 }
832 break;
833
834 default:
f25bfb75
AD
835 break;
836 }
624a35e2
PE
837
838 complain_at (loc, _("invalid value: %s"), quote (text));
f25bfb75
AD
839}
840
841
842/*------------------------------------------------------.
843| TEXT is a location token (i.e., a `@...'). Output to |
223ff46e 844| OBSTACK_FOR_STRING a reference to this location. |
f25bfb75
AD
845`------------------------------------------------------*/
846
624a35e2 847static inline bool
223ff46e 848handle_action_at (char *text, location loc)
e9955c83 849{
366eea36 850 char *cp = text + 1;
d0829076 851 locations_flag = true;
e9955c83 852
624a35e2
PE
853 if (! current_rule)
854 return false;
855
366eea36 856 if (*cp == '$')
624a35e2 857 obstack_sgrow (&obstack_for_string, "]b4_lhs_location[");
d8d3f94a 858 else
e9955c83 859 {
d8d3f94a 860 long num;
223ff46e 861 set_errno (0);
d8d3f94a 862 num = strtol (cp, 0, 10);
dafdc66f 863
223ff46e 864 if (INT_MIN <= num && num <= rule_length && ! get_errno ())
d8d3f94a
PE
865 {
866 int n = num;
223ff46e 867 obstack_fgrow2 (&obstack_for_string, "]b4_rhs_location([%d], [%d])[",
d8d3f94a
PE
868 rule_length, n);
869 }
e9955c83 870 else
223ff46e 871 complain_at (loc, _("integer out of range: %s"), quote (text));
f25bfb75 872 }
f25bfb75 873
624a35e2 874 return true;
e9955c83 875}
4cdb01db 876
f25bfb75
AD
877
878/*-------------------------------------------------------------------.
879| Dispatch onto handle_action_at, or handle_destructor_at, depending |
880| upon TOKEN_TYPE. |
881`-------------------------------------------------------------------*/
882
883static void
624a35e2 884handle_at (int token_type, char *text, location loc)
f25bfb75 885{
624a35e2 886 switch (token_type)
f25bfb75 887 {
624a35e2 888 case BRACED_CODE:
223ff46e 889 handle_action_at (text, loc);
624a35e2
PE
890 return;
891
892 case PERCENT_DESTRUCTOR:
893 case PERCENT_PRINTER:
894 if (text[1] == '$')
895 {
896 obstack_sgrow (&obstack_for_string, "]b4_at_dollar[");
897 return;
898 }
f25bfb75
AD
899 break;
900
624a35e2 901 default:
f25bfb75
AD
902 break;
903 }
624a35e2
PE
904
905 complain_at (loc, _("invalid value: %s"), quote (text));
f25bfb75
AD
906}
907
908
d8d3f94a
PE
909/*------------------------------------------------------------------.
910| Convert universal character name UCN to a single-byte character, |
911| and return that character. Return -1 if UCN does not correspond |
912| to a single-byte character. |
913`------------------------------------------------------------------*/
914
static int
convert_ucn_to_byte (char const *ucn)
{
  /* UCN + 2 skips the two characters that precede the hexadecimal
     digits.  */
  unsigned long value = strtoul (ucn + 2, 0, 16);

  /* FIXME: Currently we assume Unicode-compatible unibyte characters
     on ASCII hosts (i.e., Latin-1 on hosts with 8-bit bytes).  On
     non-ASCII hosts we support only the portable C character set.
     These limitations should be removed once we add support for
     multibyte characters.  */

  if (value > UCHAR_MAX)
    return -1;

#if ! ('$' == 0x24 && '@' == 0x40 && '`' == 0x60 && '~' == 0x7e)
  {
    /* A non-ASCII host.  Map VALUE through a table, indexed by ASCII
       code, of the C basic execution character set in the host's own
       encoding.  The table also holds '$', '@', and '`', which are
       outside that set but are unibyte characters on all the platforms
       that we know about.  Entries of -1 have no host equivalent.  */
    static signed char const table[] =
      {
        '\0',   -1,   -1,   -1,   -1,   -1,   -1, '\a',
        '\b', '\t', '\n', '\v', '\f', '\r',   -1,   -1,
          -1,   -1,   -1,   -1,   -1,   -1,   -1,   -1,
          -1,   -1,   -1,   -1,   -1,   -1,   -1,   -1,
         ' ',  '!',  '"',  '#',  '$',  '%',  '&', '\'',
         '(',  ')',  '*',  '+',  ',',  '-',  '.',  '/',
         '0',  '1',  '2',  '3',  '4',  '5',  '6',  '7',
         '8',  '9',  ':',  ';',  '<',  '=',  '>',  '?',
         '@',  'A',  'B',  'C',  'D',  'E',  'F',  'G',
         'H',  'I',  'J',  'K',  'L',  'M',  'N',  'O',
         'P',  'Q',  'R',  'S',  'T',  'U',  'V',  'W',
         'X',  'Y',  'Z',  '[', '\\',  ']',  '^',  '_',
         '`',  'a',  'b',  'c',  'd',  'e',  'f',  'g',
         'h',  'i',  'j',  'k',  'l',  'm',  'n',  'o',
         'p',  'q',  'r',  's',  't',  'u',  'v',  'w',
         'x',  'y',  'z',  '{',  '|',  '}',  '~'
      };

    value = value < sizeof table ? table[value] : -1;
  }
#endif

  return value;
}
963
964
900c5db5
AD
965/*----------------------------------------------------------------.
966| Handle `#line INT "FILE"'. ARGS has already skipped `#line '. |
967`----------------------------------------------------------------*/
968
969static void
3f2d73f1 970handle_syncline (char *args)
900c5db5
AD
971{
972 int lineno = strtol (args, &args, 10);
973 const char *file = NULL;
974 file = strchr (args, '"') + 1;
975 *strchr (file, '"') = 0;
3f2d73f1
PE
976 scanner_cursor.file = current_file = xstrdup (file);
977 scanner_cursor.line = lineno;
978 scanner_cursor.column = 1;
900c5db5
AD
979}
980
a706a1cc 981
3f2d73f1
PE
982/*------------------------------------------------------------------------.
983| Report an unexpected EOF in a token or comment starting at START. |
984| An end of file was encountered and the expected TOKEN_END was missing. |
3f2d73f1 985`------------------------------------------------------------------------*/
a706a1cc
PE
986
987static void
aa418041 988unexpected_eof (boundary start, char const *token_end)
a706a1cc 989{
223ff46e
PE
990 location loc;
991 loc.start = start;
992 loc.end = scanner_cursor;
993 complain_at (loc, _("missing `%s' at end of file"), token_end);
a706a1cc
PE
994}
995
996
f25bfb75
AD
997/*-------------------------.
998| Initialize the scanner. |
999`-------------------------*/
1000
1d6412ad
AD
1001void
1002scanner_initialize (void)
1003{
223ff46e 1004 obstack_init (&obstack_for_string);
1d6412ad
AD
1005}
1006
1007
f25bfb75
AD
1008/*-----------------------------------------------.
1009| Free all the memory allocated to the scanner. |
1010`-----------------------------------------------*/
1011
4cdb01db
AD
1012void
1013scanner_free (void)
1014{
223ff46e 1015 obstack_free (&obstack_for_string, 0);
536545f3
AD
1016 /* Reclaim Flex's buffers. */
1017 yy_delete_buffer (YY_CURRENT_BUFFER);
4cdb01db 1018}