]> git.saurik.com Git - bison.git/blame - src/scan-gram.l
* src/gram.h, src/gram.c (pure_parser, glr_parser): Move to...
[bison.git] / src / scan-gram.l
CommitLineData
e9955c83 1/* Bison Grammar Scanner -*- C -*-
3b1e470c 2
a737b216 3 Copyright (C) 2002, 2003 Free Software Foundation, Inc.
e9955c83
AD
4
5 This file is part of Bison, the GNU Compiler Compiler.
6
7 This program is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 2 of the License, or
10 (at your option) any later version.
11
12 This program is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
16
17 You should have received a copy of the GNU General Public License
18 along with this program; if not, write to the Free Software
19 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
20 02111-1307 USA
21*/
22
aa418041 23%option debug nodefault nounput noyywrap never-interactive
e9955c83
AD
24%option prefix="gram_" outfile="lex.yy.c"
25
26%{
27#include "system.h"
223ff46e
PE
28
29#include <mbswidth.h>
30#include <get-errno.h>
31#include <quote.h>
32
e9955c83 33#include "complain.h"
3f2d73f1 34#include "files.h"
e9955c83
AD
35#include "getargs.h"
36#include "gram.h"
37#include "reader.h"
223ff46e 38#include "uniqstr.h"
e9955c83 39
3f2d73f1
PE
40#define YY_USER_INIT \
41 do \
42 { \
43 scanner_cursor.file = current_file; \
44 scanner_cursor.line = 1; \
45 scanner_cursor.column = 1; \
379f0ac8 46 code_start = scanner_cursor; \
3f2d73f1
PE
47 } \
48 while (0)
8efe435c 49
3f2d73f1
PE
50/* Location of scanner cursor. */
51boundary scanner_cursor;
41141c56 52
223ff46e 53static void adjust_location (location *, char const *, size_t);
3f2d73f1 54#define YY_USER_ACTION adjust_location (loc, yytext, yyleng);
d8d3f94a 55
6c30d641 56static size_t no_cr_read (FILE *, char *, size_t);
d8d3f94a
PE
57#define YY_INPUT(buf, result, size) ((result) = no_cr_read (yyin, buf, size))
58
59
223ff46e 60/* OBSTACK_FOR_STRING -- Used to store all the characters that we need to
44995b2e
AD
61 keep (to construct ID, STRINGS etc.). Use the following macros to
62 use it.
63
41141c56
PE
64 Use STRING_GROW to append what has just been matched, and
65 STRING_FINISH to end the string (it puts the ending 0).
66 STRING_FINISH also stores this string in LAST_STRING, which can be
67 used, and which is used by STRING_FREE to free the last string. */
44995b2e 68
223ff46e 69static struct obstack obstack_for_string;
44995b2e 70
7ec2d4cd
AD
71/* A string representing the most recently saved token. */
72static char *last_string;
73
74
41141c56 75#define STRING_GROW \
223ff46e 76 obstack_grow (&obstack_for_string, yytext, yyleng)
44995b2e 77
41141c56 78#define STRING_FINISH \
44995b2e 79 do { \
223ff46e
PE
80 obstack_1grow (&obstack_for_string, '\0'); \
81 last_string = obstack_finish (&obstack_for_string); \
44995b2e
AD
82 } while (0)
83
41141c56 84#define STRING_FREE \
223ff46e 85 obstack_free (&obstack_for_string, last_string)
e9955c83 86
7ec2d4cd
AD
87void
88scanner_last_string_free (void)
89{
41141c56 90 STRING_FREE;
7ec2d4cd 91}
e9955c83 92
efcb44dd
PE
93/* Within well-formed rules, RULE_LENGTH is the number of values in
94 the current rule so far, which says where to find `$0' with respect
95 to the top of the stack. It is not the same as the rule->length in
96 the case of mid rule actions.
97
98 Outside of well-formed rules, RULE_LENGTH has an undefined value. */
99static int rule_length;
100
624a35e2
PE
101static void handle_dollar (int token_type, char *cp, location loc);
102static void handle_at (int token_type, char *cp, location loc);
3f2d73f1 103static void handle_syncline (char *args);
d8d3f94a 104static int convert_ucn_to_byte (char const *hex_text);
aa418041 105static void unexpected_eof (boundary, char const *);
e9955c83
AD
106
107%}
d8d3f94a 108%x SC_COMMENT SC_LINE_COMMENT SC_YACC_COMMENT
e9955c83 109%x SC_STRING SC_CHARACTER
3f2d73f1 110%x SC_AFTER_IDENTIFIER
e9955c83 111%x SC_ESCAPED_STRING SC_ESCAPED_CHARACTER
624a35e2 112%x SC_PRE_CODE SC_BRACED_CODE SC_PROLOGUE SC_EPILOGUE
e9955c83 113
29c01725
AD
114letter [.abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ_]
115id {letter}({letter}|[0-9])*
116directive %{letter}({letter}|[0-9]|-)*
624a35e2 117int [0-9]+
d8d3f94a
PE
118
119/* POSIX says that a tag must be both an id and a C union member, but
120 historically almost any character is allowed in a tag. We disallow
121 NUL and newline, as this simplifies our implementation. */
122tag [^\0\n>]+
123
124/* Zero or more instances of backslash-newline. Following GCC, allow
125 white space between the backslash and the newline. */
126splice (\\[ \f\t\v]*\n)*
e9955c83
AD
127
128%%
129%{
a706a1cc 130 /* Nesting level of the current code in braces. */
1a9e39f1
PE
131 int braces_level IF_LINT (= 0);
132
3f2d73f1
PE
133 /* Parent context state, when applicable. */
134 int context_state IF_LINT (= 0);
a706a1cc 135
624a35e2
PE
136 /* Token type to return, when applicable. */
137 int token_type IF_LINT (= 0);
138
3f2d73f1 139 /* Location of most recent identifier, when applicable. */
a2bc9dbc 140 location id_loc IF_LINT (= empty_location);
3f2d73f1 141
a2bc9dbc
PE
142 /* Where containing code started, when applicable. Its initial
143 value is relevant only when yylex is invoked in the SC_EPILOGUE
144 start condition. */
145 boundary code_start = scanner_cursor;
3f2d73f1 146
223ff46e
PE
147 /* Where containing comment or string or character literal started,
148 when applicable. */
a2bc9dbc 149 boundary token_start IF_LINT (= scanner_cursor);
e9955c83
AD
150%}
151
152
3f2d73f1
PE
153 /*-----------------------.
154 | Scanning white space. |
155 `-----------------------*/
156
624a35e2 157<INITIAL,SC_AFTER_IDENTIFIER,SC_PRE_CODE>
3f2d73f1
PE
158{
159 [ \f\n\t\v] ;
83adb046 160 "," warn_at (*loc, _("stray `,' treated as white space"));
3f2d73f1
PE
161
162 /* Comments. */
3f2d73f1 163 "//".* ;
83adb046
PE
164 "/*" {
165 token_start = loc->start;
166 context_state = YY_START;
167 BEGIN SC_YACC_COMMENT;
168 }
3f2d73f1
PE
169
170 /* #line directives are not documented, and may be withdrawn or
171 modified in future versions of Bison. */
172 ^"#line "{int}" \"".*"\"\n" {
173 handle_syncline (yytext + sizeof "#line " - 1);
174 }
175}
176
177
e9955c83
AD
178 /*----------------------------.
179 | Scanning Bison directives. |
180 `----------------------------*/
181<INITIAL>
182{
183 "%binary" return PERCENT_NONASSOC;
184 "%debug" return PERCENT_DEBUG;
185 "%define" return PERCENT_DEFINE;
186 "%defines" return PERCENT_DEFINES;
624a35e2 187 "%destructor" token_type = PERCENT_DESTRUCTOR; BEGIN SC_PRE_CODE;
676385e2 188 "%dprec" return PERCENT_DPREC;
e9955c83
AD
189 "%error"[-_]"verbose" return PERCENT_ERROR_VERBOSE;
190 "%expect" return PERCENT_EXPECT;
191 "%file-prefix" return PERCENT_FILE_PREFIX;
192 "%fixed"[-_]"output"[-_]"files" return PERCENT_YACC;
ae7453f2 193 "%glr-parser" return PERCENT_GLR_PARSER;
e9955c83 194 "%left" return PERCENT_LEFT;
624a35e2 195 "%lex-param" token_type = PERCENT_LEX_PARAM; BEGIN SC_PRE_CODE;
e9955c83 196 "%locations" return PERCENT_LOCATIONS;
676385e2 197 "%merge" return PERCENT_MERGE;
e9955c83
AD
198 "%name"[-_]"prefix" return PERCENT_NAME_PREFIX;
199 "%no"[-_]"lines" return PERCENT_NO_LINES;
200 "%nonassoc" return PERCENT_NONASSOC;
916708d5 201 "%nondeterministic-parser" return PERCENT_NONDETERMINISTIC_PARSER;
e9955c83
AD
202 "%nterm" return PERCENT_NTERM;
203 "%output" return PERCENT_OUTPUT;
624a35e2 204 "%parse-param" token_type = PERCENT_PARSE_PARAM; BEGIN SC_PRE_CODE;
d8d3f94a 205 "%prec" rule_length--; return PERCENT_PREC;
624a35e2 206 "%printer" token_type = PERCENT_PRINTER; BEGIN SC_PRE_CODE;
e9955c83
AD
207 "%pure"[-_]"parser" return PERCENT_PURE_PARSER;
208 "%right" return PERCENT_RIGHT;
209 "%skeleton" return PERCENT_SKELETON;
210 "%start" return PERCENT_START;
211 "%term" return PERCENT_TOKEN;
212 "%token" return PERCENT_TOKEN;
213 "%token"[-_]"table" return PERCENT_TOKEN_TABLE;
214 "%type" return PERCENT_TYPE;
624a35e2 215 "%union" token_type = PERCENT_UNION; BEGIN SC_PRE_CODE;
e9955c83
AD
216 "%verbose" return PERCENT_VERBOSE;
217 "%yacc" return PERCENT_YACC;
218
3f2d73f1 219 {directive} {
41141c56 220 complain_at (*loc, _("invalid directive: %s"), quote (yytext));
412f8a59 221 }
900c5db5 222
e9955c83 223 "=" return EQUAL;
d8d3f94a 224 "|" rule_length = 0; return PIPE;
e9955c83
AD
225 ";" return SEMICOLON;
226
3f2d73f1 227 {id} {
41141c56 228 val->symbol = symbol_get (yytext, *loc);
3f2d73f1 229 id_loc = *loc;
efcb44dd 230 rule_length++;
3f2d73f1 231 BEGIN SC_AFTER_IDENTIFIER;
e9955c83
AD
232 }
233
d8d3f94a
PE
234 {int} {
235 unsigned long num;
223ff46e 236 set_errno (0);
d8d3f94a 237 num = strtoul (yytext, 0, 10);
223ff46e 238 if (INT_MAX < num || get_errno ())
d8d3f94a 239 {
41141c56 240 complain_at (*loc, _("integer out of range: %s"), quote (yytext));
d8d3f94a
PE
241 num = INT_MAX;
242 }
41141c56 243 val->integer = num;
d8d3f94a
PE
244 return INT;
245 }
e9955c83
AD
246
247 /* Characters. We don't check there is only one. */
3f2d73f1 248 "'" STRING_GROW; token_start = loc->start; BEGIN SC_ESCAPED_CHARACTER;
e9955c83
AD
249
250 /* Strings. */
3f2d73f1 251 "\"" STRING_GROW; token_start = loc->start; BEGIN SC_ESCAPED_STRING;
e9955c83
AD
252
253 /* Prologue. */
3f2d73f1 254 "%{" code_start = loc->start; BEGIN SC_PROLOGUE;
e9955c83
AD
255
256 /* Code in between braces. */
3f2d73f1
PE
257 "{" {
258 STRING_GROW;
624a35e2 259 token_type = BRACED_CODE;
3f2d73f1
PE
260 braces_level = 0;
261 code_start = loc->start;
262 BEGIN SC_BRACED_CODE;
263 }
e9955c83
AD
264
265 /* A type. */
d8d3f94a 266 "<"{tag}">" {
223ff46e 267 obstack_grow (&obstack_for_string, yytext + 1, yyleng - 2);
41141c56 268 STRING_FINISH;
223ff46e 269 val->uniqstr = uniqstr_new (last_string);
41141c56 270 STRING_FREE;
4cdb01db
AD
271 return TYPE;
272 }
273
a706a1cc
PE
274 "%%" {
275 static int percent_percent_count;
e9955c83 276 if (++percent_percent_count == 2)
a2bc9dbc 277 BEGIN SC_EPILOGUE;
e9955c83
AD
278 return PERCENT_PERCENT;
279 }
280
a706a1cc 281 . {
41141c56 282 complain_at (*loc, _("invalid character: %s"), quote (yytext));
3f2d73f1 283 }
379f0ac8
PE
284
285 <<EOF>> {
286 loc->start = loc->end = scanner_cursor;
287 yyterminate ();
288 }
3f2d73f1
PE
289}
290
291
292 /*-----------------------------------------------------------------.
293 | Scanning after an identifier, checking whether a colon is next. |
294 `-----------------------------------------------------------------*/
295
296<SC_AFTER_IDENTIFIER>
297{
298 ":" {
299 rule_length = 0;
300 *loc = id_loc;
301 BEGIN INITIAL;
302 return ID_COLON;
303 }
304 . {
305 scanner_cursor.column -= mbsnwidth (yytext, yyleng, 0);
306 yyless (0);
307 *loc = id_loc;
308 BEGIN INITIAL;
309 return ID;
310 }
311 <<EOF>> {
312 *loc = id_loc;
313 BEGIN INITIAL;
314 return ID;
e9955c83
AD
315 }
316}
317
318
d8d3f94a
PE
319 /*---------------------------------------------------------------.
320 | Scanning a Yacc comment. The initial `/ *' is already eaten. |
321 `---------------------------------------------------------------*/
e9955c83 322
d8d3f94a 323<SC_YACC_COMMENT>
e9955c83 324{
3f2d73f1 325 "*/" BEGIN context_state;
a706a1cc 326 .|\n ;
aa418041 327 <<EOF>> unexpected_eof (token_start, "*/"); BEGIN context_state;
d8d3f94a
PE
328}
329
330
331 /*------------------------------------------------------------.
332 | Scanning a C comment. The initial `/ *' is already eaten. |
333 `------------------------------------------------------------*/
334
335<SC_COMMENT>
336{
3f2d73f1 337 "*"{splice}"/" STRING_GROW; BEGIN context_state;
aa418041 338 <<EOF>> unexpected_eof (token_start, "*/"); BEGIN context_state;
e9955c83
AD
339}
340
341
d8d3f94a
PE
342 /*--------------------------------------------------------------.
343 | Scanning a line comment. The initial `//' is already eaten. |
344 `--------------------------------------------------------------*/
345
346<SC_LINE_COMMENT>
347{
3f2d73f1 348 "\n" STRING_GROW; BEGIN context_state;
41141c56 349 {splice} STRING_GROW;
3f2d73f1 350 <<EOF>> BEGIN context_state;
d8d3f94a
PE
351}
352
353
e9955c83
AD
354 /*----------------------------------------------------------------.
355 | Scanning a C string, including its escapes. The initial `"' is |
356 | already eaten. |
357 `----------------------------------------------------------------*/
358
359<SC_ESCAPED_STRING>
360{
db2cc12f 361 "\"" {
41141c56
PE
362 STRING_GROW;
363 STRING_FINISH;
3f2d73f1 364 loc->start = token_start;
223ff46e 365 val->chars = last_string;
efcb44dd 366 rule_length++;
a706a1cc 367 BEGIN INITIAL;
e9955c83
AD
368 return STRING;
369 }
370
41141c56 371 .|\n STRING_GROW;
aa418041 372 <<EOF>> unexpected_eof (token_start, "\""); BEGIN INITIAL;
e9955c83
AD
373}
374
375 /*---------------------------------------------------------------.
376 | Scanning a C character, decoding its escapes. The initial "'" |
377 | is already eaten. |
378 `---------------------------------------------------------------*/
379
380<SC_ESCAPED_CHARACTER>
381{
db2cc12f 382 "'" {
3b1e470c 383 unsigned char last_string_1;
41141c56
PE
384 STRING_GROW;
385 STRING_FINISH;
3f2d73f1 386 loc->start = token_start;
41141c56
PE
387 val->symbol = symbol_get (last_string, *loc);
388 symbol_class_set (val->symbol, token_sym, *loc);
3b1e470c
PE
389 last_string_1 = last_string[1];
390 symbol_user_token_number_set (val->symbol, last_string_1, *loc);
41141c56 391 STRING_FREE;
a706a1cc
PE
392 rule_length++;
393 BEGIN INITIAL;
394 return ID;
e9955c83 395 }
a706a1cc 396
41141c56 397 .|\n STRING_GROW;
aa418041 398 <<EOF>> unexpected_eof (token_start, "'"); BEGIN INITIAL;
e9955c83
AD
399}
400
401
402 /*----------------------------.
403 | Decode escaped characters. |
404 `----------------------------*/
405
406<SC_ESCAPED_STRING,SC_ESCAPED_CHARACTER>
407{
d8d3f94a
PE
408 \\[0-7]{1,3} {
409 unsigned long c = strtoul (yytext + 1, 0, 8);
410 if (UCHAR_MAX < c)
3f2d73f1 411 complain_at (*loc, _("invalid escape sequence: %s"), quote (yytext));
e9955c83 412 else
223ff46e 413 obstack_1grow (&obstack_for_string, c);
e9955c83
AD
414 }
415
6b0d38ab 416 \\x[0-9abcdefABCDEF]+ {
d8d3f94a 417 unsigned long c;
223ff46e 418 set_errno (0);
d8d3f94a 419 c = strtoul (yytext + 2, 0, 16);
223ff46e 420 if (UCHAR_MAX < c || get_errno ())
3f2d73f1 421 complain_at (*loc, _("invalid escape sequence: %s"), quote (yytext));
d8d3f94a 422 else
223ff46e 423 obstack_1grow (&obstack_for_string, c);
e9955c83
AD
424 }
425
223ff46e
PE
426 \\a obstack_1grow (&obstack_for_string, '\a');
427 \\b obstack_1grow (&obstack_for_string, '\b');
428 \\f obstack_1grow (&obstack_for_string, '\f');
429 \\n obstack_1grow (&obstack_for_string, '\n');
430 \\r obstack_1grow (&obstack_for_string, '\r');
431 \\t obstack_1grow (&obstack_for_string, '\t');
432 \\v obstack_1grow (&obstack_for_string, '\v');
412f8a59
PE
433
434 /* \\[\"\'?\\] would be shorter, but it confuses xgettext. */
223ff46e 435 \\("\""|"'"|"?"|"\\") obstack_1grow (&obstack_for_string, yytext[1]);
412f8a59 436
6b0d38ab 437 \\(u|U[0-9abcdefABCDEF]{4})[0-9abcdefABCDEF]{4} {
d8d3f94a
PE
438 int c = convert_ucn_to_byte (yytext);
439 if (c < 0)
3f2d73f1 440 complain_at (*loc, _("invalid escape sequence: %s"), quote (yytext));
d8d3f94a 441 else
223ff46e 442 obstack_1grow (&obstack_for_string, c);
d8d3f94a 443 }
4f25ebb0 444 \\(.|\n) {
3f2d73f1 445 complain_at (*loc, _("unrecognized escape sequence: %s"), quote (yytext));
41141c56 446 STRING_GROW;
e9955c83
AD
447 }
448}
449
450
451 /*----------------------------------------------------------.
452 | Scanning a C character without decoding its escapes. The |
453 | initial "'" is already eaten. |
454 `----------------------------------------------------------*/
455
456<SC_CHARACTER>
457{
3f2d73f1 458 "'" STRING_GROW; BEGIN context_state;
41141c56 459 \\{splice}[^$@\[\]] STRING_GROW;
aa418041 460 <<EOF>> unexpected_eof (token_start, "'"); BEGIN context_state;
e9955c83
AD
461}
462
463
464 /*----------------------------------------------------------------.
465 | Scanning a C string, without decoding its escapes. The initial |
466 | `"' is already eaten. |
467 `----------------------------------------------------------------*/
468
469<SC_STRING>
470{
3f2d73f1 471 "\"" STRING_GROW; BEGIN context_state;
41141c56 472 \\{splice}[^$@\[\]] STRING_GROW;
aa418041
PE
473 <<EOF>> {
474 unexpected_eof (token_start, "\"");
475 BEGIN context_state;
476 }
e9955c83
AD
477}
478
479
480 /*---------------------------------------------------.
481 | Strings, comments etc. can be found in user code. |
482 `---------------------------------------------------*/
483
484<SC_BRACED_CODE,SC_PROLOGUE,SC_EPILOGUE>
485{
3f2d73f1
PE
486 "'" {
487 STRING_GROW;
488 context_state = YY_START;
489 token_start = loc->start;
490 BEGIN SC_CHARACTER;
491 }
492 "\"" {
493 STRING_GROW;
494 context_state = YY_START;
495 token_start = loc->start;
496 BEGIN SC_STRING;
497 }
498 "/"{splice}"*" {
499 STRING_GROW;
500 context_state = YY_START;
501 token_start = loc->start;
502 BEGIN SC_COMMENT;
503 }
504 "/"{splice}"/" {
505 STRING_GROW;
506 context_state = YY_START;
507 BEGIN SC_LINE_COMMENT;
508 }
e9955c83
AD
509}
510
511
624a35e2
PE
512 /*---------------------------------------------------------------.
513 | Scanning after %union etc., possibly followed by white space. |
514 | For %union only, allow arbitrary C code to appear before the |
515 | following brace, as an extension to POSIX. |
516 `---------------------------------------------------------------*/
517
518<SC_PRE_CODE>
519{
520 . {
521 bool valid = yytext[0] == '{' || token_type == PERCENT_UNION;
522 scanner_cursor.column -= mbsnwidth (yytext, yyleng, 0);
523 yyless (0);
524
525 if (valid)
526 {
527 braces_level = -1;
528 code_start = loc->start;
529 BEGIN SC_BRACED_CODE;
530 }
531 else
532 {
533 complain_at (*loc, _("missing `{' in `%s'"),
534 token_name (token_type));
535 obstack_sgrow (&obstack_for_string, "{}");
536 STRING_FINISH;
537 val->chars = last_string;
538 BEGIN INITIAL;
539 return token_type;
540 }
541 }
379f0ac8 542
aa418041 543 <<EOF>> unexpected_eof (scanner_cursor, "{}"); BEGIN INITIAL;
624a35e2
PE
544}
545
546
e9955c83
AD
547 /*---------------------------------------------------------------.
548 | Scanning some code in braces (%union and actions). The initial |
549 | "{" is already eaten. |
550 `---------------------------------------------------------------*/
551
552<SC_BRACED_CODE>
553{
41141c56
PE
554 "{"|"<"{splice}"%" STRING_GROW; braces_level++;
555 "%"{splice}">" STRING_GROW; braces_level--;
e9955c83 556 "}" {
25522739
PE
557 bool outer_brace = --braces_level < 0;
558
559 /* As an undocumented Bison extension, append `;' before the last
560 brace in braced code, so that the user code can omit trailing
561 `;'. But do not append `;' if emulating Yacc, since Yacc does
562 not append one.
563
564 FIXME: Bison should warn if a semicolon seems to be necessary
565 here, and should omit the semicolon if it seems unnecessary
566 (e.g., after ';', '{', or '}', each followed by comments or
567 white space). Such a warning shouldn't depend on --yacc; it
568 should depend on a new --pedantic option, which would cause
569 Bison to warn if it detects an extension to POSIX. --pedantic
570 should also diagnose other Bison extensions like %yacc.
571 Perhaps there should also be a GCC-style --pedantic-errors
572 option, so that such warnings are diagnosed as errors. */
1deb9bdc 573 if (outer_brace && token_type == BRACED_CODE && ! yacc_flag)
25522739
PE
574 obstack_1grow (&obstack_for_string, ';');
575
576 obstack_1grow (&obstack_for_string, '}');
577
578 if (outer_brace)
e9955c83 579 {
41141c56 580 STRING_FINISH;
624a35e2 581 rule_length++;
3f2d73f1 582 loc->start = code_start;
223ff46e 583 val->chars = last_string;
a706a1cc 584 BEGIN INITIAL;
624a35e2 585 return token_type;
e9955c83
AD
586 }
587 }
588
a706a1cc
PE
589 /* Tokenize `<<%' correctly (as `<<' `%') rather than incorrectly
590 (as `<' `<%'). */
41141c56 591 "<"{splice}"<" STRING_GROW;
a706a1cc 592
624a35e2
PE
593 "$"("<"{tag}">")?(-?[0-9]+|"$") handle_dollar (token_type, yytext, *loc);
594 "@"(-?[0-9]+|"$") handle_at (token_type, yytext, *loc);
e9955c83 595
aa418041 596 <<EOF>> unexpected_eof (code_start, "}"); BEGIN INITIAL;
e9955c83
AD
597}
598
599
600 /*--------------------------------------------------------------.
601 | Scanning some prologue: from "%{" (already scanned) to "%}". |
602 `--------------------------------------------------------------*/
603
604<SC_PROLOGUE>
605{
606 "%}" {
41141c56 607 STRING_FINISH;
3f2d73f1 608 loc->start = code_start;
223ff46e 609 val->chars = last_string;
a706a1cc 610 BEGIN INITIAL;
e9955c83
AD
611 return PROLOGUE;
612 }
613
aa418041 614 <<EOF>> unexpected_eof (code_start, "%}"); BEGIN INITIAL;
e9955c83
AD
615}
616
617
618 /*---------------------------------------------------------------.
619 | Scanning the epilogue (everything after the second "%%", which |
d8d3f94a 620 | has already been eaten). |
e9955c83
AD
621 `---------------------------------------------------------------*/
622
623<SC_EPILOGUE>
624{
e9955c83 625 <<EOF>> {
41141c56 626 STRING_FINISH;
3f2d73f1 627 loc->start = code_start;
223ff46e 628 val->chars = last_string;
a706a1cc 629 BEGIN INITIAL;
e9955c83
AD
630 return EPILOGUE;
631 }
632}
633
634
a706a1cc
PE
635 /*----------------------------------------------------------------.
636 | By default, grow the string obstack with the input, escaping M4 |
637 | quoting characters. |
638 `----------------------------------------------------------------*/
639
640<SC_COMMENT,SC_LINE_COMMENT,SC_STRING,SC_CHARACTER,SC_BRACED_CODE,SC_PROLOGUE,SC_EPILOGUE>
641{
223ff46e
PE
642 \$ obstack_sgrow (&obstack_for_string, "$][");
643 \@ obstack_sgrow (&obstack_for_string, "@@");
644 \[ obstack_sgrow (&obstack_for_string, "@{");
645 \] obstack_sgrow (&obstack_for_string, "@}");
41141c56 646 .|\n STRING_GROW;
a706a1cc
PE
647}
648
649
e9955c83
AD
650%%
651
3f2d73f1
PE
652/* Set *LOC and adjust scanner cursor to account for token TOKEN of
653 size SIZE. */
6c30d641
PE
654
655static void
223ff46e 656adjust_location (location *loc, char const *token, size_t size)
6c30d641 657{
3f2d73f1
PE
658 int line = scanner_cursor.line;
659 int column = scanner_cursor.column;
6c30d641
PE
660 char const *p0 = token;
661 char const *p = token;
662 char const *lim = token + size;
663
3f2d73f1
PE
664 loc->start = scanner_cursor;
665
6c30d641
PE
666 for (p = token; p < lim; p++)
667 switch (*p)
668 {
6c30d641
PE
669 case '\n':
670 line++;
671 column = 1;
672 p0 = p + 1;
673 break;
674
675 case '\t':
676 column += mbsnwidth (p0, p - p0, 0);
677 column += 8 - ((column - 1) & 7);
678 p0 = p + 1;
679 break;
680 }
681
3f2d73f1
PE
682 scanner_cursor.line = line;
683 scanner_cursor.column = column + mbsnwidth (p0, p - p0, 0);
684
685 loc->end = scanner_cursor;
6c30d641
PE
686}
687
688
/* Read up to SIZE bytes from FP into BUF and return the number of
   bytes stored there.  Carriage returns are normalized on the way:
   both "\r\n" and a lone '\r' end up as a single '\n', so the count
   returned may be smaller than the number of bytes read from FP.  */

static size_t
no_cr_read (FILE *fp, char *buf, size_t size)
{
  size_t const nread = fread (buf, 1, size, fp);
  char *dst;
  char const *src;
  char const *end;

  if (nread == 0)
    return nread;

  dst = memchr (buf, '\r', nread);
  if (dst == NULL)
    return nread;

  /* From here on DST points just past the most recently stored byte
     and SRC at the next unread byte; the buffer is compacted in
     place.  */
  src = ++dst;
  end = buf + nread;

  for (;;)
    {
      /* The byte just stored was an '\r'.  Turn it into '\n' and
         swallow an '\n' that immediately follows, if any.  */
      dst[-1] = '\n';
      if (src == end)
        {
          /* The '\r' was the last byte of this chunk, so peek at the
             stream; anything other than '\n' is pushed back.  */
          int next = getc (fp);
          if (next != '\n' && ungetc (next, fp) != next)
            return dst - buf;
        }
      else if (*src == '\n')
        src++;

      /* Copy bytes down until another '\r' has been stored.  */
      for (;;)
        {
          char c;
          if (src == end)
            return dst - buf;
          c = *src++;
          *dst++ = c;
          if (c == '\r')
            break;
        }
    }
}
734
735
e9955c83 736/*------------------------------------------------------------------.
366eea36 737| TEXT is pointing to a wannabee semantic value (i.e., a `$'). |
e9955c83
AD
738| |
739| Possible inputs: $[<TYPENAME>]($|integer) |
740| |
223ff46e 741| Output to OBSTACK_FOR_STRING a reference to this semantic value. |
e9955c83
AD
742`------------------------------------------------------------------*/
743
624a35e2 744static inline bool
223ff46e 745handle_action_dollar (char *text, location loc)
e9955c83
AD
746{
747 const char *type_name = NULL;
366eea36 748 char *cp = text + 1;
e9955c83 749
624a35e2
PE
750 if (! current_rule)
751 return false;
752
e9955c83
AD
753 /* Get the type name if explicit. */
754 if (*cp == '<')
755 {
756 type_name = ++cp;
757 while (*cp != '>')
758 ++cp;
759 *cp = '\0';
760 ++cp;
761 }
762
763 if (*cp == '$')
764 {
765 if (!type_name)
223ff46e 766 type_name = symbol_list_n_type_name_get (current_rule, loc, 0);
e9955c83 767 if (!type_name && typed)
223ff46e 768 complain_at (loc, _("$$ of `%s' has no declared type"),
97650f4e 769 current_rule->sym->tag);
e9955c83
AD
770 if (!type_name)
771 type_name = "";
223ff46e 772 obstack_fgrow1 (&obstack_for_string,
e9955c83
AD
773 "]b4_lhs_value([%s])[", type_name);
774 }
d8d3f94a 775 else
e9955c83 776 {
d8d3f94a 777 long num;
223ff46e 778 set_errno (0);
d8d3f94a 779 num = strtol (cp, 0, 10);
e9955c83 780
223ff46e 781 if (INT_MIN <= num && num <= rule_length && ! get_errno ())
e9955c83 782 {
d8d3f94a 783 int n = num;
e9955c83 784 if (!type_name && n > 0)
223ff46e 785 type_name = symbol_list_n_type_name_get (current_rule, loc, n);
e9955c83 786 if (!type_name && typed)
223ff46e
PE
787 complain_at (loc, _("$%d of `%s' has no declared type"),
788 n, current_rule->sym->tag);
e9955c83
AD
789 if (!type_name)
790 type_name = "";
223ff46e 791 obstack_fgrow3 (&obstack_for_string,
e9955c83
AD
792 "]b4_rhs_value([%d], [%d], [%s])[",
793 rule_length, n, type_name);
794 }
d8d3f94a 795 else
223ff46e 796 complain_at (loc, _("integer out of range: %s"), quote (text));
9280d3ef 797 }
9280d3ef 798
624a35e2 799 return true;
e9955c83
AD
800}
801
f25bfb75
AD
802
803/*-----------------------------------------------------------------.
804| Dispatch onto handle_action_dollar, or handle_destructor_dollar, |
624a35e2 805| depending upon TOKEN_TYPE. |
f25bfb75 806`-----------------------------------------------------------------*/
e9955c83
AD
807
808static void
624a35e2 809handle_dollar (int token_type, char *text, location loc)
f25bfb75 810{
624a35e2 811 switch (token_type)
f25bfb75 812 {
624a35e2
PE
813 case BRACED_CODE:
814 if (handle_action_dollar (text, loc))
815 return;
f25bfb75
AD
816 break;
817
624a35e2
PE
818 case PERCENT_DESTRUCTOR:
819 case PERCENT_PRINTER:
820 if (text[1] == '$')
821 {
822 obstack_sgrow (&obstack_for_string, "]b4_dollar_dollar[");
823 return;
824 }
825 break;
826
827 default:
f25bfb75
AD
828 break;
829 }
624a35e2
PE
830
831 complain_at (loc, _("invalid value: %s"), quote (text));
f25bfb75
AD
832}
833
834
835/*------------------------------------------------------.
836| TEXT is a location token (i.e., a `@...'). Output to |
223ff46e 837| OBSTACK_FOR_STRING a reference to this location. |
f25bfb75
AD
838`------------------------------------------------------*/
839
624a35e2 840static inline bool
223ff46e 841handle_action_at (char *text, location loc)
e9955c83 842{
366eea36 843 char *cp = text + 1;
e9955c83 844 locations_flag = 1;
e9955c83 845
624a35e2
PE
846 if (! current_rule)
847 return false;
848
366eea36 849 if (*cp == '$')
624a35e2 850 obstack_sgrow (&obstack_for_string, "]b4_lhs_location[");
d8d3f94a 851 else
e9955c83 852 {
d8d3f94a 853 long num;
223ff46e 854 set_errno (0);
d8d3f94a 855 num = strtol (cp, 0, 10);
dafdc66f 856
223ff46e 857 if (INT_MIN <= num && num <= rule_length && ! get_errno ())
d8d3f94a
PE
858 {
859 int n = num;
223ff46e 860 obstack_fgrow2 (&obstack_for_string, "]b4_rhs_location([%d], [%d])[",
d8d3f94a
PE
861 rule_length, n);
862 }
e9955c83 863 else
223ff46e 864 complain_at (loc, _("integer out of range: %s"), quote (text));
f25bfb75 865 }
f25bfb75 866
624a35e2 867 return true;
e9955c83 868}
4cdb01db 869
f25bfb75
AD
870
871/*-------------------------------------------------------------------.
872| Dispatch onto handle_action_at, or handle_destructor_at, depending |
873| upon CODE_KIND. |
874`-------------------------------------------------------------------*/
875
876static void
624a35e2 877handle_at (int token_type, char *text, location loc)
f25bfb75 878{
624a35e2 879 switch (token_type)
f25bfb75 880 {
624a35e2 881 case BRACED_CODE:
223ff46e 882 handle_action_at (text, loc);
624a35e2
PE
883 return;
884
885 case PERCENT_DESTRUCTOR:
886 case PERCENT_PRINTER:
887 if (text[1] == '$')
888 {
889 obstack_sgrow (&obstack_for_string, "]b4_at_dollar[");
890 return;
891 }
f25bfb75
AD
892 break;
893
624a35e2 894 default:
f25bfb75
AD
895 break;
896 }
624a35e2
PE
897
898 complain_at (loc, _("invalid value: %s"), quote (text));
f25bfb75
AD
899}
900
901
d8d3f94a
PE
/* UCN is the text of a universal character name escape (a backslash,
   `u' or `U', then hexadecimal digits).  Return the single-byte
   character it denotes, or -1 if it does not correspond to a
   single-byte character.  */

static int
convert_ucn_to_byte (char const *ucn)
{
  /* The digits start after the backslash and the `u' or `U'.  */
  unsigned long value = strtoul (ucn + 2, 0, 16);

  /* FIXME: Currently we assume Unicode-compatible unibyte characters
     on ASCII hosts (i.e., Latin-1 on hosts with 8-bit bytes).  On
     non-ASCII hosts we support only the portable C character set.
     These limitations should be removed once we add support for
     multibyte characters.  */

  if (value > UCHAR_MAX)
    return -1;

#if ! ('$' == 0x24 && '@' == 0x40 && '`' == 0x60 && '~' == 0x7e)
  {
    /* Not an ASCII host.  Map VALUE through a table, indexed by ASCII
       code, of the C basic execution character set, which every
       Standard C platform provides.  The table also holds '$', '@'
       and '`': they are outside the basic execution character set but
       are unibyte characters on all the platforms that we know
       about.  */
    static signed char const ascii_to_host[] =
      {
        '\0',   -1,   -1,   -1,   -1,   -1,   -1, '\a',
        '\b', '\t', '\n', '\v', '\f', '\r',   -1,   -1,
          -1,   -1,   -1,   -1,   -1,   -1,   -1,   -1,
          -1,   -1,   -1,   -1,   -1,   -1,   -1,   -1,
         ' ',  '!',  '"',  '#',  '$',  '%',  '&', '\'',
         '(',  ')',  '*',  '+',  ',',  '-',  '.',  '/',
         '0',  '1',  '2',  '3',  '4',  '5',  '6',  '7',
         '8',  '9',  ':',  ';',  '<',  '=',  '>',  '?',
         '@',  'A',  'B',  'C',  'D',  'E',  'F',  'G',
         'H',  'I',  'J',  'K',  'L',  'M',  'N',  'O',
         'P',  'Q',  'R',  'S',  'T',  'U',  'V',  'W',
         'X',  'Y',  'Z',  '[', '\\',  ']',  '^',  '_',
         '`',  'a',  'b',  'c',  'd',  'e',  'f',  'g',
         'h',  'i',  'j',  'k',  'l',  'm',  'n',  'o',
         'p',  'q',  'r',  's',  't',  'u',  'v',  'w',
         'x',  'y',  'z',  '{',  '|',  '}',  '~'
      };

    value = value < sizeof ascii_to_host ? ascii_to_host[value] : -1;
  }
#endif

  return value;
}
956
957
900c5db5
AD
958/*----------------------------------------------------------------.
959| Handle `#line INT "FILE"'. ARGS has already skipped `#line '. |
960`----------------------------------------------------------------*/
961
962static void
3f2d73f1 963handle_syncline (char *args)
900c5db5
AD
964{
965 int lineno = strtol (args, &args, 10);
966 const char *file = NULL;
967 file = strchr (args, '"') + 1;
968 *strchr (file, '"') = 0;
3f2d73f1
PE
969 scanner_cursor.file = current_file = xstrdup (file);
970 scanner_cursor.line = lineno;
971 scanner_cursor.column = 1;
900c5db5
AD
972}
973
a706a1cc 974
3f2d73f1
PE
975/*------------------------------------------------------------------------.
976| Report an unexpected EOF in a token or comment starting at START. |
977| An end of file was encountered and the expected TOKEN_END was missing. |
3f2d73f1 978`------------------------------------------------------------------------*/
a706a1cc
PE
979
980static void
aa418041 981unexpected_eof (boundary start, char const *token_end)
a706a1cc 982{
223ff46e
PE
983 location loc;
984 loc.start = start;
985 loc.end = scanner_cursor;
986 complain_at (loc, _("missing `%s' at end of file"), token_end);
a706a1cc
PE
987}
988
989
f25bfb75
AD
990/*-------------------------.
991| Initialize the scanner. |
992`-------------------------*/
993
1d6412ad
AD
994void
995scanner_initialize (void)
996{
223ff46e 997 obstack_init (&obstack_for_string);
1d6412ad
AD
998}
999
1000
f25bfb75
AD
1001/*-----------------------------------------------.
1002| Free all the memory allocated to the scanner. |
1003`-----------------------------------------------*/
1004
4cdb01db
AD
1005void
1006scanner_free (void)
1007{
223ff46e 1008 obstack_free (&obstack_for_string, 0);
536545f3
AD
1009 /* Reclaim Flex's buffers. */
1010 yy_delete_buffer (YY_CURRENT_BUFFER);
4cdb01db 1011}