]> git.saurik.com Git - bison.git/blame - src/scan-gram.l
* Makefile.maint: Sync with Autoconf:
[bison.git] / src / scan-gram.l
CommitLineData
e9955c83
AD
1/* Bison Grammar Scanner -*- C -*-
2 Copyright (C) 2002 Free Software Foundation, Inc.
3
4 This file is part of Bison, the GNU Compiler Compiler.
5
6 This program is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 2 of the License, or
9 (at your option) any later version.
10
11 This program is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
15
16 You should have received a copy of the GNU General Public License
17 along with this program; if not, write to the Free Software
18 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
19 02111-1307 USA
20*/
21
a706a1cc 22%option debug nodefault noyywrap never-interactive
e9955c83
AD
23%option prefix="gram_" outfile="lex.yy.c"
24
25%{
26#include "system.h"
d8d3f94a 27#include "mbswidth.h"
e9955c83
AD
28#include "complain.h"
29#include "quote.h"
3e6656f9 30#include "struniq.h"
e9955c83
AD
31#include "getargs.h"
32#include "gram.h"
33#include "reader.h"
34
35/* Each time we match a string, move the end cursor to its end. */
8efe435c
AD
36#define YY_USER_INIT \
37do { \
38 LOCATION_RESET (*yylloc); \
95612cfa 39 yylloc->file = current_file; \
8efe435c
AD
40 /* This is only to avoid GCC warnings. */ \
41 if (yycontrol) {;}; \
42} while (0)
43
d8d3f94a
PE
44#define YY_USER_ACTION extend_location (yylloc, yytext, yyleng);
45#define YY_STEP LOCATION_STEP (*yylloc)
46
47#define YY_INPUT(buf, result, size) ((result) = no_cr_read (yyin, buf, size))
48
49
/* Fill BUF with up to SIZE bytes read from FP and return the number
   of bytes BUF holds afterwards.  Carriage returns never reach the
   caller: a "\r\n" pair collapses into a single '\n', and a lone
   '\r' is rewritten as '\n'.  */

static size_t
no_cr_read (FILE *fp, char *buf, size_t size)
{
  size_t nread = fread (buf, 1, size, fp);
  char *dst;
  char const *src;
  char const *end;

  if (nread == 0)
    return nread;

  dst = memchr (buf, '\r', nread);
  if (dst == NULL)
    return nread;

  /* From here on the buffer is compacted in place.  DST is where the
     next kept byte goes and SRC is the next unread byte; both start
     just past the first '\r'.  */
  end = buf + nread;
  src = ++dst;

  for (;;)
    {
      char c;

      /* The byte just before DST is an '\r': make it a '\n'.  */
      dst[-1] = '\n';

      if (src == end)
        {
          /* The '\r' was the last byte read, so look one byte ahead
             in the stream and swallow a directly following '\n'.
             Anything else (EOF included) is handed to ungetc; the
             compacted length is returned whatever ungetc reports.  */
          int next = getc (fp);
          if (next != '\n')
            ungetc (next, fp);
          return dst - buf;
        }

      /* Drop the '\n' half of a "\r\n" pair.  */
      if (*src == '\n')
        src++;

      /* Shift bytes down until the next '\r' has been copied.  */
      do
        {
          if (src == end)
            return dst - buf;
          c = *src++;
          *dst++ = c;
        }
      while (c != '\r');
    }
}
95
96
97/* Extend *LOC to account for token TOKEN of size SIZE. */
98
99static void
100extend_location (location_t *loc, char const *token, int size)
101{
102 int line = loc->last_line;
103 int column = loc->last_column;
104 char const *p0 = token;
105 char const *p = token;
106 char const *lim = token + size;
107
108 for (p = token; p < lim; p++)
109 switch (*p)
110 {
111 case '\r':
112 /* \r shouldn't survive no_cr_read. */
113 abort ();
114
115 case '\n':
116 line++;
117 column = 1;
118 p0 = p + 1;
119 break;
120
121 case '\t':
122 column += mbsnwidth (p0, p - p0, 0);
123 column += 8 - ((column - 1) & 7);
124 p0 = p + 1;
125 break;
126 }
127
128 loc->last_line = line;
129 loc->last_column = column + mbsnwidth (p0, p - p0, 0);
130}
131
132
e9955c83 133
44995b2e
AD
134/* STRING_OBSTACK -- Used to store all the characters that we need to
135 keep (to construct ID, STRINGS etc.). Use the following macros to
136 use it.
137
1d6412ad
AD
138 Use YY_OBS_GROW to append what has just been matched, and
139 YY_OBS_FINISH to end the string (it puts the ending 0).
140 YY_OBS_FINISH also stores this string in LAST_STRING, which can be
141 used, and which is used by YY_OBS_FREE to free the last string. */
44995b2e
AD
142
143static struct obstack string_obstack;
44995b2e 144
7ec2d4cd
AD
145/* A string representing the most recently saved token. */
146static char *last_string;
147
148
44995b2e
AD
149#define YY_OBS_GROW \
150 obstack_grow (&string_obstack, yytext, yyleng)
151
152#define YY_OBS_FINISH \
153 do { \
154 obstack_1grow (&string_obstack, '\0'); \
155 last_string = obstack_finish (&string_obstack); \
44995b2e
AD
156 } while (0)
157
a706a1cc
PE
158#define YY_OBS_FREE \
159 obstack_free (&string_obstack, last_string)
e9955c83 160
7ec2d4cd
AD
161void
162scanner_last_string_free (void)
163{
164 YY_OBS_FREE;
165}
e9955c83 166
efcb44dd
PE
167/* Within well-formed rules, RULE_LENGTH is the number of values in
168 the current rule so far, which says where to find `$0' with respect
169 to the top of the stack. It is not the same as the rule->length in
170 the case of mid rule actions.
171
172 Outside of well-formed rules, RULE_LENGTH has an undefined value. */
173static int rule_length;
174
d33cb3ae
PE
175static void handle_dollar (braced_code_t code_kind,
176 char *cp, location_t location);
177static void handle_at (braced_code_t code_kind,
178 char *cp, location_t location);
900c5db5 179static void handle_syncline (char *args, location_t *location);
d8d3f94a 180static int convert_ucn_to_byte (char const *hex_text);
345532d7 181static void unexpected_end_of_file (location_t *, char const *);
e9955c83
AD
182
183%}
d8d3f94a 184%x SC_COMMENT SC_LINE_COMMENT SC_YACC_COMMENT
e9955c83
AD
185%x SC_STRING SC_CHARACTER
186%x SC_ESCAPED_STRING SC_ESCAPED_CHARACTER
187%x SC_BRACED_CODE SC_PROLOGUE SC_EPILOGUE
188
29c01725
AD
189letter [.abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ_]
190id {letter}({letter}|[0-9])*
191directive %{letter}({letter}|[0-9]|-)*
192int [0-9]+
d8d3f94a
PE
193
194/* POSIX says that a tag must be both an id and a C union member, but
195 historically almost any character is allowed in a tag. We disallow
196 NUL and newline, as this simplifies our implementation. */
197tag [^\0\n>]+
198
199/* Zero or more instances of backslash-newline. Following GCC, allow
200 white space between the backslash and the newline. */
201splice (\\[ \f\t\v]*\n)*
e9955c83
AD
202
203%%
204%{
a706a1cc 205 /* Nesting level of the current code in braces. */
1a9e39f1
PE
206 int braces_level IF_LINT (= 0);
207
a706a1cc
PE
208 /* Scanner context when scanning C code. */
209 int c_context IF_LINT (= 0);
210
e9955c83
AD
211 /* At each yylex invocation, mark the current position as the
212 start of the next token. */
e9955c83 213 YY_STEP;
e9955c83
AD
214%}
215
216
217 /*----------------------------.
218 | Scanning Bison directives. |
219 `----------------------------*/
220<INITIAL>
221{
222 "%binary" return PERCENT_NONASSOC;
223 "%debug" return PERCENT_DEBUG;
224 "%define" return PERCENT_DEFINE;
225 "%defines" return PERCENT_DEFINES;
9280d3ef 226 "%destructor" return PERCENT_DESTRUCTOR;
676385e2 227 "%dprec" return PERCENT_DPREC;
e9955c83
AD
228 "%error"[-_]"verbose" return PERCENT_ERROR_VERBOSE;
229 "%expect" return PERCENT_EXPECT;
230 "%file-prefix" return PERCENT_FILE_PREFIX;
231 "%fixed"[-_]"output"[-_]"files" return PERCENT_YACC;
ae7453f2 232 "%glr-parser" return PERCENT_GLR_PARSER;
e9955c83
AD
233 "%left" return PERCENT_LEFT;
234 "%locations" return PERCENT_LOCATIONS;
676385e2 235 "%merge" return PERCENT_MERGE;
e9955c83
AD
236 "%name"[-_]"prefix" return PERCENT_NAME_PREFIX;
237 "%no"[-_]"lines" return PERCENT_NO_LINES;
238 "%nonassoc" return PERCENT_NONASSOC;
239 "%nterm" return PERCENT_NTERM;
240 "%output" return PERCENT_OUTPUT;
ae7453f2 241 "%parse-param" return PERCENT_PARSE_PARAM;
d8d3f94a 242 "%prec" rule_length--; return PERCENT_PREC;
366eea36 243 "%printer" return PERCENT_PRINTER;
e9955c83
AD
244 "%pure"[-_]"parser" return PERCENT_PURE_PARSER;
245 "%right" return PERCENT_RIGHT;
ae7453f2 246 "%lex-param" return PERCENT_LEX_PARAM;
e9955c83
AD
247 "%skeleton" return PERCENT_SKELETON;
248 "%start" return PERCENT_START;
249 "%term" return PERCENT_TOKEN;
250 "%token" return PERCENT_TOKEN;
251 "%token"[-_]"table" return PERCENT_TOKEN_TABLE;
252 "%type" return PERCENT_TYPE;
253 "%union" return PERCENT_UNION;
254 "%verbose" return PERCENT_VERBOSE;
255 "%yacc" return PERCENT_YACC;
256
29c01725
AD
257 {directive} {
258 complain_at (*yylloc, _("invalid directive: %s"), quote (yytext));
259 YY_STEP;
260 }
261
900c5db5
AD
262 ^"#line "{int}" \""[^\"]*"\"\n" handle_syncline (yytext + strlen ("#line "), yylloc); YY_STEP;
263
e9955c83 264 "=" return EQUAL;
d8d3f94a
PE
265 ":" rule_length = 0; return COLON;
266 "|" rule_length = 0; return PIPE;
ae7453f2 267 "," return COMMA;
e9955c83
AD
268 ";" return SEMICOLON;
269
a706a1cc 270 [ \f\n\t\v] YY_STEP;
d8d3f94a 271
e9955c83 272 {id} {
39f41916 273 yylval->symbol = symbol_get (yytext, *yylloc);
efcb44dd 274 rule_length++;
e9955c83
AD
275 return ID;
276 }
277
d8d3f94a
PE
278 {int} {
279 unsigned long num;
280 errno = 0;
281 num = strtoul (yytext, 0, 10);
282 if (INT_MAX < num || errno)
283 {
98f2caaa 284 complain_at (*yylloc, _("integer out of range: %s"), quote (yytext));
d8d3f94a
PE
285 num = INT_MAX;
286 }
287 yylval->integer = num;
288 return INT;
289 }
e9955c83
AD
290
291 /* Characters. We don't check there is only one. */
a706a1cc 292 "'" YY_OBS_GROW; BEGIN SC_ESCAPED_CHARACTER;
e9955c83
AD
293
294 /* Strings. */
a706a1cc 295 "\"" YY_OBS_GROW; BEGIN SC_ESCAPED_STRING;
e9955c83
AD
296
297 /* Comments. */
d8d3f94a 298 "/*" BEGIN SC_YACC_COMMENT;
e9955c83
AD
299 "//".* YY_STEP;
300
301 /* Prologue. */
a706a1cc 302 "%{" BEGIN SC_PROLOGUE;
e9955c83
AD
303
304 /* Code in between braces. */
a706a1cc 305 "{" YY_OBS_GROW; braces_level = 0; BEGIN SC_BRACED_CODE;
e9955c83
AD
306
307 /* A type. */
d8d3f94a 308 "<"{tag}">" {
4cdb01db
AD
309 obstack_grow (&string_obstack, yytext + 1, yyleng - 2);
310 YY_OBS_FINISH;
3e6656f9
AD
311 yylval->struniq = struniq_new (last_string);
312 YY_OBS_FREE;
4cdb01db
AD
313 return TYPE;
314 }
315
a706a1cc
PE
316 "%%" {
317 static int percent_percent_count;
e9955c83 318 if (++percent_percent_count == 2)
a706a1cc 319 BEGIN SC_EPILOGUE;
e9955c83
AD
320 return PERCENT_PERCENT;
321 }
322
a706a1cc 323 . {
c4d720cd 324 complain_at (*yylloc, _("invalid character: %s"), quote (yytext));
e9955c83
AD
325 YY_STEP;
326 }
327}
328
329
d8d3f94a
PE
330 /*---------------------------------------------------------------.
331 | Scanning a Yacc comment. The initial `/ *' is already eaten. |
332 `---------------------------------------------------------------*/
e9955c83 333
d8d3f94a 334<SC_YACC_COMMENT>
e9955c83 335{
d8d3f94a
PE
336 "*/" {
337 YY_STEP;
338 BEGIN INITIAL;
e9955c83
AD
339 }
340
a706a1cc 341 .|\n ;
345532d7 342 <<EOF>> unexpected_end_of_file (yylloc, "*/");
d8d3f94a
PE
343}
344
345
346 /*------------------------------------------------------------.
347 | Scanning a C comment. The initial `/ *' is already eaten. |
348 `------------------------------------------------------------*/
349
350<SC_COMMENT>
351{
a706a1cc 352 "*"{splice}"/" YY_OBS_GROW; BEGIN c_context;
345532d7 353 <<EOF>> unexpected_end_of_file (yylloc, "*/");
e9955c83
AD
354}
355
356
d8d3f94a
PE
357 /*--------------------------------------------------------------.
358 | Scanning a line comment. The initial `//' is already eaten. |
359 `--------------------------------------------------------------*/
360
361<SC_LINE_COMMENT>
362{
a706a1cc
PE
363 "\n" YY_OBS_GROW; BEGIN c_context;
364 {splice} YY_OBS_GROW;
365 <<EOF>> BEGIN c_context;
d8d3f94a
PE
366}
367
368
e9955c83
AD
369 /*----------------------------------------------------------------.
370 | Scanning a C string, including its escapes. The initial `"' is |
371 | already eaten. |
372 `----------------------------------------------------------------*/
373
374<SC_ESCAPED_STRING>
375{
db2cc12f 376 "\"" {
44995b2e
AD
377 YY_OBS_GROW;
378 YY_OBS_FINISH;
4cdb01db 379 yylval->string = last_string;
efcb44dd 380 rule_length++;
a706a1cc 381 BEGIN INITIAL;
e9955c83
AD
382 return STRING;
383 }
384
a706a1cc 385 .|\n YY_OBS_GROW;
345532d7 386 <<EOF>> unexpected_end_of_file (yylloc, "\"");
e9955c83
AD
387}
388
389 /*---------------------------------------------------------------.
390 | Scanning a C character, decoding its escapes. The initial "'" |
391 | is already eaten. |
392 `---------------------------------------------------------------*/
393
394<SC_ESCAPED_CHARACTER>
395{
db2cc12f 396 "'" {
44995b2e 397 YY_OBS_GROW;
44995b2e 398 YY_OBS_FINISH;
a706a1cc
PE
399 yylval->symbol = symbol_get (last_string, *yylloc);
400 symbol_class_set (yylval->symbol, token_sym, *yylloc);
401 symbol_user_token_number_set (yylval->symbol,
402 (unsigned char) last_string[1], *yylloc);
403 YY_OBS_FREE;
404 rule_length++;
405 BEGIN INITIAL;
406 return ID;
e9955c83 407 }
a706a1cc
PE
408
409 .|\n YY_OBS_GROW;
345532d7 410 <<EOF>> unexpected_end_of_file (yylloc, "'");
e9955c83
AD
411}
412
413
414 /*----------------------------.
415 | Decode escaped characters. |
416 `----------------------------*/
417
418<SC_ESCAPED_STRING,SC_ESCAPED_CHARACTER>
419{
d8d3f94a
PE
420 \\[0-7]{1,3} {
421 unsigned long c = strtoul (yytext + 1, 0, 8);
422 if (UCHAR_MAX < c)
e9955c83 423 {
98f2caaa
PE
424 complain_at (*yylloc, _("invalid escape sequence: %s"),
425 quote (yytext));
e9955c83
AD
426 YY_STEP;
427 }
428 else
429 obstack_1grow (&string_obstack, c);
430 }
431
d8d3f94a
PE
432 \\x[0-9a-fA-F]+ {
433 unsigned long c;
434 errno = 0;
435 c = strtoul (yytext + 2, 0, 16);
436 if (UCHAR_MAX < c || errno)
437 {
98f2caaa
PE
438 complain_at (*yylloc, _("invalid escape sequence: %s"),
439 quote (yytext));
d8d3f94a
PE
440 YY_STEP;
441 }
442 else
443 obstack_1grow (&string_obstack, c);
e9955c83
AD
444 }
445
446 \\a obstack_1grow (&string_obstack, '\a');
447 \\b obstack_1grow (&string_obstack, '\b');
448 \\f obstack_1grow (&string_obstack, '\f');
449 \\n obstack_1grow (&string_obstack, '\n');
450 \\r obstack_1grow (&string_obstack, '\r');
451 \\t obstack_1grow (&string_obstack, '\t');
452 \\v obstack_1grow (&string_obstack, '\v');
c4d720cd 453 \\[\"\'?\\] obstack_1grow (&string_obstack, yytext[1]);
d8d3f94a
PE
454 \\(u|U[0-9a-fA-F]{4})[0-9a-fA-F]{4} {
455 int c = convert_ucn_to_byte (yytext);
456 if (c < 0)
457 {
98f2caaa
PE
458 complain_at (*yylloc, _("invalid escape sequence: %s"),
459 quote (yytext));
d8d3f94a
PE
460 YY_STEP;
461 }
462 else
463 obstack_1grow (&string_obstack, c);
464 }
4f25ebb0 465 \\(.|\n) {
98f2caaa
PE
466 complain_at (*yylloc, _("unrecognized escape sequence: %s"),
467 quote (yytext));
44995b2e 468 YY_OBS_GROW;
e9955c83
AD
469 }
470}
471
472
473 /*----------------------------------------------------------.
474 | Scanning a C character without decoding its escapes. The |
475 | initial "'" is already eaten. |
476 `----------------------------------------------------------*/
477
478<SC_CHARACTER>
479{
ac060e78
PE
480 "'" YY_OBS_GROW; BEGIN c_context;
481 \\{splice}[\'\\] YY_OBS_GROW;
482 <<EOF>> unexpected_end_of_file (yylloc, "'");
e9955c83
AD
483}
484
485
486 /*----------------------------------------------------------------.
487 | Scanning a C string, without decoding its escapes. The initial |
488 | `"' is already eaten. |
489 `----------------------------------------------------------------*/
490
491<SC_STRING>
492{
ac060e78
PE
493 "\"" YY_OBS_GROW; BEGIN c_context;
494 \\{splice}[\"\\] YY_OBS_GROW;
495 <<EOF>> unexpected_end_of_file (yylloc, "\"");
e9955c83
AD
496}
497
498
499 /*---------------------------------------------------.
500 | Strings, comments etc. can be found in user code. |
501 `---------------------------------------------------*/
502
503<SC_BRACED_CODE,SC_PROLOGUE,SC_EPILOGUE>
504{
a706a1cc
PE
505 "'" YY_OBS_GROW; c_context = YY_START; BEGIN SC_CHARACTER;
506 "\"" YY_OBS_GROW; c_context = YY_START; BEGIN SC_STRING;
507 "/"{splice}"*" YY_OBS_GROW; c_context = YY_START; BEGIN SC_COMMENT;
508 "/"{splice}"/" YY_OBS_GROW; c_context = YY_START; BEGIN SC_LINE_COMMENT;
e9955c83
AD
509}
510
511
512 /*---------------------------------------------------------------.
513 | Scanning some code in braces (%union and actions). The initial |
514 | "{" is already eaten. |
515 `---------------------------------------------------------------*/
516
517<SC_BRACED_CODE>
518{
1a9e39f1
PE
519 "{"|"<"{splice}"%" YY_OBS_GROW; braces_level++;
520 "%"{splice}">" YY_OBS_GROW; braces_level--;
e9955c83 521 "}" {
44995b2e 522 YY_OBS_GROW;
1a9e39f1
PE
523 braces_level--;
524 if (braces_level < 0)
e9955c83 525 {
44995b2e 526 YY_OBS_FINISH;
4cdb01db 527 yylval->string = last_string;
efcb44dd 528 rule_length++;
a706a1cc 529 BEGIN INITIAL;
e9955c83
AD
530 return BRACED_CODE;
531 }
532 }
533
a706a1cc
PE
534 /* Tokenize `<<%' correctly (as `<<' `%') rather than incorrectly
535 (as `<' `<%'). */
536 "<"{splice}"<" YY_OBS_GROW;
537
d8d3f94a 538 "$"("<"{tag}">")?(-?[0-9]+|"$") { handle_dollar (current_braced_code,
f25bfb75
AD
539 yytext, *yylloc); }
540 "@"(-?[0-9]+|"$") { handle_at (current_braced_code,
541 yytext, *yylloc); }
e9955c83 542
345532d7 543 <<EOF>> unexpected_end_of_file (yylloc, "}");
e9955c83
AD
544}
545
546
547 /*--------------------------------------------------------------.
548 | Scanning some prologue: from "%{" (already scanned) to "%}". |
549 `--------------------------------------------------------------*/
550
551<SC_PROLOGUE>
552{
553 "%}" {
44995b2e 554 YY_OBS_FINISH;
4cdb01db 555 yylval->string = last_string;
a706a1cc 556 BEGIN INITIAL;
e9955c83
AD
557 return PROLOGUE;
558 }
559
345532d7 560 <<EOF>> unexpected_end_of_file (yylloc, "%}");
e9955c83
AD
561}
562
563
564 /*---------------------------------------------------------------.
565 | Scanning the epilogue (everything after the second "%%", which |
d8d3f94a 566 | has already been eaten). |
e9955c83
AD
567 `---------------------------------------------------------------*/
568
569<SC_EPILOGUE>
570{
e9955c83 571 <<EOF>> {
44995b2e 572 YY_OBS_FINISH;
4cdb01db 573 yylval->string = last_string;
a706a1cc 574 BEGIN INITIAL;
e9955c83
AD
575 return EPILOGUE;
576 }
577}
578
579
a706a1cc
PE
580 /*----------------------------------------------------------------.
581 | By default, grow the string obstack with the input, escaping M4 |
582 | quoting characters. |
583 `----------------------------------------------------------------*/
584
585<SC_COMMENT,SC_LINE_COMMENT,SC_STRING,SC_CHARACTER,SC_BRACED_CODE,SC_PROLOGUE,SC_EPILOGUE>
586{
ac060e78
PE
587 \$ obstack_sgrow (&string_obstack, "$][");
588 \@ obstack_sgrow (&string_obstack, "@@");
589 \[ obstack_sgrow (&string_obstack, "@{");
590 \] obstack_sgrow (&string_obstack, "@}");
a706a1cc
PE
591 .|\n YY_OBS_GROW;
592}
593
594
e9955c83
AD
595%%
596
597/*------------------------------------------------------------------.
366eea36 598| TEXT is pointing to a wannabee semantic value (i.e., a `$'). |
e9955c83
AD
599| |
600| Possible inputs: $[<TYPENAME>]($|integer) |
601| |
602| Output to the STRING_OBSTACK a reference to this semantic value. |
603`------------------------------------------------------------------*/
604
f25bfb75 605static inline void
366eea36 606handle_action_dollar (char *text, location_t location)
e9955c83
AD
607{
608 const char *type_name = NULL;
366eea36 609 char *cp = text + 1;
e9955c83
AD
610
611 /* Get the type name if explicit. */
612 if (*cp == '<')
613 {
614 type_name = ++cp;
615 while (*cp != '>')
616 ++cp;
617 *cp = '\0';
618 ++cp;
619 }
620
621 if (*cp == '$')
622 {
623 if (!type_name)
56c47203 624 type_name = symbol_list_n_type_name_get (current_rule, location, 0);
e9955c83 625 if (!type_name && typed)
56c47203 626 complain_at (location, _("$$ of `%s' has no declared type"),
97650f4e 627 current_rule->sym->tag);
e9955c83
AD
628 if (!type_name)
629 type_name = "";
630 obstack_fgrow1 (&string_obstack,
631 "]b4_lhs_value([%s])[", type_name);
632 }
d8d3f94a 633 else
e9955c83 634 {
d8d3f94a
PE
635 long num;
636 errno = 0;
637 num = strtol (cp, 0, 10);
e9955c83 638
d8d3f94a 639 if (INT_MIN <= num && num <= rule_length && ! errno)
e9955c83 640 {
d8d3f94a 641 int n = num;
e9955c83 642 if (!type_name && n > 0)
56c47203
AD
643 type_name = symbol_list_n_type_name_get (current_rule, location,
644 n);
e9955c83 645 if (!type_name && typed)
56c47203 646 complain_at (location, _("$%d of `%s' has no declared type"),
97650f4e 647 n, current_rule->sym->tag);
e9955c83
AD
648 if (!type_name)
649 type_name = "";
650 obstack_fgrow3 (&string_obstack,
651 "]b4_rhs_value([%d], [%d], [%s])[",
652 rule_length, n, type_name);
653 }
d8d3f94a 654 else
98f2caaa 655 complain_at (location, _("integer out of range: %s"), quote (text));
9280d3ef
AD
656 }
657}
658
659
366eea36 660/*---------------------------------------------------------------.
d8d3f94a 661| TEXT is expected to be $$ in some code associated to a symbol: |
366eea36
AD
662| destructor or printer. |
663`---------------------------------------------------------------*/
9280d3ef 664
f25bfb75 665static inline void
366eea36 666handle_symbol_code_dollar (char *text, location_t location)
9280d3ef 667{
366eea36 668 char *cp = text + 1;
9280d3ef 669 if (*cp == '$')
366eea36 670 obstack_sgrow (&string_obstack, "]b4_dollar_dollar[");
9280d3ef 671 else
c4d720cd 672 complain_at (location, _("invalid value: %s"), quote (text));
e9955c83
AD
673}
674
f25bfb75
AD
675
676/*-----------------------------------------------------------------.
677| Dispatch onto handle_action_dollar, or handle_destructor_dollar, |
678| depending upon CODE_KIND. |
679`-----------------------------------------------------------------*/
e9955c83
AD
680
681static void
f25bfb75
AD
682handle_dollar (braced_code_t braced_code_kind,
683 char *text, location_t location)
684{
685 switch (braced_code_kind)
686 {
687 case action_braced_code:
688 handle_action_dollar (text, location);
689 break;
690
691 case destructor_braced_code:
366eea36
AD
692 case printer_braced_code:
693 handle_symbol_code_dollar (text, location);
f25bfb75
AD
694 break;
695 }
696}
697
698
699/*------------------------------------------------------.
700| TEXT is a location token (i.e., a `@...'). Output to |
701| STRING_OBSTACK a reference to this location. |
702`------------------------------------------------------*/
703
704static inline void
705handle_action_at (char *text, location_t location)
e9955c83 706{
366eea36 707 char *cp = text + 1;
e9955c83 708 locations_flag = 1;
e9955c83 709
366eea36 710 if (*cp == '$')
e9955c83
AD
711 {
712 obstack_sgrow (&string_obstack, "]b4_lhs_location[");
713 }
d8d3f94a 714 else
e9955c83 715 {
d8d3f94a
PE
716 long num;
717 errno = 0;
718 num = strtol (cp, 0, 10);
dafdc66f 719
d8d3f94a
PE
720 if (INT_MIN <= num && num <= rule_length && ! errno)
721 {
722 int n = num;
723 obstack_fgrow2 (&string_obstack, "]b4_rhs_location([%d], [%d])[",
724 rule_length, n);
725 }
e9955c83 726 else
98f2caaa 727 complain_at (location, _("integer out of range: %s"), quote (text));
f25bfb75
AD
728 }
729}
730
731
366eea36 732/*---------------------------------------------------------------.
d8d3f94a 733| TEXT is expected to be @$ in some code associated to a symbol: |
366eea36
AD
734| destructor or printer. |
735`---------------------------------------------------------------*/
f25bfb75
AD
736
737static inline void
366eea36 738handle_symbol_code_at (char *text, location_t location)
f25bfb75 739{
366eea36
AD
740 char *cp = text + 1;
741 if (*cp == '$')
742 obstack_sgrow (&string_obstack, "]b4_at_dollar[");
f25bfb75 743 else
c4d720cd 744 complain_at (location, _("invalid value: %s"), quote (text));
e9955c83 745}
4cdb01db 746
f25bfb75
AD
747
748/*-------------------------------------------------------------------.
749| Dispatch onto handle_action_at, or handle_destructor_at, depending |
750| upon CODE_KIND. |
751`-------------------------------------------------------------------*/
752
753static void
754handle_at (braced_code_t braced_code_kind,
755 char *text, location_t location)
756{
757 switch (braced_code_kind)
758 {
759 case action_braced_code:
760 handle_action_at (text, location);
761 break;
762
763 case destructor_braced_code:
366eea36
AD
764 case printer_braced_code:
765 handle_symbol_code_at (text, location);
f25bfb75
AD
766 break;
767 }
768}
769
770
d8d3f94a
PE
/*------------------------------------------------------------------.
| Convert universal character name UCN to a single-byte character,  |
| and return that character.  Return -1 if UCN does not correspond  |
| to a single-byte character.                                       |
| UCN has the form \uXXXX or \UXXXXXXXX.                            |
`------------------------------------------------------------------*/

static int
convert_ucn_to_byte (char const *ucn)
{
  /* Skip the two-character `\u' or `\U' prefix.  */
  unsigned long code = strtoul (ucn + 2, NULL, 16);

  /* FIXME: Currently we assume Unicode-compatible unibyte characters
     on ASCII hosts (i.e., Latin-1 on hosts with 8-bit bytes).  On
     non-ASCII hosts we support only the portable C character set.
     These limitations should be removed once we add support for
     multibyte characters.  */

  if (UCHAR_MAX < code)
    return -1;

#if ! ('$' == 0x24 && '@' == 0x40 && '`' == 0x60 && '~' == 0x7e)
  {
    /* A non-ASCII host.  Use CODE to index into a table of the C
       basic execution character set, which is guaranteed to exist on
       all Standard C platforms.  This table also includes '$', '@',
       and '`', which are not in the basic execution character set but
       which are unibyte characters on all the platforms that we know
       about.  */
    static signed char const table[] =
      {
        '\0',   -1,   -1,   -1,   -1,   -1,   -1, '\a',
        '\b', '\t', '\n', '\v', '\f', '\r',   -1,   -1,
          -1,   -1,   -1,   -1,   -1,   -1,   -1,   -1,
          -1,   -1,   -1,   -1,   -1,   -1,   -1,   -1,
         ' ',  '!',  '"',  '#',  '$',  '%',  '&', '\'',
         '(',  ')',  '*',  '+',  ',',  '-',  '.',  '/',
         '0',  '1',  '2',  '3',  '4',  '5',  '6',  '7',
         '8',  '9',  ':',  ';',  '<',  '=',  '>',  '?',
         '@',  'A',  'B',  'C',  'D',  'E',  'F',  'G',
         'H',  'I',  'J',  'K',  'L',  'M',  'N',  'O',
         'P',  'Q',  'R',  'S',  'T',  'U',  'V',  'W',
         'X',  'Y',  'Z',  '[', '\\',  ']',  '^',  '_',
         '`',  'a',  'b',  'c',  'd',  'e',  'f',  'g',
         'h',  'i',  'j',  'k',  'l',  'm',  'n',  'o',
         'p',  'q',  'r',  's',  't',  'u',  'v',  'w',
         'x',  'y',  'z',  '{',  '|',  '}',  '~'
      };

    /* Return the entry as an int right away.  Storing the -1
       sentinel back into the unsigned CODE and converting that to
       int on return would rely on implementation-defined
       behavior.  */
    return code < sizeof table ? table[code] : -1;
  }
#else
  /* An ASCII host: CODE is at most UCHAR_MAX here and is used as the
     byte value directly.  */
  return (int) code;
#endif
}
825
826
900c5db5
AD
827/*----------------------------------------------------------------.
828| Handle `#line INT "FILE"'. ARGS has already skipped `#line '. |
829`----------------------------------------------------------------*/
830
831static void
832handle_syncline (char *args, location_t *location)
833{
834 int lineno = strtol (args, &args, 10);
835 const char *file = NULL;
836 file = strchr (args, '"') + 1;
837 *strchr (file, '"') = 0;
95612cfa
AD
838 current_file = xstrdup (file);
839 location->file = current_file;
900c5db5
AD
840 location->last_line = lineno;
841}
842
a706a1cc
PE
843
844/*-------------------------------------------------------------.
845| Report an unexpected end of file at LOC. An end of file was |
846| encountered and the expected TOKEN_END was missing. After |
847| reporting the problem, pretend that TOKEN_END was found. |
848`-------------------------------------------------------------*/
849
850static void
345532d7 851unexpected_end_of_file (location_t *loc, char const *token_end)
a706a1cc 852{
345532d7 853 size_t i = strlen (token_end);
a706a1cc 854
345532d7
PE
855 complain_at (*loc, _("missing `%s' at end of file"), token_end);
856
857 /* Adjust location's last column so that any later message does not
858 mention the characters just inserted. */
859 loc->last_column -= i;
860
861 while (i != 0)
862 unput (token_end[--i]);
a706a1cc
PE
863}
864
865
f25bfb75
AD
866/*-------------------------.
867| Initialize the scanner. |
868`-------------------------*/
869
1d6412ad
AD
870void
871scanner_initialize (void)
872{
873 obstack_init (&string_obstack);
874}
875
876
f25bfb75
AD
877/*-----------------------------------------------.
878| Free all the memory allocated to the scanner. |
879`-----------------------------------------------*/
880
4cdb01db
AD
881void
882scanner_free (void)
883{
884 obstack_free (&string_obstack, 0);
536545f3
AD
885 /* Reclaim Flex's buffers. */
886 yy_delete_buffer (YY_CURRENT_BUFFER);
4cdb01db 887}