]> git.saurik.com Git - bison.git/blame - src/scan-gram.l
%parse-param and %lex-param now take just one argument, the
[bison.git] / src / scan-gram.l
CommitLineData
e9955c83
AD
1/* Bison Grammar Scanner -*- C -*-
2 Copyright (C) 2002 Free Software Foundation, Inc.
3
4 This file is part of Bison, the GNU Compiler Compiler.
5
6 This program is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 2 of the License, or
9 (at your option) any later version.
10
11 This program is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
15
16 You should have received a copy of the GNU General Public License
17 along with this program; if not, write to the Free Software
18 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
19 02111-1307 USA
20*/
21
a706a1cc 22%option debug nodefault noyywrap never-interactive
e9955c83
AD
23%option prefix="gram_" outfile="lex.yy.c"
24
25%{
26#include "system.h"
d8d3f94a 27#include "mbswidth.h"
e9955c83
AD
28#include "complain.h"
29#include "quote.h"
3e6656f9 30#include "struniq.h"
e9955c83
AD
31#include "getargs.h"
32#include "gram.h"
33#include "reader.h"
34
8efe435c
AD
35#define YY_USER_INIT \
36do { \
41141c56
PE
37 LOCATION_RESET (*loc); \
38 loc->file = current_file; \
8efe435c
AD
39} while (0)
40
41141c56
PE
41/* Each time we match a string, move the end cursor to its end. */
42#define STEP LOCATION_STEP (*loc)
43
44#define YY_USER_ACTION extend_location (loc, yytext, yyleng);
d8d3f94a
PE
45
46#define YY_INPUT(buf, result, size) ((result) = no_cr_read (yyin, buf, size))
47
48
/* Fill BUF with at most SIZE bytes read from FP and return how many
   bytes BUF holds afterwards.  Carriage returns never reach the
   caller: a "\r\n" pair and a lone '\r' are both delivered as a
   single '\n', so the returned count may be smaller than the number
   of bytes consumed from FP.  */

static size_t
no_cr_read (FILE *fp, char *buf, size_t size)
{
  size_t nread = fread (buf, 1, size, fp);
  char *dst;
  char const *src;
  char const *end;

  if (nread == 0)
    return 0;

  /* Fast path: nothing to rewrite when the chunk has no '\r'.  */
  dst = memchr (buf, '\r', nread);
  if (!dst)
    return nread;

  /* From here on DST is the write cursor and SRC the read cursor;
     both start just past the first '\r'.  */
  dst++;
  src = dst;
  end = buf + nread;

  for (;;)
    {
      /* The byte just before DST is a '\r': turn it into '\n' and
	 drop one '\n' that directly follows it, if any.  */
      dst[-1] = '\n';
      if (src == end)
	{
	  /* The '\r' was the last byte of this chunk, so the matching
	     '\n' (if any) is still in FP.  Peek one byte; put it back
	     unless it is that '\n'.  */
	  int next = getc (fp);
	  if (next != '\n' && ungetc (next, fp) != next)
	    break;
	}
      else if (*src == '\n')
	src++;

      /* Slide bytes down until another '\r' has been copied, or the
	 chunk is exhausted.  */
      for (;;)
	{
	  char c;
	  if (src == end)
	    return dst - buf;
	  c = *src++;
	  *dst++ = c;
	  if (c == '\r')
	    break;
	}
    }

  return dst - buf;
}
94
95
96/* Extend *LOC to account for token TOKEN of size SIZE. */
97
98static void
99extend_location (location_t *loc, char const *token, int size)
100{
101 int line = loc->last_line;
102 int column = loc->last_column;
103 char const *p0 = token;
104 char const *p = token;
105 char const *lim = token + size;
106
107 for (p = token; p < lim; p++)
108 switch (*p)
109 {
110 case '\r':
111 /* \r shouldn't survive no_cr_read. */
112 abort ();
113
114 case '\n':
115 line++;
116 column = 1;
117 p0 = p + 1;
118 break;
119
120 case '\t':
121 column += mbsnwidth (p0, p - p0, 0);
122 column += 8 - ((column - 1) & 7);
123 p0 = p + 1;
124 break;
125 }
126
127 loc->last_line = line;
128 loc->last_column = column + mbsnwidth (p0, p - p0, 0);
129}
130
131
e9955c83 132
44995b2e
AD
133/* STRING_OBSTACK -- Used to store all the characters that we need to
134 keep (to construct ID, STRINGS etc.). Use the following macros to
135 use it.
136
41141c56
PE
137 Use STRING_GROW to append what has just been matched, and
138 STRING_FINISH to end the string (it puts the ending 0).
139 STRING_FINISH also stores this string in LAST_STRING, which can be
140 used, and which is used by STRING_FREE to free the last string. */
44995b2e
AD
141
142static struct obstack string_obstack;
44995b2e 143
7ec2d4cd
AD
144/* A string representing the most recently saved token. */
145static char *last_string;
146
147
41141c56 148#define STRING_GROW \
44995b2e
AD
149 obstack_grow (&string_obstack, yytext, yyleng)
150
41141c56 151#define STRING_FINISH \
44995b2e
AD
152 do { \
153 obstack_1grow (&string_obstack, '\0'); \
154 last_string = obstack_finish (&string_obstack); \
44995b2e
AD
155 } while (0)
156
41141c56 157#define STRING_FREE \
a706a1cc 158 obstack_free (&string_obstack, last_string)
e9955c83 159
7ec2d4cd
AD
160void
161scanner_last_string_free (void)
162{
41141c56 163 STRING_FREE;
7ec2d4cd 164}
e9955c83 165
efcb44dd
PE
166/* Within well-formed rules, RULE_LENGTH is the number of values in
167 the current rule so far, which says where to find `$0' with respect
168 to the top of the stack. It is not the same as the rule->length in
169 the case of mid rule actions.
170
171 Outside of well-formed rules, RULE_LENGTH has an undefined value. */
172static int rule_length;
173
d33cb3ae
PE
174static void handle_dollar (braced_code_t code_kind,
175 char *cp, location_t location);
176static void handle_at (braced_code_t code_kind,
177 char *cp, location_t location);
900c5db5 178static void handle_syncline (char *args, location_t *location);
d8d3f94a 179static int convert_ucn_to_byte (char const *hex_text);
345532d7 180static void unexpected_end_of_file (location_t *, char const *);
e9955c83
AD
181
182%}
d8d3f94a 183%x SC_COMMENT SC_LINE_COMMENT SC_YACC_COMMENT
e9955c83
AD
184%x SC_STRING SC_CHARACTER
185%x SC_ESCAPED_STRING SC_ESCAPED_CHARACTER
186%x SC_BRACED_CODE SC_PROLOGUE SC_EPILOGUE
187
29c01725
AD
188letter [.abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ_]
189id {letter}({letter}|[0-9])*
190directive %{letter}({letter}|[0-9]|-)*
191int [0-9]+
d8d3f94a
PE
192
193/* POSIX says that a tag must be both an id and a C union member, but
194 historically almost any character is allowed in a tag. We disallow
195 NUL and newline, as this simplifies our implementation. */
196tag [^\0\n>]+
197
198/* Zero or more instances of backslash-newline. Following GCC, allow
199 white space between the backslash and the newline. */
200splice (\\[ \f\t\v]*\n)*
e9955c83
AD
201
202%%
203%{
a706a1cc 204 /* Nesting level of the current code in braces. */
1a9e39f1
PE
205 int braces_level IF_LINT (= 0);
206
a706a1cc
PE
207 /* Scanner context when scanning C code. */
208 int c_context IF_LINT (= 0);
209
e9955c83
AD
210 /* At each yylex invocation, mark the current position as the
211 start of the next token. */
41141c56 212 STEP;
e9955c83
AD
213%}
214
215
216 /*----------------------------.
217 | Scanning Bison directives. |
218 `----------------------------*/
219<INITIAL>
220{
221 "%binary" return PERCENT_NONASSOC;
222 "%debug" return PERCENT_DEBUG;
223 "%define" return PERCENT_DEFINE;
224 "%defines" return PERCENT_DEFINES;
9280d3ef 225 "%destructor" return PERCENT_DESTRUCTOR;
676385e2 226 "%dprec" return PERCENT_DPREC;
e9955c83
AD
227 "%error"[-_]"verbose" return PERCENT_ERROR_VERBOSE;
228 "%expect" return PERCENT_EXPECT;
229 "%file-prefix" return PERCENT_FILE_PREFIX;
230 "%fixed"[-_]"output"[-_]"files" return PERCENT_YACC;
ae7453f2 231 "%glr-parser" return PERCENT_GLR_PARSER;
e9955c83
AD
232 "%left" return PERCENT_LEFT;
233 "%locations" return PERCENT_LOCATIONS;
676385e2 234 "%merge" return PERCENT_MERGE;
e9955c83
AD
235 "%name"[-_]"prefix" return PERCENT_NAME_PREFIX;
236 "%no"[-_]"lines" return PERCENT_NO_LINES;
237 "%nonassoc" return PERCENT_NONASSOC;
238 "%nterm" return PERCENT_NTERM;
239 "%output" return PERCENT_OUTPUT;
ae7453f2 240 "%parse-param" return PERCENT_PARSE_PARAM;
d8d3f94a 241 "%prec" rule_length--; return PERCENT_PREC;
366eea36 242 "%printer" return PERCENT_PRINTER;
e9955c83
AD
243 "%pure"[-_]"parser" return PERCENT_PURE_PARSER;
244 "%right" return PERCENT_RIGHT;
ae7453f2 245 "%lex-param" return PERCENT_LEX_PARAM;
e9955c83
AD
246 "%skeleton" return PERCENT_SKELETON;
247 "%start" return PERCENT_START;
248 "%term" return PERCENT_TOKEN;
249 "%token" return PERCENT_TOKEN;
250 "%token"[-_]"table" return PERCENT_TOKEN_TABLE;
251 "%type" return PERCENT_TYPE;
252 "%union" return PERCENT_UNION;
253 "%verbose" return PERCENT_VERBOSE;
254 "%yacc" return PERCENT_YACC;
255
29c01725 256 {directive} {
41141c56
PE
257 complain_at (*loc, _("invalid directive: %s"), quote (yytext));
258 STEP;
29c01725
AD
259 }
260
412f8a59 261 ^"#line "{int}" \"".*"\"\n" {
41141c56
PE
262 handle_syncline (yytext + sizeof "#line " - 1, loc);
263 STEP;
412f8a59 264 }
900c5db5 265
e9955c83 266 "=" return EQUAL;
d8d3f94a
PE
267 ":" rule_length = 0; return COLON;
268 "|" rule_length = 0; return PIPE;
e9955c83
AD
269 ";" return SEMICOLON;
270
41141c56 271 [ \f\n\t\v] STEP;
d8d3f94a 272
763ed7a6
PE
273 "," {
274 warn_at (*loc, _("stray `,' treated as white space"));
275 STEP;
276 }
277
e9955c83 278 {id} {
41141c56 279 val->symbol = symbol_get (yytext, *loc);
efcb44dd 280 rule_length++;
e9955c83
AD
281 return ID;
282 }
283
d8d3f94a
PE
284 {int} {
285 unsigned long num;
286 errno = 0;
287 num = strtoul (yytext, 0, 10);
288 if (INT_MAX < num || errno)
289 {
41141c56 290 complain_at (*loc, _("integer out of range: %s"), quote (yytext));
d8d3f94a
PE
291 num = INT_MAX;
292 }
41141c56 293 val->integer = num;
d8d3f94a
PE
294 return INT;
295 }
e9955c83
AD
296
297 /* Characters. We don't check there is only one. */
41141c56 298 "'" STRING_GROW; BEGIN SC_ESCAPED_CHARACTER;
e9955c83
AD
299
300 /* Strings. */
41141c56 301 "\"" STRING_GROW; BEGIN SC_ESCAPED_STRING;
e9955c83
AD
302
303 /* Comments. */
d8d3f94a 304 "/*" BEGIN SC_YACC_COMMENT;
41141c56 305 "//".* STEP;
e9955c83
AD
306
307 /* Prologue. */
a706a1cc 308 "%{" BEGIN SC_PROLOGUE;
e9955c83
AD
309
310 /* Code in between braces. */
41141c56 311 "{" STRING_GROW; braces_level = 0; BEGIN SC_BRACED_CODE;
e9955c83
AD
312
313 /* A type. */
d8d3f94a 314 "<"{tag}">" {
4cdb01db 315 obstack_grow (&string_obstack, yytext + 1, yyleng - 2);
41141c56
PE
316 STRING_FINISH;
317 val->struniq = struniq_new (last_string);
318 STRING_FREE;
4cdb01db
AD
319 return TYPE;
320 }
321
a706a1cc
PE
322 "%%" {
323 static int percent_percent_count;
e9955c83 324 if (++percent_percent_count == 2)
a706a1cc 325 BEGIN SC_EPILOGUE;
e9955c83
AD
326 return PERCENT_PERCENT;
327 }
328
a706a1cc 329 . {
41141c56
PE
330 complain_at (*loc, _("invalid character: %s"), quote (yytext));
331 STEP;
e9955c83
AD
332 }
333}
334
335
d8d3f94a
PE
336 /*---------------------------------------------------------------.
337 | Scanning a Yacc comment. The initial `/ *' is already eaten. |
338 `---------------------------------------------------------------*/
e9955c83 339
d8d3f94a 340<SC_YACC_COMMENT>
e9955c83 341{
d8d3f94a 342 "*/" {
41141c56 343 STEP;
d8d3f94a 344 BEGIN INITIAL;
e9955c83
AD
345 }
346
a706a1cc 347 .|\n ;
41141c56 348 <<EOF>> unexpected_end_of_file (loc, "*/");
d8d3f94a
PE
349}
350
351
352 /*------------------------------------------------------------.
353 | Scanning a C comment. The initial `/ *' is already eaten. |
354 `------------------------------------------------------------*/
355
356<SC_COMMENT>
357{
41141c56
PE
358 "*"{splice}"/" STRING_GROW; BEGIN c_context;
359 <<EOF>> unexpected_end_of_file (loc, "*/");
e9955c83
AD
360}
361
362
d8d3f94a
PE
363 /*--------------------------------------------------------------.
364 | Scanning a line comment. The initial `//' is already eaten. |
365 `--------------------------------------------------------------*/
366
367<SC_LINE_COMMENT>
368{
41141c56
PE
369 "\n" STRING_GROW; BEGIN c_context;
370 {splice} STRING_GROW;
a706a1cc 371 <<EOF>> BEGIN c_context;
d8d3f94a
PE
372}
373
374
e9955c83
AD
375 /*----------------------------------------------------------------.
376 | Scanning a C string, including its escapes. The initial `"' is |
377 | already eaten. |
378 `----------------------------------------------------------------*/
379
380<SC_ESCAPED_STRING>
381{
db2cc12f 382 "\"" {
41141c56
PE
383 STRING_GROW;
384 STRING_FINISH;
385 val->string = last_string;
efcb44dd 386 rule_length++;
a706a1cc 387 BEGIN INITIAL;
e9955c83
AD
388 return STRING;
389 }
390
41141c56
PE
391 .|\n STRING_GROW;
392 <<EOF>> unexpected_end_of_file (loc, "\"");
e9955c83
AD
393}
394
395 /*---------------------------------------------------------------.
396 | Scanning a C character, decoding its escapes. The initial "'" |
397 | is already eaten. |
398 `---------------------------------------------------------------*/
399
400<SC_ESCAPED_CHARACTER>
401{
db2cc12f 402 "'" {
41141c56
PE
403 STRING_GROW;
404 STRING_FINISH;
405 val->symbol = symbol_get (last_string, *loc);
406 symbol_class_set (val->symbol, token_sym, *loc);
407 symbol_user_token_number_set (val->symbol,
408 (unsigned char) last_string[1], *loc);
409 STRING_FREE;
a706a1cc
PE
410 rule_length++;
411 BEGIN INITIAL;
412 return ID;
e9955c83 413 }
a706a1cc 414
41141c56
PE
415 .|\n STRING_GROW;
416 <<EOF>> unexpected_end_of_file (loc, "'");
e9955c83
AD
417}
418
419
420 /*----------------------------.
421 | Decode escaped characters. |
422 `----------------------------*/
423
424<SC_ESCAPED_STRING,SC_ESCAPED_CHARACTER>
425{
d8d3f94a
PE
426 \\[0-7]{1,3} {
427 unsigned long c = strtoul (yytext + 1, 0, 8);
428 if (UCHAR_MAX < c)
e9955c83 429 {
41141c56 430 complain_at (*loc, _("invalid escape sequence: %s"),
98f2caaa 431 quote (yytext));
41141c56 432 STEP;
e9955c83
AD
433 }
434 else
435 obstack_1grow (&string_obstack, c);
436 }
437
d8d3f94a
PE
438 \\x[0-9a-fA-F]+ {
439 unsigned long c;
440 errno = 0;
441 c = strtoul (yytext + 2, 0, 16);
442 if (UCHAR_MAX < c || errno)
443 {
41141c56 444 complain_at (*loc, _("invalid escape sequence: %s"),
98f2caaa 445 quote (yytext));
41141c56 446 STEP;
d8d3f94a
PE
447 }
448 else
449 obstack_1grow (&string_obstack, c);
e9955c83
AD
450 }
451
452 \\a obstack_1grow (&string_obstack, '\a');
453 \\b obstack_1grow (&string_obstack, '\b');
454 \\f obstack_1grow (&string_obstack, '\f');
455 \\n obstack_1grow (&string_obstack, '\n');
456 \\r obstack_1grow (&string_obstack, '\r');
457 \\t obstack_1grow (&string_obstack, '\t');
458 \\v obstack_1grow (&string_obstack, '\v');
412f8a59
PE
459
460 /* \\[\"\'?\\] would be shorter, but it confuses xgettext. */
461 \\("\""|"'"|"?"|"\\") obstack_1grow (&string_obstack, yytext[1]);
462
d8d3f94a
PE
463 \\(u|U[0-9a-fA-F]{4})[0-9a-fA-F]{4} {
464 int c = convert_ucn_to_byte (yytext);
465 if (c < 0)
466 {
41141c56 467 complain_at (*loc, _("invalid escape sequence: %s"),
98f2caaa 468 quote (yytext));
41141c56 469 STEP;
d8d3f94a
PE
470 }
471 else
472 obstack_1grow (&string_obstack, c);
473 }
4f25ebb0 474 \\(.|\n) {
41141c56 475 complain_at (*loc, _("unrecognized escape sequence: %s"),
98f2caaa 476 quote (yytext));
41141c56 477 STRING_GROW;
e9955c83
AD
478 }
479}
480
481
482 /*----------------------------------------------------------.
483 | Scanning a C character without decoding its escapes. The |
484 | initial "'" is already eaten. |
485 `----------------------------------------------------------*/
486
487<SC_CHARACTER>
488{
41141c56
PE
489 "'" STRING_GROW; BEGIN c_context;
490 \\{splice}[^$@\[\]] STRING_GROW;
491 <<EOF>> unexpected_end_of_file (loc, "'");
e9955c83
AD
492}
493
494
495 /*----------------------------------------------------------------.
496 | Scanning a C string, without decoding its escapes. The initial |
497 | `"' is already eaten. |
498 `----------------------------------------------------------------*/
499
500<SC_STRING>
501{
41141c56
PE
502 "\"" STRING_GROW; BEGIN c_context;
503 \\{splice}[^$@\[\]] STRING_GROW;
504 <<EOF>> unexpected_end_of_file (loc, "\"");
e9955c83
AD
505}
506
507
508 /*---------------------------------------------------.
509 | Strings, comments etc. can be found in user code. |
510 `---------------------------------------------------*/
511
512<SC_BRACED_CODE,SC_PROLOGUE,SC_EPILOGUE>
513{
41141c56
PE
514 "'" STRING_GROW; c_context = YY_START; BEGIN SC_CHARACTER;
515 "\"" STRING_GROW; c_context = YY_START; BEGIN SC_STRING;
516 "/"{splice}"*" STRING_GROW; c_context = YY_START; BEGIN SC_COMMENT;
517 "/"{splice}"/" STRING_GROW; c_context = YY_START; BEGIN SC_LINE_COMMENT;
e9955c83
AD
518}
519
520
521 /*---------------------------------------------------------------.
522 | Scanning some code in braces (%union and actions). The initial |
523 | "{" is already eaten. |
524 `---------------------------------------------------------------*/
525
526<SC_BRACED_CODE>
527{
41141c56
PE
528 "{"|"<"{splice}"%" STRING_GROW; braces_level++;
529 "%"{splice}">" STRING_GROW; braces_level--;
e9955c83 530 "}" {
41141c56 531 STRING_GROW;
1a9e39f1
PE
532 braces_level--;
533 if (braces_level < 0)
e9955c83 534 {
41141c56
PE
535 STRING_FINISH;
536 val->string = last_string;
efcb44dd 537 rule_length++;
a706a1cc 538 BEGIN INITIAL;
e9955c83
AD
539 return BRACED_CODE;
540 }
541 }
542
a706a1cc
PE
543 /* Tokenize `<<%' correctly (as `<<' `%') rather than incorrectly
544 (as `<' `<%'). */
41141c56 545 "<"{splice}"<" STRING_GROW;
a706a1cc 546
d8d3f94a 547 "$"("<"{tag}">")?(-?[0-9]+|"$") { handle_dollar (current_braced_code,
41141c56 548 yytext, *loc); }
f25bfb75 549 "@"(-?[0-9]+|"$") { handle_at (current_braced_code,
41141c56 550 yytext, *loc); }
e9955c83 551
41141c56 552 <<EOF>> unexpected_end_of_file (loc, "}");
e9955c83
AD
553}
554
555
556 /*--------------------------------------------------------------.
557 | Scanning some prologue: from "%{" (already scanned) to "%}". |
558 `--------------------------------------------------------------*/
559
560<SC_PROLOGUE>
561{
562 "%}" {
41141c56
PE
563 STRING_FINISH;
564 val->string = last_string;
a706a1cc 565 BEGIN INITIAL;
e9955c83
AD
566 return PROLOGUE;
567 }
568
41141c56 569 <<EOF>> unexpected_end_of_file (loc, "%}");
e9955c83
AD
570}
571
572
573 /*---------------------------------------------------------------.
574 | Scanning the epilogue (everything after the second "%%", which |
d8d3f94a 575 | has already been eaten). |
e9955c83
AD
576 `---------------------------------------------------------------*/
577
578<SC_EPILOGUE>
579{
e9955c83 580 <<EOF>> {
41141c56
PE
581 STRING_FINISH;
582 val->string = last_string;
a706a1cc 583 BEGIN INITIAL;
e9955c83
AD
584 return EPILOGUE;
585 }
586}
587
588
a706a1cc
PE
589 /*----------------------------------------------------------------.
590 | By default, grow the string obstack with the input, escaping M4 |
591 | quoting characters. |
592 `----------------------------------------------------------------*/
593
594<SC_COMMENT,SC_LINE_COMMENT,SC_STRING,SC_CHARACTER,SC_BRACED_CODE,SC_PROLOGUE,SC_EPILOGUE>
595{
ac060e78
PE
596 \$ obstack_sgrow (&string_obstack, "$][");
597 \@ obstack_sgrow (&string_obstack, "@@");
598 \[ obstack_sgrow (&string_obstack, "@{");
599 \] obstack_sgrow (&string_obstack, "@}");
41141c56 600 .|\n STRING_GROW;
a706a1cc
PE
601}
602
603
e9955c83
AD
604%%
605
606/*------------------------------------------------------------------.
366eea36 607| TEXT is pointing to a wannabee semantic value (i.e., a `$'). |
e9955c83
AD
608| |
609| Possible inputs: $[<TYPENAME>]($|integer) |
610| |
611| Output to the STRING_OBSTACK a reference to this semantic value. |
612`------------------------------------------------------------------*/
613
f25bfb75 614static inline void
366eea36 615handle_action_dollar (char *text, location_t location)
e9955c83
AD
616{
617 const char *type_name = NULL;
366eea36 618 char *cp = text + 1;
e9955c83
AD
619
620 /* Get the type name if explicit. */
621 if (*cp == '<')
622 {
623 type_name = ++cp;
624 while (*cp != '>')
625 ++cp;
626 *cp = '\0';
627 ++cp;
628 }
629
630 if (*cp == '$')
631 {
632 if (!type_name)
56c47203 633 type_name = symbol_list_n_type_name_get (current_rule, location, 0);
e9955c83 634 if (!type_name && typed)
56c47203 635 complain_at (location, _("$$ of `%s' has no declared type"),
97650f4e 636 current_rule->sym->tag);
e9955c83
AD
637 if (!type_name)
638 type_name = "";
639 obstack_fgrow1 (&string_obstack,
640 "]b4_lhs_value([%s])[", type_name);
641 }
d8d3f94a 642 else
e9955c83 643 {
d8d3f94a
PE
644 long num;
645 errno = 0;
646 num = strtol (cp, 0, 10);
e9955c83 647
d8d3f94a 648 if (INT_MIN <= num && num <= rule_length && ! errno)
e9955c83 649 {
d8d3f94a 650 int n = num;
e9955c83 651 if (!type_name && n > 0)
56c47203
AD
652 type_name = symbol_list_n_type_name_get (current_rule, location,
653 n);
e9955c83 654 if (!type_name && typed)
56c47203 655 complain_at (location, _("$%d of `%s' has no declared type"),
97650f4e 656 n, current_rule->sym->tag);
e9955c83
AD
657 if (!type_name)
658 type_name = "";
659 obstack_fgrow3 (&string_obstack,
660 "]b4_rhs_value([%d], [%d], [%s])[",
661 rule_length, n, type_name);
662 }
d8d3f94a 663 else
98f2caaa 664 complain_at (location, _("integer out of range: %s"), quote (text));
9280d3ef
AD
665 }
666}
667
668
366eea36 669/*---------------------------------------------------------------.
d8d3f94a 670| TEXT is expected to be $$ in some code associated to a symbol: |
366eea36
AD
671| destructor or printer. |
672`---------------------------------------------------------------*/
9280d3ef 673
f25bfb75 674static inline void
366eea36 675handle_symbol_code_dollar (char *text, location_t location)
9280d3ef 676{
366eea36 677 char *cp = text + 1;
9280d3ef 678 if (*cp == '$')
366eea36 679 obstack_sgrow (&string_obstack, "]b4_dollar_dollar[");
9280d3ef 680 else
c4d720cd 681 complain_at (location, _("invalid value: %s"), quote (text));
e9955c83
AD
682}
683
f25bfb75
AD
684
685/*-----------------------------------------------------------------.
686| Dispatch onto handle_action_dollar, or handle_destructor_dollar, |
687| depending upon CODE_KIND. |
688`-----------------------------------------------------------------*/
e9955c83
AD
689
690static void
f25bfb75
AD
691handle_dollar (braced_code_t braced_code_kind,
692 char *text, location_t location)
693{
694 switch (braced_code_kind)
695 {
696 case action_braced_code:
697 handle_action_dollar (text, location);
698 break;
699
700 case destructor_braced_code:
366eea36
AD
701 case printer_braced_code:
702 handle_symbol_code_dollar (text, location);
f25bfb75
AD
703 break;
704 }
705}
706
707
708/*------------------------------------------------------.
709| TEXT is a location token (i.e., a `@...'). Output to |
710| STRING_OBSTACK a reference to this location. |
711`------------------------------------------------------*/
712
713static inline void
714handle_action_at (char *text, location_t location)
e9955c83 715{
366eea36 716 char *cp = text + 1;
e9955c83 717 locations_flag = 1;
e9955c83 718
366eea36 719 if (*cp == '$')
e9955c83
AD
720 {
721 obstack_sgrow (&string_obstack, "]b4_lhs_location[");
722 }
d8d3f94a 723 else
e9955c83 724 {
d8d3f94a
PE
725 long num;
726 errno = 0;
727 num = strtol (cp, 0, 10);
dafdc66f 728
d8d3f94a
PE
729 if (INT_MIN <= num && num <= rule_length && ! errno)
730 {
731 int n = num;
732 obstack_fgrow2 (&string_obstack, "]b4_rhs_location([%d], [%d])[",
733 rule_length, n);
734 }
e9955c83 735 else
98f2caaa 736 complain_at (location, _("integer out of range: %s"), quote (text));
f25bfb75
AD
737 }
738}
739
740
366eea36 741/*---------------------------------------------------------------.
d8d3f94a 742| TEXT is expected to be @$ in some code associated to a symbol: |
366eea36
AD
743| destructor or printer. |
744`---------------------------------------------------------------*/
f25bfb75
AD
745
746static inline void
366eea36 747handle_symbol_code_at (char *text, location_t location)
f25bfb75 748{
366eea36
AD
749 char *cp = text + 1;
750 if (*cp == '$')
751 obstack_sgrow (&string_obstack, "]b4_at_dollar[");
f25bfb75 752 else
c4d720cd 753 complain_at (location, _("invalid value: %s"), quote (text));
e9955c83 754}
4cdb01db 755
f25bfb75
AD
756
757/*-------------------------------------------------------------------.
758| Dispatch onto handle_action_at, or handle_destructor_at, depending |
759| upon CODE_KIND. |
760`-------------------------------------------------------------------*/
761
762static void
763handle_at (braced_code_t braced_code_kind,
764 char *text, location_t location)
765{
766 switch (braced_code_kind)
767 {
768 case action_braced_code:
769 handle_action_at (text, location);
770 break;
771
772 case destructor_braced_code:
366eea36
AD
773 case printer_braced_code:
774 handle_symbol_code_at (text, location);
f25bfb75
AD
775 break;
776 }
777}
778
779
d8d3f94a
PE
/*------------------------------------------------------------------.
| Convert universal character name UCN to a single-byte character,  |
| and return that character.  Return -1 if UCN does not correspond  |
| to a single-byte character.                                       |
|                                                                   |
| UCN points at the backslash of `\uXXXX' or `\UXXXXXXXX'; the two  |
| leading bytes are skipped and the rest is parsed as hexadecimal.  |
`------------------------------------------------------------------*/

static int
convert_ucn_to_byte (char const *ucn)
{
  unsigned long code = strtoul (ucn + 2, 0, 16);

  /* FIXME: Currently we assume Unicode-compatible unibyte characters
     on ASCII hosts (i.e., Latin-1 on hosts with 8-bit bytes).  On
     non-ASCII hosts we support only the portable C character set.
     These limitations should be removed once we add support for
     multibyte characters.  */

  if (UCHAR_MAX < code)
    return -1;

#if ! ('$' == 0x24 && '@' == 0x40 && '`' == 0x60 && '~' == 0x7e)
  {
    /* A non-ASCII host.  Use CODE to index into a table of the C
       basic execution character set, which is guaranteed to exist on
       all Standard C platforms.  This table also includes '$', '@',
       and '`', which are not in the basic execution character set but
       which are unibyte characters on all the platforms that we know
       about.

       The elements are `int' rather than `signed char' so that
       execution-character values above SCHAR_MAX are stored exactly
       and cannot be mistaken for the -1 "no such character" marker.  */
    static int const table[] =
      {
	'\0',   -1,   -1,   -1,   -1,   -1,   -1, '\a',
	'\b', '\t', '\n', '\v', '\f', '\r',   -1,   -1,
	  -1,   -1,   -1,   -1,   -1,   -1,   -1,   -1,
	  -1,   -1,   -1,   -1,   -1,   -1,   -1,   -1,
	 ' ',  '!',  '"',  '#',  '$',  '%',  '&', '\'',
	 '(',  ')',  '*',  '+',  ',',  '-',  '.',  '/',
	 '0',  '1',  '2',  '3',  '4',  '5',  '6',  '7',
	 '8',  '9',  ':',  ';',  '<',  '=',  '>',  '?',
	 '@',  'A',  'B',  'C',  'D',  'E',  'F',  'G',
	 'H',  'I',  'J',  'K',  'L',  'M',  'N',  'O',
	 'P',  'Q',  'R',  'S',  'T',  'U',  'V',  'W',
	 'X',  'Y',  'Z',  '[', '\\',  ']',  '^',  '_',
	 '`',  'a',  'b',  'c',  'd',  'e',  'f',  'g',
	 'h',  'i',  'j',  'k',  'l',  'm',  'n',  'o',
	 'p',  'q',  'r',  's',  't',  'u',  'v',  'w',
	 'x',  'y',  'z',  '{',  '|',  '}',  '~'
      };

    /* Return straight from the table so that -1 never passes through
       the unsigned CODE variable.  */
    return code < sizeof table / sizeof table[0] ? table[code] : -1;
  }
#else
  /* CODE is at most UCHAR_MAX here, so it fits in an int.  */
  return (int) code;
#endif
}
834
835
900c5db5
AD
836/*----------------------------------------------------------------.
837| Handle `#line INT "FILE"'. ARGS has already skipped `#line '. |
838`----------------------------------------------------------------*/
839
840static void
841handle_syncline (char *args, location_t *location)
842{
843 int lineno = strtol (args, &args, 10);
844 const char *file = NULL;
845 file = strchr (args, '"') + 1;
846 *strchr (file, '"') = 0;
95612cfa
AD
847 current_file = xstrdup (file);
848 location->file = current_file;
900c5db5
AD
849 location->last_line = lineno;
850}
851
a706a1cc
PE
852
853/*-------------------------------------------------------------.
854| Report an unexpected end of file at LOC. An end of file was |
855| encountered and the expected TOKEN_END was missing. After |
856| reporting the problem, pretend that TOKEN_END was found. |
857`-------------------------------------------------------------*/
858
859static void
345532d7 860unexpected_end_of_file (location_t *loc, char const *token_end)
a706a1cc 861{
345532d7 862 size_t i = strlen (token_end);
a706a1cc 863
345532d7
PE
864 complain_at (*loc, _("missing `%s' at end of file"), token_end);
865
866 /* Adjust location's last column so that any later message does not
867 mention the characters just inserted. */
868 loc->last_column -= i;
869
870 while (i != 0)
871 unput (token_end[--i]);
a706a1cc
PE
872}
873
874
f25bfb75
AD
875/*-------------------------.
876| Initialize the scanner. |
877`-------------------------*/
878
1d6412ad
AD
879void
880scanner_initialize (void)
881{
882 obstack_init (&string_obstack);
883}
884
885
f25bfb75
AD
886/*-----------------------------------------------.
887| Free all the memory allocated to the scanner. |
888`-----------------------------------------------*/
889
4cdb01db
AD
890void
891scanner_free (void)
892{
893 obstack_free (&string_obstack, 0);
536545f3
AD
894 /* Reclaim Flex's buffers. */
895 yy_delete_buffer (YY_CURRENT_BUFFER);
4cdb01db 896}