]> git.saurik.com Git - bison.git/blame - src/scan-gram.l
* src/struniq.h, src/struniq.c (struniq_t): Is const.
[bison.git] / src / scan-gram.l
CommitLineData
e9955c83
AD
1/* Bison Grammar Scanner -*- C -*-
2 Copyright (C) 2002 Free Software Foundation, Inc.
3
4 This file is part of Bison, the GNU Compiler Compiler.
5
6 This program is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 2 of the License, or
9 (at your option) any later version.
10
11 This program is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
15
16 You should have received a copy of the GNU General Public License
17 along with this program; if not, write to the Free Software
18 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
19 02111-1307 USA
20*/
21
a706a1cc 22%option debug nodefault noyywrap never-interactive
e9955c83
AD
23%option prefix="gram_" outfile="lex.yy.c"
24
25%{
26#include "system.h"
d8d3f94a 27#include "mbswidth.h"
e9955c83
AD
28#include "complain.h"
29#include "quote.h"
3e6656f9 30#include "struniq.h"
e9955c83
AD
31#include "getargs.h"
32#include "gram.h"
33#include "reader.h"
34
35/* Each time we match a string, move the end cursor to its end. */
8efe435c
AD
36#define YY_USER_INIT \
37do { \
38 LOCATION_RESET (*yylloc); \
95612cfa 39 yylloc->file = current_file; \
8efe435c
AD
40 /* This is only to avoid GCC warnings. */ \
41 if (yycontrol) {;}; \
42} while (0)
43
d8d3f94a
PE
44#define YY_USER_ACTION extend_location (yylloc, yytext, yyleng);
45#define YY_STEP LOCATION_STEP (*yylloc)
46
47#define YY_INPUT(buf, result, size) ((result) = no_cr_read (yyin, buf, size))
48
49
/* Fill BUF with up to SIZE bytes read from FP and return how many
   bytes BUF holds afterwards.  Carriage returns never reach the
   caller: each "\r\n" pair and each lone '\r' is stored as a single
   '\n', so the count returned may be smaller than the count read.  */

static size_t
no_cr_read (FILE *fp, char *buf, size_t size)
{
  size_t nread = fread (buf, 1, size, fp);
  char *dst;
  char const *src;
  char const *end;

  if (nread == 0)
    return nread;

  dst = memchr (buf, '\r', nread);
  if (dst == NULL)
    return nread;

  /* DST is the write cursor and SRC the read cursor; both start just
     past the first '\r'.  Everything before it is already in place.  */
  dst++;
  src = dst;
  end = buf + nread;

  while (1)
    {
      /* The byte just before DST is an '\r': turn it into '\n'.  */
      dst[-1] = '\n';

      /* Drop an '\n' that directly follows that '\r'.  */
      if (src != end)
        {
          if (*src == '\n')
            src++;
        }
      else
        {
          /* The '\r' was the last byte read, so the byte that follows
             it is still in the stream: consume it if it is '\n',
             otherwise push it back.  */
          int next = getc (fp);
          if (next != '\n' && ungetc (next, fp) != next)
            break;
        }

      /* Compact the remaining bytes until another '\r' is copied.  */
      for (;;)
        {
          char c;

          if (src == end)
            return dst - buf;
          c = *src++;
          *dst++ = c;
          if (c == '\r')
            break;
        }
    }

  return dst - buf;
}
95
96
97/* Extend *LOC to account for token TOKEN of size SIZE. */
98
99static void
100extend_location (location_t *loc, char const *token, int size)
101{
102 int line = loc->last_line;
103 int column = loc->last_column;
104 char const *p0 = token;
105 char const *p = token;
106 char const *lim = token + size;
107
108 for (p = token; p < lim; p++)
109 switch (*p)
110 {
111 case '\r':
112 /* \r shouldn't survive no_cr_read. */
113 abort ();
114
115 case '\n':
116 line++;
117 column = 1;
118 p0 = p + 1;
119 break;
120
121 case '\t':
122 column += mbsnwidth (p0, p - p0, 0);
123 column += 8 - ((column - 1) & 7);
124 p0 = p + 1;
125 break;
126 }
127
128 loc->last_line = line;
129 loc->last_column = column + mbsnwidth (p0, p - p0, 0);
130}
131
132
e9955c83 133
44995b2e
AD
134/* STRING_OBSTACK -- Used to store all the characters that we need to
135 keep (to construct ID, STRINGS etc.). Use the following macros to
136 use it.
137
1d6412ad
AD
138 Use YY_OBS_GROW to append what has just been matched, and
139 YY_OBS_FINISH to end the string (it puts the ending 0).
140 YY_OBS_FINISH also stores this string in LAST_STRING, which can be
141 used, and which is used by YY_OBS_FREE to free the last string. */
44995b2e
AD
142
143static struct obstack string_obstack;
44995b2e 144
44995b2e
AD
145#define YY_OBS_GROW \
146 obstack_grow (&string_obstack, yytext, yyleng)
147
148#define YY_OBS_FINISH \
149 do { \
150 obstack_1grow (&string_obstack, '\0'); \
151 last_string = obstack_finish (&string_obstack); \
44995b2e
AD
152 } while (0)
153
a706a1cc
PE
154#define YY_OBS_FREE \
155 obstack_free (&string_obstack, last_string)
e9955c83 156
e9955c83 157
efcb44dd
PE
158/* Within well-formed rules, RULE_LENGTH is the number of values in
159 the current rule so far, which says where to find `$0' with respect
160 to the top of the stack. It is not the same as the rule->length in
161 the case of mid rule actions.
162
163 Outside of well-formed rules, RULE_LENGTH has an undefined value. */
164static int rule_length;
165
d33cb3ae
PE
166static void handle_dollar (braced_code_t code_kind,
167 char *cp, location_t location);
168static void handle_at (braced_code_t code_kind,
169 char *cp, location_t location);
900c5db5 170static void handle_syncline (char *args, location_t *location);
d8d3f94a 171static int convert_ucn_to_byte (char const *hex_text);
345532d7 172static void unexpected_end_of_file (location_t *, char const *);
e9955c83
AD
173
174%}
d8d3f94a 175%x SC_COMMENT SC_LINE_COMMENT SC_YACC_COMMENT
e9955c83
AD
176%x SC_STRING SC_CHARACTER
177%x SC_ESCAPED_STRING SC_ESCAPED_CHARACTER
178%x SC_BRACED_CODE SC_PROLOGUE SC_EPILOGUE
179
29c01725
AD
180letter [.abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ_]
181id {letter}({letter}|[0-9])*
182directive %{letter}({letter}|[0-9]|-)*
183int [0-9]+
d8d3f94a
PE
184
185/* POSIX says that a tag must be both an id and a C union member, but
186 historically almost any character is allowed in a tag. We disallow
187 NUL and newline, as this simplifies our implementation. */
188tag [^\0\n>]+
189
190/* Zero or more instances of backslash-newline. Following GCC, allow
191 white space between the backslash and the newline. */
192splice (\\[ \f\t\v]*\n)*
e9955c83
AD
193
194%%
195%{
a706a1cc 196 /* Nesting level of the current code in braces. */
1a9e39f1
PE
197 int braces_level IF_LINT (= 0);
198
a706a1cc
PE
199 /* Scanner context when scanning C code. */
200 int c_context IF_LINT (= 0);
201
202 /* A string representing the most recently saved token. */
203 char *last_string;
204
e9955c83
AD
205 /* At each yylex invocation, mark the current position as the
206 start of the next token. */
e9955c83 207 YY_STEP;
e9955c83
AD
208%}
209
210
211 /*----------------------------.
212 | Scanning Bison directives. |
213 `----------------------------*/
214<INITIAL>
215{
216 "%binary" return PERCENT_NONASSOC;
217 "%debug" return PERCENT_DEBUG;
218 "%define" return PERCENT_DEFINE;
219 "%defines" return PERCENT_DEFINES;
9280d3ef 220 "%destructor" return PERCENT_DESTRUCTOR;
676385e2 221 "%dprec" return PERCENT_DPREC;
e9955c83
AD
222 "%error"[-_]"verbose" return PERCENT_ERROR_VERBOSE;
223 "%expect" return PERCENT_EXPECT;
224 "%file-prefix" return PERCENT_FILE_PREFIX;
225 "%fixed"[-_]"output"[-_]"files" return PERCENT_YACC;
ae7453f2 226 "%glr-parser" return PERCENT_GLR_PARSER;
e9955c83
AD
227 "%left" return PERCENT_LEFT;
228 "%locations" return PERCENT_LOCATIONS;
676385e2 229 "%merge" return PERCENT_MERGE;
e9955c83
AD
230 "%name"[-_]"prefix" return PERCENT_NAME_PREFIX;
231 "%no"[-_]"lines" return PERCENT_NO_LINES;
232 "%nonassoc" return PERCENT_NONASSOC;
233 "%nterm" return PERCENT_NTERM;
234 "%output" return PERCENT_OUTPUT;
ae7453f2 235 "%parse-param" return PERCENT_PARSE_PARAM;
d8d3f94a 236 "%prec" rule_length--; return PERCENT_PREC;
366eea36 237 "%printer" return PERCENT_PRINTER;
e9955c83
AD
238 "%pure"[-_]"parser" return PERCENT_PURE_PARSER;
239 "%right" return PERCENT_RIGHT;
ae7453f2 240 "%lex-param" return PERCENT_LEX_PARAM;
e9955c83
AD
241 "%skeleton" return PERCENT_SKELETON;
242 "%start" return PERCENT_START;
243 "%term" return PERCENT_TOKEN;
244 "%token" return PERCENT_TOKEN;
245 "%token"[-_]"table" return PERCENT_TOKEN_TABLE;
246 "%type" return PERCENT_TYPE;
247 "%union" return PERCENT_UNION;
248 "%verbose" return PERCENT_VERBOSE;
249 "%yacc" return PERCENT_YACC;
250
29c01725
AD
251 {directive} {
252 complain_at (*yylloc, _("invalid directive: %s"), quote (yytext));
253 YY_STEP;
254 }
255
900c5db5
AD
256 ^"#line "{int}" \""[^\"]*"\"\n" handle_syncline (yytext + strlen ("#line "), yylloc); YY_STEP;
257
e9955c83 258 "=" return EQUAL;
d8d3f94a
PE
259 ":" rule_length = 0; return COLON;
260 "|" rule_length = 0; return PIPE;
ae7453f2 261 "," return COMMA;
e9955c83
AD
262 ";" return SEMICOLON;
263
a706a1cc 264 [ \f\n\t\v] YY_STEP;
d8d3f94a 265
e9955c83 266 {id} {
39f41916 267 yylval->symbol = symbol_get (yytext, *yylloc);
efcb44dd 268 rule_length++;
e9955c83
AD
269 return ID;
270 }
271
d8d3f94a
PE
272 {int} {
273 unsigned long num;
274 errno = 0;
275 num = strtoul (yytext, 0, 10);
276 if (INT_MAX < num || errno)
277 {
98f2caaa 278 complain_at (*yylloc, _("integer out of range: %s"), quote (yytext));
d8d3f94a
PE
279 num = INT_MAX;
280 }
281 yylval->integer = num;
282 return INT;
283 }
e9955c83
AD
284
285 /* Characters. We don't check there is only one. */
a706a1cc 286 "'" YY_OBS_GROW; BEGIN SC_ESCAPED_CHARACTER;
e9955c83
AD
287
288 /* Strings. */
a706a1cc 289 "\"" YY_OBS_GROW; BEGIN SC_ESCAPED_STRING;
e9955c83
AD
290
291 /* Comments. */
d8d3f94a 292 "/*" BEGIN SC_YACC_COMMENT;
e9955c83
AD
293 "//".* YY_STEP;
294
295 /* Prologue. */
a706a1cc 296 "%{" BEGIN SC_PROLOGUE;
e9955c83
AD
297
298 /* Code in between braces. */
a706a1cc 299 "{" YY_OBS_GROW; braces_level = 0; BEGIN SC_BRACED_CODE;
e9955c83
AD
300
301 /* A type. */
d8d3f94a 302 "<"{tag}">" {
4cdb01db
AD
303 obstack_grow (&string_obstack, yytext + 1, yyleng - 2);
304 YY_OBS_FINISH;
3e6656f9
AD
305 yylval->struniq = struniq_new (last_string);
306 YY_OBS_FREE;
4cdb01db
AD
307 return TYPE;
308 }
309
a706a1cc
PE
310 "%%" {
311 static int percent_percent_count;
e9955c83 312 if (++percent_percent_count == 2)
a706a1cc 313 BEGIN SC_EPILOGUE;
e9955c83
AD
314 return PERCENT_PERCENT;
315 }
316
a706a1cc 317 . {
c4d720cd 318 complain_at (*yylloc, _("invalid character: %s"), quote (yytext));
e9955c83
AD
319 YY_STEP;
320 }
321}
322
323
d8d3f94a
PE
324 /*---------------------------------------------------------------.
325 | Scanning a Yacc comment. The initial `/ *' is already eaten. |
326 `---------------------------------------------------------------*/
e9955c83 327
d8d3f94a 328<SC_YACC_COMMENT>
e9955c83 329{
d8d3f94a
PE
330 "*/" {
331 YY_STEP;
332 BEGIN INITIAL;
e9955c83
AD
333 }
334
a706a1cc 335 .|\n ;
345532d7 336 <<EOF>> unexpected_end_of_file (yylloc, "*/");
d8d3f94a
PE
337}
338
339
340 /*------------------------------------------------------------.
341 | Scanning a C comment. The initial `/ *' is already eaten. |
342 `------------------------------------------------------------*/
343
344<SC_COMMENT>
345{
a706a1cc 346 "*"{splice}"/" YY_OBS_GROW; BEGIN c_context;
345532d7 347 <<EOF>> unexpected_end_of_file (yylloc, "*/");
e9955c83
AD
348}
349
350
d8d3f94a
PE
351 /*--------------------------------------------------------------.
352 | Scanning a line comment. The initial `//' is already eaten. |
353 `--------------------------------------------------------------*/
354
355<SC_LINE_COMMENT>
356{
a706a1cc
PE
357 "\n" YY_OBS_GROW; BEGIN c_context;
358 {splice} YY_OBS_GROW;
359 <<EOF>> BEGIN c_context;
d8d3f94a
PE
360}
361
362
e9955c83
AD
363 /*----------------------------------------------------------------.
364 | Scanning a C string, including its escapes. The initial `"' is |
365 | already eaten. |
366 `----------------------------------------------------------------*/
367
368<SC_ESCAPED_STRING>
369{
db2cc12f 370 "\"" {
44995b2e
AD
371 YY_OBS_GROW;
372 YY_OBS_FINISH;
4cdb01db 373 yylval->string = last_string;
efcb44dd 374 rule_length++;
a706a1cc 375 BEGIN INITIAL;
e9955c83
AD
376 return STRING;
377 }
378
a706a1cc 379 .|\n YY_OBS_GROW;
345532d7 380 <<EOF>> unexpected_end_of_file (yylloc, "\"");
e9955c83
AD
381}
382
383 /*---------------------------------------------------------------.
384 | Scanning a C character, decoding its escapes. The initial "'" |
385 | is already eaten. |
386 `---------------------------------------------------------------*/
387
388<SC_ESCAPED_CHARACTER>
389{
db2cc12f 390 "'" {
44995b2e 391 YY_OBS_GROW;
44995b2e 392 YY_OBS_FINISH;
a706a1cc
PE
393 yylval->symbol = symbol_get (last_string, *yylloc);
394 symbol_class_set (yylval->symbol, token_sym, *yylloc);
395 symbol_user_token_number_set (yylval->symbol,
396 (unsigned char) last_string[1], *yylloc);
397 YY_OBS_FREE;
398 rule_length++;
399 BEGIN INITIAL;
400 return ID;
e9955c83 401 }
a706a1cc
PE
402
403 .|\n YY_OBS_GROW;
345532d7 404 <<EOF>> unexpected_end_of_file (yylloc, "'");
e9955c83
AD
405}
406
407
408 /*----------------------------.
409 | Decode escaped characters. |
410 `----------------------------*/
411
412<SC_ESCAPED_STRING,SC_ESCAPED_CHARACTER>
413{
d8d3f94a
PE
414 \\[0-7]{1,3} {
415 unsigned long c = strtoul (yytext + 1, 0, 8);
416 if (UCHAR_MAX < c)
e9955c83 417 {
98f2caaa
PE
418 complain_at (*yylloc, _("invalid escape sequence: %s"),
419 quote (yytext));
e9955c83
AD
420 YY_STEP;
421 }
422 else
423 obstack_1grow (&string_obstack, c);
424 }
425
d8d3f94a
PE
426 \\x[0-9a-fA-F]+ {
427 unsigned long c;
428 errno = 0;
429 c = strtoul (yytext + 2, 0, 16);
430 if (UCHAR_MAX < c || errno)
431 {
98f2caaa
PE
432 complain_at (*yylloc, _("invalid escape sequence: %s"),
433 quote (yytext));
d8d3f94a
PE
434 YY_STEP;
435 }
436 else
437 obstack_1grow (&string_obstack, c);
e9955c83
AD
438 }
439
440 \\a obstack_1grow (&string_obstack, '\a');
441 \\b obstack_1grow (&string_obstack, '\b');
442 \\f obstack_1grow (&string_obstack, '\f');
443 \\n obstack_1grow (&string_obstack, '\n');
444 \\r obstack_1grow (&string_obstack, '\r');
445 \\t obstack_1grow (&string_obstack, '\t');
446 \\v obstack_1grow (&string_obstack, '\v');
c4d720cd 447 \\[\"\'?\\] obstack_1grow (&string_obstack, yytext[1]);
d8d3f94a
PE
448 \\(u|U[0-9a-fA-F]{4})[0-9a-fA-F]{4} {
449 int c = convert_ucn_to_byte (yytext);
450 if (c < 0)
451 {
98f2caaa
PE
452 complain_at (*yylloc, _("invalid escape sequence: %s"),
453 quote (yytext));
d8d3f94a
PE
454 YY_STEP;
455 }
456 else
457 obstack_1grow (&string_obstack, c);
458 }
4f25ebb0 459 \\(.|\n) {
98f2caaa
PE
460 complain_at (*yylloc, _("unrecognized escape sequence: %s"),
461 quote (yytext));
44995b2e 462 YY_OBS_GROW;
e9955c83
AD
463 }
464}
465
466
467 /*----------------------------------------------------------.
468 | Scanning a C character without decoding its escapes. The |
469 | initial "'" is already eaten. |
470 `----------------------------------------------------------*/
471
472<SC_CHARACTER>
473{
ac060e78
PE
474 "'" YY_OBS_GROW; BEGIN c_context;
475 \\{splice}[\'\\] YY_OBS_GROW;
476 <<EOF>> unexpected_end_of_file (yylloc, "'");
e9955c83
AD
477}
478
479
480 /*----------------------------------------------------------------.
481 | Scanning a C string, without decoding its escapes. The initial |
482 | `"' is already eaten. |
483 `----------------------------------------------------------------*/
484
485<SC_STRING>
486{
ac060e78
PE
487 "\"" YY_OBS_GROW; BEGIN c_context;
488 \\{splice}[\"\\] YY_OBS_GROW;
489 <<EOF>> unexpected_end_of_file (yylloc, "\"");
e9955c83
AD
490}
491
492
493 /*---------------------------------------------------.
494 | Strings, comments etc. can be found in user code. |
495 `---------------------------------------------------*/
496
497<SC_BRACED_CODE,SC_PROLOGUE,SC_EPILOGUE>
498{
a706a1cc
PE
499 "'" YY_OBS_GROW; c_context = YY_START; BEGIN SC_CHARACTER;
500 "\"" YY_OBS_GROW; c_context = YY_START; BEGIN SC_STRING;
501 "/"{splice}"*" YY_OBS_GROW; c_context = YY_START; BEGIN SC_COMMENT;
502 "/"{splice}"/" YY_OBS_GROW; c_context = YY_START; BEGIN SC_LINE_COMMENT;
e9955c83
AD
503}
504
505
506 /*---------------------------------------------------------------.
507 | Scanning some code in braces (%union and actions). The initial |
508 | "{" is already eaten. |
509 `---------------------------------------------------------------*/
510
511<SC_BRACED_CODE>
512{
1a9e39f1
PE
513 "{"|"<"{splice}"%" YY_OBS_GROW; braces_level++;
514 "%"{splice}">" YY_OBS_GROW; braces_level--;
e9955c83 515 "}" {
44995b2e 516 YY_OBS_GROW;
1a9e39f1
PE
517 braces_level--;
518 if (braces_level < 0)
e9955c83 519 {
44995b2e 520 YY_OBS_FINISH;
4cdb01db 521 yylval->string = last_string;
efcb44dd 522 rule_length++;
a706a1cc 523 BEGIN INITIAL;
e9955c83
AD
524 return BRACED_CODE;
525 }
526 }
527
a706a1cc
PE
528 /* Tokenize `<<%' correctly (as `<<' `%') rather than incorrectly
529 (as `<' `<%'). */
530 "<"{splice}"<" YY_OBS_GROW;
531
d8d3f94a 532 "$"("<"{tag}">")?(-?[0-9]+|"$") { handle_dollar (current_braced_code,
f25bfb75
AD
533 yytext, *yylloc); }
534 "@"(-?[0-9]+|"$") { handle_at (current_braced_code,
535 yytext, *yylloc); }
e9955c83 536
345532d7 537 <<EOF>> unexpected_end_of_file (yylloc, "}");
e9955c83
AD
538}
539
540
541 /*--------------------------------------------------------------.
542 | Scanning some prologue: from "%{" (already scanned) to "%}". |
543 `--------------------------------------------------------------*/
544
545<SC_PROLOGUE>
546{
547 "%}" {
44995b2e 548 YY_OBS_FINISH;
4cdb01db 549 yylval->string = last_string;
a706a1cc 550 BEGIN INITIAL;
e9955c83
AD
551 return PROLOGUE;
552 }
553
345532d7 554 <<EOF>> unexpected_end_of_file (yylloc, "%}");
e9955c83
AD
555}
556
557
558 /*---------------------------------------------------------------.
559 | Scanning the epilogue (everything after the second "%%", which |
d8d3f94a 560 | has already been eaten). |
e9955c83
AD
561 `---------------------------------------------------------------*/
562
563<SC_EPILOGUE>
564{
e9955c83 565 <<EOF>> {
44995b2e 566 YY_OBS_FINISH;
4cdb01db 567 yylval->string = last_string;
a706a1cc 568 BEGIN INITIAL;
e9955c83
AD
569 return EPILOGUE;
570 }
571}
572
573
a706a1cc
PE
574 /*----------------------------------------------------------------.
575 | By default, grow the string obstack with the input, escaping M4 |
576 | quoting characters. |
577 `----------------------------------------------------------------*/
578
579<SC_COMMENT,SC_LINE_COMMENT,SC_STRING,SC_CHARACTER,SC_BRACED_CODE,SC_PROLOGUE,SC_EPILOGUE>
580{
ac060e78
PE
581 \$ obstack_sgrow (&string_obstack, "$][");
582 \@ obstack_sgrow (&string_obstack, "@@");
583 \[ obstack_sgrow (&string_obstack, "@{");
584 \] obstack_sgrow (&string_obstack, "@}");
a706a1cc
PE
585 .|\n YY_OBS_GROW;
586}
587
588
e9955c83
AD
589%%
590
591/*------------------------------------------------------------------.
366eea36 592| TEXT is pointing to a wannabee semantic value (i.e., a `$'). |
e9955c83
AD
593| |
594| Possible inputs: $[<TYPENAME>]($|integer) |
595| |
596| Output to the STRING_OBSTACK a reference to this semantic value. |
597`------------------------------------------------------------------*/
598
f25bfb75 599static inline void
366eea36 600handle_action_dollar (char *text, location_t location)
e9955c83
AD
601{
602 const char *type_name = NULL;
366eea36 603 char *cp = text + 1;
e9955c83
AD
604
605 /* Get the type name if explicit. */
606 if (*cp == '<')
607 {
608 type_name = ++cp;
609 while (*cp != '>')
610 ++cp;
611 *cp = '\0';
612 ++cp;
613 }
614
615 if (*cp == '$')
616 {
617 if (!type_name)
56c47203 618 type_name = symbol_list_n_type_name_get (current_rule, location, 0);
e9955c83 619 if (!type_name && typed)
56c47203 620 complain_at (location, _("$$ of `%s' has no declared type"),
97650f4e 621 current_rule->sym->tag);
e9955c83
AD
622 if (!type_name)
623 type_name = "";
624 obstack_fgrow1 (&string_obstack,
625 "]b4_lhs_value([%s])[", type_name);
626 }
d8d3f94a 627 else
e9955c83 628 {
d8d3f94a
PE
629 long num;
630 errno = 0;
631 num = strtol (cp, 0, 10);
e9955c83 632
d8d3f94a 633 if (INT_MIN <= num && num <= rule_length && ! errno)
e9955c83 634 {
d8d3f94a 635 int n = num;
e9955c83 636 if (!type_name && n > 0)
56c47203
AD
637 type_name = symbol_list_n_type_name_get (current_rule, location,
638 n);
e9955c83 639 if (!type_name && typed)
56c47203 640 complain_at (location, _("$%d of `%s' has no declared type"),
97650f4e 641 n, current_rule->sym->tag);
e9955c83
AD
642 if (!type_name)
643 type_name = "";
644 obstack_fgrow3 (&string_obstack,
645 "]b4_rhs_value([%d], [%d], [%s])[",
646 rule_length, n, type_name);
647 }
d8d3f94a 648 else
98f2caaa 649 complain_at (location, _("integer out of range: %s"), quote (text));
9280d3ef
AD
650 }
651}
652
653
366eea36 654/*---------------------------------------------------------------.
d8d3f94a 655| TEXT is expected to be $$ in some code associated to a symbol: |
366eea36
AD
656| destructor or printer. |
657`---------------------------------------------------------------*/
9280d3ef 658
f25bfb75 659static inline void
366eea36 660handle_symbol_code_dollar (char *text, location_t location)
9280d3ef 661{
366eea36 662 char *cp = text + 1;
9280d3ef 663 if (*cp == '$')
366eea36 664 obstack_sgrow (&string_obstack, "]b4_dollar_dollar[");
9280d3ef 665 else
c4d720cd 666 complain_at (location, _("invalid value: %s"), quote (text));
e9955c83
AD
667}
668
f25bfb75
AD
669
670/*-----------------------------------------------------------------.
671| Dispatch onto handle_action_dollar, or handle_destructor_dollar, |
672| depending upon CODE_KIND. |
673`-----------------------------------------------------------------*/
e9955c83
AD
674
675static void
f25bfb75
AD
676handle_dollar (braced_code_t braced_code_kind,
677 char *text, location_t location)
678{
679 switch (braced_code_kind)
680 {
681 case action_braced_code:
682 handle_action_dollar (text, location);
683 break;
684
685 case destructor_braced_code:
366eea36
AD
686 case printer_braced_code:
687 handle_symbol_code_dollar (text, location);
f25bfb75
AD
688 break;
689 }
690}
691
692
693/*------------------------------------------------------.
694| TEXT is a location token (i.e., a `@...'). Output to |
695| STRING_OBSTACK a reference to this location. |
696`------------------------------------------------------*/
697
698static inline void
699handle_action_at (char *text, location_t location)
e9955c83 700{
366eea36 701 char *cp = text + 1;
e9955c83 702 locations_flag = 1;
e9955c83 703
366eea36 704 if (*cp == '$')
e9955c83
AD
705 {
706 obstack_sgrow (&string_obstack, "]b4_lhs_location[");
707 }
d8d3f94a 708 else
e9955c83 709 {
d8d3f94a
PE
710 long num;
711 errno = 0;
712 num = strtol (cp, 0, 10);
dafdc66f 713
d8d3f94a
PE
714 if (INT_MIN <= num && num <= rule_length && ! errno)
715 {
716 int n = num;
717 obstack_fgrow2 (&string_obstack, "]b4_rhs_location([%d], [%d])[",
718 rule_length, n);
719 }
e9955c83 720 else
98f2caaa 721 complain_at (location, _("integer out of range: %s"), quote (text));
f25bfb75
AD
722 }
723}
724
725
366eea36 726/*---------------------------------------------------------------.
d8d3f94a 727| TEXT is expected to be @$ in some code associated to a symbol: |
366eea36
AD
728| destructor or printer. |
729`---------------------------------------------------------------*/
f25bfb75
AD
730
731static inline void
366eea36 732handle_symbol_code_at (char *text, location_t location)
f25bfb75 733{
366eea36
AD
734 char *cp = text + 1;
735 if (*cp == '$')
736 obstack_sgrow (&string_obstack, "]b4_at_dollar[");
f25bfb75 737 else
c4d720cd 738 complain_at (location, _("invalid value: %s"), quote (text));
e9955c83 739}
4cdb01db 740
f25bfb75
AD
741
742/*-------------------------------------------------------------------.
743| Dispatch onto handle_action_at, or handle_destructor_at, depending |
744| upon CODE_KIND. |
745`-------------------------------------------------------------------*/
746
747static void
748handle_at (braced_code_t braced_code_kind,
749 char *text, location_t location)
750{
751 switch (braced_code_kind)
752 {
753 case action_braced_code:
754 handle_action_at (text, location);
755 break;
756
757 case destructor_braced_code:
366eea36
AD
758 case printer_braced_code:
759 handle_symbol_code_at (text, location);
f25bfb75
AD
760 break;
761 }
762}
763
764
d8d3f94a
PE
/* Convert the universal character name UCN (`\uXXXX' or
   `\UXXXXXXXX', including the leading backslash) to a single-byte
   character, and return that character.  Return -1 if UCN does not
   correspond to a single-byte character.  */

static int
convert_ucn_to_byte (char const *ucn)
{
  /* Skip the backslash and the `u' or `U'; the rest is hexadecimal.  */
  unsigned long code = strtoul (ucn + 2, 0, 16);

  /* FIXME: Currently we assume Unicode-compatible unibyte characters
     on ASCII hosts (i.e., Latin-1 on hosts with 8-bit bytes).  On
     non-ASCII hosts we support only the portable C character set.
     These limitations should be removed once we add support for
     multibyte characters.  */

  if (UCHAR_MAX < code)
    return -1;

#if ! ('$' == 0x24 && '@' == 0x40 && '`' == 0x60 && '~' == 0x7e)
  {
    /* A non-ASCII host.  Use CODE to index into a table of the C
       basic execution character set, which is guaranteed to exist on
       all Standard C platforms.  This table also includes '$', '@',
       and '`', which are not in the basic execution character set but
       which are unibyte characters on all the platforms that we know
       about.  */
    static signed char const table[] =
      {
        '\0',   -1,   -1,   -1,   -1,   -1,   -1, '\a',
        '\b', '\t', '\n', '\v', '\f', '\r',   -1,   -1,
          -1,   -1,   -1,   -1,   -1,   -1,   -1,   -1,
          -1,   -1,   -1,   -1,   -1,   -1,   -1,   -1,
         ' ',  '!',  '"',  '#',  '$',  '%',  '&', '\'',
         '(',  ')',  '*',  '+',  ',',  '-',  '.',  '/',
         '0',  '1',  '2',  '3',  '4',  '5',  '6',  '7',
         '8',  '9',  ':',  ';',  '<',  '=',  '>',  '?',
         '@',  'A',  'B',  'C',  'D',  'E',  'F',  'G',
         'H',  'I',  'J',  'K',  'L',  'M',  'N',  'O',
         'P',  'Q',  'R',  'S',  'T',  'U',  'V',  'W',
         'X',  'Y',  'Z',  '[', '\\',  ']',  '^',  '_',
         '`',  'a',  'b',  'c',  'd',  'e',  'f',  'g',
         'h',  'i',  'j',  'k',  'l',  'm',  'n',  'o',
         'p',  'q',  'r',  's',  't',  'u',  'v',  'w',
         'x',  'y',  'z',  '{',  '|',  '}',  '~'
      };

    /* Return the int result directly: storing the -1 sentinel in the
       unsigned CODE and converting it back to int would be
       implementation-defined.  */
    if (sizeof table <= code)
      return -1;
    return table[code];
  }
#else
  /* CODE is at most UCHAR_MAX here.  */
  return (int) code;
#endif
}
819
820
900c5db5
AD
821/*----------------------------------------------------------------.
822| Handle `#line INT "FILE"'. ARGS has already skipped `#line '. |
823`----------------------------------------------------------------*/
824
825static void
826handle_syncline (char *args, location_t *location)
827{
828 int lineno = strtol (args, &args, 10);
829 const char *file = NULL;
830 file = strchr (args, '"') + 1;
831 *strchr (file, '"') = 0;
95612cfa
AD
832 current_file = xstrdup (file);
833 location->file = current_file;
900c5db5
AD
834 location->last_line = lineno;
835}
836
a706a1cc
PE
837
838/*-------------------------------------------------------------.
839| Report an unexpected end of file at LOC. An end of file was |
840| encountered and the expected TOKEN_END was missing. After |
841| reporting the problem, pretend that TOKEN_END was found. |
842`-------------------------------------------------------------*/
843
844static void
345532d7 845unexpected_end_of_file (location_t *loc, char const *token_end)
a706a1cc 846{
345532d7 847 size_t i = strlen (token_end);
a706a1cc 848
345532d7
PE
849 complain_at (*loc, _("missing `%s' at end of file"), token_end);
850
851 /* Adjust location's last column so that any later message does not
852 mention the characters just inserted. */
853 loc->last_column -= i;
854
855 while (i != 0)
856 unput (token_end[--i]);
a706a1cc
PE
857}
858
859
f25bfb75
AD
860/*-------------------------.
861| Initialize the scanner. |
862`-------------------------*/
863
1d6412ad
AD
864void
865scanner_initialize (void)
866{
867 obstack_init (&string_obstack);
868}
869
870
f25bfb75
AD
871/*-----------------------------------------------.
872| Free all the memory allocated to the scanner. |
873`-----------------------------------------------*/
874
4cdb01db
AD
875void
876scanner_free (void)
877{
878 obstack_free (&string_obstack, 0);
536545f3
AD
879 /* Reclaim Flex's buffers. */
880 yy_delete_buffer (YY_CURRENT_BUFFER);
4cdb01db 881}