]> git.saurik.com Git - bison.git/blame - src/scan-gram.l
(struct state_list): Renamed from struct state_list_s.
[bison.git] / src / scan-gram.l
CommitLineData
e9955c83
AD
1/* Bison Grammar Scanner -*- C -*-
2 Copyright (C) 2002 Free Software Foundation, Inc.
3
4 This file is part of Bison, the GNU Compiler Compiler.
5
6 This program is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 2 of the License, or
9 (at your option) any later version.
10
11 This program is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
15
16 You should have received a copy of the GNU General Public License
17 along with this program; if not, write to the Free Software
18 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
19 02111-1307 USA
20*/
21
a706a1cc 22%option debug nodefault noyywrap never-interactive
e9955c83
AD
23%option prefix="gram_" outfile="lex.yy.c"
24
25%{
26#include "system.h"
d8d3f94a 27#include "mbswidth.h"
e9955c83 28#include "complain.h"
3f2d73f1 29#include "files.h"
e9955c83 30#include "quote.h"
3e6656f9 31#include "struniq.h"
e9955c83
AD
32#include "getargs.h"
33#include "gram.h"
34#include "reader.h"
35
3f2d73f1
PE
36#define YY_USER_INIT \
37 do \
38 { \
39 scanner_cursor.file = current_file; \
40 scanner_cursor.line = 1; \
41 scanner_cursor.column = 1; \
42 } \
43 while (0)
8efe435c 44
3f2d73f1
PE
45/* Location of scanner cursor. */
46boundary scanner_cursor;
41141c56 47
3f2d73f1
PE
48static void adjust_location (location_t *, char const *, size_t);
49#define YY_USER_ACTION adjust_location (loc, yytext, yyleng);
d8d3f94a 50
6c30d641 51static size_t no_cr_read (FILE *, char *, size_t);
d8d3f94a
PE
52#define YY_INPUT(buf, result, size) ((result) = no_cr_read (yyin, buf, size))
53
54
44995b2e
AD
55/* STRING_OBSTACK -- Used to store all the characters that we need to
56 keep (to construct ID, STRINGS etc.). Use the following macros to
57 use it.
58
41141c56
PE
59 Use STRING_GROW to append what has just been matched, and
60 STRING_FINISH to end the string (it puts the ending 0).
61 STRING_FINISH also stores this string in LAST_STRING, which can be
62 used, and which is used by STRING_FREE to free the last string. */
44995b2e
AD
63
64static struct obstack string_obstack;
44995b2e 65
7ec2d4cd
AD
66/* A string representing the most recently saved token. */
67static char *last_string;
68
69
41141c56 70#define STRING_GROW \
44995b2e
AD
71 obstack_grow (&string_obstack, yytext, yyleng)
72
41141c56 73#define STRING_FINISH \
44995b2e
AD
74 do { \
75 obstack_1grow (&string_obstack, '\0'); \
76 last_string = obstack_finish (&string_obstack); \
44995b2e
AD
77 } while (0)
78
41141c56 79#define STRING_FREE \
a706a1cc 80 obstack_free (&string_obstack, last_string)
e9955c83 81
7ec2d4cd
AD
82void
83scanner_last_string_free (void)
84{
41141c56 85 STRING_FREE;
7ec2d4cd 86}
e9955c83 87
efcb44dd
PE
88/* Within well-formed rules, RULE_LENGTH is the number of values in
89 the current rule so far, which says where to find `$0' with respect
90 to the top of the stack. It is not the same as the rule->length in
91 the case of mid rule actions.
92
93 Outside of well-formed rules, RULE_LENGTH has an undefined value. */
94static int rule_length;
95
d33cb3ae
PE
96static void handle_dollar (braced_code_t code_kind,
97 char *cp, location_t location);
98static void handle_at (braced_code_t code_kind,
99 char *cp, location_t location);
3f2d73f1 100static void handle_syncline (char *args);
d8d3f94a 101static int convert_ucn_to_byte (char const *hex_text);
3f2d73f1 102static void unexpected_end_of_file (boundary, char const *);
e9955c83
AD
103
104%}
d8d3f94a 105%x SC_COMMENT SC_LINE_COMMENT SC_YACC_COMMENT
e9955c83 106%x SC_STRING SC_CHARACTER
3f2d73f1 107%x SC_AFTER_IDENTIFIER
e9955c83
AD
108%x SC_ESCAPED_STRING SC_ESCAPED_CHARACTER
109%x SC_BRACED_CODE SC_PROLOGUE SC_EPILOGUE
110
29c01725
AD
111letter [.abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ_]
112id {letter}({letter}|[0-9])*
113directive %{letter}({letter}|[0-9]|-)*
114int [0-9]+
d8d3f94a
PE
115
116/* POSIX says that a tag must be both an id and a C union member, but
117 historically almost any character is allowed in a tag. We disallow
118 NUL and newline, as this simplifies our implementation. */
119tag [^\0\n>]+
120
121/* Zero or more instances of backslash-newline. Following GCC, allow
122 white space between the backslash and the newline. */
123splice (\\[ \f\t\v]*\n)*
e9955c83
AD
124
125%%
126%{
a706a1cc 127 /* Nesting level of the current code in braces. */
1a9e39f1
PE
128 int braces_level IF_LINT (= 0);
129
3f2d73f1
PE
130 /* Parent context state, when applicable. */
131 int context_state IF_LINT (= 0);
a706a1cc 132
3f2d73f1
PE
133 /* Location of most recent identifier, when applicable. */
134 location_t id_loc IF_LINT (= *loc);
135
136 /* Location where containing code started, when applicable. */
137 boundary code_start IF_LINT (= loc->start);
138
139 /* Location where containing comment or string or character literal
140 started, when applicable. */
141 boundary token_start IF_LINT (= loc->start);
e9955c83
AD
142%}
143
144
3f2d73f1
PE
145 /*-----------------------.
146 | Scanning white space. |
147 `-----------------------*/
148
149<INITIAL,SC_AFTER_IDENTIFIER>
150{
151 [ \f\n\t\v] ;
152
153 /* Comments. */
154 "/*" token_start = loc->start; context_state = YY_START; BEGIN SC_YACC_COMMENT;
155 "//".* ;
156
157 /* #line directives are not documented, and may be withdrawn or
158 modified in future versions of Bison. */
159 ^"#line "{int}" \"".*"\"\n" {
160 handle_syncline (yytext + sizeof "#line " - 1);
161 }
162}
163
164
e9955c83
AD
165 /*----------------------------.
166 | Scanning Bison directives. |
167 `----------------------------*/
168<INITIAL>
169{
170 "%binary" return PERCENT_NONASSOC;
171 "%debug" return PERCENT_DEBUG;
172 "%define" return PERCENT_DEFINE;
173 "%defines" return PERCENT_DEFINES;
9280d3ef 174 "%destructor" return PERCENT_DESTRUCTOR;
676385e2 175 "%dprec" return PERCENT_DPREC;
e9955c83
AD
176 "%error"[-_]"verbose" return PERCENT_ERROR_VERBOSE;
177 "%expect" return PERCENT_EXPECT;
178 "%file-prefix" return PERCENT_FILE_PREFIX;
179 "%fixed"[-_]"output"[-_]"files" return PERCENT_YACC;
ae7453f2 180 "%glr-parser" return PERCENT_GLR_PARSER;
e9955c83
AD
181 "%left" return PERCENT_LEFT;
182 "%locations" return PERCENT_LOCATIONS;
676385e2 183 "%merge" return PERCENT_MERGE;
e9955c83
AD
184 "%name"[-_]"prefix" return PERCENT_NAME_PREFIX;
185 "%no"[-_]"lines" return PERCENT_NO_LINES;
186 "%nonassoc" return PERCENT_NONASSOC;
187 "%nterm" return PERCENT_NTERM;
188 "%output" return PERCENT_OUTPUT;
ae7453f2 189 "%parse-param" return PERCENT_PARSE_PARAM;
d8d3f94a 190 "%prec" rule_length--; return PERCENT_PREC;
366eea36 191 "%printer" return PERCENT_PRINTER;
e9955c83
AD
192 "%pure"[-_]"parser" return PERCENT_PURE_PARSER;
193 "%right" return PERCENT_RIGHT;
ae7453f2 194 "%lex-param" return PERCENT_LEX_PARAM;
e9955c83
AD
195 "%skeleton" return PERCENT_SKELETON;
196 "%start" return PERCENT_START;
197 "%term" return PERCENT_TOKEN;
198 "%token" return PERCENT_TOKEN;
199 "%token"[-_]"table" return PERCENT_TOKEN_TABLE;
200 "%type" return PERCENT_TYPE;
201 "%union" return PERCENT_UNION;
202 "%verbose" return PERCENT_VERBOSE;
203 "%yacc" return PERCENT_YACC;
204
3f2d73f1 205 {directive} {
41141c56 206 complain_at (*loc, _("invalid directive: %s"), quote (yytext));
412f8a59 207 }
900c5db5 208
e9955c83 209 "=" return EQUAL;
d8d3f94a 210 "|" rule_length = 0; return PIPE;
e9955c83
AD
211 ";" return SEMICOLON;
212
763ed7a6
PE
213 "," {
214 warn_at (*loc, _("stray `,' treated as white space"));
763ed7a6
PE
215 }
216
3f2d73f1 217 {id} {
41141c56 218 val->symbol = symbol_get (yytext, *loc);
3f2d73f1 219 id_loc = *loc;
efcb44dd 220 rule_length++;
3f2d73f1 221 BEGIN SC_AFTER_IDENTIFIER;
e9955c83
AD
222 }
223
d8d3f94a
PE
224 {int} {
225 unsigned long num;
226 errno = 0;
227 num = strtoul (yytext, 0, 10);
228 if (INT_MAX < num || errno)
229 {
41141c56 230 complain_at (*loc, _("integer out of range: %s"), quote (yytext));
d8d3f94a
PE
231 num = INT_MAX;
232 }
41141c56 233 val->integer = num;
d8d3f94a
PE
234 return INT;
235 }
e9955c83
AD
236
237 /* Characters. We don't check there is only one. */
3f2d73f1 238 "'" STRING_GROW; token_start = loc->start; BEGIN SC_ESCAPED_CHARACTER;
e9955c83
AD
239
240 /* Strings. */
3f2d73f1 241 "\"" STRING_GROW; token_start = loc->start; BEGIN SC_ESCAPED_STRING;
e9955c83
AD
242
243 /* Prologue. */
3f2d73f1 244 "%{" code_start = loc->start; BEGIN SC_PROLOGUE;
e9955c83
AD
245
246 /* Code in between braces. */
3f2d73f1
PE
247 "{" {
248 STRING_GROW;
249 braces_level = 0;
250 code_start = loc->start;
251 BEGIN SC_BRACED_CODE;
252 }
e9955c83
AD
253
254 /* A type. */
d8d3f94a 255 "<"{tag}">" {
4cdb01db 256 obstack_grow (&string_obstack, yytext + 1, yyleng - 2);
41141c56
PE
257 STRING_FINISH;
258 val->struniq = struniq_new (last_string);
259 STRING_FREE;
4cdb01db
AD
260 return TYPE;
261 }
262
a706a1cc
PE
263 "%%" {
264 static int percent_percent_count;
e9955c83 265 if (++percent_percent_count == 2)
3f2d73f1
PE
266 {
267 code_start = loc->start;
268 BEGIN SC_EPILOGUE;
269 }
e9955c83
AD
270 return PERCENT_PERCENT;
271 }
272
a706a1cc 273 . {
41141c56 274 complain_at (*loc, _("invalid character: %s"), quote (yytext));
3f2d73f1
PE
275 }
276}
277
278
279 /*-----------------------------------------------------------------.
280 | Scanning after an identifier, checking whether a colon is next. |
281 `-----------------------------------------------------------------*/
282
283<SC_AFTER_IDENTIFIER>
284{
285 ":" {
286 rule_length = 0;
287 *loc = id_loc;
288 BEGIN INITIAL;
289 return ID_COLON;
290 }
291 . {
292 scanner_cursor.column -= mbsnwidth (yytext, yyleng, 0);
293 yyless (0);
294 *loc = id_loc;
295 BEGIN INITIAL;
296 return ID;
297 }
298 <<EOF>> {
299 *loc = id_loc;
300 BEGIN INITIAL;
301 return ID;
e9955c83
AD
302 }
303}
304
305
d8d3f94a
PE
306 /*---------------------------------------------------------------.
307 | Scanning a Yacc comment. The initial `/ *' is already eaten. |
308 `---------------------------------------------------------------*/
e9955c83 309
d8d3f94a 310<SC_YACC_COMMENT>
e9955c83 311{
3f2d73f1 312 "*/" BEGIN context_state;
a706a1cc 313 .|\n ;
3f2d73f1 314 <<EOF>> unexpected_end_of_file (token_start, "*/");
d8d3f94a
PE
315}
316
317
318 /*------------------------------------------------------------.
319 | Scanning a C comment. The initial `/ *' is already eaten. |
320 `------------------------------------------------------------*/
321
322<SC_COMMENT>
323{
3f2d73f1
PE
324 "*"{splice}"/" STRING_GROW; BEGIN context_state;
325 <<EOF>> unexpected_end_of_file (token_start, "*/");
e9955c83
AD
326}
327
328
d8d3f94a
PE
329 /*--------------------------------------------------------------.
330 | Scanning a line comment. The initial `//' is already eaten. |
331 `--------------------------------------------------------------*/
332
333<SC_LINE_COMMENT>
334{
3f2d73f1 335 "\n" STRING_GROW; BEGIN context_state;
41141c56 336 {splice} STRING_GROW;
3f2d73f1 337 <<EOF>> BEGIN context_state;
d8d3f94a
PE
338}
339
340
e9955c83
AD
341 /*----------------------------------------------------------------.
342 | Scanning a C string, including its escapes. The initial `"' is |
343 | already eaten. |
344 `----------------------------------------------------------------*/
345
346<SC_ESCAPED_STRING>
347{
db2cc12f 348 "\"" {
41141c56
PE
349 STRING_GROW;
350 STRING_FINISH;
3f2d73f1 351 loc->start = token_start;
41141c56 352 val->string = last_string;
efcb44dd 353 rule_length++;
a706a1cc 354 BEGIN INITIAL;
e9955c83
AD
355 return STRING;
356 }
357
41141c56 358 .|\n STRING_GROW;
3f2d73f1 359 <<EOF>> unexpected_end_of_file (token_start, "\"");
e9955c83
AD
360}
361
362 /*---------------------------------------------------------------.
363 | Scanning a C character, decoding its escapes. The initial "'" |
364 | is already eaten. |
365 `---------------------------------------------------------------*/
366
367<SC_ESCAPED_CHARACTER>
368{
db2cc12f 369 "'" {
41141c56
PE
370 STRING_GROW;
371 STRING_FINISH;
3f2d73f1 372 loc->start = token_start;
41141c56
PE
373 val->symbol = symbol_get (last_string, *loc);
374 symbol_class_set (val->symbol, token_sym, *loc);
375 symbol_user_token_number_set (val->symbol,
376 (unsigned char) last_string[1], *loc);
377 STRING_FREE;
a706a1cc
PE
378 rule_length++;
379 BEGIN INITIAL;
380 return ID;
e9955c83 381 }
a706a1cc 382
41141c56 383 .|\n STRING_GROW;
3f2d73f1 384 <<EOF>> unexpected_end_of_file (token_start, "'");
e9955c83
AD
385}
386
387
388 /*----------------------------.
389 | Decode escaped characters. |
390 `----------------------------*/
391
392<SC_ESCAPED_STRING,SC_ESCAPED_CHARACTER>
393{
d8d3f94a
PE
394 \\[0-7]{1,3} {
395 unsigned long c = strtoul (yytext + 1, 0, 8);
396 if (UCHAR_MAX < c)
3f2d73f1 397 complain_at (*loc, _("invalid escape sequence: %s"), quote (yytext));
e9955c83
AD
398 else
399 obstack_1grow (&string_obstack, c);
400 }
401
6b0d38ab 402 \\x[0-9abcdefABCDEF]+ {
d8d3f94a
PE
403 unsigned long c;
404 errno = 0;
405 c = strtoul (yytext + 2, 0, 16);
406 if (UCHAR_MAX < c || errno)
3f2d73f1 407 complain_at (*loc, _("invalid escape sequence: %s"), quote (yytext));
d8d3f94a
PE
408 else
409 obstack_1grow (&string_obstack, c);
e9955c83
AD
410 }
411
412 \\a obstack_1grow (&string_obstack, '\a');
413 \\b obstack_1grow (&string_obstack, '\b');
414 \\f obstack_1grow (&string_obstack, '\f');
415 \\n obstack_1grow (&string_obstack, '\n');
416 \\r obstack_1grow (&string_obstack, '\r');
417 \\t obstack_1grow (&string_obstack, '\t');
418 \\v obstack_1grow (&string_obstack, '\v');
412f8a59
PE
419
420 /* \\[\"\'?\\] would be shorter, but it confuses xgettext. */
421 \\("\""|"'"|"?"|"\\") obstack_1grow (&string_obstack, yytext[1]);
422
6b0d38ab 423 \\(u|U[0-9abcdefABCDEF]{4})[0-9abcdefABCDEF]{4} {
d8d3f94a
PE
424 int c = convert_ucn_to_byte (yytext);
425 if (c < 0)
3f2d73f1 426 complain_at (*loc, _("invalid escape sequence: %s"), quote (yytext));
d8d3f94a
PE
427 else
428 obstack_1grow (&string_obstack, c);
429 }
4f25ebb0 430 \\(.|\n) {
3f2d73f1 431 complain_at (*loc, _("unrecognized escape sequence: %s"), quote (yytext));
41141c56 432 STRING_GROW;
e9955c83
AD
433 }
434}
435
436
437 /*----------------------------------------------------------.
438 | Scanning a C character without decoding its escapes. The |
439 | initial "'" is already eaten. |
440 `----------------------------------------------------------*/
441
442<SC_CHARACTER>
443{
3f2d73f1 444 "'" STRING_GROW; BEGIN context_state;
41141c56 445 \\{splice}[^$@\[\]] STRING_GROW;
3f2d73f1 446 <<EOF>> unexpected_end_of_file (token_start, "'");
e9955c83
AD
447}
448
449
450 /*----------------------------------------------------------------.
451 | Scanning a C string, without decoding its escapes. The initial |
452 | `"' is already eaten. |
453 `----------------------------------------------------------------*/
454
455<SC_STRING>
456{
3f2d73f1 457 "\"" STRING_GROW; BEGIN context_state;
41141c56 458 \\{splice}[^$@\[\]] STRING_GROW;
3f2d73f1 459 <<EOF>> unexpected_end_of_file (token_start, "\"");
e9955c83
AD
460}
461
462
463 /*---------------------------------------------------.
464 | Strings, comments etc. can be found in user code. |
465 `---------------------------------------------------*/
466
467<SC_BRACED_CODE,SC_PROLOGUE,SC_EPILOGUE>
468{
3f2d73f1
PE
469 "'" {
470 STRING_GROW;
471 context_state = YY_START;
472 token_start = loc->start;
473 BEGIN SC_CHARACTER;
474 }
475 "\"" {
476 STRING_GROW;
477 context_state = YY_START;
478 token_start = loc->start;
479 BEGIN SC_STRING;
480 }
481 "/"{splice}"*" {
482 STRING_GROW;
483 context_state = YY_START;
484 token_start = loc->start;
485 BEGIN SC_COMMENT;
486 }
487 "/"{splice}"/" {
488 STRING_GROW;
489 context_state = YY_START;
490 BEGIN SC_LINE_COMMENT;
491 }
e9955c83
AD
492}
493
494
495 /*---------------------------------------------------------------.
496 | Scanning some code in braces (%union and actions). The initial |
497 | "{" is already eaten. |
498 `---------------------------------------------------------------*/
499
500<SC_BRACED_CODE>
501{
41141c56
PE
502 "{"|"<"{splice}"%" STRING_GROW; braces_level++;
503 "%"{splice}">" STRING_GROW; braces_level--;
e9955c83 504 "}" {
41141c56 505 STRING_GROW;
1a9e39f1
PE
506 braces_level--;
507 if (braces_level < 0)
e9955c83 508 {
41141c56 509 STRING_FINISH;
3f2d73f1 510 loc->start = code_start;
41141c56 511 val->string = last_string;
efcb44dd 512 rule_length++;
a706a1cc 513 BEGIN INITIAL;
e9955c83
AD
514 return BRACED_CODE;
515 }
516 }
517
a706a1cc
PE
518 /* Tokenize `<<%' correctly (as `<<' `%') rather than incorrectly
519 (as `<' `<%'). */
41141c56 520 "<"{splice}"<" STRING_GROW;
a706a1cc 521
d8d3f94a 522 "$"("<"{tag}">")?(-?[0-9]+|"$") { handle_dollar (current_braced_code,
41141c56 523 yytext, *loc); }
f25bfb75 524 "@"(-?[0-9]+|"$") { handle_at (current_braced_code,
41141c56 525 yytext, *loc); }
e9955c83 526
3f2d73f1 527 <<EOF>> unexpected_end_of_file (code_start, "}");
e9955c83
AD
528}
529
530
531 /*--------------------------------------------------------------.
532 | Scanning some prologue: from "%{" (already scanned) to "%}". |
533 `--------------------------------------------------------------*/
534
535<SC_PROLOGUE>
536{
537 "%}" {
41141c56 538 STRING_FINISH;
3f2d73f1 539 loc->start = code_start;
41141c56 540 val->string = last_string;
a706a1cc 541 BEGIN INITIAL;
e9955c83
AD
542 return PROLOGUE;
543 }
544
3f2d73f1 545 <<EOF>> unexpected_end_of_file (code_start, "%}");
e9955c83
AD
546}
547
548
549 /*---------------------------------------------------------------.
550 | Scanning the epilogue (everything after the second "%%", which |
d8d3f94a 551 | has already been eaten). |
e9955c83
AD
552 `---------------------------------------------------------------*/
553
554<SC_EPILOGUE>
555{
e9955c83 556 <<EOF>> {
41141c56 557 STRING_FINISH;
3f2d73f1 558 loc->start = code_start;
41141c56 559 val->string = last_string;
a706a1cc 560 BEGIN INITIAL;
e9955c83
AD
561 return EPILOGUE;
562 }
563}
564
565
a706a1cc
PE
566 /*----------------------------------------------------------------.
567 | By default, grow the string obstack with the input, escaping M4 |
568 | quoting characters. |
569 `----------------------------------------------------------------*/
570
571<SC_COMMENT,SC_LINE_COMMENT,SC_STRING,SC_CHARACTER,SC_BRACED_CODE,SC_PROLOGUE,SC_EPILOGUE>
572{
ac060e78
PE
573 \$ obstack_sgrow (&string_obstack, "$][");
574 \@ obstack_sgrow (&string_obstack, "@@");
575 \[ obstack_sgrow (&string_obstack, "@{");
576 \] obstack_sgrow (&string_obstack, "@}");
41141c56 577 .|\n STRING_GROW;
a706a1cc
PE
578}
579
580
e9955c83
AD
581%%
582
3f2d73f1
PE
583/* Set *LOC and adjust scanner cursor to account for token TOKEN of
584 size SIZE. */
6c30d641
PE
585
586static void
3f2d73f1 587adjust_location (location_t *loc, char const *token, size_t size)
6c30d641 588{
3f2d73f1
PE
589 int line = scanner_cursor.line;
590 int column = scanner_cursor.column;
6c30d641
PE
591 char const *p0 = token;
592 char const *p = token;
593 char const *lim = token + size;
594
3f2d73f1
PE
595 loc->start = scanner_cursor;
596
6c30d641
PE
597 for (p = token; p < lim; p++)
598 switch (*p)
599 {
6c30d641
PE
600 case '\n':
601 line++;
602 column = 1;
603 p0 = p + 1;
604 break;
605
606 case '\t':
607 column += mbsnwidth (p0, p - p0, 0);
608 column += 8 - ((column - 1) & 7);
609 p0 = p + 1;
610 break;
611 }
612
3f2d73f1
PE
613 scanner_cursor.line = line;
614 scanner_cursor.column = column + mbsnwidth (p0, p - p0, 0);
615
616 loc->end = scanner_cursor;
6c30d641
PE
617}
618
619
/* Read up to SIZE bytes from FP into BUF and return the number of
   bytes stored.  Carriage returns never reach the caller: both
   "\r\n" and a lone '\r' are delivered as a single '\n'.  The
   buffer is compacted in place, so the result may be smaller than
   the number of bytes actually read from FP.  */

static size_t
no_cr_read (FILE *fp, char *buf, size_t size)
{
  size_t nread = fread (buf, 1, size, fp);
  char const *src;
  char const *end;
  char *dst;

  if (nread == 0)
    return nread;

  dst = memchr (buf, '\r', nread);
  if (!dst)
    return nread;

  /* DST sits on a '\r'; SRC is the next byte still to be examined.  */
  src = dst + 1;
  end = buf + nread;

  for (;;)
    {
      /* Turn the '\r' into '\n' and drop a '\n' right after it.  */
      *dst++ = '\n';
      if (src == end)
        {
          /* The '\r' was the last byte read: peek one byte further in
             the stream to see whether a '\n' completes the pair.  */
          int ch = getc (fp);
          if (ch != '\n' && ungetc (ch, fp) != ch)
            break;
        }
      else if (*src == '\n')
        src++;

      /* Slide the following bytes down until another '\r' shows up
         or the data runs out.  */
      for (;;)
        {
          char c;
          if (src == end)
            return dst - buf;
          c = *src++;
          *dst = c;
          if (c == '\r')
            break;
          dst++;
        }
    }

  return dst - buf;
}
665
666
e9955c83 667/*------------------------------------------------------------------.
366eea36 668| TEXT is pointing to a wannabee semantic value (i.e., a `$'). |
e9955c83
AD
669| |
670| Possible inputs: $[<TYPENAME>]($|integer) |
671| |
672| Output to the STRING_OBSTACK a reference to this semantic value. |
673`------------------------------------------------------------------*/
674
f25bfb75 675static inline void
366eea36 676handle_action_dollar (char *text, location_t location)
e9955c83
AD
677{
678 const char *type_name = NULL;
366eea36 679 char *cp = text + 1;
e9955c83
AD
680
681 /* Get the type name if explicit. */
682 if (*cp == '<')
683 {
684 type_name = ++cp;
685 while (*cp != '>')
686 ++cp;
687 *cp = '\0';
688 ++cp;
689 }
690
691 if (*cp == '$')
692 {
693 if (!type_name)
56c47203 694 type_name = symbol_list_n_type_name_get (current_rule, location, 0);
e9955c83 695 if (!type_name && typed)
56c47203 696 complain_at (location, _("$$ of `%s' has no declared type"),
97650f4e 697 current_rule->sym->tag);
e9955c83
AD
698 if (!type_name)
699 type_name = "";
700 obstack_fgrow1 (&string_obstack,
701 "]b4_lhs_value([%s])[", type_name);
702 }
d8d3f94a 703 else
e9955c83 704 {
d8d3f94a
PE
705 long num;
706 errno = 0;
707 num = strtol (cp, 0, 10);
e9955c83 708
d8d3f94a 709 if (INT_MIN <= num && num <= rule_length && ! errno)
e9955c83 710 {
d8d3f94a 711 int n = num;
e9955c83 712 if (!type_name && n > 0)
56c47203
AD
713 type_name = symbol_list_n_type_name_get (current_rule, location,
714 n);
e9955c83 715 if (!type_name && typed)
56c47203 716 complain_at (location, _("$%d of `%s' has no declared type"),
97650f4e 717 n, current_rule->sym->tag);
e9955c83
AD
718 if (!type_name)
719 type_name = "";
720 obstack_fgrow3 (&string_obstack,
721 "]b4_rhs_value([%d], [%d], [%s])[",
722 rule_length, n, type_name);
723 }
d8d3f94a 724 else
98f2caaa 725 complain_at (location, _("integer out of range: %s"), quote (text));
9280d3ef
AD
726 }
727}
728
729
366eea36 730/*---------------------------------------------------------------.
d8d3f94a 731| TEXT is expected to be $$ in some code associated to a symbol: |
366eea36
AD
732| destructor or printer. |
733`---------------------------------------------------------------*/
9280d3ef 734
f25bfb75 735static inline void
366eea36 736handle_symbol_code_dollar (char *text, location_t location)
9280d3ef 737{
366eea36 738 char *cp = text + 1;
9280d3ef 739 if (*cp == '$')
366eea36 740 obstack_sgrow (&string_obstack, "]b4_dollar_dollar[");
9280d3ef 741 else
c4d720cd 742 complain_at (location, _("invalid value: %s"), quote (text));
e9955c83
AD
743}
744
f25bfb75
AD
745
746/*-----------------------------------------------------------------.
747| Dispatch onto handle_action_dollar, or handle_destructor_dollar, |
748| depending upon CODE_KIND. |
749`-----------------------------------------------------------------*/
e9955c83
AD
750
751static void
f25bfb75
AD
752handle_dollar (braced_code_t braced_code_kind,
753 char *text, location_t location)
754{
755 switch (braced_code_kind)
756 {
757 case action_braced_code:
758 handle_action_dollar (text, location);
759 break;
760
761 case destructor_braced_code:
366eea36
AD
762 case printer_braced_code:
763 handle_symbol_code_dollar (text, location);
f25bfb75
AD
764 break;
765 }
766}
767
768
769/*------------------------------------------------------.
770| TEXT is a location token (i.e., a `@...'). Output to |
771| STRING_OBSTACK a reference to this location. |
772`------------------------------------------------------*/
773
774static inline void
775handle_action_at (char *text, location_t location)
e9955c83 776{
366eea36 777 char *cp = text + 1;
e9955c83 778 locations_flag = 1;
e9955c83 779
366eea36 780 if (*cp == '$')
e9955c83
AD
781 {
782 obstack_sgrow (&string_obstack, "]b4_lhs_location[");
783 }
d8d3f94a 784 else
e9955c83 785 {
d8d3f94a
PE
786 long num;
787 errno = 0;
788 num = strtol (cp, 0, 10);
dafdc66f 789
d8d3f94a
PE
790 if (INT_MIN <= num && num <= rule_length && ! errno)
791 {
792 int n = num;
793 obstack_fgrow2 (&string_obstack, "]b4_rhs_location([%d], [%d])[",
794 rule_length, n);
795 }
e9955c83 796 else
98f2caaa 797 complain_at (location, _("integer out of range: %s"), quote (text));
f25bfb75
AD
798 }
799}
800
801
366eea36 802/*---------------------------------------------------------------.
d8d3f94a 803| TEXT is expected to be @$ in some code associated to a symbol: |
366eea36
AD
804| destructor or printer. |
805`---------------------------------------------------------------*/
f25bfb75
AD
806
807static inline void
366eea36 808handle_symbol_code_at (char *text, location_t location)
f25bfb75 809{
366eea36
AD
810 char *cp = text + 1;
811 if (*cp == '$')
812 obstack_sgrow (&string_obstack, "]b4_at_dollar[");
f25bfb75 813 else
c4d720cd 814 complain_at (location, _("invalid value: %s"), quote (text));
e9955c83 815}
4cdb01db 816
f25bfb75
AD
817
818/*-------------------------------------------------------------------.
819| Dispatch onto handle_action_at, or handle_destructor_at, depending |
820| upon CODE_KIND. |
821`-------------------------------------------------------------------*/
822
823static void
824handle_at (braced_code_t braced_code_kind,
825 char *text, location_t location)
826{
827 switch (braced_code_kind)
828 {
829 case action_braced_code:
830 handle_action_at (text, location);
831 break;
832
833 case destructor_braced_code:
366eea36
AD
834 case printer_braced_code:
835 handle_symbol_code_at (text, location);
f25bfb75
AD
836 break;
837 }
838}
839
840
d8d3f94a
PE
/*------------------------------------------------------------------.
| Convert universal character name UCN to a single-byte character,  |
| and return that character.  Return -1 if UCN does not correspond  |
| to a single-byte character.                                       |
|                                                                   |
| UCN is the escape as scanned, i.e. a backslash, `u' or `U', and   |
| then the hexadecimal digits; the first two bytes are skipped.     |
`------------------------------------------------------------------*/

static int
convert_ucn_to_byte (char const *ucn)
{
  unsigned long code = strtoul (ucn + 2, 0, 16);

  /* FIXME: Currently we assume Unicode-compatible unibyte characters
     on ASCII hosts (i.e., Latin-1 on hosts with 8-bit bytes).  On
     non-ASCII hosts we support only the portable C character set.
     These limitations should be removed once we add support for
     multibyte characters.  */

  if (UCHAR_MAX < code)
    return -1;

#if ! ('$' == 0x24 && '@' == 0x40 && '`' == 0x60 && '~' == 0x7e)
  {
    /* A non-ASCII host.  Use CODE to index into a table of the C
       basic execution character set, which is guaranteed to exist on
       all Standard C platforms.  This table also includes '$', '@',
       and '`', which are not in the basic execution character set but
       which are unibyte characters on all the platforms that we know
       about.  */
    static signed char const table[] =
      {
        '\0',   -1,   -1,   -1,   -1,   -1,   -1, '\a',
        '\b', '\t', '\n', '\v', '\f', '\r',   -1,   -1,
          -1,   -1,   -1,   -1,   -1,   -1,   -1,   -1,
          -1,   -1,   -1,   -1,   -1,   -1,   -1,   -1,
         ' ',  '!',  '"',  '#',  '$',  '%',  '&', '\'',
         '(',  ')',  '*',  '+',  ',',  '-',  '.',  '/',
         '0',  '1',  '2',  '3',  '4',  '5',  '6',  '7',
         '8',  '9',  ':',  ';',  '<',  '=',  '>',  '?',
         '@',  'A',  'B',  'C',  'D',  'E',  'F',  'G',
         'H',  'I',  'J',  'K',  'L',  'M',  'N',  'O',
         'P',  'Q',  'R',  'S',  'T',  'U',  'V',  'W',
         'X',  'Y',  'Z',  '[', '\\',  ']',  '^',  '_',
         '`',  'a',  'b',  'c',  'd',  'e',  'f',  'g',
         'h',  'i',  'j',  'k',  'l',  'm',  'n',  'o',
         'p',  'q',  'r',  's',  't',  'u',  'v',  'w',
         'x',  'y',  'z',  '{',  '|',  '}',  '~'
      };

    /* Return the int directly: routing -1 through the unsigned CODE
       and back to int would rely on an implementation-defined
       conversion.  */
    return code < sizeof table ? table[code] : -1;
  }
#else
  /* CODE is at most UCHAR_MAX here, so it fits in an int.  */
  return (int) code;
#endif
}
895
896
900c5db5
AD
897/*----------------------------------------------------------------.
898| Handle `#line INT "FILE"'. ARGS has already skipped `#line '. |
899`----------------------------------------------------------------*/
900
901static void
3f2d73f1 902handle_syncline (char *args)
900c5db5
AD
903{
904 int lineno = strtol (args, &args, 10);
905 const char *file = NULL;
906 file = strchr (args, '"') + 1;
907 *strchr (file, '"') = 0;
3f2d73f1
PE
908 scanner_cursor.file = current_file = xstrdup (file);
909 scanner_cursor.line = lineno;
910 scanner_cursor.column = 1;
900c5db5
AD
911}
912
a706a1cc 913
3f2d73f1
PE
914/*------------------------------------------------------------------------.
915| Report an unexpected EOF in a token or comment starting at START. |
916| An end of file was encountered and the expected TOKEN_END was missing. |
917| After reporting the problem, pretend that TOKEN_END was found. |
918`------------------------------------------------------------------------*/
a706a1cc
PE
919
920static void
3f2d73f1 921unexpected_end_of_file (boundary start, char const *token_end)
a706a1cc 922{
345532d7 923 size_t i = strlen (token_end);
a706a1cc 924
3f2d73f1
PE
925 location_t location;
926 location.start = start;
927 location.end = scanner_cursor;
928 complain_at (location, _("missing `%s' at end of file"), token_end);
345532d7 929
3f2d73f1
PE
930 /* Adjust scanner cursor so that any later message does not count
931 the characters about to be inserted. */
932 scanner_cursor.column -= i;
345532d7
PE
933
934 while (i != 0)
935 unput (token_end[--i]);
a706a1cc
PE
936}
937
938
f25bfb75
AD
939/*-------------------------.
940| Initialize the scanner. |
941`-------------------------*/
942
1d6412ad
AD
943void
944scanner_initialize (void)
945{
946 obstack_init (&string_obstack);
947}
948
949
f25bfb75
AD
950/*-----------------------------------------------.
951| Free all the memory allocated to the scanner. |
952`-----------------------------------------------*/
953
4cdb01db
AD
954void
955scanner_free (void)
956{
957 obstack_free (&string_obstack, 0);
536545f3
AD
958 /* Reclaim Flex's buffers. */
959 yy_delete_buffer (YY_CURRENT_BUFFER);
4cdb01db 960}