]> git.saurik.com Git - bison.git/blame - src/scan-gram.l
Do not include <errno.h>.
[bison.git] / src / scan-gram.l
CommitLineData
e9955c83
AD
1/* Bison Grammar Scanner -*- C -*-
2 Copyright (C) 2002 Free Software Foundation, Inc.
3
4 This file is part of Bison, the GNU Compiler Compiler.
5
6 This program is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 2 of the License, or
9 (at your option) any later version.
10
11 This program is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
15
16 You should have received a copy of the GNU General Public License
17 along with this program; if not, write to the Free Software
18 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
19 02111-1307 USA
20*/
21
a706a1cc 22%option debug nodefault noyywrap never-interactive
e9955c83
AD
23%option prefix="gram_" outfile="lex.yy.c"
24
25%{
26#include "system.h"
223ff46e
PE
27
28#include <mbswidth.h>
29#include <get-errno.h>
30#include <quote.h>
31
e9955c83 32#include "complain.h"
3f2d73f1 33#include "files.h"
e9955c83
AD
34#include "getargs.h"
35#include "gram.h"
36#include "reader.h"
223ff46e 37#include "uniqstr.h"
e9955c83 38
3f2d73f1
PE
39#define YY_USER_INIT \
40 do \
41 { \
42 scanner_cursor.file = current_file; \
43 scanner_cursor.line = 1; \
44 scanner_cursor.column = 1; \
45 } \
46 while (0)
8efe435c 47
3f2d73f1
PE
48/* Location of scanner cursor. */
49boundary scanner_cursor;
41141c56 50
223ff46e 51static void adjust_location (location *, char const *, size_t);
3f2d73f1 52#define YY_USER_ACTION adjust_location (loc, yytext, yyleng);
d8d3f94a 53
6c30d641 54static size_t no_cr_read (FILE *, char *, size_t);
d8d3f94a
PE
55#define YY_INPUT(buf, result, size) ((result) = no_cr_read (yyin, buf, size))
56
57
223ff46e 58/* OBSTACK_FOR_STRING -- Used to store all the characters that we need to
44995b2e
AD
59 keep (to construct ID, STRINGS etc.). Use the following macros to
60 use it.
61
41141c56
PE
62 Use STRING_GROW to append what has just been matched, and
63 STRING_FINISH to end the string (it puts the ending 0).
64 STRING_FINISH also stores this string in LAST_STRING, which can be
65 used, and which is used by STRING_FREE to free the last string. */
44995b2e 66
223ff46e 67static struct obstack obstack_for_string;
44995b2e 68
7ec2d4cd
AD
69/* A string representing the most recently saved token. */
70static char *last_string;
71
72
41141c56 73#define STRING_GROW \
223ff46e 74 obstack_grow (&obstack_for_string, yytext, yyleng)
44995b2e 75
41141c56 76#define STRING_FINISH \
44995b2e 77 do { \
223ff46e
PE
78 obstack_1grow (&obstack_for_string, '\0'); \
79 last_string = obstack_finish (&obstack_for_string); \
44995b2e
AD
80 } while (0)
81
41141c56 82#define STRING_FREE \
223ff46e 83 obstack_free (&obstack_for_string, last_string)
e9955c83 84
7ec2d4cd
AD
85void
86scanner_last_string_free (void)
87{
41141c56 88 STRING_FREE;
7ec2d4cd 89}
e9955c83 90
efcb44dd
PE
91/* Within well-formed rules, RULE_LENGTH is the number of values in
92 the current rule so far, which says where to find `$0' with respect
93 to the top of the stack. It is not the same as the rule->length in
94 the case of mid rule actions.
95
96 Outside of well-formed rules, RULE_LENGTH has an undefined value. */
97static int rule_length;
98
223ff46e
PE
99static void handle_dollar (braced_code code_kind, char *cp, location loc);
100static void handle_at (braced_code code_kind, char *cp, location loc);
3f2d73f1 101static void handle_syncline (char *args);
d8d3f94a 102static int convert_ucn_to_byte (char const *hex_text);
3f2d73f1 103static void unexpected_end_of_file (boundary, char const *);
e9955c83
AD
104
105%}
d8d3f94a 106%x SC_COMMENT SC_LINE_COMMENT SC_YACC_COMMENT
e9955c83 107%x SC_STRING SC_CHARACTER
3f2d73f1 108%x SC_AFTER_IDENTIFIER
e9955c83
AD
109%x SC_ESCAPED_STRING SC_ESCAPED_CHARACTER
110%x SC_BRACED_CODE SC_PROLOGUE SC_EPILOGUE
111
29c01725
AD
112letter [.abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ_]
113id {letter}({letter}|[0-9])*
114directive %{letter}({letter}|[0-9]|-)*
115int [0-9]+
d8d3f94a
PE
116
117/* POSIX says that a tag must be both an id and a C union member, but
118 historically almost any character is allowed in a tag. We disallow
119 NUL and newline, as this simplifies our implementation. */
120tag [^\0\n>]+
121
122/* Zero or more instances of backslash-newline. Following GCC, allow
123 white space between the backslash and the newline. */
124splice (\\[ \f\t\v]*\n)*
e9955c83
AD
125
126%%
127%{
a706a1cc 128 /* Nesting level of the current code in braces. */
1a9e39f1
PE
129 int braces_level IF_LINT (= 0);
130
3f2d73f1
PE
131 /* Parent context state, when applicable. */
132 int context_state IF_LINT (= 0);
a706a1cc 133
3f2d73f1 134 /* Location of most recent identifier, when applicable. */
223ff46e 135 location id_loc IF_LINT (= *loc);
3f2d73f1 136
223ff46e 137 /* Where containing code started, when applicable. */
3f2d73f1
PE
138 boundary code_start IF_LINT (= loc->start);
139
223ff46e
PE
140 /* Where containing comment or string or character literal started,
141 when applicable. */
3f2d73f1 142 boundary token_start IF_LINT (= loc->start);
e9955c83
AD
143%}
144
145
3f2d73f1
PE
146 /*-----------------------.
147 | Scanning white space. |
148 `-----------------------*/
149
150<INITIAL,SC_AFTER_IDENTIFIER>
151{
152 [ \f\n\t\v] ;
153
154 /* Comments. */
155 "/*" token_start = loc->start; context_state = YY_START; BEGIN SC_YACC_COMMENT;
156 "//".* ;
157
158 /* #line directives are not documented, and may be withdrawn or
159 modified in future versions of Bison. */
160 ^"#line "{int}" \"".*"\"\n" {
161 handle_syncline (yytext + sizeof "#line " - 1);
162 }
163}
164
165
e9955c83
AD
166 /*----------------------------.
167 | Scanning Bison directives. |
168 `----------------------------*/
169<INITIAL>
170{
171 "%binary" return PERCENT_NONASSOC;
172 "%debug" return PERCENT_DEBUG;
173 "%define" return PERCENT_DEFINE;
174 "%defines" return PERCENT_DEFINES;
9280d3ef 175 "%destructor" return PERCENT_DESTRUCTOR;
676385e2 176 "%dprec" return PERCENT_DPREC;
e9955c83
AD
177 "%error"[-_]"verbose" return PERCENT_ERROR_VERBOSE;
178 "%expect" return PERCENT_EXPECT;
179 "%file-prefix" return PERCENT_FILE_PREFIX;
180 "%fixed"[-_]"output"[-_]"files" return PERCENT_YACC;
ae7453f2 181 "%glr-parser" return PERCENT_GLR_PARSER;
e9955c83
AD
182 "%left" return PERCENT_LEFT;
183 "%locations" return PERCENT_LOCATIONS;
676385e2 184 "%merge" return PERCENT_MERGE;
e9955c83
AD
185 "%name"[-_]"prefix" return PERCENT_NAME_PREFIX;
186 "%no"[-_]"lines" return PERCENT_NO_LINES;
187 "%nonassoc" return PERCENT_NONASSOC;
188 "%nterm" return PERCENT_NTERM;
189 "%output" return PERCENT_OUTPUT;
ae7453f2 190 "%parse-param" return PERCENT_PARSE_PARAM;
d8d3f94a 191 "%prec" rule_length--; return PERCENT_PREC;
366eea36 192 "%printer" return PERCENT_PRINTER;
e9955c83
AD
193 "%pure"[-_]"parser" return PERCENT_PURE_PARSER;
194 "%right" return PERCENT_RIGHT;
ae7453f2 195 "%lex-param" return PERCENT_LEX_PARAM;
e9955c83
AD
196 "%skeleton" return PERCENT_SKELETON;
197 "%start" return PERCENT_START;
198 "%term" return PERCENT_TOKEN;
199 "%token" return PERCENT_TOKEN;
200 "%token"[-_]"table" return PERCENT_TOKEN_TABLE;
201 "%type" return PERCENT_TYPE;
202 "%union" return PERCENT_UNION;
203 "%verbose" return PERCENT_VERBOSE;
204 "%yacc" return PERCENT_YACC;
205
3f2d73f1 206 {directive} {
41141c56 207 complain_at (*loc, _("invalid directive: %s"), quote (yytext));
412f8a59 208 }
900c5db5 209
e9955c83 210 "=" return EQUAL;
d8d3f94a 211 "|" rule_length = 0; return PIPE;
e9955c83
AD
212 ";" return SEMICOLON;
213
763ed7a6
PE
214 "," {
215 warn_at (*loc, _("stray `,' treated as white space"));
763ed7a6
PE
216 }
217
3f2d73f1 218 {id} {
41141c56 219 val->symbol = symbol_get (yytext, *loc);
3f2d73f1 220 id_loc = *loc;
efcb44dd 221 rule_length++;
3f2d73f1 222 BEGIN SC_AFTER_IDENTIFIER;
e9955c83
AD
223 }
224
d8d3f94a
PE
225 {int} {
226 unsigned long num;
223ff46e 227 set_errno (0);
d8d3f94a 228 num = strtoul (yytext, 0, 10);
223ff46e 229 if (INT_MAX < num || get_errno ())
d8d3f94a 230 {
41141c56 231 complain_at (*loc, _("integer out of range: %s"), quote (yytext));
d8d3f94a
PE
232 num = INT_MAX;
233 }
41141c56 234 val->integer = num;
d8d3f94a
PE
235 return INT;
236 }
e9955c83
AD
237
238 /* Characters. We don't check there is only one. */
3f2d73f1 239 "'" STRING_GROW; token_start = loc->start; BEGIN SC_ESCAPED_CHARACTER;
e9955c83
AD
240
241 /* Strings. */
3f2d73f1 242 "\"" STRING_GROW; token_start = loc->start; BEGIN SC_ESCAPED_STRING;
e9955c83
AD
243
244 /* Prologue. */
3f2d73f1 245 "%{" code_start = loc->start; BEGIN SC_PROLOGUE;
e9955c83
AD
246
247 /* Code in between braces. */
3f2d73f1
PE
248 "{" {
249 STRING_GROW;
250 braces_level = 0;
251 code_start = loc->start;
252 BEGIN SC_BRACED_CODE;
253 }
e9955c83
AD
254
255 /* A type. */
d8d3f94a 256 "<"{tag}">" {
223ff46e 257 obstack_grow (&obstack_for_string, yytext + 1, yyleng - 2);
41141c56 258 STRING_FINISH;
223ff46e 259 val->uniqstr = uniqstr_new (last_string);
41141c56 260 STRING_FREE;
4cdb01db
AD
261 return TYPE;
262 }
263
a706a1cc
PE
264 "%%" {
265 static int percent_percent_count;
e9955c83 266 if (++percent_percent_count == 2)
3f2d73f1
PE
267 {
268 code_start = loc->start;
269 BEGIN SC_EPILOGUE;
270 }
e9955c83
AD
271 return PERCENT_PERCENT;
272 }
273
a706a1cc 274 . {
41141c56 275 complain_at (*loc, _("invalid character: %s"), quote (yytext));
3f2d73f1
PE
276 }
277}
278
279
280 /*-----------------------------------------------------------------.
281 | Scanning after an identifier, checking whether a colon is next. |
282 `-----------------------------------------------------------------*/
283
284<SC_AFTER_IDENTIFIER>
285{
286 ":" {
287 rule_length = 0;
288 *loc = id_loc;
289 BEGIN INITIAL;
290 return ID_COLON;
291 }
292 . {
293 scanner_cursor.column -= mbsnwidth (yytext, yyleng, 0);
294 yyless (0);
295 *loc = id_loc;
296 BEGIN INITIAL;
297 return ID;
298 }
299 <<EOF>> {
300 *loc = id_loc;
301 BEGIN INITIAL;
302 return ID;
e9955c83
AD
303 }
304}
305
306
d8d3f94a
PE
307 /*---------------------------------------------------------------.
308 | Scanning a Yacc comment. The initial `/ *' is already eaten. |
309 `---------------------------------------------------------------*/
e9955c83 310
d8d3f94a 311<SC_YACC_COMMENT>
e9955c83 312{
3f2d73f1 313 "*/" BEGIN context_state;
a706a1cc 314 .|\n ;
3f2d73f1 315 <<EOF>> unexpected_end_of_file (token_start, "*/");
d8d3f94a
PE
316}
317
318
319 /*------------------------------------------------------------.
320 | Scanning a C comment. The initial `/ *' is already eaten. |
321 `------------------------------------------------------------*/
322
323<SC_COMMENT>
324{
3f2d73f1
PE
325 "*"{splice}"/" STRING_GROW; BEGIN context_state;
326 <<EOF>> unexpected_end_of_file (token_start, "*/");
e9955c83
AD
327}
328
329
d8d3f94a
PE
330 /*--------------------------------------------------------------.
331 | Scanning a line comment. The initial `//' is already eaten. |
332 `--------------------------------------------------------------*/
333
334<SC_LINE_COMMENT>
335{
3f2d73f1 336 "\n" STRING_GROW; BEGIN context_state;
41141c56 337 {splice} STRING_GROW;
3f2d73f1 338 <<EOF>> BEGIN context_state;
d8d3f94a
PE
339}
340
341
e9955c83
AD
342 /*----------------------------------------------------------------.
343 | Scanning a C string, including its escapes. The initial `"' is |
344 | already eaten. |
345 `----------------------------------------------------------------*/
346
347<SC_ESCAPED_STRING>
348{
db2cc12f 349 "\"" {
41141c56
PE
350 STRING_GROW;
351 STRING_FINISH;
3f2d73f1 352 loc->start = token_start;
223ff46e 353 val->chars = last_string;
efcb44dd 354 rule_length++;
a706a1cc 355 BEGIN INITIAL;
e9955c83
AD
356 return STRING;
357 }
358
41141c56 359 .|\n STRING_GROW;
3f2d73f1 360 <<EOF>> unexpected_end_of_file (token_start, "\"");
e9955c83
AD
361}
362
363 /*---------------------------------------------------------------.
364 | Scanning a C character, decoding its escapes. The initial "'" |
365 | is already eaten. |
366 `---------------------------------------------------------------*/
367
368<SC_ESCAPED_CHARACTER>
369{
db2cc12f 370 "'" {
41141c56
PE
371 STRING_GROW;
372 STRING_FINISH;
3f2d73f1 373 loc->start = token_start;
41141c56
PE
374 val->symbol = symbol_get (last_string, *loc);
375 symbol_class_set (val->symbol, token_sym, *loc);
376 symbol_user_token_number_set (val->symbol,
377 (unsigned char) last_string[1], *loc);
378 STRING_FREE;
a706a1cc
PE
379 rule_length++;
380 BEGIN INITIAL;
381 return ID;
e9955c83 382 }
a706a1cc 383
41141c56 384 .|\n STRING_GROW;
3f2d73f1 385 <<EOF>> unexpected_end_of_file (token_start, "'");
e9955c83
AD
386}
387
388
389 /*----------------------------.
390 | Decode escaped characters. |
391 `----------------------------*/
392
393<SC_ESCAPED_STRING,SC_ESCAPED_CHARACTER>
394{
d8d3f94a
PE
395 \\[0-7]{1,3} {
396 unsigned long c = strtoul (yytext + 1, 0, 8);
397 if (UCHAR_MAX < c)
3f2d73f1 398 complain_at (*loc, _("invalid escape sequence: %s"), quote (yytext));
e9955c83 399 else
223ff46e 400 obstack_1grow (&obstack_for_string, c);
e9955c83
AD
401 }
402
6b0d38ab 403 \\x[0-9abcdefABCDEF]+ {
d8d3f94a 404 unsigned long c;
223ff46e 405 set_errno (0);
d8d3f94a 406 c = strtoul (yytext + 2, 0, 16);
223ff46e 407 if (UCHAR_MAX < c || get_errno ())
3f2d73f1 408 complain_at (*loc, _("invalid escape sequence: %s"), quote (yytext));
d8d3f94a 409 else
223ff46e 410 obstack_1grow (&obstack_for_string, c);
e9955c83
AD
411 }
412
223ff46e
PE
413 \\a obstack_1grow (&obstack_for_string, '\a');
414 \\b obstack_1grow (&obstack_for_string, '\b');
415 \\f obstack_1grow (&obstack_for_string, '\f');
416 \\n obstack_1grow (&obstack_for_string, '\n');
417 \\r obstack_1grow (&obstack_for_string, '\r');
418 \\t obstack_1grow (&obstack_for_string, '\t');
419 \\v obstack_1grow (&obstack_for_string, '\v');
412f8a59
PE
420
421 /* \\[\"\'?\\] would be shorter, but it confuses xgettext. */
223ff46e 422 \\("\""|"'"|"?"|"\\") obstack_1grow (&obstack_for_string, yytext[1]);
412f8a59 423
6b0d38ab 424 \\(u|U[0-9abcdefABCDEF]{4})[0-9abcdefABCDEF]{4} {
d8d3f94a
PE
425 int c = convert_ucn_to_byte (yytext);
426 if (c < 0)
3f2d73f1 427 complain_at (*loc, _("invalid escape sequence: %s"), quote (yytext));
d8d3f94a 428 else
223ff46e 429 obstack_1grow (&obstack_for_string, c);
d8d3f94a 430 }
4f25ebb0 431 \\(.|\n) {
3f2d73f1 432 complain_at (*loc, _("unrecognized escape sequence: %s"), quote (yytext));
41141c56 433 STRING_GROW;
e9955c83
AD
434 }
435}
436
437
438 /*----------------------------------------------------------.
439 | Scanning a C character without decoding its escapes. The |
440 | initial "'" is already eaten. |
441 `----------------------------------------------------------*/
442
443<SC_CHARACTER>
444{
3f2d73f1 445 "'" STRING_GROW; BEGIN context_state;
41141c56 446 \\{splice}[^$@\[\]] STRING_GROW;
3f2d73f1 447 <<EOF>> unexpected_end_of_file (token_start, "'");
e9955c83
AD
448}
449
450
451 /*----------------------------------------------------------------.
452 | Scanning a C string, without decoding its escapes. The initial |
453 | `"' is already eaten. |
454 `----------------------------------------------------------------*/
455
456<SC_STRING>
457{
3f2d73f1 458 "\"" STRING_GROW; BEGIN context_state;
41141c56 459 \\{splice}[^$@\[\]] STRING_GROW;
3f2d73f1 460 <<EOF>> unexpected_end_of_file (token_start, "\"");
e9955c83
AD
461}
462
463
464 /*---------------------------------------------------.
465 | Strings, comments etc. can be found in user code. |
466 `---------------------------------------------------*/
467
468<SC_BRACED_CODE,SC_PROLOGUE,SC_EPILOGUE>
469{
3f2d73f1
PE
470 "'" {
471 STRING_GROW;
472 context_state = YY_START;
473 token_start = loc->start;
474 BEGIN SC_CHARACTER;
475 }
476 "\"" {
477 STRING_GROW;
478 context_state = YY_START;
479 token_start = loc->start;
480 BEGIN SC_STRING;
481 }
482 "/"{splice}"*" {
483 STRING_GROW;
484 context_state = YY_START;
485 token_start = loc->start;
486 BEGIN SC_COMMENT;
487 }
488 "/"{splice}"/" {
489 STRING_GROW;
490 context_state = YY_START;
491 BEGIN SC_LINE_COMMENT;
492 }
e9955c83
AD
493}
494
495
496 /*---------------------------------------------------------------.
497 | Scanning some code in braces (%union and actions). The initial |
498 | "{" is already eaten. |
499 `---------------------------------------------------------------*/
500
501<SC_BRACED_CODE>
502{
41141c56
PE
503 "{"|"<"{splice}"%" STRING_GROW; braces_level++;
504 "%"{splice}">" STRING_GROW; braces_level--;
e9955c83 505 "}" {
41141c56 506 STRING_GROW;
1a9e39f1
PE
507 braces_level--;
508 if (braces_level < 0)
e9955c83 509 {
41141c56 510 STRING_FINISH;
3f2d73f1 511 loc->start = code_start;
223ff46e 512 val->chars = last_string;
efcb44dd 513 rule_length++;
a706a1cc 514 BEGIN INITIAL;
e9955c83
AD
515 return BRACED_CODE;
516 }
517 }
518
a706a1cc
PE
519 /* Tokenize `<<%' correctly (as `<<' `%') rather than incorrectly
520 (as `<' `<%'). */
41141c56 521 "<"{splice}"<" STRING_GROW;
a706a1cc 522
d8d3f94a 523 "$"("<"{tag}">")?(-?[0-9]+|"$") { handle_dollar (current_braced_code,
41141c56 524 yytext, *loc); }
f25bfb75 525 "@"(-?[0-9]+|"$") { handle_at (current_braced_code,
41141c56 526 yytext, *loc); }
e9955c83 527
3f2d73f1 528 <<EOF>> unexpected_end_of_file (code_start, "}");
e9955c83
AD
529}
530
531
532 /*--------------------------------------------------------------.
533 | Scanning some prologue: from "%{" (already scanned) to "%}". |
534 `--------------------------------------------------------------*/
535
536<SC_PROLOGUE>
537{
538 "%}" {
41141c56 539 STRING_FINISH;
3f2d73f1 540 loc->start = code_start;
223ff46e 541 val->chars = last_string;
a706a1cc 542 BEGIN INITIAL;
e9955c83
AD
543 return PROLOGUE;
544 }
545
3f2d73f1 546 <<EOF>> unexpected_end_of_file (code_start, "%}");
e9955c83
AD
547}
548
549
550 /*---------------------------------------------------------------.
551 | Scanning the epilogue (everything after the second "%%", which |
d8d3f94a 552 | has already been eaten). |
e9955c83
AD
553 `---------------------------------------------------------------*/
554
555<SC_EPILOGUE>
556{
e9955c83 557 <<EOF>> {
41141c56 558 STRING_FINISH;
3f2d73f1 559 loc->start = code_start;
223ff46e 560 val->chars = last_string;
a706a1cc 561 BEGIN INITIAL;
e9955c83
AD
562 return EPILOGUE;
563 }
564}
565
566
a706a1cc
PE
567 /*----------------------------------------------------------------.
568 | By default, grow the string obstack with the input, escaping M4 |
569 | quoting characters. |
570 `----------------------------------------------------------------*/
571
572<SC_COMMENT,SC_LINE_COMMENT,SC_STRING,SC_CHARACTER,SC_BRACED_CODE,SC_PROLOGUE,SC_EPILOGUE>
573{
223ff46e
PE
574 \$ obstack_sgrow (&obstack_for_string, "$][");
575 \@ obstack_sgrow (&obstack_for_string, "@@");
576 \[ obstack_sgrow (&obstack_for_string, "@{");
577 \] obstack_sgrow (&obstack_for_string, "@}");
41141c56 578 .|\n STRING_GROW;
a706a1cc
PE
579}
580
581
e9955c83
AD
582%%
583
3f2d73f1
PE
584/* Set *LOC and adjust scanner cursor to account for token TOKEN of
585 size SIZE. */
6c30d641
PE
586
587static void
223ff46e 588adjust_location (location *loc, char const *token, size_t size)
6c30d641 589{
3f2d73f1
PE
590 int line = scanner_cursor.line;
591 int column = scanner_cursor.column;
6c30d641
PE
592 char const *p0 = token;
593 char const *p = token;
594 char const *lim = token + size;
595
3f2d73f1
PE
596 loc->start = scanner_cursor;
597
6c30d641
PE
598 for (p = token; p < lim; p++)
599 switch (*p)
600 {
6c30d641
PE
601 case '\n':
602 line++;
603 column = 1;
604 p0 = p + 1;
605 break;
606
607 case '\t':
608 column += mbsnwidth (p0, p - p0, 0);
609 column += 8 - ((column - 1) & 7);
610 p0 = p + 1;
611 break;
612 }
613
3f2d73f1
PE
614 scanner_cursor.line = line;
615 scanner_cursor.column = column + mbsnwidth (p0, p - p0, 0);
616
617 loc->end = scanner_cursor;
6c30d641
PE
618}
619
620
/* Read up to SIZE bytes from FP into BUF and return the number of
   bytes stored.  Carriage returns are normalized away: both "\r\n"
   and a lone '\r' are delivered as a single '\n'.  The buffer is
   compacted in place, so the returned count may be smaller than the
   number of bytes actually read from FP.  */

static size_t
no_cr_read (FILE *fp, char *buf, size_t size)
{
  size_t nread = fread (buf, 1, size, fp);
  char *dst;
  char const *src;
  char const *end;

  if (nread == 0)
    return 0;

  dst = memchr (buf, '\r', nread);
  if (dst == NULL)
    return nread;

  /* DST is one past the first '\r'; SRC is the next byte to examine.  */
  src = ++dst;
  end = buf + nread;

  for (;;)
    {
      /* The byte just before DST is an '\r': turn it into '\n', and
         drop a '\n' that immediately follows it.  */
      dst[-1] = '\n';
      if (src != end)
        {
          if (*src == '\n')
            src++;
        }
      else
        {
          /* The '\r' was the last byte read, so peek at the stream to
             see whether a '\n' follows; push back anything else.  */
          int next = getc (fp);
          if (next != '\n' && ungetc (next, fp) != next)
            break;
        }

      /* Shift bytes down until the next '\r' has been copied.  */
      for (;;)
        {
          char c;
          if (src == end)
            return dst - buf;
          c = *src++;
          *dst++ = c;
          if (c == '\r')
            break;
        }
    }

  return dst - buf;
}
666
667
e9955c83 668/*------------------------------------------------------------------.
366eea36 669| TEXT is pointing to a wannabee semantic value (i.e., a `$'). |
e9955c83
AD
670| |
671| Possible inputs: $[<TYPENAME>]($|integer) |
672| |
223ff46e 673| Output to OBSTACK_FOR_STRING a reference to this semantic value. |
e9955c83
AD
674`------------------------------------------------------------------*/
675
f25bfb75 676static inline void
223ff46e 677handle_action_dollar (char *text, location loc)
e9955c83
AD
678{
679 const char *type_name = NULL;
366eea36 680 char *cp = text + 1;
e9955c83
AD
681
682 /* Get the type name if explicit. */
683 if (*cp == '<')
684 {
685 type_name = ++cp;
686 while (*cp != '>')
687 ++cp;
688 *cp = '\0';
689 ++cp;
690 }
691
692 if (*cp == '$')
693 {
694 if (!type_name)
223ff46e 695 type_name = symbol_list_n_type_name_get (current_rule, loc, 0);
e9955c83 696 if (!type_name && typed)
223ff46e 697 complain_at (loc, _("$$ of `%s' has no declared type"),
97650f4e 698 current_rule->sym->tag);
e9955c83
AD
699 if (!type_name)
700 type_name = "";
223ff46e 701 obstack_fgrow1 (&obstack_for_string,
e9955c83
AD
702 "]b4_lhs_value([%s])[", type_name);
703 }
d8d3f94a 704 else
e9955c83 705 {
d8d3f94a 706 long num;
223ff46e 707 set_errno (0);
d8d3f94a 708 num = strtol (cp, 0, 10);
e9955c83 709
223ff46e 710 if (INT_MIN <= num && num <= rule_length && ! get_errno ())
e9955c83 711 {
d8d3f94a 712 int n = num;
e9955c83 713 if (!type_name && n > 0)
223ff46e 714 type_name = symbol_list_n_type_name_get (current_rule, loc, n);
e9955c83 715 if (!type_name && typed)
223ff46e
PE
716 complain_at (loc, _("$%d of `%s' has no declared type"),
717 n, current_rule->sym->tag);
e9955c83
AD
718 if (!type_name)
719 type_name = "";
223ff46e 720 obstack_fgrow3 (&obstack_for_string,
e9955c83
AD
721 "]b4_rhs_value([%d], [%d], [%s])[",
722 rule_length, n, type_name);
723 }
d8d3f94a 724 else
223ff46e 725 complain_at (loc, _("integer out of range: %s"), quote (text));
9280d3ef
AD
726 }
727}
728
729
366eea36 730/*---------------------------------------------------------------.
d8d3f94a 731| TEXT is expected to be $$ in some code associated to a symbol: |
366eea36
AD
732| destructor or printer. |
733`---------------------------------------------------------------*/
9280d3ef 734
f25bfb75 735static inline void
223ff46e 736handle_symbol_code_dollar (char *text, location loc)
9280d3ef 737{
366eea36 738 char *cp = text + 1;
9280d3ef 739 if (*cp == '$')
223ff46e 740 obstack_sgrow (&obstack_for_string, "]b4_dollar_dollar[");
9280d3ef 741 else
223ff46e 742 complain_at (loc, _("invalid value: %s"), quote (text));
e9955c83
AD
743}
744
f25bfb75
AD
745
746/*-----------------------------------------------------------------.
747| Dispatch onto handle_action_dollar, or handle_destructor_dollar, |
748| depending upon CODE_KIND. |
749`-----------------------------------------------------------------*/
e9955c83
AD
750
751static void
223ff46e 752handle_dollar (braced_code braced_code_kind, char *text, location loc)
f25bfb75
AD
753{
754 switch (braced_code_kind)
755 {
756 case action_braced_code:
223ff46e 757 handle_action_dollar (text, loc);
f25bfb75
AD
758 break;
759
760 case destructor_braced_code:
366eea36 761 case printer_braced_code:
223ff46e 762 handle_symbol_code_dollar (text, loc);
f25bfb75
AD
763 break;
764 }
765}
766
767
768/*------------------------------------------------------.
769| TEXT is a location token (i.e., a `@...'). Output to |
223ff46e 770| OBSTACK_FOR_STRING a reference to this location. |
f25bfb75
AD
771`------------------------------------------------------*/
772
773static inline void
223ff46e 774handle_action_at (char *text, location loc)
e9955c83 775{
366eea36 776 char *cp = text + 1;
e9955c83 777 locations_flag = 1;
e9955c83 778
366eea36 779 if (*cp == '$')
e9955c83 780 {
223ff46e 781 obstack_sgrow (&obstack_for_string, "]b4_lhs_location[");
e9955c83 782 }
d8d3f94a 783 else
e9955c83 784 {
d8d3f94a 785 long num;
223ff46e 786 set_errno (0);
d8d3f94a 787 num = strtol (cp, 0, 10);
dafdc66f 788
223ff46e 789 if (INT_MIN <= num && num <= rule_length && ! get_errno ())
d8d3f94a
PE
790 {
791 int n = num;
223ff46e 792 obstack_fgrow2 (&obstack_for_string, "]b4_rhs_location([%d], [%d])[",
d8d3f94a
PE
793 rule_length, n);
794 }
e9955c83 795 else
223ff46e 796 complain_at (loc, _("integer out of range: %s"), quote (text));
f25bfb75
AD
797 }
798}
799
800
366eea36 801/*---------------------------------------------------------------.
d8d3f94a 802| TEXT is expected to be @$ in some code associated to a symbol: |
366eea36
AD
803| destructor or printer. |
804`---------------------------------------------------------------*/
f25bfb75
AD
805
806static inline void
223ff46e 807handle_symbol_code_at (char *text, location loc)
f25bfb75 808{
366eea36
AD
809 char *cp = text + 1;
810 if (*cp == '$')
223ff46e 811 obstack_sgrow (&obstack_for_string, "]b4_at_dollar[");
f25bfb75 812 else
223ff46e 813 complain_at (loc, _("invalid value: %s"), quote (text));
e9955c83 814}
4cdb01db 815
f25bfb75
AD
816
817/*-------------------------------------------------------------------.
818| Dispatch onto handle_action_at, or handle_destructor_at, depending |
819| upon CODE_KIND. |
820`-------------------------------------------------------------------*/
821
822static void
223ff46e 823handle_at (braced_code braced_code_kind, char *text, location loc)
f25bfb75
AD
824{
825 switch (braced_code_kind)
826 {
827 case action_braced_code:
223ff46e 828 handle_action_at (text, loc);
f25bfb75
AD
829 break;
830
831 case destructor_braced_code:
366eea36 832 case printer_braced_code:
223ff46e 833 handle_symbol_code_at (text, loc);
f25bfb75
AD
834 break;
835 }
836}
837
838
d8d3f94a
PE
/*------------------------------------------------------------------.
| Convert universal character name UCN to a single-byte character,  |
| and return that character.  Return -1 if UCN does not correspond  |
| to a single-byte character.                                       |
|                                                                   |
| UCN points at the backslash; the hexadecimal digits start two     |
| bytes later (after `\u' or `\U').                                 |
`------------------------------------------------------------------*/

static int
convert_ucn_to_byte (char const *ucn)
{
  unsigned long code = strtoul (ucn + 2, 0, 16);
  int byte;

  /* FIXME: Currently we assume Unicode-compatible unibyte characters
     on ASCII hosts (i.e., Latin-1 on hosts with 8-bit bytes).  On
     non-ASCII hosts we support only the portable C character set.
     These limitations should be removed once we add support for
     multibyte characters.  */

  if (UCHAR_MAX < code)
    return -1;

#if ! ('$' == 0x24 && '@' == 0x40 && '`' == 0x60 && '~' == 0x7e)
  {
    /* A non-ASCII host.  Use CODE to index into a table of the C
       basic execution character set, which is guaranteed to exist on
       all Standard C platforms.  This table also includes '$', '@',
       and '`', which are not in the basic execution character set but
       which are unibyte characters on all the platforms that we know
       about.

       The elements are `short' rather than `signed char' so that a
       host character whose code exceeds SCHAR_MAX is stored as
       itself and cannot be mistaken for the -1 "no such character"
       marker.  */
    static short const table[] =
      {
        '\0',   -1,   -1,   -1,   -1,   -1,   -1, '\a',
        '\b', '\t', '\n', '\v', '\f', '\r',   -1,   -1,
          -1,   -1,   -1,   -1,   -1,   -1,   -1,   -1,
          -1,   -1,   -1,   -1,   -1,   -1,   -1,   -1,
         ' ',  '!',  '"',  '#',  '$',  '%',  '&', '\'',
         '(',  ')',  '*',  '+',  ',',  '-',  '.',  '/',
         '0',  '1',  '2',  '3',  '4',  '5',  '6',  '7',
         '8',  '9',  ':',  ';',  '<',  '=',  '>',  '?',
         '@',  'A',  'B',  'C',  'D',  'E',  'F',  'G',
         'H',  'I',  'J',  'K',  'L',  'M',  'N',  'O',
         'P',  'Q',  'R',  'S',  'T',  'U',  'V',  'W',
         'X',  'Y',  'Z',  '[', '\\',  ']',  '^',  '_',
         '`',  'a',  'b',  'c',  'd',  'e',  'f',  'g',
         'h',  'i',  'j',  'k',  'l',  'm',  'n',  'o',
         'p',  'q',  'r',  's',  't',  'u',  'v',  'w',
         'x',  'y',  'z',  '{',  '|',  '}',  '~'
      };

    /* Keep the result in an int: storing -1 in the unsigned CODE and
       converting it back would be implementation-defined.  */
    byte = code < sizeof table / sizeof table[0] ? table[code] : -1;
  }
#else
  /* An ASCII host: CODE is at most UCHAR_MAX here, so it fits.  */
  byte = code;
#endif

  return byte;
}
893
894
900c5db5
AD
895/*----------------------------------------------------------------.
896| Handle `#line INT "FILE"'. ARGS has already skipped `#line '. |
897`----------------------------------------------------------------*/
898
899static void
3f2d73f1 900handle_syncline (char *args)
900c5db5
AD
901{
902 int lineno = strtol (args, &args, 10);
903 const char *file = NULL;
904 file = strchr (args, '"') + 1;
905 *strchr (file, '"') = 0;
3f2d73f1
PE
906 scanner_cursor.file = current_file = xstrdup (file);
907 scanner_cursor.line = lineno;
908 scanner_cursor.column = 1;
900c5db5
AD
909}
910
a706a1cc 911
3f2d73f1
PE
912/*------------------------------------------------------------------------.
913| Report an unexpected EOF in a token or comment starting at START. |
914| An end of file was encountered and the expected TOKEN_END was missing. |
915| After reporting the problem, pretend that TOKEN_END was found. |
916`------------------------------------------------------------------------*/
a706a1cc
PE
917
918static void
3f2d73f1 919unexpected_end_of_file (boundary start, char const *token_end)
a706a1cc 920{
345532d7 921 size_t i = strlen (token_end);
a706a1cc 922
223ff46e
PE
923 location loc;
924 loc.start = start;
925 loc.end = scanner_cursor;
926 complain_at (loc, _("missing `%s' at end of file"), token_end);
345532d7 927
3f2d73f1
PE
928 /* Adjust scanner cursor so that any later message does not count
929 the characters about to be inserted. */
930 scanner_cursor.column -= i;
345532d7
PE
931
932 while (i != 0)
933 unput (token_end[--i]);
a706a1cc
PE
934}
935
936
f25bfb75
AD
937/*-------------------------.
938| Initialize the scanner. |
939`-------------------------*/
940
1d6412ad
AD
941void
942scanner_initialize (void)
943{
223ff46e 944 obstack_init (&obstack_for_string);
1d6412ad
AD
945}
946
947
f25bfb75
AD
948/*-----------------------------------------------.
949| Free all the memory allocated to the scanner. |
950`-----------------------------------------------*/
951
4cdb01db
AD
952void
953scanner_free (void)
954{
223ff46e 955 obstack_free (&obstack_for_string, 0);
536545f3
AD
956 /* Reclaim Flex's buffers. */
957 yy_delete_buffer (YY_CURRENT_BUFFER);
4cdb01db 958}