]> git.saurik.com Git - bison.git/blame - src/scan-gram.l
(add_param): 2nd arg is now char * not char
[bison.git] / src / scan-gram.l
CommitLineData
e9955c83 1/* Bison Grammar Scanner -*- C -*-
3b1e470c 2
e9955c83
AD
3 Copyright (C) 2002 Free Software Foundation, Inc.
4
5 This file is part of Bison, the GNU Compiler Compiler.
6
7 This program is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 2 of the License, or
10 (at your option) any later version.
11
12 This program is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
16
17 You should have received a copy of the GNU General Public License
18 along with this program; if not, write to the Free Software
19 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
20 02111-1307 USA
21*/
22
a706a1cc 23%option debug nodefault noyywrap never-interactive
e9955c83
AD
24%option prefix="gram_" outfile="lex.yy.c"
25
26%{
27#include "system.h"
223ff46e
PE
28
29#include <mbswidth.h>
30#include <get-errno.h>
31#include <quote.h>
32
e9955c83 33#include "complain.h"
3f2d73f1 34#include "files.h"
e9955c83
AD
35#include "getargs.h"
36#include "gram.h"
37#include "reader.h"
223ff46e 38#include "uniqstr.h"
e9955c83 39
3f2d73f1
PE
40#define YY_USER_INIT \
41 do \
42 { \
43 scanner_cursor.file = current_file; \
44 scanner_cursor.line = 1; \
45 scanner_cursor.column = 1; \
46 } \
47 while (0)
8efe435c 48
3f2d73f1
PE
49/* Location of scanner cursor. */
50boundary scanner_cursor;
41141c56 51
223ff46e 52static void adjust_location (location *, char const *, size_t);
3f2d73f1 53#define YY_USER_ACTION adjust_location (loc, yytext, yyleng);
d8d3f94a 54
6c30d641 55static size_t no_cr_read (FILE *, char *, size_t);
d8d3f94a
PE
56#define YY_INPUT(buf, result, size) ((result) = no_cr_read (yyin, buf, size))
57
58
223ff46e 59/* OBSTACK_FOR_STRING -- Used to store all the characters that we need to
44995b2e
AD
60 keep (to construct ID, STRINGS etc.). Use the following macros to
61 use it.
62
41141c56
PE
63 Use STRING_GROW to append what has just been matched, and
64 STRING_FINISH to end the string (it puts the ending 0).
65 STRING_FINISH also stores this string in LAST_STRING, which can be
66 used, and which is used by STRING_FREE to free the last string. */
44995b2e 67
223ff46e 68static struct obstack obstack_for_string;
44995b2e 69
7ec2d4cd
AD
70/* A string representing the most recently saved token. */
71static char *last_string;
72
73
41141c56 74#define STRING_GROW \
223ff46e 75 obstack_grow (&obstack_for_string, yytext, yyleng)
44995b2e 76
41141c56 77#define STRING_FINISH \
44995b2e 78 do { \
223ff46e
PE
79 obstack_1grow (&obstack_for_string, '\0'); \
80 last_string = obstack_finish (&obstack_for_string); \
44995b2e
AD
81 } while (0)
82
41141c56 83#define STRING_FREE \
223ff46e 84 obstack_free (&obstack_for_string, last_string)
e9955c83 85
7ec2d4cd
AD
86void
87scanner_last_string_free (void)
88{
41141c56 89 STRING_FREE;
7ec2d4cd 90}
e9955c83 91
efcb44dd
PE
92/* Within well-formed rules, RULE_LENGTH is the number of values in
93 the current rule so far, which says where to find `$0' with respect
94 to the top of the stack. It is not the same as the rule->length in
95 the case of mid rule actions.
96
97 Outside of well-formed rules, RULE_LENGTH has an undefined value. */
98static int rule_length;
99
223ff46e
PE
100static void handle_dollar (braced_code code_kind, char *cp, location loc);
101static void handle_at (braced_code code_kind, char *cp, location loc);
3f2d73f1 102static void handle_syncline (char *args);
d8d3f94a 103static int convert_ucn_to_byte (char const *hex_text);
3f2d73f1 104static void unexpected_end_of_file (boundary, char const *);
e9955c83
AD
105
106%}
d8d3f94a 107%x SC_COMMENT SC_LINE_COMMENT SC_YACC_COMMENT
e9955c83 108%x SC_STRING SC_CHARACTER
3f2d73f1 109%x SC_AFTER_IDENTIFIER
e9955c83
AD
110%x SC_ESCAPED_STRING SC_ESCAPED_CHARACTER
111%x SC_BRACED_CODE SC_PROLOGUE SC_EPILOGUE
112
29c01725
AD
113letter [.abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ_]
114id {letter}({letter}|[0-9])*
115directive %{letter}({letter}|[0-9]|-)*
116int [0-9]+
d8d3f94a
PE
117
118/* POSIX says that a tag must be both an id and a C union member, but
119 historically almost any character is allowed in a tag. We disallow
120 NUL and newline, as this simplifies our implementation. */
121tag [^\0\n>]+
122
123/* Zero or more instances of backslash-newline. Following GCC, allow
124 white space between the backslash and the newline. */
125splice (\\[ \f\t\v]*\n)*
e9955c83
AD
126
127%%
128%{
a706a1cc 129 /* Nesting level of the current code in braces. */
1a9e39f1
PE
130 int braces_level IF_LINT (= 0);
131
3f2d73f1
PE
132 /* Parent context state, when applicable. */
133 int context_state IF_LINT (= 0);
a706a1cc 134
3f2d73f1 135 /* Location of most recent identifier, when applicable. */
223ff46e 136 location id_loc IF_LINT (= *loc);
3f2d73f1 137
223ff46e 138 /* Where containing code started, when applicable. */
3f2d73f1
PE
139 boundary code_start IF_LINT (= loc->start);
140
223ff46e
PE
141 /* Where containing comment or string or character literal started,
142 when applicable. */
3f2d73f1 143 boundary token_start IF_LINT (= loc->start);
e9955c83
AD
144%}
145
146
3f2d73f1
PE
147 /*-----------------------.
148 | Scanning white space. |
149 `-----------------------*/
150
151<INITIAL,SC_AFTER_IDENTIFIER>
152{
153 [ \f\n\t\v] ;
154
155 /* Comments. */
156 "/*" token_start = loc->start; context_state = YY_START; BEGIN SC_YACC_COMMENT;
157 "//".* ;
158
159 /* #line directives are not documented, and may be withdrawn or
160 modified in future versions of Bison. */
161 ^"#line "{int}" \"".*"\"\n" {
162 handle_syncline (yytext + sizeof "#line " - 1);
163 }
164}
165
166
e9955c83
AD
167 /*----------------------------.
168 | Scanning Bison directives. |
169 `----------------------------*/
170<INITIAL>
171{
172 "%binary" return PERCENT_NONASSOC;
173 "%debug" return PERCENT_DEBUG;
174 "%define" return PERCENT_DEFINE;
175 "%defines" return PERCENT_DEFINES;
9280d3ef 176 "%destructor" return PERCENT_DESTRUCTOR;
676385e2 177 "%dprec" return PERCENT_DPREC;
e9955c83
AD
178 "%error"[-_]"verbose" return PERCENT_ERROR_VERBOSE;
179 "%expect" return PERCENT_EXPECT;
180 "%file-prefix" return PERCENT_FILE_PREFIX;
181 "%fixed"[-_]"output"[-_]"files" return PERCENT_YACC;
ae7453f2 182 "%glr-parser" return PERCENT_GLR_PARSER;
e9955c83
AD
183 "%left" return PERCENT_LEFT;
184 "%locations" return PERCENT_LOCATIONS;
676385e2 185 "%merge" return PERCENT_MERGE;
e9955c83
AD
186 "%name"[-_]"prefix" return PERCENT_NAME_PREFIX;
187 "%no"[-_]"lines" return PERCENT_NO_LINES;
188 "%nonassoc" return PERCENT_NONASSOC;
189 "%nterm" return PERCENT_NTERM;
190 "%output" return PERCENT_OUTPUT;
ae7453f2 191 "%parse-param" return PERCENT_PARSE_PARAM;
d8d3f94a 192 "%prec" rule_length--; return PERCENT_PREC;
366eea36 193 "%printer" return PERCENT_PRINTER;
e9955c83
AD
194 "%pure"[-_]"parser" return PERCENT_PURE_PARSER;
195 "%right" return PERCENT_RIGHT;
ae7453f2 196 "%lex-param" return PERCENT_LEX_PARAM;
e9955c83
AD
197 "%skeleton" return PERCENT_SKELETON;
198 "%start" return PERCENT_START;
199 "%term" return PERCENT_TOKEN;
200 "%token" return PERCENT_TOKEN;
201 "%token"[-_]"table" return PERCENT_TOKEN_TABLE;
202 "%type" return PERCENT_TYPE;
203 "%union" return PERCENT_UNION;
204 "%verbose" return PERCENT_VERBOSE;
205 "%yacc" return PERCENT_YACC;
206
3f2d73f1 207 {directive} {
41141c56 208 complain_at (*loc, _("invalid directive: %s"), quote (yytext));
412f8a59 209 }
900c5db5 210
e9955c83 211 "=" return EQUAL;
d8d3f94a 212 "|" rule_length = 0; return PIPE;
e9955c83
AD
213 ";" return SEMICOLON;
214
763ed7a6
PE
215 "," {
216 warn_at (*loc, _("stray `,' treated as white space"));
763ed7a6
PE
217 }
218
3f2d73f1 219 {id} {
41141c56 220 val->symbol = symbol_get (yytext, *loc);
3f2d73f1 221 id_loc = *loc;
efcb44dd 222 rule_length++;
3f2d73f1 223 BEGIN SC_AFTER_IDENTIFIER;
e9955c83
AD
224 }
225
d8d3f94a
PE
226 {int} {
227 unsigned long num;
223ff46e 228 set_errno (0);
d8d3f94a 229 num = strtoul (yytext, 0, 10);
223ff46e 230 if (INT_MAX < num || get_errno ())
d8d3f94a 231 {
41141c56 232 complain_at (*loc, _("integer out of range: %s"), quote (yytext));
d8d3f94a
PE
233 num = INT_MAX;
234 }
41141c56 235 val->integer = num;
d8d3f94a
PE
236 return INT;
237 }
e9955c83
AD
238
239 /* Characters. We don't check there is only one. */
3f2d73f1 240 "'" STRING_GROW; token_start = loc->start; BEGIN SC_ESCAPED_CHARACTER;
e9955c83
AD
241
242 /* Strings. */
3f2d73f1 243 "\"" STRING_GROW; token_start = loc->start; BEGIN SC_ESCAPED_STRING;
e9955c83
AD
244
245 /* Prologue. */
3f2d73f1 246 "%{" code_start = loc->start; BEGIN SC_PROLOGUE;
e9955c83
AD
247
248 /* Code in between braces. */
3f2d73f1
PE
249 "{" {
250 STRING_GROW;
251 braces_level = 0;
252 code_start = loc->start;
253 BEGIN SC_BRACED_CODE;
254 }
e9955c83
AD
255
256 /* A type. */
d8d3f94a 257 "<"{tag}">" {
223ff46e 258 obstack_grow (&obstack_for_string, yytext + 1, yyleng - 2);
41141c56 259 STRING_FINISH;
223ff46e 260 val->uniqstr = uniqstr_new (last_string);
41141c56 261 STRING_FREE;
4cdb01db
AD
262 return TYPE;
263 }
264
a706a1cc
PE
265 "%%" {
266 static int percent_percent_count;
e9955c83 267 if (++percent_percent_count == 2)
3f2d73f1
PE
268 {
269 code_start = loc->start;
270 BEGIN SC_EPILOGUE;
271 }
e9955c83
AD
272 return PERCENT_PERCENT;
273 }
274
a706a1cc 275 . {
41141c56 276 complain_at (*loc, _("invalid character: %s"), quote (yytext));
3f2d73f1
PE
277 }
278}
279
280
281 /*-----------------------------------------------------------------.
282 | Scanning after an identifier, checking whether a colon is next. |
283 `-----------------------------------------------------------------*/
284
285<SC_AFTER_IDENTIFIER>
286{
287 ":" {
288 rule_length = 0;
289 *loc = id_loc;
290 BEGIN INITIAL;
291 return ID_COLON;
292 }
293 . {
294 scanner_cursor.column -= mbsnwidth (yytext, yyleng, 0);
295 yyless (0);
296 *loc = id_loc;
297 BEGIN INITIAL;
298 return ID;
299 }
300 <<EOF>> {
301 *loc = id_loc;
302 BEGIN INITIAL;
303 return ID;
e9955c83
AD
304 }
305}
306
307
d8d3f94a
PE
308 /*---------------------------------------------------------------.
309 | Scanning a Yacc comment. The initial `/ *' is already eaten. |
310 `---------------------------------------------------------------*/
e9955c83 311
d8d3f94a 312<SC_YACC_COMMENT>
e9955c83 313{
3f2d73f1 314 "*/" BEGIN context_state;
a706a1cc 315 .|\n ;
3f2d73f1 316 <<EOF>> unexpected_end_of_file (token_start, "*/");
d8d3f94a
PE
317}
318
319
320 /*------------------------------------------------------------.
321 | Scanning a C comment. The initial `/ *' is already eaten. |
322 `------------------------------------------------------------*/
323
324<SC_COMMENT>
325{
3f2d73f1
PE
326 "*"{splice}"/" STRING_GROW; BEGIN context_state;
327 <<EOF>> unexpected_end_of_file (token_start, "*/");
e9955c83
AD
328}
329
330
d8d3f94a
PE
331 /*--------------------------------------------------------------.
332 | Scanning a line comment. The initial `//' is already eaten. |
333 `--------------------------------------------------------------*/
334
335<SC_LINE_COMMENT>
336{
3f2d73f1 337 "\n" STRING_GROW; BEGIN context_state;
41141c56 338 {splice} STRING_GROW;
3f2d73f1 339 <<EOF>> BEGIN context_state;
d8d3f94a
PE
340}
341
342
e9955c83
AD
343 /*----------------------------------------------------------------.
344 | Scanning a C string, including its escapes. The initial `"' is |
345 | already eaten. |
346 `----------------------------------------------------------------*/
347
348<SC_ESCAPED_STRING>
349{
db2cc12f 350 "\"" {
41141c56
PE
351 STRING_GROW;
352 STRING_FINISH;
3f2d73f1 353 loc->start = token_start;
223ff46e 354 val->chars = last_string;
efcb44dd 355 rule_length++;
a706a1cc 356 BEGIN INITIAL;
e9955c83
AD
357 return STRING;
358 }
359
41141c56 360 .|\n STRING_GROW;
3f2d73f1 361 <<EOF>> unexpected_end_of_file (token_start, "\"");
e9955c83
AD
362}
363
364 /*---------------------------------------------------------------.
365 | Scanning a C character, decoding its escapes. The initial "'" |
366 | is already eaten. |
367 `---------------------------------------------------------------*/
368
369<SC_ESCAPED_CHARACTER>
370{
db2cc12f 371 "'" {
3b1e470c 372 unsigned char last_string_1;
41141c56
PE
373 STRING_GROW;
374 STRING_FINISH;
3f2d73f1 375 loc->start = token_start;
41141c56
PE
376 val->symbol = symbol_get (last_string, *loc);
377 symbol_class_set (val->symbol, token_sym, *loc);
3b1e470c
PE
378 last_string_1 = last_string[1];
379 symbol_user_token_number_set (val->symbol, last_string_1, *loc);
41141c56 380 STRING_FREE;
a706a1cc
PE
381 rule_length++;
382 BEGIN INITIAL;
383 return ID;
e9955c83 384 }
a706a1cc 385
41141c56 386 .|\n STRING_GROW;
3f2d73f1 387 <<EOF>> unexpected_end_of_file (token_start, "'");
e9955c83
AD
388}
389
390
391 /*----------------------------.
392 | Decode escaped characters. |
393 `----------------------------*/
394
395<SC_ESCAPED_STRING,SC_ESCAPED_CHARACTER>
396{
d8d3f94a
PE
397 \\[0-7]{1,3} {
398 unsigned long c = strtoul (yytext + 1, 0, 8);
399 if (UCHAR_MAX < c)
3f2d73f1 400 complain_at (*loc, _("invalid escape sequence: %s"), quote (yytext));
e9955c83 401 else
223ff46e 402 obstack_1grow (&obstack_for_string, c);
e9955c83
AD
403 }
404
6b0d38ab 405 \\x[0-9abcdefABCDEF]+ {
d8d3f94a 406 unsigned long c;
223ff46e 407 set_errno (0);
d8d3f94a 408 c = strtoul (yytext + 2, 0, 16);
223ff46e 409 if (UCHAR_MAX < c || get_errno ())
3f2d73f1 410 complain_at (*loc, _("invalid escape sequence: %s"), quote (yytext));
d8d3f94a 411 else
223ff46e 412 obstack_1grow (&obstack_for_string, c);
e9955c83
AD
413 }
414
223ff46e
PE
415 \\a obstack_1grow (&obstack_for_string, '\a');
416 \\b obstack_1grow (&obstack_for_string, '\b');
417 \\f obstack_1grow (&obstack_for_string, '\f');
418 \\n obstack_1grow (&obstack_for_string, '\n');
419 \\r obstack_1grow (&obstack_for_string, '\r');
420 \\t obstack_1grow (&obstack_for_string, '\t');
421 \\v obstack_1grow (&obstack_for_string, '\v');
412f8a59
PE
422
423 /* \\[\"\'?\\] would be shorter, but it confuses xgettext. */
223ff46e 424 \\("\""|"'"|"?"|"\\") obstack_1grow (&obstack_for_string, yytext[1]);
412f8a59 425
6b0d38ab 426 \\(u|U[0-9abcdefABCDEF]{4})[0-9abcdefABCDEF]{4} {
d8d3f94a
PE
427 int c = convert_ucn_to_byte (yytext);
428 if (c < 0)
3f2d73f1 429 complain_at (*loc, _("invalid escape sequence: %s"), quote (yytext));
d8d3f94a 430 else
223ff46e 431 obstack_1grow (&obstack_for_string, c);
d8d3f94a 432 }
4f25ebb0 433 \\(.|\n) {
3f2d73f1 434 complain_at (*loc, _("unrecognized escape sequence: %s"), quote (yytext));
41141c56 435 STRING_GROW;
e9955c83
AD
436 }
437}
438
439
440 /*----------------------------------------------------------.
441 | Scanning a C character without decoding its escapes. The |
442 | initial "'" is already eaten. |
443 `----------------------------------------------------------*/
444
445<SC_CHARACTER>
446{
3f2d73f1 447 "'" STRING_GROW; BEGIN context_state;
41141c56 448 \\{splice}[^$@\[\]] STRING_GROW;
3f2d73f1 449 <<EOF>> unexpected_end_of_file (token_start, "'");
e9955c83
AD
450}
451
452
453 /*----------------------------------------------------------------.
454 | Scanning a C string, without decoding its escapes. The initial |
455 | `"' is already eaten. |
456 `----------------------------------------------------------------*/
457
458<SC_STRING>
459{
3f2d73f1 460 "\"" STRING_GROW; BEGIN context_state;
41141c56 461 \\{splice}[^$@\[\]] STRING_GROW;
3f2d73f1 462 <<EOF>> unexpected_end_of_file (token_start, "\"");
e9955c83
AD
463}
464
465
466 /*---------------------------------------------------.
467 | Strings, comments etc. can be found in user code. |
468 `---------------------------------------------------*/
469
470<SC_BRACED_CODE,SC_PROLOGUE,SC_EPILOGUE>
471{
3f2d73f1
PE
472 "'" {
473 STRING_GROW;
474 context_state = YY_START;
475 token_start = loc->start;
476 BEGIN SC_CHARACTER;
477 }
478 "\"" {
479 STRING_GROW;
480 context_state = YY_START;
481 token_start = loc->start;
482 BEGIN SC_STRING;
483 }
484 "/"{splice}"*" {
485 STRING_GROW;
486 context_state = YY_START;
487 token_start = loc->start;
488 BEGIN SC_COMMENT;
489 }
490 "/"{splice}"/" {
491 STRING_GROW;
492 context_state = YY_START;
493 BEGIN SC_LINE_COMMENT;
494 }
e9955c83
AD
495}
496
497
498 /*---------------------------------------------------------------.
499 | Scanning some code in braces (%union and actions). The initial |
500 | "{" is already eaten. |
501 `---------------------------------------------------------------*/
502
503<SC_BRACED_CODE>
504{
41141c56
PE
505 "{"|"<"{splice}"%" STRING_GROW; braces_level++;
506 "%"{splice}">" STRING_GROW; braces_level--;
e9955c83 507 "}" {
41141c56 508 STRING_GROW;
1a9e39f1
PE
509 braces_level--;
510 if (braces_level < 0)
e9955c83 511 {
41141c56 512 STRING_FINISH;
3f2d73f1 513 loc->start = code_start;
223ff46e 514 val->chars = last_string;
efcb44dd 515 rule_length++;
a706a1cc 516 BEGIN INITIAL;
e9955c83
AD
517 return BRACED_CODE;
518 }
519 }
520
a706a1cc
PE
521 /* Tokenize `<<%' correctly (as `<<' `%') rather than incorrrectly
522 (as `<' `<%'). */
41141c56 523 "<"{splice}"<" STRING_GROW;
a706a1cc 524
d8d3f94a 525 "$"("<"{tag}">")?(-?[0-9]+|"$") { handle_dollar (current_braced_code,
41141c56 526 yytext, *loc); }
f25bfb75 527 "@"(-?[0-9]+|"$") { handle_at (current_braced_code,
41141c56 528 yytext, *loc); }
e9955c83 529
3f2d73f1 530 <<EOF>> unexpected_end_of_file (code_start, "}");
e9955c83
AD
531}
532
533
534 /*--------------------------------------------------------------.
535 | Scanning some prologue: from "%{" (already scanned) to "%}". |
536 `--------------------------------------------------------------*/
537
538<SC_PROLOGUE>
539{
540 "%}" {
41141c56 541 STRING_FINISH;
3f2d73f1 542 loc->start = code_start;
223ff46e 543 val->chars = last_string;
a706a1cc 544 BEGIN INITIAL;
e9955c83
AD
545 return PROLOGUE;
546 }
547
3f2d73f1 548 <<EOF>> unexpected_end_of_file (code_start, "%}");
e9955c83
AD
549}
550
551
552 /*---------------------------------------------------------------.
553 | Scanning the epilogue (everything after the second "%%", which |
d8d3f94a 554 | has already been eaten). |
e9955c83
AD
555 `---------------------------------------------------------------*/
556
557<SC_EPILOGUE>
558{
e9955c83 559 <<EOF>> {
41141c56 560 STRING_FINISH;
3f2d73f1 561 loc->start = code_start;
223ff46e 562 val->chars = last_string;
a706a1cc 563 BEGIN INITIAL;
e9955c83
AD
564 return EPILOGUE;
565 }
566}
567
568
a706a1cc
PE
569 /*----------------------------------------------------------------.
570 | By default, grow the string obstack with the input, escaping M4 |
571 | quoting characters. |
572 `----------------------------------------------------------------*/
573
574<SC_COMMENT,SC_LINE_COMMENT,SC_STRING,SC_CHARACTER,SC_BRACED_CODE,SC_PROLOGUE,SC_EPILOGUE>
575{
223ff46e
PE
576 \$ obstack_sgrow (&obstack_for_string, "$][");
577 \@ obstack_sgrow (&obstack_for_string, "@@");
578 \[ obstack_sgrow (&obstack_for_string, "@{");
579 \] obstack_sgrow (&obstack_for_string, "@}");
41141c56 580 .|\n STRING_GROW;
a706a1cc
PE
581}
582
583
e9955c83
AD
584%%
585
3f2d73f1
PE
586/* Set *LOC and adjust scanner cursor to account for token TOKEN of
587 size SIZE. */
6c30d641
PE
588
589static void
223ff46e 590adjust_location (location *loc, char const *token, size_t size)
6c30d641 591{
3f2d73f1
PE
592 int line = scanner_cursor.line;
593 int column = scanner_cursor.column;
6c30d641
PE
594 char const *p0 = token;
595 char const *p = token;
596 char const *lim = token + size;
597
3f2d73f1
PE
598 loc->start = scanner_cursor;
599
6c30d641
PE
600 for (p = token; p < lim; p++)
601 switch (*p)
602 {
6c30d641
PE
603 case '\n':
604 line++;
605 column = 1;
606 p0 = p + 1;
607 break;
608
609 case '\t':
610 column += mbsnwidth (p0, p - p0, 0);
611 column += 8 - ((column - 1) & 7);
612 p0 = p + 1;
613 break;
614 }
615
3f2d73f1
PE
616 scanner_cursor.line = line;
617 scanner_cursor.column = column + mbsnwidth (p0, p - p0, 0);
618
619 loc->end = scanner_cursor;
6c30d641
PE
620}
621
622
/* Fill BUF with up to SIZE bytes read from FP and return how many
   bytes BUF holds afterwards.  Carriage returns never reach the
   caller: both "\r\n" and a lone '\r' are delivered as a single '\n',
   so the returned count may be smaller than the number of bytes
   consumed from FP.  */

static size_t
no_cr_read (FILE *fp, char *buf, size_t size)
{
  size_t nread = fread (buf, 1, size, fp);
  char *out;
  char const *in;
  char const *end;

  if (nread == 0)
    return nread;

  out = memchr (buf, '\r', nread);
  if (out == NULL)
    return nread;

  /* Everything before the first '\r' is already in place.  From here
     on OUT (write position) and IN (read position) both sit just past
     a carriage return each time the outer loop starts.  */
  end = buf + nread;
  in = ++out;

  for (;;)
    {
      /* The byte just written was a '\r'; turn it into '\n'.  */
      out[-1] = '\n';

      /* Swallow a '\n' that directly follows the '\r'.  When the '\r'
         was the last byte read, the '\n' may still be waiting in FP,
         so peek one character ahead and push it back if it is
         something else.  */
      if (in != end)
        {
          if (*in == '\n')
            in++;
        }
      else
        {
          int next = getc (fp);
          if (next != '\n' && ungetc (next, fp) != next)
            return out - buf;
        }

      /* Compact the remaining bytes up to and including the next '\r'.  */
      for (;;)
        {
          char c;
          if (in == end)
            return out - buf;
          c = *in++;
          *out++ = c;
          if (c == '\r')
            break;
        }
    }
}
668
669
e9955c83 670/*------------------------------------------------------------------.
366eea36 671| TEXT is pointing to a wannabee semantic value (i.e., a `$'). |
e9955c83
AD
672| |
673| Possible inputs: $[<TYPENAME>]($|integer) |
674| |
223ff46e 675| Output to OBSTACK_FOR_STRING a reference to this semantic value. |
e9955c83
AD
676`------------------------------------------------------------------*/
677
f25bfb75 678static inline void
223ff46e 679handle_action_dollar (char *text, location loc)
e9955c83
AD
680{
681 const char *type_name = NULL;
366eea36 682 char *cp = text + 1;
e9955c83
AD
683
684 /* Get the type name if explicit. */
685 if (*cp == '<')
686 {
687 type_name = ++cp;
688 while (*cp != '>')
689 ++cp;
690 *cp = '\0';
691 ++cp;
692 }
693
694 if (*cp == '$')
695 {
696 if (!type_name)
223ff46e 697 type_name = symbol_list_n_type_name_get (current_rule, loc, 0);
e9955c83 698 if (!type_name && typed)
223ff46e 699 complain_at (loc, _("$$ of `%s' has no declared type"),
97650f4e 700 current_rule->sym->tag);
e9955c83
AD
701 if (!type_name)
702 type_name = "";
223ff46e 703 obstack_fgrow1 (&obstack_for_string,
e9955c83
AD
704 "]b4_lhs_value([%s])[", type_name);
705 }
d8d3f94a 706 else
e9955c83 707 {
d8d3f94a 708 long num;
223ff46e 709 set_errno (0);
d8d3f94a 710 num = strtol (cp, 0, 10);
e9955c83 711
223ff46e 712 if (INT_MIN <= num && num <= rule_length && ! get_errno ())
e9955c83 713 {
d8d3f94a 714 int n = num;
e9955c83 715 if (!type_name && n > 0)
223ff46e 716 type_name = symbol_list_n_type_name_get (current_rule, loc, n);
e9955c83 717 if (!type_name && typed)
223ff46e
PE
718 complain_at (loc, _("$%d of `%s' has no declared type"),
719 n, current_rule->sym->tag);
e9955c83
AD
720 if (!type_name)
721 type_name = "";
223ff46e 722 obstack_fgrow3 (&obstack_for_string,
e9955c83
AD
723 "]b4_rhs_value([%d], [%d], [%s])[",
724 rule_length, n, type_name);
725 }
d8d3f94a 726 else
223ff46e 727 complain_at (loc, _("integer out of range: %s"), quote (text));
9280d3ef
AD
728 }
729}
730
731
366eea36 732/*---------------------------------------------------------------.
d8d3f94a 733| TEXT is expected to be $$ in some code associated to a symbol: |
366eea36
AD
734| destructor or printer. |
735`---------------------------------------------------------------*/
9280d3ef 736
f25bfb75 737static inline void
223ff46e 738handle_symbol_code_dollar (char *text, location loc)
9280d3ef 739{
366eea36 740 char *cp = text + 1;
9280d3ef 741 if (*cp == '$')
223ff46e 742 obstack_sgrow (&obstack_for_string, "]b4_dollar_dollar[");
9280d3ef 743 else
223ff46e 744 complain_at (loc, _("invalid value: %s"), quote (text));
e9955c83
AD
745}
746
f25bfb75
AD
747
748/*-----------------------------------------------------------------.
749| Dispatch onto handle_action_dollar, or handle_destructor_dollar, |
750| depending upon CODE_KIND. |
751`-----------------------------------------------------------------*/
e9955c83
AD
752
753static void
223ff46e 754handle_dollar (braced_code braced_code_kind, char *text, location loc)
f25bfb75
AD
755{
756 switch (braced_code_kind)
757 {
758 case action_braced_code:
223ff46e 759 handle_action_dollar (text, loc);
f25bfb75
AD
760 break;
761
762 case destructor_braced_code:
366eea36 763 case printer_braced_code:
223ff46e 764 handle_symbol_code_dollar (text, loc);
f25bfb75
AD
765 break;
766 }
767}
768
769
770/*------------------------------------------------------.
771| TEXT is a location token (i.e., a `@...'). Output to |
223ff46e 772| OBSTACK_FOR_STRING a reference to this location. |
f25bfb75
AD
773`------------------------------------------------------*/
774
775static inline void
223ff46e 776handle_action_at (char *text, location loc)
e9955c83 777{
366eea36 778 char *cp = text + 1;
e9955c83 779 locations_flag = 1;
e9955c83 780
366eea36 781 if (*cp == '$')
e9955c83 782 {
223ff46e 783 obstack_sgrow (&obstack_for_string, "]b4_lhs_location[");
e9955c83 784 }
d8d3f94a 785 else
e9955c83 786 {
d8d3f94a 787 long num;
223ff46e 788 set_errno (0);
d8d3f94a 789 num = strtol (cp, 0, 10);
dafdc66f 790
223ff46e 791 if (INT_MIN <= num && num <= rule_length && ! get_errno ())
d8d3f94a
PE
792 {
793 int n = num;
223ff46e 794 obstack_fgrow2 (&obstack_for_string, "]b4_rhs_location([%d], [%d])[",
d8d3f94a
PE
795 rule_length, n);
796 }
e9955c83 797 else
223ff46e 798 complain_at (loc, _("integer out of range: %s"), quote (text));
f25bfb75
AD
799 }
800}
801
802
366eea36 803/*---------------------------------------------------------------.
d8d3f94a 804| TEXT is expected to be @$ in some code associated to a symbol: |
366eea36
AD
805| destructor or printer. |
806`---------------------------------------------------------------*/
f25bfb75
AD
807
808static inline void
223ff46e 809handle_symbol_code_at (char *text, location loc)
f25bfb75 810{
366eea36
AD
811 char *cp = text + 1;
812 if (*cp == '$')
223ff46e 813 obstack_sgrow (&obstack_for_string, "]b4_at_dollar[");
f25bfb75 814 else
223ff46e 815 complain_at (loc, _("invalid value: %s"), quote (text));
e9955c83 816}
4cdb01db 817
f25bfb75
AD
818
819/*-------------------------------------------------------------------.
820| Dispatch onto handle_action_at, or handle_destructor_at, depending |
821| upon CODE_KIND. |
822`-------------------------------------------------------------------*/
823
824static void
223ff46e 825handle_at (braced_code braced_code_kind, char *text, location loc)
f25bfb75
AD
826{
827 switch (braced_code_kind)
828 {
829 case action_braced_code:
223ff46e 830 handle_action_at (text, loc);
f25bfb75
AD
831 break;
832
833 case destructor_braced_code:
366eea36 834 case printer_braced_code:
223ff46e 835 handle_symbol_code_at (text, loc);
f25bfb75
AD
836 break;
837 }
838}
839
840
d8d3f94a
PE
/*------------------------------------------------------------------.
| Convert universal character name UCN to a single-byte character,  |
| and return that character.  Return -1 if UCN does not correspond  |
| to a single-byte character.                                       |
|                                                                   |
| UCN points at the backslash; its first two characters (`\u' or    |
| `\U') are skipped and the rest is read as a hexadecimal number.   |
`------------------------------------------------------------------*/

static int
convert_ucn_to_byte (char const *ucn)
{
  unsigned long code = strtoul (ucn + 2, 0, 16);

  /* FIXME: Currently we assume Unicode-compatible unibyte characters
     on ASCII hosts (i.e., Latin-1 on hosts with 8-bit bytes).  On
     non-ASCII hosts we support only the portable C character set.
     These limitations should be removed once we add support for
     multibyte characters.  */

  if (UCHAR_MAX < code)
    return -1;

#if ! ('$' == 0x24 && '@' == 0x40 && '`' == 0x60 && '~' == 0x7e)
  {
    /* A non-ASCII host.  Use CODE to index into a table of the C
       basic execution character set, which is guaranteed to exist on
       all Standard C platforms.  This table also includes '$', '@',
       and '`', which are not in the basic execution character set but
       which are unibyte characters on all the platforms that we know
       about.  */
    static signed char const table[] =
      {
        '\0',   -1,   -1,   -1,   -1,   -1,   -1, '\a',
        '\b', '\t', '\n', '\v', '\f', '\r',   -1,   -1,
          -1,   -1,   -1,   -1,   -1,   -1,   -1,   -1,
          -1,   -1,   -1,   -1,   -1,   -1,   -1,   -1,
         ' ',  '!',  '"',  '#',  '$',  '%',  '&', '\'',
         '(',  ')',  '*',  '+',  ',',  '-',  '.',  '/',
         '0',  '1',  '2',  '3',  '4',  '5',  '6',  '7',
         '8',  '9',  ':',  ';',  '<',  '=',  '>',  '?',
         '@',  'A',  'B',  'C',  'D',  'E',  'F',  'G',
         'H',  'I',  'J',  'K',  'L',  'M',  'N',  'O',
         'P',  'Q',  'R',  'S',  'T',  'U',  'V',  'W',
         'X',  'Y',  'Z',  '[', '\\',  ']',  '^',  '_',
         '`',  'a',  'b',  'c',  'd',  'e',  'f',  'g',
         'h',  'i',  'j',  'k',  'l',  'm',  'n',  'o',
         'p',  'q',  'r',  's',  't',  'u',  'v',  'w',
         'x',  'y',  'z',  '{',  '|',  '}',  '~'
      };

    /* Return the entry as an int right away.  Storing the -1 marker
       back into the unsigned CODE and narrowing it to int on return
       would rely on an implementation-defined conversion.  */
    return code < sizeof table ? table[code] : -1;
  }
#else
  /* CODE is at most UCHAR_MAX here, so it is returned unchanged.  */
  return code;
#endif
}
895
896
900c5db5
AD
897/*----------------------------------------------------------------.
898| Handle `#line INT "FILE"'. ARGS has already skipped `#line '. |
899`----------------------------------------------------------------*/
900
901static void
3f2d73f1 902handle_syncline (char *args)
900c5db5
AD
903{
904 int lineno = strtol (args, &args, 10);
905 const char *file = NULL;
906 file = strchr (args, '"') + 1;
907 *strchr (file, '"') = 0;
3f2d73f1
PE
908 scanner_cursor.file = current_file = xstrdup (file);
909 scanner_cursor.line = lineno;
910 scanner_cursor.column = 1;
900c5db5
AD
911}
912
a706a1cc 913
3f2d73f1
PE
914/*------------------------------------------------------------------------.
915| Report an unexpected EOF in a token or comment starting at START. |
916| An end of file was encountered and the expected TOKEN_END was missing. |
917| After reporting the problem, pretend that TOKEN_END was found. |
918`------------------------------------------------------------------------*/
a706a1cc
PE
919
920static void
3f2d73f1 921unexpected_end_of_file (boundary start, char const *token_end)
a706a1cc 922{
345532d7 923 size_t i = strlen (token_end);
a706a1cc 924
223ff46e
PE
925 location loc;
926 loc.start = start;
927 loc.end = scanner_cursor;
928 complain_at (loc, _("missing `%s' at end of file"), token_end);
345532d7 929
3f2d73f1
PE
930 /* Adjust scanner cursor so that any later message does not count
931 the characters about to be inserted. */
932 scanner_cursor.column -= i;
345532d7
PE
933
934 while (i != 0)
935 unput (token_end[--i]);
a706a1cc
PE
936}
937
938
f25bfb75
AD
939/*-------------------------.
940| Initialize the scanner. |
941`-------------------------*/
942
1d6412ad
AD
943void
944scanner_initialize (void)
945{
223ff46e 946 obstack_init (&obstack_for_string);
1d6412ad
AD
947}
948
949
f25bfb75
AD
950/*-----------------------------------------------.
951| Free all the memory allocated to the scanner. |
952`-----------------------------------------------*/
953
4cdb01db
AD
954void
955scanner_free (void)
956{
223ff46e 957 obstack_free (&obstack_for_string, 0);
536545f3
AD
958 /* Reclaim Flex's buffers. */
959 yy_delete_buffer (YY_CURRENT_BUFFER);
4cdb01db 960}