1 /* Bison Grammar Scanner -*- C -*-
3 Copyright (C) 2002, 2003, 2004, 2005, 2006 Free Software Foundation, Inc.
5 This file is part of Bison, the GNU Compiler Compiler.
7 This program is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 2 of the License, or
10 (at your option) any later version.
12 This program is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with this program; if not, write to the Free Software
19 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
23 %option debug nodefault nounput noyywrap never-interactive
24 %option prefix="gram_" outfile="lex.yy.c"
27 /* Work around a bug in flex 2.5.31. See Debian bug 333231
28 <http://bugs.debian.org/cgi-bin/bugreport.cgi?bug=333231>. */
45 #define YY_USER_INIT \
48 scanner_cursor.file = current_file; \
49 scanner_cursor.line = 1; \
50 scanner_cursor.column = 1; \
51 code_start = scanner_cursor; \
55 /* Pacify "gcc -Wmissing-prototypes" when flex 2.5.31 is used. */
56 int gram_get_lineno (void);
57 FILE *gram_get_in (void);
58 FILE *gram_get_out (void);
59 int gram_get_leng (void);
60 char *gram_get_text (void);
61 void gram_set_lineno (int);
62 void gram_set_in (FILE *);
63 void gram_set_out (FILE *);
64 int gram_get_debug (void);
65 void gram_set_debug (int);
66 int gram_lex_destroy (void);
68 /* Location of scanner cursor. */
69 boundary scanner_cursor;
71 static void adjust_location (location *, char const *, size_t);
72 #define YY_USER_ACTION adjust_location (loc, yytext, yyleng);
74 static size_t no_cr_read (FILE *, char *, size_t);
75 #define YY_INPUT(buf, result, size) ((result) = no_cr_read (yyin, buf, size))
78 /* OBSTACK_FOR_STRING -- Used to store all the characters that we need to
79 keep (to construct ID, STRINGS etc.). Use the following macros to
82 Use STRING_GROW to append what has just been matched, and
83 STRING_FINISH to end the string (it puts the ending 0).
84 STRING_FINISH also stores this string in LAST_STRING, which can be
85 used, and which is used by STRING_FREE to free the last string. */
87 static struct obstack obstack_for_string;
89 /* A string representing the most recently saved token. */
92 /* The location of the most recently saved token, if it was a
93 BRACED_CODE token; otherwise, this has an unspecified value. */
94 location last_braced_code_loc;
97 obstack_grow (&obstack_for_string, yytext, yyleng)
99 #define STRING_FINISH \
101 obstack_1grow (&obstack_for_string, '\0'); \
102 last_string = obstack_finish (&obstack_for_string); \
105 #define STRING_FREE \
106 obstack_free (&obstack_for_string, last_string)
109 scanner_last_string_free (void)
114 /* Within well-formed rules, RULE_LENGTH is the number of values in
115 the current rule so far, which says where to find `$0' with respect
116 to the top of the stack. It is not the same as the rule->length in
117 the case of mid rule actions.
119 Outside of well-formed rules, RULE_LENGTH has an undefined value. */
120 static int rule_length;
122 static void rule_length_overflow (location) __attribute__ ((__noreturn__));
124 /* Increment the rule length by one, checking for overflow. */
126 increment_rule_length (location loc)
130 /* Don't allow rule_length == INT_MAX, since that might cause
131 confusion with strtol if INT_MAX == LONG_MAX. */
132 if (rule_length == INT_MAX)
133 rule_length_overflow (loc);
136 static void handle_dollar (int token_type, char *cp, location loc);
137 static void handle_at (int token_type, char *cp, location loc);
138 static void handle_syncline (char *, location);
139 static unsigned long int scan_integer (char const *p, int base, location loc);
140 static int convert_ucn_to_byte (char const *hex_text);
141 static void unexpected_eof (boundary, char const *);
142 static void unexpected_newline (boundary, char const *);
145 %x SC_COMMENT SC_LINE_COMMENT SC_YACC_COMMENT
146 %x SC_STRING SC_CHARACTER
147 %x SC_AFTER_IDENTIFIER
148 %x SC_ESCAPED_STRING SC_ESCAPED_CHARACTER
149 %x SC_PRE_CODE SC_BRACED_CODE SC_PROLOGUE SC_EPILOGUE
151 letter [.abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ_]
152 id {letter}({letter}|[0-9])*
153 directive %{letter}({letter}|[0-9]|-)*
156 /* POSIX says that a tag must be both an id and a C union member, but
157 historically almost any character is allowed in a tag. We disallow
158 NUL and newline, as this simplifies our implementation. */
161 /* Zero or more instances of backslash-newline. Following GCC, allow
162 white space between the backslash and the newline. */
163 splice (\\[ \f\t\v]*\n)*
167 /* Nesting level of the current code in braces. */
168 int braces_level IF_LINT (= 0);
170 /* Parent context state, when applicable. */
171 int context_state IF_LINT (= 0);
173 /* Token type to return, when applicable. */
174 int token_type IF_LINT (= 0);
176 /* Location of most recent identifier, when applicable. */
177 location id_loc IF_LINT (= empty_location);
179 /* Where containing code started, when applicable. Its initial
180 value is relevant only when yylex is invoked in the SC_EPILOGUE
182 boundary code_start = scanner_cursor;
184 /* Where containing comment or string or character literal started,
186 boundary token_start IF_LINT (= scanner_cursor);
190 /*-----------------------.
191 | Scanning white space. |
192 `-----------------------*/
194 <INITIAL,SC_AFTER_IDENTIFIER,SC_PRE_CODE>
196 /* Comments and white space. */
197 "," warn_at (*loc, _("stray `,' treated as white space"));
201 token_start = loc->start;
202 context_state = YY_START;
203 BEGIN SC_YACC_COMMENT;
206 /* #line directives are not documented, and may be withdrawn or
207 modified in future versions of Bison. */
208 ^"#line "{int}" \"".*"\"\n" {
209 handle_syncline (yytext + sizeof "#line " - 1, *loc);
214 /*----------------------------.
215 | Scanning Bison directives. |
216 `----------------------------*/
219 "%binary" return PERCENT_NONASSOC;
220 "%debug" return PERCENT_DEBUG;
221 "%default"[-_]"prec" return PERCENT_DEFAULT_PREC;
222 "%define" return PERCENT_DEFINE;
223 "%defines" return PERCENT_DEFINES;
224 "%destructor" token_type = PERCENT_DESTRUCTOR; BEGIN SC_PRE_CODE;
225 "%dprec" return PERCENT_DPREC;
226 "%error"[-_]"verbose" return PERCENT_ERROR_VERBOSE;
227 "%expect" return PERCENT_EXPECT;
228 "%expect"[-_]"rr" return PERCENT_EXPECT_RR;
229 "%file-prefix" return PERCENT_FILE_PREFIX;
230 "%fixed"[-_]"output"[-_]"files" return PERCENT_YACC;
231 "%initial-action" token_type = PERCENT_INITIAL_ACTION; BEGIN SC_PRE_CODE;
232 "%glr-parser" return PERCENT_GLR_PARSER;
233 "%left" return PERCENT_LEFT;
234 "%lex-param" token_type = PERCENT_LEX_PARAM; BEGIN SC_PRE_CODE;
235 "%locations" return PERCENT_LOCATIONS;
236 "%merge" return PERCENT_MERGE;
237 "%name"[-_]"prefix" return PERCENT_NAME_PREFIX;
238 "%no"[-_]"default"[-_]"prec" return PERCENT_NO_DEFAULT_PREC;
239 "%no"[-_]"lines" return PERCENT_NO_LINES;
240 "%nonassoc" return PERCENT_NONASSOC;
241 "%nondeterministic-parser" return PERCENT_NONDETERMINISTIC_PARSER;
242 "%nterm" return PERCENT_NTERM;
243 "%output" return PERCENT_OUTPUT;
244 "%parse-param" token_type = PERCENT_PARSE_PARAM; BEGIN SC_PRE_CODE;
245 "%prec" rule_length--; return PERCENT_PREC;
246 "%printer" token_type = PERCENT_PRINTER; BEGIN SC_PRE_CODE;
247 "%pure"[-_]"parser" return PERCENT_PURE_PARSER;
248 "%require" return PERCENT_REQUIRE;
249 "%right" return PERCENT_RIGHT;
250 "%skeleton" return PERCENT_SKELETON;
251 "%start" return PERCENT_START;
252 "%term" return PERCENT_TOKEN;
253 "%token" return PERCENT_TOKEN;
254 "%token"[-_]"table" return PERCENT_TOKEN_TABLE;
255 "%type" return PERCENT_TYPE;
256 "%union" token_type = PERCENT_UNION; BEGIN SC_PRE_CODE;
257 "%verbose" return PERCENT_VERBOSE;
258 "%yacc" return PERCENT_YACC;
261 complain_at (*loc, _("invalid directive: %s"), quote (yytext));
265 "|" rule_length = 0; return PIPE;
266 ";" return SEMICOLON;
269 val->symbol = symbol_get (yytext, *loc);
271 increment_rule_length (*loc);
272 BEGIN SC_AFTER_IDENTIFIER;
276 val->integer = scan_integer (yytext, 10, *loc);
279 0[xX][0-9abcdefABCDEF]+ {
280 val->integer = scan_integer (yytext, 16, *loc);
284 /* Characters. We don't check there is only one. */
285 "'" STRING_GROW; token_start = loc->start; BEGIN SC_ESCAPED_CHARACTER;
288 "\"" token_start = loc->start; BEGIN SC_ESCAPED_STRING;
291 "%{" code_start = loc->start; BEGIN SC_PROLOGUE;
293 /* Code in between braces. */
295 if (current_rule->action)
296 grammar_midrule_action ();
298 token_type = BRACED_CODE;
300 code_start = loc->start;
301 BEGIN SC_BRACED_CODE;
306 obstack_grow (&obstack_for_string, yytext + 1, yyleng - 2);
308 val->uniqstr = uniqstr_new (last_string);
314 static int percent_percent_count;
315 if (++percent_percent_count == 2)
317 return PERCENT_PERCENT;
321 complain_at (*loc, _("invalid character: %s"), quote (yytext));
325 loc->start = loc->end = scanner_cursor;
331 /*-----------------------------------------------------------------.
332 | Scanning after an identifier, checking whether a colon is next. |
333 `-----------------------------------------------------------------*/
335 <SC_AFTER_IDENTIFIER>
344 scanner_cursor.column -= mbsnwidth (yytext, yyleng, 0);
358 /*---------------------------------------------------------------.
359 | Scanning a Yacc comment. The initial `/ *' is already eaten. |
360 `---------------------------------------------------------------*/
364 "*/" BEGIN context_state;
366 <<EOF>> unexpected_eof (token_start, "*/"); BEGIN context_state;
370 /*------------------------------------------------------------.
371 | Scanning a C comment. The initial `/ *' is already eaten. |
372 `------------------------------------------------------------*/
376 "*"{splice}"/" STRING_GROW; BEGIN context_state;
377 <<EOF>> unexpected_eof (token_start, "*/"); BEGIN context_state;
381 /*--------------------------------------------------------------.
382 | Scanning a line comment. The initial `//' is already eaten. |
383 `--------------------------------------------------------------*/
387 "\n" STRING_GROW; BEGIN context_state;
388 {splice} STRING_GROW;
389 <<EOF>> BEGIN context_state;
393 /*------------------------------------------------.
394 | Scanning a Bison string, including its escapes. |
395 | The initial quote is already eaten. |
396 `------------------------------------------------*/
402 loc->start = token_start;
403 val->chars = last_string;
404 increment_rule_length (*loc);
408 \n unexpected_newline (token_start, "\""); BEGIN INITIAL;
409 <<EOF>> unexpected_eof (token_start, "\""); BEGIN INITIAL;
412 /*----------------------------------------------------------.
413 | Scanning a Bison character literal, decoding its escapes. |
414 | The initial quote is already eaten. |
415 `----------------------------------------------------------*/
417 <SC_ESCAPED_CHARACTER>
420 unsigned char last_string_1;
423 loc->start = token_start;
424 val->symbol = symbol_get (quotearg_style (escape_quoting_style,
427 symbol_class_set (val->symbol, token_sym, *loc, false);
428 last_string_1 = last_string[1];
429 symbol_user_token_number_set (val->symbol, last_string_1, *loc);
431 increment_rule_length (*loc);
435 \n unexpected_newline (token_start, "'"); BEGIN INITIAL;
436 <<EOF>> unexpected_eof (token_start, "'"); BEGIN INITIAL;
439 <SC_ESCAPED_CHARACTER,SC_ESCAPED_STRING>
441 \0 complain_at (*loc, _("invalid null character"));
445 /*----------------------------.
446 | Decode escaped characters. |
447 `----------------------------*/
449 <SC_ESCAPED_STRING,SC_ESCAPED_CHARACTER>
452 unsigned long int c = strtoul (yytext + 1, NULL, 8);
454 complain_at (*loc, _("invalid escape sequence: %s"), quote (yytext));
456 complain_at (*loc, _("invalid null character: %s"), quote (yytext));
458 obstack_1grow (&obstack_for_string, c);
461 \\x[0-9abcdefABCDEF]+ {
462 verify (UCHAR_MAX < ULONG_MAX);
463 unsigned long int c = strtoul (yytext + 2, NULL, 16);
465 complain_at (*loc, _("invalid escape sequence: %s"), quote (yytext));
467 complain_at (*loc, _("invalid null character: %s"), quote (yytext));
469 obstack_1grow (&obstack_for_string, c);
472 \\a obstack_1grow (&obstack_for_string, '\a');
473 \\b obstack_1grow (&obstack_for_string, '\b');
474 \\f obstack_1grow (&obstack_for_string, '\f');
475 \\n obstack_1grow (&obstack_for_string, '\n');
476 \\r obstack_1grow (&obstack_for_string, '\r');
477 \\t obstack_1grow (&obstack_for_string, '\t');
478 \\v obstack_1grow (&obstack_for_string, '\v');
480 /* \\[\"\'?\\] would be shorter, but it confuses xgettext. */
481 \\("\""|"'"|"?"|"\\") obstack_1grow (&obstack_for_string, yytext[1]);
483 \\(u|U[0-9abcdefABCDEF]{4})[0-9abcdefABCDEF]{4} {
484 int c = convert_ucn_to_byte (yytext);
486 complain_at (*loc, _("invalid escape sequence: %s"), quote (yytext));
488 complain_at (*loc, _("invalid null character: %s"), quote (yytext));
490 obstack_1grow (&obstack_for_string, c);
493 complain_at (*loc, _("unrecognized escape sequence: %s"), quote (yytext));
498 /*--------------------------------------------.
499 | Scanning user-code characters and strings. |
500 `--------------------------------------------*/
502 <SC_CHARACTER,SC_STRING>
504 {splice}|\\{splice}[^\n$@\[\]] STRING_GROW;
509 "'" STRING_GROW; BEGIN context_state;
510 \n unexpected_newline (token_start, "'"); BEGIN context_state;
511 <<EOF>> unexpected_eof (token_start, "'"); BEGIN context_state;
516 "\"" STRING_GROW; BEGIN context_state;
517 \n unexpected_newline (token_start, "\""); BEGIN context_state;
518 <<EOF>> unexpected_eof (token_start, "\""); BEGIN context_state;
522 /*---------------------------------------------------.
523 | Strings, comments etc. can be found in user code. |
524 `---------------------------------------------------*/
526 <SC_BRACED_CODE,SC_PROLOGUE,SC_EPILOGUE>
530 context_state = YY_START;
531 token_start = loc->start;
536 context_state = YY_START;
537 token_start = loc->start;
542 context_state = YY_START;
543 token_start = loc->start;
548 context_state = YY_START;
549 BEGIN SC_LINE_COMMENT;
554 /*---------------------------------------------------------------.
555 | Scanning after %union etc., possibly followed by white space. |
556 | For %union only, allow arbitrary C code to appear before the |
557 | following brace, as an extension to POSIX. |
558 `---------------------------------------------------------------*/
563 bool valid = yytext[0] == '{' || token_type == PERCENT_UNION;
564 scanner_cursor.column -= mbsnwidth (yytext, yyleng, 0);
570 code_start = loc->start;
571 BEGIN SC_BRACED_CODE;
575 complain_at (*loc, _("missing `{' in %s"),
576 token_name (token_type));
577 obstack_sgrow (&obstack_for_string, "{}");
579 val->chars = last_string;
585 <<EOF>> unexpected_eof (scanner_cursor, "{}"); BEGIN INITIAL;
589 /*---------------------------------------------------------------.
590 | Scanning some code in braces (%union and actions). The initial |
591 | "{" is already eaten. |
592 `---------------------------------------------------------------*/
596 "{"|"<"{splice}"%" STRING_GROW; braces_level++;
597 "%"{splice}">" STRING_GROW; braces_level--;
599 bool outer_brace = --braces_level < 0;
601 /* As an undocumented Bison extension, append `;' before the last
602 brace in braced code, so that the user code can omit trailing
603 `;'. But do not append `;' if emulating Yacc, since Yacc does
606 FIXME: Bison should warn if a semicolon seems to be necessary
607 here, and should omit the semicolon if it seems unnecessary
608 (e.g., after ';', '{', or '}', each followed by comments or
609 white space). Such a warning shouldn't depend on --yacc; it
610 should depend on a new --pedantic option, which would cause
611 Bison to warn if it detects an extension to POSIX. --pedantic
612 should also diagnose other Bison extensions like %yacc.
613 Perhaps there should also be a GCC-style --pedantic-errors
614 option, so that such warnings are diagnosed as errors. */
615 if (outer_brace && token_type == BRACED_CODE && ! yacc_flag)
616 obstack_1grow (&obstack_for_string, ';');
618 obstack_1grow (&obstack_for_string, '}');
623 loc->start = code_start;
624 val->chars = last_string;
625 increment_rule_length (*loc);
626 last_braced_code_loc = *loc;
632 /* Tokenize `<<%' correctly (as `<<' `%') rather than incorrectly
634 "<"{splice}"<" STRING_GROW;
636 "$"("<"{tag}">")?(-?[0-9]+|"$") handle_dollar (token_type, yytext, *loc);
637 "@"(-?[0-9]+|"$") handle_at (token_type, yytext, *loc);
640 warn_at (*loc, _("stray `$'"));
641 obstack_sgrow (&obstack_for_string, "$][");
644 warn_at (*loc, _("stray `@'"));
645 obstack_sgrow (&obstack_for_string, "@@");
648 <<EOF>> unexpected_eof (code_start, "}"); BEGIN INITIAL;
652 /*--------------------------------------------------------------.
653 | Scanning some prologue: from "%{" (already scanned) to "%}". |
654 `--------------------------------------------------------------*/
660 loc->start = code_start;
661 val->chars = last_string;
666 <<EOF>> unexpected_eof (code_start, "%}"); BEGIN INITIAL;
670 /*---------------------------------------------------------------.
671 | Scanning the epilogue (everything after the second "%%", which |
672 | has already been eaten). |
673 `---------------------------------------------------------------*/
679 loc->start = code_start;
680 val->chars = last_string;
687 /*-----------------------------------------.
688 | Escape M4 quoting characters in C code. |
689 `-----------------------------------------*/
691 <SC_COMMENT,SC_LINE_COMMENT,SC_STRING,SC_CHARACTER,SC_BRACED_CODE,SC_PROLOGUE,SC_EPILOGUE>
693 \$ obstack_sgrow (&obstack_for_string, "$][");
694 \@ obstack_sgrow (&obstack_for_string, "@@");
695 \[ obstack_sgrow (&obstack_for_string, "@{");
696 \] obstack_sgrow (&obstack_for_string, "@}");
700 /*-----------------------------------------------------.
701 | By default, grow the string obstack with the input. |
702 `-----------------------------------------------------*/
704 <SC_COMMENT,SC_LINE_COMMENT,SC_BRACED_CODE,SC_PROLOGUE,SC_EPILOGUE,SC_STRING,SC_CHARACTER,SC_ESCAPED_STRING,SC_ESCAPED_CHARACTER>. |
705 <SC_COMMENT,SC_LINE_COMMENT,SC_BRACED_CODE,SC_PROLOGUE,SC_EPILOGUE>\n STRING_GROW;
709 /* Keeps track of the maximum number of semantic values to the left of
710 a handle (those referenced by $0, $-1, etc.) that are required by the
711 semantic actions of this grammar. */
712 int max_left_semantic_context = 0;
714 /* If BUF is null, add BUFSIZE (which in this case must be less than
715 INT_MAX) to COLUMN; otherwise, add mbsnwidth (BUF, BUFSIZE, 0) to
716 COLUMN. If an overflow occurs, or might occur but is undetectable,
717 return INT_MAX. Assume COLUMN is nonnegative. */
720 add_column_width (int column, char const *buf, size_t bufsize)
723 unsigned int remaining_columns = INT_MAX - column;
727 if (INT_MAX / 2 <= bufsize)
729 width = mbsnwidth (buf, bufsize, 0);
734 return width <= remaining_columns ? column + width : INT_MAX;
737 /* Set *LOC and adjust scanner cursor to account for token TOKEN of
741 adjust_location (location *loc, char const *token, size_t size)
743 int line = scanner_cursor.line;
744 int column = scanner_cursor.column;
745 char const *p0 = token;
746 char const *p = token;
747 char const *lim = token + size;
749 loc->start = scanner_cursor;
751 for (p = token; p < lim; p++)
755 line += line < INT_MAX;
761 column = add_column_width (column, p0, p - p0);
762 column = add_column_width (column, NULL, 8 - ((column - 1) & 7));
770 scanner_cursor.line = line;
771 scanner_cursor.column = column = add_column_width (column, p0, p - p0);
773 loc->end = scanner_cursor;
775 if (line == INT_MAX && loc->start.line != INT_MAX)
776 warn_at (*loc, _("line number overflow"));
777 if (column == INT_MAX && loc->start.column != INT_MAX)
778 warn_at (*loc, _("column number overflow"));
782 /* Read bytes from FP into buffer BUF of size SIZE. Return the
783 number of bytes read. Remove '\r' from input, treating \r\n
784 and isolated \r as \n. */
787 no_cr_read (FILE *fp, char *buf, size_t size)
789 size_t bytes_read = fread (buf, 1, size, fp);
792 char *w = memchr (buf, '\r', bytes_read);
796 char const *lim = buf + bytes_read;
800 /* Found an '\r'. Treat it like '\n', but ignore any
801 '\n' that immediately follows. */
806 if (ch != '\n' && ungetc (ch, fp) != ch)
812 /* Copy until the next '\r'. */
818 while ((*w++ = *r++) != '\r');
829 /*------------------------------------------------------------------.
830 | TEXT is pointing to a wannabe semantic value (i.e., a `$'). |
832 | Possible inputs: $[<TYPENAME>]($|integer) |
834 | Output to OBSTACK_FOR_STRING a reference to this semantic value. |
835 `------------------------------------------------------------------*/
838 handle_action_dollar (char *text, location loc)
840 const char *type_name = NULL;
846 /* Get the type name if explicit. */
859 type_name = symbol_list_n_type_name_get (current_rule, loc, 0);
860 if (!type_name && typed)
861 complain_at (loc, _("$$ of `%s' has no declared type"),
862 current_rule->sym->tag);
865 obstack_fgrow1 (&obstack_for_string,
866 "]b4_lhs_value([%s])[", type_name);
867 current_rule->used = true;
871 long int num = strtol (cp, NULL, 10);
873 if (1 - INT_MAX + rule_length <= num && num <= rule_length)
876 if (max_left_semantic_context < 1 - n)
877 max_left_semantic_context = 1 - n;
878 if (!type_name && 0 < n)
879 type_name = symbol_list_n_type_name_get (current_rule, loc, n);
880 if (!type_name && typed)
881 complain_at (loc, _("$%d of `%s' has no declared type"),
882 n, current_rule->sym->tag);
885 obstack_fgrow3 (&obstack_for_string,
886 "]b4_rhs_value(%d, %d, [%s])[",
887 rule_length, n, type_name);
888 symbol_list_n_used_set (current_rule, n, true);
891 complain_at (loc, _("integer out of range: %s"), quote (text));
898 /*----------------------------------------------------------------.
899 | Map `$?' onto the proper M4 symbol, depending on its TOKEN_TYPE |
900 | (are we in an action?). |
901 `----------------------------------------------------------------*/
904 handle_dollar (int token_type, char *text, location loc)
909 if (handle_action_dollar (text, loc))
913 case PERCENT_DESTRUCTOR:
914 case PERCENT_INITIAL_ACTION:
915 case PERCENT_PRINTER:
918 obstack_sgrow (&obstack_for_string, "]b4_dollar_dollar[");
927 complain_at (loc, _("invalid value: %s"), quote (text));
931 /*------------------------------------------------------.
932 | TEXT is a location token (i.e., a `@...'). Output to |
933 | OBSTACK_FOR_STRING a reference to this location. |
934 `------------------------------------------------------*/
937 handle_action_at (char *text, location loc)
940 locations_flag = true;
946 obstack_sgrow (&obstack_for_string, "]b4_lhs_location[");
949 long int num = strtol (cp, NULL, 10);
951 if (1 - INT_MAX + rule_length <= num && num <= rule_length)
954 obstack_fgrow2 (&obstack_for_string, "]b4_rhs_location(%d, %d)[",
958 complain_at (loc, _("integer out of range: %s"), quote (text));
965 /*----------------------------------------------------------------.
966 | Map `@?' onto the proper M4 symbol, depending on its TOKEN_TYPE |
967 | (are we in an action?). |
968 `----------------------------------------------------------------*/
971 handle_at (int token_type, char *text, location loc)
976 handle_action_at (text, loc);
979 case PERCENT_INITIAL_ACTION:
980 case PERCENT_DESTRUCTOR:
981 case PERCENT_PRINTER:
984 obstack_sgrow (&obstack_for_string, "]b4_at_dollar[");
993 complain_at (loc, _("invalid value: %s"), quote (text));
997 /*------------------------------------------------------.
998 | Scan NUMBER for a base-BASE integer at location LOC. |
999 `------------------------------------------------------*/
1001 static unsigned long int
1002 scan_integer (char const *number, int base, location loc)
1004 verify (INT_MAX < ULONG_MAX);
1005 unsigned long int num = strtoul (number, NULL, base);
1009 complain_at (loc, _("integer out of range: %s"), quote (number));
1017 /*------------------------------------------------------------------.
1018 | Convert universal character name UCN to a single-byte character, |
1019 | and return that character. Return -1 if UCN does not correspond |
1020 | to a single-byte character. |
1021 `------------------------------------------------------------------*/
1024 convert_ucn_to_byte (char const *ucn)
1026 verify (UCHAR_MAX <= INT_MAX);
1027 unsigned long int code = strtoul (ucn + 2, NULL, 16);
1029 /* FIXME: Currently we assume Unicode-compatible unibyte characters
1030 on ASCII hosts (i.e., Latin-1 on hosts with 8-bit bytes). On
1031 non-ASCII hosts we support only the portable C character set.
1032 These limitations should be removed once we add support for
1033 multibyte characters. */
1035 if (UCHAR_MAX < code)
1038 #if ! ('$' == 0x24 && '@' == 0x40 && '`' == 0x60 && '~' == 0x7e)
1040 /* A non-ASCII host. Use CODE to index into a table of the C
1041 basic execution character set, which is guaranteed to exist on
1042 all Standard C platforms. This table also includes '$', '@',
1043 and '`', which are not in the basic execution character set but
1044 which are unibyte characters on all the platforms that we know
1046 static signed char const table[] =
1048 '\0', -1, -1, -1, -1, -1, -1, '\a',
1049 '\b', '\t', '\n', '\v', '\f', '\r', -1, -1,
1050 -1, -1, -1, -1, -1, -1, -1, -1,
1051 -1, -1, -1, -1, -1, -1, -1, -1,
1052 ' ', '!', '"', '#', '$', '%', '&', '\'',
1053 '(', ')', '*', '+', ',', '-', '.', '/',
1054 '0', '1', '2', '3', '4', '5', '6', '7',
1055 '8', '9', ':', ';', '<', '=', '>', '?',
1056 '@', 'A', 'B', 'C', 'D', 'E', 'F', 'G',
1057 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O',
1058 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W',
1059 'X', 'Y', 'Z', '[', '\\', ']', '^', '_',
1060 '`', 'a', 'b', 'c', 'd', 'e', 'f', 'g',
1061 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o',
1062 'p', 'q', 'r', 's', 't', 'u', 'v', 'w',
1063 'x', 'y', 'z', '{', '|', '}', '~'
1066 code = code < sizeof table ? table[code] : -1;
1074 /*----------------------------------------------------------------.
1075 | Handle `#line INT "FILE"'. ARGS has already skipped `#line '. |
1076 `----------------------------------------------------------------*/
1079 handle_syncline (char *args, location loc)
1082 unsigned long int lineno = strtoul (args, &after_num, 10);
1083 char *file = strchr (after_num, '"') + 1;
1084 *strchr (file, '"') = '\0';
1085 if (INT_MAX <= lineno)
1087 warn_at (loc, _("line number overflow"));
1090 scanner_cursor.file = current_file = uniqstr_new (file);
1091 scanner_cursor.line = lineno;
1092 scanner_cursor.column = 1;
1096 /*---------------------------------.
1097 | Report a rule that is too long. |
1098 `---------------------------------*/
1101 rule_length_overflow (location loc)
1103 fatal_at (loc, _("rule is too long"));
1107 /*----------------------------------------------------------------.
1108 | For a token or comment starting at START, report message MSGID, |
1109 | which should say that an end marker was found before |
1110 | the expected TOKEN_END. |
1111 `----------------------------------------------------------------*/
1114 unexpected_end (boundary start, char const *msgid, char const *token_end)
1118 loc.end = scanner_cursor;
1119 complain_at (loc, _(msgid), token_end);
1123 /*------------------------------------------------------------------------.
1124 | Report an unexpected EOF in a token or comment starting at START. |
1125 | An end of file was encountered and the expected TOKEN_END was missing. |
1126 `------------------------------------------------------------------------*/
1129 unexpected_eof (boundary start, char const *token_end)
1131 unexpected_end (start, N_("missing `%s' at end of file"), token_end);
1135 /*----------------------------------------.
1136 | Likewise, but for unexpected newlines. |
1137 `----------------------------------------*/
1140 unexpected_newline (boundary start, char const *token_end)
1142 unexpected_end (start, N_("missing `%s' at end of line"), token_end);
1146 /*-------------------------.
1147 | Initialize the scanner. |
1148 `-------------------------*/
1151 scanner_initialize (void)
1153 obstack_init (&obstack_for_string);
1157 /*-----------------------------------------------.
1158 | Free all the memory allocated to the scanner. |
1159 `-----------------------------------------------*/
1164 obstack_free (&obstack_for_string, 0);
1165 /* Reclaim Flex's buffers. */
1166 yy_delete_buffer (YY_CURRENT_BUFFER);