]>
Commit | Line | Data |
---|---|---|
e9955c83 AD |
1 | /* Bison Grammar Scanner -*- C -*- |
2 | Copyright (C) 2002 Free Software Foundation, Inc. | |
3 | ||
4 | This file is part of Bison, the GNU Compiler Compiler. | |
5 | ||
6 | This program is free software; you can redistribute it and/or modify | |
7 | it under the terms of the GNU General Public License as published by | |
8 | the Free Software Foundation; either version 2 of the License, or | |
9 | (at your option) any later version. | |
10 | ||
11 | This program is distributed in the hope that it will be useful, | |
12 | but WITHOUT ANY WARRANTY; without even the implied warranty of | |
13 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
14 | GNU General Public License for more details. | |
15 | ||
16 | You should have received a copy of the GNU General Public License | |
17 | along with this program; if not, write to the Free Software | |
18 | Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA | |
19 | 02111-1307 USA | |
20 | */ | |
21 | ||
a706a1cc | 22 | %option debug nodefault noyywrap never-interactive |
e9955c83 AD |
23 | %option prefix="gram_" outfile="lex.yy.c" |
24 | ||
25 | %{ | |
26 | #include "system.h" | |
d8d3f94a | 27 | #include "mbswidth.h" |
e9955c83 AD |
28 | #include "complain.h" |
29 | #include "quote.h" | |
3e6656f9 | 30 | #include "struniq.h" |
e9955c83 AD |
31 | #include "getargs.h" |
32 | #include "gram.h" | |
33 | #include "reader.h" | |
34 | ||
35 | /* Each time we match a string, move the end cursor to its end. */ | |
8efe435c AD |
36 | #define YY_USER_INIT \ |
37 | do { \ | |
38 | LOCATION_RESET (*yylloc); \ | |
95612cfa | 39 | yylloc->file = current_file; \ |
8efe435c AD |
40 | /* This is only to avoid GCC warnings. */ \ |
41 | if (yycontrol) {;}; \ | |
42 | } while (0) | |
43 | ||
d8d3f94a PE |
44 | #define YY_USER_ACTION extend_location (yylloc, yytext, yyleng); |
45 | #define YY_STEP LOCATION_STEP (*yylloc) | |
46 | ||
47 | #define YY_INPUT(buf, result, size) ((result) = no_cr_read (yyin, buf, size)) | |
48 | ||
49 | ||
/* Read bytes from FP into buffer BUF of size SIZE.  Return the
   number of bytes stored in BUF.  Remove '\r' from input, treating
   \r\n and isolated \r as \n.

   The conversion is done in place: BUF is compacted as "\r\n" pairs
   collapse to a single '\n', so the returned count may be smaller
   than the number of bytes actually read from FP.  */

static size_t
no_cr_read (FILE *fp, char *buf, size_t size)
{
  size_t nread = fread (buf, 1, size, fp);
  char *dst;
  char const *src;
  char const *end;

  if (nread == 0)
    return nread;

  /* Fast path: nothing to translate.  */
  dst = memchr (buf, '\r', nread);
  if (dst == NULL)
    return nread;

  /* DST addresses the slot of the '\r' just found; SRC is the next
     unread input byte; END is one past the last byte read.  */
  src = dst + 1;
  end = buf + nread;

  for (;;)
    {
      /* A '\r' becomes '\n'...  */
      *dst++ = '\n';

      /* ...and a '\n' immediately following it is dropped.  When the
         '\r' was the last byte of the buffer, the following byte has
         to be peeked from FP itself.  */
      if (src == end)
        {
          int next = getc (fp);
          if (next != '\n' && ungetc (next, fp) != next)
            break;
        }
      else if (*src == '\n')
        src++;

      /* Shift the input down until the next '\r' or the end.  */
      for (;;)
        {
          if (src == end)
            return dst - buf;
          if (*src == '\r')
            break;
          *dst++ = *src++;
        }

      /* Step over the '\r'; its output slot (DST) is filled at the
         top of the loop.  */
      src++;
    }

  return dst - buf;
}
95 | ||
96 | ||
97 | /* Extend *LOC to account for token TOKEN of size SIZE. */ | |
98 | ||
99 | static void | |
100 | extend_location (location_t *loc, char const *token, int size) | |
101 | { | |
102 | int line = loc->last_line; | |
103 | int column = loc->last_column; | |
104 | char const *p0 = token; | |
105 | char const *p = token; | |
106 | char const *lim = token + size; | |
107 | ||
108 | for (p = token; p < lim; p++) | |
109 | switch (*p) | |
110 | { | |
111 | case '\r': | |
112 | /* \r shouldn't survive no_cr_read. */ | |
113 | abort (); | |
114 | ||
115 | case '\n': | |
116 | line++; | |
117 | column = 1; | |
118 | p0 = p + 1; | |
119 | break; | |
120 | ||
121 | case '\t': | |
122 | column += mbsnwidth (p0, p - p0, 0); | |
123 | column += 8 - ((column - 1) & 7); | |
124 | p0 = p + 1; | |
125 | break; | |
126 | } | |
127 | ||
128 | loc->last_line = line; | |
129 | loc->last_column = column + mbsnwidth (p0, p - p0, 0); | |
130 | } | |
131 | ||
132 | ||
e9955c83 | 133 | |
44995b2e AD |
134 | /* STRING_OBSTACK -- Used to store all the characters that we need to |
135 | keep (to construct ID, STRINGS etc.). Use the following macros to | |
136 | use it. | |
137 | ||
1d6412ad AD |
138 | Use YY_OBS_GROW to append what has just been matched, and |
139 | YY_OBS_FINISH to end the string (it puts the ending 0). | |
140 | YY_OBS_FINISH also stores this string in LAST_STRING, which can be | |
141 | used, and which is used by YY_OBS_FREE to free the last string. */ | |
44995b2e AD |
142 | |
143 | static struct obstack string_obstack; | |
44995b2e | 144 | |
44995b2e AD |
145 | #define YY_OBS_GROW \ |
146 | obstack_grow (&string_obstack, yytext, yyleng) | |
147 | ||
148 | #define YY_OBS_FINISH \ | |
149 | do { \ | |
150 | obstack_1grow (&string_obstack, '\0'); \ | |
151 | last_string = obstack_finish (&string_obstack); \ | |
44995b2e AD |
152 | } while (0) |
153 | ||
a706a1cc PE |
154 | #define YY_OBS_FREE \ |
155 | obstack_free (&string_obstack, last_string) | |
e9955c83 | 156 | |
e9955c83 | 157 | |
efcb44dd PE |
158 | /* Within well-formed rules, RULE_LENGTH is the number of values in |
159 | the current rule so far, which says where to find `$0' with respect | |
160 | to the top of the stack. It is not the same as the rule->length in | |
161 | the case of mid rule actions. | |
162 | ||
163 | Outside of well-formed rules, RULE_LENGTH has an undefined value. */ | |
164 | static int rule_length; | |
165 | ||
d33cb3ae PE |
166 | static void handle_dollar (braced_code_t code_kind, |
167 | char *cp, location_t location); | |
168 | static void handle_at (braced_code_t code_kind, | |
169 | char *cp, location_t location); | |
900c5db5 | 170 | static void handle_syncline (char *args, location_t *location); |
d8d3f94a | 171 | static int convert_ucn_to_byte (char const *hex_text); |
345532d7 | 172 | static void unexpected_end_of_file (location_t *, char const *); |
e9955c83 AD |
173 | |
174 | %} | |
d8d3f94a | 175 | %x SC_COMMENT SC_LINE_COMMENT SC_YACC_COMMENT |
e9955c83 AD |
176 | %x SC_STRING SC_CHARACTER |
177 | %x SC_ESCAPED_STRING SC_ESCAPED_CHARACTER | |
178 | %x SC_BRACED_CODE SC_PROLOGUE SC_EPILOGUE | |
179 | ||
29c01725 AD |
180 | letter [.abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ_] |
181 | id {letter}({letter}|[0-9])* | |
182 | directive %{letter}({letter}|[0-9]|-)* | |
183 | int [0-9]+ | |
d8d3f94a PE |
184 | |
185 | /* POSIX says that a tag must be both an id and a C union member, but | |
186 | historically almost any character is allowed in a tag. We disallow | |
187 | NUL and newline, as this simplifies our implementation. */ | |
188 | tag [^\0\n>]+ | |
189 | ||
190 | /* Zero or more instances of backslash-newline. Following GCC, allow | |
191 | white space between the backslash and the newline. */ | |
192 | splice (\\[ \f\t\v]*\n)* | |
e9955c83 AD |
193 | |
194 | %% | |
195 | %{ | |
a706a1cc | 196 | /* Nesting level of the current code in braces. */ |
1a9e39f1 PE |
197 | int braces_level IF_LINT (= 0); |
198 | ||
a706a1cc PE |
199 | /* Scanner context when scanning C code. */ |
200 | int c_context IF_LINT (= 0); | |
201 | ||
202 | /* A string representing the most recently saved token. */ | |
203 | char *last_string; | |
204 | ||
e9955c83 AD |
205 | /* At each yylex invocation, mark the current position as the |
206 | start of the next token. */ | |
e9955c83 | 207 | YY_STEP; |
e9955c83 AD |
208 | %} |
209 | ||
210 | ||
211 | /*----------------------------. | |
212 | | Scanning Bison directives. | | |
213 | `----------------------------*/ | |
214 | <INITIAL> | |
215 | { | |
216 | "%binary" return PERCENT_NONASSOC; | |
217 | "%debug" return PERCENT_DEBUG; | |
218 | "%define" return PERCENT_DEFINE; | |
219 | "%defines" return PERCENT_DEFINES; | |
9280d3ef | 220 | "%destructor" return PERCENT_DESTRUCTOR; |
676385e2 | 221 | "%dprec" return PERCENT_DPREC; |
e9955c83 AD |
222 | "%error"[-_]"verbose" return PERCENT_ERROR_VERBOSE; |
223 | "%expect" return PERCENT_EXPECT; | |
224 | "%file-prefix" return PERCENT_FILE_PREFIX; | |
225 | "%fixed"[-_]"output"[-_]"files" return PERCENT_YACC; | |
ae7453f2 | 226 | "%glr-parser" return PERCENT_GLR_PARSER; |
e9955c83 AD |
227 | "%left" return PERCENT_LEFT; |
228 | "%locations" return PERCENT_LOCATIONS; | |
676385e2 | 229 | "%merge" return PERCENT_MERGE; |
e9955c83 AD |
230 | "%name"[-_]"prefix" return PERCENT_NAME_PREFIX; |
231 | "%no"[-_]"lines" return PERCENT_NO_LINES; | |
232 | "%nonassoc" return PERCENT_NONASSOC; | |
233 | "%nterm" return PERCENT_NTERM; | |
234 | "%output" return PERCENT_OUTPUT; | |
ae7453f2 | 235 | "%parse-param" return PERCENT_PARSE_PARAM; |
d8d3f94a | 236 | "%prec" rule_length--; return PERCENT_PREC; |
366eea36 | 237 | "%printer" return PERCENT_PRINTER; |
e9955c83 AD |
238 | "%pure"[-_]"parser" return PERCENT_PURE_PARSER; |
239 | "%right" return PERCENT_RIGHT; | |
ae7453f2 | 240 | "%lex-param" return PERCENT_LEX_PARAM; |
e9955c83 AD |
241 | "%skeleton" return PERCENT_SKELETON; |
242 | "%start" return PERCENT_START; | |
243 | "%term" return PERCENT_TOKEN; | |
244 | "%token" return PERCENT_TOKEN; | |
245 | "%token"[-_]"table" return PERCENT_TOKEN_TABLE; | |
246 | "%type" return PERCENT_TYPE; | |
247 | "%union" return PERCENT_UNION; | |
248 | "%verbose" return PERCENT_VERBOSE; | |
249 | "%yacc" return PERCENT_YACC; | |
250 | ||
29c01725 AD |
251 | {directive} { |
252 | complain_at (*yylloc, _("invalid directive: %s"), quote (yytext)); | |
253 | YY_STEP; | |
254 | } | |
255 | ||
900c5db5 AD |
256 | ^"#line "{int}" \""[^\"]*"\"\n" handle_syncline (yytext + strlen ("#line "), yylloc); YY_STEP; |
257 | ||
e9955c83 | 258 | "=" return EQUAL; |
d8d3f94a PE |
259 | ":" rule_length = 0; return COLON; |
260 | "|" rule_length = 0; return PIPE; | |
ae7453f2 | 261 | "," return COMMA; |
e9955c83 AD |
262 | ";" return SEMICOLON; |
263 | ||
a706a1cc | 264 | [ \f\n\t\v] YY_STEP; |
d8d3f94a | 265 | |
e9955c83 | 266 | {id} { |
39f41916 | 267 | yylval->symbol = symbol_get (yytext, *yylloc); |
efcb44dd | 268 | rule_length++; |
e9955c83 AD |
269 | return ID; |
270 | } | |
271 | ||
d8d3f94a PE |
272 | {int} { |
273 | unsigned long num; | |
274 | errno = 0; | |
275 | num = strtoul (yytext, 0, 10); | |
276 | if (INT_MAX < num || errno) | |
277 | { | |
98f2caaa | 278 | complain_at (*yylloc, _("integer out of range: %s"), quote (yytext)); |
d8d3f94a PE |
279 | num = INT_MAX; |
280 | } | |
281 | yylval->integer = num; | |
282 | return INT; | |
283 | } | |
e9955c83 AD |
284 | |
285 | /* Characters. We don't check there is only one. */ | |
a706a1cc | 286 | "'" YY_OBS_GROW; BEGIN SC_ESCAPED_CHARACTER; |
e9955c83 AD |
287 | |
288 | /* Strings. */ | |
a706a1cc | 289 | "\"" YY_OBS_GROW; BEGIN SC_ESCAPED_STRING; |
e9955c83 AD |
290 | |
291 | /* Comments. */ | |
d8d3f94a | 292 | "/*" BEGIN SC_YACC_COMMENT; |
e9955c83 AD |
293 | "//".* YY_STEP; |
294 | ||
295 | /* Prologue. */ | |
a706a1cc | 296 | "%{" BEGIN SC_PROLOGUE; |
e9955c83 AD |
297 | |
298 | /* Code in between braces. */ | |
a706a1cc | 299 | "{" YY_OBS_GROW; braces_level = 0; BEGIN SC_BRACED_CODE; |
e9955c83 AD |
300 | |
301 | /* A type. */ | |
d8d3f94a | 302 | "<"{tag}">" { |
4cdb01db AD |
303 | obstack_grow (&string_obstack, yytext + 1, yyleng - 2); |
304 | YY_OBS_FINISH; | |
3e6656f9 AD |
305 | yylval->struniq = struniq_new (last_string); |
306 | YY_OBS_FREE; | |
4cdb01db AD |
307 | return TYPE; |
308 | } | |
309 | ||
a706a1cc PE |
310 | "%%" { |
311 | static int percent_percent_count; | |
e9955c83 | 312 | if (++percent_percent_count == 2) |
a706a1cc | 313 | BEGIN SC_EPILOGUE; |
e9955c83 AD |
314 | return PERCENT_PERCENT; |
315 | } | |
316 | ||
a706a1cc | 317 | . { |
c4d720cd | 318 | complain_at (*yylloc, _("invalid character: %s"), quote (yytext)); |
e9955c83 AD |
319 | YY_STEP; |
320 | } | |
321 | } | |
322 | ||
323 | ||
d8d3f94a PE |
324 | /*---------------------------------------------------------------. |
325 | | Scanning a Yacc comment. The initial `/ *' is already eaten. | | |
326 | `---------------------------------------------------------------*/ | |
e9955c83 | 327 | |
d8d3f94a | 328 | <SC_YACC_COMMENT> |
e9955c83 | 329 | { |
d8d3f94a PE |
330 | "*/" { |
331 | YY_STEP; | |
332 | BEGIN INITIAL; | |
e9955c83 AD |
333 | } |
334 | ||
a706a1cc | 335 | .|\n ; |
345532d7 | 336 | <<EOF>> unexpected_end_of_file (yylloc, "*/"); |
d8d3f94a PE |
337 | } |
338 | ||
339 | ||
340 | /*------------------------------------------------------------. | |
341 | | Scanning a C comment. The initial `/ *' is already eaten. | | |
342 | `------------------------------------------------------------*/ | |
343 | ||
344 | <SC_COMMENT> | |
345 | { | |
a706a1cc | 346 | "*"{splice}"/" YY_OBS_GROW; BEGIN c_context; |
345532d7 | 347 | <<EOF>> unexpected_end_of_file (yylloc, "*/"); |
e9955c83 AD |
348 | } |
349 | ||
350 | ||
d8d3f94a PE |
351 | /*--------------------------------------------------------------. |
352 | | Scanning a line comment. The initial `//' is already eaten. | | |
353 | `--------------------------------------------------------------*/ | |
354 | ||
355 | <SC_LINE_COMMENT> | |
356 | { | |
a706a1cc PE |
357 | "\n" YY_OBS_GROW; BEGIN c_context; |
358 | {splice} YY_OBS_GROW; | |
359 | <<EOF>> BEGIN c_context; | |
d8d3f94a PE |
360 | } |
361 | ||
362 | ||
e9955c83 AD |
363 | /*----------------------------------------------------------------. |
364 | | Scanning a C string, including its escapes. The initial `"' is | | |
365 | | already eaten. | | |
366 | `----------------------------------------------------------------*/ | |
367 | ||
368 | <SC_ESCAPED_STRING> | |
369 | { | |
db2cc12f | 370 | "\"" { |
44995b2e AD |
371 | YY_OBS_GROW; |
372 | YY_OBS_FINISH; | |
4cdb01db | 373 | yylval->string = last_string; |
efcb44dd | 374 | rule_length++; |
a706a1cc | 375 | BEGIN INITIAL; |
e9955c83 AD |
376 | return STRING; |
377 | } | |
378 | ||
a706a1cc | 379 | .|\n YY_OBS_GROW; |
345532d7 | 380 | <<EOF>> unexpected_end_of_file (yylloc, "\""); |
e9955c83 AD |
381 | } |
382 | ||
383 | /*---------------------------------------------------------------. | |
384 | | Scanning a C character, decoding its escapes. The initial "'" | | |
385 | | is already eaten. | | |
386 | `---------------------------------------------------------------*/ | |
387 | ||
388 | <SC_ESCAPED_CHARACTER> | |
389 | { | |
db2cc12f | 390 | "'" { |
44995b2e | 391 | YY_OBS_GROW; |
44995b2e | 392 | YY_OBS_FINISH; |
a706a1cc PE |
393 | yylval->symbol = symbol_get (last_string, *yylloc); |
394 | symbol_class_set (yylval->symbol, token_sym, *yylloc); | |
395 | symbol_user_token_number_set (yylval->symbol, | |
396 | (unsigned char) last_string[1], *yylloc); | |
397 | YY_OBS_FREE; | |
398 | rule_length++; | |
399 | BEGIN INITIAL; | |
400 | return ID; | |
e9955c83 | 401 | } |
a706a1cc PE |
402 | |
403 | .|\n YY_OBS_GROW; | |
345532d7 | 404 | <<EOF>> unexpected_end_of_file (yylloc, "'"); |
e9955c83 AD |
405 | } |
406 | ||
407 | ||
408 | /*----------------------------. | |
409 | | Decode escaped characters. | | |
410 | `----------------------------*/ | |
411 | ||
412 | <SC_ESCAPED_STRING,SC_ESCAPED_CHARACTER> | |
413 | { | |
d8d3f94a PE |
414 | \\[0-7]{1,3} { |
415 | unsigned long c = strtoul (yytext + 1, 0, 8); | |
416 | if (UCHAR_MAX < c) | |
e9955c83 | 417 | { |
98f2caaa PE |
418 | complain_at (*yylloc, _("invalid escape sequence: %s"), |
419 | quote (yytext)); | |
e9955c83 AD |
420 | YY_STEP; |
421 | } | |
422 | else | |
423 | obstack_1grow (&string_obstack, c); | |
424 | } | |
425 | ||
d8d3f94a PE |
426 | \\x[0-9a-fA-F]+ { |
427 | unsigned long c; | |
428 | errno = 0; | |
429 | c = strtoul (yytext + 2, 0, 16); | |
430 | if (UCHAR_MAX < c || errno) | |
431 | { | |
98f2caaa PE |
432 | complain_at (*yylloc, _("invalid escape sequence: %s"), |
433 | quote (yytext)); | |
d8d3f94a PE |
434 | YY_STEP; |
435 | } | |
436 | else | |
437 | obstack_1grow (&string_obstack, c); | |
e9955c83 AD |
438 | } |
439 | ||
440 | \\a obstack_1grow (&string_obstack, '\a'); | |
441 | \\b obstack_1grow (&string_obstack, '\b'); | |
442 | \\f obstack_1grow (&string_obstack, '\f'); | |
443 | \\n obstack_1grow (&string_obstack, '\n'); | |
444 | \\r obstack_1grow (&string_obstack, '\r'); | |
445 | \\t obstack_1grow (&string_obstack, '\t'); | |
446 | \\v obstack_1grow (&string_obstack, '\v'); | |
c4d720cd | 447 | \\[\"\'?\\] obstack_1grow (&string_obstack, yytext[1]); |
d8d3f94a PE |
448 | \\(u|U[0-9a-fA-F]{4})[0-9a-fA-F]{4} { |
449 | int c = convert_ucn_to_byte (yytext); | |
450 | if (c < 0) | |
451 | { | |
98f2caaa PE |
452 | complain_at (*yylloc, _("invalid escape sequence: %s"), |
453 | quote (yytext)); | |
d8d3f94a PE |
454 | YY_STEP; |
455 | } | |
456 | else | |
457 | obstack_1grow (&string_obstack, c); | |
458 | } | |
4f25ebb0 | 459 | \\(.|\n) { |
98f2caaa PE |
460 | complain_at (*yylloc, _("unrecognized escape sequence: %s"), |
461 | quote (yytext)); | |
44995b2e | 462 | YY_OBS_GROW; |
e9955c83 AD |
463 | } |
464 | } | |
465 | ||
466 | ||
467 | /*----------------------------------------------------------. | |
468 | | Scanning a C character without decoding its escapes. The | | |
469 | | initial "'" is already eaten. | | |
470 | `----------------------------------------------------------*/ | |
471 | ||
472 | <SC_CHARACTER> | |
473 | { | |
ac060e78 PE |
474 | "'" YY_OBS_GROW; BEGIN c_context; |
475 | \\{splice}[\'\\] YY_OBS_GROW; | |
476 | <<EOF>> unexpected_end_of_file (yylloc, "'"); | |
e9955c83 AD |
477 | } |
478 | ||
479 | ||
480 | /*----------------------------------------------------------------. | |
481 | | Scanning a C string, without decoding its escapes. The initial | | |
482 | | `"' is already eaten. | | |
483 | `----------------------------------------------------------------*/ | |
484 | ||
485 | <SC_STRING> | |
486 | { | |
ac060e78 PE |
487 | "\"" YY_OBS_GROW; BEGIN c_context; |
488 | \\{splice}[\"\\] YY_OBS_GROW; | |
489 | <<EOF>> unexpected_end_of_file (yylloc, "\""); | |
e9955c83 AD |
490 | } |
491 | ||
492 | ||
493 | /*---------------------------------------------------. | |
494 | | Strings, comments etc. can be found in user code. | | |
495 | `---------------------------------------------------*/ | |
496 | ||
497 | <SC_BRACED_CODE,SC_PROLOGUE,SC_EPILOGUE> | |
498 | { | |
a706a1cc PE |
499 | "'" YY_OBS_GROW; c_context = YY_START; BEGIN SC_CHARACTER; |
500 | "\"" YY_OBS_GROW; c_context = YY_START; BEGIN SC_STRING; | |
501 | "/"{splice}"*" YY_OBS_GROW; c_context = YY_START; BEGIN SC_COMMENT; | |
502 | "/"{splice}"/" YY_OBS_GROW; c_context = YY_START; BEGIN SC_LINE_COMMENT; | |
e9955c83 AD |
503 | } |
504 | ||
505 | ||
506 | /*---------------------------------------------------------------. | |
507 | | Scanning some code in braces (%union and actions). The initial | | |
508 | | "{" is already eaten. | | |
509 | `---------------------------------------------------------------*/ | |
510 | ||
511 | <SC_BRACED_CODE> | |
512 | { | |
1a9e39f1 PE |
513 | "{"|"<"{splice}"%" YY_OBS_GROW; braces_level++; |
514 | "%"{splice}">" YY_OBS_GROW; braces_level--; | |
e9955c83 | 515 | "}" { |
44995b2e | 516 | YY_OBS_GROW; |
1a9e39f1 PE |
517 | braces_level--; |
518 | if (braces_level < 0) | |
e9955c83 | 519 | { |
44995b2e | 520 | YY_OBS_FINISH; |
4cdb01db | 521 | yylval->string = last_string; |
efcb44dd | 522 | rule_length++; |
a706a1cc | 523 | BEGIN INITIAL; |
e9955c83 AD |
524 | return BRACED_CODE; |
525 | } | |
526 | } | |
527 | ||
a706a1cc PE |
528 | /* Tokenize `<<%' correctly (as `<<' `%') rather than incorrectly |
529 | (as `<' `<%'). */ | |
530 | "<"{splice}"<" YY_OBS_GROW; | |
531 | ||
d8d3f94a | 532 | "$"("<"{tag}">")?(-?[0-9]+|"$") { handle_dollar (current_braced_code, |
f25bfb75 AD |
533 | yytext, *yylloc); } |
534 | "@"(-?[0-9]+|"$") { handle_at (current_braced_code, | |
535 | yytext, *yylloc); } | |
e9955c83 | 536 | |
345532d7 | 537 | <<EOF>> unexpected_end_of_file (yylloc, "}"); |
e9955c83 AD |
538 | } |
539 | ||
540 | ||
541 | /*--------------------------------------------------------------. | |
542 | | Scanning some prologue: from "%{" (already scanned) to "%}". | | |
543 | `--------------------------------------------------------------*/ | |
544 | ||
545 | <SC_PROLOGUE> | |
546 | { | |
547 | "%}" { | |
44995b2e | 548 | YY_OBS_FINISH; |
4cdb01db | 549 | yylval->string = last_string; |
a706a1cc | 550 | BEGIN INITIAL; |
e9955c83 AD |
551 | return PROLOGUE; |
552 | } | |
553 | ||
345532d7 | 554 | <<EOF>> unexpected_end_of_file (yylloc, "%}"); |
e9955c83 AD |
555 | } |
556 | ||
557 | ||
558 | /*---------------------------------------------------------------. | |
559 | | Scanning the epilogue (everything after the second "%%", which | | |
d8d3f94a | 560 | | has already been eaten). | |
e9955c83 AD |
561 | `---------------------------------------------------------------*/ |
562 | ||
563 | <SC_EPILOGUE> | |
564 | { | |
e9955c83 | 565 | <<EOF>> { |
44995b2e | 566 | YY_OBS_FINISH; |
4cdb01db | 567 | yylval->string = last_string; |
a706a1cc | 568 | BEGIN INITIAL; |
e9955c83 AD |
569 | return EPILOGUE; |
570 | } | |
571 | } | |
572 | ||
573 | ||
a706a1cc PE |
574 | /*----------------------------------------------------------------. |
575 | | By default, grow the string obstack with the input, escaping M4 | | |
576 | | quoting characters. | | |
577 | `----------------------------------------------------------------*/ | |
578 | ||
579 | <SC_COMMENT,SC_LINE_COMMENT,SC_STRING,SC_CHARACTER,SC_BRACED_CODE,SC_PROLOGUE,SC_EPILOGUE> | |
580 | { | |
ac060e78 PE |
581 | \$ obstack_sgrow (&string_obstack, "$]["); |
582 | \@ obstack_sgrow (&string_obstack, "@@"); | |
583 | \[ obstack_sgrow (&string_obstack, "@{"); | |
584 | \] obstack_sgrow (&string_obstack, "@}"); | |
a706a1cc PE |
585 | .|\n YY_OBS_GROW; |
586 | } | |
587 | ||
588 | ||
e9955c83 AD |
589 | %% |
590 | ||
591 | /*------------------------------------------------------------------. | |
366eea36 | 592 | | TEXT is pointing to a wannabee semantic value (i.e., a `$'). | |
e9955c83 AD |
593 | | | |
594 | | Possible inputs: $[<TYPENAME>]($|integer) | | |
595 | | | | |
596 | | Output to the STRING_OBSTACK a reference to this semantic value. | | |
597 | `------------------------------------------------------------------*/ | |
598 | ||
f25bfb75 | 599 | static inline void |
366eea36 | 600 | handle_action_dollar (char *text, location_t location) |
e9955c83 AD |
601 | { |
602 | const char *type_name = NULL; | |
366eea36 | 603 | char *cp = text + 1; |
e9955c83 AD |
604 | |
605 | /* Get the type name if explicit. */ | |
606 | if (*cp == '<') | |
607 | { | |
608 | type_name = ++cp; | |
609 | while (*cp != '>') | |
610 | ++cp; | |
611 | *cp = '\0'; | |
612 | ++cp; | |
613 | } | |
614 | ||
615 | if (*cp == '$') | |
616 | { | |
617 | if (!type_name) | |
56c47203 | 618 | type_name = symbol_list_n_type_name_get (current_rule, location, 0); |
e9955c83 | 619 | if (!type_name && typed) |
56c47203 | 620 | complain_at (location, _("$$ of `%s' has no declared type"), |
97650f4e | 621 | current_rule->sym->tag); |
e9955c83 AD |
622 | if (!type_name) |
623 | type_name = ""; | |
624 | obstack_fgrow1 (&string_obstack, | |
625 | "]b4_lhs_value([%s])[", type_name); | |
626 | } | |
d8d3f94a | 627 | else |
e9955c83 | 628 | { |
d8d3f94a PE |
629 | long num; |
630 | errno = 0; | |
631 | num = strtol (cp, 0, 10); | |
e9955c83 | 632 | |
d8d3f94a | 633 | if (INT_MIN <= num && num <= rule_length && ! errno) |
e9955c83 | 634 | { |
d8d3f94a | 635 | int n = num; |
e9955c83 | 636 | if (!type_name && n > 0) |
56c47203 AD |
637 | type_name = symbol_list_n_type_name_get (current_rule, location, |
638 | n); | |
e9955c83 | 639 | if (!type_name && typed) |
56c47203 | 640 | complain_at (location, _("$%d of `%s' has no declared type"), |
97650f4e | 641 | n, current_rule->sym->tag); |
e9955c83 AD |
642 | if (!type_name) |
643 | type_name = ""; | |
644 | obstack_fgrow3 (&string_obstack, | |
645 | "]b4_rhs_value([%d], [%d], [%s])[", | |
646 | rule_length, n, type_name); | |
647 | } | |
d8d3f94a | 648 | else |
98f2caaa | 649 | complain_at (location, _("integer out of range: %s"), quote (text)); |
9280d3ef AD |
650 | } |
651 | } | |
652 | ||
653 | ||
366eea36 | 654 | /*---------------------------------------------------------------. |
d8d3f94a | 655 | | TEXT is expected to be $$ in some code associated to a symbol: | |
366eea36 AD |
656 | | destructor or printer. | |
657 | `---------------------------------------------------------------*/ | |
9280d3ef | 658 | |
f25bfb75 | 659 | static inline void |
366eea36 | 660 | handle_symbol_code_dollar (char *text, location_t location) |
9280d3ef | 661 | { |
366eea36 | 662 | char *cp = text + 1; |
9280d3ef | 663 | if (*cp == '$') |
366eea36 | 664 | obstack_sgrow (&string_obstack, "]b4_dollar_dollar["); |
9280d3ef | 665 | else |
c4d720cd | 666 | complain_at (location, _("invalid value: %s"), quote (text)); |
e9955c83 AD |
667 | } |
668 | ||
f25bfb75 AD |
669 | |
670 | /*-----------------------------------------------------------------. | |
671 | | Dispatch onto handle_action_dollar, or handle_destructor_dollar, | | |
672 | | depending upon CODE_KIND. | | |
673 | `-----------------------------------------------------------------*/ | |
e9955c83 AD |
674 | |
675 | static void | |
f25bfb75 AD |
676 | handle_dollar (braced_code_t braced_code_kind, |
677 | char *text, location_t location) | |
678 | { | |
679 | switch (braced_code_kind) | |
680 | { | |
681 | case action_braced_code: | |
682 | handle_action_dollar (text, location); | |
683 | break; | |
684 | ||
685 | case destructor_braced_code: | |
366eea36 AD |
686 | case printer_braced_code: |
687 | handle_symbol_code_dollar (text, location); | |
f25bfb75 AD |
688 | break; |
689 | } | |
690 | } | |
691 | ||
692 | ||
693 | /*------------------------------------------------------. | |
694 | | TEXT is a location token (i.e., a `@...'). Output to | | |
695 | | STRING_OBSTACK a reference to this location. | | |
696 | `------------------------------------------------------*/ | |
697 | ||
698 | static inline void | |
699 | handle_action_at (char *text, location_t location) | |
e9955c83 | 700 | { |
366eea36 | 701 | char *cp = text + 1; |
e9955c83 | 702 | locations_flag = 1; |
e9955c83 | 703 | |
366eea36 | 704 | if (*cp == '$') |
e9955c83 AD |
705 | { |
706 | obstack_sgrow (&string_obstack, "]b4_lhs_location["); | |
707 | } | |
d8d3f94a | 708 | else |
e9955c83 | 709 | { |
d8d3f94a PE |
710 | long num; |
711 | errno = 0; | |
712 | num = strtol (cp, 0, 10); | |
dafdc66f | 713 | |
d8d3f94a PE |
714 | if (INT_MIN <= num && num <= rule_length && ! errno) |
715 | { | |
716 | int n = num; | |
717 | obstack_fgrow2 (&string_obstack, "]b4_rhs_location([%d], [%d])[", | |
718 | rule_length, n); | |
719 | } | |
e9955c83 | 720 | else |
98f2caaa | 721 | complain_at (location, _("integer out of range: %s"), quote (text)); |
f25bfb75 AD |
722 | } |
723 | } | |
724 | ||
725 | ||
366eea36 | 726 | /*---------------------------------------------------------------. |
d8d3f94a | 727 | | TEXT is expected to be @$ in some code associated to a symbol: | |
366eea36 AD |
728 | | destructor or printer. | |
729 | `---------------------------------------------------------------*/ | |
f25bfb75 AD |
730 | |
731 | static inline void | |
366eea36 | 732 | handle_symbol_code_at (char *text, location_t location) |
f25bfb75 | 733 | { |
366eea36 AD |
734 | char *cp = text + 1; |
735 | if (*cp == '$') | |
736 | obstack_sgrow (&string_obstack, "]b4_at_dollar["); | |
f25bfb75 | 737 | else |
c4d720cd | 738 | complain_at (location, _("invalid value: %s"), quote (text)); |
e9955c83 | 739 | } |
4cdb01db | 740 | |
f25bfb75 AD |
741 | |
742 | /*-------------------------------------------------------------------. | |
743 | | Dispatch onto handle_action_at, or handle_destructor_at, depending | | |
744 | | upon CODE_KIND. | | |
745 | `-------------------------------------------------------------------*/ | |
746 | ||
747 | static void | |
748 | handle_at (braced_code_t braced_code_kind, | |
749 | char *text, location_t location) | |
750 | { | |
751 | switch (braced_code_kind) | |
752 | { | |
753 | case action_braced_code: | |
754 | handle_action_at (text, location); | |
755 | break; | |
756 | ||
757 | case destructor_braced_code: | |
366eea36 AD |
758 | case printer_braced_code: |
759 | handle_symbol_code_at (text, location); | |
f25bfb75 AD |
760 | break; |
761 | } | |
762 | } | |
763 | ||
764 | ||
d8d3f94a PE |
/*------------------------------------------------------------------.
| Convert universal character name UCN to a single-byte character,  |
| and return that character.  Return -1 if UCN does not correspond  |
| to a single-byte character.                                       |
|                                                                   |
| The first two characters of UCN are skipped (the scanner passes   |
| the whole `\u...' or `\U...' escape); the rest is read as a       |
| hexadecimal code point.                                           |
`------------------------------------------------------------------*/

static int
convert_ucn_to_byte (char const *ucn)
{
  unsigned long code = strtoul (ucn + 2, 0, 16);

  /* FIXME: Currently we assume Unicode-compatible unibyte characters
     on ASCII hosts (i.e., Latin-1 on hosts with 8-bit bytes).  On
     non-ASCII hosts we support only the portable C character set.
     These limitations should be removed once we add support for
     multibyte characters.  */

  if (UCHAR_MAX < code)
    return -1;

#if ! ('$' == 0x24 && '@' == 0x40 && '`' == 0x60 && '~' == 0x7e)
  {
    /* A non-ASCII host.  Use CODE to index into a table of the C
       basic execution character set, which is guaranteed to exist on
       all Standard C platforms.  This table also includes '$', '@',
       and '`', which are not in the basic execution character set but
       which are unibyte characters on all the platforms that we know
       about.  */
    static signed char const table[] =
      {
        '\0',   -1,   -1,   -1,   -1,   -1,   -1, '\a',
        '\b', '\t', '\n', '\v', '\f', '\r',   -1,   -1,
          -1,   -1,   -1,   -1,   -1,   -1,   -1,   -1,
          -1,   -1,   -1,   -1,   -1,   -1,   -1,   -1,
         ' ',  '!',  '"',  '#',  '$',  '%',  '&', '\'',
         '(',  ')',  '*',  '+',  ',',  '-',  '.',  '/',
         '0',  '1',  '2',  '3',  '4',  '5',  '6',  '7',
         '8',  '9',  ':',  ';',  '<',  '=',  '>',  '?',
         '@',  'A',  'B',  'C',  'D',  'E',  'F',  'G',
         'H',  'I',  'J',  'K',  'L',  'M',  'N',  'O',
         'P',  'Q',  'R',  'S',  'T',  'U',  'V',  'W',
         'X',  'Y',  'Z',  '[', '\\',  ']',  '^',  '_',
         '`',  'a',  'b',  'c',  'd',  'e',  'f',  'g',
         'h',  'i',  'j',  'k',  'l',  'm',  'n',  'o',
         'p',  'q',  'r',  's',  't',  'u',  'v',  'w',
         'x',  'y',  'z',  '{',  '|',  '}',  '~'
      };

    /* Return the table entry as an int directly: storing -1 into the
       unsigned CODE and converting it back to int on return would
       rely on an implementation-defined conversion.  */
    return code < sizeof table ? table[code] : -1;
  }
#else
  /* An ASCII host: CODE is at most UCHAR_MAX here and is used as is.  */
  return (int) code;
#endif
}
819 | ||
820 | ||
900c5db5 AD |
821 | /*----------------------------------------------------------------. |
822 | | Handle `#line INT "FILE"'. ARGS has already skipped `#line '. | | |
823 | `----------------------------------------------------------------*/ | |
824 | ||
825 | static void | |
826 | handle_syncline (char *args, location_t *location) | |
827 | { | |
828 | int lineno = strtol (args, &args, 10); | |
829 | const char *file = NULL; | |
830 | file = strchr (args, '"') + 1; | |
831 | *strchr (file, '"') = 0; | |
95612cfa AD |
832 | current_file = xstrdup (file); |
833 | location->file = current_file; | |
900c5db5 AD |
834 | location->last_line = lineno; |
835 | } | |
836 | ||
a706a1cc PE |
837 | |
838 | /*-------------------------------------------------------------. | |
839 | | Report an unexpected end of file at LOC. An end of file was | | |
840 | | encountered and the expected TOKEN_END was missing. After | | |
841 | | reporting the problem, pretend that TOKEN_END was found. | | |
842 | `-------------------------------------------------------------*/ | |
843 | ||
844 | static void | |
345532d7 | 845 | unexpected_end_of_file (location_t *loc, char const *token_end) |
a706a1cc | 846 | { |
345532d7 | 847 | size_t i = strlen (token_end); |
a706a1cc | 848 | |
345532d7 PE |
849 | complain_at (*loc, _("missing `%s' at end of file"), token_end); |
850 | ||
851 | /* Adjust location's last column so that any later message does not | |
852 | mention the characters just inserted. */ | |
853 | loc->last_column -= i; | |
854 | ||
855 | while (i != 0) | |
856 | unput (token_end[--i]); | |
a706a1cc PE |
857 | } |
858 | ||
859 | ||
f25bfb75 AD |
860 | /*-------------------------. |
861 | | Initialize the scanner. | | |
862 | `-------------------------*/ | |
863 | ||
1d6412ad AD |
864 | void |
865 | scanner_initialize (void) | |
866 | { | |
867 | obstack_init (&string_obstack); | |
868 | } | |
869 | ||
870 | ||
f25bfb75 AD |
871 | /*-----------------------------------------------. |
872 | | Free all the memory allocated to the scanner. | | |
873 | `-----------------------------------------------*/ | |
874 | ||
4cdb01db AD |
875 | void |
876 | scanner_free (void) | |
877 | { | |
878 | obstack_free (&string_obstack, 0); | |
536545f3 AD |
879 | /* Reclaim Flex's buffers. */ |
880 | yy_delete_buffer (YY_CURRENT_BUFFER); | |
4cdb01db | 881 | } |