src/scan-gram.l

   1 /* Bison Grammar Scanner                             -*- C -*-
   2    Copyright (C) 2002 Free Software Foundation, Inc.
   3
   4    This file is part of Bison, the GNU Compiler Compiler.
   5
   6    This program is free software; you can redistribute it and/or modify
   7    it under the terms of the GNU General Public License as published by
   8    the Free Software Foundation; either version 2 of the License, or
   9    (at your option) any later version.
  10
  11    This program is distributed in the hope that it will be useful,
  12    but WITHOUT ANY WARRANTY; without even the implied warranty of
  13    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  14    GNU General Public License for more details.
  15
  16    You should have received a copy of the GNU General Public License
  17    along with this program; if not, write to the Free Software
  18    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
  19    02111-1307  USA
  20 */
  21
  22 %option debug nodefault noyywrap never-interactive
  23 %option prefix="gram_" outfile="lex.yy.c"
  24
  25 %{
  26 #include "system.h"
  27
  28 #include <mbswidth.h>
  29 #include <get-errno.h>
  30 #include <quote.h>
  31
  32 #include "complain.h"
  33 #include "files.h"
  34 #include "getargs.h"
  35 #include "gram.h"
  36 #include "reader.h"
  37 #include "uniqstr.h"
  38
  39 #define YY_USER_INIT                                    \
  40   do                                                    \
  41     {                                                   \
  42       scanner_cursor.file = current_file;               \
  43       scanner_cursor.line = 1;                          \
  44       scanner_cursor.column = 1;                        \
  45     }                                                   \
  46   while (0)
  47
  48 /* Position of the scanner cursor.  YY_USER_INIT resets it to line 1, column 1 of CURRENT_FILE; adjust_location advances it.  */
  49 boundary scanner_cursor;
  50
  51 static void adjust_location (location *, char const *, size_t);
  52 #define YY_USER_ACTION  adjust_location (loc, yytext, yyleng);  /* Set *LOC and move the cursor over the matched text.  */
  53
  54 static size_t no_cr_read (FILE *, char *, size_t);
  55 #define YY_INPUT(buf, result, size) ((result) = no_cr_read (yyin, buf, size))  /* Input goes through no_cr_read, which turns carriage returns into newlines.  */
  56
  57
  58 /* OBSTACK_FOR_STRING -- Accumulates the characters that we need to
  59    keep (to construct IDs, strings, code, etc.).  Access it through
  60    the following macros.
  61
  62    Use STRING_GROW to append the text just matched (YYTEXT), and
  63    STRING_FINISH to terminate the string with a NUL byte.
  64    STRING_FINISH also stores the finished string in LAST_STRING, which
  65    callers may read, and which STRING_FREE uses to free that string.  */
  66
  67 static struct obstack obstack_for_string;
  68
  69 /* The string most recently completed by STRING_FINISH.  */
  70 static char *last_string;
  71
  72
  73 #define STRING_GROW   \
  74   obstack_grow (&obstack_for_string, yytext, yyleng)
  75
  76 #define STRING_FINISH                                   \
  77   do {                                                  \
  78     obstack_1grow (&obstack_for_string, '\0');          \
  79     last_string = obstack_finish (&obstack_for_string); \
  80   } while (0)
  81
  82 #define STRING_FREE \
  83   obstack_free (&obstack_for_string, last_string)
  84
  85 void
  86 scanner_last_string_free (void)
  87 {
  88   STRING_FREE;
  89 }
  90
  91 /* Within well-formed rules, RULE_LENGTH is the number of values in
  92    the current rule so far, which says where to find `$0' with respect
  93    to the top of the stack.  It is not the same as the rule->length in
  94    the case of mid rule actions.
  95
  96    Outside of well-formed rules, RULE_LENGTH has an undefined value.  */
  97 static int rule_length;
  98
  99 static void handle_dollar (braced_code code_kind, char *cp, location loc);
 100 static void handle_at (braced_code code_kind, char *cp, location loc);
 101 static void handle_syncline (char *args);
 102 static int convert_ucn_to_byte (char const *hex_text);
 103 static void unexpected_end_of_file (boundary, char const *);
 104
 105 %}
 106 %x SC_COMMENT SC_LINE_COMMENT SC_YACC_COMMENT
 107 %x SC_STRING SC_CHARACTER
 108 %x SC_AFTER_IDENTIFIER
 109 %x SC_ESCAPED_STRING SC_ESCAPED_CHARACTER
 110 %x SC_BRACED_CODE SC_PROLOGUE SC_EPILOGUE
 111
 112 letter    [.abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ_]
 113 id        {letter}({letter}|[0-9])*
 114 directive %{letter}({letter}|[0-9]|-)*
 115 int       [0-9]+
 116
 117 /* POSIX says that a tag must be both an id and a C union member, but
 118    historically almost any character is allowed in a tag.  We disallow
 119    NUL and newline, as this simplifies our implementation.  */
 120 tag      [^\0\n>]+
 121
 122 /* Zero or more instances of backslash-newline.  Following GCC, allow
 123    white space between the backslash and the newline.  */
 124 splice   (\\[ \f\t\v]*\n)*
 125
 126 %%
 127 %{
 128   /* Nesting level of the current code in braces; 0 just after the opening brace.  */
 129   int braces_level IF_LINT (= 0);
 130
 131   /* Start condition to resume when a nested comment, string or character literal ends, when applicable.  */
 132   int context_state IF_LINT (= 0);
 133
 134   /* Location of most recent identifier, when applicable.  */
 135   location id_loc IF_LINT (= *loc);
 136
 137   /* Where containing code started, when applicable.  */
 138   boundary code_start IF_LINT (= loc->start);
 139
 140   /* Where containing comment or string or character literal started,
 141      when applicable.  Used to report a missing terminator at end of file.  */
 142   boundary token_start IF_LINT (= loc->start);
 143 %}
 144
 145
 146   /*-----------------------.
 147   | Scanning white space.  |
 148   `-----------------------*/
 149
 150 <INITIAL,SC_AFTER_IDENTIFIER>
 151 {
 152   [ \f\n\t\v]  ;
 153
 154   /* Comments: C-style ones are skipped in SC_YACC_COMMENT, line comments are discarded here.  */
 155   "/*"         token_start = loc->start; context_state = YY_START; BEGIN SC_YACC_COMMENT;
 156   "//".*       ;
 157
 158   /* #line directives are not documented, and may be withdrawn or
 159      modified in future versions of Bison.  */
 160   ^"#line "{int}" \"".*"\"\n" {
 161     handle_syncline (yytext + sizeof "#line " - 1);  /* Pass the text that follows the directive keyword.  */
 162   }
 163 }
 164
 165
 166   /*----------------------------.
 167   | Scanning Bison directives.  |
 168   `----------------------------*/
 169 <INITIAL>
 170 {
 171   "%binary"               return PERCENT_NONASSOC;  /* Same token as %nonassoc.  */
 172   "%debug"                return PERCENT_DEBUG;
 173   "%define"               return PERCENT_DEFINE;
 174   "%defines"              return PERCENT_DEFINES;
 175   "%destructor"           return PERCENT_DESTRUCTOR;
 176   "%dprec"                return PERCENT_DPREC;
 177   "%error"[-_]"verbose"   return PERCENT_ERROR_VERBOSE;
 178   "%expect"               return PERCENT_EXPECT;
 179   "%file-prefix"          return PERCENT_FILE_PREFIX;
 180   "%fixed"[-_]"output"[-_]"files"   return PERCENT_YACC;  /* Same token as %yacc.  */
 181   "%glr-parser"           return PERCENT_GLR_PARSER;
 182   "%left"                 return PERCENT_LEFT;
 183   "%locations"            return PERCENT_LOCATIONS;
 184   "%merge"                return PERCENT_MERGE;
 185   "%name"[-_]"prefix"     return PERCENT_NAME_PREFIX;
 186   "%no"[-_]"lines"        return PERCENT_NO_LINES;
 187   "%nonassoc"             return PERCENT_NONASSOC;
 188   "%nterm"                return PERCENT_NTERM;
 189   "%output"               return PERCENT_OUTPUT;
 190   "%parse-param"          return PERCENT_PARSE_PARAM;
 191   "%prec"                 rule_length--; return PERCENT_PREC;  /* NOTE(review): presumably offsets the count of the symbol that follows %prec -- confirm.  */
 192   "%printer"              return PERCENT_PRINTER;
 193   "%pure"[-_]"parser"     return PERCENT_PURE_PARSER;
 194   "%right"                return PERCENT_RIGHT;
 195   "%lex-param"            return PERCENT_LEX_PARAM;
 196   "%skeleton"             return PERCENT_SKELETON;
 197   "%start"                return PERCENT_START;
 198   "%term"                 return PERCENT_TOKEN;  /* Same token as %token.  */
 199   "%token"                return PERCENT_TOKEN;
 200   "%token"[-_]"table"     return PERCENT_TOKEN_TABLE;
 201   "%type"                 return PERCENT_TYPE;
 202   "%union"                return PERCENT_UNION;
 203   "%verbose"              return PERCENT_VERBOSE;
 204   "%yacc"                 return PERCENT_YACC;
 205
 206   {directive} {
 207     complain_at (*loc, _("invalid directive: %s"), quote (yytext));
 208   }
 209
 210   "="                     return EQUAL;
 211   "|"                     rule_length = 0; return PIPE;
 212   ";"                     return SEMICOLON;
 213
 214   "," {
 215     warn_at (*loc, _("stray `,' treated as white space"));
 216   }
 217
 218   {id} {
 219     val->symbol = symbol_get (yytext, *loc);
 220     id_loc = *loc;
 221     rule_length++;
 222     BEGIN SC_AFTER_IDENTIFIER;  /* No token yet: ID or ID_COLON is returned from that start condition.  */
 223   }
 224
 225   {int} {
 226     unsigned long num;
 227     set_errno (0);
 228     num = strtoul (yytext, 0, 10);
 229     if (INT_MAX < num || get_errno ())
 230       {
 231         complain_at (*loc, _("integer out of range: %s"), quote (yytext));
 232         num = INT_MAX;
 233       }
 234     val->integer = num;
 235     return INT;
 236   }
 237
 238   /* Characters.  The opening quote is kept in the string.  We don't check there is only one character.  */
 239   "'"         STRING_GROW; token_start = loc->start; BEGIN SC_ESCAPED_CHARACTER;
 240
 241   /* Strings.  The opening quote is kept in the string.  */
 242   "\""        STRING_GROW; token_start = loc->start; BEGIN SC_ESCAPED_STRING;
 243
 244   /* Prologue.  The opening delimiter is not kept.  */
 245   "%{"        code_start = loc->start; BEGIN SC_PROLOGUE;
 246
 247   /* Code in between braces.  The opening brace is kept.  */
 248   "{" {
 249     STRING_GROW;
 250     braces_level = 0;
 251     code_start = loc->start;
 252     BEGIN SC_BRACED_CODE;
 253   }
 254
 255   /* A type tag; the enclosing angle brackets are stripped.  */
 256   "<"{tag}">" {
 257     obstack_grow (&obstack_for_string, yytext + 1, yyleng - 2);
 258     STRING_FINISH;
 259     val->uniqstr = uniqstr_new (last_string);
 260     STRING_FREE;
 261     return TYPE;
 262   }
 263
 264   "%%" {
 265     static int percent_percent_count;
 266     if (++percent_percent_count == 2)  /* Second separator: the rest of the input is the epilogue.  */
 267       {
 268         code_start = loc->start;
 269         BEGIN SC_EPILOGUE;
 270       }
 271     return PERCENT_PERCENT;
 272   }
 273
 274   . {
 275     complain_at (*loc, _("invalid character: %s"), quote (yytext));
 276   }
 277 }
 278
 279
 280   /*-----------------------------------------------------------------.
 281   | Scanning after an identifier, checking whether a colon is next.  |
 282   `-----------------------------------------------------------------*/
 283
 284 <SC_AFTER_IDENTIFIER>
 285 {
 286   ":" {
 287     rule_length = 0;
 288     *loc = id_loc;  /* Report the identifier's location, not the colon's.  */
 289     BEGIN INITIAL;
 290     return ID_COLON;
 291   }
 292   . {
 293     scanner_cursor.column -= mbsnwidth (yytext, yyleng, 0);  /* Undo the cursor advance made for this character.  */
 294     yyless (0);  /* Give the character back so that INITIAL rescans it.  */
 295     *loc = id_loc;
 296     BEGIN INITIAL;
 297     return ID;
 298   }
 299   <<EOF>> {
 300     *loc = id_loc;
 301     BEGIN INITIAL;
 302     return ID;
 303   }
 304 }
 305
 306
 307   /*---------------------------------------------------------------.
 308   | Scanning a Yacc comment.  The initial `/ *' is already eaten.  |
 309   `---------------------------------------------------------------*/
 310
 311 <SC_YACC_COMMENT>
 312 {
 313   "*/"     BEGIN context_state;  /* Back to the start condition saved when the comment opened.  */
 314   .|\n     ;  /* The comment text is discarded.  */
 315   <<EOF>>  unexpected_end_of_file (token_start, "*/");
 316 }
 317
 318
 319   /*------------------------------------------------------------.
 320   | Scanning a C comment.  The initial `/ *' is already eaten.  |
 321   `------------------------------------------------------------*/
 322
 323 <SC_COMMENT>
 324 {
 325   "*"{splice}"/"  STRING_GROW; BEGIN context_state;  /* Keep the terminator and resume the enclosing code.  */
 326   <<EOF>>         unexpected_end_of_file (token_start, "*/");
 327 }
 328
 329
 330   /*--------------------------------------------------------------.
 331   | Scanning a line comment.  The initial `//' is already eaten.  |
 332   `--------------------------------------------------------------*/
 333
 334 <SC_LINE_COMMENT>
 335 {
 336   "\n"           STRING_GROW; BEGIN context_state;
 337   {splice}       STRING_GROW;  /* A backslash-newline continues the comment on the next line.  */
 338   <<EOF>>        BEGIN context_state;  /* End of file ends the comment without a diagnostic.  */
 339 }
 340
 341
 342   /*----------------------------------------------------------------.
 343   | Scanning a C string, including its escapes.  The initial `"' is |
 344   | already eaten.                                                  |
 345   `----------------------------------------------------------------*/
 346
 347 <SC_ESCAPED_STRING>
 348 {
 349   "\"" {
 350     STRING_GROW;
 351     STRING_FINISH;
 352     loc->start = token_start;  /* The token starts at its opening quote.  */
 353     val->chars = last_string;
 354     rule_length++;  /* The string counts as one more value of the rule.  */
 355     BEGIN INITIAL;
 356     return STRING;
 357   }
 358
 359   .|\n      STRING_GROW;
 360   <<EOF>>   unexpected_end_of_file (token_start, "\"");
 361 }
 362
 363   /*---------------------------------------------------------------.
 364   | Scanning a C character, decoding its escapes.  The initial "'" |
 365   | is already eaten.                                              |
 366   `---------------------------------------------------------------*/
 367
 368 <SC_ESCAPED_CHARACTER>
 369 {
 370   "'" {
 371     STRING_GROW;
 372     STRING_FINISH;
 373     loc->start = token_start;  /* The token starts at its opening quote.  */
 374     val->symbol = symbol_get (last_string, *loc);
 375     symbol_class_set (val->symbol, token_sym, *loc);
 376     symbol_user_token_number_set (val->symbol,
 377                                   (unsigned char) last_string[1], *loc);  /* last_string[0] is the opening quote, so [1] is the first character of the literal.  */
 378     STRING_FREE;
 379     rule_length++;
 380     BEGIN INITIAL;
 381     return ID;
 382   }
 383
 384   .|\n      STRING_GROW;
 385   <<EOF>>   unexpected_end_of_file (token_start, "'");
 386 }
 387
 388
 389   /*----------------------------.
 390   | Decode escaped characters.  |
 391   `----------------------------*/
 392
 393 <SC_ESCAPED_STRING,SC_ESCAPED_CHARACTER>
 394 {
 395   \\[0-7]{1,3} {
 396     unsigned long c = strtoul (yytext + 1, 0, 8);
 397     if (UCHAR_MAX < c)
 398       complain_at (*loc, _("invalid escape sequence: %s"), quote (yytext));
 399     else
 400       obstack_1grow (&obstack_for_string, c);  /* NOTE(review): c == 0 appends a NUL byte to the string being built -- confirm that readers of the resulting C string cope.  */
 401   }
 402
 403   \\x[0-9abcdefABCDEF]+ {
 404     unsigned long c;
 405     set_errno (0);
 406     c = strtoul (yytext + 2, 0, 16);
 407     if (UCHAR_MAX < c || get_errno ())
 408       complain_at (*loc, _("invalid escape sequence: %s"), quote (yytext));
 409     else
 410       obstack_1grow (&obstack_for_string, c);
 411   }
 412
 413   \\a   obstack_1grow (&obstack_for_string, '\a');
 414   \\b   obstack_1grow (&obstack_for_string, '\b');
 415   \\f   obstack_1grow (&obstack_for_string, '\f');
 416   \\n   obstack_1grow (&obstack_for_string, '\n');
 417   \\r   obstack_1grow (&obstack_for_string, '\r');
 418   \\t   obstack_1grow (&obstack_for_string, '\t');
 419   \\v   obstack_1grow (&obstack_for_string, '\v');
 420
 421   /* \\[\"\'?\\] would be shorter, but it confuses xgettext.  */
 422   \\("\""|"'"|"?"|"\\")  obstack_1grow (&obstack_for_string, yytext[1]);
 423
 424   \\(u|U[0-9abcdefABCDEF]{4})[0-9abcdefABCDEF]{4} {
 425     int c = convert_ucn_to_byte (yytext);
 426     if (c < 0)
 427       complain_at (*loc, _("invalid escape sequence: %s"), quote (yytext));
 428     else
 429       obstack_1grow (&obstack_for_string, c);
 430   }
 431   \\(.|\n)      {
 432     complain_at (*loc, _("unrecognized escape sequence: %s"), quote (yytext));
 433     STRING_GROW;  /* The unrecognized escape is kept verbatim, backslash included.  */
 434   }
 435 }
 436
 437
 438   /*----------------------------------------------------------.
 439   | Scanning a C character without decoding its escapes.  The |
 440   | initial "'" is already eaten.                             |
 441   `----------------------------------------------------------*/
 442
 443 <SC_CHARACTER>
 444 {
 445   "'"                   STRING_GROW; BEGIN context_state;
 446   \\{splice}[^$@\[\]]   STRING_GROW;  /* Copy a backslash escape whole, so that an escaped quote does not end the literal.  */
 447   <<EOF>>               unexpected_end_of_file (token_start, "'");
 448 }
 449
 450
 451   /*----------------------------------------------------------------.
 452   | Scanning a C string, without decoding its escapes.  The initial |
 453   | `"' is already eaten.                                           |
 454   `----------------------------------------------------------------*/
 455
 456 <SC_STRING>
 457 {
 458   "\""                  STRING_GROW; BEGIN context_state;
 459   \\{splice}[^$@\[\]]   STRING_GROW;  /* Copy a backslash escape whole, so that an escaped quote does not end the string.  */
 460   <<EOF>>               unexpected_end_of_file (token_start, "\"");
 461 }
 462
 463
 464   /*---------------------------------------------------.
 465   | Strings, comments etc. can be found in user code.  |
 466   `---------------------------------------------------*/
 467
 468 <SC_BRACED_CODE,SC_PROLOGUE,SC_EPILOGUE>
 469 {
 470   "'" {
 471     STRING_GROW;
 472     context_state = YY_START;  /* Remember which kind of code to resume afterwards.  */
 473     token_start = loc->start;
 474     BEGIN SC_CHARACTER;
 475   }
 476   "\"" {
 477     STRING_GROW;
 478     context_state = YY_START;
 479     token_start = loc->start;
 480     BEGIN SC_STRING;
 481   }
 482   "/"{splice}"*" {
 483     STRING_GROW;
 484     context_state = YY_START;
 485     token_start = loc->start;
 486     BEGIN SC_COMMENT;
 487   }
 488   "/"{splice}"/" {
 489     STRING_GROW;
 490     context_state = YY_START;  /* token_start is not set: SC_LINE_COMMENT never reports a missing terminator.  */
 491     BEGIN SC_LINE_COMMENT;
 492   }
 493 }
 494
 495
 496   /*---------------------------------------------------------------.
 497   | Scanning some code in braces (%union and actions). The initial |
 498   | "{" is already eaten.                                          |
 499   `---------------------------------------------------------------*/
 500
 501 <SC_BRACED_CODE>
 502 {
 503   "{"|"<"{splice}"%"  STRING_GROW; braces_level++;  /* The two-character spelling of an opening brace counts too.  */
 504   "%"{splice}">"      STRING_GROW; braces_level--;
 505   "}" {
 506     STRING_GROW;
 507     braces_level--;
 508     if (braces_level < 0)  /* This brace closes the one that opened the code.  */
 509       {
 510         STRING_FINISH;
 511         loc->start = code_start;
 512         val->chars = last_string;
 513         rule_length++;
 514         BEGIN INITIAL;
 515         return BRACED_CODE;
 516       }
 517   }
 518
 519   /* Tokenize `<<%' correctly (as `<<' `%') rather than incorrectly
 520      (as `<' `<%').  */
 521   "<"{splice}"<"  STRING_GROW;
 522
 523   "$"("<"{tag}">")?(-?[0-9]+|"$") { handle_dollar (current_braced_code,
 524                                                    yytext, *loc); }
 525   "@"(-?[0-9]+|"$")               { handle_at (current_braced_code,
 526                                                yytext, *loc); }
 527
 528   <<EOF>>  unexpected_end_of_file (code_start, "}");
 529 }
 530
 531
 532   /*--------------------------------------------------------------.
 533   | Scanning some prologue: from "%{" (already scanned) to "%}".  |
 534   `--------------------------------------------------------------*/
 535
 536 <SC_PROLOGUE>
 537 {
 538   "%}" {
 539     STRING_FINISH;  /* The closing delimiter itself is not added to the string.  */
 540     loc->start = code_start;
 541     val->chars = last_string;
 542     BEGIN INITIAL;
 543     return PROLOGUE;
 544   }
 545
 546   <<EOF>>  unexpected_end_of_file (code_start, "%}");
 547 }
 548
 549
 550   /*---------------------------------------------------------------.
 551   | Scanning the epilogue (everything after the second "%%", which |
 552   | has already been eaten).                                       |
 553   `---------------------------------------------------------------*/
 554
 555 <SC_EPILOGUE>
 556 {
 557   <<EOF>> {
 558     STRING_FINISH;  /* End of file is the normal end of the epilogue.  */
 559     loc->start = code_start;
 560     val->chars = last_string;
 561     BEGIN INITIAL;
 562     return EPILOGUE;
 563   }
 564 }
 565
 566
 567   /*----------------------------------------------------------------.
 568   | By default, grow the string obstack with the input, escaping M4 |
 569   | quoting characters.                                             |
 570   `----------------------------------------------------------------*/
 571
 572 <SC_COMMENT,SC_LINE_COMMENT,SC_STRING,SC_CHARACTER,SC_BRACED_CODE,SC_PROLOGUE,SC_EPILOGUE>
 573 {
 574   \$    obstack_sgrow (&obstack_for_string, "$][");
 575   \@    obstack_sgrow (&obstack_for_string, "@@");
 576   \[    obstack_sgrow (&obstack_for_string, "@{");
 577   \]    obstack_sgrow (&obstack_for_string, "@}");
 578   .|\n  STRING_GROW;  /* Any other character is copied unchanged.  */
 579 }
 580
 581
 582 %%
 583
 584 /* Set *LOC and adjust scanner cursor to account for token TOKEN of
 585    size SIZE.  */
 586
 587 static void
 588 adjust_location (location *loc, char const *token, size_t size)
 589 {
 590   int line = scanner_cursor.line;
 591   int column = scanner_cursor.column;
 592   char const *p0 = token;
 593   char const *p = token;
 594   char const *lim = token + size;
 595
 596   loc->start = scanner_cursor;
 597
 598   for (p = token; p < lim; p++)
 599     switch (*p)
 600       {
 601       case '\n':
 602         line++;
 603         column = 1;
 604         p0 = p + 1;
 605         break;
 606
 607       case '\t':
 608         column += mbsnwidth (p0, p - p0, 0);
 609         column += 8 - ((column - 1) & 7);
 610         p0 = p + 1;
 611         break;
 612       }
 613
 614   scanner_cursor.line = line;
 615   scanner_cursor.column = column + mbsnwidth (p0, p - p0, 0);
 616
 617   loc->end = scanner_cursor;
 618 }
 619
 620
 /* Read up to SIZE bytes from FP into BUF and return how many bytes
    BUF then holds.  Carriage returns never reach the caller: both
    "\r\n" and a lone '\r' are delivered as a single '\n'.  The result
    can therefore be smaller than the number of bytes read from FP.  */

 static size_t
 no_cr_read (FILE *fp, char *buf, size_t size)
 {
   size_t nread = fread (buf, 1, size, fp);
   char *out;
   char const *in;
   char const *end;

   if (nread == 0)
     return nread;

   out = memchr (buf, '\r', nread);
   if (out == NULL)
     return nread;

   /* OUT is where the next kept byte goes, IN is the next byte to
      examine; both start just after the first '\r'.  */
   end = buf + nread;
   in = ++out;

   while (1)
     {
       /* The byte before OUT is an '\r'.  Turn it into '\n', and drop
          a '\n' that immediately follows it.  */
       out[-1] = '\n';
       if (in != end)
         {
           if (*in == '\n')
             in++;
         }
       else
         {
           /* The '\r' was the last byte read: peek at the stream.  */
           int next = getc (fp);
           if (next != '\n' && ungetc (next, fp) != next)
             return out - buf;
         }

       /* Compact the buffer up to and including the next '\r'.  */
       for (;;)
         {
           if (in == end)
             return out - buf;
           if ((*out++ = *in++) == '\r')
             break;
         }
     }
 }
 666
 667
 668 /*------------------------------------------------------------------.
 669 | TEXT is pointing to a wannabee semantic value (i.e., a `$').      |
 670 |                                                                   |
 671 | Possible inputs: $[<TYPENAME>]($|integer)                         |
 672 |                                                                   |
 673 | Output to OBSTACK_FOR_STRING a reference to this semantic value.  |
 674 `------------------------------------------------------------------*/
 675
 676 static inline void
 677 handle_action_dollar (char *text, location loc)
 678 {
 679   const char *type_name = NULL;
 680   char *cp = text + 1;
 681
 682   /* Get the type name if explicit. */
 683   if (*cp == '<')
 684     {
 685       type_name = ++cp;
 686       while (*cp != '>')
 687         ++cp;
 688       *cp = '\0';
 689       ++cp;
 690     }
 691
 692   if (*cp == '$')
 693     {
 694       if (!type_name)
 695         type_name = symbol_list_n_type_name_get (current_rule, loc, 0);
 696       if (!type_name && typed)
 697         complain_at (loc, _("$$ of `%s' has no declared type"),
 698                      current_rule->sym->tag);
 699       if (!type_name)
 700         type_name = "";
 701       obstack_fgrow1 (&obstack_for_string,
 702                       "]b4_lhs_value([%s])[", type_name);
 703     }
 704   else
 705     {
 706       long num;
 707       set_errno (0);
 708       num = strtol (cp, 0, 10);
 709
 710       if (INT_MIN <= num && num <= rule_length && ! get_errno ())
 711         {
 712           int n = num;
 713           if (!type_name && n > 0)
 714             type_name = symbol_list_n_type_name_get (current_rule, loc, n);
 715           if (!type_name && typed)
 716             complain_at (loc, _("$%d of `%s' has no declared type"),
 717                          n, current_rule->sym->tag);
 718           if (!type_name)
 719             type_name = "";
 720           obstack_fgrow3 (&obstack_for_string,
 721                           "]b4_rhs_value([%d], [%d], [%s])[",
 722                           rule_length, n, type_name);
 723         }
 724       else
 725         complain_at (loc, _("integer out of range: %s"), quote (text));
 726     }
 727 }
 728
 729
 730 /*---------------------------------------------------------------.
 731 | TEXT is expected to be $$ in some code associated to a symbol: |
 732 | destructor or printer.                                         |
 733 `---------------------------------------------------------------*/
 734
 735 static inline void
 736 handle_symbol_code_dollar (char *text, location loc)
 737 {
 738   char *cp = text + 1;
 739   if (*cp == '$')
 740     obstack_sgrow (&obstack_for_string, "]b4_dollar_dollar[");
 741   else
 742     complain_at (loc, _("invalid value: %s"), quote (text));
 743 }
 744
 745
 746 /*-----------------------------------------------------------------.
 747 | Dispatch onto handle_action_dollar, or handle_destructor_dollar, |
 748 | depending upon CODE_KIND.                                        |
 749 `-----------------------------------------------------------------*/
 750
 751 static void
 752 handle_dollar (braced_code braced_code_kind, char *text, location loc)
 753 {
 754   switch (braced_code_kind)
 755     {
 756     case action_braced_code:
 757       handle_action_dollar (text, loc);
 758       break;
 759
 760     case destructor_braced_code:
 761     case printer_braced_code:
 762       handle_symbol_code_dollar (text, loc);
 763       break;
 764     }
 765 }
 766
 767
 768 /*------------------------------------------------------.
 769 | TEXT is a location token (i.e., a `@...').  Output to |
 770 | OBSTACK_FOR_STRING a reference to this location.      |
 771 `------------------------------------------------------*/
 772
 773 static inline void
 774 handle_action_at (char *text, location loc)
 775 {
 776   char *cp = text + 1;
 777   locations_flag = 1;
 778
 779   if (*cp == '$')
 780     {
 781       obstack_sgrow (&obstack_for_string, "]b4_lhs_location[");
 782     }
 783   else
 784     {
 785       long num;
 786       set_errno (0);
 787       num = strtol (cp, 0, 10);
 788
 789       if (INT_MIN <= num && num <= rule_length && ! get_errno ())
 790         {
 791           int n = num;
 792           obstack_fgrow2 (&obstack_for_string, "]b4_rhs_location([%d], [%d])[",
 793                           rule_length, n);
 794         }
 795       else
 796         complain_at (loc, _("integer out of range: %s"), quote (text));
 797     }
 798 }
 799
 800
 801 /*---------------------------------------------------------------.
 802 | TEXT is expected to be @$ in some code associated to a symbol: |
 803 | destructor or printer.                                         |
 804 `---------------------------------------------------------------*/
 805
 806 static inline void
 807 handle_symbol_code_at (char *text, location loc)
 808 {
 809   char *cp = text + 1;
 810   if (*cp == '$')
 811     obstack_sgrow (&obstack_for_string, "]b4_at_dollar[");
 812   else
 813     complain_at (loc, _("invalid value: %s"), quote (text));
 814 }
 815
 816
 817 /*-------------------------------------------------------------------.
 818 | Dispatch onto handle_action_at, or handle_destructor_at, depending |
 819 | upon CODE_KIND.                                                    |
 820 `-------------------------------------------------------------------*/
 821
 822 static void
 823 handle_at (braced_code braced_code_kind, char *text, location loc)
 824 {
 825   switch (braced_code_kind)
 826     {
 827     case action_braced_code:
 828       handle_action_at (text, loc);
 829       break;
 830
 831     case destructor_braced_code:
 832     case printer_braced_code:
 833       handle_symbol_code_at (text, loc);
 834       break;
 835     }
 836 }
 837
 838
 /* Convert universal character name UCN to a single-byte character,
    and return that character.  UCN points at the backslash: the two
    leading bytes (`\u' or `\U') are skipped and the hexadecimal digits
    that follow are decoded.  Return -1 if UCN does not correspond to a
    single-byte character.

    The result is computed as an int throughout, so that the -1
    "no such character" marker never goes through an unsigned type.  */

 static int
 convert_ucn_to_byte (char const *ucn)
 {
   unsigned long code = strtoul (ucn + 2, NULL, 16);

   /* FIXME: Currently we assume Unicode-compatible unibyte characters
      on ASCII hosts (i.e., Latin-1 on hosts with 8-bit bytes).  On
      non-ASCII hosts we support only the portable C character set.
      These limitations should be removed once we add support for
      multibyte characters.  */

   if (UCHAR_MAX < code)
     return -1;

 #if ! ('$' == 0x24 && '@' == 0x40 && '`' == 0x60 && '~' == 0x7e)
   {
     /* A non-ASCII host.  Use CODE to index into a table of the C
        basic execution character set, which is guaranteed to exist on
        all Standard C platforms.  This table also includes '$', '@',
        and '`', which are not in the basic execution character set but
        which are unibyte characters on all the platforms that we know
        about.  */
     static signed char const table[] =
       {
         '\0',   -1,   -1,   -1,   -1,   -1,   -1, '\a',
         '\b', '\t', '\n', '\v', '\f', '\r',   -1,   -1,
           -1,   -1,   -1,   -1,   -1,   -1,   -1,   -1,
           -1,   -1,   -1,   -1,   -1,   -1,   -1,   -1,
          ' ',  '!',  '"',  '#',  '$',  '%',  '&', '\'',
          '(',  ')',  '*',  '+',  ',',  '-',  '.',  '/',
          '0',  '1',  '2',  '3',  '4',  '5',  '6',  '7',
          '8',  '9',  ':',  ';',  '<',  '=',  '>',  '?',
          '@',  'A',  'B',  'C',  'D',  'E',  'F',  'G',
          'H',  'I',  'J',  'K',  'L',  'M',  'N',  'O',
          'P',  'Q',  'R',  'S',  'T',  'U',  'V',  'W',
          'X',  'Y',  'Z',  '[', '\\',  ']',  '^',  '_',
          '`',  'a',  'b',  'c',  'd',  'e',  'f',  'g',
          'h',  'i',  'j',  'k',  'l',  'm',  'n',  'o',
          'p',  'q',  'r',  's',  't',  'u',  'v',  'w',
          'x',  'y',  'z',  '{',  '|',  '}',  '~'
       };

     /* Codes past the end of the table have no unibyte equivalent.  */
     return code < sizeof table ? table[code] : -1;
   }
 #else
   /* CODE is at most UCHAR_MAX here, so it fits in an int.  */
   return (int) code;
 #endif
 }
 893
 894
 895 /*----------------------------------------------------------------.
 896 | Handle `#line INT "FILE"'.  ARGS has already skipped `#line '.  |
 897 `----------------------------------------------------------------*/
 898
 899 static void
 900 handle_syncline (char *args)
 901 {
 902   int lineno = strtol (args, &args, 10);
 903   const char *file = NULL;
 904   file = strchr (args, '"') + 1;
 905   *strchr (file, '"') = 0;
 906   scanner_cursor.file = current_file = xstrdup (file);
 907   scanner_cursor.line = lineno;
 908   scanner_cursor.column = 1;
 909 }
 910
 911
 912 /*------------------------------------------------------------------------.
 913 | Report an unexpected EOF in a token or comment starting at START.       |
 914 | An end of file was encountered and the expected TOKEN_END was missing.  |
 915 | After reporting the problem, pretend that TOKEN_END was found.          |
 916 `------------------------------------------------------------------------*/
 917
 918 static void
 919 unexpected_end_of_file (boundary start, char const *token_end)
 920 {
 921   size_t i = strlen (token_end);
 922
 923   location loc;
 924   loc.start = start;
 925   loc.end = scanner_cursor;
 926   complain_at (loc, _("missing `%s' at end of file"), token_end);
 927
 928   /* Adjust scanner cursor so that any later message does not count
 929      the characters about to be inserted.  */
 930   scanner_cursor.column -= i;
 931
 932   while (i != 0)
 933     unput (token_end[--i]);
 934 }
 935
 936
 937 /*-------------------------.
 938 | Initialize the scanner.  |
 939 `-------------------------*/
 940
 941 void
 942 scanner_initialize (void)
 943 {
 944   obstack_init (&obstack_for_string);
 945 }
 946
 947
 948 /*-----------------------------------------------.
 949 | Free all the memory allocated to the scanner.  |
 950 `-----------------------------------------------*/
 951
 952 void
 953 scanner_free (void)
 954 {
 955   obstack_free (&obstack_for_string, 0);
 956   /* Reclaim Flex's buffers.  */
 957   yy_delete_buffer (YY_CURRENT_BUFFER);
 958 }