src/scan-gram.l

   1 /* Bison Grammar Scanner                             -*- C -*-
   2    Copyright (C) 2002 Free Software Foundation, Inc.
   3
   4    This file is part of Bison, the GNU Compiler Compiler.
   5
   6    This program is free software; you can redistribute it and/or modify
   7    it under the terms of the GNU General Public License as published by
   8    the Free Software Foundation; either version 2 of the License, or
   9    (at your option) any later version.
  10
  11    This program is distributed in the hope that it will be useful,
  12    but WITHOUT ANY WARRANTY; without even the implied warranty of
  13    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  14    GNU General Public License for more details.
  15
  16    You should have received a copy of the GNU General Public License
  17    along with this program; if not, write to the Free Software
  18    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
  19    02111-1307  USA
  20 */
  21
  22 %option debug nodefault noyywrap never-interactive
  23 %option prefix="gram_" outfile="lex.yy.c"
  24
  25 %{
  26 #include "system.h"
  27 #include "mbswidth.h"
  28 #include "complain.h"
  29 #include "quote.h"
  30 #include "struniq.h"
  31 #include "getargs.h"
  32 #include "gram.h"
  33 #include "reader.h"
  34
  35 #define YY_USER_INIT                            \
  36 do {                                            \
  37   LOCATION_RESET (*loc);                        \
  38   loc->file = current_file;                     \
  39 } while (0)
  40
  41 /* Each time we match a string, move the end cursor to its end. */
  42 #define STEP  LOCATION_STEP (*loc)
  43
  44 #define YY_USER_ACTION  extend_location (loc, yytext, yyleng);
  45
  46 #define YY_INPUT(buf, result, size) ((result) = no_cr_read (yyin, buf, size))
  47
  48
  49 /* Read bytes from FP into buffer BUF of size SIZE.  Return the
  50    number of bytes read.  Remove '\r' from input, treating \r\n
  51    and isolated \r as \n.  */
  52
  53 static size_t
  54 no_cr_read (FILE *fp, char *buf, size_t size)
  55 {
  56   size_t s = fread (buf, 1, size, fp);
  57   if (s)
  58     {
  59       char *w = memchr (buf, '\r', s);
  60       if (w)
  61         {
  62           char const *r = ++w;
  63           char const *lim = buf + s;
  64
  65           for (;;)
  66             {
  67               /* Found an '\r'.  Treat it like '\n', but ignore any
  68                  '\n' that immediately follows.  */
  69               w[-1] = '\n';
  70               if (r == lim)
  71                 {
  72                   int ch = getc (fp);
  73                   if (ch != '\n' && ungetc (ch, fp) != ch)
  74                     break;
  75                 }
  76               else if (*r == '\n')
  77                 r++;
  78
  79               /* Copy until the next '\r'.  */
  80               do
  81                 {
  82                   if (r == lim)
  83                     return w - buf;
  84                 }
  85               while ((*w++ = *r++) != '\r');
  86             }
  87
  88           return w - buf;
  89         }
  90     }
  91
  92   return s;
  93 }
  94
  95
  96 /* Extend *LOC to account for token TOKEN of size SIZE.  */
  97
  98 static void
  99 extend_location (location_t *loc, char const *token, int size)
 100 {
 101   int line = loc->last_line;
 102   int column = loc->last_column;
 103   char const *p0 = token;
 104   char const *p = token;
 105   char const *lim = token + size;
 106
 107   for (p = token; p < lim; p++)
 108     switch (*p)
 109       {
 110       case '\r':
 111         /* \r shouldn't survive no_cr_read.  */
 112         abort ();
 113
 114       case '\n':
 115         line++;
 116         column = 1;
 117         p0 = p + 1;
 118         break;
 119
 120       case '\t':
 121         column += mbsnwidth (p0, p - p0, 0);
 122         column += 8 - ((column - 1) & 7);
 123         p0 = p + 1;
 124         break;
 125       }
 126
 127   loc->last_line = line;
 128   loc->last_column = column + mbsnwidth (p0, p - p0, 0);
 129 }
 130
 131
 132
 133 /* STRING_OBSTACK -- Used to store all the characters that we need to
 134    keep (to construct ID, STRINGS etc.).  Use the following macros to
 135    use it.
 136
 137    Use STRING_GROW to append what has just been matched, and
 138    STRING_FINISH to end the string (it puts the ending 0).
 139    STRING_FINISH also stores this string in LAST_STRING, which can be
 140    used, and which is used by STRING_FREE to free the last string.  */
 141
 142 static struct obstack string_obstack;
 143
 144 /* A string representing the most recently saved token.  */
 145 static char *last_string;
 146
 147
 148 #define STRING_GROW   \
 149   obstack_grow (&string_obstack, yytext, yyleng)
 150
 151 #define STRING_FINISH                                   \
 152   do {                                                  \
 153     obstack_1grow (&string_obstack, '\0');              \
 154     last_string = obstack_finish (&string_obstack);     \
 155   } while (0)
 156
 157 #define STRING_FREE \
 158   obstack_free (&string_obstack, last_string)
 159
 160 void
 161 scanner_last_string_free (void)
 162 {
 163   STRING_FREE;
 164 }
 165
 166 /* Within well-formed rules, RULE_LENGTH is the number of values in
 167    the current rule so far, which says where to find `$0' with respect
 168    to the top of the stack.  It is not the same as the rule->length in
 169    the case of mid rule actions.
 170
 171    Outside of well-formed rules, RULE_LENGTH has an undefined value.  */
 172 static int rule_length;
 173
 174 static void handle_dollar (braced_code_t code_kind,
 175                            char *cp, location_t location);
 176 static void handle_at (braced_code_t code_kind,
 177                        char *cp, location_t location);
 178 static void handle_syncline (char *args, location_t *location);
 179 static int convert_ucn_to_byte (char const *hex_text);
 180 static void unexpected_end_of_file (location_t *, char const *);
 181
 182 %}
 183 %x SC_COMMENT SC_LINE_COMMENT SC_YACC_COMMENT
 184 %x SC_STRING SC_CHARACTER
 185 %x SC_ESCAPED_STRING SC_ESCAPED_CHARACTER
 186 %x SC_BRACED_CODE SC_PROLOGUE SC_EPILOGUE
 187
 188 letter    [.abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ_]
 189 id        {letter}({letter}|[0-9])*
 190 directive %{letter}({letter}|[0-9]|-)*
 191 int       [0-9]+
 192
 193 /* POSIX says that a tag must be both an id and a C union member, but
 194    historically almost any character is allowed in a tag.  We disallow
 195    NUL and newline, as this simplifies our implementation.  */
 196 tag      [^\0\n>]+
 197
 198 /* Zero or more instances of backslash-newline.  Following GCC, allow
 199    white space between the backslash and the newline.  */
 200 splice   (\\[ \f\t\v]*\n)*
 201
 202 %%
 203 %{
 204   /* Nesting level of the current code in braces.  */
 205   int braces_level IF_LINT (= 0);
 206
 207   /* Scanner context when scanning C code.  */
 208   int c_context IF_LINT (= 0);
 209
 210   /* At each yylex invocation, mark the current position as the
 211      start of the next token.  */
 212   STEP;
 213 %}
 214
 215
 216   /*----------------------------.
 217   | Scanning Bison directives.  |
 218   `----------------------------*/
 219 <INITIAL>
 220 {
 221   "%binary"               return PERCENT_NONASSOC;
 222   "%debug"                return PERCENT_DEBUG;
 223   "%define"               return PERCENT_DEFINE;
 224   "%defines"              return PERCENT_DEFINES;
 225   "%destructor"           return PERCENT_DESTRUCTOR;
 226   "%dprec"                return PERCENT_DPREC;
 227   "%error"[-_]"verbose"   return PERCENT_ERROR_VERBOSE;
 228   "%expect"               return PERCENT_EXPECT;
 229   "%file-prefix"          return PERCENT_FILE_PREFIX;
 230   "%fixed"[-_]"output"[-_]"files"   return PERCENT_YACC;
 231   "%glr-parser"           return PERCENT_GLR_PARSER;
 232   "%left"                 return PERCENT_LEFT;
 233   "%locations"            return PERCENT_LOCATIONS;
 234   "%merge"                return PERCENT_MERGE;
 235   "%name"[-_]"prefix"     return PERCENT_NAME_PREFIX;
 236   "%no"[-_]"lines"        return PERCENT_NO_LINES;
 237   "%nonassoc"             return PERCENT_NONASSOC;
 238   "%nterm"                return PERCENT_NTERM;
 239   "%output"               return PERCENT_OUTPUT;
 240   "%parse-param"          return PERCENT_PARSE_PARAM;
 241   "%prec"                 rule_length--; return PERCENT_PREC;
 242   "%printer"              return PERCENT_PRINTER;
 243   "%pure"[-_]"parser"     return PERCENT_PURE_PARSER;
 244   "%right"                return PERCENT_RIGHT;
 245   "%lex-param"            return PERCENT_LEX_PARAM;
 246   "%skeleton"             return PERCENT_SKELETON;
 247   "%start"                return PERCENT_START;
 248   "%term"                 return PERCENT_TOKEN;
 249   "%token"                return PERCENT_TOKEN;
 250   "%token"[-_]"table"     return PERCENT_TOKEN_TABLE;
 251   "%type"                 return PERCENT_TYPE;
 252   "%union"                return PERCENT_UNION;
 253   "%verbose"              return PERCENT_VERBOSE;
 254   "%yacc"                 return PERCENT_YACC;
 255
 256   {directive}             {
 257     complain_at (*loc, _("invalid directive: %s"), quote (yytext));
 258     STEP;
 259   }
 260
 261   ^"#line "{int}" \"".*"\"\n" {
 262     handle_syncline (yytext + sizeof "#line " - 1, loc);
 263     STEP;
 264   }
 265
 266   "="                     return EQUAL;
 267   ":"                     rule_length = 0; return COLON;
 268   "|"                     rule_length = 0; return PIPE;
 269   ";"                     return SEMICOLON;
 270
 271   [ \f\n\t\v]  STEP;
 272
 273   "," {
 274     warn_at (*loc, _("stray `,' treated as white space"));
 275     STEP;
 276   }
 277
 278   {id}        {
 279     val->symbol = symbol_get (yytext, *loc);
 280     rule_length++;
 281     return ID;
 282   }
 283
 284   {int} {
 285     unsigned long num;
 286     errno = 0;
 287     num = strtoul (yytext, 0, 10);
 288     if (INT_MAX < num || errno)
 289       {
 290         complain_at (*loc, _("integer out of range: %s"), quote (yytext));
 291         num = INT_MAX;
 292       }
 293     val->integer = num;
 294     return INT;
 295   }
 296
 297   /* Characters.  We don't check there is only one.  */
 298   "'"         STRING_GROW; BEGIN SC_ESCAPED_CHARACTER;
 299
 300   /* Strings. */
 301   "\""        STRING_GROW; BEGIN SC_ESCAPED_STRING;
 302
 303   /* Comments. */
 304   "/*"        BEGIN SC_YACC_COMMENT;
 305   "//".*      STEP;
 306
 307   /* Prologue. */
 308   "%{"        BEGIN SC_PROLOGUE;
 309
 310   /* Code in between braces.  */
 311   "{"         STRING_GROW; braces_level = 0; BEGIN SC_BRACED_CODE;
 312
 313   /* A type. */
 314   "<"{tag}">" {
 315     obstack_grow (&string_obstack, yytext + 1, yyleng - 2);
 316     STRING_FINISH;
 317     val->struniq = struniq_new (last_string);
 318     STRING_FREE;
 319     return TYPE;
 320   }
 321
 322   "%%" {
 323     static int percent_percent_count;
 324     if (++percent_percent_count == 2)
 325       BEGIN SC_EPILOGUE;
 326     return PERCENT_PERCENT;
 327   }
 328
 329   . {
 330     complain_at (*loc, _("invalid character: %s"), quote (yytext));
 331     STEP;
 332   }
 333 }
 334
 335
 336   /*---------------------------------------------------------------.
 337   | Scanning a Yacc comment.  The initial `/ *' is already eaten.  |
 338   `---------------------------------------------------------------*/
 339
 340 <SC_YACC_COMMENT>
 341 {
 342   "*/" {
 343     STEP;
 344     BEGIN INITIAL;
 345   }
 346
 347   .|\n     ;
 348   <<EOF>>  unexpected_end_of_file (loc, "*/");
 349 }
 350
 351
 352   /*------------------------------------------------------------.
 353   | Scanning a C comment.  The initial `/ *' is already eaten.  |
 354   `------------------------------------------------------------*/
 355
 356 <SC_COMMENT>
 357 {
 358   "*"{splice}"/"  STRING_GROW; BEGIN c_context;
 359   <<EOF>>         unexpected_end_of_file (loc, "*/");
 360 }
 361
 362
 363   /*--------------------------------------------------------------.
 364   | Scanning a line comment.  The initial `//' is already eaten.  |
 365   `--------------------------------------------------------------*/
 366
 367 <SC_LINE_COMMENT>
 368 {
 369   "\n"           STRING_GROW; BEGIN c_context;
 370   {splice}       STRING_GROW;
 371   <<EOF>>        BEGIN c_context;
 372 }
 373
 374
 375   /*----------------------------------------------------------------.
 376   | Scanning a C string, including its escapes.  The initial `"' is |
 377   | already eaten.                                                  |
 378   `----------------------------------------------------------------*/
 379
 380 <SC_ESCAPED_STRING>
 381 {
 382   "\"" {
 383     STRING_GROW;
 384     STRING_FINISH;
 385     val->string = last_string;
 386     rule_length++;
 387     BEGIN INITIAL;
 388     return STRING;
 389   }
 390
 391   .|\n      STRING_GROW;
 392   <<EOF>>   unexpected_end_of_file (loc, "\"");
 393 }
 394
 395   /*---------------------------------------------------------------.
 396   | Scanning a C character, decoding its escapes.  The initial "'" |
 397   | is already eaten.                                              |
 398   `---------------------------------------------------------------*/
 399
 400 <SC_ESCAPED_CHARACTER>
 401 {
 402   "'" {
 403     STRING_GROW;
 404     STRING_FINISH;
 405     val->symbol = symbol_get (last_string, *loc);
 406     symbol_class_set (val->symbol, token_sym, *loc);
 407     symbol_user_token_number_set (val->symbol,
 408                                   (unsigned char) last_string[1], *loc);
 409     STRING_FREE;
 410     rule_length++;
 411     BEGIN INITIAL;
 412     return ID;
 413   }
 414
 415   .|\n      STRING_GROW;
 416   <<EOF>>   unexpected_end_of_file (loc, "'");
 417 }
 418
 419
 420   /*----------------------------.
 421   | Decode escaped characters.  |
 422   `----------------------------*/
 423
 424 <SC_ESCAPED_STRING,SC_ESCAPED_CHARACTER>
 425 {
 426   \\[0-7]{1,3} {
 427     unsigned long c = strtoul (yytext + 1, 0, 8);
 428     if (UCHAR_MAX < c)
 429       {
 430         complain_at (*loc, _("invalid escape sequence: %s"),
 431                      quote (yytext));
 432         STEP;
 433       }
 434     else
 435       obstack_1grow (&string_obstack, c);
 436   }
 437
 438   \\x[0-9abcdefABCDEF]+ {
 439     unsigned long c;
 440     errno = 0;
 441     c = strtoul (yytext + 2, 0, 16);
 442     if (UCHAR_MAX < c || errno)
 443       {
 444         complain_at (*loc, _("invalid escape sequence: %s"),
 445                      quote (yytext));
 446         STEP;
 447       }
 448     else
 449       obstack_1grow (&string_obstack, c);
 450   }
 451
 452   \\a   obstack_1grow (&string_obstack, '\a');
 453   \\b   obstack_1grow (&string_obstack, '\b');
 454   \\f   obstack_1grow (&string_obstack, '\f');
 455   \\n   obstack_1grow (&string_obstack, '\n');
 456   \\r   obstack_1grow (&string_obstack, '\r');
 457   \\t   obstack_1grow (&string_obstack, '\t');
 458   \\v   obstack_1grow (&string_obstack, '\v');
 459
 460   /* \\[\"\'?\\] would be shorter, but it confuses xgettext.  */
 461   \\("\""|"'"|"?"|"\\")  obstack_1grow (&string_obstack, yytext[1]);
 462
 463   \\(u|U[0-9abcdefABCDEF]{4})[0-9abcdefABCDEF]{4} {
 464     int c = convert_ucn_to_byte (yytext);
 465     if (c < 0)
 466       {
 467         complain_at (*loc, _("invalid escape sequence: %s"),
 468                      quote (yytext));
 469         STEP;
 470       }
 471     else
 472       obstack_1grow (&string_obstack, c);
 473   }
 474   \\(.|\n)      {
 475     complain_at (*loc, _("unrecognized escape sequence: %s"),
 476                  quote (yytext));
 477     STRING_GROW;
 478   }
 479 }
 480
 481
 482   /*----------------------------------------------------------.
 483   | Scanning a C character without decoding its escapes.  The |
 484   | initial "'" is already eaten.                             |
 485   `----------------------------------------------------------*/
 486
 487 <SC_CHARACTER>
 488 {
 489   "'"                   STRING_GROW; BEGIN c_context;
 490   \\{splice}[^$@\[\]]   STRING_GROW;
 491   <<EOF>>               unexpected_end_of_file (loc, "'");
 492 }
 493
 494
 495   /*----------------------------------------------------------------.
 496   | Scanning a C string, without decoding its escapes.  The initial |
 497   | `"' is already eaten.                                           |
 498   `----------------------------------------------------------------*/
 499
 500 <SC_STRING>
 501 {
 502   "\""                  STRING_GROW; BEGIN c_context;
 503   \\{splice}[^$@\[\]]   STRING_GROW;
 504   <<EOF>>               unexpected_end_of_file (loc, "\"");
 505 }
 506
 507
 508   /*---------------------------------------------------.
 509   | Strings, comments etc. can be found in user code.  |
 510   `---------------------------------------------------*/
 511
 512 <SC_BRACED_CODE,SC_PROLOGUE,SC_EPILOGUE>
 513 {
 514   "'"             STRING_GROW; c_context = YY_START; BEGIN SC_CHARACTER;
 515   "\""            STRING_GROW; c_context = YY_START; BEGIN SC_STRING;
 516   "/"{splice}"*"  STRING_GROW; c_context = YY_START; BEGIN SC_COMMENT;
 517   "/"{splice}"/"  STRING_GROW; c_context = YY_START; BEGIN SC_LINE_COMMENT;
 518 }
 519
 520
 521   /*---------------------------------------------------------------.
 522   | Scanning some code in braces (%union and actions). The initial |
 523   | "{" is already eaten.                                          |
 524   `---------------------------------------------------------------*/
 525
 526 <SC_BRACED_CODE>
 527 {
 528   "{"|"<"{splice}"%"  STRING_GROW; braces_level++;
 529   "%"{splice}">"      STRING_GROW; braces_level--;
 530   "}" {
 531     STRING_GROW;
 532     braces_level--;
 533     if (braces_level < 0)
 534       {
 535         STRING_FINISH;
 536         val->string = last_string;
 537         rule_length++;
 538         BEGIN INITIAL;
 539         return BRACED_CODE;
 540       }
 541   }
 542
  543   /* Tokenize `<<%' correctly (as `<<' `%') rather than incorrectly
  544      (as `<' `<%').  */
 545   "<"{splice}"<"  STRING_GROW;
 546
 547   "$"("<"{tag}">")?(-?[0-9]+|"$") { handle_dollar (current_braced_code,
 548                                                    yytext, *loc); }
 549   "@"(-?[0-9]+|"$")               { handle_at (current_braced_code,
 550                                                yytext, *loc); }
 551
 552   <<EOF>>  unexpected_end_of_file (loc, "}");
 553 }
 554
 555
 556   /*--------------------------------------------------------------.
 557   | Scanning some prologue: from "%{" (already scanned) to "%}".  |
 558   `--------------------------------------------------------------*/
 559
 560 <SC_PROLOGUE>
 561 {
 562   "%}" {
 563     STRING_FINISH;
 564     val->string = last_string;
 565     BEGIN INITIAL;
 566     return PROLOGUE;
 567   }
 568
 569   <<EOF>>  unexpected_end_of_file (loc, "%}");
 570 }
 571
 572
 573   /*---------------------------------------------------------------.
 574   | Scanning the epilogue (everything after the second "%%", which |
 575   | has already been eaten).                                       |
 576   `---------------------------------------------------------------*/
 577
 578 <SC_EPILOGUE>
 579 {
 580   <<EOF>> {
 581     STRING_FINISH;
 582     val->string = last_string;
 583     BEGIN INITIAL;
 584     return EPILOGUE;
 585   }
 586 }
 587
 588
 589   /*----------------------------------------------------------------.
 590   | By default, grow the string obstack with the input, escaping M4 |
 591   | quoting characters.                                             |
 592   `----------------------------------------------------------------*/
 593
 594 <SC_COMMENT,SC_LINE_COMMENT,SC_STRING,SC_CHARACTER,SC_BRACED_CODE,SC_PROLOGUE,SC_EPILOGUE>
 595 {
 596   \$    obstack_sgrow (&string_obstack, "$][");
 597   \@    obstack_sgrow (&string_obstack, "@@");
 598   \[    obstack_sgrow (&string_obstack, "@{");
 599   \]    obstack_sgrow (&string_obstack, "@}");
 600   .|\n  STRING_GROW;
 601 }
 602
 603
 604 %%
 605
 606 /*------------------------------------------------------------------.
 607 | TEXT is pointing to a wannabee semantic value (i.e., a `$').      |
 608 |                                                                   |
 609 | Possible inputs: $[<TYPENAME>]($|integer)                         |
 610 |                                                                   |
 611 | Output to the STRING_OBSTACK a reference to this semantic value.  |
 612 `------------------------------------------------------------------*/
 613
 614 static inline void
 615 handle_action_dollar (char *text, location_t location)
 616 {
 617   const char *type_name = NULL;
 618   char *cp = text + 1;
 619
 620   /* Get the type name if explicit. */
 621   if (*cp == '<')
 622     {
 623       type_name = ++cp;
 624       while (*cp != '>')
 625         ++cp;
 626       *cp = '\0';
 627       ++cp;
 628     }
 629
 630   if (*cp == '$')
 631     {
 632       if (!type_name)
 633         type_name = symbol_list_n_type_name_get (current_rule, location, 0);
 634       if (!type_name && typed)
 635         complain_at (location, _("$$ of `%s' has no declared type"),
 636                      current_rule->sym->tag);
 637       if (!type_name)
 638         type_name = "";
 639       obstack_fgrow1 (&string_obstack,
 640                       "]b4_lhs_value([%s])[", type_name);
 641     }
 642   else
 643     {
 644       long num;
 645       errno = 0;
 646       num = strtol (cp, 0, 10);
 647
 648       if (INT_MIN <= num && num <= rule_length && ! errno)
 649         {
 650           int n = num;
 651           if (!type_name && n > 0)
 652             type_name = symbol_list_n_type_name_get (current_rule, location,
 653                                                      n);
 654           if (!type_name && typed)
 655             complain_at (location, _("$%d of `%s' has no declared type"),
 656                       n, current_rule->sym->tag);
 657           if (!type_name)
 658             type_name = "";
 659           obstack_fgrow3 (&string_obstack,
 660                           "]b4_rhs_value([%d], [%d], [%s])[",
 661                           rule_length, n, type_name);
 662         }
 663       else
 664         complain_at (location, _("integer out of range: %s"), quote (text));
 665     }
 666 }
 667
 668
 669 /*---------------------------------------------------------------.
 670 | TEXT is expected to be $$ in some code associated to a symbol: |
 671 | destructor or printer.                                         |
 672 `---------------------------------------------------------------*/
 673
 674 static inline void
 675 handle_symbol_code_dollar (char *text, location_t location)
 676 {
 677   char *cp = text + 1;
 678   if (*cp == '$')
 679     obstack_sgrow (&string_obstack, "]b4_dollar_dollar[");
 680   else
 681     complain_at (location, _("invalid value: %s"), quote (text));
 682 }
 683
 684
 685 /*-----------------------------------------------------------------.
 686 | Dispatch onto handle_action_dollar, or handle_destructor_dollar, |
 687 | depending upon CODE_KIND.                                        |
 688 `-----------------------------------------------------------------*/
 689
 690 static void
 691 handle_dollar (braced_code_t braced_code_kind,
 692                char *text, location_t location)
 693 {
 694   switch (braced_code_kind)
 695     {
 696     case action_braced_code:
 697       handle_action_dollar (text, location);
 698       break;
 699
 700     case destructor_braced_code:
 701     case printer_braced_code:
 702       handle_symbol_code_dollar (text, location);
 703       break;
 704     }
 705 }
 706
 707
 708 /*------------------------------------------------------.
 709 | TEXT is a location token (i.e., a `@...').  Output to |
 710 | STRING_OBSTACK a reference to this location.          |
 711 `------------------------------------------------------*/
 712
 713 static inline void
 714 handle_action_at (char *text, location_t location)
 715 {
 716   char *cp = text + 1;
 717   locations_flag = 1;
 718
 719   if (*cp == '$')
 720     {
 721       obstack_sgrow (&string_obstack, "]b4_lhs_location[");
 722     }
 723   else
 724     {
 725       long num;
 726       errno = 0;
 727       num = strtol (cp, 0, 10);
 728
 729       if (INT_MIN <= num && num <= rule_length && ! errno)
 730         {
 731           int n = num;
 732           obstack_fgrow2 (&string_obstack, "]b4_rhs_location([%d], [%d])[",
 733                           rule_length, n);
 734         }
 735       else
 736         complain_at (location, _("integer out of range: %s"), quote (text));
 737     }
 738 }
 739
 740
 741 /*---------------------------------------------------------------.
 742 | TEXT is expected to be @$ in some code associated to a symbol: |
 743 | destructor or printer.                                         |
 744 `---------------------------------------------------------------*/
 745
 746 static inline void
 747 handle_symbol_code_at (char *text, location_t location)
 748 {
 749   char *cp = text + 1;
 750   if (*cp == '$')
 751     obstack_sgrow (&string_obstack, "]b4_at_dollar[");
 752   else
 753     complain_at (location, _("invalid value: %s"), quote (text));
 754 }
 755
 756
 757 /*-------------------------------------------------------------------.
 758 | Dispatch onto handle_action_at, or handle_destructor_at, depending |
 759 | upon CODE_KIND.                                                    |
 760 `-------------------------------------------------------------------*/
 761
 762 static void
 763 handle_at (braced_code_t braced_code_kind,
 764            char *text, location_t location)
 765 {
 766   switch (braced_code_kind)
 767     {
 768     case action_braced_code:
 769       handle_action_at (text, location);
 770       break;
 771
 772     case destructor_braced_code:
 773     case printer_braced_code:
 774       handle_symbol_code_at (text, location);
 775       break;
 776     }
 777 }
 778
 779
 780 /*------------------------------------------------------------------.
 781 | Convert universal character name UCN to a single-byte character,  |
 782 | and return that character.  Return -1 if UCN does not correspond  |
 783 | to a single-byte character.                                       |
 784 `------------------------------------------------------------------*/
 785
 786 static int
 787 convert_ucn_to_byte (char const *ucn)
 788 {
 789   unsigned long code = strtoul (ucn + 2, 0, 16);
 790
 791   /* FIXME: Currently we assume Unicode-compatible unibyte characters
 792      on ASCII hosts (i.e., Latin-1 on hosts with 8-bit bytes).  On
 793      non-ASCII hosts we support only the portable C character set.
 794      These limitations should be removed once we add support for
 795      multibyte characters.  */
 796
 797   if (UCHAR_MAX < code)
 798     return -1;
 799
 800 #if ! ('$' == 0x24 && '@' == 0x40 && '`' == 0x60 && '~' == 0x7e)
 801   {
 802     /* A non-ASCII host.  Use CODE to index into a table of the C
 803        basic execution character set, which is guaranteed to exist on
 804        all Standard C platforms.  This table also includes '$', '@',
 805        and '`', which are not in the basic execution character set but
 806        which are unibyte characters on all the platforms that we know
 807        about.  */
 808     static signed char const table[] =
 809       {
 810         '\0',   -1,   -1,   -1,   -1,   -1,   -1, '\a',
 811         '\b', '\t', '\n', '\v', '\f', '\r',   -1,   -1,
 812           -1,   -1,   -1,   -1,   -1,   -1,   -1,   -1,
 813           -1,   -1,   -1,   -1,   -1,   -1,   -1,   -1,
 814          ' ',  '!',  '"',  '#',  '$',  '%',  '&', '\'',
 815          '(',  ')',  '*',  '+',  ',',  '-',  '.',  '/',
 816          '0',  '1',  '2',  '3',  '4',  '5',  '6',  '7',
 817          '8',  '9',  ':',  ';',  '<',  '=',  '>',  '?',
 818          '@',  'A',  'B',  'C',  'D',  'E',  'F',  'G',
 819          'H',  'I',  'J',  'K',  'L',  'M',  'N',  'O',
 820          'P',  'Q',  'R',  'S',  'T',  'U',  'V',  'W',
 821          'X',  'Y',  'Z',  '[', '\\',  ']',  '^',  '_',
 822          '`',  'a',  'b',  'c',  'd',  'e',  'f',  'g',
 823          'h',  'i',  'j',  'k',  'l',  'm',  'n',  'o',
 824          'p',  'q',  'r',  's',  't',  'u',  'v',  'w',
 825          'x',  'y',  'z',  '{',  '|',  '}',  '~'
 826       };
 827
 828     code = code < sizeof table ? table[code] : -1;
 829   }
 830 #endif
 831
 832   return code;
 833 }
 834
 835
 836 /*----------------------------------------------------------------.
 837 | Handle `#line INT "FILE"'.  ARGS has already skipped `#line '.  |
 838 `----------------------------------------------------------------*/
 839
 840 static void
 841 handle_syncline (char *args, location_t *location)
 842 {
 843   int lineno = strtol (args, &args, 10);
 844   const char *file = NULL;
 845   file = strchr (args, '"') + 1;
 846   *strchr (file, '"') = 0;
 847   current_file = xstrdup (file);
 848   location->file = current_file;
 849   location->last_line = lineno;
 850 }
 851
 852
 853 /*-------------------------------------------------------------.
 854 | Report an unexpected end of file at LOC.  An end of file was |
 855 | encountered and the expected TOKEN_END was missing.  After   |
 856 | reporting the problem, pretend that TOKEN_END was found.     |
 857 `-------------------------------------------------------------*/
 858
 859 static void
 860 unexpected_end_of_file (location_t *loc, char const *token_end)
 861 {
 862   size_t i = strlen (token_end);
 863
 864   complain_at (*loc, _("missing `%s' at end of file"), token_end);
 865
 866   /* Adjust location's last column so that any later message does not
 867      mention the characters just inserted.  */
 868   loc->last_column -= i;
 869
 870   while (i != 0)
 871     unput (token_end[--i]);
 872 }
 873
 874
 875 /*-------------------------.
 876 | Initialize the scanner.  |
 877 `-------------------------*/
 878
 879 void
 880 scanner_initialize (void)
 881 {
 882   obstack_init (&string_obstack);
 883 }
 884
 885
 886 /*-----------------------------------------------.
 887 | Free all the memory allocated to the scanner.  |
 888 `-----------------------------------------------*/
 889
 890 void
 891 scanner_free (void)
 892 {
 893   obstack_free (&string_obstack, 0);
 894   /* Reclaim Flex's buffers.  */
 895   yy_delete_buffer (YY_CURRENT_BUFFER);
 896 }