src/scan-gram.l

   1 /* Bison Grammar Scanner                             -*- C -*-
   2    Copyright (C) 2002 Free Software Foundation, Inc.
   3
   4    This file is part of Bison, the GNU Compiler Compiler.
   5
   6    This program is free software; you can redistribute it and/or modify
   7    it under the terms of the GNU General Public License as published by
   8    the Free Software Foundation; either version 2 of the License, or
   9    (at your option) any later version.
  10
  11    This program is distributed in the hope that it will be useful,
  12    but WITHOUT ANY WARRANTY; without even the implied warranty of
  13    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  14    GNU General Public License for more details.
  15
  16    You should have received a copy of the GNU General Public License
  17    along with this program; if not, write to the Free Software
  18    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
  19    02111-1307  USA
  20 */
  21
  22 %option debug nodefault noyywrap nounput never-interactive stack
  23 %option prefix="gram_" outfile="lex.yy.c"
  24
  25 %{
  26 #include "system.h"
  27 #include "mbswidth.h"
  28 #include "complain.h"
  29 #include "quote.h"
  30 #include "getargs.h"
  31 #include "gram.h"
  32 #include "reader.h"
  33
  34 /* Scanner start-up hook: reset *YYLLOC and record INFILE as its file.  */
  35 #define YY_USER_INIT                            \
  36 do {                                            \
  37   LOCATION_RESET (*yylloc);                     \
  38   yylloc->file = infile;                        \
  39    /* This is only to avoid GCC warnings. */    \
  40   if (yycontrol) {;};                           \
  41 } while (0)
  42
     /* Each time we match a string, move the end cursor to its end. */
  43 #define YY_USER_ACTION  extend_location (yylloc, yytext, yyleng);
     /* Apply LOCATION_STEP to *YYLLOC; the rules use it to mark the
        current position as the start of the next token.  */
  44 #define YY_STEP         LOCATION_STEP (*yylloc)
  45
     /* Fill the scanner's input buffer through no_cr_read.  */
  46 #define YY_INPUT(buf, result, size) ((result) = no_cr_read (yyin, buf, size))
  47
  48
  49 /* Read bytes from FP into buffer BUF of size SIZE.  Return the
  50    number of bytes read.  Remove '\r' from input, treating \r\n
  51    and isolated \r as \n.  */
  52
  53 static size_t
  54 no_cr_read (FILE *fp, char *buf, size_t size)
  55 {
  56   size_t s = fread (buf, 1, size, fp);
  57   if (s)
  58     {
  59       char *w = memchr (buf, '\r', s);
  60       if (w)
  61         {
  62           char const *r = ++w;
  63           char const *lim = buf + s;
  64
  65           for (;;)
  66             {
  67               /* Found an '\r'.  Treat it like '\n', but ignore any
  68                  '\n' that immediately follows.  */
  69               w[-1] = '\n';
  70               if (r == lim)
  71                 {
  72                   int ch = getc (fp);
  73                   if (ch != '\n' && ungetc (ch, fp) != ch)
  74                     break;
  75                 }
  76               else if (*r == '\n')
  77                 r++;
  78
  79               /* Copy until the next '\r'.  */
  80               do
  81                 {
  82                   if (r == lim)
  83                     return w - buf;
  84                 }
  85               while ((*w++ = *r++) != '\r');
  86             }
  87
  88           return w - buf;
  89         }
  90     }
  91
  92   return s;
  93 }
  94
  95
  96 /* Extend *LOC to account for token TOKEN of size SIZE.  */
  97
  98 static void
  99 extend_location (location_t *loc, char const *token, int size)
 100 {
 101   int line = loc->last_line;
 102   int column = loc->last_column;
 103   char const *p0 = token;
 104   char const *p = token;
 105   char const *lim = token + size;
 106
 107   for (p = token; p < lim; p++)
 108     switch (*p)
 109       {
 110       case '\r':
 111         /* \r shouldn't survive no_cr_read.  */
 112         abort ();
 113
 114       case '\n':
 115         line++;
 116         column = 1;
 117         p0 = p + 1;
 118         break;
 119
 120       case '\t':
 121         column += mbsnwidth (p0, p - p0, 0);
 122         column += 8 - ((column - 1) & 7);
 123         p0 = p + 1;
 124         break;
 125       }
 126
 127   loc->last_line = line;
 128   loc->last_column = column + mbsnwidth (p0, p - p0, 0);
 129 }
 130
 131
 132
 133 /* STRING_OBSTACK -- Used to store all the characters that we need to
 134    keep (to construct ID, STRINGS etc.).  Use the following macros to
 135    use it.
 136
 137    Use YY_OBS_GROW to append what has just been matched, and
 138    YY_OBS_FINISH to end the string (it puts the ending 0).
 139    YY_OBS_FINISH also stores this string in LAST_STRING, which can be
 140    used, and which is used by YY_OBS_FREE to free the last string.  */
 141
 142 static struct obstack string_obstack;
     /* Most recent string completed by YY_OBS_FINISH.  */
 143 char *last_string;
 144
     /* Append the text just matched (YYLENG bytes at YYTEXT).  */
 145 #define YY_OBS_GROW   \
 146   obstack_grow (&string_obstack, yytext, yyleng)
 147
     /* Append the terminating NUL byte, finish the string under
        construction and remember it in LAST_STRING.  */
 148 #define YY_OBS_FINISH                                   \
 149   do {                                                  \
 150     obstack_1grow (&string_obstack, '\0');              \
 151     last_string = obstack_finish (&string_obstack);     \
 152   } while (0)
 153
     /* Give LAST_STRING back to STRING_OBSTACK.  */
 154 #define YY_OBS_FREE                                             \
 155   do {                                                          \
 156     obstack_free (&string_obstack, last_string);                \
 157   } while (0)
 158
 159 void
 160 scanner_last_string_free (void)
 161 {
 162   YY_OBS_FREE;
 163 }
 164
 165
 166 static int percent_percent_count = 0;
 167
 168 /* Within well-formed rules, RULE_LENGTH is the number of values in
 169    the current rule so far, which says where to find `$0' with respect
 170    to the top of the stack.  It is not the same as the rule->length in
 171    the case of mid rule actions.
 172
 173    Outside of well-formed rules, RULE_LENGTH has an undefined value.  */
 174 static int rule_length;
 175
 176 static void handle_dollar (braced_code_t code_kind,
 177                            char *cp, location_t location);
 178 static void handle_at (braced_code_t code_kind,
 179                        char *cp, location_t location);
 180 static int convert_ucn_to_byte (char const *hex_text);
 181
 182 %}
 183 %x SC_COMMENT SC_LINE_COMMENT SC_YACC_COMMENT
 184 %x SC_STRING SC_CHARACTER
 185 %x SC_ESCAPED_STRING SC_ESCAPED_CHARACTER
 186 %x SC_BRACED_CODE SC_PROLOGUE SC_EPILOGUE
 187
 188 letter    [.abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ_]
 189 id        {letter}({letter}|[0-9])*
 190 directive %{letter}({letter}|[0-9]|-)*
 191 int       [0-9]+
 192
 193 /* POSIX says that a tag must be both an id and a C union member, but
 194    historically almost any character is allowed in a tag.  We disallow
 195    NUL and newline, as this simplifies our implementation.  */
 196 tag      [^\0\n>]+
 197
 198 /* Zero or more instances of backslash-newline.  Following GCC, allow
 199    white space between the backslash and the newline.  */
 200 splice   (\\[ \f\t\v]*\n)*
 201
 202 %%
 203 %{
       /* Brace nesting depth inside the braced code being scanned: set
          to 0 when SC_BRACED_CODE is entered, then incremented and
          decremented by the rules of that start condition.
          NOTE(review): IF_LINT presumably supplies the initializer only
          in lint builds -- confirm.  */
 204   int braces_level IF_LINT (= 0);
 205
 206   /* At each yylex invocation, mark the current position as the
 207      start of the next token.  */
 208   YY_STEP;
 209 %}
 210
 211
 212   /*----------------------------.
 213   | Scanning Bison directives.  |
 214   `----------------------------*/
 215 <INITIAL>
 216 {
       /* Directive keywords.  Several spellings share one token:
          `%binary' and `%nonassoc' both yield PERCENT_NONASSOC, `%term'
          and `%token' both yield PERCENT_TOKEN, `%fixed-output-files'
          and `%yacc' both yield PERCENT_YACC.  `[-_]' accepts either a
          dash or an underscore as the word separator.  */
 217   "%binary"               return PERCENT_NONASSOC;
 218   "%debug"                return PERCENT_DEBUG;
 219   "%define"               return PERCENT_DEFINE;
 220   "%defines"              return PERCENT_DEFINES;
 221   "%destructor"           return PERCENT_DESTRUCTOR;
 222   "%dprec"                return PERCENT_DPREC;
 223   "%error"[-_]"verbose"   return PERCENT_ERROR_VERBOSE;
 224   "%expect"               return PERCENT_EXPECT;
 225   "%file-prefix"          return PERCENT_FILE_PREFIX;
 226   "%fixed"[-_]"output"[-_]"files"   return PERCENT_YACC;
 227   "%glr-parser"           return PERCENT_GLR_PARSER;
 228   "%left"                 return PERCENT_LEFT;
 229   "%locations"            return PERCENT_LOCATIONS;
 230   "%merge"                return PERCENT_MERGE;
 231   "%name"[-_]"prefix"     return PERCENT_NAME_PREFIX;
 232   "%no"[-_]"lines"        return PERCENT_NO_LINES;
 233   "%nonassoc"             return PERCENT_NONASSOC;
 234   "%nterm"                return PERCENT_NTERM;
 235   "%output"               return PERCENT_OUTPUT;
 236   "%parse-param"          return PERCENT_PARSE_PARAM;
       /* NOTE(review): the decrement presumably compensates for the
          rule_length++ performed when the symbol that follows %prec is
          scanned -- confirm against the grammar.  */
 237   "%prec"                 rule_length--; return PERCENT_PREC;
 238   "%printer"              return PERCENT_PRINTER;
 239   "%pure"[-_]"parser"     return PERCENT_PURE_PARSER;
 240   "%right"                return PERCENT_RIGHT;
 241   "%lex-param"            return PERCENT_LEX_PARAM;
 242   "%skeleton"             return PERCENT_SKELETON;
 243   "%start"                return PERCENT_START;
 244   "%term"                 return PERCENT_TOKEN;
 245   "%token"                return PERCENT_TOKEN;
 246   "%token"[-_]"table"     return PERCENT_TOKEN_TABLE;
 247   "%type"                 return PERCENT_TYPE;
 248   "%union"                return PERCENT_UNION;
 249   "%verbose"              return PERCENT_VERBOSE;
 250   "%yacc"                 return PERCENT_YACC;
 251
       /* Any other %-word: diagnose it, return no token, and restart
          the location after it.  */
 252   {directive}             {
 253     complain_at (*yylloc, _("invalid directive: %s"), quote (yytext));
 254     YY_STEP;
 255   }
 256
 257   "="                     return EQUAL;
       /* `:' and `|' both reset the count of values seen in the
          current rule.  */
 258   ":"                     rule_length = 0; return COLON;
 259   "|"                     rule_length = 0; return PIPE;
 260   ","                     return COMMA;
 261   ";"                     return SEMICOLON;
 262
       /* White space only moves the start of the next token.  */
 263   [ \f\n\t\v]+  YY_STEP;
 264
       /* Identifiers: each one counts as a value of the current rule.  */
 265   {id}        {
 266     yylval->symbol = symbol_get (yytext, *yylloc);
 267     rule_length++;
 268     return ID;
 269   }
 270
       /* Decimal integers.  A value above INT_MAX (or a strtoul error)
          is diagnosed and replaced by INT_MAX.  */
 271   {int} {
 272     unsigned long num;
 273     errno = 0;
 274     num = strtoul (yytext, 0, 10);
 275     if (INT_MAX < num || errno)
 276       {
 277         complain_at (*yylloc, _("invalid value: %s"), quote (yytext));
 278         num = INT_MAX;
 279       }
 280     yylval->integer = num;
 281     return INT;
 282   }
 283
 284   /* Characters.  We don't check there is only one.  */
 285   "'"         YY_OBS_GROW; yy_push_state (SC_ESCAPED_CHARACTER);
 286
 287   /* Strings. */
 288   "\""        YY_OBS_GROW; yy_push_state (SC_ESCAPED_STRING);
 289
 290   /* Comments. */
 291   "/*"        BEGIN SC_YACC_COMMENT;
 292   "//".*      YY_STEP;
 293
 294   /* Prologue. */
 295   "%{"        yy_push_state (SC_PROLOGUE);
 296
 297   /* Code in between braces.  */
 298   "{"         YY_OBS_GROW; braces_level = 0; yy_push_state (SC_BRACED_CODE);
 299
 300   /* A type.  The surrounding `<' and `>' are not kept.  */
 301   "<"{tag}">" {
 302     obstack_grow (&string_obstack, yytext + 1, yyleng - 2);
 303     YY_OBS_FINISH;
 304     yylval->string = last_string;
 305     return TYPE;
 306   }
 307
 308
       /* The second `%%' of the file switches to the epilogue.  */
 309   "%%"   {
 310     if (++percent_percent_count == 2)
 311       yy_push_state (SC_EPILOGUE);
 312     return PERCENT_PERCENT;
 313   }
 314
       /* Anything else is diagnosed and skipped.  */
 315   .           {
 316     complain_at (*yylloc, _("invalid character: %s"), quote (yytext));
 317     YY_STEP;
 318   }
 319 }
 320
 321
 322   /*-------------------------------------------------------------------.
 323   | Whatever the start condition (but those which correspond to        |
 324   | entities `swallowed' by Bison: SC_YACC_COMMENT, SC_ESCAPED_STRING, |
 325   | and SC_ESCAPED_CHARACTER), no M4 character must escape as is.      |
 326   `-------------------------------------------------------------------*/
 327
 328 <SC_COMMENT,SC_LINE_COMMENT,SC_STRING,SC_CHARACTER,SC_BRACED_CODE,SC_PROLOGUE,SC_EPILOGUE>
 329 {
       /* Store `@<:@' in place of `[' and `@:>@' in place of `]'.  */
 330   \[    obstack_sgrow (&string_obstack, "@<:@");
 331   \]    obstack_sgrow (&string_obstack, "@:>@");
 332 }
 333
 334
 335   /*---------------------------------------------------------------.
 336   | Scanning a Yacc comment.  The initial `/ *' is already eaten.  |
 337   | The comment text is discarded: nothing is stored in the        |
 338   | obstack and no token is returned.                              |
 339   `---------------------------------------------------------------*/
 338
 339 <SC_YACC_COMMENT>
 340 {
       /* End of the comment: restart the location and go back to
          INITIAL.  */
 341   "*/" {
 342     YY_STEP;
 343     BEGIN INITIAL;
 344   }
 345
       /* Comment body: ignored.  */
 346   [^*]+|"*"  ;
 347
 348   <<EOF>> {
 349     complain_at (*yylloc, _("unexpected end of file in a comment"));
 350     BEGIN INITIAL;
 351   }
 352 }
 353
 354
 355   /*------------------------------------------------------------.
 356   | Scanning a C comment.  The initial `/ *' is already eaten.  |
 357   | The comment text is copied to the obstack.                  |
 358   `------------------------------------------------------------*/
 358
 359 <SC_COMMENT>
 360 {
       /* The closing star and slash may be separated by
          backslash-newline splices.  */
 361   "*"{splice}"/"  YY_OBS_GROW; yy_pop_state ();
 362   [^*\[\]]+|"*"   YY_OBS_GROW;
 363
 364   <<EOF>> {
 365     complain_at (*yylloc, _("unexpected end of file in a comment"));
 366     yy_pop_state ();
 367   }
 368 }
 369
 370
 371   /*--------------------------------------------------------------.
 372   | Scanning a line comment.  The initial `//' is already eaten.  |
 373   `--------------------------------------------------------------*/
 374
 375 <SC_LINE_COMMENT>
 376 {
       /* An unescaped newline ends the comment; a backslash-newline
          splice is part of the comment text.  End of file ends the
          comment without a diagnostic.  */
 377   "\n"                   YY_OBS_GROW; yy_pop_state ();
 378   ([^\n\[\]]|{splice})+  YY_OBS_GROW;
 379   <<EOF>>                yy_pop_state ();
 380 }
 381
 382
 383   /*----------------------------------------------------------------.
 384   | Scanning a C string, including its escapes.  The initial `"' is |
 385   | already eaten.                                                  |
 386   `----------------------------------------------------------------*/
 387
 388 <SC_ESCAPED_STRING>
 389 {
       /* Closing quote: the string, quotes included, is returned as a
          STRING token and counts as one value of the current rule.  */
 390   "\"" {
 391     assert (yy_top_state () == INITIAL);
 392     YY_OBS_GROW;
 393     YY_OBS_FINISH;
 394     yylval->string = last_string;
 395     yy_pop_state ();
 396     rule_length++;
 397     return STRING;
 398   }
 399
 400   [^\"\\]+  YY_OBS_GROW;
 401
       /* Unterminated string: diagnose it and return what was
          collected so far (without a closing quote) as a STRING.  */
 402   <<EOF>> {
 403     complain_at (*yylloc, _("unexpected end of file in a string"));
 404     assert (yy_top_state () == INITIAL);
 405     YY_OBS_FINISH;
 406     yylval->string = last_string;
 407     yy_pop_state ();
 408     return STRING;
 409   }
 410 }
 411
 412   /*---------------------------------------------------------------.
 413   | Scanning a C character, decoding its escapes.  The initial "'" |
 414   | is already eaten.                                              |
 415   `---------------------------------------------------------------*/
 416
 417 <SC_ESCAPED_CHARACTER>
 418 {
       /* Closing quote.  The obstack holds the opening quote, the
          decoded character(s) and this closing quote, so last_string[1]
          is the first decoded character; it is used as the user token
          number of the symbol, which is returned as an ID.
          NOTE(review): LAST_STRING is freed right after symbol_get, so
          symbol_get presumably keeps its own copy -- confirm.  */
 419   "'" {
 420     YY_OBS_GROW;
 421     assert (yy_top_state () == INITIAL);
 422     {
 423       YY_OBS_FINISH;
 424       yylval->symbol = symbol_get (last_string, *yylloc);
 425       symbol_class_set (yylval->symbol, token_sym, *yylloc);
 426       symbol_user_token_number_set (yylval->symbol,
 427                                     (unsigned char) last_string[1], *yylloc);
 428       YY_OBS_FREE;
 429       yy_pop_state ();
 430       rule_length++;
 431       return ID;
 432     }
 433   }
 434
 435   [^\'\\]+  YY_OBS_GROW;
 436
       /* Unterminated character: diagnose it and return the collected
          text as a CHARACTER token.  */
 437   <<EOF>> {
 438     complain_at (*yylloc, _("unexpected end of file in a character"));
 439     assert (yy_top_state () == INITIAL);
 440     YY_OBS_FINISH;
 441     yylval->string = last_string;
 442     yy_pop_state ();
 443     return CHARACTER;
 444   }
 445 }
 446
 447
 448   /*----------------------------.
 449   | Decode escaped characters.  |
 450   `----------------------------*/
 451
 452 <SC_ESCAPED_STRING,SC_ESCAPED_CHARACTER>
 453 {
       /* Octal escape, one to three digits.  Values above UCHAR_MAX
          are diagnosed and nothing is stored.  */
 454   \\[0-7]{1,3} {
 455     unsigned long c = strtoul (yytext + 1, 0, 8);
 456     if (UCHAR_MAX < c)
 457       {
 458         complain_at (*yylloc, _("invalid escape: %s"), quote (yytext));
 459         YY_STEP;
 460       }
 461     else
 462       obstack_1grow (&string_obstack, c);
 463   }
 464
       /* Hexadecimal escape with any number of digits; ERRNO catches a
          strtoul failure such as overflow.  */
 465   \\x[0-9a-fA-F]+ {
 466     unsigned long c;
 467     errno = 0;
 468     c = strtoul (yytext + 2, 0, 16);
 469     if (UCHAR_MAX < c || errno)
 470       {
 471         complain_at (*yylloc, _("invalid escape: %s"), quote (yytext));
 472         YY_STEP;
 473       }
 474     else
 475       obstack_1grow (&string_obstack, c);
 476   }
 477
       /* Simple escapes store the character they denote.  */
 478   \\a   obstack_1grow (&string_obstack, '\a');
 479   \\b   obstack_1grow (&string_obstack, '\b');
 480   \\f   obstack_1grow (&string_obstack, '\f');
 481   \\n   obstack_1grow (&string_obstack, '\n');
 482   \\r   obstack_1grow (&string_obstack, '\r');
 483   \\t   obstack_1grow (&string_obstack, '\t');
 484   \\v   obstack_1grow (&string_obstack, '\v');
 485   \\[\"\'?\\]  obstack_1grow (&string_obstack, yytext[1]);
       /* Universal character names: `u' plus 4 hex digits or `U' plus
          8 hex digits.  Only those that convert_ucn_to_byte maps to a
          single byte are accepted.  */
 486   \\(u|U[0-9a-fA-F]{4})[0-9a-fA-F]{4} {
 487     int c = convert_ucn_to_byte (yytext);
 488     if (c < 0)
 489       {
 490         complain_at (*yylloc, _("invalid escape: %s"), quote (yytext));
 491         YY_STEP;
 492       }
 493     else
 494       obstack_1grow (&string_obstack, c);
 495   }
       /* Any other escape is diagnosed but stored verbatim.  */
 496   \\(.|\n)      {
 497     complain_at (*yylloc, _("unrecognized escape: %s"), quote (yytext));
 498     YY_OBS_GROW;
 499   }
 500   /* Flex wants this rule, in case of a `\<<EOF>>'. */
 501   \\                   YY_OBS_GROW;
 502 }
 503
 504
 505   /*----------------------------------------------------------.
 506   | Scanning a C character without decoding its escapes.  The |
 507   | initial "'" is already eaten.                             |
 508   `----------------------------------------------------------*/
 509
 510 <SC_CHARACTER>
 511 {
       /* Closing quote: copy it and resume the enclosing start
          condition, which the assertion requires not to be INITIAL.  */
 512   "'" {
 513     YY_OBS_GROW;
 514     assert (yy_top_state () != INITIAL);
 515     yy_pop_state ();
 516   }
 517
       /* Everything else is copied unchanged.  A backslash, optional
          splices and the next character are matched together unless
          that character is a bracket.  */
 518   [^'\[\]\\]+          YY_OBS_GROW;
 519   \\{splice}[^\[\]]    YY_OBS_GROW;
 520   {splice}             YY_OBS_GROW;
 521   /* Needed for `\<<EOF>>', `\\<<newline>>[', and `\\<<newline>>]'.  */
 522   \\                   YY_OBS_GROW;
 523
 524   <<EOF>> {
 525     complain_at (*yylloc, _("unexpected end of file in a character"));
 526     assert (yy_top_state () != INITIAL);
 527     yy_pop_state ();
 528   }
 529 }
 530
 531
 532   /*----------------------------------------------------------------.
 533   | Scanning a C string, without decoding its escapes.  The initial |
 534   | `"' is already eaten.                                           |
 535   `----------------------------------------------------------------*/
 536
 537 <SC_STRING>
 538 {
       /* Closing quote: copy it and resume the enclosing start
          condition, which the assertion requires not to be INITIAL.  */
 539   "\"" {
 540     assert (yy_top_state () != INITIAL);
 541     YY_OBS_GROW;
 542     yy_pop_state ();
 543   }
 544
       /* Everything else is copied unchanged.  A backslash, optional
          splices and the next character are matched together unless
          that character is a bracket.  */
 545   [^\"\[\]\\]+         YY_OBS_GROW;
 546   \\{splice}[^\[\]]    YY_OBS_GROW;
 547   {splice}             YY_OBS_GROW;
 548   /* Needed for `\<<EOF>>', `\\<<newline>>[', and `\\<<newline>>]'.  */
 549   \\                   YY_OBS_GROW;
 550
 551   <<EOF>> {
 552     complain_at (*yylloc, _("unexpected end of file in a string"));
 553     assert (yy_top_state () != INITIAL);
 554     yy_pop_state ();
 555   }
 556 }
 557
 558
 559   /*---------------------------------------------------.
 560   | Strings, comments etc. can be found in user code.  |
 561   | Each opener is copied to the obstack and the       |
 562   | matching start condition is pushed.                |
 563   `---------------------------------------------------*/
 562
 563 <SC_BRACED_CODE,SC_PROLOGUE,SC_EPILOGUE>
 564 {
 565   /* Characters.  We don't check there is only one.  */
 566   "'"         YY_OBS_GROW; yy_push_state (SC_CHARACTER);
 567
 568   /* Strings. */
 569   "\""        YY_OBS_GROW; yy_push_state (SC_STRING);
 570
 571   /* Comments.  The two characters of the opener may be separated
          by backslash-newline splices.  */
 572   "/"{splice}"*"  YY_OBS_GROW; yy_push_state (SC_COMMENT);
 573   "/"{splice}"/"  YY_OBS_GROW; yy_push_state (SC_LINE_COMMENT);
 574
 575   /* Not comments. */
 576   "/"         YY_OBS_GROW;
 577 }
 578
 579
 580   /*---------------------------------------------------------------.
 581   | Scanning some code in braces (%union and actions). The initial |
 582   | "{" is already eaten.                                          |
 583   `---------------------------------------------------------------*/
 584
 585 <SC_BRACED_CODE>
 586 {
       /* The digraphs `<%' and `%>' are counted like `{' and `}'.  */
 587   "{"|"<"{splice}"%"  YY_OBS_GROW; braces_level++;
 588   "%"{splice}">"      YY_OBS_GROW; braces_level--;
       /* braces_level starts at 0 for the opening brace, so it drops
          below 0 on the brace that closes the whole code block; the
          code is then returned and counts as one value of the rule.  */
 589   "}" {
 590     YY_OBS_GROW;
 591     braces_level--;
 592     if (braces_level < 0)
 593       {
 594         yy_pop_state ();
 595         YY_OBS_FINISH;
 596         yylval->string = last_string;
 597         rule_length++;
 598         return BRACED_CODE;
 599       }
 600   }
 601
       /* `$$', `$N', `$<tag>$', `$<tag>N', `@$' and `@N' are handed to
          handle_dollar and handle_at instead of being copied.  */
 602   "$"("<"{tag}">")?(-?[0-9]+|"$") { handle_dollar (current_braced_code,
 603                                                    yytext, *yylloc); }
 604   "@"(-?[0-9]+|"$")               { handle_at (current_braced_code,
 605                                                yytext, *yylloc); }
 606
 607   /* `"<"{splice}"<"' tokenizes `<<%' correctly (as `<<' `%') rather
 608      than incorrectly (as `<' `<%').  */
 609   [^\"$%\'/<@\[\]\{\}]+|[$%/<@]|"<"{splice}"<"  YY_OBS_GROW;
 610
       /* Unterminated code: diagnose it and return what was collected.  */
 611   <<EOF>> {
 612     complain_at (*yylloc, _("unexpected end of file in a braced code"));
 613     yy_pop_state ();
 614     YY_OBS_FINISH;
 615     yylval->string = last_string;
 616     return BRACED_CODE;
 617   }
 618
 619 }
 620
 621
 622   /*--------------------------------------------------------------.
 623   | Scanning some prologue: from "%{" (already scanned) to "%}".  |
 624   `--------------------------------------------------------------*/
 625
 626 <SC_PROLOGUE>
 627 {
       /* End of the prologue.  The `%}' itself is not stored.  */
 628   "%}" {
 629     yy_pop_state ();
 630     YY_OBS_FINISH;
 631     yylval->string = last_string;
 632     return PROLOGUE;
 633   }
 634
 635   [^%\[\]/\'\"]+     YY_OBS_GROW;
 636   "%"                YY_OBS_GROW;
 637
       /* Unterminated prologue: diagnose it and return what was
          collected.  */
 638   <<EOF>> {
 639     complain_at (*yylloc, _("unexpected end of file in a prologue"));
 640     yy_pop_state ();
 641     YY_OBS_FINISH;
 642     yylval->string = last_string;
 643     return PROLOGUE;
 644   }
 645 }
 646
 647
 648   /*---------------------------------------------------------------.
 649   | Scanning the epilogue (everything after the second "%%", which |
 650   | has already been eaten).                                       |
 651   `---------------------------------------------------------------*/
 652
 653 <SC_EPILOGUE>
 654 {
 655   [^\[\]]+  YY_OBS_GROW;
 656
       /* End of file is the normal end of the epilogue: return the
          collected text.  */
 657   <<EOF>> {
 658     yy_pop_state ();
 659     YY_OBS_FINISH;
 660     yylval->string = last_string;
 661     return EPILOGUE;
 662   }
 663 }
 664
 665
 666 %%
 667
 668 /*------------------------------------------------------------------.
 669 | TEXT is pointing to a wannabee semantic value (i.e., a `$').      |
 670 |                                                                   |
 671 | Possible inputs: $[<TYPENAME>]($|integer)                         |
 672 |                                                                   |
 673 | Output to the STRING_OBSTACK a reference to this semantic value.  |
 674 `------------------------------------------------------------------*/
 675
 676 static inline void
 677 handle_action_dollar (char *text, location_t location)
 678 {
 679   const char *type_name = NULL;
 680   char *cp = text + 1;
 681
 682   /* Get the type name if explicit. */
 683   if (*cp == '<')
 684     {
 685       type_name = ++cp;
 686       while (*cp != '>')
 687         ++cp;
 688       *cp = '\0';
 689       ++cp;
 690     }
 691
 692   if (*cp == '$')
 693     {
 694       if (!type_name)
 695         type_name = symbol_list_n_type_name_get (current_rule, location, 0);
 696       if (!type_name && typed)
 697         complain_at (location, _("$$ of `%s' has no declared type"),
 698                      current_rule->sym->tag);
 699       if (!type_name)
 700         type_name = "";
 701       obstack_fgrow1 (&string_obstack,
 702                       "]b4_lhs_value([%s])[", type_name);
 703     }
 704   else
 705     {
 706       long num;
 707       errno = 0;
 708       num = strtol (cp, 0, 10);
 709
 710       if (INT_MIN <= num && num <= rule_length && ! errno)
 711         {
 712           int n = num;
 713           if (!type_name && n > 0)
 714             type_name = symbol_list_n_type_name_get (current_rule, location,
 715                                                      n);
 716           if (!type_name && typed)
 717             complain_at (location, _("$%d of `%s' has no declared type"),
 718                       n, current_rule->sym->tag);
 719           if (!type_name)
 720             type_name = "";
 721           obstack_fgrow3 (&string_obstack,
 722                           "]b4_rhs_value([%d], [%d], [%s])[",
 723                           rule_length, n, type_name);
 724         }
 725       else
 726         complain_at (location, _("invalid value: %s"), quote (text));
 727     }
 728 }
 729
 730
 731 /*---------------------------------------------------------------.
 732 | TEXT is expected to be $$ in some code associated to a symbol: |
 733 | destructor or printer.                                         |
 734 `---------------------------------------------------------------*/
 735
 736 static inline void
 737 handle_symbol_code_dollar (char *text, location_t location)
 738 {
 739   char *cp = text + 1;
 740   if (*cp == '$')
 741     obstack_sgrow (&string_obstack, "]b4_dollar_dollar[");
 742   else
 743     complain_at (location, _("invalid value: %s"), quote (text));
 744 }
 745
 746
 747 /*-----------------------------------------------------------------.
 748 | Dispatch onto handle_action_dollar, or handle_destructor_dollar, |
 749 | depending upon CODE_KIND.                                        |
 750 `-----------------------------------------------------------------*/
 751
 752 static void
 753 handle_dollar (braced_code_t braced_code_kind,
 754                char *text, location_t location)
 755 {
 756   switch (braced_code_kind)
 757     {
 758     case action_braced_code:
 759       handle_action_dollar (text, location);
 760       break;
 761
 762     case destructor_braced_code:
 763     case printer_braced_code:
 764       handle_symbol_code_dollar (text, location);
 765       break;
 766     }
 767 }
 768
 769
 770 /*------------------------------------------------------.
 771 | TEXT is a location token (i.e., a `@...').  Output to |
 772 | STRING_OBSTACK a reference to this location.          |
 773 `------------------------------------------------------*/
 774
 775 static inline void
 776 handle_action_at (char *text, location_t location)
 777 {
 778   char *cp = text + 1;
 779   locations_flag = 1;
 780
 781   if (*cp == '$')
 782     {
 783       obstack_sgrow (&string_obstack, "]b4_lhs_location[");
 784     }
 785   else
 786     {
 787       long num;
 788       errno = 0;
 789       num = strtol (cp, 0, 10);
 790
 791       if (INT_MIN <= num && num <= rule_length && ! errno)
 792         {
 793           int n = num;
 794           obstack_fgrow2 (&string_obstack, "]b4_rhs_location([%d], [%d])[",
 795                           rule_length, n);
 796         }
 797       else
 798         complain_at (location, _("invalid value: %s"), quote (text));
 799     }
 800 }
 801
 802
 803 /*---------------------------------------------------------------.
 804 | TEXT is expected to be @$ in some code associated to a symbol: |
 805 | destructor or printer.                                         |
 806 `---------------------------------------------------------------*/
 807
 808 static inline void
 809 handle_symbol_code_at (char *text, location_t location)
 810 {
 811   char *cp = text + 1;
 812   if (*cp == '$')
 813     obstack_sgrow (&string_obstack, "]b4_at_dollar[");
 814   else
 815     complain_at (location, _("invalid value: %s"), quote (text));
 816 }
 817
 818
 819 /*-------------------------------------------------------------------.
 820 | Dispatch onto handle_action_at, or handle_destructor_at, depending |
 821 | upon CODE_KIND.                                                    |
 822 `-------------------------------------------------------------------*/
 823
 824 static void
 825 handle_at (braced_code_t braced_code_kind,
 826            char *text, location_t location)
 827 {
 828   switch (braced_code_kind)
 829     {
 830     case action_braced_code:
 831       handle_action_at (text, location);
 832       break;
 833
 834     case destructor_braced_code:
 835     case printer_braced_code:
 836       handle_symbol_code_at (text, location);
 837       break;
 838     }
 839 }
 840
 841
 842 /*------------------------------------------------------------------.
 843 | Convert universal character name UCN to a single-byte character,  |
 844 | and return that character.  Return -1 if UCN does not correspond  |
 845 | to a single-byte character.                                       |
 846 `------------------------------------------------------------------*/
 847
 848 static int
 849 convert_ucn_to_byte (char const *ucn)
 850 {
 851   unsigned long code = strtoul (ucn + 2, 0, 16);
 852
 853   /* FIXME: Currently we assume Unicode-compatible unibyte characters
 854      on ASCII hosts (i.e., Latin-1 on hosts with 8-bit bytes).  On
 855      non-ASCII hosts we support only the portable C character set.
 856      These limitations should be removed once we add support for
 857      multibyte characters.  */
 858
 859   if (UCHAR_MAX < code)
 860     return -1;
 861
 862 #if ! ('$' == 0x24 && '@' == 0x40 && '`' == 0x60 && '~' == 0x7e)
 863   {
 864     /* A non-ASCII host.  Use CODE to index into a table of the C
 865        basic execution character set, which is guaranteed to exist on
 866        all Standard C platforms.  This table also includes '$', '@',
 867        and '`', which not in the basic execution character set but
 868        which are unibyte characters on all the platforms that we know
 869        about.  */
 870     static signed char const table[] =
 871       {
 872         '\0',   -1,   -1,   -1,   -1,   -1,   -1, '\a',
 873         '\b', '\t', '\n', '\v', '\f', '\r',   -1,   -1,
 874           -1,   -1,   -1,   -1,   -1,   -1,   -1,   -1,
 875           -1,   -1,   -1,   -1,   -1,   -1,   -1,   -1,
 876          ' ',  '!',  '"',  '#',  '$',  '%',  '&', '\'',
 877          '(',  ')',  '*',  '+',  ',',  '-',  '.',  '/',
 878          '0',  '1',  '2',  '3',  '4',  '5',  '6',  '7',
 879          '8',  '9',  ':',  ';',  '<',  '=',  '>',  '?',
 880          '@',  'A',  'B',  'C',  'D',  'E',  'F',  'G',
 881          'H',  'I',  'J',  'K',  'L',  'M',  'N',  'O',
 882          'P',  'Q',  'R',  'S',  'T',  'U',  'V',  'W',
 883          'X',  'Y',  'Z',  '[', '\\',  ']',  '^',  '_',
 884          '`',  'a',  'b',  'c',  'd',  'e',  'f',  'g',
 885          'h',  'i',  'j',  'k',  'l',  'm',  'n',  'o',
 886          'p',  'q',  'r',  's',  't',  'u',  'v',  'w',
 887          'x',  'y',  'z',  '{',  '|',  '}',  '~'
 888       };
 889
 890     code = code < sizeof table ? table[code] : -1;
 891   }
 892 #endif
 893
 894   return code;
 895 }
 896
 897
 898 /*----------------------------------------------------------.
 899 | Initialize the scanner: set up STRING_OBSTACK, on which   |
 900 | the scanner rules build every string they keep.           |
 901 `----------------------------------------------------------*/
 901
 902 void
 903 scanner_initialize (void)
 904 {
 905   obstack_init (&string_obstack);
 906 }
 907
 908
 909 /*-----------------------------------------------.
 910 | Free all the memory allocated to the scanner.  |
 911 `-----------------------------------------------*/
 912
 913 void
 914 scanner_free (void)
 915 {
 916   obstack_free (&string_obstack, 0);
 917   /* Reclaim Flex's buffers.  */
 918   yy_delete_buffer (YY_CURRENT_BUFFER);
 919 }