src/scan-gram.l

   1 /* Bison Grammar Scanner                             -*- C -*-
   2    Copyright (C) 2002 Free Software Foundation, Inc.
   3
   4    This file is part of Bison, the GNU Compiler Compiler.
   5
   6    This program is free software; you can redistribute it and/or modify
   7    it under the terms of the GNU General Public License as published by
   8    the Free Software Foundation; either version 2 of the License, or
   9    (at your option) any later version.
  10
  11    This program is distributed in the hope that it will be useful,
  12    but WITHOUT ANY WARRANTY; without even the implied warranty of
  13    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  14    GNU General Public License for more details.
  15
  16    You should have received a copy of the GNU General Public License
  17    along with this program; if not, write to the Free Software
  18    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
  19    02111-1307  USA
  20 */
  21
  22 %option debug nodefault noyywrap nounput never-interactive stack
  23 %option prefix="gram_" outfile="lex.yy.c"
  24
  25 %{
  26 #include "system.h"
  27 #include "mbswidth.h"
  28 #include "complain.h"
  29 #include "quote.h"
  30 #include "getargs.h"
  31 #include "gram.h"
  32 #include "reader.h"
  33
  34 /* Each time we match a string, move the end cursor to its end. */
  35 #define YY_USER_INIT                            \
  36 do {                                            \
  37   LOCATION_RESET (*yylloc);                     \
  38   yylloc->file = infile;                        \
  39    /* This is only to avoid GCC warnings. */    \
  40   if (yycontrol) {;};                           \
  41 } while (0)
  42
  43 #define YY_USER_ACTION  extend_location (yylloc, yytext, yyleng);
  44 #define YY_STEP         LOCATION_STEP (*yylloc)
  45
  46 #define YY_INPUT(buf, result, size) ((result) = no_cr_read (yyin, buf, size))
  47
  48
  49 /* Read bytes from FP into buffer BUF of size SIZE.  Return the
  50    number of bytes read.  Remove '\r' from input, treating \r\n
  51    and isolated \r as \n.  */
  52
  53 static size_t
  54 no_cr_read (FILE *fp, char *buf, size_t size)
  55 {
  56   size_t s = fread (buf, 1, size, fp);
  57   if (s)
  58     {
  59       char *w = memchr (buf, '\r', s);
  60       if (w)
  61         {
  62           char const *r = ++w;
  63           char const *lim = buf + s;
  64
  65           for (;;)
  66             {
  67               /* Found an '\r'.  Treat it like '\n', but ignore any
  68                  '\n' that immediately follows.  */
  69               w[-1] = '\n';
  70               if (r == lim)
  71                 {
  72                   int ch = getc (fp);
  73                   if (ch != '\n' && ungetc (ch, fp) != ch)
  74                     break;
  75                 }
  76               else if (*r == '\n')
  77                 r++;
  78
  79               /* Copy until the next '\r'.  */
  80               do
  81                 {
  82                   if (r == lim)
  83                     return w - buf;
  84                 }
  85               while ((*w++ = *r++) != '\r');
  86             }
  87
  88           return w - buf;
  89         }
  90     }
  91
  92   return s;
  93 }
  94
  95
  96 /* Extend *LOC to account for token TOKEN of size SIZE.  */
  97
  98 static void
  99 extend_location (location_t *loc, char const *token, int size)
 100 {
 101   int line = loc->last_line;
 102   int column = loc->last_column;
 103   char const *p0 = token;
 104   char const *p = token;
 105   char const *lim = token + size;
 106
 107   for (p = token; p < lim; p++)
 108     switch (*p)
 109       {
 110       case '\r':
 111         /* \r shouldn't survive no_cr_read.  */
 112         abort ();
 113
 114       case '\n':
 115         line++;
 116         column = 1;
 117         p0 = p + 1;
 118         break;
 119
 120       case '\t':
 121         column += mbsnwidth (p0, p - p0, 0);
 122         column += 8 - ((column - 1) & 7);
 123         p0 = p + 1;
 124         break;
 125       }
 126
 127   loc->last_line = line;
 128   loc->last_column = column + mbsnwidth (p0, p - p0, 0);
 129 }
 130
 131
 132
 133 /* STRING_OBSTACK -- Used to store all the characters that we need to
 134    keep (to construct ID, STRINGS etc.).  Use the following macros to
 135    use it.
 136
 137    Use YY_OBS_GROW to append what has just been matched, and
 138    YY_OBS_FINISH to end the string (it puts the ending 0).
 139    YY_OBS_FINISH also stores this string in LAST_STRING, which can be
 140    used, and which is used by YY_OBS_FREE to free the last string.  */
 141
 142 static struct obstack string_obstack;
 143 char *last_string;
 144
 145 #define YY_OBS_GROW   \
 146   obstack_grow (&string_obstack, yytext, yyleng)
 147
 148 #define YY_OBS_FINISH                                   \
 149   do {                                                  \
 150     obstack_1grow (&string_obstack, '\0');              \
 151     last_string = obstack_finish (&string_obstack);     \
 152   } while (0)
 153
 154 #define YY_OBS_FREE                                             \
 155   do {                                                          \
 156     obstack_free (&string_obstack, last_string);                \
 157   } while (0)
 158
 159 void
 160 scanner_last_string_free (void)
 161 {
 162   YY_OBS_FREE;
 163 }
 164
 165
 166 static int percent_percent_count = 0;
 167
 168 /* Within well-formed rules, RULE_LENGTH is the number of values in
 169    the current rule so far, which says where to find `$0' with respect
 170    to the top of the stack.  It is not the same as the rule->length in
 171    the case of mid rule actions.
 172
 173    Outside of well-formed rules, RULE_LENGTH has an undefined value.  */
 174 static int rule_length;
 175
 176 static void handle_dollar (braced_code_t code_kind,
 177                            char *cp, location_t location);
 178 static void handle_at (braced_code_t code_kind,
 179                        char *cp, location_t location);
 180 static int convert_ucn_to_byte (char const *hex_text);
 181
 182 %}
 183 %x SC_COMMENT SC_LINE_COMMENT SC_YACC_COMMENT
 184 %x SC_STRING SC_CHARACTER
 185 %x SC_ESCAPED_STRING SC_ESCAPED_CHARACTER
 186 %x SC_BRACED_CODE SC_PROLOGUE SC_EPILOGUE
 187
 188 letter    [.abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ_]
 189 id        {letter}({letter}|[0-9])*
 190 directive %{letter}({letter}|[0-9]|-)*
 191 int       [0-9]+
 192
 193 /* POSIX says that a tag must be both an id and a C union member, but
 194    historically almost any character is allowed in a tag.  We disallow
 195    NUL and newline, as this simplifies our implementation.  */
 196 tag      [^\0\n>]+
 197
 198 /* Zero or more instances of backslash-newline.  Following GCC, allow
 199    white space between the backslash and the newline.  */
 200 splice   (\\[ \f\t\v]*\n)*
 201
 202 %%
 203 %{
 204   int braces_level IF_LINT (= 0);
 205
 206   /* At each yylex invocation, mark the current position as the
 207      start of the next token.  */
 208   YY_STEP;
 209 %}
 210
 211
 212   /*----------------------------.
 213   | Scanning Bison directives.  |
 214   `----------------------------*/
 215 <INITIAL>
 216 {
 217   "%binary"               return PERCENT_NONASSOC;
 218   "%debug"                return PERCENT_DEBUG;
 219   "%define"               return PERCENT_DEFINE;
 220   "%defines"              return PERCENT_DEFINES;
 221   "%destructor"           return PERCENT_DESTRUCTOR;
 222   "%dprec"                return PERCENT_DPREC;
 223   "%error"[-_]"verbose"   return PERCENT_ERROR_VERBOSE;
 224   "%expect"               return PERCENT_EXPECT;
 225   "%file-prefix"          return PERCENT_FILE_PREFIX;
 226   "%fixed"[-_]"output"[-_]"files"   return PERCENT_YACC;
 227   "%glr-parser"           return PERCENT_GLR_PARSER;
 228   "%left"                 return PERCENT_LEFT;
 229   "%locations"            return PERCENT_LOCATIONS;
 230   "%merge"                return PERCENT_MERGE;
 231   "%name"[-_]"prefix"     return PERCENT_NAME_PREFIX;
 232   "%no"[-_]"lines"        return PERCENT_NO_LINES;
 233   "%nonassoc"             return PERCENT_NONASSOC;
 234   "%nterm"                return PERCENT_NTERM;
 235   "%output"               return PERCENT_OUTPUT;
 236   "%parse-param"          return PERCENT_PARSE_PARAM;
 237   "%prec"                 rule_length--; return PERCENT_PREC;
 238   "%printer"              return PERCENT_PRINTER;
 239   "%pure"[-_]"parser"     return PERCENT_PURE_PARSER;
 240   "%right"                return PERCENT_RIGHT;
 241   "%lex-param"            return PERCENT_LEX_PARAM;
 242   "%skeleton"             return PERCENT_SKELETON;
 243   "%start"                return PERCENT_START;
 244   "%term"                 return PERCENT_TOKEN;
 245   "%token"                return PERCENT_TOKEN;
 246   "%token"[-_]"table"     return PERCENT_TOKEN_TABLE;
 247   "%type"                 return PERCENT_TYPE;
 248   "%union"                return PERCENT_UNION;
 249   "%verbose"              return PERCENT_VERBOSE;
 250   "%yacc"                 return PERCENT_YACC;
 251
 252   {directive}             {
 253     complain_at (*yylloc, _("invalid directive: %s"), quote (yytext));
 254     YY_STEP;
 255   }
 256
 257   "="                     return EQUAL;
 258   ":"                     rule_length = 0; return COLON;
 259   "|"                     rule_length = 0; return PIPE;
 260   ","                     return COMMA;
 261   ";"                     return SEMICOLON;
 262
 263   [ \f\n\t\v]+  YY_STEP;
 264
 265   {id}        {
 266     yylval->symbol = symbol_get (yytext, *yylloc);
 267     rule_length++;
 268     return ID;
 269   }
 270
 271   {int} {
 272     unsigned long num;
 273     errno = 0;
 274     num = strtoul (yytext, 0, 10);
 275     if (INT_MAX < num || errno)
 276       {
 277         complain_at (*yylloc, _("integer out of range: %s"), quote (yytext));
 278         num = INT_MAX;
 279       }
 280     yylval->integer = num;
 281     return INT;
 282   }
 283
 284   /* Characters.  We don't check there is only one.  */
 285   "'"         YY_OBS_GROW; yy_push_state (SC_ESCAPED_CHARACTER);
 286
 287   /* Strings. */
 288   "\""        YY_OBS_GROW; yy_push_state (SC_ESCAPED_STRING);
 289
 290   /* Comments. */
 291   "/*"        BEGIN SC_YACC_COMMENT;
 292   "//".*      YY_STEP;
 293
 294   /* Prologue. */
 295   "%{"        yy_push_state (SC_PROLOGUE);
 296
 297   /* Code in between braces.  */
 298   "{"         YY_OBS_GROW; braces_level = 0; yy_push_state (SC_BRACED_CODE);
 299
 300   /* A type. */
 301   "<"{tag}">" {
 302     obstack_grow (&string_obstack, yytext + 1, yyleng - 2);
 303     YY_OBS_FINISH;
 304     yylval->string = last_string;
 305     return TYPE;
 306   }
 307
 308
 309   "%%"   {
 310     if (++percent_percent_count == 2)
 311       yy_push_state (SC_EPILOGUE);
 312     return PERCENT_PERCENT;
 313   }
 314
 315   .           {
 316     complain_at (*yylloc, _("invalid character: %s"), quote (yytext));
 317     YY_STEP;
 318   }
 319 }
 320
 321
 322   /*-------------------------------------------------------------------.
  323   | Whatever the start condition (except those which correspond to     |
 324   | entities `swallowed' by Bison: SC_YACC_COMMENT, SC_ESCAPED_STRING, |
 325   | and SC_ESCAPED_CHARACTER), no M4 character must escape as is.      |
 326   `-------------------------------------------------------------------*/
 327
 328 <SC_COMMENT,SC_LINE_COMMENT,SC_STRING,SC_CHARACTER,SC_BRACED_CODE,SC_PROLOGUE,SC_EPILOGUE>
 329 {
 330   \[    obstack_sgrow (&string_obstack, "@<:@");
 331   \]    obstack_sgrow (&string_obstack, "@:>@");
 332 }
 333
 334
 335   /*---------------------------------------------------------------.
 336   | Scanning a Yacc comment.  The initial `/ *' is already eaten.  |
 337   `---------------------------------------------------------------*/
 338
 339 <SC_YACC_COMMENT>
 340 {
 341   "*/" {
 342     YY_STEP;
 343     BEGIN INITIAL;
 344   }
 345
 346   [^*]+|"*"  ;
 347
 348   <<EOF>> {
 349     complain_at (*yylloc, _("unexpected end of file in a comment"));
 350     BEGIN INITIAL;
 351   }
 352 }
 353
 354
 355   /*------------------------------------------------------------.
 356   | Scanning a C comment.  The initial `/ *' is already eaten.  |
 357   `------------------------------------------------------------*/
 358
 359 <SC_COMMENT>
 360 {
 361   "*"{splice}"/"  YY_OBS_GROW; yy_pop_state ();
 362   [^*\[\]]+|"*"   YY_OBS_GROW;
 363
 364   <<EOF>> {
 365     complain_at (*yylloc, _("unexpected end of file in a comment"));
 366     yy_pop_state ();
 367   }
 368 }
 369
 370
 371   /*--------------------------------------------------------------.
 372   | Scanning a line comment.  The initial `//' is already eaten.  |
 373   `--------------------------------------------------------------*/
 374
 375 <SC_LINE_COMMENT>
 376 {
 377   "\n"                   YY_OBS_GROW; yy_pop_state ();
 378   ([^\n\[\]]|{splice})+  YY_OBS_GROW;
 379   <<EOF>>                yy_pop_state ();
 380 }
 381
 382
 383   /*----------------------------------------------------------------.
 384   | Scanning a C string, including its escapes.  The initial `"' is |
 385   | already eaten.                                                  |
 386   `----------------------------------------------------------------*/
 387
 388 <SC_ESCAPED_STRING>
 389 {
 390   "\"" {
 391     assert (yy_top_state () == INITIAL);
 392     YY_OBS_GROW;
 393     YY_OBS_FINISH;
 394     yylval->string = last_string;
 395     yy_pop_state ();
 396     rule_length++;
 397     return STRING;
 398   }
 399
 400   [^\"\\]+  YY_OBS_GROW;
 401
 402   <<EOF>> {
 403     complain_at (*yylloc, _("unexpected end of file in a string"));
 404     assert (yy_top_state () == INITIAL);
 405     YY_OBS_FINISH;
 406     yylval->string = last_string;
 407     yy_pop_state ();
 408     return STRING;
 409   }
 410 }
 411
 412   /*---------------------------------------------------------------.
 413   | Scanning a C character, decoding its escapes.  The initial "'" |
 414   | is already eaten.                                              |
 415   `---------------------------------------------------------------*/
 416
 417 <SC_ESCAPED_CHARACTER>
 418 {
 419   "'" {
 420     YY_OBS_GROW;
 421     assert (yy_top_state () == INITIAL);
 422     {
 423       YY_OBS_FINISH;
 424       yylval->symbol = symbol_get (last_string, *yylloc);
 425       symbol_class_set (yylval->symbol, token_sym, *yylloc);
 426       symbol_user_token_number_set (yylval->symbol,
 427                                     (unsigned char) last_string[1], *yylloc);
 428       YY_OBS_FREE;
 429       yy_pop_state ();
 430       rule_length++;
 431       return ID;
 432     }
 433   }
 434
 435   [^\'\\]+  YY_OBS_GROW;
 436
 437   <<EOF>> {
 438     complain_at (*yylloc, _("unexpected end of file in a character literal"));
 439     assert (yy_top_state () == INITIAL);
 440     YY_OBS_FINISH;
 441     yylval->string = last_string;
 442     yy_pop_state ();
 443     return CHARACTER;
 444   }
 445 }
 446
 447
 448   /*----------------------------.
 449   | Decode escaped characters.  |
 450   `----------------------------*/
 451
 452 <SC_ESCAPED_STRING,SC_ESCAPED_CHARACTER>
 453 {
 454   \\[0-7]{1,3} {
 455     unsigned long c = strtoul (yytext + 1, 0, 8);
 456     if (UCHAR_MAX < c)
 457       {
 458         complain_at (*yylloc, _("invalid escape sequence: %s"),
 459                      quote (yytext));
 460         YY_STEP;
 461       }
 462     else
 463       obstack_1grow (&string_obstack, c);
 464   }
 465
 466   \\x[0-9a-fA-F]+ {
 467     unsigned long c;
 468     errno = 0;
 469     c = strtoul (yytext + 2, 0, 16);
 470     if (UCHAR_MAX < c || errno)
 471       {
 472         complain_at (*yylloc, _("invalid escape sequence: %s"),
 473                      quote (yytext));
 474         YY_STEP;
 475       }
 476     else
 477       obstack_1grow (&string_obstack, c);
 478   }
 479
 480   \\a   obstack_1grow (&string_obstack, '\a');
 481   \\b   obstack_1grow (&string_obstack, '\b');
 482   \\f   obstack_1grow (&string_obstack, '\f');
 483   \\n   obstack_1grow (&string_obstack, '\n');
 484   \\r   obstack_1grow (&string_obstack, '\r');
 485   \\t   obstack_1grow (&string_obstack, '\t');
 486   \\v   obstack_1grow (&string_obstack, '\v');
 487   \\[\"\'?\\]  obstack_1grow (&string_obstack, yytext[1]);
 488   \\(u|U[0-9a-fA-F]{4})[0-9a-fA-F]{4} {
 489     int c = convert_ucn_to_byte (yytext);
 490     if (c < 0)
 491       {
 492         complain_at (*yylloc, _("invalid escape sequence: %s"),
 493                      quote (yytext));
 494         YY_STEP;
 495       }
 496     else
 497       obstack_1grow (&string_obstack, c);
 498   }
 499   \\(.|\n)      {
 500     complain_at (*yylloc, _("unrecognized escape sequence: %s"),
 501                  quote (yytext));
 502     YY_OBS_GROW;
 503   }
  504   /* Flex wants this rule, in case of a `\<<EOF>>'. */
 505   \\                   YY_OBS_GROW;
 506 }
 507
 508
 509   /*----------------------------------------------------------.
 510   | Scanning a C character without decoding its escapes.  The |
 511   | initial "'" is already eaten.                             |
 512   `----------------------------------------------------------*/
 513
 514 <SC_CHARACTER>
 515 {
 516   "'" {
 517     YY_OBS_GROW;
 518     assert (yy_top_state () != INITIAL);
 519     yy_pop_state ();
 520   }
 521
 522   [^'\[\]\\]+          YY_OBS_GROW;
 523   \\{splice}[^\[\]]    YY_OBS_GROW;
 524   {splice}             YY_OBS_GROW;
 525   /* Needed for `\<<EOF>>', `\\<<newline>>[', and `\\<<newline>>]'.  */
 526   \\                   YY_OBS_GROW;
 527
 528   <<EOF>> {
 529     complain_at (*yylloc, _("unexpected end of file in a character literal"));
 530     assert (yy_top_state () != INITIAL);
 531     yy_pop_state ();
 532   }
 533 }
 534
 535
 536   /*----------------------------------------------------------------.
 537   | Scanning a C string, without decoding its escapes.  The initial |
 538   | `"' is already eaten.                                           |
 539   `----------------------------------------------------------------*/
 540
 541 <SC_STRING>
 542 {
 543   "\"" {
 544     assert (yy_top_state () != INITIAL);
 545     YY_OBS_GROW;
 546     yy_pop_state ();
 547   }
 548
 549   [^\"\[\]\\]+         YY_OBS_GROW;
 550   \\{splice}[^\[\]]    YY_OBS_GROW;
 551   {splice}             YY_OBS_GROW;
 552   /* Needed for `\<<EOF>>', `\\<<newline>>[', and `\\<<newline>>]'.  */
 553   \\                   YY_OBS_GROW;
 554
 555   <<EOF>> {
 556     complain_at (*yylloc, _("unexpected end of file in a string"));
 557     assert (yy_top_state () != INITIAL);
 558     yy_pop_state ();
 559   }
 560 }
 561
 562
 563   /*---------------------------------------------------.
 564   | Strings, comments etc. can be found in user code.  |
 565   `---------------------------------------------------*/
 566
 567 <SC_BRACED_CODE,SC_PROLOGUE,SC_EPILOGUE>
 568 {
 569   /* Characters.  We don't check there is only one.  */
 570   "'"         YY_OBS_GROW; yy_push_state (SC_CHARACTER);
 571
 572   /* Strings. */
 573   "\""        YY_OBS_GROW; yy_push_state (SC_STRING);
 574
 575   /* Comments. */
 576   "/"{splice}"*"  YY_OBS_GROW; yy_push_state (SC_COMMENT);
 577   "/"{splice}"/"  YY_OBS_GROW; yy_push_state (SC_LINE_COMMENT);
 578
 579   /* Not comments. */
 580   "/"         YY_OBS_GROW;
 581 }
 582
 583
 584   /*---------------------------------------------------------------.
 585   | Scanning some code in braces (%union and actions). The initial |
 586   | "{" is already eaten.                                          |
 587   `---------------------------------------------------------------*/
 588
 589 <SC_BRACED_CODE>
 590 {
 591   "{"|"<"{splice}"%"  YY_OBS_GROW; braces_level++;
 592   "%"{splice}">"      YY_OBS_GROW; braces_level--;
 593   "}" {
 594     YY_OBS_GROW;
 595     braces_level--;
 596     if (braces_level < 0)
 597       {
 598         yy_pop_state ();
 599         YY_OBS_FINISH;
 600         yylval->string = last_string;
 601         rule_length++;
 602         return BRACED_CODE;
 603       }
 604   }
 605
 606   "$"("<"{tag}">")?(-?[0-9]+|"$") { handle_dollar (current_braced_code,
 607                                                    yytext, *yylloc); }
 608   "@"(-?[0-9]+|"$")               { handle_at (current_braced_code,
 609                                                yytext, *yylloc); }
 610
 611   /* `"<"{splice}"<"' tokenizes `<<%' correctly (as `<<' `%') rather
  612      than incorrectly (as `<' `<%').  */
 613   [^\"$%\'/<@\[\]\{\}]+|[$%/<@]|"<"{splice}"<"  YY_OBS_GROW;
 614
 615   <<EOF>> {
 616     complain_at (*yylloc, _("unexpected end of file in `{ ... }'"));
 617     yy_pop_state ();
 618     YY_OBS_FINISH;
 619     yylval->string = last_string;
 620     return BRACED_CODE;
 621   }
 622
 623 }
 624
 625
 626   /*--------------------------------------------------------------.
 627   | Scanning some prologue: from "%{" (already scanned) to "%}".  |
 628   `--------------------------------------------------------------*/
 629
 630 <SC_PROLOGUE>
 631 {
 632   "%}" {
 633     yy_pop_state ();
 634     YY_OBS_FINISH;
 635     yylval->string = last_string;
 636     return PROLOGUE;
 637   }
 638
 639   [^%\[\]/\'\"]+     YY_OBS_GROW;
 640   "%"                YY_OBS_GROW;
 641
 642   <<EOF>> {
 643     complain_at (*yylloc, _("unexpected end of file in `%%{ ... %%}'"));
 644     yy_pop_state ();
 645     YY_OBS_FINISH;
 646     yylval->string = last_string;
 647     return PROLOGUE;
 648   }
 649 }
 650
 651
 652   /*---------------------------------------------------------------.
 653   | Scanning the epilogue (everything after the second "%%", which |
 654   | has already been eaten).                                       |
 655   `---------------------------------------------------------------*/
 656
 657 <SC_EPILOGUE>
 658 {
 659   [^\[\]]+  YY_OBS_GROW;
 660
 661   <<EOF>> {
 662     yy_pop_state ();
 663     YY_OBS_FINISH;
 664     yylval->string = last_string;
 665     return EPILOGUE;
 666   }
 667 }
 668
 669
 670 %%
 671
 672 /*------------------------------------------------------------------.
  673 | TEXT is pointing to a would-be semantic value (i.e., a `$').      |
 674 |                                                                   |
 675 | Possible inputs: $[<TYPENAME>]($|integer)                         |
 676 |                                                                   |
 677 | Output to the STRING_OBSTACK a reference to this semantic value.  |
 678 `------------------------------------------------------------------*/
 679
 680 static inline void
 681 handle_action_dollar (char *text, location_t location)
 682 {
 683   const char *type_name = NULL;
 684   char *cp = text + 1;
 685
 686   /* Get the type name if explicit. */
 687   if (*cp == '<')
 688     {
 689       type_name = ++cp;
 690       while (*cp != '>')
 691         ++cp;
 692       *cp = '\0';
 693       ++cp;
 694     }
 695
 696   if (*cp == '$')
 697     {
 698       if (!type_name)
 699         type_name = symbol_list_n_type_name_get (current_rule, location, 0);
 700       if (!type_name && typed)
 701         complain_at (location, _("$$ of `%s' has no declared type"),
 702                      current_rule->sym->tag);
 703       if (!type_name)
 704         type_name = "";
 705       obstack_fgrow1 (&string_obstack,
 706                       "]b4_lhs_value([%s])[", type_name);
 707     }
 708   else
 709     {
 710       long num;
 711       errno = 0;
 712       num = strtol (cp, 0, 10);
 713
 714       if (INT_MIN <= num && num <= rule_length && ! errno)
 715         {
 716           int n = num;
 717           if (!type_name && n > 0)
 718             type_name = symbol_list_n_type_name_get (current_rule, location,
 719                                                      n);
 720           if (!type_name && typed)
 721             complain_at (location, _("$%d of `%s' has no declared type"),
 722                       n, current_rule->sym->tag);
 723           if (!type_name)
 724             type_name = "";
 725           obstack_fgrow3 (&string_obstack,
 726                           "]b4_rhs_value([%d], [%d], [%s])[",
 727                           rule_length, n, type_name);
 728         }
 729       else
 730         complain_at (location, _("integer out of range: %s"), quote (text));
 731     }
 732 }
 733
 734
 735 /*---------------------------------------------------------------.
 736 | TEXT is expected to be $$ in some code associated to a symbol: |
 737 | destructor or printer.                                         |
 738 `---------------------------------------------------------------*/
 739
 740 static inline void
 741 handle_symbol_code_dollar (char *text, location_t location)
 742 {
 743   char *cp = text + 1;
 744   if (*cp == '$')
 745     obstack_sgrow (&string_obstack, "]b4_dollar_dollar[");
 746   else
 747     complain_at (location, _("invalid value: %s"), quote (text));
 748 }
 749
 750
 751 /*-----------------------------------------------------------------.
  752 | Dispatch onto handle_action_dollar or handle_symbol_code_dollar, |
  753 | depending upon BRACED_CODE_KIND.                                 |
 754 `-----------------------------------------------------------------*/
 755
 756 static void
 757 handle_dollar (braced_code_t braced_code_kind,
 758                char *text, location_t location)
 759 {
 760   switch (braced_code_kind)
 761     {
 762     case action_braced_code:
 763       handle_action_dollar (text, location);
 764       break;
 765
 766     case destructor_braced_code:
 767     case printer_braced_code:
 768       handle_symbol_code_dollar (text, location);
 769       break;
 770     }
 771 }
 772
 773
 774 /*------------------------------------------------------.
 775 | TEXT is a location token (i.e., a `@...').  Output to |
 776 | STRING_OBSTACK a reference to this location.          |
 777 `------------------------------------------------------*/
 778
 779 static inline void
 780 handle_action_at (char *text, location_t location)
 781 {
 782   char *cp = text + 1;
 783   locations_flag = 1;
 784
 785   if (*cp == '$')
 786     {
 787       obstack_sgrow (&string_obstack, "]b4_lhs_location[");
 788     }
 789   else
 790     {
 791       long num;
 792       errno = 0;
 793       num = strtol (cp, 0, 10);
 794
 795       if (INT_MIN <= num && num <= rule_length && ! errno)
 796         {
 797           int n = num;
 798           obstack_fgrow2 (&string_obstack, "]b4_rhs_location([%d], [%d])[",
 799                           rule_length, n);
 800         }
 801       else
 802         complain_at (location, _("integer out of range: %s"), quote (text));
 803     }
 804 }
 805
 806
 807 /*---------------------------------------------------------------.
 808 | TEXT is expected to be @$ in some code associated to a symbol: |
 809 | destructor or printer.                                         |
 810 `---------------------------------------------------------------*/
 811
 812 static inline void
 813 handle_symbol_code_at (char *text, location_t location)
 814 {
 815   char *cp = text + 1;
 816   if (*cp == '$')
 817     obstack_sgrow (&string_obstack, "]b4_at_dollar[");
 818   else
 819     complain_at (location, _("invalid value: %s"), quote (text));
 820 }
 821
 822
 823 /*-------------------------------------------------------------------.
  824 | Dispatch onto handle_action_at or handle_symbol_code_at, depending |
  825 | upon BRACED_CODE_KIND.                                             |
 826 `-------------------------------------------------------------------*/
 827
 828 static void
 829 handle_at (braced_code_t braced_code_kind,
 830            char *text, location_t location)
 831 {
 832   switch (braced_code_kind)
 833     {
 834     case action_braced_code:
 835       handle_action_at (text, location);
 836       break;
 837
 838     case destructor_braced_code:
 839     case printer_braced_code:
 840       handle_symbol_code_at (text, location);
 841       break;
 842     }
 843 }
 844
 845
 846 /*------------------------------------------------------------------.
 847 | Convert universal character name UCN to a single-byte character,  |
 848 | and return that character.  Return -1 if UCN does not correspond  |
 849 | to a single-byte character.                                       |
 850 `------------------------------------------------------------------*/
 851
 852 static int
 853 convert_ucn_to_byte (char const *ucn)
 854 {
 855   unsigned long code = strtoul (ucn + 2, 0, 16);
 856
 857   /* FIXME: Currently we assume Unicode-compatible unibyte characters
 858      on ASCII hosts (i.e., Latin-1 on hosts with 8-bit bytes).  On
 859      non-ASCII hosts we support only the portable C character set.
 860      These limitations should be removed once we add support for
 861      multibyte characters.  */
 862
 863   if (UCHAR_MAX < code)
 864     return -1;
 865
 866 #if ! ('$' == 0x24 && '@' == 0x40 && '`' == 0x60 && '~' == 0x7e)
 867   {
 868     /* A non-ASCII host.  Use CODE to index into a table of the C
 869        basic execution character set, which is guaranteed to exist on
 870        all Standard C platforms.  This table also includes '$', '@',
 871        and '`', which not in the basic execution character set but
 872        which are unibyte characters on all the platforms that we know
 873        about.  */
 874     static signed char const table[] =
 875       {
 876         '\0',   -1,   -1,   -1,   -1,   -1,   -1, '\a',
 877         '\b', '\t', '\n', '\v', '\f', '\r',   -1,   -1,
 878           -1,   -1,   -1,   -1,   -1,   -1,   -1,   -1,
 879           -1,   -1,   -1,   -1,   -1,   -1,   -1,   -1,
 880          ' ',  '!',  '"',  '#',  '$',  '%',  '&', '\'',
 881          '(',  ')',  '*',  '+',  ',',  '-',  '.',  '/',
 882          '0',  '1',  '2',  '3',  '4',  '5',  '6',  '7',
 883          '8',  '9',  ':',  ';',  '<',  '=',  '>',  '?',
 884          '@',  'A',  'B',  'C',  'D',  'E',  'F',  'G',
 885          'H',  'I',  'J',  'K',  'L',  'M',  'N',  'O',
 886          'P',  'Q',  'R',  'S',  'T',  'U',  'V',  'W',
 887          'X',  'Y',  'Z',  '[', '\\',  ']',  '^',  '_',
 888          '`',  'a',  'b',  'c',  'd',  'e',  'f',  'g',
 889          'h',  'i',  'j',  'k',  'l',  'm',  'n',  'o',
 890          'p',  'q',  'r',  's',  't',  'u',  'v',  'w',
 891          'x',  'y',  'z',  '{',  '|',  '}',  '~'
 892       };
 893
 894     code = code < sizeof table ? table[code] : -1;
 895   }
 896 #endif
 897
 898   return code;
 899 }
 900
 901
 902 /*-------------------------.
 903 | Initialize the scanner.  |
 904 `-------------------------*/
 905
void
scanner_initialize (void)
{
  /* Set up STRING_OBSTACK, the obstack the scanner's rules append
     matched text to.  */
  obstack_init (&string_obstack);
}
 911
 912
 913 /*-----------------------------------------------.
 914 | Free all the memory allocated to the scanner.  |
 915 `-----------------------------------------------*/
 916
void
scanner_free (void)
{
  /* Passing 0 as the object frees everything held by STRING_OBSTACK.  */
  obstack_free (&string_obstack, 0);
  /* Reclaim Flex's buffers.  */
  yy_delete_buffer (YY_CURRENT_BUFFER);
}
 922   yy_delete_buffer (YY_CURRENT_BUFFER);
 923 }