src/scan-action.l

   1 /* Bison Grammar Scanner                             -*- C -*-
   2
   3    Copyright (C) 2002, 2003, 2004, 2005 Free Software Foundation, Inc.
   4
   5    This file is part of Bison, the GNU Compiler Compiler.
   6
   7    This program is free software; you can redistribute it and/or modify
   8    it under the terms of the GNU General Public License as published by
   9    the Free Software Foundation; either version 2 of the License, or
  10    (at your option) any later version.
  11
  12    This program is distributed in the hope that it will be useful,
  13    but WITHOUT ANY WARRANTY; without even the implied warranty of
  14    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  15    GNU General Public License for more details.
  16
  17    You should have received a copy of the GNU General Public License
  18    along with this program; if not, write to the Free Software
  19    Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
  20    02110-1301  USA
  21 */
  22
  23 %option debug nodefault nounput noyywrap never-interactive
  24 %option prefix="gram_" outfile="lex.yy.c"
  25
  26 %{
  27 #include "system.h"
  28
  29 #include <mbswidth.h>
  30 #include <get-errno.h>
  31 #include <quote.h>
  32
  33 #include "complain.h"
  34 #include "files.h"
  35 #include "getargs.h"
  36 #include "gram.h"
  37 #include "quotearg.h"
  38 #include "reader.h"
  39 #include "uniqstr.h"
  40
  41 #define YY_USER_INIT                                    \
  42   do                                                    \
  43     {                                                   \
  44       scanner_cursor.file = current_file;               \
  45       scanner_cursor.line = 1;                          \
  46       scanner_cursor.column = 1;                        \
  47       code_start = scanner_cursor;                      \
  48     }                                                   \
  49   while (0)
  50
  51 /* Location of scanner cursor.  */
  52 boundary scanner_cursor;
  53
  54 static void adjust_location (location *, char const *, size_t);
  55 #define YY_USER_ACTION  adjust_location (loc, yytext, yyleng);
  56
  57 static size_t no_cr_read (FILE *, char *, size_t);
  58 #define YY_INPUT(buf, result, size) ((result) = no_cr_read (yyin, buf, size))
  59
  60 /* Within well-formed rules, RULE_LENGTH is the number of values in
  61    the current rule so far, which says where to find `$0' with respect
  62    to the top of the stack.  It is not the same as the rule->length in
  63    the case of mid rule actions.
  64
  65    Outside of well-formed rules, RULE_LENGTH has an undefined value.  */
  66 int rule_length;
  67
  68 static void handle_dollar (int token_type, char *cp, location loc);
  69 static void handle_at (int token_type, char *cp, location loc);
  70 static void handle_syncline (char *args);
  71 static unsigned long int scan_integer (char const *p, int base, location loc);
  72 static int convert_ucn_to_byte (char const *hex_text);
  73 static void unexpected_eof (boundary, char const *);
  74 static void unexpected_newline (boundary, char const *);
  75
  76 %}
  77 %x SC_COMMENT SC_LINE_COMMENT SC_YACC_COMMENT
  78 %x SC_STRING SC_CHARACTER
  79 %x SC_ESCAPED_STRING SC_ESCAPED_CHARACTER
  80 %x SC_PRE_CODE SC_BRACED_CODE SC_PROLOGUE SC_EPILOGUE
  81
  82 letter    [.abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ_]
  83 id        {letter}({letter}|[0-9])*
  84 directive %{letter}({letter}|[0-9]|-)*
  85 int       [0-9]+
  86
  87 /* POSIX says that a tag must be both an id and a C union member, but
  88    historically almost any character is allowed in a tag.  We disallow
  89    NUL and newline, as this simplifies our implementation.  */
  90 tag      [^\0\n>]+
  91
  92 /* Zero or more instances of backslash-newline.  Following GCC, allow
  93    white space between the backslash and the newline.  */
  94 splice   (\\[ \f\t\v]*\n)*
  95
  96 %%
  97 %{
  98   /* Nesting level of the current code in braces.  */
  99   int braces_level IF_LINT (= 0);
 100
 101   /* Parent context state, when applicable.  */
 102   int context_state IF_LINT (= 0);
 103
 104   /* Token type to return, when applicable.  */
 105   int token_type IF_LINT (= 0);
 106
 107   /* Where containing code started, when applicable.  Its initial
 108      value is relevant only when yylex is invoked in the SC_EPILOGUE
 109      start condition.  */
 110   boundary code_start = scanner_cursor;
 111
 112   /* Where containing comment or string or character literal started,
 113      when applicable.  */
 114   boundary token_start IF_LINT (= scanner_cursor);
 115 %}
 116
 117
 118   /*-----------------------.
 119   | Scanning white space.  |
 120   `-----------------------*/
 121
 122 <INITIAL>
 123 {
 124   /* Comments and white space.  */
 125   ","          warn_at (*loc, _("stray `,' treated as white space"));
 126   [ \f\n\t\v]  |
 127   "//".*       ;
 128   "/*" {
 129     token_start = loc->start;
 130     context_state = YY_START;
 131     BEGIN SC_YACC_COMMENT;
 132   }
 133
 134   /* #line directives are not documented, and may be withdrawn or
 135      modified in future versions of Bison.  */
 136   ^"#line "{int}" \"".*"\"\n" {
 137     handle_syncline (yytext + sizeof "#line " - 1);
 138   }
 139 }
 140
 141
 142   /*----------------------------.
 143   | Scanning Bison directives.  |
 144   `----------------------------*/
 145 <INITIAL>
 146 {
 147
 148   /* Code in between braces.  */
 149   "{" {
 150     STRING_GROW;
 151     token_type = BRACED_CODE;
 152     braces_level = 0;
 153     code_start = loc->start;
 154     BEGIN SC_BRACED_CODE;
 155   }
 156
 157 }
 158
 159
 160   /*------------------------------------------------------------.
 161   | Scanning a C comment.  The initial `/ *' is already eaten.  |
 162   `------------------------------------------------------------*/
 163
 164 <SC_COMMENT>
 165 {
 166   "*"{splice}"/"  STRING_GROW; BEGIN context_state;
 167   <<EOF>>         unexpected_eof (token_start, "*/"); BEGIN context_state;
 168 }
 169
 170
 171   /*--------------------------------------------------------------.
 172   | Scanning a line comment.  The initial `//' is already eaten.  |
 173   `--------------------------------------------------------------*/
 174
 175 <SC_LINE_COMMENT>
 176 {
 177   "\n"           STRING_GROW; BEGIN context_state;
 178   {splice}       STRING_GROW;
 179   <<EOF>>        BEGIN context_state;
 180 }
 181
 182
 183   /*------------------------------------------------.
 184   | Scanning a Bison string, including its escapes. |
 185   | The initial quote is already eaten.             |
 186   `------------------------------------------------*/
 187
 188 <SC_ESCAPED_STRING>
 189 {
 190   "\"" {
 191     STRING_FINISH;
 192     loc->start = token_start;
 193     val->chars = last_string;
 194     rule_length++;
 195     BEGIN INITIAL;
 196     return STRING;
 197   }
 198   \n            unexpected_newline (token_start, "\""); BEGIN INITIAL;
 199   <<EOF>>       unexpected_eof (token_start, "\"");     BEGIN INITIAL;
 200 }
 201
 202   /*----------------------------------------------------------.
 203   | Scanning a Bison character literal, decoding its escapes. |
 204   | The initial quote is already eaten.                       |
 205   `----------------------------------------------------------*/
 206
 207 <SC_ESCAPED_CHARACTER>
 208 {
 209   "'" {
 210     unsigned char last_string_1;
 211     STRING_GROW;
 212     STRING_FINISH;
 213     loc->start = token_start;
 214     val->symbol = symbol_get (quotearg_style (escape_quoting_style,
 215                                               last_string),
 216                               *loc);
 217     symbol_class_set (val->symbol, token_sym, *loc);
 218     last_string_1 = last_string[1];
 219     symbol_user_token_number_set (val->symbol, last_string_1, *loc);
 220     STRING_FREE;
 221     rule_length++;
 222     BEGIN INITIAL;
 223     return ID;
 224   }
 225   \n            unexpected_newline (token_start, "'");  BEGIN INITIAL;
 226   <<EOF>>       unexpected_eof (token_start, "'");      BEGIN INITIAL;
 227 }
 228
 229 <SC_ESCAPED_CHARACTER,SC_ESCAPED_STRING>
 230 {
 231   \0        complain_at (*loc, _("invalid null character"));
 232 }
 233
 234
 235   /*----------------------------.
 236   | Decode escaped characters.  |
 237   `----------------------------*/
 238
 239 <SC_ESCAPED_STRING,SC_ESCAPED_CHARACTER>
 240 {
 241   \\[0-7]{1,3} {
 242     unsigned long int c = strtoul (yytext + 1, 0, 8);
 243     if (UCHAR_MAX < c)
 244       complain_at (*loc, _("invalid escape sequence: %s"), quote (yytext));
 245     else if (! c)
 246       complain_at (*loc, _("invalid null character: %s"), quote (yytext));
 247     else
 248       obstack_1grow (&obstack_for_string, c);
 249   }
 250
 251   \\x[0-9abcdefABCDEF]+ {
 252     unsigned long int c;
 253     set_errno (0);
 254     c = strtoul (yytext + 2, 0, 16);
 255     if (UCHAR_MAX < c || get_errno ())
 256       complain_at (*loc, _("invalid escape sequence: %s"), quote (yytext));
 257     else if (! c)
 258       complain_at (*loc, _("invalid null character: %s"), quote (yytext));
 259     else
 260       obstack_1grow (&obstack_for_string, c);
 261   }
 262
 263   \\a   obstack_1grow (&obstack_for_string, '\a');
 264   \\b   obstack_1grow (&obstack_for_string, '\b');
 265   \\f   obstack_1grow (&obstack_for_string, '\f');
 266   \\n   obstack_1grow (&obstack_for_string, '\n');
 267   \\r   obstack_1grow (&obstack_for_string, '\r');
 268   \\t   obstack_1grow (&obstack_for_string, '\t');
 269   \\v   obstack_1grow (&obstack_for_string, '\v');
 270
 271   /* \\[\"\'?\\] would be shorter, but it confuses xgettext.  */
 272   \\("\""|"'"|"?"|"\\")  obstack_1grow (&obstack_for_string, yytext[1]);
 273
 274   \\(u|U[0-9abcdefABCDEF]{4})[0-9abcdefABCDEF]{4} {
 275     int c = convert_ucn_to_byte (yytext);
 276     if (c < 0)
 277       complain_at (*loc, _("invalid escape sequence: %s"), quote (yytext));
 278     else if (! c)
 279       complain_at (*loc, _("invalid null character: %s"), quote (yytext));
 280     else
 281       obstack_1grow (&obstack_for_string, c);
 282   }
 283   \\(.|\n)      {
 284     complain_at (*loc, _("unrecognized escape sequence: %s"), quote (yytext));
 285     STRING_GROW;
 286   }
 287 }
 288
 289   /*--------------------------------------------.
 290   | Scanning user-code characters and strings.  |
 291   `--------------------------------------------*/
 292
 293 <SC_CHARACTER,SC_STRING>
 294 {
 295   {splice}|\\{splice}[^\n$@\[\]]        STRING_GROW;
 296 }
 297
 298 <SC_CHARACTER>
 299 {
 300   "'"           STRING_GROW; BEGIN context_state;
 301   \n            unexpected_newline (token_start, "'"); BEGIN context_state;
 302   <<EOF>>       unexpected_eof (token_start, "'"); BEGIN context_state;
 303 }
 304
 305 <SC_STRING>
 306 {
 307   "\""          STRING_GROW; BEGIN context_state;
 308   \n            unexpected_newline (token_start, "\""); BEGIN context_state;
 309   <<EOF>>       unexpected_eof (token_start, "\""); BEGIN context_state;
 310 }
 311
 312
 313   /*---------------------------------------------------.
 314   | Strings, comments etc. can be found in user code.  |
 315   `---------------------------------------------------*/
 316
 317 <INITIAL>
 318 {
 319   "'" {
 320     STRING_GROW;
 321     context_state = YY_START;
 322     token_start = loc->start;
 323     BEGIN SC_CHARACTER;
 324   }
 325   "\"" {
 326     STRING_GROW;
 327     context_state = YY_START;
 328     token_start = loc->start;
 329     BEGIN SC_STRING;
 330   }
 331   "/"{splice}"*" {
 332     STRING_GROW;
 333     context_state = YY_START;
 334     token_start = loc->start;
 335     BEGIN SC_COMMENT;
 336   }
 337   "/"{splice}"/" {
 338     STRING_GROW;
 339     context_state = YY_START;
 340     BEGIN SC_LINE_COMMENT;
 341   }
 342 }
 343
 344
 345   /*---------------------------------------------------------------.
 346   | Scanning some code in braces (%union and actions). The initial |
 347   | "{" is already eaten.                                          |
 348   `---------------------------------------------------------------*/
 349
 350 <INITIAL>
 351 {
 352   "{"|"<"{splice}"%"  STRING_GROW; braces_level++;
 353   "%"{splice}">"      STRING_GROW; braces_level--;
 354   "}" {
 355     bool outer_brace = --braces_level < 0;
 356
 357     /* As an undocumented Bison extension, append `;' before the last
 358        brace in braced code, so that the user code can omit trailing
 359        `;'.  But do not append `;' if emulating Yacc, since Yacc does
 360        not append one.
 361
 362        FIXME: Bison should warn if a semicolon seems to be necessary
 363        here, and should omit the semicolon if it seems unnecessary
 364        (e.g., after ';', '{', or '}', each followed by comments or
 365        white space).  Such a warning shouldn't depend on --yacc; it
 366        should depend on a new --pedantic option, which would cause
 367        Bison to warn if it detects an extension to POSIX.  --pedantic
 368        should also diagnose other Bison extensions like %yacc.
 369        Perhaps there should also be a GCC-style --pedantic-errors
 370        option, so that such warnings are diagnosed as errors.  */
 371     if (outer_brace && token_type == BRACED_CODE && ! yacc_flag)
 372       obstack_1grow (&obstack_for_string, ';');
 373
 374     obstack_1grow (&obstack_for_string, '}');
 375
 376     if (outer_brace)
 377       {
 378         STRING_FINISH;
 379         rule_length++;
 380         loc->start = code_start;
 381         val->chars = last_string;
 382         BEGIN INITIAL;
 383         return token_type;
 384       }
 385   }
 386
  387   /* Tokenize `<<%' correctly (as `<<' `%') rather than incorrectly
  388      (as `<' `<%').  */
 389   "<"{splice}"<"  STRING_GROW;
 390
 391   "$"("<"{tag}">")?(-?[0-9]+|"$")  handle_dollar (token_type, yytext, *loc);
 392   "@"(-?[0-9]+|"$")                handle_at (token_type, yytext, *loc);
 393
 394   <<EOF>>  unexpected_eof (code_start, "}"); BEGIN INITIAL;
 395 }
 396
 397
 398   /*--------------------------------------------------------------.
 399   | Scanning some prologue: from "%{" (already scanned) to "%}".  |
 400   `--------------------------------------------------------------*/
 401
 402 <SC_PROLOGUE>
 403 {
 404   "%}" {
 405     STRING_FINISH;
 406     loc->start = code_start;
 407     val->chars = last_string;
 408     BEGIN INITIAL;
 409     return PROLOGUE;
 410   }
 411
 412   <<EOF>>  unexpected_eof (code_start, "%}"); BEGIN INITIAL;
 413 }
 414
 415
 416   /*---------------------------------------------------------------.
 417   | Scanning the epilogue (everything after the second "%%", which |
 418   | has already been eaten).                                       |
 419   `---------------------------------------------------------------*/
 420
 421 <SC_EPILOGUE>
 422 {
 423   <<EOF>> {
 424     STRING_FINISH;
 425     loc->start = code_start;
 426     val->chars = last_string;
 427     BEGIN INITIAL;
 428     return EPILOGUE;
 429   }
 430 }
 431
 432
 433   /*-----------------------------------------.
 434   | Escape M4 quoting characters in C code.  |
 435   `-----------------------------------------*/
 436
 437 <SC_COMMENT,SC_LINE_COMMENT,SC_STRING,SC_CHARACTER,SC_BRACED_CODE,SC_PROLOGUE,SC_EPILOGUE>
 438 {
 439   \$    obstack_sgrow (&obstack_for_string, "$][");
 440   \@    obstack_sgrow (&obstack_for_string, "@@");
 441   \[    obstack_sgrow (&obstack_for_string, "@{");
 442   \]    obstack_sgrow (&obstack_for_string, "@}");
 443 }
 444
 445
 446   /*-----------------------------------------------------.
 447   | By default, grow the string obstack with the input.  |
 448   `-----------------------------------------------------*/
 449
 450 <SC_COMMENT,SC_LINE_COMMENT,SC_BRACED_CODE,SC_PROLOGUE,SC_EPILOGUE,SC_STRING,SC_CHARACTER,SC_ESCAPED_STRING,SC_ESCAPED_CHARACTER>.      |
 451 <SC_COMMENT,SC_LINE_COMMENT,SC_BRACED_CODE,SC_PROLOGUE,SC_EPILOGUE>\n   STRING_GROW;
 452
 453 %%
 454
  455 /* Keeps track of the maximum number of semantic values to the left of
  456    a handle (those referenced by $0, $-1, etc.) that are required by
  457    the semantic actions of this grammar. */
 458 int max_left_semantic_context = 0;
 459
 460 /* Set *LOC and adjust scanner cursor to account for token TOKEN of
 461    size SIZE.  */
 462
 463 static void
 464 adjust_location (location *loc, char const *token, size_t size)
 465 {
 466   int line = scanner_cursor.line;
 467   int column = scanner_cursor.column;
 468   char const *p0 = token;
 469   char const *p = token;
 470   char const *lim = token + size;
 471
 472   loc->start = scanner_cursor;
 473
 474   for (p = token; p < lim; p++)
 475     switch (*p)
 476       {
 477       case '\n':
 478         line++;
 479         column = 1;
 480         p0 = p + 1;
 481         break;
 482
 483       case '\t':
 484         column += mbsnwidth (p0, p - p0, 0);
 485         column += 8 - ((column - 1) & 7);
 486         p0 = p + 1;
 487         break;
 488       }
 489
 490   scanner_cursor.line = line;
 491   scanner_cursor.column = column + mbsnwidth (p0, p - p0, 0);
 492
 493   loc->end = scanner_cursor;
 494 }
 495
 496
 497 /* Read bytes from FP into buffer BUF of size SIZE.  Return the
 498    number of bytes read.  Remove '\r' from input, treating \r\n
 499    and isolated \r as \n.  */
 500
 501 static size_t
 502 no_cr_read (FILE *fp, char *buf, size_t size)
 503 {
 504   size_t bytes_read = fread (buf, 1, size, fp);
 505   if (bytes_read)
 506     {
 507       char *w = memchr (buf, '\r', bytes_read);
 508       if (w)
 509         {
 510           char const *r = ++w;
 511           char const *lim = buf + bytes_read;
 512
 513           for (;;)
 514             {
 515               /* Found an '\r'.  Treat it like '\n', but ignore any
 516                  '\n' that immediately follows.  */
 517               w[-1] = '\n';
 518               if (r == lim)
 519                 {
 520                   int ch = getc (fp);
 521                   if (ch != '\n' && ungetc (ch, fp) != ch)
 522                     break;
 523                 }
 524               else if (*r == '\n')
 525                 r++;
 526
 527               /* Copy until the next '\r'.  */
 528               do
 529                 {
 530                   if (r == lim)
 531                     return w - buf;
 532                 }
 533               while ((*w++ = *r++) != '\r');
 534             }
 535
 536           return w - buf;
 537         }
 538     }
 539
 540   return bytes_read;
 541 }
 542
 543
 544 /*------------------------------------------------------------------.
 545 | TEXT is pointing to a wannabee semantic value (i.e., a `$').      |
 546 |                                                                   |
 547 | Possible inputs: $[<TYPENAME>]($|integer)                         |
 548 |                                                                   |
 549 | Output to OBSTACK_FOR_STRING a reference to this semantic value.  |
 550 `------------------------------------------------------------------*/
 551
 552 static inline bool
 553 handle_action_dollar (char *text, location loc)
 554 {
 555   const char *type_name = NULL;
 556   char *cp = text + 1;
 557
 558   if (! current_rule)
 559     return false;
 560
 561   /* Get the type name if explicit. */
 562   if (*cp == '<')
 563     {
 564       type_name = ++cp;
 565       while (*cp != '>')
 566         ++cp;
 567       *cp = '\0';
 568       ++cp;
 569     }
 570
 571   if (*cp == '$')
 572     {
 573       if (!type_name)
 574         type_name = symbol_list_n_type_name_get (current_rule, loc, 0);
 575       if (!type_name && typed)
 576         complain_at (loc, _("$$ of `%s' has no declared type"),
 577                      current_rule->sym->tag);
 578       if (!type_name)
 579         type_name = "";
 580       obstack_fgrow1 (&obstack_for_string,
 581                       "]b4_lhs_value([%s])[", type_name);
 582     }
 583   else
 584     {
 585       long int num;
 586       set_errno (0);
 587       num = strtol (cp, 0, 10);
 588
 589       if (INT_MIN <= num && num <= rule_length && ! get_errno ())
 590         {
 591           int n = num;
 592           if (1-n > max_left_semantic_context)
 593             max_left_semantic_context = 1-n;
 594           if (!type_name && n > 0)
 595             type_name = symbol_list_n_type_name_get (current_rule, loc, n);
 596           if (!type_name && typed)
 597             complain_at (loc, _("$%d of `%s' has no declared type"),
 598                          n, current_rule->sym->tag);
 599           if (!type_name)
 600             type_name = "";
 601           obstack_fgrow3 (&obstack_for_string,
 602                           "]b4_rhs_value(%d, %d, [%s])[",
 603                           rule_length, n, type_name);
 604         }
 605       else
 606         complain_at (loc, _("integer out of range: %s"), quote (text));
 607     }
 608
 609   return true;
 610 }
 611
 612
 613 /*----------------------------------------------------------------.
 614 | Map `$?' onto the proper M4 symbol, depending on its TOKEN_TYPE |
 615 | (are we in an action?).                                         |
 616 `----------------------------------------------------------------*/
 617
 618 static void
 619 handle_dollar (int token_type, char *text, location loc)
 620 {
 621   switch (token_type)
 622     {
 623     case BRACED_CODE:
 624       if (handle_action_dollar (text, loc))
 625         return;
 626       break;
 627
 628     case PERCENT_DESTRUCTOR:
 629     case PERCENT_INITIAL_ACTION:
 630     case PERCENT_PRINTER:
 631       if (text[1] == '$')
 632         {
 633           obstack_sgrow (&obstack_for_string, "]b4_dollar_dollar[");
 634           return;
 635         }
 636       break;
 637
 638     default:
 639       break;
 640     }
 641
 642   complain_at (loc, _("invalid value: %s"), quote (text));
 643 }
 644
 645
 646 /*------------------------------------------------------.
 647 | TEXT is a location token (i.e., a `@...').  Output to |
 648 | OBSTACK_FOR_STRING a reference to this location.      |
 649 `------------------------------------------------------*/
 650
 651 static inline bool
 652 handle_action_at (char *text, location loc)
 653 {
 654   char *cp = text + 1;
 655   locations_flag = true;
 656
 657   if (! current_rule)
 658     return false;
 659
 660   if (*cp == '$')
 661     obstack_sgrow (&obstack_for_string, "]b4_lhs_location[");
 662   else
 663     {
 664       long int num;
 665       set_errno (0);
 666       num = strtol (cp, 0, 10);
 667
 668       if (INT_MIN <= num && num <= rule_length && ! get_errno ())
 669         {
 670           int n = num;
 671           obstack_fgrow2 (&obstack_for_string, "]b4_rhs_location(%d, %d)[",
 672                           rule_length, n);
 673         }
 674       else
 675         complain_at (loc, _("integer out of range: %s"), quote (text));
 676     }
 677
 678   return true;
 679 }
 680
 681
 682 /*----------------------------------------------------------------.
 683 | Map `@?' onto the proper M4 symbol, depending on its TOKEN_TYPE |
 684 | (are we in an action?).                                         |
 685 `----------------------------------------------------------------*/
 686
 687 static void
 688 handle_at (int token_type, char *text, location loc)
 689 {
 690   switch (token_type)
 691     {
 692     case BRACED_CODE:
 693       handle_action_at (text, loc);
 694       return;
 695
 696     case PERCENT_INITIAL_ACTION:
 697     case PERCENT_DESTRUCTOR:
 698     case PERCENT_PRINTER:
 699       if (text[1] == '$')
 700         {
 701           obstack_sgrow (&obstack_for_string, "]b4_at_dollar[");
 702           return;
 703         }
 704       break;
 705
 706     default:
 707       break;
 708     }
 709
 710   complain_at (loc, _("invalid value: %s"), quote (text));
 711 }
 712
 713
 714 /*------------------------------------------------------.
 715 | Scan NUMBER for a base-BASE integer at location LOC.  |
 716 `------------------------------------------------------*/
 717
 718 static unsigned long int
 719 scan_integer (char const *number, int base, location loc)
 720 {
 721   unsigned long int num;
 722   set_errno (0);
 723   num = strtoul (number, 0, base);
 724   if (INT_MAX < num || get_errno ())
 725     {
 726       complain_at (loc, _("integer out of range: %s"), quote (number));
 727       num = INT_MAX;
 728     }
 729   return num;
 730 }
 731
 732
 733 /*------------------------------------------------------------------.
 734 | Convert universal character name UCN to a single-byte character,  |
 735 | and return that character.  Return -1 if UCN does not correspond  |
 736 | to a single-byte character.                                       |
 737 `------------------------------------------------------------------*/
 738
 739 static int
 740 convert_ucn_to_byte (char const *ucn)
 741 {
 742   unsigned long int code = strtoul (ucn + 2, 0, 16);
 743
 744   /* FIXME: Currently we assume Unicode-compatible unibyte characters
 745      on ASCII hosts (i.e., Latin-1 on hosts with 8-bit bytes).  On
 746      non-ASCII hosts we support only the portable C character set.
 747      These limitations should be removed once we add support for
 748      multibyte characters.  */
 749
 750   if (UCHAR_MAX < code)
 751     return -1;
 752
 753 #if ! ('$' == 0x24 && '@' == 0x40 && '`' == 0x60 && '~' == 0x7e)
 754   {
 755     /* A non-ASCII host.  Use CODE to index into a table of the C
 756        basic execution character set, which is guaranteed to exist on
 757        all Standard C platforms.  This table also includes '$', '@',
 758        and '`', which are not in the basic execution character set but
 759        which are unibyte characters on all the platforms that we know
 760        about.  */
 761     static signed char const table[] =
 762       {
 763         '\0',   -1,   -1,   -1,   -1,   -1,   -1, '\a',
 764         '\b', '\t', '\n', '\v', '\f', '\r',   -1,   -1,
 765           -1,   -1,   -1,   -1,   -1,   -1,   -1,   -1,
 766           -1,   -1,   -1,   -1,   -1,   -1,   -1,   -1,
 767          ' ',  '!',  '"',  '#',  '$',  '%',  '&', '\'',
 768          '(',  ')',  '*',  '+',  ',',  '-',  '.',  '/',
 769          '0',  '1',  '2',  '3',  '4',  '5',  '6',  '7',
 770          '8',  '9',  ':',  ';',  '<',  '=',  '>',  '?',
 771          '@',  'A',  'B',  'C',  'D',  'E',  'F',  'G',
 772          'H',  'I',  'J',  'K',  'L',  'M',  'N',  'O',
 773          'P',  'Q',  'R',  'S',  'T',  'U',  'V',  'W',
 774          'X',  'Y',  'Z',  '[', '\\',  ']',  '^',  '_',
 775          '`',  'a',  'b',  'c',  'd',  'e',  'f',  'g',
 776          'h',  'i',  'j',  'k',  'l',  'm',  'n',  'o',
 777          'p',  'q',  'r',  's',  't',  'u',  'v',  'w',
 778          'x',  'y',  'z',  '{',  '|',  '}',  '~'
 779       };
 780
 781     code = code < sizeof table ? table[code] : -1;
 782   }
 783 #endif
 784
 785   return code;
 786 }
 787
 788
 789 /*----------------------------------------------------------------.
 790 | Handle `#line INT "FILE"'.  ARGS has already skipped `#line '.  |
 791 `----------------------------------------------------------------*/
 792
 793 static void
 794 handle_syncline (char *args)
 795 {
 796   int lineno = strtol (args, &args, 10);
 797   const char *file = NULL;
 798   file = strchr (args, '"') + 1;
 799   *strchr (file, '"') = 0;
 800   scanner_cursor.file = current_file = uniqstr_new (file);
 801   scanner_cursor.line = lineno;
 802   scanner_cursor.column = 1;
 803 }
 804
 805
 806 /*----------------------------------------------------------------.
 807 | For a token or comment starting at START, report message MSGID, |
 808 | which should say that an end marker was found before            |
 809 | the expected TOKEN_END.                                         |
 810 `----------------------------------------------------------------*/
 811
 812 static void
 813 unexpected_end (boundary start, char const *msgid, char const *token_end)
 814 {
 815   location loc;
 816   loc.start = start;
 817   loc.end = scanner_cursor;
 818   complain_at (loc, _(msgid), token_end);
 819 }
 820
 821
 822 /*------------------------------------------------------------------------.
 823 | Report an unexpected EOF in a token or comment starting at START.       |
 824 | An end of file was encountered and the expected TOKEN_END was missing.  |
 825 `------------------------------------------------------------------------*/
 826
 827 static void
 828 unexpected_eof (boundary start, char const *token_end)
 829 {
 830   unexpected_end (start, N_("missing `%s' at end of file"), token_end);
 831 }
 832
 833
 834 /*----------------------------------------.
 835 | Likewise, but for unexpected newlines.  |
 836 `----------------------------------------*/
 837
 838 static void
 839 unexpected_newline (boundary start, char const *token_end)
 840 {
 841   unexpected_end (start, N_("missing `%s' at end of line"), token_end);
 842 }
 843
 844
 845 /*-------------------------.
 846 | Initialize the scanner.  |
 847 `-------------------------*/
 848
 849 void
 850 scanner_initialize (void)
 851 {
 852   obstack_init (&obstack_for_string);
 853 }
 854
 855
 856 /*-----------------------------------------------.
 857 | Free all the memory allocated to the scanner.  |
 858 `-----------------------------------------------*/
 859
 860 void
 861 scanner_free (void)
 862 {
 863   obstack_free (&obstack_for_string, 0);
 864   /* Reclaim Flex's buffers.  */
 865   yy_delete_buffer (YY_CURRENT_BUFFER);
 866 }