]>
git.saurik.com Git - bison.git/blob - src/lex.c
78b8a8f391bf84f8db7981a39e008575f18dd8e4
   1 /* Token-reader for Bison's input parser, 
   2    Copyright 1984, 1986, 1989, 1992, 2000, 2001 Free Software Foundation, Inc. 
   4    This file is part of Bison, the GNU Compiler Compiler. 
   6    Bison is free software; you can redistribute it and/or modify 
   7    it under the terms of the GNU General Public License as published by 
   8    the Free Software Foundation; either version 2, or (at your option) 
  11    Bison is distributed in the hope that it will be useful, 
  12    but WITHOUT ANY WARRANTY; without even the implied warranty of 
  13    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the 
  14    GNU General Public License for more details. 
  16    You should have received a copy of the GNU General Public License 
  17    along with Bison; see the file COPYING.  If not, write to 
  18    the Free Software Foundation, Inc., 59 Temple Place - Suite 330, 
  19    Boston, MA 02111-1307, USA.  */ 
  31 /* Buffer for storing the current token.  */ 
  32 static struct obstack token_obstack
; 
  33 const char *token_buffer 
= NULL
; 
  35 bucket 
*symval 
= NULL
; 
  38 /* A token to be reread, see unlex and lex. */ 
  39 static token_t unlexed 
= tok_undef
; 
  40 static bucket 
*unlexed_symval 
= NULL
; 
  41 static const char *unlexed_token_buffer 
= NULL
; 
  46   obstack_init (&token_obstack
); 
  54   obstack_free (&token_obstack
, NULL
); 
  59 skip_white_space (void) 
  73           /* FIXME: Should probably be merged with copy_comment.  */ 
  75           if (c 
!= '*' && c 
!= '/') 
  77               complain (_("unexpected `/' found and ignored")); 
  80           cplus_comment 
= (c 
== '/'); 
  87               if (!cplus_comment 
&& c 
== '*') 
 106                 fatal (_("unterminated comment")); 
 129 /*-----------------------------------------------------. 
 130 | Do a getc, but give error message if EOF encountered | 
 131 `-----------------------------------------------------*/ 
 138     fatal (_("unexpected end of file")); 
 143 /*------------------------------------------------------------------. 
 144 | Read one literal character from finput.  Process \ escapes.       | 
 145 | Append the normalized string version of the char to OUT.  Assign  | 
 146 | the character code to *PCODE. Return 1 unless the character is an | 
 147 | unescaped `term' or \n; report an error for \n.                   | 
 148 `------------------------------------------------------------------*/ 
 150 /* FIXME: We could directly work in the obstack, but that would make 
 151    it more difficult to move to quotearg some day.  So for the time 
 152    being, I prefer to have literalchar behave like quotearg, and change 
 153    my mind later if I was wrong.  */ 
 156 literalchar (struct obstack 
*out
, int *pcode
, char term
) 
 167       complain (_("unescaped newline in constant")); 
 201       else if (c 
<= '7' && c 
>= '0') 
 204           while (c 
<= '7' && c 
>= '0') 
 206               code 
= (code 
* 8) + (c 
- '0'); 
 207               if (code 
>= 256 || code 
< 0) 
 209                   complain (_("octal value outside range 0...255: `\\%o'"), 
 224               if (c 
>= '0' && c 
<= '9') 
 225                 code 
*= 16, code 
+= c 
- '0'; 
 226               else if (c 
>= 'a' && c 
<= 'f') 
 227                 code 
*= 16, code 
+= c 
- 'a' + 10; 
 228               else if (c 
>= 'A' && c 
<= 'F') 
 229                 code 
*= 16, code 
+= c 
- 'A' + 10; 
 232               if (code 
>= 256 || code 
< 0) 
 234                   complain (_("hexadecimal value above 255: `\\x%x'"), code
); 
 244           char badchar 
[] = "c"; 
 246           complain (_("unknown escape sequence: `\\' followed by `%s'"), 
 252   /* now fill BUF with the canonical name for this character as a 
 253      literal token.  Do not use what the user typed, so that `\012' 
 254      and `\n' can be interchangeable.  */ 
 257   if (code 
== term 
&& wasquote
) 
 259   else if (code 
== '\\') 
 264   else if (code 
== '\'') 
 269   else if (code 
== '\"') 
 274   else if (code 
>= 040 && code 
< 0177) 
 276   else if (code 
== '\t') 
 281   else if (code 
== '\n') 
 286   else if (code 
== '\r') 
 291   else if (code 
== '\v') 
 296   else if (code 
== '\b') 
 301   else if (code 
== '\f') 
 309       *cp
++ = code 
/ 0100 + '0'; 
 310       *cp
++ = ((code 
/ 010) & 07) + '0'; 
 311       *cp
++ = (code 
& 07) + '0'; 
 316     obstack_sgrow (out
, buf
); 
 323 unlex (token_t token
) 
 326   unlexed_token_buffer 
= token_buffer
; 
 327   unlexed_symval 
= symval
; 
 330 /*-----------------------------------------------------------------. 
 331 | We just read `<' from FIN.  Store in TOKEN_BUFFER, the type name | 
 332 | specified between the `<...>'.                                   | 
 333 `-----------------------------------------------------------------*/ 
 336 read_type_name (FILE *fin
) 
 343         fatal (_("unterminated type name at end of file")); 
 346           complain (_("unterminated type name")); 
 351       obstack_1grow (&token_obstack
, c
); 
 354   obstack_1grow (&token_obstack
, '\0'); 
 355   token_buffer 
= obstack_finish (&token_obstack
); 
 364   /* Just to make sure. */ 
 367   if (unlexed 
!= tok_undef
) 
 369       token_t res 
= unlexed
; 
 370       symval 
= unlexed_symval
; 
 371       token_buffer 
= unlexed_token_buffer
; 
 376   c 
= skip_white_space (); 
 381       token_buffer 
= "EOF"; 
 384     case 'A':    case 'B':    case 'C':    case 'D':    case 'E': 
 385     case 'F':    case 'G':    case 'H':    case 'I':    case 'J': 
 386     case 'K':    case 'L':    case 'M':    case 'N':    case 'O': 
 387     case 'P':    case 'Q':    case 'R':    case 'S':    case 'T': 
 388     case 'U':    case 'V':    case 'W':    case 'X':    case 'Y': 
 390     case 'a':    case 'b':    case 'c':    case 'd':    case 'e': 
 391     case 'f':    case 'g':    case 'h':    case 'i':    case 'j': 
 392     case 'k':    case 'l':    case 'm':    case 'n':    case 'o': 
 393     case 'p':    case 'q':    case 'r':    case 's':    case 't': 
 394     case 'u':    case 'v':    case 'w':    case 'x':    case 'y': 
 398       while (isalnum (c
) || c 
== '_' || c 
== '.') 
 400           obstack_1grow (&token_obstack
, c
); 
 403       obstack_1grow (&token_obstack
, '\0'); 
 404       token_buffer 
= obstack_finish (&token_obstack
); 
 406       symval 
= getsym (token_buffer
); 
 407       return tok_identifier
; 
 409     case '0':    case '1':    case '2':    case '3':    case '4': 
 410     case '5':    case '6':    case '7':    case '8':    case '9': 
 416             obstack_1grow (&token_obstack
, c
); 
 417             numval 
= numval 
* 10 + c 
- '0'; 
 420         obstack_1grow (&token_obstack
, '\0'); 
 421         token_buffer 
= obstack_finish (&token_obstack
); 
 427       /* parse the literal token and compute character code in  code  */ 
 432         obstack_1grow (&token_obstack
, '\''); 
 433         literalchar (&token_obstack
, &code
, '\''); 
 439             complain (_("use \"...\" for multi-character literal tokens")); 
 441               if (!literalchar (0, &discode
, '\'')) 
 444         obstack_1grow (&token_obstack
, '\''); 
 445         obstack_1grow (&token_obstack
, '\0'); 
 446         token_buffer 
= obstack_finish (&token_obstack
); 
 447         symval 
= getsym (token_buffer
); 
 448         symval
->class = token_sym
; 
 449         if (symval
->user_token_number 
== SUNDEF
) 
 450           symval
->user_token_number 
= code
; 
 451         return tok_identifier
; 
 455       /* parse the literal string token and treat as an identifier */ 
 458         int code
;               /* ignored here */ 
 460         obstack_1grow (&token_obstack
, '\"'); 
 461         /* Read up to and including ".  */ 
 462         while (literalchar (&token_obstack
, &code
, '\"')) 
 464         obstack_1grow (&token_obstack
, '\0'); 
 465         token_buffer 
= obstack_finish (&token_obstack
); 
 467         symval 
= getsym (token_buffer
); 
 468         symval
->class = token_sym
; 
 470         return tok_identifier
; 
 483       return tok_semicolon
; 
 491       return tok_left_curly
; 
 494       obstack_1grow (&token_obstack
, c
); 
 498           obstack_1grow (&token_obstack
, c
); 
 502       while (c 
== ' ' || c 
== '\n' || c 
== '\t'); 
 503       obstack_1grow (&token_obstack
, '\0'); 
 504       token_buffer 
= obstack_finish (&token_obstack
); 
 508           return tok_left_curly
; 
 517       read_type_name (finput
); 
 521       return parse_percent_token (); 
 524       obstack_1grow (&token_obstack
, c
); 
 525       obstack_1grow (&token_obstack
, '\0'); 
 526       token_buffer 
= obstack_finish (&token_obstack
); 
 531 /* This function is a strcmp, which doesn't differentiate `-' and `_' 
 535 option_strcmp (const char *left
, const char *right
) 
 537   const unsigned char *l
, *r
; 
 542   l 
= (const unsigned char *)left
; 
 543   r 
= (const unsigned char *)right
; 
 544   while (((c 
= *l 
- *r
++) == 0 && *l 
!= '\0') 
 545          || ((*l 
== '-' || *l 
== '_') && (*r 
== '_' || *r 
== '-'))) 
 550 /* Parse a token which starts with %. 
 551    Assumes the % has already been read and discarded.  */ 
 554 parse_percent_token (void) 
 556   const struct option_table_struct 
*tx 
= NULL
; 
 557   const char *arg 
= NULL
; 
 558   /* Where the ARG was found in token_buffer. */ 
 559   size_t arg_offset 
= 0; 
 561   int c 
= getc (finput
); 
 566       return tok_two_percents
; 
 569       return tok_percent_left_curly
; 
 571       /* FIXME: Who the heck are those 5 guys!?! `%<' = `%left'!!! 
 572          Let's ask for their removal.  */ 
 592   obstack_1grow (&token_obstack
, '%'); 
 593   while (isalpha (c
) || c 
== '_' || c 
== '-') 
 597       obstack_1grow (&token_obstack
, c
); 
 601   /* %DIRECTIVE="ARG".  Separate into 
 602      TOKEN_BUFFER = `%DIRECTIVE\0ARG\0'. 
 603      This is a bit hackish, but once we move to a Bison parser, 
 604      things will be cleaned up.  */ 
 607       /* End of the directive.  We skip the `='. */ 
 608       obstack_1grow (&token_obstack
, '\0'); 
 609       /* Fetch the ARG if present. */ 
 614           arg_offset 
= obstack_object_size (&token_obstack
); 
 615           /* Read up to and including `"'.  Do not append the closing 
 616              `"' in the output: it's not part of the ARG.  */ 
 617           while (literalchar (NULL
, &code
, '"')) 
 618             obstack_1grow (&token_obstack
, code
); 
 620       /* else: should be an error. */ 
 625   obstack_1grow (&token_obstack
, '\0'); 
 626   token_buffer 
= obstack_finish (&token_obstack
); 
 628     arg 
= token_buffer 
+ arg_offset
; 
 630   /* table lookup % directive */ 
 631   for (tx 
= option_table
; tx
->name
; tx
++) 
 632     if ((tx
->access 
== opt_percent 
|| tx
->access 
== opt_both
) 
 633         && option_strcmp (token_buffer 
+ 1, tx
->name
) == 0) 
 636   if (arg 
&& tx
->ret_val 
!= tok_stropt
) 
 637     fatal (_("`%s' supports no argument: %s"), token_buffer
, quote (arg
)); 
 643       assert (tx
->set_flag
); 
 646           /* Keep only the first assignment: command line options have 
 647              already been processed, and we want them to have 
 648              precedence.  Side effect: if this %-option is used 
 649              several times, only the first is honored.  Bah.  */ 
 650           if (!*((char **) (tx
->set_flag
))) 
 651             *((char **) (tx
->set_flag
)) = xstrdup (arg
); 
 654         fatal (_("`%s' requires an argument"), token_buffer
); 
 659       assert (tx
->set_flag
); 
 660       *((int *) (tx
->set_flag
)) = 1; 
 665       fatal (_("`%s' is no longer supported"), token_buffer
);