]>
git.saurik.com Git - bison.git/blob - src/lex.c
60e392ce2cbd9311feddc61c3228b6f5d98eaf98
   1 /* Token-reader for Bison's input parser, 
   2    Copyright 1984, 1986, 1989, 1992, 2000, 2001 Free Software Foundation, Inc. 
   4    This file is part of Bison, the GNU Compiler Compiler. 
   6    Bison is free software; you can redistribute it and/or modify 
   7    it under the terms of the GNU General Public License as published by 
   8    the Free Software Foundation; either version 2, or (at your option) 
  11    Bison is distributed in the hope that it will be useful, 
  12    but WITHOUT ANY WARRANTY; without even the implied warranty of 
  13    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the 
  14    GNU General Public License for more details. 
  16    You should have received a copy of the GNU General Public License 
  17    along with Bison; see the file COPYING.  If not, write to 
  18    the Free Software Foundation, Inc., 59 Temple Place - Suite 330, 
  19    Boston, MA 02111-1307, USA.  */ 
  31 /* Buffer for storing the current token.  */ 
  32 static struct obstack token_obstack
; 
  33 const char *token_buffer 
= NULL
; 
  35 bucket 
*symval 
= NULL
; 
  38 /* A token to be reread, see unlex and lex. */ 
  39 static token_t unlexed 
= tok_undef
; 
  40 static bucket 
*unlexed_symval 
= NULL
; 
  41 static const char *unlexed_token_buffer 
= NULL
; 
  46   obstack_init (&token_obstack
); 
  54   obstack_free (&token_obstack
, NULL
); 
  59 skip_white_space (void) 
  73           /* FIXME: Should probably be merged with copy_comment.  */ 
  75           if (c 
!= '*' && c 
!= '/') 
  77               complain (_("unexpected `/' found and ignored")); 
  80           cplus_comment 
= (c 
== '/'); 
  87               if (!cplus_comment 
&& c 
== '*') 
 106                 fatal (_("unterminated comment")); 
 129 /*-----------------------------------------------------. 
 130 | Do a getc, but give error message if EOF encountered | 
 131 `-----------------------------------------------------*/ 
 138     fatal (_("unexpected end of file")); 
 143 /*---------------------------------------------------------------. 
 144 | Read one literal character from FINPUT, process \-escapes, and | 
 145 | return the character.                                          | 
 146 `---------------------------------------------------------------*/ 
 157       complain (_("unescaped newline in constant")); 
 188       else if (c 
<= '7' && c 
>= '0') 
 191           while (c 
<= '7' && c 
>= '0') 
 193               res 
= (res 
* 8) + (c 
- '0'); 
 194               if (res 
>= 256 || res 
< 0) 
 196                   complain (_("octal value outside range 0...255: `\\%o'"), 
 211               if (c 
>= '0' && c 
<= '9') 
 212                 res 
*= 16, res 
+= c 
- '0'; 
 213               else if (c 
>= 'a' && c 
<= 'f') 
 214                 res 
*= 16, res 
+= c 
- 'a' + 10; 
 215               else if (c 
>= 'A' && c 
<= 'F') 
 216                 res 
*= 16, res 
+= c 
- 'A' + 10; 
 219               if (res 
>= 256 || res 
< 0) 
 221                   complain (_("hexadecimal value above 255: `\\x%x'"), res
); 
 231           char badchar 
[] = "c"; 
 233           complain (_("unknown escape sequence: `\\' followed by `%s'"), 
 244 unlex (token_t token
) 
 247   unlexed_token_buffer 
= token_buffer
; 
 248   unlexed_symval 
= symval
; 
 251 /*-----------------------------------------------------------------. 
 252 | We just read `<' from FIN.  Store in TOKEN_BUFFER, the type name | 
 253 | specified between the `<...>'.                                   | 
 254 `-----------------------------------------------------------------*/ 
 257 read_type_name (FILE *fin
) 
 264         fatal (_("unterminated type name at end of file")); 
 267           complain (_("unterminated type name")); 
 272       obstack_1grow (&token_obstack
, c
); 
 275   obstack_1grow (&token_obstack
, '\0'); 
 276   token_buffer 
= obstack_finish (&token_obstack
); 
 285   /* Just to make sure. */ 
 288   if (unlexed 
!= tok_undef
) 
 290       token_t res 
= unlexed
; 
 291       symval 
= unlexed_symval
; 
 292       token_buffer 
= unlexed_token_buffer
; 
 297   c 
= skip_white_space (); 
 302       token_buffer 
= "EOF"; 
 305     case 'A':    case 'B':    case 'C':    case 'D':    case 'E': 
 306     case 'F':    case 'G':    case 'H':    case 'I':    case 'J': 
 307     case 'K':    case 'L':    case 'M':    case 'N':    case 'O': 
 308     case 'P':    case 'Q':    case 'R':    case 'S':    case 'T': 
 309     case 'U':    case 'V':    case 'W':    case 'X':    case 'Y': 
 311     case 'a':    case 'b':    case 'c':    case 'd':    case 'e': 
 312     case 'f':    case 'g':    case 'h':    case 'i':    case 'j': 
 313     case 'k':    case 'l':    case 'm':    case 'n':    case 'o': 
 314     case 'p':    case 'q':    case 'r':    case 's':    case 't': 
 315     case 'u':    case 'v':    case 'w':    case 'x':    case 'y': 
 319       while (isalnum (c
) || c 
== '_' || c 
== '.') 
 321           obstack_1grow (&token_obstack
, c
); 
 324       obstack_1grow (&token_obstack
, '\0'); 
 325       token_buffer 
= obstack_finish (&token_obstack
); 
 327       symval 
= getsym (token_buffer
); 
 328       return tok_identifier
; 
 330     case '0':    case '1':    case '2':    case '3':    case '4': 
 331     case '5':    case '6':    case '7':    case '8':    case '9': 
 337             obstack_1grow (&token_obstack
, c
); 
 338             numval 
= numval 
* 10 + c 
- '0'; 
 341         obstack_1grow (&token_obstack
, '\0'); 
 342         token_buffer 
= obstack_finish (&token_obstack
); 
 348       /* parse the literal token and compute character code in  code  */ 
 351         int code 
= literalchar (); 
 353         obstack_1grow (&token_obstack
, '\''); 
 354         obstack_1grow (&token_obstack
, code
); 
 359             complain (_("use \"...\" for multi-character literal tokens")); 
 360             while (literalchar () != '\'') 
 363         obstack_1grow (&token_obstack
, '\''); 
 364         obstack_1grow (&token_obstack
, '\0'); 
 365         token_buffer 
= obstack_finish (&token_obstack
); 
 366         symval 
= getsym (token_buffer
); 
 367         symval
->class = token_sym
; 
 368         if (symval
->user_token_number 
== SUNDEF
) 
 369           symval
->user_token_number 
= code
; 
 370         return tok_identifier
; 
 374       /* parse the literal string token and treat as an identifier */ 
 377         int code
;               /* ignored here */ 
 379         obstack_1grow (&token_obstack
, '\"'); 
 380         /* Read up to and including ".  */ 
 383             code 
= literalchar (); 
 384             obstack_1grow (&token_obstack
, code
); 
 386         while (code 
!= '\"'); 
 387         obstack_1grow (&token_obstack
, '\0'); 
 388         token_buffer 
= obstack_finish (&token_obstack
); 
 390         symval 
= getsym (token_buffer
); 
 391         symval
->class = token_sym
; 
 393         return tok_identifier
; 
 406       return tok_semicolon
; 
 414       return tok_left_curly
; 
 417       obstack_1grow (&token_obstack
, c
); 
 421           obstack_1grow (&token_obstack
, c
); 
 425       while (c 
== ' ' || c 
== '\n' || c 
== '\t'); 
 426       obstack_1grow (&token_obstack
, '\0'); 
 427       token_buffer 
= obstack_finish (&token_obstack
); 
 431           return tok_left_curly
; 
 440       read_type_name (finput
); 
 444       return parse_percent_token (); 
 447       obstack_1grow (&token_obstack
, c
); 
 448       obstack_1grow (&token_obstack
, '\0'); 
 449       token_buffer 
= obstack_finish (&token_obstack
); 
 454 /* This function is a strcmp, which doesn't differentiate `-' and `_' 
 458 option_strcmp (const char *left
, const char *right
) 
 460   const unsigned char *l
, *r
; 
 465   l 
= (const unsigned char *)left
; 
 466   r 
= (const unsigned char *)right
; 
 467   while (((c 
= *l 
- *r
++) == 0 && *l 
!= '\0') 
 468          || ((*l 
== '-' || *l 
== '_') && (*r 
== '_' || *r 
== '-'))) 
 473 /* Parse a token which starts with %. 
 474    Assumes the % has already been read and discarded.  */ 
 477 parse_percent_token (void) 
 479   const struct option_table_struct 
*tx 
= NULL
; 
 480   const char *arg 
= NULL
; 
 481   /* Where the ARG was found in token_buffer. */ 
 482   size_t arg_offset 
= 0; 
 484   int c 
= getc (finput
); 
 489       return tok_two_percents
; 
 492       return tok_percent_left_curly
; 
 494       /* FIXME: Who the heck are those 5 guys!?! `%<' = `%left'!!! 
 495          Let's ask for their removal.  */ 
 515   obstack_1grow (&token_obstack
, '%'); 
 516   while (isalpha (c
) || c 
== '_' || c 
== '-') 
 520       obstack_1grow (&token_obstack
, c
); 
 524   /* %DIRECTIVE="ARG".  Separate into 
 525      TOKEN_BUFFER = `%DIRECTIVE\0ARG\0'. 
 526      This is a bit hackish, but once we move to a Bison parser, 
 527      things will be cleaned up.  */ 
 530       /* End of the directive.  We skip the `='. */ 
 531       obstack_1grow (&token_obstack
, '\0'); 
 532       /* Fetch the ARG if present. */ 
 537           arg_offset 
= obstack_object_size (&token_obstack
); 
 538           /* Read up to and including `"'.  Do not append the closing 
 539              `"' in the output: it's not part of the ARG.  */ 
 540           while ((code 
= literalchar ()) != '"') 
 541             obstack_1grow (&token_obstack
, code
); 
 543       /* else: should be an error. */ 
 548   obstack_1grow (&token_obstack
, '\0'); 
 549   token_buffer 
= obstack_finish (&token_obstack
); 
 551     arg 
= token_buffer 
+ arg_offset
; 
 553   /* table lookup % directive */ 
 554   for (tx 
= option_table
; tx
->name
; tx
++) 
 555     if ((tx
->access 
== opt_percent 
|| tx
->access 
== opt_both
) 
 556         && option_strcmp (token_buffer 
+ 1, tx
->name
) == 0) 
 559   if (arg 
&& tx
->ret_val 
!= tok_stropt
) 
 560     fatal (_("`%s' supports no argument: %s"), token_buffer
, quote (arg
)); 
 566       assert (tx
->set_flag
); 
 569           /* Keep only the first assignment: command line options have 
 570              already been processed, and we want them to have 
 571              precedence.  Side effect: if this %-option is used 
 572              several times, only the first is honored.  Bah.  */ 
 573           if (!*((char **) (tx
->set_flag
))) 
 574             *((char **) (tx
->set_flag
)) = xstrdup (arg
); 
 577         fatal (_("`%s' requires an argument"), token_buffer
); 
 582       assert (tx
->set_flag
); 
 583       *((int *) (tx
->set_flag
)) = 1; 
 588       fatal (_("`%s' is no longer supported"), token_buffer
);