]>
git.saurik.com Git - bison.git/blob - src/lex.c
b2458a4f2e8a2c1978ee9ecac7cd0c8f09aaa8ef
   1 /* Token-reader for Bison's input parser, 
   2    Copyright 1984, 1986, 1989, 1992, 2000, 2001 Free Software Foundation, Inc. 
   4    This file is part of Bison, the GNU Compiler Compiler. 
   6    Bison is free software; you can redistribute it and/or modify 
   7    it under the terms of the GNU General Public License as published by 
   8    the Free Software Foundation; either version 2, or (at your option) 
  11    Bison is distributed in the hope that it will be useful, 
  12    but WITHOUT ANY WARRANTY; without even the implied warranty of 
  13    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the 
  14    GNU General Public License for more details. 
  16    You should have received a copy of the GNU General Public License 
  17    along with Bison; see the file COPYING.  If not, write to 
  18    the Free Software Foundation, Inc., 59 Temple Place - Suite 330, 
  19    Boston, MA 02111-1307, USA.  */ 
  31 /* Buffer for storing the current token.  */ 
  32 static struct obstack token_obstack
; 
  33 const char *token_buffer 
= NULL
; 
  35 bucket 
*symval 
= NULL
; 
  38 /* A token to be reread, see unlex and lex. */ 
  39 static token_t unlexed 
= tok_undef
; 
  40 static bucket 
*unlexed_symval 
= NULL
; 
  41 static const char *unlexed_token_buffer 
= NULL
; 
  46   obstack_init (&token_obstack
); 
  54   obstack_free (&token_obstack
, NULL
); 
  59 skip_white_space (void) 
  73           /* FIXME: Should probably be merged with copy_comment.  */ 
  75           if (c 
!= '*' && c 
!= '/') 
  77               complain (_("unexpected `/' found and ignored")); 
  80           cplus_comment 
= (c 
== '/'); 
  87               if (!cplus_comment 
&& c 
== '*') 
 106                 fatal (_("unterminated comment")); 
 129 /*-----------------------------------------------------. 
 130 | Do a getc, but give error message if EOF encountered | 
 131 `-----------------------------------------------------*/ 
 138     fatal (_("unexpected end of file")); 
 143 /*-----------------------------------------------------------------. 
 144 | Read one literal character from FINPUT.  Process \-escapes.      | 
 145 | Append the char to OUT and assign it to *PCODE.  Return 1 unless | 
 146 | the character is an unescaped `term' or \n; report error for \n. | 
 147 `-----------------------------------------------------------------*/ 
 150 literalchar (struct obstack 
*out
, int *pcode
, char term
) 
 159       complain (_("unescaped newline in constant")); 
 193       else if (c 
<= '7' && c 
>= '0') 
 196           while (c 
<= '7' && c 
>= '0') 
 198               code 
= (code 
* 8) + (c 
- '0'); 
 199               if (code 
>= 256 || code 
< 0) 
 201                   complain (_("octal value outside range 0...255: `\\%o'"), 
 216               if (c 
>= '0' && c 
<= '9') 
 217                 code 
*= 16, code 
+= c 
- '0'; 
 218               else if (c 
>= 'a' && c 
<= 'f') 
 219                 code 
*= 16, code 
+= c 
- 'a' + 10; 
 220               else if (c 
>= 'A' && c 
<= 'F') 
 221                 code 
*= 16, code 
+= c 
- 'A' + 10; 
 224               if (code 
>= 256 || code 
< 0) 
 226                   complain (_("hexadecimal value above 255: `\\x%x'"), code
); 
 236           char badchar 
[] = "c"; 
 238           complain (_("unknown escape sequence: `\\' followed by `%s'"), 
 245     obstack_1grow (out
, code
); 
 252 unlex (token_t token
) 
 255   unlexed_token_buffer 
= token_buffer
; 
 256   unlexed_symval 
= symval
; 
 259 /*-----------------------------------------------------------------. 
 260 | We just read `<' from FIN.  Store in TOKEN_BUFFER, the type name | 
 261 | specified between the `<...>'.                                   | 
 262 `-----------------------------------------------------------------*/ 
 265 read_type_name (FILE *fin
) 
 272         fatal (_("unterminated type name at end of file")); 
 275           complain (_("unterminated type name")); 
 280       obstack_1grow (&token_obstack
, c
); 
 283   obstack_1grow (&token_obstack
, '\0'); 
 284   token_buffer 
= obstack_finish (&token_obstack
); 
 293   /* Just to make sure. */ 
 296   if (unlexed 
!= tok_undef
) 
 298       token_t res 
= unlexed
; 
 299       symval 
= unlexed_symval
; 
 300       token_buffer 
= unlexed_token_buffer
; 
 305   c 
= skip_white_space (); 
 310       token_buffer 
= "EOF"; 
 313     case 'A':    case 'B':    case 'C':    case 'D':    case 'E': 
 314     case 'F':    case 'G':    case 'H':    case 'I':    case 'J': 
 315     case 'K':    case 'L':    case 'M':    case 'N':    case 'O': 
 316     case 'P':    case 'Q':    case 'R':    case 'S':    case 'T': 
 317     case 'U':    case 'V':    case 'W':    case 'X':    case 'Y': 
 319     case 'a':    case 'b':    case 'c':    case 'd':    case 'e': 
 320     case 'f':    case 'g':    case 'h':    case 'i':    case 'j': 
 321     case 'k':    case 'l':    case 'm':    case 'n':    case 'o': 
 322     case 'p':    case 'q':    case 'r':    case 's':    case 't': 
 323     case 'u':    case 'v':    case 'w':    case 'x':    case 'y': 
 327       while (isalnum (c
) || c 
== '_' || c 
== '.') 
 329           obstack_1grow (&token_obstack
, c
); 
 332       obstack_1grow (&token_obstack
, '\0'); 
 333       token_buffer 
= obstack_finish (&token_obstack
); 
 335       symval 
= getsym (token_buffer
); 
 336       return tok_identifier
; 
 338     case '0':    case '1':    case '2':    case '3':    case '4': 
 339     case '5':    case '6':    case '7':    case '8':    case '9': 
 345             obstack_1grow (&token_obstack
, c
); 
 346             numval 
= numval 
* 10 + c 
- '0'; 
 349         obstack_1grow (&token_obstack
, '\0'); 
 350         token_buffer 
= obstack_finish (&token_obstack
); 
 356       /* parse the literal token and compute character code in  code  */ 
 361         obstack_1grow (&token_obstack
, '\''); 
 362         literalchar (&token_obstack
, &code
, '\''); 
 368             complain (_("use \"...\" for multi-character literal tokens")); 
 370               if (!literalchar (0, &discode
, '\'')) 
 373         obstack_1grow (&token_obstack
, '\''); 
 374         obstack_1grow (&token_obstack
, '\0'); 
 375         token_buffer 
= obstack_finish (&token_obstack
); 
 376         symval 
= getsym (token_buffer
); 
 377         symval
->class = token_sym
; 
 378         if (symval
->user_token_number 
== SUNDEF
) 
 379           symval
->user_token_number 
= code
; 
 380         return tok_identifier
; 
 384       /* parse the literal string token and treat as an identifier */ 
 387         int code
;               /* ignored here */ 
 389         obstack_1grow (&token_obstack
, '\"'); 
 390         /* Read up to and including ".  */ 
 391         while (literalchar (&token_obstack
, &code
, '\"')) 
 393         obstack_1grow (&token_obstack
, '\0'); 
 394         token_buffer 
= obstack_finish (&token_obstack
); 
 396         symval 
= getsym (token_buffer
); 
 397         symval
->class = token_sym
; 
 399         return tok_identifier
; 
 412       return tok_semicolon
; 
 420       return tok_left_curly
; 
 423       obstack_1grow (&token_obstack
, c
); 
 427           obstack_1grow (&token_obstack
, c
); 
 431       while (c 
== ' ' || c 
== '\n' || c 
== '\t'); 
 432       obstack_1grow (&token_obstack
, '\0'); 
 433       token_buffer 
= obstack_finish (&token_obstack
); 
 437           return tok_left_curly
; 
 446       read_type_name (finput
); 
 450       return parse_percent_token (); 
 453       obstack_1grow (&token_obstack
, c
); 
 454       obstack_1grow (&token_obstack
, '\0'); 
 455       token_buffer 
= obstack_finish (&token_obstack
); 
 460 /* This function is a strcmp, which doesn't differentiate `-' and `_' 
 464 option_strcmp (const char *left
, const char *right
) 
 466   const unsigned char *l
, *r
; 
 471   l 
= (const unsigned char *)left
; 
 472   r 
= (const unsigned char *)right
; 
 473   while (((c 
= *l 
- *r
++) == 0 && *l 
!= '\0') 
 474          || ((*l 
== '-' || *l 
== '_') && (*r 
== '_' || *r 
== '-'))) 
 479 /* Parse a token which starts with %. 
 480    Assumes the % has already been read and discarded.  */ 
 483 parse_percent_token (void) 
 485   const struct option_table_struct 
*tx 
= NULL
; 
 486   const char *arg 
= NULL
; 
 487   /* Where the ARG was found in token_buffer. */ 
 488   size_t arg_offset 
= 0; 
 490   int c 
= getc (finput
); 
 495       return tok_two_percents
; 
 498       return tok_percent_left_curly
; 
 500       /* FIXME: Who the heck are those 5 guys!?! `%<' = `%left'!!! 
 501          Let's ask for their removal.  */ 
 521   obstack_1grow (&token_obstack
, '%'); 
 522   while (isalpha (c
) || c 
== '_' || c 
== '-') 
 526       obstack_1grow (&token_obstack
, c
); 
 530   /* %DIRECTIVE="ARG".  Separate into 
 531      TOKEN_BUFFER = `%DIRECTIVE\0ARG\0'. 
 532      This is a bit hackish, but once we move to a Bison parser, 
 533      things will be cleaned up.  */ 
 536       /* End of the directive.  We skip the `='. */ 
 537       obstack_1grow (&token_obstack
, '\0'); 
 538       /* Fetch the ARG if present. */ 
 543           arg_offset 
= obstack_object_size (&token_obstack
); 
 544           /* Read up to and including `"'.  Do not append the closing 
 545              `"' in the output: it's not part of the ARG.  */ 
 546           while (literalchar (NULL
, &code
, '"')) 
 547             obstack_1grow (&token_obstack
, code
); 
 549       /* else: should be an error. */ 
 554   obstack_1grow (&token_obstack
, '\0'); 
 555   token_buffer 
= obstack_finish (&token_obstack
); 
 557     arg 
= token_buffer 
+ arg_offset
; 
 559   /* table lookup % directive */ 
 560   for (tx 
= option_table
; tx
->name
; tx
++) 
 561     if ((tx
->access 
== opt_percent 
|| tx
->access 
== opt_both
) 
 562         && option_strcmp (token_buffer 
+ 1, tx
->name
) == 0) 
 565   if (arg 
&& tx
->ret_val 
!= tok_stropt
) 
 566     fatal (_("`%s' supports no argument: %s"), token_buffer
, quote (arg
)); 
 572       assert (tx
->set_flag
); 
 575           /* Keep only the first assignment: command line options have 
 576              already been processed, and we want them to have 
 577              precedence.  Side effect: if this %-option is used 
 578              several times, only the first is honored.  Bah.  */ 
 579           if (!*((char **) (tx
->set_flag
))) 
 580             *((char **) (tx
->set_flag
)) = xstrdup (arg
); 
 583         fatal (_("`%s' requires an argument"), token_buffer
); 
 588       assert (tx
->set_flag
); 
 589       *((int *) (tx
->set_flag
)) = 1; 
 594       fatal (_("`%s' is no longer supported"), token_buffer
);