1 /* Token-reader for Bison's input parser, 
   2    Copyright 1984, 1986, 1989, 1992, 2000, 2001 Free Software Foundation, Inc. 
   4    This file is part of Bison, the GNU Compiler Compiler. 
   6    Bison is free software; you can redistribute it and/or modify 
   7    it under the terms of the GNU General Public License as published by 
   8    the Free Software Foundation; either version 2, or (at your option) 
  11    Bison is distributed in the hope that it will be useful, 
  12    but WITHOUT ANY WARRANTY; without even the implied warranty of 
  13    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the 
  14    GNU General Public License for more details. 
  16    You should have received a copy of the GNU General Public License 
  17    along with Bison; see the file COPYING.  If not, write to 
  18    the Free Software Foundation, Inc., 59 Temple Place - Suite 330, 
  19    Boston, MA 02111-1307, USA.  */ 
  24 #include "getopt.h"             /* for optarg */ 
  32 /* Buffer for storing the current token.  */ 
  33 struct obstack token_obstack
; 
  34 const char *token_buffer 
= NULL
; 
  39 static int unlexed
;             /* these two describe a token to be reread */ 
  40 static bucket 
*unlexed_symval
;  /* by the next call to lex */ 
  46   obstack_init (&token_obstack
); 
  52 skip_white_space (void) 
  66           /* FIXME: Should probably be merged with copy_comment.  */ 
  68           if (c 
!= '*' && c 
!= '/') 
  70               complain (_("unexpected `/' found and ignored")); 
  73           cplus_comment 
= (c 
== '/'); 
  80               if (!cplus_comment 
&& c 
== '*') 
  99                 fatal (_("unterminated comment")); 
 122 /*-----------------------------------------------------. 
 123 | Do a getc, but give error message if EOF encountered | 
 124 `-----------------------------------------------------*/ 
 131     fatal (_("unexpected end of file")); 
 136 /*------------------------------------------------------------------. 
 137 | Read one literal character from finput.  Process \ escapes.       | 
 138 | Append the normalized string version of the char to OUT.  Assign  | 
 139 | the character code to *PCODE. Return 1 unless the character is an | 
 140 | unescaped `term' or \n; report an error for \n.                   | 
 141 `------------------------------------------------------------------*/ 
 143 /* FIXME: We could directly work in the obstack, but that would make 
 144    it more difficult to move to quotearg some day.  So for the time 
 145    being, I prefer to have literalchar behave like quotearg, and change 
 146    my mind later if I was wrong.  */ 
 149 literalchar (struct obstack 
*out
, int *pcode
, char term
) 
 160       complain (_("unescaped newline in constant")); 
 194       else if (c 
<= '7' && c 
>= '0') 
 197           while (c 
<= '7' && c 
>= '0') 
 199               code 
= (code 
* 8) + (c 
- '0'); 
 200               if (code 
>= 256 || code 
< 0) 
 202                   complain (_("octal value outside range 0...255: `\\%o'"), 
 217               if (c 
>= '0' && c 
<= '9') 
 218                 code 
*= 16, code 
+= c 
- '0'; 
 219               else if (c 
>= 'a' && c 
<= 'f') 
 220                 code 
*= 16, code 
+= c 
- 'a' + 10; 
 221               else if (c 
>= 'A' && c 
<= 'F') 
 222                 code 
*= 16, code 
+= c 
- 'A' + 10; 
 225               if (code 
>= 256 || code 
< 0) 
 227                   complain (_("hexadecimal value above 255: `\\x%x'"), code
); 
 237           char badchar 
[] = "c"; 
 239           complain (_("unknown escape sequence: `\\' followed by `%s'"), 
 245   /* now fill BUF with the canonical name for this character as a 
 246      literal token.  Do not use what the user typed, so that `\012' 
 247      and `\n' can be interchangeable.  */ 
 250   if (code 
== term 
&& wasquote
) 
 252   else if (code 
== '\\') 
 257   else if (code 
== '\'') 
 262   else if (code 
== '\"') 
 267   else if (code 
>= 040 && code 
< 0177) 
 269   else if (code 
== '\t') 
 274   else if (code 
== '\n') 
 279   else if (code 
== '\r') 
 284   else if (code 
== '\v') 
 289   else if (code 
== '\b') 
 294   else if (code 
== '\f') 
 302       *cp
++ = code 
/ 0100 + '0'; 
 303       *cp
++ = ((code 
/ 010) & 07) + '0'; 
 304       *cp
++ = (code 
& 07) + '0'; 
 309     obstack_sgrow (out
, buf
); 
 319   unlexed_symval 
= symval
; 
 322 /*-----------------------------------------------------------------. 
 323 | We just read `<' from FIN.  Store in TOKEN_BUFFER, the type name | 
 324 | specified between the `<...>'.                                   | 
 325 `-----------------------------------------------------------------*/ 
 328 read_type_name (FILE *fin
) 
 335         fatal (_("unterminated type name at end of file")); 
 338           complain (_("unterminated type name")); 
 343       obstack_1grow (&token_obstack
, c
); 
 346   obstack_1grow (&token_obstack
, '\0'); 
 347   token_buffer 
= obstack_finish (&token_obstack
); 
 356   /* Just to make sure. */ 
 361       symval 
= unlexed_symval
; 
 367   c 
= skip_white_space (); 
 372       token_buffer 
= "EOF"; 
 375     case 'A':    case 'B':    case 'C':    case 'D':    case 'E': 
 376     case 'F':    case 'G':    case 'H':    case 'I':    case 'J': 
 377     case 'K':    case 'L':    case 'M':    case 'N':    case 'O': 
 378     case 'P':    case 'Q':    case 'R':    case 'S':    case 'T': 
 379     case 'U':    case 'V':    case 'W':    case 'X':    case 'Y': 
 381     case 'a':    case 'b':    case 'c':    case 'd':    case 'e': 
 382     case 'f':    case 'g':    case 'h':    case 'i':    case 'j': 
 383     case 'k':    case 'l':    case 'm':    case 'n':    case 'o': 
 384     case 'p':    case 'q':    case 'r':    case 's':    case 't': 
 385     case 'u':    case 'v':    case 'w':    case 'x':    case 'y': 
 389       while (isalnum (c
) || c 
== '_' || c 
== '.') 
 391           obstack_1grow (&token_obstack
, c
); 
 394       obstack_1grow (&token_obstack
, '\0'); 
 395       token_buffer 
= obstack_finish (&token_obstack
); 
 397       symval 
= getsym (token_buffer
); 
 398       return tok_identifier
; 
 400     case '0':    case '1':    case '2':    case '3':    case '4': 
 401     case '5':    case '6':    case '7':    case '8':    case '9': 
 407             obstack_1grow (&token_obstack
, c
); 
 408             numval 
= numval 
* 10 + c 
- '0'; 
 411         obstack_1grow (&token_obstack
, '\0'); 
 412         token_buffer 
= obstack_finish (&token_obstack
); 
 418       /* parse the literal token and compute character code in  code  */ 
 424         obstack_1grow (&token_obstack
, '\''); 
 425         literalchar (&token_obstack
, &code
, '\''); 
 430             complain (_("use \"...\" for multi-character literal tokens")); 
 432               if (!literalchar (0, &discode
, '\'')) 
 435         obstack_1grow (&token_obstack
, '\''); 
 436         obstack_1grow (&token_obstack
, '\0'); 
 437         token_buffer 
= obstack_finish (&token_obstack
); 
 438         symval 
= getsym (token_buffer
); 
 439         symval
->class = token_sym
; 
 440         if (!symval
->user_token_number
) 
 441           symval
->user_token_number 
= code
; 
 442         return tok_identifier
; 
 446       /* parse the literal string token and treat as an identifier */ 
 450         int code
;               /* ignored here */ 
 452         obstack_1grow (&token_obstack
, '\"'); 
 453         /* Read up to and including ".  */ 
 454         while (literalchar (&token_obstack
, &code
, '\"')) 
 456         obstack_1grow (&token_obstack
, '\0'); 
 457         token_buffer 
= obstack_finish (&token_obstack
); 
 459         symval 
= getsym (token_buffer
); 
 460         symval
->class = token_sym
; 
 462         return tok_identifier
; 
 472       return tok_semicolon
; 
 478       return tok_left_curly
; 
 487       while (c 
== ' ' || c 
== '\n' || c 
== '\t'); 
 492           return tok_left_curly
; 
 501       read_type_name (finput
); 
 505       return parse_percent_token (); 
 512 /* the following table dictates the action taken for the various % 
 513    directives.  A set_flag value causes the named flag to be set.  A 
 514    retval action returns the code.  */ 
 515 struct percent_table_struct
 
 522 struct percent_table_struct percent_table
[] = 
 524   { "token",            NULL
,                   tok_token 
}, 
 525   { "term",             NULL
,                   tok_token 
}, 
 526   { "nterm",            NULL
,                   tok_nterm 
}, 
 527   { "type",             NULL
,                   tok_type 
}, 
 528   { "guard",            NULL
,                   tok_guard 
}, 
 529   { "union",            NULL
,                   tok_union 
}, 
 530   { "expect",           NULL
,                   tok_expect 
}, 
 531   { "thong",            NULL
,                   tok_thong 
}, 
 532   { "start",            NULL
,                   tok_start 
}, 
 533   { "left",             NULL
,                   tok_left 
}, 
 534   { "right",            NULL
,                   tok_right 
}, 
 535   { "nonassoc",         NULL
,                   tok_nonassoc 
}, 
 536   { "binary",           NULL
,                   tok_nonassoc 
}, 
 537   { "prec",             NULL
,                   tok_prec 
}, 
 538   { "locations",        &locations_flag
,        tok_noop 
},     /* -l */ 
 539   { "no_lines",         &no_lines_flag
,         tok_noop 
},     /* -l */ 
 540   { "raw",              NULL
,                   tok_obsolete 
}, /* -r */ 
 541   { "token_table",      &token_table_flag
,      tok_noop 
},     /* -k */ 
 542   { "yacc",             &yacc_flag
,             tok_noop 
},     /* -y */ 
 543   { "fixed_output_files",&yacc_flag
,            tok_noop 
},     /* -y */ 
 544   { "defines",          &defines_flag
,          tok_noop 
},     /* -d */ 
 545   { "no_parser",        &no_parser_flag
,        tok_noop 
},     /* -n */ 
 547   /* For the time being, this is not enabled yet, while it's possible 
 548      though, since we use obstacks.  The only risk is with semantic 
 549      parsers which will output an `include' of an output file: be sure 
 550      that the name included is indeed the name of the output file.  */ 
 551   { "output_file",      &spec_outfile
,          tok_setopt 
},   /* -o */ 
 552   { "file_prefix",      &spec_file_prefix
,      tok_setopt 
},   /* -b */ 
 553   { "name_prefix",      &spec_name_prefix
,      tok_setopt 
},   /* -p */ 
 555   { "header_extension", NULL
,                   tok_hdrext
}, 
 556   { "source_extension", NULL
,                   tok_srcext
}, 
 557   { "define",           NULL
,                   tok_define 
}, 
 558   { "verbose",          &verbose_flag
,          tok_noop 
},     /* -v */ 
 559   { "debug",            &debug_flag
,            tok_noop 
},     /* -t */ 
 560   { "semantic_parser",  &semantic_parser
,       tok_noop 
}, 
 561   { "pure_parser",      &pure_parser
,           tok_noop 
}, 
 563   { NULL
, NULL
, tok_illegal
} 
 566 /* Parse a token which starts with %. 
 567    Assumes the % has already been read and discarded.  */ 
 570 parse_percent_token (void) 
 573   struct percent_table_struct 
*tx
; 
 580       return tok_two_percents
; 
 583       return tok_percent_left_curly
; 
 604   obstack_1grow (&token_obstack
, '%'); 
 605   while (isalpha (c
) || c 
== '_' || c 
== '-') 
 609       obstack_1grow (&token_obstack
, c
); 
 614   obstack_1grow (&token_obstack
, '\0'); 
 615   token_buffer 
= obstack_finish (&token_obstack
); 
 617   /* table lookup % directive */ 
 618   for (tx 
= percent_table
; tx
->name
; tx
++) 
 619     if (strcmp (token_buffer 
+ 1, tx
->name
) == 0) 
 624       *((int *) (tx
->set_flag
)) = 1; 
 631       *((char **) (tx
->set_flag
)) = optarg
; 
 636       fatal (_("`%s' is no longer supported"), token_buffer
);