]>
git.saurik.com Git - bison.git/blob - src/lex.c
1 /* Token-reader for Bison's input parser,
2 Copyright (C) 1984, 1986, 1989 Free Software Foundation, Inc.
4 This file is part of Bison, the GNU Compiler Compiler.
6 Bison is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 2, or (at your option)
11 Bison is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
16 You should have received a copy of the GNU General Public License
17 along with Bison; see the file COPYING. If not, write to
18 the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. */
22 lex() is the entry point. It is called from reader.c.
23 It returns one of the token-type codes defined in lex.h.
24 When an identifier is seen, the code IDENTIFIER is returned
25 and the name is looked up in the symbol table using symtab.c;
26 symval is set to a pointer to the entry found. */
38 extern int translations
;
40 int parse_percent_token();
45 /* Buffer for storing the current token. */
48 /* Allocated size of token_buffer, not including space for terminator. */
54 static int unlexed
; /* these two describe a token to be reread */
55 static bucket
*unlexed_symval
; /* by the next call to lex */
62 token_buffer
= NEW2 (maxtoken
+ 1, char);
71 int offset
= p
- token_buffer
;
73 token_buffer
= (char *) xrealloc(token_buffer
, maxtoken
+ 1);
74 return token_buffer
+ offset
;
94 if (c
!= '*' && c
!= '/')
95 fatals("unexpected `/%c' found",c
);
96 cplus_comment
= (c
== '/');
103 if (!cplus_comment
&& c
== '*')
122 fatal("unterminated comment");
150 unlexed_symval
= symval
;
163 symval
= unlexed_symval
;
169 c
= skip_white_space();
176 case 'A': case 'B': case 'C': case 'D': case 'E':
177 case 'F': case 'G': case 'H': case 'I': case 'J':
178 case 'K': case 'L': case 'M': case 'N': case 'O':
179 case 'P': case 'Q': case 'R': case 'S': case 'T':
180 case 'U': case 'V': case 'W': case 'X': case 'Y':
182 case 'a': case 'b': case 'c': case 'd': case 'e':
183 case 'f': case 'g': case 'h': case 'i': case 'j':
184 case 'k': case 'l': case 'm': case 'n': case 'o':
185 case 'p': case 'q': case 'r': case 's': case 't':
186 case 'u': case 'v': case 'w': case 'x': case 'y':
190 while (isalnum(c
) || c
== '_' || c
== '.')
192 if (p
== token_buffer
+ maxtoken
)
193 p
= grow_token_buffer(p
);
201 symval
= getsym(token_buffer
);
204 case '0': case '1': case '2': case '3': case '4':
205 case '5': case '6': case '7': case '8': case '9':
211 numval
= numval
*10 + c
- '0';
221 /* parse the literal token and compute character code in code */
225 register int code
= 0;
231 if (c
<= '7' && c
>= '0')
233 while (c
<= '7' && c
>= '0')
235 code
= (code
* 8) + (c
- '0');
237 if (code
>= 256 || code
< 0)
238 fatals("malformatted literal token `\\%03o'", code
);
260 while ((c
<= '9' && c
>= '0')
261 || (c
>= 'a' && c
<= 'z')
262 || (c
>= 'A' && c
<= 'Z'))
265 if (c
<= '9' && c
>= '0')
267 else if (c
>= 'a' && c
<= 'z')
268 code
+= c
- 'a' + 10;
269 else if (c
>= 'A' && c
<= 'Z')
270 code
+= c
- 'A' + 10;
271 if (code
>= 256 || code
<0)/* JF this said if(c>=128) */
272 fatals("malformatted literal token `\\x%x'",code
);
281 else if (c
== '\"') /* JF this is a good idea */
285 if (c
>= 040 && c
<= 0177)
286 fatals ("unknown escape sequence `\\%c'", c
);
288 fatals ("unknown escape sequence: `\\' followed by char code 0x%x", c
);
300 fatal("multicharacter literal tokens not supported");
302 /* now fill token_buffer with the canonical name for this character
303 as a literal token. Do not use what the user typed,
304 so that '\012' and '\n' can be interchangeable. */
313 else if (code
== '\'')
318 else if (code
>= 040 && code
!= 0177)
320 else if (code
== '\t')
325 else if (code
== '\n')
330 else if (code
== '\r')
335 else if (code
== '\v')
340 else if (code
== '\b')
345 else if (code
== '\f')
352 *p
++ = code
/ 0100 + '0';
353 *p
++ = ((code
/ 010) & 07) + '0';
354 *p
++ = (code
& 07) + '0';
358 symval
= getsym(token_buffer
);
359 symval
->class = STOKEN
;
360 if (! symval
->user_token_number
)
361 symval
->user_token_number
= code
;
384 if (c
== '\n') lineno
++;
386 while(c
==' ' || c
=='\n' || c
=='\t');
401 if (c
== '\n' || c
== EOF
)
402 fatal("unterminated type name");
404 if (p
== token_buffer
+ maxtoken
)
405 p
= grow_token_buffer(p
);
415 return (parse_percent_token());
423 /* parse a token which starts with %. Assumes the % has already been read and discarded. */
426 parse_percent_token ()
437 return (TWO_PERCENTS
);
440 return (PERCENT_LEFT_CURLY
);
460 while (isalpha(c
) || c
== '_')
462 if (p
== token_buffer
+ maxtoken
)
463 p
= grow_token_buffer(p
);
473 if (strcmp(token_buffer
, "token") == 0
475 strcmp(token_buffer
, "term") == 0)
477 else if (strcmp(token_buffer
, "nterm") == 0)
479 else if (strcmp(token_buffer
, "type") == 0)
481 else if (strcmp(token_buffer
, "guard") == 0)
483 else if (strcmp(token_buffer
, "union") == 0)
485 else if (strcmp(token_buffer
, "expect") == 0)
487 else if (strcmp(token_buffer
, "start") == 0)
489 else if (strcmp(token_buffer
, "left") == 0)
491 else if (strcmp(token_buffer
, "right") == 0)
493 else if (strcmp(token_buffer
, "nonassoc") == 0
495 strcmp(token_buffer
, "binary") == 0)
497 else if (strcmp(token_buffer
, "semantic_parser") == 0)
498 return (SEMANTIC_PARSER
);
499 else if (strcmp(token_buffer
, "pure_parser") == 0)
500 return (PURE_PARSER
);
501 else if (strcmp(token_buffer
, "prec") == 0)
503 else return (ILLEGAL
);