]>
Commit | Line | Data |
---|---|---|
21d3294c | 1 | /* |
2 | ** $Id: llex.c,v 2.20.1.1 2007/12/27 13:02:25 roberto Exp $ | |
3 | ** Lexical Analyzer | |
4 | ** See Copyright Notice in lua.h | |
5 | */ | |
6 | ||
7 | ||
8 | #include <ctype.h> | |
9 | #include <locale.h> | |
10 | #include <string.h> | |
11 | ||
12 | #define llex_c | |
13 | #define LUA_CORE | |
14 | ||
15 | #include "lua.h" | |
16 | ||
17 | #include "ldo.h" | |
18 | #include "llex.h" | |
19 | #include "lobject.h" | |
20 | #include "lparser.h" | |
21 | #include "lstate.h" | |
22 | #include "lstring.h" | |
23 | #include "ltable.h" | |
24 | #include "lzio.h" | |
25 | ||
26 | ||
27 | ||
/*
** Advance the lexer: read the next character from the input stream
** `ls->z' into `ls->current' and yield it. The argument is fully
** parenthesized so any pointer expression can be passed safely.
*/
#define next(ls) ((ls)->current = zgetc((ls)->z))
29 | ||
30 | ||
31 | ||
32 | ||
/*
** True when the current character is a line break ('\n' or '\r').
** The argument is parenthesized so that expressions such as `&state'
** expand correctly.
*/
#define currIsNewline(ls) ((ls)->current == '\n' || (ls)->current == '\r')
34 | ||
35 | ||
/*
** Printable names of all tokens, indexed by (token - FIRST_RESERVED).
** ORDER RESERVED: entries must stay in this exact order.
*/
const char *const luaX_tokens [] = {
  /* reserved words */
  "and", "break", "do",
  "else", "elseif", "end",
  "false", "for", "function",
  "if", "in", "local",
  "nil", "not", "or",
  "repeat", "return", "then",
  "true", "until", "while",
  /* multi-character operators */
  "..", "...",
  "==", ">=", "<=", "~=",
  /* token classes */
  "<number>", "<name>", "<string>", "<eof>",
  /* end-of-table sentinel */
  NULL
};
46 | ||
47 | ||
/*
** Append the current character to the token buffer, then advance to
** the next input character. The argument is parenthesized for macro
** hygiene.
*/
#define save_and_next(ls) (save((ls), (ls)->current), next(ls))
49 | ||
50 | ||
51 | static void save (LexState *ls, int c) { | |
52 | Mbuffer *b = ls->buff; | |
53 | if (b->n + 1 > b->buffsize) { | |
54 | size_t newsize; | |
55 | if (b->buffsize >= MAX_SIZET/2) | |
56 | luaX_lexerror(ls, "lexical element too long", 0); | |
57 | newsize = b->buffsize * 2; | |
58 | luaZ_resizebuffer(ls->L, b, newsize); | |
59 | } | |
60 | b->buffer[b->n++] = cast(char, c); | |
61 | } | |
62 | ||
63 | ||
64 | void luaX_init (lua_State *L) { | |
65 | int i; | |
66 | for (i=0; i<NUM_RESERVED; i++) { | |
67 | TString *ts = luaS_new(L, luaX_tokens[i]); | |
68 | luaS_fix(ts); /* reserved words are never collected */ | |
69 | lua_assert(strlen(luaX_tokens[i])+1 <= TOKEN_LEN); | |
70 | ts->tsv.reserved = cast_byte(i+1); /* reserved word */ | |
71 | } | |
72 | } | |
73 | ||
74 | ||
/* size of the local buffer that holds the chunk id in error messages */
#define MAXSRC 80
76 | ||
77 | ||
78 | const char *luaX_token2str (LexState *ls, int token) { | |
79 | if (token < FIRST_RESERVED) { | |
80 | lua_assert(token == cast(unsigned char, token)); | |
81 | return (iscntrl(token)) ? luaO_pushfstring(ls->L, "char(%d)", token) : | |
82 | luaO_pushfstring(ls->L, "%c", token); | |
83 | } | |
84 | else | |
85 | return luaX_tokens[token-FIRST_RESERVED]; | |
86 | } | |
87 | ||
88 | ||
89 | static const char *txtToken (LexState *ls, int token) { | |
90 | switch (token) { | |
91 | case TK_NAME: | |
92 | case TK_STRING: | |
93 | case TK_NUMBER: | |
94 | save(ls, '\0'); | |
95 | return luaZ_buffer(ls->buff); | |
96 | default: | |
97 | return luaX_token2str(ls, token); | |
98 | } | |
99 | } | |
100 | ||
101 | ||
102 | void luaX_lexerror (LexState *ls, const char *msg, int token) { | |
103 | char buff[MAXSRC]; | |
104 | luaO_chunkid(buff, getstr(ls->source), MAXSRC); | |
105 | msg = luaO_pushfstring(ls->L, "%s:%d: %s", buff, ls->linenumber, msg); | |
106 | if (token) | |
107 | luaO_pushfstring(ls->L, "%s near " LUA_QS, msg, txtToken(ls, token)); | |
108 | luaD_throw(ls->L, LUA_ERRSYNTAX); | |
109 | } | |
110 | ||
111 | ||
112 | void luaX_syntaxerror (LexState *ls, const char *msg) { | |
113 | luaX_lexerror(ls, msg, ls->t.token); | |
114 | } | |
115 | ||
116 | ||
117 | TString *luaX_newstring (LexState *ls, const char *str, size_t l) { | |
118 | lua_State *L = ls->L; | |
119 | TString *ts = luaS_newlstr(L, str, l); | |
120 | TValue *o = luaH_setstr(L, ls->fs->h, ts); /* entry for `str' */ | |
121 | if (ttisnil(o)) | |
122 | setbvalue(o, 1); /* make sure `str' will not be collected */ | |
123 | return ts; | |
124 | } | |
125 | ||
126 | ||
127 | static void inclinenumber (LexState *ls) { | |
128 | int old = ls->current; | |
129 | lua_assert(currIsNewline(ls)); | |
130 | next(ls); /* skip `\n' or `\r' */ | |
131 | if (currIsNewline(ls) && ls->current != old) | |
132 | next(ls); /* skip `\n\r' or `\r\n' */ | |
133 | if (++ls->linenumber >= MAX_INT) | |
134 | luaX_syntaxerror(ls, "chunk has too many lines"); | |
135 | } | |
136 | ||
137 | ||
138 | void luaX_setinput (lua_State *L, LexState *ls, ZIO *z, TString *source) { | |
139 | ls->decpoint = '.'; | |
140 | ls->L = L; | |
141 | ls->lookahead.token = TK_EOS; /* no look-ahead token */ | |
142 | ls->z = z; | |
143 | ls->fs = NULL; | |
144 | ls->linenumber = 1; | |
145 | ls->lastline = 1; | |
146 | ls->source = source; | |
147 | luaZ_resizebuffer(ls->L, ls->buff, LUA_MINBUFFER); /* initialize buffer */ | |
148 | next(ls); /* read first char */ | |
149 | } | |
150 | ||
151 | ||
152 | ||
153 | /* | |
154 | ** ======================================================= | |
155 | ** LEXICAL ANALYZER | |
156 | ** ======================================================= | |
157 | */ | |
158 | ||
159 | ||
160 | ||
161 | static int check_next (LexState *ls, const char *set) { | |
162 | if (!strchr(set, ls->current)) | |
163 | return 0; | |
164 | save_and_next(ls); | |
165 | return 1; | |
166 | } | |
167 | ||
168 | ||
169 | static void buffreplace (LexState *ls, char from, char to) { | |
170 | size_t n = luaZ_bufflen(ls->buff); | |
171 | char *p = luaZ_buffer(ls->buff); | |
172 | while (n--) | |
173 | if (p[n] == from) p[n] = to; | |
174 | } | |
175 | ||
176 | ||
177 | static void trydecpoint (LexState *ls, SemInfo *seminfo) { | |
178 | /* format error: try to update decimal point separator */ | |
179 | struct lconv *cv = localeconv(); | |
180 | char old = ls->decpoint; | |
181 | ls->decpoint = (cv ? cv->decimal_point[0] : '.'); | |
182 | buffreplace(ls, old, ls->decpoint); /* try updated decimal separator */ | |
183 | if (!luaO_str2d(luaZ_buffer(ls->buff), &seminfo->r)) { | |
184 | /* format error with correct decimal point: no more options */ | |
185 | buffreplace(ls, ls->decpoint, '.'); /* undo change (for error message) */ | |
186 | luaX_lexerror(ls, "malformed number", TK_NUMBER); | |
187 | } | |
188 | } | |
189 | ||
190 | ||
191 | /* LUA_NUMBER */ | |
192 | static void read_numeral (LexState *ls, SemInfo *seminfo) { | |
193 | lua_assert(isdigit(ls->current)); | |
194 | do { | |
195 | save_and_next(ls); | |
196 | } while (isdigit(ls->current) || ls->current == '.'); | |
197 | if (check_next(ls, "Ee")) /* `E'? */ | |
198 | check_next(ls, "+-"); /* optional exponent sign */ | |
199 | while (isalnum(ls->current) || ls->current == '_') | |
200 | save_and_next(ls); | |
201 | save(ls, '\0'); | |
202 | buffreplace(ls, '.', ls->decpoint); /* follow locale for decimal point */ | |
203 | if (!luaO_str2d(luaZ_buffer(ls->buff), &seminfo->r)) /* format error? */ | |
204 | trydecpoint(ls, seminfo); /* try to update decimal point separator */ | |
205 | } | |
206 | ||
207 | ||
208 | static int skip_sep (LexState *ls) { | |
209 | int count = 0; | |
210 | int s = ls->current; | |
211 | lua_assert(s == '[' || s == ']'); | |
212 | save_and_next(ls); | |
213 | while (ls->current == '=') { | |
214 | save_and_next(ls); | |
215 | count++; | |
216 | } | |
217 | return (ls->current == s) ? count : (-count) - 1; | |
218 | } | |
219 | ||
220 | ||
221 | static void read_long_string (LexState *ls, SemInfo *seminfo, int sep) { | |
222 | int cont = 0; | |
223 | (void)(cont); /* avoid warnings when `cont' is not used */ | |
224 | save_and_next(ls); /* skip 2nd `[' */ | |
225 | if (currIsNewline(ls)) /* string starts with a newline? */ | |
226 | inclinenumber(ls); /* skip it */ | |
227 | for (;;) { | |
228 | switch (ls->current) { | |
229 | case EOZ: | |
230 | luaX_lexerror(ls, (seminfo) ? "unfinished long string" : | |
231 | "unfinished long comment", TK_EOS); | |
232 | break; /* to avoid warnings */ | |
233 | #if defined(LUA_COMPAT_LSTR) | |
234 | case '[': { | |
235 | if (skip_sep(ls) == sep) { | |
236 | save_and_next(ls); /* skip 2nd `[' */ | |
237 | cont++; | |
238 | #if LUA_COMPAT_LSTR == 1 | |
239 | if (sep == 0) | |
240 | luaX_lexerror(ls, "nesting of [[...]] is deprecated", '['); | |
241 | #endif | |
242 | } | |
243 | break; | |
244 | } | |
245 | #endif | |
246 | case ']': { | |
247 | if (skip_sep(ls) == sep) { | |
248 | save_and_next(ls); /* skip 2nd `]' */ | |
249 | #if defined(LUA_COMPAT_LSTR) && LUA_COMPAT_LSTR == 2 | |
250 | cont--; | |
251 | if (sep == 0 && cont >= 0) break; | |
252 | #endif | |
253 | goto endloop; | |
254 | } | |
255 | break; | |
256 | } | |
257 | case '\n': | |
258 | case '\r': { | |
259 | save(ls, '\n'); | |
260 | inclinenumber(ls); | |
261 | if (!seminfo) luaZ_resetbuffer(ls->buff); /* avoid wasting space */ | |
262 | break; | |
263 | } | |
264 | default: { | |
265 | if (seminfo) save_and_next(ls); | |
266 | else next(ls); | |
267 | } | |
268 | } | |
269 | } endloop: | |
270 | if (seminfo) | |
271 | seminfo->ts = luaX_newstring(ls, luaZ_buffer(ls->buff) + (2 + sep), | |
272 | luaZ_bufflen(ls->buff) - 2*(2 + sep)); | |
273 | } | |
274 | ||
275 | ||
276 | static void read_string (LexState *ls, int del, SemInfo *seminfo) { | |
277 | save_and_next(ls); | |
278 | while (ls->current != del) { | |
279 | switch (ls->current) { | |
280 | case EOZ: | |
281 | luaX_lexerror(ls, "unfinished string", TK_EOS); | |
282 | continue; /* to avoid warnings */ | |
283 | case '\n': | |
284 | case '\r': | |
285 | luaX_lexerror(ls, "unfinished string", TK_STRING); | |
286 | continue; /* to avoid warnings */ | |
287 | case '\\': { | |
288 | int c; | |
289 | next(ls); /* do not save the `\' */ | |
290 | switch (ls->current) { | |
291 | case 'a': c = '\a'; break; | |
292 | case 'b': c = '\b'; break; | |
293 | case 'f': c = '\f'; break; | |
294 | case 'n': c = '\n'; break; | |
295 | case 'r': c = '\r'; break; | |
296 | case 't': c = '\t'; break; | |
297 | case 'v': c = '\v'; break; | |
298 | case '\n': /* go through */ | |
299 | case '\r': save(ls, '\n'); inclinenumber(ls); continue; | |
300 | case EOZ: continue; /* will raise an error next loop */ | |
301 | default: { | |
302 | if (!isdigit(ls->current)) | |
303 | save_and_next(ls); /* handles \\, \", \', and \? */ | |
304 | else { /* \xxx */ | |
305 | int i = 0; | |
306 | c = 0; | |
307 | do { | |
308 | c = 10*c + (ls->current-'0'); | |
309 | next(ls); | |
310 | } while (++i<3 && isdigit(ls->current)); | |
311 | if (c > UCHAR_MAX) | |
312 | luaX_lexerror(ls, "escape sequence too large", TK_STRING); | |
313 | save(ls, c); | |
314 | } | |
315 | continue; | |
316 | } | |
317 | } | |
318 | save(ls, c); | |
319 | next(ls); | |
320 | continue; | |
321 | } | |
322 | default: | |
323 | save_and_next(ls); | |
324 | } | |
325 | } | |
326 | save_and_next(ls); /* skip delimiter */ | |
327 | seminfo->ts = luaX_newstring(ls, luaZ_buffer(ls->buff) + 1, | |
328 | luaZ_bufflen(ls->buff) - 2); | |
329 | } | |
330 | ||
331 | ||
332 | static int llex (LexState *ls, SemInfo *seminfo) { | |
333 | luaZ_resetbuffer(ls->buff); | |
334 | for (;;) { | |
335 | switch (ls->current) { | |
336 | case '\n': | |
337 | case '\r': { | |
338 | inclinenumber(ls); | |
339 | continue; | |
340 | } | |
341 | case '-': { | |
342 | next(ls); | |
343 | if (ls->current != '-') return '-'; | |
344 | /* else is a comment */ | |
345 | next(ls); | |
346 | if (ls->current == '[') { | |
347 | int sep = skip_sep(ls); | |
348 | luaZ_resetbuffer(ls->buff); /* `skip_sep' may dirty the buffer */ | |
349 | if (sep >= 0) { | |
350 | read_long_string(ls, NULL, sep); /* long comment */ | |
351 | luaZ_resetbuffer(ls->buff); | |
352 | continue; | |
353 | } | |
354 | } | |
355 | /* else short comment */ | |
356 | while (!currIsNewline(ls) && ls->current != EOZ) | |
357 | next(ls); | |
358 | continue; | |
359 | } | |
360 | case '[': { | |
361 | int sep = skip_sep(ls); | |
362 | if (sep >= 0) { | |
363 | read_long_string(ls, seminfo, sep); | |
364 | return TK_STRING; | |
365 | } | |
366 | else if (sep == -1) return '['; | |
367 | else luaX_lexerror(ls, "invalid long string delimiter", TK_STRING); | |
368 | } | |
369 | case '=': { | |
370 | next(ls); | |
371 | if (ls->current != '=') return '='; | |
372 | else { next(ls); return TK_EQ; } | |
373 | } | |
374 | case '<': { | |
375 | next(ls); | |
376 | if (ls->current != '=') return '<'; | |
377 | else { next(ls); return TK_LE; } | |
378 | } | |
379 | case '>': { | |
380 | next(ls); | |
381 | if (ls->current != '=') return '>'; | |
382 | else { next(ls); return TK_GE; } | |
383 | } | |
384 | case '~': { | |
385 | next(ls); | |
386 | if (ls->current != '=') return '~'; | |
387 | else { next(ls); return TK_NE; } | |
388 | } | |
389 | case '"': | |
390 | case '\'': { | |
391 | read_string(ls, ls->current, seminfo); | |
392 | return TK_STRING; | |
393 | } | |
394 | case '.': { | |
395 | save_and_next(ls); | |
396 | if (check_next(ls, ".")) { | |
397 | if (check_next(ls, ".")) | |
398 | return TK_DOTS; /* ... */ | |
399 | else return TK_CONCAT; /* .. */ | |
400 | } | |
401 | else if (!isdigit(ls->current)) return '.'; | |
402 | else { | |
403 | read_numeral(ls, seminfo); | |
404 | return TK_NUMBER; | |
405 | } | |
406 | } | |
407 | case EOZ: { | |
408 | return TK_EOS; | |
409 | } | |
410 | default: { | |
411 | if (isspace(ls->current)) { | |
412 | lua_assert(!currIsNewline(ls)); | |
413 | next(ls); | |
414 | continue; | |
415 | } | |
416 | else if (isdigit(ls->current)) { | |
417 | read_numeral(ls, seminfo); | |
418 | return TK_NUMBER; | |
419 | } | |
420 | else if (isalpha(ls->current) || ls->current == '_') { | |
421 | /* identifier or reserved word */ | |
422 | TString *ts; | |
423 | do { | |
424 | save_and_next(ls); | |
425 | } while (isalnum(ls->current) || ls->current == '_'); | |
426 | ts = luaX_newstring(ls, luaZ_buffer(ls->buff), | |
427 | luaZ_bufflen(ls->buff)); | |
428 | if (ts->tsv.reserved > 0) /* reserved word? */ | |
429 | return ts->tsv.reserved - 1 + FIRST_RESERVED; | |
430 | else { | |
431 | seminfo->ts = ts; | |
432 | return TK_NAME; | |
433 | } | |
434 | } | |
435 | else { | |
436 | int c = ls->current; | |
437 | next(ls); | |
438 | return c; /* single-char tokens (+ - / ...) */ | |
439 | } | |
440 | } | |
441 | } | |
442 | } | |
443 | } | |
444 | ||
445 | ||
446 | void luaX_next (LexState *ls) { | |
447 | ls->lastline = ls->linenumber; | |
448 | if (ls->lookahead.token != TK_EOS) { /* is there a look-ahead token? */ | |
449 | ls->t = ls->lookahead; /* use this one */ | |
450 | ls->lookahead.token = TK_EOS; /* and discharge it */ | |
451 | } | |
452 | else | |
453 | ls->t.token = llex(ls, &ls->t.seminfo); /* read next token */ | |
454 | } | |
455 | ||
456 | ||
457 | void luaX_lookahead (LexState *ls) { | |
458 | lua_assert(ls->lookahead.token == TK_EOS); | |
459 | ls->lookahead.token = llex(ls, &ls->lookahead.seminfo); | |
460 | } | |
461 |