]> git.saurik.com Git - apple/boot.git/blob - i386/nasm/parser.c
7105f8f744f739382868297efd5a5a7dc35fb24c
[apple/boot.git] / i386 / nasm / parser.c
1 /*
2 * Copyright (c) 1999 Apple Computer, Inc. All rights reserved.
3 *
4 * @APPLE_LICENSE_HEADER_START@
5 *
6 * Copyright (c) 1999-2003 Apple Computer, Inc. All Rights Reserved.
7 *
8 * This file contains Original Code and/or Modifications of Original Code
9 * as defined in and that are subject to the Apple Public Source License
10 * Version 2.0 (the 'License'). You may not use this file except in
11 * compliance with the License. Please obtain a copy of the License at
12 * http://www.opensource.apple.com/apsl/ and read it before using this
13 * file.
14 *
15 * The Original Code and all software distributed under the License are
16 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
17 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
18 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
19 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
20 * Please see the License for the specific language governing rights and
21 * limitations under the License.
22 *
23 * @APPLE_LICENSE_HEADER_END@
24 */
25 /* parser.c source line parser for the Netwide Assembler
26 *
27 * The Netwide Assembler is copyright (C) 1996 Simon Tatham and
28 * Julian Hall. All rights reserved. The software is
29 * redistributable under the licence given in the file "Licence"
30 * distributed in the NASM archive.
31 *
32 * initial version 27/iii/95 by Simon Tatham
33 */
34
35 #include <stdio.h>
36 #include <stdlib.h>
37 #include <stddef.h>
38 #include <string.h>
39 #include <ctype.h>
40
41 #include "nasm.h"
42 #include "nasmlib.h"
43 #include "parser.h"
44 #include "float.h"
45
46 static long reg_flags[] = { /* sizes and special flags */
47 0, REG8, REG_AL, REG_AX, REG8, REG8, REG16, REG16, REG8, REG_CL,
48 REG_CREG, REG_CREG, REG_CREG, REG_CR4, REG_CS, REG_CX, REG8,
49 REG16, REG8, REG_DREG, REG_DREG, REG_DREG, REG_DREG, REG_DREG,
50 REG_DREG, REG_DESS, REG_DX, REG_EAX, REG32, REG32, REG_ECX,
51 REG32, REG32, REG_DESS, REG32, REG32, REG_FSGS, REG_FSGS,
52 MMXREG, MMXREG, MMXREG, MMXREG, MMXREG, MMXREG, MMXREG, MMXREG,
53 REG16, REG16, REG_DESS, FPU0, FPUREG, FPUREG, FPUREG, FPUREG,
54 FPUREG, FPUREG, FPUREG, REG_TREG, REG_TREG, REG_TREG, REG_TREG,
55 REG_TREG
56 };
57
/*
 * Values carried in tokval.t_integer when the scanner returns
 * TOKEN_SPECIAL.  The numbering is implicit (0, 1, 2, ...), so the
 * order of the enumerators must not be changed.
 * NOTE(review): presumably this order has to match the scanner's own
 * table of special keywords -- confirm against the scanner source.
 */
enum {
    S_BYTE,
    S_DWORD,
    S_FAR,
    S_LONG,
    S_NEAR,
    S_NOSPLIT,
    S_QWORD,
    S_SHORT,
    S_TO,
    S_TWORD,
    S_WORD
};
62
63 static int is_comma_next (void);
64
65 static int i;
66 static struct tokenval tokval;
67 static efunc error;
68
69 insn *parse_line (int pass, char *buffer, insn *result,
70 efunc errfunc, evalfunc evaluate, evalinfofunc einfo) {
71 int operand;
72 int critical;
73 struct eval_hints hints;
74
75 result->forw_ref = FALSE;
76 error = errfunc;
77 einfo ("", 0L, 0L);
78
79 stdscan_reset();
80 stdscan_bufptr = buffer;
81 i = stdscan(NULL, &tokval);
82
83 result->eops = NULL; /* must do this, whatever happens */
84 result->operands = 0; /* must initialise this */
85
86 if (i==0) { /* blank line - ignore */
87 result->label = NULL; /* so, no label on it */
88 result->opcode = -1; /* and no instruction either */
89 return result;
90 }
91 if (i != TOKEN_ID && i != TOKEN_INSN && i != TOKEN_PREFIX &&
92 (i!=TOKEN_REG || (REG_SREG & ~reg_flags[tokval.t_integer]))) {
93 error (ERR_NONFATAL, "label or instruction expected"
94 " at start of line");
95 result->label = NULL;
96 result->opcode = -1;
97 return result;
98 }
99
100 if (i == TOKEN_ID) { /* there's a label here */
101 result->label = tokval.t_charptr;
102 einfo (result->label, 0L, 0L);
103 i = stdscan(NULL, &tokval);
104 if (i == ':') { /* skip over the optional colon */
105 i = stdscan(NULL, &tokval);
106 } else if (i == 0 && pass == 1) {
107 error (ERR_WARNING|ERR_WARN_OL,
108 "label alone on a line without a colon might be in error");
109 }
110 } else /* no label; so, moving swiftly on */
111 result->label = NULL;
112
113 if (i==0) {
114 result->opcode = -1; /* this line contains just a label */
115 return result;
116 }
117
118 result->nprefix = 0;
119 result->times = 1L;
120
121 while (i == TOKEN_PREFIX ||
122 (i==TOKEN_REG && !(REG_SREG & ~reg_flags[tokval.t_integer]))) {
123 /*
124 * Handle special case: the TIMES prefix.
125 */
126 if (i == TOKEN_PREFIX && tokval.t_integer == P_TIMES) {
127 expr *value;
128
129 i = stdscan(NULL, &tokval);
130 value = evaluate (stdscan, NULL, &tokval, NULL, pass, error, NULL);
131 i = tokval.t_type;
132 if (!value) { /* but, error in evaluator */
133 result->opcode = -1; /* unrecoverable parse error: */
134 return result; /* ignore this instruction */
135 }
136 if (!is_simple (value)) {
137 error (ERR_NONFATAL,
138 "non-constant argument supplied to TIMES");
139 result->times = 1L;
140 } else {
141 result->times = value->value;
142 if (value->value < 0)
143 error(ERR_NONFATAL, "TIMES value %d is negative",
144 value->value);
145 }
146 } else {
147 if (result->nprefix == MAXPREFIX)
148 error (ERR_NONFATAL,
149 "instruction has more than %d prefixes", MAXPREFIX);
150 else
151 result->prefixes[result->nprefix++] = tokval.t_integer;
152 i = stdscan(NULL, &tokval);
153 }
154 }
155
156 if (i != TOKEN_INSN) {
157 if (result->nprefix > 0 && i == 0) {
158 /*
159 * Instruction prefixes are present, but no actual
160 * instruction. This is allowed: at this point we
161 * invent a notional instruction of RESB 0.
162 */
163 result->opcode = I_RESB;
164 result->operands = 1;
165 result->oprs[0].type = IMMEDIATE;
166 result->oprs[0].offset = 0L;
167 result->oprs[0].segment = result->oprs[0].wrt = NO_SEG;
168 return result;
169 } else {
170 error (ERR_NONFATAL, "parser: instruction expected");
171 result->opcode = -1;
172 return result;
173 }
174 }
175
176 result->opcode = tokval.t_integer;
177 result->condition = tokval.t_inttwo;
178
179 /*
180 * RESB, RESW and RESD cannot be satisfied with incorrectly
181 * evaluated operands, since the correct values _must_ be known
182 * on the first pass. Hence, even in pass one, we set the
183 * `critical' flag on calling evaluate(), so that it will bomb
184 * out on undefined symbols. Nasty, but there's nothing we can
185 * do about it.
186 *
187 * For the moment, EQU has the same difficulty, so we'll
188 * include that.
189 */
190 if (result->opcode == I_RESB ||
191 result->opcode == I_RESW ||
192 result->opcode == I_RESD ||
193 result->opcode == I_RESQ ||
194 result->opcode == I_REST ||
195 result->opcode == I_EQU)
196 critical = pass;
197 else
198 critical = (pass==2 ? 2 : 0);
199
200 if (result->opcode == I_DB ||
201 result->opcode == I_DW ||
202 result->opcode == I_DD ||
203 result->opcode == I_DQ ||
204 result->opcode == I_DT ||
205 result->opcode == I_INCBIN) {
206 extop *eop, **tail = &result->eops, **fixptr;
207 int oper_num = 0;
208
209 /*
210 * Begin to read the DB/DW/DD/DQ/DT operands.
211 */
212 while (1) {
213 i = stdscan(NULL, &tokval);
214 if (i == 0)
215 break;
216 fixptr = tail;
217 eop = *tail = nasm_malloc(sizeof(extop));
218 tail = &eop->next;
219 eop->next = NULL;
220 eop->type = EOT_NOTHING;
221 oper_num++;
222
223 if (i == TOKEN_NUM && tokval.t_charptr && is_comma_next()) {
224 eop->type = EOT_DB_STRING;
225 eop->stringval = tokval.t_charptr;
226 eop->stringlen = tokval.t_inttwo;
227 i = stdscan(NULL, &tokval); /* eat the comma */
228 continue;
229 }
230
231 if (i == TOKEN_FLOAT || i == '-') {
232 long sign = +1L;
233
234 if (i == '-') {
235 char *save = stdscan_bufptr;
236 i = stdscan(NULL, &tokval);
237 sign = -1L;
238 if (i != TOKEN_FLOAT) {
239 stdscan_bufptr = save;
240 i = tokval.t_type = '-';
241 }
242 }
243
244 if (i == TOKEN_FLOAT) {
245 eop->type = EOT_DB_STRING;
246 if (result->opcode == I_DD)
247 eop->stringlen = 4;
248 else if (result->opcode == I_DQ)
249 eop->stringlen = 8;
250 else if (result->opcode == I_DT)
251 eop->stringlen = 10;
252 else {
253 error(ERR_NONFATAL, "floating-point constant"
254 " encountered in `D%c' instruction",
255 result->opcode == I_DW ? 'W' : 'B');
256 eop->type = EOT_NOTHING;
257 }
258 eop = nasm_realloc(eop, sizeof(extop)+eop->stringlen);
259 tail = &eop->next;
260 *fixptr = eop;
261 eop->stringval = (char *)eop + sizeof(extop);
262 if (!float_const (tokval.t_charptr, sign,
263 (unsigned char *)eop->stringval,
264 eop->stringlen, error))
265 eop->type = EOT_NOTHING;
266 i = stdscan(NULL, &tokval); /* eat the comma */
267 continue;
268 }
269 }
270
271 /* anything else */ {
272 expr *value;
273 value = evaluate (stdscan, NULL, &tokval, NULL,
274 critical, error, NULL);
275 i = tokval.t_type;
276 if (!value) { /* error in evaluator */
277 result->opcode = -1;/* unrecoverable parse error: */
278 return result; /* ignore this instruction */
279 }
280 if (is_unknown(value)) {
281 eop->type = EOT_DB_NUMBER;
282 eop->offset = 0; /* doesn't matter what we put */
283 eop->segment = eop->wrt = NO_SEG; /* likewise */
284 } else if (is_reloc(value)) {
285 eop->type = EOT_DB_NUMBER;
286 eop->offset = reloc_value(value);
287 eop->segment = reloc_seg(value);
288 eop->wrt = reloc_wrt(value);
289 } else {
290 error (ERR_NONFATAL,
291 "operand %d: expression is not simple"
292 " or relocatable", oper_num);
293 }
294 }
295
296 /*
297 * We're about to call stdscan(), which will eat the
298 * comma that we're currently sitting on between
299 * arguments. However, we'd better check first that it
300 * _is_ a comma.
301 */
302 if (i == 0) /* also could be EOL */
303 break;
304 if (i != ',') {
305 error (ERR_NONFATAL, "comma expected after operand %d",
306 oper_num);
307 result->opcode = -1;/* unrecoverable parse error: */
308 return result; /* ignore this instruction */
309 }
310 }
311
312 if (result->opcode == I_INCBIN) {
313 /*
314 * Correct syntax for INCBIN is that there should be
315 * one string operand, followed by one or two numeric
316 * operands.
317 */
318 if (!result->eops || result->eops->type != EOT_DB_STRING)
319 error (ERR_NONFATAL, "`incbin' expects a file name");
320 else if (result->eops->next &&
321 result->eops->next->type != EOT_DB_NUMBER)
322 error (ERR_NONFATAL, "`incbin': second parameter is",
323 " non-numeric");
324 else if (result->eops->next && result->eops->next->next &&
325 result->eops->next->next->type != EOT_DB_NUMBER)
326 error (ERR_NONFATAL, "`incbin': third parameter is",
327 " non-numeric");
328 else if (result->eops->next && result->eops->next->next &&
329 result->eops->next->next->next)
330 error (ERR_NONFATAL, "`incbin': more than three parameters");
331 else
332 return result;
333 /*
334 * If we reach here, one of the above errors happened.
335 * Throw the instruction away.
336 */
337 result->opcode = -1;
338 return result;
339 }
340
341 return result;
342 }
343
344 /* right. Now we begin to parse the operands. There may be up to three
345 * of these, separated by commas, and terminated by a zero token. */
346
347 for (operand = 0; operand < 3; operand++) {
348 expr *value; /* used most of the time */
349 int mref; /* is this going to be a memory ref? */
350 int bracket; /* is it a [] mref, or a & mref? */
351
352 result->oprs[operand].addr_size = 0;/* have to zero this whatever */
353 result->oprs[operand].eaflags = 0; /* and this */
354 i = stdscan(NULL, &tokval);
355 if (i == 0) break; /* end of operands: get out of here */
356 result->oprs[operand].type = 0; /* so far, no override */
357 while (i == TOKEN_SPECIAL) {/* size specifiers */
358 switch ((int)tokval.t_integer) {
359 case S_BYTE:
360 result->oprs[operand].type |= BITS8;
361 break;
362 case S_WORD:
363 result->oprs[operand].type |= BITS16;
364 break;
365 case S_DWORD:
366 case S_LONG:
367 result->oprs[operand].type |= BITS32;
368 break;
369 case S_QWORD:
370 result->oprs[operand].type |= BITS64;
371 break;
372 case S_TWORD:
373 result->oprs[operand].type |= BITS80;
374 break;
375 case S_TO:
376 result->oprs[operand].type |= TO;
377 break;
378 case S_FAR:
379 result->oprs[operand].type |= FAR;
380 break;
381 case S_NEAR:
382 result->oprs[operand].type |= NEAR;
383 break;
384 case S_SHORT:
385 result->oprs[operand].type |= SHORT;
386 break;
387 }
388 i = stdscan(NULL, &tokval);
389 }
390
391 if (i == '[' || i == '&') { /* memory reference */
392 mref = TRUE;
393 bracket = (i == '[');
394 i = stdscan(NULL, &tokval);
395 if (i == TOKEN_SPECIAL) { /* check for address size override */
396 switch ((int)tokval.t_integer) {
397 case S_NOSPLIT:
398 result->oprs[operand].eaflags |= EAF_TIMESTWO;
399 break;
400 case S_BYTE:
401 result->oprs[operand].eaflags |= EAF_BYTEOFFS;
402 break;
403 case S_WORD:
404 result->oprs[operand].addr_size = 16;
405 result->oprs[operand].eaflags |= EAF_WORDOFFS;
406 break;
407 case S_DWORD:
408 case S_LONG:
409 result->oprs[operand].addr_size = 32;
410 result->oprs[operand].eaflags |= EAF_WORDOFFS;
411 break;
412 default:
413 error (ERR_NONFATAL, "invalid size specification in"
414 " effective address");
415 }
416 i = stdscan(NULL, &tokval);
417 }
418 } else { /* immediate operand, or register */
419 mref = FALSE;
420 bracket = FALSE; /* placate optimisers */
421 }
422
423 value = evaluate (stdscan, NULL, &tokval,
424 &result->forw_ref, critical, error, &hints);
425 i = tokval.t_type;
426 if (!value) { /* error in evaluator */
427 result->opcode = -1; /* unrecoverable parse error: */
428 return result; /* ignore this instruction */
429 }
430 if (i == ':' && mref) { /* it was seg:offset */
431 /*
432 * Process the segment override.
433 */
434 if (value[1].type!=0 || value->value!=1 ||
435 REG_SREG & ~reg_flags[value->type])
436 error (ERR_NONFATAL, "invalid segment override");
437 else if (result->nprefix == MAXPREFIX)
438 error (ERR_NONFATAL,
439 "instruction has more than %d prefixes",
440 MAXPREFIX);
441 else
442 result->prefixes[result->nprefix++] = value->type;
443
444 i = stdscan(NULL, &tokval); /* then skip the colon */
445 if (i == TOKEN_SPECIAL) { /* another check for size override */
446 switch ((int)tokval.t_integer) {
447 case S_WORD:
448 result->oprs[operand].addr_size = 16;
449 break;
450 case S_DWORD:
451 case S_LONG:
452 result->oprs[operand].addr_size = 32;
453 break;
454 default:
455 error (ERR_NONFATAL, "invalid size specification in"
456 " effective address");
457 }
458 i = stdscan(NULL, &tokval);
459 }
460 value = evaluate (stdscan, NULL, &tokval,
461 &result->forw_ref, critical, error, &hints);
462 i = tokval.t_type;
463 /* and get the offset */
464 if (!value) { /* but, error in evaluator */
465 result->opcode = -1; /* unrecoverable parse error: */
466 return result; /* ignore this instruction */
467 }
468 }
469 if (mref && bracket) { /* find ] at the end */
470 if (i != ']') {
471 error (ERR_NONFATAL, "parser: expecting ]");
472 do { /* error recovery again */
473 i = stdscan(NULL, &tokval);
474 } while (i != 0 && i != ',');
475 } else /* we got the required ] */
476 i = stdscan(NULL, &tokval);
477 } else { /* immediate operand */
478 if (i != 0 && i != ',' && i != ':') {
479 error (ERR_NONFATAL, "comma or end of line expected");
480 do { /* error recovery */
481 i = stdscan(NULL, &tokval);
482 } while (i != 0 && i != ',');
483 } else if (i == ':') {
484 result->oprs[operand].type |= COLON;
485 }
486 }
487
488 /* now convert the exprs returned from evaluate() into operand
489 * descriptions... */
490
491 if (mref) { /* it's a memory reference */
492 expr *e = value;
493 int b, i, s; /* basereg, indexreg, scale */
494 long o; /* offset */
495
496 b = i = -1, o = s = 0;
497 result->oprs[operand].hintbase = hints.base;
498 result->oprs[operand].hinttype = hints.type;
499
500 if (e->type <= EXPR_REG_END) { /* this bit's a register */
501 if (e->value == 1) /* in fact it can be basereg */
502 b = e->type;
503 else /* no, it has to be indexreg */
504 i = e->type, s = e->value;
505 e++;
506 }
507 if (e->type && e->type <= EXPR_REG_END) {/* it's a 2nd register */
508 if (e->value != 1) { /* it has to be indexreg */
509 if (i != -1) { /* but it can't be */
510 error(ERR_NONFATAL, "invalid effective address");
511 result->opcode = -1;
512 return result;
513 } else
514 i = e->type, s = e->value;
515 } else { /* it can be basereg */
516 if (b != -1) /* or can it? */
517 i = e->type, s = 1;
518 else
519 b = e->type;
520 }
521 e++;
522 }
523 if (e->type != 0) { /* is there an offset? */
524 if (e->type <= EXPR_REG_END) {/* in fact, is there an error? */
525 error (ERR_NONFATAL, "invalid effective address");
526 result->opcode = -1;
527 return result;
528 } else {
529 if (e->type == EXPR_UNKNOWN) {
530 o = 0; /* doesn't matter what */
531 result->oprs[operand].wrt = NO_SEG; /* nor this */
532 result->oprs[operand].segment = NO_SEG; /* or this */
533 while (e->type) e++; /* go to the end of the line */
534 } else {
535 if (e->type == EXPR_SIMPLE) {
536 o = e->value;
537 e++;
538 }
539 if (e->type == EXPR_WRT) {
540 result->oprs[operand].wrt = e->value;
541 e++;
542 } else
543 result->oprs[operand].wrt = NO_SEG;
544 /*
545 * Look for a segment base type.
546 */
547 if (e->type && e->type < EXPR_SEGBASE) {
548 error (ERR_NONFATAL, "invalid effective address");
549 result->opcode = -1;
550 return result;
551 }
552 while (e->type && e->value == 0)
553 e++;
554 if (e->type && e->value != 1) {
555 error (ERR_NONFATAL, "invalid effective address");
556 result->opcode = -1;
557 return result;
558 }
559 if (e->type) {
560 result->oprs[operand].segment =
561 e->type - EXPR_SEGBASE;
562 e++;
563 } else
564 result->oprs[operand].segment = NO_SEG;
565 while (e->type && e->value == 0)
566 e++;
567 if (e->type) {
568 error (ERR_NONFATAL, "invalid effective address");
569 result->opcode = -1;
570 return result;
571 }
572 }
573 }
574 } else {
575 o = 0;
576 result->oprs[operand].wrt = NO_SEG;
577 result->oprs[operand].segment = NO_SEG;
578 }
579
580 if (e->type != 0) { /* there'd better be nothing left! */
581 error (ERR_NONFATAL, "invalid effective address");
582 result->opcode = -1;
583 return result;
584 }
585
586 result->oprs[operand].type |= MEMORY;
587 if (b==-1 && (i==-1 || s==0))
588 result->oprs[operand].type |= MEM_OFFS;
589 result->oprs[operand].basereg = b;
590 result->oprs[operand].indexreg = i;
591 result->oprs[operand].scale = s;
592 result->oprs[operand].offset = o;
593 } else { /* it's not a memory reference */
594 if (is_just_unknown(value)) { /* it's immediate but unknown */
595 result->oprs[operand].type |= IMMEDIATE;
596 result->oprs[operand].offset = 0; /* don't care */
597 result->oprs[operand].segment = NO_SEG; /* don't care again */
598 result->oprs[operand].wrt = NO_SEG;/* still don't care */
599 } else if (is_reloc(value)) { /* it's immediate */
600 result->oprs[operand].type |= IMMEDIATE;
601 result->oprs[operand].offset = reloc_value(value);
602 result->oprs[operand].segment = reloc_seg(value);
603 result->oprs[operand].wrt = reloc_wrt(value);
604 if (is_simple(value) && reloc_value(value)==1)
605 result->oprs[operand].type |= UNITY;
606 } else { /* it's a register */
607 if (value->type>=EXPR_SIMPLE || value->value!=1) {
608 error (ERR_NONFATAL, "invalid operand type");
609 result->opcode = -1;
610 return result;
611 }
612 /* clear overrides, except TO which applies to FPU regs */
613 result->oprs[operand].type &= TO;
614 result->oprs[operand].type |= REGISTER;
615 result->oprs[operand].type |= reg_flags[value->type];
616 result->oprs[operand].basereg = value->type;
617 }
618 }
619 }
620
621 result->operands = operand; /* set operand count */
622
623 while (operand<3) /* clear remaining operands */
624 result->oprs[operand++].type = 0;
625
626 /*
627 * Transform RESW, RESD, RESQ, REST into RESB.
628 */
629 switch (result->opcode) {
630 case I_RESW: result->opcode=I_RESB; result->oprs[0].offset*=2; break;
631 case I_RESD: result->opcode=I_RESB; result->oprs[0].offset*=4; break;
632 case I_RESQ: result->opcode=I_RESB; result->oprs[0].offset*=8; break;
633 case I_REST: result->opcode=I_RESB; result->oprs[0].offset*=10; break;
634 }
635
636 return result;
637 }
638
639 static int is_comma_next (void) {
640 char *p;
641 int i;
642 struct tokenval tv;
643
644 p = stdscan_bufptr;
645 i = stdscan (NULL, &tv);
646 stdscan_bufptr = p;
647 return (i == ',' || i == ';' || !i);
648 }
649
650 void cleanup_insn (insn *i) {
651 extop *e;
652
653 while (i->eops) {
654 e = i->eops;
655 i->eops = i->eops->next;
656 nasm_free (e);
657 }
658 }