]>
git.saurik.com Git - apple/boot.git/blob - i386/nasm/parser.c
7105f8f744f739382868297efd5a5a7dc35fb24c
2 * Copyright (c) 1999 Apple Computer, Inc. All rights reserved.
4 * @APPLE_LICENSE_HEADER_START@
6 * Copyright (c) 1999-2003 Apple Computer, Inc. All Rights Reserved.
8 * This file contains Original Code and/or Modifications of Original Code
9 * as defined in and that are subject to the Apple Public Source License
10 * Version 2.0 (the 'License'). You may not use this file except in
11 * compliance with the License. Please obtain a copy of the License at
12 * http://www.opensource.apple.com/apsl/ and read it before using this
15 * The Original Code and all software distributed under the License are
16 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
17 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
18 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
19 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
20 * Please see the License for the specific language governing rights and
21 * limitations under the License.
23 * @APPLE_LICENSE_HEADER_END@
25 /* parser.c source line parser for the Netwide Assembler
27 * The Netwide Assembler is copyright (C) 1996 Simon Tatham and
28 * Julian Hall. All rights reserved. The software is
29 * redistributable under the licence given in the file "Licence"
30 * distributed in the NASM archive.
32 * initial version 27/iii/95 by Simon Tatham
46 static long reg_flags
[] = { /* sizes and special flags */
47 0, REG8
, REG_AL
, REG_AX
, REG8
, REG8
, REG16
, REG16
, REG8
, REG_CL
,
48 REG_CREG
, REG_CREG
, REG_CREG
, REG_CR4
, REG_CS
, REG_CX
, REG8
,
49 REG16
, REG8
, REG_DREG
, REG_DREG
, REG_DREG
, REG_DREG
, REG_DREG
,
50 REG_DREG
, REG_DESS
, REG_DX
, REG_EAX
, REG32
, REG32
, REG_ECX
,
51 REG32
, REG32
, REG_DESS
, REG32
, REG32
, REG_FSGS
, REG_FSGS
,
52 MMXREG
, MMXREG
, MMXREG
, MMXREG
, MMXREG
, MMXREG
, MMXREG
, MMXREG
,
53 REG16
, REG16
, REG_DESS
, FPU0
, FPUREG
, FPUREG
, FPUREG
, FPUREG
,
54 FPUREG
, FPUREG
, FPUREG
, REG_TREG
, REG_TREG
, REG_TREG
, REG_TREG
,
58 enum { /* special tokens */
59 S_BYTE
, S_DWORD
, S_FAR
, S_LONG
, S_NEAR
, S_NOSPLIT
, S_QWORD
,
60 S_SHORT
, S_TO
, S_TWORD
, S_WORD
63 static int is_comma_next (void);
66 static struct tokenval tokval
;
69 insn
*parse_line (int pass
, char *buffer
, insn
*result
,
70 efunc errfunc
, evalfunc evaluate
, evalinfofunc einfo
) {
73 struct eval_hints hints
;
75 result
->forw_ref
= FALSE
;
80 stdscan_bufptr
= buffer
;
81 i
= stdscan(NULL
, &tokval
);
83 result
->eops
= NULL
; /* must do this, whatever happens */
84 result
->operands
= 0; /* must initialise this */
86 if (i
==0) { /* blank line - ignore */
87 result
->label
= NULL
; /* so, no label on it */
88 result
->opcode
= -1; /* and no instruction either */
91 if (i
!= TOKEN_ID
&& i
!= TOKEN_INSN
&& i
!= TOKEN_PREFIX
&&
92 (i
!=TOKEN_REG
|| (REG_SREG
& ~reg_flags
[tokval
.t_integer
]))) {
93 error (ERR_NONFATAL
, "label or instruction expected"
100 if (i
== TOKEN_ID
) { /* there's a label here */
101 result
->label
= tokval
.t_charptr
;
102 einfo (result
->label
, 0L, 0L);
103 i
= stdscan(NULL
, &tokval
);
104 if (i
== ':') { /* skip over the optional colon */
105 i
= stdscan(NULL
, &tokval
);
106 } else if (i
== 0 && pass
== 1) {
107 error (ERR_WARNING
|ERR_WARN_OL
,
108 "label alone on a line without a colon might be in error");
110 } else /* no label; so, moving swiftly on */
111 result
->label
= NULL
;
114 result
->opcode
= -1; /* this line contains just a label */
121 while (i
== TOKEN_PREFIX
||
122 (i
==TOKEN_REG
&& !(REG_SREG
& ~reg_flags
[tokval
.t_integer
]))) {
124 * Handle special case: the TIMES prefix.
126 if (i
== TOKEN_PREFIX
&& tokval
.t_integer
== P_TIMES
) {
129 i
= stdscan(NULL
, &tokval
);
130 value
= evaluate (stdscan
, NULL
, &tokval
, NULL
, pass
, error
, NULL
);
132 if (!value
) { /* but, error in evaluator */
133 result
->opcode
= -1; /* unrecoverable parse error: */
134 return result
; /* ignore this instruction */
136 if (!is_simple (value
)) {
138 "non-constant argument supplied to TIMES");
141 result
->times
= value
->value
;
142 if (value
->value
< 0)
143 error(ERR_NONFATAL
, "TIMES value %d is negative",
147 if (result
->nprefix
== MAXPREFIX
)
149 "instruction has more than %d prefixes", MAXPREFIX
);
151 result
->prefixes
[result
->nprefix
++] = tokval
.t_integer
;
152 i
= stdscan(NULL
, &tokval
);
156 if (i
!= TOKEN_INSN
) {
157 if (result
->nprefix
> 0 && i
== 0) {
159 * Instruction prefixes are present, but no actual
160 * instruction. This is allowed: at this point we
161 * invent a notional instruction of RESB 0.
163 result
->opcode
= I_RESB
;
164 result
->operands
= 1;
165 result
->oprs
[0].type
= IMMEDIATE
;
166 result
->oprs
[0].offset
= 0L;
167 result
->oprs
[0].segment
= result
->oprs
[0].wrt
= NO_SEG
;
170 error (ERR_NONFATAL
, "parser: instruction expected");
176 result
->opcode
= tokval
.t_integer
;
177 result
->condition
= tokval
.t_inttwo
;
180 * RESB, RESW and RESD cannot be satisfied with incorrectly
181 * evaluated operands, since the correct values _must_ be known
182 * on the first pass. Hence, even in pass one, we set the
183 * `critical' flag on calling evaluate(), so that it will bomb
184 * out on undefined symbols. Nasty, but there's nothing we can
187 * For the moment, EQU has the same difficulty, so we'll
190 if (result
->opcode
== I_RESB
||
191 result
->opcode
== I_RESW
||
192 result
->opcode
== I_RESD
||
193 result
->opcode
== I_RESQ
||
194 result
->opcode
== I_REST
||
195 result
->opcode
== I_EQU
)
198 critical
= (pass
==2 ? 2 : 0);
200 if (result
->opcode
== I_DB
||
201 result
->opcode
== I_DW
||
202 result
->opcode
== I_DD
||
203 result
->opcode
== I_DQ
||
204 result
->opcode
== I_DT
||
205 result
->opcode
== I_INCBIN
) {
206 extop
*eop
, **tail
= &result
->eops
, **fixptr
;
210 * Begin to read the DB/DW/DD/DQ/DT operands.
213 i
= stdscan(NULL
, &tokval
);
217 eop
= *tail
= nasm_malloc(sizeof(extop
));
220 eop
->type
= EOT_NOTHING
;
223 if (i
== TOKEN_NUM
&& tokval
.t_charptr
&& is_comma_next()) {
224 eop
->type
= EOT_DB_STRING
;
225 eop
->stringval
= tokval
.t_charptr
;
226 eop
->stringlen
= tokval
.t_inttwo
;
227 i
= stdscan(NULL
, &tokval
); /* eat the comma */
231 if (i
== TOKEN_FLOAT
|| i
== '-') {
235 char *save
= stdscan_bufptr
;
236 i
= stdscan(NULL
, &tokval
);
238 if (i
!= TOKEN_FLOAT
) {
239 stdscan_bufptr
= save
;
240 i
= tokval
.t_type
= '-';
244 if (i
== TOKEN_FLOAT
) {
245 eop
->type
= EOT_DB_STRING
;
246 if (result
->opcode
== I_DD
)
248 else if (result
->opcode
== I_DQ
)
250 else if (result
->opcode
== I_DT
)
253 error(ERR_NONFATAL
, "floating-point constant"
254 " encountered in `D%c' instruction",
255 result
->opcode
== I_DW
? 'W' : 'B');
256 eop
->type
= EOT_NOTHING
;
258 eop
= nasm_realloc(eop
, sizeof(extop
)+eop
->stringlen
);
261 eop
->stringval
= (char *)eop
+ sizeof(extop
);
262 if (!float_const (tokval
.t_charptr
, sign
,
263 (unsigned char *)eop
->stringval
,
264 eop
->stringlen
, error
))
265 eop
->type
= EOT_NOTHING
;
266 i
= stdscan(NULL
, &tokval
); /* eat the comma */
271 /* anything else */ {
273 value
= evaluate (stdscan
, NULL
, &tokval
, NULL
,
274 critical
, error
, NULL
);
276 if (!value
) { /* error in evaluator */
277 result
->opcode
= -1;/* unrecoverable parse error: */
278 return result
; /* ignore this instruction */
280 if (is_unknown(value
)) {
281 eop
->type
= EOT_DB_NUMBER
;
282 eop
->offset
= 0; /* doesn't matter what we put */
283 eop
->segment
= eop
->wrt
= NO_SEG
; /* likewise */
284 } else if (is_reloc(value
)) {
285 eop
->type
= EOT_DB_NUMBER
;
286 eop
->offset
= reloc_value(value
);
287 eop
->segment
= reloc_seg(value
);
288 eop
->wrt
= reloc_wrt(value
);
291 "operand %d: expression is not simple"
292 " or relocatable", oper_num
);
297 * We're about to call stdscan(), which will eat the
298 * comma that we're currently sitting on between
299 * arguments. However, we'd better check first that it
302 if (i
== 0) /* also could be EOL */
305 error (ERR_NONFATAL
, "comma expected after operand %d",
307 result
->opcode
= -1;/* unrecoverable parse error: */
308 return result
; /* ignore this instruction */
312 if (result
->opcode
== I_INCBIN
) {
314 * Correct syntax for INCBIN is that there should be
315 * one string operand, followed by one or two numeric
318 if (!result
->eops
|| result
->eops
->type
!= EOT_DB_STRING
)
319 error (ERR_NONFATAL
, "`incbin' expects a file name");
320 else if (result
->eops
->next
&&
321 result
->eops
->next
->type
!= EOT_DB_NUMBER
)
322 error (ERR_NONFATAL
, "`incbin': second parameter is",
324 else if (result
->eops
->next
&& result
->eops
->next
->next
&&
325 result
->eops
->next
->next
->type
!= EOT_DB_NUMBER
)
326 error (ERR_NONFATAL
, "`incbin': third parameter is",
328 else if (result
->eops
->next
&& result
->eops
->next
->next
&&
329 result
->eops
->next
->next
->next
)
330 error (ERR_NONFATAL
, "`incbin': more than three parameters");
334 * If we reach here, one of the above errors happened.
335 * Throw the instruction away.
344 /* right. Now we begin to parse the operands. There may be up to three
345 * of these, separated by commas, and terminated by a zero token. */
347 for (operand
= 0; operand
< 3; operand
++) {
348 expr
*value
; /* used most of the time */
349 int mref
; /* is this going to be a memory ref? */
350 int bracket
; /* is it a [] mref, or a & mref? */
352 result
->oprs
[operand
].addr_size
= 0;/* have to zero this whatever */
353 result
->oprs
[operand
].eaflags
= 0; /* and this */
354 i
= stdscan(NULL
, &tokval
);
355 if (i
== 0) break; /* end of operands: get out of here */
356 result
->oprs
[operand
].type
= 0; /* so far, no override */
357 while (i
== TOKEN_SPECIAL
) {/* size specifiers */
358 switch ((int)tokval
.t_integer
) {
360 result
->oprs
[operand
].type
|= BITS8
;
363 result
->oprs
[operand
].type
|= BITS16
;
367 result
->oprs
[operand
].type
|= BITS32
;
370 result
->oprs
[operand
].type
|= BITS64
;
373 result
->oprs
[operand
].type
|= BITS80
;
376 result
->oprs
[operand
].type
|= TO
;
379 result
->oprs
[operand
].type
|= FAR
;
382 result
->oprs
[operand
].type
|= NEAR
;
385 result
->oprs
[operand
].type
|= SHORT
;
388 i
= stdscan(NULL
, &tokval
);
391 if (i
== '[' || i
== '&') { /* memory reference */
393 bracket
= (i
== '[');
394 i
= stdscan(NULL
, &tokval
);
395 if (i
== TOKEN_SPECIAL
) { /* check for address size override */
396 switch ((int)tokval
.t_integer
) {
398 result
->oprs
[operand
].eaflags
|= EAF_TIMESTWO
;
401 result
->oprs
[operand
].eaflags
|= EAF_BYTEOFFS
;
404 result
->oprs
[operand
].addr_size
= 16;
405 result
->oprs
[operand
].eaflags
|= EAF_WORDOFFS
;
409 result
->oprs
[operand
].addr_size
= 32;
410 result
->oprs
[operand
].eaflags
|= EAF_WORDOFFS
;
413 error (ERR_NONFATAL
, "invalid size specification in"
414 " effective address");
416 i
= stdscan(NULL
, &tokval
);
418 } else { /* immediate operand, or register */
420 bracket
= FALSE
; /* placate optimisers */
423 value
= evaluate (stdscan
, NULL
, &tokval
,
424 &result
->forw_ref
, critical
, error
, &hints
);
426 if (!value
) { /* error in evaluator */
427 result
->opcode
= -1; /* unrecoverable parse error: */
428 return result
; /* ignore this instruction */
430 if (i
== ':' && mref
) { /* it was seg:offset */
432 * Process the segment override.
434 if (value
[1].type
!=0 || value
->value
!=1 ||
435 REG_SREG
& ~reg_flags
[value
->type
])
436 error (ERR_NONFATAL
, "invalid segment override");
437 else if (result
->nprefix
== MAXPREFIX
)
439 "instruction has more than %d prefixes",
442 result
->prefixes
[result
->nprefix
++] = value
->type
;
444 i
= stdscan(NULL
, &tokval
); /* then skip the colon */
445 if (i
== TOKEN_SPECIAL
) { /* another check for size override */
446 switch ((int)tokval
.t_integer
) {
448 result
->oprs
[operand
].addr_size
= 16;
452 result
->oprs
[operand
].addr_size
= 32;
455 error (ERR_NONFATAL
, "invalid size specification in"
456 " effective address");
458 i
= stdscan(NULL
, &tokval
);
460 value
= evaluate (stdscan
, NULL
, &tokval
,
461 &result
->forw_ref
, critical
, error
, &hints
);
463 /* and get the offset */
464 if (!value
) { /* but, error in evaluator */
465 result
->opcode
= -1; /* unrecoverable parse error: */
466 return result
; /* ignore this instruction */
469 if (mref
&& bracket
) { /* find ] at the end */
471 error (ERR_NONFATAL
, "parser: expecting ]");
472 do { /* error recovery again */
473 i
= stdscan(NULL
, &tokval
);
474 } while (i
!= 0 && i
!= ',');
475 } else /* we got the required ] */
476 i
= stdscan(NULL
, &tokval
);
477 } else { /* immediate operand */
478 if (i
!= 0 && i
!= ',' && i
!= ':') {
479 error (ERR_NONFATAL
, "comma or end of line expected");
480 do { /* error recovery */
481 i
= stdscan(NULL
, &tokval
);
482 } while (i
!= 0 && i
!= ',');
483 } else if (i
== ':') {
484 result
->oprs
[operand
].type
|= COLON
;
488 /* now convert the exprs returned from evaluate() into operand
491 if (mref
) { /* it's a memory reference */
493 int b
, i
, s
; /* basereg, indexreg, scale */
496 b
= i
= -1, o
= s
= 0;
497 result
->oprs
[operand
].hintbase
= hints
.base
;
498 result
->oprs
[operand
].hinttype
= hints
.type
;
500 if (e
->type
<= EXPR_REG_END
) { /* this bit's a register */
501 if (e
->value
== 1) /* in fact it can be basereg */
503 else /* no, it has to be indexreg */
504 i
= e
->type
, s
= e
->value
;
507 if (e
->type
&& e
->type
<= EXPR_REG_END
) {/* it's a 2nd register */
508 if (e
->value
!= 1) { /* it has to be indexreg */
509 if (i
!= -1) { /* but it can't be */
510 error(ERR_NONFATAL
, "invalid effective address");
514 i
= e
->type
, s
= e
->value
;
515 } else { /* it can be basereg */
516 if (b
!= -1) /* or can it? */
523 if (e
->type
!= 0) { /* is there an offset? */
524 if (e
->type
<= EXPR_REG_END
) {/* in fact, is there an error? */
525 error (ERR_NONFATAL
, "invalid effective address");
529 if (e
->type
== EXPR_UNKNOWN
) {
530 o
= 0; /* doesn't matter what */
531 result
->oprs
[operand
].wrt
= NO_SEG
; /* nor this */
532 result
->oprs
[operand
].segment
= NO_SEG
; /* or this */
533 while (e
->type
) e
++; /* go to the end of the line */
535 if (e
->type
== EXPR_SIMPLE
) {
539 if (e
->type
== EXPR_WRT
) {
540 result
->oprs
[operand
].wrt
= e
->value
;
543 result
->oprs
[operand
].wrt
= NO_SEG
;
545 * Look for a segment base type.
547 if (e
->type
&& e
->type
< EXPR_SEGBASE
) {
548 error (ERR_NONFATAL
, "invalid effective address");
552 while (e
->type
&& e
->value
== 0)
554 if (e
->type
&& e
->value
!= 1) {
555 error (ERR_NONFATAL
, "invalid effective address");
560 result
->oprs
[operand
].segment
=
561 e
->type
- EXPR_SEGBASE
;
564 result
->oprs
[operand
].segment
= NO_SEG
;
565 while (e
->type
&& e
->value
== 0)
568 error (ERR_NONFATAL
, "invalid effective address");
576 result
->oprs
[operand
].wrt
= NO_SEG
;
577 result
->oprs
[operand
].segment
= NO_SEG
;
580 if (e
->type
!= 0) { /* there'd better be nothing left! */
581 error (ERR_NONFATAL
, "invalid effective address");
586 result
->oprs
[operand
].type
|= MEMORY
;
587 if (b
==-1 && (i
==-1 || s
==0))
588 result
->oprs
[operand
].type
|= MEM_OFFS
;
589 result
->oprs
[operand
].basereg
= b
;
590 result
->oprs
[operand
].indexreg
= i
;
591 result
->oprs
[operand
].scale
= s
;
592 result
->oprs
[operand
].offset
= o
;
593 } else { /* it's not a memory reference */
594 if (is_just_unknown(value
)) { /* it's immediate but unknown */
595 result
->oprs
[operand
].type
|= IMMEDIATE
;
596 result
->oprs
[operand
].offset
= 0; /* don't care */
597 result
->oprs
[operand
].segment
= NO_SEG
; /* don't care again */
598 result
->oprs
[operand
].wrt
= NO_SEG
;/* still don't care */
599 } else if (is_reloc(value
)) { /* it's immediate */
600 result
->oprs
[operand
].type
|= IMMEDIATE
;
601 result
->oprs
[operand
].offset
= reloc_value(value
);
602 result
->oprs
[operand
].segment
= reloc_seg(value
);
603 result
->oprs
[operand
].wrt
= reloc_wrt(value
);
604 if (is_simple(value
) && reloc_value(value
)==1)
605 result
->oprs
[operand
].type
|= UNITY
;
606 } else { /* it's a register */
607 if (value
->type
>=EXPR_SIMPLE
|| value
->value
!=1) {
608 error (ERR_NONFATAL
, "invalid operand type");
612 /* clear overrides, except TO which applies to FPU regs */
613 result
->oprs
[operand
].type
&= TO
;
614 result
->oprs
[operand
].type
|= REGISTER
;
615 result
->oprs
[operand
].type
|= reg_flags
[value
->type
];
616 result
->oprs
[operand
].basereg
= value
->type
;
621 result
->operands
= operand
; /* set operand count */
623 while (operand
<3) /* clear remaining operands */
624 result
->oprs
[operand
++].type
= 0;
627 * Transform RESW, RESD, RESQ, REST into RESB.
629 switch (result
->opcode
) {
630 case I_RESW
: result
->opcode
=I_RESB
; result
->oprs
[0].offset
*=2; break;
631 case I_RESD
: result
->opcode
=I_RESB
; result
->oprs
[0].offset
*=4; break;
632 case I_RESQ
: result
->opcode
=I_RESB
; result
->oprs
[0].offset
*=8; break;
633 case I_REST
: result
->opcode
=I_RESB
; result
->oprs
[0].offset
*=10; break;
639 static int is_comma_next (void) {
645 i
= stdscan (NULL
, &tv
);
647 return (i
== ',' || i
== ';' || !i
);
650 void cleanup_insn (insn
*i
) {
655 i
->eops
= i
->eops
->next
;