[apple/boot.git] / i386 / nasm / parser.c

/*
 * Copyright (c) 1999 Apple Computer, Inc. All rights reserved.
 *
 * @APPLE_LICENSE_HEADER_START@
 * 
 * Portions Copyright (c) 1999 Apple Computer, Inc.  All Rights
 * Reserved.  This file contains Original Code and/or Modifications of
 * Original Code as defined in and that are subject to the Apple Public
 * Source License Version 1.1 (the "License").  You may not use this file
 * except in compliance with the License.  Please obtain a copy of the
 * License at http://www.apple.com/publicsource and read it before using
 * this file.
 * 
 * The Original Code and all software distributed under the License are
 * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE OR NON- INFRINGEMENT.  Please see the
 * License for the specific language governing rights and limitations
 * under the License.
 * 
 * @APPLE_LICENSE_HEADER_END@
 */
/* parser.c   source line parser for the Netwide Assembler
 *
 * The Netwide Assembler is copyright (C) 1996 Simon Tatham and
 * Julian Hall. All rights reserved. The software is
 * redistributable under the licence given in the file "Licence"
 * distributed in the NASM archive.
 *
 * initial version 27/iii/95 by Simon Tatham
 */

#include <stdio.h>
#include <stdlib.h>
#include <stddef.h>
#include <string.h>
#include <ctype.h>

#include "nasm.h"
#include "nasmlib.h"
#include "parser.h"
#include "float.h"

/*
 * Operand-type flags for each register, indexed by register number.
 * In this file the index is tokval.t_integer for a TOKEN_REG token,
 * or the `type' field of an expr term that denotes a register.
 * Entry 0 carries no flags.
 * NOTE(review): the entry order presumably has to mirror the register
 * numbering declared in the project headers - confirm before
 * reordering or inserting entries.
 */
static long reg_flags[] = {	       /* sizes and special flags */
    0, REG8, REG_AL, REG_AX, REG8, REG8, REG16, REG16, REG8, REG_CL,
    REG_CREG, REG_CREG, REG_CREG, REG_CR4, REG_CS, REG_CX, REG8,
    REG16, REG8, REG_DREG, REG_DREG, REG_DREG, REG_DREG, REG_DREG,
    REG_DREG, REG_DESS, REG_DX, REG_EAX, REG32, REG32, REG_ECX,
    REG32, REG32, REG_DESS, REG32, REG32, REG_FSGS, REG_FSGS,
    MMXREG, MMXREG, MMXREG, MMXREG, MMXREG, MMXREG, MMXREG, MMXREG,
    REG16, REG16, REG_DESS, FPU0, FPUREG, FPUREG, FPUREG, FPUREG,
    FPUREG, FPUREG, FPUREG, REG_TREG, REG_TREG, REG_TREG, REG_TREG,
    REG_TREG
};

/*
 * Values carried in tokval.t_integer when the scanner returns
 * TOKEN_SPECIAL.  The numbering is implicit (0, 1, 2, ...), so the
 * order of the enumerators below is significant.
 */
enum {
    S_BYTE,			       /* BITS8 / byte offset in an EA */
    S_DWORD,			       /* BITS32 / 32-bit address size */
    S_FAR,			       /* FAR */
    S_LONG,			       /* handled exactly like S_DWORD */
    S_NEAR,			       /* NEAR */
    S_NOSPLIT,			       /* EAF_TIMESTWO inside an EA */
    S_QWORD,			       /* BITS64 */
    S_SHORT,			       /* SHORT */
    S_TO,			       /* TO */
    S_TWORD,			       /* BITS80 */
    S_WORD			       /* BITS16 / 16-bit address size */
};

/* Look-ahead helper, defined after parse_line(). */
static int is_comma_next (void);

/*
 * Scanner state shared by the routines in this file.
 */
static int i;			       /* type of the current token, as
					* returned by stdscan() or left in
					* tokval.t_type by the evaluator */
static struct tokenval tokval;	       /* value of the current token */
static efunc error;		       /* error reporter; set from the
					* errfunc argument on every call
					* to parse_line() */

/*
 * parse_line: scan one line of source text and describe it in `result'.
 *
 *   pass      number of the current assembly pass.  It is handed to the
 *             evaluator for TIMES and for the RESx/EQU operands, it
 *             selects the `critical' level for everything else, and
 *             the "label alone on a line" warning is issued only when
 *             it equals 1.
 *   buffer    text of the line; the scanner is pointed at it.
 *   result    instruction record to fill in.
 *   errfunc   error reporter; also stored in the file-scope `error'.
 *   evaluate  expression evaluator, driven with stdscan() as its
 *             token source.
 *   einfo     called first with "" and then, if the line starts with
 *             a label, with that label.
 *
 * Always returns `result'.  result->opcode is set to -1 when the line
 * holds no instruction (blank line, label only) or when it could not
 * be parsed; the other fields are then only partially filled in.
 * Operands of DB/DW/DD/DQ/DT/INCBIN are returned as a list hanging off
 * result->eops, allocated with nasm_malloc()/nasm_realloc() (see
 * cleanup_insn()).
 */
insn *parse_line (int pass, char *buffer, insn *result,
		  efunc errfunc, evalfunc evaluate, evalinfofunc einfo) {
    int operand;
    int critical;
    struct eval_hints hints;

    result->forw_ref = FALSE;
    error = errfunc;
    einfo ("", 0L, 0L);

    stdscan_reset();
    stdscan_bufptr = buffer;
    i = stdscan(NULL, &tokval);

    result->eops = NULL;	       /* must do this, whatever happens */
    result->operands = 0;	       /* must initialise this */

    if (i==0) {			       /* blank line - ignore */
	result->label = NULL;	       /* so, no label on it */
	result->opcode = -1;	       /* and no instruction either */
	return result;
    }
    /*
     * A line may only start with a label, an instruction, a prefix,
     * or a register whose flags are all within REG_SREG (used as a
     * prefix by the loop further down).
     */
    if (i != TOKEN_ID && i != TOKEN_INSN && i != TOKEN_PREFIX &&
	(i!=TOKEN_REG || (REG_SREG & ~reg_flags[tokval.t_integer]))) {
	error (ERR_NONFATAL, "label or instruction expected"
	       " at start of line");
	result->label = NULL;
	result->opcode = -1;
	return result;
    }

    if (i == TOKEN_ID) {	       /* there's a label here */
	result->label = tokval.t_charptr;
	einfo (result->label, 0L, 0L);
	i = stdscan(NULL, &tokval);
	if (i == ':') {		       /* skip over the optional colon */
	    i = stdscan(NULL, &tokval);
	} else if (i == 0 && pass == 1) {
	    error (ERR_WARNING|ERR_WARN_OL,
		   "label alone on a line without a colon might be in error");
	}
    } else			       /* no label; so, moving swiftly on */
	result->label = NULL;

    if (i==0) {
	result->opcode = -1;	       /* this line contains just a label */
	return result;
    }

    result->nprefix = 0;
    result->times = 1L;

    while (i == TOKEN_PREFIX ||
	   (i==TOKEN_REG && !(REG_SREG & ~reg_flags[tokval.t_integer]))) {
	/*
	 * Handle special case: the TIMES prefix.
	 */
	if (i == TOKEN_PREFIX && tokval.t_integer == P_TIMES) {
	    expr *value;

	    i = stdscan(NULL, &tokval);
	    value = evaluate (stdscan, NULL, &tokval, NULL, pass, error, NULL);
	    i = tokval.t_type;
	    if (!value) {	       /* but, error in evaluator */
		result->opcode = -1;   /* unrecoverable parse error: */
		return result;	       /* ignore this instruction */
	    }
	    if (!is_simple (value)) {
		error (ERR_NONFATAL,
		       "non-constant argument supplied to TIMES");
		result->times = 1L;
	    } else {
		result->times = value->value;
		/*
		 * The cast makes the argument match the %ld conversion
		 * whatever integer type the value field has.
		 */
		if (value->value < 0)
		    error(ERR_NONFATAL, "TIMES value %ld is negative",
			  (long) value->value);
	    }
	} else {
	    if (result->nprefix == MAXPREFIX)
		error (ERR_NONFATAL,
		       "instruction has more than %d prefixes", MAXPREFIX);
	    else
		result->prefixes[result->nprefix++] = tokval.t_integer;
	    i = stdscan(NULL, &tokval);
	}
    }

    if (i != TOKEN_INSN) {
	if (result->nprefix > 0 && i == 0) {
	    /*
	     * Instruction prefixes are present, but no actual
	     * instruction. This is allowed: at this point we
	     * invent a notional instruction of RESB 0.
	     */
	    result->opcode = I_RESB;
	    result->operands = 1;
	    result->oprs[0].type = IMMEDIATE;
	    result->oprs[0].offset = 0L;
	    result->oprs[0].segment = result->oprs[0].wrt = NO_SEG;
	    return result;
	} else {
	    error (ERR_NONFATAL, "parser: instruction expected");
	    result->opcode = -1;
	    return result;
	}
    }

    result->opcode = tokval.t_integer;
    result->condition = tokval.t_inttwo;

    /*
     * RESB, RESW and RESD cannot be satisfied with incorrectly
     * evaluated operands, since the correct values _must_ be known
     * on the first pass. Hence, even in pass one, we set the
     * `critical' flag on calling evaluate(), so that it will bomb
     * out on undefined symbols. Nasty, but there's nothing we can
     * do about it.
     *
     * For the moment, EQU has the same difficulty, so we'll
     * include that.
     */
    if (result->opcode == I_RESB ||
	result->opcode == I_RESW ||
	result->opcode == I_RESD ||
	result->opcode == I_RESQ ||
	result->opcode == I_REST ||
	result->opcode == I_EQU)
	critical = pass;
    else
	critical = (pass==2 ? 2 : 0);

    if (result->opcode == I_DB ||
	result->opcode == I_DW ||
	result->opcode == I_DD ||
	result->opcode == I_DQ ||
	result->opcode == I_DT ||
	result->opcode == I_INCBIN) {
	/*
	 * `tail' addresses the link through which the next operand
	 * will be appended; `fixptr' remembers the link that points
	 * at the current operand, so that it can be patched if the
	 * operand is moved by nasm_realloc().
	 */
	extop *eop, **tail = &result->eops, **fixptr;
	int oper_num = 0;

	/*
	 * Begin to read the DB/DW/DD/DQ/DT operands.
	 */
	while (1) {
	    i = stdscan(NULL, &tokval);
	    if (i == 0)
		break;
	    fixptr = tail;
	    eop = *tail = nasm_malloc(sizeof(extop));
	    tail = &eop->next;
	    eop->next = NULL;
	    eop->type = EOT_NOTHING;
	    oper_num++;

	    /*
	     * A TOKEN_NUM that carries a character pointer and stands
	     * alone as an operand is stored as a string.
	     */
	    if (i == TOKEN_NUM && tokval.t_charptr && is_comma_next()) {
		eop->type = EOT_DB_STRING;
		eop->stringval = tokval.t_charptr;
		eop->stringlen = tokval.t_inttwo;
		i = stdscan(NULL, &tokval);       /* eat the comma */
		continue;
	    }

	    if (i == TOKEN_FLOAT || i == '-') {
		long sign = +1L;

		if (i == '-') {
		    /*
		     * Peek past the minus sign: if no float follows,
		     * rewind the scanner and let the general
		     * expression code below deal with the '-'.
		     */
		    char *save = stdscan_bufptr;
		    i = stdscan(NULL, &tokval);
		    sign = -1L;
		    if (i != TOKEN_FLOAT) {
			stdscan_bufptr = save;
			i = tokval.t_type = '-';
		    }
		}

		if (i == TOKEN_FLOAT) {
		    eop->type = EOT_DB_STRING;
		    if (result->opcode == I_DD)
			eop->stringlen = 4;
		    else if (result->opcode == I_DQ)
			eop->stringlen = 8;
		    else if (result->opcode == I_DT)
			eop->stringlen = 10;
		    else {
			error(ERR_NONFATAL, "floating-point constant"
			      " encountered in `D%c' instruction",
			      result->opcode == I_DW ? 'W' : 'B');
			eop->type = EOT_NOTHING;
			/*
			 * The length must be given a value here: it is
			 * read by the nasm_realloc() call just below.
			 */
			eop->stringlen = 0;
		    }
		    /* the converted bytes live directly after the extop */
		    eop = nasm_realloc(eop, sizeof(extop)+eop->stringlen);
		    tail = &eop->next;
		    *fixptr = eop;
		    eop->stringval = (char *)eop + sizeof(extop);
		    /* no room was reserved if the size was rejected above */
		    if (eop->stringlen == 0 ||
			!float_const (tokval.t_charptr, sign,
				      (unsigned char *)eop->stringval,
				      eop->stringlen, error))
			eop->type = EOT_NOTHING;
		    i = stdscan(NULL, &tokval);       /* eat the comma */
		    continue;
		}
	    }

	    /* anything else */ {
		expr *value;
		value = evaluate (stdscan, NULL, &tokval, NULL,
				  critical, error, NULL);
		i = tokval.t_type;
		if (!value) {	       /* error in evaluator */
		    result->opcode = -1;/* unrecoverable parse error: */
		    return result;     /* ignore this instruction */
		}
		if (is_unknown(value)) {
		    eop->type = EOT_DB_NUMBER;
		    eop->offset = 0;   /* doesn't matter what we put */
		    eop->segment = eop->wrt = NO_SEG;   /* likewise */
		} else if (is_reloc(value)) {
		    eop->type = EOT_DB_NUMBER;
		    eop->offset = reloc_value(value);
		    eop->segment = reloc_seg(value);
		    eop->wrt = reloc_wrt(value);
		} else {
		    error (ERR_NONFATAL,
			   "operand %d: expression is not simple"
			   " or relocatable", oper_num);
		}
	    }

	    /*
	     * We're about to call stdscan(), which will eat the
	     * comma that we're currently sitting on between
	     * arguments. However, we'd better check first that it
	     * _is_ a comma.
	     */
	    if (i == 0)		       /* also could be EOL */
		break;
	    if (i != ',') {
		error (ERR_NONFATAL, "comma expected after operand %d",
		       oper_num);
		result->opcode = -1;/* unrecoverable parse error: */
		return result;     /* ignore this instruction */
	    }
	}

	if (result->opcode == I_INCBIN) {
	    /*
	     * Correct syntax for INCBIN is that there should be
	     * one string operand, followed by one or two numeric
	     * operands.
	     */
	    if (!result->eops || result->eops->type != EOT_DB_STRING)
		error (ERR_NONFATAL, "`incbin' expects a file name");
	    else if (result->eops->next &&
		     result->eops->next->type != EOT_DB_NUMBER)
		error (ERR_NONFATAL, "`incbin': second parameter is"
		       " non-numeric");
	    else if (result->eops->next && result->eops->next->next &&
		     result->eops->next->next->type != EOT_DB_NUMBER)
		error (ERR_NONFATAL, "`incbin': third parameter is"
		       " non-numeric");
	    else if (result->eops->next && result->eops->next->next &&
		     result->eops->next->next->next)
		error (ERR_NONFATAL, "`incbin': more than three parameters");
	    else
		return result;
	    /*
	     * If we reach here, one of the above errors happened.
	     * Throw the instruction away.
	     */
	    result->opcode = -1;
	    return result;
	}

	return result;
    }

    /* right. Now we begin to parse the operands. There may be up to three
     * of these, separated by commas, and terminated by a zero token. */

    for (operand = 0; operand < 3; operand++) {
	expr *value;		       /* used most of the time */
	int mref;		       /* is this going to be a memory ref? */
	int bracket;		       /* is it a [] mref, or a & mref? */

	result->oprs[operand].addr_size = 0;/* have to zero this whatever */
	result->oprs[operand].eaflags = 0;   /* and this */
	i = stdscan(NULL, &tokval);
	if (i == 0) break;	       /* end of operands: get out of here */
	result->oprs[operand].type = 0;   /* so far, no override */
	while (i == TOKEN_SPECIAL)	{/* size specifiers */
	    switch ((int)tokval.t_integer) {
	      case S_BYTE:
		result->oprs[operand].type |= BITS8;
		break;
	      case S_WORD:
		result->oprs[operand].type |= BITS16;
		break;
	      case S_DWORD:
	      case S_LONG:
		result->oprs[operand].type |= BITS32;
		break;
	      case S_QWORD:
		result->oprs[operand].type |= BITS64;
		break;
	      case S_TWORD:
		result->oprs[operand].type |= BITS80;
		break;
	      case S_TO:
		result->oprs[operand].type |= TO;
		break;
	      case S_FAR:
		result->oprs[operand].type |= FAR;
		break;
	      case S_NEAR:
		result->oprs[operand].type |= NEAR;
		break;
	      case S_SHORT:
		result->oprs[operand].type |= SHORT;
		break;
	    }
	    i = stdscan(NULL, &tokval);
	}

	if (i == '[' || i == '&') {    /* memory reference */
	    mref = TRUE;
	    bracket = (i == '[');
	    i = stdscan(NULL, &tokval);
	    if (i == TOKEN_SPECIAL) {  /* check for address size override */
		switch ((int)tokval.t_integer) {
		  case S_NOSPLIT:
		    result->oprs[operand].eaflags |= EAF_TIMESTWO;
		    break;
		  case S_BYTE:
		    result->oprs[operand].eaflags |= EAF_BYTEOFFS;
		    break;
		  case S_WORD:
		    result->oprs[operand].addr_size = 16;
		    result->oprs[operand].eaflags |= EAF_WORDOFFS;
		    break;
		  case S_DWORD:
		  case S_LONG:
		    result->oprs[operand].addr_size = 32;
		    result->oprs[operand].eaflags |= EAF_WORDOFFS;
		    break;
		  default:
		    error (ERR_NONFATAL, "invalid size specification in"
			   " effective address");
		}
		i = stdscan(NULL, &tokval);
	    }
	} else {		       /* immediate operand, or register */
	    mref = FALSE;
	    bracket = FALSE;	       /* placate optimisers */
	}

	value = evaluate (stdscan, NULL, &tokval,
			  &result->forw_ref, critical, error, &hints);
	i = tokval.t_type;
	if (!value) {		       /* error in evaluator */
	    result->opcode = -1;       /* unrecoverable parse error: */
	    return result;	       /* ignore this instruction */
	}
	if (i == ':' && mref) {	       /* it was seg:offset */
	    /*
	     * Process the segment override. The expression before the
	     * colon need not be a register at all (e.g. a plain
	     * number), so its type is range-checked before it is used
	     * as an index into reg_flags[].
	     */
	    if (value[1].type!=0 || value->value!=1 ||
		value->type < 0 ||
		value->type >= (long)(sizeof(reg_flags)/sizeof(*reg_flags)) ||
		REG_SREG & ~reg_flags[value->type])
		error (ERR_NONFATAL, "invalid segment override");
	    else if (result->nprefix == MAXPREFIX)
		error (ERR_NONFATAL,
		       "instruction has more than %d prefixes",
		       MAXPREFIX);
	    else
		result->prefixes[result->nprefix++] = value->type;

	    i = stdscan(NULL, &tokval);	       /* then skip the colon */
	    if (i == TOKEN_SPECIAL) {  /* another check for size override */
		switch ((int)tokval.t_integer) {
		  case S_WORD:
		    result->oprs[operand].addr_size = 16;
		    break;
		  case S_DWORD:
		  case S_LONG:
		    result->oprs[operand].addr_size = 32;
		    break;
		  default:
		    error (ERR_NONFATAL, "invalid size specification in"
			   " effective address");
		}
		i = stdscan(NULL, &tokval);
	    }
	    value = evaluate (stdscan, NULL, &tokval,
			      &result->forw_ref, critical, error, &hints);
	    i = tokval.t_type;
	    /* and get the offset */
	    if (!value) {	       /* but, error in evaluator */
		result->opcode = -1;   /* unrecoverable parse error: */
		return result;	       /* ignore this instruction */
	    }
	}
	if (mref && bracket) {	       /* find ] at the end */
	    if (i != ']') {
		error (ERR_NONFATAL, "parser: expecting ]");
		do {		       /* error recovery again */
		    i = stdscan(NULL, &tokval);
		} while (i != 0 && i != ',');
	    } else		       /* we got the required ] */
		i = stdscan(NULL, &tokval);
	} else {		       /* immediate operand */
	    if (i != 0 && i != ',' && i != ':') {
		error (ERR_NONFATAL, "comma or end of line expected");
		do {		       /* error recovery */
		    i = stdscan(NULL, &tokval);
		} while (i != 0 && i != ',');
	    } else if (i == ':') {
		result->oprs[operand].type |= COLON;
	    }
	}

	/* now convert the exprs returned from evaluate() into operand
	 * descriptions... */

	if (mref) {		       /* it's a memory reference */
	    expr *e = value;
	    /*
	     * The locals are deliberately not called `i': that name
	     * belongs to the file-scope current-token variable.
	     */
	    int breg, ireg, scale;     /* basereg, indexreg, scale */
	    long offs;		       /* offset */

	    breg = ireg = -1;
	    scale = 0;
	    offs = 0;
	    result->oprs[operand].hintbase = hints.base;
	    result->oprs[operand].hinttype = hints.type;

	    if (e->type <= EXPR_REG_END) {   /* this bit's a register */
		if (e->value == 1) /* in fact it can be basereg */
		    breg = e->type;
		else	       /* no, it has to be indexreg */
		    ireg = e->type, scale = e->value;
		e++;
	    }
	    if (e->type && e->type <= EXPR_REG_END) {/* it's a 2nd register */
		if (e->value != 1) {   /* it has to be indexreg */
		    if (ireg != -1) {  /* but it can't be */
			error(ERR_NONFATAL, "invalid effective address");
			result->opcode = -1;
			return result;
		    } else
			ireg = e->type, scale = e->value;
		} else {	       /* it can be basereg */
		    if (breg != -1)    /* or can it? */
			ireg = e->type, scale = 1;
		    else
			breg = e->type;
		}
		e++;
	    }
	    if (e->type != 0) {	       /* is there an offset? */
		if (e->type <= EXPR_REG_END) {/* in fact, is there an error? */
		    error (ERR_NONFATAL, "invalid effective address");
		    result->opcode = -1;
		    return result;
		} else {
		    if (e->type == EXPR_UNKNOWN) {
			offs = 0;      /* doesn't matter what */
			result->oprs[operand].wrt = NO_SEG;   /* nor this */
			result->oprs[operand].segment = NO_SEG;  /* or this */
			while (e->type) e++;   /* go to the end of the line */
		    } else {
			if (e->type == EXPR_SIMPLE) {
			    offs = e->value;
			    e++;
			}
			if (e->type == EXPR_WRT) {
			    result->oprs[operand].wrt = e->value;
			    e++;
			} else
			    result->oprs[operand].wrt = NO_SEG;
			/*
			 * Look for a segment base type.
			 */
			if (e->type && e->type < EXPR_SEGBASE) {
			    error (ERR_NONFATAL, "invalid effective address");
			    result->opcode = -1;
			    return result;
			}
			while (e->type && e->value == 0)
			    e++;
			if (e->type && e->value != 1) {
			    error (ERR_NONFATAL, "invalid effective address");
			    result->opcode = -1;
			    return result;
			}
			if (e->type) {
			    result->oprs[operand].segment =
				e->type - EXPR_SEGBASE;
			    e++;
			} else
			    result->oprs[operand].segment = NO_SEG;
			while (e->type && e->value == 0)
			    e++;
			if (e->type) {
			    error (ERR_NONFATAL, "invalid effective address");
			    result->opcode = -1;
			    return result;
			}
		    }
		}
	    } else {
		offs = 0;
		result->oprs[operand].wrt = NO_SEG;
		result->oprs[operand].segment = NO_SEG;
	    }

	    if (e->type != 0) {    /* there'd better be nothing left! */
		error (ERR_NONFATAL, "invalid effective address");
		result->opcode = -1;
		return result;
	    }

	    result->oprs[operand].type |= MEMORY;
	    if (breg==-1 && (ireg==-1 || scale==0))
		result->oprs[operand].type |= MEM_OFFS;
	    result->oprs[operand].basereg = breg;
	    result->oprs[operand].indexreg = ireg;
	    result->oprs[operand].scale = scale;
	    result->oprs[operand].offset = offs;
	} else {		       /* it's not a memory reference */
	    if (is_just_unknown(value)) {     /* it's immediate but unknown */
		result->oprs[operand].type |= IMMEDIATE;
		result->oprs[operand].offset = 0;   /* don't care */
		result->oprs[operand].segment = NO_SEG; /* don't care again */
		result->oprs[operand].wrt = NO_SEG;/* still don't care */
	    } else if (is_reloc(value)) {     /* it's immediate */
		result->oprs[operand].type |= IMMEDIATE;
		result->oprs[operand].offset = reloc_value(value);
		result->oprs[operand].segment = reloc_seg(value);
		result->oprs[operand].wrt = reloc_wrt(value);
		if (is_simple(value) && reloc_value(value)==1)
		    result->oprs[operand].type |= UNITY;
	    } else {	       /* it's a register */
		if (value->type>=EXPR_SIMPLE || value->value!=1) {
		    error (ERR_NONFATAL, "invalid operand type");
		    result->opcode = -1;
		    return result;
		}
		/* clear overrides, except TO which applies to FPU regs */
		result->oprs[operand].type &= TO;
		result->oprs[operand].type |= REGISTER;
		result->oprs[operand].type |= reg_flags[value->type];
		result->oprs[operand].basereg = value->type;
	    }
	}
    }

    result->operands = operand;       /* set operand count */

    while (operand<3)		       /* clear remaining operands */
	result->oprs[operand++].type = 0;

    /*
     * Transform RESW, RESD, RESQ, REST into RESB.
     */
    switch (result->opcode) {
      case I_RESW: result->opcode=I_RESB; result->oprs[0].offset*=2; break;
      case I_RESD: result->opcode=I_RESB; result->oprs[0].offset*=4; break;
      case I_RESQ: result->opcode=I_RESB; result->oprs[0].offset*=8; break;
      case I_REST: result->opcode=I_RESB; result->oprs[0].offset*=10; break;
    }

    return result;
}

static int is_comma_next (void) {
    char *p;
    int i;
    struct tokenval tv;

    p = stdscan_bufptr;
    i = stdscan (NULL, &tv);
    stdscan_bufptr = p;
    return (i == ',' || i == ';' || !i);
}

/*
 * Release every extended operand on the list at i->eops with
 * nasm_free().  Each node is unlinked from the head of the list
 * before it is freed, so i->eops ends up NULL.
 */
void cleanup_insn (insn *i) {
    extop *doomed;

    for (doomed = i->eops; doomed != NULL; doomed = i->eops) {
	i->eops = doomed->next;	       /* unlink first ... */
	nasm_free (doomed);	       /* ... then release */
    }
}
Commit	Line	Data
14c7c974 A	1	/*
	2	* Copyright (c) 1999 Apple Computer, Inc. All rights reserved.
	3	*
	4	* @APPLE_LICENSE_HEADER_START@
	5	*
4f6e3300 A	6	* Portions Copyright (c) 1999 Apple Computer, Inc. All Rights
	7	* Reserved. This file contains Original Code and/or Modifications of
	8	* Original Code as defined in and that are subject to the Apple Public
	9	* Source License Version 1.1 (the "License"). You may not use this file
	10	* except in compliance with the License. Please obtain a copy of the
	11	* License at http://www.apple.com/publicsource and read it before using
	12	* this file.
14c7c974 A	13	*
14c7c974 A	14	* The Original Code and all software distributed under the License are
4f6e3300	15	* distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER
14c7c974 A	16	* EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
14c7c974 A	17	* INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
4f6e3300 A	18	* FITNESS FOR A PARTICULAR PURPOSE OR NON- INFRINGEMENT. Please see the
	19	* License for the specific language governing rights and limitations
	20	* under the License.
14c7c974 A	21	*
	22	* @APPLE_LICENSE_HEADER_END@
	23	*/
	24	/* parser.c source line parser for the Netwide Assembler
	25	*
	26	* The Netwide Assembler is copyright (C) 1996 Simon Tatham and
	27	* Julian Hall. All rights reserved. The software is
	28	* redistributable under the licence given in the file "Licence"
	29	* distributed in the NASM archive.
	30	*
	31	* initial version 27/iii/95 by Simon Tatham
	32	*/
	33
	34	#include <stdio.h>
	35	#include <stdlib.h>
	36	#include <stddef.h>
	37	#include <string.h>
	38	#include <ctype.h>
	39
	40	#include "nasm.h"
	41	#include "nasmlib.h"
	42	#include "parser.h"
	43	#include "float.h"
	44
	45	static long reg_flags[] = { /* sizes and special flags */
	46	0, REG8, REG_AL, REG_AX, REG8, REG8, REG16, REG16, REG8, REG_CL,
	47	REG_CREG, REG_CREG, REG_CREG, REG_CR4, REG_CS, REG_CX, REG8,
	48	REG16, REG8, REG_DREG, REG_DREG, REG_DREG, REG_DREG, REG_DREG,
	49	REG_DREG, REG_DESS, REG_DX, REG_EAX, REG32, REG32, REG_ECX,
	50	REG32, REG32, REG_DESS, REG32, REG32, REG_FSGS, REG_FSGS,
	51	MMXREG, MMXREG, MMXREG, MMXREG, MMXREG, MMXREG, MMXREG, MMXREG,
	52	REG16, REG16, REG_DESS, FPU0, FPUREG, FPUREG, FPUREG, FPUREG,
	53	FPUREG, FPUREG, FPUREG, REG_TREG, REG_TREG, REG_TREG, REG_TREG,
	54	REG_TREG
	55	};
	56
	57	enum { /* special tokens */
	58	S_BYTE, S_DWORD, S_FAR, S_LONG, S_NEAR, S_NOSPLIT, S_QWORD,
	59	S_SHORT, S_TO, S_TWORD, S_WORD
	60	};
	61
	62	static int is_comma_next (void);
	63
	64	static int i;
	65	static struct tokenval tokval;
	66	static efunc error;
	67
	68	insn parse_line (int pass, char buffer, insn *result,
	69	efunc errfunc, evalfunc evaluate, evalinfofunc einfo) {
	70	int operand;
	71	int critical;
	72	struct eval_hints hints;
	73
	74	result->forw_ref = FALSE;
	75	error = errfunc;
	76	einfo ("", 0L, 0L);
	77
	78	stdscan_reset();
	79	stdscan_bufptr = buffer;
	80	i = stdscan(NULL, &tokval);
	81
	82	result->eops = NULL; /* must do this, whatever happens */
	83	result->operands = 0; /* must initialise this */
	84
85	if (i==0) { /* blank line - ignore */
86	result->label = NULL; /* so, no label on it */
87	result->opcode = -1; /* and no instruction either */
88	return result;
89	}
90	if (i != TOKEN_ID && i != TOKEN_INSN && i != TOKEN_PREFIX &&
91	(i!=TOKEN_REG \|\| (REG_SREG & ~reg_flags[tokval.t_integer]))) {
92	error (ERR_NONFATAL, "label or instruction expected"
93	" at start of line");
94	result->label = NULL;
95	result->opcode = -1;
96	return result;
97	}
98
99	if (i == TOKEN_ID) { /* there's a label here */
100	result->label = tokval.t_charptr;
101	einfo (result->label, 0L, 0L);
102	i = stdscan(NULL, &tokval);
103	if (i == ':') { /* skip over the optional colon */
104	i = stdscan(NULL, &tokval);
105	} else if (i == 0 && pass == 1) {
106	error (ERR_WARNING\|ERR_WARN_OL,
107	"label alone on a line without a colon might be in error");
108	}
109	} else /* no label; so, moving swiftly on */
110	result->label = NULL;
111
112	if (i==0) {
113	result->opcode = -1; /* this line contains just a label */
114	return result;
115	}
116
117	result->nprefix = 0;
118	result->times = 1L;
119
120	while (i == TOKEN_PREFIX \|\|
121	(i==TOKEN_REG && !(REG_SREG & ~reg_flags[tokval.t_integer]))) {
122	/*
123	* Handle special case: the TIMES prefix.
124	*/
125	if (i == TOKEN_PREFIX && tokval.t_integer == P_TIMES) {
126	expr *value;
127
128	i = stdscan(NULL, &tokval);
129	value = evaluate (stdscan, NULL, &tokval, NULL, pass, error, NULL);
130	i = tokval.t_type;
131	if (!value) { /* but, error in evaluator */
132	result->opcode = -1; /* unrecoverable parse error: */
133	return result; /* ignore this instruction */
134	}
135	if (!is_simple (value)) {
136	error (ERR_NONFATAL,
137	"non-constant argument supplied to TIMES");
138	result->times = 1L;
139	} else {
140	result->times = value->value;
141	if (value->value < 0)
142	error(ERR_NONFATAL, "TIMES value %d is negative",
143	value->value);
144	}
145	} else {
146	if (result->nprefix == MAXPREFIX)
147	error (ERR_NONFATAL,
148	"instruction has more than %d prefixes", MAXPREFIX);
149	else
150	result->prefixes[result->nprefix++] = tokval.t_integer;
151	i = stdscan(NULL, &tokval);
152	}
153	}
154
155	if (i != TOKEN_INSN) {
156	if (result->nprefix > 0 && i == 0) {
157	/*
158	* Instruction prefixes are present, but no actual
159	* instruction. This is allowed: at this point we
160	* invent a notional instruction of RESB 0.
161	*/
162	result->opcode = I_RESB;
163	result->operands = 1;
164	result->oprs[0].type = IMMEDIATE;
165	result->oprs[0].offset = 0L;
166	result->oprs[0].segment = result->oprs[0].wrt = NO_SEG;
167	return result;
168	} else {
169	error (ERR_NONFATAL, "parser: instruction expected");
170	result->opcode = -1;
171	return result;
172	}
173	}
174
175	result->opcode = tokval.t_integer;
176	result->condition = tokval.t_inttwo;
177
178	/*
179	* RESB, RESW and RESD cannot be satisfied with incorrectly
180	* evaluated operands, since the correct values _must_ be known
181	* on the first pass. Hence, even in pass one, we set the
182	* `critical' flag on calling evaluate(), so that it will bomb
183	* out on undefined symbols. Nasty, but there's nothing we can
184	* do about it.
185	*
186	* For the moment, EQU has the same difficulty, so we'll
187	* include that.
188	*/
189	if (result->opcode == I_RESB \|\|
190	result->opcode == I_RESW \|\|
191	result->opcode == I_RESD \|\|
192	result->opcode == I_RESQ \|\|
193	result->opcode == I_REST \|\|
194	result->opcode == I_EQU)
195	critical = pass;
196	else
197	critical = (pass==2 ? 2 : 0);
198
199	if (result->opcode == I_DB \|\|
200	result->opcode == I_DW \|\|
201	result->opcode == I_DD \|\|
202	result->opcode == I_DQ \|\|
203	result->opcode == I_DT \|\|
204	result->opcode == I_INCBIN) {
205	extop eop, tail = &result->eops, *fixptr;
206	int oper_num = 0;
207
208	/*
209	* Begin to read the DB/DW/DD/DQ/DT operands.
210	*/
211	while (1) {
212	i = stdscan(NULL, &tokval);
213	if (i == 0)
214	break;
215	fixptr = tail;
216	eop = *tail = nasm_malloc(sizeof(extop));
217	tail = &eop->next;
218	eop->next = NULL;
219	eop->type = EOT_NOTHING;
220	oper_num++;
221
222	if (i == TOKEN_NUM && tokval.t_charptr && is_comma_next()) {
223	eop->type = EOT_DB_STRING;
224	eop->stringval = tokval.t_charptr;
225	eop->stringlen = tokval.t_inttwo;
226	i = stdscan(NULL, &tokval); /* eat the comma */
227	continue;
228	}
229
230	if (i == TOKEN_FLOAT \|\| i == '-') {
231	long sign = +1L;
232
233	if (i == '-') {
234	char *save = stdscan_bufptr;
235	i = stdscan(NULL, &tokval);
236	sign = -1L;
237	if (i != TOKEN_FLOAT) {
238	stdscan_bufptr = save;
239	i = tokval.t_type = '-';
240	}
241	}
242
243	if (i == TOKEN_FLOAT) {
244	eop->type = EOT_DB_STRING;
245	if (result->opcode == I_DD)
246	eop->stringlen = 4;
247	else if (result->opcode == I_DQ)
248	eop->stringlen = 8;
249	else if (result->opcode == I_DT)
250	eop->stringlen = 10;
251	else {
252	error(ERR_NONFATAL, "floating-point constant"
253	" encountered in `D%c' instruction",
254	result->opcode == I_DW ? 'W' : 'B');
255	eop->type = EOT_NOTHING;
256	}
257	eop = nasm_realloc(eop, sizeof(extop)+eop->stringlen);
258	tail = &eop->next;
259	*fixptr = eop;
260	eop->stringval = (char *)eop + sizeof(extop);
261	if (!float_const (tokval.t_charptr, sign,
262	(unsigned char *)eop->stringval,
263	eop->stringlen, error))
264	eop->type = EOT_NOTHING;
265	i = stdscan(NULL, &tokval); /* eat the comma */
266	continue;
267	}
268	}
269
270	/* anything else */ {
271	expr *value;
272	value = evaluate (stdscan, NULL, &tokval, NULL,
273	critical, error, NULL);
274	i = tokval.t_type;
275	if (!value) { /* error in evaluator */
276	result->opcode = -1;/* unrecoverable parse error: */
277	return result; /* ignore this instruction */
278	}
279	if (is_unknown(value)) {
280	eop->type = EOT_DB_NUMBER;
281	eop->offset = 0; /* doesn't matter what we put */
282	eop->segment = eop->wrt = NO_SEG; /* likewise */
283	} else if (is_reloc(value)) {
284	eop->type = EOT_DB_NUMBER;
285	eop->offset = reloc_value(value);
286	eop->segment = reloc_seg(value);
287	eop->wrt = reloc_wrt(value);
288	} else {
289	error (ERR_NONFATAL,
290	"operand %d: expression is not simple"
291	" or relocatable", oper_num);
292	}
293	}
294
295	/*
296	* We're about to call stdscan(), which will eat the
297	* comma that we're currently sitting on between
298	* arguments. However, we'd better check first that it
299	* _is_ a comma.
300	*/
301	if (i == 0) /* also could be EOL */
302	break;
303	if (i != ',') {
304	error (ERR_NONFATAL, "comma expected after operand %d",
305	oper_num);
306	result->opcode = -1;/* unrecoverable parse error: */
307	return result; /* ignore this instruction */
308	}
309	}
310
311	if (result->opcode == I_INCBIN) {
312	/*
313	* Correct syntax for INCBIN is that there should be
314	* one string operand, followed by one or two numeric
315	* operands.
316	*/
317	if (!result->eops \|\| result->eops->type != EOT_DB_STRING)
318	error (ERR_NONFATAL, "`incbin' expects a file name");
319	else if (result->eops->next &&
320	result->eops->next->type != EOT_DB_NUMBER)
321	error (ERR_NONFATAL, "`incbin': second parameter is",
322	" non-numeric");
323	else if (result->eops->next && result->eops->next->next &&
324	result->eops->next->next->type != EOT_DB_NUMBER)
325	error (ERR_NONFATAL, "`incbin': third parameter is",
326	" non-numeric");
327	else if (result->eops->next && result->eops->next->next &&
328	result->eops->next->next->next)
329	error (ERR_NONFATAL, "`incbin': more than three parameters");
330	else
331	return result;
332	/*
333	* If we reach here, one of the above errors happened.
334	* Throw the instruction away.
335	*/
336	result->opcode = -1;
337	return result;
338	}
339
340	return result;
341	}
342
343	/* right. Now we begin to parse the operands. There may be up to three
344	* of these, separated by commas, and terminated by a zero token. */
345
346	for (operand = 0; operand < 3; operand++) {
347	expr value; / used most of the time */
348	int mref; /* is this going to be a memory ref? */
349	int bracket; /* is it a [] mref, or a & mref? */
350
351	result->oprs[operand].addr_size = 0;/* have to zero this whatever */
352	result->oprs[operand].eaflags = 0; /* and this */
353	i = stdscan(NULL, &tokval);
354	if (i == 0) break; /* end of operands: get out of here */
355	result->oprs[operand].type = 0; /* so far, no override */
356	while (i == TOKEN_SPECIAL) {/* size specifiers */
357	switch ((int)tokval.t_integer) {
358	case S_BYTE:
359	result->oprs[operand].type \|= BITS8;
360	break;
361	case S_WORD:
362	result->oprs[operand].type \|= BITS16;
363	break;
364	case S_DWORD:
365	case S_LONG:
366	result->oprs[operand].type \|= BITS32;
367	break;
368	case S_QWORD:
369	result->oprs[operand].type \|= BITS64;
370	break;
371	case S_TWORD:
372	result->oprs[operand].type \|= BITS80;
373	break;
374	case S_TO:
375	result->oprs[operand].type \|= TO;
376	break;
377	case S_FAR:
378	result->oprs[operand].type \|= FAR;
379	break;
380	case S_NEAR:
381	result->oprs[operand].type \|= NEAR;
382	break;
383	case S_SHORT:
384	result->oprs[operand].type \|= SHORT;
385	break;
386	}
387	i = stdscan(NULL, &tokval);
388	}
389
390	if (i == '[' \|\| i == '&') { /* memory reference */
391	mref = TRUE;
392	bracket = (i == '[');
393	i = stdscan(NULL, &tokval);
394	if (i == TOKEN_SPECIAL) { /* check for address size override */
395	switch ((int)tokval.t_integer) {
396	case S_NOSPLIT:
397	result->oprs[operand].eaflags \|= EAF_TIMESTWO;
398	break;
399	case S_BYTE:
400	result->oprs[operand].eaflags \|= EAF_BYTEOFFS;
401	break;
402	case S_WORD:
403	result->oprs[operand].addr_size = 16;
404	result->oprs[operand].eaflags \|= EAF_WORDOFFS;
405	break;
406	case S_DWORD:
407	case S_LONG:
408	result->oprs[operand].addr_size = 32;
409	result->oprs[operand].eaflags \|= EAF_WORDOFFS;
410	break;
411	default:
412	error (ERR_NONFATAL, "invalid size specification in"
413	" effective address");
414	}
415	i = stdscan(NULL, &tokval);
416	}
417	} else { /* immediate operand, or register */
418	mref = FALSE;
419	bracket = FALSE; /* placate optimisers */
420	}
421
422	value = evaluate (stdscan, NULL, &tokval,
423	&result->forw_ref, critical, error, &hints);
424	i = tokval.t_type;
425	if (!value) { /* error in evaluator */
426	result->opcode = -1; /* unrecoverable parse error: */
427	return result; /* ignore this instruction */
428	}
429	if (i == ':' && mref) { /* it was seg:offset */
430	/*
431	* Process the segment override.
432	*/
433	if (value[1].type!=0 \|\| value->value!=1 \|\|
434	REG_SREG & ~reg_flags[value->type])
435	error (ERR_NONFATAL, "invalid segment override");
436	else if (result->nprefix == MAXPREFIX)
437	error (ERR_NONFATAL,
438	"instruction has more than %d prefixes",
439	MAXPREFIX);
440	else
441	result->prefixes[result->nprefix++] = value->type;
442
443	i = stdscan(NULL, &tokval); /* then skip the colon */
444	if (i == TOKEN_SPECIAL) { /* another check for size override */
445	switch ((int)tokval.t_integer) {
446	case S_WORD:
447	result->oprs[operand].addr_size = 16;
448	break;
449	case S_DWORD:
450	case S_LONG:
451	result->oprs[operand].addr_size = 32;
452	break;
453	default:
454	error (ERR_NONFATAL, "invalid size specification in"
455	" effective address");
456	}
457	i = stdscan(NULL, &tokval);
458	}
459	value = evaluate (stdscan, NULL, &tokval,
460	&result->forw_ref, critical, error, &hints);
461	i = tokval.t_type;
462	/* and get the offset */
463	if (!value) { /* but, error in evaluator */
464	result->opcode = -1; /* unrecoverable parse error: */
465	return result; /* ignore this instruction */
466	}
467	}
468	if (mref && bracket) { /* find ] at the end */
469	if (i != ']') {
470	error (ERR_NONFATAL, "parser: expecting ]");
471	do { /* error recovery again */
472	i = stdscan(NULL, &tokval);
473	} while (i != 0 && i != ',');
474	} else /* we got the required ] */
475	i = stdscan(NULL, &tokval);
476	} else { /* immediate operand */
477	if (i != 0 && i != ',' && i != ':') {
478	error (ERR_NONFATAL, "comma or end of line expected");
479	do { /* error recovery */
480	i = stdscan(NULL, &tokval);
481	} while (i != 0 && i != ',');
482	} else if (i == ':') {
483	result->oprs[operand].type \|= COLON;
484	}
485	}
486
487	/* now convert the exprs returned from evaluate() into operand
488	* descriptions... */
489
490	if (mref) { /* it's a memory reference */
491	expr *e = value;
492	int b, i, s; /* basereg, indexreg, scale */
493	long o; /* offset */
494
495	b = i = -1, o = s = 0;
496	result->oprs[operand].hintbase = hints.base;
497	result->oprs[operand].hinttype = hints.type;
498
499	if (e->type <= EXPR_REG_END) { /* this bit's a register */
500	if (e->value == 1) /* in fact it can be basereg */
501	b = e->type;
502	else /* no, it has to be indexreg */
503	i = e->type, s = e->value;
504	e++;
505	}
506	if (e->type && e->type <= EXPR_REG_END) {/* it's a 2nd register */
507	if (e->value != 1) { /* it has to be indexreg */
508	if (i != -1) { /* but it can't be */
509	error(ERR_NONFATAL, "invalid effective address");
510	result->opcode = -1;
511	return result;
512	} else
513	i = e->type, s = e->value;
514	} else { /* it can be basereg */
515	if (b != -1) /* or can it? */
516	i = e->type, s = 1;
517	else
518	b = e->type;
519	}
520	e++;
521	}
522	if (e->type != 0) { /* is there an offset? */
523	if (e->type <= EXPR_REG_END) {/* in fact, is there an error? */
524	error (ERR_NONFATAL, "invalid effective address");
525	result->opcode = -1;
526	return result;
527	} else {
528	if (e->type == EXPR_UNKNOWN) {
529	o = 0; /* doesn't matter what */
530	result->oprs[operand].wrt = NO_SEG; /* nor this */
531	result->oprs[operand].segment = NO_SEG; /* or this */
532	while (e->type) e++; /* go to the end of the line */
533	} else {
534	if (e->type == EXPR_SIMPLE) {
535	o = e->value;
536	e++;
537	}
538	if (e->type == EXPR_WRT) {
539	result->oprs[operand].wrt = e->value;
540	e++;
541	} else
542	result->oprs[operand].wrt = NO_SEG;
543	/*
544	* Look for a segment base type.
545	*/
546	if (e->type && e->type < EXPR_SEGBASE) {
547	error (ERR_NONFATAL, "invalid effective address");
548	result->opcode = -1;
549	return result;
550	}
551	while (e->type && e->value == 0)
552	e++;
553	if (e->type && e->value != 1) {
554	error (ERR_NONFATAL, "invalid effective address");
555	result->opcode = -1;
556	return result;
557	}
558	if (e->type) {
559	result->oprs[operand].segment =
560	e->type - EXPR_SEGBASE;
561	e++;
562	} else
563	result->oprs[operand].segment = NO_SEG;
564	while (e->type && e->value == 0)
565	e++;
566	if (e->type) {
567	error (ERR_NONFATAL, "invalid effective address");
568	result->opcode = -1;
569	return result;
570	}
571	}
572	}
573	} else {
574	o = 0;
575	result->oprs[operand].wrt = NO_SEG;
576	result->oprs[operand].segment = NO_SEG;
577	}
578
579	if (e->type != 0) { /* there'd better be nothing left! */
580	error (ERR_NONFATAL, "invalid effective address");
581	result->opcode = -1;
582	return result;
583	}
584
585	result->oprs[operand].type \|= MEMORY;
586	if (b==-1 && (i==-1 \|\| s==0))
587	result->oprs[operand].type \|= MEM_OFFS;
588	result->oprs[operand].basereg = b;
589	result->oprs[operand].indexreg = i;
590	result->oprs[operand].scale = s;
591	result->oprs[operand].offset = o;
592	} else { /* it's not a memory reference */
593	if (is_just_unknown(value)) { /* it's immediate but unknown */
594	result->oprs[operand].type \|= IMMEDIATE;
595	result->oprs[operand].offset = 0; /* don't care */
596	result->oprs[operand].segment = NO_SEG; /* don't care again */
597	result->oprs[operand].wrt = NO_SEG;/* still don't care */
598	} else if (is_reloc(value)) { /* it's immediate */
599	result->oprs[operand].type \|= IMMEDIATE;
600	result->oprs[operand].offset = reloc_value(value);
601	result->oprs[operand].segment = reloc_seg(value);
602	result->oprs[operand].wrt = reloc_wrt(value);
603	if (is_simple(value) && reloc_value(value)==1)
604	result->oprs[operand].type \|= UNITY;
605	} else { /* it's a register */
606	if (value->type>=EXPR_SIMPLE \|\| value->value!=1) {
607	error (ERR_NONFATAL, "invalid operand type");
608	result->opcode = -1;
609	return result;
610	}
611	/* clear overrides, except TO which applies to FPU regs */
612	result->oprs[operand].type &= TO;
613	result->oprs[operand].type \|= REGISTER;
614	result->oprs[operand].type \|= reg_flags[value->type];
615	result->oprs[operand].basereg = value->type;
616	}
617	}
618	}
619
620	result->operands = operand; /* set operand count */
621
622	while (operand<3) /* clear remaining operands */
623	result->oprs[operand++].type = 0;
624
625	/*
626	* Transform RESW, RESD, RESQ, REST into RESB.
627	*/
628	switch (result->opcode) {
629	case I_RESW: result->opcode=I_RESB; result->oprs[0].offset*=2; break;
630	case I_RESD: result->opcode=I_RESB; result->oprs[0].offset*=4; break;
631	case I_RESQ: result->opcode=I_RESB; result->oprs[0].offset*=8; break;
632	case I_REST: result->opcode=I_RESB; result->oprs[0].offset*=10; break;
633	}
634
635	return result;
636	}
637
638	static int is_comma_next (void) {
639	char *p;
640	int i;
641	struct tokenval tv;
642
643	p = stdscan_bufptr;
644	i = stdscan (NULL, &tv);
645	stdscan_bufptr = p;
646	return (i == ',' \|\| i == ';' \|\| !i);
647	}
648
649	void cleanup_insn (insn *i) {
650	extop *e;
651
652	while (i->eops) {
653	e = i->eops;
654	i->eops = i->eops->next;
655	nasm_free (e);
656	}
657	}