]> git.saurik.com Git - apple/boot.git/blob - i386/nasm/assemble.c
boot-122.tar.gz
[apple/boot.git] / i386 / nasm / assemble.c
1 /*
2 * Copyright (c) 1999 Apple Computer, Inc. All rights reserved.
3 *
4 * @APPLE_LICENSE_HEADER_START@
5 *
6 * Portions Copyright (c) 1999 Apple Computer, Inc. All Rights
7 * Reserved. This file contains Original Code and/or Modifications of
8 * Original Code as defined in and that are subject to the Apple Public
9 * Source License Version 1.1 (the "License"). You may not use this file
10 * except in compliance with the License. Please obtain a copy of the
11 * License at http://www.apple.com/publicsource and read it before using
12 * this file.
13 *
14 * The Original Code and all software distributed under the License are
15 * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER
16 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
17 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE OR NON- INFRINGEMENT. Please see the
19 * License for the specific language governing rights and limitations
20 * under the License.
21 *
22 * @APPLE_LICENSE_HEADER_END@
23 */
24 /* assemble.c code generation for the Netwide Assembler
25 *
26 * The Netwide Assembler is copyright (C) 1996 Simon Tatham and
27 * Julian Hall. All rights reserved. The software is
28 * redistributable under the licence given in the file "Licence"
29 * distributed in the NASM archive.
30 *
31 * the actual codes (C syntax, i.e. octal):
32 * \0 - terminates the code. (Unless it's a literal of course.)
33 * \1, \2, \3 - that many literal bytes follow in the code stream
34 * \4, \6 - the POP/PUSH (respectively) codes for CS, DS, ES, SS
35 * (POP is never used for CS) depending on operand 0
36 * \5, \7 - the second byte of POP/PUSH codes for FS, GS, depending
37 * on operand 0
38 * \10, \11, \12 - a literal byte follows in the code stream, to be added
39 * to the register value of operand 0, 1 or 2
40 * \17 - encodes the literal byte 0. (Some compilers don't take
41 * kindly to a zero byte in the _middle_ of a compile time
42 * string constant, so I had to put this hack in.)
43 * \14, \15, \16 - a signed byte immediate operand, from operand 0, 1 or 2
44 * \20, \21, \22 - a byte immediate operand, from operand 0, 1 or 2
45 * \24, \25, \26 - an unsigned byte immediate operand, from operand 0, 1 or 2
46 * \30, \31, \32 - a word immediate operand, from operand 0, 1 or 2
47 * \34, \35, \36 - select between \3[012] and \4[012] depending on 16/32 bit
48 * assembly mode or the address-size override on the operand
49 * \37 - a word constant, from the _segment_ part of operand 0
50 * \40, \41, \42 - a long immediate operand, from operand 0, 1 or 2
51 * \50, \51, \52 - a byte relative operand, from operand 0, 1 or 2
52 * \60, \61, \62 - a word relative operand, from operand 0, 1 or 2
53 * \64, \65, \66 - select between \6[012] and \7[012] depending on 16/32 bit
54 * assembly mode or the address-size override on the operand
55 * \70, \71, \72 - a long relative operand, from operand 0, 1 or 2
56 * \1ab - a ModRM, calculated on EA in operand a, with the spare
57 * field the register value of operand b.
58 * \2ab - a ModRM, calculated on EA in operand a, with the spare
59 * field equal to digit b.
60 * \30x - might be an 0x67 byte, depending on the address size of
61 * the memory reference in operand x.
62 * \310 - indicates fixed 16-bit address size, i.e. optional 0x67.
63 * \311 - indicates fixed 32-bit address size, i.e. optional 0x67.
64 * \320 - indicates fixed 16-bit operand size, i.e. optional 0x66.
65 * \321 - indicates fixed 32-bit operand size, i.e. optional 0x66.
66 * \322 - indicates that this instruction is only valid when the
67 * operand size is the default (instruction to disassembler,
68 * generates no code in the assembler)
69 * \330 - a literal byte follows in the code stream, to be added
70 * to the condition code value of the instruction.
71 * \340 - reserve <operand 0> bytes of uninitialised storage.
72 * Operand 0 had better be a segmentless constant.
73 */
74
75 #include <stdio.h>
76 #include <string.h>
77
78 #include "nasm.h"
79 #include "nasmlib.h"
80 #include "assemble.h"
81 #include "insns.h"
82
/*
 * Instruction template table, defined outside this file.  It is indexed
 * by opcode; each entry points to a run of templates that ends with a
 * template whose opcode field is -1.
 */
extern struct itemplate *nasm_instructions[];
84
/*
 * Encoded form of an effective address: the ModRM byte, the optional
 * SIB byte, and the number of offset bytes that go with them.
 */
typedef struct {
    int sib_present;            /* non-zero when a SIB byte is necessary */
    int bytes;                  /* number of bytes of offset needed */
    int size;                   /* convenience total: sib + bytes + 1 */
    unsigned char modrm;        /* the ModRM byte itself */
    unsigned char sib;          /* the SIB byte itself */
} ea;
91
92 static efunc errfunc;
93 static struct ofmt *outfmt;
94 static ListGen *list;
95
96 static long calcsize (long, long, int, insn *, char *);
97 static void gencode (long, long, int, insn *, char *, long);
98 static int regval (operand *o);
99 static int matches (struct itemplate *, insn *);
100 static ea *process_ea (operand *, ea *, int, int, int);
101 static int chsize (operand *, int);
102
103 /*
104 * This routine wrappers the real output format's output routine,
105 * in order to pass a copy of the data off to the listing file
106 * generator at the same time.
107 */
108 static void out (long offset, long segto, void *data, unsigned long type,
109 long segment, long wrt) {
110 if ((type & OUT_TYPMASK) == OUT_ADDRESS) {
111 if (segment != NO_SEG || wrt != NO_SEG) {
112 /*
113 * This address is relocated. We must write it as
114 * OUT_ADDRESS, so there's no work to be done here.
115 */
116 list->output (offset, data, type);
117 } else {
118 unsigned char p[4], *q = p;
119 /*
120 * This is a non-relocated address, and we're going to
121 * convert it into RAWDATA format.
122 */
123 if ((type & OUT_SIZMASK) == 4) {
124 WRITELONG (q, * (long *) data);
125 list->output (offset, p, OUT_RAWDATA+4);
126 } else {
127 WRITESHORT (q, * (long *) data);
128 list->output (offset, p, OUT_RAWDATA+2);
129 }
130 }
131 } else if ((type & OUT_TYPMASK) == OUT_RAWDATA) {
132 list->output (offset, data, type);
133 } else if ((type & OUT_TYPMASK) == OUT_RESERVE) {
134 list->output (offset, NULL, type);
135 } else if ((type & OUT_TYPMASK) == OUT_REL2ADR ||
136 (type & OUT_TYPMASK) == OUT_REL4ADR) {
137 list->output (offset, data, type);
138 }
139
140 outfmt->output (segto, data, type, segment, wrt);
141 }
142
143 long assemble (long segment, long offset, int bits,
144 insn *instruction, struct ofmt *output, efunc error,
145 ListGen *listgen) {
146 int j, size_prob;
147 long insn_end, itimes;
148 long start = offset;
149 struct itemplate *temp;
150
151 errfunc = error; /* to pass to other functions */
152 outfmt = output; /* likewise */
153 list = listgen; /* and again */
154
155 if (instruction->opcode == -1)
156 return 0;
157
158 if (instruction->opcode == I_DB ||
159 instruction->opcode == I_DW ||
160 instruction->opcode == I_DD ||
161 instruction->opcode == I_DQ ||
162 instruction->opcode == I_DT) {
163 extop *e;
164 long wsize = 0; /* placate gcc */
165 long t = instruction->times;
166
167 switch (instruction->opcode) {
168 case I_DB: wsize = 1; break;
169 case I_DW: wsize = 2; break;
170 case I_DD: wsize = 4; break;
171 case I_DQ: wsize = 8; break;
172 case I_DT: wsize = 10; break;
173 }
174
175 while (t--) {
176 for (e = instruction->eops; e; e = e->next) {
177 if (e->type == EOT_DB_NUMBER) {
178 if (wsize == 1) {
179 if (e->segment != NO_SEG)
180 errfunc (ERR_NONFATAL,
181 "one-byte relocation attempted");
182 else {
183 unsigned char c = e->offset;
184 out (offset, segment, &c, OUT_RAWDATA+1,
185 NO_SEG, NO_SEG);
186 }
187 } else if (wsize > 5) {
188 errfunc (ERR_NONFATAL, "integer supplied to a D%c"
189 " instruction", wsize==8 ? 'Q' : 'T');
190 } else
191 out (offset, segment, &e->offset,
192 OUT_ADDRESS+wsize, e->segment,
193 e->wrt);
194 offset += wsize;
195 } else if (e->type == EOT_DB_STRING) {
196 int align;
197
198 align = (-e->stringlen) % wsize;
199 if (align < 0)
200 align += wsize;
201 out (offset, segment, e->stringval,
202 OUT_RAWDATA+e->stringlen, NO_SEG, NO_SEG);
203 if (align)
204 out (offset, segment, "\0\0\0\0",
205 OUT_RAWDATA+align, NO_SEG, NO_SEG);
206 offset += e->stringlen + align;
207 }
208 }
209 if (t > 0 && t == instruction->times-1) {
210 /*
211 * Dummy call to list->output to give the offset to the
212 * listing module.
213 */
214 list->output (offset, NULL, OUT_RAWDATA);
215 list->uplevel (LIST_TIMES);
216 }
217 }
218 if (instruction->times > 1)
219 list->downlevel (LIST_TIMES);
220 return offset - start;
221 }
222
223 if (instruction->opcode == I_INCBIN) {
224 static char fname[FILENAME_MAX];
225 FILE *fp;
226 long len;
227
228 len = FILENAME_MAX-1;
229 if (len > instruction->eops->stringlen)
230 len = instruction->eops->stringlen;
231 strncpy (fname, instruction->eops->stringval, len);
232 fname[len] = '\0';
233 if (!(fp = fopen(fname, "rb")))
234 error (ERR_NONFATAL, "`incbin': unable to open file `%s'", fname);
235 else if (fseek(fp, 0L, SEEK_END) < 0)
236 error (ERR_NONFATAL, "`incbin': unable to seek on file `%s'",
237 fname);
238 else {
239 static char buf[2048];
240 long t = instruction->times;
241 long l;
242
243 len = ftell (fp);
244 if (instruction->eops->next) {
245 len -= instruction->eops->next->offset;
246 if (instruction->eops->next->next &&
247 len > instruction->eops->next->next->offset)
248 len = instruction->eops->next->next->offset;
249 }
250 /*
251 * Dummy call to list->output to give the offset to the
252 * listing module.
253 */
254 list->output (offset, NULL, OUT_RAWDATA);
255 list->uplevel(LIST_INCBIN);
256 while (t--) {
257 fseek (fp,
258 (instruction->eops->next ?
259 instruction->eops->next->offset : 0),
260 SEEK_SET);
261 l = len;
262 while (l > 0) {
263 long m = fread (buf, 1, (l>(long)sizeof(buf)?(long)sizeof(buf):l),
264 fp);
265 if (!m) {
266 /*
267 * This shouldn't happen unless the file
268 * actually changes while we are reading
269 * it.
270 */
271 error (ERR_NONFATAL, "`incbin': unexpected EOF while"
272 " reading file `%s'", fname);
273 return 0; /* it doesn't much matter... */
274 }
275 out (offset, segment, buf, OUT_RAWDATA+m,
276 NO_SEG, NO_SEG);
277 l -= m;
278 }
279 }
280 list->downlevel(LIST_INCBIN);
281 if (instruction->times > 1) {
282 /*
283 * Dummy call to list->output to give the offset to the
284 * listing module.
285 */
286 list->output (offset, NULL, OUT_RAWDATA);
287 list->uplevel(LIST_TIMES);
288 list->downlevel(LIST_TIMES);
289 }
290 fclose (fp);
291 return instruction->times * len;
292 }
293 return 0; /* if we're here, there's an error */
294 }
295
296 size_prob = FALSE;
297 temp = nasm_instructions[instruction->opcode];
298 while (temp->opcode != -1) {
299 int m = matches (temp, instruction);
300 if (m == 100) { /* matches! */
301 char *codes = temp->code;
302 long insn_size = calcsize(segment, offset, bits,
303 instruction, codes);
304 itimes = instruction->times;
305 if (insn_size < 0) /* shouldn't be, on pass two */
306 error (ERR_PANIC, "errors made it through from pass one");
307 else while (itimes--) {
308 insn_end = offset + insn_size;
309 for (j=0; j<instruction->nprefix; j++) {
310 unsigned char c;
311 switch (instruction->prefixes[j]) {
312 case P_LOCK:
313 c = 0xF0; break;
314 case P_REPNE: case P_REPNZ:
315 c = 0xF2; break;
316 case P_REPE: case P_REPZ: case P_REP:
317 c = 0xF3; break;
318 case R_CS: c = 0x2E; break;
319 case R_DS: c = 0x3E; break;
320 case R_ES: c = 0x26; break;
321 case R_FS: c = 0x64; break;
322 case R_GS: c = 0x65; break;
323 case R_SS: c = 0x36; break;
324 case P_A16:
325 if (bits == 16)
326 c = 0; /* no prefix */
327 else
328 c = 0x67;
329 break;
330 case P_A32:
331 if (bits == 32)
332 c = 0; /* no prefix */
333 else
334 c = 0x67;
335 break;
336 case P_O16:
337 if (bits == 16)
338 c = 0; /* no prefix */
339 else
340 c = 0x66;
341 break;
342 case P_O32:
343 if (bits == 32)
344 c = 0; /* no prefix */
345 else
346 c = 0x66;
347 break;
348 default:
349 error (ERR_PANIC,
350 "invalid instruction prefix");
351 }
352 if (c != 0)
353 out (offset, segment, &c, OUT_RAWDATA+1,
354 NO_SEG, NO_SEG);
355 offset++;
356 }
357 gencode (segment, offset, bits, instruction, codes, insn_end);
358 offset += insn_size;
359 if (itimes > 0 && itimes == instruction->times-1) {
360 /*
361 * Dummy call to list->output to give the offset to the
362 * listing module.
363 */
364 list->output (offset, NULL, OUT_RAWDATA);
365 list->uplevel (LIST_TIMES);
366 }
367 }
368 if (instruction->times > 1)
369 list->downlevel (LIST_TIMES);
370 return offset - start;
371 } else if (m > 0) {
372 size_prob = m;
373 }
374 temp++;
375 }
376 if (temp->opcode == -1) { /* didn't match any instruction */
377 if (size_prob == 1) /* would have matched, but for size */
378 error (ERR_NONFATAL, "operation size not specified");
379 else if (size_prob == 2)
380 error (ERR_NONFATAL, "mismatch in operand sizes");
381 else
382 error (ERR_NONFATAL,
383 "invalid combination of opcode and operands");
384 }
385 return 0;
386 }
387
388 long insn_size (long segment, long offset, int bits,
389 insn *instruction, efunc error) {
390 struct itemplate *temp;
391
392 errfunc = error; /* to pass to other functions */
393
394 if (instruction->opcode == -1)
395 return 0;
396
397 if (instruction->opcode == I_DB ||
398 instruction->opcode == I_DW ||
399 instruction->opcode == I_DD ||
400 instruction->opcode == I_DQ ||
401 instruction->opcode == I_DT) {
402 extop *e;
403 long isize, osize, wsize = 0; /* placate gcc */
404
405 isize = 0;
406 switch (instruction->opcode) {
407 case I_DB: wsize = 1; break;
408 case I_DW: wsize = 2; break;
409 case I_DD: wsize = 4; break;
410 case I_DQ: wsize = 8; break;
411 case I_DT: wsize = 10; break;
412 }
413
414 for (e = instruction->eops; e; e = e->next) {
415 long align;
416
417 osize = 0;
418 if (e->type == EOT_DB_NUMBER)
419 osize = 1;
420 else if (e->type == EOT_DB_STRING)
421 osize = e->stringlen;
422
423 align = (-osize) % wsize;
424 if (align < 0)
425 align += wsize;
426 isize += osize + align;
427 }
428 return isize * instruction->times;
429 }
430
431 if (instruction->opcode == I_INCBIN) {
432 char fname[FILENAME_MAX];
433 FILE *fp;
434 long len;
435
436 len = FILENAME_MAX-1;
437 if (len > instruction->eops->stringlen)
438 len = instruction->eops->stringlen;
439 strncpy (fname, instruction->eops->stringval, len);
440 fname[len] = '\0';
441 if (!(fp = fopen(fname, "rb")))
442 error (ERR_NONFATAL, "`incbin': unable to open file `%s'", fname);
443 else if (fseek(fp, 0L, SEEK_END) < 0)
444 error (ERR_NONFATAL, "`incbin': unable to seek on file `%s'",
445 fname);
446 else {
447 len = ftell (fp);
448 fclose (fp);
449 if (instruction->eops->next) {
450 len -= instruction->eops->next->offset;
451 if (instruction->eops->next->next &&
452 len > instruction->eops->next->next->offset)
453 len = instruction->eops->next->next->offset;
454 }
455 return instruction->times * len;
456 }
457 return 0; /* if we're here, there's an error */
458 }
459
460 temp = nasm_instructions[instruction->opcode];
461 while (temp->opcode != -1) {
462 if (matches(temp, instruction) == 100) {
463 /* we've matched an instruction. */
464 long isize;
465 char *codes = temp->code;
466 int j;
467
468 isize = calcsize(segment, offset, bits, instruction, codes);
469 if (isize < 0)
470 return -1;
471 for (j = 0; j < instruction->nprefix; j++) {
472 if ((instruction->prefixes[j] != P_A16 &&
473 instruction->prefixes[j] != P_O16 && bits==16) ||
474 (instruction->prefixes[j] != P_A32 &&
475 instruction->prefixes[j] != P_O32 && bits==32))
476 isize++;
477 }
478 return isize * instruction->times;
479 }
480 temp++;
481 }
482 return -1; /* didn't match any instruction */
483 }
484
485 static long calcsize (long segment, long offset, int bits,
486 insn *ins, char *codes) {
487 long length = 0;
488 unsigned char c;
489
490 while (*codes) switch (c = *codes++) {
491 case 01: case 02: case 03:
492 codes += c, length += c; break;
493 case 04: case 05: case 06: case 07:
494 length++; break;
495 case 010: case 011: case 012:
496 codes++, length++; break;
497 case 017:
498 length++; break;
499 case 014: case 015: case 016:
500 length++; break;
501 case 020: case 021: case 022:
502 length++; break;
503 case 024: case 025: case 026:
504 length++; break;
505 case 030: case 031: case 032:
506 length += 2; break;
507 case 034: case 035: case 036:
508 length += ((ins->oprs[c-034].addr_size ?
509 ins->oprs[c-034].addr_size : bits) == 16 ? 2 : 4); break;
510 case 037:
511 length += 2; break;
512 case 040: case 041: case 042:
513 length += 4; break;
514 case 050: case 051: case 052:
515 length++; break;
516 case 060: case 061: case 062:
517 length += 2; break;
518 case 064: case 065: case 066:
519 length += ((ins->oprs[c-064].addr_size ?
520 ins->oprs[c-064].addr_size : bits) == 16 ? 2 : 4); break;
521 case 070: case 071: case 072:
522 length += 4; break;
523 case 0300: case 0301: case 0302:
524 length += chsize (&ins->oprs[c-0300], bits);
525 break;
526 case 0310:
527 length += (bits==32);
528 break;
529 case 0311:
530 length += (bits==16);
531 break;
532 case 0312:
533 break;
534 case 0320:
535 length += (bits==32);
536 break;
537 case 0321:
538 length += (bits==16);
539 break;
540 case 0322:
541 break;
542 case 0330:
543 codes++, length++; break;
544 case 0340: case 0341: case 0342:
545 if (ins->oprs[0].segment != NO_SEG)
546 errfunc (ERR_NONFATAL, "attempt to reserve non-constant"
547 " quantity of BSS space");
548 else
549 length += ins->oprs[0].offset << (c-0340);
550 break;
551 default: /* can't do it by 'case' statements */
552 if (c>=0100 && c<=0277) { /* it's an EA */
553 ea ea_data;
554 if (!process_ea (&ins->oprs[(c>>3)&7], &ea_data, bits, 0,
555 ins->forw_ref)) {
556 errfunc (ERR_NONFATAL, "invalid effective address");
557 return -1;
558 } else
559 length += ea_data.size;
560 } else
561 errfunc (ERR_PANIC, "internal instruction table corrupt"
562 ": instruction code 0x%02X given", c);
563 }
564 return length;
565 }
566
567 static void gencode (long segment, long offset, int bits,
568 insn *ins, char *codes, long insn_end) {
569 static char condval[] = { /* conditional opcodes */
570 0x7, 0x3, 0x2, 0x6, 0x2, 0x4, 0xF, 0xD, 0xC, 0xE, 0x6, 0x2,
571 0x3, 0x7, 0x3, 0x5, 0xE, 0xC, 0xD, 0xF, 0x1, 0xB, 0x9, 0x5,
572 0x0, 0xA, 0xA, 0xB, 0x8, 0x4
573 };
574 unsigned char c, bytes[4];
575 long data, size;
576
577 while (*codes) switch (c = *codes++) {
578 case 01: case 02: case 03:
579 out (offset, segment, codes, OUT_RAWDATA+c, NO_SEG, NO_SEG);
580 codes += c;
581 offset += c;
582 break;
583 case 04: case 06:
584 switch (ins->oprs[0].basereg) {
585 case R_CS: bytes[0] = 0x0E + (c == 0x04 ? 1 : 0); break;
586 case R_DS: bytes[0] = 0x1E + (c == 0x04 ? 1 : 0); break;
587 case R_ES: bytes[0] = 0x06 + (c == 0x04 ? 1 : 0); break;
588 case R_SS: bytes[0] = 0x16 + (c == 0x04 ? 1 : 0); break;
589 default:
590 errfunc (ERR_PANIC, "bizarre 8086 segment register received");
591 }
592 out (offset, segment, bytes, OUT_RAWDATA+1, NO_SEG, NO_SEG);
593 offset++;
594 break;
595 case 05: case 07:
596 switch (ins->oprs[0].basereg) {
597 case R_FS: bytes[0] = 0xA0 + (c == 0x05 ? 1 : 0); break;
598 case R_GS: bytes[0] = 0xA8 + (c == 0x05 ? 1 : 0); break;
599 default:
600 errfunc (ERR_PANIC, "bizarre 386 segment register received");
601 }
602 out (offset, segment, bytes, OUT_RAWDATA+1, NO_SEG, NO_SEG);
603 offset++;
604 break;
605 case 010: case 011: case 012:
606 bytes[0] = *codes++ + regval(&ins->oprs[c-010]);
607 out (offset, segment, bytes, OUT_RAWDATA+1, NO_SEG, NO_SEG);
608 offset += 1;
609 break;
610 case 017:
611 bytes[0] = 0;
612 out (offset, segment, bytes, OUT_RAWDATA+1, NO_SEG, NO_SEG);
613 offset += 1;
614 break;
615 case 014: case 015: case 016:
616 if (ins->oprs[c-014].offset < -128 || ins->oprs[c-014].offset > 127)
617 errfunc (ERR_WARNING, "signed byte value exceeds bounds");
618 if (ins->oprs[c-014].segment != NO_SEG) {
619 data = ins->oprs[c-014].offset;
620 out (offset, segment, &data, OUT_ADDRESS+1,
621 ins->oprs[c-014].segment, ins->oprs[c-014].wrt);
622 } else {
623 bytes[0] = ins->oprs[c-014].offset;
624 out (offset, segment, bytes, OUT_RAWDATA+1, NO_SEG, NO_SEG);
625 }
626 offset += 1;
627 break;
628 case 020: case 021: case 022:
629 if (ins->oprs[c-020].offset < -256 || ins->oprs[c-020].offset > 255)
630 errfunc (ERR_WARNING, "byte value exceeds bounds");
631 if (ins->oprs[c-020].segment != NO_SEG) {
632 data = ins->oprs[c-020].offset;
633 out (offset, segment, &data, OUT_ADDRESS+1,
634 ins->oprs[c-020].segment, ins->oprs[c-020].wrt);
635 } else {
636 bytes[0] = ins->oprs[c-020].offset;
637 out (offset, segment, bytes, OUT_RAWDATA+1, NO_SEG, NO_SEG);
638 }
639 offset += 1;
640 break;
641 case 024: case 025: case 026:
642 if (ins->oprs[c-024].offset < 0 || ins->oprs[c-024].offset > 255)
643 errfunc (ERR_WARNING, "unsigned byte value exceeds bounds");
644 if (ins->oprs[c-024].segment != NO_SEG) {
645 data = ins->oprs[c-024].offset;
646 out (offset, segment, &data, OUT_ADDRESS+1,
647 ins->oprs[c-024].segment, ins->oprs[c-024].wrt);
648 } else {
649 bytes[0] = ins->oprs[c-024].offset;
650 out (offset, segment, bytes, OUT_RAWDATA+1, NO_SEG, NO_SEG);
651 }
652 offset += 1;
653 break;
654 case 030: case 031: case 032:
655 if (ins->oprs[c-030].segment == NO_SEG &&
656 ins->oprs[c-030].wrt == NO_SEG &&
657 (ins->oprs[c-030].offset < -65536L ||
658 ins->oprs[c-030].offset > 65535L))
659 errfunc (ERR_WARNING, "word value exceeds bounds");
660 data = ins->oprs[c-030].offset;
661 out (offset, segment, &data, OUT_ADDRESS+2,
662 ins->oprs[c-030].segment, ins->oprs[c-030].wrt);
663 offset += 2;
664 break;
665 case 034: case 035: case 036:
666 data = ins->oprs[c-034].offset;
667 size = ((ins->oprs[c-034].addr_size ?
668 ins->oprs[c-034].addr_size : bits) == 16 ? 2 : 4);
669 if (size==16 && (data < -65536L || data > 65535L))
670 errfunc (ERR_WARNING, "word value exceeds bounds");
671 out (offset, segment, &data, OUT_ADDRESS+size,
672 ins->oprs[c-034].segment, ins->oprs[c-034].wrt);
673 offset += size;
674 break;
675 case 037:
676 if (ins->oprs[0].segment == NO_SEG)
677 errfunc (ERR_NONFATAL, "value referenced by FAR is not"
678 " relocatable");
679 data = 0L;
680 out (offset, segment, &data, OUT_ADDRESS+2,
681 outfmt->segbase(1+ins->oprs[0].segment),
682 ins->oprs[0].wrt);
683 offset += 2;
684 break;
685 case 040: case 041: case 042:
686 data = ins->oprs[c-040].offset;
687 out (offset, segment, &data, OUT_ADDRESS+4,
688 ins->oprs[c-040].segment, ins->oprs[c-040].wrt);
689 offset += 4;
690 break;
691 case 050: case 051: case 052:
692 if (ins->oprs[c-050].segment != segment)
693 errfunc (ERR_NONFATAL, "short relative jump outside segment");
694 data = ins->oprs[c-050].offset - insn_end;
695 if (data > 127 || data < -128)
696 errfunc (ERR_NONFATAL, "short jump is out of range");
697 bytes[0] = data;
698 out (offset, segment, bytes, OUT_RAWDATA+1, NO_SEG, NO_SEG);
699 offset += 1;
700 break;
701 case 060: case 061: case 062:
702 if (ins->oprs[c-060].segment != segment) {
703 data = ins->oprs[c-060].offset;
704 out (offset, segment, &data, OUT_REL2ADR+insn_end-offset,
705 ins->oprs[c-060].segment, ins->oprs[c-060].wrt);
706 } else {
707 data = ins->oprs[c-060].offset - insn_end;
708 out (offset, segment, &data,
709 OUT_ADDRESS+2, NO_SEG, NO_SEG);
710 }
711 offset += 2;
712 break;
713 case 064: case 065: case 066:
714 size = ((ins->oprs[c-064].addr_size ?
715 ins->oprs[c-064].addr_size : bits) == 16 ? 2 : 4);
716 if (ins->oprs[c-064].segment != segment) {
717 data = ins->oprs[c-064].offset;
718 size = (bits == 16 ? OUT_REL2ADR : OUT_REL4ADR);
719 out (offset, segment, &data, size+insn_end-offset,
720 ins->oprs[c-064].segment, ins->oprs[c-064].wrt);
721 size = (bits == 16 ? 2 : 4);
722 } else {
723 data = ins->oprs[c-064].offset - insn_end;
724 out (offset, segment, &data,
725 OUT_ADDRESS+size, NO_SEG, NO_SEG);
726 }
727 offset += size;
728 break;
729 case 070: case 071: case 072:
730 if (ins->oprs[c-070].segment != segment) {
731 data = ins->oprs[c-070].offset;
732 out (offset, segment, &data, OUT_REL4ADR+insn_end-offset,
733 ins->oprs[c-070].segment, ins->oprs[c-070].wrt);
734 } else {
735 data = ins->oprs[c-070].offset - insn_end;
736 out (offset, segment, &data,
737 OUT_ADDRESS+4, NO_SEG, NO_SEG);
738 }
739 offset += 4;
740 break;
741 case 0300: case 0301: case 0302:
742 if (chsize (&ins->oprs[c-0300], bits)) {
743 *bytes = 0x67;
744 out (offset, segment, bytes,
745 OUT_RAWDATA+1, NO_SEG, NO_SEG);
746 offset += 1;
747 } else
748 offset += 0;
749 break;
750 case 0310:
751 if (bits==32) {
752 *bytes = 0x67;
753 out (offset, segment, bytes,
754 OUT_RAWDATA+1, NO_SEG, NO_SEG);
755 offset += 1;
756 } else
757 offset += 0;
758 break;
759 case 0311:
760 if (bits==16) {
761 *bytes = 0x67;
762 out (offset, segment, bytes,
763 OUT_RAWDATA+1, NO_SEG, NO_SEG);
764 offset += 1;
765 } else
766 offset += 0;
767 break;
768 case 0312:
769 break;
770 case 0320:
771 if (bits==32) {
772 *bytes = 0x66;
773 out (offset, segment, bytes,
774 OUT_RAWDATA+1, NO_SEG, NO_SEG);
775 offset += 1;
776 } else
777 offset += 0;
778 break;
779 case 0321:
780 if (bits==16) {
781 *bytes = 0x66;
782 out (offset, segment, bytes,
783 OUT_RAWDATA+1, NO_SEG, NO_SEG);
784 offset += 1;
785 } else
786 offset += 0;
787 break;
788 case 0322:
789 break;
790 case 0330:
791 *bytes = *codes++ + condval[ins->condition];
792 out (offset, segment, bytes,
793 OUT_RAWDATA+1, NO_SEG, NO_SEG);
794 offset += 1;
795 break;
796 case 0340: case 0341: case 0342:
797 if (ins->oprs[0].segment != NO_SEG)
798 errfunc (ERR_PANIC, "non-constant BSS size in pass two");
799 else {
800 long size = ins->oprs[0].offset << (c-0340);
801 if (size > 0)
802 out (offset, segment, NULL,
803 OUT_RESERVE+size, NO_SEG, NO_SEG);
804 offset += size;
805 }
806 break;
807 default: /* can't do it by 'case' statements */
808 if (c>=0100 && c<=0277) { /* it's an EA */
809 ea ea_data;
810 int rfield;
811 unsigned char *p;
812 long s;
813
814 if (c<=0177) /* pick rfield from operand b */
815 rfield = regval (&ins->oprs[c&7]);
816 else /* rfield is constant */
817 rfield = c & 7;
818 if (!process_ea (&ins->oprs[(c>>3)&7], &ea_data, bits, rfield,
819 ins->forw_ref))
820 errfunc (ERR_NONFATAL, "invalid effective address");
821
822 p = bytes;
823 *p++ = ea_data.modrm;
824 if (ea_data.sib_present)
825 *p++ = ea_data.sib;
826 /*
827 * the cast in the next line is to placate MS C...
828 */
829 out (offset, segment, bytes, OUT_RAWDATA+(long)(p-bytes),
830 NO_SEG, NO_SEG);
831 s = p-bytes;
832
833 switch (ea_data.bytes) {
834 case 0:
835 break;
836 case 1:
837 if (ins->oprs[(c>>3)&7].segment != NO_SEG) {
838 data = ins->oprs[(c>>3)&7].offset;
839 out (offset, segment, &data, OUT_ADDRESS+1,
840 ins->oprs[(c>>3)&7].segment,
841 ins->oprs[(c>>3)&7].wrt);
842 } else {
843 *bytes = ins->oprs[(c>>3)&7].offset;
844 out (offset, segment, bytes, OUT_RAWDATA+1,
845 NO_SEG, NO_SEG);
846 }
847 s++;
848 break;
849 case 2:
850 case 4:
851 data = ins->oprs[(c>>3)&7].offset;
852 out (offset, segment, &data,
853 OUT_ADDRESS+ea_data.bytes,
854 ins->oprs[(c>>3)&7].segment, ins->oprs[(c>>3)&7].wrt);
855 s += ea_data.bytes;
856 break;
857 }
858 offset += s;
859 } else
860 errfunc (ERR_PANIC, "internal instruction table corrupt"
861 ": instruction code 0x%02X given", c);
862 }
863 }
864
865 static int regval (operand *o) {
866 switch (o->basereg) {
867 case R_EAX: case R_AX: case R_AL: case R_ES: case R_CR0: case R_DR0:
868 case R_ST0: case R_MM0:
869 return 0;
870 case R_ECX: case R_CX: case R_CL: case R_CS: case R_DR1: case R_ST1:
871 case R_MM1:
872 return 1;
873 case R_EDX: case R_DX: case R_DL: case R_SS: case R_CR2: case R_DR2:
874 case R_ST2: case R_MM2:
875 return 2;
876 case R_EBX: case R_BX: case R_BL: case R_DS: case R_CR3: case R_DR3:
877 case R_TR3: case R_ST3: case R_MM3:
878 return 3;
879 case R_ESP: case R_SP: case R_AH: case R_FS: case R_CR4: case R_TR4:
880 case R_ST4: case R_MM4:
881 return 4;
882 case R_EBP: case R_BP: case R_CH: case R_GS: case R_TR5: case R_ST5:
883 case R_MM5:
884 return 5;
885 case R_ESI: case R_SI: case R_DH: case R_DR6: case R_TR6: case R_ST6:
886 case R_MM6:
887 return 6;
888 case R_EDI: case R_DI: case R_BH: case R_DR7: case R_TR7: case R_ST7:
889 case R_MM7:
890 return 7;
891 default: /* panic */
892 errfunc (ERR_PANIC, "invalid register operand given to regval()");
893 return 0;
894 }
895 }
896
897 static int matches (struct itemplate *itemp, insn *instruction) {
898 int i, size, oprs, ret;
899
900 ret = 100;
901
902 /*
903 * Check the opcode
904 */
905 if (itemp->opcode != instruction->opcode) return 0;
906
907 /*
908 * Count the operands
909 */
910 if (itemp->operands != instruction->operands) return 0;
911
912 /*
913 * Check that no spurious colons or TOs are present
914 */
915 for (i=0; i<itemp->operands; i++)
916 if (instruction->oprs[i].type & ~itemp->opd[i] & (COLON|TO))
917 return 0;
918
919 /*
920 * Check that the operand flags all match up
921 */
922 for (i=0; i<itemp->operands; i++)
923 if (itemp->opd[i] & ~instruction->oprs[i].type ||
924 ((itemp->opd[i] & SIZE_MASK) &&
925 ((itemp->opd[i] ^ instruction->oprs[i].type) & SIZE_MASK))) {
926 if ((itemp->opd[i] & ~instruction->oprs[i].type & NON_SIZE) ||
927 (instruction->oprs[i].type & SIZE_MASK))
928 return 0;
929 else
930 ret = 1;
931 }
932
933 /*
934 * Check operand sizes
935 */
936 if (itemp->flags & IF_SB) {
937 size = BITS8;
938 oprs = itemp->operands;
939 } else if (itemp->flags & IF_SW) {
940 size = BITS16;
941 oprs = itemp->operands;
942 } else if (itemp->flags & IF_SD) {
943 size = BITS32;
944 oprs = itemp->operands;
945 } else if (itemp->flags & (IF_SM | IF_SM2)) {
946 oprs = (itemp->flags & IF_SM2 ? 2 : itemp->operands);
947 size = 0; /* placate gcc */
948 for (i=0; i<oprs; i++)
949 if ( (size = itemp->opd[i] & SIZE_MASK) != 0)
950 break;
951 } else {
952 size = 0;
953 oprs = itemp->operands;
954 }
955
956 for (i=0; i<itemp->operands; i++)
957 if (!(itemp->opd[i] & SIZE_MASK) &&
958 (instruction->oprs[i].type & SIZE_MASK & ~size))
959 ret = 2;
960
961 return ret;
962 }
963
964 static ea *process_ea (operand *input, ea *output, int addrbits, int rfield,
965 int forw_ref) {
966 if (!(REGISTER & ~input->type)) { /* it's a single register */
967 static int regs[] = {
968 R_MM0, R_EAX, R_AX, R_AL, R_MM1, R_ECX, R_CX, R_CL,
969 R_MM2, R_EDX, R_DX, R_DL, R_MM3, R_EBX, R_BX, R_BL,
970 R_MM4, R_ESP, R_SP, R_AH, R_MM5, R_EBP, R_BP, R_CH,
971 R_MM6, R_ESI, R_SI, R_DH, R_MM7, R_EDI, R_DI, R_BH
972 };
973 int i;
974
975 for (i=0; i<(int)elements(regs); i++)
976 if (input->basereg == regs[i]) break;
977 if (i<(int)elements(regs)) {
978 output->sib_present = FALSE;/* no SIB necessary */
979 output->bytes = 0; /* no offset necessary either */
980 output->modrm = 0xC0 | (rfield << 3) | (i/4);
981 } else
982 return NULL;
983 } else { /* it's a memory reference */
984 if (input->basereg==-1 && (input->indexreg==-1 || input->scale==0)) {
985 /* it's a pure offset */
986 if (input->addr_size)
987 addrbits = input->addr_size;
988 output->sib_present = FALSE;
989 output->bytes = (addrbits==32 ? 4 : 2);
990 output->modrm = (addrbits==32 ? 5 : 6) | (rfield << 3);
991 } else { /* it's an indirection */
992 int i=input->indexreg, b=input->basereg, s=input->scale;
993 long o=input->offset, seg=input->segment;
994 int hb=input->hintbase, ht=input->hinttype;
995 int t;
996
997 if (s==0) i = -1; /* make this easy, at least */
998
999 if (i==R_EAX || i==R_EBX || i==R_ECX || i==R_EDX
1000 || i==R_EBP || i==R_ESP || i==R_ESI || i==R_EDI
1001 || b==R_EAX || b==R_EBX || b==R_ECX || b==R_EDX
1002 || b==R_EBP || b==R_ESP || b==R_ESI || b==R_EDI) {
1003 /* it must be a 32-bit memory reference. Firstly we have
1004 * to check that all registers involved are type Exx. */
1005 if (i!=-1 && i!=R_EAX && i!=R_EBX && i!=R_ECX && i!=R_EDX
1006 && i!=R_EBP && i!=R_ESP && i!=R_ESI && i!=R_EDI)
1007 return NULL;
1008 if (b!=-1 && b!=R_EAX && b!=R_EBX && b!=R_ECX && b!=R_EDX
1009 && b!=R_EBP && b!=R_ESP && b!=R_ESI && b!=R_EDI)
1010 return NULL;
1011
1012 /* While we're here, ensure the user didn't specify WORD. */
1013 if (input->addr_size == 16)
1014 return NULL;
1015
1016 /* now reorganise base/index */
1017 if (s == 1 && b != i && b != -1 && i != -1 &&
1018 ((hb==b&&ht==EAH_NOTBASE) || (hb==i&&ht==EAH_MAKEBASE)))
1019 t = b, b = i, i = t; /* swap if hints say so */
1020 if (b==i) /* convert EAX+2*EAX to 3*EAX */
1021 b = -1, s++;
1022 if (b==-1 && s==1 && !(hb == i && ht == EAH_NOTBASE))
1023 b = i, i = -1; /* make single reg base, unless hint */
1024 if (((s==2 && i!=R_ESP && !(input->eaflags & EAF_TIMESTWO)) ||
1025 s==3 || s==5 || s==9) && b==-1)
1026 b = i, s--; /* convert 3*EAX to EAX+2*EAX */
1027 if (s==1 && i==R_ESP) /* swap ESP into base if scale is 1 */
1028 i = b, b = R_ESP;
1029 if (i==R_ESP || (s!=1 && s!=2 && s!=4 && s!=8 && i!=-1))
1030 return NULL; /* wrong, for various reasons */
1031
1032 if (i==-1 && b!=R_ESP) {/* no SIB needed */
1033 int mod, rm;
1034 switch(b) {
1035 case R_EAX: rm = 0; break;
1036 case R_ECX: rm = 1; break;
1037 case R_EDX: rm = 2; break;
1038 case R_EBX: rm = 3; break;
1039 case R_EBP: rm = 5; break;
1040 case R_ESI: rm = 6; break;
1041 case R_EDI: rm = 7; break;
1042 case -1: rm = 5; break;
1043 default: /* should never happen */
1044 return NULL;
1045 }
1046 if (b==-1 || (b!=R_EBP && o==0 &&
1047 seg==NO_SEG && !forw_ref &&
1048 !(input->eaflags &
1049 (EAF_BYTEOFFS|EAF_WORDOFFS))))
1050 mod = 0;
1051 else if (input->eaflags & EAF_BYTEOFFS ||
1052 (o>=-128 && o<=127 && seg==NO_SEG && !forw_ref &&
1053 !(input->eaflags & EAF_WORDOFFS))) {
1054 mod = 1;
1055 } else
1056 mod = 2;
1057
1058 output->sib_present = FALSE;
1059 output->bytes = (b==-1 || mod==2 ? 4 : mod);
1060 output->modrm = (mod<<6) | (rfield<<3) | rm;
1061 } else { /* we need a SIB */
1062 int mod, scale, index, base;
1063
1064 switch (b) {
1065 case R_EAX: base = 0; break;
1066 case R_ECX: base = 1; break;
1067 case R_EDX: base = 2; break;
1068 case R_EBX: base = 3; break;
1069 case R_ESP: base = 4; break;
1070 case R_EBP: case -1: base = 5; break;
1071 case R_ESI: base = 6; break;
1072 case R_EDI: base = 7; break;
1073 default: /* then what the smeg is it? */
1074 return NULL; /* panic */
1075 }
1076
1077 switch (i) {
1078 case R_EAX: index = 0; break;
1079 case R_ECX: index = 1; break;
1080 case R_EDX: index = 2; break;
1081 case R_EBX: index = 3; break;
1082 case -1: index = 4; break;
1083 case R_EBP: index = 5; break;
1084 case R_ESI: index = 6; break;
1085 case R_EDI: index = 7; break;
1086 default: /* then what the smeg is it? */
1087 return NULL; /* panic */
1088 }
1089
1090 if (i==-1) s = 1;
1091 switch (s) {
1092 case 1: scale = 0; break;
1093 case 2: scale = 1; break;
1094 case 4: scale = 2; break;
1095 case 8: scale = 3; break;
1096 default: /* then what the smeg is it? */
1097 return NULL; /* panic */
1098 }
1099
1100 if (b==-1 || (b!=R_EBP && o==0 &&
1101 seg==NO_SEG && !forw_ref &&
1102 !(input->eaflags &
1103 (EAF_BYTEOFFS|EAF_WORDOFFS))))
1104 mod = 0;
1105 else if (input->eaflags & EAF_BYTEOFFS ||
1106 (o>=-128 && o<=127 && seg==NO_SEG && !forw_ref &&
1107 !(input->eaflags & EAF_WORDOFFS)))
1108 mod = 1;
1109 else
1110 mod = 2;
1111
1112 output->sib_present = TRUE;
1113 output->bytes = (b==-1 || mod==2 ? 4 : mod);
1114 output->modrm = (mod<<6) | (rfield<<3) | 4;
1115 output->sib = (scale<<6) | (index<<3) | base;
1116 }
1117 } else { /* it's 16-bit */
1118 int mod, rm;
1119
1120 /* check all registers are BX, BP, SI or DI */
1121 if ((b!=-1 && b!=R_BP && b!=R_BX && b!=R_SI && b!=R_DI) ||
1122 (i!=-1 && i!=R_BP && i!=R_BX && i!=R_SI && i!=R_DI))
1123 return NULL;
1124
1125 /* ensure the user didn't specify DWORD */
1126 if (input->addr_size == 32)
1127 return NULL;
1128
1129 if (s!=1 && i!=-1) return NULL;/* no can do, in 16-bit EA */
1130 if (b==-1 && i!=-1) b ^= i, i ^= b, b ^= i; /* swap them round */
1131 if ((b==R_SI || b==R_DI) && i!=-1)
1132 b ^= i, i ^= b, b ^= i; /* have BX/BP as base, SI/DI index */
1133 if (b==i) return NULL;/* shouldn't ever happen, in theory */
1134 if (i!=-1 && b!=-1 &&
1135 (i==R_BP || i==R_BX || b==R_SI || b==R_DI))
1136 return NULL; /* invalid combinations */
1137 if (b==-1) /* pure offset: handled above */
1138 return NULL; /* so if it gets to here, panic! */
1139
1140 rm = -1;
1141 if (i!=-1)
1142 switch (i*256 + b) {
1143 case R_SI*256+R_BX: rm=0; break;
1144 case R_DI*256+R_BX: rm=1; break;
1145 case R_SI*256+R_BP: rm=2; break;
1146 case R_DI*256+R_BP: rm=3; break;
1147 }
1148 else
1149 switch (b) {
1150 case R_SI: rm=4; break;
1151 case R_DI: rm=5; break;
1152 case R_BP: rm=6; break;
1153 case R_BX: rm=7; break;
1154 }
1155 if (rm==-1) /* can't happen, in theory */
1156 return NULL; /* so panic if it does */
1157
1158 if (o==0 && seg==NO_SEG && !forw_ref && rm!=6 &&
1159 !(input->eaflags & (EAF_BYTEOFFS|EAF_WORDOFFS)))
1160 mod = 0;
1161 else if (input->eaflags & EAF_BYTEOFFS ||
1162 (o>=-128 && o<=127 && seg==NO_SEG && !forw_ref &&
1163 !(input->eaflags & EAF_WORDOFFS)))
1164 mod = 1;
1165 else
1166 mod = 2;
1167
1168 output->sib_present = FALSE; /* no SIB - it's 16-bit */
1169 output->bytes = mod; /* bytes of offset needed */
1170 output->modrm = (mod<<6) | (rfield<<3) | rm;
1171 }
1172 }
1173 }
1174 output->size = 1 + output->sib_present + output->bytes;
1175 return output;
1176 }
1177
1178 static int chsize (operand *input, int addrbits) {
1179 if (!(MEMORY & ~input->type)) {
1180 int i=input->indexreg, b=input->basereg;
1181
1182 if (input->scale==0) i = -1;
1183
1184 if (i == -1 && b == -1) /* pure offset */
1185 return (input->addr_size != 0 && input->addr_size != addrbits);
1186
1187 if (i==R_EAX || i==R_EBX || i==R_ECX || i==R_EDX
1188 || i==R_EBP || i==R_ESP || i==R_ESI || i==R_EDI
1189 || b==R_EAX || b==R_EBX || b==R_ECX || b==R_EDX
1190 || b==R_EBP || b==R_ESP || b==R_ESI || b==R_EDI)
1191 return (addrbits==16);
1192 else
1193 return (addrbits==32);
1194 } else
1195 return 0;
1196 }