]> git.saurik.com Git - apple/javascriptcore.git/blame - disassembler/udis86/udis86_decode.c
JavaScriptCore-1218.0.1.tar.gz
[apple/javascriptcore.git] / disassembler / udis86 / udis86_decode.c
CommitLineData
93a37866
A
1/* udis86 - libudis86/decode.c
2 *
3 * Copyright (c) 2002-2009 Vivek Thampi
4 * All rights reserved.
5 *
6 * Redistribution and use in source and binary forms, with or without modification,
7 * are permitted provided that the following conditions are met:
8 *
9 * * Redistributions of source code must retain the above copyright notice,
10 * this list of conditions and the following disclaimer.
11 * * Redistributions in binary form must reproduce the above copyright notice,
12 * this list of conditions and the following disclaimer in the documentation
13 * and/or other materials provided with the distribution.
14 *
15 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
16 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
17 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
18 * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
19 * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
20 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
21 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
22 * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
23 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
24 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25 */
26
27#include "config.h"
28
29#if USE(UDIS86)
30
31#include "udis86_extern.h"
32#include "udis86_types.h"
33#include "udis86_input.h"
34#include "udis86_decode.h"
35#include <wtf/Assertions.h>
36
37#define dbg(x, n...)
38/* #define dbg printf */
39
40#ifndef __UD_STANDALONE__
41# include <string.h>
42#endif /* __UD_STANDALONE__ */
43
44/* The max number of prefixes to an instruction */
45#define MAX_PREFIXES 15
46
47/* instruction aliases and special cases */
48static struct ud_itab_entry s_ie__invalid =
49 { UD_Iinvalid, O_NONE, O_NONE, O_NONE, P_none };
50
51static int
52decode_ext(struct ud *u, uint16_t ptr);
53
54
/*
 * eff_opr_mode
 *
 * Returns the effective operand size (16, 32 or 64) for the given
 * disassembly mode, rex.w flag and operand-size override prefix.
 */
static inline int
eff_opr_mode(int dis_mode, int rex_w, int pfx_opr)
{
  switch (dis_mode) {
    case 64:
      /* rex.w wins over the operand-size override */
      if (rex_w) {
        return 64;
      }
      return pfx_opr ? 16 : 32;
    case 32:
      return pfx_opr ? 16 : 32;
    default:
      ASSERT(dis_mode == 16);
      return pfx_opr ? 32 : 16;
  }
}
67
68
/*
 * eff_adr_mode
 *
 * Returns the effective address size (16, 32 or 64) for the given
 * disassembly mode and address-size override prefix.
 */
static inline int
eff_adr_mode(int dis_mode, int pfx_adr)
{
  switch (dis_mode) {
    case 64:
      return pfx_adr ? 32 : 64;
    case 32:
      return pfx_adr ? 16 : 32;
    default:
      ASSERT(dis_mode == 16);
      return pfx_adr ? 32 : 16;
  }
}
81
82
83/* Looks up mnemonic code in the mnemonic string table
84 * Returns NULL if the mnemonic code is invalid
85 */
86const char * ud_lookup_mnemonic( enum ud_mnemonic_code c )
87{
88 return ud_mnemonics_str[ c ];
89}
90
91
92/*
93 * decode_prefixes
94 *
95 * Extracts instruction prefixes.
96 */
97static int
98decode_prefixes(struct ud *u)
99{
100 unsigned int have_pfx = 1;
101 unsigned int i;
102 uint8_t curr;
103
104 /* if in error state, bail out */
105 if ( u->error )
106 return -1;
107
108 /* keep going as long as there are prefixes available */
109 for ( i = 0; have_pfx ; ++i ) {
110
111 /* Get next byte. */
112 ud_inp_next(u);
113 if ( u->error )
114 return -1;
115 curr = ud_inp_curr( u );
116
117 /* rex prefixes in 64bit mode */
118 if ( u->dis_mode == 64 && ( curr & 0xF0 ) == 0x40 ) {
119 u->pfx_rex = curr;
120 } else {
121 switch ( curr )
122 {
123 case 0x2E :
124 u->pfx_seg = UD_R_CS;
125 u->pfx_rex = 0;
126 break;
127 case 0x36 :
128 u->pfx_seg = UD_R_SS;
129 u->pfx_rex = 0;
130 break;
131 case 0x3E :
132 u->pfx_seg = UD_R_DS;
133 u->pfx_rex = 0;
134 break;
135 case 0x26 :
136 u->pfx_seg = UD_R_ES;
137 u->pfx_rex = 0;
138 break;
139 case 0x64 :
140 u->pfx_seg = UD_R_FS;
141 u->pfx_rex = 0;
142 break;
143 case 0x65 :
144 u->pfx_seg = UD_R_GS;
145 u->pfx_rex = 0;
146 break;
147 case 0x67 : /* adress-size override prefix */
148 u->pfx_adr = 0x67;
149 u->pfx_rex = 0;
150 break;
151 case 0xF0 :
152 u->pfx_lock = 0xF0;
153 u->pfx_rex = 0;
154 break;
155 case 0x66:
156 /* the 0x66 sse prefix is only effective if no other sse prefix
157 * has already been specified.
158 */
159 if ( !u->pfx_insn ) u->pfx_insn = 0x66;
160 u->pfx_opr = 0x66;
161 u->pfx_rex = 0;
162 break;
163 case 0xF2:
164 u->pfx_insn = 0xF2;
165 u->pfx_repne = 0xF2;
166 u->pfx_rex = 0;
167 break;
168 case 0xF3:
169 u->pfx_insn = 0xF3;
170 u->pfx_rep = 0xF3;
171 u->pfx_repe = 0xF3;
172 u->pfx_rex = 0;
173 break;
174 default :
175 /* No more prefixes */
176 have_pfx = 0;
177 break;
178 }
179 }
180
181 /* check if we reached max instruction length */
182 if ( i + 1 == MAX_INSN_LENGTH ) {
183 u->error = 1;
184 break;
185 }
186 }
187
188 /* return status */
189 if ( u->error )
190 return -1;
191
192 /* rewind back one byte in stream, since the above loop
193 * stops with a non-prefix byte.
194 */
195 ud_inp_back(u);
196 return 0;
197}
198
199
200static inline unsigned int modrm( struct ud * u )
201{
202 if ( !u->have_modrm ) {
203 u->modrm = ud_inp_next( u );
204 u->have_modrm = 1;
205 }
206 return u->modrm;
207}
208
209
210static unsigned int resolve_operand_size( const struct ud * u, unsigned int s )
211{
212 switch ( s )
213 {
214 case SZ_V:
215 return ( u->opr_mode );
216 case SZ_Z:
217 return ( u->opr_mode == 16 ) ? 16 : 32;
218 case SZ_P:
219 return ( u->opr_mode == 16 ) ? SZ_WP : SZ_DP;
220 case SZ_MDQ:
221 return ( u->opr_mode == 16 ) ? 32 : u->opr_mode;
222 case SZ_RDQ:
223 return ( u->dis_mode == 64 ) ? 64 : 32;
224 default:
225 return s;
226 }
227}
228
229
230static int resolve_mnemonic( struct ud* u )
231{
232 /* far/near flags */
233 u->br_far = 0;
234 u->br_near = 0;
235 /* readjust operand sizes for call/jmp instrcutions */
236 if ( u->mnemonic == UD_Icall || u->mnemonic == UD_Ijmp ) {
237 /* WP: 16:16 pointer */
238 if ( u->operand[ 0 ].size == SZ_WP ) {
239 u->operand[ 0 ].size = 16;
240 u->br_far = 1;
241 u->br_near= 0;
242 /* DP: 32:32 pointer */
243 } else if ( u->operand[ 0 ].size == SZ_DP ) {
244 u->operand[ 0 ].size = 32;
245 u->br_far = 1;
246 u->br_near= 0;
247 } else {
248 u->br_far = 0;
249 u->br_near= 1;
250 }
251 /* resolve 3dnow weirdness. */
252 } else if ( u->mnemonic == UD_I3dnow ) {
253 u->mnemonic = ud_itab[ u->le->table[ ud_inp_curr( u ) ] ].mnemonic;
254 }
255 /* SWAPGS is only valid in 64bits mode */
256 if ( u->mnemonic == UD_Iswapgs && u->dis_mode != 64 ) {
257 u->error = 1;
258 return -1;
259 }
260
261 if (u->mnemonic == UD_Ixchg) {
262 if ((u->operand[0].type == UD_OP_REG && u->operand[0].base == UD_R_AX &&
263 u->operand[1].type == UD_OP_REG && u->operand[1].base == UD_R_AX) ||
264 (u->operand[0].type == UD_OP_REG && u->operand[0].base == UD_R_EAX &&
265 u->operand[1].type == UD_OP_REG && u->operand[1].base == UD_R_EAX)) {
266 u->operand[0].type = UD_NONE;
267 u->operand[1].type = UD_NONE;
268 u->mnemonic = UD_Inop;
269 }
270 }
271
272 if (u->mnemonic == UD_Inop && u->pfx_rep) {
273 u->pfx_rep = 0;
274 u->mnemonic = UD_Ipause;
275 }
276 return 0;
277}
278
279
280/* -----------------------------------------------------------------------------
281 * decode_a()- Decodes operands of the type seg:offset
282 * -----------------------------------------------------------------------------
283 */
284static void
285decode_a(struct ud* u, struct ud_operand *op)
286{
287 if (u->opr_mode == 16) {
288 /* seg16:off16 */
289 op->type = UD_OP_PTR;
290 op->size = 32;
291 op->lval.ptr.off = ud_inp_uint16(u);
292 op->lval.ptr.seg = ud_inp_uint16(u);
293 } else {
294 /* seg16:off32 */
295 op->type = UD_OP_PTR;
296 op->size = 48;
297 op->lval.ptr.off = ud_inp_uint32(u);
298 op->lval.ptr.seg = ud_inp_uint16(u);
299 }
300}
301
302/* -----------------------------------------------------------------------------
303 * decode_gpr() - Returns decoded General Purpose Register
304 * -----------------------------------------------------------------------------
305 */
306static enum ud_type
307decode_gpr(register struct ud* u, unsigned int s, unsigned char rm)
308{
309 s = resolve_operand_size(u, s);
310
311 switch (s) {
312 case 64:
313 return UD_R_RAX + rm;
314 case SZ_DP:
315 case 32:
316 return UD_R_EAX + rm;
317 case SZ_WP:
318 case 16:
319 return UD_R_AX + rm;
320 case 8:
321 if (u->dis_mode == 64 && u->pfx_rex) {
322 if (rm >= 4)
323 return UD_R_SPL + (rm-4);
324 return UD_R_AL + rm;
325 } else return UD_R_AL + rm;
326 default:
327 return 0;
328 }
329}
330
331/* -----------------------------------------------------------------------------
332 * resolve_gpr64() - 64bit General Purpose Register-Selection.
333 * -----------------------------------------------------------------------------
334 */
335static enum ud_type
336resolve_gpr64(struct ud* u, enum ud_operand_code gpr_op, enum ud_operand_size * size)
337{
338 if (gpr_op >= OP_rAXr8 && gpr_op <= OP_rDIr15)
339 gpr_op = (gpr_op - OP_rAXr8) | (REX_B(u->pfx_rex) << 3);
340 else gpr_op = (gpr_op - OP_rAX);
341
342 if (u->opr_mode == 16) {
343 *size = 16;
344 return gpr_op + UD_R_AX;
345 }
346 if (u->dis_mode == 32 ||
347 (u->opr_mode == 32 && ! (REX_W(u->pfx_rex) || u->default64))) {
348 *size = 32;
349 return gpr_op + UD_R_EAX;
350 }
351
352 *size = 64;
353 return gpr_op + UD_R_RAX;
354}
355
356/* -----------------------------------------------------------------------------
357 * resolve_gpr32 () - 32bit General Purpose Register-Selection.
358 * -----------------------------------------------------------------------------
359 */
360static enum ud_type
361resolve_gpr32(struct ud* u, enum ud_operand_code gpr_op)
362{
363 gpr_op = gpr_op - OP_eAX;
364
365 if (u->opr_mode == 16)
366 return gpr_op + UD_R_AX;
367
368 return gpr_op + UD_R_EAX;
369}
370
371/* -----------------------------------------------------------------------------
372 * resolve_reg() - Resolves the register type
373 * -----------------------------------------------------------------------------
374 */
375static enum ud_type
376resolve_reg(struct ud* u, unsigned int type, unsigned char i)
377{
378 switch (type) {
379 case T_MMX : return UD_R_MM0 + (i & 7);
380 case T_XMM : return UD_R_XMM0 + i;
381 case T_CRG : return UD_R_CR0 + i;
382 case T_DBG : return UD_R_DR0 + i;
383 case T_SEG : {
384 /*
385 * Only 6 segment registers, anything else is an error.
386 */
387 if ((i & 7) > 5) {
388 u->error = 1;
389 } else {
390 return UD_R_ES + (i & 7);
391 }
392 }
393 case T_NONE:
394 default: return UD_NONE;
395 }
396}
397
398/* -----------------------------------------------------------------------------
399 * decode_imm() - Decodes Immediate values.
400 * -----------------------------------------------------------------------------
401 */
402static void
403decode_imm(struct ud* u, unsigned int s, struct ud_operand *op)
404{
405 op->size = resolve_operand_size(u, s);
406 op->type = UD_OP_IMM;
407
408 switch (op->size) {
409 case 8: op->lval.sbyte = ud_inp_uint8(u); break;
410 case 16: op->lval.uword = ud_inp_uint16(u); break;
411 case 32: op->lval.udword = ud_inp_uint32(u); break;
412 case 64: op->lval.uqword = ud_inp_uint64(u); break;
413 default: return;
414 }
415}
416
417
418/*
419 * decode_modrm_reg
420 *
421 * Decodes reg field of mod/rm byte
422 *
423 */
424static void
425decode_modrm_reg(struct ud *u,
426 struct ud_operand *operand,
427 unsigned int type,
428 unsigned int size)
429{
430 uint8_t reg = (REX_R(u->pfx_rex) << 3) | MODRM_REG(modrm(u));
431 operand->type = UD_OP_REG;
432 operand->size = resolve_operand_size(u, size);
433
434 if (type == T_GPR) {
435 operand->base = decode_gpr(u, operand->size, reg);
436 } else {
437 operand->base = resolve_reg(u, type, reg);
438 }
439}
440
441
442/*
443 * decode_modrm_rm
444 *
445 * Decodes rm field of mod/rm byte
446 *
447 */
448static void
449decode_modrm_rm(struct ud *u,
450 struct ud_operand *op,
451 unsigned char type,
452 unsigned int size)
453
454{
455 unsigned char mod, rm, reg;
456
457 /* get mod, r/m and reg fields */
458 mod = MODRM_MOD(modrm(u));
459 rm = (REX_B(u->pfx_rex) << 3) | MODRM_RM(modrm(u));
460 reg = (REX_R(u->pfx_rex) << 3) | MODRM_REG(modrm(u));
461
462 op->size = resolve_operand_size(u, size);
463
464 /*
465 * If mod is 11b, then the modrm.rm specifies a register.
466 *
467 */
468 if (mod == 3) {
469 op->type = UD_OP_REG;
470 if (type == T_GPR) {
471 op->base = decode_gpr(u, op->size, rm);
472 } else {
473 op->base = resolve_reg(u, type, (REX_B(u->pfx_rex) << 3) | (rm & 7));
474 }
475 return;
476 }
477
478
479 /*
480 * !11 => Memory Address
481 */
482 op->type = UD_OP_MEM;
483
484 if (u->adr_mode == 64) {
485 op->base = UD_R_RAX + rm;
486 if (mod == 1) {
487 op->offset = 8;
488 } else if (mod == 2) {
489 op->offset = 32;
490 } else if (mod == 0 && (rm & 7) == 5) {
491 op->base = UD_R_RIP;
492 op->offset = 32;
493 } else {
494 op->offset = 0;
495 }
496 /*
497 * Scale-Index-Base (SIB)
498 */
499 if ((rm & 7) == 4) {
500 ud_inp_next(u);
501
502 op->scale = (1 << SIB_S(ud_inp_curr(u))) & ~1;
503 op->index = UD_R_RAX + (SIB_I(ud_inp_curr(u)) | (REX_X(u->pfx_rex) << 3));
504 op->base = UD_R_RAX + (SIB_B(ud_inp_curr(u)) | (REX_B(u->pfx_rex) << 3));
505
506 /* special conditions for base reference */
507 if (op->index == UD_R_RSP) {
508 op->index = UD_NONE;
509 op->scale = UD_NONE;
510 }
511
512 if (op->base == UD_R_RBP || op->base == UD_R_R13) {
513 if (mod == 0) {
514 op->base = UD_NONE;
515 }
516 if (mod == 1) {
517 op->offset = 8;
518 } else {
519 op->offset = 32;
520 }
521 }
522 }
523 } else if (u->adr_mode == 32) {
524 op->base = UD_R_EAX + rm;
525 if (mod == 1) {
526 op->offset = 8;
527 } else if (mod == 2) {
528 op->offset = 32;
529 } else if (mod == 0 && rm == 5) {
530 op->base = UD_NONE;
531 op->offset = 32;
532 } else {
533 op->offset = 0;
534 }
535
536 /* Scale-Index-Base (SIB) */
537 if ((rm & 7) == 4) {
538 ud_inp_next(u);
539
540 op->scale = (1 << SIB_S(ud_inp_curr(u))) & ~1;
541 op->index = UD_R_EAX + (SIB_I(ud_inp_curr(u)) | (REX_X(u->pfx_rex) << 3));
542 op->base = UD_R_EAX + (SIB_B(ud_inp_curr(u)) | (REX_B(u->pfx_rex) << 3));
543
544 if (op->index == UD_R_ESP) {
545 op->index = UD_NONE;
546 op->scale = UD_NONE;
547 }
548
549 /* special condition for base reference */
550 if (op->base == UD_R_EBP) {
551 if (mod == 0) {
552 op->base = UD_NONE;
553 }
554 if (mod == 1) {
555 op->offset = 8;
556 } else {
557 op->offset = 32;
558 }
559 }
560 }
561 } else {
562 const unsigned int bases[] = { UD_R_BX, UD_R_BX, UD_R_BP, UD_R_BP,
563 UD_R_SI, UD_R_DI, UD_R_BP, UD_R_BX };
564 const unsigned int indices[] = { UD_R_SI, UD_R_DI, UD_R_SI, UD_R_DI,
565 UD_NONE, UD_NONE, UD_NONE, UD_NONE };
566 op->base = bases[rm & 7];
567 op->index = indices[rm & 7];
568 if (mod == 0 && rm == 6) {
569 op->offset= 16;
570 op->base = UD_NONE;
571 } else if (mod == 1) {
572 op->offset = 8;
573 } else if (mod == 2) {
574 op->offset = 16;
575 }
576 }
577
578 /*
579 * extract offset, if any
580 */
581 switch (op->offset) {
582 case 8 : op->lval.ubyte = ud_inp_uint8(u); break;
583 case 16: op->lval.uword = ud_inp_uint16(u); break;
584 case 32: op->lval.udword = ud_inp_uint32(u); break;
585 case 64: op->lval.uqword = ud_inp_uint64(u); break;
586 default: break;
587 }
588}
589
590/* -----------------------------------------------------------------------------
591 * decode_o() - Decodes offset
592 * -----------------------------------------------------------------------------
593 */
594static void
595decode_o(struct ud* u, unsigned int s, struct ud_operand *op)
596{
597 switch (u->adr_mode) {
598 case 64:
599 op->offset = 64;
600 op->lval.uqword = ud_inp_uint64(u);
601 break;
602 case 32:
603 op->offset = 32;
604 op->lval.udword = ud_inp_uint32(u);
605 break;
606 case 16:
607 op->offset = 16;
608 op->lval.uword = ud_inp_uint16(u);
609 break;
610 default:
611 return;
612 }
613 op->type = UD_OP_MEM;
614 op->size = resolve_operand_size(u, s);
615}
616
617/* -----------------------------------------------------------------------------
618 * decode_operands() - Disassembles Operands.
619 * -----------------------------------------------------------------------------
620 */
621static int
622decode_operand(struct ud *u,
623 struct ud_operand *operand,
624 enum ud_operand_code type,
625 unsigned int size)
626{
627 switch (type) {
628 case OP_A :
629 decode_a(u, operand);
630 break;
631 case OP_MR:
632 if (MODRM_MOD(modrm(u)) == 3) {
633 decode_modrm_rm(u, operand, T_GPR,
634 size == SZ_DY ? SZ_MDQ : SZ_V);
635 } else if (size == SZ_WV) {
636 decode_modrm_rm( u, operand, T_GPR, SZ_W);
637 } else if (size == SZ_BV) {
638 decode_modrm_rm( u, operand, T_GPR, SZ_B);
639 } else if (size == SZ_DY) {
640 decode_modrm_rm( u, operand, T_GPR, SZ_D);
641 } else {
642 ASSERT(!"unexpected size");
643 }
644 break;
645 case OP_M:
646 if (MODRM_MOD(modrm(u)) == 3) {
647 u->error = 1;
648 }
649 /* intended fall through */
650 case OP_E:
651 decode_modrm_rm(u, operand, T_GPR, size);
652 break;
653 break;
654 case OP_G:
655 decode_modrm_reg(u, operand, T_GPR, size);
656 break;
657 case OP_I:
658 decode_imm(u, size, operand);
659 break;
660 case OP_I1:
661 operand->type = UD_OP_CONST;
662 operand->lval.udword = 1;
663 break;
664 case OP_PR:
665 if (MODRM_MOD(modrm(u)) != 3) {
666 u->error = 1;
667 }
668 decode_modrm_rm(u, operand, T_MMX, size);
669 break;
670 case OP_P:
671 decode_modrm_reg(u, operand, T_MMX, size);
672 break;
673 case OP_VR:
674 if (MODRM_MOD(modrm(u)) != 3) {
675 u->error = 1;
676 }
677 /* intended fall through */
678 case OP_W:
679 decode_modrm_rm(u, operand, T_XMM, size);
680 break;
681 case OP_V:
682 decode_modrm_reg(u, operand, T_XMM, size);
683 break;
684 case OP_S:
685 decode_modrm_reg(u, operand, T_SEG, size);
686 break;
687 case OP_AL:
688 case OP_CL:
689 case OP_DL:
690 case OP_BL:
691 case OP_AH:
692 case OP_CH:
693 case OP_DH:
694 case OP_BH:
695 operand->type = UD_OP_REG;
696 operand->base = UD_R_AL + (type - OP_AL);
697 operand->size = 8;
698 break;
699 case OP_DX:
700 operand->type = UD_OP_REG;
701 operand->base = UD_R_DX;
702 operand->size = 16;
703 break;
704 case OP_O:
705 decode_o(u, size, operand);
706 break;
707 case OP_rAXr8:
708 case OP_rCXr9:
709 case OP_rDXr10:
710 case OP_rBXr11:
711 case OP_rSPr12:
712 case OP_rBPr13:
713 case OP_rSIr14:
714 case OP_rDIr15:
715 case OP_rAX:
716 case OP_rCX:
717 case OP_rDX:
718 case OP_rBX:
719 case OP_rSP:
720 case OP_rBP:
721 case OP_rSI:
722 case OP_rDI:
723 operand->type = UD_OP_REG;
724 operand->base = resolve_gpr64(u, type, &operand->size);
725 break;
726 case OP_ALr8b:
727 case OP_CLr9b:
728 case OP_DLr10b:
729 case OP_BLr11b:
730 case OP_AHr12b:
731 case OP_CHr13b:
732 case OP_DHr14b:
733 case OP_BHr15b: {
734 ud_type_t gpr = (type - OP_ALr8b) + UD_R_AL
735 + (REX_B(u->pfx_rex) << 3);
736 if (UD_R_AH <= gpr && u->pfx_rex) {
737 gpr = gpr + 4;
738 }
739 operand->type = UD_OP_REG;
740 operand->base = gpr;
741 break;
742 }
743 case OP_eAX:
744 case OP_eCX:
745 case OP_eDX:
746 case OP_eBX:
747 case OP_eSP:
748 case OP_eBP:
749 case OP_eSI:
750 case OP_eDI:
751 operand->type = UD_OP_REG;
752 operand->base = resolve_gpr32(u, type);
753 operand->size = u->opr_mode == 16 ? 16 : 32;
754 break;
755 case OP_ES:
756 case OP_CS:
757 case OP_DS:
758 case OP_SS:
759 case OP_FS:
760 case OP_GS:
761 /* in 64bits mode, only fs and gs are allowed */
762 if (u->dis_mode == 64) {
763 if (type != OP_FS && type != OP_GS) {
764 u->error= 1;
765 }
766 }
767 operand->type = UD_OP_REG;
768 operand->base = (type - OP_ES) + UD_R_ES;
769 operand->size = 16;
770 break;
771 case OP_J :
772 decode_imm(u, size, operand);
773 operand->type = UD_OP_JIMM;
774 break ;
775 case OP_Q:
776 decode_modrm_rm(u, operand, T_MMX, size);
777 break;
778 case OP_R :
779 decode_modrm_rm(u, operand, T_GPR, size);
780 break;
781 case OP_C:
782 decode_modrm_reg(u, operand, T_CRG, size);
783 break;
784 case OP_D:
785 decode_modrm_reg(u, operand, T_DBG, size);
786 break;
787 case OP_I3 :
788 operand->type = UD_OP_CONST;
789 operand->lval.sbyte = 3;
790 break;
791 case OP_ST0:
792 case OP_ST1:
793 case OP_ST2:
794 case OP_ST3:
795 case OP_ST4:
796 case OP_ST5:
797 case OP_ST6:
798 case OP_ST7:
799 operand->type = UD_OP_REG;
800 operand->base = (type - OP_ST0) + UD_R_ST0;
801 operand->size = 0;
802 break;
803 case OP_AX:
804 operand->type = UD_OP_REG;
805 operand->base = UD_R_AX;
806 operand->size = 16;
807 break;
808 default :
809 operand->type = UD_NONE;
810 break;
811 }
812 return 0;
813}
814
815
816/*
817 * decode_operands
818 *
819 * Disassemble upto 3 operands of the current instruction being
820 * disassembled. By the end of the function, the operand fields
821 * of the ud structure will have been filled.
822 */
823static int
824decode_operands(struct ud* u)
825{
826 decode_operand(u, &u->operand[0],
827 u->itab_entry->operand1.type,
828 u->itab_entry->operand1.size);
829 decode_operand(u, &u->operand[1],
830 u->itab_entry->operand2.type,
831 u->itab_entry->operand2.size);
832 decode_operand(u, &u->operand[2],
833 u->itab_entry->operand3.type,
834 u->itab_entry->operand3.size);
835 return 0;
836}
837
838/* -----------------------------------------------------------------------------
839 * clear_insn() - clear instruction structure
840 * -----------------------------------------------------------------------------
841 */
842static void
843clear_insn(register struct ud* u)
844{
845 u->error = 0;
846 u->pfx_seg = 0;
847 u->pfx_opr = 0;
848 u->pfx_adr = 0;
849 u->pfx_lock = 0;
850 u->pfx_repne = 0;
851 u->pfx_rep = 0;
852 u->pfx_repe = 0;
853 u->pfx_rex = 0;
854 u->pfx_insn = 0;
855 u->mnemonic = UD_Inone;
856 u->itab_entry = NULL;
857 u->have_modrm = 0;
858
859 memset( &u->operand[ 0 ], 0, sizeof( struct ud_operand ) );
860 memset( &u->operand[ 1 ], 0, sizeof( struct ud_operand ) );
861 memset( &u->operand[ 2 ], 0, sizeof( struct ud_operand ) );
862}
863
864static int
865resolve_mode( struct ud* u )
866{
867 /* if in error state, bail out */
868 if ( u->error ) return -1;
869
870 /* propagate prefix effects */
871 if ( u->dis_mode == 64 ) { /* set 64bit-mode flags */
872
873 /* Check validity of instruction m64 */
874 if ( P_INV64( u->itab_entry->prefix ) ) {
875 u->error = 1;
876 return -1;
877 }
878
879 /* effective rex prefix is the effective mask for the
880 * instruction hard-coded in the opcode map.
881 */
882 u->pfx_rex = ( u->pfx_rex & 0x40 ) |
883 ( u->pfx_rex & REX_PFX_MASK( u->itab_entry->prefix ) );
884
885 /* whether this instruction has a default operand size of
886 * 64bit, also hardcoded into the opcode map.
887 */
888 u->default64 = P_DEF64( u->itab_entry->prefix );
889 /* calculate effective operand size */
890 if ( REX_W( u->pfx_rex ) ) {
891 u->opr_mode = 64;
892 } else if ( u->pfx_opr ) {
893 u->opr_mode = 16;
894 } else {
895 /* unless the default opr size of instruction is 64,
896 * the effective operand size in the absence of rex.w
897 * prefix is 32.
898 */
899 u->opr_mode = ( u->default64 ) ? 64 : 32;
900 }
901
902 /* calculate effective address size */
903 u->adr_mode = (u->pfx_adr) ? 32 : 64;
904 } else if ( u->dis_mode == 32 ) { /* set 32bit-mode flags */
905 u->opr_mode = ( u->pfx_opr ) ? 16 : 32;
906 u->adr_mode = ( u->pfx_adr ) ? 16 : 32;
907 } else if ( u->dis_mode == 16 ) { /* set 16bit-mode flags */
908 u->opr_mode = ( u->pfx_opr ) ? 32 : 16;
909 u->adr_mode = ( u->pfx_adr ) ? 32 : 16;
910 }
911
912 /* These flags determine which operand to apply the operand size
913 * cast to.
914 */
915 u->c1 = ( P_C1( u->itab_entry->prefix ) ) ? 1 : 0;
916 u->c2 = ( P_C2( u->itab_entry->prefix ) ) ? 1 : 0;
917 u->c3 = ( P_C3( u->itab_entry->prefix ) ) ? 1 : 0;
918
919 /* set flags for implicit addressing */
920 u->implicit_addr = P_IMPADDR( u->itab_entry->prefix );
921
922 return 0;
923}
924
925static int gen_hex( struct ud *u )
926{
927 unsigned int i;
928 unsigned char *src_ptr = ud_inp_sess( u );
929 char* src_hex;
930
931 /* bail out if in error stat. */
932 if ( u->error ) return -1;
933 /* output buffer pointe */
934 src_hex = ( char* ) u->insn_hexcode;
935 /* for each byte used to decode instruction */
936 for ( i = 0; i < u->inp_ctr; ++i, ++src_ptr) {
937 sprintf( src_hex, "%02x", *src_ptr & 0xFF );
938 src_hex += 2;
939 }
940 return 0;
941}
942
943
944static inline int
945decode_insn(struct ud *u, uint16_t ptr)
946{
947 ASSERT((ptr & 0x8000) == 0);
948 u->itab_entry = &ud_itab[ ptr ];
949 u->mnemonic = u->itab_entry->mnemonic;
950 return (resolve_mode(u) == 0 &&
951 decode_operands(u) == 0 &&
952 resolve_mnemonic(u) == 0) ? 0 : -1;
953}
954
955
956/*
957 * decode_3dnow()
958 *
959 * Decoding 3dnow is a little tricky because of its strange opcode
960 * structure. The final opcode disambiguation depends on the last
961 * byte that comes after the operands have been decoded. Fortunately,
962 * all 3dnow instructions have the same set of operand types. So we
963 * go ahead and decode the instruction by picking an arbitrarily chosen
964 * valid entry in the table, decode the operands, and read the final
965 * byte to resolve the menmonic.
966 */
967static inline int
968decode_3dnow(struct ud* u)
969{
970 uint16_t ptr;
971 ASSERT(u->le->type == UD_TAB__OPC_3DNOW);
972 ASSERT(u->le->table[0xc] != 0);
973 decode_insn(u, u->le->table[0xc]);
974 ud_inp_next(u);
975 if (u->error) {
976 return -1;
977 }
978 ptr = u->le->table[ud_inp_curr(u)];
979 ASSERT((ptr & 0x8000) == 0);
980 u->mnemonic = ud_itab[ptr].mnemonic;
981 return 0;
982}
983
984
985static int
986decode_ssepfx(struct ud *u)
987{
988 uint8_t idx = ((u->pfx_insn & 0xf) + 1) / 2;
989 if (u->le->table[idx] == 0) {
990 idx = 0;
991 }
992 if (idx && u->le->table[idx] != 0) {
993 /*
994 * "Consume" the prefix as a part of the opcode, so it is no
995 * longer exported as an instruction prefix.
996 */
997 switch (u->pfx_insn) {
998 case 0xf2:
999 u->pfx_repne = 0;
1000 break;
1001 case 0xf3:
1002 u->pfx_rep = 0;
1003 u->pfx_repe = 0;
1004 break;
1005 case 0x66:
1006 u->pfx_opr = 0;
1007 break;
1008 }
1009 }
1010 return decode_ext(u, u->le->table[idx]);
1011}
1012
1013
1014/*
1015 * decode_ext()
1016 *
1017 * Decode opcode extensions (if any)
1018 */
1019static int
1020decode_ext(struct ud *u, uint16_t ptr)
1021{
1022 uint8_t idx = 0;
1023 if ((ptr & 0x8000) == 0) {
1024 return decode_insn(u, ptr);
1025 }
1026 u->le = &ud_lookup_table_list[(~0x8000 & ptr)];
1027 if (u->le->type == UD_TAB__OPC_3DNOW) {
1028 return decode_3dnow(u);
1029 }
1030
1031 switch (u->le->type) {
1032 case UD_TAB__OPC_MOD:
1033 /* !11 = 0, 11 = 1 */
1034 idx = (MODRM_MOD(modrm(u)) + 1) / 4;
1035 break;
1036 /* disassembly mode/operand size/address size based tables.
1037 * 16 = 0,, 32 = 1, 64 = 2
1038 */
1039 case UD_TAB__OPC_MODE:
1040 idx = u->dis_mode / 32;
1041 break;
1042 case UD_TAB__OPC_OSIZE:
1043 idx = eff_opr_mode(u->dis_mode, REX_W(u->pfx_rex), u->pfx_opr) / 32;
1044 break;
1045 case UD_TAB__OPC_ASIZE:
1046 idx = eff_adr_mode(u->dis_mode, u->pfx_adr) / 32;
1047 break;
1048 case UD_TAB__OPC_X87:
1049 idx = modrm(u) - 0xC0;
1050 break;
1051 case UD_TAB__OPC_VENDOR:
1052 if (u->vendor == UD_VENDOR_ANY) {
1053 /* choose a valid entry */
1054 idx = (u->le->table[idx] != 0) ? 0 : 1;
1055 } else if (u->vendor == UD_VENDOR_AMD) {
1056 idx = 0;
1057 } else {
1058 idx = 1;
1059 }
1060 break;
1061 case UD_TAB__OPC_RM:
1062 idx = MODRM_RM(modrm(u));
1063 break;
1064 case UD_TAB__OPC_REG:
1065 idx = MODRM_REG(modrm(u));
1066 break;
1067 case UD_TAB__OPC_SSE:
1068 return decode_ssepfx(u);
1069 default:
1070 ASSERT(!"not reached");
1071 break;
1072 }
1073
1074 return decode_ext(u, u->le->table[idx]);
1075}
1076
1077
1078static inline int
1079decode_opcode(struct ud *u)
1080{
1081 uint16_t ptr;
1082 ASSERT(u->le->type == UD_TAB__OPC_TABLE);
1083 ud_inp_next(u);
1084 if (u->error) {
1085 return -1;
1086 }
1087 ptr = u->le->table[ud_inp_curr(u)];
1088 if (ptr & 0x8000) {
1089 u->le = &ud_lookup_table_list[ptr & ~0x8000];
1090 if (u->le->type == UD_TAB__OPC_TABLE) {
1091 return decode_opcode(u);
1092 }
1093 }
1094 return decode_ext(u, ptr);
1095}
1096
1097
1098/* =============================================================================
1099 * ud_decode() - Instruction decoder. Returns the number of bytes decoded.
1100 * =============================================================================
1101 */
1102unsigned int
1103ud_decode(struct ud *u)
1104{
1105 ud_inp_start(u);
1106 clear_insn(u);
1107 u->le = &ud_lookup_table_list[0];
1108 u->error = decode_prefixes(u) == -1 ||
1109 decode_opcode(u) == -1 ||
1110 u->error;
1111 /* Handle decode error. */
1112 if (u->error) {
1113 /* clear out the decode data. */
1114 clear_insn(u);
1115 /* mark the sequence of bytes as invalid. */
1116 u->itab_entry = & s_ie__invalid;
1117 u->mnemonic = u->itab_entry->mnemonic;
1118 }
1119
1120 /* maybe this stray segment override byte
1121 * should be spewed out?
1122 */
1123 if ( !P_SEG( u->itab_entry->prefix ) &&
1124 u->operand[0].type != UD_OP_MEM &&
1125 u->operand[1].type != UD_OP_MEM )
1126 u->pfx_seg = 0;
1127
1128 u->insn_offset = u->pc; /* set offset of instruction */
1129 u->insn_fill = 0; /* set translation buffer index to 0 */
1130 u->pc += u->inp_ctr; /* move program counter by bytes decoded */
1131 gen_hex( u ); /* generate hex code */
1132
1133 /* return number of bytes disassembled. */
1134 return u->inp_ctr;
1135}
1136
1137/*
1138vim: set ts=2 sw=2 expandtab
1139*/
1140
1141#endif // USE(UDIS86)