]> git.saurik.com Git - apple/javascriptcore.git/blob - disassembler/udis86/udis86_decode.c
579903642ad4bfdbf2d4fdd097b707c1db2d7526
[apple/javascriptcore.git] / disassembler / udis86 / udis86_decode.c
1 /* udis86 - libudis86/decode.c
2 *
3 * Copyright (c) 2002-2009 Vivek Thampi
4 * All rights reserved.
5 *
6 * Redistribution and use in source and binary forms, with or without modification,
7 * are permitted provided that the following conditions are met:
8 *
9 * * Redistributions of source code must retain the above copyright notice,
10 * this list of conditions and the following disclaimer.
11 * * Redistributions in binary form must reproduce the above copyright notice,
12 * this list of conditions and the following disclaimer in the documentation
13 * and/or other materials provided with the distribution.
14 *
15 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
16 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
17 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
18 * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
19 * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
20 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
21 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
22 * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
23 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
24 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25 */
26
27 #include "config.h"
28
29 #if USE(UDIS86)
30
31 #include "udis86_extern.h"
32 #include "udis86_types.h"
33 #include "udis86_input.h"
34 #include "udis86_decode.h"
35 #include <wtf/Assertions.h>
36
/*
 * Debug trace hook.  Expands to nothing, so its arguments are never
 * evaluated.  Declared with the standard C99 variadic form ("...") instead
 * of the GNU-only named form ("n...") so the file does not depend on a
 * compiler extension.  Swap in the printf definition below to enable tracing.
 */
#define dbg(...)
/* #define dbg printf */
39
40 #ifndef __UD_STANDALONE__
41 # include <string.h>
42 #endif /* __UD_STANDALONE__ */
43
44 /* The max number of prefixes to an instruction */
45 #define MAX_PREFIXES 15
46
47 /* instruction aliases and special cases */
48 static struct ud_itab_entry s_ie__invalid =
49 { UD_Iinvalid, O_NONE, O_NONE, O_NONE, P_none };
50
51 static int
52 decode_ext(struct ud *u, uint16_t ptr);
53
54
/*
 * eff_opr_mode
 *
 * Returns the effective operand size (16, 32 or 64) for disassembly mode
 * `dis_mode`, given whether REX.W (`rex_w`) and the operand-size override
 * prefix (`pfx_opr`) are present.  Any mode other than 64 or 32 is asserted
 * to be 16.
 */
static inline int
eff_opr_mode(int dis_mode, int rex_w, int pfx_opr)
{
  switch (dis_mode) {
    case 64:
      /* REX.W wins over the operand-size override. */
      if (rex_w) {
        return 64;
      }
      return pfx_opr ? 16 : 32;
    case 32:
      return pfx_opr ? 16 : 32;
    default:
      ASSERT(dis_mode == 16);
      return pfx_opr ? 32 : 16;
  }
}
67
68
/*
 * eff_adr_mode
 *
 * Returns the effective address size (16, 32 or 64) for disassembly mode
 * `dis_mode`, given whether the address-size override prefix (`pfx_adr`)
 * is present.  Any mode other than 64 or 32 is asserted to be 16.
 */
static inline int
eff_adr_mode(int dis_mode, int pfx_adr)
{
  switch (dis_mode) {
    case 64:
      return pfx_adr ? 32 : 64;
    case 32:
      return pfx_adr ? 16 : 32;
    default:
      ASSERT(dis_mode == 16);
      return pfx_adr ? 32 : 16;
  }
}
81
82
83 /* Looks up mnemonic code in the mnemonic string table
84 * Returns NULL if the mnemonic code is invalid
85 */
86 const char * ud_lookup_mnemonic( enum ud_mnemonic_code c )
87 {
88 return ud_mnemonics_str[ c ];
89 }
90
91
92 /*
93 * decode_prefixes
94 *
95 * Extracts instruction prefixes.
96 */
97 static int
98 decode_prefixes(struct ud *u)
99 {
100 unsigned int have_pfx = 1;
101 unsigned int i;
102 uint8_t curr;
103
104 /* if in error state, bail out */
105 if ( u->error )
106 return -1;
107
108 /* keep going as long as there are prefixes available */
109 for ( i = 0; have_pfx ; ++i ) {
110
111 /* Get next byte. */
112 ud_inp_next(u);
113 if ( u->error )
114 return -1;
115 curr = ud_inp_curr( u );
116
117 /* rex prefixes in 64bit mode */
118 if ( u->dis_mode == 64 && ( curr & 0xF0 ) == 0x40 ) {
119 u->pfx_rex = curr;
120 } else {
121 switch ( curr )
122 {
123 case 0x2E :
124 u->pfx_seg = UD_R_CS;
125 u->pfx_rex = 0;
126 break;
127 case 0x36 :
128 u->pfx_seg = UD_R_SS;
129 u->pfx_rex = 0;
130 break;
131 case 0x3E :
132 u->pfx_seg = UD_R_DS;
133 u->pfx_rex = 0;
134 break;
135 case 0x26 :
136 u->pfx_seg = UD_R_ES;
137 u->pfx_rex = 0;
138 break;
139 case 0x64 :
140 u->pfx_seg = UD_R_FS;
141 u->pfx_rex = 0;
142 break;
143 case 0x65 :
144 u->pfx_seg = UD_R_GS;
145 u->pfx_rex = 0;
146 break;
147 case 0x67 : /* adress-size override prefix */
148 u->pfx_adr = 0x67;
149 u->pfx_rex = 0;
150 break;
151 case 0xF0 :
152 u->pfx_lock = 0xF0;
153 u->pfx_rex = 0;
154 break;
155 case 0x66:
156 /* the 0x66 sse prefix is only effective if no other sse prefix
157 * has already been specified.
158 */
159 if ( !u->pfx_insn ) u->pfx_insn = 0x66;
160 u->pfx_opr = 0x66;
161 u->pfx_rex = 0;
162 break;
163 case 0xF2:
164 u->pfx_insn = 0xF2;
165 u->pfx_repne = 0xF2;
166 u->pfx_rex = 0;
167 break;
168 case 0xF3:
169 u->pfx_insn = 0xF3;
170 u->pfx_rep = 0xF3;
171 u->pfx_repe = 0xF3;
172 u->pfx_rex = 0;
173 break;
174 default :
175 /* No more prefixes */
176 have_pfx = 0;
177 break;
178 }
179 }
180
181 /* check if we reached max instruction length */
182 if ( i + 1 == MAX_INSN_LENGTH ) {
183 u->error = 1;
184 break;
185 }
186 }
187
188 /* return status */
189 if ( u->error )
190 return -1;
191
192 /* rewind back one byte in stream, since the above loop
193 * stops with a non-prefix byte.
194 */
195 ud_inp_back(u);
196 return 0;
197 }
198
199
200 static inline unsigned int modrm( struct ud * u )
201 {
202 if ( !u->have_modrm ) {
203 u->modrm = ud_inp_next( u );
204 u->have_modrm = 1;
205 }
206 return u->modrm;
207 }
208
209
210 static unsigned int resolve_operand_size( const struct ud * u, unsigned int s )
211 {
212 switch ( s )
213 {
214 case SZ_V:
215 return ( u->opr_mode );
216 case SZ_Z:
217 return ( u->opr_mode == 16 ) ? 16 : 32;
218 case SZ_P:
219 return ( u->opr_mode == 16 ) ? SZ_WP : SZ_DP;
220 case SZ_MDQ:
221 return ( u->opr_mode == 16 ) ? 32 : u->opr_mode;
222 case SZ_RDQ:
223 return ( u->dis_mode == 64 ) ? 64 : 32;
224 default:
225 return s;
226 }
227 }
228
229
230 static int resolve_mnemonic( struct ud* u )
231 {
232 /* far/near flags */
233 u->br_far = 0;
234 u->br_near = 0;
235 /* readjust operand sizes for call/jmp instrcutions */
236 if ( u->mnemonic == UD_Icall || u->mnemonic == UD_Ijmp ) {
237 /* WP: 16:16 pointer */
238 if ( u->operand[ 0 ].size == SZ_WP ) {
239 u->operand[ 0 ].size = 16;
240 u->br_far = 1;
241 u->br_near= 0;
242 /* DP: 32:32 pointer */
243 } else if ( u->operand[ 0 ].size == SZ_DP ) {
244 u->operand[ 0 ].size = 32;
245 u->br_far = 1;
246 u->br_near= 0;
247 } else {
248 u->br_far = 0;
249 u->br_near= 1;
250 }
251 /* resolve 3dnow weirdness. */
252 } else if ( u->mnemonic == UD_I3dnow ) {
253 u->mnemonic = ud_itab[ u->le->table[ ud_inp_curr( u ) ] ].mnemonic;
254 }
255 /* SWAPGS is only valid in 64bits mode */
256 if ( u->mnemonic == UD_Iswapgs && u->dis_mode != 64 ) {
257 u->error = 1;
258 return -1;
259 }
260
261 if (u->mnemonic == UD_Ixchg) {
262 if ((u->operand[0].type == UD_OP_REG && u->operand[0].base == UD_R_AX &&
263 u->operand[1].type == UD_OP_REG && u->operand[1].base == UD_R_AX) ||
264 (u->operand[0].type == UD_OP_REG && u->operand[0].base == UD_R_EAX &&
265 u->operand[1].type == UD_OP_REG && u->operand[1].base == UD_R_EAX)) {
266 u->operand[0].type = UD_NONE;
267 u->operand[1].type = UD_NONE;
268 u->mnemonic = UD_Inop;
269 }
270 }
271
272 if (u->mnemonic == UD_Inop && u->pfx_rep) {
273 u->pfx_rep = 0;
274 u->mnemonic = UD_Ipause;
275 }
276 return 0;
277 }
278
279
280 /* -----------------------------------------------------------------------------
281 * decode_a()- Decodes operands of the type seg:offset
282 * -----------------------------------------------------------------------------
283 */
284 static void
285 decode_a(struct ud* u, struct ud_operand *op)
286 {
287 if (u->opr_mode == 16) {
288 /* seg16:off16 */
289 op->type = UD_OP_PTR;
290 op->size = 32;
291 op->lval.ptr.off = ud_inp_uint16(u);
292 op->lval.ptr.seg = ud_inp_uint16(u);
293 } else {
294 /* seg16:off32 */
295 op->type = UD_OP_PTR;
296 op->size = 48;
297 op->lval.ptr.off = ud_inp_uint32(u);
298 op->lval.ptr.seg = ud_inp_uint16(u);
299 }
300 }
301
302 /* -----------------------------------------------------------------------------
303 * decode_gpr() - Returns decoded General Purpose Register
304 * -----------------------------------------------------------------------------
305 */
306 static enum ud_type
307 decode_gpr(register struct ud* u, unsigned int s, unsigned char rm)
308 {
309 s = resolve_operand_size(u, s);
310
311 switch (s) {
312 case 64:
313 return UD_R_RAX + rm;
314 case SZ_DP:
315 case 32:
316 return UD_R_EAX + rm;
317 case SZ_WP:
318 case 16:
319 return UD_R_AX + rm;
320 case 8:
321 if (u->dis_mode == 64 && u->pfx_rex) {
322 if (rm >= 4)
323 return UD_R_SPL + (rm-4);
324 return UD_R_AL + rm;
325 } else return UD_R_AL + rm;
326 default:
327 return 0;
328 }
329 }
330
331 /* -----------------------------------------------------------------------------
332 * resolve_gpr64() - 64bit General Purpose Register-Selection.
333 * -----------------------------------------------------------------------------
334 */
335 static enum ud_type
336 resolve_gpr64(struct ud* u, enum ud_operand_code gpr_op, enum ud_operand_size * size)
337 {
338 if (gpr_op >= OP_rAXr8 && gpr_op <= OP_rDIr15)
339 gpr_op = (gpr_op - OP_rAXr8) | (REX_B(u->pfx_rex) << 3);
340 else gpr_op = (gpr_op - OP_rAX);
341
342 if (u->opr_mode == 16) {
343 *size = 16;
344 return gpr_op + UD_R_AX;
345 }
346 if (u->dis_mode == 32 ||
347 (u->opr_mode == 32 && ! (REX_W(u->pfx_rex) || u->default64))) {
348 *size = 32;
349 return gpr_op + UD_R_EAX;
350 }
351
352 *size = 64;
353 return gpr_op + UD_R_RAX;
354 }
355
356 /* -----------------------------------------------------------------------------
357 * resolve_gpr32 () - 32bit General Purpose Register-Selection.
358 * -----------------------------------------------------------------------------
359 */
360 static enum ud_type
361 resolve_gpr32(struct ud* u, enum ud_operand_code gpr_op)
362 {
363 gpr_op = gpr_op - OP_eAX;
364
365 if (u->opr_mode == 16)
366 return gpr_op + UD_R_AX;
367
368 return gpr_op + UD_R_EAX;
369 }
370
371 /* -----------------------------------------------------------------------------
372 * resolve_reg() - Resolves the register type
373 * -----------------------------------------------------------------------------
374 */
375 static enum ud_type
376 resolve_reg(struct ud* u, unsigned int type, unsigned char i)
377 {
378 switch (type) {
379 case T_MMX : return UD_R_MM0 + (i & 7);
380 case T_XMM : return UD_R_XMM0 + i;
381 case T_CRG : return UD_R_CR0 + i;
382 case T_DBG : return UD_R_DR0 + i;
383 case T_SEG : {
384 /*
385 * Only 6 segment registers, anything else is an error.
386 */
387 if ((i & 7) > 5) {
388 u->error = 1;
389 } else {
390 return UD_R_ES + (i & 7);
391 }
392 }
393 case T_NONE:
394 default: return UD_NONE;
395 }
396 }
397
398 /* -----------------------------------------------------------------------------
399 * decode_imm() - Decodes Immediate values.
400 * -----------------------------------------------------------------------------
401 */
402 static void
403 decode_imm(struct ud* u, unsigned int s, struct ud_operand *op)
404 {
405 op->size = resolve_operand_size(u, s);
406 op->type = UD_OP_IMM;
407
408 switch (op->size) {
409 case 8: op->lval.sbyte = ud_inp_uint8(u); break;
410 case 16: op->lval.uword = ud_inp_uint16(u); break;
411 case 32: op->lval.udword = ud_inp_uint32(u); break;
412 case 64: op->lval.uqword = ud_inp_uint64(u); break;
413 default: return;
414 }
415 }
416
417
418 /*
419 * decode_modrm_reg
420 *
421 * Decodes reg field of mod/rm byte
422 *
423 */
424 static void
425 decode_modrm_reg(struct ud *u,
426 struct ud_operand *operand,
427 unsigned int type,
428 unsigned int size)
429 {
430 uint8_t reg = (REX_R(u->pfx_rex) << 3) | MODRM_REG(modrm(u));
431 operand->type = UD_OP_REG;
432 operand->size = resolve_operand_size(u, size);
433
434 if (type == T_GPR) {
435 operand->base = decode_gpr(u, operand->size, reg);
436 } else {
437 operand->base = resolve_reg(u, type, reg);
438 }
439 }
440
441
442 /*
443 * decode_modrm_rm
444 *
445 * Decodes rm field of mod/rm byte
446 *
447 */
448 static void
449 decode_modrm_rm(struct ud *u,
450 struct ud_operand *op,
451 unsigned char type,
452 unsigned int size)
453
454 {
455 unsigned char mod, rm, reg;
456
457 /* get mod, r/m and reg fields */
458 mod = MODRM_MOD(modrm(u));
459 rm = (REX_B(u->pfx_rex) << 3) | MODRM_RM(modrm(u));
460 reg = (REX_R(u->pfx_rex) << 3) | MODRM_REG(modrm(u));
461
462 UNUSED_PARAM(reg);
463
464 op->size = resolve_operand_size(u, size);
465
466 /*
467 * If mod is 11b, then the modrm.rm specifies a register.
468 *
469 */
470 if (mod == 3) {
471 op->type = UD_OP_REG;
472 if (type == T_GPR) {
473 op->base = decode_gpr(u, op->size, rm);
474 } else {
475 op->base = resolve_reg(u, type, (REX_B(u->pfx_rex) << 3) | (rm & 7));
476 }
477 return;
478 }
479
480
481 /*
482 * !11 => Memory Address
483 */
484 op->type = UD_OP_MEM;
485
486 if (u->adr_mode == 64) {
487 op->base = UD_R_RAX + rm;
488 if (mod == 1) {
489 op->offset = 8;
490 } else if (mod == 2) {
491 op->offset = 32;
492 } else if (mod == 0 && (rm & 7) == 5) {
493 op->base = UD_R_RIP;
494 op->offset = 32;
495 } else {
496 op->offset = 0;
497 }
498 /*
499 * Scale-Index-Base (SIB)
500 */
501 if ((rm & 7) == 4) {
502 ud_inp_next(u);
503
504 op->scale = (1 << SIB_S(ud_inp_curr(u))) & ~1;
505 op->index = UD_R_RAX + (SIB_I(ud_inp_curr(u)) | (REX_X(u->pfx_rex) << 3));
506 op->base = UD_R_RAX + (SIB_B(ud_inp_curr(u)) | (REX_B(u->pfx_rex) << 3));
507
508 /* special conditions for base reference */
509 if (op->index == UD_R_RSP) {
510 op->index = UD_NONE;
511 op->scale = UD_NONE;
512 }
513
514 if (op->base == UD_R_RBP || op->base == UD_R_R13) {
515 if (mod == 0) {
516 op->base = UD_NONE;
517 }
518 if (mod == 1) {
519 op->offset = 8;
520 } else {
521 op->offset = 32;
522 }
523 }
524 }
525 } else if (u->adr_mode == 32) {
526 op->base = UD_R_EAX + rm;
527 if (mod == 1) {
528 op->offset = 8;
529 } else if (mod == 2) {
530 op->offset = 32;
531 } else if (mod == 0 && rm == 5) {
532 op->base = UD_NONE;
533 op->offset = 32;
534 } else {
535 op->offset = 0;
536 }
537
538 /* Scale-Index-Base (SIB) */
539 if ((rm & 7) == 4) {
540 ud_inp_next(u);
541
542 op->scale = (1 << SIB_S(ud_inp_curr(u))) & ~1;
543 op->index = UD_R_EAX + (SIB_I(ud_inp_curr(u)) | (REX_X(u->pfx_rex) << 3));
544 op->base = UD_R_EAX + (SIB_B(ud_inp_curr(u)) | (REX_B(u->pfx_rex) << 3));
545
546 if (op->index == UD_R_ESP) {
547 op->index = UD_NONE;
548 op->scale = UD_NONE;
549 }
550
551 /* special condition for base reference */
552 if (op->base == UD_R_EBP) {
553 if (mod == 0) {
554 op->base = UD_NONE;
555 }
556 if (mod == 1) {
557 op->offset = 8;
558 } else {
559 op->offset = 32;
560 }
561 }
562 }
563 } else {
564 const unsigned int bases[] = { UD_R_BX, UD_R_BX, UD_R_BP, UD_R_BP,
565 UD_R_SI, UD_R_DI, UD_R_BP, UD_R_BX };
566 const unsigned int indices[] = { UD_R_SI, UD_R_DI, UD_R_SI, UD_R_DI,
567 UD_NONE, UD_NONE, UD_NONE, UD_NONE };
568 op->base = bases[rm & 7];
569 op->index = indices[rm & 7];
570 if (mod == 0 && rm == 6) {
571 op->offset= 16;
572 op->base = UD_NONE;
573 } else if (mod == 1) {
574 op->offset = 8;
575 } else if (mod == 2) {
576 op->offset = 16;
577 }
578 }
579
580 /*
581 * extract offset, if any
582 */
583 switch (op->offset) {
584 case 8 : op->lval.ubyte = ud_inp_uint8(u); break;
585 case 16: op->lval.uword = ud_inp_uint16(u); break;
586 case 32: op->lval.udword = ud_inp_uint32(u); break;
587 case 64: op->lval.uqword = ud_inp_uint64(u); break;
588 default: break;
589 }
590 }
591
592 /* -----------------------------------------------------------------------------
593 * decode_o() - Decodes offset
594 * -----------------------------------------------------------------------------
595 */
596 static void
597 decode_o(struct ud* u, unsigned int s, struct ud_operand *op)
598 {
599 switch (u->adr_mode) {
600 case 64:
601 op->offset = 64;
602 op->lval.uqword = ud_inp_uint64(u);
603 break;
604 case 32:
605 op->offset = 32;
606 op->lval.udword = ud_inp_uint32(u);
607 break;
608 case 16:
609 op->offset = 16;
610 op->lval.uword = ud_inp_uint16(u);
611 break;
612 default:
613 return;
614 }
615 op->type = UD_OP_MEM;
616 op->size = resolve_operand_size(u, s);
617 }
618
619 /* -----------------------------------------------------------------------------
620 * decode_operands() - Disassembles Operands.
621 * -----------------------------------------------------------------------------
622 */
623 static int
624 decode_operand(struct ud *u,
625 struct ud_operand *operand,
626 enum ud_operand_code type,
627 unsigned int size)
628 {
629 switch (type) {
630 case OP_A :
631 decode_a(u, operand);
632 break;
633 case OP_MR:
634 if (MODRM_MOD(modrm(u)) == 3) {
635 decode_modrm_rm(u, operand, T_GPR,
636 size == SZ_DY ? SZ_MDQ : SZ_V);
637 } else if (size == SZ_WV) {
638 decode_modrm_rm( u, operand, T_GPR, SZ_W);
639 } else if (size == SZ_BV) {
640 decode_modrm_rm( u, operand, T_GPR, SZ_B);
641 } else if (size == SZ_DY) {
642 decode_modrm_rm( u, operand, T_GPR, SZ_D);
643 } else {
644 ASSERT(!"unexpected size");
645 }
646 break;
647 case OP_M:
648 if (MODRM_MOD(modrm(u)) == 3) {
649 u->error = 1;
650 }
651 /* intended fall through */
652 case OP_E:
653 decode_modrm_rm(u, operand, T_GPR, size);
654 break;
655 case OP_G:
656 decode_modrm_reg(u, operand, T_GPR, size);
657 break;
658 case OP_I:
659 decode_imm(u, size, operand);
660 break;
661 case OP_I1:
662 operand->type = UD_OP_CONST;
663 operand->lval.udword = 1;
664 break;
665 case OP_PR:
666 if (MODRM_MOD(modrm(u)) != 3) {
667 u->error = 1;
668 }
669 decode_modrm_rm(u, operand, T_MMX, size);
670 break;
671 case OP_P:
672 decode_modrm_reg(u, operand, T_MMX, size);
673 break;
674 case OP_VR:
675 if (MODRM_MOD(modrm(u)) != 3) {
676 u->error = 1;
677 }
678 /* intended fall through */
679 case OP_W:
680 decode_modrm_rm(u, operand, T_XMM, size);
681 break;
682 case OP_V:
683 decode_modrm_reg(u, operand, T_XMM, size);
684 break;
685 case OP_S:
686 decode_modrm_reg(u, operand, T_SEG, size);
687 break;
688 case OP_AL:
689 case OP_CL:
690 case OP_DL:
691 case OP_BL:
692 case OP_AH:
693 case OP_CH:
694 case OP_DH:
695 case OP_BH:
696 operand->type = UD_OP_REG;
697 operand->base = UD_R_AL + (type - OP_AL);
698 operand->size = 8;
699 break;
700 case OP_DX:
701 operand->type = UD_OP_REG;
702 operand->base = UD_R_DX;
703 operand->size = 16;
704 break;
705 case OP_O:
706 decode_o(u, size, operand);
707 break;
708 case OP_rAXr8:
709 case OP_rCXr9:
710 case OP_rDXr10:
711 case OP_rBXr11:
712 case OP_rSPr12:
713 case OP_rBPr13:
714 case OP_rSIr14:
715 case OP_rDIr15:
716 case OP_rAX:
717 case OP_rCX:
718 case OP_rDX:
719 case OP_rBX:
720 case OP_rSP:
721 case OP_rBP:
722 case OP_rSI:
723 case OP_rDI:
724 operand->type = UD_OP_REG;
725 operand->base = resolve_gpr64(u, type, &operand->size);
726 break;
727 case OP_ALr8b:
728 case OP_CLr9b:
729 case OP_DLr10b:
730 case OP_BLr11b:
731 case OP_AHr12b:
732 case OP_CHr13b:
733 case OP_DHr14b:
734 case OP_BHr15b: {
735 ud_type_t gpr = (type - OP_ALr8b) + UD_R_AL
736 + (REX_B(u->pfx_rex) << 3);
737 if (UD_R_AH <= gpr && u->pfx_rex) {
738 gpr = gpr + 4;
739 }
740 operand->type = UD_OP_REG;
741 operand->base = gpr;
742 break;
743 }
744 case OP_eAX:
745 case OP_eCX:
746 case OP_eDX:
747 case OP_eBX:
748 case OP_eSP:
749 case OP_eBP:
750 case OP_eSI:
751 case OP_eDI:
752 operand->type = UD_OP_REG;
753 operand->base = resolve_gpr32(u, type);
754 operand->size = u->opr_mode == 16 ? 16 : 32;
755 break;
756 case OP_ES:
757 case OP_CS:
758 case OP_DS:
759 case OP_SS:
760 case OP_FS:
761 case OP_GS:
762 /* in 64bits mode, only fs and gs are allowed */
763 if (u->dis_mode == 64) {
764 if (type != OP_FS && type != OP_GS) {
765 u->error= 1;
766 }
767 }
768 operand->type = UD_OP_REG;
769 operand->base = (type - OP_ES) + UD_R_ES;
770 operand->size = 16;
771 break;
772 case OP_J :
773 decode_imm(u, size, operand);
774 operand->type = UD_OP_JIMM;
775 break ;
776 case OP_Q:
777 decode_modrm_rm(u, operand, T_MMX, size);
778 break;
779 case OP_R :
780 decode_modrm_rm(u, operand, T_GPR, size);
781 break;
782 case OP_C:
783 decode_modrm_reg(u, operand, T_CRG, size);
784 break;
785 case OP_D:
786 decode_modrm_reg(u, operand, T_DBG, size);
787 break;
788 case OP_I3 :
789 operand->type = UD_OP_CONST;
790 operand->lval.sbyte = 3;
791 break;
792 case OP_ST0:
793 case OP_ST1:
794 case OP_ST2:
795 case OP_ST3:
796 case OP_ST4:
797 case OP_ST5:
798 case OP_ST6:
799 case OP_ST7:
800 operand->type = UD_OP_REG;
801 operand->base = (type - OP_ST0) + UD_R_ST0;
802 operand->size = 0;
803 break;
804 case OP_AX:
805 operand->type = UD_OP_REG;
806 operand->base = UD_R_AX;
807 operand->size = 16;
808 break;
809 default :
810 operand->type = UD_NONE;
811 break;
812 }
813 return 0;
814 }
815
816
817 /*
818 * decode_operands
819 *
820 * Disassemble upto 3 operands of the current instruction being
821 * disassembled. By the end of the function, the operand fields
822 * of the ud structure will have been filled.
823 */
824 static int
825 decode_operands(struct ud* u)
826 {
827 decode_operand(u, &u->operand[0],
828 u->itab_entry->operand1.type,
829 u->itab_entry->operand1.size);
830 decode_operand(u, &u->operand[1],
831 u->itab_entry->operand2.type,
832 u->itab_entry->operand2.size);
833 decode_operand(u, &u->operand[2],
834 u->itab_entry->operand3.type,
835 u->itab_entry->operand3.size);
836 return 0;
837 }
838
839 /* -----------------------------------------------------------------------------
840 * clear_insn() - clear instruction structure
841 * -----------------------------------------------------------------------------
842 */
843 static void
844 clear_insn(register struct ud* u)
845 {
846 u->error = 0;
847 u->pfx_seg = 0;
848 u->pfx_opr = 0;
849 u->pfx_adr = 0;
850 u->pfx_lock = 0;
851 u->pfx_repne = 0;
852 u->pfx_rep = 0;
853 u->pfx_repe = 0;
854 u->pfx_rex = 0;
855 u->pfx_insn = 0;
856 u->mnemonic = UD_Inone;
857 u->itab_entry = NULL;
858 u->have_modrm = 0;
859
860 memset( &u->operand[ 0 ], 0, sizeof( struct ud_operand ) );
861 memset( &u->operand[ 1 ], 0, sizeof( struct ud_operand ) );
862 memset( &u->operand[ 2 ], 0, sizeof( struct ud_operand ) );
863 }
864
865 static int
866 resolve_mode( struct ud* u )
867 {
868 /* if in error state, bail out */
869 if ( u->error ) return -1;
870
871 /* propagate prefix effects */
872 if ( u->dis_mode == 64 ) { /* set 64bit-mode flags */
873
874 /* Check validity of instruction m64 */
875 if ( P_INV64( u->itab_entry->prefix ) ) {
876 u->error = 1;
877 return -1;
878 }
879
880 /* effective rex prefix is the effective mask for the
881 * instruction hard-coded in the opcode map.
882 */
883 u->pfx_rex = ( u->pfx_rex & 0x40 ) |
884 ( u->pfx_rex & REX_PFX_MASK( u->itab_entry->prefix ) );
885
886 /* whether this instruction has a default operand size of
887 * 64bit, also hardcoded into the opcode map.
888 */
889 u->default64 = P_DEF64( u->itab_entry->prefix );
890 /* calculate effective operand size */
891 if ( REX_W( u->pfx_rex ) ) {
892 u->opr_mode = 64;
893 } else if ( u->pfx_opr ) {
894 u->opr_mode = 16;
895 } else {
896 /* unless the default opr size of instruction is 64,
897 * the effective operand size in the absence of rex.w
898 * prefix is 32.
899 */
900 u->opr_mode = ( u->default64 ) ? 64 : 32;
901 }
902
903 /* calculate effective address size */
904 u->adr_mode = (u->pfx_adr) ? 32 : 64;
905 } else if ( u->dis_mode == 32 ) { /* set 32bit-mode flags */
906 u->opr_mode = ( u->pfx_opr ) ? 16 : 32;
907 u->adr_mode = ( u->pfx_adr ) ? 16 : 32;
908 } else if ( u->dis_mode == 16 ) { /* set 16bit-mode flags */
909 u->opr_mode = ( u->pfx_opr ) ? 32 : 16;
910 u->adr_mode = ( u->pfx_adr ) ? 32 : 16;
911 }
912
913 /* These flags determine which operand to apply the operand size
914 * cast to.
915 */
916 u->c1 = ( P_C1( u->itab_entry->prefix ) ) ? 1 : 0;
917 u->c2 = ( P_C2( u->itab_entry->prefix ) ) ? 1 : 0;
918 u->c3 = ( P_C3( u->itab_entry->prefix ) ) ? 1 : 0;
919
920 /* set flags for implicit addressing */
921 u->implicit_addr = P_IMPADDR( u->itab_entry->prefix );
922
923 return 0;
924 }
925
926 static int gen_hex( struct ud *u )
927 {
928 unsigned int i;
929 unsigned char *src_ptr = ud_inp_sess( u );
930 char* src_hex;
931
932 /* bail out if in error stat. */
933 if ( u->error ) return -1;
934 /* output buffer pointe */
935 src_hex = ( char* ) u->insn_hexcode;
936 /* for each byte used to decode instruction */
937 for ( i = 0; i < u->inp_ctr; ++i, ++src_ptr) {
938 sprintf( src_hex, "%02x", *src_ptr & 0xFF );
939 src_hex += 2;
940 }
941 return 0;
942 }
943
944
945 static inline int
946 decode_insn(struct ud *u, uint16_t ptr)
947 {
948 ASSERT((ptr & 0x8000) == 0);
949 u->itab_entry = &ud_itab[ ptr ];
950 u->mnemonic = u->itab_entry->mnemonic;
951 return (resolve_mode(u) == 0 &&
952 decode_operands(u) == 0 &&
953 resolve_mnemonic(u) == 0) ? 0 : -1;
954 }
955
956
957 /*
958 * decode_3dnow()
959 *
960 * Decoding 3dnow is a little tricky because of its strange opcode
961 * structure. The final opcode disambiguation depends on the last
962 * byte that comes after the operands have been decoded. Fortunately,
963 * all 3dnow instructions have the same set of operand types. So we
964 * go ahead and decode the instruction by picking an arbitrarily chosen
965 * valid entry in the table, decode the operands, and read the final
966 * byte to resolve the menmonic.
967 */
968 static inline int
969 decode_3dnow(struct ud* u)
970 {
971 uint16_t ptr;
972 ASSERT(u->le->type == UD_TAB__OPC_3DNOW);
973 ASSERT(u->le->table[0xc] != 0);
974 decode_insn(u, u->le->table[0xc]);
975 ud_inp_next(u);
976 if (u->error) {
977 return -1;
978 }
979 ptr = u->le->table[ud_inp_curr(u)];
980 ASSERT((ptr & 0x8000) == 0);
981 u->mnemonic = ud_itab[ptr].mnemonic;
982 return 0;
983 }
984
985
986 static int
987 decode_ssepfx(struct ud *u)
988 {
989 uint8_t idx = ((u->pfx_insn & 0xf) + 1) / 2;
990 if (u->le->table[idx] == 0) {
991 idx = 0;
992 }
993 if (idx && u->le->table[idx] != 0) {
994 /*
995 * "Consume" the prefix as a part of the opcode, so it is no
996 * longer exported as an instruction prefix.
997 */
998 switch (u->pfx_insn) {
999 case 0xf2:
1000 u->pfx_repne = 0;
1001 break;
1002 case 0xf3:
1003 u->pfx_rep = 0;
1004 u->pfx_repe = 0;
1005 break;
1006 case 0x66:
1007 u->pfx_opr = 0;
1008 break;
1009 }
1010 }
1011 return decode_ext(u, u->le->table[idx]);
1012 }
1013
1014
1015 /*
1016 * decode_ext()
1017 *
1018 * Decode opcode extensions (if any)
1019 */
1020 static int
1021 decode_ext(struct ud *u, uint16_t ptr)
1022 {
1023 uint8_t idx = 0;
1024 if ((ptr & 0x8000) == 0) {
1025 return decode_insn(u, ptr);
1026 }
1027 u->le = &ud_lookup_table_list[(~0x8000 & ptr)];
1028 if (u->le->type == UD_TAB__OPC_3DNOW) {
1029 return decode_3dnow(u);
1030 }
1031
1032 switch (u->le->type) {
1033 case UD_TAB__OPC_MOD:
1034 /* !11 = 0, 11 = 1 */
1035 idx = (MODRM_MOD(modrm(u)) + 1) / 4;
1036 break;
1037 /* disassembly mode/operand size/address size based tables.
1038 * 16 = 0,, 32 = 1, 64 = 2
1039 */
1040 case UD_TAB__OPC_MODE:
1041 idx = u->dis_mode / 32;
1042 break;
1043 case UD_TAB__OPC_OSIZE:
1044 idx = eff_opr_mode(u->dis_mode, REX_W(u->pfx_rex), u->pfx_opr) / 32;
1045 break;
1046 case UD_TAB__OPC_ASIZE:
1047 idx = eff_adr_mode(u->dis_mode, u->pfx_adr) / 32;
1048 break;
1049 case UD_TAB__OPC_X87:
1050 idx = modrm(u) - 0xC0;
1051 break;
1052 case UD_TAB__OPC_VENDOR:
1053 if (u->vendor == UD_VENDOR_ANY) {
1054 /* choose a valid entry */
1055 idx = (u->le->table[idx] != 0) ? 0 : 1;
1056 } else if (u->vendor == UD_VENDOR_AMD) {
1057 idx = 0;
1058 } else {
1059 idx = 1;
1060 }
1061 break;
1062 case UD_TAB__OPC_RM:
1063 idx = MODRM_RM(modrm(u));
1064 break;
1065 case UD_TAB__OPC_REG:
1066 idx = MODRM_REG(modrm(u));
1067 break;
1068 case UD_TAB__OPC_SSE:
1069 return decode_ssepfx(u);
1070 default:
1071 ASSERT(!"not reached");
1072 break;
1073 }
1074
1075 return decode_ext(u, u->le->table[idx]);
1076 }
1077
1078
1079 static inline int
1080 decode_opcode(struct ud *u)
1081 {
1082 uint16_t ptr;
1083 ASSERT(u->le->type == UD_TAB__OPC_TABLE);
1084 ud_inp_next(u);
1085 if (u->error) {
1086 return -1;
1087 }
1088 ptr = u->le->table[ud_inp_curr(u)];
1089 if (ptr & 0x8000) {
1090 u->le = &ud_lookup_table_list[ptr & ~0x8000];
1091 if (u->le->type == UD_TAB__OPC_TABLE) {
1092 return decode_opcode(u);
1093 }
1094 }
1095 return decode_ext(u, ptr);
1096 }
1097
1098
1099 /* =============================================================================
1100 * ud_decode() - Instruction decoder. Returns the number of bytes decoded.
1101 * =============================================================================
1102 */
1103 unsigned int
1104 ud_decode(struct ud *u)
1105 {
1106 ud_inp_start(u);
1107 clear_insn(u);
1108 u->le = &ud_lookup_table_list[0];
1109 u->error = decode_prefixes(u) == -1 ||
1110 decode_opcode(u) == -1 ||
1111 u->error;
1112 /* Handle decode error. */
1113 if (u->error) {
1114 /* clear out the decode data. */
1115 clear_insn(u);
1116 /* mark the sequence of bytes as invalid. */
1117 u->itab_entry = & s_ie__invalid;
1118 u->mnemonic = u->itab_entry->mnemonic;
1119 }
1120
1121 /* maybe this stray segment override byte
1122 * should be spewed out?
1123 */
1124 if ( !P_SEG( u->itab_entry->prefix ) &&
1125 u->operand[0].type != UD_OP_MEM &&
1126 u->operand[1].type != UD_OP_MEM )
1127 u->pfx_seg = 0;
1128
1129 u->insn_offset = u->pc; /* set offset of instruction */
1130 u->insn_fill = 0; /* set translation buffer index to 0 */
1131 u->pc += u->inp_ctr; /* move program counter by bytes decoded */
1132 gen_hex( u ); /* generate hex code */
1133
1134 /* return number of bytes disassembled. */
1135 return u->inp_ctr;
1136 }
1137
1138 /*
1139 vim: set ts=2 sw=2 expandtab
1140 */
1141
1142 #endif // USE(UDIS86)