]>
Commit | Line | Data |
---|---|---|
93a37866 A |
1 | /* udis86 - libudis86/decode.c |
2 | * | |
3 | * Copyright (c) 2002-2009 Vivek Thampi | |
4 | * All rights reserved. | |
5 | * | |
6 | * Redistribution and use in source and binary forms, with or without modification, | |
7 | * are permitted provided that the following conditions are met: | |
8 | * | |
9 | * * Redistributions of source code must retain the above copyright notice, | |
10 | * this list of conditions and the following disclaimer. | |
11 | * * Redistributions in binary form must reproduce the above copyright notice, | |
12 | * this list of conditions and the following disclaimer in the documentation | |
13 | * and/or other materials provided with the distribution. | |
14 | * | |
15 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND | |
16 | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED | |
17 | * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE | |
18 | * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR | |
19 | * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES | |
20 | * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; | |
21 | * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON | |
22 | * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT | |
23 | * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS | |
24 | * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |
25 | */ | |
26 | ||
27 | #include "config.h" | |
28 | ||
29 | #if USE(UDIS86) | |
30 | ||
31 | #include "udis86_extern.h" | |
32 | #include "udis86_types.h" | |
33 | #include "udis86_input.h" | |
34 | #include "udis86_decode.h" | |
35 | #include <wtf/Assertions.h> | |
36 | ||
37 | #define dbg(x, n...) | |
38 | /* #define dbg printf */ | |
39 | ||
40 | #ifndef __UD_STANDALONE__ | |
41 | # include <string.h> | |
42 | #endif /* __UD_STANDALONE__ */ | |
43 | ||
44 | /* The max number of prefixes to an instruction */ | |
45 | #define MAX_PREFIXES 15 | |
46 | ||
47 | /* instruction aliases and special cases: itab entry initialised with the UD_Iinvalid mnemonic, three O_NONE operands and P_none. NOTE(review): presumably substituted for the decoded entry when decoding fails; it is not referenced in this part of the file - confirm against the caller. */ | |
48 | static struct ud_itab_entry s_ie__invalid = | |
49 | { UD_Iinvalid, O_NONE, O_NONE, O_NONE, P_none }; | |
50 | ||
51 | static int | |
52 | decode_ext(struct ud *u, uint16_t ptr); | |
53 | ||
54 | ||
/*
 * eff_opr_mode
 *
 * Returns the effective operand size (16, 32 or 64) for the given
 * disassembly mode, rex.w flag and operand-size prefix flag.
 * Any dis_mode other than 64 or 32 is asserted to be 16.
 */
static inline int
eff_opr_mode(int dis_mode, int rex_w, int pfx_opr)
{
  switch (dis_mode) {
    case 64:
      /* rex.w wins over the operand-size prefix */
      if (rex_w)
        return 64;
      return pfx_opr ? 16 : 32;
    case 32:
      return pfx_opr ? 16 : 32;
    default:
      ASSERT(dis_mode == 16);
      return pfx_opr ? 32 : 16;
  }
}
67 | ||
68 | ||
/*
 * eff_adr_mode
 *
 * Returns the effective address size (16, 32 or 64) for the given
 * disassembly mode and address-size prefix flag.
 * Any dis_mode other than 64 or 32 is asserted to be 16.
 */
static inline int
eff_adr_mode(int dis_mode, int pfx_adr)
{
  switch (dis_mode) {
    case 64:
      return pfx_adr ? 32 : 64;
    case 32:
      return pfx_adr ? 16 : 32;
    default:
      ASSERT(dis_mode == 16);
      return pfx_adr ? 32 : 16;
  }
}
81 | ||
82 | ||
83 | /* Looks up mnemonic code in the mnemonic string table | |
84 | * Returns NULL if the mnemonic code is invalid | |
85 | */ | |
86 | const char * ud_lookup_mnemonic( enum ud_mnemonic_code c ) | |
87 | { | |
88 | return ud_mnemonics_str[ c ]; | |
89 | } | |
90 | ||
91 | ||
92 | /* | |
93 | * decode_prefixes | |
94 | * | |
95 | * Extracts instruction prefixes. | |
96 | */ | |
97 | static int | |
98 | decode_prefixes(struct ud *u) | |
99 | { | |
100 | unsigned int have_pfx = 1; | |
101 | unsigned int i; | |
102 | uint8_t curr; | |
103 | ||
104 | /* if in error state, bail out */ | |
105 | if ( u->error ) | |
106 | return -1; | |
107 | ||
108 | /* keep going as long as there are prefixes available */ | |
109 | for ( i = 0; have_pfx ; ++i ) { | |
110 | ||
111 | /* Get next byte. */ | |
112 | ud_inp_next(u); | |
113 | if ( u->error ) | |
114 | return -1; | |
115 | curr = ud_inp_curr( u ); | |
116 | ||
117 | /* rex prefixes in 64bit mode */ | |
118 | if ( u->dis_mode == 64 && ( curr & 0xF0 ) == 0x40 ) { | |
119 | u->pfx_rex = curr; | |
120 | } else { | |
121 | switch ( curr ) | |
122 | { | |
123 | case 0x2E : | |
124 | u->pfx_seg = UD_R_CS; | |
125 | u->pfx_rex = 0; | |
126 | break; | |
127 | case 0x36 : | |
128 | u->pfx_seg = UD_R_SS; | |
129 | u->pfx_rex = 0; | |
130 | break; | |
131 | case 0x3E : | |
132 | u->pfx_seg = UD_R_DS; | |
133 | u->pfx_rex = 0; | |
134 | break; | |
135 | case 0x26 : | |
136 | u->pfx_seg = UD_R_ES; | |
137 | u->pfx_rex = 0; | |
138 | break; | |
139 | case 0x64 : | |
140 | u->pfx_seg = UD_R_FS; | |
141 | u->pfx_rex = 0; | |
142 | break; | |
143 | case 0x65 : | |
144 | u->pfx_seg = UD_R_GS; | |
145 | u->pfx_rex = 0; | |
146 | break; | |
147 | case 0x67 : /* adress-size override prefix */ | |
148 | u->pfx_adr = 0x67; | |
149 | u->pfx_rex = 0; | |
150 | break; | |
151 | case 0xF0 : | |
152 | u->pfx_lock = 0xF0; | |
153 | u->pfx_rex = 0; | |
154 | break; | |
155 | case 0x66: | |
156 | /* the 0x66 sse prefix is only effective if no other sse prefix | |
157 | * has already been specified. | |
158 | */ | |
159 | if ( !u->pfx_insn ) u->pfx_insn = 0x66; | |
160 | u->pfx_opr = 0x66; | |
161 | u->pfx_rex = 0; | |
162 | break; | |
163 | case 0xF2: | |
164 | u->pfx_insn = 0xF2; | |
165 | u->pfx_repne = 0xF2; | |
166 | u->pfx_rex = 0; | |
167 | break; | |
168 | case 0xF3: | |
169 | u->pfx_insn = 0xF3; | |
170 | u->pfx_rep = 0xF3; | |
171 | u->pfx_repe = 0xF3; | |
172 | u->pfx_rex = 0; | |
173 | break; | |
174 | default : | |
175 | /* No more prefixes */ | |
176 | have_pfx = 0; | |
177 | break; | |
178 | } | |
179 | } | |
180 | ||
181 | /* check if we reached max instruction length */ | |
182 | if ( i + 1 == MAX_INSN_LENGTH ) { | |
183 | u->error = 1; | |
184 | break; | |
185 | } | |
186 | } | |
187 | ||
188 | /* return status */ | |
189 | if ( u->error ) | |
190 | return -1; | |
191 | ||
192 | /* rewind back one byte in stream, since the above loop | |
193 | * stops with a non-prefix byte. | |
194 | */ | |
195 | ud_inp_back(u); | |
196 | return 0; | |
197 | } | |
198 | ||
199 | ||
200 | static inline unsigned int modrm( struct ud * u ) | |
201 | { | |
202 | if ( !u->have_modrm ) { | |
203 | u->modrm = ud_inp_next( u ); | |
204 | u->have_modrm = 1; | |
205 | } | |
206 | return u->modrm; | |
207 | } | |
208 | ||
209 | ||
210 | static unsigned int resolve_operand_size( const struct ud * u, unsigned int s ) | |
211 | { | |
212 | switch ( s ) | |
213 | { | |
214 | case SZ_V: | |
215 | return ( u->opr_mode ); | |
216 | case SZ_Z: | |
217 | return ( u->opr_mode == 16 ) ? 16 : 32; | |
218 | case SZ_P: | |
219 | return ( u->opr_mode == 16 ) ? SZ_WP : SZ_DP; | |
220 | case SZ_MDQ: | |
221 | return ( u->opr_mode == 16 ) ? 32 : u->opr_mode; | |
222 | case SZ_RDQ: | |
223 | return ( u->dis_mode == 64 ) ? 64 : 32; | |
224 | default: | |
225 | return s; | |
226 | } | |
227 | } | |
228 | ||
229 | ||
230 | static int resolve_mnemonic( struct ud* u ) | |
231 | { | |
232 | /* far/near flags */ | |
233 | u->br_far = 0; | |
234 | u->br_near = 0; | |
235 | /* readjust operand sizes for call/jmp instrcutions */ | |
236 | if ( u->mnemonic == UD_Icall || u->mnemonic == UD_Ijmp ) { | |
237 | /* WP: 16:16 pointer */ | |
238 | if ( u->operand[ 0 ].size == SZ_WP ) { | |
239 | u->operand[ 0 ].size = 16; | |
240 | u->br_far = 1; | |
241 | u->br_near= 0; | |
242 | /* DP: 32:32 pointer */ | |
243 | } else if ( u->operand[ 0 ].size == SZ_DP ) { | |
244 | u->operand[ 0 ].size = 32; | |
245 | u->br_far = 1; | |
246 | u->br_near= 0; | |
247 | } else { | |
248 | u->br_far = 0; | |
249 | u->br_near= 1; | |
250 | } | |
251 | /* resolve 3dnow weirdness. */ | |
252 | } else if ( u->mnemonic == UD_I3dnow ) { | |
253 | u->mnemonic = ud_itab[ u->le->table[ ud_inp_curr( u ) ] ].mnemonic; | |
254 | } | |
255 | /* SWAPGS is only valid in 64bits mode */ | |
256 | if ( u->mnemonic == UD_Iswapgs && u->dis_mode != 64 ) { | |
257 | u->error = 1; | |
258 | return -1; | |
259 | } | |
260 | ||
261 | if (u->mnemonic == UD_Ixchg) { | |
262 | if ((u->operand[0].type == UD_OP_REG && u->operand[0].base == UD_R_AX && | |
263 | u->operand[1].type == UD_OP_REG && u->operand[1].base == UD_R_AX) || | |
264 | (u->operand[0].type == UD_OP_REG && u->operand[0].base == UD_R_EAX && | |
265 | u->operand[1].type == UD_OP_REG && u->operand[1].base == UD_R_EAX)) { | |
266 | u->operand[0].type = UD_NONE; | |
267 | u->operand[1].type = UD_NONE; | |
268 | u->mnemonic = UD_Inop; | |
269 | } | |
270 | } | |
271 | ||
272 | if (u->mnemonic == UD_Inop && u->pfx_rep) { | |
273 | u->pfx_rep = 0; | |
274 | u->mnemonic = UD_Ipause; | |
275 | } | |
276 | return 0; | |
277 | } | |
278 | ||
279 | ||
280 | /* ----------------------------------------------------------------------------- | |
281 | * decode_a()- Decodes operands of the type seg:offset | |
282 | * ----------------------------------------------------------------------------- | |
283 | */ | |
284 | static void | |
285 | decode_a(struct ud* u, struct ud_operand *op) | |
286 | { | |
287 | if (u->opr_mode == 16) { | |
288 | /* seg16:off16 */ | |
289 | op->type = UD_OP_PTR; | |
290 | op->size = 32; | |
291 | op->lval.ptr.off = ud_inp_uint16(u); | |
292 | op->lval.ptr.seg = ud_inp_uint16(u); | |
293 | } else { | |
294 | /* seg16:off32 */ | |
295 | op->type = UD_OP_PTR; | |
296 | op->size = 48; | |
297 | op->lval.ptr.off = ud_inp_uint32(u); | |
298 | op->lval.ptr.seg = ud_inp_uint16(u); | |
299 | } | |
300 | } | |
301 | ||
302 | /* ----------------------------------------------------------------------------- | |
303 | * decode_gpr() - Returns decoded General Purpose Register | |
304 | * ----------------------------------------------------------------------------- | |
305 | */ | |
306 | static enum ud_type | |
307 | decode_gpr(register struct ud* u, unsigned int s, unsigned char rm) | |
308 | { | |
309 | s = resolve_operand_size(u, s); | |
310 | ||
311 | switch (s) { | |
312 | case 64: | |
313 | return UD_R_RAX + rm; | |
314 | case SZ_DP: | |
315 | case 32: | |
316 | return UD_R_EAX + rm; | |
317 | case SZ_WP: | |
318 | case 16: | |
319 | return UD_R_AX + rm; | |
320 | case 8: | |
321 | if (u->dis_mode == 64 && u->pfx_rex) { | |
322 | if (rm >= 4) | |
323 | return UD_R_SPL + (rm-4); | |
324 | return UD_R_AL + rm; | |
325 | } else return UD_R_AL + rm; | |
326 | default: | |
327 | return 0; | |
328 | } | |
329 | } | |
330 | ||
331 | /* ----------------------------------------------------------------------------- | |
332 | * resolve_gpr64() - 64bit General Purpose Register-Selection. | |
333 | * ----------------------------------------------------------------------------- | |
334 | */ | |
335 | static enum ud_type | |
336 | resolve_gpr64(struct ud* u, enum ud_operand_code gpr_op, enum ud_operand_size * size) | |
337 | { | |
338 | if (gpr_op >= OP_rAXr8 && gpr_op <= OP_rDIr15) | |
339 | gpr_op = (gpr_op - OP_rAXr8) | (REX_B(u->pfx_rex) << 3); | |
340 | else gpr_op = (gpr_op - OP_rAX); | |
341 | ||
342 | if (u->opr_mode == 16) { | |
343 | *size = 16; | |
344 | return gpr_op + UD_R_AX; | |
345 | } | |
346 | if (u->dis_mode == 32 || | |
347 | (u->opr_mode == 32 && ! (REX_W(u->pfx_rex) || u->default64))) { | |
348 | *size = 32; | |
349 | return gpr_op + UD_R_EAX; | |
350 | } | |
351 | ||
352 | *size = 64; | |
353 | return gpr_op + UD_R_RAX; | |
354 | } | |
355 | ||
356 | /* ----------------------------------------------------------------------------- | |
357 | * resolve_gpr32 () - 32bit General Purpose Register-Selection. | |
358 | * ----------------------------------------------------------------------------- | |
359 | */ | |
360 | static enum ud_type | |
361 | resolve_gpr32(struct ud* u, enum ud_operand_code gpr_op) | |
362 | { | |
363 | gpr_op = gpr_op - OP_eAX; | |
364 | ||
365 | if (u->opr_mode == 16) | |
366 | return gpr_op + UD_R_AX; | |
367 | ||
368 | return gpr_op + UD_R_EAX; | |
369 | } | |
370 | ||
371 | /* ----------------------------------------------------------------------------- | |
372 | * resolve_reg() - Resolves the register type | |
373 | * ----------------------------------------------------------------------------- | |
374 | */ | |
375 | static enum ud_type | |
376 | resolve_reg(struct ud* u, unsigned int type, unsigned char i) | |
377 | { | |
378 | switch (type) { | |
379 | case T_MMX : return UD_R_MM0 + (i & 7); | |
380 | case T_XMM : return UD_R_XMM0 + i; | |
381 | case T_CRG : return UD_R_CR0 + i; | |
382 | case T_DBG : return UD_R_DR0 + i; | |
383 | case T_SEG : { | |
384 | /* | |
385 | * Only 6 segment registers, anything else is an error. | |
386 | */ | |
387 | if ((i & 7) > 5) { | |
388 | u->error = 1; | |
389 | } else { | |
390 | return UD_R_ES + (i & 7); | |
391 | } | |
392 | } | |
393 | case T_NONE: | |
394 | default: return UD_NONE; | |
395 | } | |
396 | } | |
397 | ||
398 | /* ----------------------------------------------------------------------------- | |
399 | * decode_imm() - Decodes Immediate values. | |
400 | * ----------------------------------------------------------------------------- | |
401 | */ | |
402 | static void | |
403 | decode_imm(struct ud* u, unsigned int s, struct ud_operand *op) | |
404 | { | |
405 | op->size = resolve_operand_size(u, s); | |
406 | op->type = UD_OP_IMM; | |
407 | ||
408 | switch (op->size) { | |
409 | case 8: op->lval.sbyte = ud_inp_uint8(u); break; | |
410 | case 16: op->lval.uword = ud_inp_uint16(u); break; | |
411 | case 32: op->lval.udword = ud_inp_uint32(u); break; | |
412 | case 64: op->lval.uqword = ud_inp_uint64(u); break; | |
413 | default: return; | |
414 | } | |
415 | } | |
416 | ||
417 | ||
418 | /* | |
419 | * decode_modrm_reg | |
420 | * | |
421 | * Decodes reg field of mod/rm byte | |
422 | * | |
423 | */ | |
424 | static void | |
425 | decode_modrm_reg(struct ud *u, | |
426 | struct ud_operand *operand, | |
427 | unsigned int type, | |
428 | unsigned int size) | |
429 | { | |
430 | uint8_t reg = (REX_R(u->pfx_rex) << 3) | MODRM_REG(modrm(u)); | |
431 | operand->type = UD_OP_REG; | |
432 | operand->size = resolve_operand_size(u, size); | |
433 | ||
434 | if (type == T_GPR) { | |
435 | operand->base = decode_gpr(u, operand->size, reg); | |
436 | } else { | |
437 | operand->base = resolve_reg(u, type, reg); | |
438 | } | |
439 | } | |
440 | ||
441 | ||
442 | /* | |
443 | * decode_modrm_rm | |
444 | * | |
445 | * Decodes rm field of mod/rm byte | |
446 | * | |
447 | */ | |
448 | static void | |
449 | decode_modrm_rm(struct ud *u, | |
450 | struct ud_operand *op, | |
451 | unsigned char type, | |
452 | unsigned int size) | |
453 | ||
454 | { | |
455 | unsigned char mod, rm, reg; | |
456 | ||
457 | /* get mod, r/m and reg fields */ | |
458 | mod = MODRM_MOD(modrm(u)); | |
459 | rm = (REX_B(u->pfx_rex) << 3) | MODRM_RM(modrm(u)); | |
460 | reg = (REX_R(u->pfx_rex) << 3) | MODRM_REG(modrm(u)); | |
461 | ||
462 | op->size = resolve_operand_size(u, size); | |
463 | ||
464 | /* | |
465 | * If mod is 11b, then the modrm.rm specifies a register. | |
466 | * | |
467 | */ | |
468 | if (mod == 3) { | |
469 | op->type = UD_OP_REG; | |
470 | if (type == T_GPR) { | |
471 | op->base = decode_gpr(u, op->size, rm); | |
472 | } else { | |
473 | op->base = resolve_reg(u, type, (REX_B(u->pfx_rex) << 3) | (rm & 7)); | |
474 | } | |
475 | return; | |
476 | } | |
477 | ||
478 | ||
479 | /* | |
480 | * !11 => Memory Address | |
481 | */ | |
482 | op->type = UD_OP_MEM; | |
483 | ||
484 | if (u->adr_mode == 64) { | |
485 | op->base = UD_R_RAX + rm; | |
486 | if (mod == 1) { | |
487 | op->offset = 8; | |
488 | } else if (mod == 2) { | |
489 | op->offset = 32; | |
490 | } else if (mod == 0 && (rm & 7) == 5) { | |
491 | op->base = UD_R_RIP; | |
492 | op->offset = 32; | |
493 | } else { | |
494 | op->offset = 0; | |
495 | } | |
496 | /* | |
497 | * Scale-Index-Base (SIB) | |
498 | */ | |
499 | if ((rm & 7) == 4) { | |
500 | ud_inp_next(u); | |
501 | ||
502 | op->scale = (1 << SIB_S(ud_inp_curr(u))) & ~1; | |
503 | op->index = UD_R_RAX + (SIB_I(ud_inp_curr(u)) | (REX_X(u->pfx_rex) << 3)); | |
504 | op->base = UD_R_RAX + (SIB_B(ud_inp_curr(u)) | (REX_B(u->pfx_rex) << 3)); | |
505 | ||
506 | /* special conditions for base reference */ | |
507 | if (op->index == UD_R_RSP) { | |
508 | op->index = UD_NONE; | |
509 | op->scale = UD_NONE; | |
510 | } | |
511 | ||
512 | if (op->base == UD_R_RBP || op->base == UD_R_R13) { | |
513 | if (mod == 0) { | |
514 | op->base = UD_NONE; | |
515 | } | |
516 | if (mod == 1) { | |
517 | op->offset = 8; | |
518 | } else { | |
519 | op->offset = 32; | |
520 | } | |
521 | } | |
522 | } | |
523 | } else if (u->adr_mode == 32) { | |
524 | op->base = UD_R_EAX + rm; | |
525 | if (mod == 1) { | |
526 | op->offset = 8; | |
527 | } else if (mod == 2) { | |
528 | op->offset = 32; | |
529 | } else if (mod == 0 && rm == 5) { | |
530 | op->base = UD_NONE; | |
531 | op->offset = 32; | |
532 | } else { | |
533 | op->offset = 0; | |
534 | } | |
535 | ||
536 | /* Scale-Index-Base (SIB) */ | |
537 | if ((rm & 7) == 4) { | |
538 | ud_inp_next(u); | |
539 | ||
540 | op->scale = (1 << SIB_S(ud_inp_curr(u))) & ~1; | |
541 | op->index = UD_R_EAX + (SIB_I(ud_inp_curr(u)) | (REX_X(u->pfx_rex) << 3)); | |
542 | op->base = UD_R_EAX + (SIB_B(ud_inp_curr(u)) | (REX_B(u->pfx_rex) << 3)); | |
543 | ||
544 | if (op->index == UD_R_ESP) { | |
545 | op->index = UD_NONE; | |
546 | op->scale = UD_NONE; | |
547 | } | |
548 | ||
549 | /* special condition for base reference */ | |
550 | if (op->base == UD_R_EBP) { | |
551 | if (mod == 0) { | |
552 | op->base = UD_NONE; | |
553 | } | |
554 | if (mod == 1) { | |
555 | op->offset = 8; | |
556 | } else { | |
557 | op->offset = 32; | |
558 | } | |
559 | } | |
560 | } | |
561 | } else { | |
562 | const unsigned int bases[] = { UD_R_BX, UD_R_BX, UD_R_BP, UD_R_BP, | |
563 | UD_R_SI, UD_R_DI, UD_R_BP, UD_R_BX }; | |
564 | const unsigned int indices[] = { UD_R_SI, UD_R_DI, UD_R_SI, UD_R_DI, | |
565 | UD_NONE, UD_NONE, UD_NONE, UD_NONE }; | |
566 | op->base = bases[rm & 7]; | |
567 | op->index = indices[rm & 7]; | |
568 | if (mod == 0 && rm == 6) { | |
569 | op->offset= 16; | |
570 | op->base = UD_NONE; | |
571 | } else if (mod == 1) { | |
572 | op->offset = 8; | |
573 | } else if (mod == 2) { | |
574 | op->offset = 16; | |
575 | } | |
576 | } | |
577 | ||
578 | /* | |
579 | * extract offset, if any | |
580 | */ | |
581 | switch (op->offset) { | |
582 | case 8 : op->lval.ubyte = ud_inp_uint8(u); break; | |
583 | case 16: op->lval.uword = ud_inp_uint16(u); break; | |
584 | case 32: op->lval.udword = ud_inp_uint32(u); break; | |
585 | case 64: op->lval.uqword = ud_inp_uint64(u); break; | |
586 | default: break; | |
587 | } | |
588 | } | |
589 | ||
590 | /* ----------------------------------------------------------------------------- | |
591 | * decode_o() - Decodes offset | |
592 | * ----------------------------------------------------------------------------- | |
593 | */ | |
594 | static void | |
595 | decode_o(struct ud* u, unsigned int s, struct ud_operand *op) | |
596 | { | |
597 | switch (u->adr_mode) { | |
598 | case 64: | |
599 | op->offset = 64; | |
600 | op->lval.uqword = ud_inp_uint64(u); | |
601 | break; | |
602 | case 32: | |
603 | op->offset = 32; | |
604 | op->lval.udword = ud_inp_uint32(u); | |
605 | break; | |
606 | case 16: | |
607 | op->offset = 16; | |
608 | op->lval.uword = ud_inp_uint16(u); | |
609 | break; | |
610 | default: | |
611 | return; | |
612 | } | |
613 | op->type = UD_OP_MEM; | |
614 | op->size = resolve_operand_size(u, s); | |
615 | } | |
616 | ||
617 | /* ----------------------------------------------------------------------------- | |
618 | * decode_operands() - Disassembles Operands. | |
619 | * ----------------------------------------------------------------------------- | |
620 | */ | |
621 | static int | |
622 | decode_operand(struct ud *u, | |
623 | struct ud_operand *operand, | |
624 | enum ud_operand_code type, | |
625 | unsigned int size) | |
626 | { | |
627 | switch (type) { | |
628 | case OP_A : | |
629 | decode_a(u, operand); | |
630 | break; | |
631 | case OP_MR: | |
632 | if (MODRM_MOD(modrm(u)) == 3) { | |
633 | decode_modrm_rm(u, operand, T_GPR, | |
634 | size == SZ_DY ? SZ_MDQ : SZ_V); | |
635 | } else if (size == SZ_WV) { | |
636 | decode_modrm_rm( u, operand, T_GPR, SZ_W); | |
637 | } else if (size == SZ_BV) { | |
638 | decode_modrm_rm( u, operand, T_GPR, SZ_B); | |
639 | } else if (size == SZ_DY) { | |
640 | decode_modrm_rm( u, operand, T_GPR, SZ_D); | |
641 | } else { | |
642 | ASSERT(!"unexpected size"); | |
643 | } | |
644 | break; | |
645 | case OP_M: | |
646 | if (MODRM_MOD(modrm(u)) == 3) { | |
647 | u->error = 1; | |
648 | } | |
649 | /* intended fall through */ | |
650 | case OP_E: | |
651 | decode_modrm_rm(u, operand, T_GPR, size); | |
652 | break; | |
653 | break; | |
654 | case OP_G: | |
655 | decode_modrm_reg(u, operand, T_GPR, size); | |
656 | break; | |
657 | case OP_I: | |
658 | decode_imm(u, size, operand); | |
659 | break; | |
660 | case OP_I1: | |
661 | operand->type = UD_OP_CONST; | |
662 | operand->lval.udword = 1; | |
663 | break; | |
664 | case OP_PR: | |
665 | if (MODRM_MOD(modrm(u)) != 3) { | |
666 | u->error = 1; | |
667 | } | |
668 | decode_modrm_rm(u, operand, T_MMX, size); | |
669 | break; | |
670 | case OP_P: | |
671 | decode_modrm_reg(u, operand, T_MMX, size); | |
672 | break; | |
673 | case OP_VR: | |
674 | if (MODRM_MOD(modrm(u)) != 3) { | |
675 | u->error = 1; | |
676 | } | |
677 | /* intended fall through */ | |
678 | case OP_W: | |
679 | decode_modrm_rm(u, operand, T_XMM, size); | |
680 | break; | |
681 | case OP_V: | |
682 | decode_modrm_reg(u, operand, T_XMM, size); | |
683 | break; | |
684 | case OP_S: | |
685 | decode_modrm_reg(u, operand, T_SEG, size); | |
686 | break; | |
687 | case OP_AL: | |
688 | case OP_CL: | |
689 | case OP_DL: | |
690 | case OP_BL: | |
691 | case OP_AH: | |
692 | case OP_CH: | |
693 | case OP_DH: | |
694 | case OP_BH: | |
695 | operand->type = UD_OP_REG; | |
696 | operand->base = UD_R_AL + (type - OP_AL); | |
697 | operand->size = 8; | |
698 | break; | |
699 | case OP_DX: | |
700 | operand->type = UD_OP_REG; | |
701 | operand->base = UD_R_DX; | |
702 | operand->size = 16; | |
703 | break; | |
704 | case OP_O: | |
705 | decode_o(u, size, operand); | |
706 | break; | |
707 | case OP_rAXr8: | |
708 | case OP_rCXr9: | |
709 | case OP_rDXr10: | |
710 | case OP_rBXr11: | |
711 | case OP_rSPr12: | |
712 | case OP_rBPr13: | |
713 | case OP_rSIr14: | |
714 | case OP_rDIr15: | |
715 | case OP_rAX: | |
716 | case OP_rCX: | |
717 | case OP_rDX: | |
718 | case OP_rBX: | |
719 | case OP_rSP: | |
720 | case OP_rBP: | |
721 | case OP_rSI: | |
722 | case OP_rDI: | |
723 | operand->type = UD_OP_REG; | |
724 | operand->base = resolve_gpr64(u, type, &operand->size); | |
725 | break; | |
726 | case OP_ALr8b: | |
727 | case OP_CLr9b: | |
728 | case OP_DLr10b: | |
729 | case OP_BLr11b: | |
730 | case OP_AHr12b: | |
731 | case OP_CHr13b: | |
732 | case OP_DHr14b: | |
733 | case OP_BHr15b: { | |
734 | ud_type_t gpr = (type - OP_ALr8b) + UD_R_AL | |
735 | + (REX_B(u->pfx_rex) << 3); | |
736 | if (UD_R_AH <= gpr && u->pfx_rex) { | |
737 | gpr = gpr + 4; | |
738 | } | |
739 | operand->type = UD_OP_REG; | |
740 | operand->base = gpr; | |
741 | break; | |
742 | } | |
743 | case OP_eAX: | |
744 | case OP_eCX: | |
745 | case OP_eDX: | |
746 | case OP_eBX: | |
747 | case OP_eSP: | |
748 | case OP_eBP: | |
749 | case OP_eSI: | |
750 | case OP_eDI: | |
751 | operand->type = UD_OP_REG; | |
752 | operand->base = resolve_gpr32(u, type); | |
753 | operand->size = u->opr_mode == 16 ? 16 : 32; | |
754 | break; | |
755 | case OP_ES: | |
756 | case OP_CS: | |
757 | case OP_DS: | |
758 | case OP_SS: | |
759 | case OP_FS: | |
760 | case OP_GS: | |
761 | /* in 64bits mode, only fs and gs are allowed */ | |
762 | if (u->dis_mode == 64) { | |
763 | if (type != OP_FS && type != OP_GS) { | |
764 | u->error= 1; | |
765 | } | |
766 | } | |
767 | operand->type = UD_OP_REG; | |
768 | operand->base = (type - OP_ES) + UD_R_ES; | |
769 | operand->size = 16; | |
770 | break; | |
771 | case OP_J : | |
772 | decode_imm(u, size, operand); | |
773 | operand->type = UD_OP_JIMM; | |
774 | break ; | |
775 | case OP_Q: | |
776 | decode_modrm_rm(u, operand, T_MMX, size); | |
777 | break; | |
778 | case OP_R : | |
779 | decode_modrm_rm(u, operand, T_GPR, size); | |
780 | break; | |
781 | case OP_C: | |
782 | decode_modrm_reg(u, operand, T_CRG, size); | |
783 | break; | |
784 | case OP_D: | |
785 | decode_modrm_reg(u, operand, T_DBG, size); | |
786 | break; | |
787 | case OP_I3 : | |
788 | operand->type = UD_OP_CONST; | |
789 | operand->lval.sbyte = 3; | |
790 | break; | |
791 | case OP_ST0: | |
792 | case OP_ST1: | |
793 | case OP_ST2: | |
794 | case OP_ST3: | |
795 | case OP_ST4: | |
796 | case OP_ST5: | |
797 | case OP_ST6: | |
798 | case OP_ST7: | |
799 | operand->type = UD_OP_REG; | |
800 | operand->base = (type - OP_ST0) + UD_R_ST0; | |
801 | operand->size = 0; | |
802 | break; | |
803 | case OP_AX: | |
804 | operand->type = UD_OP_REG; | |
805 | operand->base = UD_R_AX; | |
806 | operand->size = 16; | |
807 | break; | |
808 | default : | |
809 | operand->type = UD_NONE; | |
810 | break; | |
811 | } | |
812 | return 0; | |
813 | } | |
814 | ||
815 | ||
816 | /* | |
817 | * decode_operands | |
818 | * | |
819 | * Disassemble upto 3 operands of the current instruction being | |
820 | * disassembled. By the end of the function, the operand fields | |
821 | * of the ud structure will have been filled. | |
822 | */ | |
823 | static int | |
824 | decode_operands(struct ud* u) | |
825 | { | |
826 | decode_operand(u, &u->operand[0], | |
827 | u->itab_entry->operand1.type, | |
828 | u->itab_entry->operand1.size); | |
829 | decode_operand(u, &u->operand[1], | |
830 | u->itab_entry->operand2.type, | |
831 | u->itab_entry->operand2.size); | |
832 | decode_operand(u, &u->operand[2], | |
833 | u->itab_entry->operand3.type, | |
834 | u->itab_entry->operand3.size); | |
835 | return 0; | |
836 | } | |
837 | ||
838 | /* ----------------------------------------------------------------------------- | |
839 | * clear_insn() - clear instruction structure | |
840 | * ----------------------------------------------------------------------------- | |
841 | */ | |
842 | static void | |
843 | clear_insn(register struct ud* u) | |
844 | { | |
845 | u->error = 0; | |
846 | u->pfx_seg = 0; | |
847 | u->pfx_opr = 0; | |
848 | u->pfx_adr = 0; | |
849 | u->pfx_lock = 0; | |
850 | u->pfx_repne = 0; | |
851 | u->pfx_rep = 0; | |
852 | u->pfx_repe = 0; | |
853 | u->pfx_rex = 0; | |
854 | u->pfx_insn = 0; | |
855 | u->mnemonic = UD_Inone; | |
856 | u->itab_entry = NULL; | |
857 | u->have_modrm = 0; | |
858 | ||
859 | memset( &u->operand[ 0 ], 0, sizeof( struct ud_operand ) ); | |
860 | memset( &u->operand[ 1 ], 0, sizeof( struct ud_operand ) ); | |
861 | memset( &u->operand[ 2 ], 0, sizeof( struct ud_operand ) ); | |
862 | } | |
863 | ||
864 | static int | |
865 | resolve_mode( struct ud* u ) | |
866 | { | |
867 | /* if in error state, bail out */ | |
868 | if ( u->error ) return -1; | |
869 | ||
870 | /* propagate prefix effects */ | |
871 | if ( u->dis_mode == 64 ) { /* set 64bit-mode flags */ | |
872 | ||
873 | /* Check validity of instruction m64 */ | |
874 | if ( P_INV64( u->itab_entry->prefix ) ) { | |
875 | u->error = 1; | |
876 | return -1; | |
877 | } | |
878 | ||
879 | /* effective rex prefix is the effective mask for the | |
880 | * instruction hard-coded in the opcode map. | |
881 | */ | |
882 | u->pfx_rex = ( u->pfx_rex & 0x40 ) | | |
883 | ( u->pfx_rex & REX_PFX_MASK( u->itab_entry->prefix ) ); | |
884 | ||
885 | /* whether this instruction has a default operand size of | |
886 | * 64bit, also hardcoded into the opcode map. | |
887 | */ | |
888 | u->default64 = P_DEF64( u->itab_entry->prefix ); | |
889 | /* calculate effective operand size */ | |
890 | if ( REX_W( u->pfx_rex ) ) { | |
891 | u->opr_mode = 64; | |
892 | } else if ( u->pfx_opr ) { | |
893 | u->opr_mode = 16; | |
894 | } else { | |
895 | /* unless the default opr size of instruction is 64, | |
896 | * the effective operand size in the absence of rex.w | |
897 | * prefix is 32. | |
898 | */ | |
899 | u->opr_mode = ( u->default64 ) ? 64 : 32; | |
900 | } | |
901 | ||
902 | /* calculate effective address size */ | |
903 | u->adr_mode = (u->pfx_adr) ? 32 : 64; | |
904 | } else if ( u->dis_mode == 32 ) { /* set 32bit-mode flags */ | |
905 | u->opr_mode = ( u->pfx_opr ) ? 16 : 32; | |
906 | u->adr_mode = ( u->pfx_adr ) ? 16 : 32; | |
907 | } else if ( u->dis_mode == 16 ) { /* set 16bit-mode flags */ | |
908 | u->opr_mode = ( u->pfx_opr ) ? 32 : 16; | |
909 | u->adr_mode = ( u->pfx_adr ) ? 32 : 16; | |
910 | } | |
911 | ||
912 | /* These flags determine which operand to apply the operand size | |
913 | * cast to. | |
914 | */ | |
915 | u->c1 = ( P_C1( u->itab_entry->prefix ) ) ? 1 : 0; | |
916 | u->c2 = ( P_C2( u->itab_entry->prefix ) ) ? 1 : 0; | |
917 | u->c3 = ( P_C3( u->itab_entry->prefix ) ) ? 1 : 0; | |
918 | ||
919 | /* set flags for implicit addressing */ | |
920 | u->implicit_addr = P_IMPADDR( u->itab_entry->prefix ); | |
921 | ||
922 | return 0; | |
923 | } | |
924 | ||
925 | static int gen_hex( struct ud *u ) | |
926 | { | |
927 | unsigned int i; | |
928 | unsigned char *src_ptr = ud_inp_sess( u ); | |
929 | char* src_hex; | |
930 | ||
931 | /* bail out if in error stat. */ | |
932 | if ( u->error ) return -1; | |
933 | /* output buffer pointe */ | |
934 | src_hex = ( char* ) u->insn_hexcode; | |
935 | /* for each byte used to decode instruction */ | |
936 | for ( i = 0; i < u->inp_ctr; ++i, ++src_ptr) { | |
937 | sprintf( src_hex, "%02x", *src_ptr & 0xFF ); | |
938 | src_hex += 2; | |
939 | } | |
940 | return 0; | |
941 | } | |
942 | ||
943 | ||
944 | static inline int | |
945 | decode_insn(struct ud *u, uint16_t ptr) | |
946 | { | |
947 | ASSERT((ptr & 0x8000) == 0); | |
948 | u->itab_entry = &ud_itab[ ptr ]; | |
949 | u->mnemonic = u->itab_entry->mnemonic; | |
950 | return (resolve_mode(u) == 0 && | |
951 | decode_operands(u) == 0 && | |
952 | resolve_mnemonic(u) == 0) ? 0 : -1; | |
953 | } | |
954 | ||
955 | ||
956 | /* | |
957 | * decode_3dnow() | |
958 | * | |
959 | * Decoding 3dnow is a little tricky because of its strange opcode | |
960 | * structure. The final opcode disambiguation depends on the last | |
961 | * byte that comes after the operands have been decoded. Fortunately, | |
962 | * all 3dnow instructions have the same set of operand types. So we | |
963 | * go ahead and decode the instruction by picking an arbitrarily chosen | |
964 | * valid entry in the table, decode the operands, and read the final | |
965 | * byte to resolve the menmonic. | |
966 | */ | |
967 | static inline int | |
968 | decode_3dnow(struct ud* u) | |
969 | { | |
970 | uint16_t ptr; | |
971 | ASSERT(u->le->type == UD_TAB__OPC_3DNOW); | |
972 | ASSERT(u->le->table[0xc] != 0); | |
973 | decode_insn(u, u->le->table[0xc]); | |
974 | ud_inp_next(u); | |
975 | if (u->error) { | |
976 | return -1; | |
977 | } | |
978 | ptr = u->le->table[ud_inp_curr(u)]; | |
979 | ASSERT((ptr & 0x8000) == 0); | |
980 | u->mnemonic = ud_itab[ptr].mnemonic; | |
981 | return 0; | |
982 | } | |
983 | ||
984 | ||
985 | static int | |
986 | decode_ssepfx(struct ud *u) | |
987 | { | |
988 | uint8_t idx = ((u->pfx_insn & 0xf) + 1) / 2; | |
989 | if (u->le->table[idx] == 0) { | |
990 | idx = 0; | |
991 | } | |
992 | if (idx && u->le->table[idx] != 0) { | |
993 | /* | |
994 | * "Consume" the prefix as a part of the opcode, so it is no | |
995 | * longer exported as an instruction prefix. | |
996 | */ | |
997 | switch (u->pfx_insn) { | |
998 | case 0xf2: | |
999 | u->pfx_repne = 0; | |
1000 | break; | |
1001 | case 0xf3: | |
1002 | u->pfx_rep = 0; | |
1003 | u->pfx_repe = 0; | |
1004 | break; | |
1005 | case 0x66: | |
1006 | u->pfx_opr = 0; | |
1007 | break; | |
1008 | } | |
1009 | } | |
1010 | return decode_ext(u, u->le->table[idx]); | |
1011 | } | |
1012 | ||
1013 | ||
1014 | /* | |
1015 | * decode_ext() | |
1016 | * | |
1017 | * Decode opcode extensions (if any) | |
1018 | */ | |
1019 | static int | |
1020 | decode_ext(struct ud *u, uint16_t ptr) | |
1021 | { | |
1022 | uint8_t idx = 0; | |
1023 | if ((ptr & 0x8000) == 0) { | |
1024 | return decode_insn(u, ptr); | |
1025 | } | |
1026 | u->le = &ud_lookup_table_list[(~0x8000 & ptr)]; | |
1027 | if (u->le->type == UD_TAB__OPC_3DNOW) { | |
1028 | return decode_3dnow(u); | |
1029 | } | |
1030 | ||
1031 | switch (u->le->type) { | |
1032 | case UD_TAB__OPC_MOD: | |
1033 | /* !11 = 0, 11 = 1 */ | |
1034 | idx = (MODRM_MOD(modrm(u)) + 1) / 4; | |
1035 | break; | |
1036 | /* disassembly mode/operand size/address size based tables. | |
1037 | * 16 = 0,, 32 = 1, 64 = 2 | |
1038 | */ | |
1039 | case UD_TAB__OPC_MODE: | |
1040 | idx = u->dis_mode / 32; | |
1041 | break; | |
1042 | case UD_TAB__OPC_OSIZE: | |
1043 | idx = eff_opr_mode(u->dis_mode, REX_W(u->pfx_rex), u->pfx_opr) / 32; | |
1044 | break; | |
1045 | case UD_TAB__OPC_ASIZE: | |
1046 | idx = eff_adr_mode(u->dis_mode, u->pfx_adr) / 32; | |
1047 | break; | |
1048 | case UD_TAB__OPC_X87: | |
1049 | idx = modrm(u) - 0xC0; | |
1050 | break; | |
1051 | case UD_TAB__OPC_VENDOR: | |
1052 | if (u->vendor == UD_VENDOR_ANY) { | |
1053 | /* choose a valid entry */ | |
1054 | idx = (u->le->table[idx] != 0) ? 0 : 1; | |
1055 | } else if (u->vendor == UD_VENDOR_AMD) { | |
1056 | idx = 0; | |
1057 | } else { | |
1058 | idx = 1; | |
1059 | } | |
1060 | break; | |
1061 | case UD_TAB__OPC_RM: | |
1062 | idx = MODRM_RM(modrm(u)); | |
1063 | break; | |
1064 | case UD_TAB__OPC_REG: | |
1065 | idx = MODRM_REG(modrm(u)); | |
1066 | break; | |
1067 | case UD_TAB__OPC_SSE: | |
1068 | return decode_ssepfx(u); | |
1069 | default: | |
1070 | ASSERT(!"not reached"); | |
1071 | break; | |
1072 | } | |
1073 | ||
1074 | return decode_ext(u, u->le->table[idx]); | |
1075 | } | |
1076 | ||
1077 | ||
1078 | static inline int | |
1079 | decode_opcode(struct ud *u) | |
1080 | { | |
1081 | uint16_t ptr; | |
1082 | ASSERT(u->le->type == UD_TAB__OPC_TABLE); | |
1083 | ud_inp_next(u); | |
1084 | if (u->error) { | |
1085 | return -1; | |
1086 | } | |
1087 | ptr = u->le->table[ud_inp_curr(u)]; | |
1088 | if (ptr & 0x8000) { | |
1089 | u->le = &ud_lookup_table_list[ptr & ~0x8000]; | |
1090 | if (u->le->type == UD_TAB__OPC_TABLE) { | |
1091 | return decode_opcode(u); | |
1092 | } | |
1093 | } | |
1094 | return decode_ext(u, ptr); | |
1095 | } | |
1096 | ||
1097 | ||
1098 | /* ============================================================================= | |
1099 | * ud_decode() - Instruction decoder. Returns the number of bytes decoded. | |
1100 | * ============================================================================= | |
1101 | */ | |
1102 | unsigned int | |
1103 | ud_decode(struct ud *u) | |
1104 | { | |
1105 | ud_inp_start(u); | |
1106 | clear_insn(u); | |
1107 | u->le = &ud_lookup_table_list[0]; | |
1108 | u->error = decode_prefixes(u) == -1 || | |
1109 | decode_opcode(u) == -1 || | |
1110 | u->error; | |
1111 | /* Handle decode error. */ | |
1112 | if (u->error) { | |
1113 | /* clear out the decode data. */ | |
1114 | clear_insn(u); | |
1115 | /* mark the sequence of bytes as invalid. */ | |
1116 | u->itab_entry = & s_ie__invalid; | |
1117 | u->mnemonic = u->itab_entry->mnemonic; | |
1118 | } | |
1119 | ||
1120 | /* maybe this stray segment override byte | |
1121 | * should be spewed out? | |
1122 | */ | |
1123 | if ( !P_SEG( u->itab_entry->prefix ) && | |
1124 | u->operand[0].type != UD_OP_MEM && | |
1125 | u->operand[1].type != UD_OP_MEM ) | |
1126 | u->pfx_seg = 0; | |
1127 | ||
1128 | u->insn_offset = u->pc; /* set offset of instruction */ | |
1129 | u->insn_fill = 0; /* set translation buffer index to 0 */ | |
1130 | u->pc += u->inp_ctr; /* move program counter by bytes decoded */ | |
1131 | gen_hex( u ); /* generate hex code */ | |
1132 | ||
1133 | /* return number of bytes disassembled. */ | |
1134 | return u->inp_ctr; | |
1135 | } | |
1136 | ||
1137 | /* | |
1138 | vim: set ts=2 sw=2 expandtab | |
1139 | */ | |
1140 | ||
1141 | #endif // USE(UDIS86) |