]>
Commit | Line | Data |
---|---|---|
1 | /* udis86 - libudis86/decode.c | |
2 | * | |
3 | * Copyright (c) 2002-2009 Vivek Thampi | |
4 | * All rights reserved. | |
5 | * | |
6 | * Redistribution and use in source and binary forms, with or without modification, | |
7 | * are permitted provided that the following conditions are met: | |
8 | * | |
9 | * * Redistributions of source code must retain the above copyright notice, | |
10 | * this list of conditions and the following disclaimer. | |
11 | * * Redistributions in binary form must reproduce the above copyright notice, | |
12 | * this list of conditions and the following disclaimer in the documentation | |
13 | * and/or other materials provided with the distribution. | |
14 | * | |
15 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND | |
16 | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED | |
17 | * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE | |
18 | * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR | |
19 | * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES | |
20 | * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; | |
21 | * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON | |
22 | * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT | |
23 | * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS | |
24 | * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |
25 | */ | |
26 | ||
27 | #include "config.h" | |
28 | ||
29 | #if USE(UDIS86) | |
30 | ||
31 | #include "udis86_extern.h" | |
32 | #include "udis86_types.h" | |
33 | #include "udis86_input.h" | |
34 | #include "udis86_decode.h" | |
35 | #include <wtf/Assertions.h> | |
36 | ||
37 | #define dbg(x, n...) | |
38 | /* #define dbg printf */ | |
39 | ||
40 | #ifndef __UD_STANDALONE__ | |
41 | # include <string.h> | |
42 | #endif /* __UD_STANDALONE__ */ | |
43 | ||
44 | /* The max number of prefixes to an instruction */ | |
45 | #define MAX_PREFIXES 15 | |
46 | ||
/* instruction aliases and special cases */

/* Instruction-table entry used to represent an invalid instruction. It is
 * initialised with the UD_Iinvalid mnemonic, three O_NONE operand
 * descriptors and the P_none prefix flags.
 */
static struct ud_itab_entry s_ie__invalid =
    { UD_Iinvalid, O_NONE, O_NONE, O_NONE, P_none };
50 | ||
/* Forward declaration: decode_ssepfx() calls decode_ext() before the
 * definition of decode_ext() appears further down in this file.
 */
static int
decode_ext(struct ud *u, uint16_t ptr);
53 | ||
54 | ||
/*
 * eff_opr_mode
 *
 * Returns the effective operand size (16, 32 or 64) for the given
 * disassembly mode, taking the rex_w flag and the operand-size override
 * flag (pfx_opr) into account.
 */
static inline int
eff_opr_mode(int dis_mode, int rex_w, int pfx_opr)
{
    switch (dis_mode) {
    case 64:
        /* rex_w wins over the operand-size override */
        if (rex_w) {
            return 64;
        }
        return pfx_opr ? 16 : 32;
    case 32:
        return pfx_opr ? 16 : 32;
    default:
        /* the only other mode handled here is 16 bit */
        ASSERT(dis_mode == 16);
        return pfx_opr ? 32 : 16;
    }
}
67 | ||
68 | ||
/*
 * eff_adr_mode
 *
 * Returns the effective address size (16, 32 or 64) for the given
 * disassembly mode; pfx_adr is the address-size override flag.
 */
static inline int
eff_adr_mode(int dis_mode, int pfx_adr)
{
    switch (dis_mode) {
    case 64:
        return pfx_adr ? 32 : 64;
    case 32:
        return pfx_adr ? 16 : 32;
    default:
        /* the only other mode handled here is 16 bit */
        ASSERT(dis_mode == 16);
        return pfx_adr ? 32 : 16;
    }
}
81 | ||
82 | ||
/* Looks up mnemonic code in the mnemonic string table.
 *
 * Returns ud_mnemonics_str[c].
 *
 * NOTE(review): no range check is performed on c here, so the caller must
 * pass a valid mnemonic code. An earlier version of this comment promised
 * a NULL return for invalid codes; this body does not implement that.
 */
const char * ud_lookup_mnemonic( enum ud_mnemonic_code c )
{
    return ud_mnemonics_str[ c ];
}
90 | ||
91 | ||
92 | /* | |
93 | * decode_prefixes | |
94 | * | |
95 | * Extracts instruction prefixes. | |
96 | */ | |
97 | static int | |
98 | decode_prefixes(struct ud *u) | |
99 | { | |
100 | unsigned int have_pfx = 1; | |
101 | unsigned int i; | |
102 | uint8_t curr; | |
103 | ||
104 | /* if in error state, bail out */ | |
105 | if ( u->error ) | |
106 | return -1; | |
107 | ||
108 | /* keep going as long as there are prefixes available */ | |
109 | for ( i = 0; have_pfx ; ++i ) { | |
110 | ||
111 | /* Get next byte. */ | |
112 | ud_inp_next(u); | |
113 | if ( u->error ) | |
114 | return -1; | |
115 | curr = ud_inp_curr( u ); | |
116 | ||
117 | /* rex prefixes in 64bit mode */ | |
118 | if ( u->dis_mode == 64 && ( curr & 0xF0 ) == 0x40 ) { | |
119 | u->pfx_rex = curr; | |
120 | } else { | |
121 | switch ( curr ) | |
122 | { | |
123 | case 0x2E : | |
124 | u->pfx_seg = UD_R_CS; | |
125 | u->pfx_rex = 0; | |
126 | break; | |
127 | case 0x36 : | |
128 | u->pfx_seg = UD_R_SS; | |
129 | u->pfx_rex = 0; | |
130 | break; | |
131 | case 0x3E : | |
132 | u->pfx_seg = UD_R_DS; | |
133 | u->pfx_rex = 0; | |
134 | break; | |
135 | case 0x26 : | |
136 | u->pfx_seg = UD_R_ES; | |
137 | u->pfx_rex = 0; | |
138 | break; | |
139 | case 0x64 : | |
140 | u->pfx_seg = UD_R_FS; | |
141 | u->pfx_rex = 0; | |
142 | break; | |
143 | case 0x65 : | |
144 | u->pfx_seg = UD_R_GS; | |
145 | u->pfx_rex = 0; | |
146 | break; | |
147 | case 0x67 : /* adress-size override prefix */ | |
148 | u->pfx_adr = 0x67; | |
149 | u->pfx_rex = 0; | |
150 | break; | |
151 | case 0xF0 : | |
152 | u->pfx_lock = 0xF0; | |
153 | u->pfx_rex = 0; | |
154 | break; | |
155 | case 0x66: | |
156 | /* the 0x66 sse prefix is only effective if no other sse prefix | |
157 | * has already been specified. | |
158 | */ | |
159 | if ( !u->pfx_insn ) u->pfx_insn = 0x66; | |
160 | u->pfx_opr = 0x66; | |
161 | u->pfx_rex = 0; | |
162 | break; | |
163 | case 0xF2: | |
164 | u->pfx_insn = 0xF2; | |
165 | u->pfx_repne = 0xF2; | |
166 | u->pfx_rex = 0; | |
167 | break; | |
168 | case 0xF3: | |
169 | u->pfx_insn = 0xF3; | |
170 | u->pfx_rep = 0xF3; | |
171 | u->pfx_repe = 0xF3; | |
172 | u->pfx_rex = 0; | |
173 | break; | |
174 | default : | |
175 | /* No more prefixes */ | |
176 | have_pfx = 0; | |
177 | break; | |
178 | } | |
179 | } | |
180 | ||
181 | /* check if we reached max instruction length */ | |
182 | if ( i + 1 == MAX_INSN_LENGTH ) { | |
183 | u->error = 1; | |
184 | break; | |
185 | } | |
186 | } | |
187 | ||
188 | /* return status */ | |
189 | if ( u->error ) | |
190 | return -1; | |
191 | ||
192 | /* rewind back one byte in stream, since the above loop | |
193 | * stops with a non-prefix byte. | |
194 | */ | |
195 | ud_inp_back(u); | |
196 | return 0; | |
197 | } | |
198 | ||
199 | ||
200 | static inline unsigned int modrm( struct ud * u ) | |
201 | { | |
202 | if ( !u->have_modrm ) { | |
203 | u->modrm = ud_inp_next( u ); | |
204 | u->have_modrm = 1; | |
205 | } | |
206 | return u->modrm; | |
207 | } | |
208 | ||
209 | ||
210 | static unsigned int resolve_operand_size( const struct ud * u, unsigned int s ) | |
211 | { | |
212 | switch ( s ) | |
213 | { | |
214 | case SZ_V: | |
215 | return ( u->opr_mode ); | |
216 | case SZ_Z: | |
217 | return ( u->opr_mode == 16 ) ? 16 : 32; | |
218 | case SZ_P: | |
219 | return ( u->opr_mode == 16 ) ? SZ_WP : SZ_DP; | |
220 | case SZ_MDQ: | |
221 | return ( u->opr_mode == 16 ) ? 32 : u->opr_mode; | |
222 | case SZ_RDQ: | |
223 | return ( u->dis_mode == 64 ) ? 64 : 32; | |
224 | default: | |
225 | return s; | |
226 | } | |
227 | } | |
228 | ||
229 | ||
230 | static int resolve_mnemonic( struct ud* u ) | |
231 | { | |
232 | /* far/near flags */ | |
233 | u->br_far = 0; | |
234 | u->br_near = 0; | |
235 | /* readjust operand sizes for call/jmp instrcutions */ | |
236 | if ( u->mnemonic == UD_Icall || u->mnemonic == UD_Ijmp ) { | |
237 | /* WP: 16:16 pointer */ | |
238 | if ( u->operand[ 0 ].size == SZ_WP ) { | |
239 | u->operand[ 0 ].size = 16; | |
240 | u->br_far = 1; | |
241 | u->br_near= 0; | |
242 | /* DP: 32:32 pointer */ | |
243 | } else if ( u->operand[ 0 ].size == SZ_DP ) { | |
244 | u->operand[ 0 ].size = 32; | |
245 | u->br_far = 1; | |
246 | u->br_near= 0; | |
247 | } else { | |
248 | u->br_far = 0; | |
249 | u->br_near= 1; | |
250 | } | |
251 | /* resolve 3dnow weirdness. */ | |
252 | } else if ( u->mnemonic == UD_I3dnow ) { | |
253 | u->mnemonic = ud_itab[ u->le->table[ ud_inp_curr( u ) ] ].mnemonic; | |
254 | } | |
255 | /* SWAPGS is only valid in 64bits mode */ | |
256 | if ( u->mnemonic == UD_Iswapgs && u->dis_mode != 64 ) { | |
257 | u->error = 1; | |
258 | return -1; | |
259 | } | |
260 | ||
261 | if (u->mnemonic == UD_Ixchg) { | |
262 | if ((u->operand[0].type == UD_OP_REG && u->operand[0].base == UD_R_AX && | |
263 | u->operand[1].type == UD_OP_REG && u->operand[1].base == UD_R_AX) || | |
264 | (u->operand[0].type == UD_OP_REG && u->operand[0].base == UD_R_EAX && | |
265 | u->operand[1].type == UD_OP_REG && u->operand[1].base == UD_R_EAX)) { | |
266 | u->operand[0].type = UD_NONE; | |
267 | u->operand[1].type = UD_NONE; | |
268 | u->mnemonic = UD_Inop; | |
269 | } | |
270 | } | |
271 | ||
272 | if (u->mnemonic == UD_Inop && u->pfx_rep) { | |
273 | u->pfx_rep = 0; | |
274 | u->mnemonic = UD_Ipause; | |
275 | } | |
276 | return 0; | |
277 | } | |
278 | ||
279 | ||
280 | /* ----------------------------------------------------------------------------- | |
281 | * decode_a()- Decodes operands of the type seg:offset | |
282 | * ----------------------------------------------------------------------------- | |
283 | */ | |
284 | static void | |
285 | decode_a(struct ud* u, struct ud_operand *op) | |
286 | { | |
287 | if (u->opr_mode == 16) { | |
288 | /* seg16:off16 */ | |
289 | op->type = UD_OP_PTR; | |
290 | op->size = 32; | |
291 | op->lval.ptr.off = ud_inp_uint16(u); | |
292 | op->lval.ptr.seg = ud_inp_uint16(u); | |
293 | } else { | |
294 | /* seg16:off32 */ | |
295 | op->type = UD_OP_PTR; | |
296 | op->size = 48; | |
297 | op->lval.ptr.off = ud_inp_uint32(u); | |
298 | op->lval.ptr.seg = ud_inp_uint16(u); | |
299 | } | |
300 | } | |
301 | ||
302 | /* ----------------------------------------------------------------------------- | |
303 | * decode_gpr() - Returns decoded General Purpose Register | |
304 | * ----------------------------------------------------------------------------- | |
305 | */ | |
306 | static enum ud_type | |
307 | decode_gpr(register struct ud* u, unsigned int s, unsigned char rm) | |
308 | { | |
309 | s = resolve_operand_size(u, s); | |
310 | ||
311 | switch (s) { | |
312 | case 64: | |
313 | return UD_R_RAX + rm; | |
314 | case SZ_DP: | |
315 | case 32: | |
316 | return UD_R_EAX + rm; | |
317 | case SZ_WP: | |
318 | case 16: | |
319 | return UD_R_AX + rm; | |
320 | case 8: | |
321 | if (u->dis_mode == 64 && u->pfx_rex) { | |
322 | if (rm >= 4) | |
323 | return UD_R_SPL + (rm-4); | |
324 | return UD_R_AL + rm; | |
325 | } else return UD_R_AL + rm; | |
326 | default: | |
327 | return 0; | |
328 | } | |
329 | } | |
330 | ||
331 | /* ----------------------------------------------------------------------------- | |
332 | * resolve_gpr64() - 64bit General Purpose Register-Selection. | |
333 | * ----------------------------------------------------------------------------- | |
334 | */ | |
335 | static enum ud_type | |
336 | resolve_gpr64(struct ud* u, enum ud_operand_code gpr_op, enum ud_operand_size * size) | |
337 | { | |
338 | if (gpr_op >= OP_rAXr8 && gpr_op <= OP_rDIr15) | |
339 | gpr_op = (gpr_op - OP_rAXr8) | (REX_B(u->pfx_rex) << 3); | |
340 | else gpr_op = (gpr_op - OP_rAX); | |
341 | ||
342 | if (u->opr_mode == 16) { | |
343 | *size = 16; | |
344 | return gpr_op + UD_R_AX; | |
345 | } | |
346 | if (u->dis_mode == 32 || | |
347 | (u->opr_mode == 32 && ! (REX_W(u->pfx_rex) || u->default64))) { | |
348 | *size = 32; | |
349 | return gpr_op + UD_R_EAX; | |
350 | } | |
351 | ||
352 | *size = 64; | |
353 | return gpr_op + UD_R_RAX; | |
354 | } | |
355 | ||
356 | /* ----------------------------------------------------------------------------- | |
357 | * resolve_gpr32 () - 32bit General Purpose Register-Selection. | |
358 | * ----------------------------------------------------------------------------- | |
359 | */ | |
360 | static enum ud_type | |
361 | resolve_gpr32(struct ud* u, enum ud_operand_code gpr_op) | |
362 | { | |
363 | gpr_op = gpr_op - OP_eAX; | |
364 | ||
365 | if (u->opr_mode == 16) | |
366 | return gpr_op + UD_R_AX; | |
367 | ||
368 | return gpr_op + UD_R_EAX; | |
369 | } | |
370 | ||
371 | /* ----------------------------------------------------------------------------- | |
372 | * resolve_reg() - Resolves the register type | |
373 | * ----------------------------------------------------------------------------- | |
374 | */ | |
375 | static enum ud_type | |
376 | resolve_reg(struct ud* u, unsigned int type, unsigned char i) | |
377 | { | |
378 | switch (type) { | |
379 | case T_MMX : return UD_R_MM0 + (i & 7); | |
380 | case T_XMM : return UD_R_XMM0 + i; | |
381 | case T_CRG : return UD_R_CR0 + i; | |
382 | case T_DBG : return UD_R_DR0 + i; | |
383 | case T_SEG : { | |
384 | /* | |
385 | * Only 6 segment registers, anything else is an error. | |
386 | */ | |
387 | if ((i & 7) > 5) { | |
388 | u->error = 1; | |
389 | } else { | |
390 | return UD_R_ES + (i & 7); | |
391 | } | |
392 | } | |
393 | case T_NONE: | |
394 | default: return UD_NONE; | |
395 | } | |
396 | } | |
397 | ||
398 | /* ----------------------------------------------------------------------------- | |
399 | * decode_imm() - Decodes Immediate values. | |
400 | * ----------------------------------------------------------------------------- | |
401 | */ | |
402 | static void | |
403 | decode_imm(struct ud* u, unsigned int s, struct ud_operand *op) | |
404 | { | |
405 | op->size = resolve_operand_size(u, s); | |
406 | op->type = UD_OP_IMM; | |
407 | ||
408 | switch (op->size) { | |
409 | case 8: op->lval.sbyte = ud_inp_uint8(u); break; | |
410 | case 16: op->lval.uword = ud_inp_uint16(u); break; | |
411 | case 32: op->lval.udword = ud_inp_uint32(u); break; | |
412 | case 64: op->lval.uqword = ud_inp_uint64(u); break; | |
413 | default: return; | |
414 | } | |
415 | } | |
416 | ||
417 | ||
418 | /* | |
419 | * decode_modrm_reg | |
420 | * | |
421 | * Decodes reg field of mod/rm byte | |
422 | * | |
423 | */ | |
424 | static void | |
425 | decode_modrm_reg(struct ud *u, | |
426 | struct ud_operand *operand, | |
427 | unsigned int type, | |
428 | unsigned int size) | |
429 | { | |
430 | uint8_t reg = (REX_R(u->pfx_rex) << 3) | MODRM_REG(modrm(u)); | |
431 | operand->type = UD_OP_REG; | |
432 | operand->size = resolve_operand_size(u, size); | |
433 | ||
434 | if (type == T_GPR) { | |
435 | operand->base = decode_gpr(u, operand->size, reg); | |
436 | } else { | |
437 | operand->base = resolve_reg(u, type, reg); | |
438 | } | |
439 | } | |
440 | ||
441 | ||
442 | /* | |
443 | * decode_modrm_rm | |
444 | * | |
445 | * Decodes rm field of mod/rm byte | |
446 | * | |
447 | */ | |
448 | static void | |
449 | decode_modrm_rm(struct ud *u, | |
450 | struct ud_operand *op, | |
451 | unsigned char type, | |
452 | unsigned int size) | |
453 | ||
454 | { | |
455 | unsigned char mod, rm, reg; | |
456 | ||
457 | /* get mod, r/m and reg fields */ | |
458 | mod = MODRM_MOD(modrm(u)); | |
459 | rm = (REX_B(u->pfx_rex) << 3) | MODRM_RM(modrm(u)); | |
460 | reg = (REX_R(u->pfx_rex) << 3) | MODRM_REG(modrm(u)); | |
461 | ||
462 | UNUSED_PARAM(reg); | |
463 | ||
464 | op->size = resolve_operand_size(u, size); | |
465 | ||
466 | /* | |
467 | * If mod is 11b, then the modrm.rm specifies a register. | |
468 | * | |
469 | */ | |
470 | if (mod == 3) { | |
471 | op->type = UD_OP_REG; | |
472 | if (type == T_GPR) { | |
473 | op->base = decode_gpr(u, op->size, rm); | |
474 | } else { | |
475 | op->base = resolve_reg(u, type, (REX_B(u->pfx_rex) << 3) | (rm & 7)); | |
476 | } | |
477 | return; | |
478 | } | |
479 | ||
480 | ||
481 | /* | |
482 | * !11 => Memory Address | |
483 | */ | |
484 | op->type = UD_OP_MEM; | |
485 | ||
486 | if (u->adr_mode == 64) { | |
487 | op->base = UD_R_RAX + rm; | |
488 | if (mod == 1) { | |
489 | op->offset = 8; | |
490 | } else if (mod == 2) { | |
491 | op->offset = 32; | |
492 | } else if (mod == 0 && (rm & 7) == 5) { | |
493 | op->base = UD_R_RIP; | |
494 | op->offset = 32; | |
495 | } else { | |
496 | op->offset = 0; | |
497 | } | |
498 | /* | |
499 | * Scale-Index-Base (SIB) | |
500 | */ | |
501 | if ((rm & 7) == 4) { | |
502 | ud_inp_next(u); | |
503 | ||
504 | op->scale = (1 << SIB_S(ud_inp_curr(u))) & ~1; | |
505 | op->index = UD_R_RAX + (SIB_I(ud_inp_curr(u)) | (REX_X(u->pfx_rex) << 3)); | |
506 | op->base = UD_R_RAX + (SIB_B(ud_inp_curr(u)) | (REX_B(u->pfx_rex) << 3)); | |
507 | ||
508 | /* special conditions for base reference */ | |
509 | if (op->index == UD_R_RSP) { | |
510 | op->index = UD_NONE; | |
511 | op->scale = UD_NONE; | |
512 | } | |
513 | ||
514 | if (op->base == UD_R_RBP || op->base == UD_R_R13) { | |
515 | if (mod == 0) { | |
516 | op->base = UD_NONE; | |
517 | } | |
518 | if (mod == 1) { | |
519 | op->offset = 8; | |
520 | } else { | |
521 | op->offset = 32; | |
522 | } | |
523 | } | |
524 | } | |
525 | } else if (u->adr_mode == 32) { | |
526 | op->base = UD_R_EAX + rm; | |
527 | if (mod == 1) { | |
528 | op->offset = 8; | |
529 | } else if (mod == 2) { | |
530 | op->offset = 32; | |
531 | } else if (mod == 0 && rm == 5) { | |
532 | op->base = UD_NONE; | |
533 | op->offset = 32; | |
534 | } else { | |
535 | op->offset = 0; | |
536 | } | |
537 | ||
538 | /* Scale-Index-Base (SIB) */ | |
539 | if ((rm & 7) == 4) { | |
540 | ud_inp_next(u); | |
541 | ||
542 | op->scale = (1 << SIB_S(ud_inp_curr(u))) & ~1; | |
543 | op->index = UD_R_EAX + (SIB_I(ud_inp_curr(u)) | (REX_X(u->pfx_rex) << 3)); | |
544 | op->base = UD_R_EAX + (SIB_B(ud_inp_curr(u)) | (REX_B(u->pfx_rex) << 3)); | |
545 | ||
546 | if (op->index == UD_R_ESP) { | |
547 | op->index = UD_NONE; | |
548 | op->scale = UD_NONE; | |
549 | } | |
550 | ||
551 | /* special condition for base reference */ | |
552 | if (op->base == UD_R_EBP) { | |
553 | if (mod == 0) { | |
554 | op->base = UD_NONE; | |
555 | } | |
556 | if (mod == 1) { | |
557 | op->offset = 8; | |
558 | } else { | |
559 | op->offset = 32; | |
560 | } | |
561 | } | |
562 | } | |
563 | } else { | |
564 | const unsigned int bases[] = { UD_R_BX, UD_R_BX, UD_R_BP, UD_R_BP, | |
565 | UD_R_SI, UD_R_DI, UD_R_BP, UD_R_BX }; | |
566 | const unsigned int indices[] = { UD_R_SI, UD_R_DI, UD_R_SI, UD_R_DI, | |
567 | UD_NONE, UD_NONE, UD_NONE, UD_NONE }; | |
568 | op->base = bases[rm & 7]; | |
569 | op->index = indices[rm & 7]; | |
570 | if (mod == 0 && rm == 6) { | |
571 | op->offset= 16; | |
572 | op->base = UD_NONE; | |
573 | } else if (mod == 1) { | |
574 | op->offset = 8; | |
575 | } else if (mod == 2) { | |
576 | op->offset = 16; | |
577 | } | |
578 | } | |
579 | ||
580 | /* | |
581 | * extract offset, if any | |
582 | */ | |
583 | switch (op->offset) { | |
584 | case 8 : op->lval.ubyte = ud_inp_uint8(u); break; | |
585 | case 16: op->lval.uword = ud_inp_uint16(u); break; | |
586 | case 32: op->lval.udword = ud_inp_uint32(u); break; | |
587 | case 64: op->lval.uqword = ud_inp_uint64(u); break; | |
588 | default: break; | |
589 | } | |
590 | } | |
591 | ||
592 | /* ----------------------------------------------------------------------------- | |
593 | * decode_o() - Decodes offset | |
594 | * ----------------------------------------------------------------------------- | |
595 | */ | |
596 | static void | |
597 | decode_o(struct ud* u, unsigned int s, struct ud_operand *op) | |
598 | { | |
599 | switch (u->adr_mode) { | |
600 | case 64: | |
601 | op->offset = 64; | |
602 | op->lval.uqword = ud_inp_uint64(u); | |
603 | break; | |
604 | case 32: | |
605 | op->offset = 32; | |
606 | op->lval.udword = ud_inp_uint32(u); | |
607 | break; | |
608 | case 16: | |
609 | op->offset = 16; | |
610 | op->lval.uword = ud_inp_uint16(u); | |
611 | break; | |
612 | default: | |
613 | return; | |
614 | } | |
615 | op->type = UD_OP_MEM; | |
616 | op->size = resolve_operand_size(u, s); | |
617 | } | |
618 | ||
619 | /* ----------------------------------------------------------------------------- | |
620 | * decode_operands() - Disassembles Operands. | |
621 | * ----------------------------------------------------------------------------- | |
622 | */ | |
623 | static int | |
624 | decode_operand(struct ud *u, | |
625 | struct ud_operand *operand, | |
626 | enum ud_operand_code type, | |
627 | unsigned int size) | |
628 | { | |
629 | switch (type) { | |
630 | case OP_A : | |
631 | decode_a(u, operand); | |
632 | break; | |
633 | case OP_MR: | |
634 | if (MODRM_MOD(modrm(u)) == 3) { | |
635 | decode_modrm_rm(u, operand, T_GPR, | |
636 | size == SZ_DY ? SZ_MDQ : SZ_V); | |
637 | } else if (size == SZ_WV) { | |
638 | decode_modrm_rm( u, operand, T_GPR, SZ_W); | |
639 | } else if (size == SZ_BV) { | |
640 | decode_modrm_rm( u, operand, T_GPR, SZ_B); | |
641 | } else if (size == SZ_DY) { | |
642 | decode_modrm_rm( u, operand, T_GPR, SZ_D); | |
643 | } else { | |
644 | ASSERT(!"unexpected size"); | |
645 | } | |
646 | break; | |
647 | case OP_M: | |
648 | if (MODRM_MOD(modrm(u)) == 3) { | |
649 | u->error = 1; | |
650 | } | |
651 | /* intended fall through */ | |
652 | case OP_E: | |
653 | decode_modrm_rm(u, operand, T_GPR, size); | |
654 | break; | |
655 | case OP_G: | |
656 | decode_modrm_reg(u, operand, T_GPR, size); | |
657 | break; | |
658 | case OP_I: | |
659 | decode_imm(u, size, operand); | |
660 | break; | |
661 | case OP_I1: | |
662 | operand->type = UD_OP_CONST; | |
663 | operand->lval.udword = 1; | |
664 | break; | |
665 | case OP_PR: | |
666 | if (MODRM_MOD(modrm(u)) != 3) { | |
667 | u->error = 1; | |
668 | } | |
669 | decode_modrm_rm(u, operand, T_MMX, size); | |
670 | break; | |
671 | case OP_P: | |
672 | decode_modrm_reg(u, operand, T_MMX, size); | |
673 | break; | |
674 | case OP_VR: | |
675 | if (MODRM_MOD(modrm(u)) != 3) { | |
676 | u->error = 1; | |
677 | } | |
678 | /* intended fall through */ | |
679 | case OP_W: | |
680 | decode_modrm_rm(u, operand, T_XMM, size); | |
681 | break; | |
682 | case OP_V: | |
683 | decode_modrm_reg(u, operand, T_XMM, size); | |
684 | break; | |
685 | case OP_S: | |
686 | decode_modrm_reg(u, operand, T_SEG, size); | |
687 | break; | |
688 | case OP_AL: | |
689 | case OP_CL: | |
690 | case OP_DL: | |
691 | case OP_BL: | |
692 | case OP_AH: | |
693 | case OP_CH: | |
694 | case OP_DH: | |
695 | case OP_BH: | |
696 | operand->type = UD_OP_REG; | |
697 | operand->base = UD_R_AL + (type - OP_AL); | |
698 | operand->size = 8; | |
699 | break; | |
700 | case OP_DX: | |
701 | operand->type = UD_OP_REG; | |
702 | operand->base = UD_R_DX; | |
703 | operand->size = 16; | |
704 | break; | |
705 | case OP_O: | |
706 | decode_o(u, size, operand); | |
707 | break; | |
708 | case OP_rAXr8: | |
709 | case OP_rCXr9: | |
710 | case OP_rDXr10: | |
711 | case OP_rBXr11: | |
712 | case OP_rSPr12: | |
713 | case OP_rBPr13: | |
714 | case OP_rSIr14: | |
715 | case OP_rDIr15: | |
716 | case OP_rAX: | |
717 | case OP_rCX: | |
718 | case OP_rDX: | |
719 | case OP_rBX: | |
720 | case OP_rSP: | |
721 | case OP_rBP: | |
722 | case OP_rSI: | |
723 | case OP_rDI: | |
724 | operand->type = UD_OP_REG; | |
725 | operand->base = resolve_gpr64(u, type, &operand->size); | |
726 | break; | |
727 | case OP_ALr8b: | |
728 | case OP_CLr9b: | |
729 | case OP_DLr10b: | |
730 | case OP_BLr11b: | |
731 | case OP_AHr12b: | |
732 | case OP_CHr13b: | |
733 | case OP_DHr14b: | |
734 | case OP_BHr15b: { | |
735 | ud_type_t gpr = (type - OP_ALr8b) + UD_R_AL | |
736 | + (REX_B(u->pfx_rex) << 3); | |
737 | if (UD_R_AH <= gpr && u->pfx_rex) { | |
738 | gpr = gpr + 4; | |
739 | } | |
740 | operand->type = UD_OP_REG; | |
741 | operand->base = gpr; | |
742 | break; | |
743 | } | |
744 | case OP_eAX: | |
745 | case OP_eCX: | |
746 | case OP_eDX: | |
747 | case OP_eBX: | |
748 | case OP_eSP: | |
749 | case OP_eBP: | |
750 | case OP_eSI: | |
751 | case OP_eDI: | |
752 | operand->type = UD_OP_REG; | |
753 | operand->base = resolve_gpr32(u, type); | |
754 | operand->size = u->opr_mode == 16 ? 16 : 32; | |
755 | break; | |
756 | case OP_ES: | |
757 | case OP_CS: | |
758 | case OP_DS: | |
759 | case OP_SS: | |
760 | case OP_FS: | |
761 | case OP_GS: | |
762 | /* in 64bits mode, only fs and gs are allowed */ | |
763 | if (u->dis_mode == 64) { | |
764 | if (type != OP_FS && type != OP_GS) { | |
765 | u->error= 1; | |
766 | } | |
767 | } | |
768 | operand->type = UD_OP_REG; | |
769 | operand->base = (type - OP_ES) + UD_R_ES; | |
770 | operand->size = 16; | |
771 | break; | |
772 | case OP_J : | |
773 | decode_imm(u, size, operand); | |
774 | operand->type = UD_OP_JIMM; | |
775 | break ; | |
776 | case OP_Q: | |
777 | decode_modrm_rm(u, operand, T_MMX, size); | |
778 | break; | |
779 | case OP_R : | |
780 | decode_modrm_rm(u, operand, T_GPR, size); | |
781 | break; | |
782 | case OP_C: | |
783 | decode_modrm_reg(u, operand, T_CRG, size); | |
784 | break; | |
785 | case OP_D: | |
786 | decode_modrm_reg(u, operand, T_DBG, size); | |
787 | break; | |
788 | case OP_I3 : | |
789 | operand->type = UD_OP_CONST; | |
790 | operand->lval.sbyte = 3; | |
791 | break; | |
792 | case OP_ST0: | |
793 | case OP_ST1: | |
794 | case OP_ST2: | |
795 | case OP_ST3: | |
796 | case OP_ST4: | |
797 | case OP_ST5: | |
798 | case OP_ST6: | |
799 | case OP_ST7: | |
800 | operand->type = UD_OP_REG; | |
801 | operand->base = (type - OP_ST0) + UD_R_ST0; | |
802 | operand->size = 0; | |
803 | break; | |
804 | case OP_AX: | |
805 | operand->type = UD_OP_REG; | |
806 | operand->base = UD_R_AX; | |
807 | operand->size = 16; | |
808 | break; | |
809 | default : | |
810 | operand->type = UD_NONE; | |
811 | break; | |
812 | } | |
813 | return 0; | |
814 | } | |
815 | ||
816 | ||
817 | /* | |
818 | * decode_operands | |
819 | * | |
820 | * Disassemble upto 3 operands of the current instruction being | |
821 | * disassembled. By the end of the function, the operand fields | |
822 | * of the ud structure will have been filled. | |
823 | */ | |
824 | static int | |
825 | decode_operands(struct ud* u) | |
826 | { | |
827 | decode_operand(u, &u->operand[0], | |
828 | u->itab_entry->operand1.type, | |
829 | u->itab_entry->operand1.size); | |
830 | decode_operand(u, &u->operand[1], | |
831 | u->itab_entry->operand2.type, | |
832 | u->itab_entry->operand2.size); | |
833 | decode_operand(u, &u->operand[2], | |
834 | u->itab_entry->operand3.type, | |
835 | u->itab_entry->operand3.size); | |
836 | return 0; | |
837 | } | |
838 | ||
839 | /* ----------------------------------------------------------------------------- | |
840 | * clear_insn() - clear instruction structure | |
841 | * ----------------------------------------------------------------------------- | |
842 | */ | |
843 | static void | |
844 | clear_insn(register struct ud* u) | |
845 | { | |
846 | u->error = 0; | |
847 | u->pfx_seg = 0; | |
848 | u->pfx_opr = 0; | |
849 | u->pfx_adr = 0; | |
850 | u->pfx_lock = 0; | |
851 | u->pfx_repne = 0; | |
852 | u->pfx_rep = 0; | |
853 | u->pfx_repe = 0; | |
854 | u->pfx_rex = 0; | |
855 | u->pfx_insn = 0; | |
856 | u->mnemonic = UD_Inone; | |
857 | u->itab_entry = NULL; | |
858 | u->have_modrm = 0; | |
859 | ||
860 | memset( &u->operand[ 0 ], 0, sizeof( struct ud_operand ) ); | |
861 | memset( &u->operand[ 1 ], 0, sizeof( struct ud_operand ) ); | |
862 | memset( &u->operand[ 2 ], 0, sizeof( struct ud_operand ) ); | |
863 | } | |
864 | ||
865 | static int | |
866 | resolve_mode( struct ud* u ) | |
867 | { | |
868 | /* if in error state, bail out */ | |
869 | if ( u->error ) return -1; | |
870 | ||
871 | /* propagate prefix effects */ | |
872 | if ( u->dis_mode == 64 ) { /* set 64bit-mode flags */ | |
873 | ||
874 | /* Check validity of instruction m64 */ | |
875 | if ( P_INV64( u->itab_entry->prefix ) ) { | |
876 | u->error = 1; | |
877 | return -1; | |
878 | } | |
879 | ||
880 | /* effective rex prefix is the effective mask for the | |
881 | * instruction hard-coded in the opcode map. | |
882 | */ | |
883 | u->pfx_rex = ( u->pfx_rex & 0x40 ) | | |
884 | ( u->pfx_rex & REX_PFX_MASK( u->itab_entry->prefix ) ); | |
885 | ||
886 | /* whether this instruction has a default operand size of | |
887 | * 64bit, also hardcoded into the opcode map. | |
888 | */ | |
889 | u->default64 = P_DEF64( u->itab_entry->prefix ); | |
890 | /* calculate effective operand size */ | |
891 | if ( REX_W( u->pfx_rex ) ) { | |
892 | u->opr_mode = 64; | |
893 | } else if ( u->pfx_opr ) { | |
894 | u->opr_mode = 16; | |
895 | } else { | |
896 | /* unless the default opr size of instruction is 64, | |
897 | * the effective operand size in the absence of rex.w | |
898 | * prefix is 32. | |
899 | */ | |
900 | u->opr_mode = ( u->default64 ) ? 64 : 32; | |
901 | } | |
902 | ||
903 | /* calculate effective address size */ | |
904 | u->adr_mode = (u->pfx_adr) ? 32 : 64; | |
905 | } else if ( u->dis_mode == 32 ) { /* set 32bit-mode flags */ | |
906 | u->opr_mode = ( u->pfx_opr ) ? 16 : 32; | |
907 | u->adr_mode = ( u->pfx_adr ) ? 16 : 32; | |
908 | } else if ( u->dis_mode == 16 ) { /* set 16bit-mode flags */ | |
909 | u->opr_mode = ( u->pfx_opr ) ? 32 : 16; | |
910 | u->adr_mode = ( u->pfx_adr ) ? 32 : 16; | |
911 | } | |
912 | ||
913 | /* These flags determine which operand to apply the operand size | |
914 | * cast to. | |
915 | */ | |
916 | u->c1 = ( P_C1( u->itab_entry->prefix ) ) ? 1 : 0; | |
917 | u->c2 = ( P_C2( u->itab_entry->prefix ) ) ? 1 : 0; | |
918 | u->c3 = ( P_C3( u->itab_entry->prefix ) ) ? 1 : 0; | |
919 | ||
920 | /* set flags for implicit addressing */ | |
921 | u->implicit_addr = P_IMPADDR( u->itab_entry->prefix ); | |
922 | ||
923 | return 0; | |
924 | } | |
925 | ||
926 | static int gen_hex( struct ud *u ) | |
927 | { | |
928 | unsigned int i; | |
929 | unsigned char *src_ptr = ud_inp_sess( u ); | |
930 | char* src_hex; | |
931 | ||
932 | /* bail out if in error stat. */ | |
933 | if ( u->error ) return -1; | |
934 | /* output buffer pointe */ | |
935 | src_hex = ( char* ) u->insn_hexcode; | |
936 | /* for each byte used to decode instruction */ | |
937 | for ( i = 0; i < u->inp_ctr; ++i, ++src_ptr) { | |
938 | sprintf( src_hex, "%02x", *src_ptr & 0xFF ); | |
939 | src_hex += 2; | |
940 | } | |
941 | return 0; | |
942 | } | |
943 | ||
944 | ||
945 | static inline int | |
946 | decode_insn(struct ud *u, uint16_t ptr) | |
947 | { | |
948 | ASSERT((ptr & 0x8000) == 0); | |
949 | u->itab_entry = &ud_itab[ ptr ]; | |
950 | u->mnemonic = u->itab_entry->mnemonic; | |
951 | return (resolve_mode(u) == 0 && | |
952 | decode_operands(u) == 0 && | |
953 | resolve_mnemonic(u) == 0) ? 0 : -1; | |
954 | } | |
955 | ||
956 | ||
957 | /* | |
958 | * decode_3dnow() | |
959 | * | |
960 | * Decoding 3dnow is a little tricky because of its strange opcode | |
961 | * structure. The final opcode disambiguation depends on the last | |
962 | * byte that comes after the operands have been decoded. Fortunately, | |
963 | * all 3dnow instructions have the same set of operand types. So we | |
964 | * go ahead and decode the instruction by picking an arbitrarily chosen | |
965 | * valid entry in the table, decode the operands, and read the final | |
966 | * byte to resolve the menmonic. | |
967 | */ | |
968 | static inline int | |
969 | decode_3dnow(struct ud* u) | |
970 | { | |
971 | uint16_t ptr; | |
972 | ASSERT(u->le->type == UD_TAB__OPC_3DNOW); | |
973 | ASSERT(u->le->table[0xc] != 0); | |
974 | decode_insn(u, u->le->table[0xc]); | |
975 | ud_inp_next(u); | |
976 | if (u->error) { | |
977 | return -1; | |
978 | } | |
979 | ptr = u->le->table[ud_inp_curr(u)]; | |
980 | ASSERT((ptr & 0x8000) == 0); | |
981 | u->mnemonic = ud_itab[ptr].mnemonic; | |
982 | return 0; | |
983 | } | |
984 | ||
985 | ||
986 | static int | |
987 | decode_ssepfx(struct ud *u) | |
988 | { | |
989 | uint8_t idx = ((u->pfx_insn & 0xf) + 1) / 2; | |
990 | if (u->le->table[idx] == 0) { | |
991 | idx = 0; | |
992 | } | |
993 | if (idx && u->le->table[idx] != 0) { | |
994 | /* | |
995 | * "Consume" the prefix as a part of the opcode, so it is no | |
996 | * longer exported as an instruction prefix. | |
997 | */ | |
998 | switch (u->pfx_insn) { | |
999 | case 0xf2: | |
1000 | u->pfx_repne = 0; | |
1001 | break; | |
1002 | case 0xf3: | |
1003 | u->pfx_rep = 0; | |
1004 | u->pfx_repe = 0; | |
1005 | break; | |
1006 | case 0x66: | |
1007 | u->pfx_opr = 0; | |
1008 | break; | |
1009 | } | |
1010 | } | |
1011 | return decode_ext(u, u->le->table[idx]); | |
1012 | } | |
1013 | ||
1014 | ||
1015 | /* | |
1016 | * decode_ext() | |
1017 | * | |
1018 | * Decode opcode extensions (if any) | |
1019 | */ | |
1020 | static int | |
1021 | decode_ext(struct ud *u, uint16_t ptr) | |
1022 | { | |
1023 | uint8_t idx = 0; | |
1024 | if ((ptr & 0x8000) == 0) { | |
1025 | return decode_insn(u, ptr); | |
1026 | } | |
1027 | u->le = &ud_lookup_table_list[(~0x8000 & ptr)]; | |
1028 | if (u->le->type == UD_TAB__OPC_3DNOW) { | |
1029 | return decode_3dnow(u); | |
1030 | } | |
1031 | ||
1032 | switch (u->le->type) { | |
1033 | case UD_TAB__OPC_MOD: | |
1034 | /* !11 = 0, 11 = 1 */ | |
1035 | idx = (MODRM_MOD(modrm(u)) + 1) / 4; | |
1036 | break; | |
1037 | /* disassembly mode/operand size/address size based tables. | |
1038 | * 16 = 0,, 32 = 1, 64 = 2 | |
1039 | */ | |
1040 | case UD_TAB__OPC_MODE: | |
1041 | idx = u->dis_mode / 32; | |
1042 | break; | |
1043 | case UD_TAB__OPC_OSIZE: | |
1044 | idx = eff_opr_mode(u->dis_mode, REX_W(u->pfx_rex), u->pfx_opr) / 32; | |
1045 | break; | |
1046 | case UD_TAB__OPC_ASIZE: | |
1047 | idx = eff_adr_mode(u->dis_mode, u->pfx_adr) / 32; | |
1048 | break; | |
1049 | case UD_TAB__OPC_X87: | |
1050 | idx = modrm(u) - 0xC0; | |
1051 | break; | |
1052 | case UD_TAB__OPC_VENDOR: | |
1053 | if (u->vendor == UD_VENDOR_ANY) { | |
1054 | /* choose a valid entry */ | |
1055 | idx = (u->le->table[idx] != 0) ? 0 : 1; | |
1056 | } else if (u->vendor == UD_VENDOR_AMD) { | |
1057 | idx = 0; | |
1058 | } else { | |
1059 | idx = 1; | |
1060 | } | |
1061 | break; | |
1062 | case UD_TAB__OPC_RM: | |
1063 | idx = MODRM_RM(modrm(u)); | |
1064 | break; | |
1065 | case UD_TAB__OPC_REG: | |
1066 | idx = MODRM_REG(modrm(u)); | |
1067 | break; | |
1068 | case UD_TAB__OPC_SSE: | |
1069 | return decode_ssepfx(u); | |
1070 | default: | |
1071 | ASSERT(!"not reached"); | |
1072 | break; | |
1073 | } | |
1074 | ||
1075 | return decode_ext(u, u->le->table[idx]); | |
1076 | } | |
1077 | ||
1078 | ||
1079 | static inline int | |
1080 | decode_opcode(struct ud *u) | |
1081 | { | |
1082 | uint16_t ptr; | |
1083 | ASSERT(u->le->type == UD_TAB__OPC_TABLE); | |
1084 | ud_inp_next(u); | |
1085 | if (u->error) { | |
1086 | return -1; | |
1087 | } | |
1088 | ptr = u->le->table[ud_inp_curr(u)]; | |
1089 | if (ptr & 0x8000) { | |
1090 | u->le = &ud_lookup_table_list[ptr & ~0x8000]; | |
1091 | if (u->le->type == UD_TAB__OPC_TABLE) { | |
1092 | return decode_opcode(u); | |
1093 | } | |
1094 | } | |
1095 | return decode_ext(u, ptr); | |
1096 | } | |
1097 | ||
1098 | ||
1099 | /* ============================================================================= | |
1100 | * ud_decode() - Instruction decoder. Returns the number of bytes decoded. | |
1101 | * ============================================================================= | |
1102 | */ | |
1103 | unsigned int | |
1104 | ud_decode(struct ud *u) | |
1105 | { | |
1106 | ud_inp_start(u); | |
1107 | clear_insn(u); | |
1108 | u->le = &ud_lookup_table_list[0]; | |
1109 | u->error = decode_prefixes(u) == -1 || | |
1110 | decode_opcode(u) == -1 || | |
1111 | u->error; | |
1112 | /* Handle decode error. */ | |
1113 | if (u->error) { | |
1114 | /* clear out the decode data. */ | |
1115 | clear_insn(u); | |
1116 | /* mark the sequence of bytes as invalid. */ | |
1117 | u->itab_entry = & s_ie__invalid; | |
1118 | u->mnemonic = u->itab_entry->mnemonic; | |
1119 | } | |
1120 | ||
1121 | /* maybe this stray segment override byte | |
1122 | * should be spewed out? | |
1123 | */ | |
1124 | if ( !P_SEG( u->itab_entry->prefix ) && | |
1125 | u->operand[0].type != UD_OP_MEM && | |
1126 | u->operand[1].type != UD_OP_MEM ) | |
1127 | u->pfx_seg = 0; | |
1128 | ||
1129 | u->insn_offset = u->pc; /* set offset of instruction */ | |
1130 | u->insn_fill = 0; /* set translation buffer index to 0 */ | |
1131 | u->pc += u->inp_ctr; /* move program counter by bytes decoded */ | |
1132 | gen_hex( u ); /* generate hex code */ | |
1133 | ||
1134 | /* return number of bytes disassembled. */ | |
1135 | return u->inp_ctr; | |
1136 | } | |
1137 | ||
1138 | /* | |
1139 | vim: set ts=2 sw=2 expandtab | |
1140 | */ | |
1141 | ||
1142 | #endif // USE(UDIS86) |