4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
23 * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
24 * Use is subject to license terms.
28 * #pragma ident "@(#)fasttrap_isa.c 1.23 06/09/19 SMI"
33 #define _KERNEL /* Solaris vs. Darwin */
37 #include <sys/fasttrap_isa.h>
38 #include <sys/fasttrap_impl.h>
39 #include <sys/dtrace.h>
40 #include <sys/dtrace_impl.h>
42 #include "fasttrap_regset.h"
44 #include <sys/dtrace_ptss.h>
45 #include <kern/debug.h>
47 #define proc_t struct proc
50 * Lossless User-Land Tracing on x86
51 * ---------------------------------
53 * The execution of most instructions is not dependent on the address; for
54 * these instructions it is sufficient to copy them into the user process's
55 * address space and execute them. To effectively single-step an instruction
56 * in user-land, we copy out the following sequence of instructions to scratch
57 * space in the user thread's ulwp_t structure.
59 * We then set the program counter (%eip or %rip) to point to this scratch
60 * space. Once execution resumes, the original instruction is executed and
61 * then control flow is redirected to what was originally the subsequent
62 * instruction. If the kernel attempts to deliver a signal while single-
63 * stepping, the signal is deferred and the program counter is moved into the
64 * second sequence of instructions. The second sequence ends in a trap into
65 * the kernel where the deferred signal is then properly handled and delivered.
67 * For instructions whose execution is position dependent, we perform simple
68 * emulation. These instructions are limited to control transfer
69 * instructions in 32-bit mode, but in 64-bit mode there's the added wrinkle
70 * of %rip-relative addressing that means that almost any instruction can be
71 * position dependent. For all the details on how we emulate generic
72 * instructions including %rip-relative instructions, see the code in
73 * fasttrap_pid_probe() below where we handle instructions of type
74 * FASTTRAP_T_COMMON (under the header: Generic Instruction Tracing).
77 #define FASTTRAP_MODRM_MOD(modrm) (((modrm) >> 6) & 0x3)
78 #define FASTTRAP_MODRM_REG(modrm) (((modrm) >> 3) & 0x7)
79 #define FASTTRAP_MODRM_RM(modrm) ((modrm) & 0x7)
80 #define FASTTRAP_MODRM(mod, reg, rm) (((mod) << 6) | ((reg) << 3) | (rm))
82 #define FASTTRAP_SIB_SCALE(sib) (((sib) >> 6) & 0x3)
83 #define FASTTRAP_SIB_INDEX(sib) (((sib) >> 3) & 0x7)
84 #define FASTTRAP_SIB_BASE(sib) ((sib) & 0x7)
86 #define FASTTRAP_REX_W(rex) (((rex) >> 3) & 1)
87 #define FASTTRAP_REX_R(rex) (((rex) >> 2) & 1)
88 #define FASTTRAP_REX_X(rex) (((rex) >> 1) & 1)
89 #define FASTTRAP_REX_B(rex) ((rex) & 1)
90 #define FASTTRAP_REX(w, r, x, b) \
91 (0x40 | ((w) << 3) | ((r) << 2) | ((x) << 1) | (b))
94 * Single-byte op-codes.
96 #define FASTTRAP_PUSHL_EBP 0x55
98 #define FASTTRAP_JO 0x70
99 #define FASTTRAP_JNO 0x71
100 #define FASTTRAP_JB 0x72
101 #define FASTTRAP_JAE 0x73
102 #define FASTTRAP_JE 0x74
103 #define FASTTRAP_JNE 0x75
104 #define FASTTRAP_JBE 0x76
105 #define FASTTRAP_JA 0x77
106 #define FASTTRAP_JS 0x78
107 #define FASTTRAP_JNS 0x79
108 #define FASTTRAP_JP 0x7a
109 #define FASTTRAP_JNP 0x7b
110 #define FASTTRAP_JL 0x7c
111 #define FASTTRAP_JGE 0x7d
112 #define FASTTRAP_JLE 0x7e
113 #define FASTTRAP_JG 0x7f
115 #define FASTTRAP_NOP 0x90
117 #define FASTTRAP_MOV_EAX 0xb8
118 #define FASTTRAP_MOV_ECX 0xb9
120 #define FASTTRAP_RET16 0xc2
121 #define FASTTRAP_RET 0xc3
123 #define FASTTRAP_LOOPNZ 0xe0
124 #define FASTTRAP_LOOPZ 0xe1
125 #define FASTTRAP_LOOP 0xe2
126 #define FASTTRAP_JCXZ 0xe3
128 #define FASTTRAP_CALL 0xe8
129 #define FASTTRAP_JMP32 0xe9
130 #define FASTTRAP_JMP8 0xeb
132 #define FASTTRAP_INT3 0xcc
133 #define FASTTRAP_INT 0xcd
134 #define T_DTRACE_RET 0x7f
136 #define FASTTRAP_2_BYTE_OP 0x0f
137 #define FASTTRAP_GROUP5_OP 0xff
140 * Two-byte op-codes (second byte only).
142 #define FASTTRAP_0F_JO 0x80
143 #define FASTTRAP_0F_JNO 0x81
144 #define FASTTRAP_0F_JB 0x82
145 #define FASTTRAP_0F_JAE 0x83
146 #define FASTTRAP_0F_JE 0x84
147 #define FASTTRAP_0F_JNE 0x85
148 #define FASTTRAP_0F_JBE 0x86
149 #define FASTTRAP_0F_JA 0x87
150 #define FASTTRAP_0F_JS 0x88
151 #define FASTTRAP_0F_JNS 0x89
152 #define FASTTRAP_0F_JP 0x8a
153 #define FASTTRAP_0F_JNP 0x8b
154 #define FASTTRAP_0F_JL 0x8c
155 #define FASTTRAP_0F_JGE 0x8d
156 #define FASTTRAP_0F_JLE 0x8e
157 #define FASTTRAP_0F_JG 0x8f
159 #define FASTTRAP_EFLAGS_OF 0x800
160 #define FASTTRAP_EFLAGS_DF 0x400
161 #define FASTTRAP_EFLAGS_SF 0x080
162 #define FASTTRAP_EFLAGS_ZF 0x040
163 #define FASTTRAP_EFLAGS_AF 0x010
164 #define FASTTRAP_EFLAGS_PF 0x004
165 #define FASTTRAP_EFLAGS_CF 0x001
168 * Instruction prefixes.
170 #define FASTTRAP_PREFIX_OPERAND 0x66
171 #define FASTTRAP_PREFIX_ADDRESS 0x67
172 #define FASTTRAP_PREFIX_CS 0x2E
173 #define FASTTRAP_PREFIX_DS 0x3E
174 #define FASTTRAP_PREFIX_ES 0x26
175 #define FASTTRAP_PREFIX_FS 0x64
176 #define FASTTRAP_PREFIX_GS 0x65
177 #define FASTTRAP_PREFIX_SS 0x36
178 #define FASTTRAP_PREFIX_LOCK 0xF0
179 #define FASTTRAP_PREFIX_REP 0xF3
180 #define FASTTRAP_PREFIX_REPNE 0xF2
182 #define FASTTRAP_NOREG 0xff
185 * Map between instruction register encodings and the kernel constants which
186 * correspond to indicies into struct regs.
190 * APPLE NOTE: We are cheating here. The regmap is used to decode which register
191 * a given instruction is trying to reference. OS X does not have extended registers
192 * for 32 bit apps, but the *order* is the same. So for 32 bit state, we will return:
199 * The fasttrap_getreg function knows how to make the correct transformation.
201 #if __sol64 || defined(__APPLE__)
202 static const uint8_t regmap
[16] = {
203 REG_RAX
, REG_RCX
, REG_RDX
, REG_RBX
, REG_RSP
, REG_RBP
, REG_RSI
, REG_RDI
,
204 REG_R8
, REG_R9
, REG_R10
, REG_R11
, REG_R12
, REG_R13
, REG_R14
, REG_R15
,
207 static const uint8_t regmap
[8] = {
208 EAX
, ECX
, EDX
, EBX
, UESP
, EBP
, ESI
, EDI
212 static user_addr_t
fasttrap_getreg(x86_saved_state_t
*, uint_t
);
215 fasttrap_anarg(x86_saved_state_t
*regs
, int function_entry
, int argno
)
218 int shift
= function_entry
? 1 : 0;
220 x86_saved_state64_t
*regs64
;
221 x86_saved_state32_t
*regs32
;
222 unsigned int p_model
;
224 if (is_saved_state64(regs
)) {
225 regs64
= saved_state64(regs
);
227 p_model
= DATAMODEL_LP64
;
230 regs32
= saved_state32(regs
);
231 p_model
= DATAMODEL_ILP32
;
234 if (p_model
== DATAMODEL_LP64
) {
238 * In 64-bit mode, the first six arguments are stored in
242 return ((®s64
->rdi
)[argno
]);
244 stack
= regs64
->isf
.rsp
+ sizeof(uint64_t) * (argno
- 6 + shift
);
245 DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT
);
246 value
= dtrace_fuword64(stack
);
247 DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT
| CPU_DTRACE_BADADDR
);
249 uint32_t *stack
= (uint32_t *)regs32
->uesp
;
250 DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT
);
251 value
= dtrace_fuword32((user_addr_t
)(unsigned long)&stack
[argno
+ shift
]);
252 DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT
| CPU_DTRACE_BADADDR
);
260 fasttrap_tracepoint_init(proc_t
*p
, fasttrap_tracepoint_t
*tp
, user_addr_t pc
,
261 fasttrap_probe_type_t type
)
264 uint8_t instr
[FASTTRAP_MAX_INSTR_SIZE
+ 10];
265 size_t len
= FASTTRAP_MAX_INSTR_SIZE
;
266 size_t first
= MIN(len
, PAGE_SIZE
- (pc
& PAGE_MASK
));
270 uint8_t seg
, rex
= 0;
271 unsigned int p_model
= (p
->p_flag
& P_LP64
) ? DATAMODEL_LP64
: DATAMODEL_ILP32
;
274 * Read the instruction at the given address out of the process's
275 * address space. We don't have to worry about a debugger
276 * changing this instruction before we overwrite it with our trap
277 * instruction since P_PR_LOCK is set. Since instructions can span
278 * pages, we potentially read the instruction in two parts. If the
279 * second part fails, we just zero out that part of the instruction.
282 * APPLE NOTE: Of course, we do not have a P_PR_LOCK, so this is racey...
284 if (uread(p
, &instr
[0], first
, pc
) != 0)
287 uread(p
, &instr
[first
], len
- first
, pc
+ first
) != 0) {
288 bzero(&instr
[first
], len
- first
);
293 * If the disassembly fails, then we have a malformed instruction.
295 if ((size
= dtrace_instr_size_isa(instr
, p_model
, &rmindex
)) <= 0)
299 * Make sure the disassembler isn't completely broken.
301 ASSERT(-1 <= rmindex
&& rmindex
< (int)size
);
304 * If the computed size is greater than the number of bytes read,
305 * then it was a malformed instruction possibly because it fell on a
306 * page boundary and the subsequent page was missing or because of
307 * some malicious user.
312 tp
->ftt_size
= (uint8_t)size
;
313 tp
->ftt_segment
= FASTTRAP_SEG_NONE
;
316 * Find the start of the instruction's opcode by processing any
321 switch (instr
[start
]) {
322 case FASTTRAP_PREFIX_SS
:
325 case FASTTRAP_PREFIX_GS
:
328 case FASTTRAP_PREFIX_FS
:
331 case FASTTRAP_PREFIX_ES
:
334 case FASTTRAP_PREFIX_DS
:
337 case FASTTRAP_PREFIX_CS
:
340 case FASTTRAP_PREFIX_OPERAND
:
341 case FASTTRAP_PREFIX_ADDRESS
:
342 case FASTTRAP_PREFIX_LOCK
:
343 case FASTTRAP_PREFIX_REP
:
344 case FASTTRAP_PREFIX_REPNE
:
347 * It's illegal for an instruction to specify
348 * two segment prefixes -- give up on this
349 * illegal instruction.
351 if (tp
->ftt_segment
!= FASTTRAP_SEG_NONE
)
354 tp
->ftt_segment
= seg
;
362 #if __sol64 || defined(__APPLE__)
364 * Identify the REX prefix on 64-bit processes.
366 if (p_model
== DATAMODEL_LP64
&& (instr
[start
] & 0xf0) == 0x40)
367 rex
= instr
[start
++];
371 * Now that we're pretty sure that the instruction is okay, copy the
372 * valid part to the tracepoint.
374 bcopy(instr
, tp
->ftt_instr
, FASTTRAP_MAX_INSTR_SIZE
);
376 tp
->ftt_type
= FASTTRAP_T_COMMON
;
377 if (instr
[start
] == FASTTRAP_2_BYTE_OP
) {
378 switch (instr
[start
+ 1]) {
380 case FASTTRAP_0F_JNO
:
382 case FASTTRAP_0F_JAE
:
384 case FASTTRAP_0F_JNE
:
385 case FASTTRAP_0F_JBE
:
388 case FASTTRAP_0F_JNS
:
390 case FASTTRAP_0F_JNP
:
392 case FASTTRAP_0F_JGE
:
393 case FASTTRAP_0F_JLE
:
395 tp
->ftt_type
= FASTTRAP_T_JCC
;
396 tp
->ftt_code
= (instr
[start
+ 1] & 0x0f) | FASTTRAP_JO
;
397 tp
->ftt_dest
= pc
+ tp
->ftt_size
+
398 *(int32_t *)&instr
[start
+ 2];
401 } else if (instr
[start
] == FASTTRAP_GROUP5_OP
) {
402 uint_t mod
= FASTTRAP_MODRM_MOD(instr
[start
+ 1]);
403 uint_t reg
= FASTTRAP_MODRM_REG(instr
[start
+ 1]);
404 uint_t rm
= FASTTRAP_MODRM_RM(instr
[start
+ 1]);
406 if (reg
== 2 || reg
== 4) {
410 tp
->ftt_type
= FASTTRAP_T_CALL
;
412 tp
->ftt_type
= FASTTRAP_T_JMP
;
419 ASSERT(p_model
== DATAMODEL_LP64
|| rex
== 0);
422 * See AMD x86-64 Architecture Programmer's Manual
423 * Volume 3, Section 1.2.7, Table 1-12, and
424 * Appendix A.3.1, Table A-15.
426 if (mod
!= 3 && rm
== 4) {
427 uint8_t sib
= instr
[start
+ 2];
428 uint_t index
= FASTTRAP_SIB_INDEX(sib
);
429 uint_t base
= FASTTRAP_SIB_BASE(sib
);
431 tp
->ftt_scale
= FASTTRAP_SIB_SCALE(sib
);
433 tp
->ftt_index
= (index
== 4) ?
435 regmap
[index
| (FASTTRAP_REX_X(rex
) << 3)];
436 tp
->ftt_base
= (mod
== 0 && base
== 5) ?
438 regmap
[base
| (FASTTRAP_REX_B(rex
) << 3)];
441 sz
= mod
== 1 ? 1 : 4;
444 * In 64-bit mode, mod == 0 and r/m == 5
445 * denotes %rip-relative addressing; in 32-bit
446 * mode, the base register isn't used. In both
447 * modes, there is a 32-bit operand.
449 if (mod
== 0 && rm
== 5) {
450 #if __sol64 || defined(__APPLE__)
451 if (p_model
== DATAMODEL_LP64
)
452 tp
->ftt_base
= REG_RIP
;
455 tp
->ftt_base
= FASTTRAP_NOREG
;
459 (FASTTRAP_REX_B(rex
) << 3);
461 tp
->ftt_base
= regmap
[base
];
462 sz
= mod
== 1 ? 1 : mod
== 2 ? 4 : 0;
464 tp
->ftt_index
= FASTTRAP_NOREG
;
469 tp
->ftt_dest
= *(int8_t *)&instr
[start
+ i
];
471 tp
->ftt_dest
= *(int32_t *)&instr
[start
+ i
];
476 switch (instr
[start
]) {
478 tp
->ftt_type
= FASTTRAP_T_RET
;
482 tp
->ftt_type
= FASTTRAP_T_RET16
;
483 tp
->ftt_dest
= *(uint16_t *)&instr
[start
+ 1];
502 tp
->ftt_type
= FASTTRAP_T_JCC
;
503 tp
->ftt_code
= instr
[start
];
504 tp
->ftt_dest
= pc
+ tp
->ftt_size
+
505 (int8_t)instr
[start
+ 1];
508 case FASTTRAP_LOOPNZ
:
511 tp
->ftt_type
= FASTTRAP_T_LOOP
;
512 tp
->ftt_code
= instr
[start
];
513 tp
->ftt_dest
= pc
+ tp
->ftt_size
+
514 (int8_t)instr
[start
+ 1];
518 tp
->ftt_type
= FASTTRAP_T_JCXZ
;
519 tp
->ftt_dest
= pc
+ tp
->ftt_size
+
520 (int8_t)instr
[start
+ 1];
524 tp
->ftt_type
= FASTTRAP_T_CALL
;
525 tp
->ftt_dest
= pc
+ tp
->ftt_size
+
526 *(int32_t *)&instr
[start
+ 1];
531 tp
->ftt_type
= FASTTRAP_T_JMP
;
532 tp
->ftt_dest
= pc
+ tp
->ftt_size
+
533 *(int32_t *)&instr
[start
+ 1];
536 tp
->ftt_type
= FASTTRAP_T_JMP
;
537 tp
->ftt_dest
= pc
+ tp
->ftt_size
+
538 (int8_t)instr
[start
+ 1];
541 case FASTTRAP_PUSHL_EBP
:
543 tp
->ftt_type
= FASTTRAP_T_PUSHL_EBP
;
547 #if __sol64 || defined(__APPLE__)
548 ASSERT(p_model
== DATAMODEL_LP64
|| rex
== 0);
551 * On sol64 we have to be careful not to confuse a nop
552 * (actually xchgl %eax, %eax) with an instruction using
553 * the same opcode, but that does something different
554 * (e.g. xchgl %r8d, %eax or xcghq %r8, %rax).
556 if (FASTTRAP_REX_B(rex
) == 0)
558 tp
->ftt_type
= FASTTRAP_T_NOP
;
563 * The pid provider shares the int3 trap with debugger
564 * breakpoints so we can't instrument them.
566 ASSERT(instr
[start
] == FASTTRAP_INSTR
);
571 * Interrupts seem like they could be traced with
572 * no negative implications, but it's possible that
573 * a thread could be redirected by the trap handling
574 * code which would eventually return to the
575 * instruction after the interrupt. If the interrupt
576 * were in our scratch space, the subsequent
577 * instruction might be overwritten before we return.
578 * Accordingly we refuse to instrument any interrupt.
584 #if __sol64 || defined(__APPLE__)
585 if (p_model
== DATAMODEL_LP64
&& tp
->ftt_type
== FASTTRAP_T_COMMON
) {
587 * If the process is 64-bit and the instruction type is still
588 * FASTTRAP_T_COMMON -- meaning we're going to copy it out an
589 * execute it -- we need to watch for %rip-relative
590 * addressing mode. See the portion of fasttrap_pid_probe()
591 * below where we handle tracepoints with type
592 * FASTTRAP_T_COMMON for how we emulate instructions that
593 * employ %rip-relative addressing.
596 uint_t mod
= FASTTRAP_MODRM_MOD(instr
[rmindex
]);
597 uint_t reg
= FASTTRAP_MODRM_REG(instr
[rmindex
]);
598 uint_t rm
= FASTTRAP_MODRM_RM(instr
[rmindex
]);
600 ASSERT(rmindex
> (int)start
);
602 if (mod
== 0 && rm
== 5) {
604 * We need to be sure to avoid other
605 * registers used by this instruction. While
606 * the reg field may determine the op code
607 * rather than denoting a register, assuming
608 * that it denotes a register is always safe.
609 * We leave the REX field intact and use
610 * whatever value's there for simplicity.
613 tp
->ftt_ripmode
= FASTTRAP_RIP_1
|
615 FASTTRAP_REX_B(rex
));
618 tp
->ftt_ripmode
= FASTTRAP_RIP_2
|
620 FASTTRAP_REX_B(rex
));
624 tp
->ftt_modrm
= tp
->ftt_instr
[rmindex
];
625 tp
->ftt_instr
[rmindex
] =
626 FASTTRAP_MODRM(2, reg
, rm
);
636 fasttrap_tracepoint_install(proc_t
*p
, fasttrap_tracepoint_t
*tp
)
638 fasttrap_instr_t instr
= FASTTRAP_INSTR
;
640 if (uwrite(p
, &instr
, 1, tp
->ftt_pc
) != 0)
647 fasttrap_tracepoint_remove(proc_t
*p
, fasttrap_tracepoint_t
*tp
)
652 * Distinguish between read or write failures and a changed
655 if (uread(p
, &instr
, 1, tp
->ftt_pc
) != 0)
657 if (instr
!= FASTTRAP_INSTR
)
659 if (uwrite(p
, &tp
->ftt_instr
[0], 1, tp
->ftt_pc
) != 0)
666 fasttrap_return_common(x86_saved_state_t
*regs
, user_addr_t pc
, pid_t pid
,
669 x86_saved_state64_t
*regs64
;
670 x86_saved_state32_t
*regs32
;
671 unsigned int p_model
;
673 if (is_saved_state64(regs
)) {
674 regs64
= saved_state64(regs
);
676 p_model
= DATAMODEL_LP64
;
679 regs32
= saved_state32(regs
);
680 p_model
= DATAMODEL_ILP32
;
683 fasttrap_tracepoint_t
*tp
;
684 fasttrap_bucket_t
*bucket
;
688 pid_mtx
= &cpu_core
[CPU
->cpu_id
].cpuc_pid_lock
;
689 lck_mtx_lock(pid_mtx
);
690 bucket
= &fasttrap_tpoints
.fth_table
[FASTTRAP_TPOINTS_INDEX(pid
, pc
)];
692 for (tp
= bucket
->ftb_data
; tp
!= NULL
; tp
= tp
->ftt_next
) {
693 if (pid
== tp
->ftt_pid
&& pc
== tp
->ftt_pc
&&
694 !tp
->ftt_proc
->ftpc_defunct
)
699 * Don't sweat it if we can't find the tracepoint again; unlike
700 * when we're in fasttrap_pid_probe(), finding the tracepoint here
701 * is not essential to the correct execution of the process.
704 lck_mtx_unlock(pid_mtx
);
708 for (id
= tp
->ftt_retids
; id
!= NULL
; id
= id
->fti_next
) {
710 * If there's a branch that could act as a return site, we
711 * need to trace it, and check here if the program counter is
712 * external to the function.
714 if (tp
->ftt_type
!= FASTTRAP_T_RET
&&
715 tp
->ftt_type
!= FASTTRAP_T_RET16
&&
716 new_pc
- id
->fti_probe
->ftp_faddr
<
717 id
->fti_probe
->ftp_fsize
)
720 if (p_model
== DATAMODEL_LP64
) {
721 dtrace_probe(id
->fti_probe
->ftp_id
,
722 pc
- id
->fti_probe
->ftp_faddr
,
723 regs64
->rax
, regs64
->rdx
, 0, 0);
725 dtrace_probe(id
->fti_probe
->ftp_id
,
726 pc
- id
->fti_probe
->ftp_faddr
,
727 regs32
->eax
, regs32
->edx
, 0, 0);
731 lck_mtx_unlock(pid_mtx
);
735 fasttrap_sigsegv(proc_t
*p
, uthread_t t
, user_addr_t addr
)
739 /* Set fault address and mark signal */
741 t
->uu_siglist
|= sigmask(SIGSEGV
);
744 * XXX These two line may be redundant; if not, then we need
745 * XXX to potentially set the data address in the machine
746 * XXX specific thread state structure to indicate the address.
748 t
->uu_exception
= KERN_INVALID_ADDRESS
; /* SIGSEGV */
749 t
->uu_subcode
= 0; /* XXX pad */
754 signal_setast(t
->uu_context
.vc_thread
);
758 fasttrap_usdt_args64(fasttrap_probe_t
*probe
, x86_saved_state64_t
*regs64
, int argc
,
761 int i
, x
, cap
= MIN(argc
, probe
->ftp_nargs
);
762 user_addr_t stack
= (user_addr_t
)regs64
->isf
.rsp
;
764 for (i
= 0; i
< cap
; i
++) {
765 x
= probe
->ftp_argmap
[i
];
768 /* FIXME! This may be broken, needs testing */
769 argv
[i
] = (®s64
->rdi
)[x
];
771 fasttrap_fuword64_noerr(stack
+ (x
* sizeof(uint64_t)), &argv
[i
]);
775 for (; i
< argc
; i
++) {
781 fasttrap_usdt_args32(fasttrap_probe_t
*probe
, x86_saved_state32_t
*regs32
, int argc
,
784 int i
, x
, cap
= MIN(argc
, probe
->ftp_nargs
);
785 uint32_t *stack
= (uint32_t *)regs32
->uesp
;
787 for (i
= 0; i
< cap
; i
++) {
788 x
= probe
->ftp_argmap
[i
];
790 fasttrap_fuword32_noerr((user_addr_t
)(unsigned long)&stack
[x
], &argv
[i
]);
793 for (; i
< argc
; i
++) {
802 fasttrap_do_seg(fasttrap_tracepoint_t
*tp
, x86_saved_state_t
*rp
, user_addr_t
*addr
) // 64 bit
804 #pragma unused(tp, rp, addr)
805 printf("fasttrap_do_seg() called while unimplemented.\n");
809 uint16_t sel
, ndx
, type
;
812 switch (tp
->ftt_segment
) {
813 case FASTTRAP_SEG_CS
:
816 case FASTTRAP_SEG_DS
:
819 case FASTTRAP_SEG_ES
:
822 case FASTTRAP_SEG_FS
:
825 case FASTTRAP_SEG_GS
:
828 case FASTTRAP_SEG_SS
:
834 * Make sure the given segment register specifies a user priority
835 * selector rather than a kernel selector.
843 * Check the bounds and grab the descriptor out of the specified
847 if (ndx
> p
->p_ldtlimit
)
850 desc
= p
->p_ldt
+ ndx
;
856 desc
= cpu_get_gdt() + ndx
;
860 * The descriptor must have user privilege level and it must be
863 if (desc
->usd_dpl
!= SEL_UPL
|| desc
->usd_p
!= 1)
866 type
= desc
->usd_type
;
869 * If the S bit in the type field is not set, this descriptor can
870 * only be used in system context.
872 if ((type
& 0x10) != 0x10)
875 limit
= USEGD_GETLIMIT(desc
) * (desc
->usd_gran
? PAGESIZE
: 1);
877 if (tp
->ftt_segment
== FASTTRAP_SEG_CS
) {
879 * The code/data bit and readable bit must both be set.
881 if ((type
& 0xa) != 0xa)
888 * The code/data bit must be clear.
890 if ((type
& 0x8) != 0)
894 * If the expand-down bit is clear, we just check the limit as
895 * it would naturally be applied. Otherwise, we need to check
896 * that the address is the range [limit + 1 .. 0xffff] or
897 * [limit + 1 ... 0xffffffff] depending on if the default
898 * operand size bit is set.
900 if ((type
& 0x4) == 0) {
903 } else if (desc
->usd_def32
) {
904 if (*addr
< limit
+ 1 || 0xffff < *addr
)
907 if (*addr
< limit
+ 1 || 0xffffffff < *addr
)
912 *addr
+= USEGD_GETBASE(desc
);
918 * Due to variances between Solaris and xnu, I have split this into a 32 bit and 64 bit
919 * code path. It still takes an x86_saved_state_t* argument, because it must sometimes
920 * call other methods that require a x86_saved_state_t.
924 * Any changes made to this method must be echo'd in fasttrap_pid_probe64!
928 fasttrap_pid_probe32(x86_saved_state_t
*regs
)
930 ASSERT(is_saved_state32(regs
));
932 x86_saved_state32_t
*regs32
= saved_state32(regs
);
933 user_addr_t pc
= regs32
->eip
- 1;
934 proc_t
*p
= current_proc();
935 user_addr_t new_pc
= 0;
936 fasttrap_bucket_t
*bucket
;
938 fasttrap_tracepoint_t
*tp
, tp_local
;
940 dtrace_icookie_t cookie
;
941 uint_t is_enabled
= 0;
943 uthread_t uthread
= (uthread_t
)get_bsdthread_info(current_thread());
946 * It's possible that a user (in a veritable orgy of bad planning)
947 * could redirect this thread's flow of control before it reached the
948 * return probe fasttrap. In this case we need to kill the process
949 * since it's in a unrecoverable state.
951 if (uthread
->t_dtrace_step
) {
952 ASSERT(uthread
->t_dtrace_on
);
953 fasttrap_sigtrap(p
, uthread
, pc
);
958 * Clear all user tracing flags.
960 uthread
->t_dtrace_ft
= 0;
961 uthread
->t_dtrace_pc
= 0;
962 uthread
->t_dtrace_npc
= 0;
963 uthread
->t_dtrace_scrpc
= 0;
964 uthread
->t_dtrace_astpc
= 0;
967 * Treat a child created by a call to vfork(2) as if it were its
968 * parent. We know that there's only one thread of control in such a
972 * APPLE NOTE: Terry says: "You need to hold the process locks (currently: kernel funnel) for this traversal"
973 * FIXME: How do we assert this?
975 while (p
->p_lflag
& P_LINVFORK
)
979 pid_mtx
= &cpu_core
[CPU
->cpu_id
].cpuc_pid_lock
;
980 lck_mtx_lock(pid_mtx
);
981 bucket
= &fasttrap_tpoints
.fth_table
[FASTTRAP_TPOINTS_INDEX(pid
, pc
)];
984 * Lookup the tracepoint that the process just hit.
986 for (tp
= bucket
->ftb_data
; tp
!= NULL
; tp
= tp
->ftt_next
) {
987 if (pid
== tp
->ftt_pid
&& pc
== tp
->ftt_pc
&&
988 !tp
->ftt_proc
->ftpc_defunct
)
993 * If we couldn't find a matching tracepoint, either a tracepoint has
994 * been inserted without using the pid<pid> ioctl interface (see
995 * fasttrap_ioctl), or somehow we have mislaid this tracepoint.
998 lck_mtx_unlock(pid_mtx
);
1003 * Set the program counter to the address of the traced instruction
1004 * so that it looks right in ustack() output.
1008 if (tp
->ftt_ids
!= NULL
) {
1011 uint32_t s0
, s1
, s2
, s3
, s4
, s5
;
1012 uint32_t *stack
= (uint32_t *)regs32
->uesp
;
1015 * In 32-bit mode, all arguments are passed on the
1016 * stack. If this is a function entry probe, we need
1017 * to skip the first entry on the stack as it
1018 * represents the return address rather than a
1019 * parameter to the function.
1021 fasttrap_fuword32_noerr((user_addr_t
)(unsigned long)&stack
[0], &s0
);
1022 fasttrap_fuword32_noerr((user_addr_t
)(unsigned long)&stack
[1], &s1
);
1023 fasttrap_fuword32_noerr((user_addr_t
)(unsigned long)&stack
[2], &s2
);
1024 fasttrap_fuword32_noerr((user_addr_t
)(unsigned long)&stack
[3], &s3
);
1025 fasttrap_fuword32_noerr((user_addr_t
)(unsigned long)&stack
[4], &s4
);
1026 fasttrap_fuword32_noerr((user_addr_t
)(unsigned long)&stack
[5], &s5
);
1028 for (id
= tp
->ftt_ids
; id
!= NULL
; id
= id
->fti_next
) {
1029 fasttrap_probe_t
*probe
= id
->fti_probe
;
1031 if (id
->fti_ptype
== DTFTP_ENTRY
) {
1033 * We note that this was an entry
1034 * probe to help ustack() find the
1037 cookie
= dtrace_interrupt_disable();
1038 DTRACE_CPUFLAG_SET(CPU_DTRACE_ENTRY
);
1039 dtrace_probe(probe
->ftp_id
, s1
, s2
,
1041 DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_ENTRY
);
1042 dtrace_interrupt_enable(cookie
);
1043 } else if (id
->fti_ptype
== DTFTP_IS_ENABLED
) {
1045 * Note that in this case, we don't
1046 * call dtrace_probe() since it's only
1047 * an artificial probe meant to change
1048 * the flow of control so that it
1049 * encounters the true probe.
1052 } else if (probe
->ftp_argmap
== NULL
) {
1053 dtrace_probe(probe
->ftp_id
, s0
, s1
,
1058 fasttrap_usdt_args32(probe
, regs32
,
1059 sizeof (t
) / sizeof (t
[0]), t
);
1061 dtrace_probe(probe
->ftp_id
, t
[0], t
[1],
1065 /* APPLE NOTE: Oneshot probes get one and only one chance... */
1066 if (probe
->ftp_prov
->ftp_provider_type
== DTFTP_PROVIDER_ONESHOT
) {
1067 fasttrap_tracepoint_remove(p
, tp
);
1073 * We're about to do a bunch of work so we cache a local copy of
1074 * the tracepoint to emulate the instruction, and then find the
1075 * tracepoint again later if we need to light up any return probes.
1078 lck_mtx_unlock(pid_mtx
);
1082 * Set the program counter to appear as though the traced instruction
1083 * had completely executed. This ensures that fasttrap_getreg() will
1084 * report the expected value for REG_RIP.
1086 regs32
->eip
= pc
+ tp
->ftt_size
;
1089 * If there's an is-enabled probe connected to this tracepoint it
1090 * means that there was a 'xorl %eax, %eax' or 'xorq %rax, %rax'
1091 * instruction that was placed there by DTrace when the binary was
1092 * linked. As this probe is, in fact, enabled, we need to stuff 1
1093 * into %eax or %rax. Accordingly, we can bypass all the instruction
1094 * emulation logic since we know the inevitable result. It's possible
1095 * that a user could construct a scenario where the 'is-enabled'
1096 * probe was on some other instruction, but that would be a rather
1097 * exotic way to shoot oneself in the foot.
1101 new_pc
= regs32
->eip
;
1106 * We emulate certain types of instructions to ensure correctness
1107 * (in the case of position dependent instructions) or optimize
1108 * common cases. The rest we have the thread execute back in user-
1111 switch (tp
->ftt_type
) {
1112 case FASTTRAP_T_RET
:
1113 case FASTTRAP_T_RET16
:
1120 * We have to emulate _every_ facet of the behavior of a ret
1121 * instruction including what happens if the load from %esp
1122 * fails; in that case, we send a SIGSEGV.
1125 ret
= fasttrap_fuword32((user_addr_t
)regs32
->uesp
, &dst32
);
1127 addr
= regs32
->uesp
+ sizeof (uint32_t);
1130 fasttrap_sigsegv(p
, uthread
, (user_addr_t
)regs32
->uesp
);
1135 if (tp
->ftt_type
== FASTTRAP_T_RET16
)
1136 addr
+= tp
->ftt_dest
;
1138 regs32
->uesp
= addr
;
1143 case FASTTRAP_T_JCC
:
1147 switch (tp
->ftt_code
) {
1149 taken
= (regs32
->efl
& FASTTRAP_EFLAGS_OF
) != 0;
1152 taken
= (regs32
->efl
& FASTTRAP_EFLAGS_OF
) == 0;
1155 taken
= (regs32
->efl
& FASTTRAP_EFLAGS_CF
) != 0;
1158 taken
= (regs32
->efl
& FASTTRAP_EFLAGS_CF
) == 0;
1161 taken
= (regs32
->efl
& FASTTRAP_EFLAGS_ZF
) != 0;
1164 taken
= (regs32
->efl
& FASTTRAP_EFLAGS_ZF
) == 0;
1167 taken
= (regs32
->efl
& FASTTRAP_EFLAGS_CF
) != 0 ||
1168 (regs32
->efl
& FASTTRAP_EFLAGS_ZF
) != 0;
1171 taken
= (regs32
->efl
& FASTTRAP_EFLAGS_CF
) == 0 &&
1172 (regs32
->efl
& FASTTRAP_EFLAGS_ZF
) == 0;
1175 taken
= (regs32
->efl
& FASTTRAP_EFLAGS_SF
) != 0;
1178 taken
= (regs32
->efl
& FASTTRAP_EFLAGS_SF
) == 0;
1181 taken
= (regs32
->efl
& FASTTRAP_EFLAGS_PF
) != 0;
1184 taken
= (regs32
->efl
& FASTTRAP_EFLAGS_PF
) == 0;
1187 taken
= ((regs32
->efl
& FASTTRAP_EFLAGS_SF
) == 0) !=
1188 ((regs32
->efl
& FASTTRAP_EFLAGS_OF
) == 0);
1191 taken
= ((regs32
->efl
& FASTTRAP_EFLAGS_SF
) == 0) ==
1192 ((regs32
->efl
& FASTTRAP_EFLAGS_OF
) == 0);
1195 taken
= (regs32
->efl
& FASTTRAP_EFLAGS_ZF
) != 0 ||
1196 ((regs32
->efl
& FASTTRAP_EFLAGS_SF
) == 0) !=
1197 ((regs32
->efl
& FASTTRAP_EFLAGS_OF
) == 0);
1200 taken
= (regs32
->efl
& FASTTRAP_EFLAGS_ZF
) == 0 &&
1201 ((regs32
->efl
& FASTTRAP_EFLAGS_SF
) == 0) ==
1202 ((regs32
->efl
& FASTTRAP_EFLAGS_OF
) == 0);
1209 new_pc
= tp
->ftt_dest
;
1211 new_pc
= pc
+ tp
->ftt_size
;
1215 case FASTTRAP_T_LOOP
:
1218 greg_t cx
= regs32
->ecx
--;
1220 switch (tp
->ftt_code
) {
1221 case FASTTRAP_LOOPNZ
:
1222 taken
= (regs32
->efl
& FASTTRAP_EFLAGS_ZF
) == 0 &&
1225 case FASTTRAP_LOOPZ
:
1226 taken
= (regs32
->efl
& FASTTRAP_EFLAGS_ZF
) != 0 &&
1237 new_pc
= tp
->ftt_dest
;
1239 new_pc
= pc
+ tp
->ftt_size
;
1243 case FASTTRAP_T_JCXZ
:
1245 greg_t cx
= regs32
->ecx
;
1248 new_pc
= tp
->ftt_dest
;
1250 new_pc
= pc
+ tp
->ftt_size
;
1254 case FASTTRAP_T_PUSHL_EBP
:
1256 user_addr_t addr
= regs32
->uesp
- sizeof (uint32_t);
1257 int ret
= fasttrap_suword32(addr
, (uint32_t)regs32
->ebp
);
1260 fasttrap_sigsegv(p
, uthread
, addr
);
1265 regs32
->uesp
= addr
;
1266 new_pc
= pc
+ tp
->ftt_size
;
1270 case FASTTRAP_T_NOP
:
1271 new_pc
= pc
+ tp
->ftt_size
;
1274 case FASTTRAP_T_JMP
:
1275 case FASTTRAP_T_CALL
:
1276 if (tp
->ftt_code
== 0) {
1277 new_pc
= tp
->ftt_dest
;
1279 user_addr_t
/* value ,*/ addr
= tp
->ftt_dest
;
1281 if (tp
->ftt_base
!= FASTTRAP_NOREG
)
1282 addr
+= fasttrap_getreg(regs
, tp
->ftt_base
);
1283 if (tp
->ftt_index
!= FASTTRAP_NOREG
)
1284 addr
+= fasttrap_getreg(regs
, tp
->ftt_index
) <<
1287 if (tp
->ftt_code
== 1) {
1289 * If there's a segment prefix for this
1290 * instruction, we'll need to check permissions
1291 * and bounds on the given selector, and adjust
1292 * the address accordingly.
1294 if (tp
->ftt_segment
!= FASTTRAP_SEG_NONE
&&
1295 fasttrap_do_seg(tp
, regs
, &addr
) != 0) {
1296 fasttrap_sigsegv(p
, uthread
, addr
);
1302 addr
= (user_addr_t
)(uint32_t)addr
;
1303 if (fasttrap_fuword32(addr
, &value32
) == -1) {
1304 fasttrap_sigsegv(p
, uthread
, addr
);
1315 * If this is a call instruction, we need to push the return
1316 * address onto the stack. If this fails, we send the process
1317 * a SIGSEGV and reset the pc to emulate what would happen if
1318 * this instruction weren't traced.
1320 if (tp
->ftt_type
== FASTTRAP_T_CALL
) {
1321 user_addr_t addr
= regs32
->uesp
- sizeof (uint32_t);
1322 int ret
= fasttrap_suword32(addr
, (uint32_t)(pc
+ tp
->ftt_size
));
1325 fasttrap_sigsegv(p
, uthread
, addr
);
1330 regs32
->uesp
= addr
;
1334 case FASTTRAP_T_COMMON
:
1337 uint8_t scratch
[2 * FASTTRAP_MAX_INSTR_SIZE
+ 5 + 2];
1341 * Generic Instruction Tracing
1342 * ---------------------------
1344 * This is the layout of the scratch space in the user-land
1345 * thread structure for our generated instructions.
1348 * ------------------------ -----
1349 * a: <original instruction> <= 15
1350 * jmp <pc + tp->ftt_size> 5
1351 * b: <original instrction> <= 15
1352 * int T_DTRACE_RET 2
1357 * ------------------------ -----
1358 * a: <original instruction> <= 15
1360 * <pc + tp->ftt_size> 8
1361 * b: <original instruction> <= 15
1362 * int T_DTRACE_RET 2
1366 * The %pc is set to a, and curthread->t_dtrace_astpc is set
1367 * to b. If we encounter a signal on the way out of the
1368 * kernel, trap() will set %pc to curthread->t_dtrace_astpc
1369 * so that we execute the original instruction and re-enter
1370 * the kernel rather than redirecting to the next instruction.
1372 * If there are return probes (so we know that we're going to
1373 * need to reenter the kernel after executing the original
1374 * instruction), the scratch space will just contain the
1375 * original instruction followed by an interrupt -- the same
1379 addr
= uthread
->t_dtrace_scratch
->addr
;
1382 fasttrap_sigtrap(p
, uthread
, pc
); // Should be killing target proc
1387 ASSERT(tp
->ftt_size
< FASTTRAP_MAX_INSTR_SIZE
);
1389 uthread
->t_dtrace_scrpc
= addr
;
1390 bcopy(tp
->ftt_instr
, &scratch
[i
], tp
->ftt_size
);
1394 * Set up the jmp to the next instruction; note that
1395 * the size of the traced instruction cancels out.
1397 scratch
[i
++] = FASTTRAP_JMP32
;
1398 *(uint32_t *)&scratch
[i
] = pc
- addr
- 5;
1399 i
+= sizeof (uint32_t);
1401 uthread
->t_dtrace_astpc
= addr
+ i
;
1402 bcopy(tp
->ftt_instr
, &scratch
[i
], tp
->ftt_size
);
1404 scratch
[i
++] = FASTTRAP_INT
;
1405 scratch
[i
++] = T_DTRACE_RET
;
1407 if (fasttrap_copyout(scratch
, addr
, i
)) {
1408 fasttrap_sigtrap(p
, uthread
, pc
);
1413 if (tp
->ftt_retids
!= NULL
) {
1414 uthread
->t_dtrace_step
= 1;
1415 uthread
->t_dtrace_ret
= 1;
1416 new_pc
= uthread
->t_dtrace_astpc
;
1418 new_pc
= uthread
->t_dtrace_scrpc
;
1421 uthread
->t_dtrace_pc
= pc
;
1422 uthread
->t_dtrace_npc
= pc
+ tp
->ftt_size
;
1423 uthread
->t_dtrace_on
= 1;
1428 panic("fasttrap: mishandled an instruction");
1435 * We're setting this earlier than Solaris does, to get a "correct"
1436 * ustack() output. In the Sun code, a() -> b() -> c() -> d() is
1437 * reported at: d, b, a. The new way gives c, b, a, which is closer
1438 * to correct, as the return instruction has already executed.
1440 regs32
->eip
= new_pc
;
1443 * If there were no return probes when we first found the tracepoint,
1444 * we should feel no obligation to honor any return probes that were
1445 * subsequently enabled -- they'll just have to wait until the next
1448 if (tp
->ftt_retids
!= NULL
) {
1450 * We need to wait until the results of the instruction are
1451 * apparent before invoking any return probes. If this
1452 * instruction was emulated we can just call
1453 * fasttrap_return_common(); if it needs to be executed, we
1454 * need to wait until the user thread returns to the kernel.
1456 if (tp
->ftt_type
!= FASTTRAP_T_COMMON
) {
1457 fasttrap_return_common(regs
, pc
, pid
, new_pc
);
1459 ASSERT(uthread
->t_dtrace_ret
!= 0);
1460 ASSERT(uthread
->t_dtrace_pc
== pc
);
1461 ASSERT(uthread
->t_dtrace_scrpc
!= 0);
1462 ASSERT(new_pc
== uthread
->t_dtrace_astpc
);
1470 * Due to variances between Solaris and xnu, I have split this into a 32 bit and 64 bit
1471 * code path. It still takes an x86_saved_state_t* argument, because it must sometimes
1472 * call other methods that require a x86_saved_state_t.
1476 * Any changes made to this method must be echo'd in fasttrap_pid_probe32!
1480 fasttrap_pid_probe64(x86_saved_state_t
*regs
)
1482 ASSERT(is_saved_state64(regs
));
1484 x86_saved_state64_t
*regs64
= saved_state64(regs
);
1485 user_addr_t pc
= regs64
->isf
.rip
- 1;
1486 proc_t
*p
= current_proc();
1487 user_addr_t new_pc
= 0;
1488 fasttrap_bucket_t
*bucket
;
1490 fasttrap_tracepoint_t
*tp
, tp_local
;
1492 dtrace_icookie_t cookie
;
1493 uint_t is_enabled
= 0;
1495 uthread_t uthread
= (uthread_t
)get_bsdthread_info(current_thread());
1498 * It's possible that a user (in a veritable orgy of bad planning)
1499 * could redirect this thread's flow of control before it reached the
1500 * return probe fasttrap. In this case we need to kill the process
1501 * since it's in an unrecoverable state.
1503 if (uthread
->t_dtrace_step
) {
1504 ASSERT(uthread
->t_dtrace_on
);
1505 fasttrap_sigtrap(p
, uthread
, pc
);
1510 * Clear all user tracing flags.
1512 uthread
->t_dtrace_ft
= 0;
1513 uthread
->t_dtrace_pc
= 0;
1514 uthread
->t_dtrace_npc
= 0;
1515 uthread
->t_dtrace_scrpc
= 0;
1516 uthread
->t_dtrace_astpc
= 0;
1517 uthread
->t_dtrace_regv
= 0;
1520 * Treat a child created by a call to vfork(2) as if it were its
1521 * parent. We know that there's only one thread of control in such a
1522 * process: this one.
1525 * APPLE NOTE: Terry says: "You need to hold the process locks (currently: kernel funnel) for this traversal"
1526 * FIXME: How do we assert this?
1528 while (p
->p_lflag
& P_LINVFORK
)
1532 pid_mtx
= &cpu_core
[CPU
->cpu_id
].cpuc_pid_lock
;
1533 lck_mtx_lock(pid_mtx
);
1534 bucket
= &fasttrap_tpoints
.fth_table
[FASTTRAP_TPOINTS_INDEX(pid
, pc
)];
1537 * Lookup the tracepoint that the process just hit.
1539 for (tp
= bucket
->ftb_data
; tp
!= NULL
; tp
= tp
->ftt_next
) {
1540 if (pid
== tp
->ftt_pid
&& pc
== tp
->ftt_pc
&&
1541 !tp
->ftt_proc
->ftpc_defunct
)
1546 * If we couldn't find a matching tracepoint, either a tracepoint has
1547 * been inserted without using the pid<pid> ioctl interface (see
1548 * fasttrap_ioctl), or somehow we have mislaid this tracepoint.
1551 lck_mtx_unlock(pid_mtx
);
1556 * Set the program counter to the address of the traced instruction
1557 * so that it looks right in ustack() output.
1559 regs64
->isf
.rip
= pc
;
1561 if (tp
->ftt_ids
!= NULL
) {
1564 for (id
= tp
->ftt_ids
; id
!= NULL
; id
= id
->fti_next
) {
1565 fasttrap_probe_t
*probe
= id
->fti_probe
;
1567 if (id
->fti_ptype
== DTFTP_ENTRY
) {
1569 * We note that this was an entry
1570 * probe to help ustack() find the
1573 cookie
= dtrace_interrupt_disable();
1574 DTRACE_CPUFLAG_SET(CPU_DTRACE_ENTRY
);
1575 dtrace_probe(probe
->ftp_id
, regs64
->rdi
,
1576 regs64
->rsi
, regs64
->rdx
, regs64
->rcx
,
1578 DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_ENTRY
);
1579 dtrace_interrupt_enable(cookie
);
1580 } else if (id
->fti_ptype
== DTFTP_IS_ENABLED
) {
1582 * Note that in this case, we don't
1583 * call dtrace_probe() since it's only
1584 * an artificial probe meant to change
1585 * the flow of control so that it
1586 * encounters the true probe.
1589 } else if (probe
->ftp_argmap
== NULL
) {
1590 dtrace_probe(probe
->ftp_id
, regs64
->rdi
,
1591 regs64
->rsi
, regs64
->rdx
, regs64
->rcx
,
1596 fasttrap_usdt_args64(probe
, regs64
,
1597 sizeof (t
) / sizeof (t
[0]), t
);
1599 dtrace_probe(probe
->ftp_id
, t
[0], t
[1],
1603 /* APPLE NOTE: Oneshot probes get one and only one chance... */
1604 if (probe
->ftp_prov
->ftp_provider_type
== DTFTP_PROVIDER_ONESHOT
) {
1605 fasttrap_tracepoint_remove(p
, tp
);
1611 * We're about to do a bunch of work so we cache a local copy of
1612 * the tracepoint to emulate the instruction, and then find the
1613 * tracepoint again later if we need to light up any return probes.
1616 lck_mtx_unlock(pid_mtx
);
1620 * Set the program counter to appear as though the traced instruction
1621 * had completely executed. This ensures that fasttrap_getreg() will
1622 * report the expected value for REG_RIP.
1624 regs64
->isf
.rip
= pc
+ tp
->ftt_size
;
1627 * If there's an is-enabled probe connected to this tracepoint it
1628 * means that there was a 'xorl %eax, %eax' or 'xorq %rax, %rax'
1629 * instruction that was placed there by DTrace when the binary was
1630 * linked. As this probe is, in fact, enabled, we need to stuff 1
1631 * into %eax or %rax. Accordingly, we can bypass all the instruction
1632 * emulation logic since we know the inevitable result. It's possible
1633 * that a user could construct a scenario where the 'is-enabled'
1634 * probe was on some other instruction, but that would be a rather
1635 * exotic way to shoot oneself in the foot.
1639 new_pc
= regs64
->isf
.rip
;
1644 * We emulate certain types of instructions to ensure correctness
1645 * (in the case of position dependent instructions) or optimize
1646 * common cases. The rest we have the thread execute back in user-
1649 switch (tp
->ftt_type
) {
1650 case FASTTRAP_T_RET
:
1651 case FASTTRAP_T_RET16
:
1658 * We have to emulate _every_ facet of the behavior of a ret
1659 * instruction including what happens if the load from %esp
1660 * fails; in that case, we send a SIGSEGV.
1662 ret
= fasttrap_fuword64((user_addr_t
)regs64
->isf
.rsp
, &dst
);
1663 addr
= regs64
->isf
.rsp
+ sizeof (uint64_t);
1666 fasttrap_sigsegv(p
, uthread
, (user_addr_t
)regs64
->isf
.rsp
);
1671 if (tp
->ftt_type
== FASTTRAP_T_RET16
)
1672 addr
+= tp
->ftt_dest
;
1674 regs64
->isf
.rsp
= addr
;
1679 case FASTTRAP_T_JCC
:
1683 switch (tp
->ftt_code
) {
1685 taken
= (regs64
->isf
.rflags
& FASTTRAP_EFLAGS_OF
) != 0;
1688 taken
= (regs64
->isf
.rflags
& FASTTRAP_EFLAGS_OF
) == 0;
1691 taken
= (regs64
->isf
.rflags
& FASTTRAP_EFLAGS_CF
) != 0;
1694 taken
= (regs64
->isf
.rflags
& FASTTRAP_EFLAGS_CF
) == 0;
1697 taken
= (regs64
->isf
.rflags
& FASTTRAP_EFLAGS_ZF
) != 0;
1700 taken
= (regs64
->isf
.rflags
& FASTTRAP_EFLAGS_ZF
) == 0;
1703 taken
= (regs64
->isf
.rflags
& FASTTRAP_EFLAGS_CF
) != 0 ||
1704 (regs64
->isf
.rflags
& FASTTRAP_EFLAGS_ZF
) != 0;
1707 taken
= (regs64
->isf
.rflags
& FASTTRAP_EFLAGS_CF
) == 0 &&
1708 (regs64
->isf
.rflags
& FASTTRAP_EFLAGS_ZF
) == 0;
1711 taken
= (regs64
->isf
.rflags
& FASTTRAP_EFLAGS_SF
) != 0;
1714 taken
= (regs64
->isf
.rflags
& FASTTRAP_EFLAGS_SF
) == 0;
1717 taken
= (regs64
->isf
.rflags
& FASTTRAP_EFLAGS_PF
) != 0;
1720 taken
= (regs64
->isf
.rflags
& FASTTRAP_EFLAGS_PF
) == 0;
1723 taken
= ((regs64
->isf
.rflags
& FASTTRAP_EFLAGS_SF
) == 0) !=
1724 ((regs64
->isf
.rflags
& FASTTRAP_EFLAGS_OF
) == 0);
1727 taken
= ((regs64
->isf
.rflags
& FASTTRAP_EFLAGS_SF
) == 0) ==
1728 ((regs64
->isf
.rflags
& FASTTRAP_EFLAGS_OF
) == 0);
1731 taken
= (regs64
->isf
.rflags
& FASTTRAP_EFLAGS_ZF
) != 0 ||
1732 ((regs64
->isf
.rflags
& FASTTRAP_EFLAGS_SF
) == 0) !=
1733 ((regs64
->isf
.rflags
& FASTTRAP_EFLAGS_OF
) == 0);
1736 taken
= (regs64
->isf
.rflags
& FASTTRAP_EFLAGS_ZF
) == 0 &&
1737 ((regs64
->isf
.rflags
& FASTTRAP_EFLAGS_SF
) == 0) ==
1738 ((regs64
->isf
.rflags
& FASTTRAP_EFLAGS_OF
) == 0);
1745 new_pc
= tp
->ftt_dest
;
1747 new_pc
= pc
+ tp
->ftt_size
;
1751 case FASTTRAP_T_LOOP
:
1754 uint64_t cx
= regs64
->rcx
--;
1756 switch (tp
->ftt_code
) {
1757 case FASTTRAP_LOOPNZ
:
1758 taken
= (regs64
->isf
.rflags
& FASTTRAP_EFLAGS_ZF
) == 0 &&
1761 case FASTTRAP_LOOPZ
:
1762 taken
= (regs64
->isf
.rflags
& FASTTRAP_EFLAGS_ZF
) != 0 &&
1773 new_pc
= tp
->ftt_dest
;
1775 new_pc
= pc
+ tp
->ftt_size
;
1779 case FASTTRAP_T_JCXZ
:
1781 uint64_t cx
= regs64
->rcx
;
1784 new_pc
= tp
->ftt_dest
;
1786 new_pc
= pc
+ tp
->ftt_size
;
1790 case FASTTRAP_T_PUSHL_EBP
:
1792 user_addr_t addr
= regs64
->isf
.rsp
- sizeof (uint64_t);
1793 int ret
= fasttrap_suword64(addr
, (uint64_t)regs64
->rbp
);
1796 fasttrap_sigsegv(p
, uthread
, addr
);
1801 regs64
->isf
.rsp
= addr
;
1802 new_pc
= pc
+ tp
->ftt_size
;
1806 case FASTTRAP_T_NOP
:
1807 new_pc
= pc
+ tp
->ftt_size
;
1810 case FASTTRAP_T_JMP
:
1811 case FASTTRAP_T_CALL
:
1812 if (tp
->ftt_code
== 0) {
1813 new_pc
= tp
->ftt_dest
;
1815 user_addr_t value
, addr
= tp
->ftt_dest
;
1817 if (tp
->ftt_base
!= FASTTRAP_NOREG
)
1818 addr
+= fasttrap_getreg(regs
, tp
->ftt_base
);
1819 if (tp
->ftt_index
!= FASTTRAP_NOREG
)
1820 addr
+= fasttrap_getreg(regs
, tp
->ftt_index
) <<
1823 if (tp
->ftt_code
== 1) {
1825 * If there's a segment prefix for this
1826 * instruction, we'll need to check permissions
1827 * and bounds on the given selector, and adjust
1828 * the address accordingly.
1830 if (tp
->ftt_segment
!= FASTTRAP_SEG_NONE
&&
1831 fasttrap_do_seg(tp
, regs
, &addr
) != 0) {
1832 fasttrap_sigsegv(p
, uthread
, addr
);
1837 if (fasttrap_fuword64(addr
, &value
) == -1) {
1838 fasttrap_sigsegv(p
, uthread
, addr
);
1849 * If this is a call instruction, we need to push the return
1850 * address onto the stack. If this fails, we send the process
1851 * a SIGSEGV and reset the pc to emulate what would happen if
1852 * this instruction weren't traced.
1854 if (tp
->ftt_type
== FASTTRAP_T_CALL
) {
1855 user_addr_t addr
= regs64
->isf
.rsp
- sizeof (uint64_t);
1856 int ret
= fasttrap_suword64(addr
, pc
+ tp
->ftt_size
);
1859 fasttrap_sigsegv(p
, uthread
, addr
);
1864 regs64
->isf
.rsp
= addr
;
1868 case FASTTRAP_T_COMMON
:
1871 uint8_t scratch
[2 * FASTTRAP_MAX_INSTR_SIZE
+ 5 + 2];
1875 * Generic Instruction Tracing
1876 * ---------------------------
1878 * This is the layout of the scratch space in the user-land
1879 * thread structure for our generated instructions.
1882 * ------------------------ -----
1883 * a: <original instruction> <= 15
1884 * jmp <pc + tp->ftt_size> 5
1885 * b: <original instruction> <= 15
1886 * int T_DTRACE_RET 2
1891 * ------------------------ -----
1892 * a: <original instruction> <= 15
1894 * <pc + tp->ftt_size> 8
1895 * b: <original instruction> <= 15
1896 * int T_DTRACE_RET 2
1900 * The %pc is set to a, and curthread->t_dtrace_astpc is set
1901 * to b. If we encounter a signal on the way out of the
1902 * kernel, trap() will set %pc to curthread->t_dtrace_astpc
1903 * so that we execute the original instruction and re-enter
1904 * the kernel rather than redirecting to the next instruction.
1906 * If there are return probes (so we know that we're going to
1907 * need to reenter the kernel after executing the original
1908 * instruction), the scratch space will just contain the
1909 * original instruction followed by an interrupt -- the same
1912 * %rip-relative Addressing
1913 * ------------------------
1915 * There's a further complication in 64-bit mode due to %rip-
1916 * relative addressing. While this is clearly a beneficial
1917 * architectural decision for position independent code, it's
1918 * hard not to see it as a personal attack against the pid
1919 * provider since before there was a relatively small set of
1920 * instructions to emulate; with %rip-relative addressing,
1921 * almost every instruction can potentially depend on the
1922 * address at which it's executed. Rather than emulating
1923 * the broad spectrum of instructions that can now be
1924 * position dependent, we emulate jumps and others as in
1925 * 32-bit mode, and take a different tack for instructions
1926 * using %rip-relative addressing.
1928 * For every instruction that uses the ModRM byte, the
1929 * in-kernel disassembler reports its location. We use the
1930 * ModRM byte to identify that an instruction uses
1931 * %rip-relative addressing and to see what other registers
1932 * the instruction uses. To emulate those instructions,
1933 * we modify the instruction to be %rax-relative rather than
1934 * %rip-relative (or %rcx-relative if the instruction uses
1935 * %rax; or %r8- or %r9-relative if the REX.B is present so
1936 * we don't have to rewrite the REX prefix). We then load
1937 * the value that %rip would have been into the scratch
1938 * register and generate an instruction to reset the scratch
1939 * register back to its original value. The instruction
1940 * sequence looks like this:
1942 * 64-mode %rip-relative bytes
1943 * ------------------------ -----
1944 * a: <modified instruction> <= 15
1945 * movq $<value>, %<scratch> 6
1947 * <pc + tp->ftt_size> 8
1948 * b: <modified instruction> <= 15
1949 * int T_DTRACE_RET 2
1953 * We set curthread->t_dtrace_regv so that upon receiving
1954 * a signal we can reset the value of the scratch register.
1957 addr
= uthread
->t_dtrace_scratch
->addr
;
1960 fasttrap_sigtrap(p
, uthread
, pc
); // Should be killing target proc
1965 ASSERT(tp
->ftt_size
< FASTTRAP_MAX_INSTR_SIZE
);
1967 uthread
->t_dtrace_scrpc
= addr
;
1968 bcopy(tp
->ftt_instr
, &scratch
[i
], tp
->ftt_size
);
1971 if (tp
->ftt_ripmode
!= 0) {
1974 ASSERT(tp
->ftt_ripmode
&
1975 (FASTTRAP_RIP_1
| FASTTRAP_RIP_2
));
1978 * If this was a %rip-relative instruction, we change
1979 * it to be either a %rax- or %rcx-relative
1980 * instruction (depending on whether those registers
1981 * are used as another operand; or %r8- or %r9-
1982 * relative depending on the value of REX.B). We then
1983 * set that register and generate a movq instruction
1984 * to reset the value.
1986 if (tp
->ftt_ripmode
& FASTTRAP_RIP_X
)
1987 scratch
[i
++] = FASTTRAP_REX(1, 0, 0, 1);
1989 scratch
[i
++] = FASTTRAP_REX(1, 0, 0, 0);
1991 if (tp
->ftt_ripmode
& FASTTRAP_RIP_1
)
1992 scratch
[i
++] = FASTTRAP_MOV_EAX
;
1994 scratch
[i
++] = FASTTRAP_MOV_ECX
;
1996 switch (tp
->ftt_ripmode
) {
1997 case FASTTRAP_RIP_1
:
1999 uthread
->t_dtrace_reg
= REG_RAX
;
2001 case FASTTRAP_RIP_2
:
2003 uthread
->t_dtrace_reg
= REG_RCX
;
2005 case FASTTRAP_RIP_1
| FASTTRAP_RIP_X
:
2007 uthread
->t_dtrace_reg
= REG_R8
;
2009 case FASTTRAP_RIP_2
| FASTTRAP_RIP_X
:
2011 uthread
->t_dtrace_reg
= REG_R9
;
2015 panic("unhandled ripmode in fasttrap_pid_probe64");
2018 *(uint64_t *)&scratch
[i
] = *reg
;
2019 uthread
->t_dtrace_regv
= *reg
;
2020 *reg
= pc
+ tp
->ftt_size
;
2021 i
+= sizeof (uint64_t);
2025 * Generate the branch instruction to what would have
2026 * normally been the subsequent instruction. In 32-bit mode,
2027 * this is just a relative branch; in 64-bit mode this is a
2028 * %rip-relative branch that loads the 64-bit pc value
2029 * immediately after the jmp instruction.
2031 scratch
[i
++] = FASTTRAP_GROUP5_OP
;
2032 scratch
[i
++] = FASTTRAP_MODRM(0, 4, 5);
2033 *(uint32_t *)&scratch
[i
] = 0;
2034 i
+= sizeof (uint32_t);
2035 *(uint64_t *)&scratch
[i
] = pc
+ tp
->ftt_size
;
2036 i
+= sizeof (uint64_t);
2038 uthread
->t_dtrace_astpc
= addr
+ i
;
2039 bcopy(tp
->ftt_instr
, &scratch
[i
], tp
->ftt_size
);
2041 scratch
[i
++] = FASTTRAP_INT
;
2042 scratch
[i
++] = T_DTRACE_RET
;
2044 if (fasttrap_copyout(scratch
, addr
, i
)) {
2045 fasttrap_sigtrap(p
, uthread
, pc
);
2050 if (tp
->ftt_retids
!= NULL
) {
2051 uthread
->t_dtrace_step
= 1;
2052 uthread
->t_dtrace_ret
= 1;
2053 new_pc
= uthread
->t_dtrace_astpc
;
2055 new_pc
= uthread
->t_dtrace_scrpc
;
2058 uthread
->t_dtrace_pc
= pc
;
2059 uthread
->t_dtrace_npc
= pc
+ tp
->ftt_size
;
2060 uthread
->t_dtrace_on
= 1;
2065 panic("fasttrap: mishandled an instruction");
2072 * We're setting this earlier than Solaris does, to get a "correct"
2073 * ustack() output. In the Sun code, a() -> b() -> c() -> d() is
2074 * reported at: d, b, a. The new way gives c, b, a, which is closer
2075 * to correct, as the return instruction has already executed.
2077 regs64
->isf
.rip
= new_pc
;
2081 * If there were no return probes when we first found the tracepoint,
2082 * we should feel no obligation to honor any return probes that were
2083 * subsequently enabled -- they'll just have to wait until the next
2086 if (tp
->ftt_retids
!= NULL
) {
2088 * We need to wait until the results of the instruction are
2089 * apparent before invoking any return probes. If this
2090 * instruction was emulated we can just call
2091 * fasttrap_return_common(); if it needs to be executed, we
2092 * need to wait until the user thread returns to the kernel.
2094 if (tp
->ftt_type
!= FASTTRAP_T_COMMON
) {
2095 fasttrap_return_common(regs
, pc
, pid
, new_pc
);
2097 ASSERT(uthread
->t_dtrace_ret
!= 0);
2098 ASSERT(uthread
->t_dtrace_pc
== pc
);
2099 ASSERT(uthread
->t_dtrace_scrpc
!= 0);
2100 ASSERT(new_pc
== uthread
->t_dtrace_astpc
);
2108 fasttrap_pid_probe(x86_saved_state_t
*regs
)
2110 if (is_saved_state64(regs
))
2111 return fasttrap_pid_probe64(regs
);
2113 return fasttrap_pid_probe32(regs
);
2117 fasttrap_return_probe(x86_saved_state_t
*regs
)
2119 x86_saved_state64_t
*regs64
;
2120 x86_saved_state32_t
*regs32
;
2121 unsigned int p_model
;
2123 if (is_saved_state64(regs
)) {
2124 regs64
= saved_state64(regs
);
2126 p_model
= DATAMODEL_LP64
;
2129 regs32
= saved_state32(regs
);
2130 p_model
= DATAMODEL_ILP32
;
2133 proc_t
*p
= current_proc();
2134 uthread_t uthread
= (uthread_t
)get_bsdthread_info(current_thread());
2135 user_addr_t pc
= uthread
->t_dtrace_pc
;
2136 user_addr_t npc
= uthread
->t_dtrace_npc
;
2138 uthread
->t_dtrace_pc
= 0;
2139 uthread
->t_dtrace_npc
= 0;
2140 uthread
->t_dtrace_scrpc
= 0;
2141 uthread
->t_dtrace_astpc
= 0;
2144 * Treat a child created by a call to vfork(2) as if it were its
2145 * parent. We know that there's only one thread of control in such a
2146 * process: this one.
2149 * APPLE NOTE: Terry says: "You need to hold the process locks (currently: kernel funnel) for this traversal"
2150 * How do we assert this?
2152 while (p
->p_lflag
& P_LINVFORK
) {
2157 * We set rp->r_pc to the address of the traced instruction so
2158 * that it appears to dtrace_probe() that we're on the original
2159 * instruction, and so that the user can't easily detect our
2160 * complex web of lies. dtrace_return_probe() (our caller)
2161 * will correctly set %pc after we return.
2163 if (p_model
== DATAMODEL_LP64
)
2164 regs64
->isf
.rip
= pc
;
2168 fasttrap_return_common(regs
, pc
, p
->p_pid
, npc
);
2174 fasttrap_pid_getarg(void *arg
, dtrace_id_t id
, void *parg
, int argno
,
2177 #pragma unused(arg, id, parg, aframes)
2178 return (fasttrap_anarg((x86_saved_state_t
*)find_user_regs(current_thread()), 1, argno
));
2182 fasttrap_usdt_getarg(void *arg
, dtrace_id_t id
, void *parg
, int argno
,
2185 #pragma unused(arg, id, parg, aframes)
2186 return (fasttrap_anarg((x86_saved_state_t
*)find_user_regs(current_thread()), 0, argno
));
2190 * APPLE NOTE: See comments by regmap array definition. We are cheating
2191 * when returning 32 bit registers.
2194 fasttrap_getreg(x86_saved_state_t
*regs
, uint_t reg
)
2196 if (is_saved_state64(regs
)) {
2197 x86_saved_state64_t
*regs64
= saved_state64(regs
);
2200 case REG_RAX
: return regs64
->rax
;
2201 case REG_RCX
: return regs64
->rcx
;
2202 case REG_RDX
: return regs64
->rdx
;
2203 case REG_RBX
: return regs64
->rbx
;
2204 case REG_RSP
: return regs64
->isf
.rsp
;
2205 case REG_RBP
: return regs64
->rbp
;
2206 case REG_RSI
: return regs64
->rsi
;
2207 case REG_RDI
: return regs64
->rdi
;
2208 case REG_R8
: return regs64
->r8
;
2209 case REG_R9
: return regs64
->r9
;
2210 case REG_R10
: return regs64
->r10
;
2211 case REG_R11
: return regs64
->r11
;
2212 case REG_R12
: return regs64
->r12
;
2213 case REG_R13
: return regs64
->r13
;
2214 case REG_R14
: return regs64
->r14
;
2215 case REG_R15
: return regs64
->r15
;
2218 panic("dtrace: unhandled x86_64 getreg() constant");
2220 x86_saved_state32_t
*regs32
= saved_state32(regs
);
2223 case REG_RAX
: return regs32
->eax
;
2224 case REG_RCX
: return regs32
->ecx
;
2225 case REG_RDX
: return regs32
->edx
;
2226 case REG_RBX
: return regs32
->ebx
;
2227 case REG_RSP
: return regs32
->uesp
;
2228 case REG_RBP
: return regs32
->ebp
;
2229 case REG_RSI
: return regs32
->esi
;
2230 case REG_RDI
: return regs32
->edi
;
2233 panic("dtrace: unhandled i386 getreg() constant");