4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
23 * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
24 * Use is subject to license terms.
28 * #pragma ident "@(#)fasttrap_isa.c 1.23 06/09/19 SMI"
33 #define _KERNEL /* Solaris vs. Darwin */
37 #include <sys/fasttrap_isa.h>
38 #include <sys/fasttrap_impl.h>
39 #include <sys/dtrace.h>
40 #include <sys/dtrace_impl.h>
41 extern dtrace_id_t dtrace_probeid_error
;
43 #include "fasttrap_regset.h"
45 #include <sys/dtrace_ptss.h>
46 #include <kern/debug.h>
48 #define proc_t struct proc
51 * Lossless User-Land Tracing on x86
52 * ---------------------------------
54 * The execution of most instructions is not dependent on the address; for
55 * these instructions it is sufficient to copy them into the user process's
56 * address space and execute them. To effectively single-step an instruction
57 * in user-land, we copy out the following sequence of instructions to scratch
58 * space in the user thread's ulwp_t structure.
60 * We then set the program counter (%eip or %rip) to point to this scratch
61 * space. Once execution resumes, the original instruction is executed and
62 * then control flow is redirected to what was originally the subsequent
63 * instruction. If the kernel attempts to deliver a signal while single-
64 * stepping, the signal is deferred and the program counter is moved into the
65 * second sequence of instructions. The second sequence ends in a trap into
66 * the kernel where the deferred signal is then properly handled and delivered.
68 * For instructions whose execution is position dependent, we perform simple
69 * emulation. These instructions are limited to control transfer
70 * instructions in 32-bit mode, but in 64-bit mode there's the added wrinkle
71 * of %rip-relative addressing that means that almost any instruction can be
72 * position dependent. For all the details on how we emulate generic
73 * instructions including %rip-relative instructions, see the code in
74 * fasttrap_pid_probe() below where we handle instructions of type
75 * FASTTRAP_T_COMMON (under the header: Generic Instruction Tracing).
78 #define FASTTRAP_MODRM_MOD(modrm) (((modrm) >> 6) & 0x3)
79 #define FASTTRAP_MODRM_REG(modrm) (((modrm) >> 3) & 0x7)
80 #define FASTTRAP_MODRM_RM(modrm) ((modrm) & 0x7)
81 #define FASTTRAP_MODRM(mod, reg, rm) (((mod) << 6) | ((reg) << 3) | (rm))
83 #define FASTTRAP_SIB_SCALE(sib) (((sib) >> 6) & 0x3)
84 #define FASTTRAP_SIB_INDEX(sib) (((sib) >> 3) & 0x7)
85 #define FASTTRAP_SIB_BASE(sib) ((sib) & 0x7)
87 #define FASTTRAP_REX_W(rex) (((rex) >> 3) & 1)
88 #define FASTTRAP_REX_R(rex) (((rex) >> 2) & 1)
89 #define FASTTRAP_REX_X(rex) (((rex) >> 1) & 1)
90 #define FASTTRAP_REX_B(rex) ((rex) & 1)
91 #define FASTTRAP_REX(w, r, x, b) \
92 (0x40 | ((w) << 3) | ((r) << 2) | ((x) << 1) | (b))
95 * Single-byte op-codes.
97 #define FASTTRAP_PUSHL_EBP 0x55
99 #define FASTTRAP_JO 0x70
100 #define FASTTRAP_JNO 0x71
101 #define FASTTRAP_JB 0x72
102 #define FASTTRAP_JAE 0x73
103 #define FASTTRAP_JE 0x74
104 #define FASTTRAP_JNE 0x75
105 #define FASTTRAP_JBE 0x76
106 #define FASTTRAP_JA 0x77
107 #define FASTTRAP_JS 0x78
108 #define FASTTRAP_JNS 0x79
109 #define FASTTRAP_JP 0x7a
110 #define FASTTRAP_JNP 0x7b
111 #define FASTTRAP_JL 0x7c
112 #define FASTTRAP_JGE 0x7d
113 #define FASTTRAP_JLE 0x7e
114 #define FASTTRAP_JG 0x7f
116 #define FASTTRAP_NOP 0x90
118 #define FASTTRAP_MOV_EAX 0xb8
119 #define FASTTRAP_MOV_ECX 0xb9
121 #define FASTTRAP_RET16 0xc2
122 #define FASTTRAP_RET 0xc3
124 #define FASTTRAP_LOOPNZ 0xe0
125 #define FASTTRAP_LOOPZ 0xe1
126 #define FASTTRAP_LOOP 0xe2
127 #define FASTTRAP_JCXZ 0xe3
129 #define FASTTRAP_CALL 0xe8
130 #define FASTTRAP_JMP32 0xe9
131 #define FASTTRAP_JMP8 0xeb
133 #define FASTTRAP_INT3 0xcc
134 #define FASTTRAP_INT 0xcd
135 #define T_DTRACE_RET 0x7f
137 #define FASTTRAP_2_BYTE_OP 0x0f
138 #define FASTTRAP_GROUP5_OP 0xff
141 * Two-byte op-codes (second byte only).
143 #define FASTTRAP_0F_JO 0x80
144 #define FASTTRAP_0F_JNO 0x81
145 #define FASTTRAP_0F_JB 0x82
146 #define FASTTRAP_0F_JAE 0x83
147 #define FASTTRAP_0F_JE 0x84
148 #define FASTTRAP_0F_JNE 0x85
149 #define FASTTRAP_0F_JBE 0x86
150 #define FASTTRAP_0F_JA 0x87
151 #define FASTTRAP_0F_JS 0x88
152 #define FASTTRAP_0F_JNS 0x89
153 #define FASTTRAP_0F_JP 0x8a
154 #define FASTTRAP_0F_JNP 0x8b
155 #define FASTTRAP_0F_JL 0x8c
156 #define FASTTRAP_0F_JGE 0x8d
157 #define FASTTRAP_0F_JLE 0x8e
158 #define FASTTRAP_0F_JG 0x8f
160 #define FASTTRAP_EFLAGS_OF 0x800
161 #define FASTTRAP_EFLAGS_DF 0x400
162 #define FASTTRAP_EFLAGS_SF 0x080
163 #define FASTTRAP_EFLAGS_ZF 0x040
164 #define FASTTRAP_EFLAGS_AF 0x010
165 #define FASTTRAP_EFLAGS_PF 0x004
166 #define FASTTRAP_EFLAGS_CF 0x001
169 * Instruction prefixes.
171 #define FASTTRAP_PREFIX_OPERAND 0x66
172 #define FASTTRAP_PREFIX_ADDRESS 0x67
173 #define FASTTRAP_PREFIX_CS 0x2E
174 #define FASTTRAP_PREFIX_DS 0x3E
175 #define FASTTRAP_PREFIX_ES 0x26
176 #define FASTTRAP_PREFIX_FS 0x64
177 #define FASTTRAP_PREFIX_GS 0x65
178 #define FASTTRAP_PREFIX_SS 0x36
179 #define FASTTRAP_PREFIX_LOCK 0xF0
180 #define FASTTRAP_PREFIX_REP 0xF3
181 #define FASTTRAP_PREFIX_REPNE 0xF2
183 #define FASTTRAP_NOREG 0xff
186 * Map between instruction register encodings and the kernel constants which
187 * correspond to indices into struct regs.
191 * APPLE NOTE: We are cheating here. The regmap is used to decode which register
192 * a given instruction is trying to reference. OS X does not have extended registers
193 * for 32 bit apps, but the *order* is the same. So for 32 bit state, we will return:
200 * The fasttrap_getreg function knows how to make the correct transformation.
202 #if __sol64 || defined(__APPLE__)
203 static const uint8_t regmap
[16] = {
204 REG_RAX
, REG_RCX
, REG_RDX
, REG_RBX
, REG_RSP
, REG_RBP
, REG_RSI
, REG_RDI
,
205 REG_R8
, REG_R9
, REG_R10
, REG_R11
, REG_R12
, REG_R13
, REG_R14
, REG_R15
,
208 static const uint8_t regmap
[8] = {
209 EAX
, ECX
, EDX
, EBX
, UESP
, EBP
, ESI
, EDI
213 static user_addr_t
fasttrap_getreg(x86_saved_state_t
*, uint_t
);
216 fasttrap_anarg(x86_saved_state_t
*regs
, int function_entry
, int argno
)
219 int shift
= function_entry
? 1 : 0;
221 x86_saved_state64_t
*regs64
;
222 x86_saved_state32_t
*regs32
;
223 unsigned int p_model
;
225 if (is_saved_state64(regs
)) {
226 regs64
= saved_state64(regs
);
228 p_model
= DATAMODEL_LP64
;
231 regs32
= saved_state32(regs
);
232 p_model
= DATAMODEL_ILP32
;
235 if (p_model
== DATAMODEL_LP64
) {
239 * In 64-bit mode, the first six arguments are stored in
243 return ((&regs64
->rdi
)[argno
]);
245 stack
= regs64
->isf
.rsp
+ sizeof(uint64_t) * (argno
- 6 + shift
);
246 DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT
);
247 value
= dtrace_fuword64(stack
);
248 DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT
| CPU_DTRACE_BADADDR
);
250 uint32_t *stack
= (uint32_t *)regs32
->uesp
;
251 DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT
);
252 value
= dtrace_fuword32((user_addr_t
)(unsigned long)&stack
[argno
+ shift
]);
253 DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT
| CPU_DTRACE_BADADDR
);
261 fasttrap_tracepoint_init(proc_t
*p
, fasttrap_tracepoint_t
*tp
, user_addr_t pc
,
262 fasttrap_probe_type_t type
)
265 uint8_t instr
[FASTTRAP_MAX_INSTR_SIZE
+ 10];
266 size_t len
= FASTTRAP_MAX_INSTR_SIZE
;
267 size_t first
= MIN(len
, PAGE_SIZE
- (pc
& PAGE_MASK
));
271 uint8_t seg
, rex
= 0;
272 unsigned int p_model
= (p
->p_flag
& P_LP64
) ? DATAMODEL_LP64
: DATAMODEL_ILP32
;
275 * Read the instruction at the given address out of the process's
276 * address space. We don't have to worry about a debugger
277 * changing this instruction before we overwrite it with our trap
278 * instruction since P_PR_LOCK is set. Since instructions can span
279 * pages, we potentially read the instruction in two parts. If the
280 * second part fails, we just zero out that part of the instruction.
283 * APPLE NOTE: Of course, we do not have a P_PR_LOCK, so this is racy...
285 if (uread(p
, &instr
[0], first
, pc
) != 0)
288 uread(p
, &instr
[first
], len
- first
, pc
+ first
) != 0) {
289 bzero(&instr
[first
], len
- first
);
294 * If the disassembly fails, then we have a malformed instruction.
296 if ((size
= dtrace_instr_size_isa(instr
, p_model
, &rmindex
)) <= 0)
300 * Make sure the disassembler isn't completely broken.
302 ASSERT(-1 <= rmindex
&& rmindex
< (int)size
);
305 * If the computed size is greater than the number of bytes read,
306 * then it was a malformed instruction possibly because it fell on a
307 * page boundary and the subsequent page was missing or because of
308 * some malicious user.
313 tp
->ftt_size
= (uint8_t)size
;
314 tp
->ftt_segment
= FASTTRAP_SEG_NONE
;
317 * Find the start of the instruction's opcode by processing any
322 switch (instr
[start
]) {
323 case FASTTRAP_PREFIX_SS
:
326 case FASTTRAP_PREFIX_GS
:
329 case FASTTRAP_PREFIX_FS
:
332 case FASTTRAP_PREFIX_ES
:
335 case FASTTRAP_PREFIX_DS
:
338 case FASTTRAP_PREFIX_CS
:
341 case FASTTRAP_PREFIX_OPERAND
:
342 case FASTTRAP_PREFIX_ADDRESS
:
343 case FASTTRAP_PREFIX_LOCK
:
344 case FASTTRAP_PREFIX_REP
:
345 case FASTTRAP_PREFIX_REPNE
:
348 * It's illegal for an instruction to specify
349 * two segment prefixes -- give up on this
350 * illegal instruction.
352 if (tp
->ftt_segment
!= FASTTRAP_SEG_NONE
)
355 tp
->ftt_segment
= seg
;
363 #if __sol64 || defined(__APPLE__)
365 * Identify the REX prefix on 64-bit processes.
367 if (p_model
== DATAMODEL_LP64
&& (instr
[start
] & 0xf0) == 0x40)
368 rex
= instr
[start
++];
372 * Now that we're pretty sure that the instruction is okay, copy the
373 * valid part to the tracepoint.
375 bcopy(instr
, tp
->ftt_instr
, FASTTRAP_MAX_INSTR_SIZE
);
377 tp
->ftt_type
= FASTTRAP_T_COMMON
;
378 if (instr
[start
] == FASTTRAP_2_BYTE_OP
) {
379 switch (instr
[start
+ 1]) {
381 case FASTTRAP_0F_JNO
:
383 case FASTTRAP_0F_JAE
:
385 case FASTTRAP_0F_JNE
:
386 case FASTTRAP_0F_JBE
:
389 case FASTTRAP_0F_JNS
:
391 case FASTTRAP_0F_JNP
:
393 case FASTTRAP_0F_JGE
:
394 case FASTTRAP_0F_JLE
:
396 tp
->ftt_type
= FASTTRAP_T_JCC
;
397 tp
->ftt_code
= (instr
[start
+ 1] & 0x0f) | FASTTRAP_JO
;
398 tp
->ftt_dest
= pc
+ tp
->ftt_size
+
399 *(int32_t *)&instr
[start
+ 2];
402 } else if (instr
[start
] == FASTTRAP_GROUP5_OP
) {
403 uint_t mod
= FASTTRAP_MODRM_MOD(instr
[start
+ 1]);
404 uint_t reg
= FASTTRAP_MODRM_REG(instr
[start
+ 1]);
405 uint_t rm
= FASTTRAP_MODRM_RM(instr
[start
+ 1]);
407 if (reg
== 2 || reg
== 4) {
411 tp
->ftt_type
= FASTTRAP_T_CALL
;
413 tp
->ftt_type
= FASTTRAP_T_JMP
;
420 ASSERT(p_model
== DATAMODEL_LP64
|| rex
== 0);
423 * See AMD x86-64 Architecture Programmer's Manual
424 * Volume 3, Section 1.2.7, Table 1-12, and
425 * Appendix A.3.1, Table A-15.
427 if (mod
!= 3 && rm
== 4) {
428 uint8_t sib
= instr
[start
+ 2];
429 uint_t index
= FASTTRAP_SIB_INDEX(sib
);
430 uint_t base
= FASTTRAP_SIB_BASE(sib
);
432 tp
->ftt_scale
= FASTTRAP_SIB_SCALE(sib
);
434 tp
->ftt_index
= (index
== 4) ?
436 regmap
[index
| (FASTTRAP_REX_X(rex
) << 3)];
437 tp
->ftt_base
= (mod
== 0 && base
== 5) ?
439 regmap
[base
| (FASTTRAP_REX_B(rex
) << 3)];
442 sz
= mod
== 1 ? 1 : 4;
445 * In 64-bit mode, mod == 0 and r/m == 5
446 * denotes %rip-relative addressing; in 32-bit
447 * mode, the base register isn't used. In both
448 * modes, there is a 32-bit operand.
450 if (mod
== 0 && rm
== 5) {
451 #if __sol64 || defined(__APPLE__)
452 if (p_model
== DATAMODEL_LP64
)
453 tp
->ftt_base
= REG_RIP
;
456 tp
->ftt_base
= FASTTRAP_NOREG
;
460 (FASTTRAP_REX_B(rex
) << 3);
462 tp
->ftt_base
= regmap
[base
];
463 sz
= mod
== 1 ? 1 : mod
== 2 ? 4 : 0;
465 tp
->ftt_index
= FASTTRAP_NOREG
;
470 tp
->ftt_dest
= *(int8_t *)&instr
[start
+ i
];
472 tp
->ftt_dest
= *(int32_t *)&instr
[start
+ i
];
477 switch (instr
[start
]) {
479 tp
->ftt_type
= FASTTRAP_T_RET
;
483 tp
->ftt_type
= FASTTRAP_T_RET16
;
484 tp
->ftt_dest
= *(uint16_t *)&instr
[start
+ 1];
503 tp
->ftt_type
= FASTTRAP_T_JCC
;
504 tp
->ftt_code
= instr
[start
];
505 tp
->ftt_dest
= pc
+ tp
->ftt_size
+
506 (int8_t)instr
[start
+ 1];
509 case FASTTRAP_LOOPNZ
:
512 tp
->ftt_type
= FASTTRAP_T_LOOP
;
513 tp
->ftt_code
= instr
[start
];
514 tp
->ftt_dest
= pc
+ tp
->ftt_size
+
515 (int8_t)instr
[start
+ 1];
519 tp
->ftt_type
= FASTTRAP_T_JCXZ
;
520 tp
->ftt_dest
= pc
+ tp
->ftt_size
+
521 (int8_t)instr
[start
+ 1];
525 tp
->ftt_type
= FASTTRAP_T_CALL
;
526 tp
->ftt_dest
= pc
+ tp
->ftt_size
+
527 *(int32_t *)&instr
[start
+ 1];
532 tp
->ftt_type
= FASTTRAP_T_JMP
;
533 tp
->ftt_dest
= pc
+ tp
->ftt_size
+
534 *(int32_t *)&instr
[start
+ 1];
537 tp
->ftt_type
= FASTTRAP_T_JMP
;
538 tp
->ftt_dest
= pc
+ tp
->ftt_size
+
539 (int8_t)instr
[start
+ 1];
542 case FASTTRAP_PUSHL_EBP
:
544 tp
->ftt_type
= FASTTRAP_T_PUSHL_EBP
;
548 #if __sol64 || defined(__APPLE__)
549 ASSERT(p_model
== DATAMODEL_LP64
|| rex
== 0);
552 * On sol64 we have to be careful not to confuse a nop
553 * (actually xchgl %eax, %eax) with an instruction using
554 * the same opcode, but that does something different
555 * (e.g. xchgl %r8d, %eax or xchgq %r8, %rax).
557 if (FASTTRAP_REX_B(rex
) == 0)
559 tp
->ftt_type
= FASTTRAP_T_NOP
;
564 * The pid provider shares the int3 trap with debugger
565 * breakpoints so we can't instrument them.
567 ASSERT(instr
[start
] == FASTTRAP_INSTR
);
572 * Interrupts seem like they could be traced with
573 * no negative implications, but it's possible that
574 * a thread could be redirected by the trap handling
575 * code which would eventually return to the
576 * instruction after the interrupt. If the interrupt
577 * were in our scratch space, the subsequent
578 * instruction might be overwritten before we return.
579 * Accordingly we refuse to instrument any interrupt.
585 #if __sol64 || defined(__APPLE__)
586 if (p_model
== DATAMODEL_LP64
&& tp
->ftt_type
== FASTTRAP_T_COMMON
) {
588 * If the process is 64-bit and the instruction type is still
589 * FASTTRAP_T_COMMON -- meaning we're going to copy it out and
590 * execute it -- we need to watch for %rip-relative
591 * addressing mode. See the portion of fasttrap_pid_probe()
592 * below where we handle tracepoints with type
593 * FASTTRAP_T_COMMON for how we emulate instructions that
594 * employ %rip-relative addressing.
597 uint_t mod
= FASTTRAP_MODRM_MOD(instr
[rmindex
]);
598 uint_t reg
= FASTTRAP_MODRM_REG(instr
[rmindex
]);
599 uint_t rm
= FASTTRAP_MODRM_RM(instr
[rmindex
]);
601 ASSERT(rmindex
> (int)start
);
603 if (mod
== 0 && rm
== 5) {
605 * We need to be sure to avoid other
606 * registers used by this instruction. While
607 * the reg field may determine the op code
608 * rather than denoting a register, assuming
609 * that it denotes a register is always safe.
610 * We leave the REX field intact and use
611 * whatever value's there for simplicity.
614 tp
->ftt_ripmode
= FASTTRAP_RIP_1
|
616 FASTTRAP_REX_B(rex
));
619 tp
->ftt_ripmode
= FASTTRAP_RIP_2
|
621 FASTTRAP_REX_B(rex
));
625 tp
->ftt_modrm
= tp
->ftt_instr
[rmindex
];
626 tp
->ftt_instr
[rmindex
] =
627 FASTTRAP_MODRM(2, reg
, rm
);
637 fasttrap_tracepoint_install(proc_t
*p
, fasttrap_tracepoint_t
*tp
)
639 fasttrap_instr_t instr
= FASTTRAP_INSTR
;
641 if (uwrite(p
, &instr
, 1, tp
->ftt_pc
) != 0)
648 fasttrap_tracepoint_remove(proc_t
*p
, fasttrap_tracepoint_t
*tp
)
653 * Distinguish between read or write failures and a changed
656 if (uread(p
, &instr
, 1, tp
->ftt_pc
) != 0)
658 if (instr
!= FASTTRAP_INSTR
)
660 if (uwrite(p
, &tp
->ftt_instr
[0], 1, tp
->ftt_pc
) != 0)
667 fasttrap_return_common(x86_saved_state_t
*regs
, user_addr_t pc
, pid_t pid
,
670 x86_saved_state64_t
*regs64
;
671 x86_saved_state32_t
*regs32
;
672 unsigned int p_model
;
674 if (is_saved_state64(regs
)) {
675 regs64
= saved_state64(regs
);
677 p_model
= DATAMODEL_LP64
;
680 regs32
= saved_state32(regs
);
681 p_model
= DATAMODEL_ILP32
;
684 fasttrap_tracepoint_t
*tp
;
685 fasttrap_bucket_t
*bucket
;
689 pid_mtx
= &cpu_core
[CPU
->cpu_id
].cpuc_pid_lock
;
690 lck_mtx_lock(pid_mtx
);
691 bucket
= &fasttrap_tpoints
.fth_table
[FASTTRAP_TPOINTS_INDEX(pid
, pc
)];
693 for (tp
= bucket
->ftb_data
; tp
!= NULL
; tp
= tp
->ftt_next
) {
694 if (pid
== tp
->ftt_pid
&& pc
== tp
->ftt_pc
&&
695 !tp
->ftt_proc
->ftpc_defunct
)
700 * Don't sweat it if we can't find the tracepoint again; unlike
701 * when we're in fasttrap_pid_probe(), finding the tracepoint here
702 * is not essential to the correct execution of the process.
705 lck_mtx_unlock(pid_mtx
);
709 for (id
= tp
->ftt_retids
; id
!= NULL
; id
= id
->fti_next
) {
711 * If there's a branch that could act as a return site, we
712 * need to trace it, and check here if the program counter is
713 * external to the function.
715 if (tp
->ftt_type
!= FASTTRAP_T_RET
&&
716 tp
->ftt_type
!= FASTTRAP_T_RET16
&&
717 new_pc
- id
->fti_probe
->ftp_faddr
<
718 id
->fti_probe
->ftp_fsize
)
721 if (ISSET(current_proc()->p_lflag
, P_LNOATTACH
)) {
722 dtrace_probe(dtrace_probeid_error
, 0 /* state */, id
->fti_probe
->ftp_id
,
723 1 /* ndx */, -1 /* offset */, DTRACEFLT_UPRIV
);
724 } else if (p_model
== DATAMODEL_LP64
) {
725 dtrace_probe(id
->fti_probe
->ftp_id
,
726 pc
- id
->fti_probe
->ftp_faddr
,
727 regs64
->rax
, regs64
->rdx
, 0, 0);
729 dtrace_probe(id
->fti_probe
->ftp_id
,
730 pc
- id
->fti_probe
->ftp_faddr
,
731 regs32
->eax
, regs32
->edx
, 0, 0);
735 lck_mtx_unlock(pid_mtx
);
739 fasttrap_sigsegv(proc_t
*p
, uthread_t t
, user_addr_t addr
)
743 /* Set fault address and mark signal */
745 t
->uu_siglist
|= sigmask(SIGSEGV
);
748 * XXX These two lines may be redundant; if not, then we need
749 * XXX to potentially set the data address in the machine
750 * XXX specific thread state structure to indicate the address.
752 t
->uu_exception
= KERN_INVALID_ADDRESS
; /* SIGSEGV */
753 t
->uu_subcode
= 0; /* XXX pad */
758 signal_setast(t
->uu_context
.vc_thread
);
762 fasttrap_usdt_args64(fasttrap_probe_t
*probe
, x86_saved_state64_t
*regs64
, int argc
,
765 int i
, x
, cap
= MIN(argc
, probe
->ftp_nargs
);
766 user_addr_t stack
= (user_addr_t
)regs64
->isf
.rsp
;
768 for (i
= 0; i
< cap
; i
++) {
769 x
= probe
->ftp_argmap
[i
];
772 /* FIXME! This may be broken, needs testing */
773 argv
[i
] = (&regs64
->rdi
)[x
];
775 fasttrap_fuword64_noerr(stack
+ (x
* sizeof(uint64_t)), &argv
[i
]);
779 for (; i
< argc
; i
++) {
785 fasttrap_usdt_args32(fasttrap_probe_t
*probe
, x86_saved_state32_t
*regs32
, int argc
,
788 int i
, x
, cap
= MIN(argc
, probe
->ftp_nargs
);
789 uint32_t *stack
= (uint32_t *)regs32
->uesp
;
791 for (i
= 0; i
< cap
; i
++) {
792 x
= probe
->ftp_argmap
[i
];
794 fasttrap_fuword32_noerr((user_addr_t
)(unsigned long)&stack
[x
], &argv
[i
]);
797 for (; i
< argc
; i
++) {
806 fasttrap_do_seg(fasttrap_tracepoint_t
*tp
, x86_saved_state_t
*rp
, user_addr_t
*addr
) // 64 bit
808 #pragma unused(tp, rp, addr)
809 printf("fasttrap_do_seg() called while unimplemented.\n");
813 uint16_t sel
, ndx
, type
;
816 switch (tp
->ftt_segment
) {
817 case FASTTRAP_SEG_CS
:
820 case FASTTRAP_SEG_DS
:
823 case FASTTRAP_SEG_ES
:
826 case FASTTRAP_SEG_FS
:
829 case FASTTRAP_SEG_GS
:
832 case FASTTRAP_SEG_SS
:
838 * Make sure the given segment register specifies a user priority
839 * selector rather than a kernel selector.
847 * Check the bounds and grab the descriptor out of the specified
851 if (ndx
> p
->p_ldtlimit
)
854 desc
= p
->p_ldt
+ ndx
;
860 desc
= cpu_get_gdt() + ndx
;
864 * The descriptor must have user privilege level and it must be
867 if (desc
->usd_dpl
!= SEL_UPL
|| desc
->usd_p
!= 1)
870 type
= desc
->usd_type
;
873 * If the S bit in the type field is not set, this descriptor can
874 * only be used in system context.
876 if ((type
& 0x10) != 0x10)
879 limit
= USEGD_GETLIMIT(desc
) * (desc
->usd_gran
? PAGESIZE
: 1);
881 if (tp
->ftt_segment
== FASTTRAP_SEG_CS
) {
883 * The code/data bit and readable bit must both be set.
885 if ((type
& 0xa) != 0xa)
892 * The code/data bit must be clear.
894 if ((type
& 0x8) != 0)
898 * If the expand-down bit is clear, we just check the limit as
899 * it would naturally be applied. Otherwise, we need to check
900 * that the address is the range [limit + 1 .. 0xffff] or
901 * [limit + 1 ... 0xffffffff] depending on if the default
902 * operand size bit is set.
904 if ((type
& 0x4) == 0) {
907 } else if (desc
->usd_def32
) {
908 if (*addr
< limit
+ 1 || 0xffff < *addr
)
911 if (*addr
< limit
+ 1 || 0xffffffff < *addr
)
916 *addr
+= USEGD_GETBASE(desc
);
922 * Due to variances between Solaris and xnu, I have split this into a 32 bit and 64 bit
923 * code path. It still takes an x86_saved_state_t* argument, because it must sometimes
924 * call other methods that require a x86_saved_state_t.
928 * Any changes made to this method must be echo'd in fasttrap_pid_probe64!
932 fasttrap_pid_probe32(x86_saved_state_t
*regs
)
934 ASSERT(is_saved_state32(regs
));
936 x86_saved_state32_t
*regs32
= saved_state32(regs
);
937 user_addr_t pc
= regs32
->eip
- 1;
938 proc_t
*p
= current_proc();
939 user_addr_t new_pc
= 0;
940 fasttrap_bucket_t
*bucket
;
942 fasttrap_tracepoint_t
*tp
, tp_local
;
944 dtrace_icookie_t cookie
;
945 uint_t is_enabled
= 0;
947 uthread_t uthread
= (uthread_t
)get_bsdthread_info(current_thread());
950 * It's possible that a user (in a veritable orgy of bad planning)
951 * could redirect this thread's flow of control before it reached the
952 * return probe fasttrap. In this case we need to kill the process
953 * since it's in an unrecoverable state.
955 if (uthread
->t_dtrace_step
) {
956 ASSERT(uthread
->t_dtrace_on
);
957 fasttrap_sigtrap(p
, uthread
, pc
);
962 * Clear all user tracing flags.
964 uthread
->t_dtrace_ft
= 0;
965 uthread
->t_dtrace_pc
= 0;
966 uthread
->t_dtrace_npc
= 0;
967 uthread
->t_dtrace_scrpc
= 0;
968 uthread
->t_dtrace_astpc
= 0;
971 * Treat a child created by a call to vfork(2) as if it were its
972 * parent. We know that there's only one thread of control in such a
976 * APPLE NOTE: Terry says: "You need to hold the process locks (currently: kernel funnel) for this traversal"
977 * FIXME: How do we assert this?
979 while (p
->p_lflag
& P_LINVFORK
)
983 pid_mtx
= &cpu_core
[CPU
->cpu_id
].cpuc_pid_lock
;
984 lck_mtx_lock(pid_mtx
);
985 bucket
= &fasttrap_tpoints
.fth_table
[FASTTRAP_TPOINTS_INDEX(pid
, pc
)];
988 * Lookup the tracepoint that the process just hit.
990 for (tp
= bucket
->ftb_data
; tp
!= NULL
; tp
= tp
->ftt_next
) {
991 if (pid
== tp
->ftt_pid
&& pc
== tp
->ftt_pc
&&
992 !tp
->ftt_proc
->ftpc_defunct
)
997 * If we couldn't find a matching tracepoint, either a tracepoint has
998 * been inserted without using the pid<pid> ioctl interface (see
999 * fasttrap_ioctl), or somehow we have mislaid this tracepoint.
1002 lck_mtx_unlock(pid_mtx
);
1007 * Set the program counter to the address of the traced instruction
1008 * so that it looks right in ustack() output.
1012 if (tp
->ftt_ids
!= NULL
) {
1015 uint32_t s0
, s1
, s2
, s3
, s4
, s5
;
1016 uint32_t *stack
= (uint32_t *)regs32
->uesp
;
1019 * In 32-bit mode, all arguments are passed on the
1020 * stack. If this is a function entry probe, we need
1021 * to skip the first entry on the stack as it
1022 * represents the return address rather than a
1023 * parameter to the function.
1025 fasttrap_fuword32_noerr((user_addr_t
)(unsigned long)&stack
[0], &s0
);
1026 fasttrap_fuword32_noerr((user_addr_t
)(unsigned long)&stack
[1], &s1
);
1027 fasttrap_fuword32_noerr((user_addr_t
)(unsigned long)&stack
[2], &s2
);
1028 fasttrap_fuword32_noerr((user_addr_t
)(unsigned long)&stack
[3], &s3
);
1029 fasttrap_fuword32_noerr((user_addr_t
)(unsigned long)&stack
[4], &s4
);
1030 fasttrap_fuword32_noerr((user_addr_t
)(unsigned long)&stack
[5], &s5
);
1032 for (id
= tp
->ftt_ids
; id
!= NULL
; id
= id
->fti_next
) {
1033 fasttrap_probe_t
*probe
= id
->fti_probe
;
1035 if (ISSET(current_proc()->p_lflag
, P_LNOATTACH
)) {
1036 dtrace_probe(dtrace_probeid_error
, 0 /* state */, probe
->ftp_id
,
1037 1 /* ndx */, -1 /* offset */, DTRACEFLT_UPRIV
);
1038 } else if (id
->fti_ptype
== DTFTP_ENTRY
) {
1040 * We note that this was an entry
1041 * probe to help ustack() find the
1044 cookie
= dtrace_interrupt_disable();
1045 DTRACE_CPUFLAG_SET(CPU_DTRACE_ENTRY
);
1046 dtrace_probe(probe
->ftp_id
, s1
, s2
,
1048 DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_ENTRY
);
1049 dtrace_interrupt_enable(cookie
);
1050 } else if (id
->fti_ptype
== DTFTP_IS_ENABLED
) {
1052 * Note that in this case, we don't
1053 * call dtrace_probe() since it's only
1054 * an artificial probe meant to change
1055 * the flow of control so that it
1056 * encounters the true probe.
1059 } else if (probe
->ftp_argmap
== NULL
) {
1060 dtrace_probe(probe
->ftp_id
, s0
, s1
,
1065 fasttrap_usdt_args32(probe
, regs32
,
1066 sizeof (t
) / sizeof (t
[0]), t
);
1068 dtrace_probe(probe
->ftp_id
, t
[0], t
[1],
1072 /* APPLE NOTE: Oneshot probes get one and only one chance... */
1073 if (probe
->ftp_prov
->ftp_provider_type
== DTFTP_PROVIDER_ONESHOT
) {
1074 fasttrap_tracepoint_remove(p
, tp
);
1080 * We're about to do a bunch of work so we cache a local copy of
1081 * the tracepoint to emulate the instruction, and then find the
1082 * tracepoint again later if we need to light up any return probes.
1085 lck_mtx_unlock(pid_mtx
);
1089 * Set the program counter to appear as though the traced instruction
1090 * had completely executed. This ensures that fasttrap_getreg() will
1091 * report the expected value for REG_RIP.
1093 regs32
->eip
= pc
+ tp
->ftt_size
;
1096 * If there's an is-enabled probe connected to this tracepoint it
1097 * means that there was a 'xorl %eax, %eax' or 'xorq %rax, %rax'
1098 * instruction that was placed there by DTrace when the binary was
1099 * linked. As this probe is, in fact, enabled, we need to stuff 1
1100 * into %eax or %rax. Accordingly, we can bypass all the instruction
1101 * emulation logic since we know the inevitable result. It's possible
1102 * that a user could construct a scenario where the 'is-enabled'
1103 * probe was on some other instruction, but that would be a rather
1104 * exotic way to shoot oneself in the foot.
1108 new_pc
= regs32
->eip
;
1113 * We emulate certain types of instructions to ensure correctness
1114 * (in the case of position dependent instructions) or optimize
1115 * common cases. The rest we have the thread execute back in user-
1118 switch (tp
->ftt_type
) {
1119 case FASTTRAP_T_RET
:
1120 case FASTTRAP_T_RET16
:
1127 * We have to emulate _every_ facet of the behavior of a ret
1128 * instruction including what happens if the load from %esp
1129 * fails; in that case, we send a SIGSEGV.
1132 ret
= fasttrap_fuword32((user_addr_t
)regs32
->uesp
, &dst32
);
1134 addr
= regs32
->uesp
+ sizeof (uint32_t);
1137 fasttrap_sigsegv(p
, uthread
, (user_addr_t
)regs32
->uesp
);
1142 if (tp
->ftt_type
== FASTTRAP_T_RET16
)
1143 addr
+= tp
->ftt_dest
;
1145 regs32
->uesp
= addr
;
1150 case FASTTRAP_T_JCC
:
1154 switch (tp
->ftt_code
) {
1156 taken
= (regs32
->efl
& FASTTRAP_EFLAGS_OF
) != 0;
1159 taken
= (regs32
->efl
& FASTTRAP_EFLAGS_OF
) == 0;
1162 taken
= (regs32
->efl
& FASTTRAP_EFLAGS_CF
) != 0;
1165 taken
= (regs32
->efl
& FASTTRAP_EFLAGS_CF
) == 0;
1168 taken
= (regs32
->efl
& FASTTRAP_EFLAGS_ZF
) != 0;
1171 taken
= (regs32
->efl
& FASTTRAP_EFLAGS_ZF
) == 0;
1174 taken
= (regs32
->efl
& FASTTRAP_EFLAGS_CF
) != 0 ||
1175 (regs32
->efl
& FASTTRAP_EFLAGS_ZF
) != 0;
1178 taken
= (regs32
->efl
& FASTTRAP_EFLAGS_CF
) == 0 &&
1179 (regs32
->efl
& FASTTRAP_EFLAGS_ZF
) == 0;
1182 taken
= (regs32
->efl
& FASTTRAP_EFLAGS_SF
) != 0;
1185 taken
= (regs32
->efl
& FASTTRAP_EFLAGS_SF
) == 0;
1188 taken
= (regs32
->efl
& FASTTRAP_EFLAGS_PF
) != 0;
1191 taken
= (regs32
->efl
& FASTTRAP_EFLAGS_PF
) == 0;
1194 taken
= ((regs32
->efl
& FASTTRAP_EFLAGS_SF
) == 0) !=
1195 ((regs32
->efl
& FASTTRAP_EFLAGS_OF
) == 0);
1198 taken
= ((regs32
->efl
& FASTTRAP_EFLAGS_SF
) == 0) ==
1199 ((regs32
->efl
& FASTTRAP_EFLAGS_OF
) == 0);
1202 taken
= (regs32
->efl
& FASTTRAP_EFLAGS_ZF
) != 0 ||
1203 ((regs32
->efl
& FASTTRAP_EFLAGS_SF
) == 0) !=
1204 ((regs32
->efl
& FASTTRAP_EFLAGS_OF
) == 0);
1207 taken
= (regs32
->efl
& FASTTRAP_EFLAGS_ZF
) == 0 &&
1208 ((regs32
->efl
& FASTTRAP_EFLAGS_SF
) == 0) ==
1209 ((regs32
->efl
& FASTTRAP_EFLAGS_OF
) == 0);
1216 new_pc
= tp
->ftt_dest
;
1218 new_pc
= pc
+ tp
->ftt_size
;
1222 case FASTTRAP_T_LOOP
:
1225 greg_t cx
= regs32
->ecx
--;
1227 switch (tp
->ftt_code
) {
1228 case FASTTRAP_LOOPNZ
:
1229 taken
= (regs32
->efl
& FASTTRAP_EFLAGS_ZF
) == 0 &&
1232 case FASTTRAP_LOOPZ
:
1233 taken
= (regs32
->efl
& FASTTRAP_EFLAGS_ZF
) != 0 &&
1244 new_pc
= tp
->ftt_dest
;
1246 new_pc
= pc
+ tp
->ftt_size
;
1250 case FASTTRAP_T_JCXZ
:
1252 greg_t cx
= regs32
->ecx
;
1255 new_pc
= tp
->ftt_dest
;
1257 new_pc
= pc
+ tp
->ftt_size
;
1261 case FASTTRAP_T_PUSHL_EBP
:
1263 user_addr_t addr
= regs32
->uesp
- sizeof (uint32_t);
1264 int ret
= fasttrap_suword32(addr
, (uint32_t)regs32
->ebp
);
1267 fasttrap_sigsegv(p
, uthread
, addr
);
1272 regs32
->uesp
= addr
;
1273 new_pc
= pc
+ tp
->ftt_size
;
1277 case FASTTRAP_T_NOP
:
1278 new_pc
= pc
+ tp
->ftt_size
;
1281 case FASTTRAP_T_JMP
:
1282 case FASTTRAP_T_CALL
:
1283 if (tp
->ftt_code
== 0) {
1284 new_pc
= tp
->ftt_dest
;
1286 user_addr_t
/* value ,*/ addr
= tp
->ftt_dest
;
1288 if (tp
->ftt_base
!= FASTTRAP_NOREG
)
1289 addr
+= fasttrap_getreg(regs
, tp
->ftt_base
);
1290 if (tp
->ftt_index
!= FASTTRAP_NOREG
)
1291 addr
+= fasttrap_getreg(regs
, tp
->ftt_index
) <<
1294 if (tp
->ftt_code
== 1) {
1296 * If there's a segment prefix for this
1297 * instruction, we'll need to check permissions
1298 * and bounds on the given selector, and adjust
1299 * the address accordingly.
1301 if (tp
->ftt_segment
!= FASTTRAP_SEG_NONE
&&
1302 fasttrap_do_seg(tp
, regs
, &addr
) != 0) {
1303 fasttrap_sigsegv(p
, uthread
, addr
);
1309 addr
= (user_addr_t
)(uint32_t)addr
;
1310 if (fasttrap_fuword32(addr
, &value32
) == -1) {
1311 fasttrap_sigsegv(p
, uthread
, addr
);
1322 * If this is a call instruction, we need to push the return
1323 * address onto the stack. If this fails, we send the process
1324 * a SIGSEGV and reset the pc to emulate what would happen if
1325 * this instruction weren't traced.
1327 if (tp
->ftt_type
== FASTTRAP_T_CALL
) {
1328 user_addr_t addr
= regs32
->uesp
- sizeof (uint32_t);
1329 int ret
= fasttrap_suword32(addr
, (uint32_t)(pc
+ tp
->ftt_size
));
1332 fasttrap_sigsegv(p
, uthread
, addr
);
1337 regs32
->uesp
= addr
;
1341 case FASTTRAP_T_COMMON
:
1344 uint8_t scratch
[2 * FASTTRAP_MAX_INSTR_SIZE
+ 5 + 2];
1348 * Generic Instruction Tracing
1349 * ---------------------------
1351 * This is the layout of the scratch space in the user-land
1352 * thread structure for our generated instructions.
1355 * ------------------------ -----
1356 * a: <original instruction> <= 15
1357 * jmp <pc + tp->ftt_size> 5
1358 * b: <original instrction> <= 15
1359 * int T_DTRACE_RET 2
1364 * ------------------------ -----
1365 * a: <original instruction> <= 15
1367 * <pc + tp->ftt_size> 8
1368 * b: <original instruction> <= 15
1369 * int T_DTRACE_RET 2
1373 * The %pc is set to a, and curthread->t_dtrace_astpc is set
1374 * to b. If we encounter a signal on the way out of the
1375 * kernel, trap() will set %pc to curthread->t_dtrace_astpc
1376 * so that we execute the original instruction and re-enter
1377 * the kernel rather than redirecting to the next instruction.
1379 * If there are return probes (so we know that we're going to
1380 * need to reenter the kernel after executing the original
1381 * instruction), the scratch space will just contain the
1382 * original instruction followed by an interrupt -- the same
1386 addr
= uthread
->t_dtrace_scratch
->addr
;
1389 fasttrap_sigtrap(p
, uthread
, pc
); // Should be killing target proc
1394 ASSERT(tp
->ftt_size
< FASTTRAP_MAX_INSTR_SIZE
);
1396 uthread
->t_dtrace_scrpc
= addr
;
1397 bcopy(tp
->ftt_instr
, &scratch
[i
], tp
->ftt_size
);
1401 * Set up the jmp to the next instruction; note that
1402 * the size of the traced instruction cancels out.
1404 scratch
[i
++] = FASTTRAP_JMP32
;
1405 *(uint32_t *)&scratch
[i
] = pc
- addr
- 5;
1406 i
+= sizeof (uint32_t);
1408 uthread
->t_dtrace_astpc
= addr
+ i
;
1409 bcopy(tp
->ftt_instr
, &scratch
[i
], tp
->ftt_size
);
1411 scratch
[i
++] = FASTTRAP_INT
;
1412 scratch
[i
++] = T_DTRACE_RET
;
1414 if (fasttrap_copyout(scratch
, addr
, i
)) {
1415 fasttrap_sigtrap(p
, uthread
, pc
);
1420 if (tp
->ftt_retids
!= NULL
) {
1421 uthread
->t_dtrace_step
= 1;
1422 uthread
->t_dtrace_ret
= 1;
1423 new_pc
= uthread
->t_dtrace_astpc
;
1425 new_pc
= uthread
->t_dtrace_scrpc
;
1428 uthread
->t_dtrace_pc
= pc
;
1429 uthread
->t_dtrace_npc
= pc
+ tp
->ftt_size
;
1430 uthread
->t_dtrace_on
= 1;
1435 panic("fasttrap: mishandled an instruction");
1442 * We're setting this earlier than Solaris does, to get a "correct"
1443 * ustack() output. In the Sun code, a() -> b() -> c() -> d() is
1444 * reported at: d, b, a. The new way gives c, b, a, which is closer
1445 * to correct, as the return instruction has already executed.
1447 regs32
->eip
= new_pc
;
1450 * If there were no return probes when we first found the tracepoint,
1451 * we should feel no obligation to honor any return probes that were
1452 * subsequently enabled -- they'll just have to wait until the next
1455 if (tp
->ftt_retids
!= NULL
) {
1457 * We need to wait until the results of the instruction are
1458 * apparent before invoking any return probes. If this
1459 * instruction was emulated we can just call
1460 * fasttrap_return_common(); if it needs to be executed, we
1461 * need to wait until the user thread returns to the kernel.
1463 if (tp
->ftt_type
!= FASTTRAP_T_COMMON
) {
1464 fasttrap_return_common(regs
, pc
, pid
, new_pc
);
1466 ASSERT(uthread
->t_dtrace_ret
!= 0);
1467 ASSERT(uthread
->t_dtrace_pc
== pc
);
1468 ASSERT(uthread
->t_dtrace_scrpc
!= 0);
1469 ASSERT(new_pc
== uthread
->t_dtrace_astpc
);
1477 * Due to variances between Solaris and xnu, I have split this into a 32 bit and 64 bit
1478 * code path. It still takes an x86_saved_state_t* argument, because it must sometimes
1479 * call other methods that require a x86_saved_state_t.
1483 * Any changes made to this method must be echo'd in fasttrap_pid_probe32!
/*
 * fasttrap_pid_probe64: handle a pid-provider tracepoint trap for a thread
 * whose saved state is 64-bit.
 *
 * Steps visible in this listing:
 *   1. Recover the trapping pc from regs64->isf.rip and bail out through
 *      fasttrap_sigtrap() if the thread was still single-stepping.
 *   2. Clear the per-thread t_dtrace_* tracing state.
 *   3. Take the per-CPU cpuc_pid_lock and search the fasttrap_tpoints hash
 *      bucket for a tracepoint matching (pid, pc) whose provider is not
 *      defunct.
 *   4. Fire every probe hanging off tp->ftt_ids through dtrace_probe().
 *   5. Drop the lock, then emulate the traced instruction according to
 *      tp->ftt_type (ret, jcc, loop, jcxz, push of %rbp, nop, jmp/call) or,
 *      for FASTTRAP_T_COMMON, copy it to the thread scratch space so it can
 *      be executed in user-land.
 *   6. Store the resulting pc in regs64->isf.rip and, when return probes are
 *      attached and the instruction was emulated, call
 *      fasttrap_return_common().
 *
 * regs: saved user register state; asserted to be the 64-bit flavor.
 *
 * NOTE(review): this listing omits a number of lines of the function
 * (some declarations, case labels, braces, break and return statements).
 * Consult the complete file before changing any logic here.
 */
1487 fasttrap_pid_probe64(x86_saved_state_t
*regs
)
1489 ASSERT(is_saved_state64(regs
));
1491 x86_saved_state64_t
*regs64
= saved_state64(regs
);
/* pc is the saved rip minus one; presumably the trap instruction is one byte long -- TODO confirm. */
1492 user_addr_t pc
= regs64
->isf
.rip
- 1;
1493 proc_t
*p
= current_proc();
1494 user_addr_t new_pc
= 0;
1495 fasttrap_bucket_t
*bucket
;
1497 fasttrap_tracepoint_t
*tp
, tp_local
;
1499 dtrace_icookie_t cookie
;
1500 uint_t is_enabled
= 0;
1502 uthread_t uthread
= (uthread_t
)get_bsdthread_info(current_thread());
1505 * It's possible that a user (in a veritable orgy of bad planning)
1506 * could redirect this thread's flow of control before it reached the
1507 * return probe fasttrap. In this case we need to kill the process
1508 * since it's in a unrecoverable state.
1510 if (uthread
->t_dtrace_step
) {
1511 ASSERT(uthread
->t_dtrace_on
);
1512 fasttrap_sigtrap(p
, uthread
, pc
);
1517 * Clear all user tracing flags.
1519 uthread
->t_dtrace_ft
= 0;
1520 uthread
->t_dtrace_pc
= 0;
1521 uthread
->t_dtrace_npc
= 0;
1522 uthread
->t_dtrace_scrpc
= 0;
1523 uthread
->t_dtrace_astpc
= 0;
1524 uthread
->t_dtrace_regv
= 0;
1527 * Treat a child created by a call to vfork(2) as if it were its
1528 * parent. We know that there's only one thread of control in such a
1529 * process: this one.
1532 * APPLE NOTE: Terry says: "You need to hold the process locks (currently: kernel funnel) for this traversal"
1533 * FIXME: How do we assert this?
1535 while (p
->p_lflag
& P_LINVFORK
)
1539 pid_mtx
= &cpu_core
[CPU
->cpu_id
].cpuc_pid_lock
;
1540 lck_mtx_lock(pid_mtx
);
1541 bucket
= &fasttrap_tpoints
.fth_table
[FASTTRAP_TPOINTS_INDEX(pid
, pc
)];
1544 * Lookup the tracepoint that the process just hit.
1546 for (tp
= bucket
->ftb_data
; tp
!= NULL
; tp
= tp
->ftt_next
) {
1547 if (pid
== tp
->ftt_pid
&& pc
== tp
->ftt_pc
&&
1548 !tp
->ftt_proc
->ftpc_defunct
)
1553 * If we couldn't find a matching tracepoint, either a tracepoint has
1554 * been inserted without using the pid<pid> ioctl interface (see
1555 * fasttrap_ioctl), or somehow we have mislaid this tracepoint.
1558 lck_mtx_unlock(pid_mtx
);
1563 * Set the program counter to the address of the traced instruction
1564 * so that it looks right in ustack() output.
1566 regs64
->isf
.rip
= pc
;
1568 if (tp
->ftt_ids
!= NULL
) {
1571 for (id
= tp
->ftt_ids
; id
!= NULL
; id
= id
->fti_next
) {
1572 fasttrap_probe_t
*probe
= id
->fti_probe
;
1574 if (ISSET(current_proc()->p_lflag
, P_LNOATTACH
)) {
1575 dtrace_probe(dtrace_probeid_error
, 0 /* state */, probe
->ftp_id
,
1576 1 /* ndx */, -1 /* offset */, DTRACEFLT_UPRIV
);
1577 } else if (id
->fti_ptype
== DTFTP_ENTRY
) {
1579 * We note that this was an entry
1580 * probe to help ustack() find the
1583 cookie
= dtrace_interrupt_disable();
1584 DTRACE_CPUFLAG_SET(CPU_DTRACE_ENTRY
);
1585 dtrace_probe(probe
->ftp_id
, regs64
->rdi
,
1586 regs64
->rsi
, regs64
->rdx
, regs64
->rcx
,
1588 DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_ENTRY
);
1589 dtrace_interrupt_enable(cookie
);
1590 } else if (id
->fti_ptype
== DTFTP_IS_ENABLED
) {
1592 * Note that in this case, we don't
1593 * call dtrace_probe() since it's only
1594 * an artificial probe meant to change
1595 * the flow of control so that it
1596 * encounters the true probe.
1599 } else if (probe
->ftp_argmap
== NULL
) {
1600 dtrace_probe(probe
->ftp_id
, regs64
->rdi
,
1601 regs64
->rsi
, regs64
->rdx
, regs64
->rcx
,
1606 fasttrap_usdt_args64(probe
, regs64
,
1607 sizeof (t
) / sizeof (t
[0]), t
);
1609 dtrace_probe(probe
->ftp_id
, t
[0], t
[1],
1613 /* APPLE NOTE: Oneshot probes get one and only one chance... */
1614 if (probe
->ftp_prov
->ftp_provider_type
== DTFTP_PROVIDER_ONESHOT
) {
1615 fasttrap_tracepoint_remove(p
, tp
);
1621 * We're about to do a bunch of work so we cache a local copy of
1622 * the tracepoint to emulate the instruction, and then find the
1623 * tracepoint again later if we need to light up any return probes.
/* NOTE(review): the copy into tp_local described here is not visible in this listing -- confirm against the full file. */
1626 lck_mtx_unlock(pid_mtx
);
1630 * Set the program counter to appear as though the traced instruction
1631 * had completely executed. This ensures that fasttrap_getreg() will
1632 * report the expected value for REG_RIP.
1634 regs64
->isf
.rip
= pc
+ tp
->ftt_size
;
1637 * If there's an is-enabled probe connected to this tracepoint it
1638 * means that there was a 'xorl %eax, %eax' or 'xorq %rax, %rax'
1639 * instruction that was placed there by DTrace when the binary was
1640 * linked. As this probe is, in fact, enabled, we need to stuff 1
1641 * into %eax or %rax. Accordingly, we can bypass all the instruction
1642 * emulation logic since we know the inevitable result. It's possible
1643 * that a user could construct a scenario where the 'is-enabled'
1644 * probe was on some other instruction, but that would be a rather
1645 * exotic way to shoot oneself in the foot.
1649 new_pc
= regs64
->isf
.rip
;
1654 * We emulate certain types of instructions to ensure correctness
1655 * (in the case of position dependent instructions) or optimize
1656 * common cases. The rest we have the thread execute back in user-
1659 switch (tp
->ftt_type
) {
1660 case FASTTRAP_T_RET
:
1661 case FASTTRAP_T_RET16
:
1668 * We have to emulate _every_ facet of the behavior of a ret
1669 * instruction including what happens if the load from %esp
1670 * fails; in that case, we send a SIGSEGV.
1672 ret
= fasttrap_fuword64((user_addr_t
)regs64
->isf
.rsp
, &dst
);
1673 addr
= regs64
->isf
.rsp
+ sizeof (uint64_t);
1676 fasttrap_sigsegv(p
, uthread
, (user_addr_t
)regs64
->isf
.rsp
);
1681 if (tp
->ftt_type
== FASTTRAP_T_RET16
)
1682 addr
+= tp
->ftt_dest
;
1684 regs64
->isf
.rsp
= addr
;
1689 case FASTTRAP_T_JCC
:
/* NOTE(review): the case labels of this condition-code switch are not visible in this listing; each assignment below tests one flag combination in regs64->isf.rflags. */
1693 switch (tp
->ftt_code
) {
1695 taken
= (regs64
->isf
.rflags
& FASTTRAP_EFLAGS_OF
) != 0;
1698 taken
= (regs64
->isf
.rflags
& FASTTRAP_EFLAGS_OF
) == 0;
1701 taken
= (regs64
->isf
.rflags
& FASTTRAP_EFLAGS_CF
) != 0;
1704 taken
= (regs64
->isf
.rflags
& FASTTRAP_EFLAGS_CF
) == 0;
1707 taken
= (regs64
->isf
.rflags
& FASTTRAP_EFLAGS_ZF
) != 0;
1710 taken
= (regs64
->isf
.rflags
& FASTTRAP_EFLAGS_ZF
) == 0;
1713 taken
= (regs64
->isf
.rflags
& FASTTRAP_EFLAGS_CF
) != 0 ||
1714 (regs64
->isf
.rflags
& FASTTRAP_EFLAGS_ZF
) != 0;
1717 taken
= (regs64
->isf
.rflags
& FASTTRAP_EFLAGS_CF
) == 0 &&
1718 (regs64
->isf
.rflags
& FASTTRAP_EFLAGS_ZF
) == 0;
1721 taken
= (regs64
->isf
.rflags
& FASTTRAP_EFLAGS_SF
) != 0;
1724 taken
= (regs64
->isf
.rflags
& FASTTRAP_EFLAGS_SF
) == 0;
1727 taken
= (regs64
->isf
.rflags
& FASTTRAP_EFLAGS_PF
) != 0;
1730 taken
= (regs64
->isf
.rflags
& FASTTRAP_EFLAGS_PF
) == 0;
1733 taken
= ((regs64
->isf
.rflags
& FASTTRAP_EFLAGS_SF
) == 0) !=
1734 ((regs64
->isf
.rflags
& FASTTRAP_EFLAGS_OF
) == 0);
1737 taken
= ((regs64
->isf
.rflags
& FASTTRAP_EFLAGS_SF
) == 0) ==
1738 ((regs64
->isf
.rflags
& FASTTRAP_EFLAGS_OF
) == 0);
1741 taken
= (regs64
->isf
.rflags
& FASTTRAP_EFLAGS_ZF
) != 0 ||
1742 ((regs64
->isf
.rflags
& FASTTRAP_EFLAGS_SF
) == 0) !=
1743 ((regs64
->isf
.rflags
& FASTTRAP_EFLAGS_OF
) == 0);
1746 taken
= (regs64
->isf
.rflags
& FASTTRAP_EFLAGS_ZF
) == 0 &&
1747 ((regs64
->isf
.rflags
& FASTTRAP_EFLAGS_SF
) == 0) ==
1748 ((regs64
->isf
.rflags
& FASTTRAP_EFLAGS_OF
) == 0);
1755 new_pc
= tp
->ftt_dest
;
1757 new_pc
= pc
+ tp
->ftt_size
;
1761 case FASTTRAP_T_LOOP
:
/* Post-decrement: cx receives the %rcx value from before the decrement, and regs64->rcx is updated in place. */
1764 uint64_t cx
= regs64
->rcx
--;
1766 switch (tp
->ftt_code
) {
1767 case FASTTRAP_LOOPNZ
:
1768 taken
= (regs64
->isf
.rflags
& FASTTRAP_EFLAGS_ZF
) == 0 &&
1771 case FASTTRAP_LOOPZ
:
1772 taken
= (regs64
->isf
.rflags
& FASTTRAP_EFLAGS_ZF
) != 0 &&
1783 new_pc
= tp
->ftt_dest
;
1785 new_pc
= pc
+ tp
->ftt_size
;
1789 case FASTTRAP_T_JCXZ
:
1791 uint64_t cx
= regs64
->rcx
;
1794 new_pc
= tp
->ftt_dest
;
1796 new_pc
= pc
+ tp
->ftt_size
;
1800 case FASTTRAP_T_PUSHL_EBP
:
/* Emulated push: write %rbp just below the current stack pointer, then move %rsp down to that slot. */
1802 user_addr_t addr
= regs64
->isf
.rsp
- sizeof (uint64_t);
1803 int ret
= fasttrap_suword64(addr
, (uint64_t)regs64
->rbp
);
1806 fasttrap_sigsegv(p
, uthread
, addr
);
1811 regs64
->isf
.rsp
= addr
;
1812 new_pc
= pc
+ tp
->ftt_size
;
1816 case FASTTRAP_T_NOP
:
1817 new_pc
= pc
+ tp
->ftt_size
;
1820 case FASTTRAP_T_JMP
:
1821 case FASTTRAP_T_CALL
:
/* ftt_code 0 uses ftt_dest directly as the target; otherwise an address is built from ftt_dest plus the base and index registers. */
1822 if (tp
->ftt_code
== 0) {
1823 new_pc
= tp
->ftt_dest
;
1825 user_addr_t value
, addr
= tp
->ftt_dest
;
1827 if (tp
->ftt_base
!= FASTTRAP_NOREG
)
1828 addr
+= fasttrap_getreg(regs
, tp
->ftt_base
);
1829 if (tp
->ftt_index
!= FASTTRAP_NOREG
)
1830 addr
+= fasttrap_getreg(regs
, tp
->ftt_index
) <<
1833 if (tp
->ftt_code
== 1) {
1835 * If there's a segment prefix for this
1836 * instruction, we'll need to check permissions
1837 * and bounds on the given selector, and adjust
1838 * the address accordingly.
1840 if (tp
->ftt_segment
!= FASTTRAP_SEG_NONE
&&
1841 fasttrap_do_seg(tp
, regs
, &addr
) != 0) {
1842 fasttrap_sigsegv(p
, uthread
, addr
);
1847 if (fasttrap_fuword64(addr
, &value
) == -1) {
1848 fasttrap_sigsegv(p
, uthread
, addr
);
1859 * If this is a call instruction, we need to push the return
1860 * address onto the stack. If this fails, we send the process
1861 * a SIGSEGV and reset the pc to emulate what would happen if
1862 * this instruction weren't traced.
1864 if (tp
->ftt_type
== FASTTRAP_T_CALL
) {
1865 user_addr_t addr
= regs64
->isf
.rsp
- sizeof (uint64_t);
1866 int ret
= fasttrap_suword64(addr
, pc
+ tp
->ftt_size
);
1869 fasttrap_sigsegv(p
, uthread
, addr
);
1874 regs64
->isf
.rsp
= addr
;
1878 case FASTTRAP_T_COMMON
:
1881 uint8_t scratch
[2 * FASTTRAP_MAX_INSTR_SIZE
+ 5 + 2];
1885 * Generic Instruction Tracing
1886 * ---------------------------
1888 * This is the layout of the scratch space in the user-land
1889 * thread structure for our generated instructions.
1892 * ------------------------ -----
1893 * a: <original instruction> <= 15
1894 * jmp <pc + tp->ftt_size> 5
1895 * b: <original instrction> <= 15
1896 * int T_DTRACE_RET 2
1901 * ------------------------ -----
1902 * a: <original instruction> <= 15
1904 * <pc + tp->ftt_size> 8
1905 * b: <original instruction> <= 15
1906 * int T_DTRACE_RET 2
1910 * The %pc is set to a, and curthread->t_dtrace_astpc is set
1911 * to b. If we encounter a signal on the way out of the
1912 * kernel, trap() will set %pc to curthread->t_dtrace_astpc
1913 * so that we execute the original instruction and re-enter
1914 * the kernel rather than redirecting to the next instruction.
1916 * If there are return probes (so we know that we're going to
1917 * need to reenter the kernel after executing the original
1918 * instruction), the scratch space will just contain the
1919 * original instruction followed by an interrupt -- the same
1922 * %rip-relative Addressing
1923 * ------------------------
1925 * There's a further complication in 64-bit mode due to %rip-
1926 * relative addressing. While this is clearly a beneficial
1927 * architectural decision for position independent code, it's
1928 * hard not to see it as a personal attack against the pid
1929 * provider since before there was a relatively small set of
1930 * instructions to emulate; with %rip-relative addressing,
1931 * almost every instruction can potentially depend on the
1932 * address at which it's executed. Rather than emulating
1933 * the broad spectrum of instructions that can now be
1934 * position dependent, we emulate jumps and others as in
1935 * 32-bit mode, and take a different tack for instructions
1936 * using %rip-relative addressing.
1938 * For every instruction that uses the ModRM byte, the
1939 * in-kernel disassembler reports its location. We use the
1940 * ModRM byte to identify that an instruction uses
1941 * %rip-relative addressing and to see what other registers
1942 * the instruction uses. To emulate those instructions,
1943 * we modify the instruction to be %rax-relative rather than
1944 * %rip-relative (or %rcx-relative if the instruction uses
1945 * %rax; or %r8- or %r9-relative if the REX.B is present so
1946 * we don't have to rewrite the REX prefix). We then load
1947 * the value that %rip would have been into the scratch
1948 * register and generate an instruction to reset the scratch
1949 * register back to its original value. The instruction
1950 * sequence looks like this:
1952 * 64-mode %rip-relative bytes
1953 * ------------------------ -----
1954 * a: <modified instruction> <= 15
1955 * movq $<value>, %<scratch> 6
1957 * <pc + tp->ftt_size> 8
1958 * b: <modified instruction> <= 15
1959 * int T_DTRACE_RET 2
1963 * We set curthread->t_dtrace_regv so that upon receiving
1964 * a signal we can reset the value of the scratch register.
1967 addr
= uthread
->t_dtrace_scratch
->addr
;
1970 fasttrap_sigtrap(p
, uthread
, pc
); // Should be killing target proc
1975 ASSERT(tp
->ftt_size
< FASTTRAP_MAX_INSTR_SIZE
);
1977 uthread
->t_dtrace_scrpc
= addr
;
1978 bcopy(tp
->ftt_instr
, &scratch
[i
], tp
->ftt_size
);
1981 if (tp
->ftt_ripmode
!= 0) {
1984 ASSERT(tp
->ftt_ripmode
&
1985 (FASTTRAP_RIP_1
| FASTTRAP_RIP_2
));
1988 * If this was a %rip-relative instruction, we change
1989 * it to be either a %rax- or %rcx-relative
1990 * instruction (depending on whether those registers
1991 * are used as another operand; or %r8- or %r9-
1992 * relative depending on the value of REX.B). We then
1993 * set that register and generate a movq instruction
1994 * to reset the value.
1996 if (tp
->ftt_ripmode
& FASTTRAP_RIP_X
)
1997 scratch
[i
++] = FASTTRAP_REX(1, 0, 0, 1);
1999 scratch
[i
++] = FASTTRAP_REX(1, 0, 0, 0);
2001 if (tp
->ftt_ripmode
& FASTTRAP_RIP_1
)
2002 scratch
[i
++] = FASTTRAP_MOV_EAX
;
2004 scratch
[i
++] = FASTTRAP_MOV_ECX
;
2006 switch (tp
->ftt_ripmode
) {
2007 case FASTTRAP_RIP_1
:
2009 uthread
->t_dtrace_reg
= REG_RAX
;
2011 case FASTTRAP_RIP_2
:
2013 uthread
->t_dtrace_reg
= REG_RCX
;
2015 case FASTTRAP_RIP_1
| FASTTRAP_RIP_X
:
2017 uthread
->t_dtrace_reg
= REG_R8
;
2019 case FASTTRAP_RIP_2
| FASTTRAP_RIP_X
:
2021 uthread
->t_dtrace_reg
= REG_R9
;
2025 panic("unhandled ripmode in fasttrap_pid_probe64");
/* Emit the current scratch-register value as the 64-bit immediate, remember it in t_dtrace_regv, then load the register with pc + ftt_size. */
2028 *(uint64_t *)&scratch
[i
] = *reg
;
2029 uthread
->t_dtrace_regv
= *reg
;
2030 *reg
= pc
+ tp
->ftt_size
;
2031 i
+= sizeof (uint64_t);
2035 * Generate the branch instruction to what would have
2036 * normally been the subsequent instruction. In 32-bit mode,
2037 * this is just a relative branch; in 64-bit mode this is a
2038 * %rip-relative branch that loads the 64-bit pc value
2039 * immediately after the jmp instruction.
/* Bytes written: group-5 opcode, a ModRM byte, a zero 32-bit displacement, then the 64-bit target pc + ftt_size. */
2041 scratch
[i
++] = FASTTRAP_GROUP5_OP
;
2042 scratch
[i
++] = FASTTRAP_MODRM(0, 4, 5);
2043 *(uint32_t *)&scratch
[i
] = 0;
2044 i
+= sizeof (uint32_t);
2045 *(uint64_t *)&scratch
[i
] = pc
+ tp
->ftt_size
;
2046 i
+= sizeof (uint64_t);
2048 uthread
->t_dtrace_astpc
= addr
+ i
;
2049 bcopy(tp
->ftt_instr
, &scratch
[i
], tp
->ftt_size
);
2051 scratch
[i
++] = FASTTRAP_INT
;
2052 scratch
[i
++] = T_DTRACE_RET
;
2054 if (fasttrap_copyout(scratch
, addr
, i
)) {
2055 fasttrap_sigtrap(p
, uthread
, pc
);
2060 if (tp
->ftt_retids
!= NULL
) {
2061 uthread
->t_dtrace_step
= 1;
2062 uthread
->t_dtrace_ret
= 1;
2063 new_pc
= uthread
->t_dtrace_astpc
;
2065 new_pc
= uthread
->t_dtrace_scrpc
;
2068 uthread
->t_dtrace_pc
= pc
;
2069 uthread
->t_dtrace_npc
= pc
+ tp
->ftt_size
;
2070 uthread
->t_dtrace_on
= 1;
2075 panic("fasttrap: mishandled an instruction");
2082 * We're setting this earlier than Solaris does, to get a "correct"
2083 * ustack() output. In the Sun code, a() -> b() -> c() -> d() is
2084 * reported at: d, b, a. The new way gives c, b, a, which is closer
2085 * to correct, as the return instruction has already exectued.
2087 regs64
->isf
.rip
= new_pc
;
2091 * If there were no return probes when we first found the tracepoint,
2092 * we should feel no obligation to honor any return probes that were
2093 * subsequently enabled -- they'll just have to wait until the next
2096 if (tp
->ftt_retids
!= NULL
) {
2098 * We need to wait until the results of the instruction are
2099 * apparent before invoking any return probes. If this
2100 * instruction was emulated we can just call
2101 * fasttrap_return_common(); if it needs to be executed, we
2102 * need to wait until the user thread returns to the kernel.
2104 if (tp
->ftt_type
!= FASTTRAP_T_COMMON
) {
2105 fasttrap_return_common(regs
, pc
, pid
, new_pc
);
2107 ASSERT(uthread
->t_dtrace_ret
!= 0);
2108 ASSERT(uthread
->t_dtrace_pc
== pc
);
2109 ASSERT(uthread
->t_dtrace_scrpc
!= 0);
2110 ASSERT(new_pc
== uthread
->t_dtrace_astpc
);
2118 fasttrap_pid_probe(x86_saved_state_t
*regs
)
2120 if (is_saved_state64(regs
))
2121 return fasttrap_pid_probe64(regs
);
2123 return fasttrap_pid_probe32(regs
);
/*
 * fasttrap_return_probe: finish a single-stepped instruction and hand off
 * to the common return-probe path.
 *
 * Visible behavior: pick the 64- or 32-bit view of the saved state, read
 * the traced pc and next pc recorded in the uthread, clear the uthread
 * t_dtrace_pc/npc/scrpc/astpc fields, rewrite the saved pc to the traced
 * instruction address, and call fasttrap_return_common() with the pid of
 * the (possibly vfork-adjusted) process.
 *
 * NOTE(review): several lines of this function are not visible in this
 * listing (the loop body, the branch for 32-bit state, the return
 * statement). Consult the full file before changing logic.
 */
2127 fasttrap_return_probe(x86_saved_state_t
*regs
)
2129 x86_saved_state64_t
*regs64
;
2130 x86_saved_state32_t
*regs32
;
2131 unsigned int p_model
;
2133 if (is_saved_state64(regs
)) {
2134 regs64
= saved_state64(regs
);
2136 p_model
= DATAMODEL_LP64
;
2139 regs32
= saved_state32(regs
);
2140 p_model
= DATAMODEL_ILP32
;
2143 proc_t
*p
= current_proc();
2144 uthread_t uthread
= (uthread_t
)get_bsdthread_info(current_thread());
/* Snapshot the traced pc and next pc before the per-thread fields are zeroed below. */
2145 user_addr_t pc
= uthread
->t_dtrace_pc
;
2146 user_addr_t npc
= uthread
->t_dtrace_npc
;
2148 uthread
->t_dtrace_pc
= 0;
2149 uthread
->t_dtrace_npc
= 0;
2150 uthread
->t_dtrace_scrpc
= 0;
2151 uthread
->t_dtrace_astpc
= 0;
2154 * Treat a child created by a call to vfork(2) as if it were its
2155 * parent. We know that there's only one thread of control in such a
2156 * process: this one.
2159 * APPLE NOTE: Terry says: "You need to hold the process locks (currently: kernel funnel) for this traversal"
2160 * How do we assert this?
/* NOTE(review): the loop body is not visible in this listing; presumably it steps p to the parent process -- confirm. */
2162 while (p
->p_lflag
& P_LINVFORK
) {
2167 * We set rp->r_pc to the address of the traced instruction so
2168 * that it appears to dtrace_probe() that we're on the original
2169 * instruction, and so that the user can't easily detect our
2170 * complex web of lies. dtrace_return_probe() (our caller)
2171 * will correctly set %pc after we return.
/* NOTE(review): only the LP64 store is visible here; the 32-bit counterpart is presumably on an omitted line -- confirm. */
2173 if (p_model
== DATAMODEL_LP64
)
2174 regs64
->isf
.rip
= pc
;
2178 fasttrap_return_common(regs
, pc
, p
->p_pid
, npc
);
2184 fasttrap_pid_getarg(void *arg
, dtrace_id_t id
, void *parg
, int argno
,
2187 #pragma unused(arg, id, parg, aframes)
2188 return (fasttrap_anarg((x86_saved_state_t
*)find_user_regs(current_thread()), 1, argno
));
2192 fasttrap_usdt_getarg(void *arg
, dtrace_id_t id
, void *parg
, int argno
,
2195 #pragma unused(arg, id, parg, aframes)
2196 return (fasttrap_anarg((x86_saved_state_t
*)find_user_regs(current_thread()), 0, argno
));
2200 * APPLE NOTE: See comments by regmap array definition. We are cheating
2201 * when returning 32 bit registers.
2204 fasttrap_getreg(x86_saved_state_t
*regs
, uint_t reg
)
2206 if (is_saved_state64(regs
)) {
2207 x86_saved_state64_t
*regs64
= saved_state64(regs
);
2210 case REG_RAX
: return regs64
->rax
;
2211 case REG_RCX
: return regs64
->rcx
;
2212 case REG_RDX
: return regs64
->rdx
;
2213 case REG_RBX
: return regs64
->rbx
;
2214 case REG_RSP
: return regs64
->isf
.rsp
;
2215 case REG_RBP
: return regs64
->rbp
;
2216 case REG_RSI
: return regs64
->rsi
;
2217 case REG_RDI
: return regs64
->rdi
;
2218 case REG_R8
: return regs64
->r8
;
2219 case REG_R9
: return regs64
->r9
;
2220 case REG_R10
: return regs64
->r10
;
2221 case REG_R11
: return regs64
->r11
;
2222 case REG_R12
: return regs64
->r12
;
2223 case REG_R13
: return regs64
->r13
;
2224 case REG_R14
: return regs64
->r14
;
2225 case REG_R15
: return regs64
->r15
;
2228 panic("dtrace: unhandled x86_64 getreg() constant");
2230 x86_saved_state32_t
*regs32
= saved_state32(regs
);
2233 case REG_RAX
: return regs32
->eax
;
2234 case REG_RCX
: return regs32
->ecx
;
2235 case REG_RDX
: return regs32
->edx
;
2236 case REG_RBX
: return regs32
->ebx
;
2237 case REG_RSP
: return regs32
->uesp
;
2238 case REG_RBP
: return regs32
->ebp
;
2239 case REG_RSI
: return regs32
->esi
;
2240 case REG_RDI
: return regs32
->edi
;
2243 panic("dtrace: unhandled i386 getreg() constant");