4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License, Version 1.0 only
6 * (the "License"). You may not use this file except in compliance
9 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10 * or http://www.opensolaris.org/os/licensing.
11 * See the License for the specific language governing permissions
12 * and limitations under the License.
14 * When distributing Covered Code, include this CDDL HEADER in each
15 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16 * If applicable, add the following below this CDDL HEADER, with the
17 * fields enclosed by brackets "[]" replaced with your own identifying
18 * information: Portions Copyright [yyyy] [name of copyright owner]
23 * Copyright 2005 Sun Microsystems, Inc. All rights reserved.
24 * Use is subject to license terms.
27 /* #pragma ident "@(#)fbt.c 1.15 05/09/19 SMI" */
31 #define _KERNEL /* Solaris vs. Darwin */
35 #define MACH__POSIX_C_SOURCE_PRIVATE 1 /* pulls in suitable savearea from mach/ppc/thread_status.h */
36 #include <kern/thread.h>
37 #include <mach/thread_status.h>
38 #include <mach/vm_param.h>
39 #include <mach-o/loader.h>
40 #include <mach-o/nlist.h>
41 #include <libkern/kernel_mach_header.h>
42 #include <libkern/OSAtomic.h>
44 #include <sys/param.h>
45 #include <sys/systm.h>
46 #include <sys/errno.h>
48 #include <sys/ioctl.h>
50 #include <sys/fcntl.h>
51 #include <miscfs/devfs/devfs.h>
53 #include <sys/dtrace.h>
54 #include <sys/dtrace_impl.h>
57 #include <sys/dtrace_glue.h>
59 #include <san/kasan.h>
/*
 * DTRACE_INVOP_* codes identify the instruction a probe replaced; they
 * are stored in fbtp_rval when a probe is created and later switched on
 * to pick the emulation. The *_SKIP values used in this file are byte
 * counts added to the saved rip after the patched instruction has been
 * emulated.
 */
61 #define DTRACE_INVOP_NOP_SKIP 1
62 #define DTRACE_INVOP_MOVL_ESP_EBP 10
63 #define DTRACE_INVOP_MOVL_ESP_EBP_SKIP 2
64 #define DTRACE_INVOP_MOV_RSP_RBP 11
65 #define DTRACE_INVOP_MOV_RSP_RBP_SKIP 3
66 #define DTRACE_INVOP_POP_RBP 12
67 #define DTRACE_INVOP_POP_RBP_SKIP 1
68 #define DTRACE_INVOP_LEAVE_SKIP 1
/*
 * Opcode bytes for the 32-bit prologue: pushl %ebp, and the two
 * two-byte encodings of movl %esp,%ebp (8b ec and 89 e5).
 */
70 #define FBT_PUSHL_EBP 0x55
71 #define FBT_MOVL_ESP_EBP0_V0 0x8b
72 #define FBT_MOVL_ESP_EBP1_V0 0xec
73 #define FBT_MOVL_ESP_EBP0_V1 0x89
74 #define FBT_MOVL_ESP_EBP1_V1 0xe5
/*
 * Opcode bytes for the 64-bit prologue/epilogue: push %rbp, the
 * three-byte mov %rsp,%rbp (48 89 e5), and pop %rbp.
 */
76 #define FBT_PUSH_RBP 0x55
77 #define FBT_REX_RSP_RBP 0x48
78 #define FBT_MOV_RSP_RBP0 0x89
79 #define FBT_MOV_RSP_RBP1 0xe5
80 #define FBT_POP_RBP 0x5d
82 #define FBT_POPL_EBP 0x5d
/*
 * Opcodes examined while locating return sites: the leave that gets
 * patched, and the ret/jmp forms accepted as the instruction following
 * a pop %rbp or leave. The *_LEN values are the instruction sizes that
 * must match the decoded size for the opcode to be accepted.
 */
84 #define FBT_RET_IMM16 0xc2
85 #define FBT_LEAVE 0xc9
86 #define FBT_JMP_SHORT_REL 0xeb /* Jump short, relative, displacement relative to next instr. */
87 #define FBT_JMP_NEAR_REL 0xe9 /* Jump near, relative, displacement relative to next instr. */
88 #define FBT_JMP_FAR_ABS 0xea /* Jump far, absolute, address given in operand */
90 #define FBT_RET_IMM16_LEN 3
91 #define FBT_JMP_SHORT_REL_LEN 2
92 #define FBT_JMP_NEAR_REL_LEN 5
93 #define FBT_JMP_FAR_ABS_LEN 5
/* Byte recorded as fbtp_patchval for every probe created in this file. */
95 #define FBT_PATCHVAL 0xf0
/* Passed to dtrace_probe_create() for entry and return probes respectively. */
96 #define FBT_AFRAMES_ENTRY 7
97 #define FBT_AFRAMES_RETURN 6
/* Probe names used with dtrace_probe_lookup() and dtrace_probe_create(). */
99 #define FBT_ENTRY "entry"
100 #define FBT_RETURN "return"
/* Map a patch-point address to an fbt_probetab index: drop the low 4 bits, then mask. */
101 #define FBT_ADDR2NDX(addr) ((((uintptr_t)(addr)) >> 4) & fbt_probetab_mask)
/*
 * Provider id, the probe hash table (indexed through FBT_ADDR2NDX) and
 * the table's index mask. These are only declared here; the definitions
 * are not in this part of the file.
 */
103 extern dtrace_provider_id_t fbt_id
;
104 extern fbt_probe_t
**fbt_probetab
;
105 extern int fbt_probetab_mask
;
/* Prototype for fbt_perfCallback; the trailing int parameter is marked __unused. */
107 kern_return_t
fbt_perfCallback(int, x86_saved_state_t
*, uintptr_t *, __unused
int);
110 fbt_invop(uintptr_t addr
, uintptr_t *state
, uintptr_t rval
)
112 fbt_probe_t
*fbt
= fbt_probetab
[FBT_ADDR2NDX(addr
)];
114 for (; fbt
!= NULL
; fbt
= fbt
->fbtp_hashnext
) {
115 if ((uintptr_t)fbt
->fbtp_patchpoint
== addr
) {
117 if (fbt
->fbtp_roffset
== 0) {
118 x86_saved_state64_t
*regs
= (x86_saved_state64_t
*)state
;
120 CPU
->cpu_dtrace_caller
= *(uintptr_t *)(((uintptr_t)(regs
->isf
.rsp
))+sizeof(uint64_t)); // 8(%rsp)
121 /* 64-bit ABI, arguments passed in registers. */
122 dtrace_probe(fbt
->fbtp_id
, regs
->rdi
, regs
->rsi
, regs
->rdx
, regs
->rcx
, regs
->r8
);
123 CPU
->cpu_dtrace_caller
= 0;
126 dtrace_probe(fbt
->fbtp_id
, fbt
->fbtp_roffset
, rval
, 0, 0, 0);
127 CPU
->cpu_dtrace_caller
= 0;
130 return (fbt
->fbtp_rval
);
/* True when regs is non-NULL and the low two bits of the saved cs selector are non-zero. */
137 #define IS_USER_TRAP(regs) (regs && (((regs)->isf.cs & 3) != 0))
/* Trap number compared against trapno before a trap is treated as a probe firing. */
138 #define T_INVALID_OPCODE 6
139 #define FBT_EXCEPTION_CODE T_INVALID_OPCODE
/* Value stored into isf.trapno once the probe has been handled. */
140 #define T_PREEMPT 255
145 x86_saved_state_t
*tagged_regs
,
147 __unused
int unused2
)
149 kern_return_t retval
= KERN_FAILURE
;
150 x86_saved_state64_t
*saved_state
= saved_state64(tagged_regs
);
152 if (FBT_EXCEPTION_CODE
== trapno
&& !IS_USER_TRAP(saved_state
)) {
154 uint64_t rsp_probe
, fp
, delta
= 0;
160 oldlevel
= ml_set_interrupts_enabled(FALSE
);
162 /* Calculate where the stack pointer was when the probe instruction "fired." */
163 rsp_probe
= saved_state
->isf
.rsp
; /* Easy, x86_64 establishes this value in idt64.s */
166 "Ldtrace_invop_callsite_pre_label:\n"
168 ".private_extern _dtrace_invop_callsite_pre\n"
169 "_dtrace_invop_callsite_pre:\n"
170 " .quad Ldtrace_invop_callsite_pre_label\n"
174 emul
= dtrace_invop( saved_state
->isf
.rip
, (uintptr_t *)saved_state
, saved_state
->rax
);
177 "Ldtrace_invop_callsite_post_label:\n"
179 ".private_extern _dtrace_invop_callsite_post\n"
180 "_dtrace_invop_callsite_post:\n"
181 " .quad Ldtrace_invop_callsite_post_label\n"
186 case DTRACE_INVOP_NOP
:
187 saved_state
->isf
.rip
+= DTRACE_INVOP_NOP_SKIP
; /* Skip over the patched NOP (planted by sdt). */
188 retval
= KERN_SUCCESS
;
191 case DTRACE_INVOP_MOV_RSP_RBP
:
192 saved_state
->rbp
= rsp_probe
; /* Emulate patched mov %rsp,%rbp */
193 saved_state
->isf
.rip
+= DTRACE_INVOP_MOV_RSP_RBP_SKIP
; /* Skip over the bytes of the patched mov %rsp,%rbp */
194 retval
= KERN_SUCCESS
;
197 case DTRACE_INVOP_POP_RBP
:
198 case DTRACE_INVOP_LEAVE
:
200 * Emulate first micro-op of patched leave: mov %rbp,%rsp
201 * fp points just below the return address slot for target's ret
202 * and at the slot holding the frame pointer saved by the target's prologue.
204 fp
= saved_state
->rbp
;
205 /* Emulate second micro-op of patched leave: patched pop %rbp
206 * savearea rbp is set for the frame of the caller to target
207 * The *live* %rsp will be adjusted below for pop increment(s)
209 saved_state
->rbp
= *(uint64_t *)fp
;
210 /* Skip over the patched leave */
211 saved_state
->isf
.rip
+= DTRACE_INVOP_LEAVE_SKIP
;
213 * Lift the stack to account for the emulated leave
214 * Account for words local in this frame
215 * (in "case DTRACE_INVOP_POP_RBP:" this is zero.)
217 delta
= ((uint32_t *)fp
) - ((uint32_t *)rsp_probe
); /* delta is a *word* increment */
218 /* Account for popping off the rbp (just accomplished by the emulation
222 saved_state
->isf
.rsp
+= (delta
<< 2);
223 /* Obtain the stack pointer recorded by the trampolines */
225 /* Shift contents of stack */
226 for (pDst
= (uint32_t *)fp
;
227 pDst
> (((uint32_t *)old_sp
));
229 *pDst
= pDst
[-delta
];
233 * The above has moved stack objects so they are no longer in sync
236 uintptr_t base
= (uintptr_t)((uint32_t *)old_sp
- delta
);
237 uintptr_t size
= (uintptr_t)fp
- base
;
238 if (base
>= VM_MIN_KERNEL_AND_KEXT_ADDRESS
) {
239 kasan_unpoison_stack(base
, size
);
243 /* Track the stack lift in "saved_state". */
244 saved_state
= (x86_saved_state64_t
*) (((uintptr_t)saved_state
) + (delta
<< 2));
245 /* Adjust the stack pointer utilized by the trampolines */
246 *lo_spp
= old_sp
+ (delta
<< 2);
248 retval
= KERN_SUCCESS
;
252 retval
= KERN_FAILURE
;
256 /* Trick trap_from_kernel into not attempting to handle pending AST_URGENT */
257 saved_state
->isf
.trapno
= T_PREEMPT
;
259 ml_set_interrupts_enabled(oldlevel
);
266 fbt_provide_probe(struct modctl
*ctl
, const char *modname
, const char* symbolName
, machine_inst_t
* symbolStart
, machine_inst_t
* instrHigh
)
269 unsigned int doenable
= 0;
272 fbt_probe_t
*newfbt
, *retfbt
, *entryfbt
;
273 machine_inst_t
*instr
, *limit
, theInstr
, i1
, i2
, i3
;
277 * Guard against null symbols
279 if (!symbolStart
|| !instrHigh
|| instrHigh
< symbolStart
) {
280 kprintf("dtrace: %s has an invalid address\n", symbolName
);
284 for (j
= 0, instr
= symbolStart
, theInstr
= 0;
285 (j
< 4) && (instrHigh
> (instr
+ 2)); j
++) {
287 if (theInstr
== FBT_PUSH_RBP
|| theInstr
== FBT_RET
|| theInstr
== FBT_RET_IMM16
)
290 if ((size
= dtrace_instr_size(instr
)) <= 0)
296 if (theInstr
!= FBT_PUSH_RBP
)
303 limit
= (machine_inst_t
*)instrHigh
;
305 if (i1
== FBT_REX_RSP_RBP
&& i2
== FBT_MOV_RSP_RBP0
&& i3
== FBT_MOV_RSP_RBP1
) {
306 instr
+= 1; /* Advance to the mov %rsp,%rbp */
314 * Sometimes, the compiler will schedule an intervening instruction
315 * in the function prologue. Example:
318 * 000006d8 pushl %ebp
319 * 000006d9 movl $0x00000004,%edx
320 * 000006de movl %esp,%ebp
322 * Try the next instruction, to see if it is a movl %esp,%ebp
325 instr
+= 1; /* Advance past the pushl %ebp */
326 if ((size
= dtrace_instr_size(instr
)) <= 0)
331 if ((instr
+ 1) >= limit
)
337 if (!(i1
== FBT_MOVL_ESP_EBP0_V0
&& i2
== FBT_MOVL_ESP_EBP1_V0
) &&
338 !(i1
== FBT_MOVL_ESP_EBP0_V1
&& i2
== FBT_MOVL_ESP_EBP1_V1
))
341 /* instr already points at the movl %esp,%ebp */
345 thisid
= dtrace_probe_lookup(fbt_id
, modname
, symbolName
, FBT_ENTRY
);
346 newfbt
= kmem_zalloc(sizeof (fbt_probe_t
), KM_SLEEP
);
347 strlcpy( (char *)&(newfbt
->fbtp_name
), symbolName
, MAX_FBTP_NAME_CHARS
);
351 * The dtrace_probe previously existed, so we have to hook
352 * the newfbt entry onto the end of the existing fbt's chain.
353 * If we find an fbt entry that was previously patched to
354 * fire, (as indicated by the current patched value), then
355 * we want to enable this newfbt on the spot.
357 entryfbt
= dtrace_probe_arg (fbt_id
, thisid
);
358 ASSERT (entryfbt
!= NULL
);
359 for(; entryfbt
!= NULL
; entryfbt
= entryfbt
->fbtp_next
) {
360 if (entryfbt
->fbtp_currentval
== entryfbt
->fbtp_patchval
)
363 if (entryfbt
->fbtp_next
== NULL
) {
364 entryfbt
->fbtp_next
= newfbt
;
365 newfbt
->fbtp_id
= entryfbt
->fbtp_id
;
372 * The dtrace_probe did not previously exist, so we
373 * create it and hook in the newfbt. Since the probe is
374 * new, we obviously do not need to enable it on the spot.
376 newfbt
->fbtp_id
= dtrace_probe_create(fbt_id
, modname
, symbolName
, FBT_ENTRY
, FBT_AFRAMES_ENTRY
, newfbt
);
380 newfbt
->fbtp_patchpoint
= instr
;
381 newfbt
->fbtp_ctl
= ctl
;
382 newfbt
->fbtp_loadcnt
= ctl
->mod_loadcnt
;
383 newfbt
->fbtp_rval
= DTRACE_INVOP_MOV_RSP_RBP
;
384 newfbt
->fbtp_savedval
= theInstr
;
385 newfbt
->fbtp_patchval
= FBT_PATCHVAL
;
386 newfbt
->fbtp_currentval
= 0;
387 newfbt
->fbtp_hashnext
= fbt_probetab
[FBT_ADDR2NDX(instr
)];
388 fbt_probetab
[FBT_ADDR2NDX(instr
)] = newfbt
;
391 fbt_enable(NULL
, newfbt
->fbtp_id
, newfbt
);
394 * The fbt entry chain is in place, one entry point per symbol.
395 * The fbt return chain can have multiple return points per symbol.
396 * Here we find the end of the fbt return chain.
401 thisid
= dtrace_probe_lookup(fbt_id
, modname
, symbolName
, FBT_RETURN
);
403 /* The dtrace_probe previously existed, so we have to
404 * find the end of the existing fbt chain. If we find
405 * an fbt return that was previously patched to fire,
406 * (as indicated by the current patched value), then
407 * we want to enable any new fbts on the spot.
409 retfbt
= dtrace_probe_arg (fbt_id
, thisid
);
410 ASSERT(retfbt
!= NULL
);
411 for (; retfbt
!= NULL
; retfbt
= retfbt
->fbtp_next
) {
412 if (retfbt
->fbtp_currentval
== retfbt
->fbtp_patchval
)
414 if(retfbt
->fbtp_next
== NULL
)
428 * If this disassembly fails, then we've likely walked off into
429 * a jump table or some other unsuitable area. Bail out of the
432 if ((size
= dtrace_instr_size(instr
)) <= 0)
436 * We (desperately) want to avoid erroneously instrumenting a
437 * jump table, especially given that our markers are pretty
438 * short: two bytes on x86, and just one byte on amd64. To
439 * determine if we're looking at a true instruction sequence
440 * or an inline jump table that happens to contain the same
441 * byte sequences, we resort to some heuristic sleaze: we
442 * treat this instruction as being contained within a pointer,
443 * and see if that pointer points to within the body of the
444 * function. If it does, we refuse to instrument it.
446 for (j
= 0; j
< sizeof (uintptr_t); j
++) {
447 uintptr_t check
= (uintptr_t)instr
- j
;
450 if (check
< (uintptr_t)symbolStart
)
453 if (check
+ sizeof (uintptr_t) > (uintptr_t)limit
)
456 ptr
= *(uint8_t **)check
;
458 if (ptr
>= (uint8_t *)symbolStart
&& ptr
< limit
) {
465 * OK, it's an instruction.
469 /* Walked onto the start of the next routine? If so, bail out of this function. */
470 if (theInstr
== FBT_PUSH_RBP
)
473 if (!(size
== 1 && (theInstr
== FBT_POP_RBP
|| theInstr
== FBT_LEAVE
))) {
479 * Found the pop %rbp; or leave.
481 machine_inst_t
*patch_instr
= instr
;
484 * Scan forward for a "ret", or "jmp".
490 size
= dtrace_instr_size(instr
);
491 if (size
<= 0) /* Failed instruction decode? */
496 if (!(size
== FBT_RET_LEN
&& (theInstr
== FBT_RET
)) &&
497 !(size
== FBT_RET_IMM16_LEN
&& (theInstr
== FBT_RET_IMM16
)) &&
498 !(size
== FBT_JMP_SHORT_REL_LEN
&& (theInstr
== FBT_JMP_SHORT_REL
)) &&
499 !(size
== FBT_JMP_NEAR_REL_LEN
&& (theInstr
== FBT_JMP_NEAR_REL
)) &&
500 !(size
== FBT_JMP_FAR_ABS_LEN
&& (theInstr
== FBT_JMP_FAR_ABS
)))
504 * pop %rbp; ret; or leave; ret; or leave; jmp tailCalledFun; -- We have a winner!
506 newfbt
= kmem_zalloc(sizeof (fbt_probe_t
), KM_SLEEP
);
507 strlcpy( (char *)&(newfbt
->fbtp_name
), symbolName
, MAX_FBTP_NAME_CHARS
);
509 if (retfbt
== NULL
) {
510 newfbt
->fbtp_id
= dtrace_probe_create(fbt_id
, modname
,
511 symbolName
, FBT_RETURN
, FBT_AFRAMES_RETURN
, newfbt
);
513 retfbt
->fbtp_next
= newfbt
;
514 newfbt
->fbtp_id
= retfbt
->fbtp_id
;
518 newfbt
->fbtp_patchpoint
= patch_instr
;
519 newfbt
->fbtp_ctl
= ctl
;
520 newfbt
->fbtp_loadcnt
= ctl
->mod_loadcnt
;
522 if (*patch_instr
== FBT_POP_RBP
) {
523 newfbt
->fbtp_rval
= DTRACE_INVOP_POP_RBP
;
525 ASSERT(*patch_instr
== FBT_LEAVE
);
526 newfbt
->fbtp_rval
= DTRACE_INVOP_LEAVE
;
528 newfbt
->fbtp_roffset
=
529 (uintptr_t)(patch_instr
- (uint8_t *)symbolStart
);
531 newfbt
->fbtp_savedval
= *patch_instr
;
532 newfbt
->fbtp_patchval
= FBT_PATCHVAL
;
533 newfbt
->fbtp_hashnext
= fbt_probetab
[FBT_ADDR2NDX(patch_instr
)];
534 fbt_probetab
[FBT_ADDR2NDX(patch_instr
)] = newfbt
;
537 fbt_enable(NULL
, newfbt
->fbtp_id
, newfbt
);