4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License, Version 1.0 only
6 * (the "License"). You may not use this file except in compliance
9 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10 * or http://www.opensolaris.org/os/licensing.
11 * See the License for the specific language governing permissions
12 * and limitations under the License.
14 * When distributing Covered Code, include this CDDL HEADER in each
15 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16 * If applicable, add the following below this CDDL HEADER, with the
17 * fields enclosed by brackets "[]" replaced with your own identifying
18 * information: Portions Copyright [yyyy] [name of copyright owner]
23 * Copyright 2005 Sun Microsystems, Inc. All rights reserved.
24 * Use is subject to license terms.
27 #include <kern/thread.h>
28 #include <mach/thread_status.h>
29 #include <mach/vm_param.h>
30 #include <mach-o/loader.h>
31 #include <mach-o/nlist.h>
32 #include <libkern/kernel_mach_header.h>
33 #include <libkern/OSAtomic.h>
35 #include <sys/param.h>
36 #include <sys/systm.h>
37 #include <sys/errno.h>
39 #include <sys/ioctl.h>
41 #include <sys/fcntl.h>
42 #include <miscfs/devfs/devfs.h>
44 #include <sys/dtrace.h>
45 #include <sys/dtrace_impl.h>
48 #include <sys/dtrace_glue.h>
50 #include <san/kasan.h>
/*
 * Emulation codes and skip distances for patched instructions.
 *
 * DTRACE_INVOP_MOV_RSP_RBP and DTRACE_INVOP_POP_RBP are stored in a probe's
 * fbtp_rval by fbt_provide_probe and later matched as case labels in
 * fbt_perfCallback.  The *_SKIP values are byte counts; fbt_perfCallback adds
 * the NOP, MOV_RSP_RBP and LEAVE ones to the saved isf.rip after handling the
 * corresponding instruction.
 * NOTE(review): the MOVL_ESP_EBP and POP_RBP_SKIP definitions are not
 * referenced in the visible part of this file -- confirm they are still used.
 */
52 #define DTRACE_INVOP_NOP_SKIP 1
53 #define DTRACE_INVOP_MOVL_ESP_EBP 10
54 #define DTRACE_INVOP_MOVL_ESP_EBP_SKIP 2
55 #define DTRACE_INVOP_MOV_RSP_RBP 11
56 #define DTRACE_INVOP_MOV_RSP_RBP_SKIP 3
57 #define DTRACE_INVOP_POP_RBP 12
58 #define DTRACE_INVOP_POP_RBP_SKIP 1
59 #define DTRACE_INVOP_LEAVE_SKIP 1
/*
 * Instruction bytes that fbt_provide_probe compares against the bytes of the
 * function being scanned (prologue push/mov, epilogue pop/leave, and the
 * ret/jmp that follows the epilogue).
 */
61 #define FBT_PUSHL_EBP 0x55
62 #define FBT_MOVL_ESP_EBP0_V0 0x8b
63 #define FBT_MOVL_ESP_EBP1_V0 0xec
64 #define FBT_MOVL_ESP_EBP0_V1 0x89
65 #define FBT_MOVL_ESP_EBP1_V1 0xe5
67 #define FBT_PUSH_RBP 0x55
68 #define FBT_REX_RSP_RBP 0x48
69 #define FBT_MOV_RSP_RBP0 0x89
70 #define FBT_MOV_RSP_RBP1 0xe5
71 #define FBT_POP_RBP 0x5d
73 #define FBT_POPL_EBP 0x5d
75 #define FBT_RET_IMM16 0xc2
76 #define FBT_LEAVE 0xc9
77 #define FBT_JMP_SHORT_REL 0xeb /* Jump short, relative, displacement relative to next instr. */
78 #define FBT_JMP_NEAR_REL 0xe9 /* Jump near, relative, displacement relative to next instr. */
79 #define FBT_JMP_FAR_ABS 0xea /* Jump far, absolute, address given in operand */
/*
 * Instruction lengths, in bytes, that fbt_provide_probe requires
 * dtrace_instr_size() to report before it accepts the matching ret/jmp opcode.
 */
81 #define FBT_RET_IMM16_LEN 3
82 #define FBT_JMP_SHORT_REL_LEN 2
83 #define FBT_JMP_NEAR_REL_LEN 5
84 #define FBT_JMP_FAR_ABS_LEN 5
/*
 * Value recorded in fbtp_patchval for every probe created in this file.
 * NOTE(review): presumably the byte written over the patch point when a probe
 * is enabled; the enabling code is not part of this excerpt -- confirm.
 */
86 #define FBT_PATCHVAL 0xf0
/* Frame counts handed to dtrace_probe_create() for entry and return probes. */
87 #define FBT_AFRAMES_ENTRY 7
88 #define FBT_AFRAMES_RETURN 6
/* Probe names used with dtrace_probe_lookup() and dtrace_probe_create(). */
90 #define FBT_ENTRY "entry"
91 #define FBT_RETURN "return"
/* Map an address to an fbt_probetab bucket: drop the low 4 bits, then apply fbt_probetab_mask. */
92 #define FBT_ADDR2NDX(addr) ((((uintptr_t)(addr)) >> 4) & fbt_probetab_mask)
94 extern dtrace_provider_id_t fbt_id
;
95 extern fbt_probe_t
**fbt_probetab
;
96 extern int fbt_probetab_mask
;
98 kern_return_t
fbt_perfCallback(int, x86_saved_state_t
*, uintptr_t *, __unused
int);
/*
 * fbt_invop: find the FBT probe patched at `addr` and fire it.
 *
 *   addr  - address compared against each candidate's fbtp_patchpoint.
 *   state - saved register state; reinterpreted below as x86_saved_state64_t.
 *   rval  - passed through as the second dtrace_probe() argument when the
 *           probe has a nonzero fbtp_roffset.
 *
 * For a matching probe the function returns that probe's fbtp_rval.
 *
 * NOTE(review): several lines of this function (return type, braces, the
 * branch separating the two dtrace_probe() calls, and the result when no
 * probe matches) are not present in this excerpt -- consult the full source
 * before relying on the control flow described here.
 */
101 fbt_invop(uintptr_t addr
, uintptr_t *state
, uintptr_t rval
)
/* Start at the head of the hash bucket that `addr` maps to. */
103 fbt_probe_t
*fbt
= fbt_probetab
[FBT_ADDR2NDX(addr
)];
/* Walk the bucket's chain looking for the probe planted exactly at `addr`. */
105 for (; fbt
!= NULL
; fbt
= fbt
->fbtp_hashnext
) {
106 if ((uintptr_t)fbt
->fbtp_patchpoint
== addr
) {
/*
 * fbtp_roffset is only assigned for return probes in fbt_provide_probe
 * (entry probes come from kmem_zalloc and leave it zero), so a zero
 * offset selects the entry-probe path.
 */
108 if (fbt
->fbtp_roffset
== 0) {
109 x86_saved_state64_t
*regs
= (x86_saved_state64_t
*)state
;
/* Publish the 64-bit word at 8(%rsp) of the trapped context as the caller while the probe runs. */
111 CPU
->cpu_dtrace_caller
= *(uintptr_t *)(((uintptr_t)(regs
->isf
.rsp
))+sizeof(uint64_t)); // 8(%rsp)
112 /* 64-bit ABI, arguments passed in registers. */
113 dtrace_probe(fbt
->fbtp_id
, regs
->rdi
, regs
->rsi
, regs
->rdx
, regs
->rcx
, regs
->r8
);
114 CPU
->cpu_dtrace_caller
= 0;
/* Fire with arg0 = offset of the patched instruction within the function, arg1 = rval. */
117 dtrace_probe(fbt
->fbtp_id
, fbt
->fbtp_roffset
, rval
, 0, 0, 0);
118 CPU
->cpu_dtrace_caller
= 0;
/* Hand back the emulation code recorded for this patch point. */
121 return (fbt
->fbtp_rval
);
/* True when regs is non-NULL and the low two bits of the saved cs selector are nonzero. */
128 #define IS_USER_TRAP(regs) (regs && (((regs)->isf.cs & 3) != 0))
/* Trap number fbt_perfCallback reacts to (it tests FBT_EXCEPTION_CODE == trapno). */
129 #define T_INVALID_OPCODE 6
130 #define FBT_EXCEPTION_CODE T_INVALID_OPCODE
/* Stored into isf.trapno by fbt_perfCallback before it returns (see the trap_from_kernel note there). */
131 #define T_PREEMPT 255
136 x86_saved_state_t
*tagged_regs
,
138 __unused
int unused2
)
140 kern_return_t retval
= KERN_FAILURE
;
141 x86_saved_state64_t
*saved_state
= saved_state64(tagged_regs
);
143 if (FBT_EXCEPTION_CODE
== trapno
&& !IS_USER_TRAP(saved_state
)) {
145 uint64_t rsp_probe
, fp
, delta
= 0;
151 oldlevel
= ml_set_interrupts_enabled(FALSE
);
153 /* Calculate where the stack pointer was when the probe instruction "fired." */
154 rsp_probe
= saved_state
->isf
.rsp
; /* Easy, x86_64 establishes this value in idt64.s */
157 "Ldtrace_invop_callsite_pre_label:\n"
159 ".private_extern _dtrace_invop_callsite_pre\n"
160 "_dtrace_invop_callsite_pre:\n"
161 " .quad Ldtrace_invop_callsite_pre_label\n"
165 emul
= dtrace_invop( saved_state
->isf
.rip
, (uintptr_t *)saved_state
, saved_state
->rax
);
168 "Ldtrace_invop_callsite_post_label:\n"
170 ".private_extern _dtrace_invop_callsite_post\n"
171 "_dtrace_invop_callsite_post:\n"
172 " .quad Ldtrace_invop_callsite_post_label\n"
177 case DTRACE_INVOP_NOP
:
178 saved_state
->isf
.rip
+= DTRACE_INVOP_NOP_SKIP
; /* Skip over the patched NOP (planted by sdt). */
179 retval
= KERN_SUCCESS
;
182 case DTRACE_INVOP_MOV_RSP_RBP
:
183 saved_state
->rbp
= rsp_probe
; /* Emulate patched mov %rsp,%rbp */
184 saved_state
->isf
.rip
+= DTRACE_INVOP_MOV_RSP_RBP_SKIP
; /* Skip over the bytes of the patched mov %rsp,%rbp */
185 retval
= KERN_SUCCESS
;
188 case DTRACE_INVOP_POP_RBP
:
189 case DTRACE_INVOP_LEAVE
:
191 * Emulate first micro-op of patched leave: mov %rbp,%rsp
192 * fp points just below the return address slot for target's ret
193 * and at the slot holding the frame pointer saved by the target's prologue.
195 fp
= saved_state
->rbp
;
196 /* Emulate second micro-op of patched leave: patched pop %rbp
197 * savearea rbp is set for the frame of the caller to target
198 * The *live* %rsp will be adjusted below for pop increment(s)
200 saved_state
->rbp
= *(uint64_t *)fp
;
201 /* Skip over the patched leave */
202 saved_state
->isf
.rip
+= DTRACE_INVOP_LEAVE_SKIP
;
204 * Lift the stack to account for the emulated leave
205 * Account for words local in this frame
206 * (in "case DTRACE_INVOP_POP_RBP:" this is zero.)
208 delta
= ((uint32_t *)fp
) - ((uint32_t *)rsp_probe
); /* delta is a *word* increment */
209 /* Account for popping off the rbp (just accomplished by the emulation
213 saved_state
->isf
.rsp
+= (delta
<< 2);
214 /* Obtain the stack pointer recorded by the trampolines */
216 /* Shift contents of stack */
217 for (pDst
= (uint32_t *)fp
;
218 pDst
> (((uint32_t *)old_sp
));
220 *pDst
= pDst
[-delta
];
224 * The above has moved stack objects so they are no longer in sync
227 uintptr_t base
= (uintptr_t)((uint32_t *)old_sp
- delta
);
228 uintptr_t size
= (uintptr_t)fp
- base
;
229 if (base
>= VM_MIN_KERNEL_AND_KEXT_ADDRESS
) {
230 kasan_unpoison_stack(base
, size
);
234 /* Track the stack lift in "saved_state". */
235 saved_state
= (x86_saved_state64_t
*) (((uintptr_t)saved_state
) + (delta
<< 2));
236 /* Adjust the stack pointer utilized by the trampolines */
237 *lo_spp
= old_sp
+ (delta
<< 2);
239 retval
= KERN_SUCCESS
;
243 retval
= KERN_FAILURE
;
247 /* Trick trap_from_kernel into not attempting to handle pending AST_URGENT */
248 saved_state
->isf
.trapno
= T_PREEMPT
;
250 ml_set_interrupts_enabled(oldlevel
);
257 fbt_provide_probe(struct modctl
*ctl
, const char *modname
, const char* symbolName
, machine_inst_t
* symbolStart
, machine_inst_t
* instrHigh
)
260 unsigned int doenable
= 0;
263 fbt_probe_t
*newfbt
, *retfbt
, *entryfbt
;
264 machine_inst_t
*instr
, *limit
, theInstr
, i1
, i2
, i3
;
268 * Guard against null symbols
270 if (!symbolStart
|| !instrHigh
|| instrHigh
< symbolStart
) {
271 kprintf("dtrace: %s has an invalid address\n", symbolName
);
275 for (j
= 0, instr
= symbolStart
, theInstr
= 0;
276 (j
< 4) && (instrHigh
> (instr
+ 2)); j
++) {
278 if (theInstr
== FBT_PUSH_RBP
|| theInstr
== FBT_RET
|| theInstr
== FBT_RET_IMM16
)
281 if ((size
= dtrace_instr_size(instr
)) <= 0)
287 if (theInstr
!= FBT_PUSH_RBP
)
294 limit
= (machine_inst_t
*)instrHigh
;
296 if (i1
== FBT_REX_RSP_RBP
&& i2
== FBT_MOV_RSP_RBP0
&& i3
== FBT_MOV_RSP_RBP1
) {
297 instr
+= 1; /* Advance to the mov %rsp,%rbp */
305 * Sometimes, the compiler will schedule an intervening instruction
306 * in the function prologue. Example:
309 * 000006d8 pushl %ebp
310 * 000006d9 movl $0x00000004,%edx
311 * 000006de movl %esp,%ebp
313 * Try the next instruction, to see if it is a movl %esp,%ebp
316 instr
+= 1; /* Advance past the pushl %ebp */
317 if ((size
= dtrace_instr_size(instr
)) <= 0)
322 if ((instr
+ 1) >= limit
)
328 if (!(i1
== FBT_MOVL_ESP_EBP0_V0
&& i2
== FBT_MOVL_ESP_EBP1_V0
) &&
329 !(i1
== FBT_MOVL_ESP_EBP0_V1
&& i2
== FBT_MOVL_ESP_EBP1_V1
))
332 /* instr already points at the movl %esp,%ebp */
336 thisid
= dtrace_probe_lookup(fbt_id
, modname
, symbolName
, FBT_ENTRY
);
337 newfbt
= kmem_zalloc(sizeof (fbt_probe_t
), KM_SLEEP
);
338 strlcpy( (char *)&(newfbt
->fbtp_name
), symbolName
, MAX_FBTP_NAME_CHARS
);
342 * The dtrace_probe previously existed, so we have to hook
343 * the newfbt entry onto the end of the existing fbt's chain.
344 * If we find an fbt entry that was previously patched to
345 * fire, (as indicated by the current patched value), then
346 * we want to enable this newfbt on the spot.
348 entryfbt
= dtrace_probe_arg (fbt_id
, thisid
);
349 ASSERT (entryfbt
!= NULL
);
350 for(; entryfbt
!= NULL
; entryfbt
= entryfbt
->fbtp_next
) {
351 if (entryfbt
->fbtp_currentval
== entryfbt
->fbtp_patchval
)
354 if (entryfbt
->fbtp_next
== NULL
) {
355 entryfbt
->fbtp_next
= newfbt
;
356 newfbt
->fbtp_id
= entryfbt
->fbtp_id
;
363 * The dtrace_probe did not previously exist, so we
364 * create it and hook in the newfbt. Since the probe is
365 * new, we obviously do not need to enable it on the spot.
367 newfbt
->fbtp_id
= dtrace_probe_create(fbt_id
, modname
, symbolName
, FBT_ENTRY
, FBT_AFRAMES_ENTRY
, newfbt
);
371 newfbt
->fbtp_patchpoint
= instr
;
372 newfbt
->fbtp_ctl
= ctl
;
373 newfbt
->fbtp_loadcnt
= ctl
->mod_loadcnt
;
374 newfbt
->fbtp_rval
= DTRACE_INVOP_MOV_RSP_RBP
;
375 newfbt
->fbtp_savedval
= theInstr
;
376 newfbt
->fbtp_patchval
= FBT_PATCHVAL
;
377 newfbt
->fbtp_currentval
= 0;
378 newfbt
->fbtp_hashnext
= fbt_probetab
[FBT_ADDR2NDX(instr
)];
379 fbt_probetab
[FBT_ADDR2NDX(instr
)] = newfbt
;
382 fbt_enable(NULL
, newfbt
->fbtp_id
, newfbt
);
385 * The fbt entry chain is in place, one entry point per symbol.
386 * The fbt return chain can have multiple return points per symbol.
387 * Here we find the end of the fbt return chain.
392 thisid
= dtrace_probe_lookup(fbt_id
, modname
, symbolName
, FBT_RETURN
);
394 /* The dtrace_probe previously existed, so we have to
395 * find the end of the existing fbt chain. If we find
396 * an fbt return that was previously patched to fire,
397 * (as indicated by the current patched value), then
398 * we want to enable any new fbts on the spot.
400 retfbt
= dtrace_probe_arg (fbt_id
, thisid
);
401 ASSERT(retfbt
!= NULL
);
402 for (; retfbt
!= NULL
; retfbt
= retfbt
->fbtp_next
) {
403 if (retfbt
->fbtp_currentval
== retfbt
->fbtp_patchval
)
405 if(retfbt
->fbtp_next
== NULL
)
419 * If this disassembly fails, then we've likely walked off into
420 * a jump table or some other unsuitable area. Bail out of the
423 if ((size
= dtrace_instr_size(instr
)) <= 0)
427 * We (desperately) want to avoid erroneously instrumenting a
428 * jump table, especially given that our markers are pretty
429 * short: two bytes on x86, and just one byte on amd64. To
430 * determine if we're looking at a true instruction sequence
431 * or an inline jump table that happens to contain the same
432 * byte sequences, we resort to some heuristic sleaze: we
433 * treat this instruction as being contained within a pointer,
434 * and see if that pointer points to within the body of the
435 * function. If it does, we refuse to instrument it.
437 for (j
= 0; j
< sizeof (uintptr_t); j
++) {
438 uintptr_t check
= (uintptr_t)instr
- j
;
441 if (check
< (uintptr_t)symbolStart
)
444 if (check
+ sizeof (uintptr_t) > (uintptr_t)limit
)
447 ptr
= *(uint8_t **)check
;
449 if (ptr
>= (uint8_t *)symbolStart
&& ptr
< limit
) {
456 * OK, it's an instruction.
460 /* Walked onto the start of the next routine? If so, bail out of this function. */
461 if (theInstr
== FBT_PUSH_RBP
)
464 if (!(size
== 1 && (theInstr
== FBT_POP_RBP
|| theInstr
== FBT_LEAVE
))) {
470 * Found the pop %rbp; or leave.
472 machine_inst_t
*patch_instr
= instr
;
475 * Scan forward for a "ret", or "jmp".
481 size
= dtrace_instr_size(instr
);
482 if (size
<= 0) /* Failed instruction decode? */
487 if (!(size
== FBT_RET_LEN
&& (theInstr
== FBT_RET
)) &&
488 !(size
== FBT_RET_IMM16_LEN
&& (theInstr
== FBT_RET_IMM16
)) &&
489 !(size
== FBT_JMP_SHORT_REL_LEN
&& (theInstr
== FBT_JMP_SHORT_REL
)) &&
490 !(size
== FBT_JMP_NEAR_REL_LEN
&& (theInstr
== FBT_JMP_NEAR_REL
)) &&
491 !(size
== FBT_JMP_FAR_ABS_LEN
&& (theInstr
== FBT_JMP_FAR_ABS
)))
495 * pop %rbp; ret; or leave; ret; or leave; jmp tailCalledFun; -- We have a winner!
497 newfbt
= kmem_zalloc(sizeof (fbt_probe_t
), KM_SLEEP
);
498 strlcpy( (char *)&(newfbt
->fbtp_name
), symbolName
, MAX_FBTP_NAME_CHARS
);
500 if (retfbt
== NULL
) {
501 newfbt
->fbtp_id
= dtrace_probe_create(fbt_id
, modname
,
502 symbolName
, FBT_RETURN
, FBT_AFRAMES_RETURN
, newfbt
);
504 retfbt
->fbtp_next
= newfbt
;
505 newfbt
->fbtp_id
= retfbt
->fbtp_id
;
509 newfbt
->fbtp_patchpoint
= patch_instr
;
510 newfbt
->fbtp_ctl
= ctl
;
511 newfbt
->fbtp_loadcnt
= ctl
->mod_loadcnt
;
513 if (*patch_instr
== FBT_POP_RBP
) {
514 newfbt
->fbtp_rval
= DTRACE_INVOP_POP_RBP
;
516 ASSERT(*patch_instr
== FBT_LEAVE
);
517 newfbt
->fbtp_rval
= DTRACE_INVOP_LEAVE
;
519 newfbt
->fbtp_roffset
=
520 (uintptr_t)(patch_instr
- (uint8_t *)symbolStart
);
522 newfbt
->fbtp_savedval
= *patch_instr
;
523 newfbt
->fbtp_patchval
= FBT_PATCHVAL
;
524 newfbt
->fbtp_hashnext
= fbt_probetab
[FBT_ADDR2NDX(patch_instr
)];
525 fbt_probetab
[FBT_ADDR2NDX(patch_instr
)] = newfbt
;
528 fbt_enable(NULL
, newfbt
->fbtp_id
, newfbt
);