]> git.saurik.com Git - apple/xnu.git/blob - bsd/dev/i386/fbt_x86.c
xnu-6153.61.1.tar.gz
[apple/xnu.git] / bsd / dev / i386 / fbt_x86.c
1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License, Version 1.0 only
6 * (the "License"). You may not use this file except in compliance
7 * with the License.
8 *
9 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10 * or http://www.opensolaris.org/os/licensing.
11 * See the License for the specific language governing permissions
12 * and limitations under the License.
13 *
14 * When distributing Covered Code, include this CDDL HEADER in each
15 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16 * If applicable, add the following below this CDDL HEADER, with the
17 * fields enclosed by brackets "[]" replaced with your own identifying
18 * information: Portions Copyright [yyyy] [name of copyright owner]
19 *
20 * CDDL HEADER END
21 */
22 /*
23 * Copyright 2005 Sun Microsystems, Inc. All rights reserved.
24 * Use is subject to license terms.
25 */
26
27 #include <kern/thread.h>
28 #include <mach/thread_status.h>
29 #include <mach/vm_param.h>
30 #include <mach-o/loader.h>
31 #include <mach-o/nlist.h>
32 #include <libkern/kernel_mach_header.h>
33 #include <libkern/OSAtomic.h>
34
35 #include <sys/param.h>
36 #include <sys/systm.h>
37 #include <sys/errno.h>
38 #include <sys/stat.h>
39 #include <sys/ioctl.h>
40 #include <sys/conf.h>
41 #include <sys/fcntl.h>
42 #include <miscfs/devfs/devfs.h>
43
44 #include <sys/dtrace.h>
45 #include <sys/dtrace_impl.h>
46 #include <sys/fbt.h>
47
48 #include <sys/dtrace_glue.h>
49
50 #include <san/kasan.h>
51
/*
 * Invop codes private to this file, paired with the number of bytes that
 * %rip is advanced past the patched instruction once it has been emulated.
 */
#define DTRACE_INVOP_NOP_SKIP           1

#define DTRACE_INVOP_MOVL_ESP_EBP       10
#define DTRACE_INVOP_MOVL_ESP_EBP_SKIP  2

#define DTRACE_INVOP_MOV_RSP_RBP        11
#define DTRACE_INVOP_MOV_RSP_RBP_SKIP   3

#define DTRACE_INVOP_POP_RBP            12
#define DTRACE_INVOP_POP_RBP_SKIP       1

#define DTRACE_INVOP_LEAVE_SKIP         1

/* Opcode bytes of the 32-bit prologue forms: pushl %ebp; movl %esp,%ebp. */
#define FBT_PUSHL_EBP                   0x55
#define FBT_MOVL_ESP_EBP0_V0            0x8b
#define FBT_MOVL_ESP_EBP1_V0            0xec
#define FBT_MOVL_ESP_EBP0_V1            0x89
#define FBT_MOVL_ESP_EBP1_V1            0xe5

/* Opcode bytes of the 64-bit forms: push %rbp; mov %rsp,%rbp; pop %rbp. */
#define FBT_PUSH_RBP                    0x55
#define FBT_REX_RSP_RBP                 0x48
#define FBT_MOV_RSP_RBP0                0x89
#define FBT_MOV_RSP_RBP1                0xe5
#define FBT_POP_RBP                     0x5d

/* Opcode bytes looked for when scanning for a function's return sites. */
#define FBT_POPL_EBP                    0x5d
#define FBT_RET                         0xc3
#define FBT_RET_IMM16                   0xc2
#define FBT_LEAVE                       0xc9
#define FBT_JMP_SHORT_REL               0xeb /* Jump short, relative; displacement is relative to the next instruction. */
#define FBT_JMP_NEAR_REL                0xe9 /* Jump near, relative; displacement is relative to the next instruction. */
#define FBT_JMP_FAR_ABS                 0xea /* Jump far, absolute; target address is given in the operand. */

/* Instruction lengths the return-site scan requires for the opcodes above. */
#define FBT_RET_LEN                     1
#define FBT_RET_IMM16_LEN               3
#define FBT_JMP_SHORT_REL_LEN           2
#define FBT_JMP_NEAR_REL_LEN            5
#define FBT_JMP_FAR_ABS_LEN             5

/* Value recorded as every probe's fbtp_patchval. */
#define FBT_PATCHVAL                    0xf0

/* Frame counts handed to dtrace_probe_create() for entry and return probes. */
#define FBT_AFRAMES_ENTRY               7
#define FBT_AFRAMES_RETURN              6

/* Probe names registered for the two kinds of FBT probe. */
#define FBT_ENTRY                       "entry"
#define FBT_RETURN                      "return"

/* Hash a patch-point address into an index into fbt_probetab. */
#define FBT_ADDR2NDX(addr)              ((((uintptr_t)(addr)) >> 4) & fbt_probetab_mask)
93
94 extern dtrace_provider_id_t fbt_id;
95 extern fbt_probe_t **fbt_probetab;
96 extern int fbt_probetab_mask;
97
98 kern_return_t fbt_perfCallback(int, x86_saved_state_t *, uintptr_t *, __unused int);
99
100 int
101 fbt_invop(uintptr_t addr, uintptr_t *state, uintptr_t rval)
102 {
103 fbt_probe_t *fbt = fbt_probetab[FBT_ADDR2NDX(addr)];
104
105 for (; fbt != NULL; fbt = fbt->fbtp_hashnext) {
106 if ((uintptr_t)fbt->fbtp_patchpoint == addr) {
107
108 if (fbt->fbtp_roffset == 0) {
109 x86_saved_state64_t *regs = (x86_saved_state64_t *)state;
110
111 CPU->cpu_dtrace_caller = *(uintptr_t *)(((uintptr_t)(regs->isf.rsp))+sizeof(uint64_t)); // 8(%rsp)
112 /* 64-bit ABI, arguments passed in registers. */
113 dtrace_probe(fbt->fbtp_id, regs->rdi, regs->rsi, regs->rdx, regs->rcx, regs->r8);
114 CPU->cpu_dtrace_caller = 0;
115 } else {
116
117 dtrace_probe(fbt->fbtp_id, fbt->fbtp_roffset, rval, 0, 0, 0);
118 CPU->cpu_dtrace_caller = 0;
119 }
120
121 return (fbt->fbtp_rval);
122 }
123 }
124
125 return (0);
126 }
127
/*
 * IS_USER_TRAP(regs) is true when `regs` is non-NULL and the low two bits of
 * the saved %cs selector are non-zero.  The argument is parenthesized at
 * every use so that an expression argument cannot re-associate with the &&.
 */
#define IS_USER_TRAP(regs)      ((regs) && (((regs)->isf.cs & 3) != 0))

/* Trap number of the invalid-opcode exception, which is the one FBT handles. */
#define T_INVALID_OPCODE        6
#define FBT_EXCEPTION_CODE      T_INVALID_OPCODE

/* Trap number written back into the saved state by fbt_perfCallback(). */
#define T_PREEMPT               255
132
133 kern_return_t
134 fbt_perfCallback(
135 int trapno,
136 x86_saved_state_t *tagged_regs,
137 uintptr_t *lo_spp,
138 __unused int unused2)
139 {
140 kern_return_t retval = KERN_FAILURE;
141 x86_saved_state64_t *saved_state = saved_state64(tagged_regs);
142
143 if (FBT_EXCEPTION_CODE == trapno && !IS_USER_TRAP(saved_state)) {
144 boolean_t oldlevel;
145 uint64_t rsp_probe, fp, delta = 0;
146 uintptr_t old_sp;
147 uint32_t *pDst;
148 int emul;
149
150
151 oldlevel = ml_set_interrupts_enabled(FALSE);
152
153 /* Calculate where the stack pointer was when the probe instruction "fired." */
154 rsp_probe = saved_state->isf.rsp; /* Easy, x86_64 establishes this value in idt64.s */
155
156 __asm__ volatile(
157 "Ldtrace_invop_callsite_pre_label:\n"
158 ".data\n"
159 ".private_extern _dtrace_invop_callsite_pre\n"
160 "_dtrace_invop_callsite_pre:\n"
161 " .quad Ldtrace_invop_callsite_pre_label\n"
162 ".text\n"
163 );
164
165 emul = dtrace_invop( saved_state->isf.rip, (uintptr_t *)saved_state, saved_state->rax );
166
167 __asm__ volatile(
168 "Ldtrace_invop_callsite_post_label:\n"
169 ".data\n"
170 ".private_extern _dtrace_invop_callsite_post\n"
171 "_dtrace_invop_callsite_post:\n"
172 " .quad Ldtrace_invop_callsite_post_label\n"
173 ".text\n"
174 );
175
176 switch (emul) {
177 case DTRACE_INVOP_NOP:
178 saved_state->isf.rip += DTRACE_INVOP_NOP_SKIP; /* Skip over the patched NOP (planted by sdt). */
179 retval = KERN_SUCCESS;
180 break;
181
182 case DTRACE_INVOP_MOV_RSP_RBP:
183 saved_state->rbp = rsp_probe; /* Emulate patched mov %rsp,%rbp */
184 saved_state->isf.rip += DTRACE_INVOP_MOV_RSP_RBP_SKIP; /* Skip over the bytes of the patched mov %rsp,%rbp */
185 retval = KERN_SUCCESS;
186 break;
187
188 case DTRACE_INVOP_POP_RBP:
189 case DTRACE_INVOP_LEAVE:
190 /*
191 * Emulate first micro-op of patched leave: mov %rbp,%rsp
192 * fp points just below the return address slot for target's ret
193 * and at the slot holding the frame pointer saved by the target's prologue.
194 */
195 fp = saved_state->rbp;
196 /* Emulate second micro-op of patched leave: patched pop %rbp
197 * savearea rbp is set for the frame of the caller to target
198 * The *live* %rsp will be adjusted below for pop increment(s)
199 */
200 saved_state->rbp = *(uint64_t *)fp;
201 /* Skip over the patched leave */
202 saved_state->isf.rip += DTRACE_INVOP_LEAVE_SKIP;
203 /*
204 * Lift the stack to account for the emulated leave
205 * Account for words local in this frame
206 * (in "case DTRACE_INVOP_POPL_EBP:" this is zero.)
207 */
208 delta = ((uint32_t *)fp) - ((uint32_t *)rsp_probe); /* delta is a *word* increment */
209 /* Account for popping off the rbp (just accomplished by the emulation
210 * above...)
211 */
212 delta += 2;
213 saved_state->isf.rsp += (delta << 2);
214 /* Obtain the stack pointer recorded by the trampolines */
215 old_sp = *lo_spp;
216 /* Shift contents of stack */
217 for (pDst = (uint32_t *)fp;
218 pDst > (((uint32_t *)old_sp));
219 pDst--)
220 *pDst = pDst[-delta];
221
222 #if KASAN
223 /*
224 * The above has moved stack objects so they are no longer in sync
225 * with the shadow.
226 */
227 uintptr_t base = (uintptr_t)((uint32_t *)old_sp - delta);
228 uintptr_t size = (uintptr_t)fp - base;
229 if (base >= VM_MIN_KERNEL_AND_KEXT_ADDRESS) {
230 kasan_unpoison_stack(base, size);
231 }
232 #endif
233
234 /* Track the stack lift in "saved_state". */
235 saved_state = (x86_saved_state64_t *) (((uintptr_t)saved_state) + (delta << 2));
236 /* Adjust the stack pointer utilized by the trampolines */
237 *lo_spp = old_sp + (delta << 2);
238
239 retval = KERN_SUCCESS;
240 break;
241
242 default:
243 retval = KERN_FAILURE;
244 break;
245 }
246
247 /* Trick trap_from_kernel into not attempting to handle pending AST_URGENT */
248 saved_state->isf.trapno = T_PREEMPT;
249
250 ml_set_interrupts_enabled(oldlevel);
251 }
252
253 return retval;
254 }
255
256 void
257 fbt_provide_probe(struct modctl *ctl, const char *modname, const char* symbolName, machine_inst_t* symbolStart, machine_inst_t* instrHigh)
258 {
259 unsigned int j;
260 unsigned int doenable = 0;
261 dtrace_id_t thisid;
262
263 fbt_probe_t *newfbt, *retfbt, *entryfbt;
264 machine_inst_t *instr, *limit, theInstr, i1, i2, i3;
265 int size;
266
267 /*
268 * Guard against null symbols
269 */
270 if (!symbolStart || !instrHigh || instrHigh < symbolStart) {
271 kprintf("dtrace: %s has an invalid address\n", symbolName);
272 return;
273 }
274
275 for (j = 0, instr = symbolStart, theInstr = 0;
276 (j < 4) && (instrHigh > (instr + 2)); j++) {
277 theInstr = instr[0];
278 if (theInstr == FBT_PUSH_RBP || theInstr == FBT_RET || theInstr == FBT_RET_IMM16)
279 break;
280
281 if ((size = dtrace_instr_size(instr)) <= 0)
282 break;
283
284 instr += size;
285 }
286
287 if (theInstr != FBT_PUSH_RBP)
288 return;
289
290 i1 = instr[1];
291 i2 = instr[2];
292 i3 = instr[3];
293
294 limit = (machine_inst_t *)instrHigh;
295
296 if (i1 == FBT_REX_RSP_RBP && i2 == FBT_MOV_RSP_RBP0 && i3 == FBT_MOV_RSP_RBP1) {
297 instr += 1; /* Advance to the mov %rsp,%rbp */
298 theInstr = i1;
299 } else {
300 return;
301 }
302 #if 0
303 else {
304 /*
305 * Sometimes, the compiler will schedule an intervening instruction
306 * in the function prologue. Example:
307 *
308 * _mach_vm_read:
309 * 000006d8 pushl %ebp
310 * 000006d9 movl $0x00000004,%edx
311 * 000006de movl %esp,%ebp
312 *
313 * Try the next instruction, to see if it is a movl %esp,%ebp
314 */
315
316 instr += 1; /* Advance past the pushl %ebp */
317 if ((size = dtrace_instr_size(instr)) <= 0)
318 return;
319
320 instr += size;
321
322 if ((instr + 1) >= limit)
323 return;
324
325 i1 = instr[0];
326 i2 = instr[1];
327
328 if (!(i1 == FBT_MOVL_ESP_EBP0_V0 && i2 == FBT_MOVL_ESP_EBP1_V0) &&
329 !(i1 == FBT_MOVL_ESP_EBP0_V1 && i2 == FBT_MOVL_ESP_EBP1_V1))
330 return;
331
332 /* instr already points at the movl %esp,%ebp */
333 theInstr = i1;
334 }
335 #endif
336 thisid = dtrace_probe_lookup(fbt_id, modname, symbolName, FBT_ENTRY);
337 newfbt = kmem_zalloc(sizeof (fbt_probe_t), KM_SLEEP);
338 strlcpy( (char *)&(newfbt->fbtp_name), symbolName, MAX_FBTP_NAME_CHARS );
339
340 if (thisid != 0) {
341 /*
342 * The dtrace_probe previously existed, so we have to hook
343 * the newfbt entry onto the end of the existing fbt's chain.
344 * If we find an fbt entry that was previously patched to
345 * fire, (as indicated by the current patched value), then
346 * we want to enable this newfbt on the spot.
347 */
348 entryfbt = dtrace_probe_arg (fbt_id, thisid);
349 ASSERT (entryfbt != NULL);
350 for(; entryfbt != NULL; entryfbt = entryfbt->fbtp_next) {
351 if (entryfbt->fbtp_currentval == entryfbt->fbtp_patchval)
352 doenable++;
353
354 if (entryfbt->fbtp_next == NULL) {
355 entryfbt->fbtp_next = newfbt;
356 newfbt->fbtp_id = entryfbt->fbtp_id;
357 break;
358 }
359 }
360 }
361 else {
362 /*
363 * The dtrace_probe did not previously exist, so we
364 * create it and hook in the newfbt. Since the probe is
365 * new, we obviously do not need to enable it on the spot.
366 */
367 newfbt->fbtp_id = dtrace_probe_create(fbt_id, modname, symbolName, FBT_ENTRY, FBT_AFRAMES_ENTRY, newfbt);
368 doenable = 0;
369 }
370
371 newfbt->fbtp_patchpoint = instr;
372 newfbt->fbtp_ctl = ctl;
373 newfbt->fbtp_loadcnt = ctl->mod_loadcnt;
374 newfbt->fbtp_rval = DTRACE_INVOP_MOV_RSP_RBP;
375 newfbt->fbtp_savedval = theInstr;
376 newfbt->fbtp_patchval = FBT_PATCHVAL;
377 newfbt->fbtp_currentval = 0;
378 newfbt->fbtp_hashnext = fbt_probetab[FBT_ADDR2NDX(instr)];
379 fbt_probetab[FBT_ADDR2NDX(instr)] = newfbt;
380
381 if (doenable)
382 fbt_enable(NULL, newfbt->fbtp_id, newfbt);
383
384 /*
385 * The fbt entry chain is in place, one entry point per symbol.
386 * The fbt return chain can have multiple return points per symbol.
387 * Here we find the end of the fbt return chain.
388 */
389
390 doenable=0;
391
392 thisid = dtrace_probe_lookup(fbt_id, modname, symbolName, FBT_RETURN);
393 if (thisid != 0) {
394 /* The dtrace_probe previously existed, so we have to
395 * find the end of the existing fbt chain. If we find
396 * an fbt return that was previously patched to fire,
397 * (as indicated by the currrent patched value), then
398 * we want to enable any new fbts on the spot.
399 */
400 retfbt = dtrace_probe_arg (fbt_id, thisid);
401 ASSERT(retfbt != NULL);
402 for (; retfbt != NULL; retfbt = retfbt->fbtp_next) {
403 if (retfbt->fbtp_currentval == retfbt->fbtp_patchval)
404 doenable++;
405 if(retfbt->fbtp_next == NULL)
406 break;
407 }
408 }
409 else {
410 doenable = 0;
411 retfbt = NULL;
412 }
413
414 again:
415 if (instr >= limit)
416 return;
417
418 /*
419 * If this disassembly fails, then we've likely walked off into
420 * a jump table or some other unsuitable area. Bail out of the
421 * disassembly now.
422 */
423 if ((size = dtrace_instr_size(instr)) <= 0)
424 return;
425
426 /*
427 * We (desperately) want to avoid erroneously instrumenting a
428 * jump table, especially given that our markers are pretty
429 * short: two bytes on x86, and just one byte on amd64. To
430 * determine if we're looking at a true instruction sequence
431 * or an inline jump table that happens to contain the same
432 * byte sequences, we resort to some heuristic sleeze: we
433 * treat this instruction as being contained within a pointer,
434 * and see if that pointer points to within the body of the
435 * function. If it does, we refuse to instrument it.
436 */
437 for (j = 0; j < sizeof (uintptr_t); j++) {
438 uintptr_t check = (uintptr_t)instr - j;
439 uint8_t *ptr;
440
441 if (check < (uintptr_t)symbolStart)
442 break;
443
444 if (check + sizeof (uintptr_t) > (uintptr_t)limit)
445 continue;
446
447 ptr = *(uint8_t **)check;
448
449 if (ptr >= (uint8_t *)symbolStart && ptr < limit) {
450 instr += size;
451 goto again;
452 }
453 }
454
455 /*
456 * OK, it's an instruction.
457 */
458 theInstr = instr[0];
459
460 /* Walked onto the start of the next routine? If so, bail out of this function. */
461 if (theInstr == FBT_PUSH_RBP)
462 return;
463
464 if (!(size == 1 && (theInstr == FBT_POP_RBP || theInstr == FBT_LEAVE))) {
465 instr += size;
466 goto again;
467 }
468
469 /*
470 * Found the pop %rbp; or leave.
471 */
472 machine_inst_t *patch_instr = instr;
473
474 /*
475 * Scan forward for a "ret", or "jmp".
476 */
477 instr += size;
478 if (instr >= limit)
479 return;
480
481 size = dtrace_instr_size(instr);
482 if (size <= 0) /* Failed instruction decode? */
483 return;
484
485 theInstr = instr[0];
486
487 if (!(size == FBT_RET_LEN && (theInstr == FBT_RET)) &&
488 !(size == FBT_RET_IMM16_LEN && (theInstr == FBT_RET_IMM16)) &&
489 !(size == FBT_JMP_SHORT_REL_LEN && (theInstr == FBT_JMP_SHORT_REL)) &&
490 !(size == FBT_JMP_NEAR_REL_LEN && (theInstr == FBT_JMP_NEAR_REL)) &&
491 !(size == FBT_JMP_FAR_ABS_LEN && (theInstr == FBT_JMP_FAR_ABS)))
492 return;
493
494 /*
495 * pop %rbp; ret; or leave; ret; or leave; jmp tailCalledFun; -- We have a winner!
496 */
497 newfbt = kmem_zalloc(sizeof (fbt_probe_t), KM_SLEEP);
498 strlcpy( (char *)&(newfbt->fbtp_name), symbolName, MAX_FBTP_NAME_CHARS );
499
500 if (retfbt == NULL) {
501 newfbt->fbtp_id = dtrace_probe_create(fbt_id, modname,
502 symbolName, FBT_RETURN, FBT_AFRAMES_RETURN, newfbt);
503 } else {
504 retfbt->fbtp_next = newfbt;
505 newfbt->fbtp_id = retfbt->fbtp_id;
506 }
507
508 retfbt = newfbt;
509 newfbt->fbtp_patchpoint = patch_instr;
510 newfbt->fbtp_ctl = ctl;
511 newfbt->fbtp_loadcnt = ctl->mod_loadcnt;
512
513 if (*patch_instr == FBT_POP_RBP) {
514 newfbt->fbtp_rval = DTRACE_INVOP_POP_RBP;
515 } else {
516 ASSERT(*patch_instr == FBT_LEAVE);
517 newfbt->fbtp_rval = DTRACE_INVOP_LEAVE;
518 }
519 newfbt->fbtp_roffset =
520 (uintptr_t)(patch_instr - (uint8_t *)symbolStart);
521
522 newfbt->fbtp_savedval = *patch_instr;
523 newfbt->fbtp_patchval = FBT_PATCHVAL;
524 newfbt->fbtp_hashnext = fbt_probetab[FBT_ADDR2NDX(patch_instr)];
525 fbt_probetab[FBT_ADDR2NDX(patch_instr)] = newfbt;
526
527 if (doenable)
528 fbt_enable(NULL, newfbt->fbtp_id, newfbt);
529
530 instr += size;
531 goto again;
532 }
533