]> git.saurik.com Git - apple/xnu.git/blob - bsd/dev/i386/fbt_x86.c
xnu-4570.20.62.tar.gz
[apple/xnu.git] / bsd / dev / i386 / fbt_x86.c
1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License, Version 1.0 only
6 * (the "License"). You may not use this file except in compliance
7 * with the License.
8 *
9 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10 * or http://www.opensolaris.org/os/licensing.
11 * See the License for the specific language governing permissions
12 * and limitations under the License.
13 *
14 * When distributing Covered Code, include this CDDL HEADER in each
15 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16 * If applicable, add the following below this CDDL HEADER, with the
17 * fields enclosed by brackets "[]" replaced with your own identifying
18 * information: Portions Copyright [yyyy] [name of copyright owner]
19 *
20 * CDDL HEADER END
21 */
22 /*
23 * Copyright 2005 Sun Microsystems, Inc. All rights reserved.
24 * Use is subject to license terms.
25 */
26
27 /* #pragma ident "@(#)fbt.c 1.15 05/09/19 SMI" */
28
29 #ifdef KERNEL
30 #ifndef _KERNEL
31 #define _KERNEL /* Solaris vs. Darwin */
32 #endif
33 #endif
34
35 #define MACH__POSIX_C_SOURCE_PRIVATE 1 /* pulls in suitable savearea from mach/ppc/thread_status.h */
36 #include <kern/thread.h>
37 #include <mach/thread_status.h>
38 #include <mach/vm_param.h>
39 #include <mach-o/loader.h>
40 #include <mach-o/nlist.h>
41 #include <libkern/kernel_mach_header.h>
42 #include <libkern/OSAtomic.h>
43
44 #include <sys/param.h>
45 #include <sys/systm.h>
46 #include <sys/errno.h>
47 #include <sys/stat.h>
48 #include <sys/ioctl.h>
49 #include <sys/conf.h>
50 #include <sys/fcntl.h>
51 #include <miscfs/devfs/devfs.h>
52
53 #include <sys/dtrace.h>
54 #include <sys/dtrace_impl.h>
55 #include <sys/fbt.h>
56
57 #include <sys/dtrace_glue.h>
58
/*
 * Emulation codes defined locally, and the number of instruction bytes the
 * trap handler advances the saved instruction pointer by after emulating
 * the corresponding patched instruction (see fbt_perfCallback()).
 */
#define DTRACE_INVOP_NOP_SKIP 1
#define DTRACE_INVOP_MOVL_ESP_EBP 10
#define DTRACE_INVOP_MOVL_ESP_EBP_SKIP 2
#define DTRACE_INVOP_MOV_RSP_RBP 11
#define DTRACE_INVOP_MOV_RSP_RBP_SKIP 3
#define DTRACE_INVOP_POP_RBP 12
#define DTRACE_INVOP_POP_RBP_SKIP 1
#define DTRACE_INVOP_LEAVE_SKIP 1

/* Opcode bytes of the 32-bit prologue forms (pushl %ebp; movl %esp,%ebp). */
#define FBT_PUSHL_EBP 0x55
#define FBT_MOVL_ESP_EBP0_V0 0x8b
#define FBT_MOVL_ESP_EBP1_V0 0xec
#define FBT_MOVL_ESP_EBP0_V1 0x89
#define FBT_MOVL_ESP_EBP1_V1 0xe5

/*
 * Opcode bytes of the 64-bit prologue/epilogue that fbt_provide_probe()
 * matches: push %rbp, then the three-byte mov %rsp,%rbp (REX prefix,
 * opcode, ModRM), and pop %rbp.
 */
#define FBT_PUSH_RBP 0x55
#define FBT_REX_RSP_RBP 0x48
#define FBT_MOV_RSP_RBP0 0x89
#define FBT_MOV_RSP_RBP1 0xe5
#define FBT_POP_RBP 0x5d

/*
 * First opcode byte, and total encoded length, of the instructions that
 * may follow a pop %rbp / leave for it to count as a return site.
 */
#define FBT_POPL_EBP 0x5d
#define FBT_RET 0xc3
#define FBT_RET_IMM16 0xc2
#define FBT_LEAVE 0xc9
#define FBT_JMP_SHORT_REL 0xeb /* Jump short, relative, displacement relative to next instr. */
#define FBT_JMP_NEAR_REL 0xe9 /* Jump near, relative, displacement relative to next instr. */
#define FBT_JMP_FAR_ABS 0xea /* Jump far, absolute, address given in operand */
#define FBT_RET_LEN 1
#define FBT_RET_IMM16_LEN 3
#define FBT_JMP_SHORT_REL_LEN 2
#define FBT_JMP_NEAR_REL_LEN 5
#define FBT_JMP_FAR_ABS_LEN 5

/*
 * FBT_PATCHVAL is recorded as each probe's fbtp_patchval.
 * NOTE(review): presumably this is the byte written over the patch point
 * when the probe is enabled -- confirm against fbt_enable().
 * FBT_AFRAMES_* are the "aframes" values passed to dtrace_probe_create()
 * for entry and return probes respectively.
 */
#define FBT_PATCHVAL 0xf0
#define FBT_AFRAMES_ENTRY 7
#define FBT_AFRAMES_RETURN 6

/* Probe names, and the hash from a patch-point address to its fbt_probetab bucket. */
#define FBT_ENTRY "entry"
#define FBT_RETURN "return"
#define FBT_ADDR2NDX(addr) ((((uintptr_t)(addr)) >> 4) & fbt_probetab_mask)

/* Provider state defined elsewhere. */
extern dtrace_provider_id_t fbt_id;
extern fbt_probe_t **fbt_probetab;
extern int fbt_probetab_mask;

kern_return_t fbt_perfCallback(int, x86_saved_state_t *, uintptr_t *, __unused int);
106
107 int
108 fbt_invop(uintptr_t addr, uintptr_t *state, uintptr_t rval)
109 {
110 fbt_probe_t *fbt = fbt_probetab[FBT_ADDR2NDX(addr)];
111
112 for (; fbt != NULL; fbt = fbt->fbtp_hashnext) {
113 if ((uintptr_t)fbt->fbtp_patchpoint == addr) {
114
115 if (fbt->fbtp_roffset == 0) {
116 x86_saved_state64_t *regs = (x86_saved_state64_t *)state;
117
118 CPU->cpu_dtrace_caller = *(uintptr_t *)(((uintptr_t)(regs->isf.rsp))+sizeof(uint64_t)); // 8(%rsp)
119 /* 64-bit ABI, arguments passed in registers. */
120 dtrace_probe(fbt->fbtp_id, regs->rdi, regs->rsi, regs->rdx, regs->rcx, regs->r8);
121 CPU->cpu_dtrace_caller = 0;
122 } else {
123
124 dtrace_probe(fbt->fbtp_id, fbt->fbtp_roffset, rval, 0, 0, 0);
125 CPU->cpu_dtrace_caller = 0;
126 }
127
128 return (fbt->fbtp_rval);
129 }
130 }
131
132 return (0);
133 }
134
/*
 * IS_USER_TRAP(regs): nonzero when `regs` is non-NULL and the low two bits
 * of the saved code-segment selector are nonzero, i.e. the trap was not
 * taken at privilege level 0.  Every use of the argument is parenthesized
 * so that an expression argument (for example `c ? a : b`) expands with
 * the intended precedence.
 */
#define IS_USER_TRAP(regs)	((regs) && ((((regs)->isf.cs) & 3) != 0))
#define T_INVALID_OPCODE	6
/* Trap number that a fired FBT patch point raises. */
#define FBT_EXCEPTION_CODE	T_INVALID_OPCODE
#define T_PREEMPT		255
139
140 kern_return_t
141 fbt_perfCallback(
142 int trapno,
143 x86_saved_state_t *tagged_regs,
144 uintptr_t *lo_spp,
145 __unused int unused2)
146 {
147 kern_return_t retval = KERN_FAILURE;
148 x86_saved_state64_t *saved_state = saved_state64(tagged_regs);
149
150 if (FBT_EXCEPTION_CODE == trapno && !IS_USER_TRAP(saved_state)) {
151 boolean_t oldlevel;
152 uint64_t rsp_probe, fp, delta = 0;
153 uintptr_t old_sp;
154 uint32_t *pDst;
155 int emul;
156
157
158 oldlevel = ml_set_interrupts_enabled(FALSE);
159
160 /* Calculate where the stack pointer was when the probe instruction "fired." */
161 rsp_probe = saved_state->isf.rsp; /* Easy, x86_64 establishes this value in idt64.s */
162
163 __asm__ volatile(
164 "Ldtrace_invop_callsite_pre_label:\n"
165 ".data\n"
166 ".private_extern _dtrace_invop_callsite_pre\n"
167 "_dtrace_invop_callsite_pre:\n"
168 " .quad Ldtrace_invop_callsite_pre_label\n"
169 ".text\n"
170 );
171
172 emul = dtrace_invop( saved_state->isf.rip, (uintptr_t *)saved_state, saved_state->rax );
173
174 __asm__ volatile(
175 "Ldtrace_invop_callsite_post_label:\n"
176 ".data\n"
177 ".private_extern _dtrace_invop_callsite_post\n"
178 "_dtrace_invop_callsite_post:\n"
179 " .quad Ldtrace_invop_callsite_post_label\n"
180 ".text\n"
181 );
182
183 switch (emul) {
184 case DTRACE_INVOP_NOP:
185 saved_state->isf.rip += DTRACE_INVOP_NOP_SKIP; /* Skip over the patched NOP (planted by sdt). */
186 retval = KERN_SUCCESS;
187 break;
188
189 case DTRACE_INVOP_MOV_RSP_RBP:
190 saved_state->rbp = rsp_probe; /* Emulate patched mov %rsp,%rbp */
191 saved_state->isf.rip += DTRACE_INVOP_MOV_RSP_RBP_SKIP; /* Skip over the bytes of the patched mov %rsp,%rbp */
192 retval = KERN_SUCCESS;
193 break;
194
195 case DTRACE_INVOP_POP_RBP:
196 case DTRACE_INVOP_LEAVE:
197 /*
198 * Emulate first micro-op of patched leave: mov %rbp,%rsp
199 * fp points just below the return address slot for target's ret
200 * and at the slot holding the frame pointer saved by the target's prologue.
201 */
202 fp = saved_state->rbp;
203 /* Emulate second micro-op of patched leave: patched pop %rbp
204 * savearea rbp is set for the frame of the caller to target
205 * The *live* %rsp will be adjusted below for pop increment(s)
206 */
207 saved_state->rbp = *(uint64_t *)fp;
208 /* Skip over the patched leave */
209 saved_state->isf.rip += DTRACE_INVOP_LEAVE_SKIP;
210 /*
211 * Lift the stack to account for the emulated leave
212 * Account for words local in this frame
213 * (in "case DTRACE_INVOP_POPL_EBP:" this is zero.)
214 */
215 delta = ((uint32_t *)fp) - ((uint32_t *)rsp_probe); /* delta is a *word* increment */
216 /* Account for popping off the rbp (just accomplished by the emulation
217 * above...)
218 */
219 delta += 2;
220 saved_state->isf.rsp += (delta << 2);
221 /* Obtain the stack pointer recorded by the trampolines */
222 old_sp = *lo_spp;
223 /* Shift contents of stack */
224 for (pDst = (uint32_t *)fp;
225 pDst > (((uint32_t *)old_sp));
226 pDst--)
227 *pDst = pDst[-delta];
228
229 /* Track the stack lift in "saved_state". */
230 saved_state = (x86_saved_state64_t *) (((uintptr_t)saved_state) + (delta << 2));
231 /* Adjust the stack pointer utilized by the trampolines */
232 *lo_spp = old_sp + (delta << 2);
233
234 retval = KERN_SUCCESS;
235 break;
236
237 default:
238 retval = KERN_FAILURE;
239 break;
240 }
241
242 /* Trick trap_from_kernel into not attempting to handle pending AST_URGENT */
243 saved_state->isf.trapno = T_PREEMPT;
244
245 ml_set_interrupts_enabled(oldlevel);
246 }
247
248 return retval;
249 }
250
251 void
252 fbt_provide_probe(struct modctl *ctl, uintptr_t instrLow, uintptr_t instrHigh, char *modname, char* symbolName, machine_inst_t* symbolStart)
253 {
254 unsigned int j;
255 unsigned int doenable = 0;
256 dtrace_id_t thisid;
257
258 fbt_probe_t *newfbt, *retfbt, *entryfbt;
259 machine_inst_t *instr, *limit, theInstr, i1, i2, i3;
260 int size;
261
262 /*
263 * Guard against null symbols
264 */
265 if (!symbolStart || !instrLow || !instrHigh) {
266 kprintf("dtrace: %s has an invalid address\n", symbolName);
267 return;
268 }
269
270 for (j = 0, instr = symbolStart, theInstr = 0;
271 (j < 4) && ((uintptr_t)instr >= instrLow) && (instrHigh > (uintptr_t)(instr + 2));
272 j++) {
273 theInstr = instr[0];
274 if (theInstr == FBT_PUSH_RBP || theInstr == FBT_RET || theInstr == FBT_RET_IMM16)
275 break;
276
277 if ((size = dtrace_instr_size(instr)) <= 0)
278 break;
279
280 instr += size;
281 }
282
283 if (theInstr != FBT_PUSH_RBP)
284 return;
285
286 i1 = instr[1];
287 i2 = instr[2];
288 i3 = instr[3];
289
290 limit = (machine_inst_t *)instrHigh;
291
292 if (i1 == FBT_REX_RSP_RBP && i2 == FBT_MOV_RSP_RBP0 && i3 == FBT_MOV_RSP_RBP1) {
293 instr += 1; /* Advance to the mov %rsp,%rbp */
294 theInstr = i1;
295 } else {
296 return;
297 }
298 #if 0
299 else {
300 /*
301 * Sometimes, the compiler will schedule an intervening instruction
302 * in the function prologue. Example:
303 *
304 * _mach_vm_read:
305 * 000006d8 pushl %ebp
306 * 000006d9 movl $0x00000004,%edx
307 * 000006de movl %esp,%ebp
308 *
309 * Try the next instruction, to see if it is a movl %esp,%ebp
310 */
311
312 instr += 1; /* Advance past the pushl %ebp */
313 if ((size = dtrace_instr_size(instr)) <= 0)
314 return;
315
316 instr += size;
317
318 if ((instr + 1) >= limit)
319 return;
320
321 i1 = instr[0];
322 i2 = instr[1];
323
324 if (!(i1 == FBT_MOVL_ESP_EBP0_V0 && i2 == FBT_MOVL_ESP_EBP1_V0) &&
325 !(i1 == FBT_MOVL_ESP_EBP0_V1 && i2 == FBT_MOVL_ESP_EBP1_V1))
326 return;
327
328 /* instr already points at the movl %esp,%ebp */
329 theInstr = i1;
330 }
331 #endif
332 thisid = dtrace_probe_lookup(fbt_id, modname, symbolName, FBT_ENTRY);
333 newfbt = kmem_zalloc(sizeof (fbt_probe_t), KM_SLEEP);
334 strlcpy( (char *)&(newfbt->fbtp_name), symbolName, MAX_FBTP_NAME_CHARS );
335
336 if (thisid != 0) {
337 /*
338 * The dtrace_probe previously existed, so we have to hook
339 * the newfbt entry onto the end of the existing fbt's chain.
340 * If we find an fbt entry that was previously patched to
341 * fire, (as indicated by the current patched value), then
342 * we want to enable this newfbt on the spot.
343 */
344 entryfbt = dtrace_probe_arg (fbt_id, thisid);
345 ASSERT (entryfbt != NULL);
346 for(; entryfbt != NULL; entryfbt = entryfbt->fbtp_next) {
347 if (entryfbt->fbtp_currentval == entryfbt->fbtp_patchval)
348 doenable++;
349
350 if (entryfbt->fbtp_next == NULL) {
351 entryfbt->fbtp_next = newfbt;
352 newfbt->fbtp_id = entryfbt->fbtp_id;
353 break;
354 }
355 }
356 }
357 else {
358 /*
359 * The dtrace_probe did not previously exist, so we
360 * create it and hook in the newfbt. Since the probe is
361 * new, we obviously do not need to enable it on the spot.
362 */
363 newfbt->fbtp_id = dtrace_probe_create(fbt_id, modname, symbolName, FBT_ENTRY, FBT_AFRAMES_ENTRY, newfbt);
364 doenable = 0;
365 }
366
367 newfbt->fbtp_patchpoint = instr;
368 newfbt->fbtp_ctl = ctl;
369 newfbt->fbtp_loadcnt = ctl->mod_loadcnt;
370 newfbt->fbtp_rval = DTRACE_INVOP_MOV_RSP_RBP;
371 newfbt->fbtp_savedval = theInstr;
372 newfbt->fbtp_patchval = FBT_PATCHVAL;
373 newfbt->fbtp_currentval = 0;
374 newfbt->fbtp_hashnext = fbt_probetab[FBT_ADDR2NDX(instr)];
375 fbt_probetab[FBT_ADDR2NDX(instr)] = newfbt;
376
377 if (doenable)
378 fbt_enable(NULL, newfbt->fbtp_id, newfbt);
379
380 /*
381 * The fbt entry chain is in place, one entry point per symbol.
382 * The fbt return chain can have multiple return points per symbol.
383 * Here we find the end of the fbt return chain.
384 */
385
386 doenable=0;
387
388 thisid = dtrace_probe_lookup(fbt_id, modname, symbolName, FBT_RETURN);
389 if (thisid != 0) {
390 /* The dtrace_probe previously existed, so we have to
391 * find the end of the existing fbt chain. If we find
392 * an fbt return that was previously patched to fire,
393 * (as indicated by the currrent patched value), then
394 * we want to enable any new fbts on the spot.
395 */
396 retfbt = dtrace_probe_arg (fbt_id, thisid);
397 ASSERT(retfbt != NULL);
398 for (; retfbt != NULL; retfbt = retfbt->fbtp_next) {
399 if (retfbt->fbtp_currentval == retfbt->fbtp_patchval)
400 doenable++;
401 if(retfbt->fbtp_next == NULL)
402 break;
403 }
404 }
405 else {
406 doenable = 0;
407 retfbt = NULL;
408 }
409
410 again:
411 if (instr >= limit)
412 return;
413
414 /*
415 * If this disassembly fails, then we've likely walked off into
416 * a jump table or some other unsuitable area. Bail out of the
417 * disassembly now.
418 */
419 if ((size = dtrace_instr_size(instr)) <= 0)
420 return;
421
422 /*
423 * We (desperately) want to avoid erroneously instrumenting a
424 * jump table, especially given that our markers are pretty
425 * short: two bytes on x86, and just one byte on amd64. To
426 * determine if we're looking at a true instruction sequence
427 * or an inline jump table that happens to contain the same
428 * byte sequences, we resort to some heuristic sleeze: we
429 * treat this instruction as being contained within a pointer,
430 * and see if that pointer points to within the body of the
431 * function. If it does, we refuse to instrument it.
432 */
433 for (j = 0; j < sizeof (uintptr_t); j++) {
434 uintptr_t check = (uintptr_t)instr - j;
435 uint8_t *ptr;
436
437 if (check < (uintptr_t)symbolStart)
438 break;
439
440 if (check + sizeof (uintptr_t) > (uintptr_t)limit)
441 continue;
442
443 ptr = *(uint8_t **)check;
444
445 if (ptr >= (uint8_t *)symbolStart && ptr < limit) {
446 instr += size;
447 goto again;
448 }
449 }
450
451 /*
452 * OK, it's an instruction.
453 */
454 theInstr = instr[0];
455
456 /* Walked onto the start of the next routine? If so, bail out of this function. */
457 if (theInstr == FBT_PUSH_RBP)
458 return;
459
460 if (!(size == 1 && (theInstr == FBT_POP_RBP || theInstr == FBT_LEAVE))) {
461 instr += size;
462 goto again;
463 }
464
465 /*
466 * Found the pop %rbp; or leave.
467 */
468 machine_inst_t *patch_instr = instr;
469
470 /*
471 * Scan forward for a "ret", or "jmp".
472 */
473 instr += size;
474 if (instr >= limit)
475 return;
476
477 size = dtrace_instr_size(instr);
478 if (size <= 0) /* Failed instruction decode? */
479 return;
480
481 theInstr = instr[0];
482
483 if (!(size == FBT_RET_LEN && (theInstr == FBT_RET)) &&
484 !(size == FBT_RET_IMM16_LEN && (theInstr == FBT_RET_IMM16)) &&
485 !(size == FBT_JMP_SHORT_REL_LEN && (theInstr == FBT_JMP_SHORT_REL)) &&
486 !(size == FBT_JMP_NEAR_REL_LEN && (theInstr == FBT_JMP_NEAR_REL)) &&
487 !(size == FBT_JMP_FAR_ABS_LEN && (theInstr == FBT_JMP_FAR_ABS)))
488 return;
489
490 /*
491 * pop %rbp; ret; or leave; ret; or leave; jmp tailCalledFun; -- We have a winner!
492 */
493 newfbt = kmem_zalloc(sizeof (fbt_probe_t), KM_SLEEP);
494 strlcpy( (char *)&(newfbt->fbtp_name), symbolName, MAX_FBTP_NAME_CHARS );
495
496 if (retfbt == NULL) {
497 newfbt->fbtp_id = dtrace_probe_create(fbt_id, modname,
498 symbolName, FBT_RETURN, FBT_AFRAMES_RETURN, newfbt);
499 } else {
500 retfbt->fbtp_next = newfbt;
501 newfbt->fbtp_id = retfbt->fbtp_id;
502 }
503
504 retfbt = newfbt;
505 newfbt->fbtp_patchpoint = patch_instr;
506 newfbt->fbtp_ctl = ctl;
507 newfbt->fbtp_loadcnt = ctl->mod_loadcnt;
508
509 if (*patch_instr == FBT_POP_RBP) {
510 newfbt->fbtp_rval = DTRACE_INVOP_POP_RBP;
511 } else {
512 ASSERT(*patch_instr == FBT_LEAVE);
513 newfbt->fbtp_rval = DTRACE_INVOP_LEAVE;
514 }
515 newfbt->fbtp_roffset =
516 (uintptr_t)(patch_instr - (uint8_t *)symbolStart);
517
518 newfbt->fbtp_savedval = *patch_instr;
519 newfbt->fbtp_patchval = FBT_PATCHVAL;
520 newfbt->fbtp_hashnext = fbt_probetab[FBT_ADDR2NDX(patch_instr)];
521 fbt_probetab[FBT_ADDR2NDX(patch_instr)] = newfbt;
522
523 if (doenable)
524 fbt_enable(NULL, newfbt->fbtp_id, newfbt);
525
526 instr += size;
527 goto again;
528 }
529
530 void
531 fbt_provide_module_kernel_syms(struct modctl *ctl)
532 {
533 kernel_mach_header_t *mh;
534 struct load_command *cmd;
535 kernel_segment_command_t *orig_ts = NULL, *orig_le = NULL;
536 struct symtab_command *orig_st = NULL;
537 kernel_nlist_t *sym = NULL;
538 char *strings;
539 uintptr_t instrLow, instrHigh;
540 char *modname;
541 unsigned int i;
542
543 mh = (kernel_mach_header_t *)(ctl->mod_address);
544 modname = ctl->mod_modname;
545
546 if (mh->magic != MH_MAGIC_KERNEL)
547 return;
548
549 cmd = (struct load_command *) &mh[1];
550 for (i = 0; i < mh->ncmds; i++) {
551 if (cmd->cmd == LC_SEGMENT_KERNEL) {
552 kernel_segment_command_t *orig_sg = (kernel_segment_command_t *) cmd;
553
554 if (LIT_STRNEQL(orig_sg->segname, SEG_TEXT))
555 orig_ts = orig_sg;
556 else if (LIT_STRNEQL(orig_sg->segname, SEG_LINKEDIT))
557 orig_le = orig_sg;
558 else if (LIT_STRNEQL(orig_sg->segname, ""))
559 orig_ts = orig_sg; /* kexts have a single unnamed segment */
560 }
561 else if (cmd->cmd == LC_SYMTAB)
562 orig_st = (struct symtab_command *) cmd;
563
564 cmd = (struct load_command *) ((caddr_t) cmd + cmd->cmdsize);
565 }
566
567 if ((orig_ts == NULL) || (orig_st == NULL) || (orig_le == NULL))
568 return;
569
570 sym = (kernel_nlist_t *)(orig_le->vmaddr + orig_st->symoff - orig_le->fileoff);
571 strings = (char *)(orig_le->vmaddr + orig_st->stroff - orig_le->fileoff);
572
573 /* Find extent of the TEXT section */
574 instrLow = (uintptr_t)orig_ts->vmaddr;
575 instrHigh = (uintptr_t)(orig_ts->vmaddr + orig_ts->vmsize);
576
577 for (i = 0; i < orig_st->nsyms; i++) {
578 uint8_t n_type = sym[i].n_type & (N_TYPE | N_EXT);
579 char *name = strings + sym[i].n_un.n_strx;
580
581 /* Check that the symbol is a global and that it has a name. */
582 if (((N_SECT | N_EXT) != n_type && (N_ABS | N_EXT) != n_type))
583 continue;
584
585 if (0 == sym[i].n_un.n_strx) /* iff a null, "", name. */
586 continue;
587
588 /* Lop off omnipresent leading underscore. */
589 if (*name == '_')
590 name += 1;
591
592 /*
593 * We're only blacklisting functions in the kernel for now.
594 */
595 if (MOD_IS_MACH_KERNEL(ctl) && fbt_excluded(name))
596 continue;
597
598 fbt_provide_probe(ctl, instrLow, instrHigh, modname, name, (machine_inst_t*)sym[i].n_value);
599 }
600 }