]> git.saurik.com Git - apple/xnu.git/blame - bsd/dev/i386/fbt_x86.c
xnu-1486.2.11.tar.gz
[apple/xnu.git] / bsd / dev / i386 / fbt_x86.c
CommitLineData
2d21ac55
A
1/*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License, Version 1.0 only
6 * (the "License"). You may not use this file except in compliance
7 * with the License.
8 *
9 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10 * or http://www.opensolaris.org/os/licensing.
11 * See the License for the specific language governing permissions
12 * and limitations under the License.
13 *
14 * When distributing Covered Code, include this CDDL HEADER in each
15 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16 * If applicable, add the following below this CDDL HEADER, with the
17 * fields enclosed by brackets "[]" replaced with your own identifying
18 * information: Portions Copyright [yyyy] [name of copyright owner]
19 *
20 * CDDL HEADER END
21 */
22/*
23 * Copyright 2005 Sun Microsystems, Inc. All rights reserved.
24 * Use is subject to license terms.
25 */
26
27/* #pragma ident "@(#)fbt.c 1.15 05/09/19 SMI" */
28
29#ifdef KERNEL
30#ifndef _KERNEL
31#define _KERNEL /* Solaris vs. Darwin */
32#endif
33#endif
34
35#define MACH__POSIX_C_SOURCE_PRIVATE 1 /* pulls in suitable savearea from mach/ppc/thread_status.h */
36#include <kern/thread.h>
37#include <mach/thread_status.h>
38#include <mach/vm_param.h>
39#include <mach-o/loader.h>
40#include <mach-o/nlist.h>
b0d623f7 41#include <libkern/kernel_mach_header.h>
2d21ac55
A
42
43#include <sys/param.h>
44#include <sys/systm.h>
45#include <sys/errno.h>
46#include <sys/stat.h>
47#include <sys/ioctl.h>
48#include <sys/conf.h>
49#include <sys/fcntl.h>
50#include <miscfs/devfs/devfs.h>
51
52#include <sys/dtrace.h>
53#include <sys/dtrace_impl.h>
54#include <sys/fbt.h>
55
56#include <sys/dtrace_glue.h>
57
58#define DTRACE_INVOP_NOP_SKIP 1
59#define DTRACE_INVOP_MOVL_ESP_EBP 10
60#define DTRACE_INVOP_MOVL_ESP_EBP_SKIP 2
b0d623f7
A
61#define DTRACE_INVOP_MOV_RSP_RBP 11
62#define DTRACE_INVOP_MOV_RSP_RBP_SKIP 3
63#define DTRACE_INVOP_POP_RBP 12
64#define DTRACE_INVOP_POP_RBP_SKIP 1
2d21ac55
A
65#define DTRACE_INVOP_LEAVE_SKIP 1
66
67#define FBT_PUSHL_EBP 0x55
68#define FBT_MOVL_ESP_EBP0_V0 0x8b
69#define FBT_MOVL_ESP_EBP1_V0 0xec
70#define FBT_MOVL_ESP_EBP0_V1 0x89
71#define FBT_MOVL_ESP_EBP1_V1 0xe5
b0d623f7
A
72
73#define FBT_PUSH_RBP 0x55
2d21ac55 74#define FBT_REX_RSP_RBP 0x48
b0d623f7
A
75#define FBT_MOV_RSP_RBP0 0x89
76#define FBT_MOV_RSP_RBP1 0xe5
77#define FBT_POP_RBP 0x5d
2d21ac55
A
78
79#define FBT_POPL_EBP 0x5d
80#define FBT_RET 0xc3
81#define FBT_RET_IMM16 0xc2
82#define FBT_LEAVE 0xc9
83#define FBT_JMP_SHORT_REL 0xeb /* Jump short, relative, displacement relative to next instr. */
84#define FBT_JMP_NEAR_REL 0xe9 /* Jump near, relative, displacement relative to next instr. */
85#define FBT_JMP_FAR_ABS 0xea /* Jump far, absolute, address given in operand */
86#define FBT_RET_LEN 1
87#define FBT_RET_IMM16_LEN 3
88#define FBT_JMP_SHORT_REL_LEN 2
89#define FBT_JMP_NEAR_REL_LEN 5
90#define FBT_JMP_FAR_ABS_LEN 5
91
92#define FBT_PATCHVAL 0xf0
93#define FBT_AFRAMES_ENTRY 7
94#define FBT_AFRAMES_RETURN 6
95
96#define FBT_ENTRY "entry"
97#define FBT_RETURN "return"
98#define FBT_ADDR2NDX(addr) ((((uintptr_t)(addr)) >> 4) & fbt_probetab_mask)
99
100extern dtrace_provider_id_t fbt_id;
101extern fbt_probe_t **fbt_probetab;
102extern int fbt_probetab_mask;
103
b0d623f7
A
104kern_return_t fbt_perfCallback(int, x86_saved_state_t *, __unused int, __unused int);
105
2d21ac55
A
106/*
107 * Critical routines that must not be probed. PR_5221096, PR_5379018.
b0d623f7 108 * The blacklist must be kept in alphabetic order for purposes of bsearch().
2d21ac55
A
109 */
110
/*
 * Routines on which planting a probe would be fatal (taken in early CPU
 * bring-up, interrupt dispatch, or other paths the trap handler itself
 * depends on).  Consulted via bsearch()/_cmp(), so the table MUST remain
 * sorted in ascending strcmp() order.
 */
static const char * critical_blacklist[] =
{
	"bcopy_phys",
	"console_cpu_alloc",
	"console_cpu_free",
	"cpu_IA32e_disable",
	"cpu_IA32e_enable",
	"cpu_NMI_interrupt",
	"cpu_control",
	"cpu_data_alloc",
	"cpu_desc_init",
	"cpu_desc_init64",
	"cpu_desc_load",
	"cpu_desc_load64",
	"cpu_exit_wait",
	"cpu_info",
	"cpu_info_count",
	"cpu_init",
	"cpu_interrupt",
	"cpu_machine_init",
	"cpu_mode_init",
	"cpu_processor_alloc",
	"cpu_processor_free",
	"cpu_signal_handler",
	"cpu_sleep",
	"cpu_start",
	"cpu_subtype",
	"cpu_thread_alloc",
	"cpu_thread_halt",
	"cpu_thread_init",
	"cpu_threadtype",
	"cpu_to_processor",
	"cpu_topology_sort",
	"cpu_topology_start_cpu",
	"cpu_type",
	"cpuid_cpu_display",
	"handle_pending_TLB_flushes",
	"hw_compare_and_store",
	"machine_idle_cstate",
	"mca_cpu_alloc",
	"mca_cpu_init",
	"ml_nofault_copy",
	"pmap_cpu_alloc",
	"pmap_cpu_free",
	"pmap_cpu_high_map_vaddr",
	"pmap_cpu_high_shared_remap",
	"pmap_cpu_init",
	"register_cpu_setup_func",
	"unregister_cpu_setup_func",
	"vstart"
};
/* Number of entries in critical_blacklist[]. */
#define CRITICAL_BLACKLIST_COUNT (sizeof(critical_blacklist)/sizeof(critical_blacklist[0]))
163
164/*
165 * The transitive closure of entry points that can be reached from probe context.
b0d623f7 166 * (Apart from routines whose names begin with dtrace_).
2d21ac55
A
167 */
/*
 * The transitive closure of entry points reachable from probe context
 * (apart from routines whose names begin with dtrace_).  Probing any of
 * these could recurse into the probe machinery.  Consulted via
 * bsearch()/_cmp(), so the table MUST remain sorted in ascending
 * strcmp() order.
 */
static const char * probe_ctx_closure[] =
{
	"Debugger",
	"IS_64BIT_PROCESS",
	"OSCompareAndSwap",
	"absolutetime_to_microtime",
	"ast_pending",
	"astbsd_on",
	"clock_get_calendar_nanotime_nowait",
	"copyin",
	"copyin_user",
	"copyinstr",
	"copyout",
	"copyoutstr",
	"cpu_number",
	"current_proc",
	"current_processor",
	"current_task",
	"current_thread",
	"debug_enter",
	"find_user_regs",
	"flush_tlb64",
	"get_bsdtask_info",
	"get_bsdthread_info",
	"hw_atomic_and",
	"kauth_cred_get",
	"kauth_getgid",
	"kauth_getuid",
	"kernel_preempt_check",
	"mach_absolute_time",
	"max_valid_stack_address",
	"ml_at_interrupt_context",
	"ml_phys_write_byte_64",
	"ml_phys_write_half_64",
	"ml_phys_write_word_64",
	"ml_set_interrupts_enabled",
	"panic",
	"pmap64_pde",
	"pmap64_pdpt",
	"pmap_find_phys",
	"pmap_get_mapwindow",
	"pmap_pde",
	"pmap_pte",
	"pmap_put_mapwindow",
	"pmap_valid_page",
	"prf",
	"proc_is64bit",
	"proc_selfname",
	"proc_selfpid",
	"proc_selfppid",
	"psignal_lock",
	"rtc_nanotime_load",
	"rtc_nanotime_read",
	"sdt_getargdesc",
	"strlcpy",
	"sync_iss_to_iks_unconditionally",
	"systrace_stub",
	"timer_grab"
};
/* Number of entries in probe_ctx_closure[]. */
#define PROBE_CTX_CLOSURE_COUNT (sizeof(probe_ctx_closure)/sizeof(probe_ctx_closure[0]))
228
229
/*
 * bsearch() comparator.  'a' is the probe-candidate name (NUL-terminated);
 * 'b' points at an element of a table of string pointers.  Comparing
 * strlen(a)+1 bytes includes the terminator, so the match is exact rather
 * than a prefix match.
 */
static int _cmp(const void *a, const void *b)
{
	const char *key = (const char *)a;
	const char *entry = *(const char **)b;

	return strncmp(key, entry, strlen(key) + 1);
}
234
/*
 * Local binary search over a sorted table (no libc in the kernel).
 * Returns a pointer to the matching element, or NULL when absent.
 * Classic "halve the remaining span" formulation: on key > mid we step
 * past mid and drop one element before halving; on key < mid the halving
 * alone discards the upper part.
 */
static const void * bsearch(
	const void *key,
	const void *base0,
	size_t nmemb,
	size_t size,
	int (*compar)(const void *, const void *))
{
	const char *lo = base0;
	size_t remaining = nmemb;

	while (remaining != 0) {
		const char *mid = lo + (remaining >> 1) * size;
		int cmp = compar(key, mid);

		if (cmp == 0)
			return mid;
		if (cmp > 0) {
			/* key sorts after mid: continue in the upper half */
			lo = mid + size;
			remaining--;
		}
		remaining >>= 1;
	}
	return NULL;
}
259
b0d623f7 260#if defined(__i386__)
2d21ac55
A
261int
262fbt_invop(uintptr_t addr, uintptr_t *stack, uintptr_t rval)
263{
264 uintptr_t stack0 = 0, stack1 = 0, stack2 = 0, stack3 = 0, stack4 = 0;
265 fbt_probe_t *fbt = fbt_probetab[FBT_ADDR2NDX(addr)];
266
267 for (; fbt != NULL; fbt = fbt->fbtp_hashnext) {
268 if ((uintptr_t)fbt->fbtp_patchpoint == addr) {
269
270 if (fbt->fbtp_roffset == 0) {
271 uintptr_t *stacktop;
272 if (CPU_ON_INTR(CPU))
273 stacktop = (uintptr_t *)dtrace_get_cpu_int_stack_top();
274 else
b0d623f7 275 stacktop = (uintptr_t *)(dtrace_get_kernel_stack(current_thread()) + kernel_stack_size);
2d21ac55
A
276
277 stack += 1; /* skip over the target's pushl'd %ebp */
278
279 if (stack <= stacktop)
280 CPU->cpu_dtrace_caller = *stack++;
281 if (stack <= stacktop)
282 stack0 = *stack++;
283 if (stack <= stacktop)
284 stack1 = *stack++;
285 if (stack <= stacktop)
286 stack2 = *stack++;
287 if (stack <= stacktop)
288 stack3 = *stack++;
289 if (stack <= stacktop)
290 stack4 = *stack++;
291
b0d623f7 292 /* 32-bit ABI, arguments passed on stack. */
2d21ac55
A
293 dtrace_probe(fbt->fbtp_id, stack0, stack1, stack2, stack3, stack4);
294 CPU->cpu_dtrace_caller = 0;
295 } else {
296 dtrace_probe(fbt->fbtp_id, fbt->fbtp_roffset, rval, 0, 0, 0);
297 CPU->cpu_dtrace_caller = 0;
298 }
299
300 return (fbt->fbtp_rval);
301 }
302 }
303
304 return (0);
305}
306
307#define IS_USER_TRAP(regs) (regs && (((regs)->cs & 3) != 0))
308#define T_INVALID_OPCODE 6
309#define FBT_EXCEPTION_CODE T_INVALID_OPCODE
b0d623f7 310#define T_PREEMPT 255
2d21ac55
A
311
312kern_return_t
313fbt_perfCallback(
314 int trapno,
315 x86_saved_state_t *tagged_regs,
316 __unused int unused1,
317 __unused int unused2)
318{
319 kern_return_t retval = KERN_FAILURE;
320 x86_saved_state32_t *saved_state = saved_state32(tagged_regs);
321 struct x86_saved_state32_from_kernel *regs = (struct x86_saved_state32_from_kernel *)saved_state;
322
323 if (FBT_EXCEPTION_CODE == trapno && !IS_USER_TRAP(saved_state)) {
324 boolean_t oldlevel, cpu_64bit;
325 uint32_t esp_probe, *ebp, edi, fp, *pDst, delta = 0;
326 int emul;
327
328 cpu_64bit = ml_is64bit();
329 oldlevel = ml_set_interrupts_enabled(FALSE);
330
331 /* Calculate where the stack pointer was when the probe instruction "fired." */
332 if (cpu_64bit) {
333 esp_probe = saved_state->uesp; /* Easy, x86_64 establishes this value in idt64.s */
334 } else {
335 esp_probe = (uint32_t)&(regs[1]); /* Nasty, infer the location above the save area */
336 }
337
338 emul = dtrace_invop( saved_state->eip, (uintptr_t *)esp_probe, saved_state->eax );
339 __asm__ volatile(".globl _dtrace_invop_callsite");
340 __asm__ volatile("_dtrace_invop_callsite:");
341
342 switch (emul) {
343 case DTRACE_INVOP_NOP:
b0d623f7 344 saved_state->eip += DTRACE_INVOP_NOP_SKIP; /* Skip over the patched NOP (planted by sdt.) */
2d21ac55
A
345 retval = KERN_SUCCESS;
346 break;
347
348 case DTRACE_INVOP_MOVL_ESP_EBP:
349 saved_state->ebp = esp_probe; /* Emulate patched movl %esp,%ebp */
350 saved_state->eip += DTRACE_INVOP_MOVL_ESP_EBP_SKIP; /* Skip over the bytes of the patched movl %esp,%ebp */
351 retval = KERN_SUCCESS;
352 break;
353
354 case DTRACE_INVOP_POPL_EBP:
355 case DTRACE_INVOP_LEAVE:
356/*
357 * Emulate first micro-op of patched leave: movl %ebp,%esp
358 * fp points just below the return address slot for target's ret
359 * and at the slot holding the frame pointer saved by the target's prologue.
360 */
361 fp = saved_state->ebp;
362/* Emulate second micro-op of patched leave: patched popl %ebp
363 * savearea ebp is set for the frame of the caller to target
364 * The *live* %esp will be adjusted below for pop increment(s)
365 */
366 saved_state->ebp = *(uint32_t *)fp;
367/* Skip over the patched leave */
368 saved_state->eip += DTRACE_INVOP_LEAVE_SKIP;
369/*
370 * Lift the stack to account for the emulated leave
371 * Account for words local in this frame
372 * (in "case DTRACE_INVOP_POPL_EBP:" this is zero.)
373 */
374 delta = ((uint32_t *)fp) - ((uint32_t *)esp_probe);
375/* Account for popping off the ebp (just accomplished by the emulation
376 * above...)
377 */
378 delta += 1;
379
380 if (cpu_64bit)
381 saved_state->uesp += (delta << 2);
382
383/* XXX Fragile in the extreme. Obtain the value of %edi that our caller pushed
384 * (on behalf of its caller -- trap_from_kernel()). Ultimately,
385 * trap_from_kernel's stack pointer is restored from this slot.
386 * This is sensitive to the manner in which the compiler preserves %edi,
387 * and trap_from_kernel()'s internals.
388 */
389 ebp = (uint32_t *)__builtin_frame_address(0);
390 ebp = (uint32_t *)*ebp;
391 edi = *(ebp - 1);
392/* Shift contents of stack */
393 for (pDst = (uint32_t *)fp;
394 pDst > (((uint32_t *)edi));
395 pDst--)
396 *pDst = pDst[-delta];
b0d623f7
A
397
398/* Track the stack lift in "saved_state". */
399 saved_state = (x86_saved_state32_t *) (((uintptr_t)saved_state) + (delta << 2));
400
2d21ac55
A
401/* Now adjust the value of %edi in our caller (kernel_trap)'s frame */
402 *(ebp - 1) = edi + (delta << 2);
403
404 retval = KERN_SUCCESS;
405 break;
406
407 default:
408 retval = KERN_FAILURE;
409 break;
410 }
b0d623f7
A
411 saved_state->trapno = T_PREEMPT; /* Avoid call to i386_astintr()! */
412
2d21ac55
A
413 ml_set_interrupts_enabled(oldlevel);
414 }
415
416 return retval;
417}
418
419/*ARGSUSED*/
420static void
421__fbt_provide_module(void *arg, struct modctl *ctl)
422{
423#pragma unused(arg)
b0d623f7 424 kernel_mach_header_t *mh;
2d21ac55 425 struct load_command *cmd;
b0d623f7 426 kernel_segment_command_t *orig_ts = NULL, *orig_le = NULL;
2d21ac55
A
427 struct symtab_command *orig_st = NULL;
428 struct nlist *sym = NULL;
429 char *strings;
430 uintptr_t instrLow, instrHigh;
431 char *modname;
b0d623f7 432 unsigned int i, j;
2d21ac55
A
433
434 int gIgnoreFBTBlacklist = 0;
593a1d5f 435 PE_parse_boot_argn("IgnoreFBTBlacklist", &gIgnoreFBTBlacklist, sizeof (gIgnoreFBTBlacklist));
2d21ac55 436
b0d623f7 437 mh = (kernel_mach_header_t *)(ctl->address);
2d21ac55
A
438 modname = ctl->mod_modname;
439
440 if (0 == ctl->address || 0 == ctl->size) /* Has the linker been jettisoned? */
441 return;
442
443 /*
444 * Employees of dtrace and their families are ineligible. Void
445 * where prohibited.
446 */
447
b0d623f7 448 if (LIT_STRNEQL(modname, "com.apple.driver.dtrace"))
2d21ac55
A
449 return;
450
451 if (strstr(modname, "CHUD") != NULL)
452 return;
453
454 if (mh->magic != MH_MAGIC)
455 return;
456
457 cmd = (struct load_command *) &mh[1];
458 for (i = 0; i < mh->ncmds; i++) {
b0d623f7
A
459 if (cmd->cmd == LC_SEGMENT_KERNEL) {
460 kernel_segment_command_t *orig_sg = (kernel_segment_command_t *) cmd;
2d21ac55 461
b0d623f7 462 if (LIT_STRNEQL(orig_sg->segname, SEG_TEXT))
2d21ac55 463 orig_ts = orig_sg;
b0d623f7 464 else if (LIT_STRNEQL(orig_sg->segname, SEG_LINKEDIT))
2d21ac55 465 orig_le = orig_sg;
b0d623f7 466 else if (LIT_STRNEQL(orig_sg->segname, ""))
2d21ac55
A
467 orig_ts = orig_sg; /* kexts have a single unnamed segment */
468 }
469 else if (cmd->cmd == LC_SYMTAB)
470 orig_st = (struct symtab_command *) cmd;
471
472 cmd = (struct load_command *) ((caddr_t) cmd + cmd->cmdsize);
473 }
474
475 if ((orig_ts == NULL) || (orig_st == NULL) || (orig_le == NULL))
476 return;
477
b0d623f7
A
478 sym = (struct nlist *)(orig_le->vmaddr + orig_st->symoff - orig_le->fileoff);
479 strings = (char *)(orig_le->vmaddr + orig_st->stroff - orig_le->fileoff);
2d21ac55
A
480
481 /* Find extent of the TEXT section */
482 instrLow = (uintptr_t)orig_ts->vmaddr;
483 instrHigh = (uintptr_t)(orig_ts->vmaddr + orig_ts->vmsize);
484
485 for (i = 0; i < orig_st->nsyms; i++) {
486 fbt_probe_t *fbt, *retfbt;
487 machine_inst_t *instr, *limit, theInstr, i1, i2;
488 uint8_t n_type = sym[i].n_type & (N_TYPE | N_EXT);
489 char *name = strings + sym[i].n_un.n_strx;
490 int size;
491
492 /* Check that the symbol is a global and that it has a name. */
493 if (((N_SECT | N_EXT) != n_type && (N_ABS | N_EXT) != n_type))
494 continue;
495
496 if (0 == sym[i].n_un.n_strx) /* iff a null, "", name. */
497 continue;
498
499 /* Lop off omnipresent leading underscore. */
500 if (*name == '_')
501 name += 1;
502
b0d623f7 503 if (LIT_STRNSTART(name, "dtrace_") && !LIT_STRNSTART(name, "dtrace_safe_")) {
2d21ac55
A
504 /*
505 * Anything beginning with "dtrace_" may be called
506 * from probe context unless it explitly indicates
507 * that it won't be called from probe context by
508 * using the prefix "dtrace_safe_".
509 */
510 continue;
511 }
512
b0d623f7 513 if (LIT_STRNSTART(name, "dsmos_"))
2d21ac55
A
514 continue; /* Don't Steal Mac OS X! */
515
b0d623f7 516 if (LIT_STRNSTART(name, "_dtrace"))
2d21ac55
A
517 continue; /* Shims in dtrace.c */
518
b0d623f7 519 if (LIT_STRNSTART(name, "chud"))
2d21ac55
A
520 continue; /* Professional courtesy. */
521
b0d623f7 522 if (LIT_STRNSTART(name, "hibernate_"))
2d21ac55
A
523 continue; /* Let sleeping dogs lie. */
524
b0d623f7
A
525 if (LIT_STRNEQL(name, "_ZN9IOService14newTemperatureElPS_") || /* IOService::newTemperature */
526 LIT_STRNEQL(name, "_ZN9IOService26temperatureCriticalForZoneEPS_")) /* IOService::temperatureCriticalForZone */
2d21ac55
A
527 continue; /* Per the fire code */
528
529 /*
530 * Place no probes (illegal instructions) in the exception handling path!
531 */
b0d623f7
A
532 if (LIT_STRNEQL(name, "t_invop") ||
533 LIT_STRNEQL(name, "enter_lohandler") ||
534 LIT_STRNEQL(name, "lo_alltraps") ||
535 LIT_STRNEQL(name, "kernel_trap") ||
536 LIT_STRNEQL(name, "interrupt") ||
537 LIT_STRNEQL(name, "i386_astintr"))
2d21ac55
A
538 continue;
539
b0d623f7
A
540 if (LIT_STRNEQL(name, "current_thread") ||
541 LIT_STRNEQL(name, "ast_pending") ||
542 LIT_STRNEQL(name, "fbt_perfCallback") ||
543 LIT_STRNEQL(name, "machine_thread_get_kern_state") ||
544 LIT_STRNEQL(name, "get_threadtask") ||
545 LIT_STRNEQL(name, "ml_set_interrupts_enabled") ||
546 LIT_STRNEQL(name, "dtrace_invop") ||
547 LIT_STRNEQL(name, "fbt_invop") ||
548 LIT_STRNEQL(name, "sdt_invop") ||
549 LIT_STRNEQL(name, "max_valid_stack_address"))
2d21ac55
A
550 continue;
551
552 /*
553 * Voodoo.
554 */
b0d623f7
A
555 if (LIT_STRNSTART(name, "machine_stack_") ||
556 LIT_STRNSTART(name, "mapping_") ||
557 LIT_STRNEQL(name, "tmrCvt") ||
558
559 LIT_STRNSTART(name, "tsc_") ||
560
561 LIT_STRNSTART(name, "pmCPU") ||
562 LIT_STRNEQL(name, "pmKextRegister") ||
563 LIT_STRNEQL(name, "pmMarkAllCPUsOff") ||
564 LIT_STRNEQL(name, "pmSafeMode") ||
565 LIT_STRNEQL(name, "pmTimerSave") ||
566 LIT_STRNEQL(name, "pmTimerRestore") ||
567 LIT_STRNEQL(name, "pmUnRegister") ||
568 LIT_STRNSTART(name, "pms") ||
569 LIT_STRNEQL(name, "power_management_init") ||
570 LIT_STRNSTART(name, "usimple_") ||
571 LIT_STRNEQL(name, "lck_spin_lock") ||
572 LIT_STRNEQL(name, "lck_spin_unlock") ||
573
574 LIT_STRNSTART(name, "rtc_") ||
575 LIT_STRNSTART(name, "_rtc_") ||
576 LIT_STRNSTART(name, "rtclock_") ||
577 LIT_STRNSTART(name, "clock_") ||
578 LIT_STRNSTART(name, "absolutetime_to_") ||
579 LIT_STRNEQL(name, "setPop") ||
580 LIT_STRNEQL(name, "nanoseconds_to_absolutetime") ||
581 LIT_STRNEQL(name, "nanotime_to_absolutetime") ||
582
583 LIT_STRNSTART(name, "etimer_") ||
584
585 LIT_STRNSTART(name, "commpage_") ||
586 LIT_STRNSTART(name, "pmap_") ||
587 LIT_STRNSTART(name, "ml_") ||
588 LIT_STRNSTART(name, "PE_") ||
589 LIT_STRNEQL(name, "kprintf") ||
590 LIT_STRNSTART(name, "lapic_") ||
591 LIT_STRNSTART(name, "acpi_"))
2d21ac55
A
592 continue;
593
594 /*
595 * Avoid machine_ routines. PR_5346750.
596 */
b0d623f7 597 if (LIT_STRNSTART(name, "machine_"))
2d21ac55
A
598 continue;
599
b0d623f7 600 if (LIT_STRNEQL(name, "handle_pending_TLB_flushes"))
2d21ac55
A
601 continue;
602
603 /*
604 * Place no probes on critical routines. PR_5221096
605 */
606 if (!gIgnoreFBTBlacklist &&
607 bsearch( name, critical_blacklist, CRITICAL_BLACKLIST_COUNT, sizeof(name), _cmp ) != NULL)
608 continue;
609
610 /*
611 * Place no probes that could be hit in probe context.
612 */
613 if (!gIgnoreFBTBlacklist &&
614 bsearch( name, probe_ctx_closure, PROBE_CTX_CLOSURE_COUNT, sizeof(name), _cmp ) != NULL)
615 continue;
616
617 /*
618 * Place no probes that could be hit on the way to the debugger.
619 */
b0d623f7
A
620 if (LIT_STRNSTART(name, "kdp_") ||
621 LIT_STRNSTART(name, "kdb_") ||
622 LIT_STRNSTART(name, "kdbg_") ||
623 LIT_STRNSTART(name, "kdebug_") ||
624 LIT_STRNEQL(name, "kernel_debug") ||
625 LIT_STRNEQL(name, "Debugger") ||
626 LIT_STRNEQL(name, "Call_DebuggerC") ||
627 LIT_STRNEQL(name, "lock_debugger") ||
628 LIT_STRNEQL(name, "unlock_debugger") ||
629 LIT_STRNEQL(name, "SysChoked"))
2d21ac55
A
630 continue;
631
632 /*
633 * Place no probes that could be hit on the way to a panic.
634 */
635 if (NULL != strstr(name, "panic_") ||
b0d623f7
A
636 LIT_STRNEQL(name, "panic") ||
637 LIT_STRNEQL(name, "handleMck") ||
638 LIT_STRNEQL(name, "unresolved_kernel_trap"))
2d21ac55
A
639 continue;
640
641 if (dtrace_probe_lookup(fbt_id, modname, name, NULL) != 0)
642 continue;
643
644 for (j = 0, instr = (machine_inst_t *)sym[i].n_value, theInstr = 0;
645 (j < 4) && ((uintptr_t)instr >= instrLow) && (instrHigh > (uintptr_t)(instr + 2));
646 j++) {
647 theInstr = instr[0];
648 if (theInstr == FBT_PUSHL_EBP || theInstr == FBT_RET || theInstr == FBT_RET_IMM16)
649 break;
650
651 if ((size = dtrace_instr_size(instr)) <= 0)
652 break;
653
654 instr += size;
655 }
656
657 if (theInstr != FBT_PUSHL_EBP)
658 continue;
659
660 i1 = instr[1];
661 i2 = instr[2];
662
663 limit = (machine_inst_t *)instrHigh;
664
665 if ((i1 == FBT_MOVL_ESP_EBP0_V0 && i2 == FBT_MOVL_ESP_EBP1_V0) ||
666 (i1 == FBT_MOVL_ESP_EBP0_V1 && i2 == FBT_MOVL_ESP_EBP1_V1)) {
667 instr += 1; /* Advance to the movl %esp,%ebp */
668 theInstr = i1;
669 } else {
670 /*
671 * Sometimes, the compiler will schedule an intervening instruction
672 * in the function prologue. Example:
673 *
674 * _mach_vm_read:
675 * 000006d8 pushl %ebp
676 * 000006d9 movl $0x00000004,%edx
677 * 000006de movl %esp,%ebp
678 *
679 * Try the next instruction, to see if it is a movl %esp,%ebp
680 */
681
682 instr += 1; /* Advance past the pushl %ebp */
683 if ((size = dtrace_instr_size(instr)) <= 0)
684 continue;
685
686 instr += size;
687
688 if ((instr + 1) >= limit)
689 continue;
690
691 i1 = instr[0];
692 i2 = instr[1];
693
694 if (!(i1 == FBT_MOVL_ESP_EBP0_V0 && i2 == FBT_MOVL_ESP_EBP1_V0) &&
695 !(i1 == FBT_MOVL_ESP_EBP0_V1 && i2 == FBT_MOVL_ESP_EBP1_V1))
696 continue;
697
698 /* instr already points at the movl %esp,%ebp */
699 theInstr = i1;
700 }
701
702 fbt = kmem_zalloc(sizeof (fbt_probe_t), KM_SLEEP);
703 strlcpy( (char *)&(fbt->fbtp_name), name, MAX_FBTP_NAME_CHARS );
704 fbt->fbtp_id = dtrace_probe_create(fbt_id, modname, name, FBT_ENTRY, FBT_AFRAMES_ENTRY, fbt);
705 fbt->fbtp_patchpoint = instr;
706 fbt->fbtp_ctl = ctl;
707 fbt->fbtp_loadcnt = ctl->mod_loadcnt;
708 fbt->fbtp_rval = DTRACE_INVOP_MOVL_ESP_EBP;
709 fbt->fbtp_savedval = theInstr;
710 fbt->fbtp_patchval = FBT_PATCHVAL;
711
712 fbt->fbtp_hashnext = fbt_probetab[FBT_ADDR2NDX(instr)];
713 fbt->fbtp_symndx = i;
714 fbt_probetab[FBT_ADDR2NDX(instr)] = fbt;
715
716 retfbt = NULL;
717again:
718 if (instr >= limit)
719 continue;
720
721 /*
722 * If this disassembly fails, then we've likely walked off into
723 * a jump table or some other unsuitable area. Bail out of the
724 * disassembly now.
725 */
726 if ((size = dtrace_instr_size(instr)) <= 0)
727 continue;
728
729 /*
730 * We (desperately) want to avoid erroneously instrumenting a
731 * jump table, especially given that our markers are pretty
732 * short: two bytes on x86, and just one byte on amd64. To
733 * determine if we're looking at a true instruction sequence
734 * or an inline jump table that happens to contain the same
735 * byte sequences, we resort to some heuristic sleeze: we
736 * treat this instruction as being contained within a pointer,
737 * and see if that pointer points to within the body of the
738 * function. If it does, we refuse to instrument it.
739 */
740 for (j = 0; j < sizeof (uintptr_t); j++) {
741 uintptr_t check = (uintptr_t)instr - j;
742 uint8_t *ptr;
743
744 if (check < sym[i].n_value)
745 break;
746
747 if (check + sizeof (uintptr_t) > (uintptr_t)limit)
748 continue;
749
750 ptr = *(uint8_t **)check;
751
752 if (ptr >= (uint8_t *)sym[i].n_value && ptr < limit) {
753 instr += size;
754 goto again;
755 }
756 }
757
758 /*
759 * OK, it's an instruction.
760 */
761 theInstr = instr[0];
762
763 /* Walked onto the start of the next routine? If so, bail out of this function. */
764 if (theInstr == FBT_PUSHL_EBP)
765 continue;
766
767 if (!(size == 1 && (theInstr == FBT_POPL_EBP || theInstr == FBT_LEAVE))) {
768 instr += size;
769 goto again;
770 }
771
772 /*
773 * Found the popl %ebp; or leave.
774 */
775 machine_inst_t *patch_instr = instr;
776
777 /*
778 * Scan forward for a "ret", or "jmp".
779 */
780 instr += size;
781 if (instr >= limit)
782 continue;
783
784 size = dtrace_instr_size(instr);
785 if (size <= 0) /* Failed instruction decode? */
786 continue;
787
788 theInstr = instr[0];
789
790 if (!(size == FBT_RET_LEN && (theInstr == FBT_RET)) &&
791 !(size == FBT_RET_IMM16_LEN && (theInstr == FBT_RET_IMM16)) &&
792 !(size == FBT_JMP_SHORT_REL_LEN && (theInstr == FBT_JMP_SHORT_REL)) &&
793 !(size == FBT_JMP_NEAR_REL_LEN && (theInstr == FBT_JMP_NEAR_REL)) &&
794 !(size == FBT_JMP_FAR_ABS_LEN && (theInstr == FBT_JMP_FAR_ABS)))
795 continue;
796
797 /*
798 * popl %ebp; ret; or leave; ret; or leave; jmp tailCalledFun; -- We have a winner!
799 */
800 fbt = kmem_zalloc(sizeof (fbt_probe_t), KM_SLEEP);
801 strlcpy( (char *)&(fbt->fbtp_name), name, MAX_FBTP_NAME_CHARS );
802
803 if (retfbt == NULL) {
804 fbt->fbtp_id = dtrace_probe_create(fbt_id, modname,
805 name, FBT_RETURN, FBT_AFRAMES_RETURN, fbt);
806 } else {
807 retfbt->fbtp_next = fbt;
808 fbt->fbtp_id = retfbt->fbtp_id;
809 }
810
811 retfbt = fbt;
812 fbt->fbtp_patchpoint = patch_instr;
813 fbt->fbtp_ctl = ctl;
814 fbt->fbtp_loadcnt = ctl->mod_loadcnt;
815
816 if (*patch_instr == FBT_POPL_EBP) {
817 fbt->fbtp_rval = DTRACE_INVOP_POPL_EBP;
818 } else {
819 ASSERT(*patch_instr == FBT_LEAVE);
820 fbt->fbtp_rval = DTRACE_INVOP_LEAVE;
821 }
822 fbt->fbtp_roffset =
823 (uintptr_t)(patch_instr - (uint8_t *)sym[i].n_value);
824
825 fbt->fbtp_savedval = *patch_instr;
826 fbt->fbtp_patchval = FBT_PATCHVAL;
827 fbt->fbtp_hashnext = fbt_probetab[FBT_ADDR2NDX(patch_instr)];
828 fbt->fbtp_symndx = i;
829 fbt_probetab[FBT_ADDR2NDX(patch_instr)] = fbt;
830
831 instr += size;
832 goto again;
833 }
834}
b0d623f7
A
835#elif defined(__x86_64__)
836int
837fbt_invop(uintptr_t addr, uintptr_t *state, uintptr_t rval)
838{
839 fbt_probe_t *fbt = fbt_probetab[FBT_ADDR2NDX(addr)];
840
841 for (; fbt != NULL; fbt = fbt->fbtp_hashnext) {
842 if ((uintptr_t)fbt->fbtp_patchpoint == addr) {
843
844 if (fbt->fbtp_roffset == 0) {
845 x86_saved_state64_t *regs = (x86_saved_state64_t *)state;
846
847 CPU->cpu_dtrace_caller = *(uintptr_t *)(((uintptr_t)(regs->isf.rsp))+sizeof(uint64_t)); // 8(%rsp)
848 /* 64-bit ABI, arguments passed in registers. */
849 dtrace_probe(fbt->fbtp_id, regs->rdi, regs->rsi, regs->rdx, regs->rcx, regs->r8);
850 CPU->cpu_dtrace_caller = 0;
851 } else {
852
853 dtrace_probe(fbt->fbtp_id, fbt->fbtp_roffset, rval, 0, 0, 0);
854 CPU->cpu_dtrace_caller = 0;
855 }
856
857 return (fbt->fbtp_rval);
858 }
859 }
860
861 return (0);
862}
863
864#define IS_USER_TRAP(regs) (regs && (((regs)->isf.cs & 3) != 0))
865#define T_INVALID_OPCODE 6
866#define FBT_EXCEPTION_CODE T_INVALID_OPCODE
867#define T_PREEMPT 255
868
869kern_return_t
870fbt_perfCallback(
871 int trapno,
872 x86_saved_state_t *tagged_regs,
873 __unused int unused1,
874 __unused int unused2)
875{
876 kern_return_t retval = KERN_FAILURE;
877 x86_saved_state64_t *saved_state = saved_state64(tagged_regs);
878
879 if (FBT_EXCEPTION_CODE == trapno && !IS_USER_TRAP(saved_state)) {
880 boolean_t oldlevel;
881 uint64_t rsp_probe, *rbp, r12, fp, delta = 0;
882 uint32_t *pDst;
883 int emul;
884
885 oldlevel = ml_set_interrupts_enabled(FALSE);
886
887 /* Calculate where the stack pointer was when the probe instruction "fired." */
888 rsp_probe = saved_state->isf.rsp; /* Easy, x86_64 establishes this value in idt64.s */
889
890 emul = dtrace_invop( saved_state->isf.rip, (uintptr_t *)saved_state, saved_state->rax );
891 __asm__ volatile(".globl _dtrace_invop_callsite");
892 __asm__ volatile("_dtrace_invop_callsite:");
893
894 switch (emul) {
895 case DTRACE_INVOP_NOP:
896 saved_state->isf.rip += DTRACE_INVOP_NOP_SKIP; /* Skip over the patched NOP (planted by sdt). */
897 retval = KERN_SUCCESS;
898 break;
899
900 case DTRACE_INVOP_MOV_RSP_RBP:
901 saved_state->rbp = rsp_probe; /* Emulate patched mov %rsp,%rbp */
902 saved_state->isf.rip += DTRACE_INVOP_MOV_RSP_RBP_SKIP; /* Skip over the bytes of the patched mov %rsp,%rbp */
903 retval = KERN_SUCCESS;
904 break;
905
906 case DTRACE_INVOP_POP_RBP:
907 case DTRACE_INVOP_LEAVE:
908/*
909 * Emulate first micro-op of patched leave: mov %rbp,%rsp
910 * fp points just below the return address slot for target's ret
911 * and at the slot holding the frame pointer saved by the target's prologue.
912 */
913 fp = saved_state->rbp;
914/* Emulate second micro-op of patched leave: patched pop %rbp
915 * savearea rbp is set for the frame of the caller to target
916 * The *live* %rsp will be adjusted below for pop increment(s)
917 */
918 saved_state->rbp = *(uint64_t *)fp;
919/* Skip over the patched leave */
920 saved_state->isf.rip += DTRACE_INVOP_LEAVE_SKIP;
921/*
922 * Lift the stack to account for the emulated leave
923 * Account for words local in this frame
924 * (in "case DTRACE_INVOP_POPL_EBP:" this is zero.)
925 */
926 delta = ((uint32_t *)fp) - ((uint32_t *)rsp_probe); /* delta is a *word* increment */
927/* Account for popping off the rbp (just accomplished by the emulation
928 * above...)
929 */
930 delta += 2;
931 saved_state->isf.rsp += (delta << 2);
932
933/* XXX Fragile in the extreme.
934 * This is sensitive to trap_from_kernel()'s internals.
935 */
936 rbp = (uint64_t *)__builtin_frame_address(0);
937 rbp = (uint64_t *)*rbp;
938 r12 = *(rbp - 4);
939
940/* Shift contents of stack */
941 for (pDst = (uint32_t *)fp;
942 pDst > (((uint32_t *)r12));
943 pDst--)
944 *pDst = pDst[-delta];
945
946/* Track the stack lift in "saved_state". */
947 saved_state = (x86_saved_state64_t *) (((uintptr_t)saved_state) + (delta << 2));
948
949/* Now adjust the value of %r12 in our caller (kernel_trap)'s frame */
950 *(rbp - 4) = r12 + (delta << 2);
951
952 retval = KERN_SUCCESS;
953 break;
954
955 default:
956 retval = KERN_FAILURE;
957 break;
958 }
959 saved_state->isf.trapno = T_PREEMPT; /* Avoid call to i386_astintr()! */
960
961 ml_set_interrupts_enabled(oldlevel);
962 }
963
964 return retval;
965}
966
/*
 * __fbt_provide_module: scan one kernel module's Mach-O image and create
 * FBT entry/return probes for each eligible global text symbol.
 *
 * The walk is: locate the TEXT, LINKEDIT and SYMTAB load commands; then for
 * every global named symbol, (1) filter out functions that must never be
 * instrumented (dtrace itself, exception/debugger/panic paths, timekeeping,
 * power management, and the boot-arg-overridable blacklists), (2) verify the
 * function has the canonical x86_64 prologue (push %rbp; mov %rsp,%rbp) and
 * plant the entry probe on the mov, and (3) disassemble forward to each
 * epilogue (pop %rbp/leave followed by ret/jmp) and plant return probes.
 *
 * arg is unused; ctl describes the module (address/size of its Mach-O image).
 */
/*ARGSUSED*/
static void
__fbt_provide_module(void *arg, struct modctl *ctl)
{
#pragma unused(arg)
	kernel_mach_header_t *mh;
	struct load_command *cmd;
	kernel_segment_command_t *orig_ts = NULL, *orig_le = NULL;
	struct symtab_command *orig_st = NULL;
	struct nlist_64 *sym = NULL;
	char *strings;
	uintptr_t instrLow, instrHigh;
	char *modname;
	unsigned int i, j;

	/* Boot-arg escape hatch: skip the two bsearch'd blacklists below. */
	int gIgnoreFBTBlacklist = 0;
	PE_parse_boot_argn("IgnoreFBTBlacklist", &gIgnoreFBTBlacklist, sizeof (gIgnoreFBTBlacklist));

	mh = (kernel_mach_header_t *)(ctl->address);
	modname = ctl->mod_modname;

	if (0 == ctl->address || 0 == ctl->size) /* Has the linker been jettisoned? */
		return;

	/*
	 * Employees of dtrace and their families are ineligible. Void
	 * where prohibited.
	 */

	if (LIT_STRNEQL(modname, "com.apple.driver.dtrace"))
		return;

	if (strstr(modname, "CHUD") != NULL)
		return;

	/* This file is the 64-bit scanner; only 64-bit Mach-O headers apply. */
	if (mh->magic != MH_MAGIC_64)
		return;

	/* Load commands immediately follow the mach header. */
	cmd = (struct load_command *) &mh[1];
	for (i = 0; i < mh->ncmds; i++) {
		if (cmd->cmd == LC_SEGMENT_KERNEL) {
			kernel_segment_command_t *orig_sg = (kernel_segment_command_t *) cmd;

			if (LIT_STRNEQL(orig_sg->segname, SEG_TEXT))
				orig_ts = orig_sg;
			else if (LIT_STRNEQL(orig_sg->segname, SEG_LINKEDIT))
				orig_le = orig_sg;
			else if (LIT_STRNEQL(orig_sg->segname, ""))
				orig_ts = orig_sg; /* kexts have a single unnamed segment */
		}
		else if (cmd->cmd == LC_SYMTAB)
			orig_st = (struct symtab_command *) cmd;

		cmd = (struct load_command *) ((caddr_t) cmd + cmd->cmdsize);
	}

	/* Without text, symbols, and strings there is nothing to instrument. */
	if ((orig_ts == NULL) || (orig_st == NULL) || (orig_le == NULL))
		return;

	/*
	 * symoff/stroff are file offsets; convert to VM addresses via the
	 * LINKEDIT segment's vmaddr/fileoff correspondence.
	 */
	sym = (struct nlist_64 *)(orig_le->vmaddr + orig_st->symoff - orig_le->fileoff);
	strings = (char *)(orig_le->vmaddr + orig_st->stroff - orig_le->fileoff);

	/* Find extent of the TEXT section */
	instrLow = (uintptr_t)orig_ts->vmaddr;
	instrHigh = (uintptr_t)(orig_ts->vmaddr + orig_ts->vmsize);

	for (i = 0; i < orig_st->nsyms; i++) {
		fbt_probe_t *fbt, *retfbt;
		machine_inst_t *instr, *limit, theInstr, i1, i2, i3;
		uint8_t n_type = sym[i].n_type & (N_TYPE | N_EXT);
		char *name = strings + sym[i].n_un.n_strx;
		int size;

		/* Check that the symbol is a global and that it has a name. */
		if (((N_SECT | N_EXT) != n_type && (N_ABS | N_EXT) != n_type))
			continue;

		if (0 == sym[i].n_un.n_strx) /* iff a null, "", name. */
			continue;

		/* Lop off omnipresent leading underscore. */
		if (*name == '_')
			name += 1;

		if (LIT_STRNSTART(name, "dtrace_") && !LIT_STRNSTART(name, "dtrace_safe_")) {
			/*
			 * Anything beginning with "dtrace_" may be called
			 * from probe context unless it explitly indicates
			 * that it won't be called from probe context by
			 * using the prefix "dtrace_safe_".
			 */
			continue;
		}

		if (LIT_STRNSTART(name, "fasttrap_") ||
		    LIT_STRNSTART(name, "fuword") ||
		    LIT_STRNSTART(name, "suword") ||
		    LIT_STRNEQL(name, "sprlock") ||
		    LIT_STRNEQL(name, "sprunlock") ||
		    LIT_STRNEQL(name, "uread") ||
		    LIT_STRNEQL(name, "uwrite"))
			continue; /* Fasttrap inner-workings. */

		if (LIT_STRNSTART(name, "dsmos_"))
			continue; /* Don't Steal Mac OS X! */

		if (LIT_STRNSTART(name, "_dtrace"))
			continue; /* Shims in dtrace.c */

		if (LIT_STRNSTART(name, "chud"))
			continue; /* Professional courtesy. */

		if (LIT_STRNSTART(name, "hibernate_"))
			continue; /* Let sleeping dogs lie. */

		if (LIT_STRNEQL(name, "ZN9IOService14newTemperatureElPS_") || /* IOService::newTemperature */
		    LIT_STRNEQL(name, "ZN9IOService26temperatureCriticalForZoneEPS_")) /* IOService::temperatureCriticalForZone */
			continue; /* Per the fire code */

		/*
		 * Place no probes (illegal instructions) in the exception handling path!
		 */
		if (LIT_STRNEQL(name, "t_invop") ||
		    LIT_STRNEQL(name, "enter_lohandler") ||
		    LIT_STRNEQL(name, "lo_alltraps") ||
		    LIT_STRNEQL(name, "kernel_trap") ||
		    LIT_STRNEQL(name, "interrupt") ||
		    LIT_STRNEQL(name, "i386_astintr"))
			continue;

		/* Routines the FBT trap handler itself calls: a probe here would recurse. */
		if (LIT_STRNEQL(name, "current_thread") ||
		    LIT_STRNEQL(name, "ast_pending") ||
		    LIT_STRNEQL(name, "fbt_perfCallback") ||
		    LIT_STRNEQL(name, "machine_thread_get_kern_state") ||
		    LIT_STRNEQL(name, "get_threadtask") ||
		    LIT_STRNEQL(name, "ml_set_interrupts_enabled") ||
		    LIT_STRNEQL(name, "dtrace_invop") ||
		    LIT_STRNEQL(name, "fbt_invop") ||
		    LIT_STRNEQL(name, "sdt_invop") ||
		    LIT_STRNEQL(name, "max_valid_stack_address"))
			continue;

		/*
		 * Voodoo.
		 */
		if (LIT_STRNSTART(name, "machine_stack_") ||
		    LIT_STRNSTART(name, "mapping_") ||
		    LIT_STRNEQL(name, "tmrCvt") ||

		    LIT_STRNSTART(name, "tsc_") ||

		    LIT_STRNSTART(name, "pmCPU") ||
		    LIT_STRNEQL(name, "pmKextRegister") ||
		    LIT_STRNEQL(name, "pmMarkAllCPUsOff") ||
		    LIT_STRNEQL(name, "pmSafeMode") ||
		    LIT_STRNEQL(name, "pmTimerSave") ||
		    LIT_STRNEQL(name, "pmTimerRestore") ||
		    LIT_STRNEQL(name, "pmUnRegister") ||
		    LIT_STRNSTART(name, "pms") ||
		    LIT_STRNEQL(name, "power_management_init") ||
		    LIT_STRNSTART(name, "usimple_") ||
		    LIT_STRNSTART(name, "lck_spin_lock") ||
		    LIT_STRNSTART(name, "lck_spin_unlock") ||

		    LIT_STRNSTART(name, "rtc_") ||
		    LIT_STRNSTART(name, "_rtc_") ||
		    LIT_STRNSTART(name, "rtclock_") ||
		    LIT_STRNSTART(name, "clock_") ||
		    LIT_STRNSTART(name, "absolutetime_to_") ||
		    LIT_STRNEQL(name, "setPop") ||
		    LIT_STRNEQL(name, "nanoseconds_to_absolutetime") ||
		    LIT_STRNEQL(name, "nanotime_to_absolutetime") ||

		    LIT_STRNSTART(name, "etimer_") ||

		    LIT_STRNSTART(name, "commpage_") ||
		    LIT_STRNSTART(name, "pmap_") ||
		    LIT_STRNSTART(name, "ml_") ||
		    LIT_STRNSTART(name, "PE_") ||
		    LIT_STRNEQL(name, "kprintf") ||
		    LIT_STRNSTART(name, "lapic_") ||
		    LIT_STRNSTART(name, "acpi_"))
			continue;

		/*
		 * Avoid machine_ routines. PR_5346750.
		 */
		if (LIT_STRNSTART(name, "machine_"))
			continue;

		if (LIT_STRNEQL(name, "handle_pending_TLB_flushes"))
			continue;

		/*
		 * Place no probes on critical routines. PR_5221096
		 * NOTE(review): the bsearch key is `name` (a char *) while the
		 * table elements are char * entries sized sizeof(name);
		 * presumably _cmp accounts for that asymmetry — confirm against
		 * its definition earlier in this file.
		 */
		if (!gIgnoreFBTBlacklist &&
		    bsearch( name, critical_blacklist, CRITICAL_BLACKLIST_COUNT, sizeof(name), _cmp ) != NULL)
			continue;

		/*
		 * Place no probes that could be hit in probe context.
		 */
		if (!gIgnoreFBTBlacklist &&
		    bsearch( name, probe_ctx_closure, PROBE_CTX_CLOSURE_COUNT, sizeof(name), _cmp ) != NULL)
			continue;

		/*
		 * Place no probes that could be hit on the way to the debugger.
		 */
		if (LIT_STRNSTART(name, "kdp_") ||
		    LIT_STRNSTART(name, "kdb_") ||
		    LIT_STRNSTART(name, "kdbg_") ||
		    LIT_STRNSTART(name, "kdebug_") ||
		    LIT_STRNEQL(name, "kernel_debug") ||
		    LIT_STRNEQL(name, "Debugger") ||
		    LIT_STRNEQL(name, "Call_DebuggerC") ||
		    LIT_STRNEQL(name, "lock_debugger") ||
		    LIT_STRNEQL(name, "unlock_debugger") ||
		    LIT_STRNEQL(name, "SysChoked"))
			continue;

		/*
		 * Place no probes that could be hit on the way to a panic.
		 */
		if (NULL != strstr(name, "panic_") ||
		    LIT_STRNEQL(name, "panic") ||
		    LIT_STRNEQL(name, "handleMck") ||
		    LIT_STRNEQL(name, "unresolved_kernel_trap"))
			continue;

		/* Already have a probe for this module/function? Don't duplicate. */
		if (dtrace_probe_lookup(fbt_id, modname, name, NULL) != 0)
			continue;

		/*
		 * Scan up to 4 instructions from the symbol's address for the
		 * push %rbp that opens the canonical frame-setup prologue,
		 * staying inside the TEXT extent. A ret before any push means
		 * there is no usable prologue.
		 */
		for (j = 0, instr = (machine_inst_t *)sym[i].n_value, theInstr = 0;
		    (j < 4) && ((uintptr_t)instr >= instrLow) && (instrHigh > (uintptr_t)(instr + 2));
		    j++) {
			theInstr = instr[0];
			if (theInstr == FBT_PUSH_RBP || theInstr == FBT_RET || theInstr == FBT_RET_IMM16)
				break;

			if ((size = dtrace_instr_size(instr)) <= 0)
				break;

			instr += size;
		}

		if (theInstr != FBT_PUSH_RBP)
			continue;

		i1 = instr[1];
		i2 = instr[2];
		i3 = instr[3];

		limit = (machine_inst_t *)instrHigh;

		/*
		 * Require "mov %rsp,%rbp" (REX.W prefix + opcode + modrm)
		 * immediately after the push; the entry probe replaces that
		 * mov, whose effect the trap handler can emulate.
		 */
		if (i1 == FBT_REX_RSP_RBP && i2 == FBT_MOV_RSP_RBP0 && i3 == FBT_MOV_RSP_RBP1) {
			instr += 1; /* Advance to the mov %rsp,%rbp */
			theInstr = i1;
		} else {
			continue;
		}
#if 0
		else {
			/*
			 * Sometimes, the compiler will schedule an intervening instruction
			 * in the function prologue. Example:
			 *
			 * _mach_vm_read:
			 * 000006d8        pushl   %ebp
			 * 000006d9        movl    $0x00000004,%edx
			 * 000006de        movl    %esp,%ebp
			 *
			 * Try the next instruction, to see if it is a movl %esp,%ebp
			 */

			instr += 1; /* Advance past the pushl %ebp */
			if ((size = dtrace_instr_size(instr)) <= 0)
				continue;

			instr += size;

			if ((instr + 1) >= limit)
				continue;

			i1 = instr[0];
			i2 = instr[1];

			if (!(i1 == FBT_MOVL_ESP_EBP0_V0 && i2 == FBT_MOVL_ESP_EBP1_V0) &&
			    !(i1 == FBT_MOVL_ESP_EBP0_V1 && i2 == FBT_MOVL_ESP_EBP1_V1))
				continue;

			/* instr already points at the movl %esp,%ebp */
			theInstr = i1;
		}
#endif

		/* Create and hash in the entry probe for this function. */
		fbt = kmem_zalloc(sizeof (fbt_probe_t), KM_SLEEP);
		strlcpy( (char *)&(fbt->fbtp_name), name, MAX_FBTP_NAME_CHARS );
		fbt->fbtp_id = dtrace_probe_create(fbt_id, modname, name, FBT_ENTRY, FBT_AFRAMES_ENTRY, fbt);
		fbt->fbtp_patchpoint = instr;
		fbt->fbtp_ctl = ctl;
		fbt->fbtp_loadcnt = ctl->mod_loadcnt;
		fbt->fbtp_rval = DTRACE_INVOP_MOV_RSP_RBP;
		fbt->fbtp_savedval = theInstr;	/* byte restored when the probe is disabled */
		fbt->fbtp_patchval = FBT_PATCHVAL;

		fbt->fbtp_hashnext = fbt_probetab[FBT_ADDR2NDX(instr)];
		fbt->fbtp_symndx = i;
		fbt_probetab[FBT_ADDR2NDX(instr)] = fbt;

		/*
		 * Now disassemble forward from the prologue looking for every
		 * epilogue; each gets a return probe sharing one probe id.
		 */
		retfbt = NULL;
again:
		if (instr >= limit)
			continue;

		/*
		 * If this disassembly fails, then we've likely walked off into
		 * a jump table or some other unsuitable area. Bail out of the
		 * disassembly now.
		 */
		if ((size = dtrace_instr_size(instr)) <= 0)
			continue;

		/*
		 * We (desperately) want to avoid erroneously instrumenting a
		 * jump table, especially given that our markers are pretty
		 * short: two bytes on x86, and just one byte on amd64. To
		 * determine if we're looking at a true instruction sequence
		 * or an inline jump table that happens to contain the same
		 * byte sequences, we resort to some heuristic sleeze: we
		 * treat this instruction as being contained within a pointer,
		 * and see if that pointer points to within the body of the
		 * function. If it does, we refuse to instrument it.
		 */
		for (j = 0; j < sizeof (uintptr_t); j++) {
			uintptr_t check = (uintptr_t)instr - j;
			uint8_t *ptr;

			if (check < sym[i].n_value)
				break;

			if (check + sizeof (uintptr_t) > (uintptr_t)limit)
				continue;

			ptr = *(uint8_t **)check;

			if (ptr >= (uint8_t *)sym[i].n_value && ptr < limit) {
				instr += size;
				goto again;
			}
		}

		/*
		 * OK, it's an instruction.
		 */
		theInstr = instr[0];

		/* Walked onto the start of the next routine? If so, bail out of this function. */
		if (theInstr == FBT_PUSH_RBP)
			continue;

		if (!(size == 1 && (theInstr == FBT_POP_RBP || theInstr == FBT_LEAVE))) {
			instr += size;
			goto again;
		}

		/*
		 * Found the pop %rbp; or leave.
		 */
		machine_inst_t *patch_instr = instr;

		/*
		 * Scan forward for a "ret", or "jmp".
		 */
		instr += size;
		if (instr >= limit)
			continue;

		size = dtrace_instr_size(instr);
		if (size <= 0) /* Failed instruction decode? */
			continue;

		theInstr = instr[0];

		if (!(size == FBT_RET_LEN && (theInstr == FBT_RET)) &&
		    !(size == FBT_RET_IMM16_LEN && (theInstr == FBT_RET_IMM16)) &&
		    !(size == FBT_JMP_SHORT_REL_LEN && (theInstr == FBT_JMP_SHORT_REL)) &&
		    !(size == FBT_JMP_NEAR_REL_LEN && (theInstr == FBT_JMP_NEAR_REL)) &&
		    !(size == FBT_JMP_FAR_ABS_LEN && (theInstr == FBT_JMP_FAR_ABS)))
			continue;

		/*
		 * pop %rbp; ret; or leave; ret; or leave; jmp tailCalledFun; -- We have a winner!
		 */
		fbt = kmem_zalloc(sizeof (fbt_probe_t), KM_SLEEP);
		strlcpy( (char *)&(fbt->fbtp_name), name, MAX_FBTP_NAME_CHARS );

		/* First epilogue creates the return probe; later ones chain onto it. */
		if (retfbt == NULL) {
			fbt->fbtp_id = dtrace_probe_create(fbt_id, modname,
			    name, FBT_RETURN, FBT_AFRAMES_RETURN, fbt);
		} else {
			retfbt->fbtp_next = fbt;
			fbt->fbtp_id = retfbt->fbtp_id;
		}

		retfbt = fbt;
		fbt->fbtp_patchpoint = patch_instr;
		fbt->fbtp_ctl = ctl;
		fbt->fbtp_loadcnt = ctl->mod_loadcnt;

		/* Tell the trap handler which epilogue instruction to emulate. */
		if (*patch_instr == FBT_POP_RBP) {
			fbt->fbtp_rval = DTRACE_INVOP_POP_RBP;
		} else {
			ASSERT(*patch_instr == FBT_LEAVE);
			fbt->fbtp_rval = DTRACE_INVOP_LEAVE;
		}
		/* Offset of the return point from function start, for probe arg0. */
		fbt->fbtp_roffset =
		    (uintptr_t)(patch_instr - (uint8_t *)sym[i].n_value);

		fbt->fbtp_savedval = *patch_instr;
		fbt->fbtp_patchval = FBT_PATCHVAL;
		fbt->fbtp_hashnext = fbt_probetab[FBT_ADDR2NDX(patch_instr)];
		fbt->fbtp_symndx = i;
		fbt_probetab[FBT_ADDR2NDX(patch_instr)] = fbt;

		/* Keep scanning: a function may have several exit points. */
		instr += size;
		goto again;
	}
}
1397#else
1398#error Unknown arch
1399#endif
1400
1401extern struct modctl g_fbt_kernctl;
1402#undef kmem_alloc /* from its binding to dt_kmem_alloc glue */
1403#undef kmem_free /* from its binding to dt_kmem_free glue */
1404#include <vm/vm_kern.h>
1405
1406/*ARGSUSED*/
1407void
1408fbt_provide_module(void *arg, struct modctl *ctl)
1409{
1410#pragma unused(ctl)
1411 __fbt_provide_module(arg, &g_fbt_kernctl);
1412
b0d623f7
A
1413 if ( (vm_offset_t)g_fbt_kernctl.address != (vm_offset_t )NULL )
1414 kmem_free(kernel_map, (vm_offset_t)g_fbt_kernctl.address, round_page(g_fbt_kernctl.size));
2d21ac55
A
1415 g_fbt_kernctl.address = 0;
1416 g_fbt_kernctl.size = 0;
1417}