]> git.saurik.com Git - apple/xnu.git/blame - bsd/dev/dtrace/fbt.c
xnu-4903.221.2.tar.gz
[apple/xnu.git] / bsd / dev / dtrace / fbt.c
CommitLineData
2d21ac55
A
1/*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
b0d623f7
A
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
2d21ac55
A
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21/*
6d2010ae 22 * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
2d21ac55
A
23 * Use is subject to license terms.
24 */
25
b0d623f7 26/* #pragma ident "@(#)fbt.c 1.18 07/01/10 SMI" */
2d21ac55
A
27
28#ifdef KERNEL
29#ifndef _KERNEL
30#define _KERNEL /* Solaris vs. Darwin */
31#endif
32#endif
33
5ba3f43e 34#include <mach-o/loader.h>
b0d623f7 35#include <libkern/kernel_mach_header.h>
2d21ac55
A
36
37#include <sys/param.h>
38#include <sys/systm.h>
5ba3f43e 39#include <sys/sysctl.h>
2d21ac55
A
40#include <sys/errno.h>
41#include <sys/stat.h>
42#include <sys/ioctl.h>
43#include <sys/conf.h>
44#include <sys/fcntl.h>
45#include <miscfs/devfs/devfs.h>
46#include <pexpert/pexpert.h>
47
48#include <sys/dtrace.h>
49#include <sys/dtrace_impl.h>
50#include <sys/fbt.h>
51
52#include <sys/dtrace_glue.h>
a39ff7e2 53#include <san/kasan.h>
2d21ac55
A
54
55/* #include <machine/trap.h> */
56struct savearea_t; /* Used anonymously */
2d21ac55 57
5ba3f43e
A
58#if defined(__arm__) || defined(__arm64__)
59typedef kern_return_t (*perfCallback)(int, struct savearea_t *, __unused int, __unused int);
60extern perfCallback tempDTraceTrapHook;
61extern kern_return_t fbt_perfCallback(int, struct savearea_t *, __unused int, __unused int);
62#elif defined(__x86_64__)
39236c6e 63typedef kern_return_t (*perfCallback)(int, struct savearea_t *, uintptr_t *, __unused int);
2d21ac55 64extern perfCallback tempDTraceTrapHook;
39236c6e 65extern kern_return_t fbt_perfCallback(int, struct savearea_t *, uintptr_t *, __unused int);
fe8ab488
A
66#else
67#error Unknown architecture
68#endif
2d21ac55 69
d9a64523
A
70__private_extern__
71void
72qsort(void *a, size_t n, size_t es, int (*cmp)(const void *, const void *));
73
2d21ac55
A
74#define FBT_ADDR2NDX(addr) ((((uintptr_t)(addr)) >> 4) & fbt_probetab_mask)
75#define FBT_PROBETAB_SIZE 0x8000 /* 32k entries -- 128K total */
76
2d21ac55
A
77static int fbt_probetab_size;
78dtrace_provider_id_t fbt_id;
79fbt_probe_t **fbt_probetab;
80int fbt_probetab_mask;
81static int fbt_verbose = 0;
82
5ba3f43e
A
83int ignore_fbt_blacklist = 0;
84
85extern int dtrace_kernel_symbol_mode;
86
87
2d21ac55
A
88void fbt_init( void );
89
5ba3f43e
A
/*
 * Critical routines that must not be probed. PR_5221096, PR_5379018.
 *
 * The table is searched with bsearch(), so it must stay in strcmp() order
 * (uppercase letters sort before '_', which sorts before lowercase letters).
 *
 * Every entry needs its own trailing comma.  Adjacent string literals are
 * silently concatenated by the compiler, which fuses two names into a single
 * entry that matches neither of them.
 */
static const char * critical_blacklist[] =
{
	"Call_DebuggerC",
	"DebuggerCall",
	"DebuggerTrapWithState",
	"DebuggerXCallEnter",
	"IOCPURunPlatformPanicActions",
	"PEARMDebugPanicHook",
	"PEHaltRestart",
	"SavePanicInfo",
	"SysChoked",
	"_ZN9IOService14newTemperatureElPS_", /* IOService::newTemperature */
	"_ZN9IOService26temperatureCriticalForZoneEPS_", /* IOService::temperatureCriticalForZone */
	"_ZNK6OSData14getBytesNoCopyEv", /* Data::getBytesNoCopy, IOHibernateSystemWake path */
	"__ZN16IOPlatformExpert11haltRestartEj",
	"__ZN18IODTPlatformExpert11haltRestartEj",
	"__ZN9IODTNVRAM13savePanicInfoEPhy",
	"_disable_preemption",
	"_enable_preemption",
	"alternate_debugger_enter",
	"bcopy_phys",
	"console_cpu_alloc",
	"console_cpu_free",
	"cpu_IA32e_disable",
	"cpu_IA32e_enable",
	"cpu_NMI_interrupt",
	"cpu_control",
	"cpu_data_alloc",
	"cpu_desc_init",
	"cpu_desc_init64",
	"cpu_desc_load",
	"cpu_desc_load64",
	"cpu_exit_wait",
	"cpu_info",
	"cpu_info_count",
	"cpu_init",
	"cpu_interrupt",
	"cpu_machine_init",
	"cpu_mode_init",
	"cpu_processor_alloc",
	"cpu_processor_free",
	"cpu_signal_handler",
	"cpu_sleep",
	"cpu_start",
	"cpu_subtype",
	"cpu_thread_alloc",
	"cpu_thread_halt",
	"cpu_thread_init",
	"cpu_threadtype",
	"cpu_to_processor",
	"cpu_topology_sort",
	"cpu_topology_start_cpu",
	"cpu_type",
	"cpuid_cpu_display",
	"cpuid_extfeatures",
	"dtrace_invop",
	"enter_lohandler",
	"fbt_invop",
	"fbt_perfCallback",
	"get_preemption_level",
	"get_threadtask",
	"handle_pending_TLB_flushes",
	"hw_compare_and_store",
	"interrupt",
	"is_saved_state32",
	"kernel_preempt_check",
	"kernel_trap",
	"kprintf",
	"ks_dispatch_kernel",
	"ks_dispatch_user",
	"ks_kernel_trap",
	"lo_alltraps",
	"lock_debugger",
	"machine_idle_cstate",
	"machine_thread_get_kern_state",
	"mca_cpu_alloc",
	"mca_cpu_init",
	"ml_nofault_copy",
	"nanoseconds_to_absolutetime",
	"nanotime_to_absolutetime",
	"packA",
	"panic",
	"phystokv",
	"phystokv_range",
	"pltrace",
	"pmKextRegister",
	"pmMarkAllCPUsOff",
	"pmSafeMode",
	"pmTimerRestore",
	"pmTimerSave",
	"pmUnRegister",
	"pmap_cpu_alloc",
	"pmap_cpu_free",
	"pmap_cpu_high_map_vaddr",
	"pmap_cpu_high_shared_remap",
	"pmap_cpu_init",
	"power_management_init",
	"preemption_underflow_panic",
	"register_cpu_setup_func",
	"ret64_iret",
	"ret_to_user",
	"return_to_kernel",
	"return_to_user",
	"saved_state64",
	"sdt_invop",
	"sprlock",
	"sprunlock",
	"strlen",
	"strncmp",
	"t_invop",
	"tmrCvt",
	"trap_from_kernel",
	"uart_putc",
	"unlock_debugger",
	"unpackA",
	"unregister_cpu_setup_func",
	"uread",
	"uwrite",
	"vstart"
};
d9a64523 214
5ba3f43e
A
215#define CRITICAL_BLACKLIST_COUNT (sizeof(critical_blacklist)/sizeof(critical_blacklist[0]))
216
/*
 * Entry points reachable from probe context (their transitive closure),
 * other than the routines whose names start with "dtrace_".
 *
 * Keep the entries in strcmp() order, one string per line, each followed by
 * a comma except the last.
 */
static const char * probe_ctx_closure[] =
{
	"ClearIdlePop",
	"Debugger",
	"IS_64BIT_PROCESS",
	"OSCompareAndSwap",
	"SetIdlePop",
	"__dtrace_probe",
	"absolutetime_to_microtime",
	"act_set_astbsd",
	"arm_init_idle_cpu",
	"ast_dtrace_on",
	"ast_pending",
	"clean_dcache",
	"clean_mmu_dcache",
	"clock_get_calendar_nanotime_nowait",
	"copyin",
	"copyin_kern",
	"copyin_user",
	"copyinstr",
	"copyout",
	"copyoutstr",
	"cpu_number",
	"current_proc",
	"current_processor",
	"current_task",
	"current_thread",
	"debug_enter",
	"drain_write_buffer",
	"find_user_regs",
	"flush_dcache",
	"flush_tlb64",
	"get_bsdtask_info",
	"get_bsdthread_info",
	"hertz_tick",
	"hw_atomic_and",
	"invalidate_mmu_icache",
	"kauth_cred_get",
	"kauth_getgid",
	"kauth_getuid",
	"kernel_preempt_check",
	"kvtophys",
	"mach_absolute_time",
	"max_valid_stack_address",
	"memcpy",
	"memmove",
	"ml_at_interrupt_context",
	"ml_phys_write_byte_64",
	"ml_phys_write_half_64",
	"ml_phys_write_word_64",
	"ml_set_interrupts_enabled",
	"mt_core_snap",
	"mt_cur_cpu_cycles",
	"mt_cur_cpu_instrs",
	"mt_cur_thread_cycles",
	"mt_cur_thread_instrs",
	"mt_fixed_counts",
	"mt_fixed_counts_internal",
	"mt_mtc_update_count",
	"mt_update_thread",
	"ovbcopy",
	"panic",
	"pmap64_pde",
	"pmap64_pdpt",
	"pmap_find_phys",
	"pmap_get_mapwindow",
	"pmap_pde",
	"pmap_pte",
	"pmap_put_mapwindow",
	"pmap_valid_page",
	"prf",
	"proc_is64bit",
	"proc_selfname",
	"psignal_lock",
	"rtc_nanotime_load",
	"rtc_nanotime_read",
	"sdt_getargdesc",
	"setPop",
	"strlcpy",
	"sync_iss_to_iks_unconditionally",
	"systrace_stub",
	"timer_grab"
};
/* Number of entries in probe_ctx_closure[]. */
#define PROBE_CTX_CLOSURE_COUNT (sizeof(probe_ctx_closure)/sizeof(probe_ctx_closure[0]))
305
/*
 * bsearch() comparator for the name tables.
 *
 * a: the NUL-terminated name being looked up.
 * b: address of one `const char *` slot in the table.
 *
 * strlen(a) + 1 bytes are compared, so the key's terminator takes part in the
 * comparison and a table entry that merely starts with the key is not equal.
 */
static int _cmp(const void *a, const void *b)
{
	const char *key = (const char *)a;
	const char *const *slot = (const char *const *)b;

	return strncmp(key, *slot, strlen(key) + 1);
}
313/*
314 * Module validation
315 */
316int
317fbt_module_excluded(struct modctl* ctl)
318{
319 ASSERT(!MOD_FBT_DONE(ctl));
320
321 if (ctl->mod_address == 0 || ctl->mod_size == 0) {
322 return TRUE;
323 }
d9a64523 324
5ba3f43e
A
325 if (ctl->mod_loaded == 0) {
326 return TRUE;
327 }
328
329 /*
330 * If the user sets this, trust they know what they are doing.
331 */
332 if (ignore_fbt_blacklist)
333 return FALSE;
334
335 /*
336 * These drivers control low level functions that when traced
337 * cause problems often in the sleep/wake paths as well as
338 * critical debug and panic paths.
339 * If somebody really wants to drill in on one of these kexts, then
340 * they can override blacklisting using the boot-arg above.
341 */
342
343#ifdef __x86_64__
344 if (strstr(ctl->mod_modname, "AppleACPIEC") != NULL)
345 return TRUE;
346
347 if (strstr(ctl->mod_modname, "AppleACPIPlatform") != NULL)
348 return TRUE;
349
350 if (strstr(ctl->mod_modname, "AppleRTC") != NULL)
351 return TRUE;
352
353 if (strstr(ctl->mod_modname, "IOACPIFamily") != NULL)
354 return TRUE;
355
356 if (strstr(ctl->mod_modname, "AppleIntelCPUPowerManagement") != NULL)
357 return TRUE;
358
359 if (strstr(ctl->mod_modname, "AppleProfile") != NULL)
360 return TRUE;
361
362 if (strstr(ctl->mod_modname, "AppleIntelProfile") != NULL)
363 return TRUE;
364
365 if (strstr(ctl->mod_modname, "AppleEFI") != NULL)
366 return TRUE;
367
368#elif __arm__ || __arm64__
369 if (LIT_STRNEQL(ctl->mod_modname, "com.apple.driver.AppleARMPlatform") ||
370 LIT_STRNEQL(ctl->mod_modname, "com.apple.driver.AppleARMPL192VIC") ||
371 LIT_STRNEQL(ctl->mod_modname, "com.apple.driver.AppleInterruptController"))
372 return TRUE;
373#endif
374
375 return FALSE;
376}
377
378/*
379 * FBT probe name validation
380 */
381int
382fbt_excluded(const char* name)
383{
384 /*
385 * If the user set this, trust they know what they are doing.
386 */
387 if (ignore_fbt_blacklist)
388 return FALSE;
389
390 if (LIT_STRNSTART(name, "dtrace_") && !LIT_STRNSTART(name, "dtrace_safe_")) {
391 /*
392 * Anything beginning with "dtrace_" may be called
393 * from probe context unless it explitly indicates
394 * that it won't be called from probe context by
395 * using the prefix "dtrace_safe_".
396 */
397 return TRUE;
398 }
399
400 /*
401 * Place no probes on critical routines (5221096)
402 */
403 if (bsearch( name, critical_blacklist, CRITICAL_BLACKLIST_COUNT, sizeof(name), _cmp ) != NULL)
404 return TRUE;
405
406 /*
407 * Place no probes that could be hit in probe context.
408 */
409 if (bsearch( name, probe_ctx_closure, PROBE_CTX_CLOSURE_COUNT, sizeof(name), _cmp ) != NULL) {
410 return TRUE;
411 }
412
413 /*
414 * Place no probes that could be hit in probe context.
415 * In the interests of safety, some of these may be overly cautious.
416 * Also exclude very low-level "firmware" class calls.
417 */
418 if (LIT_STRNSTART(name, "cpu_") || /* Coarse */
419 LIT_STRNSTART(name, "platform_") || /* Coarse */
420 LIT_STRNSTART(name, "machine_") || /* Coarse */
421 LIT_STRNSTART(name, "ml_") || /* Coarse */
422 LIT_STRNSTART(name, "PE_") || /* Coarse */
423 LIT_STRNSTART(name, "rtc_") || /* Coarse */
424 LIT_STRNSTART(name, "_rtc_") ||
425 LIT_STRNSTART(name, "rtclock_") ||
426 LIT_STRNSTART(name, "clock_") ||
427 LIT_STRNSTART(name, "bcopy") ||
428 LIT_STRNSTART(name, "pmap_") ||
429 LIT_STRNSTART(name, "hw_") || /* Coarse */
430 LIT_STRNSTART(name, "lapic_") || /* Coarse */
431 LIT_STRNSTART(name, "OSAdd") ||
432 LIT_STRNSTART(name, "OSBit") ||
433 LIT_STRNSTART(name, "OSDecrement") ||
434 LIT_STRNSTART(name, "OSIncrement") ||
435 LIT_STRNSTART(name, "OSCompareAndSwap") ||
436 LIT_STRNSTART(name, "etimer_") ||
437 LIT_STRNSTART(name, "dtxnu_kern_") ||
438 LIT_STRNSTART(name, "flush_mmu_tlb_"))
439 return TRUE;
440 /*
441 * Fasttrap inner-workings we can't instrument
442 * on Intel (6230149)
443 */
444 if (LIT_STRNSTART(name, "fasttrap_") ||
445 LIT_STRNSTART(name, "fuword") ||
446 LIT_STRNSTART(name, "suword"))
447 return TRUE;
448
449 if (LIT_STRNSTART(name, "_dtrace"))
450 return TRUE; /* Shims in dtrace.c */
451
452 if (LIT_STRNSTART(name, "hibernate_"))
453 return TRUE;
454
455 /*
456 * Place no probes in the exception handling path
457 */
458#if __arm__ || __arm64__
459 if (LIT_STRNSTART(name, "fleh_") ||
460 LIT_STRNSTART(name, "sleh_") ||
461 LIT_STRNSTART(name, "timer_state_event") ||
462 LIT_STRNEQL(name, "get_vfp_enabled"))
463 return TRUE;
464
465 if (LIT_STRNSTART(name, "_ZNK15OSMetaClassBase8metaCastEPK11OSMetaClass") ||
466 LIT_STRNSTART(name, "_ZN15OSMetaClassBase12safeMetaCastEPKS_PK11OSMetaClass") ||
467 LIT_STRNSTART(name, "_ZNK11OSMetaClass13checkMetaCastEPK15OSMetaClassBase"))
468 return TRUE;
469#endif
470
5ba3f43e
A
471#ifdef __x86_64__
472 if (LIT_STRNSTART(name, "machine_") ||
d9a64523
A
473 LIT_STRNSTART(name, "idt64") ||
474 LIT_STRNSTART(name, "ks_") ||
475 LIT_STRNSTART(name, "hndl_") ||
476 LIT_STRNSTART(name, "_intr_") ||
5ba3f43e
A
477 LIT_STRNSTART(name, "mapping_") ||
478 LIT_STRNSTART(name, "tsc_") ||
479 LIT_STRNSTART(name, "pmCPU") ||
480 LIT_STRNSTART(name, "pms") ||
481 LIT_STRNSTART(name, "usimple_") ||
482 LIT_STRNSTART(name, "lck_spin_lock") ||
483 LIT_STRNSTART(name, "lck_spin_unlock") ||
484 LIT_STRNSTART(name, "absolutetime_to_") ||
485 LIT_STRNSTART(name, "commpage_") ||
486 LIT_STRNSTART(name, "ml_") ||
487 LIT_STRNSTART(name, "PE_") ||
488 LIT_STRNSTART(name, "act_machine") ||
489 LIT_STRNSTART(name, "acpi_") ||
490 LIT_STRNSTART(name, "pal_")) {
491 return TRUE;
492 }
493 // Don't Steal Mac OS X
494 if (LIT_STRNSTART(name, "dsmos_"))
495 return TRUE;
496
497#endif
498
499 /*
500 * Place no probes that could be hit on the way to the debugger.
501 */
502 if (LIT_STRNSTART(name, "kdp_") ||
503 LIT_STRNSTART(name, "kdb_") ||
504 LIT_STRNSTART(name, "debug_")) {
505 return TRUE;
506 }
507
a39ff7e2
A
508#if KASAN
509 if (LIT_STRNSTART(name, "kasan") ||
510 LIT_STRNSTART(name, "__kasan") ||
511 LIT_STRNSTART(name, "__asan")) {
512 return TRUE;
513 }
514#endif
515
5ba3f43e
A
516 /*
517 * Place no probes that could be hit on the way to a panic.
518 */
519 if (NULL != strstr(name, "panic_"))
520 return TRUE;
521
522 return FALSE;
523}
524
525
2d21ac55
A
526/*ARGSUSED*/
527static void
528fbt_destroy(void *arg, dtrace_id_t id, void *parg)
529{
530#pragma unused(arg,id)
531 fbt_probe_t *fbt = parg, *next, *hash, *last;
532 int ndx;
533
534 do {
535 /*
536 * Now we need to remove this probe from the fbt_probetab.
537 */
538 ndx = FBT_ADDR2NDX(fbt->fbtp_patchpoint);
539 last = NULL;
540 hash = fbt_probetab[ndx];
541
542 while (hash != fbt) {
543 ASSERT(hash != NULL);
544 last = hash;
545 hash = hash->fbtp_hashnext;
546 }
547
548 if (last != NULL) {
549 last->fbtp_hashnext = fbt->fbtp_hashnext;
550 } else {
551 fbt_probetab[ndx] = fbt->fbtp_hashnext;
552 }
553
554 next = fbt->fbtp_next;
555 kmem_free(fbt, sizeof (fbt_probe_t));
556
557 fbt = next;
558 } while (fbt != NULL);
559}
560
561/*ARGSUSED*/
6d2010ae 562int
2d21ac55
A
563fbt_enable(void *arg, dtrace_id_t id, void *parg)
564{
565#pragma unused(arg,id)
566 fbt_probe_t *fbt = parg;
6d2010ae
A
567 struct modctl *ctl = NULL;
568
569 for (; fbt != NULL; fbt = fbt->fbtp_next) {
2d21ac55 570
6d2010ae 571 ctl = fbt->fbtp_ctl;
d9a64523 572
6d2010ae 573 if (!ctl->mod_loaded) {
2d21ac55 574 if (fbt_verbose) {
6d2010ae
A
575 cmn_err(CE_NOTE, "fbt is failing for probe %s "
576 "(module %s unloaded)",
2d21ac55
A
577 fbt->fbtp_name, ctl->mod_modname);
578 }
6d2010ae
A
579
580 continue;
2d21ac55 581 }
6d2010ae
A
582
583 /*
584 * Now check that our modctl has the expected load count. If it
585 * doesn't, this module must have been unloaded and reloaded -- and
586 * we're not going to touch it.
587 */
588 if (ctl->mod_loadcnt != fbt->fbtp_loadcnt) {
589 if (fbt_verbose) {
590 cmn_err(CE_NOTE, "fbt is failing for probe %s "
591 "(module %s reloaded)",
592 fbt->fbtp_name, ctl->mod_modname);
593 }
594
595 continue;
d9a64523 596 }
6d2010ae 597
2d21ac55
A
598 dtrace_casptr(&tempDTraceTrapHook, NULL, fbt_perfCallback);
599 if (tempDTraceTrapHook != (perfCallback)fbt_perfCallback) {
600 if (fbt_verbose) {
601 cmn_err(CE_NOTE, "fbt_enable is failing for probe %s "
602 "in module %s: tempDTraceTrapHook already occupied.",
603 fbt->fbtp_name, ctl->mod_modname);
604 }
6d2010ae 605 continue;
2d21ac55
A
606 }
607
6d2010ae 608 if (fbt->fbtp_currentval != fbt->fbtp_patchval) {
a39ff7e2
A
609#if KASAN
610 /* Since dtrace probes can call into KASan and vice versa, things can get
611 * very slow if we have a lot of probes. This call will disable the KASan
612 * fakestack after a threshold of probes is reached. */
613 kasan_fakestack_suspend();
614#endif
615
d9a64523 616 (void)ml_nofault_copy( (vm_offset_t)&fbt->fbtp_patchval, (vm_offset_t)fbt->fbtp_patchpoint,
2d21ac55 617 sizeof(fbt->fbtp_patchval));
39037602
A
618 /*
619 * Make the patched instruction visible via a data + instruction
620 * cache flush for the platforms that need it
621 */
622 flush_dcache((vm_offset_t)fbt->fbtp_patchpoint,(vm_size_t)sizeof(fbt->fbtp_patchval), 0);
623 invalidate_icache((vm_offset_t)fbt->fbtp_patchpoint,(vm_size_t)sizeof(fbt->fbtp_patchval), 0);
6d2010ae 624 fbt->fbtp_currentval = fbt->fbtp_patchval;
39037602 625
6d2010ae
A
626 ctl->mod_nenabled++;
627 }
628
629 }
d9a64523 630
6d2010ae 631 dtrace_membar_consumer();
d9a64523 632
6d2010ae 633 return (0);
2d21ac55
A
634}
635
636/*ARGSUSED*/
637static void
638fbt_disable(void *arg, dtrace_id_t id, void *parg)
639{
640#pragma unused(arg,id)
641 fbt_probe_t *fbt = parg;
6d2010ae
A
642 struct modctl *ctl = NULL;
643
644 for (; fbt != NULL; fbt = fbt->fbtp_next) {
645 ctl = fbt->fbtp_ctl;
d9a64523 646
6d2010ae
A
647 if (!ctl->mod_loaded || (ctl->mod_loadcnt != fbt->fbtp_loadcnt))
648 continue;
2d21ac55 649
6d2010ae 650 if (fbt->fbtp_currentval != fbt->fbtp_savedval) {
d9a64523 651 (void)ml_nofault_copy( (vm_offset_t)&fbt->fbtp_savedval, (vm_offset_t)fbt->fbtp_patchpoint,
2d21ac55 652 sizeof(fbt->fbtp_savedval));
39037602
A
653 /*
654 * Make the patched instruction visible via a data + instruction
655 * cache flush for the platforms that need it
656 */
657 flush_dcache((vm_offset_t)fbt->fbtp_patchpoint,(vm_size_t)sizeof(fbt->fbtp_patchval), 0);
658 invalidate_icache((vm_offset_t)fbt->fbtp_patchpoint,(vm_size_t)sizeof(fbt->fbtp_patchval), 0);
659
6d2010ae
A
660 fbt->fbtp_currentval = fbt->fbtp_savedval;
661 ASSERT(ctl->mod_nenabled > 0);
662 ctl->mod_nenabled--;
a39ff7e2
A
663
664#if KASAN
665 kasan_fakestack_resume();
666#endif
6d2010ae
A
667 }
668 }
2d21ac55
A
669 dtrace_membar_consumer();
670}
671
672/*ARGSUSED*/
673static void
674fbt_suspend(void *arg, dtrace_id_t id, void *parg)
675{
676#pragma unused(arg,id)
677 fbt_probe_t *fbt = parg;
6d2010ae 678 struct modctl *ctl = NULL;
2d21ac55 679
6d2010ae
A
680 for (; fbt != NULL; fbt = fbt->fbtp_next) {
681 ctl = fbt->fbtp_ctl;
682
683 ASSERT(ctl->mod_nenabled > 0);
684 if (!ctl->mod_loaded || (ctl->mod_loadcnt != fbt->fbtp_loadcnt))
685 continue;
686
d9a64523 687 (void)ml_nofault_copy( (vm_offset_t)&fbt->fbtp_savedval, (vm_offset_t)fbt->fbtp_patchpoint,
2d21ac55 688 sizeof(fbt->fbtp_savedval));
d9a64523 689
39037602
A
690 /*
691 * Make the patched instruction visible via a data + instruction
692 * cache flush for the platforms that need it
693 */
694 flush_dcache((vm_offset_t)fbt->fbtp_patchpoint,(vm_size_t)sizeof(fbt->fbtp_savedval), 0);
695 invalidate_icache((vm_offset_t)fbt->fbtp_patchpoint,(vm_size_t)sizeof(fbt->fbtp_savedval), 0);
d9a64523 696
39037602 697 fbt->fbtp_currentval = fbt->fbtp_savedval;
6d2010ae 698 }
d9a64523 699
2d21ac55
A
700 dtrace_membar_consumer();
701}
702
703/*ARGSUSED*/
704static void
705fbt_resume(void *arg, dtrace_id_t id, void *parg)
706{
707#pragma unused(arg,id)
708 fbt_probe_t *fbt = parg;
6d2010ae 709 struct modctl *ctl = NULL;
2d21ac55 710
6d2010ae
A
711 for (; fbt != NULL; fbt = fbt->fbtp_next) {
712 ctl = fbt->fbtp_ctl;
713
714 ASSERT(ctl->mod_nenabled > 0);
715 if (!ctl->mod_loaded || (ctl->mod_loadcnt != fbt->fbtp_loadcnt))
716 continue;
d9a64523 717
6d2010ae
A
718 dtrace_casptr(&tempDTraceTrapHook, NULL, fbt_perfCallback);
719 if (tempDTraceTrapHook != (perfCallback)fbt_perfCallback) {
2d21ac55
A
720 if (fbt_verbose) {
721 cmn_err(CE_NOTE, "fbt_resume is failing for probe %s "
722 "in module %s: tempDTraceTrapHook already occupied.",
723 fbt->fbtp_name, ctl->mod_modname);
724 }
725 return;
6d2010ae 726 }
d9a64523
A
727
728 (void)ml_nofault_copy( (vm_offset_t)&fbt->fbtp_patchval, (vm_offset_t)fbt->fbtp_patchpoint,
2d21ac55 729 sizeof(fbt->fbtp_patchval));
39236c6e 730
5ba3f43e
A
731 /*
732 * Make the patched instruction visible via a data + instruction cache flush.
733 */
734 flush_dcache((vm_offset_t)fbt->fbtp_patchpoint,(vm_size_t)sizeof(fbt->fbtp_patchval), 0);
735 invalidate_icache((vm_offset_t)fbt->fbtp_patchpoint,(vm_size_t)sizeof(fbt->fbtp_patchval), 0);
2d21ac55 736
d9a64523 737 fbt->fbtp_currentval = fbt->fbtp_patchval;
2d21ac55 738 }
2d21ac55 739
d9a64523 740 dtrace_membar_consumer();
2d21ac55 741}
2d21ac55 742
5ba3f43e
A
743static void
744fbt_provide_module_user_syms(struct modctl *ctl)
745{
746 unsigned int i;
747 char *modname = ctl->mod_modname;
748
749 dtrace_module_symbols_t* module_symbols = ctl->mod_user_symbols;
750 if (module_symbols) {
751 for (i=0; i<module_symbols->dtmodsyms_count; i++) {
752
753 /*
754 * symbol->dtsym_addr (the symbol address) passed in from
755 * user space, is already slid for both kexts and kernel.
756 */
757 dtrace_symbol_t* symbol = &module_symbols->dtmodsyms_symbols[i];
758
759 char* name = symbol->dtsym_name;
760
761 /* Lop off omnipresent leading underscore. */
762 if (*name == '_')
763 name += 1;
764
d9a64523
A
765 if (MOD_IS_MACH_KERNEL(ctl) && fbt_excluded(name))
766 continue;
5ba3f43e
A
767
768 /*
769 * Ignore symbols with a null address
770 */
771 if (!symbol->dtsym_addr)
772 continue;
773
d9a64523
A
774 /*
775 * Ignore symbols not part of this module
776 */
777 if (!dtrace_addr_in_module((void*)symbol->dtsym_addr, ctl))
778 continue;
779
780 fbt_provide_probe(ctl, modname, name, (machine_inst_t*)(uintptr_t)symbol->dtsym_addr, (machine_inst_t*)(uintptr_t)(symbol->dtsym_addr + symbol->dtsym_size));
5ba3f43e
A
781 }
782 }
783}
d9a64523
A
784static void
785fbt_provide_kernel_section(struct modctl *ctl, kernel_section_t *sect, kernel_nlist_t *sym, uint32_t nsyms, const char *strings)
786{
787 uintptr_t sect_start = (uintptr_t)sect->addr;
788 uintptr_t sect_end = (uintptr_t)sect->size + sect->addr;
789 unsigned int i;
790
791 if ((sect->flags & S_ATTR_PURE_INSTRUCTIONS) != S_ATTR_PURE_INSTRUCTIONS) {
792 return;
793 }
794
795 for (i = 0; i < nsyms; i++) {
796 uint8_t n_type = sym[i].n_type & (N_TYPE | N_EXT);
797 const char *name = strings + sym[i].n_un.n_strx;
798 uint64_t limit;
799
800 if (sym[i].n_value < sect_start || sym[i].n_value > sect_end)
801 continue;
802
803 /* Check that the symbol is a global and that it has a name. */
804 if (((N_SECT | N_EXT) != n_type && (N_ABS | N_EXT) != n_type))
805 continue;
806
807 if (0 == sym[i].n_un.n_strx) /* iff a null, "", name. */
808 continue;
809
810 /* Lop off omnipresent leading underscore. */
811 if (*name == '_')
812 name += 1;
813
814#if defined(__arm__)
815 // Skip non-thumb functions on arm32
816 if (sym[i].n_sect == 1 && !(sym[i].n_desc & N_ARM_THUMB_DEF)) {
817 continue;
818 }
819#endif /* defined(__arm__) */
820
821 if (MOD_IS_MACH_KERNEL(ctl) && fbt_excluded(name))
822 continue;
823
824 /*
825 * Find the function boundary by looking at either the
826 * end of the section or the beginning of the next symbol
827 */
828 if (i == nsyms - 1) {
829 limit = sect_end;
830 }
831 else {
832 limit = sym[i + 1].n_value;
833 }
834
835 fbt_provide_probe(ctl, ctl->mod_modname, name, (machine_inst_t*)sym[i].n_value, (machine_inst_t*)limit);
836 }
837
838}
839
840static int
841fbt_sym_cmp(const void *ap, const void *bp)
842{
843 return (int)(((const kernel_nlist_t*)ap)->n_value - ((const kernel_nlist_t*)bp)->n_value);
844}
845
846static void
847fbt_provide_module_kernel_syms(struct modctl *ctl)
848{
849 kernel_mach_header_t *mh = (kernel_mach_header_t *)(ctl->mod_address);
850 kernel_segment_command_t *seg;
851 struct load_command *cmd;
852 kernel_segment_command_t *linkedit = NULL;
853 struct symtab_command *symtab = NULL;
854 kernel_nlist_t *syms = NULL, *sorted_syms = NULL;
855 const char *strings;
856 unsigned int i;
857 size_t symlen;
858
859 if (mh->magic != MH_MAGIC_KERNEL)
860 return;
861
862 cmd = (struct load_command *) &mh[1];
863 for (i = 0; i < mh->ncmds; i++) {
864 if (cmd->cmd == LC_SEGMENT_KERNEL) {
865 kernel_segment_command_t *orig_sg = (kernel_segment_command_t *) cmd;
866 if (LIT_STRNEQL(orig_sg->segname, SEG_LINKEDIT))
867 linkedit = orig_sg;
868 } else if (cmd->cmd == LC_SYMTAB) {
869 symtab = (struct symtab_command *) cmd;
870 }
871 if (symtab && linkedit) {
872 break;
873 }
874 cmd = (struct load_command *) ((caddr_t) cmd + cmd->cmdsize);
875 }
5ba3f43e 876
d9a64523
A
877 if ((symtab == NULL) || (linkedit == NULL)) {
878 return;
879 }
880
881 syms = (kernel_nlist_t *)(linkedit->vmaddr + symtab->symoff - linkedit->fileoff);
882 strings = (const char *)(linkedit->vmaddr + symtab->stroff - linkedit->fileoff);
883
884 /*
885 * Make a copy of the symbol table and sort it to not cross into the next function
886 * when disassembling the function
887 */
888 symlen = sizeof(kernel_nlist_t) * symtab->nsyms;
889 sorted_syms = kmem_alloc(symlen, KM_SLEEP);
890 bcopy(syms, sorted_syms, symlen);
891 qsort(sorted_syms, symtab->nsyms, sizeof(kernel_nlist_t), fbt_sym_cmp);
892
893 for (seg = firstsegfromheader(mh); seg != NULL; seg = nextsegfromheader(mh, seg)) {
894 kernel_section_t *sect = firstsect(seg);
895
896 if (strcmp(seg->segname, "__KLD") == 0) {
897 continue;
898 }
899
900 for (sect = firstsect(seg); sect != NULL; sect = nextsect(seg, sect)) {
901 fbt_provide_kernel_section(ctl, sect, sorted_syms, symtab->nsyms, strings);
902 }
903 }
904
905 kmem_free(sorted_syms, symlen);
906}
5ba3f43e
A
907
908void
909fbt_provide_module(void *arg, struct modctl *ctl)
910{
911#pragma unused(arg)
912 ASSERT(ctl != NULL);
913 ASSERT(dtrace_kernel_symbol_mode != DTRACE_KERNEL_SYMBOLS_NEVER);
914 LCK_MTX_ASSERT(&mod_lock, LCK_MTX_ASSERT_OWNED);
915
916 // Update the "ignore blacklist" bit
917 if (ignore_fbt_blacklist)
918 ctl->mod_flags |= MODCTL_FBT_PROVIDE_BLACKLISTED_PROBES;
919
920 if (MOD_FBT_DONE(ctl))
921 return;
922
923 if (fbt_module_excluded(ctl)) {
924 ctl->mod_flags |= MODCTL_FBT_INVALID;
925 return;
926 }
927
928 if (MOD_HAS_KERNEL_SYMBOLS(ctl)) {
929 fbt_provide_module_kernel_syms(ctl);
930 ctl->mod_flags |= MODCTL_FBT_PROBES_PROVIDED;
931 if (MOD_FBT_PROVIDE_BLACKLISTED_PROBES(ctl))
932 ctl->mod_flags |= MODCTL_FBT_BLACKLISTED_PROBES_PROVIDED;
933 return;
934 }
935
936 if (MOD_HAS_USERSPACE_SYMBOLS(ctl)) {
937 fbt_provide_module_user_syms(ctl);
938 ctl->mod_flags |= MODCTL_FBT_PROBES_PROVIDED;
939 if (MOD_FBT_PROVIDE_PRIVATE_PROBES(ctl))
940 ctl->mod_flags |= MODCTL_FBT_PRIVATE_PROBES_PROVIDED;
941 if (MOD_FBT_PROVIDE_BLACKLISTED_PROBES(ctl))
942 ctl->mod_flags |= MODCTL_FBT_BLACKLISTED_PROBES_PROVIDED;
943 return;
944 }
945}
946
2d21ac55
A
947static dtrace_pattr_t fbt_attr = {
948{ DTRACE_STABILITY_EVOLVING, DTRACE_STABILITY_EVOLVING, DTRACE_CLASS_ISA },
949{ DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_UNKNOWN },
950{ DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_UNKNOWN },
951{ DTRACE_STABILITY_EVOLVING, DTRACE_STABILITY_EVOLVING, DTRACE_CLASS_ISA },
952{ DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_ISA },
953};
954
955static dtrace_pops_t fbt_pops = {
d9a64523
A
956 .dtps_provide = NULL,
957 .dtps_provide_module = fbt_provide_module,
958 .dtps_enable = fbt_enable,
959 .dtps_disable = fbt_disable,
960 .dtps_suspend = fbt_suspend,
961 .dtps_resume = fbt_resume,
962 .dtps_getargdesc = NULL, /* APPLE NOTE: fbt_getargdesc implemented in userspace */
963 .dtps_getargval = NULL,
964 .dtps_usermode = NULL,
965 .dtps_destroy = fbt_destroy
2d21ac55
A
966};
967
968static void
969fbt_cleanup(dev_info_t *devi)
970{
971 dtrace_invop_remove(fbt_invop);
972 ddi_remove_minor_node(devi, NULL);
973 kmem_free(fbt_probetab, fbt_probetab_size * sizeof (fbt_probe_t *));
974 fbt_probetab = NULL;
975 fbt_probetab_mask = 0;
976}
977
978static int
d9a64523 979fbt_attach(dev_info_t *devi)
2d21ac55 980{
2d21ac55
A
981 if (fbt_probetab_size == 0)
982 fbt_probetab_size = FBT_PROBETAB_SIZE;
983
984 fbt_probetab_mask = fbt_probetab_size - 1;
985 fbt_probetab =
986 kmem_zalloc(fbt_probetab_size * sizeof (fbt_probe_t *), KM_SLEEP);
987
988 dtrace_invop_add(fbt_invop);
989
b0d623f7
A
990 if (ddi_create_minor_node(devi, "fbt", S_IFCHR, 0,
991 DDI_PSEUDO, 0) == DDI_FAILURE ||
992 dtrace_register("fbt", &fbt_attr, DTRACE_PRIV_KERNEL, NULL,
993 &fbt_pops, NULL, &fbt_id) != 0) {
994 fbt_cleanup(devi);
995 return (DDI_FAILURE);
996 }
2d21ac55 997
2d21ac55
A
998 return (DDI_SUCCESS);
999}
1000
1001static d_open_t _fbt_open;
1002
1003static int
1004_fbt_open(dev_t dev, int flags, int devtype, struct proc *p)
1005{
1006#pragma unused(dev,flags,devtype,p)
1007 return 0;
1008}
1009
1010#define FBT_MAJOR -24 /* let the kernel pick the device number */
1011
5ba3f43e
A
1012SYSCTL_DECL(_kern_dtrace);
1013
1014static int
1015sysctl_dtrace_ignore_fbt_blacklist SYSCTL_HANDLER_ARGS
1016{
1017#pragma unused(oidp, arg2)
1018 int err;
1019 int value = *(int*)arg1;
1020
1021 err = sysctl_io_number(req, value, sizeof(value), &value, NULL);
1022 if (err)
1023 return (err);
1024 if (req->newptr) {
1025 if (!(value == 0 || value == 1))
1026 return (ERANGE);
1027
1028 /*
1029 * We do not allow setting the blacklist back to on, as we have no way
1030 * of knowing if those unsafe probes are still used.
1031 *
1032 * If we are using kernel symbols, we also do not allow any change,
1033 * since the symbols are jettison'd after the first pass.
1034 *
1035 * We do not need to take any locks here because those symbol modes
1036 * are permanent and do not change after boot.
1037 */
1038 if (value != 1 || dtrace_kernel_symbol_mode == DTRACE_KERNEL_SYMBOLS_NEVER ||
1039 dtrace_kernel_symbol_mode == DTRACE_KERNEL_SYMBOLS_ALWAYS_FROM_KERNEL)
1040 return (EPERM);
1041
1042 ignore_fbt_blacklist = 1;
1043 }
1044
1045 return (0);
1046}
1047
1048SYSCTL_PROC(_kern_dtrace, OID_AUTO, ignore_fbt_blacklist,
1049 CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED,
1050 &ignore_fbt_blacklist, 0,
1051 sysctl_dtrace_ignore_fbt_blacklist, "I", "fbt provider ignore blacklist");
1052
2d21ac55
A
1053/*
1054 * A struct describing which functions will get invoked for certain
1055 * actions.
1056 */
1057static struct cdevsw fbt_cdevsw =
1058{
1059 _fbt_open, /* open */
1060 eno_opcl, /* close */
1061 eno_rdwrt, /* read */
1062 eno_rdwrt, /* write */
1063 eno_ioctl, /* ioctl */
1064 (stop_fcn_t *)nulldev, /* stop */
1065 (reset_fcn_t *)nulldev, /* reset */
1066 NULL, /* tty's */
1067 eno_select, /* select */
1068 eno_mmap, /* mmap */
1069 eno_strat, /* strategy */
1070 eno_getc, /* getc */
1071 eno_putc, /* putc */
1072 0 /* type */
1073};
1074
2d21ac55
A
1075#undef kmem_alloc /* from its binding to dt_kmem_alloc glue */
1076#undef kmem_free /* from its binding to dt_kmem_free glue */
1077#include <vm/vm_kern.h>
1078
1079void
1080fbt_init( void )
1081{
d9a64523 1082 int majdevno = cdevsw_add(FBT_MAJOR, &fbt_cdevsw);
2d21ac55 1083
d9a64523
A
1084 if (majdevno < 0) {
1085 printf("fbt_init: failed to allocate a major number!\n");
1086 return;
2d21ac55 1087 }
d9a64523
A
1088
1089 PE_parse_boot_argn("IgnoreFBTBlacklist", &ignore_fbt_blacklist, sizeof (ignore_fbt_blacklist));
1090
1091 fbt_attach((dev_info_t*)(uintptr_t)majdevno);
2d21ac55
A
1092}
1093#undef FBT_MAJOR