]> git.saurik.com Git - apple/xnu.git/blob - bsd/dev/i386/fbt_x86.c
xnu-3789.60.24.tar.gz
[apple/xnu.git] / bsd / dev / i386 / fbt_x86.c
1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License, Version 1.0 only
6 * (the "License"). You may not use this file except in compliance
7 * with the License.
8 *
9 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10 * or http://www.opensolaris.org/os/licensing.
11 * See the License for the specific language governing permissions
12 * and limitations under the License.
13 *
14 * When distributing Covered Code, include this CDDL HEADER in each
15 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16 * If applicable, add the following below this CDDL HEADER, with the
17 * fields enclosed by brackets "[]" replaced with your own identifying
18 * information: Portions Copyright [yyyy] [name of copyright owner]
19 *
20 * CDDL HEADER END
21 */
22 /*
23 * Copyright 2005 Sun Microsystems, Inc. All rights reserved.
24 * Use is subject to license terms.
25 */
26
27 /* #pragma ident "@(#)fbt.c 1.15 05/09/19 SMI" */
28
29 #ifdef KERNEL
30 #ifndef _KERNEL
31 #define _KERNEL /* Solaris vs. Darwin */
32 #endif
33 #endif
34
35 #define MACH__POSIX_C_SOURCE_PRIVATE 1 /* pulls in suitable savearea from mach/ppc/thread_status.h */
36 #include <kern/thread.h>
37 #include <mach/thread_status.h>
38 #include <mach/vm_param.h>
39 #include <mach-o/loader.h>
40 #include <mach-o/nlist.h>
41 #include <libkern/kernel_mach_header.h>
42 #include <libkern/OSAtomic.h>
43
44 #include <sys/param.h>
45 #include <sys/systm.h>
46 #include <sys/errno.h>
47 #include <sys/stat.h>
48 #include <sys/ioctl.h>
49 #include <sys/conf.h>
50 #include <sys/fcntl.h>
51 #include <miscfs/devfs/devfs.h>
52
53 #include <sys/dtrace.h>
54 #include <sys/dtrace_impl.h>
55 #include <sys/fbt.h>
56
57 #include <sys/dtrace_glue.h>
58
/*
 * Emulation codes handed back through dtrace_invop() and the number of
 * bytes fbt_perfCallback() adds to the saved %rip to step over the
 * instruction that was patched.  DTRACE_INVOP_MOVL_ESP_EBP(_SKIP) and
 * DTRACE_INVOP_POP_RBP_SKIP are not referenced by the code visible in
 * this file (the pop %rbp case reuses DTRACE_INVOP_LEAVE_SKIP).
 */
#define DTRACE_INVOP_NOP_SKIP 1
#define DTRACE_INVOP_MOVL_ESP_EBP 10
#define DTRACE_INVOP_MOVL_ESP_EBP_SKIP 2
#define DTRACE_INVOP_MOV_RSP_RBP 11
#define DTRACE_INVOP_MOV_RSP_RBP_SKIP 3
#define DTRACE_INVOP_POP_RBP 12
#define DTRACE_INVOP_POP_RBP_SKIP 1
#define DTRACE_INVOP_LEAVE_SKIP 1

/* 32-bit prologue opcode bytes: pushl %ebp and the two encodings of movl %esp,%ebp. */
#define FBT_PUSHL_EBP 0x55
#define FBT_MOVL_ESP_EBP0_V0 0x8b
#define FBT_MOVL_ESP_EBP1_V0 0xec
#define FBT_MOVL_ESP_EBP0_V1 0x89
#define FBT_MOVL_ESP_EBP1_V1 0xe5

/*
 * 64-bit prologue/epilogue opcode bytes matched by __provide_probe_64():
 * push %rbp, then the three bytes (REX prefix, opcode, modrm) of
 * mov %rsp,%rbp, and pop %rbp.
 */
#define FBT_PUSH_RBP 0x55
#define FBT_REX_RSP_RBP 0x48
#define FBT_MOV_RSP_RBP0 0x89
#define FBT_MOV_RSP_RBP1 0xe5
#define FBT_POP_RBP 0x5d

/*
 * Opcode bytes, and the decoded instruction lengths that must accompany
 * them, used when looking for the instruction that follows a
 * pop %rbp / leave at a return site.
 */
#define FBT_POPL_EBP 0x5d
#define FBT_RET 0xc3
#define FBT_RET_IMM16 0xc2
#define FBT_LEAVE 0xc9
#define FBT_JMP_SHORT_REL 0xeb /* Jump short, relative, displacement relative to next instr. */
#define FBT_JMP_NEAR_REL 0xe9 /* Jump near, relative, displacement relative to next instr. */
#define FBT_JMP_FAR_ABS 0xea /* Jump far, absolute, address given in operand */
#define FBT_RET_LEN 1
#define FBT_RET_IMM16_LEN 3
#define FBT_JMP_SHORT_REL_LEN 2
#define FBT_JMP_NEAR_REL_LEN 5
#define FBT_JMP_FAR_ABS_LEN 5

/*
 * FBT_PATCHVAL is recorded as fbtp_patchval on every probe created here.
 * FBT_AFRAMES_* are passed as the frame-count argument of
 * dtrace_probe_create() for entry and return probes respectively.
 */
#define FBT_PATCHVAL 0xf0
#define FBT_AFRAMES_ENTRY 7
#define FBT_AFRAMES_RETURN 6

/* Probe name components used for dtrace_probe_lookup()/dtrace_probe_create(). */
#define FBT_ENTRY "entry"
#define FBT_RETURN "return"
/* Hash a patch-point address into fbt_probetab: drop the low 4 bits, then mask. */
#define FBT_ADDR2NDX(addr) ((((uintptr_t)(addr)) >> 4) & fbt_probetab_mask)
100
/* Provider state defined outside this file. */
extern dtrace_provider_id_t fbt_id;	/* provider id passed to dtrace_probe_lookup/create/arg */
extern fbt_probe_t **fbt_probetab;	/* hash table of probes, indexed by FBT_ADDR2NDX(patchpoint) */
extern int fbt_probetab_mask;	/* index mask applied by FBT_ADDR2NDX */

extern int gIgnoreFBTBlacklist; /* From fbt_init */

/* Trap callback implemented below; declared here so it has a prototype in scope. */
kern_return_t fbt_perfCallback(int, x86_saved_state_t *, uintptr_t *, __unused int);
108
/*
 * Critical routines that must not be probed. PR_5221096, PR_5379018.
 *
 * is_symbol_valid() looks names up in this table with bsearch() and a
 * strcmp-style comparator, so the entries MUST stay sorted in byte
 * (ASCII) order: upper-case letters sort before '_', which sorts before
 * lower-case letters.
 *
 * Both the strings and the pointer array are const so the table can be
 * placed in read-only data.
 */

static const char * const critical_blacklist[] =
{
	"bcopy_phys",
	"console_cpu_alloc",
	"console_cpu_free",
	"cpu_IA32e_disable",
	"cpu_IA32e_enable",
	"cpu_NMI_interrupt",
	"cpu_control",
	"cpu_data_alloc",
	"cpu_desc_init",
	"cpu_desc_init64",
	"cpu_desc_load",
	"cpu_desc_load64",
	"cpu_exit_wait",
	"cpu_info",
	"cpu_info_count",
	"cpu_init",
	"cpu_interrupt",
	"cpu_machine_init",
	"cpu_mode_init",
	"cpu_processor_alloc",
	"cpu_processor_free",
	"cpu_signal_handler",
	"cpu_sleep",
	"cpu_start",
	"cpu_subtype",
	"cpu_thread_alloc",
	"cpu_thread_halt",
	"cpu_thread_init",
	"cpu_threadtype",
	"cpu_to_processor",
	"cpu_topology_sort",
	"cpu_topology_start_cpu",
	"cpu_type",
	"cpuid_cpu_display",
	"cpuid_extfeatures",
	"handle_pending_TLB_flushes",
	"hw_compare_and_store",
	"machine_idle_cstate",
	"mca_cpu_alloc",
	"mca_cpu_init",
	"ml_nofault_copy",
	"pmap_cpu_alloc",
	"pmap_cpu_free",
	"pmap_cpu_high_map_vaddr",
	"pmap_cpu_high_shared_remap",
	"pmap_cpu_init",
	"register_cpu_setup_func",
	"unregister_cpu_setup_func",
	"vstart"
};
/* Number of entries in critical_blacklist. */
#define CRITICAL_BLACKLIST_COUNT (sizeof(critical_blacklist)/sizeof(critical_blacklist[0]))
167
/*
 * The transitive closure of entry points that can be reached from probe context.
 * (Apart from routines whose names begin with dtrace_).
 *
 * Like critical_blacklist, this table is searched with bsearch() by
 * is_symbol_valid(), so the entries MUST stay sorted in byte (ASCII)
 * order: upper-case letters, then '_', then lower-case letters.
 *
 * Both the strings and the pointer array are const so the table can be
 * placed in read-only data.
 */
static const char * const probe_ctx_closure[] =
{
	"Debugger",
	"IS_64BIT_PROCESS",
	"OSCompareAndSwap",
	"_disable_preemption",
	"_enable_preemption",
	"absolutetime_to_microtime",
	"act_set_astbsd",
	"ast_dtrace_on",
	"ast_pending",
	"clock_get_calendar_nanotime_nowait",
	"copyin",
	"copyin_user",
	"copyinstr",
	"copyout",
	"copyoutstr",
	"cpu_number",
	"current_proc",
	"current_processor",
	"current_task",
	"current_thread",
	"debug_enter",
	"find_user_regs",
	"flush_tlb64",
	"get_bsdtask_info",
	"get_bsdthread_info",
	"hw_atomic_and",
	"kauth_cred_get",
	"kauth_getgid",
	"kauth_getuid",
	"kernel_preempt_check",
	"mach_absolute_time",
	"max_valid_stack_address",
	"ml_at_interrupt_context",
	"ml_phys_write_byte_64",
	"ml_phys_write_half_64",
	"ml_phys_write_word_64",
	"ml_set_interrupts_enabled",
	"panic",
	"pmap64_pde",
	"pmap64_pdpt",
	"pmap_find_phys",
	"pmap_get_mapwindow",
	"pmap_pde",
	"pmap_pte",
	"pmap_put_mapwindow",
	"pmap_valid_page",
	"prf",
	"proc_is64bit",
	"proc_selfname",
	"psignal_lock",
	"rtc_nanotime_load",
	"rtc_nanotime_read",
	"sdt_getargdesc",
	"strlcpy",
	"sync_iss_to_iks_unconditionally",
	"systrace_stub",
	"timer_grab"
};
/* Number of entries in probe_ctx_closure. */
#define PROBE_CTX_CLOSURE_COUNT (sizeof(probe_ctx_closure)/sizeof(probe_ctx_closure[0]))
233
234
/*
 * bsearch() comparator for the name tables.
 *
 * a: the key, a NUL-terminated symbol name (const char *).
 * b: pointer to a table element, i.e. a pointer to a const char *.
 *
 * Returns <0, 0 or >0 as the key sorts before, equal to, or after the
 * table entry.  The previous strncmp(a, *b, strlen(a) + 1) compared up
 * to and including the key's terminator, which is exactly strcmp()
 * without the extra pass over the key to measure it.
 */
static int _cmp(const void *a, const void *b)
{
	return strcmp((const char *)a, *(const char * const *)b);
}
239
/*
 * Binary search over a sorted array.
 *
 * key:    object handed to compar as its first argument.
 * base0:  first element of the array.
 * nmemb:  number of elements.
 * size:   size in bytes of one element.
 * compar: called as compar(key, element); must return <0, 0 or >0.
 *
 * Returns a pointer to a matching element, or NULL when none matches.
 */
static const void * bsearch(
	const void *key,
	const void *base0,
	size_t nmemb,
	size_t size,
	int (*compar)(const void *, const void *)) {

	const char *window = base0;	/* start of the range still under consideration */
	size_t remaining = nmemb;	/* number of elements in that range */

	while (remaining != 0) {
		const char *probe = window + (remaining / 2) * size;
		int order = compar(key, probe);

		if (order == 0)
			return probe;

		if (order > 0) {
			/* Key sorts after the probe: continue in the upper half. */
			window = probe + size;
			remaining--;
		}
		/* Otherwise the key sorts before the probe: keep the lower half. */

		remaining /= 2;
	}

	return (NULL);
}
264
265 /*
266 * Module validation
267 */
268 static int
269 is_module_valid(struct modctl* ctl)
270 {
271 ASSERT(!MOD_FBT_PROBES_PROVIDED(ctl));
272 ASSERT(!MOD_FBT_INVALID(ctl));
273
274 if (0 == ctl->mod_address || 0 == ctl->mod_size) {
275 return FALSE;
276 }
277
278 if (0 == ctl->mod_loaded) {
279 return FALSE;
280 }
281
282 if (strstr(ctl->mod_modname, "CHUD") != NULL)
283 return FALSE;
284
285 /*
286 * If the user sets this, trust they know what they are doing.
287 */
288 if (gIgnoreFBTBlacklist) /* per boot-arg set in fbt_init() */
289 return TRUE;
290
291 /*
292 * These drivers control low level functions that when traced
293 * cause problems often in the sleep/wake paths as well as
294 * critical debug and panic paths.
295 * If somebody really wants to drill in on one of these kexts, then
296 * they can override blacklisting using the boot-arg above.
297 */
298
299 if (strstr(ctl->mod_modname, "AppleACPIEC") != NULL)
300 return FALSE;
301
302 if (strstr(ctl->mod_modname, "AppleACPIPlatform") != NULL)
303 return FALSE;
304
305 if (strstr(ctl->mod_modname, "AppleRTC") != NULL)
306 return FALSE;
307
308 if (strstr(ctl->mod_modname, "IOACPIFamily") != NULL)
309 return FALSE;
310
311 if (strstr(ctl->mod_modname, "AppleIntelCPUPowerManagement") != NULL)
312 return FALSE;
313
314 if (strstr(ctl->mod_modname, "AppleProfile") != NULL)
315 return FALSE;
316
317 if (strstr(ctl->mod_modname, "AppleIntelProfile") != NULL)
318 return FALSE;
319
320 if (strstr(ctl->mod_modname, "AppleEFI") != NULL)
321 return FALSE;
322
323 return TRUE;
324 }
325
326 /*
327 * FBT probe name validation
328 */
329 static int
330 is_symbol_valid(const char* name)
331 {
332 /*
333 * If the user set this, trust they know what they are doing.
334 */
335 if (gIgnoreFBTBlacklist)
336 return TRUE;
337
338 if (LIT_STRNSTART(name, "dtrace_") && !LIT_STRNSTART(name, "dtrace_safe_")) {
339 /*
340 * Anything beginning with "dtrace_" may be called
341 * from probe context unless it explitly indicates
342 * that it won't be called from probe context by
343 * using the prefix "dtrace_safe_".
344 */
345 return FALSE;
346 }
347
348 if (LIT_STRNSTART(name, "fasttrap_") ||
349 LIT_STRNSTART(name, "fuword") ||
350 LIT_STRNSTART(name, "suword") ||
351 LIT_STRNEQL(name, "sprlock") ||
352 LIT_STRNEQL(name, "sprunlock") ||
353 LIT_STRNEQL(name, "uread") ||
354 LIT_STRNEQL(name, "uwrite")) {
355 return FALSE; /* Fasttrap inner-workings. */
356 }
357
358 if (LIT_STRNSTART(name, "dsmos_"))
359 return FALSE; /* Don't Steal Mac OS X! */
360
361 if (LIT_STRNSTART(name, "_dtrace"))
362 return FALSE; /* Shims in dtrace.c */
363
364 if (LIT_STRNSTART(name, "chud"))
365 return FALSE; /* Professional courtesy. */
366
367 if (LIT_STRNSTART(name, "hibernate_"))
368 return FALSE; /* Let sleeping dogs lie. */
369
370 if (LIT_STRNEQL(name, "_ZNK6OSData14getBytesNoCopyEv"))
371 return FALSE; /* Data::getBytesNoCopy, IOHibernateSystemWake path */
372
373 if (LIT_STRNEQL(name, "_ZN9IOService14newTemperatureElPS_") || /* IOService::newTemperature */
374 LIT_STRNEQL(name, "_ZN9IOService26temperatureCriticalForZoneEPS_")) { /* IOService::temperatureCriticalForZone */
375 return FALSE; /* Per the fire code */
376 }
377
378 /*
379 * Place no probes (illegal instructions) in the exception handling path!
380 */
381 if (LIT_STRNEQL(name, "t_invop") ||
382 LIT_STRNEQL(name, "enter_lohandler") ||
383 LIT_STRNEQL(name, "lo_alltraps") ||
384 LIT_STRNEQL(name, "kernel_trap") ||
385 LIT_STRNEQL(name, "interrupt") ||
386 LIT_STRNEQL(name, "i386_astintr")) {
387 return FALSE;
388 }
389
390 if (LIT_STRNEQL(name, "current_thread") ||
391 LIT_STRNEQL(name, "ast_pending") ||
392 LIT_STRNEQL(name, "fbt_perfCallback") ||
393 LIT_STRNEQL(name, "machine_thread_get_kern_state") ||
394 LIT_STRNEQL(name, "get_threadtask") ||
395 LIT_STRNEQL(name, "ml_set_interrupts_enabled") ||
396 LIT_STRNEQL(name, "dtrace_invop") ||
397 LIT_STRNEQL(name, "fbt_invop") ||
398 LIT_STRNEQL(name, "sdt_invop") ||
399 LIT_STRNEQL(name, "max_valid_stack_address")) {
400 return FALSE;
401 }
402
403 /*
404 * Voodoo.
405 */
406 if (LIT_STRNSTART(name, "machine_stack_") ||
407 LIT_STRNSTART(name, "mapping_") ||
408 LIT_STRNEQL(name, "tmrCvt") ||
409
410 LIT_STRNSTART(name, "tsc_") ||
411
412 LIT_STRNSTART(name, "pmCPU") ||
413 LIT_STRNEQL(name, "pmKextRegister") ||
414 LIT_STRNEQL(name, "pmMarkAllCPUsOff") ||
415 LIT_STRNEQL(name, "pmSafeMode") ||
416 LIT_STRNEQL(name, "pmTimerSave") ||
417 LIT_STRNEQL(name, "pmTimerRestore") ||
418 LIT_STRNEQL(name, "pmUnRegister") ||
419 LIT_STRNSTART(name, "pms") ||
420 LIT_STRNEQL(name, "power_management_init") ||
421 LIT_STRNSTART(name, "usimple_") ||
422 LIT_STRNSTART(name, "lck_spin_lock") ||
423 LIT_STRNSTART(name, "lck_spin_unlock") ||
424
425 LIT_STRNSTART(name, "rtc_") ||
426 LIT_STRNSTART(name, "_rtc_") ||
427 LIT_STRNSTART(name, "rtclock_") ||
428 LIT_STRNSTART(name, "clock_") ||
429 LIT_STRNSTART(name, "absolutetime_to_") ||
430 LIT_STRNEQL(name, "setPop") ||
431 LIT_STRNEQL(name, "nanoseconds_to_absolutetime") ||
432 LIT_STRNEQL(name, "nanotime_to_absolutetime") ||
433
434 LIT_STRNSTART(name, "etimer_") ||
435
436 LIT_STRNSTART(name, "commpage_") ||
437 LIT_STRNSTART(name, "pmap_") ||
438 LIT_STRNSTART(name, "ml_") ||
439 LIT_STRNSTART(name, "PE_") ||
440 LIT_STRNEQL(name, "kprintf") ||
441 LIT_STRNSTART(name, "lapic_") ||
442 LIT_STRNSTART(name, "act_machine") ||
443 LIT_STRNSTART(name, "acpi_") ||
444 LIT_STRNSTART(name, "pal_")){
445 return FALSE;
446 }
447
448 /*
449 * Avoid machine_ routines. PR_5346750.
450 */
451 if (LIT_STRNSTART(name, "machine_"))
452 return FALSE;
453
454 if (LIT_STRNEQL(name, "handle_pending_TLB_flushes"))
455 return FALSE;
456
457 /*
458 * Place no probes on critical routines. PR_5221096
459 */
460 if (bsearch( name, critical_blacklist, CRITICAL_BLACKLIST_COUNT, sizeof(name), _cmp ) != NULL)
461 return FALSE;
462
463 /*
464 * Place no probes that could be hit in probe context.
465 */
466 if (bsearch( name, probe_ctx_closure, PROBE_CTX_CLOSURE_COUNT, sizeof(name), _cmp ) != NULL) {
467 return FALSE;
468 }
469
470 /*
471 * Place no probes that could be hit on the way to the debugger.
472 */
473 if (LIT_STRNSTART(name, "kdp_") ||
474 LIT_STRNSTART(name, "kdb_") ||
475 LIT_STRNSTART(name, "debug_") ||
476 LIT_STRNEQL(name, "Debugger") ||
477 LIT_STRNEQL(name, "Call_DebuggerC") ||
478 LIT_STRNEQL(name, "lock_debugger") ||
479 LIT_STRNEQL(name, "unlock_debugger") ||
480 LIT_STRNEQL(name, "packA") ||
481 LIT_STRNEQL(name, "unpackA") ||
482 LIT_STRNEQL(name, "SysChoked")) {
483 return FALSE;
484 }
485
486
487 /*
488 * Place no probes that could be hit on the way to a panic.
489 */
490 if (NULL != strstr(name, "panic_") ||
491 LIT_STRNEQL(name, "panic") ||
492 LIT_STRNEQL(name, "preemption_underflow_panic")) {
493 return FALSE;
494 }
495
496 return TRUE;
497 }
498
499 int
500 fbt_invop(uintptr_t addr, uintptr_t *state, uintptr_t rval)
501 {
502 fbt_probe_t *fbt = fbt_probetab[FBT_ADDR2NDX(addr)];
503
504 for (; fbt != NULL; fbt = fbt->fbtp_hashnext) {
505 if ((uintptr_t)fbt->fbtp_patchpoint == addr) {
506
507 if (fbt->fbtp_roffset == 0) {
508 x86_saved_state64_t *regs = (x86_saved_state64_t *)state;
509
510 CPU->cpu_dtrace_caller = *(uintptr_t *)(((uintptr_t)(regs->isf.rsp))+sizeof(uint64_t)); // 8(%rsp)
511 /* 64-bit ABI, arguments passed in registers. */
512 dtrace_probe(fbt->fbtp_id, regs->rdi, regs->rsi, regs->rdx, regs->rcx, regs->r8);
513 CPU->cpu_dtrace_caller = 0;
514 } else {
515
516 dtrace_probe(fbt->fbtp_id, fbt->fbtp_roffset, rval, 0, 0, 0);
517 CPU->cpu_dtrace_caller = 0;
518 }
519
520 return (fbt->fbtp_rval);
521 }
522 }
523
524 return (0);
525 }
526
/* True when regs is non-NULL and the low two bits of the saved %cs are non-zero. */
#define IS_USER_TRAP(regs) (regs && (((regs)->isf.cs & 3) != 0))
#define T_INVALID_OPCODE 6
/* Trap number this callback reacts to. */
#define FBT_EXCEPTION_CODE T_INVALID_OPCODE
/* Written into the saved trapno once the callback has run its switch. */
#define T_PREEMPT 255

/*
 * Trap callback for invalid-opcode traps taken on a patched instruction.
 *
 * trapno:      trap number; anything other than FBT_EXCEPTION_CODE is ignored.
 * tagged_regs: saved state; converted with saved_state64().
 * lo_spp:      pointer to the stack pointer recorded by the trampolines;
 *              updated when the stack is lifted for an emulated
 *              pop %rbp / leave.
 * unused2:     unused.
 *
 * For kernel-mode traps it calls dtrace_invop() with interrupts disabled
 * and emulates the instruction named by the returned code by editing the
 * saved state.  Returns KERN_SUCCESS when an instruction was emulated and
 * KERN_FAILURE otherwise (wrong trap, user-mode trap, or unknown code).
 */
kern_return_t
fbt_perfCallback(
	int trapno,
	x86_saved_state_t *tagged_regs,
	uintptr_t *lo_spp,
	__unused int unused2)
{
	kern_return_t retval = KERN_FAILURE;
	x86_saved_state64_t *saved_state = saved_state64(tagged_regs);

	if (FBT_EXCEPTION_CODE == trapno && !IS_USER_TRAP(saved_state)) {
		boolean_t oldlevel;
		uint64_t rsp_probe, fp, delta = 0;
		uintptr_t old_sp;
		uint32_t *pDst;
		int emul;


		/* Interrupts stay off until the saved state has been fully edited. */
		oldlevel = ml_set_interrupts_enabled(FALSE);

		/* Calculate where the stack pointer was when the probe instruction "fired." */
		rsp_probe = saved_state->isf.rsp; /* Easy, x86_64 establishes this value in idt64.s */

		/*
		 * Emit a local label at this point in the text and a private
		 * data word, _dtrace_invop_callsite_pre, holding its address.
		 * NOTE(review): presumably consumed elsewhere to recognise the
		 * dtrace_invop() call site -- confirm against the users of
		 * this symbol.
		 */
		__asm__ volatile(
			"Ldtrace_invop_callsite_pre_label:\n"
			".data\n"
			".private_extern _dtrace_invop_callsite_pre\n"
			"_dtrace_invop_callsite_pre:\n"
			" .quad Ldtrace_invop_callsite_pre_label\n"
			".text\n"
		);

		emul = dtrace_invop( saved_state->isf.rip, (uintptr_t *)saved_state, saved_state->rax );

		/* Same as above for the address just after the call: _dtrace_invop_callsite_post. */
		__asm__ volatile(
			"Ldtrace_invop_callsite_post_label:\n"
			".data\n"
			".private_extern _dtrace_invop_callsite_post\n"
			"_dtrace_invop_callsite_post:\n"
			" .quad Ldtrace_invop_callsite_post_label\n"
			".text\n"
		);

		switch (emul) {
		case DTRACE_INVOP_NOP:
			saved_state->isf.rip += DTRACE_INVOP_NOP_SKIP; /* Skip over the patched NOP (planted by sdt). */
			retval = KERN_SUCCESS;
			break;

		case DTRACE_INVOP_MOV_RSP_RBP:
			saved_state->rbp = rsp_probe; /* Emulate patched mov %rsp,%rbp */
			saved_state->isf.rip += DTRACE_INVOP_MOV_RSP_RBP_SKIP; /* Skip over the bytes of the patched mov %rsp,%rbp */
			retval = KERN_SUCCESS;
			break;

		case DTRACE_INVOP_POP_RBP:
		case DTRACE_INVOP_LEAVE:
			/*
			 * Emulate first micro-op of patched leave: mov %rbp,%rsp
			 * fp points just below the return address slot for target's ret
			 * and at the slot holding the frame pointer saved by the target's prologue.
			 */
			fp = saved_state->rbp;
			/* Emulate second micro-op of patched leave: patched pop %rbp
			 * savearea rbp is set for the frame of the caller to target
			 * The *live* %rsp will be adjusted below for pop increment(s)
			 */
			saved_state->rbp = *(uint64_t *)fp;
			/* Skip over the patched leave (pop %rbp is also one byte long) */
			saved_state->isf.rip += DTRACE_INVOP_LEAVE_SKIP;
			/*
			 * Lift the stack to account for the emulated leave
			 * Account for words local in this frame
			 * (for "case DTRACE_INVOP_POP_RBP:" this is expected to be zero.)
			 */
			delta = ((uint32_t *)fp) - ((uint32_t *)rsp_probe); /* delta is a *word* increment */
			/* Account for popping off the rbp (just accomplished by the emulation
			 * above...): 8 bytes, i.e. two 32-bit words.
			 */
			delta += 2;
			saved_state->isf.rsp += (delta << 2); /* words -> bytes */
			/* Obtain the stack pointer recorded by the trampolines */
			old_sp = *lo_spp;
			/*
			 * Shift contents of stack: walking down from fp to just
			 * above old_sp, each 32-bit word is overwritten with the
			 * word delta slots below it.
			 */
			for (pDst = (uint32_t *)fp;
			     pDst > (((uint32_t *)old_sp));
			     pDst--)
				*pDst = pDst[-delta];

			/* Track the stack lift in "saved_state". */
			saved_state = (x86_saved_state64_t *) (((uintptr_t)saved_state) + (delta << 2));
			/* Adjust the stack pointer utilized by the trampolines */
			*lo_spp = old_sp + (delta << 2);

			retval = KERN_SUCCESS;
			break;

		default:
			retval = KERN_FAILURE;
			break;
		}
		/* Executed for every case above, including the default (failure) one. */
		saved_state->isf.trapno = T_PREEMPT; /* Avoid call to i386_astintr()! */

		ml_set_interrupts_enabled(oldlevel);
	}

	return retval;
}
640
641 /*ARGSUSED*/
642 static void
643 __provide_probe_64(struct modctl *ctl, uintptr_t instrLow, uintptr_t instrHigh, char *modname, char* symbolName, machine_inst_t* symbolStart)
644 {
645 unsigned int j;
646 unsigned int doenable = 0;
647 dtrace_id_t thisid;
648
649 fbt_probe_t *newfbt, *retfbt, *entryfbt;
650 machine_inst_t *instr, *limit, theInstr, i1, i2, i3;
651 int size;
652
653 /*
654 * Guard against null symbols
655 */
656 if (!symbolStart || !instrLow || !instrHigh) {
657 kprintf("dtrace: %s has an invalid address\n", symbolName);
658 return;
659 }
660
661 for (j = 0, instr = symbolStart, theInstr = 0;
662 (j < 4) && ((uintptr_t)instr >= instrLow) && (instrHigh > (uintptr_t)(instr + 2));
663 j++) {
664 theInstr = instr[0];
665 if (theInstr == FBT_PUSH_RBP || theInstr == FBT_RET || theInstr == FBT_RET_IMM16)
666 break;
667
668 if ((size = dtrace_instr_size(instr)) <= 0)
669 break;
670
671 instr += size;
672 }
673
674 if (theInstr != FBT_PUSH_RBP)
675 return;
676
677 i1 = instr[1];
678 i2 = instr[2];
679 i3 = instr[3];
680
681 limit = (machine_inst_t *)instrHigh;
682
683 if (i1 == FBT_REX_RSP_RBP && i2 == FBT_MOV_RSP_RBP0 && i3 == FBT_MOV_RSP_RBP1) {
684 instr += 1; /* Advance to the mov %rsp,%rbp */
685 theInstr = i1;
686 } else {
687 return;
688 }
689 #if 0
690 else {
691 /*
692 * Sometimes, the compiler will schedule an intervening instruction
693 * in the function prologue. Example:
694 *
695 * _mach_vm_read:
696 * 000006d8 pushl %ebp
697 * 000006d9 movl $0x00000004,%edx
698 * 000006de movl %esp,%ebp
699 *
700 * Try the next instruction, to see if it is a movl %esp,%ebp
701 */
702
703 instr += 1; /* Advance past the pushl %ebp */
704 if ((size = dtrace_instr_size(instr)) <= 0)
705 return;
706
707 instr += size;
708
709 if ((instr + 1) >= limit)
710 return;
711
712 i1 = instr[0];
713 i2 = instr[1];
714
715 if (!(i1 == FBT_MOVL_ESP_EBP0_V0 && i2 == FBT_MOVL_ESP_EBP1_V0) &&
716 !(i1 == FBT_MOVL_ESP_EBP0_V1 && i2 == FBT_MOVL_ESP_EBP1_V1))
717 return;
718
719 /* instr already points at the movl %esp,%ebp */
720 theInstr = i1;
721 }
722 #endif
723 thisid = dtrace_probe_lookup(fbt_id, modname, symbolName, FBT_ENTRY);
724 newfbt = kmem_zalloc(sizeof (fbt_probe_t), KM_SLEEP);
725 strlcpy( (char *)&(newfbt->fbtp_name), symbolName, MAX_FBTP_NAME_CHARS );
726
727 if (thisid != 0) {
728 /*
729 * The dtrace_probe previously existed, so we have to hook
730 * the newfbt entry onto the end of the existing fbt's chain.
731 * If we find an fbt entry that was previously patched to
732 * fire, (as indicated by the current patched value), then
733 * we want to enable this newfbt on the spot.
734 */
735 entryfbt = dtrace_probe_arg (fbt_id, thisid);
736 ASSERT (entryfbt != NULL);
737 for(; entryfbt != NULL; entryfbt = entryfbt->fbtp_next) {
738 if (entryfbt->fbtp_currentval == entryfbt->fbtp_patchval)
739 doenable++;
740
741 if (entryfbt->fbtp_next == NULL) {
742 entryfbt->fbtp_next = newfbt;
743 newfbt->fbtp_id = entryfbt->fbtp_id;
744 break;
745 }
746 }
747 }
748 else {
749 /*
750 * The dtrace_probe did not previously exist, so we
751 * create it and hook in the newfbt. Since the probe is
752 * new, we obviously do not need to enable it on the spot.
753 */
754 newfbt->fbtp_id = dtrace_probe_create(fbt_id, modname, symbolName, FBT_ENTRY, FBT_AFRAMES_ENTRY, newfbt);
755 doenable = 0;
756 }
757
758 newfbt->fbtp_patchpoint = instr;
759 newfbt->fbtp_ctl = ctl;
760 newfbt->fbtp_loadcnt = ctl->mod_loadcnt;
761 newfbt->fbtp_rval = DTRACE_INVOP_MOV_RSP_RBP;
762 newfbt->fbtp_savedval = theInstr;
763 newfbt->fbtp_patchval = FBT_PATCHVAL;
764 newfbt->fbtp_currentval = 0;
765 newfbt->fbtp_hashnext = fbt_probetab[FBT_ADDR2NDX(instr)];
766 fbt_probetab[FBT_ADDR2NDX(instr)] = newfbt;
767
768 if (doenable)
769 fbt_enable(NULL, newfbt->fbtp_id, newfbt);
770
771 /*
772 * The fbt entry chain is in place, one entry point per symbol.
773 * The fbt return chain can have multiple return points per symbol.
774 * Here we find the end of the fbt return chain.
775 */
776
777 doenable=0;
778
779 thisid = dtrace_probe_lookup(fbt_id, modname, symbolName, FBT_RETURN);
780 if (thisid != 0) {
781 /* The dtrace_probe previously existed, so we have to
782 * find the end of the existing fbt chain. If we find
783 * an fbt return that was previously patched to fire,
784 * (as indicated by the currrent patched value), then
785 * we want to enable any new fbts on the spot.
786 */
787 retfbt = dtrace_probe_arg (fbt_id, thisid);
788 ASSERT(retfbt != NULL);
789 for (; retfbt != NULL; retfbt = retfbt->fbtp_next) {
790 if (retfbt->fbtp_currentval == retfbt->fbtp_patchval)
791 doenable++;
792 if(retfbt->fbtp_next == NULL)
793 break;
794 }
795 }
796 else {
797 doenable = 0;
798 retfbt = NULL;
799 }
800
801 again:
802 if (instr >= limit)
803 return;
804
805 /*
806 * If this disassembly fails, then we've likely walked off into
807 * a jump table or some other unsuitable area. Bail out of the
808 * disassembly now.
809 */
810 if ((size = dtrace_instr_size(instr)) <= 0)
811 return;
812
813 /*
814 * We (desperately) want to avoid erroneously instrumenting a
815 * jump table, especially given that our markers are pretty
816 * short: two bytes on x86, and just one byte on amd64. To
817 * determine if we're looking at a true instruction sequence
818 * or an inline jump table that happens to contain the same
819 * byte sequences, we resort to some heuristic sleeze: we
820 * treat this instruction as being contained within a pointer,
821 * and see if that pointer points to within the body of the
822 * function. If it does, we refuse to instrument it.
823 */
824 for (j = 0; j < sizeof (uintptr_t); j++) {
825 uintptr_t check = (uintptr_t)instr - j;
826 uint8_t *ptr;
827
828 if (check < (uintptr_t)symbolStart)
829 break;
830
831 if (check + sizeof (uintptr_t) > (uintptr_t)limit)
832 continue;
833
834 ptr = *(uint8_t **)check;
835
836 if (ptr >= (uint8_t *)symbolStart && ptr < limit) {
837 instr += size;
838 goto again;
839 }
840 }
841
842 /*
843 * OK, it's an instruction.
844 */
845 theInstr = instr[0];
846
847 /* Walked onto the start of the next routine? If so, bail out of this function. */
848 if (theInstr == FBT_PUSH_RBP)
849 return;
850
851 if (!(size == 1 && (theInstr == FBT_POP_RBP || theInstr == FBT_LEAVE))) {
852 instr += size;
853 goto again;
854 }
855
856 /*
857 * Found the pop %rbp; or leave.
858 */
859 machine_inst_t *patch_instr = instr;
860
861 /*
862 * Scan forward for a "ret", or "jmp".
863 */
864 instr += size;
865 if (instr >= limit)
866 return;
867
868 size = dtrace_instr_size(instr);
869 if (size <= 0) /* Failed instruction decode? */
870 return;
871
872 theInstr = instr[0];
873
874 if (!(size == FBT_RET_LEN && (theInstr == FBT_RET)) &&
875 !(size == FBT_RET_IMM16_LEN && (theInstr == FBT_RET_IMM16)) &&
876 !(size == FBT_JMP_SHORT_REL_LEN && (theInstr == FBT_JMP_SHORT_REL)) &&
877 !(size == FBT_JMP_NEAR_REL_LEN && (theInstr == FBT_JMP_NEAR_REL)) &&
878 !(size == FBT_JMP_FAR_ABS_LEN && (theInstr == FBT_JMP_FAR_ABS)))
879 return;
880
881 /*
882 * pop %rbp; ret; or leave; ret; or leave; jmp tailCalledFun; -- We have a winner!
883 */
884 newfbt = kmem_zalloc(sizeof (fbt_probe_t), KM_SLEEP);
885 strlcpy( (char *)&(newfbt->fbtp_name), symbolName, MAX_FBTP_NAME_CHARS );
886
887 if (retfbt == NULL) {
888 newfbt->fbtp_id = dtrace_probe_create(fbt_id, modname,
889 symbolName, FBT_RETURN, FBT_AFRAMES_RETURN, newfbt);
890 } else {
891 retfbt->fbtp_next = newfbt;
892 newfbt->fbtp_id = retfbt->fbtp_id;
893 }
894
895 retfbt = newfbt;
896 newfbt->fbtp_patchpoint = patch_instr;
897 newfbt->fbtp_ctl = ctl;
898 newfbt->fbtp_loadcnt = ctl->mod_loadcnt;
899
900 if (*patch_instr == FBT_POP_RBP) {
901 newfbt->fbtp_rval = DTRACE_INVOP_POP_RBP;
902 } else {
903 ASSERT(*patch_instr == FBT_LEAVE);
904 newfbt->fbtp_rval = DTRACE_INVOP_LEAVE;
905 }
906 newfbt->fbtp_roffset =
907 (uintptr_t)(patch_instr - (uint8_t *)symbolStart);
908
909 newfbt->fbtp_savedval = *patch_instr;
910 newfbt->fbtp_patchval = FBT_PATCHVAL;
911 newfbt->fbtp_hashnext = fbt_probetab[FBT_ADDR2NDX(patch_instr)];
912 fbt_probetab[FBT_ADDR2NDX(patch_instr)] = newfbt;
913
914 if (doenable)
915 fbt_enable(NULL, newfbt->fbtp_id, newfbt);
916
917 instr += size;
918 goto again;
919 }
920
921 static void
922 __kernel_syms_provide_module(void *arg, struct modctl *ctl)
923 {
924 #pragma unused(arg)
925 kernel_mach_header_t *mh;
926 struct load_command *cmd;
927 kernel_segment_command_t *orig_ts = NULL, *orig_le = NULL;
928 struct symtab_command *orig_st = NULL;
929 kernel_nlist_t *sym = NULL;
930 char *strings;
931 uintptr_t instrLow, instrHigh;
932 char *modname;
933 unsigned int i;
934
935 mh = (kernel_mach_header_t *)(ctl->mod_address);
936 modname = ctl->mod_modname;
937
938 if (mh->magic != MH_MAGIC_KERNEL)
939 return;
940
941 cmd = (struct load_command *) &mh[1];
942 for (i = 0; i < mh->ncmds; i++) {
943 if (cmd->cmd == LC_SEGMENT_KERNEL) {
944 kernel_segment_command_t *orig_sg = (kernel_segment_command_t *) cmd;
945
946 if (LIT_STRNEQL(orig_sg->segname, SEG_TEXT))
947 orig_ts = orig_sg;
948 else if (LIT_STRNEQL(orig_sg->segname, SEG_LINKEDIT))
949 orig_le = orig_sg;
950 else if (LIT_STRNEQL(orig_sg->segname, ""))
951 orig_ts = orig_sg; /* kexts have a single unnamed segment */
952 }
953 else if (cmd->cmd == LC_SYMTAB)
954 orig_st = (struct symtab_command *) cmd;
955
956 cmd = (struct load_command *) ((caddr_t) cmd + cmd->cmdsize);
957 }
958
959 if ((orig_ts == NULL) || (orig_st == NULL) || (orig_le == NULL))
960 return;
961
962 sym = (kernel_nlist_t *)(orig_le->vmaddr + orig_st->symoff - orig_le->fileoff);
963 strings = (char *)(orig_le->vmaddr + orig_st->stroff - orig_le->fileoff);
964
965 /* Find extent of the TEXT section */
966 instrLow = (uintptr_t)orig_ts->vmaddr;
967 instrHigh = (uintptr_t)(orig_ts->vmaddr + orig_ts->vmsize);
968
969 for (i = 0; i < orig_st->nsyms; i++) {
970 uint8_t n_type = sym[i].n_type & (N_TYPE | N_EXT);
971 char *name = strings + sym[i].n_un.n_strx;
972
973 /* Check that the symbol is a global and that it has a name. */
974 if (((N_SECT | N_EXT) != n_type && (N_ABS | N_EXT) != n_type))
975 continue;
976
977 if (0 == sym[i].n_un.n_strx) /* iff a null, "", name. */
978 continue;
979
980 /* Lop off omnipresent leading underscore. */
981 if (*name == '_')
982 name += 1;
983
984 /*
985 * We're only blacklisting functions in the kernel for now.
986 */
987 if (MOD_IS_MACH_KERNEL(ctl) && !is_symbol_valid(name))
988 continue;
989
990 __provide_probe_64(ctl, instrLow, instrHigh, modname, name, (machine_inst_t*)sym[i].n_value);
991 }
992 }
993
994 static void
995 __user_syms_provide_module(void *arg, struct modctl *ctl)
996 {
997 #pragma unused(arg)
998 char *modname;
999 unsigned int i;
1000
1001 modname = ctl->mod_modname;
1002
1003 dtrace_module_symbols_t* module_symbols = ctl->mod_user_symbols;
1004 if (module_symbols) {
1005 for (i=0; i<module_symbols->dtmodsyms_count; i++) {
1006
1007 /*
1008 * symbol->dtsym_addr (the symbol address) passed in from
1009 * user space, is already slid for both kexts and kernel.
1010 */
1011 dtrace_symbol_t* symbol = &module_symbols->dtmodsyms_symbols[i];
1012
1013 char* name = symbol->dtsym_name;
1014
1015 /* Lop off omnipresent leading underscore. */
1016 if (*name == '_')
1017 name += 1;
1018
1019 /*
1020 * We're only blacklisting functions in the kernel for now.
1021 */
1022 if (MOD_IS_MACH_KERNEL(ctl) && !is_symbol_valid(name))
1023 continue;
1024
1025 __provide_probe_64(ctl, (uintptr_t)symbol->dtsym_addr, (uintptr_t)(symbol->dtsym_addr + symbol->dtsym_size), modname, name, (machine_inst_t*)(uintptr_t)symbol->dtsym_addr);
1026 }
1027 }
1028 }
1029
1030 extern int dtrace_kernel_symbol_mode;
1031
1032 /*ARGSUSED*/
1033 void
1034 fbt_provide_module(void *arg, struct modctl *ctl)
1035 {
1036 ASSERT(ctl != NULL);
1037 ASSERT(dtrace_kernel_symbol_mode != DTRACE_KERNEL_SYMBOLS_NEVER);
1038 lck_mtx_assert(&mod_lock, LCK_MTX_ASSERT_OWNED);
1039
1040 if (MOD_FBT_DONE(ctl))
1041 return;
1042
1043 if (!is_module_valid(ctl)) {
1044 ctl->mod_flags |= MODCTL_FBT_INVALID;
1045 return;
1046 }
1047
1048 if (MOD_HAS_KERNEL_SYMBOLS(ctl)) {
1049 __kernel_syms_provide_module(arg, ctl);
1050 ctl->mod_flags |= MODCTL_FBT_PROBES_PROVIDED;
1051 return;
1052 }
1053
1054 if (MOD_HAS_USERSPACE_SYMBOLS(ctl)) {
1055 __user_syms_provide_module(arg, ctl);
1056 ctl->mod_flags |= MODCTL_FBT_PROBES_PROVIDED;
1057 if (MOD_FBT_PROVIDE_PRIVATE_PROBES(ctl))
1058 ctl->mod_flags |= MODCTL_FBT_PRIVATE_PROBES_PROVIDED;
1059 return;
1060 }
1061 }