]> git.saurik.com Git - apple/xnu.git/blame - bsd/dev/i386/fbt_x86.c
xnu-2050.24.15.tar.gz
[apple/xnu.git] / bsd / dev / i386 / fbt_x86.c
CommitLineData
2d21ac55
A
1/*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License, Version 1.0 only
6 * (the "License"). You may not use this file except in compliance
7 * with the License.
8 *
9 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10 * or http://www.opensolaris.org/os/licensing.
11 * See the License for the specific language governing permissions
12 * and limitations under the License.
13 *
14 * When distributing Covered Code, include this CDDL HEADER in each
15 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16 * If applicable, add the following below this CDDL HEADER, with the
17 * fields enclosed by brackets "[]" replaced with your own identifying
18 * information: Portions Copyright [yyyy] [name of copyright owner]
19 *
20 * CDDL HEADER END
21 */
22/*
23 * Copyright 2005 Sun Microsystems, Inc. All rights reserved.
24 * Use is subject to license terms.
25 */
26
27/* #pragma ident "@(#)fbt.c 1.15 05/09/19 SMI" */
28
29#ifdef KERNEL
30#ifndef _KERNEL
31#define _KERNEL /* Solaris vs. Darwin */
32#endif
33#endif
34
35#define MACH__POSIX_C_SOURCE_PRIVATE 1 /* pulls in suitable savearea from mach/ppc/thread_status.h */
36#include <kern/thread.h>
37#include <mach/thread_status.h>
38#include <mach/vm_param.h>
39#include <mach-o/loader.h>
40#include <mach-o/nlist.h>
b0d623f7 41#include <libkern/kernel_mach_header.h>
6d2010ae 42#include <libkern/OSAtomic.h>
2d21ac55
A
43
44#include <sys/param.h>
45#include <sys/systm.h>
46#include <sys/errno.h>
47#include <sys/stat.h>
48#include <sys/ioctl.h>
49#include <sys/conf.h>
50#include <sys/fcntl.h>
51#include <miscfs/devfs/devfs.h>
52
53#include <sys/dtrace.h>
54#include <sys/dtrace_impl.h>
55#include <sys/fbt.h>
56
57#include <sys/dtrace_glue.h>
58
/*
 * Emulation codes and skip lengths.
 * The *_SKIP values are the number of instruction bytes that
 * fbt_perfCallback() adds to the saved instruction pointer after it has
 * emulated the corresponding patched instruction.
 */
59#define DTRACE_INVOP_NOP_SKIP 1
60#define DTRACE_INVOP_MOVL_ESP_EBP 10
61#define DTRACE_INVOP_MOVL_ESP_EBP_SKIP 2
b0d623f7
A
/* NOTE(review): the RSP/RBP variants are not used by the i386 code in this file; presumably they serve the x86_64 path -- confirm. */
62#define DTRACE_INVOP_MOV_RSP_RBP 11
63#define DTRACE_INVOP_MOV_RSP_RBP_SKIP 3
64#define DTRACE_INVOP_POP_RBP 12
65#define DTRACE_INVOP_POP_RBP_SKIP 1
2d21ac55
A
66#define DTRACE_INVOP_LEAVE_SKIP 1
67
/*
 * Instruction bytes that the probe-placement code compares against the
 * function text when looking for a frame-setup prologue
 * (pushl %ebp; movl %esp,%ebp -- two encodings of the movl are accepted).
 */
68#define FBT_PUSHL_EBP 0x55
69#define FBT_MOVL_ESP_EBP0_V0 0x8b
70#define FBT_MOVL_ESP_EBP1_V0 0xec
71#define FBT_MOVL_ESP_EBP0_V1 0x89
72#define FBT_MOVL_ESP_EBP1_V1 0xe5
b0d623f7
A
73
74#define FBT_PUSH_RBP 0x55
2d21ac55 75#define FBT_REX_RSP_RBP 0x48
b0d623f7
A
76#define FBT_MOV_RSP_RBP0 0x89
77#define FBT_MOV_RSP_RBP1 0xe5
78#define FBT_POP_RBP 0x5d
2d21ac55
A
79
/*
 * Instruction bytes and lengths used to recognize a function epilogue:
 * "popl %ebp" or "leave", followed by one of the ret/jmp forms below.
 * The *_LEN values are compared with the size reported by
 * dtrace_instr_size() for the instruction following the popl/leave.
 */
80#define FBT_POPL_EBP 0x5d
81#define FBT_RET 0xc3
82#define FBT_RET_IMM16 0xc2
83#define FBT_LEAVE 0xc9
84#define FBT_JMP_SHORT_REL 0xeb /* Jump short, relative, displacement relative to next instr. */
85#define FBT_JMP_NEAR_REL 0xe9 /* Jump near, relative, displacement relative to next instr. */
86#define FBT_JMP_FAR_ABS 0xea /* Jump far, absolute, address given in operand */
87#define FBT_RET_LEN 1
88#define FBT_RET_IMM16_LEN 3
89#define FBT_JMP_SHORT_REL_LEN 2
90#define FBT_JMP_NEAR_REL_LEN 5
/* NOTE(review): a 32-bit far absolute jmp (0xea ptr16:32) is normally 7 bytes long; confirm that 5 is intended here. */
91#define FBT_JMP_FAR_ABS_LEN 5
92
/*
 * FBT_PATCHVAL is stored as each probe's fbtp_patchval (the byte planted at
 * the patch point); presumably it is chosen to raise the invalid-opcode
 * trap handled by fbt_perfCallback() -- confirm.
 * FBT_AFRAMES_* are the "aframes" arguments passed to dtrace_probe_create()
 * for entry and return probes respectively.
 */
93#define FBT_PATCHVAL 0xf0
94#define FBT_AFRAMES_ENTRY 7
95#define FBT_AFRAMES_RETURN 6
96
/* Probe names, and the hash from a patch-point address to an fbt_probetab[] index. */
97#define FBT_ENTRY "entry"
98#define FBT_RETURN "return"
99#define FBT_ADDR2NDX(addr) ((((uintptr_t)(addr)) >> 4) & fbt_probetab_mask)
100
101extern dtrace_provider_id_t fbt_id;
102extern fbt_probe_t **fbt_probetab;
103extern int fbt_probetab_mask;
104
6d2010ae
A
105extern int gIgnoreFBTBlacklist; /* From fbt_init */
106
107kern_return_t fbt_perfCallback(int, x86_saved_state_t *, uintptr_t *, __unused int);
b0d623f7 108
2d21ac55
A
/*
 * Routines too critical to carry a probe. PR_5221096, PR_5379018.
 *
 * is_symbol_valid() looks names up in this table with bsearch() and _cmp(),
 * so the entries must stay sorted in byte-value (strcmp) order: upper-case
 * letters sort before '_', and '_' sorts before lower-case letters.
 */
static const char * critical_blacklist[] =
{
	/* b*, console_* */
	"bcopy_phys", "console_cpu_alloc", "console_cpu_free",

	/* cpu_* */
	"cpu_IA32e_disable", "cpu_IA32e_enable", "cpu_NMI_interrupt",
	"cpu_control", "cpu_data_alloc",
	"cpu_desc_init", "cpu_desc_init64", "cpu_desc_load", "cpu_desc_load64",
	"cpu_exit_wait", "cpu_info", "cpu_info_count", "cpu_init", "cpu_interrupt",
	"cpu_machine_init", "cpu_mode_init",
	"cpu_processor_alloc", "cpu_processor_free",
	"cpu_signal_handler", "cpu_sleep", "cpu_start", "cpu_subtype",
	"cpu_thread_alloc", "cpu_thread_halt", "cpu_thread_init", "cpu_threadtype",
	"cpu_to_processor", "cpu_topology_sort", "cpu_topology_start_cpu", "cpu_type",

	/* cpuid_* */
	"cpuid_cpu_display", "cpuid_extfeatures",

	/* h* .. m* */
	"handle_pending_TLB_flushes", "hw_compare_and_store",
	"machine_idle_cstate", "mca_cpu_alloc", "mca_cpu_init", "ml_nofault_copy",

	/* pmap_cpu_* */
	"pmap_cpu_alloc", "pmap_cpu_free", "pmap_cpu_high_map_vaddr",
	"pmap_cpu_high_shared_remap", "pmap_cpu_init",

	/* r* .. v* */
	"register_cpu_setup_func", "unregister_cpu_setup_func", "vstart"
};
#define CRITICAL_BLACKLIST_COUNT (sizeof(critical_blacklist)/sizeof(critical_blacklist[0]))
167
/*
 * The transitive closure of entry points that can be reached from probe
 * context (apart from routines whose names begin with dtrace_).
 *
 * Searched with bsearch() and _cmp() by is_symbol_valid(), so the entries
 * must stay sorted in byte-value (strcmp) order.
 */
static const char * probe_ctx_closure[] =
{
	"Debugger", "IS_64BIT_PROCESS", "OSCompareAndSwap",

	"absolutetime_to_microtime", "act_set_astbsd", "ast_pending",

	"clock_get_calendar_nanotime_nowait",
	"copyin", "copyin_user", "copyinstr", "copyout", "copyoutstr",
	"cpu_number",
	"current_proc", "current_processor", "current_task", "current_thread",

	"debug_enter", "find_user_regs", "flush_tlb64",
	"get_bsdtask_info", "get_bsdthread_info", "hw_atomic_and",

	"kauth_cred_get", "kauth_getgid", "kauth_getuid", "kernel_preempt_check",

	"mach_absolute_time", "max_valid_stack_address",
	"ml_at_interrupt_context",
	"ml_phys_write_byte_64", "ml_phys_write_half_64", "ml_phys_write_word_64",
	"ml_set_interrupts_enabled",

	"panic",
	"pmap64_pde", "pmap64_pdpt",
	"pmap_find_phys", "pmap_get_mapwindow", "pmap_pde", "pmap_pte",
	"pmap_put_mapwindow", "pmap_valid_page",
	"prf",
	"proc_is64bit", "proc_selfname", "proc_selfpid", "proc_selfppid",
	"psignal_lock",

	"rtc_nanotime_load", "rtc_nanotime_read",

	"sdt_getargdesc", "strlcpy", "sync_iss_to_iks_unconditionally",
	"systrace_stub",

	"timer_grab"
};
#define PROBE_CTX_CLOSURE_COUNT (sizeof(probe_ctx_closure)/sizeof(probe_ctx_closure[0]))
232
233
/*
 * Comparison callback handed to bsearch() for the name tables above.
 *
 *   a - the lookup key: a NUL-terminated string, passed directly.
 *   b - address of a table slot, i.e. a pointer to a (const char *).
 *
 * Returns zero when the key and the table entry are the same string,
 * otherwise a negative/positive value giving their byte-wise order.
 */
static int _cmp(const void *a, const void *b)
{
	const char *key = (const char *)a;
	const char *entry = *(const char **)b;

	/*
	 * The bound covers the key's terminating NUL, so an entry that merely
	 * starts with the key does not compare equal.
	 */
	return strncmp(key, entry, strlen(key) + 1);
}

/*
 * Binary search over a sorted array.
 *
 *   key    - value to look for; passed as the first argument to compar.
 *   base0  - start of the array.
 *   nmemb  - number of elements in the array.
 *   size   - size in bytes of one element.
 *   compar - returns <0, 0 or >0 as key sorts before, equal to, or after
 *            the element whose address it receives as second argument.
 *
 * Returns the address of a matching element, or NULL if there is none.
 */
static const void * bsearch(
	register const void *key,
	const void *base0,
	size_t nmemb,
	register size_t size,
	register int (*compar)(const void *, const void *)) {

	const char *lo = base0;
	size_t span = nmemb;

	while (span != 0) {
		const char *mid = lo + (span / 2) * size;
		int order = (*compar)(key, mid);

		if (order == 0)
			return mid;

		if (order > 0) {
			/* Key sorts after mid: continue above it, excluding mid itself. */
			lo = mid + size;
			span--;
		}
		/* Otherwise lo is unchanged and the lower part is searched. */

		span /= 2;
	}
	return (NULL);
}
263
6d2010ae
A
264/*
265 * Module validation
266 */
267static int
268is_module_valid(struct modctl* ctl)
269{
270 ASSERT(!MOD_FBT_PROBES_PROVIDED(ctl));
271 ASSERT(!MOD_FBT_INVALID(ctl));
272
273 if (0 == ctl->mod_address || 0 == ctl->mod_size) {
274 return FALSE;
275 }
276
277 if (0 == ctl->mod_loaded) {
278 return FALSE;
279 }
280
281 if (strstr(ctl->mod_modname, "CHUD") != NULL)
282 return FALSE;
283
284 /*
285 * If the user sets this, trust they know what they are doing.
286 */
287 if (gIgnoreFBTBlacklist) /* per boot-arg set in fbt_init() */
288 return TRUE;
289
290 /*
291 * These drivers control low level functions that when traced
292 * cause problems, especially in the sleep/wake paths.
293 * If somebody really wants to drill in on one of these kexts, then
294 * they can override blacklisting using the boot-arg above.
295 */
296
297 if (strstr(ctl->mod_modname, "AppleACPIEC") != NULL)
298 return FALSE;
299
300 if (strstr(ctl->mod_modname, "AppleACPIPlatform") != NULL)
301 return FALSE;
302
303 if (strstr(ctl->mod_modname, "AppleRTC") != NULL)
304 return FALSE;
305
306 if (strstr(ctl->mod_modname, "IOACPIFamily") != NULL)
307 return FALSE;
308
309 if (strstr(ctl->mod_modname, "AppleIntelCPUPowerManagement") != NULL)
310 return FALSE;
311
312 if (strstr(ctl->mod_modname, "AppleProfile") != NULL)
313 return FALSE;
314
315 if (strstr(ctl->mod_modname, "AppleIntelProfile") != NULL)
316 return FALSE;
317
318
319
320 return TRUE;
321}
322
323/*
324 * FBT probe name validation
325 */
326static int
327is_symbol_valid(const char* name)
328{
329 /*
330 * If the user set this, trust they know what they are doing.
331 */
332 if (gIgnoreFBTBlacklist)
333 return TRUE;
334
335 if (LIT_STRNSTART(name, "dtrace_") && !LIT_STRNSTART(name, "dtrace_safe_")) {
336 /*
337 * Anything beginning with "dtrace_" may be called
338 * from probe context unless it explitly indicates
339 * that it won't be called from probe context by
340 * using the prefix "dtrace_safe_".
341 */
342 return FALSE;
343 }
344
345 if (LIT_STRNSTART(name, "fasttrap_") ||
346 LIT_STRNSTART(name, "fuword") ||
347 LIT_STRNSTART(name, "suword") ||
348 LIT_STRNEQL(name, "sprlock") ||
349 LIT_STRNEQL(name, "sprunlock") ||
350 LIT_STRNEQL(name, "uread") ||
351 LIT_STRNEQL(name, "uwrite")) {
352 return FALSE; /* Fasttrap inner-workings. */
353 }
354
355 if (LIT_STRNSTART(name, "dsmos_"))
356 return FALSE; /* Don't Steal Mac OS X! */
357
358 if (LIT_STRNSTART(name, "_dtrace"))
359 return FALSE; /* Shims in dtrace.c */
360
361 if (LIT_STRNSTART(name, "chud"))
362 return FALSE; /* Professional courtesy. */
363
364 if (LIT_STRNSTART(name, "hibernate_"))
365 return FALSE; /* Let sleeping dogs lie. */
366
367 if (LIT_STRNEQL(name, "_ZNK6OSData14getBytesNoCopyEv"))
368 return FALSE; /* Data::getBytesNoCopy, IOHibernateSystemWake path */
369
370 if (LIT_STRNEQL(name, "_ZN9IOService14newTemperatureElPS_") || /* IOService::newTemperature */
371 LIT_STRNEQL(name, "_ZN9IOService26temperatureCriticalForZoneEPS_")) { /* IOService::temperatureCriticalForZone */
372 return FALSE; /* Per the fire code */
373 }
374
375 /*
376 * Place no probes (illegal instructions) in the exception handling path!
377 */
378 if (LIT_STRNEQL(name, "t_invop") ||
379 LIT_STRNEQL(name, "enter_lohandler") ||
380 LIT_STRNEQL(name, "lo_alltraps") ||
381 LIT_STRNEQL(name, "kernel_trap") ||
382 LIT_STRNEQL(name, "interrupt") ||
383 LIT_STRNEQL(name, "i386_astintr")) {
384 return FALSE;
385 }
386
387 if (LIT_STRNEQL(name, "current_thread") ||
388 LIT_STRNEQL(name, "ast_pending") ||
389 LIT_STRNEQL(name, "fbt_perfCallback") ||
390 LIT_STRNEQL(name, "machine_thread_get_kern_state") ||
391 LIT_STRNEQL(name, "get_threadtask") ||
392 LIT_STRNEQL(name, "ml_set_interrupts_enabled") ||
393 LIT_STRNEQL(name, "dtrace_invop") ||
394 LIT_STRNEQL(name, "fbt_invop") ||
395 LIT_STRNEQL(name, "sdt_invop") ||
396 LIT_STRNEQL(name, "max_valid_stack_address")) {
397 return FALSE;
398 }
399
400 /*
401 * Voodoo.
402 */
403 if (LIT_STRNSTART(name, "machine_stack_") ||
404 LIT_STRNSTART(name, "mapping_") ||
405 LIT_STRNEQL(name, "tmrCvt") ||
406
407 LIT_STRNSTART(name, "tsc_") ||
408
409 LIT_STRNSTART(name, "pmCPU") ||
410 LIT_STRNEQL(name, "pmKextRegister") ||
411 LIT_STRNEQL(name, "pmMarkAllCPUsOff") ||
412 LIT_STRNEQL(name, "pmSafeMode") ||
413 LIT_STRNEQL(name, "pmTimerSave") ||
414 LIT_STRNEQL(name, "pmTimerRestore") ||
415 LIT_STRNEQL(name, "pmUnRegister") ||
416 LIT_STRNSTART(name, "pms") ||
417 LIT_STRNEQL(name, "power_management_init") ||
418 LIT_STRNSTART(name, "usimple_") ||
419 LIT_STRNSTART(name, "lck_spin_lock") ||
420 LIT_STRNSTART(name, "lck_spin_unlock") ||
421
422 LIT_STRNSTART(name, "rtc_") ||
423 LIT_STRNSTART(name, "_rtc_") ||
424 LIT_STRNSTART(name, "rtclock_") ||
425 LIT_STRNSTART(name, "clock_") ||
426 LIT_STRNSTART(name, "absolutetime_to_") ||
427 LIT_STRNEQL(name, "setPop") ||
428 LIT_STRNEQL(name, "nanoseconds_to_absolutetime") ||
429 LIT_STRNEQL(name, "nanotime_to_absolutetime") ||
430
431 LIT_STRNSTART(name, "etimer_") ||
432
433 LIT_STRNSTART(name, "commpage_") ||
434 LIT_STRNSTART(name, "pmap_") ||
435 LIT_STRNSTART(name, "ml_") ||
436 LIT_STRNSTART(name, "PE_") ||
437 LIT_STRNEQL(name, "kprintf") ||
438 LIT_STRNSTART(name, "lapic_") ||
439 LIT_STRNSTART(name, "act_machine") ||
440 LIT_STRNSTART(name, "acpi_") ||
441 LIT_STRNSTART(name, "pal_")){
442 return FALSE;
443 }
444
445 /*
446 * Avoid machine_ routines. PR_5346750.
447 */
448 if (LIT_STRNSTART(name, "machine_"))
449 return FALSE;
450
451 if (LIT_STRNEQL(name, "handle_pending_TLB_flushes"))
452 return FALSE;
453
454 /*
455 * Place no probes on critical routines. PR_5221096
456 */
457 if (bsearch( name, critical_blacklist, CRITICAL_BLACKLIST_COUNT, sizeof(name), _cmp ) != NULL)
458 return FALSE;
459
460 /*
461 * Place no probes that could be hit in probe context.
462 */
463 if (bsearch( name, probe_ctx_closure, PROBE_CTX_CLOSURE_COUNT, sizeof(name), _cmp ) != NULL) {
464 return FALSE;
465 }
466
467 /*
468 * Place no probes that could be hit on the way to the debugger.
469 */
470 if (LIT_STRNSTART(name, "kdp_") ||
471 LIT_STRNSTART(name, "kdb_") ||
472 LIT_STRNSTART(name, "kdbg_") ||
473 LIT_STRNSTART(name, "kdebug_") ||
474 LIT_STRNSTART(name, "kernel_debug") ||
475 LIT_STRNEQL(name, "Debugger") ||
476 LIT_STRNEQL(name, "Call_DebuggerC") ||
477 LIT_STRNEQL(name, "lock_debugger") ||
478 LIT_STRNEQL(name, "unlock_debugger") ||
479 LIT_STRNEQL(name, "SysChoked")) {
480 return FALSE;
481 }
482
483
484 /*
485 * Place no probes that could be hit on the way to a panic.
486 */
487 if (NULL != strstr(name, "panic_") ||
488 LIT_STRNEQL(name, "panic") ||
489 LIT_STRNEQL(name, "preemption_underflow_panic")) {
490 return FALSE;
491 }
492
493 return TRUE;
494}
495
b0d623f7 496#if defined(__i386__)
2d21ac55
A
497int
498fbt_invop(uintptr_t addr, uintptr_t *stack, uintptr_t rval)
499{
500 uintptr_t stack0 = 0, stack1 = 0, stack2 = 0, stack3 = 0, stack4 = 0;
501 fbt_probe_t *fbt = fbt_probetab[FBT_ADDR2NDX(addr)];
502
503 for (; fbt != NULL; fbt = fbt->fbtp_hashnext) {
504 if ((uintptr_t)fbt->fbtp_patchpoint == addr) {
505
506 if (fbt->fbtp_roffset == 0) {
507 uintptr_t *stacktop;
508 if (CPU_ON_INTR(CPU))
509 stacktop = (uintptr_t *)dtrace_get_cpu_int_stack_top();
510 else
b0d623f7 511 stacktop = (uintptr_t *)(dtrace_get_kernel_stack(current_thread()) + kernel_stack_size);
2d21ac55
A
512
513 stack += 1; /* skip over the target's pushl'd %ebp */
514
515 if (stack <= stacktop)
516 CPU->cpu_dtrace_caller = *stack++;
517 if (stack <= stacktop)
518 stack0 = *stack++;
519 if (stack <= stacktop)
520 stack1 = *stack++;
521 if (stack <= stacktop)
522 stack2 = *stack++;
523 if (stack <= stacktop)
524 stack3 = *stack++;
525 if (stack <= stacktop)
526 stack4 = *stack++;
527
b0d623f7 528 /* 32-bit ABI, arguments passed on stack. */
2d21ac55
A
529 dtrace_probe(fbt->fbtp_id, stack0, stack1, stack2, stack3, stack4);
530 CPU->cpu_dtrace_caller = 0;
531 } else {
532 dtrace_probe(fbt->fbtp_id, fbt->fbtp_roffset, rval, 0, 0, 0);
533 CPU->cpu_dtrace_caller = 0;
534 }
535
536 return (fbt->fbtp_rval);
537 }
538 }
539
540 return (0);
541}
542
/* A trap came from user mode when the low two bits of the saved %cs are non-zero. */
543#define IS_USER_TRAP(regs) (regs && (((regs)->cs & 3) != 0))
544#define T_INVALID_OPCODE 6
/* FBT reacts only to the invalid-opcode trap. */
545#define FBT_EXCEPTION_CODE T_INVALID_OPCODE
b0d623f7 546#define T_PREEMPT 255
2d21ac55
A
547
/*
 * Trap callback for FBT probes (32-bit saved state).
 *
 *   trapno      - trap number; only FBT_EXCEPTION_CODE is acted upon.
 *   tagged_regs - saved register state of the trapping context.
 *   lo_spp      - points at the stack pointer recorded by the trampolines;
 *                 it is read, and rewritten when the stack is lifted.
 *
 * For an invalid-opcode trap that did not come from user mode, calls
 * dtrace_invop() with the trapping eip, the stack pointer at the time the
 * probe fired and the saved eax, then emulates the instruction the probe
 * replaced according to the code dtrace_invop() returns.
 *
 * Returns KERN_SUCCESS when an instruction was emulated, KERN_FAILURE
 * otherwise (including when the trap is not handled here at all).
 */
548kern_return_t
549fbt_perfCallback(
550 int trapno,
551 x86_saved_state_t *tagged_regs,
6d2010ae
A
552 uintptr_t *lo_spp,
553 __unused int unused )
2d21ac55
A
554{
555 kern_return_t retval = KERN_FAILURE;
556 x86_saved_state32_t *saved_state = saved_state32(tagged_regs);
557 struct x86_saved_state32_from_kernel *regs = (struct x86_saved_state32_from_kernel *)saved_state;
558
559 if (FBT_EXCEPTION_CODE == trapno && !IS_USER_TRAP(saved_state)) {
560 boolean_t oldlevel, cpu_64bit;
6d2010ae
A
561 uint32_t esp_probe, fp, *pDst, delta = 0;
562 uintptr_t old_sp;
2d21ac55
A
563 int emul;
564
565 cpu_64bit = ml_is64bit();
/* Interrupts stay disabled until the previous level is restored at the end of this block. */
566 oldlevel = ml_set_interrupts_enabled(FALSE);
567
568 /* Calculate where the stack pointer was when the probe instruction "fired." */
569 if (cpu_64bit) {
570 esp_probe = saved_state->uesp; /* Easy, x86_64 establishes this value in idt64.s */
571 } else {
572 esp_probe = (uint32_t)&(regs[1]); /* Nasty, infer the location above the save area */
573 }
574
/* Emit a private data symbol holding the address of the code just before the dtrace_invop() call. */
6d2010ae
A
575 __asm__ volatile(
576 "Ldtrace_invop_callsite_pre_label:\n"
577 ".data\n"
578 ".private_extern _dtrace_invop_callsite_pre\n"
579 "_dtrace_invop_callsite_pre:\n"
580 " .long Ldtrace_invop_callsite_pre_label\n"
581 ".text\n"
582 );
583
2d21ac55 584 emul = dtrace_invop( saved_state->eip, (uintptr_t *)esp_probe, saved_state->eax );
2d21ac55 585
/* Likewise, a private data symbol holding the address of the code just after the call. */
6d2010ae
A
586 __asm__ volatile(
587 "Ldtrace_invop_callsite_post_label:\n"
588 ".data\n"
589 ".private_extern _dtrace_invop_callsite_post\n"
590 "_dtrace_invop_callsite_post:\n"
591 " .long Ldtrace_invop_callsite_post_label\n"
592 ".text\n"
593 );
594
2d21ac55
A
595 switch (emul) {
596 case DTRACE_INVOP_NOP:
b0d623f7 597 saved_state->eip += DTRACE_INVOP_NOP_SKIP; /* Skip over the patched NOP (planted by sdt.) */
2d21ac55
A
598 retval = KERN_SUCCESS;
599 break;
600
601 case DTRACE_INVOP_MOVL_ESP_EBP:
602 saved_state->ebp = esp_probe; /* Emulate patched movl %esp,%ebp */
603 saved_state->eip += DTRACE_INVOP_MOVL_ESP_EBP_SKIP; /* Skip over the bytes of the patched movl %esp,%ebp */
604 retval = KERN_SUCCESS;
605 break;
606
/* popl %ebp and leave share one emulation path. */
607 case DTRACE_INVOP_POPL_EBP:
608 case DTRACE_INVOP_LEAVE:
609/*
610 * Emulate first micro-op of patched leave: movl %ebp,%esp
611 * fp points just below the return address slot for target's ret
612 * and at the slot holding the frame pointer saved by the target's prologue.
613 */
614 fp = saved_state->ebp;
615/* Emulate second micro-op of patched leave: patched popl %ebp
616 * savearea ebp is set for the frame of the caller to target
617 * The *live* %esp will be adjusted below for pop increment(s)
618 */
619 saved_state->ebp = *(uint32_t *)fp;
620/* Skip over the patched leave */
621 saved_state->eip += DTRACE_INVOP_LEAVE_SKIP;
622/*
623 * Lift the stack to account for the emulated leave
624 * Account for words local in this frame
625 * (in "case DTRACE_INVOP_POPL_EBP:" this is zero.)
626 */
/* delta is counted in 32-bit words; it is converted to bytes with << 2 below. */
627 delta = ((uint32_t *)fp) - ((uint32_t *)esp_probe);
628/* Account for popping off the ebp (just accomplished by the emulation
629 * above...)
630 */
631 delta += 1;
632
633 if (cpu_64bit)
634 saved_state->uesp += (delta << 2);
6d2010ae
A
635/* Obtain the stack pointer recorded by the trampolines */
636 old_sp = *lo_spp;
2d21ac55
A
637/* Shift contents of stack */
/* Copies word by word from the highest address downwards, moving everything between old_sp and fp up by delta words. */
638 for (pDst = (uint32_t *)fp;
6d2010ae 639 pDst > (((uint32_t *)old_sp));
2d21ac55
A
640 pDst--)
641 *pDst = pDst[-delta];
b0d623f7
A
642
643/* Track the stack lift in "saved_state". */
644 saved_state = (x86_saved_state32_t *) (((uintptr_t)saved_state) + (delta << 2));
6d2010ae
A
645/* Adjust the stack pointer utilized by the trampolines */
646 *lo_spp = old_sp + (delta << 2);
2d21ac55
A
647
648 retval = KERN_SUCCESS;
649 break;
650
651 default:
652 retval = KERN_FAILURE;
653 break;
654 }
/* Note: the trap number is rewritten for every invalid-opcode kernel trap that reaches here, whether or not it was emulated. */
b0d623f7
A
655 saved_state->trapno = T_PREEMPT; /* Avoid call to i386_astintr()! */
656
2d21ac55
A
657 ml_set_interrupts_enabled(oldlevel);
658 }
659
660 return retval;
661}
662
/*
 * Create the FBT entry and return probes for one function (32-bit x86).
 *
 *   ctl         - module the function belongs to; recorded in each probe
 *                 together with ctl->mod_loadcnt.
 *   instrLow    - lowest address at which scanning may start.
 *   instrHigh   - upper limit (exclusive) of the text that may be scanned.
 *   modname     - module name used to look up / create the dtrace probes.
 *   symbolName  - function name; copied into each probe's fbtp_name.
 *   symbolStart - address of the function's first instruction.
 *
 * The entry probe is planted on the "movl %esp,%ebp" that follows a
 * "pushl %ebp" found within the first few instructions (one intervening
 * instruction is tolerated).  Return probes are planted on every
 * "popl %ebp" or "leave" that is immediately followed by a ret or jmp.
 * If the prologue is not recognized, no probe is created at all.
 */
663/*ARGSUSED*/
664static void
6d2010ae 665__provide_probe_32(struct modctl *ctl, uintptr_t instrLow, uintptr_t instrHigh, char *modname, char* symbolName, machine_inst_t* symbolStart)
2d21ac55 666{
6d2010ae
A
667 unsigned int j;
668 unsigned int doenable = 0;
669 dtrace_id_t thisid;
2d21ac55 670
6d2010ae
A
671 fbt_probe_t *newfbt, *retfbt, *entryfbt;
672 machine_inst_t *instr, *limit, theInstr, i1, i2;
673 int size;
2d21ac55 674
/* Look at no more than four instructions for the pushl %ebp, staying inside [instrLow, instrHigh). */
6d2010ae
A
675 for (j = 0, instr = symbolStart, theInstr = 0;
676 (j < 4) && ((uintptr_t)instr >= instrLow) && (instrHigh > (uintptr_t)(instr + 2));
677 j++) {
678 theInstr = instr[0];
679 if (theInstr == FBT_PUSHL_EBP || theInstr == FBT_RET || theInstr == FBT_RET_IMM16)
680 break;
681
682 if ((size = dtrace_instr_size(instr)) <= 0)
683 break;
684
685 instr += size;
686 }
687
/* No frame-setup push found (or a ret came first): leave this function alone. */
688 if (theInstr != FBT_PUSHL_EBP)
2d21ac55 689 return;
6d2010ae
A
690
/* The loop bound above guarantees instr + 2 is below instrHigh, so these two reads are in range. */
691 i1 = instr[1];
692 i2 = instr[2];
693
694 limit = (machine_inst_t *)instrHigh;
695
696 if ((i1 == FBT_MOVL_ESP_EBP0_V0 && i2 == FBT_MOVL_ESP_EBP1_V0) ||
697 (i1 == FBT_MOVL_ESP_EBP0_V1 && i2 == FBT_MOVL_ESP_EBP1_V1)) {
698 instr += 1; /* Advance to the movl %esp,%ebp */
699 theInstr = i1;
700 } else {
701 /*
702 * Sometimes, the compiler will schedule an intervening instruction
703 * in the function prologue. Example:
704 *
705 * _mach_vm_read:
706 * 000006d8 pushl %ebp
707 * 000006d9 movl $0x00000004,%edx
708 * 000006de movl %esp,%ebp
709 *
710 * Try the next instruction, to see if it is a movl %esp,%ebp
711 */
712
713 instr += 1; /* Advance past the pushl %ebp */
714 if ((size = dtrace_instr_size(instr)) <= 0)
715 return;
716
717 instr += size;
718
719 if ((instr + 1) >= limit)
720 return;
721
722 i1 = instr[0];
723 i2 = instr[1];
724
725 if (!(i1 == FBT_MOVL_ESP_EBP0_V0 && i2 == FBT_MOVL_ESP_EBP1_V0) &&
726 !(i1 == FBT_MOVL_ESP_EBP0_V1 && i2 == FBT_MOVL_ESP_EBP1_V1))
727 return;
728
729 /* instr already points at the movl %esp,%ebp */
730 theInstr = i1;
731 }
732
/* ---- Entry probe: instr now points at the movl %esp,%ebp to be patched. ---- */
733 thisid = dtrace_probe_lookup(fbt_id, modname, symbolName, FBT_ENTRY);
734 newfbt = kmem_zalloc(sizeof (fbt_probe_t), KM_SLEEP);
735 strlcpy( (char *)&(newfbt->fbtp_name), symbolName, MAX_FBTP_NAME_CHARS );
736
737 if (thisid != 0) {
738 /*
739 * The dtrace_probe previously existed, so we have to hook
740 * the newfbt entry onto the end of the existing fbt's chain.
741 * If we find an fbt entry that was previously patched to
742 * fire, (as indicated by the current patched value), then
743 * we want to enable this newfbt on the spot.
744 */
745 entryfbt = dtrace_probe_arg (fbt_id, thisid);
746 ASSERT (entryfbt != NULL);
747 for(; entryfbt != NULL; entryfbt = entryfbt->fbtp_next) {
748 if (entryfbt->fbtp_currentval == entryfbt->fbtp_patchval)
749 doenable++;
750
751 if (entryfbt->fbtp_next == NULL) {
752 entryfbt->fbtp_next = newfbt;
753 newfbt->fbtp_id = entryfbt->fbtp_id;
754 break;
755 }
756 }
757 }
758 else {
759 /*
760 * The dtrace_probe did not previously exist, so we
761 * create it and hook in the newfbt. Since the probe is
762 * new, we obviously do not need to enable it on the spot.
763 */
764 newfbt->fbtp_id = dtrace_probe_create(fbt_id, modname, symbolName, FBT_ENTRY, FBT_AFRAMES_ENTRY, newfbt);
765 doenable = 0;
766 }
767
768
/* Record what to patch and what to restore, then link the probe into the address hash. */
769 newfbt->fbtp_patchpoint = instr;
770 newfbt->fbtp_ctl = ctl;
771 newfbt->fbtp_loadcnt = ctl->mod_loadcnt;
772 newfbt->fbtp_rval = DTRACE_INVOP_MOVL_ESP_EBP;
773 newfbt->fbtp_savedval = theInstr;
774 newfbt->fbtp_patchval = FBT_PATCHVAL;
775 newfbt->fbtp_currentval = 0;
776 newfbt->fbtp_hashnext = fbt_probetab[FBT_ADDR2NDX(instr)];
777 fbt_probetab[FBT_ADDR2NDX(instr)] = newfbt;
778
779 if (doenable)
780 fbt_enable(NULL, newfbt->fbtp_id, newfbt);
781
2d21ac55 782 /*
6d2010ae
A
783 * The fbt entry chain is in place, one entry point per symbol.
784 * The fbt return chain can have multiple return points per symbol.
785 * Here we find the end of the fbt return chain.
2d21ac55 786 */
6d2010ae
A
787
788 doenable=0;
789
790 thisid = dtrace_probe_lookup(fbt_id, modname, symbolName, FBT_RETURN);
791 if (thisid != 0) {
792 /* The dtrace_probe previously existed, so we have to
793 * find the end of the existing fbt chain. If we find
794 * an fbt return that was previously patched to fire,
795 * (as indicated by the current patched value), then
796 * we want to enable any new fbts on the spot.
797 */
798 retfbt = dtrace_probe_arg (fbt_id, thisid);
799 ASSERT(retfbt != NULL);
800 for (; retfbt != NULL; retfbt = retfbt->fbtp_next) {
801 if (retfbt->fbtp_currentval == retfbt->fbtp_patchval)
802 doenable++;
803 if(retfbt->fbtp_next == NULL)
804 break;
805 }
806 }
807 else {
808 doenable = 0;
809 retfbt = NULL;
810 }
811
/* ---- Return probes: walk the remaining instructions one at a time; every path below either returns or jumps back here. ---- */
812again:
813 if (instr >= limit)
2d21ac55 814 return;
6d2010ae
A
815
816 /*
817 * If this disassembly fails, then we've likely walked off into
818 * a jump table or some other unsuitable area. Bail out of the
819 * disassembly now.
820 */
821 if ((size = dtrace_instr_size(instr)) <= 0)
822 return;
823
824 /*
825 * We (desperately) want to avoid erroneously instrumenting a
826 * jump table, especially given that our markers are pretty
827 * short: two bytes on x86, and just one byte on amd64. To
828 * determine if we're looking at a true instruction sequence
829 * or an inline jump table that happens to contain the same
830 * byte sequences, we resort to some heuristic sleaze: we
831 * treat this instruction as being contained within a pointer,
832 * and see if that pointer points to within the body of the
833 * function. If it does, we refuse to instrument it.
834 */
835 for (j = 0; j < sizeof (uintptr_t); j++) {
836 uintptr_t check = (uintptr_t)instr - j;
837 uint8_t *ptr;
838
839 if (check < (uintptr_t)symbolStart)
840 break;
841
842 if (check + sizeof (uintptr_t) > (uintptr_t)limit)
843 continue;
844
845 ptr = *(uint8_t **)check;
846
847 if (ptr >= (uint8_t *)symbolStart && ptr < limit) {
848 instr += size;
849 goto again;
850 }
851 }
852
853 /*
854 * OK, it's an instruction.
855 */
856 theInstr = instr[0];
857
858 /* Walked onto the start of the next routine? If so, bail out of this function. */
859 if (theInstr == FBT_PUSHL_EBP)
860 return;
861
/* Only a one-byte popl %ebp or leave is a candidate patch point; skip everything else. */
862 if (!(size == 1 && (theInstr == FBT_POPL_EBP || theInstr == FBT_LEAVE))) {
863 instr += size;
864 goto again;
865 }
866
867 /*
868 * Found the popl %ebp; or leave.
869 */
870 machine_inst_t *patch_instr = instr;
871
872 /*
873 * Scan forward for a "ret", or "jmp".
874 */
875 instr += size;
876 if (instr >= limit)
877 return;
878
879 size = dtrace_instr_size(instr);
880 if (size <= 0) /* Failed instruction decode? */
881 return;
882
883 theInstr = instr[0];
884
/* Both the opcode byte and the decoded length must match one of the accepted ret/jmp forms; otherwise scanning stops for this function. */
885 if (!(size == FBT_RET_LEN && (theInstr == FBT_RET)) &&
886 !(size == FBT_RET_IMM16_LEN && (theInstr == FBT_RET_IMM16)) &&
887 !(size == FBT_JMP_SHORT_REL_LEN && (theInstr == FBT_JMP_SHORT_REL)) &&
888 !(size == FBT_JMP_NEAR_REL_LEN && (theInstr == FBT_JMP_NEAR_REL)) &&
889 !(size == FBT_JMP_FAR_ABS_LEN && (theInstr == FBT_JMP_FAR_ABS)))
2d21ac55 890 return;
6d2010ae
A
891
892 /*
893 * popl %ebp; ret; or leave; ret; or leave; jmp tailCalledFun; -- We have a winner!
894 */
895 newfbt = kmem_zalloc(sizeof (fbt_probe_t), KM_SLEEP);
896 strlcpy( (char *)&(newfbt->fbtp_name), symbolName, MAX_FBTP_NAME_CHARS );
897
/* The first return site creates the dtrace probe; later sites are chained onto it and share its id. */
898 if (retfbt == NULL) {
899 newfbt->fbtp_id = dtrace_probe_create(fbt_id, modname,
900 symbolName, FBT_RETURN, FBT_AFRAMES_RETURN, newfbt);
901 } else {
902 retfbt->fbtp_next = newfbt;
903 newfbt->fbtp_id = retfbt->fbtp_id;
904 }
905
906 retfbt = newfbt;
907 newfbt->fbtp_patchpoint = patch_instr;
908 newfbt->fbtp_ctl = ctl;
909 newfbt->fbtp_loadcnt = ctl->mod_loadcnt;
910
911 if (*patch_instr == FBT_POPL_EBP) {
912 newfbt->fbtp_rval = DTRACE_INVOP_POPL_EBP;
913 } else {
914 ASSERT(*patch_instr == FBT_LEAVE);
915 newfbt->fbtp_rval = DTRACE_INVOP_LEAVE;
916 }
/* Offset of the patch point from the function start; fbt_invop() treats a non-zero fbtp_roffset as marking a return probe. */
917 newfbt->fbtp_roffset =
918 (uintptr_t)(patch_instr - (uint8_t *)symbolStart);
919
920 newfbt->fbtp_savedval = *patch_instr;
921 newfbt->fbtp_patchval = FBT_PATCHVAL;
922 newfbt->fbtp_currentval = 0;
923 newfbt->fbtp_hashnext = fbt_probetab[FBT_ADDR2NDX(patch_instr)];
924 fbt_probetab[FBT_ADDR2NDX(patch_instr)] = newfbt;
925
926 if (doenable)
927 fbt_enable(NULL, newfbt->fbtp_id, newfbt);
928
/* Step past the ret/jmp and keep looking for further return sites. */
929 instr += size;
930 goto again;
931}
2d21ac55 932
6d2010ae
A
933static void
934__kernel_syms_provide_module(void *arg, struct modctl *ctl)
935{
936#pragma unused(arg)
937 kernel_mach_header_t *mh;
938 struct load_command *cmd;
939 kernel_segment_command_t *orig_ts = NULL, *orig_le = NULL;
940 struct symtab_command *orig_st = NULL;
941 struct nlist *sym = NULL;
942 char *strings;
943 uintptr_t instrLow, instrHigh;
944 char *modname;
945 unsigned int i;
946
947 mh = (kernel_mach_header_t *)(ctl->mod_address);
948 modname = ctl->mod_modname;
949
2d21ac55
A
950 if (mh->magic != MH_MAGIC)
951 return;
6d2010ae 952
2d21ac55
A
953 cmd = (struct load_command *) &mh[1];
954 for (i = 0; i < mh->ncmds; i++) {
b0d623f7
A
955 if (cmd->cmd == LC_SEGMENT_KERNEL) {
956 kernel_segment_command_t *orig_sg = (kernel_segment_command_t *) cmd;
6d2010ae 957
b0d623f7 958 if (LIT_STRNEQL(orig_sg->segname, SEG_TEXT))
2d21ac55 959 orig_ts = orig_sg;
b0d623f7 960 else if (LIT_STRNEQL(orig_sg->segname, SEG_LINKEDIT))
2d21ac55 961 orig_le = orig_sg;
b0d623f7 962 else if (LIT_STRNEQL(orig_sg->segname, ""))
2d21ac55
A
963 orig_ts = orig_sg; /* kexts have a single unnamed segment */
964 }
965 else if (cmd->cmd == LC_SYMTAB)
966 orig_st = (struct symtab_command *) cmd;
6d2010ae 967
2d21ac55
A
968 cmd = (struct load_command *) ((caddr_t) cmd + cmd->cmdsize);
969 }
6d2010ae 970
2d21ac55
A
971 if ((orig_ts == NULL) || (orig_st == NULL) || (orig_le == NULL))
972 return;
6d2010ae 973
b0d623f7
A
974 sym = (struct nlist *)(orig_le->vmaddr + orig_st->symoff - orig_le->fileoff);
975 strings = (char *)(orig_le->vmaddr + orig_st->stroff - orig_le->fileoff);
6d2010ae 976
2d21ac55
A
977 /* Find extent of the TEXT section */
978 instrLow = (uintptr_t)orig_ts->vmaddr;
979 instrHigh = (uintptr_t)(orig_ts->vmaddr + orig_ts->vmsize);
6d2010ae 980
2d21ac55 981 for (i = 0; i < orig_st->nsyms; i++) {
2d21ac55
A
982 uint8_t n_type = sym[i].n_type & (N_TYPE | N_EXT);
983 char *name = strings + sym[i].n_un.n_strx;
6d2010ae 984
2d21ac55
A
985 /* Check that the symbol is a global and that it has a name. */
986 if (((N_SECT | N_EXT) != n_type && (N_ABS | N_EXT) != n_type))
987 continue;
6d2010ae 988
2d21ac55
A
989 if (0 == sym[i].n_un.n_strx) /* iff a null, "", name. */
990 continue;
991
992 /* Lop off omnipresent leading underscore. */
993 if (*name == '_')
994 name += 1;
2d21ac55 995
2d21ac55 996 /*
6d2010ae 997 * We're only blacklisting functions in the kernel for now.
2d21ac55 998 */
6d2010ae 999 if (MOD_IS_MACH_KERNEL(ctl) && !is_symbol_valid(name))
2d21ac55 1000 continue;
6d2010ae
A
1001
1002 __provide_probe_32(ctl, instrLow, instrHigh, modname, name, (machine_inst_t*)sym[i].n_value);
1003 }
1004}
2d21ac55 1005
6d2010ae
A
1006static void
1007__user_syms_provide_module(void *arg, struct modctl *ctl)
1008{
1009#pragma unused(arg)
1010 char *modname;
1011 unsigned int i;
1012
1013 modname = ctl->mod_modname;
1014
1015 dtrace_module_symbols_t* module_symbols = ctl->mod_user_symbols;
1016 if (module_symbols) {
1017 for (i=0; i<module_symbols->dtmodsyms_count; i++) {
1018 dtrace_symbol_t* symbol = &module_symbols->dtmodsyms_symbols[i];
1019 char* name = symbol->dtsym_name;
1020
1021 /* Lop off omnipresent leading underscore. */
1022 if (*name == '_')
1023 name += 1;
2d21ac55 1024
6d2010ae
A
1025 /*
1026 * We're only blacklisting functions in the kernel for now.
1027 */
1028 if (MOD_IS_MACH_KERNEL(ctl) && !is_symbol_valid(name))
1029 continue;
2d21ac55 1030
6d2010ae
A
1031 __provide_probe_32(ctl, (uintptr_t)symbol->dtsym_addr, (uintptr_t)(symbol->dtsym_addr + symbol->dtsym_size), modname, name, (machine_inst_t*)(uintptr_t)symbol->dtsym_addr);
1032 }
1033 }
1034}
2d21ac55 1035
b0d623f7
A
1036#elif defined(__x86_64__)
1037int
1038fbt_invop(uintptr_t addr, uintptr_t *state, uintptr_t rval)
1039{
1040 fbt_probe_t *fbt = fbt_probetab[FBT_ADDR2NDX(addr)];
1041
1042 for (; fbt != NULL; fbt = fbt->fbtp_hashnext) {
1043 if ((uintptr_t)fbt->fbtp_patchpoint == addr) {
1044
1045 if (fbt->fbtp_roffset == 0) {
1046 x86_saved_state64_t *regs = (x86_saved_state64_t *)state;
1047
1048 CPU->cpu_dtrace_caller = *(uintptr_t *)(((uintptr_t)(regs->isf.rsp))+sizeof(uint64_t)); // 8(%rsp)
1049 /* 64-bit ABI, arguments passed in registers. */
1050 dtrace_probe(fbt->fbtp_id, regs->rdi, regs->rsi, regs->rdx, regs->rcx, regs->r8);
1051 CPU->cpu_dtrace_caller = 0;
1052 } else {
1053
1054 dtrace_probe(fbt->fbtp_id, fbt->fbtp_roffset, rval, 0, 0, 0);
1055 CPU->cpu_dtrace_caller = 0;
1056 }
1057
1058 return (fbt->fbtp_rval);
1059 }
1060 }
1061
1062 return (0);
1063}
1064
1065#define IS_USER_TRAP(regs) (regs && (((regs)->isf.cs & 3) != 0))
1066#define T_INVALID_OPCODE 6
1067#define FBT_EXCEPTION_CODE T_INVALID_OPCODE
1068#define T_PREEMPT 255
1069
1070kern_return_t
1071fbt_perfCallback(
1072 int trapno,
1073 x86_saved_state_t *tagged_regs,
6d2010ae 1074 uintptr_t *lo_spp,
b0d623f7
A
1075 __unused int unused2)
1076{
1077 kern_return_t retval = KERN_FAILURE;
1078 x86_saved_state64_t *saved_state = saved_state64(tagged_regs);
1079
1080 if (FBT_EXCEPTION_CODE == trapno && !IS_USER_TRAP(saved_state)) {
1081 boolean_t oldlevel;
6d2010ae
A
1082 uint64_t rsp_probe, fp, delta = 0;
1083 uintptr_t old_sp;
b0d623f7
A
1084 uint32_t *pDst;
1085 int emul;
1086
6d2010ae 1087
b0d623f7
A
1088 oldlevel = ml_set_interrupts_enabled(FALSE);
1089
1090 /* Calculate where the stack pointer was when the probe instruction "fired." */
1091 rsp_probe = saved_state->isf.rsp; /* Easy, x86_64 establishes this value in idt64.s */
1092
6d2010ae
A
1093 __asm__ volatile(
1094 "Ldtrace_invop_callsite_pre_label:\n"
1095 ".data\n"
1096 ".private_extern _dtrace_invop_callsite_pre\n"
1097 "_dtrace_invop_callsite_pre:\n"
1098 " .quad Ldtrace_invop_callsite_pre_label\n"
1099 ".text\n"
1100 );
1101
b0d623f7 1102 emul = dtrace_invop( saved_state->isf.rip, (uintptr_t *)saved_state, saved_state->rax );
6d2010ae
A
1103
1104 __asm__ volatile(
1105 "Ldtrace_invop_callsite_post_label:\n"
1106 ".data\n"
1107 ".private_extern _dtrace_invop_callsite_post\n"
1108 "_dtrace_invop_callsite_post:\n"
1109 " .quad Ldtrace_invop_callsite_post_label\n"
1110 ".text\n"
1111 );
b0d623f7
A
1112
1113 switch (emul) {
1114 case DTRACE_INVOP_NOP:
1115 saved_state->isf.rip += DTRACE_INVOP_NOP_SKIP; /* Skip over the patched NOP (planted by sdt). */
1116 retval = KERN_SUCCESS;
1117 break;
1118
1119 case DTRACE_INVOP_MOV_RSP_RBP:
1120 saved_state->rbp = rsp_probe; /* Emulate patched mov %rsp,%rbp */
1121 saved_state->isf.rip += DTRACE_INVOP_MOV_RSP_RBP_SKIP; /* Skip over the bytes of the patched mov %rsp,%rbp */
1122 retval = KERN_SUCCESS;
1123 break;
1124
1125 case DTRACE_INVOP_POP_RBP:
1126 case DTRACE_INVOP_LEAVE:
1127/*
1128 * Emulate first micro-op of patched leave: mov %rbp,%rsp
1129 * fp points just below the return address slot for target's ret
1130 * and at the slot holding the frame pointer saved by the target's prologue.
1131 */
1132 fp = saved_state->rbp;
1133/* Emulate second micro-op of patched leave: patched pop %rbp
1134 * savearea rbp is set for the frame of the caller to target
1135 * The *live* %rsp will be adjusted below for pop increment(s)
1136 */
1137 saved_state->rbp = *(uint64_t *)fp;
1138/* Skip over the patched leave */
1139 saved_state->isf.rip += DTRACE_INVOP_LEAVE_SKIP;
1140/*
1141 * Lift the stack to account for the emulated leave
1142 * Account for words local in this frame
1143 * (in "case DTRACE_INVOP_POPL_EBP:" this is zero.)
1144 */
1145 delta = ((uint32_t *)fp) - ((uint32_t *)rsp_probe); /* delta is a *word* increment */
1146/* Account for popping off the rbp (just accomplished by the emulation
1147 * above...)
1148 */
1149 delta += 2;
1150 saved_state->isf.rsp += (delta << 2);
6d2010ae
A
1151/* Obtain the stack pointer recorded by the trampolines */
1152 old_sp = *lo_spp;
b0d623f7
A
1153/* Shift contents of stack */
1154 for (pDst = (uint32_t *)fp;
6d2010ae 1155 pDst > (((uint32_t *)old_sp));
b0d623f7
A
1156 pDst--)
1157 *pDst = pDst[-delta];
1158
1159/* Track the stack lift in "saved_state". */
1160 saved_state = (x86_saved_state64_t *) (((uintptr_t)saved_state) + (delta << 2));
6d2010ae
A
1161/* Adjust the stack pointer utilized by the trampolines */
1162 *lo_spp = old_sp + (delta << 2);
b0d623f7
A
1163
1164 retval = KERN_SUCCESS;
1165 break;
1166
1167 default:
1168 retval = KERN_FAILURE;
1169 break;
1170 }
1171 saved_state->isf.trapno = T_PREEMPT; /* Avoid call to i386_astintr()! */
1172
1173 ml_set_interrupts_enabled(oldlevel);
1174 }
1175
1176 return retval;
1177}
1178
1179/*ARGSUSED*/
1180static void
6d2010ae 1181__provide_probe_64(struct modctl *ctl, uintptr_t instrLow, uintptr_t instrHigh, char *modname, char* symbolName, machine_inst_t* symbolStart)
b0d623f7 1182{
6d2010ae
A
1183 unsigned int j;
1184 unsigned int doenable = 0;
1185 dtrace_id_t thisid;
b0d623f7 1186
6d2010ae
A
1187 fbt_probe_t *newfbt, *retfbt, *entryfbt;
1188 machine_inst_t *instr, *limit, theInstr, i1, i2, i3;
1189 int size;
1190
1191 for (j = 0, instr = symbolStart, theInstr = 0;
1192 (j < 4) && ((uintptr_t)instr >= instrLow) && (instrHigh > (uintptr_t)(instr + 2));
1193 j++) {
1194 theInstr = instr[0];
1195 if (theInstr == FBT_PUSH_RBP || theInstr == FBT_RET || theInstr == FBT_RET_IMM16)
1196 break;
1197
1198 if ((size = dtrace_instr_size(instr)) <= 0)
1199 break;
1200
1201 instr += size;
1202 }
1203
1204 if (theInstr != FBT_PUSH_RBP)
b0d623f7 1205 return;
6d2010ae
A
1206
1207 i1 = instr[1];
1208 i2 = instr[2];
1209 i3 = instr[3];
1210
1211 limit = (machine_inst_t *)instrHigh;
1212
1213 if (i1 == FBT_REX_RSP_RBP && i2 == FBT_MOV_RSP_RBP0 && i3 == FBT_MOV_RSP_RBP1) {
1214 instr += 1; /* Advance to the mov %rsp,%rbp */
1215 theInstr = i1;
1216 } else {
1217 return;
1218 }
1219#if 0
1220 else {
1221 /*
1222 * Sometimes, the compiler will schedule an intervening instruction
1223 * in the function prologue. Example:
1224 *
1225 * _mach_vm_read:
1226 * 000006d8 pushl %ebp
1227 * 000006d9 movl $0x00000004,%edx
1228 * 000006de movl %esp,%ebp
1229 *
1230 * Try the next instruction, to see if it is a movl %esp,%ebp
1231 */
1232
1233 instr += 1; /* Advance past the pushl %ebp */
1234 if ((size = dtrace_instr_size(instr)) <= 0)
1235 return;
1236
1237 instr += size;
1238
1239 if ((instr + 1) >= limit)
1240 return;
1241
1242 i1 = instr[0];
1243 i2 = instr[1];
1244
1245 if (!(i1 == FBT_MOVL_ESP_EBP0_V0 && i2 == FBT_MOVL_ESP_EBP1_V0) &&
1246 !(i1 == FBT_MOVL_ESP_EBP0_V1 && i2 == FBT_MOVL_ESP_EBP1_V1))
1247 return;
1248
1249 /* instr already points at the movl %esp,%ebp */
1250 theInstr = i1;
1251 }
1252#endif
1253 thisid = dtrace_probe_lookup(fbt_id, modname, symbolName, FBT_ENTRY);
1254 newfbt = kmem_zalloc(sizeof (fbt_probe_t), KM_SLEEP);
1255 strlcpy( (char *)&(newfbt->fbtp_name), symbolName, MAX_FBTP_NAME_CHARS );
1256
1257 if (thisid != 0) {
1258 /*
1259 * The dtrace_probe previously existed, so we have to hook
1260 * the newfbt entry onto the end of the existing fbt's chain.
1261 * If we find an fbt entry that was previously patched to
1262 * fire, (as indicated by the current patched value), then
1263 * we want to enable this newfbt on the spot.
1264 */
1265 entryfbt = dtrace_probe_arg (fbt_id, thisid);
1266 ASSERT (entryfbt != NULL);
1267 for(; entryfbt != NULL; entryfbt = entryfbt->fbtp_next) {
1268 if (entryfbt->fbtp_currentval == entryfbt->fbtp_patchval)
1269 doenable++;
1270
1271 if (entryfbt->fbtp_next == NULL) {
1272 entryfbt->fbtp_next = newfbt;
1273 newfbt->fbtp_id = entryfbt->fbtp_id;
1274 break;
1275 }
1276 }
1277 }
1278 else {
1279 /*
1280 * The dtrace_probe did not previously exist, so we
1281 * create it and hook in the newfbt. Since the probe is
1282 * new, we obviously do not need to enable it on the spot.
1283 */
1284 newfbt->fbtp_id = dtrace_probe_create(fbt_id, modname, symbolName, FBT_ENTRY, FBT_AFRAMES_ENTRY, newfbt);
1285 doenable = 0;
1286 }
1287
1288 newfbt->fbtp_patchpoint = instr;
1289 newfbt->fbtp_ctl = ctl;
1290 newfbt->fbtp_loadcnt = ctl->mod_loadcnt;
1291 newfbt->fbtp_rval = DTRACE_INVOP_MOV_RSP_RBP;
1292 newfbt->fbtp_savedval = theInstr;
1293 newfbt->fbtp_patchval = FBT_PATCHVAL;
1294 newfbt->fbtp_currentval = 0;
1295 newfbt->fbtp_hashnext = fbt_probetab[FBT_ADDR2NDX(instr)];
1296 fbt_probetab[FBT_ADDR2NDX(instr)] = newfbt;
1297
1298 if (doenable)
1299 fbt_enable(NULL, newfbt->fbtp_id, newfbt);
1300
b0d623f7 1301 /*
6d2010ae
A
1302 * The fbt entry chain is in place, one entry point per symbol.
1303 * The fbt return chain can have multiple return points per symbol.
1304 * Here we find the end of the fbt return chain.
b0d623f7 1305 */
6d2010ae
A
1306
1307 doenable=0;
1308
1309 thisid = dtrace_probe_lookup(fbt_id, modname, symbolName, FBT_RETURN);
1310 if (thisid != 0) {
1311 /* The dtrace_probe previously existed, so we have to
1312 * find the end of the existing fbt chain. If we find
1313 * an fbt return that was previously patched to fire,
1314 * (as indicated by the currrent patched value), then
1315 * we want to enable any new fbts on the spot.
1316 */
1317 retfbt = dtrace_probe_arg (fbt_id, thisid);
1318 ASSERT(retfbt != NULL);
1319 for (; retfbt != NULL; retfbt = retfbt->fbtp_next) {
1320 if (retfbt->fbtp_currentval == retfbt->fbtp_patchval)
1321 doenable++;
1322 if(retfbt->fbtp_next == NULL)
1323 break;
1324 }
1325 }
1326 else {
1327 doenable = 0;
1328 retfbt = NULL;
1329 }
1330
1331again:
1332 if (instr >= limit)
b0d623f7 1333 return;
6d2010ae
A
1334
1335 /*
1336 * If this disassembly fails, then we've likely walked off into
1337 * a jump table or some other unsuitable area. Bail out of the
1338 * disassembly now.
1339 */
1340 if ((size = dtrace_instr_size(instr)) <= 0)
1341 return;
1342
1343 /*
1344 * We (desperately) want to avoid erroneously instrumenting a
1345 * jump table, especially given that our markers are pretty
1346 * short: two bytes on x86, and just one byte on amd64. To
1347 * determine if we're looking at a true instruction sequence
1348 * or an inline jump table that happens to contain the same
1349 * byte sequences, we resort to some heuristic sleeze: we
1350 * treat this instruction as being contained within a pointer,
1351 * and see if that pointer points to within the body of the
1352 * function. If it does, we refuse to instrument it.
1353 */
1354 for (j = 0; j < sizeof (uintptr_t); j++) {
1355 uintptr_t check = (uintptr_t)instr - j;
1356 uint8_t *ptr;
1357
1358 if (check < (uintptr_t)symbolStart)
1359 break;
1360
1361 if (check + sizeof (uintptr_t) > (uintptr_t)limit)
1362 continue;
1363
1364 ptr = *(uint8_t **)check;
1365
1366 if (ptr >= (uint8_t *)symbolStart && ptr < limit) {
1367 instr += size;
1368 goto again;
1369 }
1370 }
1371
1372 /*
1373 * OK, it's an instruction.
1374 */
1375 theInstr = instr[0];
1376
1377 /* Walked onto the start of the next routine? If so, bail out of this function. */
1378 if (theInstr == FBT_PUSH_RBP)
b0d623f7 1379 return;
6d2010ae
A
1380
1381 if (!(size == 1 && (theInstr == FBT_POP_RBP || theInstr == FBT_LEAVE))) {
1382 instr += size;
1383 goto again;
1384 }
1385
1386 /*
1387 * Found the pop %rbp; or leave.
1388 */
1389 machine_inst_t *patch_instr = instr;
1390
1391 /*
1392 * Scan forward for a "ret", or "jmp".
1393 */
1394 instr += size;
1395 if (instr >= limit)
1396 return;
1397
1398 size = dtrace_instr_size(instr);
1399 if (size <= 0) /* Failed instruction decode? */
1400 return;
1401
1402 theInstr = instr[0];
1403
1404 if (!(size == FBT_RET_LEN && (theInstr == FBT_RET)) &&
1405 !(size == FBT_RET_IMM16_LEN && (theInstr == FBT_RET_IMM16)) &&
1406 !(size == FBT_JMP_SHORT_REL_LEN && (theInstr == FBT_JMP_SHORT_REL)) &&
1407 !(size == FBT_JMP_NEAR_REL_LEN && (theInstr == FBT_JMP_NEAR_REL)) &&
1408 !(size == FBT_JMP_FAR_ABS_LEN && (theInstr == FBT_JMP_FAR_ABS)))
1409 return;
1410
1411 /*
1412 * pop %rbp; ret; or leave; ret; or leave; jmp tailCalledFun; -- We have a winner!
1413 */
1414 newfbt = kmem_zalloc(sizeof (fbt_probe_t), KM_SLEEP);
1415 strlcpy( (char *)&(newfbt->fbtp_name), symbolName, MAX_FBTP_NAME_CHARS );
1416
1417 if (retfbt == NULL) {
1418 newfbt->fbtp_id = dtrace_probe_create(fbt_id, modname,
1419 symbolName, FBT_RETURN, FBT_AFRAMES_RETURN, newfbt);
1420 } else {
1421 retfbt->fbtp_next = newfbt;
1422 newfbt->fbtp_id = retfbt->fbtp_id;
1423 }
1424
1425 retfbt = newfbt;
1426 newfbt->fbtp_patchpoint = patch_instr;
1427 newfbt->fbtp_ctl = ctl;
1428 newfbt->fbtp_loadcnt = ctl->mod_loadcnt;
1429
1430 if (*patch_instr == FBT_POP_RBP) {
1431 newfbt->fbtp_rval = DTRACE_INVOP_POP_RBP;
1432 } else {
1433 ASSERT(*patch_instr == FBT_LEAVE);
1434 newfbt->fbtp_rval = DTRACE_INVOP_LEAVE;
1435 }
1436 newfbt->fbtp_roffset =
1437 (uintptr_t)(patch_instr - (uint8_t *)symbolStart);
1438
1439 newfbt->fbtp_savedval = *patch_instr;
1440 newfbt->fbtp_patchval = FBT_PATCHVAL;
1441 newfbt->fbtp_hashnext = fbt_probetab[FBT_ADDR2NDX(patch_instr)];
1442 fbt_probetab[FBT_ADDR2NDX(patch_instr)] = newfbt;
1443
1444 if (doenable)
1445 fbt_enable(NULL, newfbt->fbtp_id, newfbt);
1446
1447 instr += size;
1448 goto again;
1449}
b0d623f7 1450
6d2010ae
A
1451static void
1452__kernel_syms_provide_module(void *arg, struct modctl *ctl)
1453{
1454#pragma unused(arg)
1455 kernel_mach_header_t *mh;
1456 struct load_command *cmd;
1457 kernel_segment_command_t *orig_ts = NULL, *orig_le = NULL;
1458 struct symtab_command *orig_st = NULL;
1459 struct nlist_64 *sym = NULL;
1460 char *strings;
1461 uintptr_t instrLow, instrHigh;
1462 char *modname;
1463 unsigned int i;
1464
1465 mh = (kernel_mach_header_t *)(ctl->mod_address);
1466 modname = ctl->mod_modname;
1467
b0d623f7
A
1468 if (mh->magic != MH_MAGIC_64)
1469 return;
6d2010ae 1470
b0d623f7
A
1471 cmd = (struct load_command *) &mh[1];
1472 for (i = 0; i < mh->ncmds; i++) {
1473 if (cmd->cmd == LC_SEGMENT_KERNEL) {
1474 kernel_segment_command_t *orig_sg = (kernel_segment_command_t *) cmd;
6d2010ae 1475
b0d623f7
A
1476 if (LIT_STRNEQL(orig_sg->segname, SEG_TEXT))
1477 orig_ts = orig_sg;
1478 else if (LIT_STRNEQL(orig_sg->segname, SEG_LINKEDIT))
1479 orig_le = orig_sg;
1480 else if (LIT_STRNEQL(orig_sg->segname, ""))
1481 orig_ts = orig_sg; /* kexts have a single unnamed segment */
1482 }
1483 else if (cmd->cmd == LC_SYMTAB)
1484 orig_st = (struct symtab_command *) cmd;
6d2010ae 1485
b0d623f7
A
1486 cmd = (struct load_command *) ((caddr_t) cmd + cmd->cmdsize);
1487 }
6d2010ae 1488
b0d623f7
A
1489 if ((orig_ts == NULL) || (orig_st == NULL) || (orig_le == NULL))
1490 return;
6d2010ae 1491
b0d623f7
A
1492 sym = (struct nlist_64 *)(orig_le->vmaddr + orig_st->symoff - orig_le->fileoff);
1493 strings = (char *)(orig_le->vmaddr + orig_st->stroff - orig_le->fileoff);
6d2010ae 1494
b0d623f7
A
1495 /* Find extent of the TEXT section */
1496 instrLow = (uintptr_t)orig_ts->vmaddr;
1497 instrHigh = (uintptr_t)(orig_ts->vmaddr + orig_ts->vmsize);
6d2010ae 1498
b0d623f7 1499 for (i = 0; i < orig_st->nsyms; i++) {
b0d623f7
A
1500 uint8_t n_type = sym[i].n_type & (N_TYPE | N_EXT);
1501 char *name = strings + sym[i].n_un.n_strx;
6d2010ae 1502
b0d623f7
A
1503 /* Check that the symbol is a global and that it has a name. */
1504 if (((N_SECT | N_EXT) != n_type && (N_ABS | N_EXT) != n_type))
1505 continue;
6d2010ae 1506
b0d623f7
A
1507 if (0 == sym[i].n_un.n_strx) /* iff a null, "", name. */
1508 continue;
1509
1510 /* Lop off omnipresent leading underscore. */
1511 if (*name == '_')
1512 name += 1;
b0d623f7 1513
b0d623f7 1514 /*
6d2010ae 1515 * We're only blacklisting functions in the kernel for now.
b0d623f7 1516 */
6d2010ae 1517 if (MOD_IS_MACH_KERNEL(ctl) && !is_symbol_valid(name))
b0d623f7
A
1518 continue;
1519
6d2010ae
A
1520 __provide_probe_64(ctl, instrLow, instrHigh, modname, name, (machine_inst_t*)sym[i].n_value);
1521 }
1522}
b0d623f7 1523
6d2010ae
A
1524static void
1525__user_syms_provide_module(void *arg, struct modctl *ctl)
1526{
1527#pragma unused(arg)
1528 char *modname;
1529 unsigned int i;
1530
1531 modname = ctl->mod_modname;
1532
1533 dtrace_module_symbols_t* module_symbols = ctl->mod_user_symbols;
1534 if (module_symbols) {
1535 for (i=0; i<module_symbols->dtmodsyms_count; i++) {
316670eb
A
1536
1537 /*
1538 * symbol->dtsym_addr (the symbol address) passed in from
1539 * user space, is already slid for both kexts and kernel.
1540 */
6d2010ae 1541 dtrace_symbol_t* symbol = &module_symbols->dtmodsyms_symbols[i];
316670eb 1542
6d2010ae
A
1543 char* name = symbol->dtsym_name;
1544
1545 /* Lop off omnipresent leading underscore. */
1546 if (*name == '_')
1547 name += 1;
1548
b0d623f7 1549 /*
6d2010ae 1550 * We're only blacklisting functions in the kernel for now.
b0d623f7 1551 */
316670eb
A
1552 if (MOD_IS_MACH_KERNEL(ctl) && !is_symbol_valid(name))
1553 continue;
6d2010ae
A
1554
1555 __provide_probe_64(ctl, (uintptr_t)symbol->dtsym_addr, (uintptr_t)(symbol->dtsym_addr + symbol->dtsym_size), modname, name, (machine_inst_t*)(uintptr_t)symbol->dtsym_addr);
b0d623f7 1556 }
b0d623f7
A
1557 }
1558}
1559#else
1560#error Unknown arch
1561#endif
2d21ac55 1562
6d2010ae 1563extern int dtrace_kernel_symbol_mode;
2d21ac55
A
1564
1565/*ARGSUSED*/
1566void
1567fbt_provide_module(void *arg, struct modctl *ctl)
1568{
6d2010ae
A
1569 ASSERT(ctl != NULL);
1570 ASSERT(dtrace_kernel_symbol_mode != DTRACE_KERNEL_SYMBOLS_NEVER);
1571 lck_mtx_assert(&mod_lock, LCK_MTX_ASSERT_OWNED);
2d21ac55 1572
6d2010ae
A
1573 if (MOD_FBT_DONE(ctl))
1574 return;
1575
1576 if (!is_module_valid(ctl)) {
1577 ctl->mod_flags |= MODCTL_FBT_INVALID;
1578 return;
1579 }
1580
1581 if (MOD_HAS_KERNEL_SYMBOLS(ctl)) {
1582 __kernel_syms_provide_module(arg, ctl);
1583 ctl->mod_flags |= MODCTL_FBT_PROBES_PROVIDED;
1584 return;
1585 }
1586
1587 if (MOD_HAS_USERSPACE_SYMBOLS(ctl)) {
1588 __user_syms_provide_module(arg, ctl);
1589 ctl->mod_flags |= MODCTL_FBT_PROBES_PROVIDED;
1590 return;
1591 }
2d21ac55 1592}