]>
Commit | Line | Data |
---|---|---|
2d21ac55 A |
1 | /* |
2 | * CDDL HEADER START | |
3 | * | |
4 | * The contents of this file are subject to the terms of the | |
5 | * Common Development and Distribution License, Version 1.0 only | |
6 | * (the "License"). You may not use this file except in compliance | |
7 | * with the License. | |
8 | * | |
9 | * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE | |
10 | * or http://www.opensolaris.org/os/licensing. | |
11 | * See the License for the specific language governing permissions | |
12 | * and limitations under the License. | |
13 | * | |
14 | * When distributing Covered Code, include this CDDL HEADER in each | |
15 | * file and include the License file at usr/src/OPENSOLARIS.LICENSE. | |
16 | * If applicable, add the following below this CDDL HEADER, with the | |
17 | * fields enclosed by brackets "[]" replaced with your own identifying | |
18 | * information: Portions Copyright [yyyy] [name of copyright owner] | |
19 | * | |
20 | * CDDL HEADER END | |
21 | */ | |
22 | /* | |
23 | * Copyright 2005 Sun Microsystems, Inc. All rights reserved. | |
24 | * Use is subject to license terms. | |
25 | */ | |
26 | ||
27 | /* #pragma ident "@(#)fbt.c 1.15 05/09/19 SMI" */ | |
28 | ||
29 | #ifdef KERNEL | |
30 | #ifndef _KERNEL | |
31 | #define _KERNEL /* Solaris vs. Darwin */ | |
32 | #endif | |
33 | #endif | |
34 | ||
35 | #define MACH__POSIX_C_SOURCE_PRIVATE 1 /* pulls in suitable savearea from mach/ppc/thread_status.h */ | |
36 | #include <kern/thread.h> | |
37 | #include <mach/thread_status.h> | |
38 | #include <mach/vm_param.h> | |
39 | #include <mach-o/loader.h> | |
40 | #include <mach-o/nlist.h> | |
b0d623f7 | 41 | #include <libkern/kernel_mach_header.h> |
2d21ac55 A |
42 | |
43 | #include <sys/param.h> | |
44 | #include <sys/systm.h> | |
45 | #include <sys/errno.h> | |
46 | #include <sys/stat.h> | |
47 | #include <sys/ioctl.h> | |
48 | #include <sys/conf.h> | |
49 | #include <sys/fcntl.h> | |
50 | #include <miscfs/devfs/devfs.h> | |
51 | ||
52 | #include <sys/dtrace.h> | |
53 | #include <sys/dtrace_impl.h> | |
54 | #include <sys/fbt.h> | |
55 | ||
56 | #include <sys/dtrace_glue.h> | |
57 | ||
/*
 * Emulation codes introduced by this file, and the number of bytes that
 * fbt_perfCallback() adds to the saved instruction pointer to step over
 * each emulated instruction (the *_SKIP values).
 */
#define DTRACE_INVOP_NOP_SKIP 1
#define DTRACE_INVOP_MOVL_ESP_EBP 10
#define DTRACE_INVOP_MOVL_ESP_EBP_SKIP 2
#define DTRACE_INVOP_MOV_RSP_RBP 11
#define DTRACE_INVOP_MOV_RSP_RBP_SKIP 3
#define DTRACE_INVOP_POP_RBP 12
#define DTRACE_INVOP_POP_RBP_SKIP 1
#define DTRACE_INVOP_LEAVE_SKIP 1

/*
 * 32-bit prologue bytes. The provider accepts either of two two-byte
 * encodings (V0, V1) of movl %esp,%ebp following the pushl %ebp.
 */
#define FBT_PUSHL_EBP 0x55
#define FBT_MOVL_ESP_EBP0_V0 0x8b
#define FBT_MOVL_ESP_EBP1_V0 0xec
#define FBT_MOVL_ESP_EBP0_V1 0x89
#define FBT_MOVL_ESP_EBP1_V1 0xe5

/* 64-bit prologue/epilogue bytes. */
#define FBT_PUSH_RBP 0x55
#define FBT_REX_RSP_RBP 0x48
#define FBT_MOV_RSP_RBP0 0x89
#define FBT_MOV_RSP_RBP1 0xe5
#define FBT_POP_RBP 0x5d

/*
 * Epilogue opcodes and the instruction lengths the provider requires
 * dtrace_instr_size() to report before it trusts a match.
 */
#define FBT_POPL_EBP 0x5d
#define FBT_RET 0xc3
#define FBT_RET_IMM16 0xc2
#define FBT_LEAVE 0xc9
#define FBT_JMP_SHORT_REL 0xeb /* Jump short, relative, displacement relative to next instr. */
#define FBT_JMP_NEAR_REL 0xe9 /* Jump near, relative, displacement relative to next instr. */
#define FBT_JMP_FAR_ABS 0xea /* Jump far, absolute, address given in operand */
#define FBT_RET_LEN 1
#define FBT_RET_IMM16_LEN 3
#define FBT_JMP_SHORT_REL_LEN 2
#define FBT_JMP_NEAR_REL_LEN 5
#define FBT_JMP_FAR_ABS_LEN 5

/*
 * FBT_PATCHVAL is the byte recorded as fbtp_patchval for every probe;
 * the AFRAMES values are handed to dtrace_probe_create() for entry and
 * return probes respectively.
 */
#define FBT_PATCHVAL 0xf0
#define FBT_AFRAMES_ENTRY 7
#define FBT_AFRAMES_RETURN 6

/* Probe names, and the hash from a patch-point address to an fbt_probetab bucket. */
#define FBT_ENTRY "entry"
#define FBT_RETURN "return"
#define FBT_ADDR2NDX(addr) ((((uintptr_t)(addr)) >> 4) & fbt_probetab_mask)
99 | ||
100 | extern dtrace_provider_id_t fbt_id; | |
101 | extern fbt_probe_t **fbt_probetab; | |
102 | extern int fbt_probetab_mask; | |
103 | ||
b0d623f7 A |
104 | kern_return_t fbt_perfCallback(int, x86_saved_state_t *, __unused int, __unused int); |
105 | ||
2d21ac55 A |
/*
 * Critical routines that must not be probed. PR_5221096, PR_5379018.
 *
 * The blacklist is searched with bsearch() using _cmp(), which compares with
 * strncmp(). Entries must therefore be kept sorted by byte value, not
 * case-insensitively: upper-case letters sort before '_', and '_' sorts
 * before lower-case letters (hence "cpu_IA32e_disable" precedes
 * "cpu_control", and "cpu_type" precedes "cpuid_cpu_display").
 * An out-of-order entry would make lookups silently miss.
 */

static const char * critical_blacklist[] =
{
	"bcopy_phys",
	"console_cpu_alloc",
	"console_cpu_free",
	"cpu_IA32e_disable",
	"cpu_IA32e_enable",
	"cpu_NMI_interrupt",
	"cpu_control",
	"cpu_data_alloc",
	"cpu_desc_init",
	"cpu_desc_init64",
	"cpu_desc_load",
	"cpu_desc_load64",
	"cpu_exit_wait",
	"cpu_info",
	"cpu_info_count",
	"cpu_init",
	"cpu_interrupt",
	"cpu_machine_init",
	"cpu_mode_init",
	"cpu_processor_alloc",
	"cpu_processor_free",
	"cpu_signal_handler",
	"cpu_sleep",
	"cpu_start",
	"cpu_subtype",
	"cpu_thread_alloc",
	"cpu_thread_halt",
	"cpu_thread_init",
	"cpu_threadtype",
	"cpu_to_processor",
	"cpu_topology_sort",
	"cpu_topology_start_cpu",
	"cpu_type",
	"cpuid_cpu_display",
	"handle_pending_TLB_flushes",
	"hw_compare_and_store",
	"machine_idle_cstate",
	"mca_cpu_alloc",
	"mca_cpu_init",
	"ml_nofault_copy",
	"pmap_cpu_alloc",
	"pmap_cpu_free",
	"pmap_cpu_high_map_vaddr",
	"pmap_cpu_high_shared_remap",
	"pmap_cpu_init",
	"register_cpu_setup_func",
	"unregister_cpu_setup_func",
	"vstart"
};
/* Number of entries in critical_blacklist, for passing to bsearch(). */
#define CRITICAL_BLACKLIST_COUNT (sizeof(critical_blacklist)/sizeof(critical_blacklist[0]))
163 | ||
/*
 * The transitive closure of entry points that can be reached from probe context.
 * (Apart from routines whose names begin with dtrace_).
 *
 * Like critical_blacklist, this table is searched with bsearch() and _cmp(),
 * so it must stay sorted by byte value as strncmp() orders strings
 * (upper-case names first, '_' before lower-case letters, digits before '_').
 */
static const char * probe_ctx_closure[] =
{
	"Debugger",
	"IS_64BIT_PROCESS",
	"OSCompareAndSwap",
	"absolutetime_to_microtime",
	"ast_pending",
	"astbsd_on",
	"clock_get_calendar_nanotime_nowait",
	"copyin",
	"copyin_user",
	"copyinstr",
	"copyout",
	"copyoutstr",
	"cpu_number",
	"current_proc",
	"current_processor",
	"current_task",
	"current_thread",
	"debug_enter",
	"find_user_regs",
	"flush_tlb64",
	"get_bsdtask_info",
	"get_bsdthread_info",
	"hw_atomic_and",
	"kauth_cred_get",
	"kauth_getgid",
	"kauth_getuid",
	"kernel_preempt_check",
	"mach_absolute_time",
	"max_valid_stack_address",
	"ml_at_interrupt_context",
	"ml_phys_write_byte_64",
	"ml_phys_write_half_64",
	"ml_phys_write_word_64",
	"ml_set_interrupts_enabled",
	"panic",
	"pmap64_pde",
	"pmap64_pdpt",
	"pmap_find_phys",
	"pmap_get_mapwindow",
	"pmap_pde",
	"pmap_pte",
	"pmap_put_mapwindow",
	"pmap_valid_page",
	"prf",
	"proc_is64bit",
	"proc_selfname",
	"proc_selfpid",
	"proc_selfppid",
	"psignal_lock",
	"rtc_nanotime_load",
	"rtc_nanotime_read",
	"sdt_getargdesc",
	"strlcpy",
	"sync_iss_to_iks_unconditionally",
	"systrace_stub",
	"timer_grab"
};
/* Number of entries in probe_ctx_closure, for passing to bsearch(). */
#define PROBE_CTX_CLOSURE_COUNT (sizeof(probe_ctx_closure)/sizeof(probe_ctx_closure[0]))
228 | ||
229 | ||
/*
 * Comparator for bsearch() over a table of C-string pointers.
 *
 * a: the search key, a NUL-terminated string.
 * b: address of a table slot, i.e. a pointer to a `const char *`.
 *
 * Returns <0, 0 or >0 as strncmp() does. The length passed to strncmp()
 * covers the key's terminating NUL, so zero is returned only when the
 * table entry equals the key exactly (a mere prefix match is not enough).
 */
static int _cmp(const void *a, const void *b)
{
	const char *key = (const char *)a;
	const char *entry = *(const char * const *)b;
	size_t span = strlen(key) + 1;	/* include the key's NUL terminator */

	return strncmp(key, entry, span);
}
234 | ||
/*
 * File-local binary search over a sorted array.
 *
 * key:    value to look for; passed as the first argument to compar.
 * base0:  start of the array.
 * nmemb:  number of elements in the array.
 * size:   size in bytes of one element.
 * compar: called as compar(key, element); must return 0 on a match,
 *         >0 when key sorts after the element, <0 otherwise.
 *
 * Returns a pointer to a matching element, or NULL when none matches
 * (including when nmemb is 0).
 */
static const void * bsearch(
	register const void *key,
	const void *base0,
	size_t nmemb,
	register size_t size,
	register int (*compar)(const void *, const void *))
{
	const char *window = base0;	/* first element still under consideration */
	size_t remaining = nmemb;	/* number of elements still under consideration */

	while (remaining != 0) {
		const char *probe = window + (remaining >> 1) * size;
		int order = (*compar)(key, probe);

		if (order == 0)
			return probe;

		if (order > 0) {
			/* key sorts after probe: continue in the upper part, excluding probe */
			window = probe + size;
			remaining--;
		}
		/* otherwise continue in the lower part */

		remaining >>= 1;
	}

	return (NULL);
}
259 | ||
b0d623f7 | 260 | #if defined(__i386__) |
2d21ac55 A |
261 | int |
262 | fbt_invop(uintptr_t addr, uintptr_t *stack, uintptr_t rval) | |
263 | { | |
264 | uintptr_t stack0 = 0, stack1 = 0, stack2 = 0, stack3 = 0, stack4 = 0; | |
265 | fbt_probe_t *fbt = fbt_probetab[FBT_ADDR2NDX(addr)]; | |
266 | ||
267 | for (; fbt != NULL; fbt = fbt->fbtp_hashnext) { | |
268 | if ((uintptr_t)fbt->fbtp_patchpoint == addr) { | |
269 | ||
270 | if (fbt->fbtp_roffset == 0) { | |
271 | uintptr_t *stacktop; | |
272 | if (CPU_ON_INTR(CPU)) | |
273 | stacktop = (uintptr_t *)dtrace_get_cpu_int_stack_top(); | |
274 | else | |
b0d623f7 | 275 | stacktop = (uintptr_t *)(dtrace_get_kernel_stack(current_thread()) + kernel_stack_size); |
2d21ac55 A |
276 | |
277 | stack += 1; /* skip over the target's pushl'd %ebp */ | |
278 | ||
279 | if (stack <= stacktop) | |
280 | CPU->cpu_dtrace_caller = *stack++; | |
281 | if (stack <= stacktop) | |
282 | stack0 = *stack++; | |
283 | if (stack <= stacktop) | |
284 | stack1 = *stack++; | |
285 | if (stack <= stacktop) | |
286 | stack2 = *stack++; | |
287 | if (stack <= stacktop) | |
288 | stack3 = *stack++; | |
289 | if (stack <= stacktop) | |
290 | stack4 = *stack++; | |
291 | ||
b0d623f7 | 292 | /* 32-bit ABI, arguments passed on stack. */ |
2d21ac55 A |
293 | dtrace_probe(fbt->fbtp_id, stack0, stack1, stack2, stack3, stack4); |
294 | CPU->cpu_dtrace_caller = 0; | |
295 | } else { | |
296 | dtrace_probe(fbt->fbtp_id, fbt->fbtp_roffset, rval, 0, 0, 0); | |
297 | CPU->cpu_dtrace_caller = 0; | |
298 | } | |
299 | ||
300 | return (fbt->fbtp_rval); | |
301 | } | |
302 | } | |
303 | ||
304 | return (0); | |
305 | } | |
306 | ||
/* True when the saved code-segment selector has a non-zero low two bits, i.e. the trap did not come from ring 0. */
#define IS_USER_TRAP(regs) (regs && (((regs)->cs & 3) != 0))
#define T_INVALID_OPCODE 6
/* Trap number this provider reacts to. */
#define FBT_EXCEPTION_CODE T_INVALID_OPCODE
#define T_PREEMPT 255

/*
 * i386 trap hook: handle an invalid-opcode trap raised in kernel mode by a
 * patched probe site.
 *
 * trapno:      trap number being dispatched.
 * tagged_regs: saved register state for the trap; interpreted here as the
 *              32-bit flavor via saved_state32().
 * unused1/2:   ignored.
 *
 * Any trap other than FBT_EXCEPTION_CODE, and any trap taken from user
 * mode, is left alone and KERN_FAILURE is returned. Otherwise
 * dtrace_invop() is called (with interrupts disabled) to fire the probe,
 * and the emulation code it returns selects how the displaced instruction
 * is emulated in the saved state. Returns KERN_SUCCESS when an emulation
 * was performed, KERN_FAILURE when the code was not recognised.
 *
 * Note that saved_state->trapno is overwritten with T_PREEMPT on every
 * path through the kernel-mode branch, including the KERN_FAILURE one.
 */
kern_return_t
fbt_perfCallback(
	int trapno,
	x86_saved_state_t *tagged_regs,
	__unused int unused1,
	__unused int unused2)
{
	kern_return_t retval = KERN_FAILURE;
	x86_saved_state32_t *saved_state = saved_state32(tagged_regs);
	struct x86_saved_state32_from_kernel *regs = (struct x86_saved_state32_from_kernel *)saved_state;

	if (FBT_EXCEPTION_CODE == trapno && !IS_USER_TRAP(saved_state)) {
		boolean_t oldlevel, cpu_64bit;
		uint32_t esp_probe, *ebp, edi, fp, *pDst, delta = 0;
		int emul;

		cpu_64bit = ml_is64bit();
		oldlevel = ml_set_interrupts_enabled(FALSE);

		/* Calculate where the stack pointer was when the probe instruction "fired." */
		if (cpu_64bit) {
			esp_probe = saved_state->uesp; /* Easy, x86_64 establishes this value in idt64.s */
		} else {
			esp_probe = (uint32_t)&(regs[1]); /* Nasty, infer the location above the save area */
		}

		emul = dtrace_invop( saved_state->eip, (uintptr_t *)esp_probe, saved_state->eax );
		/* Export a global label marking the instruction right after the dtrace_invop() call. */
		__asm__ volatile(".globl _dtrace_invop_callsite");
		__asm__ volatile("_dtrace_invop_callsite:");

		switch (emul) {
		case DTRACE_INVOP_NOP:
			saved_state->eip += DTRACE_INVOP_NOP_SKIP; /* Skip over the patched NOP (planted by sdt.) */
			retval = KERN_SUCCESS;
			break;

		case DTRACE_INVOP_MOVL_ESP_EBP:
			saved_state->ebp = esp_probe; /* Emulate patched movl %esp,%ebp */
			saved_state->eip += DTRACE_INVOP_MOVL_ESP_EBP_SKIP; /* Skip over the bytes of the patched movl %esp,%ebp */
			retval = KERN_SUCCESS;
			break;

		case DTRACE_INVOP_POPL_EBP:
		case DTRACE_INVOP_LEAVE:
/*
 * Emulate first micro-op of patched leave: movl %ebp,%esp
 * fp points just below the return address slot for target's ret
 * and at the slot holding the frame pointer saved by the target's prologue.
 */
			fp = saved_state->ebp;
/* Emulate second micro-op of patched leave: patched popl %ebp
 * savearea ebp is set for the frame of the caller to target
 * The *live* %esp will be adjusted below for pop increment(s)
 */
			saved_state->ebp = *(uint32_t *)fp;
/* Skip over the patched leave */
			saved_state->eip += DTRACE_INVOP_LEAVE_SKIP;
/*
 * Lift the stack to account for the emulated leave
 * Account for words local in this frame
 * (in "case DTRACE_INVOP_POPL_EBP:" this is zero.)
 */
			/* Pointer subtraction: delta counts 32-bit words, not bytes. */
			delta = ((uint32_t *)fp) - ((uint32_t *)esp_probe);
/* Account for popping off the ebp (just accomplished by the emulation
 * above...)
 */
			delta += 1;

			/* delta << 2 converts the word count to bytes. */
			if (cpu_64bit)
				saved_state->uesp += (delta << 2);

/* XXX Fragile in the extreme. Obtain the value of %edi that our caller pushed
 * (on behalf of its caller -- trap_from_kernel()). Ultimately,
 * trap_from_kernel's stack pointer is restored from this slot.
 * This is sensitive to the manner in which the compiler preserves %edi,
 * and trap_from_kernel()'s internals.
 */
			ebp = (uint32_t *)__builtin_frame_address(0);
			ebp = (uint32_t *)*ebp;
			edi = *(ebp - 1);
/* Shift contents of stack */
			/*
			 * Walks downward from fp to just above the address held in edi,
			 * copying each word from delta words lower, so the whole region
			 * moves up by delta words.
			 */
			for (pDst = (uint32_t *)fp;
			     pDst > (((uint32_t *)edi));
			     pDst--)
				*pDst = pDst[-delta];

/* Track the stack lift in "saved_state". */
			saved_state = (x86_saved_state32_t *) (((uintptr_t)saved_state) + (delta << 2));

/* Now adjust the value of %edi in our caller (kernel_trap)'s frame */
			*(ebp - 1) = edi + (delta << 2);

			retval = KERN_SUCCESS;
			break;

		default:
			retval = KERN_FAILURE;
			break;
		}
		/* Written through the (possibly relocated) saved_state pointer, on success and failure alike. */
		saved_state->trapno = T_PREEMPT; /* Avoid call to i386_astintr()! */

		ml_set_interrupts_enabled(oldlevel);
	}

	return retval;
}
418 | ||
/*
 * i386: create FBT entry and return probes for the functions of one
 * loaded module.
 *
 * arg: unused.
 * ctl: module descriptor; ctl->address is treated as the module's Mach-O
 *      header and ctl->mod_modname as the probe module name.
 *
 * Outline:
 *  1. Bail out for modules that must not be instrumented (no address or
 *     size, the dtrace kext itself, anything with "CHUD" in its name, a
 *     header whose magic is not MH_MAGIC).
 *  2. Walk the load commands to find the text segment, the __LINKEDIT
 *     segment and the symbol table.
 *  3. For every named external symbol that survives the name filters,
 *     look for a pushl %ebp / movl %esp,%ebp prologue and register an
 *     entry probe whose patch point is the movl.
 *  4. Keep disassembling forward from there; every "popl %ebp" or "leave"
 *     that is directly followed by a ret or jmp becomes a return probe.
 *     All return probes of one function share a probe id and are chained
 *     through fbtp_next.
 *
 * The "IgnoreFBTBlacklist" boot-arg disables only the two bsearch()-based
 * table checks (critical_blacklist and probe_ctx_closure); the hard-coded
 * name filters always apply.
 */
/*ARGSUSED*/
static void
__fbt_provide_module(void *arg, struct modctl *ctl)
{
#pragma unused(arg)
	kernel_mach_header_t *mh;
	struct load_command *cmd;
	kernel_segment_command_t *orig_ts = NULL, *orig_le = NULL;
	struct symtab_command *orig_st = NULL;
	struct nlist *sym = NULL;
	char *strings;
	uintptr_t instrLow, instrHigh;
	char *modname;
	unsigned int i, j;

	int gIgnoreFBTBlacklist = 0;
	PE_parse_boot_argn("IgnoreFBTBlacklist", &gIgnoreFBTBlacklist, sizeof (gIgnoreFBTBlacklist));

	/* mh is only dereferenced after the address/size check below. */
	mh = (kernel_mach_header_t *)(ctl->address);
	modname = ctl->mod_modname;

	if (0 == ctl->address || 0 == ctl->size) /* Has the linker been jettisoned? */
		return;

	/*
	 * Employees of dtrace and their families are ineligible. Void
	 * where prohibited.
	 */

	if (LIT_STRNEQL(modname, "com.apple.driver.dtrace"))
		return;

	if (strstr(modname, "CHUD") != NULL)
		return;

	if (mh->magic != MH_MAGIC)
		return;

	/* Load commands start immediately after the header; each is cmdsize bytes long. */
	cmd = (struct load_command *) &mh[1];
	for (i = 0; i < mh->ncmds; i++) {
		if (cmd->cmd == LC_SEGMENT_KERNEL) {
			kernel_segment_command_t *orig_sg = (kernel_segment_command_t *) cmd;

			if (LIT_STRNEQL(orig_sg->segname, SEG_TEXT))
				orig_ts = orig_sg;
			else if (LIT_STRNEQL(orig_sg->segname, SEG_LINKEDIT))
				orig_le = orig_sg;
			else if (LIT_STRNEQL(orig_sg->segname, ""))
				orig_ts = orig_sg; /* kexts have a single unnamed segment */
		}
		else if (cmd->cmd == LC_SYMTAB)
			orig_st = (struct symtab_command *) cmd;

		cmd = (struct load_command *) ((caddr_t) cmd + cmd->cmdsize);
	}

	if ((orig_ts == NULL) || (orig_st == NULL) || (orig_le == NULL))
		return;

	/*
	 * symoff/stroff are file offsets; rebase them onto the __LINKEDIT
	 * segment's address using that segment's own file offset.
	 */
	sym = (struct nlist *)(orig_le->vmaddr + orig_st->symoff - orig_le->fileoff);
	strings = (char *)(orig_le->vmaddr + orig_st->stroff - orig_le->fileoff);

	/* Find extent of the TEXT section */
	instrLow = (uintptr_t)orig_ts->vmaddr;
	instrHigh = (uintptr_t)(orig_ts->vmaddr + orig_ts->vmsize);

	for (i = 0; i < orig_st->nsyms; i++) {
		fbt_probe_t *fbt, *retfbt;
		machine_inst_t *instr, *limit, theInstr, i1, i2;
		uint8_t n_type = sym[i].n_type & (N_TYPE | N_EXT);
		char *name = strings + sym[i].n_un.n_strx;
		int size;

		/* Check that the symbol is a global and that it has a name. */
		if (((N_SECT | N_EXT) != n_type && (N_ABS | N_EXT) != n_type))
			continue;

		if (0 == sym[i].n_un.n_strx) /* iff a null, "", name. */
			continue;

		/* Lop off omnipresent leading underscore. */
		if (*name == '_')
			name += 1;

		if (LIT_STRNSTART(name, "dtrace_") && !LIT_STRNSTART(name, "dtrace_safe_")) {
			/*
			 * Anything beginning with "dtrace_" may be called
			 * from probe context unless it explicitly indicates
			 * that it won't be called from probe context by
			 * using the prefix "dtrace_safe_".
			 */
			continue;
		}

		if (LIT_STRNSTART(name, "dsmos_"))
			continue; /* Don't Steal Mac OS X! */

		if (LIT_STRNSTART(name, "_dtrace"))
			continue; /* Shims in dtrace.c */

		if (LIT_STRNSTART(name, "chud"))
			continue; /* Professional courtesy. */

		if (LIT_STRNSTART(name, "hibernate_"))
			continue; /* Let sleeping dogs lie. */

		if (LIT_STRNEQL(name, "_ZN9IOService14newTemperatureElPS_") || /* IOService::newTemperature */
		    LIT_STRNEQL(name, "_ZN9IOService26temperatureCriticalForZoneEPS_")) /* IOService::temperatureCriticalForZone */
			continue; /* Per the fire code */

		/*
		 * Place no probes (illegal instructions) in the exception handling path!
		 */
		if (LIT_STRNEQL(name, "t_invop") ||
		    LIT_STRNEQL(name, "enter_lohandler") ||
		    LIT_STRNEQL(name, "lo_alltraps") ||
		    LIT_STRNEQL(name, "kernel_trap") ||
		    LIT_STRNEQL(name, "interrupt") ||
		    LIT_STRNEQL(name, "i386_astintr"))
			continue;

		if (LIT_STRNEQL(name, "current_thread") ||
		    LIT_STRNEQL(name, "ast_pending") ||
		    LIT_STRNEQL(name, "fbt_perfCallback") ||
		    LIT_STRNEQL(name, "machine_thread_get_kern_state") ||
		    LIT_STRNEQL(name, "get_threadtask") ||
		    LIT_STRNEQL(name, "ml_set_interrupts_enabled") ||
		    LIT_STRNEQL(name, "dtrace_invop") ||
		    LIT_STRNEQL(name, "fbt_invop") ||
		    LIT_STRNEQL(name, "sdt_invop") ||
		    LIT_STRNEQL(name, "max_valid_stack_address"))
			continue;

		/*
		 * Voodoo.
		 */
		if (LIT_STRNSTART(name, "machine_stack_") ||
		    LIT_STRNSTART(name, "mapping_") ||
		    LIT_STRNEQL(name, "tmrCvt") ||

		    LIT_STRNSTART(name, "tsc_") ||

		    LIT_STRNSTART(name, "pmCPU") ||
		    LIT_STRNEQL(name, "pmKextRegister") ||
		    LIT_STRNEQL(name, "pmMarkAllCPUsOff") ||
		    LIT_STRNEQL(name, "pmSafeMode") ||
		    LIT_STRNEQL(name, "pmTimerSave") ||
		    LIT_STRNEQL(name, "pmTimerRestore") ||
		    LIT_STRNEQL(name, "pmUnRegister") ||
		    LIT_STRNSTART(name, "pms") ||
		    LIT_STRNEQL(name, "power_management_init") ||
		    LIT_STRNSTART(name, "usimple_") ||
		    LIT_STRNEQL(name, "lck_spin_lock") ||
		    LIT_STRNEQL(name, "lck_spin_unlock") ||

		    LIT_STRNSTART(name, "rtc_") ||
		    LIT_STRNSTART(name, "_rtc_") ||
		    LIT_STRNSTART(name, "rtclock_") ||
		    LIT_STRNSTART(name, "clock_") ||
		    LIT_STRNSTART(name, "absolutetime_to_") ||
		    LIT_STRNEQL(name, "setPop") ||
		    LIT_STRNEQL(name, "nanoseconds_to_absolutetime") ||
		    LIT_STRNEQL(name, "nanotime_to_absolutetime") ||

		    LIT_STRNSTART(name, "etimer_") ||

		    LIT_STRNSTART(name, "commpage_") ||
		    LIT_STRNSTART(name, "pmap_") ||
		    LIT_STRNSTART(name, "ml_") ||
		    LIT_STRNSTART(name, "PE_") ||
		    LIT_STRNEQL(name, "kprintf") ||
		    LIT_STRNSTART(name, "lapic_") ||
		    LIT_STRNSTART(name, "acpi_"))
			continue;

		/*
		 * Avoid machine_ routines. PR_5346750.
		 */
		if (LIT_STRNSTART(name, "machine_"))
			continue;

		if (LIT_STRNEQL(name, "handle_pending_TLB_flushes"))
			continue;

		/*
		 * Place no probes on critical routines. PR_5221096
		 * (sizeof(name) is the size of a char pointer, which is the
		 * element size of the string-pointer table being searched.)
		 */
		if (!gIgnoreFBTBlacklist &&
		    bsearch( name, critical_blacklist, CRITICAL_BLACKLIST_COUNT, sizeof(name), _cmp ) != NULL)
			continue;

		/*
		 * Place no probes that could be hit in probe context.
		 */
		if (!gIgnoreFBTBlacklist &&
		    bsearch( name, probe_ctx_closure, PROBE_CTX_CLOSURE_COUNT, sizeof(name), _cmp ) != NULL)
			continue;

		/*
		 * Place no probes that could be hit on the way to the debugger.
		 */
		if (LIT_STRNSTART(name, "kdp_") ||
		    LIT_STRNSTART(name, "kdb_") ||
		    LIT_STRNSTART(name, "kdbg_") ||
		    LIT_STRNSTART(name, "kdebug_") ||
		    LIT_STRNEQL(name, "kernel_debug") ||
		    LIT_STRNEQL(name, "Debugger") ||
		    LIT_STRNEQL(name, "Call_DebuggerC") ||
		    LIT_STRNEQL(name, "lock_debugger") ||
		    LIT_STRNEQL(name, "unlock_debugger") ||
		    LIT_STRNEQL(name, "SysChoked"))
			continue;

		/*
		 * Place no probes that could be hit on the way to a panic.
		 */
		if (NULL != strstr(name, "panic_") ||
		    LIT_STRNEQL(name, "panic") ||
		    LIT_STRNEQL(name, "handleMck") ||
		    LIT_STRNEQL(name, "unresolved_kernel_trap"))
			continue;

		/* Skip symbols for which this provider already has a probe with this module and function name. */
		if (dtrace_probe_lookup(fbt_id, modname, name, NULL) != 0)
			continue;

		/*
		 * Examine at most the first four instructions of the function,
		 * staying inside the text segment, and stop at the first
		 * pushl %ebp or ret. The bound on instr + 2 keeps the later
		 * reads of instr[1] and instr[2] inside the segment.
		 */
		for (j = 0, instr = (machine_inst_t *)sym[i].n_value, theInstr = 0;
		    (j < 4) && ((uintptr_t)instr >= instrLow) && (instrHigh > (uintptr_t)(instr + 2));
		    j++) {
			theInstr = instr[0];
			if (theInstr == FBT_PUSHL_EBP || theInstr == FBT_RET || theInstr == FBT_RET_IMM16)
				break;

			if ((size = dtrace_instr_size(instr)) <= 0)
				break;

			instr += size;
		}

		/* No pushl %ebp found: not a function this provider can instrument. */
		if (theInstr != FBT_PUSHL_EBP)
			continue;

		i1 = instr[1];
		i2 = instr[2];

		limit = (machine_inst_t *)instrHigh;

		if ((i1 == FBT_MOVL_ESP_EBP0_V0 && i2 == FBT_MOVL_ESP_EBP1_V0) ||
		    (i1 == FBT_MOVL_ESP_EBP0_V1 && i2 == FBT_MOVL_ESP_EBP1_V1)) {
			instr += 1; /* Advance to the movl %esp,%ebp */
			theInstr = i1;
		} else {
			/*
			 * Sometimes, the compiler will schedule an intervening instruction
			 * in the function prologue. Example:
			 *
			 * _mach_vm_read:
			 * 000006d8 pushl %ebp
			 * 000006d9 movl $0x00000004,%edx
			 * 000006de movl %esp,%ebp
			 *
			 * Try the next instruction, to see if it is a movl %esp,%ebp
			 */

			instr += 1; /* Advance past the pushl %ebp */
			if ((size = dtrace_instr_size(instr)) <= 0)
				continue;

			instr += size;

			if ((instr + 1) >= limit)
				continue;

			i1 = instr[0];
			i2 = instr[1];

			if (!(i1 == FBT_MOVL_ESP_EBP0_V0 && i2 == FBT_MOVL_ESP_EBP1_V0) &&
			    !(i1 == FBT_MOVL_ESP_EBP0_V1 && i2 == FBT_MOVL_ESP_EBP1_V1))
				continue;

			/* instr already points at the movl %esp,%ebp */
			theInstr = i1;
		}

		/*
		 * Entry probe. kmem_zalloc() zero-fills the record, so
		 * fbtp_roffset stays 0, which is how fbt_invop() tells an
		 * entry probe from a return probe.
		 */
		fbt = kmem_zalloc(sizeof (fbt_probe_t), KM_SLEEP);
		strlcpy( (char *)&(fbt->fbtp_name), name, MAX_FBTP_NAME_CHARS );
		fbt->fbtp_id = dtrace_probe_create(fbt_id, modname, name, FBT_ENTRY, FBT_AFRAMES_ENTRY, fbt);
		fbt->fbtp_patchpoint = instr;
		fbt->fbtp_ctl = ctl;
		fbt->fbtp_loadcnt = ctl->mod_loadcnt;
		fbt->fbtp_rval = DTRACE_INVOP_MOVL_ESP_EBP;
		fbt->fbtp_savedval = theInstr;
		fbt->fbtp_patchval = FBT_PATCHVAL;

		/* Push the record onto the front of its hash chain, keyed by patch-point address. */
		fbt->fbtp_hashnext = fbt_probetab[FBT_ADDR2NDX(instr)];
		fbt->fbtp_symndx = i;
		fbt_probetab[FBT_ADDR2NDX(instr)] = fbt;

		retfbt = NULL;
		/*
		 * Return-probe scan. Each pass through "again" examines one
		 * instruction. A `continue` at this nesting level ends the scan
		 * and moves on to the next symbol.
		 */
again:
		if (instr >= limit)
			continue;

		/*
		 * If this disassembly fails, then we've likely walked off into
		 * a jump table or some other unsuitable area. Bail out of the
		 * disassembly now.
		 */
		if ((size = dtrace_instr_size(instr)) <= 0)
			continue;

		/*
		 * We (desperately) want to avoid erroneously instrumenting a
		 * jump table, especially given that our markers are pretty
		 * short: two bytes on x86, and just one byte on amd64. To
		 * determine if we're looking at a true instruction sequence
		 * or an inline jump table that happens to contain the same
		 * byte sequences, we resort to some heuristic sleaze: we
		 * treat this instruction as being contained within a pointer,
		 * and see if that pointer points to within the body of the
		 * function. If it does, we refuse to instrument it.
		 */
		for (j = 0; j < sizeof (uintptr_t); j++) {
			uintptr_t check = (uintptr_t)instr - j;
			uint8_t *ptr;

			if (check < sym[i].n_value)
				break;

			/* This `continue` only advances j; it does not abandon the symbol. */
			if (check + sizeof (uintptr_t) > (uintptr_t)limit)
				continue;

			ptr = *(uint8_t **)check;

			if (ptr >= (uint8_t *)sym[i].n_value && ptr < limit) {
				instr += size;
				goto again;
			}
		}

		/*
		 * OK, it's an instruction.
		 */
		theInstr = instr[0];

		/* Walked onto the start of the next routine? If so, bail out of this function. */
		if (theInstr == FBT_PUSHL_EBP)
			continue;

		if (!(size == 1 && (theInstr == FBT_POPL_EBP || theInstr == FBT_LEAVE))) {
			instr += size;
			goto again;
		}

		/*
		 * Found the popl %ebp; or leave.
		 */
		machine_inst_t *patch_instr = instr;

		/*
		 * Scan forward for a "ret", or "jmp".
		 */
		instr += size;
		if (instr >= limit)
			continue;

		size = dtrace_instr_size(instr);
		if (size <= 0) /* Failed instruction decode? */
			continue;

		theInstr = instr[0];

		/* Require both the opcode byte and the decoded length to match one of the accepted forms. */
		if (!(size == FBT_RET_LEN && (theInstr == FBT_RET)) &&
		    !(size == FBT_RET_IMM16_LEN && (theInstr == FBT_RET_IMM16)) &&
		    !(size == FBT_JMP_SHORT_REL_LEN && (theInstr == FBT_JMP_SHORT_REL)) &&
		    !(size == FBT_JMP_NEAR_REL_LEN && (theInstr == FBT_JMP_NEAR_REL)) &&
		    !(size == FBT_JMP_FAR_ABS_LEN && (theInstr == FBT_JMP_FAR_ABS)))
			continue;

		/*
		 * popl %ebp; ret; or leave; ret; or leave; jmp tailCalledFun; -- We have a winner!
		 */
		fbt = kmem_zalloc(sizeof (fbt_probe_t), KM_SLEEP);
		strlcpy( (char *)&(fbt->fbtp_name), name, MAX_FBTP_NAME_CHARS );

		/*
		 * The first return site creates the "return" probe; later sites
		 * reuse its id and are linked behind it through fbtp_next.
		 */
		if (retfbt == NULL) {
			fbt->fbtp_id = dtrace_probe_create(fbt_id, modname,
			    name, FBT_RETURN, FBT_AFRAMES_RETURN, fbt);
		} else {
			retfbt->fbtp_next = fbt;
			fbt->fbtp_id = retfbt->fbtp_id;
		}

		retfbt = fbt;
		fbt->fbtp_patchpoint = patch_instr;
		fbt->fbtp_ctl = ctl;
		fbt->fbtp_loadcnt = ctl->mod_loadcnt;

		if (*patch_instr == FBT_POPL_EBP) {
			fbt->fbtp_rval = DTRACE_INVOP_POPL_EBP;
		} else {
			ASSERT(*patch_instr == FBT_LEAVE);
			fbt->fbtp_rval = DTRACE_INVOP_LEAVE;
		}
		/* Offset of the patch point from the symbol's address; fbt_invop() passes it as arg0 of the return probe. */
		fbt->fbtp_roffset =
		    (uintptr_t)(patch_instr - (uint8_t *)sym[i].n_value);

		fbt->fbtp_savedval = *patch_instr;
		fbt->fbtp_patchval = FBT_PATCHVAL;
		fbt->fbtp_hashnext = fbt_probetab[FBT_ADDR2NDX(patch_instr)];
		fbt->fbtp_symndx = i;
		fbt_probetab[FBT_ADDR2NDX(patch_instr)] = fbt;

		/* Keep scanning past the ret/jmp for further return sites. */
		instr += size;
		goto again;
	}
}
b0d623f7 A |
835 | #elif defined(__x86_64__) |
836 | int | |
837 | fbt_invop(uintptr_t addr, uintptr_t *state, uintptr_t rval) | |
838 | { | |
839 | fbt_probe_t *fbt = fbt_probetab[FBT_ADDR2NDX(addr)]; | |
840 | ||
841 | for (; fbt != NULL; fbt = fbt->fbtp_hashnext) { | |
842 | if ((uintptr_t)fbt->fbtp_patchpoint == addr) { | |
843 | ||
844 | if (fbt->fbtp_roffset == 0) { | |
845 | x86_saved_state64_t *regs = (x86_saved_state64_t *)state; | |
846 | ||
847 | CPU->cpu_dtrace_caller = *(uintptr_t *)(((uintptr_t)(regs->isf.rsp))+sizeof(uint64_t)); // 8(%rsp) | |
848 | /* 64-bit ABI, arguments passed in registers. */ | |
849 | dtrace_probe(fbt->fbtp_id, regs->rdi, regs->rsi, regs->rdx, regs->rcx, regs->r8); | |
850 | CPU->cpu_dtrace_caller = 0; | |
851 | } else { | |
852 | ||
853 | dtrace_probe(fbt->fbtp_id, fbt->fbtp_roffset, rval, 0, 0, 0); | |
854 | CPU->cpu_dtrace_caller = 0; | |
855 | } | |
856 | ||
857 | return (fbt->fbtp_rval); | |
858 | } | |
859 | } | |
860 | ||
861 | return (0); | |
862 | } | |
863 | ||
/*
 * IS_USER_TRAP(regs): true when "regs" is non-NULL and the low two bits of
 * the saved code-segment selector are non-zero (i.e. the trap did not come
 * from ring 0).  The argument is fully parenthesized so that expression
 * arguments expand correctly.
 */
#define IS_USER_TRAP(regs) ((regs) && ((((regs))->isf.cs & 3) != 0))
#define T_INVALID_OPCODE 6			/* trap number of the invalid-opcode exception */
#define FBT_EXCEPTION_CODE T_INVALID_OPCODE	/* the trap FBT patch points raise */
#define T_PREEMPT 255				/* stored into trapno after a handled probe */
868 | ||
869 | kern_return_t | |
870 | fbt_perfCallback( | |
871 | int trapno, | |
872 | x86_saved_state_t *tagged_regs, | |
873 | __unused int unused1, | |
874 | __unused int unused2) | |
875 | { | |
876 | kern_return_t retval = KERN_FAILURE; | |
877 | x86_saved_state64_t *saved_state = saved_state64(tagged_regs); | |
878 | ||
879 | if (FBT_EXCEPTION_CODE == trapno && !IS_USER_TRAP(saved_state)) { | |
880 | boolean_t oldlevel; | |
881 | uint64_t rsp_probe, *rbp, r12, fp, delta = 0; | |
882 | uint32_t *pDst; | |
883 | int emul; | |
884 | ||
885 | oldlevel = ml_set_interrupts_enabled(FALSE); | |
886 | ||
887 | /* Calculate where the stack pointer was when the probe instruction "fired." */ | |
888 | rsp_probe = saved_state->isf.rsp; /* Easy, x86_64 establishes this value in idt64.s */ | |
889 | ||
890 | emul = dtrace_invop( saved_state->isf.rip, (uintptr_t *)saved_state, saved_state->rax ); | |
891 | __asm__ volatile(".globl _dtrace_invop_callsite"); | |
892 | __asm__ volatile("_dtrace_invop_callsite:"); | |
893 | ||
894 | switch (emul) { | |
895 | case DTRACE_INVOP_NOP: | |
896 | saved_state->isf.rip += DTRACE_INVOP_NOP_SKIP; /* Skip over the patched NOP (planted by sdt). */ | |
897 | retval = KERN_SUCCESS; | |
898 | break; | |
899 | ||
900 | case DTRACE_INVOP_MOV_RSP_RBP: | |
901 | saved_state->rbp = rsp_probe; /* Emulate patched mov %rsp,%rbp */ | |
902 | saved_state->isf.rip += DTRACE_INVOP_MOV_RSP_RBP_SKIP; /* Skip over the bytes of the patched mov %rsp,%rbp */ | |
903 | retval = KERN_SUCCESS; | |
904 | break; | |
905 | ||
906 | case DTRACE_INVOP_POP_RBP: | |
907 | case DTRACE_INVOP_LEAVE: | |
908 | /* | |
909 | * Emulate first micro-op of patched leave: mov %rbp,%rsp | |
910 | * fp points just below the return address slot for target's ret | |
911 | * and at the slot holding the frame pointer saved by the target's prologue. | |
912 | */ | |
913 | fp = saved_state->rbp; | |
914 | /* Emulate second micro-op of patched leave: patched pop %rbp | |
915 | * savearea rbp is set for the frame of the caller to target | |
916 | * The *live* %rsp will be adjusted below for pop increment(s) | |
917 | */ | |
918 | saved_state->rbp = *(uint64_t *)fp; | |
919 | /* Skip over the patched leave */ | |
920 | saved_state->isf.rip += DTRACE_INVOP_LEAVE_SKIP; | |
921 | /* | |
922 | * Lift the stack to account for the emulated leave | |
923 | * Account for words local in this frame | |
924 | * (in "case DTRACE_INVOP_POPL_EBP:" this is zero.) | |
925 | */ | |
926 | delta = ((uint32_t *)fp) - ((uint32_t *)rsp_probe); /* delta is a *word* increment */ | |
927 | /* Account for popping off the rbp (just accomplished by the emulation | |
928 | * above...) | |
929 | */ | |
930 | delta += 2; | |
931 | saved_state->isf.rsp += (delta << 2); | |
932 | ||
933 | /* XXX Fragile in the extreme. | |
934 | * This is sensitive to trap_from_kernel()'s internals. | |
935 | */ | |
936 | rbp = (uint64_t *)__builtin_frame_address(0); | |
937 | rbp = (uint64_t *)*rbp; | |
938 | r12 = *(rbp - 4); | |
939 | ||
940 | /* Shift contents of stack */ | |
941 | for (pDst = (uint32_t *)fp; | |
942 | pDst > (((uint32_t *)r12)); | |
943 | pDst--) | |
944 | *pDst = pDst[-delta]; | |
945 | ||
946 | /* Track the stack lift in "saved_state". */ | |
947 | saved_state = (x86_saved_state64_t *) (((uintptr_t)saved_state) + (delta << 2)); | |
948 | ||
949 | /* Now adjust the value of %r12 in our caller (kernel_trap)'s frame */ | |
950 | *(rbp - 4) = r12 + (delta << 2); | |
951 | ||
952 | retval = KERN_SUCCESS; | |
953 | break; | |
954 | ||
955 | default: | |
956 | retval = KERN_FAILURE; | |
957 | break; | |
958 | } | |
959 | saved_state->isf.trapno = T_PREEMPT; /* Avoid call to i386_astintr()! */ | |
960 | ||
961 | ml_set_interrupts_enabled(oldlevel); | |
962 | } | |
963 | ||
964 | return retval; | |
965 | } | |
966 | ||
967 | /*ARGSUSED*/ | |
968 | static void | |
969 | __fbt_provide_module(void *arg, struct modctl *ctl) | |
970 | { | |
971 | #pragma unused(arg) | |
972 | kernel_mach_header_t *mh; | |
973 | struct load_command *cmd; | |
974 | kernel_segment_command_t *orig_ts = NULL, *orig_le = NULL; | |
975 | struct symtab_command *orig_st = NULL; | |
976 | struct nlist_64 *sym = NULL; | |
977 | char *strings; | |
978 | uintptr_t instrLow, instrHigh; | |
979 | char *modname; | |
980 | unsigned int i, j; | |
981 | ||
982 | int gIgnoreFBTBlacklist = 0; | |
983 | PE_parse_boot_argn("IgnoreFBTBlacklist", &gIgnoreFBTBlacklist, sizeof (gIgnoreFBTBlacklist)); | |
984 | ||
985 | mh = (kernel_mach_header_t *)(ctl->address); | |
986 | modname = ctl->mod_modname; | |
987 | ||
988 | if (0 == ctl->address || 0 == ctl->size) /* Has the linker been jettisoned? */ | |
989 | return; | |
990 | ||
991 | /* | |
992 | * Employees of dtrace and their families are ineligible. Void | |
993 | * where prohibited. | |
994 | */ | |
995 | ||
996 | if (LIT_STRNEQL(modname, "com.apple.driver.dtrace")) | |
997 | return; | |
998 | ||
999 | if (strstr(modname, "CHUD") != NULL) | |
1000 | return; | |
1001 | ||
1002 | if (mh->magic != MH_MAGIC_64) | |
1003 | return; | |
1004 | ||
1005 | cmd = (struct load_command *) &mh[1]; | |
1006 | for (i = 0; i < mh->ncmds; i++) { | |
1007 | if (cmd->cmd == LC_SEGMENT_KERNEL) { | |
1008 | kernel_segment_command_t *orig_sg = (kernel_segment_command_t *) cmd; | |
1009 | ||
1010 | if (LIT_STRNEQL(orig_sg->segname, SEG_TEXT)) | |
1011 | orig_ts = orig_sg; | |
1012 | else if (LIT_STRNEQL(orig_sg->segname, SEG_LINKEDIT)) | |
1013 | orig_le = orig_sg; | |
1014 | else if (LIT_STRNEQL(orig_sg->segname, "")) | |
1015 | orig_ts = orig_sg; /* kexts have a single unnamed segment */ | |
1016 | } | |
1017 | else if (cmd->cmd == LC_SYMTAB) | |
1018 | orig_st = (struct symtab_command *) cmd; | |
1019 | ||
1020 | cmd = (struct load_command *) ((caddr_t) cmd + cmd->cmdsize); | |
1021 | } | |
1022 | ||
1023 | if ((orig_ts == NULL) || (orig_st == NULL) || (orig_le == NULL)) | |
1024 | return; | |
1025 | ||
1026 | sym = (struct nlist_64 *)(orig_le->vmaddr + orig_st->symoff - orig_le->fileoff); | |
1027 | strings = (char *)(orig_le->vmaddr + orig_st->stroff - orig_le->fileoff); | |
1028 | ||
1029 | /* Find extent of the TEXT section */ | |
1030 | instrLow = (uintptr_t)orig_ts->vmaddr; | |
1031 | instrHigh = (uintptr_t)(orig_ts->vmaddr + orig_ts->vmsize); | |
1032 | ||
1033 | for (i = 0; i < orig_st->nsyms; i++) { | |
1034 | fbt_probe_t *fbt, *retfbt; | |
1035 | machine_inst_t *instr, *limit, theInstr, i1, i2, i3; | |
1036 | uint8_t n_type = sym[i].n_type & (N_TYPE | N_EXT); | |
1037 | char *name = strings + sym[i].n_un.n_strx; | |
1038 | int size; | |
1039 | ||
1040 | /* Check that the symbol is a global and that it has a name. */ | |
1041 | if (((N_SECT | N_EXT) != n_type && (N_ABS | N_EXT) != n_type)) | |
1042 | continue; | |
1043 | ||
1044 | if (0 == sym[i].n_un.n_strx) /* iff a null, "", name. */ | |
1045 | continue; | |
1046 | ||
1047 | /* Lop off omnipresent leading underscore. */ | |
1048 | if (*name == '_') | |
1049 | name += 1; | |
1050 | ||
1051 | if (LIT_STRNSTART(name, "dtrace_") && !LIT_STRNSTART(name, "dtrace_safe_")) { | |
1052 | /* | |
1053 | * Anything beginning with "dtrace_" may be called | |
1054 | * from probe context unless it explitly indicates | |
1055 | * that it won't be called from probe context by | |
1056 | * using the prefix "dtrace_safe_". | |
1057 | */ | |
1058 | continue; | |
1059 | } | |
1060 | ||
1061 | if (LIT_STRNSTART(name, "fasttrap_") || | |
1062 | LIT_STRNSTART(name, "fuword") || | |
1063 | LIT_STRNSTART(name, "suword") || | |
1064 | LIT_STRNEQL(name, "sprlock") || | |
1065 | LIT_STRNEQL(name, "sprunlock") || | |
1066 | LIT_STRNEQL(name, "uread") || | |
1067 | LIT_STRNEQL(name, "uwrite")) | |
1068 | continue; /* Fasttrap inner-workings. */ | |
1069 | ||
1070 | if (LIT_STRNSTART(name, "dsmos_")) | |
1071 | continue; /* Don't Steal Mac OS X! */ | |
1072 | ||
1073 | if (LIT_STRNSTART(name, "_dtrace")) | |
1074 | continue; /* Shims in dtrace.c */ | |
1075 | ||
1076 | if (LIT_STRNSTART(name, "chud")) | |
1077 | continue; /* Professional courtesy. */ | |
1078 | ||
1079 | if (LIT_STRNSTART(name, "hibernate_")) | |
1080 | continue; /* Let sleeping dogs lie. */ | |
1081 | ||
1082 | if (LIT_STRNEQL(name, "ZN9IOService14newTemperatureElPS_") || /* IOService::newTemperature */ | |
1083 | LIT_STRNEQL(name, "ZN9IOService26temperatureCriticalForZoneEPS_")) /* IOService::temperatureCriticalForZone */ | |
1084 | continue; /* Per the fire code */ | |
1085 | ||
1086 | /* | |
1087 | * Place no probes (illegal instructions) in the exception handling path! | |
1088 | */ | |
1089 | if (LIT_STRNEQL(name, "t_invop") || | |
1090 | LIT_STRNEQL(name, "enter_lohandler") || | |
1091 | LIT_STRNEQL(name, "lo_alltraps") || | |
1092 | LIT_STRNEQL(name, "kernel_trap") || | |
1093 | LIT_STRNEQL(name, "interrupt") || | |
1094 | LIT_STRNEQL(name, "i386_astintr")) | |
1095 | continue; | |
1096 | ||
1097 | if (LIT_STRNEQL(name, "current_thread") || | |
1098 | LIT_STRNEQL(name, "ast_pending") || | |
1099 | LIT_STRNEQL(name, "fbt_perfCallback") || | |
1100 | LIT_STRNEQL(name, "machine_thread_get_kern_state") || | |
1101 | LIT_STRNEQL(name, "get_threadtask") || | |
1102 | LIT_STRNEQL(name, "ml_set_interrupts_enabled") || | |
1103 | LIT_STRNEQL(name, "dtrace_invop") || | |
1104 | LIT_STRNEQL(name, "fbt_invop") || | |
1105 | LIT_STRNEQL(name, "sdt_invop") || | |
1106 | LIT_STRNEQL(name, "max_valid_stack_address")) | |
1107 | continue; | |
1108 | ||
1109 | /* | |
1110 | * Voodoo. | |
1111 | */ | |
1112 | if (LIT_STRNSTART(name, "machine_stack_") || | |
1113 | LIT_STRNSTART(name, "mapping_") || | |
1114 | LIT_STRNEQL(name, "tmrCvt") || | |
1115 | ||
1116 | LIT_STRNSTART(name, "tsc_") || | |
1117 | ||
1118 | LIT_STRNSTART(name, "pmCPU") || | |
1119 | LIT_STRNEQL(name, "pmKextRegister") || | |
1120 | LIT_STRNEQL(name, "pmMarkAllCPUsOff") || | |
1121 | LIT_STRNEQL(name, "pmSafeMode") || | |
1122 | LIT_STRNEQL(name, "pmTimerSave") || | |
1123 | LIT_STRNEQL(name, "pmTimerRestore") || | |
1124 | LIT_STRNEQL(name, "pmUnRegister") || | |
1125 | LIT_STRNSTART(name, "pms") || | |
1126 | LIT_STRNEQL(name, "power_management_init") || | |
1127 | LIT_STRNSTART(name, "usimple_") || | |
1128 | LIT_STRNSTART(name, "lck_spin_lock") || | |
1129 | LIT_STRNSTART(name, "lck_spin_unlock") || | |
1130 | ||
1131 | LIT_STRNSTART(name, "rtc_") || | |
1132 | LIT_STRNSTART(name, "_rtc_") || | |
1133 | LIT_STRNSTART(name, "rtclock_") || | |
1134 | LIT_STRNSTART(name, "clock_") || | |
1135 | LIT_STRNSTART(name, "absolutetime_to_") || | |
1136 | LIT_STRNEQL(name, "setPop") || | |
1137 | LIT_STRNEQL(name, "nanoseconds_to_absolutetime") || | |
1138 | LIT_STRNEQL(name, "nanotime_to_absolutetime") || | |
1139 | ||
1140 | LIT_STRNSTART(name, "etimer_") || | |
1141 | ||
1142 | LIT_STRNSTART(name, "commpage_") || | |
1143 | LIT_STRNSTART(name, "pmap_") || | |
1144 | LIT_STRNSTART(name, "ml_") || | |
1145 | LIT_STRNSTART(name, "PE_") || | |
1146 | LIT_STRNEQL(name, "kprintf") || | |
1147 | LIT_STRNSTART(name, "lapic_") || | |
1148 | LIT_STRNSTART(name, "acpi_")) | |
1149 | continue; | |
1150 | ||
1151 | /* | |
1152 | * Avoid machine_ routines. PR_5346750. | |
1153 | */ | |
1154 | if (LIT_STRNSTART(name, "machine_")) | |
1155 | continue; | |
1156 | ||
1157 | if (LIT_STRNEQL(name, "handle_pending_TLB_flushes")) | |
1158 | continue; | |
1159 | ||
1160 | /* | |
1161 | * Place no probes on critical routines. PR_5221096 | |
1162 | */ | |
1163 | if (!gIgnoreFBTBlacklist && | |
1164 | bsearch( name, critical_blacklist, CRITICAL_BLACKLIST_COUNT, sizeof(name), _cmp ) != NULL) | |
1165 | continue; | |
1166 | ||
1167 | /* | |
1168 | * Place no probes that could be hit in probe context. | |
1169 | */ | |
1170 | if (!gIgnoreFBTBlacklist && | |
1171 | bsearch( name, probe_ctx_closure, PROBE_CTX_CLOSURE_COUNT, sizeof(name), _cmp ) != NULL) | |
1172 | continue; | |
1173 | ||
1174 | /* | |
1175 | * Place no probes that could be hit on the way to the debugger. | |
1176 | */ | |
1177 | if (LIT_STRNSTART(name, "kdp_") || | |
1178 | LIT_STRNSTART(name, "kdb_") || | |
1179 | LIT_STRNSTART(name, "kdbg_") || | |
1180 | LIT_STRNSTART(name, "kdebug_") || | |
1181 | LIT_STRNEQL(name, "kernel_debug") || | |
1182 | LIT_STRNEQL(name, "Debugger") || | |
1183 | LIT_STRNEQL(name, "Call_DebuggerC") || | |
1184 | LIT_STRNEQL(name, "lock_debugger") || | |
1185 | LIT_STRNEQL(name, "unlock_debugger") || | |
1186 | LIT_STRNEQL(name, "SysChoked")) | |
1187 | continue; | |
1188 | ||
1189 | /* | |
1190 | * Place no probes that could be hit on the way to a panic. | |
1191 | */ | |
1192 | if (NULL != strstr(name, "panic_") || | |
1193 | LIT_STRNEQL(name, "panic") || | |
1194 | LIT_STRNEQL(name, "handleMck") || | |
1195 | LIT_STRNEQL(name, "unresolved_kernel_trap")) | |
1196 | continue; | |
1197 | ||
1198 | if (dtrace_probe_lookup(fbt_id, modname, name, NULL) != 0) | |
1199 | continue; | |
1200 | ||
1201 | for (j = 0, instr = (machine_inst_t *)sym[i].n_value, theInstr = 0; | |
1202 | (j < 4) && ((uintptr_t)instr >= instrLow) && (instrHigh > (uintptr_t)(instr + 2)); | |
1203 | j++) { | |
1204 | theInstr = instr[0]; | |
1205 | if (theInstr == FBT_PUSH_RBP || theInstr == FBT_RET || theInstr == FBT_RET_IMM16) | |
1206 | break; | |
1207 | ||
1208 | if ((size = dtrace_instr_size(instr)) <= 0) | |
1209 | break; | |
1210 | ||
1211 | instr += size; | |
1212 | } | |
1213 | ||
1214 | if (theInstr != FBT_PUSH_RBP) | |
1215 | continue; | |
1216 | ||
1217 | i1 = instr[1]; | |
1218 | i2 = instr[2]; | |
1219 | i3 = instr[3]; | |
1220 | ||
1221 | limit = (machine_inst_t *)instrHigh; | |
1222 | ||
1223 | if (i1 == FBT_REX_RSP_RBP && i2 == FBT_MOV_RSP_RBP0 && i3 == FBT_MOV_RSP_RBP1) { | |
1224 | instr += 1; /* Advance to the mov %rsp,%rbp */ | |
1225 | theInstr = i1; | |
1226 | } else { | |
1227 | continue; | |
1228 | } | |
1229 | #if 0 | |
1230 | else { | |
1231 | /* | |
1232 | * Sometimes, the compiler will schedule an intervening instruction | |
1233 | * in the function prologue. Example: | |
1234 | * | |
1235 | * _mach_vm_read: | |
1236 | * 000006d8 pushl %ebp | |
1237 | * 000006d9 movl $0x00000004,%edx | |
1238 | * 000006de movl %esp,%ebp | |
1239 | * | |
1240 | * Try the next instruction, to see if it is a movl %esp,%ebp | |
1241 | */ | |
1242 | ||
1243 | instr += 1; /* Advance past the pushl %ebp */ | |
1244 | if ((size = dtrace_instr_size(instr)) <= 0) | |
1245 | continue; | |
1246 | ||
1247 | instr += size; | |
1248 | ||
1249 | if ((instr + 1) >= limit) | |
1250 | continue; | |
1251 | ||
1252 | i1 = instr[0]; | |
1253 | i2 = instr[1]; | |
1254 | ||
1255 | if (!(i1 == FBT_MOVL_ESP_EBP0_V0 && i2 == FBT_MOVL_ESP_EBP1_V0) && | |
1256 | !(i1 == FBT_MOVL_ESP_EBP0_V1 && i2 == FBT_MOVL_ESP_EBP1_V1)) | |
1257 | continue; | |
1258 | ||
1259 | /* instr already points at the movl %esp,%ebp */ | |
1260 | theInstr = i1; | |
1261 | } | |
1262 | #endif | |
1263 | ||
1264 | fbt = kmem_zalloc(sizeof (fbt_probe_t), KM_SLEEP); | |
1265 | strlcpy( (char *)&(fbt->fbtp_name), name, MAX_FBTP_NAME_CHARS ); | |
1266 | fbt->fbtp_id = dtrace_probe_create(fbt_id, modname, name, FBT_ENTRY, FBT_AFRAMES_ENTRY, fbt); | |
1267 | fbt->fbtp_patchpoint = instr; | |
1268 | fbt->fbtp_ctl = ctl; | |
1269 | fbt->fbtp_loadcnt = ctl->mod_loadcnt; | |
1270 | fbt->fbtp_rval = DTRACE_INVOP_MOV_RSP_RBP; | |
1271 | fbt->fbtp_savedval = theInstr; | |
1272 | fbt->fbtp_patchval = FBT_PATCHVAL; | |
1273 | ||
1274 | fbt->fbtp_hashnext = fbt_probetab[FBT_ADDR2NDX(instr)]; | |
1275 | fbt->fbtp_symndx = i; | |
1276 | fbt_probetab[FBT_ADDR2NDX(instr)] = fbt; | |
1277 | ||
1278 | retfbt = NULL; | |
1279 | again: | |
1280 | if (instr >= limit) | |
1281 | continue; | |
1282 | ||
1283 | /* | |
1284 | * If this disassembly fails, then we've likely walked off into | |
1285 | * a jump table or some other unsuitable area. Bail out of the | |
1286 | * disassembly now. | |
1287 | */ | |
1288 | if ((size = dtrace_instr_size(instr)) <= 0) | |
1289 | continue; | |
1290 | ||
1291 | /* | |
1292 | * We (desperately) want to avoid erroneously instrumenting a | |
1293 | * jump table, especially given that our markers are pretty | |
1294 | * short: two bytes on x86, and just one byte on amd64. To | |
1295 | * determine if we're looking at a true instruction sequence | |
1296 | * or an inline jump table that happens to contain the same | |
1297 | * byte sequences, we resort to some heuristic sleeze: we | |
1298 | * treat this instruction as being contained within a pointer, | |
1299 | * and see if that pointer points to within the body of the | |
1300 | * function. If it does, we refuse to instrument it. | |
1301 | */ | |
1302 | for (j = 0; j < sizeof (uintptr_t); j++) { | |
1303 | uintptr_t check = (uintptr_t)instr - j; | |
1304 | uint8_t *ptr; | |
1305 | ||
1306 | if (check < sym[i].n_value) | |
1307 | break; | |
1308 | ||
1309 | if (check + sizeof (uintptr_t) > (uintptr_t)limit) | |
1310 | continue; | |
1311 | ||
1312 | ptr = *(uint8_t **)check; | |
1313 | ||
1314 | if (ptr >= (uint8_t *)sym[i].n_value && ptr < limit) { | |
1315 | instr += size; | |
1316 | goto again; | |
1317 | } | |
1318 | } | |
1319 | ||
1320 | /* | |
1321 | * OK, it's an instruction. | |
1322 | */ | |
1323 | theInstr = instr[0]; | |
1324 | ||
1325 | /* Walked onto the start of the next routine? If so, bail out of this function. */ | |
1326 | if (theInstr == FBT_PUSH_RBP) | |
1327 | continue; | |
1328 | ||
1329 | if (!(size == 1 && (theInstr == FBT_POP_RBP || theInstr == FBT_LEAVE))) { | |
1330 | instr += size; | |
1331 | goto again; | |
1332 | } | |
1333 | ||
1334 | /* | |
1335 | * Found the pop %rbp; or leave. | |
1336 | */ | |
1337 | machine_inst_t *patch_instr = instr; | |
1338 | ||
1339 | /* | |
1340 | * Scan forward for a "ret", or "jmp". | |
1341 | */ | |
1342 | instr += size; | |
1343 | if (instr >= limit) | |
1344 | continue; | |
1345 | ||
1346 | size = dtrace_instr_size(instr); | |
1347 | if (size <= 0) /* Failed instruction decode? */ | |
1348 | continue; | |
1349 | ||
1350 | theInstr = instr[0]; | |
1351 | ||
1352 | if (!(size == FBT_RET_LEN && (theInstr == FBT_RET)) && | |
1353 | !(size == FBT_RET_IMM16_LEN && (theInstr == FBT_RET_IMM16)) && | |
1354 | !(size == FBT_JMP_SHORT_REL_LEN && (theInstr == FBT_JMP_SHORT_REL)) && | |
1355 | !(size == FBT_JMP_NEAR_REL_LEN && (theInstr == FBT_JMP_NEAR_REL)) && | |
1356 | !(size == FBT_JMP_FAR_ABS_LEN && (theInstr == FBT_JMP_FAR_ABS))) | |
1357 | continue; | |
1358 | ||
1359 | /* | |
1360 | * pop %rbp; ret; or leave; ret; or leave; jmp tailCalledFun; -- We have a winner! | |
1361 | */ | |
1362 | fbt = kmem_zalloc(sizeof (fbt_probe_t), KM_SLEEP); | |
1363 | strlcpy( (char *)&(fbt->fbtp_name), name, MAX_FBTP_NAME_CHARS ); | |
1364 | ||
1365 | if (retfbt == NULL) { | |
1366 | fbt->fbtp_id = dtrace_probe_create(fbt_id, modname, | |
1367 | name, FBT_RETURN, FBT_AFRAMES_RETURN, fbt); | |
1368 | } else { | |
1369 | retfbt->fbtp_next = fbt; | |
1370 | fbt->fbtp_id = retfbt->fbtp_id; | |
1371 | } | |
1372 | ||
1373 | retfbt = fbt; | |
1374 | fbt->fbtp_patchpoint = patch_instr; | |
1375 | fbt->fbtp_ctl = ctl; | |
1376 | fbt->fbtp_loadcnt = ctl->mod_loadcnt; | |
1377 | ||
1378 | if (*patch_instr == FBT_POP_RBP) { | |
1379 | fbt->fbtp_rval = DTRACE_INVOP_POP_RBP; | |
1380 | } else { | |
1381 | ASSERT(*patch_instr == FBT_LEAVE); | |
1382 | fbt->fbtp_rval = DTRACE_INVOP_LEAVE; | |
1383 | } | |
1384 | fbt->fbtp_roffset = | |
1385 | (uintptr_t)(patch_instr - (uint8_t *)sym[i].n_value); | |
1386 | ||
1387 | fbt->fbtp_savedval = *patch_instr; | |
1388 | fbt->fbtp_patchval = FBT_PATCHVAL; | |
1389 | fbt->fbtp_hashnext = fbt_probetab[FBT_ADDR2NDX(patch_instr)]; | |
1390 | fbt->fbtp_symndx = i; | |
1391 | fbt_probetab[FBT_ADDR2NDX(patch_instr)] = fbt; | |
1392 | ||
1393 | instr += size; | |
1394 | goto again; | |
1395 | } | |
1396 | } | |
1397 | #else | |
1398 | #error Unknown arch | |
1399 | #endif | |
2d21ac55 A |
1400 | |
1401 | extern struct modctl g_fbt_kernctl; | |
1402 | #undef kmem_alloc /* from its binding to dt_kmem_alloc glue */ | |
1403 | #undef kmem_free /* from its binding to dt_kmem_free glue */ | |
1404 | #include <vm/vm_kern.h> | |
1405 | ||
1406 | /*ARGSUSED*/ | |
1407 | void | |
1408 | fbt_provide_module(void *arg, struct modctl *ctl) | |
1409 | { | |
1410 | #pragma unused(ctl) | |
1411 | __fbt_provide_module(arg, &g_fbt_kernctl); | |
1412 | ||
b0d623f7 A |
1413 | if ( (vm_offset_t)g_fbt_kernctl.address != (vm_offset_t )NULL ) |
1414 | kmem_free(kernel_map, (vm_offset_t)g_fbt_kernctl.address, round_page(g_fbt_kernctl.size)); | |
2d21ac55 A |
1415 | g_fbt_kernctl.address = 0; |
1416 | g_fbt_kernctl.size = 0; | |
1417 | } |