]>
Commit | Line | Data |
---|---|---|
2d21ac55 A |
1 | /* |
2 | * CDDL HEADER START | |
3 | * | |
4 | * The contents of this file are subject to the terms of the | |
5 | * Common Development and Distribution License, Version 1.0 only | |
6 | * (the "License"). You may not use this file except in compliance | |
7 | * with the License. | |
8 | * | |
9 | * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE | |
10 | * or http://www.opensolaris.org/os/licensing. | |
11 | * See the License for the specific language governing permissions | |
12 | * and limitations under the License. | |
13 | * | |
14 | * When distributing Covered Code, include this CDDL HEADER in each | |
15 | * file and include the License file at usr/src/OPENSOLARIS.LICENSE. | |
16 | * If applicable, add the following below this CDDL HEADER, with the | |
17 | * fields enclosed by brackets "[]" replaced with your own identifying | |
18 | * information: Portions Copyright [yyyy] [name of copyright owner] | |
19 | * | |
20 | * CDDL HEADER END | |
21 | */ | |
22 | /* | |
23 | * Copyright 2005 Sun Microsystems, Inc. All rights reserved. | |
24 | * Use is subject to license terms. | |
25 | */ | |
26 | ||
27 | /* #pragma ident "@(#)fbt.c 1.15 05/09/19 SMI" */ | |
28 | ||
29 | #ifdef KERNEL | |
30 | #ifndef _KERNEL | |
31 | #define _KERNEL /* Solaris vs. Darwin */ | |
32 | #endif | |
33 | #endif | |
34 | ||
35 | #define MACH__POSIX_C_SOURCE_PRIVATE 1 /* pulls in suitable savearea from mach/ppc/thread_status.h */ | |
36 | #include <kern/thread.h> | |
37 | #include <mach/thread_status.h> | |
38 | #include <mach/vm_param.h> | |
39 | #include <mach-o/loader.h> | |
40 | #include <mach-o/nlist.h> | |
41 | ||
42 | extern struct mach_header _mh_execute_header; /* the kernel's mach header */ | |
43 | ||
44 | #include <sys/param.h> | |
45 | #include <sys/systm.h> | |
46 | #include <sys/errno.h> | |
47 | #include <sys/stat.h> | |
48 | #include <sys/ioctl.h> | |
49 | #include <sys/conf.h> | |
50 | #include <sys/fcntl.h> | |
51 | #include <miscfs/devfs/devfs.h> | |
52 | ||
53 | #include <sys/dtrace.h> | |
54 | #include <sys/dtrace_impl.h> | |
55 | #include <sys/fbt.h> | |
56 | ||
57 | #include <sys/dtrace_glue.h> | |
58 | ||
/*
 * Emulation bookkeeping used by fbt_perfCallback(): each *_SKIP value is the
 * number of bytes added to the saved %eip once the corresponding patched
 * instruction has been emulated. DTRACE_INVOP_MOVL_ESP_EBP is the emulation
 * code stored in an entry probe's fbtp_rval.
 */
59 | #define DTRACE_INVOP_NOP_SKIP 1 | |
60 | #define DTRACE_INVOP_MOVL_ESP_EBP 10 | |
61 | #define DTRACE_INVOP_MOVL_ESP_EBP_SKIP 2 | |
62 | #define DTRACE_INVOP_LEAVE_SKIP 1 | |
63 | ||
/*
 * Instruction bytes that __fbt_provide_module() compares against while
 * examining a function's prologue. Either (EBP0, EBP1) byte pair, V0 or V1,
 * is accepted as a movl %esp,%ebp. FBT_REX_RSP_RBP is not referenced by the
 * code visible in this file.
 */
64 | #define FBT_PUSHL_EBP 0x55 | |
65 | #define FBT_MOVL_ESP_EBP0_V0 0x8b | |
66 | #define FBT_MOVL_ESP_EBP1_V0 0xec | |
67 | #define FBT_MOVL_ESP_EBP0_V1 0x89 | |
68 | #define FBT_MOVL_ESP_EBP1_V1 0xe5 | |
69 | #define FBT_REX_RSP_RBP 0x48 | |
70 | ||
/*
 * Instruction bytes used to recognize a function's exit: a popl %ebp or leave
 * that is immediately followed by one of the ret/jmp forms below. Each *_LEN
 * is the instruction size that dtrace_instr_size() must report for the
 * matching opcode before the sequence is accepted.
 * NOTE(review): a far jmp with a 16:32 pointer operand normally encodes in 7
 * bytes, so FBT_JMP_FAR_ABS_LEN == 5 may never match in 32-bit code -- confirm
 * against the instruction-set reference before relying on it.
 */
71 | #define FBT_POPL_EBP 0x5d | |
72 | #define FBT_RET 0xc3 | |
73 | #define FBT_RET_IMM16 0xc2 | |
74 | #define FBT_LEAVE 0xc9 | |
75 | #define FBT_JMP_SHORT_REL 0xeb /* Jump short, relative, displacement relative to next instr. */ | |
76 | #define FBT_JMP_NEAR_REL 0xe9 /* Jump near, relative, displacement relative to next instr. */ | |
77 | #define FBT_JMP_FAR_ABS 0xea /* Jump far, absolute, address given in operand */ | |
78 | #define FBT_RET_LEN 1 | |
79 | #define FBT_RET_IMM16_LEN 3 | |
80 | #define FBT_JMP_SHORT_REL_LEN 2 | |
81 | #define FBT_JMP_NEAR_REL_LEN 5 | |
82 | #define FBT_JMP_FAR_ABS_LEN 5 | |
83 | ||
/*
 * FBT_PATCHVAL is recorded in every probe's fbtp_patchval (presumably the byte
 * written over the patch point when a probe is enabled; the enable path is not
 * in this file). The two FBT_AFRAMES_* values are handed to
 * dtrace_probe_create() for entry and return probes respectively.
 */
84 | #define FBT_PATCHVAL 0xf0 | |
85 | #define FBT_AFRAMES_ENTRY 7 | |
86 | #define FBT_AFRAMES_RETURN 6 | |
87 | ||
/*
 * Probe names passed to dtrace_probe_create(), and the hash used to index
 * fbt_probetab: the patch-point address with its low four bits dropped, masked
 * by fbt_probetab_mask.
 */
88 | #define FBT_ENTRY "entry" | |
89 | #define FBT_RETURN "return" | |
90 | #define FBT_ADDR2NDX(addr) ((((uintptr_t)(addr)) >> 4) & fbt_probetab_mask) | |
91 | ||
/* Provider id and probe hash table; declared here, defined outside this file. */
92 | extern dtrace_provider_id_t fbt_id; | |
93 | extern fbt_probe_t **fbt_probetab; | |
94 | extern int fbt_probetab_mask; | |
95 | ||
96 | /* | |
97 | * Critical routines that must not be probed. PR_5221096, PR_5379018. | |
98 | */ | |
99 | ||
/*
 * This table is looked up by binary search, so its entries MUST remain sorted
 * in strcmp() order: digits sort before upper-case letters, which sort before
 * '_', which sorts before lower-case letters. The pointers themselves are
 * const so the table cannot be reordered or retargeted at run time.
 */
static const char * const critical_blacklist[] =
{
	"bcopy_phys",
	"console_cpu_alloc",
	"console_cpu_free",
	"cpu_IA32e_disable",
	"cpu_IA32e_enable",
	"cpu_control",
	"cpu_data_alloc",
	"cpu_desc_init",
	"cpu_desc_init64",
	"cpu_desc_load64",
	"cpu_exit_wait",
	"cpu_info",
	"cpu_info_count",
	"cpu_init",
	"cpu_interrupt",
	"cpu_machine_init",
	"cpu_mode_init",
	"cpu_processor_alloc",
	"cpu_processor_free",
	"cpu_signal_handler",
	"cpu_sleep",
	"cpu_start",
	"cpu_subtype",
	"cpu_thread_alloc",
	"cpu_thread_halt",
	"cpu_thread_init",
	"cpu_threadtype",
	"cpu_to_processor",
	"cpu_topology_start",
	"cpu_type",
	"cpu_window_init",
	"cpuid_cpu_display",
	"handle_pending_TLB_flushes",
	"hw_compare_and_store",
	"machine_idle_cstate",
	"mca_cpu_alloc",
	"mca_cpu_init",
	"ml_nofault_copy",
	"pmap_cpu_alloc",
	"pmap_cpu_free",
	"pmap_cpu_high_map_vaddr",
	"pmap_cpu_high_shared_remap",
	"pmap_cpu_init",
	"rdHPET",
	"register_cpu_setup_func",
	"unregister_cpu_setup_func"
};
/* Number of entries in critical_blacklist. */
#define CRITICAL_BLACKLIST_COUNT (sizeof(critical_blacklist)/sizeof(critical_blacklist[0]))
150 | ||
151 | /* | |
152 | * The transitive closure of entry points that can be reached from probe context. | |
153 | * (Apart from routines whose names begin with dtrace_ or dtxnu_.) | |
154 | */ | |
/*
 * This table is looked up by binary search, so its entries MUST remain sorted
 * in strcmp() order: digits sort before upper-case letters, which sort before
 * '_', which sorts before lower-case letters. The pointers themselves are
 * const so the table cannot be reordered or retargeted at run time.
 */
static const char * const probe_ctx_closure[] =
{
	"Debugger",
	"OSCompareAndSwap",
	"absolutetime_to_microtime",
	"ast_pending",
	"clock_get_calendar_nanotime_nowait",
	"copyin",
	"copyin_user",
	"copyinstr",
	"copyout",
	"copyoutstr",
	"cpu_number",
	"current_proc",
	"current_processor",
	"current_task",
	"current_thread",
	"debug_enter",
	"find_user_regs",
	"flush_tlb64",
	"get_bsdtask_info",
	"get_bsdthread_info",
	"hw_atomic_and",
	"kauth_cred_get",
	"kauth_getgid",
	"kauth_getuid",
	"kernel_preempt_check",
	"mach_absolute_time",
	"max_valid_stack_address",
	"ml_at_interrupt_context",
	"ml_phys_write_byte_64",
	"ml_phys_write_half_64",
	"ml_phys_write_word_64",
	"ml_set_interrupts_enabled",
	"panic",
	"pmap64_pde",
	"pmap64_pdpt",
	"pmap_find_phys",
	"pmap_get_mapwindow",
	"pmap_pde",
	"pmap_pte",
	"pmap_put_mapwindow",
	"pmap_valid_page",
	"prf",
	"proc_is64bit",
	"proc_selfname",
	"proc_selfpid",
	"psignal_lock",
	"rtc_nanotime_load",
	"rtc_nanotime_read",
	"strlcpy",
	"sync_iss_to_iks_unconditionally",
	"timer_grab"
};
/* Number of entries in probe_ctx_closure. */
#define PROBE_CTX_CLOSURE_COUNT (sizeof(probe_ctx_closure)/sizeof(probe_ctx_closure[0]))
210 | ||
211 | ||
/*
 * Comparator for the binary search over the sorted name tables.
 *
 * a: the search key, a NUL-terminated string.
 * b: address of a table element, i.e. a pointer to a pointer to a
 *    NUL-terminated string.
 *
 * Returns the strcmp() ordering of the key relative to the element.
 */
static int _cmp(const void *a, const void *b)
{
	const char *key = a;
	const char *const *element = b;	/* keep every level of const intact */

	return strcmp(key, *element);
}
216 | ||
/*
 * Binary search over a sorted array.
 *
 * key:    object to look for; passed as the first argument to compar.
 * base0:  first element of the array, sorted ascending according to compar.
 * nmemb:  number of elements in the array.
 * size:   size in bytes of one element.
 * compar: returns <0, 0 or >0 when key sorts before, equal to, or after the
 *         element passed as its second argument.
 *
 * Returns a pointer to a matching element, or NULL when there is none.
 */
static const void * bsearch(
	register const void *key,
	const void *base0,
	size_t nmemb,
	register size_t size,
	register int (*compar)(const void *, const void *))
{
	const char *window = base0;	/* first element still under consideration */
	size_t remaining = nmemb;	/* number of elements still under consideration */

	while (remaining != 0) {
		const char *probe = window + (remaining >> 1) * size;
		int order = (*compar)(key, probe);

		if (order == 0)
			return probe;

		if (order > 0) {
			/* Key sorts after the probe: continue in the upper part. */
			window = probe + size;
			remaining--;
		}
		/* Otherwise the lower part is kept; either way the window halves. */
		remaining >>= 1;
	}

	return (NULL);
}
241 | ||
242 | int | |
243 | fbt_invop(uintptr_t addr, uintptr_t *stack, uintptr_t rval) | |
244 | { | |
245 | uintptr_t stack0 = 0, stack1 = 0, stack2 = 0, stack3 = 0, stack4 = 0; | |
246 | fbt_probe_t *fbt = fbt_probetab[FBT_ADDR2NDX(addr)]; | |
247 | ||
248 | for (; fbt != NULL; fbt = fbt->fbtp_hashnext) { | |
249 | if ((uintptr_t)fbt->fbtp_patchpoint == addr) { | |
250 | ||
251 | if (fbt->fbtp_roffset == 0) { | |
252 | uintptr_t *stacktop; | |
253 | if (CPU_ON_INTR(CPU)) | |
254 | stacktop = (uintptr_t *)dtrace_get_cpu_int_stack_top(); | |
255 | else | |
256 | stacktop = (uintptr_t *)(dtrace_get_kernel_stack(current_thread()) + KERNEL_STACK_SIZE); | |
257 | ||
258 | stack += 1; /* skip over the target's pushl'd %ebp */ | |
259 | ||
260 | if (stack <= stacktop) | |
261 | CPU->cpu_dtrace_caller = *stack++; | |
262 | if (stack <= stacktop) | |
263 | stack0 = *stack++; | |
264 | if (stack <= stacktop) | |
265 | stack1 = *stack++; | |
266 | if (stack <= stacktop) | |
267 | stack2 = *stack++; | |
268 | if (stack <= stacktop) | |
269 | stack3 = *stack++; | |
270 | if (stack <= stacktop) | |
271 | stack4 = *stack++; | |
272 | ||
273 | dtrace_probe(fbt->fbtp_id, stack0, stack1, stack2, stack3, stack4); | |
274 | CPU->cpu_dtrace_caller = 0; | |
275 | } else { | |
276 | dtrace_probe(fbt->fbtp_id, fbt->fbtp_roffset, rval, 0, 0, 0); | |
277 | CPU->cpu_dtrace_caller = 0; | |
278 | } | |
279 | ||
280 | return (fbt->fbtp_rval); | |
281 | } | |
282 | } | |
283 | ||
284 | return (0); | |
285 | } | |
286 | ||
/*
 * IS_USER_TRAP is true when regs is non-NULL and the low two bits of its saved
 * %cs are non-zero. FBT_EXCEPTION_CODE is the trap number that
 * fbt_perfCallback() acts on (T_INVALID_OPCODE).
 */
287 | #define IS_USER_TRAP(regs) (regs && (((regs)->cs & 3) != 0)) | |
288 | #define T_INVALID_OPCODE 6 | |
289 | #define FBT_EXCEPTION_CODE T_INVALID_OPCODE | |
290 | ||
/*
 * Trap callback for FBT probes.
 *
 * When trapno is FBT_EXCEPTION_CODE and the saved state is not a user-mode
 * trap, the trapping %eip, the stack pointer at the time of the trap and the
 * saved %eax are passed to dtrace_invop(). Its return value names the
 * instruction that the probe displaced, which is then emulated by editing the
 * saved register state (and, for popl %ebp / leave, by shifting the stack).
 *
 * trapno:           trap number being delivered.
 * tagged_regs:      saved register state for the trap.
 * unused1, unused2: ignored.
 *
 * Returns KERN_SUCCESS when an instruction was emulated, KERN_FAILURE in every
 * other case (different trap, user-mode trap, or an unrecognized code from
 * dtrace_invop()).
 */
291 | kern_return_t | |
292 | fbt_perfCallback( | |
293 | int trapno, | |
294 | x86_saved_state_t *tagged_regs, | |
295 | __unused int unused1, | |
296 | __unused int unused2) | |
297 | { | |
298 | kern_return_t retval = KERN_FAILURE; | |
/* saved_state and regs refer to the same memory, viewed through two layouts. */
299 | x86_saved_state32_t *saved_state = saved_state32(tagged_regs); | |
300 | struct x86_saved_state32_from_kernel *regs = (struct x86_saved_state32_from_kernel *)saved_state; | |
301 | ||
302 | if (FBT_EXCEPTION_CODE == trapno && !IS_USER_TRAP(saved_state)) { | |
303 | boolean_t oldlevel, cpu_64bit; | |
304 | uint32_t esp_probe, *ebp, edi, fp, *pDst, delta = 0; | |
305 | int emul; | |
306 | ||
/* Interrupts stay disabled for the whole emulation; the previous setting is restored before returning. */
307 | cpu_64bit = ml_is64bit(); | |
308 | oldlevel = ml_set_interrupts_enabled(FALSE); | |
309 | ||
310 | /* Calculate where the stack pointer was when the probe instruction "fired." */ | |
311 | if (cpu_64bit) { | |
312 | esp_probe = saved_state->uesp; /* Easy, x86_64 establishes this value in idt64.s */ | |
313 | } else { | |
314 | esp_probe = (uint32_t)&(regs[1]); /* Nasty, infer the location above the save area */ | |
315 | } | |
316 | ||
/* The return value selects which displaced instruction is emulated below. */
317 | emul = dtrace_invop( saved_state->eip, (uintptr_t *)esp_probe, saved_state->eax ); | |
/*
 * Emit a global label directly after the call above.
 * NOTE(review): presumably used elsewhere to recognize this call site -- confirm.
 */
318 | __asm__ volatile(".globl _dtrace_invop_callsite"); | |
319 | __asm__ volatile("_dtrace_invop_callsite:"); | |
320 | ||
321 | switch (emul) { | |
322 | case DTRACE_INVOP_NOP: | |
323 | saved_state->eip += DTRACE_INVOP_NOP_SKIP; /* Skip over the patched NOP */ | |
324 | retval = KERN_SUCCESS; | |
325 | break; | |
326 | ||
327 | case DTRACE_INVOP_MOVL_ESP_EBP: | |
328 | saved_state->ebp = esp_probe; /* Emulate patched movl %esp,%ebp */ | |
329 | saved_state->eip += DTRACE_INVOP_MOVL_ESP_EBP_SKIP; /* Skip over the bytes of the patched movl %esp,%ebp */ | |
330 | retval = KERN_SUCCESS; | |
331 | break; | |
332 | ||
/* popl %ebp and leave share one emulation; for popl %ebp the frame holds no locals. */
333 | case DTRACE_INVOP_POPL_EBP: | |
334 | case DTRACE_INVOP_LEAVE: | |
335 | /* | |
336 | * Emulate first micro-op of patched leave: movl %ebp,%esp | |
337 | * fp points just below the return address slot for target's ret | |
338 | * and at the slot holding the frame pointer saved by the target's prologue. | |
339 | */ | |
340 | fp = saved_state->ebp; | |
341 | /* Emulate second micro-op of patched leave: patched popl %ebp | |
342 | * savearea ebp is set for the frame of the caller to target | |
343 | * The *live* %esp will be adjusted below for pop increment(s) | |
344 | */ | |
345 | saved_state->ebp = *(uint32_t *)fp; | |
346 | /* Skip over the patched leave */ | |
347 | saved_state->eip += DTRACE_INVOP_LEAVE_SKIP; | |
348 | /* | |
349 | * Lift the stack to account for the emulated leave | |
350 | * Account for words local in this frame | |
351 | * (in "case DTRACE_INVOP_POPL_EBP:" this is zero.) | |
352 | */ | |
353 | delta = ((uint32_t *)fp) - ((uint32_t *)esp_probe); | |
354 | /* Account for popping off the ebp (just accomplished by the emulation | |
355 | * above...) | |
356 | */ | |
357 | delta += 1; | |
358 | ||
/* delta counts 32-bit words; shifting left by 2 converts it to bytes. */
359 | if (cpu_64bit) | |
360 | saved_state->uesp += (delta << 2); | |
361 | ||
362 | /* XXX Fragile in the extreme. Obtain the value of %edi that our caller pushed | |
363 | * (on behalf of its caller -- trap_from_kernel()). Ultimately, | |
364 | * trap_from_kernel's stack pointer is restored from this slot. | |
365 | * This is sensitive to the manner in which the compiler preserves %edi, | |
366 | * and trap_from_kernel()'s internals. | |
367 | */ | |
368 | ebp = (uint32_t *)__builtin_frame_address(0); | |
369 | ebp = (uint32_t *)*ebp; | |
370 | edi = *(ebp - 1); | |
/* Walk downward from fp to just above edi, filling each word from the word delta slots below it. */
371 | /* Shift contents of stack */ | |
372 | for (pDst = (uint32_t *)fp; | |
373 | pDst > (((uint32_t *)edi)); | |
374 | pDst--) | |
375 | *pDst = pDst[-delta]; | |
376 | /* Now adjust the value of %edi in our caller (kernel_trap)'s frame */ | |
377 | *(ebp - 1) = edi + (delta << 2); | |
378 | ||
379 | retval = KERN_SUCCESS; | |
380 | break; | |
381 | ||
/* Any other code: nothing was emulated. */
382 | default: | |
383 | retval = KERN_FAILURE; | |
384 | break; | |
385 | } | |
386 | ml_set_interrupts_enabled(oldlevel); | |
387 | } | |
388 | ||
389 | return retval; | |
390 | } | |
391 | ||
392 | /*ARGSUSED*/ | |
393 | static void | |
394 | __fbt_provide_module(void *arg, struct modctl *ctl) | |
395 | { | |
396 | #pragma unused(arg) | |
397 | struct mach_header *mh; | |
398 | struct load_command *cmd; | |
399 | struct segment_command *orig_ts = NULL, *orig_le = NULL; | |
400 | struct symtab_command *orig_st = NULL; | |
401 | struct nlist *sym = NULL; | |
402 | char *strings; | |
403 | uintptr_t instrLow, instrHigh; | |
404 | char *modname; | |
405 | unsigned int i, j; | |
406 | ||
407 | int gIgnoreFBTBlacklist = 0; | |
408 | PE_parse_boot_arg("IgnoreFBTBlacklist", &gIgnoreFBTBlacklist); | |
409 | ||
410 | mh = (struct mach_header *)(ctl->address); | |
411 | modname = ctl->mod_modname; | |
412 | ||
413 | if (0 == ctl->address || 0 == ctl->size) /* Has the linker been jettisoned? */ | |
414 | return; | |
415 | ||
416 | /* | |
417 | * Employees of dtrace and their families are ineligible. Void | |
418 | * where prohibited. | |
419 | */ | |
420 | ||
421 | if (strcmp(modname, "com.apple.driver.dtrace") == 0) | |
422 | return; | |
423 | ||
424 | if (strstr(modname, "CHUD") != NULL) | |
425 | return; | |
426 | ||
427 | if (mh->magic != MH_MAGIC) | |
428 | return; | |
429 | ||
430 | cmd = (struct load_command *) &mh[1]; | |
431 | for (i = 0; i < mh->ncmds; i++) { | |
432 | if (cmd->cmd == LC_SEGMENT) { | |
433 | struct segment_command *orig_sg = (struct segment_command *) cmd; | |
434 | ||
435 | if (strcmp(SEG_TEXT, orig_sg->segname) == 0) | |
436 | orig_ts = orig_sg; | |
437 | else if (strcmp(SEG_LINKEDIT, orig_sg->segname) == 0) | |
438 | orig_le = orig_sg; | |
439 | else if (strcmp("", orig_sg->segname) == 0) | |
440 | orig_ts = orig_sg; /* kexts have a single unnamed segment */ | |
441 | } | |
442 | else if (cmd->cmd == LC_SYMTAB) | |
443 | orig_st = (struct symtab_command *) cmd; | |
444 | ||
445 | cmd = (struct load_command *) ((caddr_t) cmd + cmd->cmdsize); | |
446 | } | |
447 | ||
448 | if ((orig_ts == NULL) || (orig_st == NULL) || (orig_le == NULL)) | |
449 | return; | |
450 | ||
451 | sym = (struct nlist *)orig_le->vmaddr; | |
452 | strings = ((char *)sym) + orig_st->nsyms * sizeof(struct nlist); | |
453 | ||
454 | /* Find extent of the TEXT section */ | |
455 | instrLow = (uintptr_t)orig_ts->vmaddr; | |
456 | instrHigh = (uintptr_t)(orig_ts->vmaddr + orig_ts->vmsize); | |
457 | ||
458 | for (i = 0; i < orig_st->nsyms; i++) { | |
459 | fbt_probe_t *fbt, *retfbt; | |
460 | machine_inst_t *instr, *limit, theInstr, i1, i2; | |
461 | uint8_t n_type = sym[i].n_type & (N_TYPE | N_EXT); | |
462 | char *name = strings + sym[i].n_un.n_strx; | |
463 | int size; | |
464 | ||
465 | /* Check that the symbol is a global and that it has a name. */ | |
466 | if (((N_SECT | N_EXT) != n_type && (N_ABS | N_EXT) != n_type)) | |
467 | continue; | |
468 | ||
469 | if (0 == sym[i].n_un.n_strx) /* iff a null, "", name. */ | |
470 | continue; | |
471 | ||
472 | /* Lop off omnipresent leading underscore. */ | |
473 | if (*name == '_') | |
474 | name += 1; | |
475 | ||
476 | if (strstr(name, "dtrace_") == name && | |
477 | strstr(name, "dtrace_safe_") != name) { | |
478 | /* | |
479 | * Anything beginning with "dtrace_" may be called | |
480 | * from probe context unless it explitly indicates | |
481 | * that it won't be called from probe context by | |
482 | * using the prefix "dtrace_safe_". | |
483 | */ | |
484 | continue; | |
485 | } | |
486 | ||
487 | if (strstr(name, "dsmos_") == name) | |
488 | continue; /* Don't Steal Mac OS X! */ | |
489 | ||
490 | if (strstr(name, "dtxnu_") == name || | |
491 | strstr(name, "_dtrace") == name) | |
492 | continue; /* Shims in dtrace.c */ | |
493 | ||
494 | if (strstr(name, "chud") == name) | |
495 | continue; /* Professional courtesy. */ | |
496 | ||
497 | if (strstr(name, "hibernate_") == name) | |
498 | continue; /* Let sleeping dogs lie. */ | |
499 | ||
500 | if (0 == strcmp(name, "ZN9IOService14newTemperatureElPS_") || /* IOService::newTemperature */ | |
501 | 0 == strcmp(name, "ZN9IOService26temperatureCriticalForZoneEPS_")) /* IOService::temperatureCriticalForZone */ | |
502 | continue; /* Per the fire code */ | |
503 | ||
504 | /* | |
505 | * Place no probes (illegal instructions) in the exception handling path! | |
506 | */ | |
507 | if (0 == strcmp(name, "t_invop") || | |
508 | 0 == strcmp(name, "enter_lohandler") || | |
509 | 0 == strcmp(name, "lo_alltraps") || | |
510 | 0 == strcmp(name, "kernel_trap") || | |
511 | 0 == strcmp(name, "i386_astintr")) | |
512 | continue; | |
513 | ||
514 | if (0 == strcmp(name, "current_thread") || | |
515 | 0 == strcmp(name, "ast_pending") || | |
516 | 0 == strcmp(name, "fbt_perfCallback") || | |
517 | 0 == strcmp(name, "machine_thread_get_kern_state") || | |
518 | 0 == strcmp(name, "ml_set_interrupts_enabled") || | |
519 | 0 == strcmp(name, "dtrace_invop") || | |
520 | 0 == strcmp(name, "fbt_invop") || | |
521 | 0 == strcmp(name, "sdt_invop") || | |
522 | 0 == strcmp(name, "max_valid_stack_address")) | |
523 | continue; | |
524 | ||
525 | /* | |
526 | * Voodoo. | |
527 | */ | |
528 | if (strstr(name, "machine_stack_") == name || | |
529 | strstr(name, "mapping_") == name || | |
530 | strstr(name, "hpet_") == name || | |
531 | ||
532 | 0 == strcmp(name, "rdHPET") || | |
533 | 0 == strcmp(name, "HPETInterrupt") || | |
534 | 0 == strcmp(name, "tmrCvt") || | |
535 | ||
536 | strstr(name, "tsc_") == name || | |
537 | ||
538 | strstr(name, "pmCPU") == name || | |
539 | 0 == strcmp(name, "Cstate_table_set") || | |
540 | 0 == strcmp(name, "pmHPETInterrupt") || | |
541 | 0 == strcmp(name, "pmKextRegister") || | |
542 | 0 == strcmp(name, "pmSafeMode") || | |
543 | 0 == strcmp(name, "pmUnregister") || | |
544 | strstr(name, "pms") == name || | |
545 | 0 == strcmp(name, "power_management_init") || | |
546 | strstr(name, "usimple_") == name || | |
547 | ||
548 | strstr(name, "rtc_") == name || | |
549 | strstr(name, "_rtc_") == name || | |
550 | strstr(name, "rtclock_") == name || | |
551 | strstr(name, "clock_") == name || | |
552 | strstr(name, "absolutetime_to_") == name || | |
553 | 0 == strcmp(name, "setPop") || | |
554 | 0 == strcmp(name, "nanoseconds_to_absolutetime") || | |
555 | 0 == strcmp(name, "nanotime_to_absolutetime") || | |
556 | ||
557 | strstr(name, "etimer_") == name || | |
558 | ||
559 | strstr(name, "commpage_") == name || | |
560 | strstr(name, "pmap_") == name || | |
561 | strstr(name, "ml_") == name || | |
562 | strstr(name, "PE_") == name || | |
563 | strstr(name, "lapic_") == name || | |
564 | strstr(name, "acpi_") == name) | |
565 | continue; | |
566 | ||
567 | /* | |
568 | * Avoid machine_ routines. PR_5346750. | |
569 | */ | |
570 | if (strstr(name, "machine_") == name) | |
571 | continue; | |
572 | ||
573 | if (0 == strcmp(name, "handle_pending_TLB_flushes")) | |
574 | continue; | |
575 | ||
576 | /* | |
577 | * Place no probes on critical routines. PR_5221096 | |
578 | */ | |
579 | if (!gIgnoreFBTBlacklist && | |
580 | bsearch( name, critical_blacklist, CRITICAL_BLACKLIST_COUNT, sizeof(name), _cmp ) != NULL) | |
581 | continue; | |
582 | ||
583 | /* | |
584 | * Place no probes that could be hit in probe context. | |
585 | */ | |
586 | if (!gIgnoreFBTBlacklist && | |
587 | bsearch( name, probe_ctx_closure, PROBE_CTX_CLOSURE_COUNT, sizeof(name), _cmp ) != NULL) | |
588 | continue; | |
589 | ||
590 | /* | |
591 | * Place no probes that could be hit on the way to the debugger. | |
592 | */ | |
593 | if (strstr(name, "kdp_") == name || | |
594 | strstr(name, "kdb_") == name || | |
595 | strstr(name, "kdbg_") == name || | |
596 | strstr(name, "kdebug_") == name || | |
597 | 0 == strcmp(name, "kernel_debug") || | |
598 | 0 == strcmp(name, "Debugger") || | |
599 | 0 == strcmp(name, "Call_DebuggerC") || | |
600 | 0 == strcmp(name, "lock_debugger") || | |
601 | 0 == strcmp(name, "unlock_debugger") || | |
602 | 0 == strcmp(name, "SysChoked")) | |
603 | continue; | |
604 | ||
605 | /* | |
606 | * Place no probes that could be hit on the way to a panic. | |
607 | */ | |
608 | if (NULL != strstr(name, "panic_") || | |
609 | 0 == strcmp(name, "panic") || | |
610 | 0 == strcmp(name, "handleMck") || | |
611 | 0 == strcmp(name, "unresolved_kernel_trap")) | |
612 | continue; | |
613 | ||
614 | if (dtrace_probe_lookup(fbt_id, modname, name, NULL) != 0) | |
615 | continue; | |
616 | ||
617 | for (j = 0, instr = (machine_inst_t *)sym[i].n_value, theInstr = 0; | |
618 | (j < 4) && ((uintptr_t)instr >= instrLow) && (instrHigh > (uintptr_t)(instr + 2)); | |
619 | j++) { | |
620 | theInstr = instr[0]; | |
621 | if (theInstr == FBT_PUSHL_EBP || theInstr == FBT_RET || theInstr == FBT_RET_IMM16) | |
622 | break; | |
623 | ||
624 | if ((size = dtrace_instr_size(instr)) <= 0) | |
625 | break; | |
626 | ||
627 | instr += size; | |
628 | } | |
629 | ||
630 | if (theInstr != FBT_PUSHL_EBP) | |
631 | continue; | |
632 | ||
633 | i1 = instr[1]; | |
634 | i2 = instr[2]; | |
635 | ||
636 | limit = (machine_inst_t *)instrHigh; | |
637 | ||
638 | if ((i1 == FBT_MOVL_ESP_EBP0_V0 && i2 == FBT_MOVL_ESP_EBP1_V0) || | |
639 | (i1 == FBT_MOVL_ESP_EBP0_V1 && i2 == FBT_MOVL_ESP_EBP1_V1)) { | |
640 | instr += 1; /* Advance to the movl %esp,%ebp */ | |
641 | theInstr = i1; | |
642 | } else { | |
643 | /* | |
644 | * Sometimes, the compiler will schedule an intervening instruction | |
645 | * in the function prologue. Example: | |
646 | * | |
647 | * _mach_vm_read: | |
648 | * 000006d8 pushl %ebp | |
649 | * 000006d9 movl $0x00000004,%edx | |
650 | * 000006de movl %esp,%ebp | |
651 | * | |
652 | * Try the next instruction, to see if it is a movl %esp,%ebp | |
653 | */ | |
654 | ||
655 | instr += 1; /* Advance past the pushl %ebp */ | |
656 | if ((size = dtrace_instr_size(instr)) <= 0) | |
657 | continue; | |
658 | ||
659 | instr += size; | |
660 | ||
661 | if ((instr + 1) >= limit) | |
662 | continue; | |
663 | ||
664 | i1 = instr[0]; | |
665 | i2 = instr[1]; | |
666 | ||
667 | if (!(i1 == FBT_MOVL_ESP_EBP0_V0 && i2 == FBT_MOVL_ESP_EBP1_V0) && | |
668 | !(i1 == FBT_MOVL_ESP_EBP0_V1 && i2 == FBT_MOVL_ESP_EBP1_V1)) | |
669 | continue; | |
670 | ||
671 | /* instr already points at the movl %esp,%ebp */ | |
672 | theInstr = i1; | |
673 | } | |
674 | ||
675 | fbt = kmem_zalloc(sizeof (fbt_probe_t), KM_SLEEP); | |
676 | strlcpy( (char *)&(fbt->fbtp_name), name, MAX_FBTP_NAME_CHARS ); | |
677 | fbt->fbtp_id = dtrace_probe_create(fbt_id, modname, name, FBT_ENTRY, FBT_AFRAMES_ENTRY, fbt); | |
678 | fbt->fbtp_patchpoint = instr; | |
679 | fbt->fbtp_ctl = ctl; | |
680 | fbt->fbtp_loadcnt = ctl->mod_loadcnt; | |
681 | fbt->fbtp_rval = DTRACE_INVOP_MOVL_ESP_EBP; | |
682 | fbt->fbtp_savedval = theInstr; | |
683 | fbt->fbtp_patchval = FBT_PATCHVAL; | |
684 | ||
685 | fbt->fbtp_hashnext = fbt_probetab[FBT_ADDR2NDX(instr)]; | |
686 | fbt->fbtp_symndx = i; | |
687 | fbt_probetab[FBT_ADDR2NDX(instr)] = fbt; | |
688 | ||
689 | retfbt = NULL; | |
690 | again: | |
691 | if (instr >= limit) | |
692 | continue; | |
693 | ||
694 | /* | |
695 | * If this disassembly fails, then we've likely walked off into | |
696 | * a jump table or some other unsuitable area. Bail out of the | |
697 | * disassembly now. | |
698 | */ | |
699 | if ((size = dtrace_instr_size(instr)) <= 0) | |
700 | continue; | |
701 | ||
702 | /* | |
703 | * We (desperately) want to avoid erroneously instrumenting a | |
704 | * jump table, especially given that our markers are pretty | |
705 | * short: two bytes on x86, and just one byte on amd64. To | |
706 | * determine if we're looking at a true instruction sequence | |
707 | * or an inline jump table that happens to contain the same | |
708 | * byte sequences, we resort to some heuristic sleeze: we | |
709 | * treat this instruction as being contained within a pointer, | |
710 | * and see if that pointer points to within the body of the | |
711 | * function. If it does, we refuse to instrument it. | |
712 | */ | |
713 | for (j = 0; j < sizeof (uintptr_t); j++) { | |
714 | uintptr_t check = (uintptr_t)instr - j; | |
715 | uint8_t *ptr; | |
716 | ||
717 | if (check < sym[i].n_value) | |
718 | break; | |
719 | ||
720 | if (check + sizeof (uintptr_t) > (uintptr_t)limit) | |
721 | continue; | |
722 | ||
723 | ptr = *(uint8_t **)check; | |
724 | ||
725 | if (ptr >= (uint8_t *)sym[i].n_value && ptr < limit) { | |
726 | instr += size; | |
727 | goto again; | |
728 | } | |
729 | } | |
730 | ||
731 | /* | |
732 | * OK, it's an instruction. | |
733 | */ | |
734 | theInstr = instr[0]; | |
735 | ||
736 | /* Walked onto the start of the next routine? If so, bail out of this function. */ | |
737 | if (theInstr == FBT_PUSHL_EBP) | |
738 | continue; | |
739 | ||
740 | if (!(size == 1 && (theInstr == FBT_POPL_EBP || theInstr == FBT_LEAVE))) { | |
741 | instr += size; | |
742 | goto again; | |
743 | } | |
744 | ||
745 | /* | |
746 | * Found the popl %ebp; or leave. | |
747 | */ | |
748 | machine_inst_t *patch_instr = instr; | |
749 | ||
750 | /* | |
751 | * Scan forward for a "ret", or "jmp". | |
752 | */ | |
753 | instr += size; | |
754 | if (instr >= limit) | |
755 | continue; | |
756 | ||
757 | size = dtrace_instr_size(instr); | |
758 | if (size <= 0) /* Failed instruction decode? */ | |
759 | continue; | |
760 | ||
761 | theInstr = instr[0]; | |
762 | ||
763 | if (!(size == FBT_RET_LEN && (theInstr == FBT_RET)) && | |
764 | !(size == FBT_RET_IMM16_LEN && (theInstr == FBT_RET_IMM16)) && | |
765 | !(size == FBT_JMP_SHORT_REL_LEN && (theInstr == FBT_JMP_SHORT_REL)) && | |
766 | !(size == FBT_JMP_NEAR_REL_LEN && (theInstr == FBT_JMP_NEAR_REL)) && | |
767 | !(size == FBT_JMP_FAR_ABS_LEN && (theInstr == FBT_JMP_FAR_ABS))) | |
768 | continue; | |
769 | ||
770 | /* | |
771 | * popl %ebp; ret; or leave; ret; or leave; jmp tailCalledFun; -- We have a winner! | |
772 | */ | |
773 | fbt = kmem_zalloc(sizeof (fbt_probe_t), KM_SLEEP); | |
774 | strlcpy( (char *)&(fbt->fbtp_name), name, MAX_FBTP_NAME_CHARS ); | |
775 | ||
776 | if (retfbt == NULL) { | |
777 | fbt->fbtp_id = dtrace_probe_create(fbt_id, modname, | |
778 | name, FBT_RETURN, FBT_AFRAMES_RETURN, fbt); | |
779 | } else { | |
780 | retfbt->fbtp_next = fbt; | |
781 | fbt->fbtp_id = retfbt->fbtp_id; | |
782 | } | |
783 | ||
784 | retfbt = fbt; | |
785 | fbt->fbtp_patchpoint = patch_instr; | |
786 | fbt->fbtp_ctl = ctl; | |
787 | fbt->fbtp_loadcnt = ctl->mod_loadcnt; | |
788 | ||
789 | if (*patch_instr == FBT_POPL_EBP) { | |
790 | fbt->fbtp_rval = DTRACE_INVOP_POPL_EBP; | |
791 | } else { | |
792 | ASSERT(*patch_instr == FBT_LEAVE); | |
793 | fbt->fbtp_rval = DTRACE_INVOP_LEAVE; | |
794 | } | |
795 | fbt->fbtp_roffset = | |
796 | (uintptr_t)(patch_instr - (uint8_t *)sym[i].n_value); | |
797 | ||
798 | fbt->fbtp_savedval = *patch_instr; | |
799 | fbt->fbtp_patchval = FBT_PATCHVAL; | |
800 | fbt->fbtp_hashnext = fbt_probetab[FBT_ADDR2NDX(patch_instr)]; | |
801 | fbt->fbtp_symndx = i; | |
802 | fbt_probetab[FBT_ADDR2NDX(patch_instr)] = fbt; | |
803 | ||
804 | instr += size; | |
805 | goto again; | |
806 | } | |
807 | } | |
808 | ||
809 | extern struct modctl g_fbt_kernctl; | |
810 | #undef kmem_alloc /* from its binding to dt_kmem_alloc glue */ | |
811 | #undef kmem_free /* from its binding to dt_kmem_free glue */ | |
812 | #include <vm/vm_kern.h> | |
813 | ||
814 | /*ARGSUSED*/ | |
815 | void | |
816 | fbt_provide_module(void *arg, struct modctl *ctl) | |
817 | { | |
818 | #pragma unused(ctl) | |
819 | __fbt_provide_module(arg, &g_fbt_kernctl); | |
820 | ||
821 | kmem_free(kernel_map, (vm_offset_t)g_fbt_kernctl.address, round_page_32(g_fbt_kernctl.size)); | |
822 | g_fbt_kernctl.address = 0; | |
823 | g_fbt_kernctl.size = 0; | |
824 | } |