/* apple/xnu (xnu-1228): bsd/dev/i386/fbt_x86.c */
/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License, Version 1.0 only
 * (the "License"). You may not use this file except in compliance
 * with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2005 Sun Microsystems, Inc. All rights reserved.
 * Use is subject to license terms.
 */

/* #pragma ident "@(#)fbt.c 1.15 05/09/19 SMI" */

#ifdef KERNEL
#ifndef _KERNEL
#define _KERNEL /* Solaris vs. Darwin */
#endif
#endif

#define MACH__POSIX_C_SOURCE_PRIVATE 1 /* pulls in suitable savearea from mach/ppc/thread_status.h */
#include <kern/thread.h>
#include <mach/thread_status.h>
#include <mach/vm_param.h>
#include <mach-o/loader.h>
#include <mach-o/nlist.h>

extern struct mach_header _mh_execute_header; /* the kernel's mach header */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/errno.h>
#include <sys/stat.h>
#include <sys/ioctl.h>
#include <sys/conf.h>
#include <sys/fcntl.h>
#include <miscfs/devfs/devfs.h>

#include <sys/dtrace.h>
#include <sys/dtrace_impl.h>
#include <sys/fbt.h>

#include <sys/dtrace_glue.h>

#define DTRACE_INVOP_NOP_SKIP           1
#define DTRACE_INVOP_MOVL_ESP_EBP       10
#define DTRACE_INVOP_MOVL_ESP_EBP_SKIP  2
#define DTRACE_INVOP_LEAVE_SKIP         1

#define FBT_PUSHL_EBP                   0x55
#define FBT_MOVL_ESP_EBP0_V0            0x8b
#define FBT_MOVL_ESP_EBP1_V0            0xec
#define FBT_MOVL_ESP_EBP0_V1            0x89
#define FBT_MOVL_ESP_EBP1_V1            0xe5
#define FBT_REX_RSP_RBP                 0x48

#define FBT_POPL_EBP                    0x5d
#define FBT_RET                         0xc3
#define FBT_RET_IMM16                   0xc2
#define FBT_LEAVE                       0xc9
#define FBT_JMP_SHORT_REL               0xeb /* Jump short, relative, displacement relative to next instr. */
#define FBT_JMP_NEAR_REL                0xe9 /* Jump near, relative, displacement relative to next instr. */
#define FBT_JMP_FAR_ABS                 0xea /* Jump far, absolute, address given in operand */
#define FBT_RET_LEN                     1
#define FBT_RET_IMM16_LEN               3
#define FBT_JMP_SHORT_REL_LEN           2
#define FBT_JMP_NEAR_REL_LEN            5
#define FBT_JMP_FAR_ABS_LEN             5

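/*
 * The byte values above encode the canonical i386 prologue and epilogue
 * that the provider searches for below, e.g.:
 *
 *     55            pushl %ebp
 *     89 e5 / 8b ec movl  %esp,%ebp
 *     ...
 *     5d            popl  %ebp      (or c9, leave)
 *     c3            ret             (or c2/eb/e9/ea)
 *
 * A matched byte becomes a patch point. When the probe is enabled
 * (elsewhere in the provider) the byte is overwritten with FBT_PATCHVAL,
 * so executing it raises an invalid-opcode trap that is routed through
 * fbt_perfCallback() and dtrace_invop() to fbt_invop() below.
 */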
#define FBT_PATCHVAL                    0xf0
#define FBT_AFRAMES_ENTRY               7
#define FBT_AFRAMES_RETURN              6

#define FBT_ENTRY       "entry"
#define FBT_RETURN      "return"
#define FBT_ADDR2NDX(addr) ((((uintptr_t)(addr)) >> 4) & fbt_probetab_mask)

extern dtrace_provider_id_t fbt_id;
extern fbt_probe_t **fbt_probetab;
extern int fbt_probetab_mask;

/*
 * Critical routines that must not be probed. PR_5221096, PR_5379018.
 */

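/* N.B.: this list and probe_ctx_closure below are kept in ascending strcmp() order so bsearch() (below) can search them. */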
static const char * critical_blacklist[] =
{
    "bcopy_phys",
    "console_cpu_alloc",
    "console_cpu_free",
    "cpu_IA32e_disable",
    "cpu_IA32e_enable",
    "cpu_control",
    "cpu_data_alloc",
    "cpu_desc_init",
    "cpu_desc_init64",
    "cpu_desc_load64",
    "cpu_exit_wait",
    "cpu_info",
    "cpu_info_count",
    "cpu_init",
    "cpu_interrupt",
    "cpu_machine_init",
    "cpu_mode_init",
    "cpu_processor_alloc",
    "cpu_processor_free",
    "cpu_signal_handler",
    "cpu_sleep",
    "cpu_start",
    "cpu_subtype",
    "cpu_thread_alloc",
    "cpu_thread_halt",
    "cpu_thread_init",
    "cpu_threadtype",
    "cpu_to_processor",
    "cpu_topology_start",
    "cpu_type",
    "cpu_window_init",
    "cpuid_cpu_display",
    "handle_pending_TLB_flushes",
    "hw_compare_and_store",
    "machine_idle_cstate",
    "mca_cpu_alloc",
    "mca_cpu_init",
    "ml_nofault_copy",
    "pmap_cpu_alloc",
    "pmap_cpu_free",
    "pmap_cpu_high_map_vaddr",
    "pmap_cpu_high_shared_remap",
    "pmap_cpu_init",
    "rdHPET",
    "register_cpu_setup_func",
    "unregister_cpu_setup_func"
};
#define CRITICAL_BLACKLIST_COUNT (sizeof(critical_blacklist)/sizeof(critical_blacklist[0]))

/*
 * The transitive closure of entry points that can be reached from probe context.
 * (Apart from routines whose names begin with dtrace_ or dtxnu_.)
 */
static const char * probe_ctx_closure[] =
{
    "Debugger",
    "OSCompareAndSwap",
    "absolutetime_to_microtime",
    "ast_pending",
    "clock_get_calendar_nanotime_nowait",
    "copyin",
    "copyin_user",
    "copyinstr",
    "copyout",
    "copyoutstr",
    "cpu_number",
    "current_proc",
    "current_processor",
    "current_task",
    "current_thread",
    "debug_enter",
    "find_user_regs",
    "flush_tlb64",
    "get_bsdtask_info",
    "get_bsdthread_info",
    "hw_atomic_and",
    "kauth_cred_get",
    "kauth_getgid",
    "kauth_getuid",
    "kernel_preempt_check",
    "mach_absolute_time",
    "max_valid_stack_address",
    "ml_at_interrupt_context",
    "ml_phys_write_byte_64",
    "ml_phys_write_half_64",
    "ml_phys_write_word_64",
    "ml_set_interrupts_enabled",
    "panic",
    "pmap64_pde",
    "pmap64_pdpt",
    "pmap_find_phys",
    "pmap_get_mapwindow",
    "pmap_pde",
    "pmap_pte",
    "pmap_put_mapwindow",
    "pmap_valid_page",
    "prf",
    "proc_is64bit",
    "proc_selfname",
    "proc_selfpid",
    "psignal_lock",
    "rtc_nanotime_load",
    "rtc_nanotime_read",
    "strlcpy",
    "sync_iss_to_iks_unconditionally",
    "timer_grab"
};
#define PROBE_CTX_CLOSURE_COUNT (sizeof(probe_ctx_closure)/sizeof(probe_ctx_closure[0]))

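/*
 * Local helpers for searching the sorted name lists above: _cmp() compares
 * a bare symbol name against an entry in an array of string pointers, and
 * bsearch() is a private copy of the classic binary search.
 */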
static int _cmp(const void *a, const void *b)
{
    return strcmp((const char *)a, *(const char **)b);
}

static const void * bsearch(
    register const void *key,
    const void *base0,
    size_t nmemb,
    register size_t size,
    register int (*compar)(const void *, const void *)) {

    register const char *base = base0;
    register size_t lim;
    register int cmp;
    register const void *p;

    for (lim = nmemb; lim != 0; lim >>= 1) {
        p = base + (lim >> 1) * size;
        cmp = (*compar)(key, p);
        if (cmp == 0)
            return p;
        if (cmp > 0) {  /* key > p: move right */
            base = (const char *)p + size;
            lim--;
        }               /* else move left */
    }
    return (NULL);
}

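/*
 * fbt_invop() is called from dtrace_invop() when an invalid-opcode trap lands
 * on an address we may have patched. It hashes the faulting address into
 * fbt_probetab; on a hit it fires the probe (harvesting the caller and up to
 * five arguments from the interrupted stack for entry probes, or passing the
 * return offset and return value for return probes) and reports back, via
 * fbtp_rval, which instruction the trap handler must emulate.
 */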
int
fbt_invop(uintptr_t addr, uintptr_t *stack, uintptr_t rval)
{
    uintptr_t stack0 = 0, stack1 = 0, stack2 = 0, stack3 = 0, stack4 = 0;
    fbt_probe_t *fbt = fbt_probetab[FBT_ADDR2NDX(addr)];

    for (; fbt != NULL; fbt = fbt->fbtp_hashnext) {
        if ((uintptr_t)fbt->fbtp_patchpoint == addr) {

            if (fbt->fbtp_roffset == 0) {
                uintptr_t *stacktop;
                if (CPU_ON_INTR(CPU))
                    stacktop = (uintptr_t *)dtrace_get_cpu_int_stack_top();
                else
                    stacktop = (uintptr_t *)(dtrace_get_kernel_stack(current_thread()) + KERNEL_STACK_SIZE);

                stack += 1; /* skip over the target's pushl'd %ebp */

                if (stack <= stacktop)
                    CPU->cpu_dtrace_caller = *stack++;
                if (stack <= stacktop)
                    stack0 = *stack++;
                if (stack <= stacktop)
                    stack1 = *stack++;
                if (stack <= stacktop)
                    stack2 = *stack++;
                if (stack <= stacktop)
                    stack3 = *stack++;
                if (stack <= stacktop)
                    stack4 = *stack++;

                dtrace_probe(fbt->fbtp_id, stack0, stack1, stack2, stack3, stack4);
                CPU->cpu_dtrace_caller = 0;
            } else {
                dtrace_probe(fbt->fbtp_id, fbt->fbtp_roffset, rval, 0, 0, 0);
                CPU->cpu_dtrace_caller = 0;
            }

            return (fbt->fbtp_rval);
        }
    }

    return (0);
}

#define IS_USER_TRAP(regs) (regs && (((regs)->cs & 3) != 0))
#define T_INVALID_OPCODE 6
#define FBT_EXCEPTION_CODE T_INVALID_OPCODE

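/*
 * fbt_perfCallback() is invoked from the kernel trap path (kernel_trap()) on
 * an invalid-opcode exception. If the trap came from kernel mode it asks
 * dtrace_invop(), and hence fbt_invop(), whether the faulting byte is one of
 * ours, and then emulates the instruction the patch displaced: a nop, the
 * prologue's movl %esp,%ebp, or the epilogue's popl %ebp / leave.
 */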
kern_return_t
fbt_perfCallback(
    int trapno,
    x86_saved_state_t *tagged_regs,
    __unused int unused1,
    __unused int unused2)
{
    kern_return_t retval = KERN_FAILURE;
    x86_saved_state32_t *saved_state = saved_state32(tagged_regs);
    struct x86_saved_state32_from_kernel *regs = (struct x86_saved_state32_from_kernel *)saved_state;

    if (FBT_EXCEPTION_CODE == trapno && !IS_USER_TRAP(saved_state)) {
        boolean_t oldlevel, cpu_64bit;
        uint32_t esp_probe, *ebp, edi, fp, *pDst, delta = 0;
        int emul;

        cpu_64bit = ml_is64bit();
        oldlevel = ml_set_interrupts_enabled(FALSE);

        /* Calculate where the stack pointer was when the probe instruction "fired." */
        if (cpu_64bit) {
            esp_probe = saved_state->uesp;    /* Easy, x86_64 establishes this value in idt64.s */
        } else {
            esp_probe = (uint32_t)&(regs[1]); /* Nasty, infer the location above the save area */
        }

        emul = dtrace_invop( saved_state->eip, (uintptr_t *)esp_probe, saved_state->eax );
        __asm__ volatile(".globl _dtrace_invop_callsite");
        __asm__ volatile("_dtrace_invop_callsite:");

        switch (emul) {
        case DTRACE_INVOP_NOP:
            saved_state->eip += DTRACE_INVOP_NOP_SKIP; /* Skip over the patched NOP */
            retval = KERN_SUCCESS;
            break;

        case DTRACE_INVOP_MOVL_ESP_EBP:
            saved_state->ebp = esp_probe;                       /* Emulate patched movl %esp,%ebp */
            saved_state->eip += DTRACE_INVOP_MOVL_ESP_EBP_SKIP; /* Skip over the bytes of the patched movl %esp,%ebp */
            retval = KERN_SUCCESS;
            break;

        case DTRACE_INVOP_POPL_EBP:
        case DTRACE_INVOP_LEAVE:
            /*
             * Emulate first micro-op of patched leave: movl %ebp,%esp
             * fp points just below the return address slot for target's ret
             * and at the slot holding the frame pointer saved by the target's prologue.
             */
            fp = saved_state->ebp;
            /* Emulate second micro-op of patched leave: patched popl %ebp
             * savearea ebp is set for the frame of the caller to target
             * The *live* %esp will be adjusted below for pop increment(s)
             */
            saved_state->ebp = *(uint32_t *)fp;
            /* Skip over the patched leave */
            saved_state->eip += DTRACE_INVOP_LEAVE_SKIP;
            /*
             * Lift the stack to account for the emulated leave
             * Account for words local in this frame
             * (in "case DTRACE_INVOP_POPL_EBP:" this is zero.)
             */
            delta = ((uint32_t *)fp) - ((uint32_t *)esp_probe);
            /* Account for popping off the ebp (just accomplished by the emulation
             * above...)
             */
            delta += 1;

            if (cpu_64bit)
                saved_state->uesp += (delta << 2);

            /* XXX Fragile in the extreme. Obtain the value of %edi that our caller pushed
             * (on behalf of its caller -- trap_from_kernel()). Ultimately,
             * trap_from_kernel's stack pointer is restored from this slot.
             * This is sensitive to the manner in which the compiler preserves %edi,
             * and trap_from_kernel()'s internals.
             */
            ebp = (uint32_t *)__builtin_frame_address(0);
            ebp = (uint32_t *)*ebp;
            edi = *(ebp - 1);
            /* Shift contents of stack */
            for (pDst = (uint32_t *)fp;
                 pDst > (((uint32_t *)edi));
                 pDst--)
                *pDst = pDst[-delta];
            /* Now adjust the value of %edi in our caller (kernel_trap)'s frame */
            *(ebp - 1) = edi + (delta << 2);

            retval = KERN_SUCCESS;
            break;

        default:
            retval = KERN_FAILURE;
            break;
        }
        ml_set_interrupts_enabled(oldlevel);
    }

    return retval;
}

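/*
 * __fbt_provide_module() walks a module's Mach-O load commands to locate its
 * __TEXT segment, symbol table, and string table, then examines the code at
 * each global symbol: a recognized prologue yields an "entry" probe, and each
 * matching epilogue (popl %ebp or leave followed by a ret or jmp) yields a
 * "return" probe. Symbols on the blacklists above are skipped.
 */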
/*ARGSUSED*/
static void
__fbt_provide_module(void *arg, struct modctl *ctl)
{
#pragma unused(arg)
    struct mach_header *mh;
    struct load_command *cmd;
    struct segment_command *orig_ts = NULL, *orig_le = NULL;
    struct symtab_command *orig_st = NULL;
    struct nlist *sym = NULL;
    char *strings;
    uintptr_t instrLow, instrHigh;
    char *modname;
    unsigned int i, j;

    int gIgnoreFBTBlacklist = 0;
    PE_parse_boot_arg("IgnoreFBTBlacklist", &gIgnoreFBTBlacklist);

    mh = (struct mach_header *)(ctl->address);
    modname = ctl->mod_modname;

    if (0 == ctl->address || 0 == ctl->size) /* Has the linker been jettisoned? */
        return;

    /*
     * Employees of dtrace and their families are ineligible. Void
     * where prohibited.
     */

    if (strcmp(modname, "com.apple.driver.dtrace") == 0)
        return;

    if (strstr(modname, "CHUD") != NULL)
        return;

    if (mh->magic != MH_MAGIC)
        return;

    cmd = (struct load_command *) &mh[1];
    for (i = 0; i < mh->ncmds; i++) {
        if (cmd->cmd == LC_SEGMENT) {
            struct segment_command *orig_sg = (struct segment_command *) cmd;

            if (strcmp(SEG_TEXT, orig_sg->segname) == 0)
                orig_ts = orig_sg;
            else if (strcmp(SEG_LINKEDIT, orig_sg->segname) == 0)
                orig_le = orig_sg;
            else if (strcmp("", orig_sg->segname) == 0)
                orig_ts = orig_sg; /* kexts have a single unnamed segment */
        }
        else if (cmd->cmd == LC_SYMTAB)
            orig_st = (struct symtab_command *) cmd;

        cmd = (struct load_command *) ((caddr_t) cmd + cmd->cmdsize);
    }

    if ((orig_ts == NULL) || (orig_st == NULL) || (orig_le == NULL))
        return;

    sym = (struct nlist *)orig_le->vmaddr;
    strings = ((char *)sym) + orig_st->nsyms * sizeof(struct nlist);

    /* Find extent of the TEXT section */
    instrLow = (uintptr_t)orig_ts->vmaddr;
    instrHigh = (uintptr_t)(orig_ts->vmaddr + orig_ts->vmsize);

    for (i = 0; i < orig_st->nsyms; i++) {
        fbt_probe_t *fbt, *retfbt;
        machine_inst_t *instr, *limit, theInstr, i1, i2;
        uint8_t n_type = sym[i].n_type & (N_TYPE | N_EXT);
        char *name = strings + sym[i].n_un.n_strx;
        int size;

        /* Check that the symbol is a global and that it has a name. */
        if (((N_SECT | N_EXT) != n_type && (N_ABS | N_EXT) != n_type))
            continue;

        if (0 == sym[i].n_un.n_strx) /* iff a null, "", name. */
            continue;

        /* Lop off omnipresent leading underscore. */
        if (*name == '_')
            name += 1;
        if (strstr(name, "dtrace_") == name &&
            strstr(name, "dtrace_safe_") != name) {
            /*
             * Anything beginning with "dtrace_" may be called
             * from probe context unless it explicitly indicates
             * that it won't be called from probe context by
             * using the prefix "dtrace_safe_".
             */
            continue;
        }

        if (strstr(name, "dsmos_") == name)
            continue; /* Don't Steal Mac OS X! */

        if (strstr(name, "dtxnu_") == name ||
            strstr(name, "_dtrace") == name)
            continue; /* Shims in dtrace.c */

        if (strstr(name, "chud") == name)
            continue; /* Professional courtesy. */

        if (strstr(name, "hibernate_") == name)
            continue; /* Let sleeping dogs lie. */

        if (0 == strcmp(name, "ZN9IOService14newTemperatureElPS_") ||           /* IOService::newTemperature */
            0 == strcmp(name, "ZN9IOService26temperatureCriticalForZoneEPS_"))  /* IOService::temperatureCriticalForZone */
            continue; /* Per the fire code */

        /*
         * Place no probes (illegal instructions) in the exception handling path!
         */
        if (0 == strcmp(name, "t_invop") ||
            0 == strcmp(name, "enter_lohandler") ||
            0 == strcmp(name, "lo_alltraps") ||
            0 == strcmp(name, "kernel_trap") ||
            0 == strcmp(name, "i386_astintr"))
            continue;

        if (0 == strcmp(name, "current_thread") ||
            0 == strcmp(name, "ast_pending") ||
            0 == strcmp(name, "fbt_perfCallback") ||
            0 == strcmp(name, "machine_thread_get_kern_state") ||
            0 == strcmp(name, "ml_set_interrupts_enabled") ||
            0 == strcmp(name, "dtrace_invop") ||
            0 == strcmp(name, "fbt_invop") ||
            0 == strcmp(name, "sdt_invop") ||
            0 == strcmp(name, "max_valid_stack_address"))
            continue;

        /*
         * Voodoo.
         */
        if (strstr(name, "machine_stack_") == name ||
            strstr(name, "mapping_") == name ||
            strstr(name, "hpet_") == name ||

            0 == strcmp(name, "rdHPET") ||
            0 == strcmp(name, "HPETInterrupt") ||
            0 == strcmp(name, "tmrCvt") ||

            strstr(name, "tsc_") == name ||

            strstr(name, "pmCPU") == name ||
            0 == strcmp(name, "Cstate_table_set") ||
            0 == strcmp(name, "pmHPETInterrupt") ||
            0 == strcmp(name, "pmKextRegister") ||
            0 == strcmp(name, "pmSafeMode") ||
            0 == strcmp(name, "pmUnregister") ||
            strstr(name, "pms") == name ||
            0 == strcmp(name, "power_management_init") ||
            strstr(name, "usimple_") == name ||

            strstr(name, "rtc_") == name ||
            strstr(name, "_rtc_") == name ||
            strstr(name, "rtclock_") == name ||
            strstr(name, "clock_") == name ||
            strstr(name, "absolutetime_to_") == name ||
            0 == strcmp(name, "setPop") ||
            0 == strcmp(name, "nanoseconds_to_absolutetime") ||
            0 == strcmp(name, "nanotime_to_absolutetime") ||

            strstr(name, "etimer_") == name ||

            strstr(name, "commpage_") == name ||
            strstr(name, "pmap_") == name ||
            strstr(name, "ml_") == name ||
            strstr(name, "PE_") == name ||
            strstr(name, "lapic_") == name ||
            strstr(name, "acpi_") == name)
            continue;

        /*
         * Avoid machine_ routines. PR_5346750.
         */
        if (strstr(name, "machine_") == name)
            continue;

        if (0 == strcmp(name, "handle_pending_TLB_flushes"))
            continue;

        /*
         * Place no probes on critical routines. PR_5221096
         */
        if (!gIgnoreFBTBlacklist &&
            bsearch( name, critical_blacklist, CRITICAL_BLACKLIST_COUNT, sizeof(name), _cmp ) != NULL)
            continue;

        /*
         * Place no probes that could be hit in probe context.
         */
        if (!gIgnoreFBTBlacklist &&
            bsearch( name, probe_ctx_closure, PROBE_CTX_CLOSURE_COUNT, sizeof(name), _cmp ) != NULL)
            continue;

        /*
         * Place no probes that could be hit on the way to the debugger.
         */
        if (strstr(name, "kdp_") == name ||
            strstr(name, "kdb_") == name ||
            strstr(name, "kdbg_") == name ||
            strstr(name, "kdebug_") == name ||
            0 == strcmp(name, "kernel_debug") ||
            0 == strcmp(name, "Debugger") ||
            0 == strcmp(name, "Call_DebuggerC") ||
            0 == strcmp(name, "lock_debugger") ||
            0 == strcmp(name, "unlock_debugger") ||
            0 == strcmp(name, "SysChoked"))
            continue;

        /*
         * Place no probes that could be hit on the way to a panic.
         */
        if (NULL != strstr(name, "panic_") ||
            0 == strcmp(name, "panic") ||
            0 == strcmp(name, "handleMck") ||
            0 == strcmp(name, "unresolved_kernel_trap"))
            continue;

        if (dtrace_probe_lookup(fbt_id, modname, name, NULL) != 0)
            continue;

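        /*
         * Scan the first few instructions for the pushl %ebp that opens a
         * conventional frame; bail out if a ret or an undecodable instruction
         * is seen first.
         */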
        for (j = 0, instr = (machine_inst_t *)sym[i].n_value, theInstr = 0;
             (j < 4) && ((uintptr_t)instr >= instrLow) && (instrHigh > (uintptr_t)(instr + 2));
             j++) {
            theInstr = instr[0];
            if (theInstr == FBT_PUSHL_EBP || theInstr == FBT_RET || theInstr == FBT_RET_IMM16)
                break;

            if ((size = dtrace_instr_size(instr)) <= 0)
                break;

            instr += size;
        }

        if (theInstr != FBT_PUSHL_EBP)
            continue;

        i1 = instr[1];
        i2 = instr[2];

        limit = (machine_inst_t *)instrHigh;

        if ((i1 == FBT_MOVL_ESP_EBP0_V0 && i2 == FBT_MOVL_ESP_EBP1_V0) ||
            (i1 == FBT_MOVL_ESP_EBP0_V1 && i2 == FBT_MOVL_ESP_EBP1_V1)) {
            instr += 1; /* Advance to the movl %esp,%ebp */
            theInstr = i1;
        } else {
            /*
             * Sometimes, the compiler will schedule an intervening instruction
             * in the function prologue. Example:
             *
             * _mach_vm_read:
             * 000006d8        pushl   %ebp
             * 000006d9        movl    $0x00000004,%edx
             * 000006de        movl    %esp,%ebp
             *
             * Try the next instruction, to see if it is a movl %esp,%ebp
             */

            instr += 1; /* Advance past the pushl %ebp */
            if ((size = dtrace_instr_size(instr)) <= 0)
                continue;

            instr += size;

            if ((instr + 1) >= limit)
                continue;

            i1 = instr[0];
            i2 = instr[1];

            if (!(i1 == FBT_MOVL_ESP_EBP0_V0 && i2 == FBT_MOVL_ESP_EBP1_V0) &&
                !(i1 == FBT_MOVL_ESP_EBP0_V1 && i2 == FBT_MOVL_ESP_EBP1_V1))
                continue;

            /* instr already points at the movl %esp,%ebp */
            theInstr = i1;
        }

        fbt = kmem_zalloc(sizeof (fbt_probe_t), KM_SLEEP);
        strlcpy( (char *)&(fbt->fbtp_name), name, MAX_FBTP_NAME_CHARS );
        fbt->fbtp_id = dtrace_probe_create(fbt_id, modname, name, FBT_ENTRY, FBT_AFRAMES_ENTRY, fbt);
        fbt->fbtp_patchpoint = instr;
        fbt->fbtp_ctl = ctl;
        fbt->fbtp_loadcnt = ctl->mod_loadcnt;
        fbt->fbtp_rval = DTRACE_INVOP_MOVL_ESP_EBP;
        fbt->fbtp_savedval = theInstr;
        fbt->fbtp_patchval = FBT_PATCHVAL;

        fbt->fbtp_hashnext = fbt_probetab[FBT_ADDR2NDX(instr)];
        fbt->fbtp_symndx = i;
        fbt_probetab[FBT_ADDR2NDX(instr)] = fbt;

        retfbt = NULL;
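        /*
         * The entry probe is in place. From here, walk the rest of the
         * function instruction by instruction; every popl %ebp or leave that
         * is followed by a ret or jmp becomes a "return" probe, all sharing
         * the same probe id.
         */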
    again:
        if (instr >= limit)
            continue;

        /*
         * If this disassembly fails, then we've likely walked off into
         * a jump table or some other unsuitable area. Bail out of the
         * disassembly now.
         */
        if ((size = dtrace_instr_size(instr)) <= 0)
            continue;

        /*
         * We (desperately) want to avoid erroneously instrumenting a
         * jump table, especially given that our markers are pretty
         * short: two bytes on x86, and just one byte on amd64. To
         * determine if we're looking at a true instruction sequence
         * or an inline jump table that happens to contain the same
         * byte sequences, we resort to some heuristic sleaze: we
         * treat this instruction as being contained within a pointer,
         * and see if that pointer points to within the body of the
         * function. If it does, we refuse to instrument it.
         */
        for (j = 0; j < sizeof (uintptr_t); j++) {
            uintptr_t check = (uintptr_t)instr - j;
            uint8_t *ptr;

            if (check < sym[i].n_value)
                break;

            if (check + sizeof (uintptr_t) > (uintptr_t)limit)
                continue;

            ptr = *(uint8_t **)check;

            if (ptr >= (uint8_t *)sym[i].n_value && ptr < limit) {
                instr += size;
                goto again;
            }
        }

        /*
         * OK, it's an instruction.
         */
        theInstr = instr[0];

        /* Walked onto the start of the next routine? If so, bail out of this function. */
        if (theInstr == FBT_PUSHL_EBP)
            continue;

        if (!(size == 1 && (theInstr == FBT_POPL_EBP || theInstr == FBT_LEAVE))) {
            instr += size;
            goto again;
        }

        /*
         * Found the popl %ebp; or leave.
         */
        machine_inst_t *patch_instr = instr;

        /*
         * Scan forward for a "ret", or "jmp".
         */
        instr += size;
        if (instr >= limit)
            continue;

        size = dtrace_instr_size(instr);
        if (size <= 0) /* Failed instruction decode? */
            continue;

        theInstr = instr[0];

        if (!(size == FBT_RET_LEN && (theInstr == FBT_RET)) &&
            !(size == FBT_RET_IMM16_LEN && (theInstr == FBT_RET_IMM16)) &&
            !(size == FBT_JMP_SHORT_REL_LEN && (theInstr == FBT_JMP_SHORT_REL)) &&
            !(size == FBT_JMP_NEAR_REL_LEN && (theInstr == FBT_JMP_NEAR_REL)) &&
            !(size == FBT_JMP_FAR_ABS_LEN && (theInstr == FBT_JMP_FAR_ABS)))
            continue;

        /*
         * popl %ebp; ret; or leave; ret; or leave; jmp tailCalledFun; -- We have a winner!
         */
        fbt = kmem_zalloc(sizeof (fbt_probe_t), KM_SLEEP);
        strlcpy( (char *)&(fbt->fbtp_name), name, MAX_FBTP_NAME_CHARS );

        if (retfbt == NULL) {
            fbt->fbtp_id = dtrace_probe_create(fbt_id, modname,
                name, FBT_RETURN, FBT_AFRAMES_RETURN, fbt);
        } else {
            retfbt->fbtp_next = fbt;
            fbt->fbtp_id = retfbt->fbtp_id;
        }

        retfbt = fbt;
        fbt->fbtp_patchpoint = patch_instr;
        fbt->fbtp_ctl = ctl;
        fbt->fbtp_loadcnt = ctl->mod_loadcnt;

        if (*patch_instr == FBT_POPL_EBP) {
            fbt->fbtp_rval = DTRACE_INVOP_POPL_EBP;
        } else {
            ASSERT(*patch_instr == FBT_LEAVE);
            fbt->fbtp_rval = DTRACE_INVOP_LEAVE;
        }
        fbt->fbtp_roffset =
            (uintptr_t)(patch_instr - (uint8_t *)sym[i].n_value);

        fbt->fbtp_savedval = *patch_instr;
        fbt->fbtp_patchval = FBT_PATCHVAL;
        fbt->fbtp_hashnext = fbt_probetab[FBT_ADDR2NDX(patch_instr)];
        fbt->fbtp_symndx = i;
        fbt_probetab[FBT_ADDR2NDX(patch_instr)] = fbt;

        instr += size;
        goto again;
    }
}

extern struct modctl g_fbt_kernctl;
#undef kmem_alloc /* from its binding to dt_kmem_alloc glue */
#undef kmem_free /* from its binding to dt_kmem_free glue */
#include <vm/vm_kern.h>

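/*
 * In this release only the mach_kernel image described by g_fbt_kernctl is
 * scanned; the incoming ctl is ignored, and the symbol data referenced by
 * g_fbt_kernctl is freed back to the kernel map once the scan completes.
 */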
/*ARGSUSED*/
void
fbt_provide_module(void *arg, struct modctl *ctl)
{
#pragma unused(ctl)
    __fbt_provide_module(arg, &g_fbt_kernctl);

    kmem_free(kernel_map, (vm_offset_t)g_fbt_kernctl.address, round_page_32(g_fbt_kernctl.size));
    g_fbt_kernctl.address = 0;
    g_fbt_kernctl.size = 0;
}