]>
Commit | Line | Data |
---|---|---|
1 | /* | |
2 | * Copyright (c) 2005-2018 Apple Inc. All rights reserved. | |
3 | * | |
4 | * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ | |
5 | * | |
6 | * This file contains Original Code and/or Modifications of Original Code | |
7 | * as defined in and that are subject to the Apple Public Source License | |
8 | * Version 2.0 (the 'License'). You may not use this file except in | |
9 | * compliance with the License. The rights granted to you under the License | |
10 | * may not be used to create, or enable the creation or redistribution of, | |
11 | * unlawful or unlicensed copies of an Apple operating system, or to | |
12 | * circumvent, violate, or enable the circumvention or violation of, any | |
13 | * terms of an Apple operating system software license agreement. | |
14 | * | |
15 | * Please obtain a copy of the License at | |
16 | * http://www.opensource.apple.com/apsl/ and read it before using this file. | |
17 | * | |
18 | * The Original Code and all software distributed under the License are | |
19 | * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER | |
20 | * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, | |
21 | * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, | |
22 | * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. | |
23 | * Please see the License for the specific language governing rights and | |
24 | * limitations under the License. | |
25 | * | |
26 | * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ | |
27 | */ | |
28 | ||
29 | #include <kern/thread.h> | |
30 | #include <mach/thread_status.h> | |
31 | ||
/* savearea_t is an alias for x86_saved_state_t; dtrace_getufpstack() uses this name. */
typedef x86_saved_state_t savearea_t;
33 | ||
34 | #include <stdarg.h> | |
35 | #include <string.h> | |
36 | #include <sys/malloc.h> | |
37 | #include <sys/time.h> | |
38 | #include <sys/systm.h> | |
39 | #include <sys/proc.h> | |
40 | #include <sys/proc_internal.h> | |
41 | #include <sys/kauth.h> | |
42 | #include <sys/dtrace.h> | |
43 | #include <sys/dtrace_impl.h> | |
44 | #include <libkern/OSAtomic.h> | |
45 | #include <kern/thread_call.h> | |
46 | #include <kern/task.h> | |
47 | #include <kern/sched_prim.h> | |
48 | #include <miscfs/devfs/devfs.h> | |
49 | #include <mach/vm_param.h> | |
50 | #include <machine/pal_routines.h> | |
51 | #include <i386/mp.h> | |
52 | #include <machine/trap.h> | |
53 | ||
54 | /* | |
55 | * APPLE NOTE: The regmap is used to decode which 64bit uregs[] register | |
56 | * is being accessed when passed the 32bit uregs[] constant (based on | |
57 | * the reg.d translator file). The dtrace_getreg() is smart enough to handle | |
58 | * the register mappings. The register set definitions are the same as | |
59 | * those used by the fasttrap_getreg code. | |
60 | */ | |
61 | #include "fasttrap_regset.h" | |
62 | static const uint8_t regmap[19] = { | |
63 | REG_GS, /* GS */ | |
64 | REG_FS, /* FS */ | |
65 | REG_ES, /* ES */ | |
66 | REG_DS, /* DS */ | |
67 | REG_RDI, /* EDI */ | |
68 | REG_RSI, /* ESI */ | |
69 | REG_RBP, /* EBP, REG_FP */ | |
70 | REG_RSP, /* ESP */ | |
71 | REG_RBX, /* EBX */ | |
72 | REG_RDX, /* EDX, REG_R1 */ | |
73 | REG_RCX, /* ECX */ | |
74 | REG_RAX, /* EAX, REG_R0 */ | |
75 | REG_TRAPNO, /* TRAPNO */ | |
76 | REG_ERR, /* ERR */ | |
77 | REG_RIP, /* EIP, REG_PC */ | |
78 | REG_CS, /* CS */ | |
79 | REG_RFL, /* EFL, REG_PS */ | |
80 | REG_RSP, /* UESP, REG_SP */ | |
81 | REG_SS /* SS */ | |
82 | }; | |
83 | ||
84 | extern dtrace_id_t dtrace_probeid_error; /* special ERROR probe */ | |
85 | ||
86 | void | |
87 | dtrace_probe_error(dtrace_state_t *state, dtrace_epid_t epid, int which, | |
88 | int fltoffs, int fault, uint64_t illval) | |
89 | { | |
90 | /* | |
91 | * For the case of the error probe firing lets | |
92 | * stash away "illval" here, and special-case retrieving it in DIF_VARIABLE_ARG. | |
93 | */ | |
94 | state->dts_arg_error_illval = illval; | |
95 | dtrace_probe( dtrace_probeid_error, (uint64_t)(uintptr_t)state, epid, which, fltoffs, fault ); | |
96 | } | |
97 | ||
98 | /* | |
99 | * Atomicity and synchronization | |
100 | */ | |
/*
 * Store barrier: issues SFENCE.
 *
 * The "memory" clobber makes the statement a compiler-level barrier as well,
 * so the compiler cannot move stores across the fence if this function is
 * ever inlined (e.g. under link-time optimization).
 */
void
dtrace_membar_producer(void)
{
	__asm__ volatile("sfence" ::: "memory");
}
106 | ||
/*
 * Load barrier: issues LFENCE.
 *
 * The "memory" clobber makes the statement a compiler-level barrier as well,
 * so the compiler cannot move loads across the fence if this function is
 * ever inlined (e.g. under link-time optimization).
 */
void
dtrace_membar_consumer(void)
{
	__asm__ volatile("lfence" ::: "memory");
}
112 | ||
113 | /* | |
114 | * Interrupt manipulation | |
115 | * XXX dtrace_getipl() can be called from probe context. | |
116 | */ | |
/*
 * Report the interrupt level as a simple flag: 1 when running in interrupt
 * context, 0 otherwise.
 *
 * XXX get_interrupt_level() would be the natural source, but it is
 * MACH_KERNEL_PRIVATE (osfmk/kern/cpu_data.h), so ml_at_interrupt_context()
 * is used instead.
 */
int
dtrace_getipl(void)
{
	if (ml_at_interrupt_context()) {
		return 1;
	}

	return 0;
}
127 | ||
128 | /* | |
129 | * MP coordination | |
130 | */ | |
131 | typedef struct xcArg { | |
132 | processorid_t cpu; | |
133 | dtrace_xcall_t f; | |
134 | void *arg; | |
135 | } xcArg_t; | |
136 | ||
137 | static void | |
138 | xcRemote( void *foo ) | |
139 | { | |
140 | xcArg_t *pArg = (xcArg_t *)foo; | |
141 | ||
142 | if ( pArg->cpu == CPU->cpu_id || pArg->cpu == DTRACE_CPUALL ) { | |
143 | (pArg->f)(pArg->arg); | |
144 | } | |
145 | } | |
146 | ||
147 | ||
148 | /* | |
149 | * dtrace_xcall() is not called from probe context. | |
150 | */ | |
151 | void | |
152 | dtrace_xcall(processorid_t cpu, dtrace_xcall_t f, void *arg) | |
153 | { | |
154 | xcArg_t xcArg; | |
155 | ||
156 | xcArg.cpu = cpu; | |
157 | xcArg.f = f; | |
158 | xcArg.arg = arg; | |
159 | ||
160 | if (cpu == DTRACE_CPUALL) { | |
161 | mp_cpus_call (CPUMASK_ALL, ASYNC, xcRemote, (void*)&xcArg); | |
162 | } | |
163 | else { | |
164 | mp_cpus_call (cpu_to_cpumask((cpu_t)cpu), ASYNC, xcRemote, (void*)&xcArg); | |
165 | } | |
166 | } | |
167 | ||
168 | /* | |
169 | * Runtime and ABI | |
170 | */ | |
171 | uint64_t | |
172 | dtrace_getreg(struct regs *savearea, uint_t reg) | |
173 | { | |
174 | boolean_t is64Bit = proc_is64bit(current_proc()); | |
175 | x86_saved_state_t *regs = (x86_saved_state_t *)savearea; | |
176 | ||
177 | if (regs == NULL) { | |
178 | DTRACE_CPUFLAG_SET(CPU_DTRACE_ILLOP); | |
179 | return (0); | |
180 | } | |
181 | ||
182 | if (is64Bit) { | |
183 | if (reg <= SS) { | |
184 | reg = regmap[reg]; | |
185 | } else { | |
186 | reg -= (SS + 1); | |
187 | } | |
188 | ||
189 | switch (reg) { | |
190 | case REG_RDI: | |
191 | return (uint64_t)(regs->ss_64.rdi); | |
192 | case REG_RSI: | |
193 | return (uint64_t)(regs->ss_64.rsi); | |
194 | case REG_RDX: | |
195 | return (uint64_t)(regs->ss_64.rdx); | |
196 | case REG_RCX: | |
197 | return (uint64_t)(regs->ss_64.rcx); | |
198 | case REG_R8: | |
199 | return (uint64_t)(regs->ss_64.r8); | |
200 | case REG_R9: | |
201 | return (uint64_t)(regs->ss_64.r9); | |
202 | case REG_RAX: | |
203 | return (uint64_t)(regs->ss_64.rax); | |
204 | case REG_RBX: | |
205 | return (uint64_t)(regs->ss_64.rbx); | |
206 | case REG_RBP: | |
207 | return (uint64_t)(regs->ss_64.rbp); | |
208 | case REG_R10: | |
209 | return (uint64_t)(regs->ss_64.r10); | |
210 | case REG_R11: | |
211 | return (uint64_t)(regs->ss_64.r11); | |
212 | case REG_R12: | |
213 | return (uint64_t)(regs->ss_64.r12); | |
214 | case REG_R13: | |
215 | return (uint64_t)(regs->ss_64.r13); | |
216 | case REG_R14: | |
217 | return (uint64_t)(regs->ss_64.r14); | |
218 | case REG_R15: | |
219 | return (uint64_t)(regs->ss_64.r15); | |
220 | case REG_FS: | |
221 | return (uint64_t)(regs->ss_64.fs); | |
222 | case REG_GS: | |
223 | return (uint64_t)(regs->ss_64.gs); | |
224 | case REG_TRAPNO: | |
225 | return (uint64_t)(regs->ss_64.isf.trapno); | |
226 | case REG_ERR: | |
227 | return (uint64_t)(regs->ss_64.isf.err); | |
228 | case REG_RIP: | |
229 | return (uint64_t)(regs->ss_64.isf.rip); | |
230 | case REG_CS: | |
231 | return (uint64_t)(regs->ss_64.isf.cs); | |
232 | case REG_SS: | |
233 | return (uint64_t)(regs->ss_64.isf.ss); | |
234 | case REG_RFL: | |
235 | return (uint64_t)(regs->ss_64.isf.rflags); | |
236 | case REG_RSP: | |
237 | return (uint64_t)(regs->ss_64.isf.rsp); | |
238 | case REG_DS: | |
239 | case REG_ES: | |
240 | default: | |
241 | DTRACE_CPUFLAG_SET(CPU_DTRACE_ILLOP); | |
242 | return (0); | |
243 | } | |
244 | ||
245 | } else { /* is 32bit user */ | |
246 | /* beyond register SS */ | |
247 | if (reg > x86_SAVED_STATE32_COUNT - 1) { | |
248 | DTRACE_CPUFLAG_SET(CPU_DTRACE_ILLOP); | |
249 | return (0); | |
250 | } | |
251 | return (uint64_t)((unsigned int *)(&(regs->ss_32.gs)))[reg]; | |
252 | } | |
253 | } | |
254 | ||
/*
 * Execute VMREAD with `ndx` as the source operand (in %rdi) and return the
 * value delivered in %rax.
 *
 * Returns 0 when CPU_DTRACE_ILLOP is set after the instruction ran (see
 * dtrace_handle_trap below, which sets it when it recovers from an invalid
 * opcode trap on this exact instruction).  Also returns 0 when the
 * instruction signals failure through the flags; in that case
 * CPU_DTRACE_BADADDR is raised and `ndx` is recorded in cpuc_dtrace_illval.
 */
uint64_t
dtrace_getvmreg(uint_t ndx)
{
	uint64_t reg = 0;
	bool failed = false;

	/*
	 * Any change in the vmread final opcode must be reflected in
	 * dtrace_handle_trap below.
	 *
	 * "ja" is taken only when CF == 0 and ZF == 0, skipping the store of
	 * 1 into `failed`.
	 */
	__asm__ __volatile__(
		"vmread %2, %0\n"
		"ja 1f\n"
		"mov $1, %1\n"
		"1:\n"
	: "=a" (reg), "+r" (failed) : "D" ((uint64_t)ndx));

	/*
	 * Check for a fault in vmread first. If DTrace has recovered from a
	 * fault caused by the vmread above then the value in failed will be
	 * unreliable.
	 */
	if (DTRACE_CPUFLAG_ISSET(CPU_DTRACE_ILLOP)) {
		return 0;
	}

	/* If vmread executed but failed because CF or ZF is 1, report failure. */
	if (failed) {
		DTRACE_CPUFLAG_SET(CPU_DTRACE_BADADDR);
		cpu_core[CPU->cpu_id].cpuc_dtrace_illval = ndx;
		return 0;
	}

	return reg;
}
286 | ||
287 | #define RETURN_OFFSET 4 | |
288 | #define RETURN_OFFSET64 8 | |
289 | ||
290 | static int | |
291 | dtrace_getustack_common(uint64_t *pcstack, int pcstack_limit, user_addr_t pc, | |
292 | user_addr_t sp) | |
293 | { | |
294 | volatile uint16_t *flags = | |
295 | (volatile uint16_t *)&cpu_core[CPU->cpu_id].cpuc_dtrace_flags; | |
296 | ||
297 | #if 0 | |
298 | uintptr_t oldcontext = lwp->lwp_oldcontext; /* XXX signal stack crawl */ | |
299 | size_t s1, s2; | |
300 | #endif | |
301 | int ret = 0; | |
302 | boolean_t is64Bit = proc_is64bit(current_proc()); | |
303 | ||
304 | ASSERT(pcstack == NULL || pcstack_limit > 0); | |
305 | ||
306 | #if 0 /* XXX signal stack crawl */ | |
307 | if (p->p_model == DATAMODEL_NATIVE) { | |
308 | s1 = sizeof (struct frame) + 2 * sizeof (long); | |
309 | s2 = s1 + sizeof (siginfo_t); | |
310 | } else { | |
311 | s1 = sizeof (struct frame32) + 3 * sizeof (int); | |
312 | s2 = s1 + sizeof (siginfo32_t); | |
313 | } | |
314 | #endif | |
315 | ||
316 | while (pc != 0) { | |
317 | ret++; | |
318 | if (pcstack != NULL) { | |
319 | *pcstack++ = (uint64_t)pc; | |
320 | pcstack_limit--; | |
321 | if (pcstack_limit <= 0) | |
322 | break; | |
323 | } | |
324 | ||
325 | if (sp == 0) | |
326 | break; | |
327 | ||
328 | #if 0 /* XXX signal stack crawl */ | |
329 | if (oldcontext == sp + s1 || oldcontext == sp + s2) { | |
330 | if (p->p_model == DATAMODEL_NATIVE) { | |
331 | ucontext_t *ucp = (ucontext_t *)oldcontext; | |
332 | greg_t *gregs = ucp->uc_mcontext.gregs; | |
333 | ||
334 | sp = dtrace_fulword(&gregs[REG_FP]); | |
335 | pc = dtrace_fulword(&gregs[REG_PC]); | |
336 | ||
337 | oldcontext = dtrace_fulword(&ucp->uc_link); | |
338 | } else { | |
339 | ucontext32_t *ucp = (ucontext32_t *)oldcontext; | |
340 | greg32_t *gregs = ucp->uc_mcontext.gregs; | |
341 | ||
342 | sp = dtrace_fuword32(&gregs[EBP]); | |
343 | pc = dtrace_fuword32(&gregs[EIP]); | |
344 | ||
345 | oldcontext = dtrace_fuword32(&ucp->uc_link); | |
346 | } | |
347 | } | |
348 | else | |
349 | #endif | |
350 | { | |
351 | if (is64Bit) { | |
352 | pc = dtrace_fuword64((sp + RETURN_OFFSET64)); | |
353 | sp = dtrace_fuword64(sp); | |
354 | } else { | |
355 | pc = dtrace_fuword32((sp + RETURN_OFFSET)); | |
356 | sp = dtrace_fuword32(sp); | |
357 | } | |
358 | } | |
359 | ||
360 | /* Truncate ustack if the iterator causes fault. */ | |
361 | if (*flags & CPU_DTRACE_FAULT) { | |
362 | *flags &= ~CPU_DTRACE_FAULT; | |
363 | break; | |
364 | } | |
365 | } | |
366 | ||
367 | return (ret); | |
368 | } | |
369 | ||
370 | ||
371 | /* | |
372 | * The return value indicates if we've modified the stack. | |
373 | */ | |
374 | static int | |
375 | dtrace_adjust_stack(uint64_t **pcstack, int *pcstack_limit, user_addr_t *pc, | |
376 | user_addr_t sp) | |
377 | { | |
378 | volatile uint16_t *flags = (volatile uint16_t *) &cpu_core[CPU->cpu_id].cpuc_dtrace_flags; | |
379 | int64_t missing_tos; | |
380 | int rc = 0; | |
381 | boolean_t is64Bit = proc_is64bit(current_proc()); | |
382 | ||
383 | ASSERT(pc != NULL); | |
384 | ||
385 | if (DTRACE_CPUFLAG_ISSET(CPU_DTRACE_ENTRY)) { | |
386 | /* | |
387 | * If we found ourselves in an entry probe, the frame pointer has not | |
388 | * yet been pushed (that happens in the | |
389 | * function prologue). The best approach is to | |
390 | * add the current pc as a missing top of stack, | |
391 | * and back the pc up to the caller, which is stored at the | |
392 | * current stack pointer address since the call | |
393 | * instruction puts it there right before | |
394 | * the branch. | |
395 | */ | |
396 | ||
397 | missing_tos = *pc; | |
398 | ||
399 | if (is64Bit) | |
400 | *pc = dtrace_fuword64(sp); | |
401 | else | |
402 | *pc = dtrace_fuword32(sp); | |
403 | ||
404 | /* Truncate ustack if the iterator causes fault. */ | |
405 | if (*flags & CPU_DTRACE_FAULT) { | |
406 | *flags &= ~CPU_DTRACE_FAULT; | |
407 | } | |
408 | } else { | |
409 | /* | |
410 | * We might have a top of stack override, in which case we just | |
411 | * add that frame without question to the top. This | |
412 | * happens in return probes where you have a valid | |
413 | * frame pointer, but it's for the callers frame | |
414 | * and you'd like to add the pc of the return site | |
415 | * to the frame. | |
416 | */ | |
417 | missing_tos = cpu_core[CPU->cpu_id].cpuc_missing_tos; | |
418 | } | |
419 | ||
420 | if (missing_tos != 0) { | |
421 | if (pcstack != NULL && pcstack_limit != NULL) { | |
422 | /* | |
423 | * If the missing top of stack has been filled out, then | |
424 | * we add it and adjust the size. | |
425 | */ | |
426 | *(*pcstack)++ = missing_tos; | |
427 | (*pcstack_limit)--; | |
428 | } | |
429 | /* | |
430 | * return 1 because we would have changed the | |
431 | * stack whether or not it was passed in. This | |
432 | * ensures the stack count is correct | |
433 | */ | |
434 | rc = 1; | |
435 | } | |
436 | return rc; | |
437 | } | |
438 | ||
439 | void | |
440 | dtrace_getupcstack(uint64_t *pcstack, int pcstack_limit) | |
441 | { | |
442 | thread_t thread = current_thread(); | |
443 | x86_saved_state_t *regs; | |
444 | user_addr_t pc, sp, fp; | |
445 | volatile uint16_t *flags = | |
446 | (volatile uint16_t *)&cpu_core[CPU->cpu_id].cpuc_dtrace_flags; | |
447 | int n; | |
448 | boolean_t is64Bit = proc_is64bit(current_proc()); | |
449 | ||
450 | if (*flags & CPU_DTRACE_FAULT) | |
451 | return; | |
452 | ||
453 | if (pcstack_limit <= 0) | |
454 | return; | |
455 | ||
456 | /* | |
457 | * If there's no user context we still need to zero the stack. | |
458 | */ | |
459 | if (thread == NULL) | |
460 | goto zero; | |
461 | ||
462 | pal_register_cache_state(thread, VALID); | |
463 | regs = (x86_saved_state_t *)find_user_regs(thread); | |
464 | if (regs == NULL) | |
465 | goto zero; | |
466 | ||
467 | *pcstack++ = (uint64_t)dtrace_proc_selfpid(); | |
468 | pcstack_limit--; | |
469 | ||
470 | if (pcstack_limit <= 0) | |
471 | return; | |
472 | ||
473 | if (is64Bit) { | |
474 | pc = regs->ss_64.isf.rip; | |
475 | sp = regs->ss_64.isf.rsp; | |
476 | fp = regs->ss_64.rbp; | |
477 | } else { | |
478 | pc = regs->ss_32.eip; | |
479 | sp = regs->ss_32.uesp; | |
480 | fp = regs->ss_32.ebp; | |
481 | } | |
482 | ||
483 | /* | |
484 | * The return value indicates if we've modified the stack. | |
485 | * Since there is nothing else to fix up in either case, | |
486 | * we can safely ignore it here. | |
487 | */ | |
488 | (void)dtrace_adjust_stack(&pcstack, &pcstack_limit, &pc, sp); | |
489 | ||
490 | if(pcstack_limit <= 0) | |
491 | return; | |
492 | ||
493 | /* | |
494 | * Note that unlike ppc, the x86 code does not use | |
495 | * CPU_DTRACE_USTACK_FP. This is because x86 always | |
496 | * traces from the fp, even in syscall/profile/fbt | |
497 | * providers. | |
498 | */ | |
499 | n = dtrace_getustack_common(pcstack, pcstack_limit, pc, fp); | |
500 | ASSERT(n >= 0); | |
501 | ASSERT(n <= pcstack_limit); | |
502 | ||
503 | pcstack += n; | |
504 | pcstack_limit -= n; | |
505 | ||
506 | zero: | |
507 | while (pcstack_limit-- > 0) | |
508 | *pcstack++ = 0; | |
509 | } | |
510 | ||
511 | int | |
512 | dtrace_getustackdepth(void) | |
513 | { | |
514 | thread_t thread = current_thread(); | |
515 | x86_saved_state_t *regs; | |
516 | user_addr_t pc, sp, fp; | |
517 | int n = 0; | |
518 | boolean_t is64Bit = proc_is64bit(current_proc()); | |
519 | ||
520 | if (thread == NULL) | |
521 | return 0; | |
522 | ||
523 | if (DTRACE_CPUFLAG_ISSET(CPU_DTRACE_FAULT)) | |
524 | return (-1); | |
525 | ||
526 | pal_register_cache_state(thread, VALID); | |
527 | regs = (x86_saved_state_t *)find_user_regs(thread); | |
528 | if (regs == NULL) | |
529 | return 0; | |
530 | ||
531 | if (is64Bit) { | |
532 | pc = regs->ss_64.isf.rip; | |
533 | sp = regs->ss_64.isf.rsp; | |
534 | fp = regs->ss_64.rbp; | |
535 | } else { | |
536 | pc = regs->ss_32.eip; | |
537 | sp = regs->ss_32.uesp; | |
538 | fp = regs->ss_32.ebp; | |
539 | } | |
540 | ||
541 | if (dtrace_adjust_stack(NULL, NULL, &pc, sp) == 1) { | |
542 | /* | |
543 | * we would have adjusted the stack if we had | |
544 | * supplied one (that is what rc == 1 means). | |
545 | * Also, as a side effect, the pc might have | |
546 | * been fixed up, which is good for calling | |
547 | * in to dtrace_getustack_common. | |
548 | */ | |
549 | n++; | |
550 | } | |
551 | ||
552 | /* | |
553 | * Note that unlike ppc, the x86 code does not use | |
554 | * CPU_DTRACE_USTACK_FP. This is because x86 always | |
555 | * traces from the fp, even in syscall/profile/fbt | |
556 | * providers. | |
557 | */ | |
558 | ||
559 | n += dtrace_getustack_common(NULL, 0, pc, fp); | |
560 | ||
561 | return (n); | |
562 | } | |
563 | ||
564 | void | |
565 | dtrace_getufpstack(uint64_t *pcstack, uint64_t *fpstack, int pcstack_limit) | |
566 | { | |
567 | thread_t thread = current_thread(); | |
568 | savearea_t *regs; | |
569 | user_addr_t pc, sp; | |
570 | volatile uint16_t *flags = | |
571 | (volatile uint16_t *)&cpu_core[CPU->cpu_id].cpuc_dtrace_flags; | |
572 | #if 0 | |
573 | uintptr_t oldcontext; | |
574 | size_t s1, s2; | |
575 | #endif | |
576 | boolean_t is64Bit = proc_is64bit(current_proc()); | |
577 | ||
578 | if (*flags & CPU_DTRACE_FAULT) | |
579 | return; | |
580 | ||
581 | if (pcstack_limit <= 0) | |
582 | return; | |
583 | ||
584 | /* | |
585 | * If there's no user context we still need to zero the stack. | |
586 | */ | |
587 | if (thread == NULL) | |
588 | goto zero; | |
589 | ||
590 | regs = (savearea_t *)find_user_regs(thread); | |
591 | if (regs == NULL) | |
592 | goto zero; | |
593 | ||
594 | *pcstack++ = (uint64_t)dtrace_proc_selfpid(); | |
595 | pcstack_limit--; | |
596 | ||
597 | if (pcstack_limit <= 0) | |
598 | return; | |
599 | ||
600 | pc = regs->ss_32.eip; | |
601 | sp = regs->ss_32.ebp; | |
602 | ||
603 | #if 0 /* XXX signal stack crawl */ | |
604 | oldcontext = lwp->lwp_oldcontext; | |
605 | ||
606 | if (p->p_model == DATAMODEL_NATIVE) { | |
607 | s1 = sizeof (struct frame) + 2 * sizeof (long); | |
608 | s2 = s1 + sizeof (siginfo_t); | |
609 | } else { | |
610 | s1 = sizeof (struct frame32) + 3 * sizeof (int); | |
611 | s2 = s1 + sizeof (siginfo32_t); | |
612 | } | |
613 | #endif | |
614 | ||
615 | if(dtrace_adjust_stack(&pcstack, &pcstack_limit, &pc, sp) == 1) { | |
616 | /* | |
617 | * we made a change. | |
618 | */ | |
619 | *fpstack++ = 0; | |
620 | if (pcstack_limit <= 0) | |
621 | return; | |
622 | } | |
623 | ||
624 | while (pc != 0) { | |
625 | *pcstack++ = (uint64_t)pc; | |
626 | *fpstack++ = sp; | |
627 | pcstack_limit--; | |
628 | if (pcstack_limit <= 0) | |
629 | break; | |
630 | ||
631 | if (sp == 0) | |
632 | break; | |
633 | ||
634 | #if 0 /* XXX signal stack crawl */ | |
635 | if (oldcontext == sp + s1 || oldcontext == sp + s2) { | |
636 | if (p->p_model == DATAMODEL_NATIVE) { | |
637 | ucontext_t *ucp = (ucontext_t *)oldcontext; | |
638 | greg_t *gregs = ucp->uc_mcontext.gregs; | |
639 | ||
640 | sp = dtrace_fulword(&gregs[REG_FP]); | |
641 | pc = dtrace_fulword(&gregs[REG_PC]); | |
642 | ||
643 | oldcontext = dtrace_fulword(&ucp->uc_link); | |
644 | } else { | |
645 | ucontext_t *ucp = (ucontext_t *)oldcontext; | |
646 | greg_t *gregs = ucp->uc_mcontext.gregs; | |
647 | ||
648 | sp = dtrace_fuword32(&gregs[EBP]); | |
649 | pc = dtrace_fuword32(&gregs[EIP]); | |
650 | ||
651 | oldcontext = dtrace_fuword32(&ucp->uc_link); | |
652 | } | |
653 | } | |
654 | else | |
655 | #endif | |
656 | { | |
657 | if (is64Bit) { | |
658 | pc = dtrace_fuword64((sp + RETURN_OFFSET64)); | |
659 | sp = dtrace_fuword64(sp); | |
660 | } else { | |
661 | pc = dtrace_fuword32((sp + RETURN_OFFSET)); | |
662 | sp = dtrace_fuword32(sp); | |
663 | } | |
664 | } | |
665 | ||
666 | /* Truncate ustack if the iterator causes fault. */ | |
667 | if (*flags & CPU_DTRACE_FAULT) { | |
668 | *flags &= ~CPU_DTRACE_FAULT; | |
669 | break; | |
670 | } | |
671 | } | |
672 | ||
673 | zero: | |
674 | while (pcstack_limit-- > 0) | |
675 | *pcstack++ = 0; | |
676 | } | |
677 | ||
678 | void | |
679 | dtrace_getpcstack(pc_t *pcstack, int pcstack_limit, int aframes, | |
680 | uint32_t *intrpc) | |
681 | { | |
682 | struct frame *fp = (struct frame *)__builtin_frame_address(0); | |
683 | struct frame *nextfp, *minfp, *stacktop; | |
684 | int depth = 0; | |
685 | int last = 0; | |
686 | uintptr_t pc; | |
687 | uintptr_t caller = CPU->cpu_dtrace_caller; | |
688 | int on_intr; | |
689 | ||
690 | if ((on_intr = CPU_ON_INTR(CPU)) != 0) | |
691 | stacktop = (struct frame *)dtrace_get_cpu_int_stack_top(); | |
692 | else | |
693 | stacktop = (struct frame *)(dtrace_get_kernel_stack(current_thread()) + kernel_stack_size); | |
694 | ||
695 | minfp = fp; | |
696 | ||
697 | aframes++; | |
698 | ||
699 | if (intrpc != NULL && depth < pcstack_limit) | |
700 | pcstack[depth++] = (pc_t)intrpc; | |
701 | ||
702 | while (depth < pcstack_limit) { | |
703 | nextfp = *(struct frame **)fp; | |
704 | pc = *(uintptr_t *)(((uintptr_t)fp) + RETURN_OFFSET64); | |
705 | ||
706 | if (nextfp <= minfp || nextfp >= stacktop) { | |
707 | if (on_intr) { | |
708 | /* | |
709 | * Hop from interrupt stack to thread stack. | |
710 | */ | |
711 | vm_offset_t kstack_base = dtrace_get_kernel_stack(current_thread()); | |
712 | ||
713 | minfp = (struct frame *)kstack_base; | |
714 | stacktop = (struct frame *)(kstack_base + kernel_stack_size); | |
715 | ||
716 | on_intr = 0; | |
717 | continue; | |
718 | } | |
719 | /* | |
720 | * This is the last frame we can process; indicate | |
721 | * that we should return after processing this frame. | |
722 | */ | |
723 | last = 1; | |
724 | } | |
725 | ||
726 | if (aframes > 0) { | |
727 | if (--aframes == 0 && caller != 0) { | |
728 | /* | |
729 | * We've just run out of artificial frames, | |
730 | * and we have a valid caller -- fill it in | |
731 | * now. | |
732 | */ | |
733 | ASSERT(depth < pcstack_limit); | |
734 | pcstack[depth++] = (pc_t)caller; | |
735 | caller = 0; | |
736 | } | |
737 | } else { | |
738 | if (depth < pcstack_limit) | |
739 | pcstack[depth++] = (pc_t)pc; | |
740 | } | |
741 | ||
742 | if (last) { | |
743 | while (depth < pcstack_limit) | |
744 | pcstack[depth++] = 0; | |
745 | return; | |
746 | } | |
747 | ||
748 | fp = nextfp; | |
749 | minfp = fp; | |
750 | } | |
751 | } | |
752 | ||
/*
 * Frame record as laid out at a frame-pointer address: the saved frame
 * pointer of the previous frame followed by the return address.
 */
struct frame {
	struct frame *backchain;	/* link to the previous frame record */
	uintptr_t retaddr;		/* return address stored next to the link */
};
757 | ||
/*
 * Fetch probe argument number `arg` by walking up to `aframes` frames from
 * this function's own frame.
 *
 * If one of the visited return addresses falls inside the range
 * (dtrace_invop_callsite_pre, dtrace_invop_callsite_post], the argument is
 * taken from the register state saved by the trap (or from the trapped
 * stack for arguments beyond the sixth).  Otherwise the provider is assumed
 * to have called dtrace_probe() directly and the argument is read from the
 * stack of the frame reached by the walk.
 *
 * The final load goes through dtrace_canload()/dtrace_load64(); 0 is
 * returned when the load is not permitted.  CPU_DTRACE_ILLOP is set (and 0
 * returned) when a register-passed argument is requested on the direct-call
 * path.
 */
uint64_t
dtrace_getarg(int arg, int aframes, dtrace_mstate_t *mstate, dtrace_vstate_t *vstate)
{
	uint64_t val = 0;
	struct frame *fp = (struct frame *)__builtin_frame_address(0);
	uintptr_t *stack;
	uintptr_t pc;
	int i;


	/*
	 * A total of 6 arguments are passed via registers; any argument with
	 * index of 5 or lower is therefore in a register.
	 */
	int inreg = 5;

	for (i = 1; i <= aframes; i++) {
		fp = fp->backchain;
		pc = fp->retaddr;

		if (dtrace_invop_callsite_pre != NULL
			&& pc > (uintptr_t)dtrace_invop_callsite_pre
			&& pc <= (uintptr_t)dtrace_invop_callsite_post) {
			/*
			 * In the case of x86_64, we will use the pointer to the
			 * save area structure that was pushed when we took the
			 * trap.  To get this structure, we must increment
			 * beyond the frame structure. If the
			 * argument that we're seeking is passed on the stack,
			 * we'll pull the true stack pointer out of the saved
			 * registers and decrement our argument by the number
			 * of arguments passed in registers; if the argument
			 * we're seeking is passed in registers, we can just
			 * load it directly.
			 */

			/* fp points to frame of dtrace_invop() activation. */
			fp = fp->backchain; /* to fbt_perfcallback() activation. */
			fp = fp->backchain; /* to kernel_trap() activation. */
			fp = fp->backchain; /* to trap_from_kernel() activation. */

			/* The saved state sits immediately after that frame record. */
			x86_saved_state_t *tagged_regs = (x86_saved_state_t *)&fp[1];
			x86_saved_state64_t *saved_state = saved_state64(tagged_regs);

			if (arg <= inreg) {
				/* Index the saved registers as an array starting at rdi. */
				stack = (uintptr_t *)(void*)&saved_state->rdi;
			} else {
				fp = (struct frame *)(saved_state->isf.rsp);
				stack = (uintptr_t *)&fp[1]; /* Find marshalled arguments */
				arg -= inreg + 1;
			}
			goto load;
		}
	}

	/*
	 * We know that we did not come through a trap to get into
	 * dtrace_probe() --  We arrive here when the provider has
	 * called dtrace_probe() directly.
	 * The probe ID is the first argument to dtrace_probe().
	 * We must advance beyond that to get the argX.
	 */
	arg++; /* Advance past probeID */

	if (arg <= inreg) {
		/*
		 * This shouldn't happen.  If the argument is passed in a
		 * register then it should have been, well, passed in a
		 * register...
		 */
		DTRACE_CPUFLAG_SET(CPU_DTRACE_ILLOP);
		return (0);
	}

	arg -= (inreg + 1);
	stack = (uintptr_t *)&fp[1]; /* Find marshalled arguments */

load:
	/* val stays 0 when the load is not permitted. */
	if (dtrace_canload((uint64_t)(stack + arg), sizeof(uint64_t),
		mstate, vstate)) {
		/* dtrace_probe arguments arg0 ... arg4 are 64bits wide */
		val = dtrace_load64((uint64_t)(stack + arg));
	}

	return (val);
}
845 | ||
846 | /* | |
847 | * Load/Store Safety | |
848 | */ | |
849 | void | |
850 | dtrace_toxic_ranges(void (*func)(uintptr_t base, uintptr_t limit)) | |
851 | { | |
852 | /* | |
853 | * "base" is the smallest toxic address in the range, "limit" is the first | |
854 | * VALID address greater than "base". | |
855 | */ | |
856 | func(0x0, VM_MIN_KERNEL_AND_KEXT_ADDRESS); | |
857 | if (VM_MAX_KERNEL_ADDRESS < ~(uintptr_t)0) | |
858 | func(VM_MAX_KERNEL_ADDRESS + 1, ~(uintptr_t)0); | |
859 | } | |
860 | ||
861 | /* | |
862 | * Trap Safety | |
863 | */ | |
864 | extern boolean_t dtrace_handle_trap(int, x86_saved_state_t *); | |
865 | ||
866 | boolean_t | |
867 | dtrace_handle_trap(int trapno, x86_saved_state_t *state) | |
868 | { | |
869 | x86_saved_state64_t *saved_state = saved_state64(state); | |
870 | ||
871 | if (!DTRACE_CPUFLAG_ISSET(CPU_DTRACE_NOFAULT)) { | |
872 | return FALSE; | |
873 | } | |
874 | ||
875 | /* | |
876 | * General purpose solution would require pulling in disassembler. Right now there | |
877 | * is only one specific case to be handled so it is hardcoded here. | |
878 | */ | |
879 | if (trapno == T_INVALID_OPCODE) { | |
880 | uint8_t *inst = (uint8_t *)saved_state->isf.rip; | |
881 | ||
882 | /* vmread %rdi, %rax */ | |
883 | if (inst[0] == 0x0f && inst[1] == 0x78 && inst[2] == 0xf8) { | |
884 | DTRACE_CPUFLAG_SET(CPU_DTRACE_ILLOP); | |
885 | saved_state->isf.rip += 3; | |
886 | return TRUE; | |
887 | } | |
888 | } | |
889 | ||
890 | return FALSE; | |
891 | } |