]> git.saurik.com Git - apple/xnu.git/blob - bsd/dev/dtrace/systrace.c
c8a6305b1f8c64d12c94800da151698ef49a299e
[apple/xnu.git] / bsd / dev / dtrace / systrace.c
1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21 /*
22 * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
23 * Use is subject to license terms.
24 */
25
26 /* #pragma ident "@(#)systrace.c 1.6 06/09/19 SMI" */
27
28 #ifdef KERNEL
29 #ifndef _KERNEL
30 #define _KERNEL /* Solaris vs. Darwin */
31 #endif
32 #endif
33
34 #include <kern/thread.h>
35 #include <mach/thread_status.h>
36
37 /* XXX All of these should really be derived from syscall_sw.h */
38 #if defined (__x86_64__)
39 #define SYSCALL_CLASS_SHIFT 24
40 #define SYSCALL_CLASS_MASK (0xFF << SYSCALL_CLASS_SHIFT)
41 #define SYSCALL_NUMBER_MASK (~SYSCALL_CLASS_MASK)
42 #define I386_SYSCALL_NUMBER_MASK (0xFFFF)
43 #endif
44
45 #include <sys/param.h>
46 #include <sys/systm.h>
47 #include <sys/proc.h>
48 #include <sys/errno.h>
49 #include <sys/ioctl.h>
50 #include <sys/conf.h>
51 #include <sys/fcntl.h>
52 #include <miscfs/devfs/devfs.h>
53
54 #include <sys/dtrace.h>
55 #include <sys/dtrace_impl.h>
56 #include "systrace.h"
57 #include <sys/stat.h>
58 #include <sys/systm.h>
59 #include <sys/conf.h>
60 #include <sys/user.h>
61
62 #include <machine/pal_routines.h>
63
64 #if defined (__x86_64__)
65 #define SYSTRACE_ARTIFICIAL_FRAMES 2
66 #define MACHTRACE_ARTIFICIAL_FRAMES 3
67 #else
68 #error Unknown Architecture
69 #endif
70
71 #include <sys/sysent.h>
72 #define sy_callc sy_call /* Map Solaris slot name to Darwin's */
73 #define NSYSCALL nsysent /* and is less than 500 or so */
74
75 extern const char *syscallnames[];
76
77 #include <sys/dtrace_glue.h>
78 #define casptr dtrace_casptr
79 #define membar_enter dtrace_membar_producer
80
81 #define LOADABLE_SYSCALL(a) 0 /* Not pertinent to Darwin. */
82 #define LOADED_SYSCALL(a) 1 /* Not pertinent to Darwin. */
83
84 extern lck_attr_t* dtrace_lck_attr;
85 extern lck_grp_t* dtrace_lck_grp;
86 static lck_mtx_t dtrace_systrace_lock; /* probe state lock */
87
88 systrace_sysent_t *systrace_sysent = NULL;
89 void (*systrace_probe)(dtrace_id_t, uint64_t, uint64_t, uint64_t, uint64_t, uint64_t);
90
91 static uint64_t systrace_getarg(void *, dtrace_id_t, void *, int, int);
92
93 void
94 systrace_stub(dtrace_id_t id, uint64_t arg0, uint64_t arg1,
95 uint64_t arg2, uint64_t arg3, uint64_t arg4)
96 {
97 #pragma unused(id,arg0,arg1,arg2,arg3,arg4)
98 }
99
100 int32_t
101 dtrace_systrace_syscall(struct proc *pp, void *uap, int *rv)
102 {
103 unsigned short code; /* The system call number */
104
105 systrace_sysent_t *sy;
106 dtrace_id_t id;
107 int32_t rval;
108 #if 0 /* XXX */
109 proc_t *p;
110 #endif
111 syscall_arg_t *ip = (syscall_arg_t *)uap;
112
113 #if defined (__x86_64__)
114 {
115 pal_register_cache_state(current_thread(), VALID);
116 x86_saved_state_t *tagged_regs = (x86_saved_state_t *)find_user_regs(current_thread());
117
118 if (is_saved_state64(tagged_regs)) {
119 x86_saved_state64_t *regs = saved_state64(tagged_regs);
120 code = regs->rax & SYSCALL_NUMBER_MASK;
121 /*
122 * Check for indirect system call... system call number
123 * passed as 'arg0'
124 */
125 if (code == 0) {
126 code = regs->rdi;
127 }
128 } else {
129 code = saved_state32(tagged_regs)->eax & I386_SYSCALL_NUMBER_MASK;
130
131 if (code == 0) {
132 vm_offset_t params = (vm_offset_t) (saved_state32(tagged_regs)->uesp + sizeof (int));
133 code = fuword(params);
134 }
135 }
136 }
137 #else
138 #error Unknown Architecture
139 #endif
140
141 // Bounds "check" the value of code a la unix_syscall
142 sy = (code >= NUM_SYSENT) ? &systrace_sysent[63] : &systrace_sysent[code];
143
144 if ((id = sy->stsy_entry) != DTRACE_IDNONE) {
145 uthread_t uthread = (uthread_t)get_bsdthread_info(current_thread());
146 if (uthread)
147 uthread->t_dtrace_syscall_args = (void *)ip;
148
149 if (ip)
150 (*systrace_probe)(id, *ip, *(ip+1), *(ip+2), *(ip+3), *(ip+4));
151 else
152 (*systrace_probe)(id, 0, 0, 0, 0, 0);
153
154 if (uthread)
155 uthread->t_dtrace_syscall_args = (void *)0;
156 }
157
158 #if 0 /* XXX */
159 /*
160 * APPLE NOTE: Not implemented.
161 * We want to explicitly allow DTrace consumers to stop a process
162 * before it actually executes the meat of the syscall.
163 */
164 p = ttoproc(curthread);
165 mutex_enter(&p->p_lock);
166 if (curthread->t_dtrace_stop && !curthread->t_lwp->lwp_nostop) {
167 curthread->t_dtrace_stop = 0;
168 stop(PR_REQUESTED, 0);
169 }
170 mutex_exit(&p->p_lock);
171 #endif
172
173 rval = (*sy->stsy_underlying)(pp, uap, rv);
174
175 if ((id = sy->stsy_return) != DTRACE_IDNONE) {
176 uint64_t munged_rv0, munged_rv1;
177 uthread_t uthread = (uthread_t)get_bsdthread_info(current_thread());
178
179 if (uthread)
180 uthread->t_dtrace_errno = rval; /* Establish t_dtrace_errno now in case this enabling refers to it. */
181
182 /*
183 * "Decode" rv for use in the call to dtrace_probe()
184 */
185 if (rval == ERESTART) {
186 munged_rv0 = -1LL; /* System call will be reissued in user mode. Make DTrace report a -1 return. */
187 munged_rv1 = -1LL;
188 } else if (rval != EJUSTRETURN) {
189 if (rval) {
190 munged_rv0 = -1LL; /* Mimic what libc will do. */
191 munged_rv1 = -1LL;
192 } else {
193 switch (sy->stsy_return_type) {
194 case _SYSCALL_RET_INT_T:
195 munged_rv0 = rv[0];
196 munged_rv1 = rv[1];
197 break;
198 case _SYSCALL_RET_UINT_T:
199 munged_rv0 = ((u_int)rv[0]);
200 munged_rv1 = ((u_int)rv[1]);
201 break;
202 case _SYSCALL_RET_OFF_T:
203 case _SYSCALL_RET_UINT64_T:
204 munged_rv0 = *(u_int64_t *)rv;
205 munged_rv1 = 0LL;
206 break;
207 case _SYSCALL_RET_ADDR_T:
208 case _SYSCALL_RET_SIZE_T:
209 case _SYSCALL_RET_SSIZE_T:
210 munged_rv0 = *(user_addr_t *)rv;
211 munged_rv1 = 0LL;
212 break;
213 case _SYSCALL_RET_NONE:
214 munged_rv0 = 0LL;
215 munged_rv1 = 0LL;
216 break;
217 default:
218 munged_rv0 = 0LL;
219 munged_rv1 = 0LL;
220 break;
221 }
222 }
223 } else {
224 munged_rv0 = 0LL;
225 munged_rv1 = 0LL;
226 }
227
228 /*
229 * <http://mail.opensolaris.org/pipermail/dtrace-discuss/2007-January/003276.html> says:
230 *
231 * "This is a bit of an historical artifact. At first, the syscall provider just
232 * had its return value in arg0, and the fbt and pid providers had their return
233 * values in arg1 (so that we could use arg0 for the offset of the return site).
234 *
235 * We inevitably started writing scripts where we wanted to see the return
236 * values from probes in all three providers, and we made this script easier
237 * to write by replicating the syscall return values in arg1 to match fbt and
238 * pid. We debated briefly about removing the return value from arg0, but
239 * decided that it would be less confusing to have the same data in two places
240 * than to have some non-helpful, non-intuitive value in arg0.
241 *
242 * This change was made 4/23/2003 according to the DTrace project's putback log."
243 */
244 (*systrace_probe)(id, munged_rv0, munged_rv0, munged_rv1, (uint64_t)rval, 0);
245 }
246
247 return (rval);
248 }
249
250 void
251 dtrace_systrace_syscall_return(unsigned short code, int rval, int *rv)
252 {
253 systrace_sysent_t *sy;
254 dtrace_id_t id;
255
256 // Bounds "check" the value of code a la unix_syscall_return
257 sy = (code >= NUM_SYSENT) ? &systrace_sysent[63] : &systrace_sysent[code];
258
259 if ((id = sy->stsy_return) != DTRACE_IDNONE) {
260 uint64_t munged_rv0, munged_rv1;
261 uthread_t uthread = (uthread_t)get_bsdthread_info(current_thread());
262
263 if (uthread)
264 uthread->t_dtrace_errno = rval; /* Establish t_dtrace_errno now in case this enabling refers to it. */
265
266 /*
267 * "Decode" rv for use in the call to dtrace_probe()
268 */
269 if (rval == ERESTART) {
270 munged_rv0 = -1LL; /* System call will be reissued in user mode. Make DTrace report a -1 return. */
271 munged_rv1 = -1LL;
272 } else if (rval != EJUSTRETURN) {
273 if (rval) {
274 munged_rv0 = -1LL; /* Mimic what libc will do. */
275 munged_rv1 = -1LL;
276 } else {
277 switch (sy->stsy_return_type) {
278 case _SYSCALL_RET_INT_T:
279 munged_rv0 = rv[0];
280 munged_rv1 = rv[1];
281 break;
282 case _SYSCALL_RET_UINT_T:
283 munged_rv0 = ((u_int)rv[0]);
284 munged_rv1 = ((u_int)rv[1]);
285 break;
286 case _SYSCALL_RET_OFF_T:
287 case _SYSCALL_RET_UINT64_T:
288 munged_rv0 = *(u_int64_t *)rv;
289 munged_rv1 = 0LL;
290 break;
291 case _SYSCALL_RET_ADDR_T:
292 case _SYSCALL_RET_SIZE_T:
293 case _SYSCALL_RET_SSIZE_T:
294 munged_rv0 = *(user_addr_t *)rv;
295 munged_rv1 = 0LL;
296 break;
297 case _SYSCALL_RET_NONE:
298 munged_rv0 = 0LL;
299 munged_rv1 = 0LL;
300 break;
301 default:
302 munged_rv0 = 0LL;
303 munged_rv1 = 0LL;
304 break;
305 }
306 }
307 } else {
308 munged_rv0 = 0LL;
309 munged_rv1 = 0LL;
310 }
311
312 (*systrace_probe)(id, munged_rv0, munged_rv0, munged_rv1, (uint64_t)rval, 0);
313 }
314 }
315
316 #define SYSTRACE_SHIFT 16
317 #define SYSTRACE_ISENTRY(x) ((int)(x) >> SYSTRACE_SHIFT)
318 #define SYSTRACE_SYSNUM(x) ((int)(x) & ((1 << SYSTRACE_SHIFT) - 1))
319 #define SYSTRACE_ENTRY(id) ((1 << SYSTRACE_SHIFT) | (id))
320 #define SYSTRACE_RETURN(id) (id)
321
322 #if ((1 << SYSTRACE_SHIFT) <= NSYSCALL)
323 #error 1 << SYSTRACE_SHIFT must exceed number of system calls
324 #endif
325
326 static dev_info_t *systrace_devi;
327 static dtrace_provider_id_t systrace_id;
328
329 /*
330 * APPLE NOTE: Avoid name clash with Darwin automagic conf symbol.
331 * See balanced undef below.
332 */
333 #define systrace_init _systrace_init
334
335 static void
336 systrace_init(struct sysent *actual, systrace_sysent_t **interposed)
337 {
338
339 systrace_sysent_t *ssysent = *interposed; /* Avoid sysent shadow warning
340 from bsd/sys/sysent.h */
341 int i;
342
343 if (ssysent == NULL) {
344 *interposed = ssysent = kmem_zalloc(sizeof (systrace_sysent_t) *
345 NSYSCALL, KM_SLEEP);
346 }
347
348 for (i = 0; i < NSYSCALL; i++) {
349 struct sysent *a = &actual[i];
350 systrace_sysent_t *s = &ssysent[i];
351
352 if (LOADABLE_SYSCALL(a) && !LOADED_SYSCALL(a))
353 continue;
354
355 if (a->sy_callc == dtrace_systrace_syscall)
356 continue;
357
358 #ifdef _SYSCALL32_IMPL
359 if (a->sy_callc == dtrace_systrace_syscall32)
360 continue;
361 #endif
362
363 s->stsy_underlying = a->sy_callc;
364 s->stsy_return_type = a->sy_return_type;
365 }
366 lck_mtx_init(&dtrace_systrace_lock, dtrace_lck_grp, dtrace_lck_attr);
367 }
368
369
370 /*ARGSUSED*/
371 static void
372 systrace_provide(void *arg, const dtrace_probedesc_t *desc)
373 {
374 #pragma unused(arg) /* __APPLE__ */
375 int i;
376
377 if (desc != NULL)
378 return;
379
380 systrace_init(sysent, &systrace_sysent);
381 #ifdef _SYSCALL32_IMPL
382 systrace_init(sysent32, &systrace_sysent32);
383 #endif
384
385 for (i = 0; i < NSYSCALL; i++) {
386 if (systrace_sysent[i].stsy_underlying == NULL)
387 continue;
388
389 if (dtrace_probe_lookup(systrace_id, NULL,
390 syscallnames[i], "entry") != 0)
391 continue;
392
393 (void) dtrace_probe_create(systrace_id, NULL, syscallnames[i],
394 "entry", SYSTRACE_ARTIFICIAL_FRAMES,
395 (void *)((uintptr_t)SYSTRACE_ENTRY(i)));
396 (void) dtrace_probe_create(systrace_id, NULL, syscallnames[i],
397 "return", SYSTRACE_ARTIFICIAL_FRAMES,
398 (void *)((uintptr_t)SYSTRACE_RETURN(i)));
399
400 systrace_sysent[i].stsy_entry = DTRACE_IDNONE;
401 systrace_sysent[i].stsy_return = DTRACE_IDNONE;
402 #ifdef _SYSCALL32_IMPL
403 systrace_sysent32[i].stsy_entry = DTRACE_IDNONE;
404 systrace_sysent32[i].stsy_return = DTRACE_IDNONE;
405 #endif
406 }
407 }
408 #undef systrace_init
409
410 /*ARGSUSED*/
411 static void
412 systrace_destroy(void *arg, dtrace_id_t id, void *parg)
413 {
414 #pragma unused(arg,id) /* __APPLE__ */
415
416 int sysnum = SYSTRACE_SYSNUM((uintptr_t)parg);
417
418 #pragma unused(sysnum) /* __APPLE__ */
419 /*
420 * There's nothing to do here but assert that we have actually been
421 * disabled.
422 */
423 if (SYSTRACE_ISENTRY((uintptr_t)parg)) {
424 ASSERT(systrace_sysent[sysnum].stsy_entry == DTRACE_IDNONE);
425 #ifdef _SYSCALL32_IMPL
426 ASSERT(systrace_sysent32[sysnum].stsy_entry == DTRACE_IDNONE);
427 #endif
428 } else {
429 ASSERT(systrace_sysent[sysnum].stsy_return == DTRACE_IDNONE);
430 #ifdef _SYSCALL32_IMPL
431 ASSERT(systrace_sysent32[sysnum].stsy_return == DTRACE_IDNONE);
432 #endif
433 }
434 }
435
436 /*ARGSUSED*/
437 static int
438 systrace_enable(void *arg, dtrace_id_t id, void *parg)
439 {
440 #pragma unused(arg) /* __APPLE__ */
441
442 int sysnum = SYSTRACE_SYSNUM((uintptr_t)parg);
443 int enabled = (systrace_sysent[sysnum].stsy_entry != DTRACE_IDNONE ||
444 systrace_sysent[sysnum].stsy_return != DTRACE_IDNONE);
445
446 if (SYSTRACE_ISENTRY((uintptr_t)parg)) {
447 systrace_sysent[sysnum].stsy_entry = id;
448 #ifdef _SYSCALL32_IMPL
449 systrace_sysent32[sysnum].stsy_entry = id;
450 #endif
451 } else {
452 systrace_sysent[sysnum].stsy_return = id;
453 #ifdef _SYSCALL32_IMPL
454 systrace_sysent32[sysnum].stsy_return = id;
455 #endif
456 }
457
458 if (enabled) {
459 ASSERT(sysent[sysnum].sy_callc == dtrace_systrace_syscall);
460 return(0);
461 }
462 #ifdef _SYSCALL32_IMPL
463 (void) casptr(&sysent32[sysnum].sy_callc,
464 (void *)systrace_sysent32[sysnum].stsy_underlying,
465 (void *)dtrace_systrace_syscall32);
466 #endif
467
468 lck_mtx_lock(&dtrace_systrace_lock);
469 if (sysent[sysnum].sy_callc == systrace_sysent[sysnum].stsy_underlying) {
470 vm_offset_t dss = (vm_offset_t)&dtrace_systrace_syscall;
471 ml_nofault_copy((vm_offset_t)&dss, (vm_offset_t)&sysent[sysnum].sy_callc, sizeof(vm_offset_t));
472 }
473 lck_mtx_unlock(&dtrace_systrace_lock);
474 return (0);
475 }
476
477 /*ARGSUSED*/
478 static void
479 systrace_disable(void *arg, dtrace_id_t id, void *parg)
480 {
481 #pragma unused(arg,id) /* __APPLE__ */
482
483 int sysnum = SYSTRACE_SYSNUM((uintptr_t)parg);
484 int disable = (systrace_sysent[sysnum].stsy_entry == DTRACE_IDNONE ||
485 systrace_sysent[sysnum].stsy_return == DTRACE_IDNONE);
486
487 if (disable) {
488 lck_mtx_lock(&dtrace_systrace_lock);
489 if (sysent[sysnum].sy_callc == dtrace_systrace_syscall)
490 ml_nofault_copy((vm_offset_t)&systrace_sysent[sysnum].stsy_underlying, (vm_offset_t)&sysent[sysnum].sy_callc, sizeof(systrace_sysent[sysnum].stsy_underlying));
491 lck_mtx_unlock(&dtrace_systrace_lock);
492
493 #ifdef _SYSCALL32_IMPL
494 (void) casptr(&sysent32[sysnum].sy_callc,
495 (void *)dtrace_systrace_syscall32,
496 (void *)systrace_sysent32[sysnum].stsy_underlying);
497 #endif
498 }
499
500 if (SYSTRACE_ISENTRY((uintptr_t)parg)) {
501 systrace_sysent[sysnum].stsy_entry = DTRACE_IDNONE;
502 #ifdef _SYSCALL32_IMPL
503 systrace_sysent32[sysnum].stsy_entry = DTRACE_IDNONE;
504 #endif
505 } else {
506 systrace_sysent[sysnum].stsy_return = DTRACE_IDNONE;
507 #ifdef _SYSCALL32_IMPL
508 systrace_sysent32[sysnum].stsy_return = DTRACE_IDNONE;
509 #endif
510 }
511 }
512
513 static dtrace_pattr_t systrace_attr = {
514 { DTRACE_STABILITY_EVOLVING, DTRACE_STABILITY_EVOLVING, DTRACE_CLASS_COMMON },
515 { DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_UNKNOWN },
516 { DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_ISA },
517 { DTRACE_STABILITY_EVOLVING, DTRACE_STABILITY_EVOLVING, DTRACE_CLASS_COMMON },
518 { DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_ISA },
519 };
520
521 static dtrace_pops_t systrace_pops = {
522 systrace_provide,
523 NULL,
524 systrace_enable,
525 systrace_disable,
526 NULL,
527 NULL,
528 NULL,
529 systrace_getarg,
530 NULL,
531 systrace_destroy
532 };
533
534 static int
535 systrace_attach(dev_info_t *devi, ddi_attach_cmd_t cmd)
536 {
537 switch (cmd) {
538 case DDI_ATTACH:
539 break;
540 case DDI_RESUME:
541 return (DDI_SUCCESS);
542 default:
543 return (DDI_FAILURE);
544 }
545
546 systrace_probe = (void(*))&dtrace_probe;
547 membar_enter();
548
549 if (ddi_create_minor_node(devi, "systrace", S_IFCHR, 0,
550 DDI_PSEUDO, 0) == DDI_FAILURE ||
551 dtrace_register("syscall", &systrace_attr, DTRACE_PRIV_USER, NULL,
552 &systrace_pops, NULL, &systrace_id) != 0) {
553 systrace_probe = systrace_stub;
554 ddi_remove_minor_node(devi, NULL);
555 return (DDI_FAILURE);
556 }
557
558 ddi_report_dev(devi);
559 systrace_devi = devi;
560
561 return (DDI_SUCCESS);
562 }
563
564
565 /*
566 * APPLE NOTE: systrace_detach not implemented
567 */
568 #if !defined(__APPLE__)
569 static int
570 systrace_detach(dev_info_t *devi, ddi_detach_cmd_t cmd)
571 {
572 switch (cmd) {
573 case DDI_DETACH:
574 break;
575 case DDI_SUSPEND:
576 return (DDI_SUCCESS);
577 default:
578 return (DDI_FAILURE);
579 }
580
581 if (dtrace_unregister(systrace_id) != 0)
582 return (DDI_FAILURE);
583
584 ddi_remove_minor_node(devi, NULL);
585 systrace_probe = systrace_stub;
586 return (DDI_SUCCESS);
587 }
588 #endif /* __APPLE__ */
589
590
591 typedef kern_return_t (*mach_call_t)(void *);
592
593 /* APPLE NOTE: From #include <kern/syscall_sw.h> which may be changed for 64 bit! */
594 typedef void mach_munge_t(void *);
595
596 typedef struct {
597 int mach_trap_arg_count;
598 kern_return_t (*mach_trap_function)(void *);
599 #if defined(__arm64__) || defined(__x86_64__)
600 mach_munge_t *mach_trap_arg_munge32; /* system call arguments for 32-bit */
601 #endif
602 int mach_trap_u32_words;
603 #if MACH_ASSERT
604 const char* mach_trap_name;
605 #endif /* MACH_ASSERT */
606 } mach_trap_t;
607
608 extern const mach_trap_t mach_trap_table[]; /* syscall_sw.h now declares this as const */
609 extern int mach_trap_count;
610
611 extern const char *mach_syscall_name_table[];
612
613 /* XXX From osfmk/i386/bsd_i386.c */
614 struct mach_call_args {
615 syscall_arg_t arg1;
616 syscall_arg_t arg2;
617 syscall_arg_t arg3;
618 syscall_arg_t arg4;
619 syscall_arg_t arg5;
620 syscall_arg_t arg6;
621 syscall_arg_t arg7;
622 syscall_arg_t arg8;
623 syscall_arg_t arg9;
624 };
625
626 #undef NSYSCALL
627 #define NSYSCALL mach_trap_count
628
629 #if ((1 << SYSTRACE_SHIFT) <= NSYSCALL)
630 #error 1 << SYSTRACE_SHIFT must exceed number of Mach traps
631 #endif
632
633 typedef struct machtrace_sysent {
634 dtrace_id_t stsy_entry;
635 dtrace_id_t stsy_return;
636 kern_return_t (*stsy_underlying)(void *);
637 int32_t stsy_return_type;
638 } machtrace_sysent_t;
639
640 static machtrace_sysent_t *machtrace_sysent = NULL;
641
642 void (*machtrace_probe)(dtrace_id_t, uint64_t, uint64_t,
643 uint64_t, uint64_t, uint64_t);
644
645 static uint64_t machtrace_getarg(void *, dtrace_id_t, void *, int, int);
646
647 static dev_info_t *machtrace_devi;
648 static dtrace_provider_id_t machtrace_id;
649
650 static kern_return_t
651 dtrace_machtrace_syscall(struct mach_call_args *args)
652 {
653 int code; /* The mach call number */
654
655 machtrace_sysent_t *sy;
656 dtrace_id_t id;
657 kern_return_t rval;
658 #if 0 /* XXX */
659 proc_t *p;
660 #endif
661 syscall_arg_t *ip = (syscall_arg_t *)args;
662 mach_call_t mach_call;
663
664 #if defined (__x86_64__)
665 {
666 pal_register_cache_state(current_thread(), VALID);
667 x86_saved_state_t *tagged_regs = (x86_saved_state_t *)find_user_regs(current_thread());
668
669 if (is_saved_state64(tagged_regs)) {
670 code = saved_state64(tagged_regs)->rax & SYSCALL_NUMBER_MASK;
671 } else {
672 code = -saved_state32(tagged_regs)->eax;
673 }
674 }
675 #else
676 #error Unknown Architecture
677 #endif
678
679 sy = &machtrace_sysent[code];
680
681 if ((id = sy->stsy_entry) != DTRACE_IDNONE) {
682 uthread_t uthread = (uthread_t)get_bsdthread_info(current_thread());
683
684 if (uthread)
685 uthread->t_dtrace_syscall_args = (void *)ip;
686
687 (*machtrace_probe)(id, *ip, *(ip+1), *(ip+2), *(ip+3), *(ip+4));
688
689 if (uthread)
690 uthread->t_dtrace_syscall_args = (void *)0;
691 }
692
693 #if 0 /* XXX */
694 /*
695 * APPLE NOTE: Not implemented.
696 * We want to explicitly allow DTrace consumers to stop a process
697 * before it actually executes the meat of the syscall.
698 */
699 p = ttoproc(curthread);
700 mutex_enter(&p->p_lock);
701 if (curthread->t_dtrace_stop && !curthread->t_lwp->lwp_nostop) {
702 curthread->t_dtrace_stop = 0;
703 stop(PR_REQUESTED, 0);
704 }
705 mutex_exit(&p->p_lock);
706 #endif
707
708 mach_call = (mach_call_t)(*sy->stsy_underlying);
709 rval = mach_call(args);
710
711 if ((id = sy->stsy_return) != DTRACE_IDNONE)
712 (*machtrace_probe)(id, (uint64_t)rval, 0, 0, 0, 0);
713
714 return (rval);
715 }
716
717 static void
718 machtrace_init(const mach_trap_t *actual, machtrace_sysent_t **interposed)
719 {
720 machtrace_sysent_t *msysent = *interposed;
721 int i;
722
723 if (msysent == NULL) {
724 *interposed = msysent = kmem_zalloc(sizeof (machtrace_sysent_t) *
725 NSYSCALL, KM_SLEEP);
726 }
727
728 for (i = 0; i < NSYSCALL; i++) {
729 const mach_trap_t *a = &actual[i];
730 machtrace_sysent_t *s = &msysent[i];
731
732 if (LOADABLE_SYSCALL(a) && !LOADED_SYSCALL(a))
733 continue;
734
735 if (a->mach_trap_function == (mach_call_t)(dtrace_machtrace_syscall))
736 continue;
737
738 s->stsy_underlying = a->mach_trap_function;
739 }
740 }
741
742 /*ARGSUSED*/
743 static void
744 machtrace_provide(void *arg, const dtrace_probedesc_t *desc)
745 {
746 #pragma unused(arg) /* __APPLE__ */
747
748 int i;
749
750 if (desc != NULL)
751 return;
752
753 machtrace_init(mach_trap_table, &machtrace_sysent);
754
755 for (i = 0; i < NSYSCALL; i++) {
756
757 if (machtrace_sysent[i].stsy_underlying == NULL)
758 continue;
759
760 if (dtrace_probe_lookup(machtrace_id, NULL,
761 mach_syscall_name_table[i], "entry") != 0)
762 continue;
763
764 (void) dtrace_probe_create(machtrace_id, NULL, mach_syscall_name_table[i],
765 "entry", MACHTRACE_ARTIFICIAL_FRAMES,
766 (void *)((uintptr_t)SYSTRACE_ENTRY(i)));
767 (void) dtrace_probe_create(machtrace_id, NULL, mach_syscall_name_table[i],
768 "return", MACHTRACE_ARTIFICIAL_FRAMES,
769 (void *)((uintptr_t)SYSTRACE_RETURN(i)));
770
771 machtrace_sysent[i].stsy_entry = DTRACE_IDNONE;
772 machtrace_sysent[i].stsy_return = DTRACE_IDNONE;
773 }
774 }
775
776 /*ARGSUSED*/
777 static void
778 machtrace_destroy(void *arg, dtrace_id_t id, void *parg)
779 {
780 #pragma unused(arg,id) /* __APPLE__ */
781 int sysnum = SYSTRACE_SYSNUM((uintptr_t)parg);
782
783 #pragma unused(sysnum) /* __APPLE__ */
784
785 /*
786 * There's nothing to do here but assert that we have actually been
787 * disabled.
788 */
789 if (SYSTRACE_ISENTRY((uintptr_t)parg)) {
790 ASSERT(machtrace_sysent[sysnum].stsy_entry == DTRACE_IDNONE);
791 } else {
792 ASSERT(machtrace_sysent[sysnum].stsy_return == DTRACE_IDNONE);
793 }
794 }
795
796 /*ARGSUSED*/
797 static int
798 machtrace_enable(void *arg, dtrace_id_t id, void *parg)
799 {
800 #pragma unused(arg) /* __APPLE__ */
801
802 int sysnum = SYSTRACE_SYSNUM((uintptr_t)parg);
803 int enabled = (machtrace_sysent[sysnum].stsy_entry != DTRACE_IDNONE ||
804 machtrace_sysent[sysnum].stsy_return != DTRACE_IDNONE);
805
806 if (SYSTRACE_ISENTRY((uintptr_t)parg)) {
807 machtrace_sysent[sysnum].stsy_entry = id;
808 } else {
809 machtrace_sysent[sysnum].stsy_return = id;
810 }
811
812 if (enabled) {
813 ASSERT(mach_trap_table[sysnum].mach_trap_function == (void *)dtrace_machtrace_syscall);
814 return(0);
815 }
816
817 lck_mtx_lock(&dtrace_systrace_lock);
818
819 if (mach_trap_table[sysnum].mach_trap_function == machtrace_sysent[sysnum].stsy_underlying) {
820 vm_offset_t dss = (vm_offset_t)&dtrace_machtrace_syscall;
821 ml_nofault_copy((vm_offset_t)&dss, (vm_offset_t)&mach_trap_table[sysnum].mach_trap_function, sizeof(vm_offset_t));
822 }
823
824 lck_mtx_unlock(&dtrace_systrace_lock);
825
826 return(0);
827 }
828
829 /*ARGSUSED*/
830 static void
831 machtrace_disable(void *arg, dtrace_id_t id, void *parg)
832 {
833 #pragma unused(arg,id) /* __APPLE__ */
834
835 int sysnum = SYSTRACE_SYSNUM((uintptr_t)parg);
836 int disable = (machtrace_sysent[sysnum].stsy_entry == DTRACE_IDNONE ||
837 machtrace_sysent[sysnum].stsy_return == DTRACE_IDNONE);
838
839 if (disable) {
840
841 lck_mtx_lock(&dtrace_systrace_lock);
842
843 if (mach_trap_table[sysnum].mach_trap_function == (mach_call_t)dtrace_machtrace_syscall) {
844 ml_nofault_copy((vm_offset_t)&machtrace_sysent[sysnum].stsy_underlying, (vm_offset_t)&mach_trap_table[sysnum].mach_trap_function, sizeof(vm_offset_t));
845 }
846 lck_mtx_unlock(&dtrace_systrace_lock);
847 }
848
849 if (SYSTRACE_ISENTRY((uintptr_t)parg)) {
850 machtrace_sysent[sysnum].stsy_entry = DTRACE_IDNONE;
851 } else {
852 machtrace_sysent[sysnum].stsy_return = DTRACE_IDNONE;
853 }
854 }
855
856 static dtrace_pattr_t machtrace_attr = {
857 { DTRACE_STABILITY_EVOLVING, DTRACE_STABILITY_EVOLVING, DTRACE_CLASS_COMMON },
858 { DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_UNKNOWN },
859 { DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_ISA },
860 { DTRACE_STABILITY_EVOLVING, DTRACE_STABILITY_EVOLVING, DTRACE_CLASS_COMMON },
861 { DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_ISA },
862 };
863
864 static dtrace_pops_t machtrace_pops = {
865 machtrace_provide,
866 NULL,
867 machtrace_enable,
868 machtrace_disable,
869 NULL,
870 NULL,
871 NULL,
872 machtrace_getarg,
873 NULL,
874 machtrace_destroy
875 };
876
877 static int
878 machtrace_attach(dev_info_t *devi, ddi_attach_cmd_t cmd)
879 {
880 switch (cmd) {
881 case DDI_ATTACH:
882 break;
883 case DDI_RESUME:
884 return (DDI_SUCCESS);
885 default:
886 return (DDI_FAILURE);
887 }
888
889 machtrace_probe = dtrace_probe;
890 membar_enter();
891
892 if (ddi_create_minor_node(devi, "machtrace", S_IFCHR, 0,
893 DDI_PSEUDO, 0) == DDI_FAILURE ||
894 dtrace_register("mach_trap", &machtrace_attr, DTRACE_PRIV_USER, NULL,
895 &machtrace_pops, NULL, &machtrace_id) != 0) {
896 machtrace_probe = (void (*))&systrace_stub;
897 ddi_remove_minor_node(devi, NULL);
898 return (DDI_FAILURE);
899 }
900
901 ddi_report_dev(devi);
902 machtrace_devi = devi;
903
904 return (DDI_SUCCESS);
905 }
906
907 d_open_t _systrace_open;
908
909 int _systrace_open(dev_t dev, int flags, int devtype, struct proc *p)
910 {
911 #pragma unused(dev,flags,devtype,p)
912 return 0;
913 }
914
915 #define SYSTRACE_MAJOR -24 /* let the kernel pick the device number */
916
917 /*
918 * A struct describing which functions will get invoked for certain
919 * actions.
920 */
921 static struct cdevsw systrace_cdevsw =
922 {
923 _systrace_open, /* open */
924 eno_opcl, /* close */
925 eno_rdwrt, /* read */
926 eno_rdwrt, /* write */
927 eno_ioctl, /* ioctl */
928 (stop_fcn_t *)nulldev, /* stop */
929 (reset_fcn_t *)nulldev, /* reset */
930 NULL, /* tty's */
931 eno_select, /* select */
932 eno_mmap, /* mmap */
933 eno_strat, /* strategy */
934 eno_getc, /* getc */
935 eno_putc, /* putc */
936 0 /* type */
937 };
938
939 static int gSysTraceInited = 0;
940
941 void systrace_init( void );
942
943 void systrace_init( void )
944 {
945 if (0 == gSysTraceInited) {
946 int majdevno = cdevsw_add(SYSTRACE_MAJOR, &systrace_cdevsw);
947
948 if (majdevno < 0) {
949 printf("systrace_init: failed to allocate a major number!\n");
950 gSysTraceInited = 0;
951 return;
952 }
953
954 systrace_attach( (dev_info_t *)(uintptr_t)majdevno, DDI_ATTACH );
955 machtrace_attach( (dev_info_t *)(uintptr_t)majdevno, DDI_ATTACH );
956
957 gSysTraceInited = 1;
958 } else
959 panic("systrace_init: called twice!\n");
960 }
961 #undef SYSTRACE_MAJOR
962
963 static uint64_t
964 systrace_getarg(void *arg, dtrace_id_t id, void *parg, int argno, int aframes)
965 {
966 #pragma unused(arg,id,parg,aframes) /* __APPLE__ */
967 uint64_t val = 0;
968 syscall_arg_t *stack = (syscall_arg_t *)NULL;
969
970 uthread_t uthread = (uthread_t)get_bsdthread_info(current_thread());
971
972 if (uthread)
973 stack = (syscall_arg_t *)uthread->t_dtrace_syscall_args;
974
975 if (!stack)
976 return(0);
977
978 DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT);
979 /* dtrace_probe arguments arg0 .. arg4 are 64bits wide */
980 val = (uint64_t)*(stack+argno);
981 DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT);
982 return (val);
983 }
984
985
986 static uint64_t
987 machtrace_getarg(void *arg, dtrace_id_t id, void *parg, int argno, int aframes)
988 {
989 #pragma unused(arg,id,parg,aframes) /* __APPLE__ */
990 uint64_t val = 0;
991 syscall_arg_t *stack = (syscall_arg_t *)NULL;
992
993 uthread_t uthread = (uthread_t)get_bsdthread_info(current_thread());
994
995 if (uthread)
996 stack = (syscall_arg_t *)uthread->t_dtrace_syscall_args;
997
998 if (!stack)
999 return(0);
1000
1001 DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT);
1002 /* dtrace_probe arguments arg0 .. arg4 are 64bits wide */
1003 val = (uint64_t)*(stack+argno);
1004 DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT);
1005 return (val);
1006 }
1007