]> git.saurik.com Git - apple/xnu.git/blob - bsd/dev/dtrace/systrace.c
xnu-2422.90.20.tar.gz
[apple/xnu.git] / bsd / dev / dtrace / systrace.c
1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21 /*
22 * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
23 * Use is subject to license terms.
24 */
25
26 /* #pragma ident "@(#)systrace.c 1.6 06/09/19 SMI" */
27
28 #if !defined(__APPLE__)
29 #include <sys/dtrace.h>
30 #include <sys/systrace.h>
31 #include <sys/stat.h>
32 #include <sys/systm.h>
33 #include <sys/conf.h>
34 #include <sys/ddi.h>
35 #include <sys/sunddi.h>
36 #include <sys/atomic.h>
37 #define SYSTRACE_ARTIFICIAL_FRAMES 1
38 #else
39
40 #ifdef KERNEL
41 #ifndef _KERNEL
42 #define _KERNEL /* Solaris vs. Darwin */
43 #endif
44 #endif
45
46 #include <kern/thread.h>
47 #include <mach/thread_status.h>
48
49 /* XXX All of these should really be derived from syscall_sw.h */
50 #if defined (__x86_64__)
51 #define SYSCALL_CLASS_SHIFT 24
52 #define SYSCALL_CLASS_MASK (0xFF << SYSCALL_CLASS_SHIFT)
53 #define SYSCALL_NUMBER_MASK (~SYSCALL_CLASS_MASK)
54 #define I386_SYSCALL_NUMBER_MASK (0xFFFF)
55 #endif
56
57 #include <sys/param.h>
58 #include <sys/systm.h>
59 #include <sys/proc.h>
60 #include <sys/errno.h>
61 #include <sys/ioctl.h>
62 #include <sys/conf.h>
63 #include <sys/fcntl.h>
64 #include <miscfs/devfs/devfs.h>
65
66 #include <sys/dtrace.h>
67 #include <sys/dtrace_impl.h>
68 #include "systrace.h"
69 #include <sys/stat.h>
70 #include <sys/systm.h>
71 #include <sys/conf.h>
72 #include <sys/user.h>
73
74 #include <machine/pal_routines.h>
75
76 #if defined (__x86_64__)
77 #define SYSTRACE_ARTIFICIAL_FRAMES 2
78 #define MACHTRACE_ARTIFICIAL_FRAMES 3
79 #else
80 #error Unknown Architecture
81 #endif
82
83 #include <sys/sysent.h>
84 #define sy_callc sy_call /* Map Solaris slot name to Darwin's */
85 #define NSYSCALL nsysent /* and is less than 500 or so */
86
87 extern const char *syscallnames[];
88
89 #include <sys/dtrace_glue.h>
90 #define casptr dtrace_casptr
91 #define membar_enter dtrace_membar_producer
92
93 #define LOADABLE_SYSCALL(a) 0 /* Not pertinent to Darwin. */
94 #define LOADED_SYSCALL(a) 1 /* Not pertinent to Darwin. */
95
96 extern lck_attr_t* dtrace_lck_attr;
97 extern lck_grp_t* dtrace_lck_grp;
98 static lck_mtx_t dtrace_systrace_lock; /* probe state lock */
99
100 systrace_sysent_t *systrace_sysent = NULL;
101 void (*systrace_probe)(dtrace_id_t, uint64_t, uint64_t, uint64_t, uint64_t, uint64_t);
102
103 static uint64_t systrace_getarg(void *, dtrace_id_t, void *, int, int);
104
105 void
106 systrace_stub(dtrace_id_t id, uint64_t arg0, uint64_t arg1,
107 uint64_t arg2, uint64_t arg3, uint64_t arg4)
108 {
109 #pragma unused(id,arg0,arg1,arg2,arg3,arg4)
110 }
111
112 int32_t
113 dtrace_systrace_syscall(struct proc *pp, void *uap, int *rv)
114 {
115 unsigned short code; /* The system call number */
116
117 systrace_sysent_t *sy;
118 dtrace_id_t id;
119 int32_t rval;
120 #if 0 /* XXX */
121 proc_t *p;
122 #endif
123 syscall_arg_t *ip = (syscall_arg_t *)uap;
124
125 #if defined (__x86_64__)
126 {
127 pal_register_cache_state(current_thread(), VALID);
128 x86_saved_state_t *tagged_regs = (x86_saved_state_t *)find_user_regs(current_thread());
129
130 if (is_saved_state64(tagged_regs)) {
131 x86_saved_state64_t *regs = saved_state64(tagged_regs);
132 code = regs->rax & SYSCALL_NUMBER_MASK;
133 /*
134 * Check for indirect system call... system call number
135 * passed as 'arg0'
136 */
137 if (code == 0) {
138 code = regs->rdi;
139 }
140 } else {
141 code = saved_state32(tagged_regs)->eax & I386_SYSCALL_NUMBER_MASK;
142
143 if (code == 0) {
144 vm_offset_t params = (vm_offset_t) (saved_state32(tagged_regs)->uesp + sizeof (int));
145 code = fuword(params);
146 }
147 }
148 }
149 #else
150 #error Unknown Architecture
151 #endif
152
153 // Bounds "check" the value of code a la unix_syscall
154 sy = (code >= NUM_SYSENT) ? &systrace_sysent[63] : &systrace_sysent[code];
155
156 if ((id = sy->stsy_entry) != DTRACE_IDNONE) {
157 uthread_t uthread = (uthread_t)get_bsdthread_info(current_thread());
158 if (uthread)
159 uthread->t_dtrace_syscall_args = (void *)ip;
160
161 if (ip)
162 (*systrace_probe)(id, *ip, *(ip+1), *(ip+2), *(ip+3), *(ip+4));
163 else
164 (*systrace_probe)(id, 0, 0, 0, 0, 0);
165
166 if (uthread)
167 uthread->t_dtrace_syscall_args = (void *)0;
168 }
169
170 #if 0 /* XXX */
171 /*
172 * We want to explicitly allow DTrace consumers to stop a process
173 * before it actually executes the meat of the syscall.
174 */
175 p = ttoproc(curthread);
176 mutex_enter(&p->p_lock);
177 if (curthread->t_dtrace_stop && !curthread->t_lwp->lwp_nostop) {
178 curthread->t_dtrace_stop = 0;
179 stop(PR_REQUESTED, 0);
180 }
181 mutex_exit(&p->p_lock);
182 #endif
183
184 rval = (*sy->stsy_underlying)(pp, uap, rv);
185
186 if ((id = sy->stsy_return) != DTRACE_IDNONE) {
187 uint64_t munged_rv0, munged_rv1;
188 uthread_t uthread = (uthread_t)get_bsdthread_info(current_thread());
189
190 if (uthread)
191 uthread->t_dtrace_errno = rval; /* Establish t_dtrace_errno now in case this enabling refers to it. */
192
193 /*
194 * "Decode" rv for use in the call to dtrace_probe()
195 */
196 if (rval == ERESTART) {
197 munged_rv0 = -1LL; /* System call will be reissued in user mode. Make DTrace report a -1 return. */
198 munged_rv1 = -1LL;
199 } else if (rval != EJUSTRETURN) {
200 if (rval) {
201 munged_rv0 = -1LL; /* Mimic what libc will do. */
202 munged_rv1 = -1LL;
203 } else {
204 switch (sy->stsy_return_type) {
205 case _SYSCALL_RET_INT_T:
206 munged_rv0 = rv[0];
207 munged_rv1 = rv[1];
208 break;
209 case _SYSCALL_RET_UINT_T:
210 munged_rv0 = ((u_int)rv[0]);
211 munged_rv1 = ((u_int)rv[1]);
212 break;
213 case _SYSCALL_RET_OFF_T:
214 case _SYSCALL_RET_UINT64_T:
215 munged_rv0 = *(u_int64_t *)rv;
216 munged_rv1 = 0LL;
217 break;
218 case _SYSCALL_RET_ADDR_T:
219 case _SYSCALL_RET_SIZE_T:
220 case _SYSCALL_RET_SSIZE_T:
221 munged_rv0 = *(user_addr_t *)rv;
222 munged_rv1 = 0LL;
223 break;
224 case _SYSCALL_RET_NONE:
225 munged_rv0 = 0LL;
226 munged_rv1 = 0LL;
227 break;
228 default:
229 munged_rv0 = 0LL;
230 munged_rv1 = 0LL;
231 break;
232 }
233 }
234 } else {
235 munged_rv0 = 0LL;
236 munged_rv1 = 0LL;
237 }
238
239 /*
240 * <http://mail.opensolaris.org/pipermail/dtrace-discuss/2007-January/003276.html> says:
241 *
242 * "This is a bit of an historical artifact. At first, the syscall provider just
243 * had its return value in arg0, and the fbt and pid providers had their return
244 * values in arg1 (so that we could use arg0 for the offset of the return site).
245 *
246 * We inevitably started writing scripts where we wanted to see the return
247 * values from probes in all three providers, and we made this script easier
248 * to write by replicating the syscall return values in arg1 to match fbt and
249 * pid. We debated briefly about removing the return value from arg0, but
250 * decided that it would be less confusing to have the same data in two places
251 * than to have some non-helpful, non-intuitive value in arg0.
252 *
253 * This change was made 4/23/2003 according to the DTrace project's putback log."
254 */
255 (*systrace_probe)(id, munged_rv0, munged_rv0, munged_rv1, (uint64_t)rval, 0);
256 }
257
258 return (rval);
259 }
260
261 void
262 dtrace_systrace_syscall_return(unsigned short code, int rval, int *rv)
263 {
264 systrace_sysent_t *sy;
265 dtrace_id_t id;
266
267 // Bounds "check" the value of code a la unix_syscall_return
268 sy = (code >= NUM_SYSENT) ? &systrace_sysent[63] : &systrace_sysent[code];
269
270 if ((id = sy->stsy_return) != DTRACE_IDNONE) {
271 uint64_t munged_rv0, munged_rv1;
272 uthread_t uthread = (uthread_t)get_bsdthread_info(current_thread());
273
274 if (uthread)
275 uthread->t_dtrace_errno = rval; /* Establish t_dtrace_errno now in case this enabling refers to it. */
276
277 /*
278 * "Decode" rv for use in the call to dtrace_probe()
279 */
280 if (rval == ERESTART) {
281 munged_rv0 = -1LL; /* System call will be reissued in user mode. Make DTrace report a -1 return. */
282 munged_rv1 = -1LL;
283 } else if (rval != EJUSTRETURN) {
284 if (rval) {
285 munged_rv0 = -1LL; /* Mimic what libc will do. */
286 munged_rv1 = -1LL;
287 } else {
288 switch (sy->stsy_return_type) {
289 case _SYSCALL_RET_INT_T:
290 munged_rv0 = rv[0];
291 munged_rv1 = rv[1];
292 break;
293 case _SYSCALL_RET_UINT_T:
294 munged_rv0 = ((u_int)rv[0]);
295 munged_rv1 = ((u_int)rv[1]);
296 break;
297 case _SYSCALL_RET_OFF_T:
298 case _SYSCALL_RET_UINT64_T:
299 munged_rv0 = *(u_int64_t *)rv;
300 munged_rv1 = 0LL;
301 break;
302 case _SYSCALL_RET_ADDR_T:
303 case _SYSCALL_RET_SIZE_T:
304 case _SYSCALL_RET_SSIZE_T:
305 munged_rv0 = *(user_addr_t *)rv;
306 munged_rv1 = 0LL;
307 break;
308 case _SYSCALL_RET_NONE:
309 munged_rv0 = 0LL;
310 munged_rv1 = 0LL;
311 break;
312 default:
313 munged_rv0 = 0LL;
314 munged_rv1 = 0LL;
315 break;
316 }
317 }
318 } else {
319 munged_rv0 = 0LL;
320 munged_rv1 = 0LL;
321 }
322
323 (*systrace_probe)(id, munged_rv0, munged_rv0, munged_rv1, (uint64_t)rval, 0);
324 }
325 }
326 #endif /* __APPLE__ */
327
/*
 * Layout of the per-probe cookie handed to dtrace_probe_create():
 * the low SYSTRACE_SHIFT bits hold the system call number and the bit just
 * above them is set for an "entry" probe and clear for a "return" probe.
 */
#define SYSTRACE_SHIFT		16
#define SYSTRACE_ISENTRY(x)	(((int)(x)) >> SYSTRACE_SHIFT)
#define SYSTRACE_SYSNUM(x)	(((1 << SYSTRACE_SHIFT) - 1) & ((int)(x)))
#define SYSTRACE_ENTRY(id)	((id) | (1 << SYSTRACE_SHIFT))
#define SYSTRACE_RETURN(id)	(id)
333
334 #if ((1 << SYSTRACE_SHIFT) <= NSYSCALL)
335 #error 1 << SYSTRACE_SHIFT must exceed number of system calls
336 #endif
337
338 static dev_info_t *systrace_devi;
339 static dtrace_provider_id_t systrace_id;
340
341 #if !defined (__APPLE__)
342 static void
343 systrace_init(struct sysent *actual, systrace_sysent_t **interposed)
344 {
345 systrace_sysent_t *sysent = *interposed;
346 int i;
347
348 if (sysent == NULL) {
349 *interposed = sysent = kmem_zalloc(sizeof (systrace_sysent_t) *
350 NSYSCALL, KM_SLEEP);
351 }
352
353 for (i = 0; i < NSYSCALL; i++) {
354 struct sysent *a = &actual[i];
355 systrace_sysent_t *s = &sysent[i];
356
357 if (LOADABLE_SYSCALL(a) && !LOADED_SYSCALL(a))
358 continue;
359
360 if (a->sy_callc == dtrace_systrace_syscall)
361 continue;
362
363 #ifdef _SYSCALL32_IMPL
364 if (a->sy_callc == dtrace_systrace_syscall32)
365 continue;
366 #endif
367
368 s->stsy_underlying = a->sy_callc;
369 }
370 }
371 #else
372 #define systrace_init _systrace_init /* Avoid name clash with Darwin automagic conf symbol */
373 static void
374 systrace_init(struct sysent *actual, systrace_sysent_t **interposed)
375 {
376
377 systrace_sysent_t *ssysent = *interposed; /* Avoid sysent shadow warning
378 from bsd/sys/sysent.h */
379 int i;
380
381 if (ssysent == NULL) {
382 *interposed = ssysent = kmem_zalloc(sizeof (systrace_sysent_t) *
383 NSYSCALL, KM_SLEEP);
384 }
385
386 for (i = 0; i < NSYSCALL; i++) {
387 struct sysent *a = &actual[i];
388 systrace_sysent_t *s = &ssysent[i];
389
390 if (LOADABLE_SYSCALL(a) && !LOADED_SYSCALL(a))
391 continue;
392
393 if (a->sy_callc == dtrace_systrace_syscall)
394 continue;
395
396 #ifdef _SYSCALL32_IMPL
397 if (a->sy_callc == dtrace_systrace_syscall32)
398 continue;
399 #endif
400
401 s->stsy_underlying = a->sy_callc;
402 s->stsy_return_type = a->sy_return_type;
403 }
404 lck_mtx_init(&dtrace_systrace_lock, dtrace_lck_grp, dtrace_lck_attr);
405 }
406
407 #endif /* __APPLE__ */
408
409 /*ARGSUSED*/
410 static void
411 systrace_provide(void *arg, const dtrace_probedesc_t *desc)
412 {
413 #pragma unused(arg) /* __APPLE__ */
414 int i;
415
416 if (desc != NULL)
417 return;
418
419 systrace_init(sysent, &systrace_sysent);
420 #ifdef _SYSCALL32_IMPL
421 systrace_init(sysent32, &systrace_sysent32);
422 #endif
423
424 for (i = 0; i < NSYSCALL; i++) {
425 if (systrace_sysent[i].stsy_underlying == NULL)
426 continue;
427
428 if (dtrace_probe_lookup(systrace_id, NULL,
429 syscallnames[i], "entry") != 0)
430 continue;
431
432 (void) dtrace_probe_create(systrace_id, NULL, syscallnames[i],
433 "entry", SYSTRACE_ARTIFICIAL_FRAMES,
434 (void *)((uintptr_t)SYSTRACE_ENTRY(i)));
435 (void) dtrace_probe_create(systrace_id, NULL, syscallnames[i],
436 "return", SYSTRACE_ARTIFICIAL_FRAMES,
437 (void *)((uintptr_t)SYSTRACE_RETURN(i)));
438
439 systrace_sysent[i].stsy_entry = DTRACE_IDNONE;
440 systrace_sysent[i].stsy_return = DTRACE_IDNONE;
441 #ifdef _SYSCALL32_IMPL
442 systrace_sysent32[i].stsy_entry = DTRACE_IDNONE;
443 systrace_sysent32[i].stsy_return = DTRACE_IDNONE;
444 #endif
445 }
446 }
447 #if defined(__APPLE__)
448 #undef systrace_init
449 #endif
450
451 /*ARGSUSED*/
452 static void
453 systrace_destroy(void *arg, dtrace_id_t id, void *parg)
454 {
455 #pragma unused(arg,id) /* __APPLE__ */
456
457 int sysnum = SYSTRACE_SYSNUM((uintptr_t)parg);
458
459 #pragma unused(sysnum) /* __APPLE__ */
460 /*
461 * There's nothing to do here but assert that we have actually been
462 * disabled.
463 */
464 if (SYSTRACE_ISENTRY((uintptr_t)parg)) {
465 ASSERT(systrace_sysent[sysnum].stsy_entry == DTRACE_IDNONE);
466 #ifdef _SYSCALL32_IMPL
467 ASSERT(systrace_sysent32[sysnum].stsy_entry == DTRACE_IDNONE);
468 #endif
469 } else {
470 ASSERT(systrace_sysent[sysnum].stsy_return == DTRACE_IDNONE);
471 #ifdef _SYSCALL32_IMPL
472 ASSERT(systrace_sysent32[sysnum].stsy_return == DTRACE_IDNONE);
473 #endif
474 }
475 }
476
477 /*ARGSUSED*/
478 static int
479 systrace_enable(void *arg, dtrace_id_t id, void *parg)
480 {
481 #pragma unused(arg) /* __APPLE__ */
482
483 int sysnum = SYSTRACE_SYSNUM((uintptr_t)parg);
484 int enabled = (systrace_sysent[sysnum].stsy_entry != DTRACE_IDNONE ||
485 systrace_sysent[sysnum].stsy_return != DTRACE_IDNONE);
486
487 if (SYSTRACE_ISENTRY((uintptr_t)parg)) {
488 systrace_sysent[sysnum].stsy_entry = id;
489 #ifdef _SYSCALL32_IMPL
490 systrace_sysent32[sysnum].stsy_entry = id;
491 #endif
492 } else {
493 systrace_sysent[sysnum].stsy_return = id;
494 #ifdef _SYSCALL32_IMPL
495 systrace_sysent32[sysnum].stsy_return = id;
496 #endif
497 }
498
499 if (enabled) {
500 ASSERT(sysent[sysnum].sy_callc == dtrace_systrace_syscall);
501 return(0);
502 }
503 #ifdef _SYSCALL32_IMPL
504 (void) casptr(&sysent32[sysnum].sy_callc,
505 (void *)systrace_sysent32[sysnum].stsy_underlying,
506 (void *)dtrace_systrace_syscall32);
507 #endif
508
509 lck_mtx_lock(&dtrace_systrace_lock);
510 if (sysent[sysnum].sy_callc == systrace_sysent[sysnum].stsy_underlying) {
511 vm_offset_t dss = (vm_offset_t)&dtrace_systrace_syscall;
512 ml_nofault_copy((vm_offset_t)&dss, (vm_offset_t)&sysent[sysnum].sy_callc, sizeof(vm_offset_t));
513 }
514 lck_mtx_unlock(&dtrace_systrace_lock);
515 return (0);
516 }
517
518 /*ARGSUSED*/
519 static void
520 systrace_disable(void *arg, dtrace_id_t id, void *parg)
521 {
522 #pragma unused(arg,id) /* __APPLE__ */
523
524 int sysnum = SYSTRACE_SYSNUM((uintptr_t)parg);
525 int disable = (systrace_sysent[sysnum].stsy_entry == DTRACE_IDNONE ||
526 systrace_sysent[sysnum].stsy_return == DTRACE_IDNONE);
527
528 if (disable) {
529 lck_mtx_lock(&dtrace_systrace_lock);
530 if (sysent[sysnum].sy_callc == dtrace_systrace_syscall)
531 ml_nofault_copy((vm_offset_t)&systrace_sysent[sysnum].stsy_underlying, (vm_offset_t)&sysent[sysnum].sy_callc, sizeof(systrace_sysent[sysnum].stsy_underlying));
532 lck_mtx_unlock(&dtrace_systrace_lock);
533
534 #ifdef _SYSCALL32_IMPL
535 (void) casptr(&sysent32[sysnum].sy_callc,
536 (void *)dtrace_systrace_syscall32,
537 (void *)systrace_sysent32[sysnum].stsy_underlying);
538 #endif
539 }
540
541 if (SYSTRACE_ISENTRY((uintptr_t)parg)) {
542 systrace_sysent[sysnum].stsy_entry = DTRACE_IDNONE;
543 #ifdef _SYSCALL32_IMPL
544 systrace_sysent32[sysnum].stsy_entry = DTRACE_IDNONE;
545 #endif
546 } else {
547 systrace_sysent[sysnum].stsy_return = DTRACE_IDNONE;
548 #ifdef _SYSCALL32_IMPL
549 systrace_sysent32[sysnum].stsy_return = DTRACE_IDNONE;
550 #endif
551 }
552 }
553
/*
 * Stability/class attribute table for the "syscall" provider; handed to
 * dtrace_register() by systrace_attach().  Each row is a
 * { name stability, data stability, dependency class } triple.
 * NOTE(review): the five rows presumably describe provider, module,
 * function, name and arguments, in that order -- confirm against
 * dtrace_pattr_t.
 */
554 static dtrace_pattr_t systrace_attr = {
555 { DTRACE_STABILITY_EVOLVING, DTRACE_STABILITY_EVOLVING, DTRACE_CLASS_COMMON },
556 { DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_UNKNOWN },
557 { DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_ISA },
558 { DTRACE_STABILITY_EVOLVING, DTRACE_STABILITY_EVOLVING, DTRACE_CLASS_COMMON },
559 { DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_ISA },
560 };
561
/*
 * Provider operations vector for the "syscall" provider; handed to
 * dtrace_register() by systrace_attach().  The initializer is positional;
 * slots this provider leaves unimplemented are NULL.
 */
562 static dtrace_pops_t systrace_pops = {
563 systrace_provide,
564 NULL,
565 systrace_enable,
566 systrace_disable,
567 NULL,
568 NULL,
569 NULL,
570 systrace_getarg,
571 NULL,
572 systrace_destroy
573 };
574
575 static int
576 systrace_attach(dev_info_t *devi, ddi_attach_cmd_t cmd)
577 {
578 switch (cmd) {
579 case DDI_ATTACH:
580 break;
581 case DDI_RESUME:
582 return (DDI_SUCCESS);
583 default:
584 return (DDI_FAILURE);
585 }
586
587 #if !defined(__APPLE__)
588 systrace_probe = (void (*)())dtrace_probe;
589 membar_enter();
590
591 if (ddi_create_minor_node(devi, "systrace", S_IFCHR, 0,
592 DDI_PSEUDO, NULL) == DDI_FAILURE ||
593 dtrace_register("syscall", &systrace_attr, DTRACE_PRIV_USER, NULL,
594 &systrace_pops, NULL, &systrace_id) != 0) {
595 systrace_probe = systrace_stub;
596 ddi_remove_minor_node(devi, NULL);
597 return (DDI_FAILURE);
598 }
599 #else
600 systrace_probe = (void(*))&dtrace_probe;
601 membar_enter();
602
603 if (ddi_create_minor_node(devi, "systrace", S_IFCHR, 0,
604 DDI_PSEUDO, 0) == DDI_FAILURE ||
605 dtrace_register("syscall", &systrace_attr, DTRACE_PRIV_USER, NULL,
606 &systrace_pops, NULL, &systrace_id) != 0) {
607 systrace_probe = systrace_stub;
608 ddi_remove_minor_node(devi, NULL);
609 return (DDI_FAILURE);
610 }
611 #endif /* __APPLE__ */
612
613 ddi_report_dev(devi);
614 systrace_devi = devi;
615
616 return (DDI_SUCCESS);
617 }
618
619 #if !defined(__APPLE__)
620 static int
621 systrace_detach(dev_info_t *devi, ddi_detach_cmd_t cmd)
622 {
623 switch (cmd) {
624 case DDI_DETACH:
625 break;
626 case DDI_SUSPEND:
627 return (DDI_SUCCESS);
628 default:
629 return (DDI_FAILURE);
630 }
631
632 if (dtrace_unregister(systrace_id) != 0)
633 return (DDI_FAILURE);
634
635 ddi_remove_minor_node(devi, NULL);
636 systrace_probe = systrace_stub;
637 return (DDI_SUCCESS);
638 }
639
640 /*ARGSUSED*/
641 static int
642 systrace_info(dev_info_t *dip, ddi_info_cmd_t infocmd, void *arg, void **result)
643 {
644 int error;
645
646 switch (infocmd) {
647 case DDI_INFO_DEVT2DEVINFO:
648 *result = (void *)systrace_devi;
649 error = DDI_SUCCESS;
650 break;
651 case DDI_INFO_DEVT2INSTANCE:
652 *result = (void *)0;
653 error = DDI_SUCCESS;
654 break;
655 default:
656 error = DDI_FAILURE;
657 }
658 return (error);
659 }
660
661 /*ARGSUSED*/
662 static int
663 systrace_open(dev_t *devp, int flag, int otyp, cred_t *cred_p)
664 {
665 return (0);
666 }
667
/*
 * Character-device entry points for the non-Darwin build (this region is
 * compiled only when __APPLE__ is not defined).  systrace_open is the only
 * entry point implemented in this file; the rest are generic kernel stubs.
 */
668 static struct cb_ops systrace_cb_ops = {
669 systrace_open, /* open */
670 nodev, /* close */
671 nulldev, /* strategy */
672 nulldev, /* print */
673 nodev, /* dump */
674 nodev, /* read */
675 nodev, /* write */
676 nodev, /* ioctl */
677 nodev, /* devmap */
678 nodev, /* mmap */
679 nodev, /* segmap */
680 nochpoll, /* poll */
681 ddi_prop_op, /* cb_prop_op */
682 0, /* streamtab */
683 D_NEW | D_MP /* Driver compatibility flag */
684 };
685
/*
 * Device operations for the non-Darwin build: wires systrace_info,
 * systrace_attach and systrace_detach, plus the character-device table
 * systrace_cb_ops, into the driver framework.
 */
686 static struct dev_ops systrace_ops = {
687 DEVO_REV, /* devo_rev, */
688 0, /* refcnt */
689 systrace_info, /* get_dev_info */
690 nulldev, /* identify */
691 nulldev, /* probe */
692 systrace_attach, /* attach */
693 systrace_detach, /* detach */
694 nodev, /* reset */
695 &systrace_cb_ops, /* driver operations */
696 NULL, /* bus operations */
697 nodev /* dev power */
698 };
699
700 /*
701 * Module linkage information for the kernel.
702 */
/* Driver description referenced from modlinkage below (non-Darwin build). */
703 static struct modldrv modldrv = {
704 &mod_driverops, /* module type (this is a pseudo driver) */
705 "System Call Tracing", /* name of module */
706 &systrace_ops, /* driver ops */
707 };
708
/*
 * Linkage record passed to mod_install(), mod_info() and mod_remove() by
 * _init(), _info() and _fini(); it lists the single driver above and is
 * NULL-terminated.
 */
709 static struct modlinkage modlinkage = {
710 MODREV_1,
711 (void *)&modldrv,
712 NULL
713 };
714
715 int
716 _init(void)
717 {
718 return (mod_install(&modlinkage));
719 }
720
721 int
722 _info(struct modinfo *modinfop)
723 {
724 return (mod_info(&modlinkage, modinfop));
725 }
726
727 int
728 _fini(void)
729 {
730 return (mod_remove(&modlinkage));
731 }
732 #else
733 typedef kern_return_t (*mach_call_t)(void *);
734
735 /* XXX From #include <kern/syscall_sw.h> which may be changed for 64 bit! */
736 typedef void mach_munge_t(const void *, void *);
737
/*
 * Local re-declaration of one Mach trap table entry (see the XXX note above:
 * the authoritative definition lives in kern/syscall_sw.h).  This file reads
 * and patches only mach_trap_function.
 * NOTE(review): the field order and conditionals must match the kernel's own
 * definition exactly for mach_trap_table[] indexing to be correct -- confirm
 * whenever syscall_sw.h changes.
 */
738 typedef struct {
739 int mach_trap_arg_count;
740 kern_return_t (*mach_trap_function)(void *);
741 #if defined(__x86_64__)
742 mach_munge_t *mach_trap_arg_munge32; /* system call arguments for 32-bit */
743 #endif
744 int mach_trap_u32_words;
745 #if MACH_ASSERT
746 const char* mach_trap_name;
747 #endif /* MACH_ASSERT */
748 } mach_trap_t;
749
750 extern const mach_trap_t mach_trap_table[]; /* syscall_sw.h now declares this as const */
751 extern int mach_trap_count;
752
753 extern const char *mach_syscall_name_table[];
754
755 /* XXX From osfmk/i386/bsd_i386.c */
/*
 * Argument block handed to a Mach trap handler: nine consecutive
 * syscall_arg_t slots.  dtrace_machtrace_syscall() views it as an array and
 * passes the first five slots to the entry probe.
 */
756 struct mach_call_args {
757 syscall_arg_t arg1;
758 syscall_arg_t arg2;
759 syscall_arg_t arg3;
760 syscall_arg_t arg4;
761 syscall_arg_t arg5;
762 syscall_arg_t arg6;
763 syscall_arg_t arg7;
764 syscall_arg_t arg8;
765 syscall_arg_t arg9;
766 };
767
768 #undef NSYSCALL
769 #define NSYSCALL mach_trap_count
770
771 #if ((1 << SYSTRACE_SHIFT) <= NSYSCALL)
772 #error 1 << SYSTRACE_SHIFT must exceed number of Mach traps
773 #endif
774
/*
 * Per-trap shadow entry for the "mach_trap" provider: the enabled entry and
 * return probe ids (DTRACE_IDNONE when disabled) and the original trap
 * handler saved by machtrace_init().  stsy_return_type is never assigned in
 * this file.
 */
775 typedef struct machtrace_sysent {
776 dtrace_id_t stsy_entry;
777 dtrace_id_t stsy_return;
778 kern_return_t (*stsy_underlying)(void *);
779 int32_t stsy_return_type;
780 } machtrace_sysent_t;
781
782 static machtrace_sysent_t *machtrace_sysent = NULL;
783
784 void (*machtrace_probe)(dtrace_id_t, uint64_t, uint64_t,
785 uint64_t, uint64_t, uint64_t);
786
787 static uint64_t machtrace_getarg(void *, dtrace_id_t, void *, int, int);
788
789 static dev_info_t *machtrace_devi;
790 static dtrace_provider_id_t machtrace_id;
791
792 static kern_return_t
793 dtrace_machtrace_syscall(struct mach_call_args *args)
794 {
795 int code; /* The mach call number */
796
797 machtrace_sysent_t *sy;
798 dtrace_id_t id;
799 kern_return_t rval;
800 #if 0 /* XXX */
801 proc_t *p;
802 #endif
803 syscall_arg_t *ip = (syscall_arg_t *)args;
804 mach_call_t mach_call;
805
806 #if defined (__x86_64__)
807 {
808 pal_register_cache_state(current_thread(), VALID);
809 x86_saved_state_t *tagged_regs = (x86_saved_state_t *)find_user_regs(current_thread());
810
811 if (is_saved_state64(tagged_regs)) {
812 code = saved_state64(tagged_regs)->rax & SYSCALL_NUMBER_MASK;
813 } else {
814 code = -saved_state32(tagged_regs)->eax;
815 }
816 }
817 #else
818 #error Unknown Architecture
819 #endif
820
821 sy = &machtrace_sysent[code];
822
823 if ((id = sy->stsy_entry) != DTRACE_IDNONE) {
824 uthread_t uthread = (uthread_t)get_bsdthread_info(current_thread());
825
826 if (uthread)
827 uthread->t_dtrace_syscall_args = (void *)ip;
828
829 (*machtrace_probe)(id, *ip, *(ip+1), *(ip+2), *(ip+3), *(ip+4));
830
831 if (uthread)
832 uthread->t_dtrace_syscall_args = (void *)0;
833 }
834
835 #if 0 /* XXX */
836 /*
837 * We want to explicitly allow DTrace consumers to stop a process
838 * before it actually executes the meat of the syscall.
839 */
840 p = ttoproc(curthread);
841 mutex_enter(&p->p_lock);
842 if (curthread->t_dtrace_stop && !curthread->t_lwp->lwp_nostop) {
843 curthread->t_dtrace_stop = 0;
844 stop(PR_REQUESTED, 0);
845 }
846 mutex_exit(&p->p_lock);
847 #endif
848
849 mach_call = (mach_call_t)(*sy->stsy_underlying);
850 rval = mach_call(args);
851
852 if ((id = sy->stsy_return) != DTRACE_IDNONE)
853 (*machtrace_probe)(id, (uint64_t)rval, 0, 0, 0, 0);
854
855 return (rval);
856 }
857
858 static void
859 machtrace_init(const mach_trap_t *actual, machtrace_sysent_t **interposed)
860 {
861 machtrace_sysent_t *msysent = *interposed;
862 int i;
863
864 if (msysent == NULL) {
865 *interposed = msysent = kmem_zalloc(sizeof (machtrace_sysent_t) *
866 NSYSCALL, KM_SLEEP);
867 }
868
869 for (i = 0; i < NSYSCALL; i++) {
870 const mach_trap_t *a = &actual[i];
871 machtrace_sysent_t *s = &msysent[i];
872
873 if (LOADABLE_SYSCALL(a) && !LOADED_SYSCALL(a))
874 continue;
875
876 if (a->mach_trap_function == (mach_call_t)(dtrace_machtrace_syscall))
877 continue;
878
879 s->stsy_underlying = a->mach_trap_function;
880 }
881 }
882
883 /*ARGSUSED*/
884 static void
885 machtrace_provide(void *arg, const dtrace_probedesc_t *desc)
886 {
887 #pragma unused(arg) /* __APPLE__ */
888
889 int i;
890
891 if (desc != NULL)
892 return;
893
894 machtrace_init(mach_trap_table, &machtrace_sysent);
895
896 for (i = 0; i < NSYSCALL; i++) {
897
898 if (machtrace_sysent[i].stsy_underlying == NULL)
899 continue;
900
901 if (dtrace_probe_lookup(machtrace_id, NULL,
902 mach_syscall_name_table[i], "entry") != 0)
903 continue;
904
905 (void) dtrace_probe_create(machtrace_id, NULL, mach_syscall_name_table[i],
906 "entry", MACHTRACE_ARTIFICIAL_FRAMES,
907 (void *)((uintptr_t)SYSTRACE_ENTRY(i)));
908 (void) dtrace_probe_create(machtrace_id, NULL, mach_syscall_name_table[i],
909 "return", MACHTRACE_ARTIFICIAL_FRAMES,
910 (void *)((uintptr_t)SYSTRACE_RETURN(i)));
911
912 machtrace_sysent[i].stsy_entry = DTRACE_IDNONE;
913 machtrace_sysent[i].stsy_return = DTRACE_IDNONE;
914 }
915 }
916
917 /*ARGSUSED*/
918 static void
919 machtrace_destroy(void *arg, dtrace_id_t id, void *parg)
920 {
921 #pragma unused(arg,id) /* __APPLE__ */
922 int sysnum = SYSTRACE_SYSNUM((uintptr_t)parg);
923
924 #pragma unused(sysnum) /* __APPLE__ */
925
926 /*
927 * There's nothing to do here but assert that we have actually been
928 * disabled.
929 */
930 if (SYSTRACE_ISENTRY((uintptr_t)parg)) {
931 ASSERT(machtrace_sysent[sysnum].stsy_entry == DTRACE_IDNONE);
932 } else {
933 ASSERT(machtrace_sysent[sysnum].stsy_return == DTRACE_IDNONE);
934 }
935 }
936
937 /*ARGSUSED*/
938 static int
939 machtrace_enable(void *arg, dtrace_id_t id, void *parg)
940 {
941 #pragma unused(arg) /* __APPLE__ */
942
943 int sysnum = SYSTRACE_SYSNUM((uintptr_t)parg);
944 int enabled = (machtrace_sysent[sysnum].stsy_entry != DTRACE_IDNONE ||
945 machtrace_sysent[sysnum].stsy_return != DTRACE_IDNONE);
946
947 if (SYSTRACE_ISENTRY((uintptr_t)parg)) {
948 machtrace_sysent[sysnum].stsy_entry = id;
949 } else {
950 machtrace_sysent[sysnum].stsy_return = id;
951 }
952
953 if (enabled) {
954 ASSERT(mach_trap_table[sysnum].mach_trap_function == (void *)dtrace_machtrace_syscall);
955 return(0);
956 }
957
958 lck_mtx_lock(&dtrace_systrace_lock);
959
960 if (mach_trap_table[sysnum].mach_trap_function == machtrace_sysent[sysnum].stsy_underlying) {
961 vm_offset_t dss = (vm_offset_t)&dtrace_machtrace_syscall;
962 ml_nofault_copy((vm_offset_t)&dss, (vm_offset_t)&mach_trap_table[sysnum].mach_trap_function, sizeof(vm_offset_t));
963 }
964
965 lck_mtx_unlock(&dtrace_systrace_lock);
966
967 return(0);
968 }
969
970 /*ARGSUSED*/
971 static void
972 machtrace_disable(void *arg, dtrace_id_t id, void *parg)
973 {
974 #pragma unused(arg,id) /* __APPLE__ */
975
976 int sysnum = SYSTRACE_SYSNUM((uintptr_t)parg);
977 int disable = (machtrace_sysent[sysnum].stsy_entry == DTRACE_IDNONE ||
978 machtrace_sysent[sysnum].stsy_return == DTRACE_IDNONE);
979
980 if (disable) {
981
982 lck_mtx_lock(&dtrace_systrace_lock);
983
984 if (mach_trap_table[sysnum].mach_trap_function == (mach_call_t)dtrace_machtrace_syscall) {
985 ml_nofault_copy((vm_offset_t)&machtrace_sysent[sysnum].stsy_underlying, (vm_offset_t)&mach_trap_table[sysnum].mach_trap_function, sizeof(vm_offset_t));
986 }
987 lck_mtx_unlock(&dtrace_systrace_lock);
988 }
989
990 if (SYSTRACE_ISENTRY((uintptr_t)parg)) {
991 machtrace_sysent[sysnum].stsy_entry = DTRACE_IDNONE;
992 } else {
993 machtrace_sysent[sysnum].stsy_return = DTRACE_IDNONE;
994 }
995 }
996
/*
 * Stability/class attribute table for the "mach_trap" provider; handed to
 * dtrace_register() by machtrace_attach().  The values are the same as in
 * systrace_attr.
 */
997 static dtrace_pattr_t machtrace_attr = {
998 { DTRACE_STABILITY_EVOLVING, DTRACE_STABILITY_EVOLVING, DTRACE_CLASS_COMMON },
999 { DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_UNKNOWN },
1000 { DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_ISA },
1001 { DTRACE_STABILITY_EVOLVING, DTRACE_STABILITY_EVOLVING, DTRACE_CLASS_COMMON },
1002 { DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_ISA },
1003 };
1004
/*
 * Provider operations vector for the "mach_trap" provider; handed to
 * dtrace_register() by machtrace_attach().  The initializer is positional
 * and mirrors systrace_pops; slots this provider leaves unimplemented are
 * NULL.
 */
1005 static dtrace_pops_t machtrace_pops = {
1006 machtrace_provide,
1007 NULL,
1008 machtrace_enable,
1009 machtrace_disable,
1010 NULL,
1011 NULL,
1012 NULL,
1013 machtrace_getarg,
1014 NULL,
1015 machtrace_destroy
1016 };
1017
1018 static int
1019 machtrace_attach(dev_info_t *devi, ddi_attach_cmd_t cmd)
1020 {
1021 switch (cmd) {
1022 case DDI_ATTACH:
1023 break;
1024 case DDI_RESUME:
1025 return (DDI_SUCCESS);
1026 default:
1027 return (DDI_FAILURE);
1028 }
1029
1030 #if !defined(__APPLE__)
1031 machtrace_probe = (void (*)())dtrace_probe;
1032 membar_enter();
1033
1034 if (ddi_create_minor_node(devi, "machtrace", S_IFCHR, 0,
1035 DDI_PSEUDO, NULL) == DDI_FAILURE ||
1036 dtrace_register("mach_trap", &machtrace_attr, DTRACE_PRIV_USER, NULL,
1037 &machtrace_pops, NULL, &machtrace_id) != 0) {
1038 machtrace_probe = systrace_stub;
1039 #else
1040 machtrace_probe = dtrace_probe;
1041 membar_enter();
1042
1043 if (ddi_create_minor_node(devi, "machtrace", S_IFCHR, 0,
1044 DDI_PSEUDO, 0) == DDI_FAILURE ||
1045 dtrace_register("mach_trap", &machtrace_attr, DTRACE_PRIV_USER, NULL,
1046 &machtrace_pops, NULL, &machtrace_id) != 0) {
1047 machtrace_probe = (void (*))&systrace_stub;
1048 #endif /* __APPLE__ */
1049 ddi_remove_minor_node(devi, NULL);
1050 return (DDI_FAILURE);
1051 }
1052
1053 ddi_report_dev(devi);
1054 machtrace_devi = devi;
1055
1056 return (DDI_SUCCESS);
1057 }
1058
1059 d_open_t _systrace_open;
1060
1061 int _systrace_open(dev_t dev, int flags, int devtype, struct proc *p)
1062 {
1063 #pragma unused(dev,flags,devtype,p)
1064 return 0;
1065 }
1066
1067 #define SYSTRACE_MAJOR -24 /* let the kernel pick the device number */
1068
1069 /*
1070 * A struct describing which functions will get invoked for certain
1071 * actions.
1072 */
/*
 * Character-device switch entry registered by systrace_init() through
 * cdevsw_add().  Only open is backed by a function in this file
 * (_systrace_open, which always returns 0).
 * NOTE(review): the eno_* entries are presumably the kernel's stock
 * "operation not supported" handlers -- confirm.
 */
1073 static struct cdevsw systrace_cdevsw =
1074 {
1075 _systrace_open, /* open */
1076 eno_opcl, /* close */
1077 eno_rdwrt, /* read */
1078 eno_rdwrt, /* write */
1079 eno_ioctl, /* ioctl */
1080 (stop_fcn_t *)nulldev, /* stop */
1081 (reset_fcn_t *)nulldev, /* reset */
1082 NULL, /* tty's */
1083 eno_select, /* select */
1084 eno_mmap, /* mmap */
1085 eno_strat, /* strategy */
1086 eno_getc, /* getc */
1087 eno_putc, /* putc */
1088 0 /* type */
1089 };
1090
1091 static int gSysTraceInited = 0;
1092
1093 void systrace_init( void );
1094
1095 void systrace_init( void )
1096 {
1097 if (0 == gSysTraceInited) {
1098 int majdevno = cdevsw_add(SYSTRACE_MAJOR, &systrace_cdevsw);
1099
1100 if (majdevno < 0) {
1101 printf("systrace_init: failed to allocate a major number!\n");
1102 gSysTraceInited = 0;
1103 return;
1104 }
1105
1106 systrace_attach( (dev_info_t *)(uintptr_t)majdevno, DDI_ATTACH );
1107 machtrace_attach( (dev_info_t *)(uintptr_t)majdevno, DDI_ATTACH );
1108
1109 gSysTraceInited = 1;
1110 } else
1111 panic("systrace_init: called twice!\n");
1112 }
1113 #undef SYSTRACE_MAJOR
1114 #endif /* __APPLE__ */
1115
1116 static uint64_t
1117 systrace_getarg(void *arg, dtrace_id_t id, void *parg, int argno, int aframes)
1118 {
1119 #pragma unused(arg,id,parg,aframes) /* __APPLE__ */
1120 uint64_t val = 0;
1121 syscall_arg_t *stack = (syscall_arg_t *)NULL;
1122
1123 uthread_t uthread = (uthread_t)get_bsdthread_info(current_thread());
1124
1125 if (uthread)
1126 stack = (syscall_arg_t *)uthread->t_dtrace_syscall_args;
1127
1128 if (!stack)
1129 return(0);
1130
1131 DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT);
1132 /* dtrace_probe arguments arg0 .. arg4 are 64bits wide */
1133 val = (uint64_t)*(stack+argno);
1134 DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT);
1135 return (val);
1136 }
1137
1138
1139 static uint64_t
1140 machtrace_getarg(void *arg, dtrace_id_t id, void *parg, int argno, int aframes)
1141 {
1142 #pragma unused(arg,id,parg,aframes) /* __APPLE__ */
1143 uint64_t val = 0;
1144 syscall_arg_t *stack = (syscall_arg_t *)NULL;
1145
1146 uthread_t uthread = (uthread_t)get_bsdthread_info(current_thread());
1147
1148 if (uthread)
1149 stack = (syscall_arg_t *)uthread->t_dtrace_syscall_args;
1150
1151 if (!stack)
1152 return(0);
1153
1154 DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT);
1155 /* dtrace_probe arguments arg0 .. arg4 are 64bits wide */
1156 val = (uint64_t)*(stack+argno);
1157 DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT);
1158 return (val);
1159 }
1160