]> git.saurik.com Git - apple/xnu.git/blob - bsd/dev/dtrace/systrace.c
xnu-1504.15.3.tar.gz
[apple/xnu.git] / bsd / dev / dtrace / systrace.c
1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21 /*
22 * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
23 * Use is subject to license terms.
24 */
25
26 /* #pragma ident "@(#)systrace.c 1.6 06/09/19 SMI" */
27
28 #if !defined(__APPLE__)
29 #include <sys/dtrace.h>
30 #include <sys/systrace.h>
31 #include <sys/stat.h>
32 #include <sys/systm.h>
33 #include <sys/conf.h>
34 #include <sys/ddi.h>
35 #include <sys/sunddi.h>
36 #include <sys/atomic.h>
37 #define SYSTRACE_ARTIFICIAL_FRAMES 1
38 #else
39
40 #ifdef KERNEL
41 #ifndef _KERNEL
42 #define _KERNEL /* Solaris vs. Darwin */
43 #endif
44 #endif
45
46 #define MACH__POSIX_C_SOURCE_PRIVATE 1 /* pulls in suitable savearea from mach/ppc/thread_status.h */
47 #include <kern/thread.h>
48 #include <mach/thread_status.h>
49 /* XXX All of these should really be derived from syscall_sw.h */
50 #if defined(__i386__) || defined (__x86_64__)
51 #define SYSCALL_CLASS_SHIFT 24
52 #define SYSCALL_CLASS_MASK (0xFF << SYSCALL_CLASS_SHIFT)
53 #define SYSCALL_NUMBER_MASK (~SYSCALL_CLASS_MASK)
54 #define I386_SYSCALL_NUMBER_MASK (0xFFFF)
55
56 typedef x86_saved_state_t savearea_t;
57 #endif
58
59 #include <sys/param.h>
60 #include <sys/systm.h>
61 #include <sys/proc.h>
62 #include <sys/errno.h>
63 #include <sys/ioctl.h>
64 #include <sys/conf.h>
65 #include <sys/fcntl.h>
66 #include <miscfs/devfs/devfs.h>
67
68 #include <sys/dtrace.h>
69 #include <sys/dtrace_impl.h>
70 #include "systrace.h"
71 #include <sys/stat.h>
72 #include <sys/systm.h>
73 #include <sys/conf.h>
74 #include <sys/user.h>
75
76 #if defined (__ppc__) || defined (__ppc64__)
77 #define SYSTRACE_ARTIFICIAL_FRAMES 3
78 #define MACHTRACE_ARTIFICIAL_FRAMES 4
79 #elif defined(__i386__) || defined (__x86_64__)
80 #define SYSTRACE_ARTIFICIAL_FRAMES 2
81 #define MACHTRACE_ARTIFICIAL_FRAMES 3
82 #else
83 #error Unknown Architecture
84 #endif
85
86 #include <sys/sysent.h>
87 #define sy_callc sy_call /* Map Solaris slot name to Darwin's */
88 #define NSYSCALL nsysent /* and is less than 500 or so */
89
90 extern const char *syscallnames[];
91
92 #include <sys/dtrace_glue.h>
93 #define casptr dtrace_casptr
94 #define membar_enter dtrace_membar_producer
95
96 #define LOADABLE_SYSCALL(a) 0 /* Not pertinent to Darwin. */
97 #define LOADED_SYSCALL(a) 1 /* Not pertinent to Darwin. */
98
99 systrace_sysent_t *systrace_sysent = NULL;
100 void (*systrace_probe)(dtrace_id_t, uint64_t, uint64_t,
101 uint64_t, uint64_t, uint64_t, uint64_t, uint64_t, uint64_t);
102
103 void
104 systrace_stub(dtrace_id_t id, uint64_t arg0, uint64_t arg1,
105 uint64_t arg2, uint64_t arg3, uint64_t arg4, uint64_t arg5, uint64_t arg6, uint64_t arg7)
106 {
107 #pragma unused(id,arg0,arg1,arg2,arg3,arg4,arg5,arg6,arg7)
108 }
109
110
111 int32_t
112 dtrace_systrace_syscall(struct proc *pp, void *uap, int *rv)
113 {
114 boolean_t flavor;
115 unsigned short code;
116
117 systrace_sysent_t *sy;
118 dtrace_id_t id;
119 int32_t rval;
120 #if 0 /* XXX */
121 proc_t *p;
122 #endif
123 syscall_arg_t *ip = (syscall_arg_t *)uap;
124
125 #if defined (__ppc__) || defined (__ppc64__)
126 {
127 savearea_t *regs = (savearea_t *)find_user_regs(current_thread());
128
129 flavor = (((unsigned int)regs->save_r0) == 0)? 1: 0;
130
131 if (flavor)
132 code = regs->save_r3;
133 else
134 code = regs->save_r0;
135
136 /*
137 * FIXME: unix_syscall screens for "unsafe calls" and instead calls nosys(), *not* sysent[code] !
138 */
139 }
140 #elif defined(__i386__) || defined (__x86_64__)
141 #pragma unused(flavor)
142 {
143 x86_saved_state_t *tagged_regs = (x86_saved_state_t *)find_user_regs(current_thread());
144
145 if (is_saved_state64(tagged_regs)) {
146 x86_saved_state64_t *regs = saved_state64(tagged_regs);
147 code = regs->rax & SYSCALL_NUMBER_MASK;
148 /*
149 * Check for indirect system call... system call number
150 * passed as 'arg0'
151 */
152 if (code == 0) {
153 code = regs->rdi;
154 }
155 } else {
156 code = saved_state32(tagged_regs)->eax & I386_SYSCALL_NUMBER_MASK;
157
158 if (code == 0) {
159 vm_offset_t params = (vm_offset_t) (saved_state32(tagged_regs)->uesp + sizeof (int));
160 code = fuword(params);
161 }
162 }
163 }
164 #else
165 #error Unknown Architecture
166 #endif
167
168 // Bounds "check" the value of code a la unix_syscall
169 sy = (code >= NUM_SYSENT) ? &systrace_sysent[63] : &systrace_sysent[code];
170
171 if ((id = sy->stsy_entry) != DTRACE_IDNONE) {
172 if (ip)
173 (*systrace_probe)(id, *ip, *(ip+1), *(ip+2), *(ip+3), *(ip+4), *(ip+5), *(ip+6), *(ip+7));
174 else
175 (*systrace_probe)(id, 0, 0, 0, 0, 0, 0, 0, 0);
176 }
177
178 #if 0 /* XXX */
179 /*
180 * We want to explicitly allow DTrace consumers to stop a process
181 * before it actually executes the meat of the syscall.
182 */
183 p = ttoproc(curthread);
184 mutex_enter(&p->p_lock);
185 if (curthread->t_dtrace_stop && !curthread->t_lwp->lwp_nostop) {
186 curthread->t_dtrace_stop = 0;
187 stop(PR_REQUESTED, 0);
188 }
189 mutex_exit(&p->p_lock);
190 #endif
191
192 rval = (*sy->stsy_underlying)(pp, uap, rv);
193
194 if ((id = sy->stsy_return) != DTRACE_IDNONE) {
195 uint64_t munged_rv0, munged_rv1;
196 uthread_t uthread = (uthread_t)get_bsdthread_info(current_thread());
197
198 if (uthread)
199 uthread->t_dtrace_errno = rval; /* Establish t_dtrace_errno now in case this enabling refers to it. */
200
201 /*
202 * "Decode" rv for use in the call to dtrace_probe()
203 */
204 if (rval == ERESTART) {
205 munged_rv0 = -1LL; /* System call will be reissued in user mode. Make DTrace report a -1 return. */
206 munged_rv1 = -1LL;
207 } else if (rval != EJUSTRETURN) {
208 if (rval) {
209 munged_rv0 = -1LL; /* Mimic what libc will do. */
210 munged_rv1 = -1LL;
211 } else {
212 switch (sy->stsy_return_type) {
213 case _SYSCALL_RET_INT_T:
214 munged_rv0 = rv[0];
215 munged_rv1 = rv[1];
216 break;
217 case _SYSCALL_RET_UINT_T:
218 munged_rv0 = ((u_int)rv[0]);
219 munged_rv1 = ((u_int)rv[1]);
220 break;
221 case _SYSCALL_RET_OFF_T:
222 case _SYSCALL_RET_UINT64_T:
223 munged_rv0 = *(u_int64_t *)rv;
224 munged_rv1 = 0LL;
225 break;
226 case _SYSCALL_RET_ADDR_T:
227 case _SYSCALL_RET_SIZE_T:
228 case _SYSCALL_RET_SSIZE_T:
229 munged_rv0 = *(user_addr_t *)rv;
230 munged_rv1 = 0LL;
231 break;
232 case _SYSCALL_RET_NONE:
233 munged_rv0 = 0LL;
234 munged_rv1 = 0LL;
235 break;
236 default:
237 munged_rv0 = 0LL;
238 munged_rv1 = 0LL;
239 break;
240 }
241 }
242 } else {
243 munged_rv0 = 0LL;
244 munged_rv1 = 0LL;
245 }
246
247 /*
248 * <http://mail.opensolaris.org/pipermail/dtrace-discuss/2007-January/003276.html> says:
249 *
250 * "This is a bit of an historical artifact. At first, the syscall provider just
251 * had its return value in arg0, and the fbt and pid providers had their return
252 * values in arg1 (so that we could use arg0 for the offset of the return site).
253 *
254 * We inevitably started writing scripts where we wanted to see the return
255 * values from probes in all three providers, and we made this script easier
256 * to write by replicating the syscall return values in arg1 to match fbt and
257 * pid. We debated briefly about removing the return value from arg0, but
258 * decided that it would be less confusing to have the same data in two places
259 * than to have some non-helpful, non-intuitive value in arg0.
260 *
261 * This change was made 4/23/2003 according to the DTrace project's putback log."
262 */
263 (*systrace_probe)(id, munged_rv0, munged_rv0, munged_rv1, (uint64_t)rval, 0, 0, 0, 0);
264 }
265
266 return (rval);
267 }
268
269 void
270 dtrace_systrace_syscall_return(unsigned short code, int rval, int *rv)
271 {
272 systrace_sysent_t *sy;
273 dtrace_id_t id;
274
275 // Bounds "check" the value of code a la unix_syscall_return
276 sy = (code >= NUM_SYSENT) ? &systrace_sysent[63] : &systrace_sysent[code];
277
278 if ((id = sy->stsy_return) != DTRACE_IDNONE) {
279 uint64_t munged_rv0, munged_rv1;
280 uthread_t uthread = (uthread_t)get_bsdthread_info(current_thread());
281
282 if (uthread)
283 uthread->t_dtrace_errno = rval; /* Establish t_dtrace_errno now in case this enabling refers to it. */
284
285 /*
286 * "Decode" rv for use in the call to dtrace_probe()
287 */
288 if (rval == ERESTART) {
289 munged_rv0 = -1LL; /* System call will be reissued in user mode. Make DTrace report a -1 return. */
290 munged_rv1 = -1LL;
291 } else if (rval != EJUSTRETURN) {
292 if (rval) {
293 munged_rv0 = -1LL; /* Mimic what libc will do. */
294 munged_rv1 = -1LL;
295 } else {
296 switch (sy->stsy_return_type) {
297 case _SYSCALL_RET_INT_T:
298 munged_rv0 = rv[0];
299 munged_rv1 = rv[1];
300 break;
301 case _SYSCALL_RET_UINT_T:
302 munged_rv0 = ((u_int)rv[0]);
303 munged_rv1 = ((u_int)rv[1]);
304 break;
305 case _SYSCALL_RET_OFF_T:
306 case _SYSCALL_RET_UINT64_T:
307 munged_rv0 = *(u_int64_t *)rv;
308 munged_rv1 = 0LL;
309 break;
310 case _SYSCALL_RET_ADDR_T:
311 case _SYSCALL_RET_SIZE_T:
312 case _SYSCALL_RET_SSIZE_T:
313 munged_rv0 = *(user_addr_t *)rv;
314 munged_rv1 = 0LL;
315 break;
316 case _SYSCALL_RET_NONE:
317 munged_rv0 = 0LL;
318 munged_rv1 = 0LL;
319 break;
320 default:
321 munged_rv0 = 0LL;
322 munged_rv1 = 0LL;
323 break;
324 }
325 }
326 } else {
327 munged_rv0 = 0LL;
328 munged_rv1 = 0LL;
329 }
330
331 (*systrace_probe)(id, munged_rv0, munged_rv0, munged_rv1, (uint64_t)rval, 0, 0, 0, 0);
332 }
333 }
334 #endif /* __APPLE__ */
335
/*
 * Encoding of the per-probe argument: the low SYSTRACE_SHIFT bits carry the
 * system call number, and the bit at position SYSTRACE_SHIFT is set for an
 * "entry" probe and clear for a "return" probe.
 */
#define	SYSTRACE_SHIFT		16
#define	SYSTRACE_ISENTRY(x)	((int)(x) >> SYSTRACE_SHIFT)
#define	SYSTRACE_SYSNUM(x)	((int)(x) & ((1 << SYSTRACE_SHIFT) - 1))
#define	SYSTRACE_ENTRY(id)	((1 << SYSTRACE_SHIFT) | (id))
#define	SYSTRACE_RETURN(id)	(id)
341
342 #if ((1 << SYSTRACE_SHIFT) <= NSYSCALL)
343 #error 1 << SYSTRACE_SHIFT must exceed number of system calls
344 #endif
345
346 static dev_info_t *systrace_devi;
347 static dtrace_provider_id_t systrace_id;
348
349 #if !defined (__APPLE__)
350 static void
351 systrace_init(struct sysent *actual, systrace_sysent_t **interposed)
352 {
353 systrace_sysent_t *sysent = *interposed;
354 int i;
355
356 if (sysent == NULL) {
357 *interposed = sysent = kmem_zalloc(sizeof (systrace_sysent_t) *
358 NSYSCALL, KM_SLEEP);
359 }
360
361 for (i = 0; i < NSYSCALL; i++) {
362 struct sysent *a = &actual[i];
363 systrace_sysent_t *s = &sysent[i];
364
365 if (LOADABLE_SYSCALL(a) && !LOADED_SYSCALL(a))
366 continue;
367
368 if (a->sy_callc == dtrace_systrace_syscall)
369 continue;
370
371 #ifdef _SYSCALL32_IMPL
372 if (a->sy_callc == dtrace_systrace_syscall32)
373 continue;
374 #endif
375
376 s->stsy_underlying = a->sy_callc;
377 }
378 }
379 #else
380 #define systrace_init _systrace_init /* Avoid name clash with Darwin automagic conf symbol */
381 static void
382 systrace_init(struct sysent *actual, systrace_sysent_t **interposed)
383 {
384
385 systrace_sysent_t *ssysent = *interposed; /* Avoid sysent shadow warning
386 from bsd/sys/sysent.h */
387 int i;
388
389 if (ssysent == NULL) {
390 *interposed = ssysent = kmem_zalloc(sizeof (systrace_sysent_t) *
391 NSYSCALL, KM_SLEEP);
392 }
393
394 for (i = 0; i < NSYSCALL; i++) {
395 struct sysent *a = &actual[i];
396 systrace_sysent_t *s = &ssysent[i];
397
398 if (LOADABLE_SYSCALL(a) && !LOADED_SYSCALL(a))
399 continue;
400
401 if (a->sy_callc == dtrace_systrace_syscall)
402 continue;
403
404 #ifdef _SYSCALL32_IMPL
405 if (a->sy_callc == dtrace_systrace_syscall32)
406 continue;
407 #endif
408
409 s->stsy_underlying = a->sy_callc;
410 s->stsy_return_type = a->sy_return_type;
411 }
412 }
413
414 #endif /* __APPLE__ */
415
416 /*ARGSUSED*/
417 static void
418 systrace_provide(void *arg, const dtrace_probedesc_t *desc)
419 {
420 #pragma unused(arg) /* __APPLE__ */
421 int i;
422
423 if (desc != NULL)
424 return;
425
426 systrace_init(sysent, &systrace_sysent);
427 #ifdef _SYSCALL32_IMPL
428 systrace_init(sysent32, &systrace_sysent32);
429 #endif
430
431 for (i = 0; i < NSYSCALL; i++) {
432 if (systrace_sysent[i].stsy_underlying == NULL)
433 continue;
434
435 if (dtrace_probe_lookup(systrace_id, NULL,
436 syscallnames[i], "entry") != 0)
437 continue;
438
439 (void) dtrace_probe_create(systrace_id, NULL, syscallnames[i],
440 "entry", SYSTRACE_ARTIFICIAL_FRAMES,
441 (void *)((uintptr_t)SYSTRACE_ENTRY(i)));
442 (void) dtrace_probe_create(systrace_id, NULL, syscallnames[i],
443 "return", SYSTRACE_ARTIFICIAL_FRAMES,
444 (void *)((uintptr_t)SYSTRACE_RETURN(i)));
445
446 systrace_sysent[i].stsy_entry = DTRACE_IDNONE;
447 systrace_sysent[i].stsy_return = DTRACE_IDNONE;
448 #ifdef _SYSCALL32_IMPL
449 systrace_sysent32[i].stsy_entry = DTRACE_IDNONE;
450 systrace_sysent32[i].stsy_return = DTRACE_IDNONE;
451 #endif
452 }
453 }
454 #if defined(__APPLE__)
455 #undef systrace_init
456 #endif
457
458 /*ARGSUSED*/
459 static void
460 systrace_destroy(void *arg, dtrace_id_t id, void *parg)
461 {
462 #pragma unused(arg,id) /* __APPLE__ */
463
464 int sysnum = SYSTRACE_SYSNUM((uintptr_t)parg);
465
466 #pragma unused(sysnum) /* __APPLE__ */
467 /*
468 * There's nothing to do here but assert that we have actually been
469 * disabled.
470 */
471 if (SYSTRACE_ISENTRY((uintptr_t)parg)) {
472 ASSERT(systrace_sysent[sysnum].stsy_entry == DTRACE_IDNONE);
473 #ifdef _SYSCALL32_IMPL
474 ASSERT(systrace_sysent32[sysnum].stsy_entry == DTRACE_IDNONE);
475 #endif
476 } else {
477 ASSERT(systrace_sysent[sysnum].stsy_return == DTRACE_IDNONE);
478 #ifdef _SYSCALL32_IMPL
479 ASSERT(systrace_sysent32[sysnum].stsy_return == DTRACE_IDNONE);
480 #endif
481 }
482 }
483
484 /*ARGSUSED*/
485 static void
486 systrace_enable(void *arg, dtrace_id_t id, void *parg)
487 {
488 #pragma unused(arg) /* __APPLE__ */
489
490 int sysnum = SYSTRACE_SYSNUM((uintptr_t)parg);
491 int enabled = (systrace_sysent[sysnum].stsy_entry != DTRACE_IDNONE ||
492 systrace_sysent[sysnum].stsy_return != DTRACE_IDNONE);
493
494 if (SYSTRACE_ISENTRY((uintptr_t)parg)) {
495 systrace_sysent[sysnum].stsy_entry = id;
496 #ifdef _SYSCALL32_IMPL
497 systrace_sysent32[sysnum].stsy_entry = id;
498 #endif
499 } else {
500 systrace_sysent[sysnum].stsy_return = id;
501 #ifdef _SYSCALL32_IMPL
502 systrace_sysent32[sysnum].stsy_return = id;
503 #endif
504 }
505
506 if (enabled) {
507 ASSERT(sysent[sysnum].sy_callc == dtrace_systrace_syscall);
508 return;
509 }
510
511 (void) casptr(&sysent[sysnum].sy_callc,
512 (void *)systrace_sysent[sysnum].stsy_underlying,
513 (void *)dtrace_systrace_syscall);
514 #ifdef _SYSCALL32_IMPL
515 (void) casptr(&sysent32[sysnum].sy_callc,
516 (void *)systrace_sysent32[sysnum].stsy_underlying,
517 (void *)dtrace_systrace_syscall32);
518 #endif
519 }
520
521 /*ARGSUSED*/
522 static void
523 systrace_disable(void *arg, dtrace_id_t id, void *parg)
524 {
525 #pragma unused(arg,id) /* __APPLE__ */
526
527 int sysnum = SYSTRACE_SYSNUM((uintptr_t)parg);
528 int disable = (systrace_sysent[sysnum].stsy_entry == DTRACE_IDNONE ||
529 systrace_sysent[sysnum].stsy_return == DTRACE_IDNONE);
530
531 if (disable) {
532 (void) casptr(&sysent[sysnum].sy_callc,
533 (void *)dtrace_systrace_syscall,
534 (void *)systrace_sysent[sysnum].stsy_underlying);
535
536 #ifdef _SYSCALL32_IMPL
537 (void) casptr(&sysent32[sysnum].sy_callc,
538 (void *)dtrace_systrace_syscall32,
539 (void *)systrace_sysent32[sysnum].stsy_underlying);
540 #endif
541 }
542
543 if (SYSTRACE_ISENTRY((uintptr_t)parg)) {
544 systrace_sysent[sysnum].stsy_entry = DTRACE_IDNONE;
545 #ifdef _SYSCALL32_IMPL
546 systrace_sysent32[sysnum].stsy_entry = DTRACE_IDNONE;
547 #endif
548 } else {
549 systrace_sysent[sysnum].stsy_return = DTRACE_IDNONE;
550 #ifdef _SYSCALL32_IMPL
551 systrace_sysent32[sysnum].stsy_return = DTRACE_IDNONE;
552 #endif
553 }
554 }
555
556 static dtrace_pattr_t systrace_attr = {
557 { DTRACE_STABILITY_EVOLVING, DTRACE_STABILITY_EVOLVING, DTRACE_CLASS_COMMON },
558 { DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_UNKNOWN },
559 { DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_ISA },
560 { DTRACE_STABILITY_EVOLVING, DTRACE_STABILITY_EVOLVING, DTRACE_CLASS_COMMON },
561 { DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_ISA },
562 };
563
564 static dtrace_pops_t systrace_pops = {
565 systrace_provide,
566 NULL,
567 systrace_enable,
568 systrace_disable,
569 NULL,
570 NULL,
571 NULL,
572 NULL,
573 NULL,
574 systrace_destroy
575 };
576
577 static int
578 systrace_attach(dev_info_t *devi, ddi_attach_cmd_t cmd)
579 {
580 switch (cmd) {
581 case DDI_ATTACH:
582 break;
583 case DDI_RESUME:
584 return (DDI_SUCCESS);
585 default:
586 return (DDI_FAILURE);
587 }
588
589 #if !defined(__APPLE__)
590 systrace_probe = (void (*)())dtrace_probe;
591 membar_enter();
592
593 if (ddi_create_minor_node(devi, "systrace", S_IFCHR, 0,
594 DDI_PSEUDO, NULL) == DDI_FAILURE ||
595 dtrace_register("syscall", &systrace_attr, DTRACE_PRIV_USER, NULL,
596 &systrace_pops, NULL, &systrace_id) != 0) {
597 systrace_probe = systrace_stub;
598 ddi_remove_minor_node(devi, NULL);
599 return (DDI_FAILURE);
600 }
601 #else
602 systrace_probe = (void(*))&dtrace_probe;
603 membar_enter();
604
605 if (ddi_create_minor_node(devi, "systrace", S_IFCHR, 0,
606 DDI_PSEUDO, 0) == DDI_FAILURE ||
607 dtrace_register("syscall", &systrace_attr, DTRACE_PRIV_USER, NULL,
608 &systrace_pops, NULL, &systrace_id) != 0) {
609 systrace_probe = systrace_stub;
610 ddi_remove_minor_node(devi, NULL);
611 return (DDI_FAILURE);
612 }
613 #endif /* __APPLE__ */
614
615 ddi_report_dev(devi);
616 systrace_devi = devi;
617
618 return (DDI_SUCCESS);
619 }
620
621 #if !defined(__APPLE__)
622 static int
623 systrace_detach(dev_info_t *devi, ddi_detach_cmd_t cmd)
624 {
625 switch (cmd) {
626 case DDI_DETACH:
627 break;
628 case DDI_SUSPEND:
629 return (DDI_SUCCESS);
630 default:
631 return (DDI_FAILURE);
632 }
633
634 if (dtrace_unregister(systrace_id) != 0)
635 return (DDI_FAILURE);
636
637 ddi_remove_minor_node(devi, NULL);
638 systrace_probe = systrace_stub;
639 return (DDI_SUCCESS);
640 }
641
642 /*ARGSUSED*/
643 static int
644 systrace_info(dev_info_t *dip, ddi_info_cmd_t infocmd, void *arg, void **result)
645 {
646 int error;
647
648 switch (infocmd) {
649 case DDI_INFO_DEVT2DEVINFO:
650 *result = (void *)systrace_devi;
651 error = DDI_SUCCESS;
652 break;
653 case DDI_INFO_DEVT2INSTANCE:
654 *result = (void *)0;
655 error = DDI_SUCCESS;
656 break;
657 default:
658 error = DDI_FAILURE;
659 }
660 return (error);
661 }
662
663 /*ARGSUSED*/
664 static int
665 systrace_open(dev_t *devp, int flag, int otyp, cred_t *cred_p)
666 {
667 return (0);
668 }
669
670 static struct cb_ops systrace_cb_ops = {
671 systrace_open, /* open */
672 nodev, /* close */
673 nulldev, /* strategy */
674 nulldev, /* print */
675 nodev, /* dump */
676 nodev, /* read */
677 nodev, /* write */
678 nodev, /* ioctl */
679 nodev, /* devmap */
680 nodev, /* mmap */
681 nodev, /* segmap */
682 nochpoll, /* poll */
683 ddi_prop_op, /* cb_prop_op */
684 0, /* streamtab */
685 D_NEW | D_MP /* Driver compatibility flag */
686 };
687
688 static struct dev_ops systrace_ops = {
689 DEVO_REV, /* devo_rev, */
690 0, /* refcnt */
691 systrace_info, /* get_dev_info */
692 nulldev, /* identify */
693 nulldev, /* probe */
694 systrace_attach, /* attach */
695 systrace_detach, /* detach */
696 nodev, /* reset */
697 &systrace_cb_ops, /* driver operations */
698 NULL, /* bus operations */
699 nodev /* dev power */
700 };
701
702 /*
703 * Module linkage information for the kernel.
704 */
705 static struct modldrv modldrv = {
706 &mod_driverops, /* module type (this is a pseudo driver) */
707 "System Call Tracing", /* name of module */
708 &systrace_ops, /* driver ops */
709 };
710
711 static struct modlinkage modlinkage = {
712 MODREV_1,
713 (void *)&modldrv,
714 NULL
715 };
716
717 int
718 _init(void)
719 {
720 return (mod_install(&modlinkage));
721 }
722
723 int
724 _info(struct modinfo *modinfop)
725 {
726 return (mod_info(&modlinkage, modinfop));
727 }
728
729 int
730 _fini(void)
731 {
732 return (mod_remove(&modlinkage));
733 }
734 #else
735 typedef kern_return_t (*mach_call_t)(void *);
736
737 /* XXX From #include <kern/syscall_sw.h> which may be changed for 64 bit! */
738 typedef void mach_munge_t(const void *, void *);
739
740 typedef struct {
741 int mach_trap_arg_count;
742 int (*mach_trap_function)(void);
743 #if defined(__i386__)
744 boolean_t mach_trap_stack;
745 #else
746 mach_munge_t *mach_trap_arg_munge32; /* system call arguments for 32-bit */
747 mach_munge_t *mach_trap_arg_munge64; /* system call arguments for 64-bit */
748 #endif
749 #if !MACH_ASSERT
750 int mach_trap_unused;
751 #else
752 const char* mach_trap_name;
753 #endif /* !MACH_ASSERT */
754 } mach_trap_t;
755
756 extern mach_trap_t mach_trap_table[];
757 extern int mach_trap_count;
758
759 extern const char *mach_syscall_name_table[];
760
761 /* XXX From osfmk/i386/bsd_i386.c */
762 struct mach_call_args {
763 syscall_arg_t arg1;
764 syscall_arg_t arg2;
765 syscall_arg_t arg3;
766 syscall_arg_t arg4;
767 syscall_arg_t arg5;
768 syscall_arg_t arg6;
769 syscall_arg_t arg7;
770 syscall_arg_t arg8;
771 syscall_arg_t arg9;
772 };
773
774 #undef NSYSCALL
775 #define NSYSCALL mach_trap_count
776
777 #if ((1 << SYSTRACE_SHIFT) <= NSYSCALL)
778 #error 1 << SYSTRACE_SHIFT must exceed number of Mach traps
779 #endif
780
781 typedef systrace_sysent_t machtrace_sysent_t;
782
783 static machtrace_sysent_t *machtrace_sysent = NULL;
784
785 void (*machtrace_probe)(dtrace_id_t, uint64_t, uint64_t,
786 uint64_t, uint64_t, uint64_t);
787
788 static dev_info_t *machtrace_devi;
789 static dtrace_provider_id_t machtrace_id;
790
791 static kern_return_t
792 dtrace_machtrace_syscall(struct mach_call_args *args)
793 {
794 boolean_t flavor;
795 unsigned short code;
796
797 machtrace_sysent_t *sy;
798 dtrace_id_t id;
799 kern_return_t rval;
800 #if 0 /* XXX */
801 proc_t *p;
802 #endif
803 syscall_arg_t *ip = (syscall_arg_t *)args;
804 mach_call_t mach_call;
805
806 #if defined (__ppc__) || defined (__ppc64__)
807 {
808 savearea_t *regs = (savearea_t *)find_user_regs(current_thread());
809
810 flavor = (((unsigned int)regs->save_r0) == 0)? 1: 0;
811
812 if (flavor)
813 code = -regs->save_r3;
814 else
815 code = -regs->save_r0;
816 }
817 #elif defined(__i386__) || defined (__x86_64__)
818 #pragma unused(flavor)
819 {
820 x86_saved_state_t *tagged_regs = (x86_saved_state_t *)find_user_regs(current_thread());
821
822 if (is_saved_state64(tagged_regs)) {
823 code = saved_state64(tagged_regs)->rax & SYSCALL_NUMBER_MASK;
824 } else {
825 code = -saved_state32(tagged_regs)->eax;
826 }
827 }
828 #else
829 #error Unknown Architecture
830 #endif
831
832 sy = &machtrace_sysent[code];
833
834 if ((id = sy->stsy_entry) != DTRACE_IDNONE)
835 (*machtrace_probe)(id, *ip, *(ip+1), *(ip+2), *(ip+3), *(ip+4));
836
837 #if 0 /* XXX */
838 /*
839 * We want to explicitly allow DTrace consumers to stop a process
840 * before it actually executes the meat of the syscall.
841 */
842 p = ttoproc(curthread);
843 mutex_enter(&p->p_lock);
844 if (curthread->t_dtrace_stop && !curthread->t_lwp->lwp_nostop) {
845 curthread->t_dtrace_stop = 0;
846 stop(PR_REQUESTED, 0);
847 }
848 mutex_exit(&p->p_lock);
849 #endif
850
851 mach_call = (mach_call_t)(*sy->stsy_underlying);
852 rval = mach_call(args);
853
854 if ((id = sy->stsy_return) != DTRACE_IDNONE)
855 (*machtrace_probe)(id, (uint64_t)rval, 0, 0, 0, 0);
856
857 return (rval);
858 }
859
860 static void
861 machtrace_init(mach_trap_t *actual, machtrace_sysent_t **interposed)
862 {
863 machtrace_sysent_t *msysent = *interposed;
864 int i;
865
866 if (msysent == NULL) {
867 *interposed = msysent = kmem_zalloc(sizeof (machtrace_sysent_t) *
868 NSYSCALL, KM_SLEEP);
869 }
870
871 for (i = 0; i < NSYSCALL; i++) {
872 mach_trap_t *a = &actual[i];
873 machtrace_sysent_t *s = &msysent[i];
874
875 if (LOADABLE_SYSCALL(a) && !LOADED_SYSCALL(a))
876 continue;
877
878 if ((mach_call_t)(a->mach_trap_function) == (mach_call_t)(dtrace_machtrace_syscall))
879 continue;
880
881 s->stsy_underlying = (sy_call_t *)a->mach_trap_function;
882 }
883 }
884
885 /*ARGSUSED*/
886 static void
887 machtrace_provide(void *arg, const dtrace_probedesc_t *desc)
888 {
889 #pragma unused(arg) /* __APPLE__ */
890
891 int i;
892
893 if (desc != NULL)
894 return;
895
896 machtrace_init(mach_trap_table, &machtrace_sysent);
897
898 for (i = 0; i < NSYSCALL; i++) {
899
900 if (machtrace_sysent[i].stsy_underlying == NULL)
901 continue;
902
903 if (dtrace_probe_lookup(machtrace_id, NULL,
904 mach_syscall_name_table[i], "entry") != 0)
905 continue;
906
907 (void) dtrace_probe_create(machtrace_id, NULL, mach_syscall_name_table[i],
908 "entry", MACHTRACE_ARTIFICIAL_FRAMES,
909 (void *)((uintptr_t)SYSTRACE_ENTRY(i)));
910 (void) dtrace_probe_create(machtrace_id, NULL, mach_syscall_name_table[i],
911 "return", MACHTRACE_ARTIFICIAL_FRAMES,
912 (void *)((uintptr_t)SYSTRACE_RETURN(i)));
913
914 machtrace_sysent[i].stsy_entry = DTRACE_IDNONE;
915 machtrace_sysent[i].stsy_return = DTRACE_IDNONE;
916 }
917 }
918
919 /*ARGSUSED*/
920 static void
921 machtrace_destroy(void *arg, dtrace_id_t id, void *parg)
922 {
923 #pragma unused(arg,id) /* __APPLE__ */
924 int sysnum = SYSTRACE_SYSNUM((uintptr_t)parg);
925
926 #pragma unused(sysnum) /* __APPLE__ */
927
928 /*
929 * There's nothing to do here but assert that we have actually been
930 * disabled.
931 */
932 if (SYSTRACE_ISENTRY((uintptr_t)parg)) {
933 ASSERT(machtrace_sysent[sysnum].stsy_entry == DTRACE_IDNONE);
934 } else {
935 ASSERT(machtrace_sysent[sysnum].stsy_return == DTRACE_IDNONE);
936 }
937 }
938
939 /*ARGSUSED*/
940 static void
941 machtrace_enable(void *arg, dtrace_id_t id, void *parg)
942 {
943 #pragma unused(arg) /* __APPLE__ */
944
945 int sysnum = SYSTRACE_SYSNUM((uintptr_t)parg);
946 int enabled = (machtrace_sysent[sysnum].stsy_entry != DTRACE_IDNONE ||
947 machtrace_sysent[sysnum].stsy_return != DTRACE_IDNONE);
948
949 if (SYSTRACE_ISENTRY((uintptr_t)parg)) {
950 machtrace_sysent[sysnum].stsy_entry = id;
951 } else {
952 machtrace_sysent[sysnum].stsy_return = id;
953 }
954
955 if (enabled) {
956 ASSERT(sysent[sysnum].sy_callc == (void *)dtrace_machtrace_syscall);
957 return;
958 }
959
960 (void) casptr(&mach_trap_table[sysnum].mach_trap_function,
961 (void *)machtrace_sysent[sysnum].stsy_underlying,
962 (void *)dtrace_machtrace_syscall);
963 }
964
965 /*ARGSUSED*/
966 static void
967 machtrace_disable(void *arg, dtrace_id_t id, void *parg)
968 {
969 #pragma unused(arg,id) /* __APPLE__ */
970
971 int sysnum = SYSTRACE_SYSNUM((uintptr_t)parg);
972 int disable = (machtrace_sysent[sysnum].stsy_entry == DTRACE_IDNONE ||
973 machtrace_sysent[sysnum].stsy_return == DTRACE_IDNONE);
974
975 if (disable) {
976 (void) casptr(&mach_trap_table[sysnum].mach_trap_function,
977 (void *)dtrace_machtrace_syscall,
978 (void *)machtrace_sysent[sysnum].stsy_underlying);
979
980 }
981
982 if (SYSTRACE_ISENTRY((uintptr_t)parg)) {
983 machtrace_sysent[sysnum].stsy_entry = DTRACE_IDNONE;
984 } else {
985 machtrace_sysent[sysnum].stsy_return = DTRACE_IDNONE;
986 }
987 }
988
989 static dtrace_pattr_t machtrace_attr = {
990 { DTRACE_STABILITY_EVOLVING, DTRACE_STABILITY_EVOLVING, DTRACE_CLASS_COMMON },
991 { DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_UNKNOWN },
992 { DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_ISA },
993 { DTRACE_STABILITY_EVOLVING, DTRACE_STABILITY_EVOLVING, DTRACE_CLASS_COMMON },
994 { DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_ISA },
995 };
996
997 static dtrace_pops_t machtrace_pops = {
998 machtrace_provide,
999 NULL,
1000 machtrace_enable,
1001 machtrace_disable,
1002 NULL,
1003 NULL,
1004 NULL,
1005 NULL,
1006 NULL,
1007 machtrace_destroy
1008 };
1009
1010 static int
1011 machtrace_attach(dev_info_t *devi, ddi_attach_cmd_t cmd)
1012 {
1013 switch (cmd) {
1014 case DDI_ATTACH:
1015 break;
1016 case DDI_RESUME:
1017 return (DDI_SUCCESS);
1018 default:
1019 return (DDI_FAILURE);
1020 }
1021
1022 #if !defined(__APPLE__)
1023 machtrace_probe = (void (*)())dtrace_probe;
1024 membar_enter();
1025
1026 if (ddi_create_minor_node(devi, "machtrace", S_IFCHR, 0,
1027 DDI_PSEUDO, NULL) == DDI_FAILURE ||
1028 dtrace_register("mach_trap", &machtrace_attr, DTRACE_PRIV_USER, NULL,
1029 &machtrace_pops, NULL, &machtrace_id) != 0) {
1030 machtrace_probe = systrace_stub;
1031 #else
1032 machtrace_probe = dtrace_probe;
1033 membar_enter();
1034
1035 if (ddi_create_minor_node(devi, "machtrace", S_IFCHR, 0,
1036 DDI_PSEUDO, 0) == DDI_FAILURE ||
1037 dtrace_register("mach_trap", &machtrace_attr, DTRACE_PRIV_USER, NULL,
1038 &machtrace_pops, NULL, &machtrace_id) != 0) {
1039 machtrace_probe = (void (*))&systrace_stub;
1040 #endif /* __APPLE__ */
1041 ddi_remove_minor_node(devi, NULL);
1042 return (DDI_FAILURE);
1043 }
1044
1045 ddi_report_dev(devi);
1046 machtrace_devi = devi;
1047
1048 return (DDI_SUCCESS);
1049 }
1050
1051 d_open_t _systrace_open;
1052
1053 int _systrace_open(dev_t dev, int flags, int devtype, struct proc *p)
1054 {
1055 #pragma unused(dev,flags,devtype,p)
1056 return 0;
1057 }
1058
1059 #define SYSTRACE_MAJOR -24 /* let the kernel pick the device number */
1060
1061 /*
1062 * A struct describing which functions will get invoked for certain
1063 * actions.
1064 */
1065 static struct cdevsw systrace_cdevsw =
1066 {
1067 _systrace_open, /* open */
1068 eno_opcl, /* close */
1069 eno_rdwrt, /* read */
1070 eno_rdwrt, /* write */
1071 eno_ioctl, /* ioctl */
1072 (stop_fcn_t *)nulldev, /* stop */
1073 (reset_fcn_t *)nulldev, /* reset */
1074 NULL, /* tty's */
1075 eno_select, /* select */
1076 eno_mmap, /* mmap */
1077 eno_strat, /* strategy */
1078 eno_getc, /* getc */
1079 eno_putc, /* putc */
1080 0 /* type */
1081 };
1082
1083 static int gSysTraceInited = 0;
1084
1085 void systrace_init( void );
1086
1087 void systrace_init( void )
1088 {
1089 if (0 == gSysTraceInited) {
1090 int majdevno = cdevsw_add(SYSTRACE_MAJOR, &systrace_cdevsw);
1091
1092 if (majdevno < 0) {
1093 printf("systrace_init: failed to allocate a major number!\n");
1094 gSysTraceInited = 0;
1095 return;
1096 }
1097
1098 systrace_attach( (dev_info_t *)(uintptr_t)majdevno, DDI_ATTACH );
1099 machtrace_attach( (dev_info_t *)(uintptr_t)majdevno, DDI_ATTACH );
1100
1101 gSysTraceInited = 1;
1102 } else
1103 panic("systrace_init: called twice!\n");
1104 }
1105 #undef SYSTRACE_MAJOR
1106 #endif /* __APPLE__ */