/*
 * Copyright (c) 2000-2008 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */
#include <kern/task.h>
#include <kern/thread.h>
#include <kern/assert.h>
#include <kern/clock.h>
#include <kern/locks.h>
#include <kern/sched_prim.h>
#include <kern/debug.h>
#include <mach/machine/thread_status.h>
#include <mach/thread_act.h>
#include <mach/branch_predicates.h>

#include <sys/kernel.h>
#include <sys/proc_internal.h>
#include <sys/syscall.h>
#include <sys/systm.h>
#include <sys/errno.h>
#include <sys/kdebug.h>
#include <sys/sysent.h>
#include <sys/sysproto.h>
#include <sys/kauth.h>
#include <sys/systm.h>

#include <security/audit/audit.h>

#include <i386/machine_routines.h>
#include <mach/i386/syscall_sw.h>

#include <machine/pal_routines.h>
extern int32_t dtrace_systrace_syscall(struct proc *, void *, int *);
extern void dtrace_systrace_syscall_return(unsigned short, int, int *);

extern void unix_syscall(x86_saved_state_t *);
extern void unix_syscall64(x86_saved_state_t *);
extern void *find_user_regs(thread_t);

extern void x86_toggle_sysenter_arg_store(thread_t thread, boolean_t valid);
extern boolean_t x86_sysenter_arg_store_isvalid(thread_t thread);

/* dynamically generated at build time based on syscalls.master */
extern const char *syscallnames[];
/*
 * This needs to be a single switch so that it's "all on" or "all off",
 * rather than being turned on for some code paths and not others, as this
 * has a tendency to introduce "blame the next guy" bugs.
 */
#define FUNNEL_DEBUG 1	/* Check for funnel held on exit */
/*
 * Function:	unix_syscall
 *
 * Inputs:	regs - pointer to i386 save area
 */
void
unix_syscall(x86_saved_state_t *state)
{
	thread_t		thread;
	void			*vt;
	unsigned int		code;
	struct sysent		*callp;

	int			error;
	vm_offset_t		params;
	struct proc		*p;
	struct uthread		*uthread;
	x86_saved_state32_t	*regs;
	boolean_t		args_in_uthread;
	boolean_t		is_vfork;

	assert(is_saved_state32(state));
	regs = saved_state32(state);
	if (regs->eax == 0x800)
		thread_exception_return();

	thread = current_thread();
	uthread = get_bsdthread_info(thread);

	/* Get the appropriate proc; may be different from task's for vfork() */
	is_vfork = uthread->uu_flag & UT_VFORK;
	if (__improbable(is_vfork != 0))
		p = current_proc();
	else
		p = (struct proc *)get_bsdtask_info(current_task());

	/* Verify that we are not being called from a task without a proc */
	if (__improbable(p == NULL)) {
		task_terminate_internal(current_task());
		thread_exception_return();
		/* NOTREACHED */
	}

	code = regs->eax & I386_SYSCALL_NUMBER_MASK;
	DEBUG_KPRINT_SYSCALL_UNIX("unix_syscall: code=%d(%s) eip=%u\n",
			code, syscallnames[code >= NUM_SYSENT ? 63 : code], (uint32_t)regs->eip);
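	/*
	 * If the arg-byte count is encoded in eax (sysenter fast path) and the
	 * sysenter arg store is still valid, the argument words are already in
	 * the uthread and the copyin below is skipped.
	 */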
	args_in_uthread = ((regs->eax & I386_SYSCALL_ARG_BYTES_MASK) != 0) && x86_sysenter_arg_store_isvalid(thread);
	params = (vm_offset_t) (regs->uesp + sizeof (int));

	regs->efl &= ~(EFL_CF);
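
	/*
	 * Out-of-range system call numbers are redirected to sysent[63],
	 * a reserved slot (see syscalls.master) whose handler simply fails
	 * with nosys.
	 */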
	callp = (code >= NUM_SYSENT) ? &sysent[63] : &sysent[code];
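
	/*
	 * callp == sysent means syscall number 0, the indirect syscall(2):
	 * the real number is the first word of the user argument area, and
	 * the remaining arguments are shifted up by one slot.
	 */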
	if (__improbable(callp == sysent)) {
		code = fuword(params);
		params += sizeof(int);
		callp = (code >= NUM_SYSENT) ? &sysent[63] : &sysent[code];
	}
	vt = (void *)uthread->uu_arg;

	if (callp->sy_arg_bytes != 0) {
		sy_munge_t	*mungerp;

		assert((unsigned) callp->sy_arg_bytes <= sizeof (uthread->uu_arg));
		if (!args_in_uthread) {
			uint32_t	nargs;

			nargs = callp->sy_arg_bytes;
			error = copyin((user_addr_t) params, (char *) vt, nargs);
			if (error) {
				regs->eax = error;
				regs->efl |= EFL_CF;
				thread_exception_return();
				/* NOTREACHED */
			}
		}
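
		/*
		 * Syscall 180 is kdebug_trace; it is excluded from the kdebug
		 * tracepoints here and below so the tracing facility does not
		 * log its own system calls.
		 */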
		if (__probable(code != 180)) {
			int *ip = (int *)vt;

			KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_EXCP_SC, code) | DBG_FUNC_START,
				*ip, *(ip+1), *(ip+2), *(ip+3), 0);
		}

		mungerp = callp->sy_arg_munge32;

		/*
		 * If non-NULL, then call the syscall argument munger to
		 * copy in arguments (see xnu/bsd/dev/{i386|x86_64}/munge.s); the
		 * first argument is NULL because we are munging in place
		 * after a copyin because the ABI currently doesn't use
		 * registers to pass system call arguments.
		 */
		if (mungerp != NULL)
			(*mungerp)(NULL, vt);
	} else
		KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_EXCP_SC, code) | DBG_FUNC_START,
			0, 0, 0, 0, 0);
	/*
	 * Delayed binding of thread credential to process credential, if we
	 * are not running with an explicitly set thread credential.
	 */
	kauth_cred_uthread_update(uthread, p);
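
	/*
	 * uu_rval[1] is seeded with the user's edx so that system calls which
	 * only produce one return value leave the second return register
	 * unchanged on the way out.
	 */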
	uthread->uu_rval[0] = 0;
	uthread->uu_rval[1] = regs->edx;
	uthread->uu_flag |= UT_NOTCANCELPT;

	uthread->uu_iocount = 0;
	uthread->uu_vpindex = 0;

	AUDIT_SYSCALL_ENTER(code, p, uthread);
	error = (*(callp->sy_call))((void *) p, (void *) vt, &(uthread->uu_rval[0]));
	AUDIT_SYSCALL_EXIT(code, p, uthread, error);

	if (uthread->uu_iocount)
		printf("system call returned with uu_iocount != 0\n");

#if CONFIG_DTRACE
	uthread->t_dtrace_errno = error;
#endif /* CONFIG_DTRACE */
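
	/*
	 * Three outcomes: ERESTART rewinds the user pc so the call is
	 * re-issued, EJUSTRETURN means the handler already arranged the
	 * register state, and anything else is written back as an errno
	 * (carry set) or a pair of return values (carry clear).
	 */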
	if (__improbable(error == ERESTART)) {
		/*
		 * Move the user's pc back to repeat the syscall:
		 * 5 bytes for a sysenter, or 2 for an int 8x.
		 * The SYSENTER_TF_CS covers single-stepping over a sysenter
		 * - see debug trap handler in idt.s/idt64.s
		 */
		pal_syscall_restart(thread, state);
	}
	else if (error != EJUSTRETURN) {
		if (__improbable(error)) {
			regs->eax = error;
			regs->efl |= EFL_CF;	/* carry bit */
		} else { /* (not error) */
			regs->eax = uthread->uu_rval[0];
			regs->edx = uthread->uu_rval[1];
		}
	}

	DEBUG_KPRINT_SYSCALL_UNIX(
		"unix_syscall: error=%d retval=(%u,%u)\n",
		error, regs->eax, regs->edx);

	uthread->uu_flag &= ~UT_NOTCANCELPT;

#if FUNNEL_DEBUG
	/*
	 * if we're holding the funnel panic
	 */
	syscall_exit_funnelcheck();
#endif /* FUNNEL_DEBUG */
	if (__improbable(uthread->uu_lowpri_window)) {
		/*
		 * task is marked as a low priority I/O type
		 * and the I/O we issued while in this system call
		 * collided with normal I/O operations... we'll
		 * delay in order to mitigate the impact of this
		 * task on the normal operation of the system
		 */
		throttle_lowpri_io(TRUE);
	}
	if (__probable(code != 180))
		KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_EXCP_SC, code) | DBG_FUNC_END,
			error, uthread->uu_rval[0], uthread->uu_rval[1], p->p_pid, 0);

	if (__improbable(!is_vfork && callp->sy_call == (sy_call_t *)execve && !error)) {
		pal_execve_return(thread);
	}

	thread_exception_return();
	/* NOTREACHED */
}
void
unix_syscall64(x86_saved_state_t *state)
{
	thread_t		thread;
	unsigned int		code;
	struct sysent		*callp;
	void			*uargp;
	int			args_in_regs;
	int			error;
	struct proc		*p;
	struct uthread		*uthread;
	x86_saved_state64_t	*regs;

	assert(is_saved_state64(state));
	regs = saved_state64(state);
	if (regs->rax == 0x2000800)
		thread_exception_return();

	thread = current_thread();
	uthread = get_bsdthread_info(thread);

	/* Get the appropriate proc; may be different from task's for vfork() */
	if (__probable(!(uthread->uu_flag & UT_VFORK)))
		p = (struct proc *)get_bsdtask_info(current_task());
	else
		p = current_proc();

	/* Verify that we are not being called from a task without a proc */
	if (__improbable(p == NULL)) {
		regs->isf.rflags |= EFL_CF;
		task_terminate_internal(current_task());
		thread_exception_return();
		/* NOTREACHED */
	}

	/* six argument registers precede any stack-resident arguments */
	args_in_regs = 6;

	code = regs->rax & SYSCALL_NUMBER_MASK;
	DEBUG_KPRINT_SYSCALL_UNIX(
		"unix_syscall64: code=%d(%s) rip=%llx\n",
		code, syscallnames[code >= NUM_SYSENT ? 63 : code], regs->isf.rip);
	callp = (code >= NUM_SYSENT) ? &sysent[63] : &sysent[code];
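
	/*
	 * The syscall arguments arrive in rdi, rsi, rdx, r10, r8 and r9; the
	 * save area keeps those registers (and the v_arg spill slots that
	 * follow them) contiguous, so uargp can be handed to the handler as a
	 * flat argument array.
	 */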
	uargp = (void *)(&regs->rdi);
	if (__improbable(callp == sysent)) {
		/*
		 * indirect system call... system call number
		 * is passed as the first argument, so the real
		 * arguments begin at rsi and one fewer register
		 * is available before the stack spill.
		 */
		code = regs->rdi;
		callp = (code >= NUM_SYSENT) ? &sysent[63] : &sysent[code];
		uargp = (void *)(&regs->rsi);
		args_in_regs = 5;
	}
	if (callp->sy_narg != 0) {
		uint64_t *ip = (uint64_t *)uargp;

		KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_EXCP_SC, code) | DBG_FUNC_START,
			(int)(*ip), (int)(*(ip+1)), (int)(*(ip+2)), (int)(*(ip+3)), 0);

		assert(callp->sy_narg <= 8);
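
		/*
		 * Arguments beyond those delivered in registers were pushed on
		 * the user stack; copy them in from just above the return
		 * address (hence the + sizeof(user_addr_t)) into the spill
		 * slots that follow the argument registers in the save area.
		 */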
		if (__improbable(callp->sy_narg > args_in_regs)) {
			int copyin_count;

			copyin_count = (callp->sy_narg - args_in_regs) * sizeof(uint64_t);

			error = copyin((user_addr_t)(regs->isf.rsp + sizeof(user_addr_t)), (char *)&regs->v_arg6, copyin_count);
			if (error) {
				regs->rax = error;
				regs->isf.rflags |= EFL_CF;
				thread_exception_return();
				/* NOTREACHED */
			}
		}
		/*
		 * XXX Turn 64 bit unsafe calls into nosys()
		 */
		if (__improbable(callp->sy_flags & UNSAFE_64BIT)) {
			callp = &sysent[63];
		}
	} else
		KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_EXCP_SC, code) | DBG_FUNC_START,
			0, 0, 0, 0, 0);
	/*
	 * Delayed binding of thread credential to process credential, if we
	 * are not running with an explicitly set thread credential.
	 */
	kauth_cred_uthread_update(uthread, p);
	uthread->uu_rval[0] = 0;
	uthread->uu_rval[1] = 0;
	uthread->uu_flag |= UT_NOTCANCELPT;

	uthread->uu_iocount = 0;
	uthread->uu_vpindex = 0;

	AUDIT_SYSCALL_ENTER(code, p, uthread);
	error = (*(callp->sy_call))((void *) p, uargp, &(uthread->uu_rval[0]));
	AUDIT_SYSCALL_EXIT(code, p, uthread, error);

	if (uthread->uu_iocount)
		printf("system call returned with uu_iocount != 0\n");

#if CONFIG_DTRACE
	uthread->t_dtrace_errno = error;
#endif /* CONFIG_DTRACE */
	if (__improbable(error == ERESTART)) {
		/*
		 * all system calls come through via the syscall instruction
		 * in 64 bit mode... it's 2 bytes in length
		 * move the user's pc back to repeat the syscall:
		 */
		pal_syscall_restart( thread, state );
	}
	else if (error != EJUSTRETURN) {
		if (__improbable(error)) {
			regs->rax = error;
			regs->isf.rflags |= EFL_CF;	/* carry bit */
		} else { /* (not error) */
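			/*
			 * Write the result back according to the declared
			 * return type: int results go out (sign- or zero-
			 * extended) in rax/rdx, 64-bit results are taken from
			 * uu_rval as a single 64-bit quantity.
			 */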
			switch (callp->sy_return_type) {
			case _SYSCALL_RET_INT_T:
				regs->rax = uthread->uu_rval[0];
				regs->rdx = uthread->uu_rval[1];
				break;
			case _SYSCALL_RET_UINT_T:
				regs->rax = ((u_int)uthread->uu_rval[0]);
				regs->rdx = ((u_int)uthread->uu_rval[1]);
				break;
			case _SYSCALL_RET_OFF_T:
			case _SYSCALL_RET_ADDR_T:
			case _SYSCALL_RET_SIZE_T:
			case _SYSCALL_RET_SSIZE_T:
			case _SYSCALL_RET_UINT64_T:
				regs->rax = *((uint64_t *)(&uthread->uu_rval[0]));
				break;
			case _SYSCALL_RET_NONE:
				break;
			default:
				panic("unix_syscall: unknown return type");
			}
			regs->isf.rflags &= ~EFL_CF;
		}
	}
	DEBUG_KPRINT_SYSCALL_UNIX(
		"unix_syscall64: error=%d retval=(%llu,%llu)\n",
		error, regs->rax, regs->rdx);
	uthread->uu_flag &= ~UT_NOTCANCELPT;

#if FUNNEL_DEBUG
	/*
	 * if we're holding the funnel panic
	 */
	syscall_exit_funnelcheck();
#endif /* FUNNEL_DEBUG */
	if (__improbable(uthread->uu_lowpri_window)) {
		/*
		 * task is marked as a low priority I/O type
		 * and the I/O we issued while in this system call
		 * collided with normal I/O operations... we'll
		 * delay in order to mitigate the impact of this
		 * task on the normal operation of the system
		 */
		throttle_lowpri_io(TRUE);
	}
	if (__probable(code != 180))
		KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_EXCP_SC, code) | DBG_FUNC_END,
			error, uthread->uu_rval[0], uthread->uu_rval[1], p->p_pid, 0);

	thread_exception_return();
	/* NOTREACHED */
}
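
/*
 * unix_syscall_return is the completion path used when a system call finishes
 * somewhere other than the trap handlers above (for example after blocking and
 * resuming via a continuation): the saved user register state is recovered
 * through find_user_regs() and the result is written back just as in the
 * direct return paths.
 */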
void
unix_syscall_return(int error)
{
	thread_t		thread;
	struct uthread		*uthread;
	struct proc		*p;
	unsigned int		code;
	vm_offset_t		params;
	struct sysent		*callp;

	thread = current_thread();
	uthread = get_bsdthread_info(thread);

	pal_register_cache_state(thread, DIRTY);
	p = current_proc();

	if (proc_is64bit(p)) {
		x86_saved_state64_t	*regs;

		regs = saved_state64(find_user_regs(thread));

		/* reconstruct code for tracing before blasting rax */
		code = regs->rax & SYSCALL_NUMBER_MASK;
		callp = (code >= NUM_SYSENT) ? &sysent[63] : &sysent[code];
		if (callp == sysent) {
			/*
			 * indirect system call... system call number
			 * is the first argument; recover it for tracing.
			 */
			code = regs->rdi;
		}

#if CONFIG_DTRACE
		if (callp->sy_call == dtrace_systrace_syscall)
			dtrace_systrace_syscall_return( code, error, uthread->uu_rval );
#endif /* CONFIG_DTRACE */
		AUDIT_SYSCALL_EXIT(code, p, uthread, error);

		if (error == ERESTART) {
			pal_syscall_restart( thread, find_user_regs(thread) );
		}
		else if (error != EJUSTRETURN) {
			if (error) {
				regs->rax = error;
				regs->isf.rflags |= EFL_CF;	/* carry bit */
			} else { /* (not error) */
				switch (callp->sy_return_type) {
				case _SYSCALL_RET_INT_T:
					regs->rax = uthread->uu_rval[0];
					regs->rdx = uthread->uu_rval[1];
					break;
				case _SYSCALL_RET_UINT_T:
					regs->rax = ((u_int)uthread->uu_rval[0]);
					regs->rdx = ((u_int)uthread->uu_rval[1]);
					break;
				case _SYSCALL_RET_OFF_T:
				case _SYSCALL_RET_ADDR_T:
				case _SYSCALL_RET_SIZE_T:
				case _SYSCALL_RET_SSIZE_T:
				case _SYSCALL_RET_UINT64_T:
					regs->rax = *((uint64_t *)(&uthread->uu_rval[0]));
					break;
				case _SYSCALL_RET_NONE:
					break;
				default:
					panic("unix_syscall: unknown return type");
				}
				regs->isf.rflags &= ~EFL_CF;
			}
		}
		DEBUG_KPRINT_SYSCALL_UNIX(
			"unix_syscall_return: error=%d retval=(%llu,%llu)\n",
			error, regs->rax, regs->rdx);
	} else {
		x86_saved_state32_t	*regs;

		regs = saved_state32(find_user_regs(thread));

		regs->efl &= ~(EFL_CF);
		/* reconstruct code for tracing before blasting eax */
		code = regs->eax & I386_SYSCALL_NUMBER_MASK;
		callp = (code >= NUM_SYSENT) ? &sysent[63] : &sysent[code];
#if CONFIG_DTRACE
		if (callp->sy_call == dtrace_systrace_syscall)
			dtrace_systrace_syscall_return( code, error, uthread->uu_rval );
#endif /* CONFIG_DTRACE */
		AUDIT_SYSCALL_EXIT(code, p, uthread, error);
		if (callp == sysent) {
			params = (vm_offset_t) (regs->uesp + sizeof (int));
			code = fuword(params);
		}
		if (error == ERESTART) {
			pal_syscall_restart( thread, find_user_regs(thread) );
		}
		else if (error != EJUSTRETURN) {
			if (error) {
				regs->eax = error;
				regs->efl |= EFL_CF;	/* carry bit */
			} else { /* (not error) */
				regs->eax = uthread->uu_rval[0];
				regs->edx = uthread->uu_rval[1];
			}
		}
		DEBUG_KPRINT_SYSCALL_UNIX(
			"unix_syscall_return: error=%d retval=(%u,%u)\n",
			error, regs->eax, regs->edx);
	}
	uthread->uu_flag &= ~UT_NOTCANCELPT;

#if FUNNEL_DEBUG
	/*
	 * if we're holding the funnel panic
	 */
	syscall_exit_funnelcheck();
#endif /* FUNNEL_DEBUG */
	if (uthread->uu_lowpri_window) {
		/*
		 * task is marked as a low priority I/O type
		 * and the I/O we issued while in this system call
		 * collided with normal I/O operations... we'll
		 * delay in order to mitigate the impact of this
		 * task on the normal operation of the system
		 */
		throttle_lowpri_io(TRUE);
	}

	KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_EXCP_SC, code) | DBG_FUNC_END,
		error, uthread->uu_rval[0], uthread->uu_rval[1], p->p_pid, 0);

	thread_exception_return();
	/* NOTREACHED */
}
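
/*
 * Argument mungers: each letter of the name stands for one user argument
 * ('w' a 32-bit word widened to a 64-bit slot, 'l' a 64-bit value arriving as
 * two adjacent words).  The per-line comments capitalize the argument being
 * moved by that statement; conversion is done in place, highest slot first,
 * so no source word is overwritten before it has been copied.
 */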
void
munge_wwwlww(
	__unused const void	*in32,
	void			*out64)
{
	uint32_t	*arg32;
	uint64_t	*arg64;

	/* we convert in place in out64 */
	arg32 = (uint32_t *) out64;
	arg64 = (uint64_t *) out64;

	arg64[5] = arg32[6];	/* wwwlwW */
	arg64[4] = arg32[5];	/* wwwlWw */
	arg32[7] = arg32[4];	/* wwwLww (hi) */
	arg32[6] = arg32[3];	/* wwwLww (lo) */
	arg64[2] = arg32[2];	/* wwWlww */
	arg64[1] = arg32[1];	/* wWwlww */
	arg64[0] = arg32[0];	/* Wwwlww */
}
void
munge_wwlwww(
	__unused const void	*in32,
	void			*out64)
{
	uint32_t	*arg32;
	uint64_t	*arg64;

	/* we convert in place in out64 */
	arg32 = (uint32_t *) out64;
	arg64 = (uint64_t *) out64;

	arg64[5] = arg32[6];	/* wwlwwW */
	arg64[4] = arg32[5];	/* wwlwWw */
	arg64[3] = arg32[4];	/* wwlWww */
	arg32[5] = arg32[3];	/* wwLwww (hi) */
	arg32[4] = arg32[2];	/* wwLwww (lo) */
	arg64[1] = arg32[1];	/* wWlwww */
	arg64[0] = arg32[0];	/* Wwlwww */
}