--- /dev/null
+/*
+ * Copyright (c) 2000-2005 Apple Computer, Inc. All rights reserved.
+ *
+ * @APPLE_LICENSE_HEADER_START@
+ *
+ * The contents of this file constitute Original Code as defined in and
+ * are subject to the Apple Public Source License Version 1.1 (the
+ * "License"). You may not use this file except in compliance with the
+ * License. Please obtain a copy of the License at
+ * http://www.apple.com/publicsource and read it before using this file.
+ *
+ * This Original Code and all software distributed under the License are
+ * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+ * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
+ * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the
+ * License for the specific language governing rights and limitations
+ * under the License.
+ *
+ * @APPLE_LICENSE_HEADER_END@
+ */
+#include <kern/task.h>
+#include <kern/thread.h>
+#include <kern/assert.h>
+#include <kern/clock.h>
+#include <kern/locks.h>
+#include <kern/sched_prim.h>
+#include <mach/machine/thread_status.h>
+#include <mach/thread_act.h>
+
+#include <sys/kernel.h>
+#include <sys/vm.h>
+#include <sys/proc_internal.h>
+#include <sys/syscall.h>
+#include <sys/systm.h>
+#include <sys/user.h>
+#include <sys/errno.h>
+#include <sys/ktrace.h>
+#include <sys/kdebug.h>
+#include <sys/sysent.h>
+#include <sys/sysproto.h>
+#include <sys/kauth.h>
+#include <sys/systm.h>
+
+#include <bsm/audit_kernel.h>
+
+#include <i386/seg.h>
+#include <i386/machine_routines.h>
+#include <mach/i386/syscall_sw.h>
+
+/*
+ * Prototypes for the system call entry points defined in this file
+ * (unix_syscall, unix_syscall64, unix_syscall_return) and for external
+ * routines referenced by them:
+ *   find_user_regs       - used by unix_syscall_return() to obtain the
+ *                          saved user register state of a thread
+ *   IOSleep              - used to apply the low priority I/O delay
+ *   exit_funnel_section  - called on every system call exit path
+ */
+extern void unix_syscall(x86_saved_state_t *);
+extern void unix_syscall64(x86_saved_state_t *);
+extern void unix_syscall_return(int);
+extern void *find_user_regs(thread_t);
+extern void IOSleep(int);
+extern void exit_funnel_section(void);
+
+/* NOTE(review): not referenced in the visible part of this file */
+extern void Debugger(const char * message);
+
+/*
+ * Function:	unix_syscall
+ *
+ * Purpose:	Dispatch a BSD system call made by a thread whose saved
+ *		state is the 32-bit flavor.  The system call number is
+ *		taken from eax and the arguments are copied in from the
+ *		user stack.
+ *
+ * Inputs:	state - pointer to the x86 saved state; asserted to be
+ *			the 32-bit flavor
+ *
+ * Outputs:	none.  Results are delivered through the saved user
+ *		registers (eax/edx, carry flag set on error).  Every path
+ *		leaves through thread_exception_return().
+ */
+void
+unix_syscall(x86_saved_state_t *state)
+{
+	thread_t		thread;
+	void			*vt;
+	unsigned int		code;
+	struct sysent		*callp;
+	int			arg_bytes;
+	int			error;
+	int			funnel_type;
+	vm_offset_t		params;
+	struct proc		*p;
+	struct uthread		*uthread;
+	unsigned int		cancel_enable;
+	x86_saved_state32_t	*regs;
+
+	assert(is_saved_state32(state));
+	regs = saved_state32(state);
+
+	/*
+	 * NOTE(review): call number 0x800 is returned to user space
+	 * untouched; its meaning is not visible in this file - confirm.
+	 */
+	if (regs->eax == 0x800)
+		thread_exception_return();
+
+	thread = current_thread();
+	uthread = get_bsdthread_info(thread);
+
+	/* Get the appropriate proc; may be different from task's for vfork() */
+	if (!(uthread->uu_flag & UT_VFORK))
+		p = (struct proc *)get_bsdtask_info(current_task());
+	else
+		p = current_proc();
+
+	/* Verify that we are not being called from a task without a proc */
+	if (p == NULL) {
+		regs->eax = EPERM;
+		regs->efl |= EFL_CF;
+		task_terminate_internal(current_task());
+		thread_exception_return();
+		/* NOTREACHED */
+	}
+
+	code = regs->eax;
+	/* the arguments start one int above the user stack pointer */
+	params = (vm_offset_t) ((caddr_t)regs->uesp + sizeof (int));
+	/* out-of-range call numbers are routed to sysent[63] */
+	callp = (code >= (unsigned int)nsysent) ? &sysent[63] : &sysent[code];
+
+	if (callp == sysent) {
+		/*
+		 * indirect system call... the real system call number is
+		 * the first word on the user stack, followed by its arguments
+		 */
+		code = fuword(params);
+		params += sizeof (int);
+		callp = (code >= (unsigned int)nsysent) ? &sysent[63] : &sysent[code];
+	}
+	vt = (void *)uthread->uu_arg;
+
+	/* number of bytes to copy in, NOT the number of arguments */
+	arg_bytes = callp->sy_narg * sizeof (syscall_arg_t);
+	if (arg_bytes != 0) {
+		sy_munge_t	*mungerp;
+
+		/*
+		 * The limit is on the argument count (as in
+		 * unix_syscall64); comparing the byte count against 8
+		 * would trip for any call with more than one argument.
+		 */
+		assert(callp->sy_narg <= 8);
+
+		error = copyin((user_addr_t) params, (char *) vt, arg_bytes);
+		if (error) {
+			regs->eax = error;
+			regs->efl |= EFL_CF;
+			thread_exception_return();
+			/* NOTREACHED */
+		}
+		/* NOTE(review): call 180 is excluded from tracing - presumably the trace call itself; confirm */
+		if (code != 180) {
+			int *ip = (int *)vt;
+
+			KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_EXCP_SC, code) | DBG_FUNC_START,
+			    *ip, *(ip+1), *(ip+2), *(ip+3), 0);
+		}
+		mungerp = callp->sy_arg_munge32;
+
+		/*
+		 * If non-NULL, then call the syscall argument munger to
+		 * copy in arguments (see xnu/bsd/dev/i386/munge.s); the
+		 * first argument is NULL because we are munging in place
+		 * after a copyin because the ABI currently doesn't use
+		 * registers to pass system call arguments.
+		 */
+		if (mungerp != NULL)
+			(*mungerp)(NULL, vt);
+	} else {
+		KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_EXCP_SC, code) | DBG_FUNC_START,
+		    0, 0, 0, 0, 0);
+	}
+
+	/*
+	 * Delayed binding of thread credential to process credential, if we
+	 * are not running with an explicitly set thread credential.
+	 */
+	if (uthread->uu_ucred != p->p_ucred &&
+	    (uthread->uu_flag & UT_SETUID) == 0) {
+		kauth_cred_t old = uthread->uu_ucred;
+		uthread->uu_ucred = kauth_cred_proc_ref(p);
+		if (IS_VALID_CRED(old))
+			kauth_cred_unref(&old);
+	}
+
+	/* default return values: 0 and the caller's current edx */
+	uthread->uu_rval[0] = 0;
+	uthread->uu_rval[1] = regs->edx;
+
+	cancel_enable = callp->sy_cancel;
+
+	if (cancel_enable == _SYSCALL_CANCEL_NONE) {
+		uthread->uu_flag |= UT_NOTCANCELPT;
+	} else {
+		/* a cancel is pending, not disabled and not yet acted upon */
+		if ((uthread->uu_flag & (UT_CANCELDISABLE | UT_CANCEL | UT_CANCELED)) == UT_CANCEL) {
+			if (cancel_enable == _SYSCALL_CANCEL_PRE) {
+				/* system call cancelled; return to handle cancellation */
+				regs->eax = EINTR;
+				regs->efl |= EFL_CF;
+				thread_exception_return();
+				/* NOTREACHED */
+			} else {
+				thread_abort_safely(thread);
+			}
+		}
+	}
+
+	funnel_type = (callp->sy_funnel & FUNNEL_MASK);
+	if (funnel_type == KERNEL_FUNNEL)
+		thread_funnel_set(kernel_flock, TRUE);
+
+	if (KTRPOINT(p, KTR_SYSCALL))
+		ktrsyscall(p, code, callp->sy_narg, vt);
+
+	AUDIT_SYSCALL_ENTER(code, p, uthread);
+	error = (*(callp->sy_call))((void *) p, (void *) vt, &(uthread->uu_rval[0]));
+	AUDIT_SYSCALL_EXIT(error, p, uthread);
+
+	if (error == ERESTART) {
+		/*
+		 * Move the user's pc back to repeat the syscall:
+		 * 5 bytes for a sysenter, or 2 for an int 8x.
+		 * The SYSENTER_TF_CS covers single-stepping over a sysenter
+		 * - see debug trap handler in idt.s/idt64.s
+		 */
+		if (regs->cs == SYSENTER_CS || regs->cs == SYSENTER_TF_CS)
+			regs->eip -= 5;
+		else
+			regs->eip -= 2;
+	}
+	else if (error != EJUSTRETURN) {
+		if (error) {
+			regs->eax = error;
+			regs->efl |= EFL_CF;	/* carry bit */
+		} else { /* (not error) */
+			regs->eax = uthread->uu_rval[0];
+			regs->edx = uthread->uu_rval[1];
+			regs->efl &= ~EFL_CF;
+		}
+	}
+
+	if (KTRPOINT(p, KTR_SYSRET))
+		ktrsysret(p, code, error, uthread->uu_rval[0]);
+
+	if (cancel_enable == _SYSCALL_CANCEL_NONE)
+		uthread->uu_flag &= ~UT_NOTCANCELPT;
+
+	/*
+	 * if we're holding the funnel
+	 * then drop it regardless of whether
+	 * we took it on system call entry
+	 */
+	exit_funnel_section();
+
+	if (uthread->uu_lowpri_delay) {
+		/*
+		 * task is marked as a low priority I/O type
+		 * and the I/O we issued while in this system call
+		 * collided with normal I/O operations... we'll
+		 * delay in order to mitigate the impact of this
+		 * task on the normal operation of the system
+		 */
+		IOSleep(uthread->uu_lowpri_delay);
+		uthread->uu_lowpri_delay = 0;
+	}
+	if (code != 180)
+		KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_EXCP_SC, code) | DBG_FUNC_END,
+		    error, uthread->uu_rval[0], uthread->uu_rval[1], 0, 0);
+
+	thread_exception_return();
+	/* NOTREACHED */
+}
+
+
+/*
+ * Function:	unix_syscall64
+ *
+ * Purpose:	Dispatch a BSD system call made by a thread whose saved
+ *		state is the 64-bit flavor.  The system call number is
+ *		taken from rax (masked with SYSCALL_NUMBER_MASK); the
+ *		argument block handed to the handler starts at the saved
+ *		rdi (rsi for an indirect call), and arguments beyond
+ *		those held in registers are copied in from the user stack.
+ *
+ * Inputs:	state - pointer to the x86 saved state; asserted to be
+ *			the 64-bit flavor
+ *
+ * Outputs:	none.  Results are delivered through the saved user
+ *		registers (rax/rdx, carry flag set on error).  Every path
+ *		leaves through thread_exception_return().
+ */
+void
+unix_syscall64(x86_saved_state_t *state)
+{
+	thread_t		thread;
+	unsigned int		code;
+	struct sysent		*callp;
+	void			*uargp;
+	int			args_in_regs;
+	int			error;
+	int			funnel_type;
+	struct proc		*p;
+	struct uthread		*uthread;
+	unsigned int		cancel_enable;
+	x86_saved_state64_t	*regs;
+
+	assert(is_saved_state64(state));
+	regs = saved_state64(state);
+
+	/*
+	 * NOTE(review): call number 0x2000800 is returned to user space
+	 * untouched; its meaning is not visible in this file - confirm.
+	 */
+	if (regs->rax == 0x2000800)
+		thread_exception_return();
+
+	thread = current_thread();
+	uthread = get_bsdthread_info(thread);
+
+	/* Get the appropriate proc; may be different from task's for vfork() */
+	if (!(uthread->uu_flag & UT_VFORK))
+		p = (struct proc *)get_bsdtask_info(current_task());
+	else
+		p = current_proc();
+
+	/* Verify that we are not being called from a task without a proc */
+	if (p == NULL) {
+		regs->rax = EPERM;
+		regs->isf.rflags |= EFL_CF;
+		task_terminate_internal(current_task());
+		thread_exception_return();
+		/* NOTREACHED */
+	}
+	args_in_regs = 6;
+
+	code = regs->rax & SYSCALL_NUMBER_MASK;
+	/* out-of-range call numbers are routed to sysent[63] */
+	callp = (code >= (unsigned int)nsysent) ? &sysent[63] : &sysent[code];
+	uargp = (void *)(&regs->rdi);
+
+	if (callp == sysent) {
+		/*
+		 * indirect system call... system call number
+		 * passed as 'arg0', so the real arguments start one
+		 * register later and one fewer of them is in registers
+		 */
+		code = regs->rdi;
+		callp = (code >= (unsigned int)nsysent) ? &sysent[63] : &sysent[code];
+		uargp = (void *)(&regs->rsi);
+		args_in_regs = 5;
+	}
+
+	if (callp->sy_narg != 0) {
+		/* NOTE(review): call 180 is excluded from tracing - presumably the trace call itself; confirm */
+		if (code != 180) {
+			uint64_t *ip = (uint64_t *)uargp;
+
+			KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_EXCP_SC, code) | DBG_FUNC_START,
+			    (int)(*ip), (int)(*(ip+1)), (int)(*(ip+2)), (int)(*(ip+3)), 0);
+		}
+		assert(callp->sy_narg <= 8);
+
+		if (callp->sy_narg > args_in_regs) {
+			int copyin_count;
+
+			copyin_count = (callp->sy_narg - args_in_regs) * sizeof(uint64_t);
+
+			/*
+			 * the remaining arguments are fetched from the user
+			 * stack, skipping one user_addr_t sized slot
+			 * (presumably the return address), into v_arg6
+			 */
+			error = copyin((user_addr_t)(regs->isf.rsp + sizeof(user_addr_t)), (char *)&regs->v_arg6, copyin_count);
+			if (error) {
+				regs->rax = error;
+				regs->isf.rflags |= EFL_CF;
+				thread_exception_return();
+				/* NOTREACHED */
+			}
+		}
+	} else {
+		KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_EXCP_SC, code) | DBG_FUNC_START,
+		    0, 0, 0, 0, 0);
+	}
+
+	/*
+	 * XXX Turn 64 bit unsafe calls into nosys()
+	 *
+	 * This is checked for every call, including those that take no
+	 * arguments, so that the argument count cannot be used to slip
+	 * past the UNSAFE_64BIT marking.
+	 */
+	if (callp->sy_funnel & UNSAFE_64BIT)
+		callp = &sysent[63];
+
+	/*
+	 * Delayed binding of thread credential to process credential, if we
+	 * are not running with an explicitly set thread credential.
+	 */
+	if (uthread->uu_ucred != p->p_ucred &&
+	    (uthread->uu_flag & UT_SETUID) == 0) {
+		kauth_cred_t old = uthread->uu_ucred;
+		uthread->uu_ucred = kauth_cred_proc_ref(p);
+		if (IS_VALID_CRED(old))
+			kauth_cred_unref(&old);
+	}
+
+	uthread->uu_rval[0] = 0;
+	uthread->uu_rval[1] = 0;
+
+	cancel_enable = callp->sy_cancel;
+
+	if (cancel_enable == _SYSCALL_CANCEL_NONE) {
+		uthread->uu_flag |= UT_NOTCANCELPT;
+	} else {
+		/* a cancel is pending, not disabled and not yet acted upon */
+		if ((uthread->uu_flag & (UT_CANCELDISABLE | UT_CANCEL | UT_CANCELED)) == UT_CANCEL) {
+			if (cancel_enable == _SYSCALL_CANCEL_PRE) {
+				/* system call cancelled; return to handle cancellation */
+				regs->rax = EINTR;
+				regs->isf.rflags |= EFL_CF;
+				thread_exception_return();
+				/* NOTREACHED */
+			} else {
+				thread_abort_safely(thread);
+			}
+		}
+	}
+
+	funnel_type = (callp->sy_funnel & FUNNEL_MASK);
+	if (funnel_type == KERNEL_FUNNEL)
+		thread_funnel_set(kernel_flock, TRUE);
+
+	if (KTRPOINT(p, KTR_SYSCALL))
+		ktrsyscall(p, code, callp->sy_narg, uargp);
+
+	AUDIT_SYSCALL_ENTER(code, p, uthread);
+	error = (*(callp->sy_call))((void *) p, uargp, &(uthread->uu_rval[0]));
+	AUDIT_SYSCALL_EXIT(error, p, uthread);
+
+	if (error == ERESTART) {
+		/*
+		 * all system calls come through via the syscall instruction
+		 * in 64 bit mode... its 2 bytes in length
+		 * move the user's pc back to repeat the syscall:
+		 */
+		regs->isf.rip -= 2;
+	}
+	else if (error != EJUSTRETURN) {
+		if (error) {
+			regs->rax = error;
+			regs->isf.rflags |= EFL_CF;	/* carry bit */
+		} else { /* (not error) */
+
+			/* widen the result according to the declared return type */
+			switch (callp->sy_return_type) {
+			case _SYSCALL_RET_INT_T:
+				regs->rax = uthread->uu_rval[0];
+				regs->rdx = uthread->uu_rval[1];
+				break;
+			case _SYSCALL_RET_UINT_T:
+				regs->rax = ((u_int)uthread->uu_rval[0]);
+				regs->rdx = ((u_int)uthread->uu_rval[1]);
+				break;
+			case _SYSCALL_RET_OFF_T:
+			case _SYSCALL_RET_ADDR_T:
+			case _SYSCALL_RET_SIZE_T:
+			case _SYSCALL_RET_SSIZE_T:
+				/* uu_rval[0..1] together hold one 64 bit value */
+				regs->rax = *((uint64_t *)(&uthread->uu_rval[0]));
+				regs->rdx = 0;
+				break;
+			case _SYSCALL_RET_NONE:
+				break;
+			default:
+				panic("unix_syscall64: unknown return type");
+				break;
+			}
+			regs->isf.rflags &= ~EFL_CF;
+		}
+	}
+
+	if (KTRPOINT(p, KTR_SYSRET))
+		ktrsysret(p, code, error, uthread->uu_rval[0]);
+
+	if (cancel_enable == _SYSCALL_CANCEL_NONE)
+		uthread->uu_flag &= ~UT_NOTCANCELPT;
+
+	/*
+	 * if we're holding the funnel
+	 * then drop it regardless of whether
+	 * we took it on system call entry
+	 */
+	exit_funnel_section();
+
+	if (uthread->uu_lowpri_delay) {
+		/*
+		 * task is marked as a low priority I/O type
+		 * and the I/O we issued while in this system call
+		 * collided with normal I/O operations... we'll
+		 * delay in order to mitigate the impact of this
+		 * task on the normal operation of the system
+		 */
+		IOSleep(uthread->uu_lowpri_delay);
+		uthread->uu_lowpri_delay = 0;
+	}
+	if (code != 180)
+		KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_EXCP_SC, code) | DBG_FUNC_END,
+		    error, uthread->uu_rval[0], uthread->uu_rval[1], 0, 0);
+
+	thread_exception_return();
+	/* NOTREACHED */
+}
+
+
+/*
+ * Function:	unix_syscall_return
+ *
+ * Purpose:	Finish a system call for the current thread with the given
+ *		error.  Performs the same register/flag update and exit
+ *		processing as the tails of unix_syscall() and
+ *		unix_syscall64(), but recovers the saved user state (via
+ *		find_user_regs) and the system call number itself.
+ *
+ * Inputs:	error - result of the system call (0, an errno value,
+ *			ERESTART or EJUSTRETURN)
+ *
+ * Outputs:	none.  Leaves through thread_exception_return().
+ */
+void
+unix_syscall_return(int error)
+{
+	thread_t		thread;
+	struct uthread		*uthread;
+	struct proc		*p;
+	unsigned int		code;
+	vm_offset_t		params;
+	struct sysent		*callp;
+	unsigned int		cancel_enable;
+
+	thread = current_thread();
+	uthread = get_bsdthread_info(thread);
+
+	p = current_proc();
+
+	if (proc_is64bit(p)) {
+		x86_saved_state64_t	*regs;
+
+		regs = saved_state64(find_user_regs(thread));
+
+		/* reconstruct code for tracing before blasting rax */
+		code = regs->rax & SYSCALL_NUMBER_MASK;
+		callp = (code >= (unsigned int)nsysent) ? &sysent[63] : &sysent[code];
+
+		if (callp == sysent) {
+			/*
+			 * indirect system call... system call number
+			 * passed as 'arg0'.  Look the entry up again so
+			 * that the return type and cancel type below are
+			 * those of the call that actually ran, not those
+			 * of the indirect entry.
+			 */
+			code = regs->rdi;
+			callp = (code >= (unsigned int)nsysent) ? &sysent[63] : &sysent[code];
+		}
+
+		if (error == ERESTART) {
+			/*
+			 * all system calls come through via the syscall instruction
+			 * in 64 bit mode... its 2 bytes in length
+			 * move the user's pc back to repeat the syscall:
+			 */
+			regs->isf.rip -= 2;
+		}
+		else if (error != EJUSTRETURN) {
+			if (error) {
+				regs->rax = error;
+				regs->isf.rflags |= EFL_CF;	/* carry bit */
+			} else { /* (not error) */
+
+				/* widen the result according to the declared return type */
+				switch (callp->sy_return_type) {
+				case _SYSCALL_RET_INT_T:
+					regs->rax = uthread->uu_rval[0];
+					regs->rdx = uthread->uu_rval[1];
+					break;
+				case _SYSCALL_RET_UINT_T:
+					regs->rax = ((u_int)uthread->uu_rval[0]);
+					regs->rdx = ((u_int)uthread->uu_rval[1]);
+					break;
+				case _SYSCALL_RET_OFF_T:
+				case _SYSCALL_RET_ADDR_T:
+				case _SYSCALL_RET_SIZE_T:
+				case _SYSCALL_RET_SSIZE_T:
+					/* uu_rval[0..1] together hold one 64 bit value */
+					regs->rax = *((uint64_t *)(&uthread->uu_rval[0]));
+					regs->rdx = 0;
+					break;
+				case _SYSCALL_RET_NONE:
+					break;
+				default:
+					panic("unix_syscall_return: unknown return type");
+					break;
+				}
+				regs->isf.rflags &= ~EFL_CF;
+			}
+		}
+	} else {
+		x86_saved_state32_t	*regs;
+
+		regs = saved_state32(find_user_regs(thread));
+
+		/* reconstruct code for tracing before blasting eax */
+		code = regs->eax;
+		callp = (code >= (unsigned int)nsysent) ? &sysent[63] : &sysent[code];
+
+		if (callp == sysent) {
+			/*
+			 * indirect system call... the real number is the
+			 * first word of the arguments on the user stack.
+			 * Look the entry up again so that the cancel type
+			 * below is that of the call that actually ran.
+			 */
+			params = (vm_offset_t) ((caddr_t)regs->uesp + sizeof (int));
+			code = fuword(params);
+			callp = (code >= (unsigned int)nsysent) ? &sysent[63] : &sysent[code];
+		}
+		if (error == ERESTART) {
+			/*
+			 * Move the user's pc back to repeat the syscall:
+			 * 5 bytes for a sysenter, or 2 for an int 8x.
+			 * SYSENTER_TF_CS (single-stepping over a sysenter)
+			 * is treated as a sysenter, as unix_syscall() does.
+			 */
+			if ((regs->cs & 0xffff) == SYSENTER_CS ||
+			    regs->cs == SYSENTER_TF_CS)
+				regs->eip -= 5;
+			else
+				regs->eip -= 2;
+		}
+		else if (error != EJUSTRETURN) {
+			if (error) {
+				regs->eax = error;
+				regs->efl |= EFL_CF;	/* carry bit */
+			} else { /* (not error) */
+				regs->eax = uthread->uu_rval[0];
+				regs->edx = uthread->uu_rval[1];
+				regs->efl &= ~EFL_CF;
+			}
+		}
+	}
+	if (KTRPOINT(p, KTR_SYSRET))
+		ktrsysret(p, code, error, uthread->uu_rval[0]);
+
+	cancel_enable = callp->sy_cancel;
+
+	if (cancel_enable == _SYSCALL_CANCEL_NONE)
+		uthread->uu_flag &= ~UT_NOTCANCELPT;
+
+	/*
+	 * if we're holding the funnel
+	 * then drop it regardless of whether
+	 * we took it on system call entry
+	 */
+	exit_funnel_section();
+
+	if (uthread->uu_lowpri_delay) {
+		/*
+		 * task is marked as a low priority I/O type
+		 * and the I/O we issued while in this system call
+		 * collided with normal I/O operations... we'll
+		 * delay in order to mitigate the impact of this
+		 * task on the normal operation of the system
+		 */
+		IOSleep(uthread->uu_lowpri_delay);
+		uthread->uu_lowpri_delay = 0;
+	}
+	/* NOTE(review): call 180 is excluded from tracing - presumably the trace call itself; confirm */
+	if (code != 180)
+		KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_EXCP_SC, code) | DBG_FUNC_END,
+		    error, uthread->uu_rval[0], uthread->uu_rval[1], 0, 0);
+
+	thread_exception_return();
+	/* NOTREACHED */
+}
+
+/*
+ * munge_wwwlww
+ *
+ * Expand, in place, a packed 32-bit argument list of the shape
+ * word, word, word, long, word, word (seven 32-bit slots) into six
+ * 64-bit slots.  Words are zero-extended; the long keeps its two
+ * halves in their original order.  The buffer at out64 must have
+ * room for six 64-bit values.  in32 is not used.
+ */
+void
+munge_wwwlww(
+	__unused const void *in32,
+	void *out64)
+{
+	uint32_t *narrow = (uint32_t *) out64;
+	uint64_t *wide = (uint64_t *) out64;
+	uint32_t long_lo;
+	uint32_t long_hi;
+	int slot;
+
+	/* pick up both halves of the long before anything is overwritten */
+	long_lo = narrow[3];
+	long_hi = narrow[4];
+
+	/*
+	 * Work from the last argument towards the first so that no
+	 * 32-bit source slot is overwritten before it has been read.
+	 */
+	for (slot = 5; slot >= 4; slot--)
+		wide[slot] = narrow[slot + 1];	/* trailing words */
+
+	narrow[6] = long_lo;			/* the long becomes slot 3 */
+	narrow[7] = long_hi;
+
+	for (slot = 2; slot >= 0; slot--)
+		wide[slot] = narrow[slot];	/* leading words */
+}