/*
- * Copyright (c) 2000-2005 Apple Computer, Inc. All rights reserved.
+ * Copyright (c) 2000-2007 Apple Inc. All rights reserved.
*
* @APPLE_OSREFERENCE_LICENSE_HEADER_START@
*
#include <sys/systm.h>
#include <sys/user.h>
#include <sys/errno.h>
-#include <sys/ktrace.h>
#include <sys/kdebug.h>
#include <sys/sysent.h>
#include <sys/sysproto.h>
#include <i386/machine_routines.h>
#include <mach/i386/syscall_sw.h>
+#if CONFIG_DTRACE
+extern int32_t dtrace_systrace_syscall(struct proc *, void *, int *);
+extern void dtrace_systrace_syscall_return(unsigned short, int, int *);
+#endif
+
extern void unix_syscall(x86_saved_state_t *);
extern void unix_syscall64(x86_saved_state_t *);
-extern void unix_syscall_return(int);
extern void *find_user_regs(thread_t);
-extern void IOSleep(int);
-extern void exit_funnel_section(void);
-
-extern void Debugger(const char * message);
+extern void throttle_lowpri_io(int *lowpri_window, mount_t v_mount);
+extern void x86_toggle_sysenter_arg_store(thread_t thread, boolean_t valid);
+extern boolean_t x86_sysenter_arg_store_isvalid(thread_t thread);
/*
* Function: unix_syscall
*
void
unix_syscall(x86_saved_state_t *state)
{
- thread_t thread;
- void *vt;
- unsigned short code;
- struct sysent *callp;
- int nargs;
- int error;
- int funnel_type;
- vm_offset_t params;
- struct proc *p;
- struct uthread *uthread;
- unsigned int cancel_enable;
+ thread_t thread;
+ void *vt;
+ unsigned int code;
+ struct sysent *callp;
+
+ int error;
+ vm_offset_t params;
+ struct proc *p;
+ struct uthread *uthread;
x86_saved_state32_t *regs;
+ boolean_t args_in_uthread;
assert(is_saved_state32(state));
regs = saved_state32(state);
-
+#if DEBUG
if (regs->eax == 0x800)
thread_exception_return();
-
+#endif
thread = current_thread();
uthread = get_bsdthread_info(thread);
/* NOTREACHED */
}
- //printf("[scall : eax %x]", regs->eax);
- code = regs->eax;
+ code = regs->eax & I386_SYSCALL_NUMBER_MASK;
+ args_in_uthread = ((regs->eax & I386_SYSCALL_ARG_BYTES_MASK) != 0) && x86_sysenter_arg_store_isvalid(thread);
params = (vm_offset_t) ((caddr_t)regs->uesp + sizeof (int));
- callp = (code >= nsysent) ? &sysent[63] : &sysent[code];
+
+ regs->efl &= ~(EFL_CF);
+
+ callp = (code >= NUM_SYSENT) ? &sysent[63] : &sysent[code];
if (callp == sysent) {
code = fuword(params);
- params += sizeof (int);
- callp = (code >= nsysent) ? &sysent[63] : &sysent[code];
+ params += sizeof(int);
+ callp = (code >= NUM_SYSENT) ? &sysent[63] : &sysent[code];
}
+
vt = (void *)uthread->uu_arg;
- nargs = callp->sy_narg * sizeof (syscall_arg_t);
- if (nargs != 0) {
+ if (callp->sy_arg_bytes != 0) {
sy_munge_t *mungerp;
- assert(nargs <= 8);
-
- error = copyin((user_addr_t) params, (char *) vt, nargs);
- if (error) {
- regs->eax = error;
- regs->efl |= EFL_CF;
- thread_exception_return();
- /* NOTREACHED */
+ assert((unsigned) callp->sy_arg_bytes <= sizeof (uthread->uu_arg));
+ if (!args_in_uthread)
+ {
+ uint32_t nargs;
+ nargs = callp->sy_arg_bytes;
+ error = copyin((user_addr_t) params, (char *) vt, nargs);
+ if (error) {
+ regs->eax = error;
+ regs->efl |= EFL_CF;
+ thread_exception_return();
+ /* NOTREACHED */
+ }
}
+
if (code != 180) {
- int *ip = (int *)vt;
+ int *ip = (int *)vt;
KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_EXCP_SC, code) | DBG_FUNC_START,
- *ip, *(ip+1), *(ip+2), *(ip+3), 0);
+ *ip, *(ip+1), *(ip+2), *(ip+3), 0);
}
mungerp = callp->sy_arg_munge32;
if (mungerp != NULL)
(*mungerp)(NULL, vt);
} else
- KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_EXCP_SC, code) | DBG_FUNC_START,
- 0, 0, 0, 0, 0);
+ KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_EXCP_SC, code) | DBG_FUNC_START,
+ 0, 0, 0, 0, 0);
+
/*
* Delayed binding of thread credential to process credential, if we
* are not running with an explicitly set thread credential.
*/
- if (uthread->uu_ucred != p->p_ucred &&
- (uthread->uu_flag & UT_SETUID) == 0) {
- kauth_cred_t old = uthread->uu_ucred;
- uthread->uu_ucred = kauth_cred_proc_ref(p);
- if (IS_VALID_CRED(old))
- kauth_cred_unref(&old);
- }
+ kauth_cred_uthread_update(uthread, p);
uthread->uu_rval[0] = 0;
uthread->uu_rval[1] = regs->edx;
+ uthread->uu_flag |= UT_NOTCANCELPT;
- cancel_enable = callp->sy_cancel;
-
- if (cancel_enable == _SYSCALL_CANCEL_NONE) {
- uthread->uu_flag |= UT_NOTCANCELPT;
- } else {
- if ((uthread->uu_flag & (UT_CANCELDISABLE | UT_CANCEL | UT_CANCELED)) == UT_CANCEL) {
- if (cancel_enable == _SYSCALL_CANCEL_PRE) {
- /* system call cancelled; return to handle cancellation */
- regs->eax = (long long)EINTR;
- regs->efl |= EFL_CF;
- thread_exception_return();
- /* NOTREACHED */
- } else {
- thread_abort_safely(thread);
- }
- }
- }
-
- funnel_type = (callp->sy_funnel & FUNNEL_MASK);
- if (funnel_type == KERNEL_FUNNEL)
- thread_funnel_set(kernel_flock, TRUE);
- if (KTRPOINT(p, KTR_SYSCALL))
- ktrsyscall(p, code, callp->sy_narg, vt);
+#ifdef JOE_DEBUG
+ uthread->uu_iocount = 0;
+ uthread->uu_vpindex = 0;
+#endif
AUDIT_SYSCALL_ENTER(code, p, uthread);
error = (*(callp->sy_call))((void *) p, (void *) vt, &(uthread->uu_rval[0]));
- AUDIT_SYSCALL_EXIT(error, p, uthread);
-
+ AUDIT_SYSCALL_EXIT(code, p, uthread, error);
+
+#ifdef JOE_DEBUG
+ if (uthread->uu_iocount)
+ joe_debug("system call returned with uu_iocount != 0");
+#endif
+#if CONFIG_DTRACE
+ uthread->t_dtrace_errno = error;
+#endif /* CONFIG_DTRACE */
+
if (error == ERESTART) {
/*
* Move the user's pc back to repeat the syscall:
* The SYSENTER_TF_CS covers single-stepping over a sysenter
* - see debug trap handler in idt.s/idt64.s
*/
- if (regs->cs == SYSENTER_CS || regs->cs == SYSENTER_TF_CS)
+ if (regs->cs == SYSENTER_CS || regs->cs == SYSENTER_TF_CS) {
regs->eip -= 5;
+ }
else
regs->eip -= 2;
}
} else { /* (not error) */
regs->eax = uthread->uu_rval[0];
regs->edx = uthread->uu_rval[1];
- regs->efl &= ~EFL_CF;
}
}
- if (KTRPOINT(p, KTR_SYSRET))
- ktrsysret(p, code, error, uthread->uu_rval[0]);
-
- if (cancel_enable == _SYSCALL_CANCEL_NONE)
- uthread->uu_flag &= ~UT_NOTCANCELPT;
-
+ uthread->uu_flag &= ~UT_NOTCANCELPT;
+#if DEBUG
/*
- * if we're holding the funnel
- * than drop it regardless of whether
- * we took it on system call entry
+ * if we're holding the funnel panic
*/
- exit_funnel_section();
-
- if (uthread->uu_lowpri_delay) {
+ syscall_exit_funnelcheck();
+#endif /* DEBUG */
+ if (uthread->uu_lowpri_window && uthread->v_mount) {
/*
* task is marked as a low priority I/O type
* and the I/O we issued while in this system call
* delay in order to mitigate the impact of this
* task on the normal operation of the system
*/
- IOSleep(uthread->uu_lowpri_delay);
- uthread->uu_lowpri_delay = 0;
+ throttle_lowpri_io(&uthread->uu_lowpri_window,uthread->v_mount);
}
if (code != 180)
KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_EXCP_SC, code) | DBG_FUNC_END,
unix_syscall64(x86_saved_state_t *state)
{
thread_t thread;
- unsigned short code;
+ unsigned int code;
struct sysent *callp;
void *uargp;
int args_in_regs;
int error;
- int funnel_type;
struct proc *p;
struct uthread *uthread;
- unsigned int cancel_enable;
x86_saved_state64_t *regs;
assert(is_saved_state64(state));
args_in_regs = 6;
code = regs->rax & SYSCALL_NUMBER_MASK;
- callp = (code >= nsysent) ? &sysent[63] : &sysent[code];
+ callp = (code >= NUM_SYSENT) ? &sysent[63] : &sysent[code];
uargp = (void *)(®s->rdi);
if (callp == sysent) {
* passed as 'arg0'
*/
code = regs->rdi;
- callp = (code >= nsysent) ? &sysent[63] : &sysent[code];
+ callp = (code >= NUM_SYSENT) ? &sysent[63] : &sysent[code];
uargp = (void *)(®s->rsi);
args_in_regs = 5;
}
if (callp->sy_narg != 0) {
if (code != 180) {
- uint64_t *ip = (uint64_t *)uargp;
+ uint64_t *ip = (uint64_t *)uargp;
KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_EXCP_SC, code) | DBG_FUNC_START,
- (int)(*ip), (int)(*(ip+1)), (int)(*(ip+2)), (int)(*(ip+3)), 0);
+ (int)(*ip), (int)(*(ip+1)), (int)(*(ip+2)), (int)(*(ip+3)), 0);
}
- assert(callp->sy_narg <= 8);
+ assert(callp->sy_narg <= 8);
if (callp->sy_narg > args_in_regs) {
- int copyin_count;
+ int copyin_count;
- copyin_count = (callp->sy_narg - args_in_regs) * sizeof(uint64_t);
+ copyin_count = (callp->sy_narg - args_in_regs) * sizeof(uint64_t);
- error = copyin((user_addr_t)(regs->isf.rsp + sizeof(user_addr_t)), (char *)®s->v_arg6, copyin_count);
+ error = copyin((user_addr_t)(regs->isf.rsp + sizeof(user_addr_t)), (char *)®s->v_arg6, copyin_count);
if (error) {
- regs->rax = error;
+ regs->rax = error;
regs->isf.rflags |= EFL_CF;
thread_exception_return();
/* NOTREACHED */
/*
* XXX Turn 64 bit unsafe calls into nosys()
*/
- if (callp->sy_funnel & UNSAFE_64BIT) {
- callp = &sysent[63];
+ if (callp->sy_flags & UNSAFE_64BIT) {
+ callp = &sysent[63];
goto unsafe;
}
-
} else
KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_EXCP_SC, code) | DBG_FUNC_START,
0, 0, 0, 0, 0);
* Delayed binding of thread credential to process credential, if we
* are not running with an explicitly set thread credential.
*/
- if (uthread->uu_ucred != p->p_ucred &&
- (uthread->uu_flag & UT_SETUID) == 0) {
- kauth_cred_t old = uthread->uu_ucred;
- uthread->uu_ucred = kauth_cred_proc_ref(p);
- if (IS_VALID_CRED(old))
- kauth_cred_unref(&old);
- }
+ kauth_cred_uthread_update(uthread, p);
uthread->uu_rval[0] = 0;
uthread->uu_rval[1] = 0;
- cancel_enable = callp->sy_cancel;
- if (cancel_enable == _SYSCALL_CANCEL_NONE) {
- uthread->uu_flag |= UT_NOTCANCELPT;
- } else {
- if ((uthread->uu_flag & (UT_CANCELDISABLE | UT_CANCEL | UT_CANCELED)) == UT_CANCEL) {
- if (cancel_enable == _SYSCALL_CANCEL_PRE) {
- /* system call cancelled; return to handle cancellation */
- regs->rax = EINTR;
- regs->isf.rflags |= EFL_CF;
- thread_exception_return();
- /* NOTREACHED */
- } else {
- thread_abort_safely(thread);
- }
- }
- }
+ uthread->uu_flag |= UT_NOTCANCELPT;
- funnel_type = (callp->sy_funnel & FUNNEL_MASK);
- if (funnel_type == KERNEL_FUNNEL)
- thread_funnel_set(kernel_flock, TRUE);
-
- if (KTRPOINT(p, KTR_SYSCALL))
- ktrsyscall(p, code, callp->sy_narg, uargp);
AUDIT_SYSCALL_ENTER(code, p, uthread);
error = (*(callp->sy_call))((void *) p, uargp, &(uthread->uu_rval[0]));
- AUDIT_SYSCALL_EXIT(error, p, uthread);
+ AUDIT_SYSCALL_EXIT(code, p, uthread, error);
+
+#if CONFIG_DTRACE
+ uthread->t_dtrace_errno = error;
+#endif /* CONFIG_DTRACE */
if (error == ERESTART) {
/*
}
else if (error != EJUSTRETURN) {
if (error) {
- regs->rax = error;
+ regs->rax = error;
regs->isf.rflags |= EFL_CF; /* carry bit */
} else { /* (not error) */
}
}
- if (KTRPOINT(p, KTR_SYSRET))
- ktrsysret(p, code, error, uthread->uu_rval[0]);
- if (cancel_enable == _SYSCALL_CANCEL_NONE)
- uthread->uu_flag &= ~UT_NOTCANCELPT;
+ uthread->uu_flag &= ~UT_NOTCANCELPT;
/*
- * if we're holding the funnel
- * than drop it regardless of whether
- * we took it on system call entry
+ * if we're holding the funnel panic
*/
- exit_funnel_section();
+ syscall_exit_funnelcheck();
- if (uthread->uu_lowpri_delay) {
+ if (uthread->uu_lowpri_window && uthread->v_mount) {
/*
* task is marked as a low priority I/O type
* and the I/O we issued while in this system call
* delay in order to mitigate the impact of this
* task on the normal operation of the system
*/
- IOSleep(uthread->uu_lowpri_delay);
- uthread->uu_lowpri_delay = 0;
+ throttle_lowpri_io(&uthread->uu_lowpri_window,uthread->v_mount);
}
if (code != 180)
KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_EXCP_SC, code) | DBG_FUNC_END,
thread_t thread;
struct uthread *uthread;
struct proc *p;
- unsigned short code;
+ unsigned int code;
vm_offset_t params;
struct sysent *callp;
- unsigned int cancel_enable;
thread = current_thread();
uthread = get_bsdthread_info(thread);
p = current_proc();
if (proc_is64bit(p)) {
- x86_saved_state64_t *regs;
+ x86_saved_state64_t *regs;
regs = saved_state64(find_user_regs(thread));
- /* reconstruct code for tracing before blasting rax */
+ /* reconstruct code for tracing before blasting rax */
code = regs->rax & SYSCALL_NUMBER_MASK;
- callp = (code >= nsysent) ? &sysent[63] : &sysent[code];
+ callp = (code >= NUM_SYSENT) ? &sysent[63] : &sysent[code];
if (callp == sysent)
- /*
+ /*
* indirect system call... system call number
* passed as 'arg0'
*/
- code = regs->rdi;
+ code = regs->rdi;
+
+#if CONFIG_DTRACE
+ if (callp->sy_call == dtrace_systrace_syscall)
+ dtrace_systrace_syscall_return( code, error, uthread->uu_rval );
+#endif /* CONFIG_DTRACE */
if (error == ERESTART) {
- /*
+ /*
* all system calls come through via the syscall instruction
* in 64 bit mode... its 2 bytes in length
* move the user's pc back to repeat the syscall:
*/
- regs->isf.rip -= 2;
+ regs->isf.rip -= 2;
}
else if (error != EJUSTRETURN) {
- if (error) {
- regs->rax = error;
+ if (error) {
+ regs->rax = error;
regs->isf.rflags |= EFL_CF; /* carry bit */
} else { /* (not error) */
- switch (callp->sy_return_type) {
+ switch (callp->sy_return_type) {
case _SYSCALL_RET_INT_T:
- regs->rax = uthread->uu_rval[0];
+ regs->rax = uthread->uu_rval[0];
regs->rdx = uthread->uu_rval[1];
break;
case _SYSCALL_RET_UINT_T:
- regs->rax = ((u_int)uthread->uu_rval[0]);
+ regs->rax = ((u_int)uthread->uu_rval[0]);
regs->rdx = ((u_int)uthread->uu_rval[1]);
break;
case _SYSCALL_RET_OFF_T:
case _SYSCALL_RET_ADDR_T:
case _SYSCALL_RET_SIZE_T:
case _SYSCALL_RET_SSIZE_T:
- regs->rax = *((uint64_t *)(&uthread->uu_rval[0]));
+ regs->rax = *((uint64_t *)(&uthread->uu_rval[0]));
regs->rdx = 0;
break;
case _SYSCALL_RET_NONE:
- break;
+ break;
default:
- panic("unix_syscall: unknown return type");
+ panic("unix_syscall: unknown return type");
break;
}
regs->isf.rflags &= ~EFL_CF;
}
}
} else {
- x86_saved_state32_t *regs;
+ x86_saved_state32_t *regs;
regs = saved_state32(find_user_regs(thread));
+ regs->efl &= ~(EFL_CF);
/* reconstruct code for tracing before blasting eax */
- code = regs->eax;
- callp = (code >= nsysent) ? &sysent[63] : &sysent[code];
+ code = regs->eax & I386_SYSCALL_NUMBER_MASK;
+ callp = (code >= NUM_SYSENT) ? &sysent[63] : &sysent[code];
+
+#if CONFIG_DTRACE
+ if (callp->sy_call == dtrace_systrace_syscall)
+ dtrace_systrace_syscall_return( code, error, uthread->uu_rval );
+#endif /* CONFIG_DTRACE */
if (callp == sysent) {
- params = (vm_offset_t) ((caddr_t)regs->uesp + sizeof (int));
- code = fuword(params);
+ params = (vm_offset_t) ((caddr_t)regs->uesp + sizeof (int));
+ code = fuword(params);
}
if (error == ERESTART) {
- regs->eip -= ((regs->cs & 0xffff) == SYSENTER_CS) ? 5 : 2;
+ regs->eip -= ((regs->cs & 0xffff) == SYSENTER_CS) ? 5 : 2;
}
else if (error != EJUSTRETURN) {
- if (error) {
- regs->eax = error;
+ if (error) {
+ regs->eax = error;
regs->efl |= EFL_CF; /* carry bit */
} else { /* (not error) */
- regs->eax = uthread->uu_rval[0];
+ regs->eax = uthread->uu_rval[0];
regs->edx = uthread->uu_rval[1];
- regs->efl &= ~EFL_CF;
}
}
}
- if (KTRPOINT(p, KTR_SYSRET))
- ktrsysret(p, code, error, uthread->uu_rval[0]);
- cancel_enable = callp->sy_cancel;
- if (cancel_enable == _SYSCALL_CANCEL_NONE)
- uthread->uu_flag &= ~UT_NOTCANCELPT;
+ uthread->uu_flag &= ~UT_NOTCANCELPT;
/*
- * if we're holding the funnel
- * than drop it regardless of whether
- * we took it on system call entry
+ * if we're holding the funnel panic
*/
- exit_funnel_section();
+ syscall_exit_funnelcheck();
- if (uthread->uu_lowpri_delay) {
+ if (uthread->uu_lowpri_window && uthread->v_mount) {
/*
* task is marked as a low priority I/O type
* and the I/O we issued while in this system call
* delay in order to mitigate the impact of this
* task on the normal operation of the system
*/
- IOSleep(uthread->uu_lowpri_delay);
- uthread->uu_lowpri_delay = 0;
+ throttle_lowpri_io(&uthread->uu_lowpri_window,uthread->v_mount);
}
if (code != 180)
KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_EXCP_SC, code) | DBG_FUNC_END,
arg64[1] = arg32[1]; /* wWwlww */
arg64[0] = arg32[0]; /* Wwwlww */
}
+
+
+void
+munge_wwlwww(
+ __unused const void *in32,
+ void *out64)
+{
+ uint32_t *arg32;
+ uint64_t *arg64;
+
+ /* we convert in place in out64 */
+ arg32 = (uint32_t *) out64;
+ arg64 = (uint64_t *) out64;
+
+ arg64[5] = arg32[6]; /* wwlwwW */
+ arg64[4] = arg32[5]; /* wwlwWw */
+ arg64[3] = arg32[4]; /* wwlWww */
+ arg32[5] = arg32[3]; /* wwLwww (hi) */
+ arg32[4] = arg32[2]; /* wwLwww (lo) */
+ arg64[1] = arg32[1]; /* wWlwww */
+ arg64[0] = arg32[0]; /* Wwlwww */
+}
+
+#ifdef JOE_DEBUG
+joe_debug(char *p) {
+
+ printf("%s\n", p);
+}
+#endif
+
+