X-Git-Url: https://git.saurik.com/apple/xnu.git/blobdiff_plain/5d5c5d0d5b79ade9a973d55186ffda2638ba2b6e..39236c6e673c41db228275375ab7fdb0f837b292:/bsd/dev/i386/systemcalls.c diff --git a/bsd/dev/i386/systemcalls.c b/bsd/dev/i386/systemcalls.c index f8c09f6ff..77ecfba3a 100644 --- a/bsd/dev/i386/systemcalls.c +++ b/bsd/dev/i386/systemcalls.c @@ -1,31 +1,29 @@ /* - * Copyright (c) 2000-2005 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2008 Apple Inc. All rights reserved. * - * @APPLE_LICENSE_OSREFERENCE_HEADER_START@ + * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * - * This file contains Original Code and/or Modifications of Original Code - * as defined in and that are subject to the Apple Public Source License - * Version 2.0 (the 'License'). You may not use this file except in - * compliance with the License. The rights granted to you under the - * License may not be used to create, or enable the creation or - * redistribution of, unlawful or unlicensed copies of an Apple operating - * system, or to circumvent, violate, or enable the circumvention or - * violation of, any terms of an Apple operating system software license - * agreement. - * - * Please obtain a copy of the License at - * http://www.opensource.apple.com/apsl/ and read it before using this - * file. - * - * The Original Code and all software distributed under the License are - * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER - * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, - * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. - * Please see the License for the specific language governing rights and + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. The rights granted to you under the License + * may not be used to create, or enable the creation or redistribution of, + * unlawful or unlicensed copies of an Apple operating system, or to + * circumvent, violate, or enable the circumvention or violation of, any + * terms of an Apple operating system software license agreement. + * + * Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this file. + * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, + * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and * limitations under the License. - * - * @APPLE_LICENSE_OSREFERENCE_HEADER_END@ + * + * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ */ #include #include @@ -33,8 +31,10 @@ #include #include #include +#include #include #include +#include #include #include @@ -43,27 +43,40 @@ #include #include #include -#include #include #include #include #include #include -#include +#include #include #include #include +#include + +#if CONFIG_DTRACE +extern int32_t dtrace_systrace_syscall(struct proc *, void *, int *); +extern void dtrace_systrace_syscall_return(unsigned short, int, int *); +#endif + extern void unix_syscall(x86_saved_state_t *); extern void unix_syscall64(x86_saved_state_t *); -extern void unix_syscall_return(int); extern void *find_user_regs(thread_t); -extern void IOSleep(int); -extern void exit_funnel_section(void); -extern void Debugger(const char * message); +/* dynamically generated at build time based on syscalls.master */ +extern const char *syscallnames[]; + +/* + * This needs to be a single switch so that it's "all on" or "all off", + * rather than being turned on for some code paths and not others, as this + * has a tendency to introduce "blame the next guy" bugs. + */ +#if DEBUG +#define FUNNEL_DEBUG 1 /* Check for funnel held on exit */ +#endif /* * Function: unix_syscall @@ -75,36 +88,36 @@ extern void Debugger(const char * message); void unix_syscall(x86_saved_state_t *state) { - thread_t thread; - void *vt; - unsigned short code; - struct sysent *callp; - int nargs; - int error; - int funnel_type; - vm_offset_t params; - struct proc *p; - struct uthread *uthread; - unsigned int cancel_enable; + thread_t thread; + void *vt; + unsigned int code; + struct sysent *callp; + + int error; + vm_offset_t params; + struct proc *p; + struct uthread *uthread; x86_saved_state32_t *regs; + boolean_t is_vfork; assert(is_saved_state32(state)); regs = saved_state32(state); - +#if DEBUG if (regs->eax == 0x800) thread_exception_return(); - +#endif thread = current_thread(); uthread = get_bsdthread_info(thread); /* Get the approriate proc; may be different from task's for vfork() */ - if (!(uthread->uu_flag & UT_VFORK)) - p = (struct proc *)get_bsdtask_info(current_task()); - else + is_vfork = uthread->uu_flag & UT_VFORK; + if (__improbable(is_vfork != 0)) p = current_proc(); + else + p = (struct proc *)get_bsdtask_info(current_task()); /* Verify that we are not being called from a task without a proc */ - if (p == NULL) { + if (__improbable(p == NULL)) { regs->eax = EPERM; regs->efl |= EFL_CF; task_terminate_internal(current_task()); @@ -112,24 +125,30 @@ unix_syscall(x86_saved_state_t *state) /* NOTREACHED */ } - //printf("[scall : eax %x]", regs->eax); - code = regs->eax; - params = (vm_offset_t) ((caddr_t)regs->uesp + sizeof (int)); - callp = (code >= nsysent) ? &sysent[63] : &sysent[code]; + code = regs->eax & I386_SYSCALL_NUMBER_MASK; + DEBUG_KPRINT_SYSCALL_UNIX("unix_syscall: code=%d(%s) eip=%u\n", + code, syscallnames[code >= NUM_SYSENT ? 63 : code], (uint32_t)regs->eip); + params = (vm_offset_t) (regs->uesp + sizeof (int)); + + regs->efl &= ~(EFL_CF); - if (callp == sysent) { + callp = (code >= NUM_SYSENT) ? &sysent[63] : &sysent[code]; + + if (__improbable(callp == sysent)) { code = fuword(params); - params += sizeof (int); - callp = (code >= nsysent) ? &sysent[63] : &sysent[code]; + params += sizeof(int); + callp = (code >= NUM_SYSENT) ? &sysent[63] : &sysent[code]; } + vt = (void *)uthread->uu_arg; + uthread->uu_ap = vt; - nargs = callp->sy_narg * sizeof (syscall_arg_t); - if (nargs != 0) { + if (callp->sy_arg_bytes != 0) { sy_munge_t *mungerp; + uint32_t nargs; - assert(nargs <= 8); - + assert((unsigned) callp->sy_arg_bytes <= sizeof (uthread->uu_arg)); + nargs = callp->sy_arg_bytes; error = copyin((user_addr_t) params, (char *) vt, nargs); if (error) { regs->eax = error; @@ -137,17 +156,19 @@ unix_syscall(x86_saved_state_t *state) thread_exception_return(); /* NOTREACHED */ } - if (code != 180) { - int *ip = (int *)vt; - KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_EXCP_SC, code) | DBG_FUNC_START, - *ip, *(ip+1), *(ip+2), *(ip+3), 0); + if (__probable(code != 180)) { + int *ip = (int *)vt; + + KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE, + BSDDBG_CODE(DBG_BSD_EXCP_SC, code) | DBG_FUNC_START, + *ip, *(ip+1), *(ip+2), *(ip+3), 0); } mungerp = callp->sy_arg_munge32; /* * If non-NULL, then call the syscall argument munger to - * copy in arguments (see xnu/bsd/dev/i386/munge.s); the + * copy in arguments (see xnu/bsd/dev/{i386|x86_64}/munge.s); the * first argument is NULL because we are munging in place * after a copyin because the ABI currently doesn't use * registers to pass system call arguments. @@ -155,92 +176,71 @@ unix_syscall(x86_saved_state_t *state) if (mungerp != NULL) (*mungerp)(NULL, vt); } else - KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_EXCP_SC, code) | DBG_FUNC_START, - 0, 0, 0, 0, 0); + KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE, + BSDDBG_CODE(DBG_BSD_EXCP_SC, code) | DBG_FUNC_START, + 0, 0, 0, 0, 0); + /* * Delayed binding of thread credential to process credential, if we * are not running with an explicitly set thread credential. */ - if (uthread->uu_ucred != p->p_ucred && - (uthread->uu_flag & UT_SETUID) == 0) { - kauth_cred_t old = uthread->uu_ucred; - proc_lock(p); - uthread->uu_ucred = p->p_ucred; - kauth_cred_ref(uthread->uu_ucred); - proc_unlock(p); - if (old != NOCRED) - kauth_cred_rele(old); - } + kauth_cred_uthread_update(uthread, p); uthread->uu_rval[0] = 0; uthread->uu_rval[1] = regs->edx; + uthread->uu_flag |= UT_NOTCANCELPT; - cancel_enable = callp->sy_cancel; - - if (cancel_enable == _SYSCALL_CANCEL_NONE) { - uthread->uu_flag |= UT_NOTCANCELPT; - } else { - if ((uthread->uu_flag & (UT_CANCELDISABLE | UT_CANCEL | UT_CANCELED)) == UT_CANCEL) { - if (cancel_enable == _SYSCALL_CANCEL_PRE) { - /* system call cancelled; return to handle cancellation */ - regs->eax = (long long)EINTR; - regs->efl |= EFL_CF; - thread_exception_return(); - /* NOTREACHED */ - } else { - thread_abort_safely(thread); - } - } - } - - funnel_type = (callp->sy_funnel & FUNNEL_MASK); - if (funnel_type == KERNEL_FUNNEL) - thread_funnel_set(kernel_flock, TRUE); - if (KTRPOINT(p, KTR_SYSCALL)) - ktrsyscall(p, code, callp->sy_narg, vt); +#ifdef JOE_DEBUG + uthread->uu_iocount = 0; + uthread->uu_vpindex = 0; +#endif AUDIT_SYSCALL_ENTER(code, p, uthread); error = (*(callp->sy_call))((void *) p, (void *) vt, &(uthread->uu_rval[0])); - AUDIT_SYSCALL_EXIT(error, p, uthread); - - if (error == ERESTART) { + AUDIT_SYSCALL_EXIT(code, p, uthread, error); + +#ifdef JOE_DEBUG + if (uthread->uu_iocount) + printf("system call returned with uu_iocount != 0\n"); +#endif +#if CONFIG_DTRACE + uthread->t_dtrace_errno = error; +#endif /* CONFIG_DTRACE */ + + if (__improbable(error == ERESTART)) { /* * Move the user's pc back to repeat the syscall: * 5 bytes for a sysenter, or 2 for an int 8x. * The SYSENTER_TF_CS covers single-stepping over a sysenter * - see debug trap handler in idt.s/idt64.s */ - if (regs->cs == SYSENTER_CS || regs->cs == SYSENTER_TF_CS) - regs->eip -= 5; - else - regs->eip -= 2; + + pal_syscall_restart(thread, state); } else if (error != EJUSTRETURN) { - if (error) { + if (__improbable(error)) { regs->eax = error; regs->efl |= EFL_CF; /* carry bit */ } else { /* (not error) */ regs->eax = uthread->uu_rval[0]; regs->edx = uthread->uu_rval[1]; - regs->efl &= ~EFL_CF; } } - if (KTRPOINT(p, KTR_SYSRET)) - ktrsysret(p, code, error, uthread->uu_rval[0]); - - if (cancel_enable == _SYSCALL_CANCEL_NONE) - uthread->uu_flag &= ~UT_NOTCANCELPT; + DEBUG_KPRINT_SYSCALL_UNIX( + "unix_syscall: error=%d retval=(%u,%u)\n", + error, regs->eax, regs->edx); + uthread->uu_flag &= ~UT_NOTCANCELPT; +#if FUNNEL_DEBUG /* - * if we're holding the funnel - * than drop it regardless of whether - * we took it on system call entry + * if we're holding the funnel panic */ - exit_funnel_section(); + syscall_exit_funnelcheck(); +#endif /* FUNNEL_DEBUG */ - if (uthread->uu_lowpri_delay) { + if (__improbable(uthread->uu_lowpri_window)) { /* * task is marked as a low priority I/O type * and the I/O we issued while in this system call @@ -248,12 +248,16 @@ unix_syscall(x86_saved_state_t *state) * delay in order to mitigate the impact of this * task on the normal operation of the system */ - IOSleep(uthread->uu_lowpri_delay); - uthread->uu_lowpri_delay = 0; + throttle_lowpri_io(1); + } + if (__probable(code != 180)) + KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE, + BSDDBG_CODE(DBG_BSD_EXCP_SC, code) | DBG_FUNC_END, + error, uthread->uu_rval[0], uthread->uu_rval[1], p->p_pid, 0); + + if (__improbable(!is_vfork && callp->sy_call == (sy_call_t *)execve && !error)) { + pal_execve_return(thread); } - if (code != 180) - KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_EXCP_SC, code) | DBG_FUNC_END, - error, uthread->uu_rval[0], uthread->uu_rval[1], 0, 0); thread_exception_return(); /* NOTREACHED */ @@ -264,34 +268,32 @@ void unix_syscall64(x86_saved_state_t *state) { thread_t thread; - unsigned short code; + unsigned int code; struct sysent *callp; void *uargp; int args_in_regs; int error; - int funnel_type; struct proc *p; struct uthread *uthread; - unsigned int cancel_enable; x86_saved_state64_t *regs; assert(is_saved_state64(state)); regs = saved_state64(state); - +#if DEBUG if (regs->rax == 0x2000800) thread_exception_return(); - +#endif thread = current_thread(); uthread = get_bsdthread_info(thread); /* Get the approriate proc; may be different from task's for vfork() */ - if (!(uthread->uu_flag & UT_VFORK)) + if (__probable(!(uthread->uu_flag & UT_VFORK))) p = (struct proc *)get_bsdtask_info(current_task()); else p = current_proc(); /* Verify that we are not being called from a task without a proc */ - if (p == NULL) { + if (__improbable(p == NULL)) { regs->rax = EPERM; regs->isf.rflags |= EFL_CF; task_terminate_internal(current_task()); @@ -301,113 +303,93 @@ unix_syscall64(x86_saved_state_t *state) args_in_regs = 6; code = regs->rax & SYSCALL_NUMBER_MASK; - callp = (code >= nsysent) ? &sysent[63] : &sysent[code]; + DEBUG_KPRINT_SYSCALL_UNIX( + "unix_syscall64: code=%d(%s) rip=%llx\n", + code, syscallnames[code >= NUM_SYSENT ? 63 : code], regs->isf.rip); + callp = (code >= NUM_SYSENT) ? &sysent[63] : &sysent[code]; uargp = (void *)(®s->rdi); - if (callp == sysent) { + if (__improbable(callp == sysent)) { /* * indirect system call... system call number * passed as 'arg0' */ code = regs->rdi; - callp = (code >= nsysent) ? &sysent[63] : &sysent[code]; + callp = (code >= NUM_SYSENT) ? &sysent[63] : &sysent[code]; uargp = (void *)(®s->rsi); args_in_regs = 5; } + uthread->uu_ap = uargp; if (callp->sy_narg != 0) { if (code != 180) { - uint64_t *ip = (uint64_t *)uargp; + uint64_t *ip = (uint64_t *)uargp; - KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_EXCP_SC, code) | DBG_FUNC_START, - (int)(*ip), (int)(*(ip+1)), (int)(*(ip+2)), (int)(*(ip+3)), 0); + KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE, + BSDDBG_CODE(DBG_BSD_EXCP_SC, code) | DBG_FUNC_START, + (int)(*ip), (int)(*(ip+1)), (int)(*(ip+2)), (int)(*(ip+3)), 0); } - assert(callp->sy_narg <= 8); + assert(callp->sy_narg <= 8); - if (callp->sy_narg > args_in_regs) { - int copyin_count; + if (__improbable(callp->sy_narg > args_in_regs)) { + int copyin_count; - copyin_count = (callp->sy_narg - args_in_regs) * sizeof(uint64_t); + copyin_count = (callp->sy_narg - args_in_regs) * sizeof(uint64_t); - error = copyin((user_addr_t)(regs->isf.rsp + sizeof(user_addr_t)), (char *)®s->v_arg6, copyin_count); + error = copyin((user_addr_t)(regs->isf.rsp + sizeof(user_addr_t)), (char *)®s->v_arg6, copyin_count); if (error) { - regs->rax = error; + regs->rax = error; regs->isf.rflags |= EFL_CF; thread_exception_return(); /* NOTREACHED */ } } - /* - * XXX Turn 64 bit unsafe calls into nosys() - */ - if (callp->sy_funnel & UNSAFE_64BIT) { - callp = &sysent[63]; - goto unsafe; - } - } else - KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_EXCP_SC, code) | DBG_FUNC_START, - 0, 0, 0, 0, 0); -unsafe: + KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE, + BSDDBG_CODE(DBG_BSD_EXCP_SC, code) | DBG_FUNC_START, + 0, 0, 0, 0, 0); /* * Delayed binding of thread credential to process credential, if we * are not running with an explicitly set thread credential. */ - if (uthread->uu_ucred != p->p_ucred && - (uthread->uu_flag & UT_SETUID) == 0) { - kauth_cred_t old = uthread->uu_ucred; - proc_lock(p); - uthread->uu_ucred = p->p_ucred; - kauth_cred_ref(uthread->uu_ucred); - proc_unlock(p); - if (old != NOCRED) - kauth_cred_rele(old); - } + kauth_cred_uthread_update(uthread, p); uthread->uu_rval[0] = 0; uthread->uu_rval[1] = 0; - cancel_enable = callp->sy_cancel; - if (cancel_enable == _SYSCALL_CANCEL_NONE) { - uthread->uu_flag |= UT_NOTCANCELPT; - } else { - if ((uthread->uu_flag & (UT_CANCELDISABLE | UT_CANCEL | UT_CANCELED)) == UT_CANCEL) { - if (cancel_enable == _SYSCALL_CANCEL_PRE) { - /* system call cancelled; return to handle cancellation */ - regs->rax = EINTR; - regs->isf.rflags |= EFL_CF; - thread_exception_return(); - /* NOTREACHED */ - } else { - thread_abort_safely(thread); - } - } - } + uthread->uu_flag |= UT_NOTCANCELPT; - funnel_type = (callp->sy_funnel & FUNNEL_MASK); - if (funnel_type == KERNEL_FUNNEL) - thread_funnel_set(kernel_flock, TRUE); - - if (KTRPOINT(p, KTR_SYSCALL)) - ktrsyscall(p, code, callp->sy_narg, uargp); +#ifdef JOE_DEBUG + uthread->uu_iocount = 0; + uthread->uu_vpindex = 0; +#endif AUDIT_SYSCALL_ENTER(code, p, uthread); error = (*(callp->sy_call))((void *) p, uargp, &(uthread->uu_rval[0])); - AUDIT_SYSCALL_EXIT(error, p, uthread); + AUDIT_SYSCALL_EXIT(code, p, uthread, error); + +#ifdef JOE_DEBUG + if (uthread->uu_iocount) + printf("system call returned with uu_iocount != 0\n"); +#endif + +#if CONFIG_DTRACE + uthread->t_dtrace_errno = error; +#endif /* CONFIG_DTRACE */ - if (error == ERESTART) { + if (__improbable(error == ERESTART)) { /* * all system calls come through via the syscall instruction * in 64 bit mode... its 2 bytes in length * move the user's pc back to repeat the syscall: */ - regs->isf.rip -= 2; + pal_syscall_restart( thread, state ); } else if (error != EJUSTRETURN) { - if (error) { - regs->rax = error; + if (__improbable(error)) { + regs->rax = error; regs->isf.rflags |= EFL_CF; /* carry bit */ } else { /* (not error) */ @@ -424,6 +406,7 @@ unsafe: case _SYSCALL_RET_ADDR_T: case _SYSCALL_RET_SIZE_T: case _SYSCALL_RET_SSIZE_T: + case _SYSCALL_RET_UINT64_T: regs->rax = *((uint64_t *)(&uthread->uu_rval[0])); regs->rdx = 0; break; @@ -437,20 +420,20 @@ unsafe: } } - if (KTRPOINT(p, KTR_SYSRET)) - ktrsysret(p, code, error, uthread->uu_rval[0]); - - if (cancel_enable == _SYSCALL_CANCEL_NONE) - uthread->uu_flag &= ~UT_NOTCANCELPT; + DEBUG_KPRINT_SYSCALL_UNIX( + "unix_syscall64: error=%d retval=(%llu,%llu)\n", + error, regs->rax, regs->rdx); + + uthread->uu_flag &= ~UT_NOTCANCELPT; +#if FUNNEL_DEBUG /* - * if we're holding the funnel - * than drop it regardless of whether - * we took it on system call entry + * if we're holding the funnel panic */ - exit_funnel_section(); + syscall_exit_funnelcheck(); +#endif /* FUNNEL_DEBUG */ - if (uthread->uu_lowpri_delay) { + if (__improbable(uthread->uu_lowpri_window)) { /* * task is marked as a low priority I/O type * and the I/O we issued while in this system call @@ -458,12 +441,12 @@ unsafe: * delay in order to mitigate the impact of this * task on the normal operation of the system */ - IOSleep(uthread->uu_lowpri_delay); - uthread->uu_lowpri_delay = 0; + throttle_lowpri_io(1); } - if (code != 180) - KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_EXCP_SC, code) | DBG_FUNC_END, - error, uthread->uu_rval[0], uthread->uu_rval[1], 0, 0); + if (__probable(code != 180)) + KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE, + BSDDBG_CODE(DBG_BSD_EXCP_SC, code) | DBG_FUNC_END, + error, uthread->uu_rval[0], uthread->uu_rval[1], p->p_pid, 0); thread_exception_return(); /* NOTREACHED */ @@ -476,114 +459,128 @@ unix_syscall_return(int error) thread_t thread; struct uthread *uthread; struct proc *p; - unsigned short code; + unsigned int code; vm_offset_t params; struct sysent *callp; - unsigned int cancel_enable; thread = current_thread(); uthread = get_bsdthread_info(thread); + pal_register_cache_state(thread, DIRTY); + p = current_proc(); if (proc_is64bit(p)) { - x86_saved_state64_t *regs; + x86_saved_state64_t *regs; regs = saved_state64(find_user_regs(thread)); - /* reconstruct code for tracing before blasting rax */ + /* reconstruct code for tracing before blasting rax */ code = regs->rax & SYSCALL_NUMBER_MASK; - callp = (code >= nsysent) ? &sysent[63] : &sysent[code]; + callp = (code >= NUM_SYSENT) ? &sysent[63] : &sysent[code]; if (callp == sysent) - /* + /* * indirect system call... system call number * passed as 'arg0' */ - code = regs->rdi; + code = regs->rdi; + +#if CONFIG_DTRACE + if (callp->sy_call == dtrace_systrace_syscall) + dtrace_systrace_syscall_return( code, error, uthread->uu_rval ); +#endif /* CONFIG_DTRACE */ + AUDIT_SYSCALL_EXIT(code, p, uthread, error); if (error == ERESTART) { - /* - * all system calls come through via the syscall instruction - * in 64 bit mode... its 2 bytes in length - * move the user's pc back to repeat the syscall: + /* + * repeat the syscall */ - regs->isf.rip -= 2; + pal_syscall_restart( thread, find_user_regs(thread) ); } else if (error != EJUSTRETURN) { - if (error) { - regs->rax = error; + if (error) { + regs->rax = error; regs->isf.rflags |= EFL_CF; /* carry bit */ } else { /* (not error) */ - switch (callp->sy_return_type) { + switch (callp->sy_return_type) { case _SYSCALL_RET_INT_T: - regs->rax = uthread->uu_rval[0]; + regs->rax = uthread->uu_rval[0]; regs->rdx = uthread->uu_rval[1]; break; case _SYSCALL_RET_UINT_T: - regs->rax = ((u_int)uthread->uu_rval[0]); + regs->rax = ((u_int)uthread->uu_rval[0]); regs->rdx = ((u_int)uthread->uu_rval[1]); break; case _SYSCALL_RET_OFF_T: case _SYSCALL_RET_ADDR_T: case _SYSCALL_RET_SIZE_T: case _SYSCALL_RET_SSIZE_T: - regs->rax = *((uint64_t *)(&uthread->uu_rval[0])); + case _SYSCALL_RET_UINT64_T: + regs->rax = *((uint64_t *)(&uthread->uu_rval[0])); regs->rdx = 0; break; case _SYSCALL_RET_NONE: - break; + break; default: - panic("unix_syscall: unknown return type"); + panic("unix_syscall: unknown return type"); break; } regs->isf.rflags &= ~EFL_CF; } } + DEBUG_KPRINT_SYSCALL_UNIX( + "unix_syscall_return: error=%d retval=(%llu,%llu)\n", + error, regs->rax, regs->rdx); } else { - x86_saved_state32_t *regs; + x86_saved_state32_t *regs; regs = saved_state32(find_user_regs(thread)); + regs->efl &= ~(EFL_CF); /* reconstruct code for tracing before blasting eax */ - code = regs->eax; - callp = (code >= nsysent) ? &sysent[63] : &sysent[code]; + code = regs->eax & I386_SYSCALL_NUMBER_MASK; + callp = (code >= NUM_SYSENT) ? &sysent[63] : &sysent[code]; + +#if CONFIG_DTRACE + if (callp->sy_call == dtrace_systrace_syscall) + dtrace_systrace_syscall_return( code, error, uthread->uu_rval ); +#endif /* CONFIG_DTRACE */ + AUDIT_SYSCALL_EXIT(code, p, uthread, error); if (callp == sysent) { - params = (vm_offset_t) ((caddr_t)regs->uesp + sizeof (int)); - code = fuword(params); + params = (vm_offset_t) (regs->uesp + sizeof (int)); + code = fuword(params); } if (error == ERESTART) { - regs->eip -= ((regs->cs & 0xffff) == SYSENTER_CS) ? 5 : 2; + pal_syscall_restart( thread, find_user_regs(thread) ); } else if (error != EJUSTRETURN) { - if (error) { - regs->eax = error; + if (error) { + regs->eax = error; regs->efl |= EFL_CF; /* carry bit */ } else { /* (not error) */ - regs->eax = uthread->uu_rval[0]; + regs->eax = uthread->uu_rval[0]; regs->edx = uthread->uu_rval[1]; - regs->efl &= ~EFL_CF; } } + DEBUG_KPRINT_SYSCALL_UNIX( + "unix_syscall_return: error=%d retval=(%u,%u)\n", + error, regs->eax, regs->edx); } - if (KTRPOINT(p, KTR_SYSRET)) - ktrsysret(p, code, error, uthread->uu_rval[0]); - cancel_enable = callp->sy_cancel; - if (cancel_enable == _SYSCALL_CANCEL_NONE) - uthread->uu_flag &= ~UT_NOTCANCELPT; + uthread->uu_flag &= ~UT_NOTCANCELPT; +#if FUNNEL_DEBUG /* - * if we're holding the funnel - * than drop it regardless of whether - * we took it on system call entry + * if we're holding the funnel panic */ - exit_funnel_section(); + syscall_exit_funnelcheck(); +#endif /* FUNNEL_DEBUG */ - if (uthread->uu_lowpri_delay) { + if (uthread->uu_lowpri_window) { /* * task is marked as a low priority I/O type * and the I/O we issued while in this system call @@ -591,34 +588,14 @@ unix_syscall_return(int error) * delay in order to mitigate the impact of this * task on the normal operation of the system */ - IOSleep(uthread->uu_lowpri_delay); - uthread->uu_lowpri_delay = 0; + throttle_lowpri_io(1); } if (code != 180) - KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_EXCP_SC, code) | DBG_FUNC_END, - error, uthread->uu_rval[0], uthread->uu_rval[1], 0, 0); + KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE, + BSDDBG_CODE(DBG_BSD_EXCP_SC, code) | DBG_FUNC_END, + error, uthread->uu_rval[0], uthread->uu_rval[1], p->p_pid, 0); thread_exception_return(); /* NOTREACHED */ } -void -munge_wwwlww( - __unused const void *in32, - void *out64) -{ - uint32_t *arg32; - uint64_t *arg64; - - /* we convert in place in out64 */ - arg32 = (uint32_t *) out64; - arg64 = (uint64_t *) out64; - - arg64[5] = arg32[6]; /* wwwlwW */ - arg64[4] = arg32[5]; /* wwwlWw */ - arg32[7] = arg32[4]; /* wwwLww (hi) */ - arg32[6] = arg32[3]; /* wwwLww (lo) */ - arg64[2] = arg32[2]; /* wwWlww */ - arg64[1] = arg32[1]; /* wWwlww */ - arg64[0] = arg32[0]; /* Wwwlww */ -}