X-Git-Url: https://git.saurik.com/apple/xnu.git/blobdiff_plain/2d21ac55c334faf3a56e5634905ed6987fc787d4..HEAD:/bsd/dev/i386/systemcalls.c?ds=sidebyside

diff --git a/bsd/dev/i386/systemcalls.c b/bsd/dev/i386/systemcalls.c
index fa6db0a3d..8a12ad5a3 100644
--- a/bsd/dev/i386/systemcalls.c
+++ b/bsd/dev/i386/systemcalls.c
@@ -1,8 +1,8 @@
 /*
- * Copyright (c) 2000-2007 Apple Inc. All rights reserved.
+ * Copyright (c) 2000-2016 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
- * 
+ *
  * This file contains Original Code and/or Modifications of Original Code
  * as defined in and that are subject to the Apple Public Source License
  * Version 2.0 (the 'License'). You may not use this file except in
@@ -11,10 +11,10 @@
  * unlawful or unlicensed copies of an Apple operating system, or to
  * circumvent, violate, or enable the circumvention or violation of, any
  * terms of an Apple operating system software license agreement.
- * 
+ *
  * Please obtain a copy of the License at
  * http://www.opensource.apple.com/apsl/ and read it before using this file.
- * 
+ *
  * The Original Code and all software distributed under the License are
  * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
@@ -22,7 +22,7 @@
  * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
  * Please see the License for the specific language governing rights and
  * limitations under the License.
- * 
+ *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
  */
 #include 
@@ -31,6 +31,7 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 #include 
@@ -46,13 +47,20 @@
 #include 
 #include 
 #include 
+#include 
-#include 
+#include 
 #include 
 #include 
 #include 
+#include 
+
+#if CONFIG_MACF
+#include 
+#endif
+
 #if CONFIG_DTRACE
 extern int32_t dtrace_systrace_syscall(struct proc *, void *, int *);
 extern void dtrace_systrace_syscall_return(unsigned short, int, int *);
@@ -61,10 +69,14 @@ extern void dtrace_systrace_syscall_return(unsigned short, int, int *);
 extern void unix_syscall(x86_saved_state_t *);
 extern void unix_syscall64(x86_saved_state_t *);
 extern void *find_user_regs(thread_t);
-extern void throttle_lowpri_io(int *lowpri_window, mount_t v_mount);
-extern void x86_toggle_sysenter_arg_store(thread_t thread, boolean_t valid);
-extern boolean_t x86_sysenter_arg_store_isvalid(thread_t thread);
+/* dynamically generated at build time based on syscalls.master */
+extern const char *syscallnames[];
+
+#define code_is_kdebug_trace(code) (((code) == SYS_kdebug_trace) || \
+	((code) == SYS_kdebug_trace64) || \
+	((code) == SYS_kdebug_trace_string))
+
 /*
  * Function:	unix_syscall
  *
@@ -72,98 +84,96 @@ extern boolean_t x86_sysenter_arg_store_isvalid(thread_t thread);
  *
  * Outputs:	none
  */
+__attribute__((noreturn))
 void
 unix_syscall(x86_saved_state_t *state)
 {
-	thread_t	thread;
-	void		*vt;
-	unsigned int	code;
-	struct sysent	*callp;
-
-	int		error;
-	vm_offset_t	params;
-	struct proc	*p;
-	struct uthread	*uthread;
-	x86_saved_state32_t	*regs;
-	boolean_t	args_in_uthread;
+	thread_t	thread;
+	void		*vt;
+	unsigned int	code, syscode;
+	const struct sysent	*callp;
+
+	int		error;
+	vm_offset_t	params;
+	struct proc	*p;
+	struct uthread	*uthread;
+	x86_saved_state32_t	*regs;
+	boolean_t	is_vfork;
+	pid_t		pid;
 
 	assert(is_saved_state32(state));
 	regs = saved_state32(state);
 #if DEBUG
-	if (regs->eax == 0x800)
+	if (regs->eax == 0x800) {
 		thread_exception_return();
+	}
 #endif
 	thread = current_thread();
 	uthread = get_bsdthread_info(thread);
 
+	uthread_reset_proc_refcount(uthread);
+
 	/* Get the approriate proc; may be different from task's for vfork() */
-	if (!(uthread->uu_flag & UT_VFORK))
-		p = (struct proc *)get_bsdtask_info(current_task());
-	else
+	is_vfork = uthread->uu_flag & UT_VFORK;
+	if (__improbable(is_vfork != 0)) {
 		p = current_proc();
-
-	/* Verify that we are not being called from a task without a proc */
-	if (p == NULL) {
-		regs->eax = EPERM;
-		regs->efl |= EFL_CF;
-		task_terminate_internal(current_task());
-		thread_exception_return();
-		/* NOTREACHED */
+	} else {
+		p = (struct proc *)get_bsdtask_info(current_task());
 	}
 
-	code = regs->eax & I386_SYSCALL_NUMBER_MASK;
-	args_in_uthread = ((regs->eax & I386_SYSCALL_ARG_BYTES_MASK) != 0) && x86_sysenter_arg_store_isvalid(thread);
-	params = (vm_offset_t) ((caddr_t)regs->uesp + sizeof (int));
+	code = regs->eax & I386_SYSCALL_NUMBER_MASK;
+	syscode = (code < nsysent) ? code : SYS_invalid;
+	DEBUG_KPRINT_SYSCALL_UNIX("unix_syscall: code=%d(%s) eip=%u\n",
+	    code, syscallnames[syscode], (uint32_t)regs->eip);
+	params = (vm_offset_t) (regs->uesp + sizeof(int));
 
 	regs->efl &= ~(EFL_CF);
 
-	callp = (code >= NUM_SYSENT) ? &sysent[63] : &sysent[code];
+	callp = &sysent[syscode];
 
-	if (callp == sysent) {
+	if (__improbable(callp == sysent)) {
 		code = fuword(params);
 		params += sizeof(int);
-		callp = (code >= NUM_SYSENT) ? &sysent[63] : &sysent[code];
+		syscode = (code < nsysent) ? code : SYS_invalid;
+		callp = &sysent[syscode];
 	}
 
 	vt = (void *)uthread->uu_arg;
 
 	if (callp->sy_arg_bytes != 0) {
-		sy_munge_t	*mungerp;
-
-		assert((unsigned) callp->sy_arg_bytes <= sizeof (uthread->uu_arg));
-		if (!args_in_uthread)
-		{
-			uint32_t nargs;
-			nargs = callp->sy_arg_bytes;
-			error = copyin((user_addr_t) params, (char *) vt, nargs);
-			if (error) {
-				regs->eax = error;
-				regs->efl |= EFL_CF;
-				thread_exception_return();
-				/* NOTREACHED */
-			}
-		}
+#if CONFIG_REQUIRES_U32_MUNGING
+		sy_munge_t	*mungerp;
+#else
+#error U32 syscalls on x86_64 kernel requires munging
+#endif
+		uint32_t	nargs;
 
-		if (code != 180) {
-			int *ip = (int *)vt;
+		assert((unsigned) callp->sy_arg_bytes <= sizeof(uthread->uu_arg));
+		nargs = callp->sy_arg_bytes;
+		error = copyin((user_addr_t) params, (char *) vt, nargs);
+		if (error) {
+			regs->eax = error;
+			regs->efl |= EFL_CF;
+			thread_exception_return();
+			/* NOTREACHED */
+		}
 
-			KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_EXCP_SC, code) | DBG_FUNC_START,
-				*ip, *(ip+1), *(ip+2), *(ip+3), 0);
+		if (__probable(!code_is_kdebug_trace(code))) {
+			uint32_t *uip = vt;
+			KDBG_RELEASE(BSDDBG_CODE(DBG_BSD_EXCP_SC, code) | DBG_FUNC_START,
+			    uip[0], uip[1], uip[2], uip[3]);
 		}
+
+#if CONFIG_REQUIRES_U32_MUNGING
 		mungerp = callp->sy_arg_munge32;
 
-		/*
-		 * If non-NULL, then call the syscall argument munger to
-		 * copy in arguments (see xnu/bsd/dev/i386/munge.s); the
-		 * first argument is NULL because we are munging in place
-		 * after a copyin because the ABI currently doesn't use
-		 * registers to pass system call arguments.
-		 */
-		if (mungerp != NULL)
-			(*mungerp)(NULL, vt);
-	} else
-		KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_EXCP_SC, code) | DBG_FUNC_START,
-			0, 0, 0, 0, 0);
+		if (mungerp != NULL) {
+			(*mungerp)(vt);
+		}
+#endif
+	} else {
+		KDBG_RELEASE(BSDDBG_CODE(DBG_BSD_EXCP_SC, code) | DBG_FUNC_START);
+	}
 
 	/*
 	 * Delayed binding of thread credential to process credential, if we
@@ -172,144 +182,195 @@ unix_syscall(x86_saved_state_t *state)
 	kauth_cred_uthread_update(uthread, p);
 
 	uthread->uu_rval[0] = 0;
-	uthread->uu_rval[1] = regs->edx;
+	uthread->uu_rval[1] = 0;
 
 	uthread->uu_flag |= UT_NOTCANCELPT;
-
+	uthread->syscall_code = code;
+	pid = proc_pid(p);
 
 #ifdef JOE_DEBUG
-	uthread->uu_iocount = 0;
-	uthread->uu_vpindex = 0;
+	uthread->uu_iocount = 0;
+	uthread->uu_vpindex = 0;
 #endif
+#if CONFIG_MACF
+	if (__improbable(p->syscall_filter_mask != NULL && !bitstr_test(p->syscall_filter_mask, syscode))) {
+		error = mac_proc_check_syscall_unix(p, syscode);
+		if (error) {
+			goto skip_syscall;
+		}
+	}
+#endif /* CONFIG_MACF */
+
 	AUDIT_SYSCALL_ENTER(code, p, uthread);
 	error = (*(callp->sy_call))((void *) p, (void *) vt, &(uthread->uu_rval[0]));
-	AUDIT_SYSCALL_EXIT(code, p, uthread, error);
+	AUDIT_SYSCALL_EXIT(code, p, uthread, error);
+
+#if CONFIG_MACF
+skip_syscall:
+#endif /* CONFIG_MACF */
 
 #ifdef JOE_DEBUG
-	if (uthread->uu_iocount)
-		joe_debug("system call returned with uu_iocount != 0");
+	if (uthread->uu_iocount) {
+		printf("system call returned with uu_iocount != 0\n");
+	}
 #endif
 
 #if CONFIG_DTRACE
 	uthread->t_dtrace_errno = error;
 #endif /* CONFIG_DTRACE */
 
-	if (error == ERESTART) {
+	if (__improbable(error == ERESTART)) {
 		/*
 		 * Move the user's pc back to repeat the syscall:
 		 * 5 bytes for a sysenter, or 2 for an int 8x.
 		 * The SYSENTER_TF_CS covers single-stepping over a sysenter
 		 * - see debug trap handler in idt.s/idt64.s
 		 */
-		if (regs->cs == SYSENTER_CS || regs->cs == SYSENTER_TF_CS) {
-			regs->eip -= 5;
-		}
-		else
-			regs->eip -= 2;
-	}
-	else if (error != EJUSTRETURN) {
-		if (error) {
-			regs->eax = error;
-			regs->efl |= EFL_CF;	/* carry bit */
+
+		pal_syscall_restart(thread, state);
+	} else if (error != EJUSTRETURN) {
+		if (__improbable(error)) {
+			regs->eax = error;
+			regs->efl |= EFL_CF;	/* carry bit */
 		} else { /* (not error) */
-			regs->eax = uthread->uu_rval[0];
-			regs->edx = uthread->uu_rval[1];
-		}
+			/*
+			 * We split retval across two registers, in case the
+			 * syscall had a 64-bit return value, in which case
+			 * eax/edx matches the function call ABI.
+			 */
+			regs->eax = uthread->uu_rval[0];
+			regs->edx = uthread->uu_rval[1];
+		}
 	}
+	DEBUG_KPRINT_SYSCALL_UNIX(
+	    "unix_syscall: error=%d retval=(%u,%u)\n",
+	    error, regs->eax, regs->edx);
+
 	uthread->uu_flag &= ~UT_NOTCANCELPT;
-#if DEBUG
-	/*
-	 * if we're holding the funnel panic
-	 */
-	syscall_exit_funnelcheck();
-#endif /* DEBUG */
+	uthread->syscall_code = 0;
+
+#if DEBUG || DEVELOPMENT
+	kern_allocation_name_t
+	prior __assert_only = thread_set_allocation_name(NULL);
+	assertf(prior == NULL, "thread_set_allocation_name(\"%s\") not cleared", kern_allocation_get_name(prior));
+#endif /* DEBUG || DEVELOPMENT */
 
-	if (uthread->uu_lowpri_window && uthread->v_mount) {
-	        /*
+	if (__improbable(uthread->uu_lowpri_window)) {
+	        /*
 		 * task is marked as a low priority I/O type
 		 * and the I/O we issued while in this system call
 		 * collided with normal I/O operations... we'll
 		 * delay in order to mitigate the impact of this
 		 * task on the normal operation of the system
 		 */
-	throttle_lowpri_io(&uthread->uu_lowpri_window,uthread->v_mount);
+		throttle_lowpri_io(1);
 	}
-	if (code != 180)
-		KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_EXCP_SC, code) | DBG_FUNC_END,
-			error, uthread->uu_rval[0], uthread->uu_rval[1], 0, 0);
+	if (__probable(!code_is_kdebug_trace(code))) {
+		KDBG_RELEASE(BSDDBG_CODE(DBG_BSD_EXCP_SC, code) | DBG_FUNC_END,
+		    error, uthread->uu_rval[0], uthread->uu_rval[1], pid);
+	}
+
+	if (__improbable(!is_vfork && callp->sy_call == (sy_call_t *)execve && !error)) {
+		pal_execve_return(thread);
+	}
+
+#if PROC_REF_DEBUG
+	if (__improbable(uthread_get_proc_refcount(uthread) != 0)) {
+		panic("system call returned with uu_proc_refcount != 0");
+	}
+#endif
 
 	thread_exception_return();
 	/* NOTREACHED */
 }
-
+__attribute__((noreturn))
 void
 unix_syscall64(x86_saved_state_t *state)
 {
-	thread_t	thread;
-	unsigned int	code;
-	struct sysent	*callp;
-	void		*uargp;
-	int		args_in_regs;
-	int		error;
-	struct proc	*p;
-	struct uthread	*uthread;
+	thread_t	thread;
+	void		*vt;
+	unsigned int	code, syscode;
+	const struct sysent	*callp;
+	int		args_in_regs;
+	boolean_t	args_start_at_rdi;
+	int		error;
+	struct proc	*p;
+	struct uthread	*uthread;
 	x86_saved_state64_t *regs;
+	pid_t		pid;
 
 	assert(is_saved_state64(state));
 	regs = saved_state64(state);
-
-	if (regs->rax == 0x2000800)
+#if DEBUG
+	if (regs->rax == 0x2000800) {
 		thread_exception_return();
-
+	}
+#endif
 	thread = current_thread();
 	uthread = get_bsdthread_info(thread);
 
+	uthread_reset_proc_refcount(uthread);
+
 	/* Get the approriate proc; may be different from task's for vfork() */
-	if (!(uthread->uu_flag & UT_VFORK))
+	if (__probable(!(uthread->uu_flag & UT_VFORK))) {
 		p = (struct proc *)get_bsdtask_info(current_task());
-	else
+	} else {
 		p = current_proc();
+	}
 
 	/* Verify that we are not being called from a task without a proc */
-	if (p == NULL) {
+	if (__improbable(p == NULL)) {
 		regs->rax = EPERM;
 		regs->isf.rflags |= EFL_CF;
 		task_terminate_internal(current_task());
 		thread_exception_return();
 		/* NOTREACHED */
 	}
 
-	args_in_regs = 6;
-	code = regs->rax & SYSCALL_NUMBER_MASK;
-	callp = (code >= NUM_SYSENT) ? &sysent[63] : &sysent[code];
-	uargp = (void *)(&regs->rdi);
+	code = regs->rax & SYSCALL_NUMBER_MASK;
+	syscode = (code < nsysent) ? code : SYS_invalid;
+	DEBUG_KPRINT_SYSCALL_UNIX(
+	    "unix_syscall64: code=%d(%s) rip=%llx\n",
+	    code, syscallnames[syscode], regs->isf.rip);
+	callp = &sysent[syscode];
 
-	if (callp == sysent) {
-	        /*
+	vt = (void *)uthread->uu_arg;
+
+	if (__improbable(callp == sysent)) {
+	        /*
 		 * indirect system call... system call number
 		 * passed as 'arg0'
 		 */
-		code = regs->rdi;
-		callp = (code >= NUM_SYSENT) ? &sysent[63] : &sysent[code];
-		uargp = (void *)(&regs->rsi);
+		code = regs->rdi;
+		syscode = (code < nsysent) ? code : SYS_invalid;
+		callp = &sysent[syscode];
+		args_start_at_rdi = FALSE;
 		args_in_regs = 5;
+	} else {
+		args_start_at_rdi = TRUE;
+		args_in_regs = 6;
 	}
 
 	if (callp->sy_narg != 0) {
-		if (code != 180) {
-			uint64_t *ip = (uint64_t *)uargp;
+		assert(callp->sy_narg <= 8); /* size of uu_arg */
+
+		args_in_regs = MIN(args_in_regs, callp->sy_narg);
+		memcpy(vt, args_start_at_rdi ? &regs->rdi : &regs->rsi, args_in_regs * sizeof(syscall_arg_t));
 
-			KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_EXCP_SC, code) | DBG_FUNC_START,
-				(int)(*ip), (int)(*(ip+1)), (int)(*(ip+2)), (int)(*(ip+3)), 0);
+		if (!code_is_kdebug_trace(code)) {
+			uint64_t *uip = vt;
+
+			KDBG_RELEASE(BSDDBG_CODE(DBG_BSD_EXCP_SC, code) | DBG_FUNC_START,
+			    uip[0], uip[1], uip[2], uip[3]);
 		}
 
-		assert(callp->sy_narg <= 8);
-		if (callp->sy_narg > args_in_regs) {
+		if (__improbable(callp->sy_narg > args_in_regs)) {
 			int copyin_count;
 
-			copyin_count = (callp->sy_narg - args_in_regs) * sizeof(uint64_t);
+			copyin_count = (callp->sy_narg - args_in_regs) * sizeof(syscall_arg_t);
 
-			error = copyin((user_addr_t)(regs->isf.rsp + sizeof(user_addr_t)), (char *)&regs->v_arg6, copyin_count);
+			error = copyin((user_addr_t)(regs->isf.rsp + sizeof(user_addr_t)), (char *)&uthread->uu_arg[args_in_regs], copyin_count);
 			if (error) {
 				regs->rax = error;
 				regs->isf.rflags |= EFL_CF;
@@ -317,17 +378,9 @@ unix_syscall64(x86_saved_state_t *state)
 				/* NOTREACHED */
 			}
 		}
-		/*
-		 * XXX Turn 64 bit unsafe calls into nosys()
-		 */
-		if (callp->sy_flags & UNSAFE_64BIT) {
-			callp = &sysent[63];
-			goto unsafe;
-		}
-	} else
-		KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_EXCP_SC, code) | DBG_FUNC_START,
-			0, 0, 0, 0, 0);
-unsafe:
+	} else {
+		KDBG_RELEASE(BSDDBG_CODE(DBG_BSD_EXCP_SC, code) | DBG_FUNC_START);
+	}
 
 	/*
 	 * Delayed binding of thread credential to process credential, if we
@@ -337,33 +390,54 @@ unsafe:
 
 	uthread->uu_rval[0] = 0;
 	uthread->uu_rval[1] = 0;
-
-
 	uthread->uu_flag |= UT_NOTCANCELPT;
+	uthread->syscall_code = code;
+	pid = proc_pid(p);
+
+#ifdef JOE_DEBUG
+	uthread->uu_iocount = 0;
+	uthread->uu_vpindex = 0;
+#endif
+#if CONFIG_MACF
+	if (__improbable(p->syscall_filter_mask != NULL && !bitstr_test(p->syscall_filter_mask, syscode))) {
+		error = mac_proc_check_syscall_unix(p, syscode);
+		if (error) {
+			goto skip_syscall;
+		}
+	}
+#endif /* CONFIG_MACF */
 
 	AUDIT_SYSCALL_ENTER(code, p, uthread);
-	error = (*(callp->sy_call))((void *) p, uargp, &(uthread->uu_rval[0]));
-	AUDIT_SYSCALL_EXIT(code, p, uthread, error);
+	error = (*(callp->sy_call))((void *) p, vt, &(uthread->uu_rval[0]));
+	AUDIT_SYSCALL_EXIT(code, p, uthread, error);
+
+#if CONFIG_MACF
+skip_syscall:
+#endif /* CONFIG_MACF */
+
+#ifdef JOE_DEBUG
+	if (uthread->uu_iocount) {
+		printf("system call returned with uu_iocount != 0\n");
+	}
+#endif
 
 #if CONFIG_DTRACE
 	uthread->t_dtrace_errno = error;
 #endif /* CONFIG_DTRACE */
-
-	if (error == ERESTART) {
+
+	if (__improbable(error == ERESTART)) {
 		/*
 		 * all system calls come through via the syscall instruction
 		 * in 64 bit mode... its 2 bytes in length
 		 * move the user's pc back to repeat the syscall:
 		 */
-		regs->isf.rip -= 2;
-	}
-	else if (error != EJUSTRETURN) {
-		if (error) {
+		pal_syscall_restart( thread, state );
+	} else if (error != EJUSTRETURN) {
+		if (__improbable(error)) {
 			regs->rax = error;
-			regs->isf.rflags |= EFL_CF;	/* carry bit */
+			regs->isf.rflags |= EFL_CF;	/* carry bit */
 		} else { /* (not error) */
-
 			switch (callp->sy_return_type) {
 			case _SYSCALL_RET_INT_T:
 				regs->rax = uthread->uu_rval[0];
@@ -377,7 +451,8 @@ unsafe:
 			case _SYSCALL_RET_ADDR_T:
 			case _SYSCALL_RET_SIZE_T:
 			case _SYSCALL_RET_SSIZE_T:
-				regs->rax = *((uint64_t *)(&uthread->uu_rval[0]));
+			case _SYSCALL_RET_UINT64_T:
+				regs->rax = *((uint64_t *)(&uthread->uu_rval[0]));
 				regs->rdx = 0;
 				break;
 			case _SYSCALL_RET_NONE:
@@ -387,30 +462,42 @@ unsafe:
 				break;
 			}
 			regs->isf.rflags &= ~EFL_CF;
-		}
+		}
 	}
+	DEBUG_KPRINT_SYSCALL_UNIX(
+	    "unix_syscall64: error=%d retval=(%llu,%llu)\n",
+	    error, regs->rax, regs->rdx);
 
 	uthread->uu_flag &= ~UT_NOTCANCELPT;
+	uthread->syscall_code = 0;
 
-	/*
-	 * if we're holding the funnel panic
-	 */
-	syscall_exit_funnelcheck();
+#if DEBUG || DEVELOPMENT
+	kern_allocation_name_t
+	prior __assert_only = thread_set_allocation_name(NULL);
+	assertf(prior == NULL, "thread_set_allocation_name(\"%s\") not cleared", kern_allocation_get_name(prior));
+#endif /* DEBUG || DEVELOPMENT */
 
-	if (uthread->uu_lowpri_window && uthread->v_mount) {
-	        /*
+	if (__improbable(uthread->uu_lowpri_window)) {
+	        /*
 		 * task is marked as a low priority I/O type
 		 * and the I/O we issued while in this system call
 		 * collided with normal I/O operations... we'll
 		 * delay in order to mitigate the impact of this
 		 * task on the normal operation of the system
 		 */
-		throttle_lowpri_io(&uthread->uu_lowpri_window,uthread->v_mount);
+		throttle_lowpri_io(1);
+	}
+	if (__probable(!code_is_kdebug_trace(code))) {
+		KDBG_RELEASE(BSDDBG_CODE(DBG_BSD_EXCP_SC, code) | DBG_FUNC_END,
+		    error, uthread->uu_rval[0], uthread->uu_rval[1], pid);
+	}
+
+#if PROC_REF_DEBUG
+	if (__improbable(uthread_get_proc_refcount(uthread))) {
+		panic("system call returned with uu_proc_refcount != 0");
 	}
-	if (code != 180)
-		KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_EXCP_SC, code) | DBG_FUNC_END,
-			error, uthread->uu_rval[0], uthread->uu_rval[1], 0, 0);
+#endif
 
 	thread_exception_return();
 	/* NOTREACHED */
@@ -420,16 +507,17 @@ unsafe:
 void
 unix_syscall_return(int error)
 {
-	thread_t	thread;
-	struct uthread	*uthread;
+	thread_t	thread;
+	struct uthread	*uthread;
 	struct proc *p;
 	unsigned int code;
-	vm_offset_t params;
-	struct sysent *callp;
+	const struct sysent *callp;
 
 	thread = current_thread();
 	uthread = get_bsdthread_info(thread);
 
+	pal_register_cache_state(thread, DIRTY);
+
 	p = current_proc();
 
 	if (proc_is64bit(p)) {
 		x86_saved_state64_t *regs;
@@ -437,36 +525,26 @@ unix_syscall_return(int error)
 
 		regs = saved_state64(find_user_regs(thread));
 
-		/* reconstruct code for tracing before blasting rax */
-		code = regs->rax & SYSCALL_NUMBER_MASK;
-		callp = (code >= NUM_SYSENT) ? &sysent[63] : &sysent[code];
-
-		if (callp == sysent)
-			/*
-			 * indirect system call... system call number
-			 * passed as 'arg0'
-			 */
-			code = regs->rdi;
+		code = uthread->syscall_code;
+		callp = (code >= nsysent) ? &sysent[SYS_invalid] : &sysent[code];
 
 #if CONFIG_DTRACE
-		if (callp->sy_call == dtrace_systrace_syscall)
+		if (callp->sy_call == dtrace_systrace_syscall) {
 			dtrace_systrace_syscall_return( code, error, uthread->uu_rval );
+		}
 #endif /* CONFIG_DTRACE */
+
 		AUDIT_SYSCALL_EXIT(code, p, uthread, error);
 
 		if (error == ERESTART) {
 			/*
-			 * all system calls come through via the syscall instruction
-			 * in 64 bit mode... its 2 bytes in length
-			 * move the user's pc back to repeat the syscall:
+			 * repeat the syscall
 			 */
-			regs->isf.rip -= 2;
-		}
-		else if (error != EJUSTRETURN) {
+			pal_syscall_restart( thread, find_user_regs(thread));
+		} else if (error != EJUSTRETURN) {
 			if (error) {
 				regs->rax = error;
-				regs->isf.rflags |= EFL_CF;	/* carry bit */
+				regs->isf.rflags |= EFL_CF;	/* carry bit */
 			} else { /* (not error) */
-
 				switch (callp->sy_return_type) {
 				case _SYSCALL_RET_INT_T:
 					regs->rax = uthread->uu_rval[0];
@@ -480,6 +558,7 @@ unix_syscall_return(int error)
 				case _SYSCALL_RET_ADDR_T:
 				case _SYSCALL_RET_SIZE_T:
 				case _SYSCALL_RET_SSIZE_T:
+				case _SYSCALL_RET_UINT64_T:
 					regs->rax = *((uint64_t *)(&uthread->uu_rval[0]));
 					regs->rdx = 0;
 					break;
@@ -490,115 +569,68 @@ unix_syscall_return(int error)
 					break;
 				}
 				regs->isf.rflags &= ~EFL_CF;
-			}
+			}
 		}
+		DEBUG_KPRINT_SYSCALL_UNIX(
+		    "unix_syscall_return: error=%d retval=(%llu,%llu)\n",
+		    error, regs->rax, regs->rdx);
 	} else {
-		x86_saved_state32_t	*regs;
+		x86_saved_state32_t     *regs;
 
 		regs = saved_state32(find_user_regs(thread));
 
 		regs->efl &= ~(EFL_CF);
-		/* reconstruct code for tracing before blasting eax */
-		code = regs->eax & I386_SYSCALL_NUMBER_MASK;
-		callp = (code >= NUM_SYSENT) ? &sysent[63] : &sysent[code];
+
+		code = uthread->syscall_code;
+		callp = (code >= nsysent) ? &sysent[SYS_invalid] : &sysent[code];
 
 #if CONFIG_DTRACE
-		if (callp->sy_call == dtrace_systrace_syscall)
+		if (callp->sy_call == dtrace_systrace_syscall) {
 			dtrace_systrace_syscall_return( code, error, uthread->uu_rval );
+		}
 #endif /* CONFIG_DTRACE */
+
 		AUDIT_SYSCALL_EXIT(code, p, uthread, error);
-		if (callp == sysent) {
-			params = (vm_offset_t) ((caddr_t)regs->uesp + sizeof (int));
-			code = fuword(params);
-		}
 
 		if (error == ERESTART) {
-			regs->eip -= ((regs->cs & 0xffff) == SYSENTER_CS) ? 5 : 2;
-		}
-		else if (error != EJUSTRETURN) {
+			pal_syscall_restart( thread, find_user_regs(thread));
+		} else if (error != EJUSTRETURN) {
			if (error) {
 				regs->eax = error;
-				regs->efl |= EFL_CF;	/* carry bit */
+				regs->efl |= EFL_CF;	/* carry bit */
 			} else { /* (not error) */
 				regs->eax = uthread->uu_rval[0];
 				regs->edx = uthread->uu_rval[1];
-			}
+			}
 		}
+		DEBUG_KPRINT_SYSCALL_UNIX(
+		    "unix_syscall_return: error=%d retval=(%u,%u)\n",
+		    error, regs->eax, regs->edx);
 	}
 
 	uthread->uu_flag &= ~UT_NOTCANCELPT;
 
-	/*
-	 * if we're holding the funnel panic
-	 */
-	syscall_exit_funnelcheck();
+#if DEBUG || DEVELOPMENT
+	kern_allocation_name_t
+	prior __assert_only = thread_set_allocation_name(NULL);
+	assertf(prior == NULL, "thread_set_allocation_name(\"%s\") not cleared", kern_allocation_get_name(prior));
+#endif /* DEBUG || DEVELOPMENT */
 
-	if (uthread->uu_lowpri_window && uthread->v_mount) {
-	        /*
+	if (uthread->uu_lowpri_window) {
+	        /*
 		 * task is marked as a low priority I/O type
 		 * and the I/O we issued while in this system call
 		 * collided with normal I/O operations... we'll
 		 * delay in order to mitigate the impact of this
 		 * task on the normal operation of the system
 		 */
-	throttle_lowpri_io(&uthread->uu_lowpri_window,uthread->v_mount);
+		throttle_lowpri_io(1);
+	}
+	if (!code_is_kdebug_trace(code)) {
+		KDBG_RELEASE(BSDDBG_CODE(DBG_BSD_EXCP_SC, code) | DBG_FUNC_END,
+		    error, uthread->uu_rval[0], uthread->uu_rval[1], p->p_pid);
 	}
-	if (code != 180)
-		KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_EXCP_SC, code) | DBG_FUNC_END,
-			error, uthread->uu_rval[0], uthread->uu_rval[1], 0, 0);
 
 	thread_exception_return();
 	/* NOTREACHED */
 }
-
-void
-munge_wwwlww(
-	__unused const void	*in32,
-	void			*out64)
-{
-	uint32_t	*arg32;
-	uint64_t	*arg64;
-
-	/* we convert in place in out64 */
-	arg32 = (uint32_t *) out64;
-	arg64 = (uint64_t *) out64;
-
-	arg64[5] = arg32[6];	/* wwwlwW */
-	arg64[4] = arg32[5];	/* wwwlWw */
-	arg32[7] = arg32[4];	/* wwwLww (hi) */
-	arg32[6] = arg32[3];	/* wwwLww (lo) */
-	arg64[2] = arg32[2];	/* wwWlww */
-	arg64[1] = arg32[1];	/* wWwlww */
-	arg64[0] = arg32[0];	/* Wwwlww */
-}
-
-
-void
-munge_wwlwww(
-	__unused const void	*in32,
-	void			*out64)
-{
-	uint32_t	*arg32;
-	uint64_t	*arg64;
-
-	/* we convert in place in out64 */
-	arg32 = (uint32_t *) out64;
-	arg64 = (uint64_t *) out64;
-
-	arg64[5] = arg32[6];	/* wwlwwW */
-	arg64[4] = arg32[5];	/* wwlwWw */
-	arg64[3] = arg32[4];	/* wwlWww */
-	arg32[5] = arg32[3];	/* wwLwww (hi) */
-	arg32[4] = arg32[2];	/* wwLwww (lo) */
-	arg64[1] = arg32[1];	/* wWlwww */
-	arg64[0] = arg32[0];	/* Wwlwww */
-}
-
-#ifdef JOE_DEBUG
-joe_debug(char *p) {
-
-	printf("%s\n", p);
-}
-#endif
-
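Note (not part of the upstream diff): the deleted munge_wwwlww()/munge_wwlwww() helpers widened the packed 32-bit words copied in from user space into 64-bit argument slots in place, always filling from the last argument toward the first so that no source word is overwritten before it has been read. At HEAD that job falls to the generated mungers selected under CONFIG_REQUIRES_U32_MUNGING, which take only the argument buffer ((*mungerp)(vt)). The standalone, little-endian C sketch below illustrates the same in-place widening technique; the function name and the main() harness are invented for illustration and are not xnu source.

/*
 * Sketch of in-place 32->64 bit argument widening ("w" = 32-bit word).
 * Assumes a little-endian machine, like the i386/x86_64 code it mirrors.
 */
#include <stdint.h>
#include <stdio.h>
#include <string.h>

/* Widen six packed 32-bit words into six 64-bit slots, in place.
 * Walking from the last argument toward the first guarantees each
 * 32-bit source word is read before its bytes are reused as part of
 * a 64-bit destination slot. */
static void
munge_wwwwww(uint64_t args[6])
{
	uint32_t *arg32 = (uint32_t *)args;

	for (int i = 5; i >= 0; i--) {
		args[i] = arg32[i];
	}
}

int
main(void)
{
	uint64_t buf[6] = { 0 };
	const uint32_t in[6] = { 1, 2, 3, 4, 5, 6 };

	/* simulate copyin(): user words land packed at the start of the buffer */
	memcpy(buf, in, sizeof(in));
	munge_wwwwww(buf);

	for (int i = 0; i < 6; i++) {
		printf("arg%d = %llu\n", i, (unsigned long long)buf[i]);
	}
	return 0;
}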
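Note (not part of the upstream diff): both unix_syscall() and unix_syscall64() in the new code clamp an out-of-range system call number to SYS_invalid before indexing sysent[], and treat entry 0 as the indirect system call, re-reading the real number from the first argument (fuword() from the user stack on i386, %rdi on x86_64) and shifting the remaining arguments down by one. The user-space C sketch below shows that dispatch pattern with an invented four-entry table; every name in it is hypothetical and none of it is xnu code.

#include <stdint.h>
#include <stdio.h>

#define NSYSENT_DEMO 4
#define SYS_INVALID_DEMO 3      /* slot that returns ENOSYS, like sysent[SYS_invalid] */

typedef int (*sy_call_fn)(uint64_t *args, int nargs);

static int demo_indirect(uint64_t *args, int nargs) { (void)args; (void)nargs; return -1; }
static int demo_getpid(uint64_t *args, int nargs)   { (void)args; (void)nargs; return 42; }
static int demo_write(uint64_t *args, int nargs)
{
	return printf("write(fd=%llu, len=%llu) nargs=%d\n",
	    (unsigned long long)args[0], (unsigned long long)args[2], nargs);
}
static int demo_nosys(uint64_t *args, int nargs)    { (void)args; (void)nargs; return 78; /* ENOSYS */ }

static const sy_call_fn sysent_demo[NSYSENT_DEMO] = {
	demo_indirect,  /* 0: indirect syscall, real number passed as arg0 */
	demo_getpid,    /* 1 */
	demo_write,     /* 2 */
	demo_nosys,     /* 3: SYS_INVALID_DEMO */
};

static int
dispatch(unsigned int code, uint64_t args[6], int nargs)
{
	/* clamp out-of-range numbers instead of indexing past the table */
	unsigned int syscode = (code < NSYSENT_DEMO) ? code : SYS_INVALID_DEMO;

	if (sysent_demo[syscode] == demo_indirect) {
		/* indirect: the real number is arg0, remaining args shift down */
		code = (unsigned int)args[0];
		syscode = (code < NSYSENT_DEMO) ? code : SYS_INVALID_DEMO;
		args++;
		nargs--;
	}
	return sysent_demo[syscode](args, nargs);
}

int
main(void)
{
	uint64_t direct[6]   = { 1, 0x1000, 16, 0, 0, 0 };  /* write(1, buf, 16) */
	uint64_t indirect[6] = { 2, 1, 0x1000, 16, 0, 0 };  /* syscall(2, 1, buf, 16) */

	printf("direct:   ret=%d\n", dispatch(2, direct, 3));
	printf("indirect: ret=%d\n", dispatch(0, indirect, 4));
	printf("bogus:    ret=%d\n", dispatch(999, direct, 3));
	return 0;
}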