X-Git-Url: https://git.saurik.com/apple/xnu.git/blobdiff_plain/21362eb3e66fd2c787aee132bce100a44d71a99c..143464d58d2bd6378e74eec636961ceb0d32fb91:/osfmk/i386/bsd_i386.c diff --git a/osfmk/i386/bsd_i386.c b/osfmk/i386/bsd_i386.c index 6568929ed..147951308 100644 --- a/osfmk/i386/bsd_i386.c +++ b/osfmk/i386/bsd_i386.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2005 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2010 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -43,53 +43,38 @@ #include #include #include +#include #include #include #include #include #include -#include #include -#include #include #include -#include #include #include #include -#include #include -#include -#include -#include -#include #include +#include +#include +#include +#include +#include +#include #include #include -#include +#include #include <../bsd/sys/sysent.h> -extern struct proc *current_proc(void); - -kern_return_t -thread_userstack( - thread_t, - int, - thread_state_t, - unsigned int, - mach_vm_offset_t *, - int * -); +#ifdef MACH_BSD +extern void mach_kauth_cred_uthread_update(void); +extern void throttle_lowpri_io(int); +#endif -kern_return_t -thread_entrypoint( - thread_t, - int, - thread_state_t, - unsigned int, - mach_vm_offset_t * -); +void * find_user_regs(thread_t); unsigned int get_msr_exportmask(void); @@ -97,11 +82,6 @@ unsigned int get_msr_nbits(void); unsigned int get_msr_rbits(void); -kern_return_t -thread_compose_cthread_desc(unsigned int addr, pcb_t pcb); - -void IOSleep(int); - /* * thread_userstack: * @@ -113,155 +93,111 @@ thread_userstack( __unused thread_t thread, int flavor, thread_state_t tstate, - unsigned int count, - user_addr_t *user_stack, + __unused unsigned int count, + mach_vm_offset_t *user_stack, int *customstack ) { - struct i386_saved_state *state; - i386_thread_state_t *state25; - vm_offset_t uesp; - - if (customstack) - *customstack = 0; - - switch (flavor) { - case i386_THREAD_STATE: /* FIXME */ - 
state25 = (i386_thread_state_t *) tstate; - if (state25->esp) - *user_stack = state25->esp; - else - *user_stack = USRSTACK; - if (customstack && state25->esp) - *customstack = 1; - else - *customstack = 0; - break; - - case i386_NEW_THREAD_STATE: - if (count < i386_NEW_THREAD_STATE_COUNT) - return (KERN_INVALID_ARGUMENT); - else { - state = (struct i386_saved_state *) tstate; - uesp = state->uesp; - } - - /* If a valid user stack is specified, use it. */ - if (uesp) - *user_stack = uesp; - else - *user_stack = USRSTACK; - if (customstack && uesp) - *customstack = 1; - else - *customstack = 0; - break; - default : - return (KERN_INVALID_ARGUMENT); - } - - return (KERN_SUCCESS); -} - -kern_return_t -thread_entrypoint( - __unused thread_t thread, - int flavor, - thread_state_t tstate, - unsigned int count, - mach_vm_offset_t *entry_point -) -{ - struct i386_saved_state *state; - i386_thread_state_t *state25; - - /* - * Set a default. - */ - if (*entry_point == 0) - *entry_point = VM_MIN_ADDRESS; - - switch (flavor) { - case i386_THREAD_STATE: - state25 = (i386_thread_state_t *) tstate; - *entry_point = state25->eip ? state25->eip: VM_MIN_ADDRESS; - break; - - case i386_NEW_THREAD_STATE: - if (count < i386_THREAD_STATE_COUNT) - return (KERN_INVALID_ARGUMENT); - else { - state = (struct i386_saved_state *) tstate; - - /* - * If a valid entry point is specified, use it. - */ - *entry_point = state->eip ? 
state->eip: VM_MIN_ADDRESS; + if (customstack) + *customstack = 0; + + switch (flavor) { + case x86_THREAD_STATE32: + { + x86_thread_state32_t *state25; + + state25 = (x86_thread_state32_t *) tstate; + + if (state25->esp) { + *user_stack = state25->esp; + if (customstack) + *customstack = 1; + } else { + *user_stack = VM_USRSTACK32; + if (customstack) + *customstack = 0; + } + break; + } + + case x86_THREAD_STATE64: + { + x86_thread_state64_t *state25; + + state25 = (x86_thread_state64_t *) tstate; + + if (state25->rsp) { + *user_stack = state25->rsp; + if (customstack) + *customstack = 1; + } else { + *user_stack = VM_USRSTACK64; + if (customstack) + *customstack = 0; + } + break; + } + + default: + return (KERN_INVALID_ARGUMENT); } - break; - } - - return (KERN_SUCCESS); -} -struct i386_saved_state * -get_user_regs(thread_t th) -{ - if (th->machine.pcb) - return(USER_REGS(th)); - else { - printf("[get_user_regs: thread does not have pcb]"); - return NULL; - } + return (KERN_SUCCESS); } /* - * Duplicate parent state in child - * for U**X fork. + * thread_userstackdefault: + * + * Return the default stack location for the + * thread, if otherwise unknown. 
*/ kern_return_t -machine_thread_dup( - thread_t parent, - thread_t child -) +thread_userstackdefault( + thread_t thread, + mach_vm_offset_t *default_user_stack) { - struct i386_float_state floatregs; - -#ifdef XXX - /* Save the FPU state */ - if ((pcb_t)(per_proc_info[cpu_number()].fpu_pcb) == parent->machine.pcb) { - fp_state_save(parent); + if (thread_is_64bit(thread)) { + *default_user_stack = VM_USRSTACK64; + } else { + *default_user_stack = VM_USRSTACK32; } -#endif - - if (child->machine.pcb == NULL || parent->machine.pcb == NULL) - return (KERN_FAILURE); - - /* Copy over the i386_saved_state registers */ - child->machine.pcb->iss = parent->machine.pcb->iss; + return (KERN_SUCCESS); +} - /* Check to see if parent is using floating point - * and if so, copy the registers to the child - * FIXME - make sure this works. +kern_return_t +thread_entrypoint( + __unused thread_t thread, + int flavor, + thread_state_t tstate, + __unused unsigned int count, + mach_vm_offset_t *entry_point +) +{ + /* + * Set a default. */ - - if (parent->machine.pcb->ims.ifps) { - if (fpu_get_state(parent, &floatregs) == KERN_SUCCESS) - fpu_set_state(child, &floatregs); + if (*entry_point == 0) + *entry_point = VM_MIN_ADDRESS; + + switch (flavor) { + case x86_THREAD_STATE32: + { + x86_thread_state32_t *state25; + + state25 = (i386_thread_state_t *) tstate; + *entry_point = state25->eip ? state25->eip: VM_MIN_ADDRESS; + break; + } + + case x86_THREAD_STATE64: + { + x86_thread_state64_t *state25; + + state25 = (x86_thread_state64_t *) tstate; + *entry_point = state25->rip ? state25->rip: VM_MIN_ADDRESS64; + break; + } } - - /* FIXME - should a user specified LDT, TSS and V86 info - * be duplicated as well?? - probably not. - */ - // duplicate any use LDT entry that was set I think this is appropriate. 
-#ifdef MACH_BSD - if (parent->machine.pcb->uldt_selector!= 0) { - child->machine.pcb->uldt_selector = parent->machine.pcb->uldt_selector; - child->machine.pcb->uldt_desc = parent->machine.pcb->uldt_desc; - } -#endif - - return (KERN_SUCCESS); } @@ -273,535 +209,341 @@ void thread_set_child(thread_t child, int pid); void thread_set_child(thread_t child, int pid) { - child->machine.pcb->iss.eax = pid; - child->machine.pcb->iss.edx = 1; - child->machine.pcb->iss.efl &= ~EFL_CF; -} -void thread_set_parent(thread_t parent, int pid); -void -thread_set_parent(thread_t parent, int pid) -{ - parent->machine.pcb->iss.eax = pid; - parent->machine.pcb->iss.edx = 0; - parent->machine.pcb->iss.efl &= ~EFL_CF; -} - - - -/* - * System Call handling code - */ - -#define ERESTART -1 /* restart syscall */ -#define EJUSTRETURN -2 /* don't modify regs, just return */ - + pal_register_cache_state(child, DIRTY); -#define NO_FUNNEL 0 -#define KERNEL_FUNNEL 1 - -extern funnel_t * kernel_flock; - -extern int set_bsduthreadargs (thread_t, struct i386_saved_state *, void *); -extern void * get_bsduthreadarg(thread_t); -extern int * get_bsduthreadrval(thread_t th); -extern int * get_bsduthreadlowpridelay(thread_t th); - -extern long fuword(vm_offset_t); + if (thread_is_64bit(child)) { + x86_saved_state64_t *iss64; -extern void unix_syscall(struct i386_saved_state *); -extern void unix_syscall_return(int); + iss64 = USER_REGS64(child); -/* following implemented in bsd/dev/i386/unix_signal.c */ -int __pthread_cset(struct sysent *); + iss64->rax = pid; + iss64->rdx = 1; + iss64->isf.rflags &= ~EFL_CF; + } else { + x86_saved_state32_t *iss32; -void __pthread_creset(struct sysent *); + iss32 = USER_REGS32(child); - -void -unix_syscall_return(int error) -{ - thread_t thread; - volatile int *rval; - struct i386_saved_state *regs; - struct proc *p; - unsigned short code; - vm_offset_t params; - struct sysent *callp; - volatile int *lowpri_delay; - - thread = current_thread(); - rval = 
get_bsduthreadrval(thread); - lowpri_delay = get_bsduthreadlowpridelay(thread); - p = current_proc(); - - regs = USER_REGS(thread); - - /* reconstruct code for tracing before blasting eax */ - code = regs->eax; - params = (vm_offset_t) ((caddr_t)regs->uesp + sizeof (int)); - callp = (code >= nsysent) ? &sysent[63] : &sysent[code]; - if (callp == sysent) { - code = fuword(params); - } - - if (error == ERESTART) { - regs->eip -= 7; - } - else if (error != EJUSTRETURN) { - if (error) { - regs->eax = error; - regs->efl |= EFL_CF; /* carry bit */ - } else { /* (not error) */ - regs->eax = rval[0]; - regs->edx = rval[1]; - regs->efl &= ~EFL_CF; - } + iss32->eax = pid; + iss32->edx = 1; + iss32->efl &= ~EFL_CF; } +} - ktrsysret(p, code, error, rval[0], (callp->sy_funnel & FUNNEL_MASK)); - __pthread_creset(callp); - if ((callp->sy_funnel & FUNNEL_MASK) != NO_FUNNEL) - (void) thread_funnel_set(current_thread()->funnel_lock, FALSE); +/* + * System Call handling code + */ - if (*lowpri_delay) { - /* - * task is marked as a low priority I/O type - * and the I/O we issued while in this system call - * collided with normal I/O operations... 
we'll - * delay in order to mitigate the impact of this - * task on the normal operation of the system - */ - IOSleep(*lowpri_delay); - *lowpri_delay = 0; - } - KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_EXCP_SC, code) | DBG_FUNC_END, - error, rval[0], rval[1], 0, 0); +extern long fuword(vm_offset_t); - thread_exception_return(); - /* NOTREACHED */ -} void -unix_syscall(struct i386_saved_state *regs) +machdep_syscall(x86_saved_state_t *state) { - thread_t thread; - void *vt; - unsigned short code; - struct sysent *callp; - int nargs; - int error; - int *rval; - int funnel_type; - vm_offset_t params; - struct proc *p; - volatile int *lowpri_delay; - - thread = current_thread(); - p = current_proc(); - rval = get_bsduthreadrval(thread); - lowpri_delay = get_bsduthreadlowpridelay(thread); - - thread->task->syscalls_unix++; /* MP-safety ignored */ - - //printf("[scall : eax %x]", regs->eax); - code = regs->eax; - params = (vm_offset_t) ((caddr_t)regs->uesp + sizeof (int)); - callp = (code >= nsysent) ? &sysent[63] : &sysent[code]; - if (callp == sysent) { - code = fuword(params); - params += sizeof (int); - callp = (code >= nsysent) ? 
&sysent[63] : &sysent[code]; - } + int args[machdep_call_count]; + int trapno; + int nargs; + const machdep_call_t *entry; + x86_saved_state32_t *regs; + + assert(is_saved_state32(state)); + regs = saved_state32(state); - vt = get_bsduthreadarg(thread); + trapno = regs->eax; +#if DEBUG_TRACE + kprintf("machdep_syscall(0x%08x) code=%d\n", regs, trapno); +#endif - if ((nargs = (callp->sy_narg * sizeof (int))) && - (error = copyin((user_addr_t) params, (char *) vt, nargs)) != 0) { - regs->eax = error; - regs->efl |= EFL_CF; - thread_exception_return(); - /* NOTREACHED */ - } - - rval[0] = 0; - rval[1] = regs->edx; + DEBUG_KPRINT_SYSCALL_MDEP( + "machdep_syscall: trapno=%d\n", trapno); + + if (trapno < 0 || trapno >= machdep_call_count) { + regs->eax = (unsigned int)kern_invalid(NULL); - if ((error = __pthread_cset(callp))) { - /* cancelled system call; let it returned with EINTR for handling */ - regs->eax = error; - regs->efl |= EFL_CF; thread_exception_return(); /* NOTREACHED */ } + entry = &machdep_call_table[trapno]; + nargs = entry->nargs; - funnel_type = (callp->sy_funnel & FUNNEL_MASK); - if(funnel_type == KERNEL_FUNNEL) - (void) thread_funnel_set(kernel_flock, TRUE); - - (void) set_bsduthreadargs(thread, regs, NULL); - - if (callp->sy_narg > 8) - panic("unix_syscall max arg count exceeded (%d)", callp->sy_narg); + if (nargs != 0) { + if (copyin((user_addr_t) regs->uesp + sizeof (int), + (char *) args, (nargs * sizeof (int)))) { + regs->eax = KERN_INVALID_ADDRESS; - ktrsyscall(p, code, callp->sy_narg, vt, funnel_type); - - { - int *ip = (int *)vt; - KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_EXCP_SC, code) | DBG_FUNC_START, - *ip, *(ip+1), *(ip+2), *(ip+3), 0); + thread_exception_return(); + /* NOTREACHED */ + } } + switch (nargs) { + case 0: + regs->eax = (*entry->routine.args_0)(); + break; + case 1: + regs->eax = (*entry->routine.args_1)(args[0]); + break; + case 2: + regs->eax = (*entry->routine.args_2)(args[0],args[1]); + break; + case 3: + if 
(!entry->bsd_style) + regs->eax = (*entry->routine.args_3)(args[0],args[1],args[2]); + else { + int error; + uint32_t rval; + + error = (*entry->routine.args_bsd_3)(&rval, args[0], args[1], args[2]); + if (error) { + regs->eax = error; + regs->efl |= EFL_CF; /* carry bit */ + } else { + regs->eax = rval; + regs->efl &= ~EFL_CF; + } + } + break; + case 4: + regs->eax = (*entry->routine.args_4)(args[0], args[1], args[2], args[3]); + break; - error = (*(callp->sy_call))((void *) p, (void *) vt, &rval[0]); - -#if 0 - /* May be needed with vfork changes */ - regs = USER_REGS(thread); -#endif - if (error == ERESTART) { - regs->eip -= 7; - } - else if (error != EJUSTRETURN) { - if (error) { - regs->eax = error; - regs->efl |= EFL_CF; /* carry bit */ - } else { /* (not error) */ - regs->eax = rval[0]; - regs->edx = rval[1]; - regs->efl &= ~EFL_CF; - } + default: + panic("machdep_syscall: too many args"); } + if (current_thread()->funnel_lock) + (void) thread_funnel_set(current_thread()->funnel_lock, FALSE); - ktrsysret(p, code, error, rval[0], funnel_type); + DEBUG_KPRINT_SYSCALL_MDEP("machdep_syscall: retval=%u\n", regs->eax); - __pthread_creset(callp); + throttle_lowpri_io(1); - if(funnel_type != NO_FUNNEL) - (void) thread_funnel_set(current_thread()->funnel_lock, FALSE); - - if (*lowpri_delay) { - /* - * task is marked as a low priority I/O type - * and the I/O we issued while in this system call - * collided with normal I/O operations... 
we'll - * delay in order to mitigate the impact of this - * task on the normal operation of the system - */ - IOSleep(*lowpri_delay); - *lowpri_delay = 0; - } - KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_EXCP_SC, code) | DBG_FUNC_END, - error, rval[0], rval[1], 0, 0); - - thread_exception_return(); - /* NOTREACHED */ + thread_exception_return(); + /* NOTREACHED */ } void -machdep_syscall( struct i386_saved_state *regs) +machdep_syscall64(x86_saved_state_t *state) { - int trapno, nargs; - machdep_call_t *entry; - - trapno = regs->eax; - if (trapno < 0 || trapno >= machdep_call_count) { - regs->eax = (unsigned int)kern_invalid(NULL); + int trapno; + const machdep_call_t *entry; + x86_saved_state64_t *regs; - thread_exception_return(); - /* NOTREACHED */ - } + assert(is_saved_state64(state)); + regs = saved_state64(state); - entry = &machdep_call_table[trapno]; - nargs = entry->nargs; + trapno = (int)(regs->rax & SYSCALL_NUMBER_MASK); - if (nargs > 0) { - int args[nargs]; + DEBUG_KPRINT_SYSCALL_MDEP( + "machdep_syscall64: trapno=%d\n", trapno); - if (copyin((user_addr_t) regs->uesp + sizeof (int), - (char *) args, - nargs * sizeof (int))) { + if (trapno < 0 || trapno >= machdep_call_count) { + regs->rax = (unsigned int)kern_invalid(NULL); - regs->eax = KERN_INVALID_ADDRESS; - - thread_exception_return(); - /* NOTREACHED */ + thread_exception_return(); + /* NOTREACHED */ } + entry = &machdep_call_table64[trapno]; - switch (nargs) { - case 1: - regs->eax = (*entry->routine.args_1)(args[0]); - break; - case 2: - regs->eax = (*entry->routine.args_2)(args[0],args[1]); - break; - case 3: - regs->eax = (*entry->routine.args_3)(args[0],args[1],args[2]); + switch (entry->nargs) { + case 0: + regs->rax = (*entry->routine.args_0)(); break; - case 4: - regs->eax = (*entry->routine.args_4)(args[0],args[1],args[2],args[3]); + case 1: + regs->rax = (*entry->routine.args64_1)(regs->rdi); break; - default: - panic("machdep_syscall(): too many args"); + default: + 
panic("machdep_syscall64: too many args"); } - } - else - regs->eax = (*entry->routine.args_0)(); + if (current_thread()->funnel_lock) + (void) thread_funnel_set(current_thread()->funnel_lock, FALSE); - if (current_thread()->funnel_lock) - (void) thread_funnel_set(current_thread()->funnel_lock, FALSE); + DEBUG_KPRINT_SYSCALL_MDEP("machdep_syscall: retval=%llu\n", regs->rax); - thread_exception_return(); - /* NOTREACHED */ + throttle_lowpri_io(1); + + thread_exception_return(); + /* NOTREACHED */ } +#endif /* MACH_BSD */ -kern_return_t -thread_compose_cthread_desc(unsigned int addr, pcb_t pcb) -{ - struct real_descriptor desc; - mp_disable_preemption(); +typedef kern_return_t (*mach_call_t)(void *); - desc.limit_low = 1; - desc.limit_high = 0; - desc.base_low = addr & 0xffff; - desc.base_med = (addr >> 16) & 0xff; - desc.base_high = (addr >> 24) & 0xff; - desc.access = ACC_P|ACC_PL_U|ACC_DATA_W; - desc.granularity = SZ_32|SZ_G; - pcb->cthread_desc = desc; - *ldt_desc_p(USER_CTHREAD) = desc; +struct mach_call_args { + syscall_arg_t arg1; + syscall_arg_t arg2; + syscall_arg_t arg3; + syscall_arg_t arg4; + syscall_arg_t arg5; + syscall_arg_t arg6; + syscall_arg_t arg7; + syscall_arg_t arg8; + syscall_arg_t arg9; +}; - mp_enable_preemption(); +static kern_return_t +mach_call_arg_munger32(uint32_t sp, struct mach_call_args *args, const mach_trap_t *trapp); - return(KERN_SUCCESS); -} -kern_return_t -thread_set_cthread_self(uint32_t self) +static kern_return_t +mach_call_arg_munger32(uint32_t sp, struct mach_call_args *args, const mach_trap_t *trapp) { - current_thread()->machine.pcb->cthread_self = self; - - return (KERN_SUCCESS); + if (copyin((user_addr_t)(sp + sizeof(int)), (char *)args, trapp->mach_trap_u32_words * sizeof (int))) + return KERN_INVALID_ARGUMENT; + trapp->mach_trap_arg_munge32(NULL, args); + return KERN_SUCCESS; } -kern_return_t -thread_get_cthread_self(void) -{ - return ((kern_return_t)current_thread()->machine.pcb->cthread_self); -} -kern_return_t 
-thread_fast_set_cthread_self(uint32_t self) -{ - pcb_t pcb; - pcb = (pcb_t)current_thread()->machine.pcb; - thread_compose_cthread_desc(self, pcb); - pcb->cthread_self = self; /* preserve old func too */ - return (USER_CTHREAD); -} +__private_extern__ void mach_call_munger(x86_saved_state_t *state); -/* - * thread_set_user_ldt routine is the interface for the user level - * settable ldt entry feature. allowing a user to create arbitrary - * ldt entries seems to be too large of a security hole, so instead - * this mechanism is in place to allow user level processes to have - * an ldt entry that can be used in conjunction with the FS register. - * - * Swapping occurs inside the pcb.c file along with initialization - * when a thread is created. The basic functioning theory is that the - * pcb->uldt_selector variable will contain either 0 meaning the - * process has not set up any entry, or the selector to be used in - * the FS register. pcb->uldt_desc contains the actual descriptor the - * user has set up stored in machine usable ldt format. - * - * Currently one entry is shared by all threads (USER_SETTABLE), but - * this could be changed in the future by changing how this routine - * allocates the selector. There seems to be no real reason at this - * time to have this added feature, but in the future it might be - * needed. - * - * address is the linear address of the start of the data area size - * is the size in bytes of the area flags should always be set to 0 - * for now. in the future it could be used to set R/W permisions or - * other functions. Currently the segment is created as a data segment - * up to 1 megabyte in size with full read/write permisions only. 
- * - * this call returns the segment selector or -1 if any error occurs - */ -kern_return_t -thread_set_user_ldt(uint32_t address, uint32_t size, uint32_t flags) +extern const char *mach_syscall_name_table[]; + +void +mach_call_munger(x86_saved_state_t *state) { - pcb_t pcb; - struct fake_descriptor temp; - int mycpu; + int argc; + int call_number; + mach_call_t mach_call; + kern_return_t retval; + struct mach_call_args args = { 0, 0, 0, 0, 0, 0, 0, 0, 0 }; + x86_saved_state32_t *regs; - if (flags != 0) - return -1; // flags not supported - if (size > 0xFFFFF) - return -1; // size too big, 1 meg is the limit + assert(is_saved_state32(state)); + regs = saved_state32(state); - mp_disable_preemption(); - mycpu = cpu_number(); - - // create a "fake" descriptor so we can use fix_desc() - // to build a real one... - // 32 bit default operation size - // standard read/write perms for a data segment - pcb = (pcb_t)current_thread()->machine.pcb; - temp.offset = address; - temp.lim_or_seg = size; - temp.size_or_wdct = SZ_32; - temp.access = ACC_P|ACC_PL_U|ACC_DATA_W; + call_number = -(regs->eax); - // turn this into a real descriptor - fix_desc(&temp,1); + DEBUG_KPRINT_SYSCALL_MACH( + "mach_call_munger: code=%d(%s)\n", + call_number, mach_syscall_name_table[call_number]); +#if DEBUG_TRACE + kprintf("mach_call_munger(0x%08x) code=%d\n", regs, call_number); +#endif - // set up our data in the pcb - pcb->uldt_desc = *(struct real_descriptor*)&temp; - pcb->uldt_selector = USER_SETTABLE; // set the selector value + if (call_number < 0 || call_number >= mach_trap_count) { + i386_exception(EXC_SYSCALL, call_number, 1); + /* NOTREACHED */ + } + mach_call = (mach_call_t)mach_trap_table[call_number].mach_trap_function; - // now set it up in the current table... 
- *ldt_desc_p(USER_SETTABLE) = *(struct real_descriptor*)&temp; + if (mach_call == (mach_call_t)kern_invalid) { + DEBUG_KPRINT_SYSCALL_MACH( + "mach_call_munger: kern_invalid 0x%x\n", regs->eax); + i386_exception(EXC_SYSCALL, call_number, 1); + /* NOTREACHED */ + } - mp_enable_preemption(); + argc = mach_trap_table[call_number].mach_trap_arg_count; + if (argc) { + retval = mach_call_arg_munger32(regs->uesp, &args, &mach_trap_table[call_number]); + if (retval != KERN_SUCCESS) { + regs->eax = retval; - return USER_SETTABLE; -} -void -mach25_syscall(struct i386_saved_state *regs) -{ - printf("*** Atttempt to execute a Mach 2.5 system call at EIP=%x EAX=%x(%d)\n", - regs->eip, regs->eax, -regs->eax); - panic("FIXME!"); -} -#endif /* MACH_BSD */ + DEBUG_KPRINT_SYSCALL_MACH( + "mach_call_munger: retval=0x%x\n", retval); + thread_exception_return(); + /* NOTREACHED */ + } + } -/* This routine is called from assembly before each and every mach trap. - */ +#ifdef MACH_BSD + mach_kauth_cred_uthread_update(); +#endif -extern unsigned int mach_call_start(unsigned int, unsigned int *); + KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE, + MACHDBG_CODE(DBG_MACH_EXCP_SC, (call_number)) | DBG_FUNC_START, + args.arg1, args.arg2, args.arg3, args.arg4, 0); -__private_extern__ -unsigned int -mach_call_start(unsigned int call_number, unsigned int *args) -{ - int i, argc; - unsigned int kdarg[3]; + retval = mach_call(&args); - current_thread()->task->syscalls_mach++; /* MP-safety ignored */ + DEBUG_KPRINT_SYSCALL_MACH("mach_call_munger: retval=0x%x\n", retval); -/* Always prepare to trace mach system calls */ + KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE, + MACHDBG_CODE(DBG_MACH_EXCP_SC,(call_number)) | DBG_FUNC_END, + retval, 0, 0, 0, 0); - kdarg[0]=0; - kdarg[1]=0; - kdarg[2]=0; + regs->eax = retval; - argc = mach_trap_table[call_number>>4].mach_trap_arg_count; - - if (argc > 3) - argc = 3; - - for (i=0; i < argc; i++) - kdarg[i] = (int)*(args + i); - - 
KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_EXCP_SC, (call_number>>4)) | DBG_FUNC_START, - kdarg[0], kdarg[1], kdarg[2], 0, 0); + throttle_lowpri_io(1); - return call_number; /* pass this back thru */ + thread_exception_return(); + /* NOTREACHED */ } -/* This routine is called from assembly after each mach system call - */ -extern unsigned int mach_call_end(unsigned int, unsigned int); +__private_extern__ void mach_call_munger64(x86_saved_state_t *regs); -__private_extern__ -unsigned int -mach_call_end(unsigned int call_number, unsigned int retval) +void +mach_call_munger64(x86_saved_state_t *state) { - KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_EXCP_SC,(call_number>>4)) | DBG_FUNC_END, - retval, 0, 0, 0, 0); - return retval; /* pass this back thru */ -} + int call_number; + int argc; + mach_call_t mach_call; + x86_saved_state64_t *regs; -typedef kern_return_t (*mach_call_t)(void *); + assert(is_saved_state64(state)); + regs = saved_state64(state); -extern __attribute__((regparm(1))) kern_return_t -mach_call_munger(unsigned int call_number, - unsigned int arg1, - unsigned int arg2, - unsigned int arg3, - unsigned int arg4, - unsigned int arg5, - unsigned int arg6, - unsigned int arg7, - unsigned int arg8, - unsigned int arg9 -); + call_number = (int)(regs->rax & SYSCALL_NUMBER_MASK); -struct mach_call_args { - unsigned int arg1; - unsigned int arg2; - unsigned int arg3; - unsigned int arg4; - unsigned int arg5; - unsigned int arg6; - unsigned int arg7; - unsigned int arg8; - unsigned int arg9; -}; -__private_extern__ -__attribute__((regparm(1))) kern_return_t -mach_call_munger(unsigned int call_number, - unsigned int arg1, - unsigned int arg2, - unsigned int arg3, - unsigned int arg4, - unsigned int arg5, - unsigned int arg6, - unsigned int arg7, - unsigned int arg8, - unsigned int arg9 -) -{ - int argc; - mach_call_t mach_call; - kern_return_t retval; - struct mach_call_args args = { 0, 0, 0, 0, 0, 0, 0, 0, 0 }; - - current_thread()->task->syscalls_mach++; /* 
MP-safety ignored */ - call_number >>= 4; + DEBUG_KPRINT_SYSCALL_MACH( + "mach_call_munger64: code=%d(%s)\n", + call_number, mach_syscall_name_table[call_number]); + + KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE, + MACHDBG_CODE(DBG_MACH_EXCP_SC,(call_number)) | DBG_FUNC_START, + regs->rdi, regs->rsi, regs->rdx, regs->r10, 0); + if (call_number < 0 || call_number >= mach_trap_count) { + i386_exception(EXC_SYSCALL, regs->rax, 1); + /* NOTREACHED */ + } + mach_call = (mach_call_t)mach_trap_table[call_number].mach_trap_function; + + if (mach_call == (mach_call_t)kern_invalid) { + i386_exception(EXC_SYSCALL, regs->rax, 1); + /* NOTREACHED */ + } argc = mach_trap_table[call_number].mach_trap_arg_count; - switch (argc) { - case 9: args.arg9 = arg9; - case 8: args.arg8 = arg8; - case 7: args.arg7 = arg7; - case 6: args.arg6 = arg6; - case 5: args.arg5 = arg5; - case 4: args.arg4 = arg4; - case 3: args.arg3 = arg3; - case 2: args.arg2 = arg2; - case 1: args.arg1 = arg1; + + if (argc > 6) { + int copyin_count; + + copyin_count = (argc - 6) * (int)sizeof(uint64_t); + + if (copyin((user_addr_t)(regs->isf.rsp + sizeof(user_addr_t)), (char *)&regs->v_arg6, copyin_count)) { + regs->rax = KERN_INVALID_ARGUMENT; + + thread_exception_return(); + /* NOTREACHED */ + } } + +#ifdef MACH_BSD + mach_kauth_cred_uthread_update(); +#endif + + regs->rax = (uint64_t)mach_call((void *)(&regs->rdi)); - KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_EXCP_SC, (call_number)) | DBG_FUNC_START, - args.arg1, args.arg2, args.arg3, 0, 0); - - mach_call = (mach_call_t)mach_trap_table[call_number].mach_trap_function; - retval = mach_call(&args); + DEBUG_KPRINT_SYSCALL_MACH( "mach_call_munger64: retval=0x%llx\n", regs->rax); - KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_EXCP_SC,(call_number)) | DBG_FUNC_END, - retval, 0, 0, 0, 0); + KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE, + MACHDBG_CODE(DBG_MACH_EXCP_SC,(call_number)) | DBG_FUNC_END, + regs->rax, 0, 0, 0, 0); + + throttle_lowpri_io(1); - return retval; + 
thread_exception_return(); + /* NOTREACHED */ } + /* * thread_setuserstack: * @@ -813,9 +555,20 @@ thread_setuserstack( thread_t thread, mach_vm_address_t user_stack) { - struct i386_saved_state *ss = get_user_regs(thread); + pal_register_cache_state(thread, DIRTY); + if (thread_is_64bit(thread)) { + x86_saved_state64_t *iss64; + + iss64 = USER_REGS64(thread); - ss->uesp = CAST_DOWN(unsigned int,user_stack); + iss64->isf.rsp = (uint64_t)user_stack; + } else { + x86_saved_state32_t *iss32; + + iss32 = USER_REGS32(thread); + + iss32->uesp = CAST_DOWN_EXPLICIT(unsigned int, user_stack); + } } /* @@ -829,10 +582,24 @@ thread_adjuserstack( thread_t thread, int adjust) { - struct i386_saved_state *ss = get_user_regs(thread); + pal_register_cache_state(thread, DIRTY); + if (thread_is_64bit(thread)) { + x86_saved_state64_t *iss64; + + iss64 = USER_REGS64(thread); + + iss64->isf.rsp += adjust; + + return iss64->isf.rsp; + } else { + x86_saved_state32_t *iss32; + + iss32 = USER_REGS32(thread); + + iss32->uesp += adjust; - ss->uesp += adjust; - return CAST_USER_ADDR_T(ss->uesp); + return CAST_USER_ADDR_T(iss32->uesp); + } } /* @@ -842,12 +609,99 @@ thread_adjuserstack( * dependent thread state info. 
*/ void -thread_setentrypoint( - thread_t thread, - mach_vm_address_t entry) +thread_setentrypoint(thread_t thread, mach_vm_address_t entry) +{ + pal_register_cache_state(thread, DIRTY); + if (thread_is_64bit(thread)) { + x86_saved_state64_t *iss64; + + iss64 = USER_REGS64(thread); + + iss64->isf.rip = (uint64_t)entry; + } else { + x86_saved_state32_t *iss32; + + iss32 = USER_REGS32(thread); + + iss32->eip = CAST_DOWN_EXPLICIT(unsigned int, entry); + } +} + + +kern_return_t +thread_setsinglestep(thread_t thread, int on) { - struct i386_saved_state *ss = get_user_regs(thread); + pal_register_cache_state(thread, DIRTY); + if (thread_is_64bit(thread)) { + x86_saved_state64_t *iss64; - ss->eip = CAST_DOWN(unsigned int,entry); -} + iss64 = USER_REGS64(thread); + if (on) + iss64->isf.rflags |= EFL_TF; + else + iss64->isf.rflags &= ~EFL_TF; + } else { + x86_saved_state32_t *iss32; + + iss32 = USER_REGS32(thread); + + if (on) { + iss32->efl |= EFL_TF; + /* Ensure IRET */ + if (iss32->cs == SYSENTER_CS) + iss32->cs = SYSENTER_TF_CS; + } + else + iss32->efl &= ~EFL_TF; + } + + return (KERN_SUCCESS); +} + + + +/* XXX this should be a struct savearea so that CHUD will work better on x86 */ +void * +find_user_regs(thread_t thread) +{ + pal_register_cache_state(thread, DIRTY); + return USER_STATE(thread); +} + +void * +get_user_regs(thread_t th) +{ + pal_register_cache_state(th, DIRTY); + return(USER_STATE(th)); +} + +#if CONFIG_DTRACE +/* + * DTrace would like to have a peek at the kernel interrupt state, if available. + * Based on osfmk/chud/i386/chud_thread_i386.c:chudxnu_thread_get_state(), which see. 
+ */ +x86_saved_state_t *find_kern_regs(thread_t); + +x86_saved_state_t * +find_kern_regs(thread_t thread) +{ + if (thread == current_thread() && + NULL != current_cpu_datap()->cpu_int_state && + !(USER_STATE(thread) == current_cpu_datap()->cpu_int_state && + current_cpu_datap()->cpu_interrupt_level == 1)) { + + return current_cpu_datap()->cpu_int_state; + } else { + return NULL; + } +} + +vm_offset_t dtrace_get_cpu_int_stack_top(void); + +vm_offset_t +dtrace_get_cpu_int_stack_top(void) +{ + return current_cpu_datap()->cpu_int_stack_top; +} +#endif