diff --git a/osfmk/i386/pcb.c b/osfmk/i386/pcb.c
index c261f4214..36d4fec6e 100644
--- a/osfmk/i386/pcb.c
+++ b/osfmk/i386/pcb.c
@@ -1,23 +1,29 @@
 /*
- * Copyright (c) 2000 Apple Computer, Inc. All rights reserved.
+ * Copyright (c) 2000-2009 Apple Inc. All rights reserved.
  *
- * @APPLE_LICENSE_HEADER_START@
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  *
- * The contents of this file constitute Original Code as defined in and
- * are subject to the Apple Public Source License Version 1.1 (the
- * "License"). You may not use this file except in compliance with the
- * License. Please obtain a copy of the License at
- * http://www.apple.com/publicsource and read it before using this file.
+ * This file contains Original Code and/or Modifications of Original Code
+ * as defined in and that are subject to the Apple Public Source License
+ * Version 2.0 (the 'License'). You may not use this file except in
+ * compliance with the License. The rights granted to you under the License
+ * may not be used to create, or enable the creation or redistribution of,
+ * unlawful or unlicensed copies of an Apple operating system, or to
+ * circumvent, violate, or enable the circumvention or violation of, any
+ * terms of an Apple operating system software license agreement.
  *
- * This Original Code and all software distributed under the License are
- * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+ * Please obtain a copy of the License at
+ * http://www.opensource.apple.com/apsl/ and read it before using this file.
+ *
+ * The Original Code and all software distributed under the License are
+ * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
  * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the
- * License for the specific language governing rights and limitations
- * under the License.
+ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
+ * Please see the License for the specific language governing rights and
+ * limitations under the License.
  *
- * @APPLE_LICENSE_HEADER_END@
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
  */
 /*
  * @OSF_COPYRIGHT@
@@ -48,7 +54,6 @@
  * the rights to redistribute these changes.
*/ -#include #include #include #include @@ -60,1133 +65,2122 @@ #include #include +#include #include +#include +#include +#include #include #include -#include -#include #include #include #include #include +#include #include #include +#include #include +#include -#include +#include +#include #include #include -#include #include #include #include -#include +#include +#include +#include +#if defined(__i386__) +#include +#endif +#include +#include +#include /* LAPIC_PMC_SWI_VECTOR */ + +#include + +#if CONFIG_COUNTERS +#include +#endif /* CONFIG_COUNTERS */ /* * Maps state flavor to number of words in the state: */ -unsigned int state_count[] = { - /* FLAVOR_LIST */ 0, - i386_NEW_THREAD_STATE_COUNT, - i386_FLOAT_STATE_COUNT, - i386_ISA_PORT_MAP_STATE_COUNT, - i386_V86_ASSIST_STATE_COUNT, - i386_REGS_SEGS_STATE_COUNT, - i386_THREAD_SYSCALL_STATE_COUNT, - /* THREAD_STATE_NONE */ 0, - i386_SAVED_STATE_COUNT, +unsigned int _MachineStateCount[] = { + /* FLAVOR_LIST */ + 0, + x86_THREAD_STATE32_COUNT, + x86_FLOAT_STATE32_COUNT, + x86_EXCEPTION_STATE32_COUNT, + x86_THREAD_STATE64_COUNT, + x86_FLOAT_STATE64_COUNT, + x86_EXCEPTION_STATE64_COUNT, + x86_THREAD_STATE_COUNT, + x86_FLOAT_STATE_COUNT, + x86_EXCEPTION_STATE_COUNT, + 0, + x86_SAVED_STATE32_COUNT, + x86_SAVED_STATE64_COUNT, + x86_DEBUG_STATE32_COUNT, + x86_DEBUG_STATE64_COUNT, + x86_DEBUG_STATE_COUNT }; +zone_t iss_zone; /* zone for saved_state area */ +zone_t ids_zone; /* zone for debug_state area */ + /* Forward */ -void act_machine_throughcall(thread_act_t thr_act); -extern thread_t Switch_context( - thread_t old, - void (*cont)(void), - thread_t new); +void act_machine_throughcall(thread_t thr_act); +void act_machine_return(int); + extern void Thread_continue(void); extern void Load_context( - thread_t thread); + thread_t thread); -/* - * consider_machine_collect: - * - * Try to collect machine-dependent pages - */ -void -consider_machine_collect() -{ -} +static void +get_exception_state32(thread_t thread, x86_exception_state32_t *es); -void -consider_machine_adjust() -{ -} +static void +get_exception_state64(thread_t thread, x86_exception_state64_t *es); +static void +get_thread_state32(thread_t thread, x86_thread_state32_t *ts); -/* - * machine_kernel_stack_init: - * - * Initialize a kernel stack which has already been - * attached to its thread_activation. - */ +static void +get_thread_state64(thread_t thread, x86_thread_state64_t *ts); -void -machine_kernel_stack_init( - thread_t thread, - void (*start_pos)(thread_t)) -{ - thread_act_t thr_act = thread->top_act; - vm_offset_t stack; +static int +set_thread_state32(thread_t thread, x86_thread_state32_t *ts); - assert(thr_act); - stack = thread->kernel_stack; - assert(stack); +static int +set_thread_state64(thread_t thread, x86_thread_state64_t *ts); -#if MACH_ASSERT - if (watchacts & WA_PCB) { - printf("machine_kernel_stack_init(thr=%x,stk=%x,start_pos=%x)\n", - thread,stack,start_pos); - printf("\tstack_iks=%x, stack_iel=%x\n", - STACK_IKS(stack), STACK_IEL(stack)); - } -#endif /* MACH_ASSERT */ +#if CONFIG_COUNTERS +static inline void +machine_pmc_cswitch(thread_t /* old */, thread_t /* new */); - /* - * We want to run at start_pos, giving it as an argument - * the return value from Load_context/Switch_context. - * Thread_continue takes care of the mismatch between - * the argument-passing/return-value conventions. - * This function will not return normally, - * so we don`t have to worry about a return address. 
- */
-	STACK_IKS(stack)->k_eip = (int) Thread_continue;
-	STACK_IKS(stack)->k_ebx = (int) start_pos;
-	STACK_IKS(stack)->k_esp = (int) STACK_IEL(stack);
+static inline boolean_t
+machine_thread_pmc_eligible(thread_t);
+
+static inline void
+pmc_swi(thread_t /* old */, thread_t /* new */);
+static inline boolean_t
+machine_thread_pmc_eligible(thread_t t) {
 	/*
-	 * Point top of kernel stack to user`s registers.
+	 * NOTE: Task-level reservations are propagated to child threads via
+	 * thread_create_internal.  Any mutation of task reservations forces a
+	 * recalculate of t_chud (for the pmc flag) for all threads in that task.
+	 * Consequently, we can simply check the current thread's flag against
+	 * THREAD_PMC_FLAG.  If the result is non-zero, we SWI for a PMC switch.
 	 */
-	STACK_IEL(stack)->saved_state = &thr_act->mact.pcb->iss;
+	return (t != NULL) ? ((t->t_chud & THREAD_PMC_FLAG) ? TRUE : FALSE) : FALSE;
 }
+static inline void
+pmc_swi(thread_t old, thread_t new) {
+	current_cpu_datap()->csw_old_thread = old;
+	current_cpu_datap()->csw_new_thread = new;
+	__asm__ __volatile__("int %0"::"i"(LAPIC_PMC_SWI_VECTOR):"memory");
+}
 
-#if	NCPUS > 1
-#define	curr_gdt(mycpu)		(mp_gdt[mycpu])
-#define	curr_ktss(mycpu)	(mp_ktss[mycpu])
-#else
-#define	curr_gdt(mycpu)		(gdt)
-#define	curr_ktss(mycpu)	(&ktss)
-#endif
+static inline void
+machine_pmc_cswitch(thread_t old, thread_t new) {
+	if (machine_thread_pmc_eligible(old) || machine_thread_pmc_eligible(new)) {
+		pmc_swi(old, new);
+	}
+}
+
+void ml_get_csw_threads(thread_t *old, thread_t *new) {
+	*old = current_cpu_datap()->csw_old_thread;
+	*new = current_cpu_datap()->csw_new_thread;
+}
 
-#define	gdt_desc_p(mycpu,sel) \
-	((struct real_descriptor *)&curr_gdt(mycpu)[sel_idx(sel)])
+#endif /* CONFIG_COUNTERS */
 
-void
-act_machine_switch_pcb( thread_act_t new_act )
+/*
+ * Don't let an illegal value for dr7 get set.  Specifically,
+ * check for undefined settings.  Setting these bit patterns
+ * results in undefined behaviour and can lead to an unexpected
+ * TRCTRAP.
+ */
+static boolean_t
+dr7_is_valid(uint32_t *dr7)
 {
-	pcb_t pcb = new_act->mact.pcb;
-	int mycpu;
-    {
-	register iopb_tss_t	tss = pcb->ims.io_tss;
-	vm_offset_t pcb_stack_top;
+	int i;
+	uint32_t mask1, mask2;
 
-	assert(new_act->thread != NULL);
-	assert(new_act->thread->kernel_stack != 0);
-	STACK_IEL(new_act->thread->kernel_stack)->saved_state =
-		&new_act->mact.pcb->iss;
+	/*
+	 * If the DE bit is set in CR4, R/W0-3 can be pattern
+	 * "10B" to indicate i/o reads and writes
+	 */
+	if (!(get_cr4() & CR4_DE))
+		for (i = 0, mask1 = 0x3<<16, mask2 = 0x2<<16; i < 4;
+				i++, mask1 <<= 4, mask2 <<= 4)
+			if ((*dr7 & mask1) == mask2)
+				return (FALSE);
 
 	/*
-	 * Save a pointer to the top of the "kernel" stack -
-	 * actually the place in the PCB where a trap into
-	 * kernel mode will push the registers.
-	 * The location depends on V8086 mode.  If we are
-	 * not in V8086 mode, then a trap into the kernel
-	 * won`t save the v86 segments, so we leave room.
+	 * len0-3 pattern "10B" is ok for len on Merom and newer processors
+	 * (it signifies an 8-byte wide region). We use the 64bit capability
+	 * of the processor in lieu of the more laborious model/family checks
+	 * as all 64-bit capable processors so far support this.
+	 * Reject an attempt to use this on 64-bit incapable processors.
 	 */
+	if (current_cpu_datap()->cpu_is64bit == FALSE)
+		for (i = 0, mask1 = 0x3<<18, mask2 = 0x2<<18; i < 4;
+				i++, mask1 <<= 4, mask2 <<= 4)
+			if ((*dr7 & mask1) == mask2)
+				return (FALSE);
 
-	pcb_stack_top = (pcb->iss.efl & EFL_VM)
-			?
(int) (&pcb->iss + 1) - : (int) (&pcb->iss.v86_segs); - - mp_disable_preemption(); - mycpu = cpu_number(); - - if (tss == 0) { - /* - * No per-thread IO permissions. - * Use standard kernel TSS. - */ - if (!(gdt_desc_p(mycpu,KERNEL_TSS)->access & ACC_TSS_BUSY)) - set_tr(KERNEL_TSS); - curr_ktss(mycpu)->esp0 = pcb_stack_top; - } - else { - /* - * Set the IO permissions. Use this thread`s TSS. - */ - *gdt_desc_p(mycpu,USER_TSS) - = *(struct real_descriptor *)tss->iopb_desc; - tss->tss.esp0 = pcb_stack_top; - set_tr(USER_TSS); - gdt_desc_p(mycpu,KERNEL_TSS)->access &= ~ ACC_TSS_BUSY; - } - } + /* + * if we are doing an instruction execution break (indicated + * by r/w[x] being "00B"), then the len[x] must also be set + * to "00B" + */ + for (i = 0; i < 4; i++) + if (((((*dr7 >> (16 + i*4))) & 0x3) == 0) && + ((((*dr7 >> (18 + i*4))) & 0x3) != 0)) + return (FALSE); - { - register user_ldt_t ldt = pcb->ims.ldt; /* - * Set the thread`s LDT. + * Intel docs have these bits fixed. */ - if (ldt == 0) { - /* - * Use system LDT. - */ - set_ldt(KERNEL_LDT); - } - else { - /* - * Thread has its own LDT. - */ - *gdt_desc_p(mycpu,USER_LDT) = ldt->desc; - set_ldt(USER_LDT); - } - } - mp_enable_preemption(); + *dr7 |= 0x1 << 10; /* set bit 10 to 1 */ + *dr7 &= ~(0x1 << 11); /* set bit 11 to 0 */ + *dr7 &= ~(0x1 << 12); /* set bit 12 to 0 */ + *dr7 &= ~(0x1 << 14); /* set bit 14 to 0 */ + *dr7 &= ~(0x1 << 15); /* set bit 15 to 0 */ + /* - * Load the floating-point context, if necessary. + * We don't allow anything to set the global breakpoints. */ - fpu_load_context(pcb); + if (*dr7 & 0x2) + return (FALSE); + + if (*dr7 & (0x2<<2)) + return (FALSE); + + if (*dr7 & (0x2<<4)) + return (FALSE); + + if (*dr7 & (0x2<<6)) + return (FALSE); + + return (TRUE); } -/* - * flush out any lazily evaluated HW state in the - * owning thread's context, before termination. - */ -void -thread_machine_flush( thread_act_t cur_act ) +static inline void +set_live_debug_state32(cpu_data_t *cdp, x86_debug_state32_t *ds) { - fpflush(cur_act); + __asm__ volatile ("movl %0,%%db0" : :"r" (ds->dr0)); + __asm__ volatile ("movl %0,%%db1" : :"r" (ds->dr1)); + __asm__ volatile ("movl %0,%%db2" : :"r" (ds->dr2)); + __asm__ volatile ("movl %0,%%db3" : :"r" (ds->dr3)); + if (cpu_mode_is64bit()) + cdp->cpu_dr7 = ds->dr7; } -/* - * Switch to the first thread on a CPU. - */ -void -load_context( - thread_t new) +extern void set_64bit_debug_regs(x86_debug_state64_t *ds); + +static inline void +set_live_debug_state64(cpu_data_t *cdp, x86_debug_state64_t *ds) { - act_machine_switch_pcb(new->top_act); - Load_context(new); + /* + * We need to enter 64-bit mode in order to set the full + * width of these registers + */ + set_64bit_debug_regs(ds); + cdp->cpu_dr7 = ds->dr7; } -/* - * Number of times we needed to swap an activation back in before - * switching to it. - */ -int switch_act_swapins = 0; - -/* - * machine_switch_act - * - * Machine-dependent details of activation switching. Called with - * RPC locks held and preemption disabled. - */ -void -machine_switch_act( - thread_t thread, - thread_act_t old, - thread_act_t new, - int cpu) +boolean_t +debug_state_is_valid32(x86_debug_state32_t *ds) { + if (!dr7_is_valid(&ds->dr7)) + return FALSE; + +#if defined(__i386__) /* - * Switch the vm, ast and pcb context. - * Save FP registers if in use and set TS (task switch) bit. + * Only allow local breakpoints and make sure they are not + * in the trampoline code. 
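+	 *
+	 * For reference, the local-enable bits in DR7 consulted below sit
+	 * at even bit positions, one per debug register:
+	 *	bit 0 (L0) arms DR0,	bit 2 (L1) arms DR1,
+	 *	bit 4 (L2) arms DR2,	bit 6 (L3) arms DR3.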
*/ - fpu_save_context(thread); + if (ds->dr7 & 0x1) + if (ds->dr0 >= (unsigned long)HIGH_MEM_BASE) + return FALSE; - active_stacks[cpu] = thread->kernel_stack; - ast_context(new, cpu); + if (ds->dr7 & (0x1<<2)) + if (ds->dr1 >= (unsigned long)HIGH_MEM_BASE) + return FALSE; - PMAP_SWITCH_CONTEXT(old, new, cpu); - act_machine_switch_pcb(new); + if (ds->dr7 & (0x1<<4)) + if (ds->dr2 >= (unsigned long)HIGH_MEM_BASE) + return FALSE; + + if (ds->dr7 & (0x1<<6)) + if (ds->dr3 >= (unsigned long)HIGH_MEM_BASE) + return FALSE; +#endif + + return TRUE; } -/* - * Switch to a new thread. - * Save the old thread`s kernel state or continuation, - * and return it. - */ -thread_t -switch_context( - thread_t old, - void (*continuation)(void), - thread_t new) +boolean_t +debug_state_is_valid64(x86_debug_state64_t *ds) { - register thread_act_t old_act = old->top_act, - new_act = new->top_act; - -#if MACH_RT - assert(old_act->kernel_loaded || - active_stacks[cpu_number()] == old_act->thread->kernel_stack); - assert (get_preemption_level() == 1); -#endif - check_simple_locks(); + if (!dr7_is_valid((uint32_t *)&ds->dr7)) + return FALSE; /* - * Save FP registers if in use. + * Don't allow the user to set debug addresses above their max + * value */ - fpu_save_context(old); + if (ds->dr7 & 0x1) + if (ds->dr0 >= VM_MAX_PAGE_ADDRESS) + return FALSE; -#if MACH_ASSERT - if (watchacts & WA_SWITCH) - printf("\tswitch_context(old=%x con=%x new=%x)\n", - old, continuation, new); -#endif /* MACH_ASSERT */ + if (ds->dr7 & (0x1<<2)) + if (ds->dr1 >= VM_MAX_PAGE_ADDRESS) + return FALSE; - /* - * Switch address maps if need be, even if not switching tasks. - * (A server activation may be "borrowing" a client map.) - */ - { - int mycpu = cpu_number(); + if (ds->dr7 & (0x1<<4)) + if (ds->dr2 >= VM_MAX_PAGE_ADDRESS) + return FALSE; - PMAP_SWITCH_CONTEXT(old_act, new_act, mycpu) - } + if (ds->dr7 & (0x1<<6)) + if (ds->dr3 >= VM_MAX_PAGE_ADDRESS) + return FALSE; - /* - * Load the rest of the user state for the new thread - */ - act_machine_switch_pcb(new_act); - KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_SCHED,MACH_SCHED) | DBG_FUNC_NONE, - (int)old, (int)new, old->sched_pri, new->sched_pri, 0); - return(Switch_context(old, continuation, new)); + return TRUE; } -void -pcb_module_init(void) + +static kern_return_t +set_debug_state32(thread_t thread, x86_debug_state32_t *ds) { - fpu_module_init(); - iopb_init(); + x86_debug_state32_t *ids; + pcb_t pcb; + + pcb = thread->machine.pcb; + ids = pcb->ids; + + if (debug_state_is_valid32(ds) != TRUE) { + return KERN_INVALID_ARGUMENT; + } + + if (ids == NULL) { + ids = zalloc(ids_zone); + bzero(ids, sizeof *ids); + + simple_lock(&pcb->lock); + /* make sure it wasn't already alloc()'d elsewhere */ + if (pcb->ids == NULL) { + pcb->ids = ids; + simple_unlock(&pcb->lock); + } else { + simple_unlock(&pcb->lock); + zfree(ids_zone, ids); + } + } + + + copy_debug_state32(ds, ids, FALSE); + + return (KERN_SUCCESS); } -void -pcb_init( register thread_act_t thr_act ) +static kern_return_t +set_debug_state64(thread_t thread, x86_debug_state64_t *ds) { - register pcb_t pcb; + x86_debug_state64_t *ids; + pcb_t pcb; - assert(thr_act->mact.pcb == (pcb_t)0); - pcb = thr_act->mact.pcb = &thr_act->mact.xxx_pcb; + pcb = thread->machine.pcb; + ids = pcb->ids; -#if MACH_ASSERT - if (watchacts & WA_PCB) - printf("pcb_init(%x) pcb=%x\n", thr_act, pcb); -#endif /* MACH_ASSERT */ + if (debug_state_is_valid64(ds) != TRUE) { + return KERN_INVALID_ARGUMENT; + } - /* - * We can't let random values leak out to the user. 
- * (however, act_create() zeroed the entire thr_act, mact, pcb) - * bzero((char *) pcb, sizeof *pcb); - */ - simple_lock_init(&pcb->lock, ETAP_MISC_PCB); + if (ids == NULL) { + ids = zalloc(ids_zone); + bzero(ids, sizeof *ids); - /* - * Guarantee that the bootstrapped thread will be in user - * mode. - */ - pcb->iss.cs = USER_CS; - pcb->iss.ss = USER_DS; - pcb->iss.ds = USER_DS; - pcb->iss.es = USER_DS; - pcb->iss.fs = USER_DS; - pcb->iss.gs = USER_DS; - pcb->iss.efl = EFL_USER_SET; + simple_lock(&pcb->lock); + /* make sure it wasn't already alloc()'d elsewhere */ + if (pcb->ids == NULL) { + pcb->ids = ids; + simple_unlock(&pcb->lock); + } else { + simple_unlock(&pcb->lock); + zfree(ids_zone, ids); + } + } + + copy_debug_state64(ds, ids, FALSE); + + return (KERN_SUCCESS); } -/* - * Adjust saved register state for thread belonging to task - * created with kernel_task_create(). - */ -void -pcb_user_to_kernel( - thread_act_t thr_act) +static void +get_debug_state32(thread_t thread, x86_debug_state32_t *ds) { - register pcb_t pcb = thr_act->mact.pcb; - - pcb->iss.cs = KERNEL_CS; - pcb->iss.ss = KERNEL_DS; - pcb->iss.ds = KERNEL_DS; - pcb->iss.es = KERNEL_DS; - pcb->iss.fs = KERNEL_DS; - pcb->iss.gs = CPU_DATA; + x86_debug_state32_t *saved_state; + + saved_state = thread->machine.pcb->ids; + + if (saved_state) { + copy_debug_state32(saved_state, ds, TRUE); + } else + bzero(ds, sizeof *ds); } -void -pcb_terminate( - register thread_act_t thr_act) +static void +get_debug_state64(thread_t thread, x86_debug_state64_t *ds) { - register pcb_t pcb = thr_act->mact.pcb; + x86_debug_state64_t *saved_state; - assert(pcb); + saved_state = (x86_debug_state64_t *)thread->machine.pcb->ids; - if (pcb->ims.io_tss != 0) - iopb_destroy(pcb->ims.io_tss); - if (pcb->ims.ifps != 0) - fp_free(pcb->ims.ifps); - if (pcb->ims.ldt != 0) - user_ldt_free(pcb->ims.ldt); - thr_act->mact.pcb = (pcb_t)0; + if (saved_state) { + copy_debug_state64(saved_state, ds, TRUE); + } else + bzero(ds, sizeof *ds); } /* - * pcb_collect: + * consider_machine_collect: * - * Attempt to free excess pcb memory. + * Try to collect machine-dependent pages */ - void -pcb_collect( - register thread_act_t thr_act) +consider_machine_collect(void) { - /* accomplishes very little */ } -/* - * act_machine_sv_free - * release saveareas associated with an act. if flag is true, release - * user level savearea(s) too, else don't - */ void -act_machine_sv_free(thread_act_t act, int flag) +consider_machine_adjust(void) { - } +extern void *get_bsduthreadarg(thread_t th); -/* - * act_machine_set_state: - * - * Set the status of the specified thread. Called with "appropriate" - * thread-related locks held (see act_lock_thread()), so - * thr_act->thread is guaranteed not to change. 
- */ - -kern_return_t -act_machine_set_state( - thread_act_t thr_act, - thread_flavor_t flavor, - thread_state_t tstate, - mach_msg_type_number_t count) +#if defined(__x86_64__) +static void +act_machine_switch_pcb( thread_t new ) { - int kernel_act = thr_act->kernel_loading || - thr_act->kernel_loaded; - -#if MACH_ASSERT - if (watchacts & WA_STATE) - printf("act_%x act_m_set_state(thr_act=%x,flav=%x,st=%x,cnt=%x)\n", - current_act(), thr_act, flavor, tstate, count); -#endif /* MACH_ASSERT */ + pcb_t pcb = new->machine.pcb; + struct real_descriptor *ldtp; + mach_vm_offset_t pcb_stack_top; + cpu_data_t *cdp = current_cpu_datap(); - switch (flavor) { - case THREAD_SYSCALL_STATE: - { - register struct thread_syscall_state *state; - register struct i386_saved_state *saved_state = USER_REGS(thr_act); + assert(new->kernel_stack != 0); - state = (struct thread_syscall_state *) tstate; - saved_state->eax = state->eax; - saved_state->edx = state->edx; - if (kernel_act) - saved_state->efl = state->efl; - else - saved_state->efl = (state->efl & ~EFL_USER_CLEAR) | EFL_USER_SET; - saved_state->eip = state->eip; - saved_state->uesp = state->esp; - break; - } + if (!cpu_mode_is64bit()) { + panic("K64 is 64bit!"); + } else if (is_saved_state64(pcb->iss)) { + /* + * The test above is performed against the thread save state + * flavor and not task's 64-bit feature flag because of the + * thread/task 64-bit state divergence that can arise in + * task_set_64bit() x86: the task state is changed before + * the individual thread(s). + */ + x86_saved_state64_tagged_t *iss64; + vm_offset_t isf; - case i386_SAVED_STATE: - { - register struct i386_saved_state *state; - register struct i386_saved_state *saved_state; + assert(is_saved_state64(pcb->iss)); + + iss64 = (x86_saved_state64_tagged_t *) pcb->iss; + + /* + * Set pointer to PCB's interrupt stack frame in cpu data. + * Used by syscall and double-fault trap handlers. + */ + isf = (vm_offset_t) &iss64->state.isf; + cdp->cpu_uber.cu_isf = isf; + pcb_stack_top = (vm_offset_t) (iss64 + 1); + /* require 16-byte alignment */ + assert((pcb_stack_top & 0xF) == 0); - if (count < i386_SAVED_STATE_COUNT) { - return(KERN_INVALID_ARGUMENT); - } + /* Interrupt stack is pcb */ + current_ktss64()->rsp0 = pcb_stack_top; - state = (struct i386_saved_state *) tstate; + /* + * Top of temporary sysenter stack points to pcb stack. + * Although this is not normally used by 64-bit users, + * it needs to be set in case a sysenter is attempted. + */ + *current_sstk64() = pcb_stack_top; - saved_state = USER_REGS(thr_act); + cdp->cpu_task_map = new->map->pmap->pm_task_map; /* - * General registers + * Enable the 64-bit user code segment, USER64_CS. + * Disable the 32-bit user code segment, USER_CS. */ - saved_state->edi = state->edi; - saved_state->esi = state->esi; - saved_state->ebp = state->ebp; - saved_state->uesp = state->uesp; - saved_state->ebx = state->ebx; - saved_state->edx = state->edx; - saved_state->ecx = state->ecx; - saved_state->eax = state->eax; - saved_state->eip = state->eip; - if (kernel_act) - saved_state->efl = state->efl; - else - saved_state->efl = (state->efl & ~EFL_USER_CLEAR) - | EFL_USER_SET; + ldt_desc_p(USER64_CS)->access |= ACC_PL_U; + ldt_desc_p(USER_CS)->access &= ~ACC_PL_U; /* - * Segment registers. Set differently in V8086 mode. + * Switch user's GS base if necessary + * by setting the Kernel's GS base MSR + * - this will become the user's on the swapgs when + * returning to user-space. 
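+	 * (While in the kernel, GS.Base carries the per-cpu data pointer;
+	 * the user's base is parked in the KERNEL_GS_BASE MSR, and the
+	 * two are exchanged by swapgs at kernel entry and exit.)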
*/ - if (state->efl & EFL_VM) { - /* - * Set V8086 mode segment registers. - */ - saved_state->cs = state->cs & 0xffff; - saved_state->ss = state->ss & 0xffff; - saved_state->v86_segs.v86_ds = state->ds & 0xffff; - saved_state->v86_segs.v86_es = state->es & 0xffff; - saved_state->v86_segs.v86_fs = state->fs & 0xffff; - saved_state->v86_segs.v86_gs = state->gs & 0xffff; - - /* - * Zero protected mode segment registers. - */ - saved_state->ds = 0; - saved_state->es = 0; - saved_state->fs = 0; - saved_state->gs = 0; - - if (thr_act->mact.pcb->ims.v86s.int_table) { - /* - * Hardware assist on. - */ - thr_act->mact.pcb->ims.v86s.flags = - state->efl & (EFL_TF | EFL_IF); - } - } - else if (!kernel_act) { - /* - * 386 mode. Set segment registers for flat - * 32-bit address space. - */ - saved_state->cs = USER_CS; - saved_state->ss = USER_DS; - saved_state->ds = USER_DS; - saved_state->es = USER_DS; - saved_state->fs = USER_DS; - saved_state->gs = USER_DS; + if (cdp->cpu_uber.cu_user_gs_base != pcb->cthread_self) { + cdp->cpu_uber.cu_user_gs_base = pcb->cthread_self; + wrmsr64(MSR_IA32_KERNEL_GS_BASE, pcb->cthread_self); } - else { - /* - * User setting segment registers. - * Code and stack selectors have already been - * checked. Others will be reset by 'iret' - * if they are not valid. - */ - saved_state->cs = state->cs; - saved_state->ss = state->ss; - saved_state->ds = state->ds; - saved_state->es = state->es; - saved_state->fs = state->fs; - saved_state->gs = state->gs; - } - break; - } + } else { + x86_saved_state_compat32_t *iss32compat; + vm_offset_t isf; - case i386_NEW_THREAD_STATE: - case i386_REGS_SEGS_STATE: - { - register struct i386_new_thread_state *state; - register struct i386_saved_state *saved_state; + assert(is_saved_state32(pcb->iss)); + iss32compat = (x86_saved_state_compat32_t *) pcb->iss; - if (count < i386_NEW_THREAD_STATE_COUNT) { - return(KERN_INVALID_ARGUMENT); - } + pcb_stack_top = (uintptr_t) (iss32compat + 1); + /* require 16-byte alignment */ + assert((pcb_stack_top & 0xF) == 0); - if (flavor == i386_REGS_SEGS_STATE) { - /* - * Code and stack selectors must not be null, - * and must have user protection levels. - * Only the low 16 bits are valid. - */ - state->cs &= 0xffff; - state->ss &= 0xffff; - state->ds &= 0xffff; - state->es &= 0xffff; - state->fs &= 0xffff; - state->gs &= 0xffff; - - if (!kernel_act && - (state->cs == 0 || (state->cs & SEL_PL) != SEL_PL_U - || state->ss == 0 || (state->ss & SEL_PL) != SEL_PL_U)) - return KERN_INVALID_ARGUMENT; - } + /* + * Set pointer to PCB's interrupt stack frame in cpu data. + * Used by debug trap handler. + */ + isf = (vm_offset_t) &iss32compat->isf64; + cdp->cpu_uber.cu_isf = isf; + + /* Top of temporary sysenter stack points to pcb stack */ + *current_sstk64() = pcb_stack_top; - state = (struct i386_new_thread_state *) tstate; + /* Interrupt stack is pcb */ + current_ktss64()->rsp0 = pcb_stack_top; - saved_state = USER_REGS(thr_act); + cdp->cpu_task_map = TASK_MAP_32BIT; + /* Precalculate pointers to syscall argument store, for use + * in the trampolines. 
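+	 * (Presumably the trampolines use cpu_uber_arg_store to deposit
+	 * the 32-bit user's syscall arguments directly into the uthread
+	 * argument area returned by get_bsduthreadarg(), with
+	 * arg_store_valid recording whether that copy is current.)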
+	 */
+	cdp->cpu_uber_arg_store = (vm_offset_t)get_bsduthreadarg(new);
+	cdp->cpu_uber_arg_store_valid = (vm_offset_t)&pcb->arg_store_valid;
+	pcb->arg_store_valid = 0;
 
 	    /*
-	     * General registers
+	     * Disable USER64_CS
+	     * Enable USER_CS
 	     */
-	    saved_state->edi = state->edi;
-	    saved_state->esi = state->esi;
-	    saved_state->ebp = state->ebp;
-	    saved_state->uesp = state->uesp;
-	    saved_state->ebx = state->ebx;
-	    saved_state->edx = state->edx;
-	    saved_state->ecx = state->ecx;
-	    saved_state->eax = state->eax;
-	    saved_state->eip = state->eip;
-	    if (kernel_act)
-		    saved_state->efl = state->efl;
-	    else
-		    saved_state->efl = (state->efl & ~EFL_USER_CLEAR)
-					| EFL_USER_SET;
+	ldt_desc_p(USER64_CS)->access &= ~ACC_PL_U;
+	ldt_desc_p(USER_CS)->access |= ACC_PL_U;
 
 	    /*
-	     * Segment registers.  Set differently in V8086 mode.
+	 * Set the thread`s cthread (a.k.a. pthread)
+	 * For 32-bit user this involves setting the USER_CTHREAD
+	 * descriptor in the LDT to point to the cthread data.
+	 * This involves copying in the pre-initialized descriptor.
+	 */
+	ldtp = (struct real_descriptor *)current_ldt();
+	ldtp[sel_idx(USER_CTHREAD)] = pcb->cthread_desc;
+	if (pcb->uldt_selector != 0)
+		ldtp[sel_idx(pcb->uldt_selector)] = pcb->uldt_desc;
+	cdp->cpu_uber.cu_user_gs_base = pcb->cthread_self;
+
+	/*
+	 * Set the thread`s LDT or LDT entry.
 	     */
-	    if (state->efl & EFL_VM) {
-		/*
-		 * Set V8086 mode segment registers.
-		 */
-		saved_state->cs = state->cs & 0xffff;
-		saved_state->ss = state->ss & 0xffff;
-		saved_state->v86_segs.v86_ds = state->ds & 0xffff;
-		saved_state->v86_segs.v86_es = state->es & 0xffff;
-		saved_state->v86_segs.v86_fs = state->fs & 0xffff;
-		saved_state->v86_segs.v86_gs = state->gs & 0xffff;
-
-		/*
-		 * Zero protected mode segment registers.
-		 */
-		saved_state->ds = 0;
-		saved_state->es = 0;
-		saved_state->fs = 0;
-		saved_state->gs = 0;
-
-		if (thr_act->mact.pcb->ims.v86s.int_table) {
+	if (new->task == TASK_NULL || new->task->i386_ldt == 0) {
 		    /*
-		     * Hardware assist on.
+		 * Use system LDT.
 		     */
-		    thr_act->mact.pcb->ims.v86s.flags =
-			    state->efl & (EFL_TF | EFL_IF);
-		}
-	    }
-	    else if (flavor == i386_NEW_THREAD_STATE && !kernel_act) {
-		/*
-		 * 386 mode.  Set segment registers for flat
-		 * 32-bit address space.
-		 */
-		saved_state->cs = USER_CS;
-		saved_state->ss = USER_DS;
-		saved_state->ds = USER_DS;
-		saved_state->es = USER_DS;
-		saved_state->fs = USER_DS;
-		saved_state->gs = USER_DS;
-	    }
-	    else {
-		/*
-		 * User setting segment registers.
-		 * Code and stack selectors have already been
-		 * checked.  Others will be reset by 'iret'
-		 * if they are not valid.
-		 */
-		saved_state->cs = state->cs;
-		saved_state->ss = state->ss;
-		saved_state->ds = state->ds;
-		saved_state->es = state->es;
-		saved_state->fs = state->fs;
-		saved_state->gs = state->gs;
+		ml_cpu_set_ldt(KERNEL_LDT);
+	} else {
+		/*
+		 * Task has its own LDT.
+		 */
+		user_ldt_set(new);
 	}
-	    break;
-	}
-
-	case i386_FLOAT_STATE: {
+
-	    if (count < i386_FLOAT_STATE_COUNT)
-		    return(KERN_INVALID_ARGUMENT);
+	/*
+	 * Bump the scheduler generation count in the commpage.
+	 * This can be read by user code to detect its preemption.
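+	 * A rough user-level usage pattern, in pseudo-code (the accessor
+	 * name here is hypothetical):
+	 *
+	 *	gen = commpage_sched_gen();
+	 *	... speculative, preemption-sensitive work ...
+	 *	if (gen != commpage_sched_gen())
+	 *		retry;		-- preempted in between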
+ */ + commpage_sched_gen_inc(); +} +#else +static void +act_machine_switch_pcb( thread_t new ) +{ + pcb_t pcb = new->machine.pcb; + struct real_descriptor *ldtp; + vm_offset_t pcb_stack_top; + vm_offset_t hi_pcb_stack_top; + vm_offset_t hi_iss; + cpu_data_t *cdp = current_cpu_datap(); - return fpu_set_state(thr_act,(struct i386_float_state*)tstate); - } + assert(new->kernel_stack != 0); + STACK_IEL(new->kernel_stack)->saved_state = pcb->iss; - /* - * Temporary - replace by i386_io_map - */ - case i386_ISA_PORT_MAP_STATE: { - register struct i386_isa_port_map_state *state; - register iopb_tss_t tss; + if (!cpu_mode_is64bit()) { + x86_saved_state32_tagged_t *hi_iss32; + /* + * Save a pointer to the top of the "kernel" stack - + * actually the place in the PCB where a trap into + * kernel mode will push the registers. + */ + hi_iss = (vm_offset_t)((unsigned long) + pmap_cpu_high_map_vaddr(cpu_number(), HIGH_CPU_ISS0) | + ((unsigned long)pcb->iss & PAGE_MASK)); - if (count < i386_ISA_PORT_MAP_STATE_COUNT) - return(KERN_INVALID_ARGUMENT); + cdp->cpu_hi_iss = (void *)hi_iss; - break; - } + pmap_high_map(pcb->iss_pte0, HIGH_CPU_ISS0); + pmap_high_map(pcb->iss_pte1, HIGH_CPU_ISS1); - case i386_V86_ASSIST_STATE: - { - register struct i386_v86_assist_state *state; - vm_offset_t int_table; - int int_count; + hi_iss32 = (x86_saved_state32_tagged_t *) hi_iss; + assert(hi_iss32->tag == x86_SAVED_STATE32); - if (count < i386_V86_ASSIST_STATE_COUNT) - return KERN_INVALID_ARGUMENT; + hi_pcb_stack_top = (int) (hi_iss32 + 1); - state = (struct i386_v86_assist_state *) tstate; - int_table = state->int_table; - int_count = state->int_count; + /* + * For fast syscall, top of interrupt stack points to pcb stack + */ + *(vm_offset_t *) current_sstk() = hi_pcb_stack_top; - if (int_table >= VM_MAX_ADDRESS || - int_table + - int_count * sizeof(struct v86_interrupt_table) - > VM_MAX_ADDRESS) - return KERN_INVALID_ARGUMENT; + current_ktss()->esp0 = hi_pcb_stack_top; - thr_act->mact.pcb->ims.v86s.int_table = int_table; - thr_act->mact.pcb->ims.v86s.int_count = int_count; + } else if (is_saved_state64(pcb->iss)) { + /* + * The test above is performed against the thread save state + * flavor and not task's 64-bit feature flag because of the + * thread/task 64-bit state divergence that can arise in + * task_set_64bit() x86: the task state is changed before + * the individual thread(s). + */ + x86_saved_state64_tagged_t *iss64; + vm_offset_t isf; - thr_act->mact.pcb->ims.v86s.flags = - USER_REGS(thr_act)->efl & (EFL_TF | EFL_IF); - break; - } + assert(is_saved_state64(pcb->iss)); + + iss64 = (x86_saved_state64_tagged_t *) pcb->iss; + + /* + * Set pointer to PCB's interrupt stack frame in cpu data. + * Used by syscall and double-fault trap handlers. 
+ */ + isf = (vm_offset_t) &iss64->state.isf; + cdp->cpu_uber.cu_isf = UBER64(isf); + pcb_stack_top = (vm_offset_t) (iss64 + 1); + /* require 16-byte alignment */ + assert((pcb_stack_top & 0xF) == 0); + /* Interrupt stack is pcb */ + current_ktss64()->rsp0 = UBER64(pcb_stack_top); - case i386_THREAD_STATE: { - struct i386_saved_state *saved_state; - i386_thread_state_t *state25; - - saved_state = USER_REGS(thr_act); - state25 = (i386_thread_state_t *)tstate; - - saved_state->eax = state25->eax; - saved_state->ebx = state25->ebx; - saved_state->ecx = state25->ecx; - saved_state->edx = state25->edx; - saved_state->edi = state25->edi; - saved_state->esi = state25->esi; - saved_state->ebp = state25->ebp; - saved_state->uesp = state25->esp; - saved_state->efl = (state25->eflags & ~EFL_USER_CLEAR) - | EFL_USER_SET; - saved_state->eip = state25->eip; - saved_state->cs = USER_CS; /* FIXME? */ - saved_state->ss = USER_DS; - saved_state->ds = USER_DS; - saved_state->es = USER_DS; - saved_state->fs = USER_DS; - saved_state->gs = USER_DS; - } - break; + /* + * Top of temporary sysenter stack points to pcb stack. + * Although this is not normally used by 64-bit users, + * it needs to be set in case a sysenter is attempted. + */ + *current_sstk64() = UBER64(pcb_stack_top); - default: - return(KERN_INVALID_ARGUMENT); - } + cdp->cpu_task_map = new->map->pmap->pm_task_map; + + /* + * Enable the 64-bit user code segment, USER64_CS. + * Disable the 32-bit user code segment, USER_CS. + */ + ldt_desc_p(USER64_CS)->access |= ACC_PL_U; + ldt_desc_p(USER_CS)->access &= ~ACC_PL_U; + + } else { + x86_saved_state_compat32_t *iss32compat; + vm_offset_t isf; + + assert(is_saved_state32(pcb->iss)); + iss32compat = (x86_saved_state_compat32_t *) pcb->iss; + + pcb_stack_top = (int) (iss32compat + 1); + /* require 16-byte alignment */ + assert((pcb_stack_top & 0xF) == 0); + + /* + * Set pointer to PCB's interrupt stack frame in cpu data. + * Used by debug trap handler. + */ + isf = (vm_offset_t) &iss32compat->isf64; + cdp->cpu_uber.cu_isf = UBER64(isf); + + /* Top of temporary sysenter stack points to pcb stack */ + *current_sstk64() = UBER64(pcb_stack_top); + + /* Interrupt stack is pcb */ + current_ktss64()->rsp0 = UBER64(pcb_stack_top); + + cdp->cpu_task_map = TASK_MAP_32BIT; + /* Precalculate pointers to syscall argument store, for use + * in the trampolines. + */ + cdp->cpu_uber_arg_store = UBER64((vm_offset_t)get_bsduthreadarg(new)); + cdp->cpu_uber_arg_store_valid = UBER64((vm_offset_t)&pcb->arg_store_valid); + pcb->arg_store_valid = 0; + + /* + * Disable USER64_CS + * Enable USER_CS + */ + ldt_desc_p(USER64_CS)->access &= ~ACC_PL_U; + ldt_desc_p(USER_CS)->access |= ACC_PL_U; + } + + /* + * Set the thread`s cthread (a.k.a pthread) + * For 32-bit user this involves setting the USER_CTHREAD + * descriptor in the LDT to point to the cthread data. + * The involves copying in the pre-initialized descriptor. + */ + ldtp = (struct real_descriptor *)current_ldt(); + ldtp[sel_idx(USER_CTHREAD)] = pcb->cthread_desc; + if (pcb->uldt_selector != 0) + ldtp[sel_idx(pcb->uldt_selector)] = pcb->uldt_desc; + + + /* + * For 64-bit, we additionally set the 64-bit User GS base + * address. On return to 64-bit user, the GS.Base MSR will be written. + */ + cdp->cpu_uber.cu_user_gs_base = pcb->cthread_self; + + /* + * Set the thread`s LDT or LDT entry. + */ + if (new->task == TASK_NULL || new->task->i386_ldt == 0) { + /* + * Use system LDT. + */ + ml_cpu_set_ldt(KERNEL_LDT); + } else { + /* + * Task has its own LDT. 
+ */ + user_ldt_set(new); + } + + /* + * Bump the scheduler generation count in the commpage. + * This can be read by user code to detect its preemption. + */ + commpage_sched_gen_inc(); +} +#endif + +/* + * Switch to the first thread on a CPU. + */ +void +machine_load_context( + thread_t new) +{ +#if CONFIG_COUNTERS + machine_pmc_cswitch(NULL, new); +#endif + new->machine.specFlags |= OnProc; + act_machine_switch_pcb(new); + Load_context(new); +} + +/* + * Switch to a new thread. + * Save the old thread`s kernel state or continuation, + * and return it. + */ +thread_t +machine_switch_context( + thread_t old, + thread_continue_t continuation, + thread_t new) +{ +#if MACH_RT + assert(current_cpu_datap()->cpu_active_stack == old->kernel_stack); +#endif +#if CONFIG_COUNTERS + machine_pmc_cswitch(old, new); +#endif + /* + * Save FP registers if in use. + */ + fpu_save_context(old); + + + old->machine.specFlags &= ~OnProc; + new->machine.specFlags |= OnProc; + + /* + * Monitor the stack depth and report new max, + * not worrying about races. + */ + vm_offset_t depth = current_stack_depth(); + if (depth > kernel_stack_depth_max) { + kernel_stack_depth_max = depth; + KERNEL_DEBUG_CONSTANT( + MACHDBG_CODE(DBG_MACH_SCHED, MACH_STACK_DEPTH), + (long) depth, 0, 0, 0, 0); + } + + /* + * Switch address maps if need be, even if not switching tasks. + * (A server activation may be "borrowing" a client map.) + */ + PMAP_SWITCH_CONTEXT(old, new, cpu_number()) + + /* + * Load the rest of the user state for the new thread + */ + act_machine_switch_pcb(new); + + return(Switch_context(old, continuation, new)); +} + +thread_t +machine_processor_shutdown( + thread_t thread, + void (*doshutdown)(processor_t), + processor_t processor) +{ +#if CONFIG_VMX + vmx_suspend(); +#endif + fpu_save_context(thread); + PMAP_SWITCH_CONTEXT(thread, processor->idle_thread, cpu_number()); + return(Shutdown_context(thread, doshutdown, processor)); +} + +/* + * act_machine_sv_free + * release saveareas associated with an act. if flag is true, release + * user level savearea(s) too, else don't + */ +void +act_machine_sv_free(__unused thread_t act, __unused int flag) +{ +} + + +/* + * This is where registers that are not normally specified by the mach-o + * file on an execve would be nullified, perhaps to avoid a covert channel. + */ +kern_return_t +machine_thread_state_initialize( + thread_t thread) +{ + /* + * If there's an fpu save area, free it. + * The initialized state will then be lazily faulted-in, if required. + * And if we're target, re-arm the no-fpu trap. 
+ */ + if (thread->machine.pcb->ifps) { + (void) fpu_set_fxstate(thread, NULL); + + if (thread == current_thread()) + clear_fpu(); + } + + if (thread->machine.pcb->ids) { + zfree(ids_zone, thread->machine.pcb->ids); + thread->machine.pcb->ids = NULL; + } + + return KERN_SUCCESS; +} + +uint32_t +get_eflags_exportmask(void) +{ + return EFL_USER_SET; +} + +/* + * x86_SAVED_STATE32 - internal save/restore general register state on 32/64 bit processors + * for 32bit tasks only + * x86_SAVED_STATE64 - internal save/restore general register state on 64 bit processors + * for 64bit tasks only + * x86_THREAD_STATE32 - external set/get general register state on 32/64 bit processors + * for 32bit tasks only + * x86_THREAD_STATE64 - external set/get general register state on 64 bit processors + * for 64bit tasks only + * x86_SAVED_STATE - external set/get general register state on 32/64 bit processors + * for either 32bit or 64bit tasks + * x86_FLOAT_STATE32 - internal/external save/restore float and xmm state on 32/64 bit processors + * for 32bit tasks only + * x86_FLOAT_STATE64 - internal/external save/restore float and xmm state on 64 bit processors + * for 64bit tasks only + * x86_FLOAT_STATE - external save/restore float and xmm state on 32/64 bit processors + * for either 32bit or 64bit tasks + * x86_EXCEPTION_STATE32 - external get exception state on 32/64 bit processors + * for 32bit tasks only + * x86_EXCEPTION_STATE64 - external get exception state on 64 bit processors + * for 64bit tasks only + * x86_EXCEPTION_STATE - external get exception state on 323/64 bit processors + * for either 32bit or 64bit tasks + */ + + +static void +get_exception_state64(thread_t thread, x86_exception_state64_t *es) +{ + x86_saved_state64_t *saved_state; + + saved_state = USER_REGS64(thread); + + es->trapno = saved_state->isf.trapno; + es->err = (typeof(es->err))saved_state->isf.err; + es->faultvaddr = saved_state->cr2; +} + +static void +get_exception_state32(thread_t thread, x86_exception_state32_t *es) +{ + x86_saved_state32_t *saved_state; + + saved_state = USER_REGS32(thread); + + es->trapno = saved_state->trapno; + es->err = saved_state->err; + es->faultvaddr = saved_state->cr2; +} + + +static int +set_thread_state32(thread_t thread, x86_thread_state32_t *ts) +{ + x86_saved_state32_t *saved_state; + + + saved_state = USER_REGS32(thread); + + /* + * Scrub segment selector values: + */ + ts->cs = USER_CS; +#ifdef __i386__ + if (ts->ss == 0) ts->ss = USER_DS; + if (ts->ds == 0) ts->ds = USER_DS; + if (ts->es == 0) ts->es = USER_DS; +#else /* __x86_64__ */ + /* + * On a 64 bit kernel, we always override the data segments, + * as the actual selector numbers have changed. This also + * means that we don't support setting the data segments + * manually any more. 
+ */ + ts->ss = USER_DS; + ts->ds = USER_DS; + ts->es = USER_DS; +#endif + + /* Check segment selectors are safe */ + if (!valid_user_segment_selectors(ts->cs, + ts->ss, + ts->ds, + ts->es, + ts->fs, + ts->gs)) + return(KERN_INVALID_ARGUMENT); + + saved_state->eax = ts->eax; + saved_state->ebx = ts->ebx; + saved_state->ecx = ts->ecx; + saved_state->edx = ts->edx; + saved_state->edi = ts->edi; + saved_state->esi = ts->esi; + saved_state->ebp = ts->ebp; + saved_state->uesp = ts->esp; + saved_state->efl = (ts->eflags & ~EFL_USER_CLEAR) | EFL_USER_SET; + saved_state->eip = ts->eip; + saved_state->cs = ts->cs; + saved_state->ss = ts->ss; + saved_state->ds = ts->ds; + saved_state->es = ts->es; + saved_state->fs = ts->fs; + saved_state->gs = ts->gs; + + /* + * If the trace trap bit is being set, + * ensure that the user returns via iret + * - which is signaled thusly: + */ + if ((saved_state->efl & EFL_TF) && saved_state->cs == SYSENTER_CS) + saved_state->cs = SYSENTER_TF_CS; + + return(KERN_SUCCESS); +} + +static int +set_thread_state64(thread_t thread, x86_thread_state64_t *ts) +{ + x86_saved_state64_t *saved_state; + + + saved_state = USER_REGS64(thread); + + if (!IS_USERADDR64_CANONICAL(ts->rsp) || + !IS_USERADDR64_CANONICAL(ts->rip)) + return(KERN_INVALID_ARGUMENT); + + saved_state->r8 = ts->r8; + saved_state->r9 = ts->r9; + saved_state->r10 = ts->r10; + saved_state->r11 = ts->r11; + saved_state->r12 = ts->r12; + saved_state->r13 = ts->r13; + saved_state->r14 = ts->r14; + saved_state->r15 = ts->r15; + saved_state->rax = ts->rax; + saved_state->rbx = ts->rbx; + saved_state->rcx = ts->rcx; + saved_state->rdx = ts->rdx; + saved_state->rdi = ts->rdi; + saved_state->rsi = ts->rsi; + saved_state->rbp = ts->rbp; + saved_state->isf.rsp = ts->rsp; + saved_state->isf.rflags = (ts->rflags & ~EFL_USER_CLEAR) | EFL_USER_SET; + saved_state->isf.rip = ts->rip; + saved_state->isf.cs = USER64_CS; + saved_state->fs = (uint32_t)ts->fs; + saved_state->gs = (uint32_t)ts->gs; return(KERN_SUCCESS); } + + +static void +get_thread_state32(thread_t thread, x86_thread_state32_t *ts) +{ + x86_saved_state32_t *saved_state; + + + saved_state = USER_REGS32(thread); + + ts->eax = saved_state->eax; + ts->ebx = saved_state->ebx; + ts->ecx = saved_state->ecx; + ts->edx = saved_state->edx; + ts->edi = saved_state->edi; + ts->esi = saved_state->esi; + ts->ebp = saved_state->ebp; + ts->esp = saved_state->uesp; + ts->eflags = saved_state->efl; + ts->eip = saved_state->eip; + ts->cs = saved_state->cs; + ts->ss = saved_state->ss; + ts->ds = saved_state->ds; + ts->es = saved_state->es; + ts->fs = saved_state->fs; + ts->gs = saved_state->gs; +} + + +static void +get_thread_state64(thread_t thread, x86_thread_state64_t *ts) +{ + x86_saved_state64_t *saved_state; + + + saved_state = USER_REGS64(thread); + + ts->r8 = saved_state->r8; + ts->r9 = saved_state->r9; + ts->r10 = saved_state->r10; + ts->r11 = saved_state->r11; + ts->r12 = saved_state->r12; + ts->r13 = saved_state->r13; + ts->r14 = saved_state->r14; + ts->r15 = saved_state->r15; + ts->rax = saved_state->rax; + ts->rbx = saved_state->rbx; + ts->rcx = saved_state->rcx; + ts->rdx = saved_state->rdx; + ts->rdi = saved_state->rdi; + ts->rsi = saved_state->rsi; + ts->rbp = saved_state->rbp; + ts->rsp = saved_state->isf.rsp; + ts->rflags = saved_state->isf.rflags; + ts->rip = saved_state->isf.rip; + ts->cs = saved_state->isf.cs; + ts->fs = saved_state->fs; + ts->gs = saved_state->gs; +} + + +void +thread_set_wq_state32(thread_t thread, thread_state_t tstate) +{ + x86_thread_state32_t 
*state; + x86_saved_state32_t *saved_state; + thread_t curth = current_thread(); + spl_t s=0; + + + saved_state = USER_REGS32(thread); + + state = (x86_thread_state32_t *)tstate; + + if (curth != thread) { + s = splsched(); + thread_lock(thread); + } + + saved_state->ebp = 0; + saved_state->eip = state->eip; + saved_state->eax = state->eax; + saved_state->ebx = state->ebx; + saved_state->ecx = state->ecx; + saved_state->edx = state->edx; + saved_state->edi = state->edi; + saved_state->esi = state->esi; + saved_state->uesp = state->esp; + saved_state->efl = EFL_USER_SET; + + saved_state->cs = USER_CS; + saved_state->ss = USER_DS; + saved_state->ds = USER_DS; + saved_state->es = USER_DS; + + + if (curth != thread) { + thread_unlock(thread); + splx(s); + } +} + + +void +thread_set_wq_state64(thread_t thread, thread_state_t tstate) +{ + x86_thread_state64_t *state; + x86_saved_state64_t *saved_state; + thread_t curth = current_thread(); + spl_t s=0; + + + saved_state = USER_REGS64(thread); + state = (x86_thread_state64_t *)tstate; + + if (curth != thread) { + s = splsched(); + thread_lock(thread); + } + + saved_state->rbp = 0; + saved_state->rdi = state->rdi; + saved_state->rsi = state->rsi; + saved_state->rdx = state->rdx; + saved_state->rcx = state->rcx; + saved_state->r8 = state->r8; + saved_state->r9 = state->r9; + + saved_state->isf.rip = state->rip; + saved_state->isf.rsp = state->rsp; + saved_state->isf.cs = USER64_CS; + saved_state->isf.rflags = EFL_USER_SET; + + + if (curth != thread) { + thread_unlock(thread); + splx(s); + } +} + + + +/* + * act_machine_set_state: + * + * Set the status of the specified thread. + */ + +kern_return_t +machine_thread_set_state( + thread_t thr_act, + thread_flavor_t flavor, + thread_state_t tstate, + mach_msg_type_number_t count) +{ + switch (flavor) { + case x86_SAVED_STATE32: + { + x86_saved_state32_t *state; + x86_saved_state32_t *saved_state; + + if (count < x86_SAVED_STATE32_COUNT) + return(KERN_INVALID_ARGUMENT); + + if (thread_is_64bit(thr_act)) + return(KERN_INVALID_ARGUMENT); + + state = (x86_saved_state32_t *) tstate; + + /* Check segment selectors are safe */ + if (!valid_user_segment_selectors(state->cs, + state->ss, + state->ds, + state->es, + state->fs, + state->gs)) + return KERN_INVALID_ARGUMENT; + + + saved_state = USER_REGS32(thr_act); + + /* + * General registers + */ + saved_state->edi = state->edi; + saved_state->esi = state->esi; + saved_state->ebp = state->ebp; + saved_state->uesp = state->uesp; + saved_state->ebx = state->ebx; + saved_state->edx = state->edx; + saved_state->ecx = state->ecx; + saved_state->eax = state->eax; + saved_state->eip = state->eip; + + saved_state->efl = (state->efl & ~EFL_USER_CLEAR) | EFL_USER_SET; + + /* + * If the trace trap bit is being set, + * ensure that the user returns via iret + * - which is signaled thusly: + */ + if ((saved_state->efl & EFL_TF) && state->cs == SYSENTER_CS) + state->cs = SYSENTER_TF_CS; + + /* + * User setting segment registers. + * Code and stack selectors have already been + * checked. Others will be reset by 'iret' + * if they are not valid. 
+ */ + saved_state->cs = state->cs; + saved_state->ss = state->ss; + saved_state->ds = state->ds; + saved_state->es = state->es; + saved_state->fs = state->fs; + saved_state->gs = state->gs; + + break; + } + + case x86_SAVED_STATE64: + { + x86_saved_state64_t *state; + x86_saved_state64_t *saved_state; + + if (count < x86_SAVED_STATE64_COUNT) + return(KERN_INVALID_ARGUMENT); + + if (!thread_is_64bit(thr_act)) + return(KERN_INVALID_ARGUMENT); + + state = (x86_saved_state64_t *) tstate; + + /* Verify that the supplied code segment selector is + * valid. In 64-bit mode, the FS and GS segment overrides + * use the FS.base and GS.base MSRs to calculate + * base addresses, and the trampolines don't directly + * restore the segment registers--hence they are no + * longer relevant for validation. + */ + if (!valid_user_code_selector(state->isf.cs)) + return KERN_INVALID_ARGUMENT; + + /* Check pc and stack are canonical addresses */ + if (!IS_USERADDR64_CANONICAL(state->isf.rsp) || + !IS_USERADDR64_CANONICAL(state->isf.rip)) + return KERN_INVALID_ARGUMENT; + + + saved_state = USER_REGS64(thr_act); + + /* + * General registers + */ + saved_state->r8 = state->r8; + saved_state->r9 = state->r9; + saved_state->r10 = state->r10; + saved_state->r11 = state->r11; + saved_state->r12 = state->r12; + saved_state->r13 = state->r13; + saved_state->r14 = state->r14; + saved_state->r15 = state->r15; + saved_state->rdi = state->rdi; + saved_state->rsi = state->rsi; + saved_state->rbp = state->rbp; + saved_state->rbx = state->rbx; + saved_state->rdx = state->rdx; + saved_state->rcx = state->rcx; + saved_state->rax = state->rax; + saved_state->isf.rsp = state->isf.rsp; + saved_state->isf.rip = state->isf.rip; + + saved_state->isf.rflags = (state->isf.rflags & ~EFL_USER_CLEAR) | EFL_USER_SET; + + /* + * User setting segment registers. + * Code and stack selectors have already been + * checked. Others will be reset by 'sys' + * if they are not valid. 
+ */ + saved_state->isf.cs = state->isf.cs; + saved_state->isf.ss = state->isf.ss; + saved_state->fs = state->fs; + saved_state->gs = state->gs; + + break; + } + + case x86_FLOAT_STATE32: + { + if (count != x86_FLOAT_STATE32_COUNT) + return(KERN_INVALID_ARGUMENT); + + if (thread_is_64bit(thr_act)) + return(KERN_INVALID_ARGUMENT); + + return fpu_set_fxstate(thr_act, tstate); + } + + case x86_FLOAT_STATE64: + { + if (count != x86_FLOAT_STATE64_COUNT) + return(KERN_INVALID_ARGUMENT); + + if ( !thread_is_64bit(thr_act)) + return(KERN_INVALID_ARGUMENT); + + return fpu_set_fxstate(thr_act, tstate); + } + + case x86_FLOAT_STATE: + { + x86_float_state_t *state; + + if (count != x86_FLOAT_STATE_COUNT) + return(KERN_INVALID_ARGUMENT); + + state = (x86_float_state_t *)tstate; + if (state->fsh.flavor == x86_FLOAT_STATE64 && state->fsh.count == x86_FLOAT_STATE64_COUNT && + thread_is_64bit(thr_act)) { + return fpu_set_fxstate(thr_act, (thread_state_t)&state->ufs.fs64); + } + if (state->fsh.flavor == x86_FLOAT_STATE32 && state->fsh.count == x86_FLOAT_STATE32_COUNT && + !thread_is_64bit(thr_act)) { + return fpu_set_fxstate(thr_act, (thread_state_t)&state->ufs.fs32); + } + return(KERN_INVALID_ARGUMENT); + } + + case x86_THREAD_STATE32: + { + if (count != x86_THREAD_STATE32_COUNT) + return(KERN_INVALID_ARGUMENT); + + if (thread_is_64bit(thr_act)) + return(KERN_INVALID_ARGUMENT); + + return set_thread_state32(thr_act, (x86_thread_state32_t *)tstate); + } + + case x86_THREAD_STATE64: + { + if (count != x86_THREAD_STATE64_COUNT) + return(KERN_INVALID_ARGUMENT); + + if (!thread_is_64bit(thr_act)) + return(KERN_INVALID_ARGUMENT); + + return set_thread_state64(thr_act, (x86_thread_state64_t *)tstate); + + } + case x86_THREAD_STATE: + { + x86_thread_state_t *state; + + if (count != x86_THREAD_STATE_COUNT) + return(KERN_INVALID_ARGUMENT); + + state = (x86_thread_state_t *)tstate; + + if (state->tsh.flavor == x86_THREAD_STATE64 && + state->tsh.count == x86_THREAD_STATE64_COUNT && + thread_is_64bit(thr_act)) { + return set_thread_state64(thr_act, &state->uts.ts64); + } else if (state->tsh.flavor == x86_THREAD_STATE32 && + state->tsh.count == x86_THREAD_STATE32_COUNT && + !thread_is_64bit(thr_act)) { + return set_thread_state32(thr_act, &state->uts.ts32); + } else + return(KERN_INVALID_ARGUMENT); + + break; + } + case x86_DEBUG_STATE32: + { + x86_debug_state32_t *state; + kern_return_t ret; + + if (thread_is_64bit(thr_act)) + return(KERN_INVALID_ARGUMENT); + + state = (x86_debug_state32_t *)tstate; + + ret = set_debug_state32(thr_act, state); + + return ret; + } + case x86_DEBUG_STATE64: + { + x86_debug_state64_t *state; + kern_return_t ret; + + if (!thread_is_64bit(thr_act)) + return(KERN_INVALID_ARGUMENT); + + state = (x86_debug_state64_t *)tstate; + + ret = set_debug_state64(thr_act, state); + + return ret; + } + case x86_DEBUG_STATE: + { + x86_debug_state_t *state; + kern_return_t ret = KERN_INVALID_ARGUMENT; + + if (count != x86_DEBUG_STATE_COUNT) + return (KERN_INVALID_ARGUMENT); + + state = (x86_debug_state_t *)tstate; + if (state->dsh.flavor == x86_DEBUG_STATE64 && + state->dsh.count == x86_DEBUG_STATE64_COUNT && + thread_is_64bit(thr_act)) { + ret = set_debug_state64(thr_act, &state->uds.ds64); + } + else + if (state->dsh.flavor == x86_DEBUG_STATE32 && + state->dsh.count == x86_DEBUG_STATE32_COUNT && + !thread_is_64bit(thr_act)) { + ret = set_debug_state32(thr_act, &state->uds.ds32); + } + return ret; + } + default: + return(KERN_INVALID_ARGUMENT); + } + + return(KERN_SUCCESS); +} + + + /* * thread_getstatus: 
* * Get the status of the specified thread. */ - kern_return_t -act_machine_get_state( - thread_act_t thr_act, +machine_thread_get_state( + thread_t thr_act, thread_flavor_t flavor, thread_state_t tstate, mach_msg_type_number_t *count) { -#if MACH_ASSERT - if (watchacts & WA_STATE) - printf("act_%x act_m_get_state(thr_act=%x,flav=%x,st=%x,cnt@%x=%x)\n", - current_act(), thr_act, flavor, tstate, - count, (count ? *count : 0)); -#endif /* MACH_ASSERT */ switch (flavor) { - case i386_SAVED_STATE: + case THREAD_STATE_FLAVOR_LIST: + { + if (*count < 3) + return (KERN_INVALID_ARGUMENT); + + tstate[0] = i386_THREAD_STATE; + tstate[1] = i386_FLOAT_STATE; + tstate[2] = i386_EXCEPTION_STATE; + + *count = 3; + break; + } + + case THREAD_STATE_FLAVOR_LIST_NEW: { - register struct i386_saved_state *state; - register struct i386_saved_state *saved_state; + if (*count < 4) + return (KERN_INVALID_ARGUMENT); + + tstate[0] = x86_THREAD_STATE; + tstate[1] = x86_FLOAT_STATE; + tstate[2] = x86_EXCEPTION_STATE; + tstate[3] = x86_DEBUG_STATE; - if (*count < i386_SAVED_STATE_COUNT) - return(KERN_INVALID_ARGUMENT); + *count = 4; + break; + } + + case x86_SAVED_STATE32: + { + x86_saved_state32_t *state; + x86_saved_state32_t *saved_state; + + if (*count < x86_SAVED_STATE32_COUNT) + return(KERN_INVALID_ARGUMENT); + + if (thread_is_64bit(thr_act)) + return(KERN_INVALID_ARGUMENT); - state = (struct i386_saved_state *) tstate; - saved_state = USER_REGS(thr_act); + state = (x86_saved_state32_t *) tstate; + saved_state = USER_REGS32(thr_act); /* * First, copy everything: */ *state = *saved_state; + state->ds = saved_state->ds & 0xffff; + state->es = saved_state->es & 0xffff; + state->fs = saved_state->fs & 0xffff; + state->gs = saved_state->gs & 0xffff; - if (saved_state->efl & EFL_VM) { - /* - * V8086 mode. - */ - state->ds = saved_state->v86_segs.v86_ds & 0xffff; - state->es = saved_state->v86_segs.v86_es & 0xffff; - state->fs = saved_state->v86_segs.v86_fs & 0xffff; - state->gs = saved_state->v86_segs.v86_gs & 0xffff; + *count = x86_SAVED_STATE32_COUNT; + break; + } - if (thr_act->mact.pcb->ims.v86s.int_table) { - /* - * Hardware assist on - */ - if ((thr_act->mact.pcb->ims.v86s.flags & - (EFL_IF|V86_IF_PENDING)) == 0) - state->efl &= ~EFL_IF; - } - } - else { - /* - * 386 mode. 
- */ - state->ds = saved_state->ds & 0xffff; - state->es = saved_state->es & 0xffff; - state->fs = saved_state->fs & 0xffff; - state->gs = saved_state->gs & 0xffff; - } - *count = i386_SAVED_STATE_COUNT; + case x86_SAVED_STATE64: + { + x86_saved_state64_t *state; + x86_saved_state64_t *saved_state; + + if (*count < x86_SAVED_STATE64_COUNT) + return(KERN_INVALID_ARGUMENT); + + if (!thread_is_64bit(thr_act)) + return(KERN_INVALID_ARGUMENT); + + state = (x86_saved_state64_t *)tstate; + saved_state = USER_REGS64(thr_act); + + /* + * First, copy everything: + */ + *state = *saved_state; + state->fs = saved_state->fs & 0xffff; + state->gs = saved_state->gs & 0xffff; + + *count = x86_SAVED_STATE64_COUNT; break; } - case i386_NEW_THREAD_STATE: - case i386_REGS_SEGS_STATE: + case x86_FLOAT_STATE32: + { + if (*count < x86_FLOAT_STATE32_COUNT) + return(KERN_INVALID_ARGUMENT); + + if (thread_is_64bit(thr_act)) + return(KERN_INVALID_ARGUMENT); + + *count = x86_FLOAT_STATE32_COUNT; + + return fpu_get_fxstate(thr_act, tstate); + } + + case x86_FLOAT_STATE64: { - register struct i386_new_thread_state *state; - register struct i386_saved_state *saved_state; + if (*count < x86_FLOAT_STATE64_COUNT) + return(KERN_INVALID_ARGUMENT); + + if ( !thread_is_64bit(thr_act)) + return(KERN_INVALID_ARGUMENT); - if (*count < i386_NEW_THREAD_STATE_COUNT) - return(KERN_INVALID_ARGUMENT); + *count = x86_FLOAT_STATE64_COUNT; - state = (struct i386_new_thread_state *) tstate; - saved_state = USER_REGS(thr_act); + return fpu_get_fxstate(thr_act, tstate); + } + + case x86_FLOAT_STATE: + { + x86_float_state_t *state; + kern_return_t kret; + + if (*count < x86_FLOAT_STATE_COUNT) + return(KERN_INVALID_ARGUMENT); + + state = (x86_float_state_t *)tstate; /* - * General registers. + * no need to bzero... currently + * x86_FLOAT_STATE64_COUNT == x86_FLOAT_STATE32_COUNT */ - state->edi = saved_state->edi; - state->esi = saved_state->esi; - state->ebp = saved_state->ebp; - state->ebx = saved_state->ebx; - state->edx = saved_state->edx; - state->ecx = saved_state->ecx; - state->eax = saved_state->eax; - state->eip = saved_state->eip; - state->efl = saved_state->efl; - state->uesp = saved_state->uesp; + if (thread_is_64bit(thr_act)) { + state->fsh.flavor = x86_FLOAT_STATE64; + state->fsh.count = x86_FLOAT_STATE64_COUNT; - state->cs = saved_state->cs; - state->ss = saved_state->ss; - if (saved_state->efl & EFL_VM) { - /* - * V8086 mode. - */ - state->ds = saved_state->v86_segs.v86_ds & 0xffff; - state->es = saved_state->v86_segs.v86_es & 0xffff; - state->fs = saved_state->v86_segs.v86_fs & 0xffff; - state->gs = saved_state->v86_segs.v86_gs & 0xffff; - - if (thr_act->mact.pcb->ims.v86s.int_table) { - /* - * Hardware assist on - */ - if ((thr_act->mact.pcb->ims.v86s.flags & - (EFL_IF|V86_IF_PENDING)) == 0) - state->efl &= ~EFL_IF; - } + kret = fpu_get_fxstate(thr_act, (thread_state_t)&state->ufs.fs64); + } else { + state->fsh.flavor = x86_FLOAT_STATE32; + state->fsh.count = x86_FLOAT_STATE32_COUNT; + + kret = fpu_get_fxstate(thr_act, (thread_state_t)&state->ufs.fs32); } - else { - /* - * 386 mode. 
- */ - state->ds = saved_state->ds & 0xffff; - state->es = saved_state->es & 0xffff; - state->fs = saved_state->fs & 0xffff; - state->gs = saved_state->gs & 0xffff; + *count = x86_FLOAT_STATE_COUNT; + + return(kret); + } + + case x86_THREAD_STATE32: + { + if (*count < x86_THREAD_STATE32_COUNT) + return(KERN_INVALID_ARGUMENT); + + if (thread_is_64bit(thr_act)) + return(KERN_INVALID_ARGUMENT); + + *count = x86_THREAD_STATE32_COUNT; + + get_thread_state32(thr_act, (x86_thread_state32_t *)tstate); + break; + } + + case x86_THREAD_STATE64: + { + if (*count < x86_THREAD_STATE64_COUNT) + return(KERN_INVALID_ARGUMENT); + + if ( !thread_is_64bit(thr_act)) + return(KERN_INVALID_ARGUMENT); + + *count = x86_THREAD_STATE64_COUNT; + + get_thread_state64(thr_act, (x86_thread_state64_t *)tstate); + break; + } + + case x86_THREAD_STATE: + { + x86_thread_state_t *state; + + if (*count < x86_THREAD_STATE_COUNT) + return(KERN_INVALID_ARGUMENT); + + state = (x86_thread_state_t *)tstate; + + bzero((char *)state, sizeof(x86_thread_state_t)); + + if (thread_is_64bit(thr_act)) { + state->tsh.flavor = x86_THREAD_STATE64; + state->tsh.count = x86_THREAD_STATE64_COUNT; + + get_thread_state64(thr_act, &state->uts.ts64); + } else { + state->tsh.flavor = x86_THREAD_STATE32; + state->tsh.count = x86_THREAD_STATE32_COUNT; + + get_thread_state32(thr_act, &state->uts.ts32); } - *count = i386_NEW_THREAD_STATE_COUNT; + *count = x86_THREAD_STATE_COUNT; + break; } - case THREAD_SYSCALL_STATE: + + case x86_EXCEPTION_STATE32: { - register struct thread_syscall_state *state; - register struct i386_saved_state *saved_state = USER_REGS(thr_act); + if (*count < x86_EXCEPTION_STATE32_COUNT) + return(KERN_INVALID_ARGUMENT); - state = (struct thread_syscall_state *) tstate; - state->eax = saved_state->eax; - state->edx = saved_state->edx; - state->efl = saved_state->efl; - state->eip = saved_state->eip; - state->esp = saved_state->uesp; - *count = i386_THREAD_SYSCALL_STATE_COUNT; - break; - } + if (thread_is_64bit(thr_act)) + return(KERN_INVALID_ARGUMENT); - case THREAD_STATE_FLAVOR_LIST: - if (*count < 5) - return (KERN_INVALID_ARGUMENT); - tstate[0] = i386_NEW_THREAD_STATE; - tstate[1] = i386_FLOAT_STATE; - tstate[2] = i386_ISA_PORT_MAP_STATE; - tstate[3] = i386_V86_ASSIST_STATE; - tstate[4] = THREAD_SYSCALL_STATE; - *count = 5; + *count = x86_EXCEPTION_STATE32_COUNT; + + get_exception_state32(thr_act, (x86_exception_state32_t *)tstate); break; + } - case i386_FLOAT_STATE: { + case x86_EXCEPTION_STATE64: + { + if (*count < x86_EXCEPTION_STATE64_COUNT) + return(KERN_INVALID_ARGUMENT); - if (*count < i386_FLOAT_STATE_COUNT) + if ( !thread_is_64bit(thr_act)) return(KERN_INVALID_ARGUMENT); - *count = i386_FLOAT_STATE_COUNT; - return fpu_get_state(thr_act,(struct i386_float_state *)tstate); + *count = x86_EXCEPTION_STATE64_COUNT; + + get_exception_state64(thr_act, (x86_exception_state64_t *)tstate); + break; } - /* - * Temporary - replace by i386_io_map - */ - case i386_ISA_PORT_MAP_STATE: { - register struct i386_isa_port_map_state *state; - register iopb_tss_t tss; + case x86_EXCEPTION_STATE: + { + x86_exception_state_t *state; - if (*count < i386_ISA_PORT_MAP_STATE_COUNT) + if (*count < x86_EXCEPTION_STATE_COUNT) return(KERN_INVALID_ARGUMENT); - state = (struct i386_isa_port_map_state *) tstate; - tss = thr_act->mact.pcb->ims.io_tss; + state = (x86_exception_state_t *)tstate; - if (tss == 0) { - int i; + bzero((char *)state, sizeof(x86_exception_state_t)); - /* - * The thread has no ktss, so no IO permissions. 
- */ + if (thread_is_64bit(thr_act)) { + state->esh.flavor = x86_EXCEPTION_STATE64; + state->esh.count = x86_EXCEPTION_STATE64_COUNT; - for (i = 0; i < sizeof state->pm; i++) - state->pm[i] = 0xff; + get_exception_state64(thr_act, &state->ues.es64); } else { - /* - * The thread has its own ktss. - */ + state->esh.flavor = x86_EXCEPTION_STATE32; + state->esh.count = x86_EXCEPTION_STATE32_COUNT; - bcopy((char *) tss->bitmap, - (char *) state->pm, - sizeof state->pm); + get_exception_state32(thr_act, &state->ues.es32); } + *count = x86_EXCEPTION_STATE_COUNT; - *count = i386_ISA_PORT_MAP_STATE_COUNT; break; - } + } + case x86_DEBUG_STATE32: + { + if (*count < x86_DEBUG_STATE32_COUNT) + return(KERN_INVALID_ARGUMENT); - case i386_V86_ASSIST_STATE: - { - register struct i386_v86_assist_state *state; + if (thread_is_64bit(thr_act)) + return(KERN_INVALID_ARGUMENT); - if (*count < i386_V86_ASSIST_STATE_COUNT) - return KERN_INVALID_ARGUMENT; + get_debug_state32(thr_act, (x86_debug_state32_t *)tstate); - state = (struct i386_v86_assist_state *) tstate; - state->int_table = thr_act->mact.pcb->ims.v86s.int_table; - state->int_count = thr_act->mact.pcb->ims.v86s.int_count; + *count = x86_DEBUG_STATE32_COUNT; - *count = i386_V86_ASSIST_STATE_COUNT; break; - } + } + case x86_DEBUG_STATE64: + { + if (*count < x86_DEBUG_STATE64_COUNT) + return(KERN_INVALID_ARGUMENT); + + if (!thread_is_64bit(thr_act)) + return(KERN_INVALID_ARGUMENT); - case i386_THREAD_STATE: { - struct i386_saved_state *saved_state; - i386_thread_state_t *state; + get_debug_state64(thr_act, (x86_debug_state64_t *)tstate); - saved_state = USER_REGS(thr_act); - state = (i386_thread_state_t *)tstate; + *count = x86_DEBUG_STATE64_COUNT; - state->eax = saved_state->eax; - state->ebx = saved_state->ebx; - state->ecx = saved_state->ecx; - state->edx = saved_state->edx; - state->edi = saved_state->edi; - state->esi = saved_state->esi; - state->ebp = saved_state->ebp; - state->esp = saved_state->uesp; - state->eflags = saved_state->efl; - state->eip = saved_state->eip; - state->cs = saved_state->cs; - state->ss = saved_state->ss; - state->ds = saved_state->ds; - state->es = saved_state->es; - state->fs = saved_state->fs; - state->gs = saved_state->gs; break; } + case x86_DEBUG_STATE: + { + x86_debug_state_t *state; + + if (*count < x86_DEBUG_STATE_COUNT) + return(KERN_INVALID_ARGUMENT); + + state = (x86_debug_state_t *)tstate; + + bzero(state, sizeof *state); + + if (thread_is_64bit(thr_act)) { + state->dsh.flavor = x86_DEBUG_STATE64; + state->dsh.count = x86_DEBUG_STATE64_COUNT; + + get_debug_state64(thr_act, &state->uds.ds64); + } else { + state->dsh.flavor = x86_DEBUG_STATE32; + state->dsh.count = x86_DEBUG_STATE32_COUNT; - default: + get_debug_state32(thr_act, &state->uds.ds32); + } + *count = x86_DEBUG_STATE_COUNT; + break; + } + default: return(KERN_INVALID_ARGUMENT); } return(KERN_SUCCESS); } -/* - * Alter the thread`s state so that a following thread_exception_return - * will make the thread return 'retval' from a syscall. 
- */ -void -thread_set_syscall_return( - thread_t thread, - kern_return_t retval) +kern_return_t +machine_thread_get_kern_state( + thread_t thread, + thread_flavor_t flavor, + thread_state_t tstate, + mach_msg_type_number_t *count) { - thread->top_act->mact.pcb->iss.eax = retval; + x86_saved_state_t *int_state = current_cpu_datap()->cpu_int_state; + + /* + * This works only for an interrupted kernel thread + */ + if (thread != current_thread() || int_state == NULL) + return KERN_FAILURE; + + switch (flavor) { + case x86_THREAD_STATE32: { + x86_thread_state32_t *state; + x86_saved_state32_t *saved_state; + + if (!is_saved_state32(int_state) || + *count < x86_THREAD_STATE32_COUNT) + return (KERN_INVALID_ARGUMENT); + + state = (x86_thread_state32_t *) tstate; + + saved_state = saved_state32(int_state); + /* + * General registers. + */ + state->eax = saved_state->eax; + state->ebx = saved_state->ebx; + state->ecx = saved_state->ecx; + state->edx = saved_state->edx; + state->edi = saved_state->edi; + state->esi = saved_state->esi; + state->ebp = saved_state->ebp; + state->esp = saved_state->uesp; + state->eflags = saved_state->efl; + state->eip = saved_state->eip; + state->cs = saved_state->cs; + state->ss = saved_state->ss; + state->ds = saved_state->ds & 0xffff; + state->es = saved_state->es & 0xffff; + state->fs = saved_state->fs & 0xffff; + state->gs = saved_state->gs & 0xffff; + + *count = x86_THREAD_STATE32_COUNT; + + return KERN_SUCCESS; + } + + case x86_THREAD_STATE64: { + x86_thread_state64_t *state; + x86_saved_state64_t *saved_state; + + if (!is_saved_state64(int_state) || + *count < x86_THREAD_STATE64_COUNT) + return (KERN_INVALID_ARGUMENT); + + state = (x86_thread_state64_t *) tstate; + + saved_state = saved_state64(int_state); + /* + * General registers. + */ + state->rax = saved_state->rax; + state->rbx = saved_state->rbx; + state->rcx = saved_state->rcx; + state->rdx = saved_state->rdx; + state->rdi = saved_state->rdi; + state->rsi = saved_state->rsi; + state->rbp = saved_state->rbp; + state->rsp = saved_state->isf.rsp; + state->r8 = saved_state->r8; + state->r9 = saved_state->r9; + state->r10 = saved_state->r10; + state->r11 = saved_state->r11; + state->r12 = saved_state->r12; + state->r13 = saved_state->r13; + state->r14 = saved_state->r14; + state->r15 = saved_state->r15; + + state->rip = saved_state->isf.rip; + state->rflags = saved_state->isf.rflags; + state->cs = saved_state->isf.cs; + state->fs = saved_state->fs & 0xffff; + state->gs = saved_state->gs & 0xffff; + *count = x86_THREAD_STATE64_COUNT; + + return KERN_SUCCESS; + } + + case x86_THREAD_STATE: { + x86_thread_state_t *state = NULL; + + if (*count < x86_THREAD_STATE_COUNT) + return (KERN_INVALID_ARGUMENT); + + state = (x86_thread_state_t *) tstate; + + if (is_saved_state32(int_state)) { + x86_saved_state32_t *saved_state = saved_state32(int_state); + + state->tsh.flavor = x86_THREAD_STATE32; + state->tsh.count = x86_THREAD_STATE32_COUNT; + + /* + * General registers. 
+ */ + state->uts.ts32.eax = saved_state->eax; + state->uts.ts32.ebx = saved_state->ebx; + state->uts.ts32.ecx = saved_state->ecx; + state->uts.ts32.edx = saved_state->edx; + state->uts.ts32.edi = saved_state->edi; + state->uts.ts32.esi = saved_state->esi; + state->uts.ts32.ebp = saved_state->ebp; + state->uts.ts32.esp = saved_state->uesp; + state->uts.ts32.eflags = saved_state->efl; + state->uts.ts32.eip = saved_state->eip; + state->uts.ts32.cs = saved_state->cs; + state->uts.ts32.ss = saved_state->ss; + state->uts.ts32.ds = saved_state->ds & 0xffff; + state->uts.ts32.es = saved_state->es & 0xffff; + state->uts.ts32.fs = saved_state->fs & 0xffff; + state->uts.ts32.gs = saved_state->gs & 0xffff; + } else if (is_saved_state64(int_state)) { + x86_saved_state64_t *saved_state = saved_state64(int_state); + + state->tsh.flavor = x86_THREAD_STATE64; + state->tsh.count = x86_THREAD_STATE64_COUNT; + + /* + * General registers. + */ + state->uts.ts64.rax = saved_state->rax; + state->uts.ts64.rbx = saved_state->rbx; + state->uts.ts64.rcx = saved_state->rcx; + state->uts.ts64.rdx = saved_state->rdx; + state->uts.ts64.rdi = saved_state->rdi; + state->uts.ts64.rsi = saved_state->rsi; + state->uts.ts64.rbp = saved_state->rbp; + state->uts.ts64.rsp = saved_state->isf.rsp; + state->uts.ts64.r8 = saved_state->r8; + state->uts.ts64.r9 = saved_state->r9; + state->uts.ts64.r10 = saved_state->r10; + state->uts.ts64.r11 = saved_state->r11; + state->uts.ts64.r12 = saved_state->r12; + state->uts.ts64.r13 = saved_state->r13; + state->uts.ts64.r14 = saved_state->r14; + state->uts.ts64.r15 = saved_state->r15; + + state->uts.ts64.rip = saved_state->isf.rip; + state->uts.ts64.rflags = saved_state->isf.rflags; + state->uts.ts64.cs = saved_state->isf.cs; + state->uts.ts64.fs = saved_state->fs & 0xffff; + state->uts.ts64.gs = saved_state->gs & 0xffff; + } else { + panic("unknown thread state"); + } + + *count = x86_THREAD_STATE_COUNT; + return KERN_SUCCESS; + } + } + return KERN_FAILURE; } + /* * Initialize the machine-dependent state for a new thread. */ kern_return_t -thread_machine_create(thread_t thread, thread_act_t thr_act, void (*start_pos)(thread_t)) +machine_thread_create( + thread_t thread, + task_t task) { - MachineThrAct_t mact = &thr_act->mact; + pcb_t pcb = &thread->machine.xxx_pcb; + x86_saved_state_t *iss; -#if MACH_ASSERT - if (watchacts & WA_PCB) - printf("thread_machine_create(thr=%x,thr_act=%x,st=%x)\n", - thread, thr_act, start_pos); -#endif /* MACH_ASSERT */ +#if NCOPY_WINDOWS > 0 + inval_copy_windows(thread); - assert(thread != NULL); - assert(thr_act != NULL); + thread->machine.physwindow_pte = 0; + thread->machine.physwindow_busy = 0; +#endif /* - * Allocate a kernel stack per shuttle + * Allocate pcb only if required. */ - thread->kernel_stack = (int)stack_alloc(thread,start_pos); - thread->state &= ~TH_STACK_HANDOFF; - assert(thread->kernel_stack != 0); + if (pcb->sf == NULL) { + pcb->sf = zalloc(iss_zone); + if (pcb->sf == NULL) + panic("iss_zone"); + } - /* - * Point top of kernel stack to user`s registers. 
- */ - STACK_IEL(thread->kernel_stack)->saved_state = &mact->pcb->iss; + if (task_has_64BitAddr(task)) { + x86_sframe64_t *sf64; - return(KERN_SUCCESS); -} + sf64 = (x86_sframe64_t *) pcb->sf; -/* - * Machine-dependent cleanup prior to destroying a thread - */ -void -thread_machine_destroy( thread_t thread ) -{ - spl_t s; + bzero((char *)sf64, sizeof(x86_sframe64_t)); + + iss = (x86_saved_state_t *) &sf64->ssf; + iss->flavor = x86_SAVED_STATE64; + /* + * Guarantee that the bootstrapped thread will be in user + * mode. + */ + iss->ss_64.isf.rflags = EFL_USER_SET; + iss->ss_64.isf.cs = USER64_CS; + iss->ss_64.isf.ss = USER_DS; + iss->ss_64.fs = USER_DS; + iss->ss_64.gs = USER_DS; + } else { + if (cpu_mode_is64bit()) { + x86_sframe_compat32_t *sfc32; + + sfc32 = (x86_sframe_compat32_t *)pcb->sf; + + bzero((char *)sfc32, sizeof(x86_sframe_compat32_t)); + + iss = (x86_saved_state_t *) &sfc32->ssf.iss32; + iss->flavor = x86_SAVED_STATE32; +#if defined(__i386__) +#if DEBUG + { + x86_saved_state_compat32_t *xssc; + + xssc = (x86_saved_state_compat32_t *) iss; + + xssc->pad_for_16byte_alignment[0] = 0x64326432; + xssc->pad_for_16byte_alignment[1] = 0x64326432; + } +#endif /* DEBUG */ + } else { + x86_sframe32_t *sf32; + struct real_descriptor *ldtp; + pmap_paddr_t paddr; + + sf32 = (x86_sframe32_t *) pcb->sf; + + bzero((char *)sf32, sizeof(x86_sframe32_t)); + + iss = (x86_saved_state_t *) &sf32->ssf; + iss->flavor = x86_SAVED_STATE32; + pcb->iss_pte0 = pte_kernel_rw(kvtophys((vm_offset_t)iss)); + if (0 == (paddr = pa_to_pte(kvtophys((vm_offset_t)iss + PAGE_SIZE)))) + pcb->iss_pte1 = INTEL_PTE_INVALID; + else + pcb->iss_pte1 = pte_kernel_rw(paddr); + + + ldtp = (struct real_descriptor *) + pmap_index_to_virt(HIGH_FIXED_LDT_BEGIN); + pcb->cthread_desc = ldtp[sel_idx(USER_DS)]; + pcb->uldt_desc = ldtp[sel_idx(USER_DS)]; +#endif /* __i386__ */ + } + /* + * Guarantee that the bootstrapped thread will be in user + * mode. + */ + iss->ss_32.cs = USER_CS; + iss->ss_32.ss = USER_DS; + iss->ss_32.ds = USER_DS; + iss->ss_32.es = USER_DS; + iss->ss_32.fs = USER_DS; + iss->ss_32.gs = USER_DS; + iss->ss_32.efl = EFL_USER_SET; - if (thread->kernel_stack != 0) { - s = splsched(); - stack_free(thread); - splx(s); } -} + pcb->iss = iss; -/* - * This is used to set the current thr_act/thread - * when starting up a new processor - */ -void -thread_machine_set_current( thread_t thread ) -{ - register int my_cpu; + thread->machine.pcb = pcb; + simple_lock_init(&pcb->lock, 0); - mp_disable_preemption(); - my_cpu = cpu_number(); + pcb->arg_store_valid = 0; + pcb->cthread_self = 0; + pcb->uldt_selector = 0; - cpu_data[my_cpu].active_thread = thread; - active_kloaded[my_cpu] = - thread->top_act->kernel_loaded ? thread->top_act : THR_ACT_NULL; - mp_enable_preemption(); + return(KERN_SUCCESS); } - /* - * Pool of kernel activations. 
+ * Machine-dependent cleanup prior to destroying a thread */ - -void act_machine_init() +void +machine_thread_destroy( + thread_t thread) { - int i; - thread_act_t thr_act; + register pcb_t pcb = thread->machine.pcb; -#if MACH_ASSERT - if (watchacts & WA_PCB) - printf("act_machine_init()\n"); -#endif /* MACH_ASSERT */ + assert(pcb); + + if (pcb->ifps != 0) + fpu_free(pcb->ifps); + if (pcb->sf != 0) { + zfree(iss_zone, pcb->sf); + pcb->sf = 0; + } + if (pcb->ids) { + zfree(ids_zone, pcb->ids); + pcb->ids = NULL; + } + thread->machine.pcb = (pcb_t)0; - /* Good to verify this once */ - assert( THREAD_MACHINE_STATE_MAX <= THREAD_STATE_MAX ); } -kern_return_t -act_machine_create(task_t task, thread_act_t thr_act) +void +machine_thread_switch_addrmode(thread_t thread) { - MachineThrAct_t mact = &thr_act->mact; - pcb_t pcb; - -#if MACH_ASSERT - if (watchacts & WA_PCB) - printf("act_machine_create(task=%x,thr_act=%x) pcb=%x\n", - task,thr_act, &mact->xxx_pcb); -#endif /* MACH_ASSERT */ - /* - * Clear & Init the pcb (sets up user-mode s regs) + * We don't want to be preempted until we're done + * - particularly if we're switching the current thread */ - pcb_init(thr_act); + disable_preemption(); - return KERN_SUCCESS; + /* + * Reset the state saveareas. + */ + machine_thread_create(thread, thread->task); + + /* If we're switching ourselves, reset the pcb addresses etc. */ + if (thread == current_thread()) { +#if defined(__i386__) + if (current_cpu_datap()->cpu_active_cr3 != kernel_pmap->pm_cr3) + pmap_load_kernel_cr3(); +#endif /* defined(__i386) */ + act_machine_switch_pcb(thread); + } + enable_preemption(); } + + +/* + * This is used to set the current thr_act/thread + * when starting up a new processor + */ void -act_virtual_machine_destroy(thread_act_t thr_act) +machine_set_current_thread(thread_t thread) { - return; + current_cpu_datap()->cpu_active_thread = thread; } +/* + * This is called when a task is terminated, and also on exec(). + * Clear machine-dependent state that is stored on the task. + */ void -act_machine_destroy(thread_act_t thr_act) +machine_thread_terminate_self(void) { + task_t self_task = current_task(); + if (self_task) { + user_ldt_t user_ldt = self_task->i386_ldt; + if (user_ldt != 0) { + self_task->i386_ldt = 0; + user_ldt_free(user_ldt); + } -#if MACH_ASSERT - if (watchacts & WA_PCB) - printf("act_machine_destroy(0x%x)\n", thr_act); -#endif /* MACH_ASSERT */ - - pcb_terminate(thr_act); + if (self_task->task_debug != NULL) { + zfree(ids_zone, self_task->task_debug); + self_task->task_debug = NULL; + } + } } void -act_machine_return(int code) +act_machine_return( + int code + ) { - thread_act_t thr_act = current_act(); - -#if MACH_ASSERT - /* - * We don't go through the locking dance here needed to - * acquire thr_act->thread safely. - */ - - if (watchacts & WA_EXIT) - printf("act_machine_return(0x%x) cur_act=%x(%d) thr=%x(%d)\n", - code, thr_act, thr_act->ref_count, - thr_act->thread, thr_act->thread->ref_count); -#endif /* MACH_ASSERT */ - /* * This code is called with nothing locked. * It also returns with nothing locked, if it returns. @@ -1197,17 +2191,12 @@ act_machine_return(int code) * activation) is terminated. */ assert( code == KERN_TERMINATED ); - assert( thr_act ); - /* This is the only activation attached to the shuttle... */ - /* terminate the entire thread (shuttle plus activation) */ - - assert(thr_act->thread->top_act == thr_act); thread_terminate_self(); /*NOTREACHED*/ - panic("act_machine_return: TALKING ZOMBIE! 
(1)"); + panic("act_machine_return(%d): TALKING ZOMBIE! (1)", code); } @@ -1215,122 +2204,168 @@ act_machine_return(int code) * Perform machine-dependent per-thread initializations */ void -thread_machine_init(void) +machine_thread_init(void) { - pcb_module_init(); + if (cpu_mode_is64bit()) { + assert(sizeof(x86_sframe_compat32_t) % 16 == 0); + iss_zone = zinit(sizeof(x86_sframe64_t), + thread_max * sizeof(x86_sframe64_t), + THREAD_CHUNK * sizeof(x86_sframe64_t), + "x86_64 saved state"); + + ids_zone = zinit(sizeof(x86_debug_state64_t), + thread_max * sizeof(x86_debug_state64_t), + THREAD_CHUNK * sizeof(x86_debug_state64_t), + "x86_64 debug state"); + + } else { + iss_zone = zinit(sizeof(x86_sframe32_t), + thread_max * sizeof(x86_sframe32_t), + THREAD_CHUNK * sizeof(x86_sframe32_t), + "x86 saved state"); + ids_zone = zinit(sizeof(x86_debug_state32_t), + thread_max * (sizeof(x86_debug_state32_t)), + THREAD_CHUNK * (sizeof(x86_debug_state32_t)), + "x86 debug state"); + } + fpu_module_init(); } + +#if defined(__i386__) /* * Some routines for debugging activation code */ -static void dump_handlers(thread_act_t); -void dump_regs(thread_act_t); +static void dump_handlers(thread_t); +void dump_regs(thread_t); +int dump_act(thread_t thr_act); static void -dump_handlers(thread_act_t thr_act) +dump_handlers(thread_t thr_act) { - ReturnHandler *rhp = thr_act->handlers; - int counter = 0; - - printf("\t"); - while (rhp) { - if (rhp == &thr_act->special_handler){ - if (rhp->next) - printf("[NON-Zero next ptr(%x)]", rhp->next); - printf("special_handler()->"); - break; - } - printf("hdlr_%d(%x)->",counter,rhp->handler); - rhp = rhp->next; - if (++counter > 32) { - printf("Aborting: HUGE handler chain\n"); - break; + ReturnHandler *rhp = thr_act->handlers; + int counter = 0; + + printf("\t"); + while (rhp) { + if (rhp == &thr_act->special_handler){ + if (rhp->next) + printf("[NON-Zero next ptr(%p)]", rhp->next); + printf("special_handler()->"); + break; + } + printf("hdlr_%d(%p)->", counter, rhp->handler); + rhp = rhp->next; + if (++counter > 32) { + printf("Aborting: HUGE handler chain\n"); + break; + } } - } - printf("HLDR_NULL\n"); + printf("HLDR_NULL\n"); } void -dump_regs(thread_act_t thr_act) +dump_regs(thread_t thr_act) { - if (thr_act->mact.pcb) { - register struct i386_saved_state *ssp = USER_REGS(thr_act); - /* Print out user register state */ + if (thr_act->machine.pcb == NULL) + return; + + if (thread_is_64bit(thr_act)) { + x86_saved_state64_t *ssp; + + ssp = USER_REGS64(thr_act); + + panic("dump_regs: 64bit tasks not yet supported"); + + } else { + x86_saved_state32_t *ssp; + + ssp = USER_REGS32(thr_act); + + /* + * Print out user register state + */ printf("\tRegs:\tedi=%x esi=%x ebp=%x ebx=%x edx=%x\n", - ssp->edi, ssp->esi, ssp->ebp, ssp->ebx, ssp->edx); + ssp->edi, ssp->esi, ssp->ebp, ssp->ebx, ssp->edx); + printf("\t\tecx=%x eax=%x eip=%x efl=%x uesp=%x\n", - ssp->ecx, ssp->eax, ssp->eip, ssp->efl, ssp->uesp); + ssp->ecx, ssp->eax, ssp->eip, ssp->efl, ssp->uesp); + printf("\t\tcs=%x ss=%x\n", ssp->cs, ssp->ss); } } int -dump_act(thread_act_t thr_act) +dump_act(thread_t thr_act) { if (!thr_act) return(0); - printf("thr_act(0x%x)(%d): thread=%x(%d) task=%x(%d)\n", - thr_act, thr_act->ref_count, - thr_act->thread, thr_act->thread ? thr_act->thread->ref_count:0, - thr_act->task, thr_act->task ? thr_act->task->ref_count : 0); + printf("thread(%p)(%d): task=%p(%d)\n", + thr_act, thr_act->ref_count, + thr_act->task, + thr_act->task ? 
thr_act->task->ref_count : 0); - printf("\talerts=%x mask=%x susp=%d user_stop=%d active=%x ast=%x\n", - thr_act->alerts, thr_act->alert_mask, - thr_act->suspend_count, thr_act->user_stop_count, - thr_act->active, thr_act->ast); - printf("\thi=%x lo=%x\n", thr_act->higher, thr_act->lower); - printf("\tpcb=%x\n", thr_act->mact.pcb); + printf("\tsusp=%d user_stop=%d active=%x ast=%x\n", + thr_act->suspend_count, thr_act->user_stop_count, + thr_act->active, thr_act->ast); + printf("\tpcb=%p\n", thr_act->machine.pcb); - if (thr_act->thread && thr_act->thread->kernel_stack) { - vm_offset_t stack = thr_act->thread->kernel_stack; + if (thr_act->kernel_stack) { + vm_offset_t stack = thr_act->kernel_stack; - printf("\tk_stk %x eip %x ebx %x esp %x iss %x\n", - stack, STACK_IKS(stack)->k_eip, STACK_IKS(stack)->k_ebx, - STACK_IKS(stack)->k_esp, STACK_IEL(stack)->saved_state); + printf("\tk_stk %lx eip %x ebx %x esp %x iss %p\n", + (long)stack, STACK_IKS(stack)->k_eip, STACK_IKS(stack)->k_ebx, + STACK_IKS(stack)->k_esp, STACK_IEL(stack)->saved_state); } dump_handlers(thr_act); dump_regs(thr_act); return((int)thr_act); } -unsigned int -get_useraddr() +#endif + +user_addr_t +get_useraddr(void) { - - thread_act_t thr_act = current_act(); + thread_t thr_act = current_thread(); - if (thr_act->mact.pcb) - return(thr_act->mact.pcb->iss.eip); - else + if (thr_act->machine.pcb == NULL) return(0); -} + if (thread_is_64bit(thr_act)) { + x86_saved_state64_t *iss64; + + iss64 = USER_REGS64(thr_act); -void -thread_swapin_mach_alloc(thread_t thread) -{ + return(iss64->isf.rip); + } else { + x86_saved_state32_t *iss32; - /* 386 does not have saveareas */ + iss32 = USER_REGS32(thr_act); + return(iss32->eip); + } } + /* * detach and return a kernel stack from a thread */ vm_offset_t -stack_detach(thread_t thread) +machine_stack_detach(thread_t thread) { - vm_offset_t stack; + vm_offset_t stack; - KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_SCHED,MACH_STACK_DETACH), - thread, thread->priority, - thread->sched_pri, 0, - 0); + KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_SCHED, MACH_STACK_DETACH), + (uintptr_t)thread_tid(thread), thread->priority, + thread->sched_pri, 0, + 0); + + stack = thread->kernel_stack; + thread->kernel_stack = 0; - stack = thread->kernel_stack; - thread->kernel_stack = 0; - return(stack); + return (stack); } /* @@ -1338,28 +2373,31 @@ stack_detach(thread_t thread) */ void -stack_attach(struct thread_shuttle *thread, - vm_offset_t stack, - void (*start_pos)(thread_t)) +machine_stack_attach( + thread_t thread, + vm_offset_t stack) { - struct i386_kernel_state *statep; + struct x86_kernel_state *statep; - KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_SCHED,MACH_STACK_ATTACH), - thread, thread->priority, - thread->sched_pri, continuation, - 0); + KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_SCHED, MACH_STACK_ATTACH), + (uintptr_t)thread_tid(thread), thread->priority, + thread->sched_pri, 0, 0); - assert(stack); - statep = STACK_IKS(stack); - thread->kernel_stack = stack; + assert(stack); + thread->kernel_stack = stack; - statep->k_eip = (unsigned long) Thread_continue; - statep->k_ebx = (unsigned long) start_pos; - statep->k_esp = (unsigned long) STACK_IEL(stack); - assert(thread->top_act); - STACK_IEL(stack)->saved_state = &thread->top_act->mact.pcb->iss; + statep = STACK_IKS(stack); +#if defined(__x86_64__) + statep->k_rip = (unsigned long) Thread_continue; + statep->k_rbx = (unsigned long) thread_continue; + statep->k_rsp = (unsigned long) STACK_IEL(stack); +#else + statep->k_eip = (unsigned long) Thread_continue; + statep->k_ebx = 
(unsigned long) thread_continue; + statep->k_esp = (unsigned long) STACK_IEL(stack); +#endif - return; + return; } /* @@ -1367,94 +2405,245 @@ stack_attach(struct thread_shuttle *thread, */ void -stack_handoff(thread_t old, +machine_stack_handoff(thread_t old, thread_t new) { + vm_offset_t stack; - vm_offset_t stack; - pmap_t new_pmap; + assert(new); + assert(old); - KERNEL_DEBUG(MACHDBG_CODE(DBG_MACH_SCHED,MACH_STACK_HANDOFF), - thread, thread->priority, - thread->sched_pri, continuation, - 0); - - assert(new->top_act); - assert(old->top_act); +#if CONFIG_COUNTERS + machine_pmc_cswitch(old, new); +#endif - stack = stack_detach(old); - stack_attach(new, stack, 0); + stack = old->kernel_stack; + if (stack == old->reserved_stack) { + assert(new->reserved_stack); + old->reserved_stack = new->reserved_stack; + new->reserved_stack = stack; + } + old->kernel_stack = 0; + /* + * A full call to machine_stack_attach() is unnecessary + * because the old stack is already initialized. + */ + new->kernel_stack = stack; - new_pmap = new->top_act->task->map->pmap; - if (old->top_act->task->map->pmap != new_pmap) - PMAP_ACTIVATE_MAP(new->top_act->task->map, cpu_number()); + fpu_save_context(old); + - KERNEL_DEBUG_CONSTANT(MACHDBG_CODE(DBG_MACH_SCHED,MACH_STACK_HANDOFF) | DBG_FUNC_NONE, - (int)old, (int)new, old->sched_pri, new->sched_pri, 0); + old->machine.specFlags &= ~OnProc; + new->machine.specFlags |= OnProc; - thread_machine_set_current(new); + PMAP_SWITCH_CONTEXT(old, new, cpu_number()); + act_machine_switch_pcb(new); - active_stacks[cpu_number()] = new->kernel_stack; + machine_set_current_thread(new); - return; + return; } -struct i386_act_context { - struct i386_saved_state ss; - struct i386_float_state fs; + + + +struct x86_act_context32 { + x86_saved_state32_t ss; + x86_float_state32_t fs; + x86_debug_state32_t ds; +}; + +struct x86_act_context64 { + x86_saved_state64_t ss; + x86_float_state64_t fs; + x86_debug_state64_t ds; }; + + void * act_thread_csave(void) { -struct i386_act_context *ic; -kern_return_t kret; -int val; + kern_return_t kret; + mach_msg_type_number_t val; + thread_t thr_act = current_thread(); + + if (thread_is_64bit(thr_act)) { + struct x86_act_context64 *ic64; + + ic64 = (struct x86_act_context64 *)kalloc(sizeof(struct x86_act_context64)); + + if (ic64 == (struct x86_act_context64 *)NULL) + return((void *)0); + + val = x86_SAVED_STATE64_COUNT; + kret = machine_thread_get_state(thr_act, x86_SAVED_STATE64, + (thread_state_t) &ic64->ss, &val); + if (kret != KERN_SUCCESS) { + kfree(ic64, sizeof(struct x86_act_context64)); + return((void *)0); + } + val = x86_FLOAT_STATE64_COUNT; + kret = machine_thread_get_state(thr_act, x86_FLOAT_STATE64, + (thread_state_t) &ic64->fs, &val); + + if (kret != KERN_SUCCESS) { + kfree(ic64, sizeof(struct x86_act_context64)); + return((void *)0); + } + + val = x86_DEBUG_STATE64_COUNT; + kret = machine_thread_get_state(thr_act, + x86_DEBUG_STATE64, + (thread_state_t)&ic64->ds, + &val); + if (kret != KERN_SUCCESS) { + kfree(ic64, sizeof(struct x86_act_context64)); + return((void *)0); + } + return(ic64); + + } else { + struct x86_act_context32 *ic32; - ic = (struct i386_act_context *)kalloc(sizeof(struct i386_act_context)); + ic32 = (struct x86_act_context32 *)kalloc(sizeof(struct x86_act_context32)); - if (ic == (struct i386_act_context *)NULL) - return((void *)0); + if (ic32 == (struct x86_act_context32 *)NULL) + return((void *)0); - val = i386_SAVED_STATE_COUNT; - kret = act_machine_get_state(current_act(), i386_SAVED_STATE, &ic->ss, &val); + val =
x86_SAVED_STATE32_COUNT; + kret = machine_thread_get_state(thr_act, x86_SAVED_STATE32, + (thread_state_t) &ic32->ss, &val); if (kret != KERN_SUCCESS) { - kfree((vm_offset_t)ic,sizeof(struct i386_act_context)); - return((void *)0); + kfree(ic32, sizeof(struct x86_act_context32)); + return((void *)0); } - val = i386_FLOAT_STATE_COUNT; - kret = act_machine_get_state(current_act(), i386_FLOAT_STATE, &ic->fs, &val); + val = x86_FLOAT_STATE32_COUNT; + kret = machine_thread_get_state(thr_act, x86_FLOAT_STATE32, + (thread_state_t) &ic32->fs, &val); if (kret != KERN_SUCCESS) { - kfree((vm_offset_t)ic,sizeof(struct i386_act_context)); - return((void *)0); + kfree(ic32, sizeof(struct x86_act_context32)); + return((void *)0); } - return(ic); + + val = x86_DEBUG_STATE32_COUNT; + kret = machine_thread_get_state(thr_act, + x86_DEBUG_STATE32, + (thread_state_t)&ic32->ds, + &val); + if (kret != KERN_SUCCESS) { + kfree(ic32, sizeof(struct x86_act_context32)); + return((void *)0); + } + return(ic32); + } } + + void act_thread_catt(void *ctx) { -struct i386_act_context *ic; -kern_return_t kret; -int val; + thread_t thr_act = current_thread(); + kern_return_t kret; - ic = (struct i386_act_context *)ctx; - - if (ic == (struct i386_act_context *)NULL) + if (ctx == (void *)NULL) return; - kret = act_machine_set_state(current_act(), i386_SAVED_STATE, &ic->ss, i386_SAVED_STATE_COUNT); - if (kret != KERN_SUCCESS) - goto out; + if (thread_is_64bit(thr_act)) { + struct x86_act_context64 *ic64; + + ic64 = (struct x86_act_context64 *)ctx; + + kret = machine_thread_set_state(thr_act, x86_SAVED_STATE64, + (thread_state_t) &ic64->ss, x86_SAVED_STATE64_COUNT); + if (kret == KERN_SUCCESS) { + machine_thread_set_state(thr_act, x86_FLOAT_STATE64, + (thread_state_t) &ic64->fs, x86_FLOAT_STATE64_COUNT); + } + kfree(ic64, sizeof(struct x86_act_context64)); + } else { + struct x86_act_context32 *ic32; + + ic32 = (struct x86_act_context32 *)ctx; + + kret = machine_thread_set_state(thr_act, x86_SAVED_STATE32, + (thread_state_t) &ic32->ss, x86_SAVED_STATE32_COUNT); + if (kret == KERN_SUCCESS) { + kret = machine_thread_set_state(thr_act, x86_FLOAT_STATE32, + (thread_state_t) &ic32->fs, x86_FLOAT_STATE32_COUNT); + if (kret == KERN_SUCCESS && thr_act->machine.pcb->ids) + machine_thread_set_state(thr_act, + x86_DEBUG_STATE32, + (thread_state_t)&ic32->ds, + x86_DEBUG_STATE32_COUNT); + } + kfree(ic32, sizeof(struct x86_act_context32)); + } +} + + +void act_thread_cfree(__unused void *ctx) +{ + /* XXX - Unused */ +} +void x86_toggle_sysenter_arg_store(thread_t thread, boolean_t valid); +void x86_toggle_sysenter_arg_store(thread_t thread, boolean_t valid) { + thread->machine.pcb->arg_store_valid = valid; +} + +boolean_t x86_sysenter_arg_store_isvalid(thread_t thread); + +boolean_t x86_sysenter_arg_store_isvalid(thread_t thread) { + return (thread->machine.pcb->arg_store_valid); +} + +/* + * Duplicate one x86_debug_state32_t to another. "all" parameter + * chooses whether dr4 and dr5 are copied (they are never meant + * to be installed when we do machine_task_set_state() or + * machine_thread_set_state()). 
+ */ +void +copy_debug_state32( + x86_debug_state32_t *src, + x86_debug_state32_t *target, + boolean_t all) +{ + if (all) { + target->dr4 = src->dr4; + target->dr5 = src->dr5; + } - kret = act_machine_set_state(current_act(), i386_FLOAT_STATE, &ic->fs, i386_FLOAT_STATE_COUNT); - if (kret != KERN_SUCCESS) - goto out; -out: - kfree((vm_offset_t)ic,sizeof(struct i386_act_context)); + target->dr0 = src->dr0; + target->dr1 = src->dr1; + target->dr2 = src->dr2; + target->dr3 = src->dr3; + target->dr6 = src->dr6; + target->dr7 = src->dr7; } -void act_thread_cfree(void *ctx) +/* + * Duplicate one x86_debug_state64_t to another. "all" parameter + * chooses whether dr4 and dr5 are copied (they are never meant + * to be installed when we do machine_task_set_state() or + * machine_thread_set_state()). + */ +void +copy_debug_state64( + x86_debug_state64_t *src, + x86_debug_state64_t *target, + boolean_t all) { - kfree((vm_offset_t)ctx,sizeof(struct i386_act_context)); + if (all) { + target->dr4 = src->dr4; + target->dr5 = src->dr5; + } + + target->dr0 = src->dr0; + target->dr1 = src->dr1; + target->dr2 = src->dr2; + target->dr3 = src->dr3; + target->dr6 = src->dr6; + target->dr7 = src->dr7; }