git.saurik.com Git - apple/xnu.git/blobdiff - osfmk/i386/pcb.c
xnu-4570.41.2.tar.gz
[apple/xnu.git] / osfmk / i386 / pcb.c
index 421cc3f53c1d90212cba2e433f29d6cc79d5a759..5fb9112ea51416f0dadb13a39bcbe4782a9e2a1f 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000-2009 Apple Inc. All rights reserved.
+ * Copyright (c) 2000-2016 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
@@ -54,7 +54,6 @@
  * the rights to redistribute these changes.
  */
 
-#include <mach_rt.h>
 #include <mach_debug.h>
 #include <mach_ldebug.h>
 
@@ -77,6 +76,7 @@
 #include <kern/assert.h>
 #include <kern/spl.h>
 #include <kern/machine.h>
+#include <kern/kpc.h>
 #include <ipc/ipc_port.h>
 #include <vm/vm_kern.h>
 #include <vm/vm_map.h>
 #include <i386/cpu_number.h>
 #include <i386/eflags.h>
 #include <i386/proc_reg.h>
-#include <i386/tss.h>
-#include <i386/user_ldt.h>
 #include <i386/fpu.h>
-#include <i386/mp_desc.h>
 #include <i386/misc_protos.h>
+#include <i386/mp_desc.h>
 #include <i386/thread.h>
-#if defined(__i386__)
-#include <i386/fpu.h>
-#endif
-#include <i386/seg.h>
 #include <i386/machine_routines.h>
 #include <i386/lapic.h> /* LAPIC_PMC_SWI_VECTOR */
 
-#include <machine/commpage.h>
-
-#if CONFIG_COUNTERS
-#include <pmc/pmc.h>
-#endif /* CONFIG_COUNTERS */
+#if HYPERVISOR
+#include <kern/hv_support.h>
+#endif
 
 /*
  * Maps state flavor to number of words in the state:
  */
 unsigned int _MachineStateCount[] = {
-       /* FLAVOR_LIST */
-        0,
-       x86_THREAD_STATE32_COUNT,
-       x86_FLOAT_STATE32_COUNT,
-       x86_EXCEPTION_STATE32_COUNT,
-       x86_THREAD_STATE64_COUNT,
-       x86_FLOAT_STATE64_COUNT,
-       x86_EXCEPTION_STATE64_COUNT,
-       x86_THREAD_STATE_COUNT,
-       x86_FLOAT_STATE_COUNT,
-       x86_EXCEPTION_STATE_COUNT,
-       0,
-       x86_SAVED_STATE32_COUNT,
-       x86_SAVED_STATE64_COUNT,
-       x86_DEBUG_STATE32_COUNT,
-       x86_DEBUG_STATE64_COUNT,
-       x86_DEBUG_STATE_COUNT
+       [x86_THREAD_STATE32]    = x86_THREAD_STATE32_COUNT,
+       [x86_THREAD_STATE64]    = x86_THREAD_STATE64_COUNT,
+       [x86_THREAD_STATE]      = x86_THREAD_STATE_COUNT,
+       [x86_FLOAT_STATE32]     = x86_FLOAT_STATE32_COUNT,
+       [x86_FLOAT_STATE64]     = x86_FLOAT_STATE64_COUNT,
+       [x86_FLOAT_STATE]       = x86_FLOAT_STATE_COUNT,
+       [x86_EXCEPTION_STATE32] = x86_EXCEPTION_STATE32_COUNT,
+       [x86_EXCEPTION_STATE64] = x86_EXCEPTION_STATE64_COUNT,
+       [x86_EXCEPTION_STATE]   = x86_EXCEPTION_STATE_COUNT,
+       [x86_DEBUG_STATE32]     = x86_DEBUG_STATE32_COUNT,
+       [x86_DEBUG_STATE64]     = x86_DEBUG_STATE64_COUNT,
+       [x86_DEBUG_STATE]       = x86_DEBUG_STATE_COUNT,
+       [x86_AVX_STATE32]       = x86_AVX_STATE32_COUNT,
+       [x86_AVX_STATE64]       = x86_AVX_STATE64_COUNT,
+       [x86_AVX_STATE]         = x86_AVX_STATE_COUNT,
+#if !defined(RC_HIDE_XNU_J137)
+       [x86_AVX512_STATE32]    = x86_AVX512_STATE32_COUNT,
+       [x86_AVX512_STATE64]    = x86_AVX512_STATE64_COUNT,
+       [x86_AVX512_STATE]      = x86_AVX512_STATE_COUNT,
+#endif /* not RC_HIDE_XNU_J137 */
 };
 
 zone_t         iss_zone;               /* zone for saved_state area */
@@ -134,12 +129,9 @@ zone_t             ids_zone;               /* zone for debug_state area */
 
 /* Forward */
 
-void           act_machine_throughcall(thread_t thr_act);
-void           act_machine_return(int);
-
 extern void            Thread_continue(void);
 extern void            Load_context(
-                               thread_t                        thread);
+                               thread_t                        thread) __attribute__((noreturn));
 
 static void
 get_exception_state32(thread_t thread, x86_exception_state32_t *es);
@@ -159,57 +151,26 @@ set_thread_state32(thread_t thread, x86_thread_state32_t *ts);
 static int
 set_thread_state64(thread_t thread, x86_thread_state64_t *ts);
 
-#if CONFIG_COUNTERS
+#if HYPERVISOR
 static inline void
-machine_pmc_cswitch(thread_t /* old */, thread_t /* new */);
-
-static inline boolean_t
-machine_thread_pmc_eligible(thread_t);
-
-static inline void
-pmc_swi(thread_t /* old */, thread_t /*new */);
-
-static inline boolean_t
-machine_thread_pmc_eligible(thread_t t) {
-       /*
-        * NOTE: Task-level reservations are propagated to child threads via
-        * thread_create_internal.  Any mutation of task reservations forces a
-        * recalculate of t_chud (for the pmc flag) for all threads in that task.
-        * Consequently, we can simply check the current thread's flag against
-        * THREAD_PMC_FLAG.  If the result is non-zero, we SWI for a PMC switch.
-        */
-       return (t != NULL) ? ((t->t_chud & THREAD_PMC_FLAG) ? TRUE : FALSE) : FALSE;
-}
-
-static inline void
-pmc_swi(thread_t old, thread_t new) {
-       current_cpu_datap()->csw_old_thread = old;
-       current_cpu_datap()->csw_new_thread = new;
-       __asm__ __volatile__("int %0"::"i"(LAPIC_PMC_SWI_VECTOR):"memory");
-}
-
-static inline void
-machine_pmc_cswitch(thread_t old, thread_t new) {
-       if (machine_thread_pmc_eligible(old) || machine_thread_pmc_eligible(new)) {
-               pmc_swi(old, new);
-       }
-}
+ml_hv_cswitch(thread_t old, thread_t new)
+{
+       if (old->hv_thread_target)
+               hv_callbacks.preempt(old->hv_thread_target);
 
-void ml_get_csw_threads(thread_t *old, thread_t *new) {
-       *old = current_cpu_datap()->csw_old_thread;
-       *new = current_cpu_datap()->csw_new_thread;
+       if (new->hv_thread_target)
+               hv_callbacks.dispatch(new->hv_thread_target);   
 }
-
-#endif /* CONFIG_COUNTERS */
+#endif
 
 /*
- * Don't let an illegal value for dr7 get set. Specifically,
- * check for undefined settings.  Setting these bit patterns
+ * Don't let an illegal value for the lower 32-bits of dr7 get set.
+ * Specifically, check for undefined settings.  These bit patterns
  * result in undefined behaviour and can lead to an unexpected
  * TRCTRAP.
  */
 static boolean_t
-dr7_is_valid(uint32_t *dr7)
+dr7d_is_valid(uint32_t *dr7d)
 {
        int i;
        uint32_t mask1, mask2;
@@ -221,20 +182,7 @@ dr7_is_valid(uint32_t *dr7)
        if (!(get_cr4() & CR4_DE))
                for (i = 0, mask1 = 0x3<<16, mask2 = 0x2<<16; i < 4; 
                                i++, mask1 <<= 4, mask2 <<= 4)
-                       if ((*dr7 & mask1) == mask2)
-                               return (FALSE);
-
-       /*
-        * len0-3 pattern "10B" is ok for len on Merom and newer processors
-        * (it signifies an 8-byte wide region). We use the 64bit capability
-        * of the processor in lieu of the more laborious model/family checks
-        * as all 64-bit capable processors so far support this.
-        * Reject an attempt to use this on 64-bit incapable processors.
-        */
-       if (current_cpu_datap()->cpu_is64bit == FALSE)
-               for (i = 0, mask1 = 0x3<<18, mask2 = 0x2<<18; i < 4; 
-                               i++, mask1 <<= 4, mask2 <<= 4)
-                       if ((*dr7 & mask1) == mask2)
+                       if ((*dr7d & mask1) == mask2)
                                return (FALSE);
 
        /*
@@ -243,97 +191,53 @@ dr7_is_valid(uint32_t *dr7)
         * to "00B"
         */
        for (i = 0; i < 4; i++)
-               if (((((*dr7 >> (16 + i*4))) & 0x3) == 0) &&
-                               ((((*dr7 >> (18 + i*4))) & 0x3) != 0))
+               if (((((*dr7d >> (16 + i*4))) & 0x3) == 0) &&
+                               ((((*dr7d >> (18 + i*4))) & 0x3) != 0))
                        return (FALSE);
 
        /*
         * Intel docs have these bits fixed.
         */
-       *dr7 |= 0x1 << 10; /* set bit 10 to 1 */
-       *dr7 &= ~(0x1 << 11); /* set bit 11 to 0 */
-       *dr7 &= ~(0x1 << 12); /* set bit 12 to 0 */
-       *dr7 &= ~(0x1 << 14); /* set bit 14 to 0 */
-       *dr7 &= ~(0x1 << 15); /* set bit 15 to 0 */
+       *dr7d |= 0x1 << 10; /* set bit 10 to 1 */
+       *dr7d &= ~(0x1 << 11); /* set bit 11 to 0 */
+       *dr7d &= ~(0x1 << 12); /* set bit 12 to 0 */
+       *dr7d &= ~(0x1 << 14); /* set bit 14 to 0 */
+       *dr7d &= ~(0x1 << 15); /* set bit 15 to 0 */
 
        /*
         * We don't allow anything to set the global breakpoints.
         */
 
-       if (*dr7 & 0x2)
+       if (*dr7d & 0x2)
                return (FALSE);
 
-       if (*dr7 & (0x2<<2))
+       if (*dr7d & (0x2<<2))
                return (FALSE);
 
-       if (*dr7 & (0x2<<4))
+       if (*dr7d & (0x2<<4))
                return (FALSE);
 
-       if (*dr7 & (0x2<<6))
+       if (*dr7d & (0x2<<6))
                return (FALSE);
 
        return (TRUE);
 }
 
-static inline void
-set_live_debug_state32(cpu_data_t *cdp, x86_debug_state32_t *ds)
-{
-       __asm__ volatile ("movl %0,%%db0" : :"r" (ds->dr0));
-       __asm__ volatile ("movl %0,%%db1" : :"r" (ds->dr1));
-       __asm__ volatile ("movl %0,%%db2" : :"r" (ds->dr2));
-       __asm__ volatile ("movl %0,%%db3" : :"r" (ds->dr3));
-       if (cpu_mode_is64bit())
-               cdp->cpu_dr7 = ds->dr7;
-}
-
 extern void set_64bit_debug_regs(x86_debug_state64_t *ds);
 
-static inline void
-set_live_debug_state64(cpu_data_t *cdp, x86_debug_state64_t *ds)
-{
-       /*
-        * We need to enter 64-bit mode in order to set the full
-        * width of these registers
-        */
-       set_64bit_debug_regs(ds);
-       cdp->cpu_dr7 = ds->dr7;
-}
-
 boolean_t
 debug_state_is_valid32(x86_debug_state32_t *ds) 
 {
-       if (!dr7_is_valid(&ds->dr7))
+       if (!dr7d_is_valid(&ds->dr7))
                return FALSE;
 
-#if defined(__i386__)
-       /*
-        * Only allow local breakpoints and make sure they are not
-        * in the trampoline code.
-        */
-       if (ds->dr7 & 0x1)
-               if (ds->dr0 >= (unsigned long)HIGH_MEM_BASE)
-                       return FALSE;
-
-       if (ds->dr7 & (0x1<<2))
-               if (ds->dr1 >= (unsigned long)HIGH_MEM_BASE)
-                       return FALSE;
-
-       if (ds->dr7 & (0x1<<4))
-               if (ds->dr2 >= (unsigned long)HIGH_MEM_BASE)
-                       return FALSE;
-
-       if (ds->dr7 & (0x1<<6))
-               if (ds->dr3 >= (unsigned long)HIGH_MEM_BASE)
-                       return FALSE;
-#endif
-
        return TRUE;
 }
 
 boolean_t
 debug_state_is_valid64(x86_debug_state64_t *ds)
 {
-       if (!dr7_is_valid((uint32_t *)&ds->dr7))
+       if (!dr7d_is_valid((uint32_t *)&ds->dr7))
                return FALSE;
 
        /*
@@ -356,6 +260,9 @@ debug_state_is_valid64(x86_debug_state64_t *ds)
                if (ds->dr3 >= VM_MAX_PAGE_ADDRESS)
                        return FALSE;
 
+       /* For x86-64, we must ensure the upper 32-bits of DR7 are clear */
+       ds->dr7 &= 0xffffffffULL;
+
        return TRUE;
 }
 
@@ -366,7 +273,7 @@ set_debug_state32(thread_t thread, x86_debug_state32_t *ds)
        x86_debug_state32_t *ids;
        pcb_t pcb;
 
-       pcb = thread->machine.pcb;
+       pcb = THREAD_TO_PCB(thread);
        ids = pcb->ids;
 
        if (debug_state_is_valid32(ds) != TRUE) {
@@ -400,7 +307,7 @@ set_debug_state64(thread_t thread, x86_debug_state64_t *ds)
        x86_debug_state64_t *ids;
        pcb_t pcb;
 
-       pcb = thread->machine.pcb;
+       pcb = THREAD_TO_PCB(thread);
        ids = pcb->ids;
 
        if (debug_state_is_valid64(ds) != TRUE) {
@@ -411,6 +318,13 @@ set_debug_state64(thread_t thread, x86_debug_state64_t *ds)
                ids = zalloc(ids_zone);
                bzero(ids, sizeof *ids);
 
+#if HYPERVISOR
+               if (thread->hv_thread_target) {
+                       hv_callbacks.volatile_state(thread->hv_thread_target,
+                               HV_DEBUG_STATE);
+               }
+#endif
+
                simple_lock(&pcb->lock);
                /* make sure it wasn't already alloc()'d elsewhere */
                if (pcb->ids == NULL) {
@@ -432,7 +346,7 @@ get_debug_state32(thread_t thread, x86_debug_state32_t *ds)
 {
        x86_debug_state32_t *saved_state;
 
-       saved_state = thread->machine.pcb->ids;
+       saved_state = thread->machine.ids;
 
        if (saved_state) {
                copy_debug_state32(saved_state, ds, TRUE);
@@ -445,7 +359,7 @@ get_debug_state64(thread_t thread, x86_debug_state64_t *ds)
 {
        x86_debug_state64_t *saved_state;
 
-       saved_state = (x86_debug_state64_t *)thread->machine.pcb->ids;
+       saved_state = (x86_debug_state64_t *)thread->machine.ids;
 
        if (saved_state) {
                copy_debug_state64(saved_state, ds, TRUE);
@@ -467,318 +381,6 @@ void
 consider_machine_adjust(void)
 {
 }
-extern void *get_bsduthreadarg(thread_t th);
-
-#if defined(__x86_64__)
-static void
-act_machine_switch_pcb( thread_t new )
-{
-        pcb_t                  pcb = new->machine.pcb;
-       struct real_descriptor  *ldtp;
-       mach_vm_offset_t        pcb_stack_top;
-       cpu_data_t              *cdp = current_cpu_datap();
-
-       assert(new->kernel_stack != 0);
-
-       if (!cpu_mode_is64bit()) {
-               panic("K64 is 64bit!");
-       } else if (is_saved_state64(pcb->iss)) {
-               /*
-                * The test above is performed against the thread save state
-                * flavor and not task's 64-bit feature flag because of the
-                * thread/task 64-bit state divergence that can arise in
-                * task_set_64bit() x86: the task state is changed before
-                * the individual thread(s).
-                */
-               x86_saved_state64_tagged_t      *iss64;
-               vm_offset_t                     isf;
-
-               assert(is_saved_state64(pcb->iss));
-                                                  
-               iss64 = (x86_saved_state64_tagged_t *) pcb->iss;
-       
-               /*
-                * Set pointer to PCB's interrupt stack frame in cpu data.
-                * Used by syscall and double-fault trap handlers.
-                */
-               isf = (vm_offset_t) &iss64->state.isf;
-               cdp->cpu_uber.cu_isf = isf;
-               pcb_stack_top = (vm_offset_t) (iss64 + 1);
-               /* require 16-byte alignment */
-               assert((pcb_stack_top & 0xF) == 0);
-
-               /* Interrupt stack is pcb */
-               current_ktss64()->rsp0 = pcb_stack_top;
-
-               /*
-                * Top of temporary sysenter stack points to pcb stack.
-                * Although this is not normally used by 64-bit users,
-                * it needs to be set in case a sysenter is attempted.
-                */
-               *current_sstk64() = pcb_stack_top;
-
-               cdp->cpu_task_map = new->map->pmap->pm_task_map; 
-
-               /*
-                * Enable the 64-bit user code segment, USER64_CS.
-                * Disable the 32-bit user code segment, USER_CS.
-                */
-               ldt_desc_p(USER64_CS)->access |= ACC_PL_U;
-               ldt_desc_p(USER_CS)->access &= ~ACC_PL_U;
-
-               /*
-                * Switch user's GS base if necessary
-                * by setting the Kernel GS base MSR
-                * - this will become the user's on the swapgs when
-                * returning to user-space. Avoid this for
-                * kernel threads (no user TLS support required)
-                * and verify the memory shadow of the segment base
-                * in the event it was altered in user space.
-                */
-               if ((pcb->cthread_self != 0) || (new->task != kernel_task)) {
-                       if ((cdp->cpu_uber.cu_user_gs_base != pcb->cthread_self) || (pcb->cthread_self != rdmsr64(MSR_IA32_KERNEL_GS_BASE))) {
-                               cdp->cpu_uber.cu_user_gs_base = pcb->cthread_self;
-                               wrmsr64(MSR_IA32_KERNEL_GS_BASE, pcb->cthread_self);
-                       }
-               }
-       } else {
-               x86_saved_state_compat32_t      *iss32compat;
-               vm_offset_t                     isf;
-
-               assert(is_saved_state32(pcb->iss));
-               iss32compat = (x86_saved_state_compat32_t *) pcb->iss;
-
-               pcb_stack_top = (uintptr_t) (iss32compat + 1);
-               /* require 16-byte alignment */
-               assert((pcb_stack_top & 0xF) == 0);
-
-               /*
-                * Set pointer to PCB's interrupt stack frame in cpu data.
-                * Used by debug trap handler.
-                */
-               isf = (vm_offset_t) &iss32compat->isf64;
-               cdp->cpu_uber.cu_isf = isf;
-
-               /* Top of temporary sysenter stack points to pcb stack */
-               *current_sstk64() = pcb_stack_top;
-
-               /* Interrupt stack is pcb */
-               current_ktss64()->rsp0 = pcb_stack_top;
-
-               cdp->cpu_task_map = TASK_MAP_32BIT;
-               /* Precalculate pointers to syscall argument store, for use
-                * in the trampolines.
-                */
-               cdp->cpu_uber_arg_store = (vm_offset_t)get_bsduthreadarg(new);
-               cdp->cpu_uber_arg_store_valid = (vm_offset_t)&pcb->arg_store_valid;
-               pcb->arg_store_valid = 0;
-
-               /*
-                * Disable USER64_CS
-                * Enable USER_CS
-                */
-               ldt_desc_p(USER64_CS)->access &= ~ACC_PL_U;
-               ldt_desc_p(USER_CS)->access |= ACC_PL_U;
-
-               /*
-                * Set the thread`s cthread (a.k.a pthread)
-                * For 32-bit user this involves setting the USER_CTHREAD
-                * descriptor in the LDT to point to the cthread data.
-                * The involves copying in the pre-initialized descriptor.
-                */ 
-               ldtp = (struct real_descriptor *)current_ldt();
-               ldtp[sel_idx(USER_CTHREAD)] = pcb->cthread_desc;
-               if (pcb->uldt_selector != 0)
-                       ldtp[sel_idx(pcb->uldt_selector)] = pcb->uldt_desc;
-               cdp->cpu_uber.cu_user_gs_base = pcb->cthread_self;
-
-               /*
-                * Set the thread`s LDT or LDT entry.
-                */
-               if (new->task == TASK_NULL || new->task->i386_ldt == 0) {
-                       /*
-                        * Use system LDT.
-                        */
-                       ml_cpu_set_ldt(KERNEL_LDT);
-               } else {
-                       /*
-                        * Task has its own LDT.
-                        */
-                       user_ldt_set(new);
-               }
-       }
-
-       /*
-        * Bump the scheduler generation count in the commpage.
-        * This can be read by user code to detect its preemption.
-        */
-       commpage_sched_gen_inc();
-}
-#else
-static void
-act_machine_switch_pcb( thread_t new )
-{
-        pcb_t                  pcb = new->machine.pcb;
-       struct real_descriptor  *ldtp;
-       vm_offset_t             pcb_stack_top;
-       vm_offset_t             hi_pcb_stack_top;
-       vm_offset_t             hi_iss;
-       cpu_data_t              *cdp = current_cpu_datap();
-
-       assert(new->kernel_stack != 0);
-       STACK_IEL(new->kernel_stack)->saved_state = pcb->iss;
-
-       if (!cpu_mode_is64bit()) {
-               x86_saved_state32_tagged_t      *hi_iss32;
-               /*
-                *      Save a pointer to the top of the "kernel" stack -
-                *      actually the place in the PCB where a trap into
-                *      kernel mode will push the registers.
-                */
-               hi_iss = (vm_offset_t)((unsigned long)
-                       pmap_cpu_high_map_vaddr(cpu_number(), HIGH_CPU_ISS0) |
-                       ((unsigned long)pcb->iss & PAGE_MASK));
-
-               cdp->cpu_hi_iss = (void *)hi_iss;
-
-               pmap_high_map(pcb->iss_pte0, HIGH_CPU_ISS0);
-               pmap_high_map(pcb->iss_pte1, HIGH_CPU_ISS1);
-
-               hi_iss32 = (x86_saved_state32_tagged_t *) hi_iss;
-               assert(hi_iss32->tag == x86_SAVED_STATE32);
-
-               hi_pcb_stack_top = (int) (hi_iss32 + 1);
-
-               /*
-                * For fast syscall, top of interrupt stack points to pcb stack
-                */
-               *(vm_offset_t *) current_sstk() = hi_pcb_stack_top;
-
-               current_ktss()->esp0 = hi_pcb_stack_top;
-
-       } else if (is_saved_state64(pcb->iss)) {
-               /*
-                * The test above is performed against the thread save state
-                * flavor and not task's 64-bit feature flag because of the
-                * thread/task 64-bit state divergence that can arise in
-                * task_set_64bit() x86: the task state is changed before
-                * the individual thread(s).
-                */
-               x86_saved_state64_tagged_t      *iss64;
-               vm_offset_t                     isf;
-
-               assert(is_saved_state64(pcb->iss));
-                                                  
-               iss64 = (x86_saved_state64_tagged_t *) pcb->iss;
-       
-               /*
-                * Set pointer to PCB's interrupt stack frame in cpu data.
-                * Used by syscall and double-fault trap handlers.
-                */
-               isf = (vm_offset_t) &iss64->state.isf;
-               cdp->cpu_uber.cu_isf = UBER64(isf);
-               pcb_stack_top = (vm_offset_t) (iss64 + 1);
-               /* require 16-byte alignment */
-               assert((pcb_stack_top & 0xF) == 0);
-               /* Interrupt stack is pcb */
-               current_ktss64()->rsp0 = UBER64(pcb_stack_top);
-
-               /*
-                * Top of temporary sysenter stack points to pcb stack.
-                * Although this is not normally used by 64-bit users,
-                * it needs to be set in case a sysenter is attempted.
-                */
-               *current_sstk64() = UBER64(pcb_stack_top);
-
-               cdp->cpu_task_map = new->map->pmap->pm_task_map; 
-
-               /*
-                * Enable the 64-bit user code segment, USER64_CS.
-                * Disable the 32-bit user code segment, USER_CS.
-                */
-               ldt_desc_p(USER64_CS)->access |= ACC_PL_U;
-               ldt_desc_p(USER_CS)->access &= ~ACC_PL_U;
-
-       } else {
-               x86_saved_state_compat32_t      *iss32compat;
-               vm_offset_t                     isf;
-
-               assert(is_saved_state32(pcb->iss));
-               iss32compat = (x86_saved_state_compat32_t *) pcb->iss;
-
-               pcb_stack_top = (int) (iss32compat + 1);
-               /* require 16-byte alignment */
-               assert((pcb_stack_top & 0xF) == 0);
-
-               /*
-                * Set pointer to PCB's interrupt stack frame in cpu data.
-                * Used by debug trap handler.
-                */
-               isf = (vm_offset_t) &iss32compat->isf64;
-               cdp->cpu_uber.cu_isf = UBER64(isf);
-
-               /* Top of temporary sysenter stack points to pcb stack */
-               *current_sstk64() = UBER64(pcb_stack_top);
-
-               /* Interrupt stack is pcb */
-               current_ktss64()->rsp0 = UBER64(pcb_stack_top);
-
-               cdp->cpu_task_map = TASK_MAP_32BIT;
-               /* Precalculate pointers to syscall argument store, for use
-                * in the trampolines.
-                */
-               cdp->cpu_uber_arg_store = UBER64((vm_offset_t)get_bsduthreadarg(new));
-               cdp->cpu_uber_arg_store_valid = UBER64((vm_offset_t)&pcb->arg_store_valid);
-               pcb->arg_store_valid = 0;
-
-               /*
-                * Disable USER64_CS
-                * Enable USER_CS
-                */
-               ldt_desc_p(USER64_CS)->access &= ~ACC_PL_U;
-               ldt_desc_p(USER_CS)->access |= ACC_PL_U;
-       }
-
-       /*
-        * Set the thread`s cthread (a.k.a pthread)
-        * For 32-bit user this involves setting the USER_CTHREAD
-        * descriptor in the LDT to point to the cthread data.
-        * The involves copying in the pre-initialized descriptor.
-        */ 
-       ldtp = (struct real_descriptor *)current_ldt();
-       ldtp[sel_idx(USER_CTHREAD)] = pcb->cthread_desc;
-       if (pcb->uldt_selector != 0)
-               ldtp[sel_idx(pcb->uldt_selector)] = pcb->uldt_desc;
-
-
-       /*
-        * For 64-bit, we additionally set the 64-bit User GS base
-        * address. On return to 64-bit user, the GS.Base MSR will be written.
-        */
-       cdp->cpu_uber.cu_user_gs_base = pcb->cthread_self;
-
-       /*
-        * Set the thread`s LDT or LDT entry.
-        */
-       if (new->task == TASK_NULL || new->task->i386_ldt == 0) {
-               /*
-                * Use system LDT.
-                */
-               ml_cpu_set_ldt(KERNEL_LDT);
-       } else {
-               /*
-                * Task has its own LDT.
-                */
-               user_ldt_set(new);
-       }
-
-       /*
-        * Bump the scheduler generation count in the commpage.
-        * This can be read by user code to detect its preemption.
-        */
-       commpage_sched_gen_inc();
-}
-#endif
 
 /*
  * Switch to the first thread on a CPU.
@@ -787,14 +389,20 @@ void
 machine_load_context(
        thread_t                new)
 {
-#if CONFIG_COUNTERS
-       machine_pmc_cswitch(NULL, new);
-#endif
        new->machine.specFlags |= OnProc;
-       act_machine_switch_pcb(new);
+       act_machine_switch_pcb(NULL, new);
        Load_context(new);
 }
 
+static inline void pmap_switch_context(thread_t ot, thread_t nt, int cnum) {
+       pmap_assert(ml_get_interrupts_enabled() == FALSE);
+       vm_map_t nmap = nt->map, omap = ot->map;
+       if ((omap != nmap) || (nmap->pmap->pagezero_accessible)) {
+               PMAP_DEACTIVATE_MAP(omap, ot, cnum);
+               PMAP_ACTIVATE_MAP(nmap, nt, cnum);
+       }
+}
+
 /*
  * Switch to a new thread.
  * Save the old thread`s kernel state or continuation,
@@ -806,23 +414,22 @@ machine_switch_context(
        thread_continue_t       continuation,
        thread_t                        new)
 {
-#if MACH_RT
-        assert(current_cpu_datap()->cpu_active_stack == old->kernel_stack);
-#endif
-#if CONFIG_COUNTERS
-       machine_pmc_cswitch(old, new);
-#endif
+       assert(current_cpu_datap()->cpu_active_stack == old->kernel_stack);
+
+#if KPC
+       kpc_off_cpu(old);
+#endif /* KPC */
+
        /*
         *      Save FP registers if in use.
         */
-       fpu_save_context(old);
-
+       fpu_switch_context(old, new);
 
        old->machine.specFlags &= ~OnProc;
        new->machine.specFlags |= OnProc;
 
        /*
-        * Monitor the stack depth and report new max,
+        * Monitor the stack depth and report new max,
         * not worrying about races.
         */
        vm_offset_t     depth = current_stack_depth();
@@ -837,12 +444,16 @@ machine_switch_context(
         *      Switch address maps if need be, even if not switching tasks.
         *      (A server activation may be "borrowing" a client map.)
         */
-       PMAP_SWITCH_CONTEXT(old, new, cpu_number())
+       pmap_switch_context(old, new, cpu_number());
 
        /*
         *      Load the rest of the user state for the new thread
         */
-       act_machine_switch_pcb(new);
+       act_machine_switch_pcb(old, new);
+
+#if HYPERVISOR
+       ml_hv_cswitch(old, new);
+#endif
 
        return(Switch_context(old, continuation, new));
 }
@@ -856,21 +467,11 @@ machine_processor_shutdown(
 #if CONFIG_VMX
        vmx_suspend();
 #endif
-       fpu_save_context(thread);
-       PMAP_SWITCH_CONTEXT(thread, processor->idle_thread, cpu_number());
+       fpu_switch_context(thread, NULL);
+       pmap_switch_context(thread, processor->idle_thread, cpu_number());
        return(Shutdown_context(thread, doshutdown, processor));
 }
 
-/*
- * act_machine_sv_free
- * release saveareas associated with an act.  if flag is true, release
- * user level savearea(s) too, else don't
- */
-void
-act_machine_sv_free(__unused thread_t act, __unused int flag)
-{
-}
-
 
 /*
  * This is where registers that are not normally specified by the mach-o
@@ -885,16 +486,16 @@ machine_thread_state_initialize(
      * The initialized state will then be lazily faulted-in, if required.
      * And if we're target, re-arm the no-fpu trap.
      */
-       if (thread->machine.pcb->ifps) {
+       if (thread->machine.ifps) {
                (void) fpu_set_fxstate(thread, NULL, x86_FLOAT_STATE64);
 
                if (thread == current_thread())
                        clear_fpu();
        }
 
-       if (thread->machine.pcb->ids) {
-               zfree(ids_zone, thread->machine.pcb->ids);
-               thread->machine.pcb->ids = NULL;
+       if (thread->machine.ids) {
+               zfree(ids_zone, thread->machine.ids);
+               thread->machine.ids = NULL;
        }
 
        return  KERN_SUCCESS;
@@ -940,6 +541,7 @@ get_exception_state64(thread_t thread, x86_exception_state64_t *es)
         saved_state = USER_REGS64(thread);
 
        es->trapno = saved_state->isf.trapno;
+       es->cpu = saved_state->isf.cpu;
        es->err = (typeof(es->err))saved_state->isf.err;
        es->faultvaddr = saved_state->cr2;
 }              
@@ -952,6 +554,7 @@ get_exception_state32(thread_t thread, x86_exception_state32_t *es)
         saved_state = USER_REGS32(thread);
 
        es->trapno = saved_state->trapno;
+       es->cpu = saved_state->cpu;
        es->err = saved_state->err;
        es->faultvaddr = saved_state->cr2;
 }              
@@ -962,6 +565,7 @@ set_thread_state32(thread_t thread, x86_thread_state32_t *ts)
 {
         x86_saved_state32_t    *saved_state;
 
+       pal_register_cache_state(thread, DIRTY);
 
        saved_state = USER_REGS32(thread);
 
@@ -969,11 +573,6 @@ set_thread_state32(thread_t thread, x86_thread_state32_t *ts)
         * Scrub segment selector values:
         */
        ts->cs = USER_CS;
-#ifdef __i386__
-       if (ts->ss == 0) ts->ss = USER_DS;
-       if (ts->ds == 0) ts->ds = USER_DS;
-       if (ts->es == 0) ts->es = USER_DS;
-#else /* __x86_64__ */
        /*
         * On a 64 bit kernel, we always override the data segments,
         * as the actual selector numbers have changed. This also
@@ -983,8 +582,10 @@ set_thread_state32(thread_t thread, x86_thread_state32_t *ts)
        ts->ss = USER_DS;
        ts->ds = USER_DS;
        ts->es = USER_DS;
-#endif
 
+       /* Set GS to CTHREAD only if it's been established */
+       ts->gs = thread->machine.cthread_self ? USER_CTHREAD : NULL_SEG;
        /* Check segment selectors are safe */
        if (!valid_user_segment_selectors(ts->cs,
                                          ts->ss,
@@ -1027,6 +628,7 @@ set_thread_state64(thread_t thread, x86_thread_state64_t *ts)
 {
         x86_saved_state64_t    *saved_state;
 
+       pal_register_cache_state(thread, DIRTY);
 
        saved_state = USER_REGS64(thread);
 
@@ -1066,6 +668,7 @@ get_thread_state32(thread_t thread, x86_thread_state32_t *ts)
 {
         x86_saved_state32_t    *saved_state;
 
+       pal_register_cache_state(thread, VALID);
 
        saved_state = USER_REGS32(thread);
 
@@ -1093,6 +696,7 @@ get_thread_state64(thread_t thread, x86_thread_state64_t *ts)
 {
         x86_saved_state64_t    *saved_state;
 
+       pal_register_cache_state(thread, VALID);
 
        saved_state = USER_REGS64(thread);
 
@@ -1120,87 +724,6 @@ get_thread_state64(thread_t thread, x86_thread_state64_t *ts)
 }
 
 
-void
-thread_set_wq_state32(thread_t thread, thread_state_t tstate)
-{
-        x86_thread_state32_t   *state;
-        x86_saved_state32_t    *saved_state;
-       thread_t curth = current_thread();
-       spl_t                   s=0;
-
-
-       saved_state = USER_REGS32(thread);
-
-       state = (x86_thread_state32_t *)tstate;
-       
-       if (curth != thread) {
-               s = splsched();
-               thread_lock(thread);
-       }
-
-       saved_state->ebp = 0;
-       saved_state->eip = state->eip;
-       saved_state->eax = state->eax;
-       saved_state->ebx = state->ebx;
-       saved_state->ecx = state->ecx;
-       saved_state->edx = state->edx;
-       saved_state->edi = state->edi;
-       saved_state->esi = state->esi;
-       saved_state->uesp = state->esp;
-       saved_state->efl = EFL_USER_SET;
-
-       saved_state->cs = USER_CS;
-       saved_state->ss = USER_DS;
-       saved_state->ds = USER_DS;
-       saved_state->es = USER_DS;
-
-
-       if (curth != thread) {
-               thread_unlock(thread);
-               splx(s);
-       }
-}
-
-
-void
-thread_set_wq_state64(thread_t thread, thread_state_t tstate)
-{
-        x86_thread_state64_t   *state;
-        x86_saved_state64_t    *saved_state;
-       thread_t curth = current_thread();
-       spl_t                   s=0;
-
-
-       saved_state = USER_REGS64(thread);
-       state = (x86_thread_state64_t *)tstate;
-       
-       if (curth != thread) {
-               s = splsched();
-               thread_lock(thread);
-       }
-
-       saved_state->rbp = 0;
-       saved_state->rdi = state->rdi;
-       saved_state->rsi = state->rsi;
-       saved_state->rdx = state->rdx;
-       saved_state->rcx = state->rcx;
-       saved_state->r8  = state->r8;
-       saved_state->r9  = state->r9;
-
-       saved_state->isf.rip = state->rip;
-       saved_state->isf.rsp = state->rsp;
-       saved_state->isf.cs = USER64_CS;
-       saved_state->isf.rflags = EFL_USER_SET;
-
-
-       if (curth != thread) {
-               thread_unlock(thread);
-               splx(s);
-       }
-}
-
-
-
 /*
  *     act_machine_set_state:
  *
@@ -1237,6 +760,7 @@ machine_thread_set_state(
                                        state->gs))
                        return KERN_INVALID_ARGUMENT;
 
+               pal_register_cache_state(thr_act, DIRTY);
 
                saved_state = USER_REGS32(thr_act);
 
@@ -1307,6 +831,7 @@ machine_thread_set_state(
                    !IS_USERADDR64_CANONICAL(state->isf.rip))
                        return KERN_INVALID_ARGUMENT;
 
+               pal_register_cache_state(thr_act, DIRTY);
 
                saved_state = USER_REGS64(thr_act);
 
@@ -1348,8 +873,12 @@ machine_thread_set_state(
        }
 
        case x86_FLOAT_STATE32:
+       case x86_AVX_STATE32:
+#if !defined(RC_HIDE_XNU_J137)
+       case x86_AVX512_STATE32:
+#endif /* not RC_HIDE_XNU_J137 */
        {
-               if (count != x86_FLOAT_STATE32_COUNT)
+               if (count != _MachineStateCount[flavor])
                        return(KERN_INVALID_ARGUMENT);
 
                if (thread_is_64bit(thr_act))
@@ -1359,11 +888,15 @@ machine_thread_set_state(
        }
 
        case x86_FLOAT_STATE64:
+       case x86_AVX_STATE64:
+#if !defined(RC_HIDE_XNU_J137)
+       case x86_AVX512_STATE64:
+#endif /* not RC_HIDE_XNU_J137 */
        {
-               if (count != x86_FLOAT_STATE64_COUNT)
+               if (count != _MachineStateCount[flavor])
                        return(KERN_INVALID_ARGUMENT);
 
-               if ( !thread_is_64bit(thr_act))
+               if (!thread_is_64bit(thr_act))
                        return(KERN_INVALID_ARGUMENT);
 
                return fpu_set_fxstate(thr_act, tstate, flavor);
@@ -1388,26 +921,35 @@ machine_thread_set_state(
                return(KERN_INVALID_ARGUMENT);
        }
 
-       case x86_AVX_STATE32:
-       {
-               if (count != x86_AVX_STATE32_COUNT)
-                       return(KERN_INVALID_ARGUMENT);
-
-               if (thread_is_64bit(thr_act))
-                       return(KERN_INVALID_ARGUMENT);
-
-               return fpu_set_fxstate(thr_act, tstate, flavor);
-       }
-
-       case x86_AVX_STATE64:
-       {
-               if (count != x86_AVX_STATE64_COUNT)
-                       return(KERN_INVALID_ARGUMENT);
+       case x86_AVX_STATE:
+#if !defined(RC_HIDE_XNU_J137)
+       case x86_AVX512_STATE:
+#endif
+       {   
+               x86_avx_state_t       *state;
 
-               if (!thread_is_64bit(thr_act))
+               if (count != _MachineStateCount[flavor])
                        return(KERN_INVALID_ARGUMENT);
 
-               return fpu_set_fxstate(thr_act, tstate, flavor);
+               state = (x86_avx_state_t *)tstate;
+               /* Flavors are defined to have sequential values: 32-bit, 64-bit, non-specific */
+               /* 64-bit flavor? */
+               if (state->ash.flavor == (flavor - 1) &&
+                   state->ash.count  == _MachineStateCount[flavor - 1] &&
+                   thread_is_64bit(thr_act)) {
+                       return fpu_set_fxstate(thr_act,
+                                              (thread_state_t)&state->ufs.as64,
+                                              flavor - 1);
+               }
+               /* 32-bit flavor? */
+               if (state->ash.flavor == (flavor - 2) &&
+                   state->ash.count  == _MachineStateCount[flavor - 2] &&
+                   !thread_is_64bit(thr_act)) {
+                       return fpu_set_fxstate(thr_act,
+                                              (thread_state_t)&state->ufs.as32,
+                                              flavor - 2); 
+               }
+               return(KERN_INVALID_ARGUMENT);
        }
 
        case x86_THREAD_STATE32: 
@@ -1451,8 +993,6 @@ machine_thread_set_state(
                        return set_thread_state32(thr_act, &state->uts.ts32);
                } else
                        return(KERN_INVALID_ARGUMENT);
-
-               break;
        }
        case x86_DEBUG_STATE32:
        {
@@ -1556,6 +1096,39 @@ machine_thread_get_state(
                break;
            }
 
+           case THREAD_STATE_FLAVOR_LIST_10_9:
+           {
+               if (*count < 5)
+                       return (KERN_INVALID_ARGUMENT);
+
+               tstate[0] = x86_THREAD_STATE;
+               tstate[1] = x86_FLOAT_STATE;
+               tstate[2] = x86_EXCEPTION_STATE;
+               tstate[3] = x86_DEBUG_STATE;
+               tstate[4] = x86_AVX_STATE;
+
+               *count = 5;
+               break;
+           }
+
+#if !defined(RC_HIDE_XNU_J137)
+           case THREAD_STATE_FLAVOR_LIST_10_13:
+           {
+               if (*count < 6)
+                       return (KERN_INVALID_ARGUMENT);
+
+               tstate[0] = x86_THREAD_STATE;
+               tstate[1] = x86_FLOAT_STATE;
+               tstate[2] = x86_EXCEPTION_STATE;
+               tstate[3] = x86_DEBUG_STATE;
+               tstate[4] = x86_AVX_STATE;
+               tstate[5] = x86_AVX512_STATE;
+
+               *count = 6;
+               break;
+           }
+
+#endif
            case x86_SAVED_STATE32:
            {
                x86_saved_state32_t     *state;
@@ -1664,31 +1237,66 @@ machine_thread_get_state(
                return(kret);
            }
 
-       case x86_AVX_STATE32:
-       {
-               if (*count != x86_AVX_STATE32_COUNT)
+           case x86_AVX_STATE32:
+#if !defined(RC_HIDE_XNU_J137)
+           case x86_AVX512_STATE32:
+#endif
+           {
+               if (*count != _MachineStateCount[flavor])
                        return(KERN_INVALID_ARGUMENT);
 
                if (thread_is_64bit(thr_act))
                        return(KERN_INVALID_ARGUMENT);
 
-               *count = x86_AVX_STATE32_COUNT;
+               *count = _MachineStateCount[flavor];
 
                return fpu_get_fxstate(thr_act, tstate, flavor);
-       }
+           }
 
-       case x86_AVX_STATE64:
-       {
-               if (*count != x86_AVX_STATE64_COUNT)
+           case x86_AVX_STATE64:
+#if !defined(RC_HIDE_XNU_J137)
+           case x86_AVX512_STATE64:
+#endif
+           {
+               if (*count != _MachineStateCount[flavor])
                        return(KERN_INVALID_ARGUMENT);
 
                if ( !thread_is_64bit(thr_act))
                        return(KERN_INVALID_ARGUMENT);
 
-               *count = x86_AVX_STATE64_COUNT;
+               *count = _MachineStateCount[flavor];
 
                return fpu_get_fxstate(thr_act, tstate, flavor);
-       }
+           }
+
+           case x86_AVX_STATE:
+#if !defined(RC_HIDE_XNU_J137)
+           case x86_AVX512_STATE:
+#endif
+           {
+               x86_avx_state_t         *state;
+               thread_state_t          fstate;
+
+               if (*count < _MachineStateCount[flavor])
+                       return(KERN_INVALID_ARGUMENT);
+
+               *count = _MachineStateCount[flavor];
+               state = (x86_avx_state_t *)tstate;
+
+               bzero((char *)state, *count * sizeof(int));
+
+               if (thread_is_64bit(thr_act)) {
+                       flavor -= 1;    /* 64-bit flavor */
+                       fstate = (thread_state_t) &state->ufs.as64;
+               } else {
+                       flavor -= 2;    /* 32-bit flavor */
+                       fstate = (thread_state_t) &state->ufs.as32;
+               }
+               state->ash.flavor = flavor; 
+               state->ash.count  = _MachineStateCount[flavor];
+
+               return fpu_get_fxstate(thr_act, fstate, flavor);
+           }
 
            case x86_THREAD_STATE32: 
            {
@@ -1757,6 +1365,11 @@ machine_thread_get_state(
                *count = x86_EXCEPTION_STATE32_COUNT;
 
                get_exception_state32(thr_act, (x86_exception_state32_t *)tstate);
+               /*
+                * Suppress the cpu number for binary compatibility
+                * of this deprecated state.
+                */
+               ((x86_exception_state32_t *)tstate)->cpu = 0;
                break;
            }
 
@@ -1771,6 +1384,11 @@ machine_thread_get_state(
                *count = x86_EXCEPTION_STATE64_COUNT;
 
                get_exception_state64(thr_act, (x86_exception_state64_t *)tstate);
+               /*
+                * Suppress the cpu number for binary compatibility
+                * of this deprecated state.
+                */
+               ((x86_exception_state64_t *)tstate)->cpu = 0;
                break;
            }
 
@@ -2029,156 +1647,6 @@ machine_thread_get_kern_state(
 }
 
 
-/*
- * Initialize the machine-dependent state for a new thread.
- */
-kern_return_t
-machine_thread_create(
-       thread_t                thread,
-       task_t                  task)
-{
-       pcb_t                   pcb = &thread->machine.xxx_pcb;
-       x86_saved_state_t       *iss;
-
-#if NCOPY_WINDOWS > 0
-       inval_copy_windows(thread);
-
-       thread->machine.physwindow_pte = 0;
-       thread->machine.physwindow_busy = 0;
-#endif
-
-       /*
-        * Allocate pcb only if required.
-        */
-       if (pcb->sf == NULL) {
-               pcb->sf = zalloc(iss_zone);
-               if (pcb->sf == NULL)
-                       panic("iss_zone");
-       }
-
-        if (task_has_64BitAddr(task)) {
-               x86_sframe64_t          *sf64;
-
-               sf64 = (x86_sframe64_t *) pcb->sf;
-
-               bzero((char *)sf64, sizeof(x86_sframe64_t));
-
-               iss = (x86_saved_state_t *) &sf64->ssf;
-               iss->flavor = x86_SAVED_STATE64;
-               /*
-                *      Guarantee that the bootstrapped thread will be in user
-                *      mode.
-                */
-               iss->ss_64.isf.rflags = EFL_USER_SET;
-               iss->ss_64.isf.cs = USER64_CS;
-               iss->ss_64.isf.ss = USER_DS;
-               iss->ss_64.fs = USER_DS;
-               iss->ss_64.gs = USER_DS;
-       } else {
-               if (cpu_mode_is64bit()) {
-                       x86_sframe_compat32_t      *sfc32;
-
-                       sfc32 = (x86_sframe_compat32_t *)pcb->sf;
-
-                       bzero((char *)sfc32, sizeof(x86_sframe_compat32_t));
-
-                       iss = (x86_saved_state_t *) &sfc32->ssf.iss32;
-                       iss->flavor = x86_SAVED_STATE32;
-#if defined(__i386__)
-#if DEBUG
-                       {
-                               x86_saved_state_compat32_t *xssc;
-
-                               xssc  = (x86_saved_state_compat32_t *) iss;
-
-                               xssc->pad_for_16byte_alignment[0] = 0x64326432;
-                               xssc->pad_for_16byte_alignment[1] = 0x64326432;
-                       }
-#endif /* DEBUG */
-               } else {
-                       x86_sframe32_t          *sf32;
-                       struct real_descriptor  *ldtp;
-                       pmap_paddr_t            paddr;
-
-                       sf32 = (x86_sframe32_t *) pcb->sf;
-
-                       bzero((char *)sf32, sizeof(x86_sframe32_t));
-
-                       iss = (x86_saved_state_t *) &sf32->ssf;
-                       iss->flavor = x86_SAVED_STATE32;
-                       pcb->iss_pte0 = pte_kernel_rw(kvtophys((vm_offset_t)iss));
-                       if (0 == (paddr = pa_to_pte(kvtophys((vm_offset_t)iss + PAGE_SIZE))))
-                               pcb->iss_pte1 = INTEL_PTE_INVALID;
-                       else
-                               pcb->iss_pte1 = pte_kernel_rw(paddr);
-
-
-                       ldtp = (struct real_descriptor *)
-                                   pmap_index_to_virt(HIGH_FIXED_LDT_BEGIN);
-                       pcb->cthread_desc = ldtp[sel_idx(USER_DS)];
-                       pcb->uldt_desc = ldtp[sel_idx(USER_DS)];
-#endif /* __i386__ */
-               }
-               /*
-                *      Guarantee that the bootstrapped thread will be in user
-                *      mode.
-                */
-               iss->ss_32.cs = USER_CS;
-               iss->ss_32.ss = USER_DS;
-               iss->ss_32.ds = USER_DS;
-               iss->ss_32.es = USER_DS;
-               iss->ss_32.fs = USER_DS;
-               iss->ss_32.gs = USER_DS;
-               iss->ss_32.efl = EFL_USER_SET;
-
-       }
-       pcb->iss = iss;
-
-       thread->machine.pcb = pcb;
-       simple_lock_init(&pcb->lock, 0);
-
-       pcb->arg_store_valid = 0;
-       pcb->cthread_self = 0;
-       pcb->uldt_selector = 0;
-
-       /* Ensure that the "cthread" descriptor describes a valid
-        * segment.
-        */
-       if ((pcb->cthread_desc.access & ACC_P) == 0) {
-               struct real_descriptor  *ldtp;
-               ldtp = (struct real_descriptor *)current_ldt();
-               pcb->cthread_desc = ldtp[sel_idx(USER_DS)];
-       }
-
-
-       return(KERN_SUCCESS);
-}
-
-/*
- * Machine-dependent cleanup prior to destroying a thread
- */
-void
-machine_thread_destroy(
-       thread_t                thread)
-{
-       register pcb_t  pcb = thread->machine.pcb;
-
-       assert(pcb);
-        
-       if (pcb->ifps != 0)
-               fpu_free(pcb->ifps);
-       if (pcb->sf != 0) {
-               zfree(iss_zone, pcb->sf);
-               pcb->sf = 0;
-       }
-       if (pcb->ids) {
-               zfree(ids_zone, pcb->ids);
-               pcb->ids = NULL;
-       }
-       thread->machine.pcb = (pcb_t)0;
-
-}
-
 void
 machine_thread_switch_addrmode(thread_t thread)
 {
@@ -2189,17 +1657,19 @@ machine_thread_switch_addrmode(thread_t thread)
        disable_preemption();
 
        /*
-        * Reset the state saveareas.
+        * Reset the state saveareas. As we're resetting, we anticipate no
+        * memory allocations in this path.
         */
        machine_thread_create(thread, thread->task);
 
+       /* Adjust FPU state */
+       fpu_switch_addrmode(thread, task_has_64BitAddr(thread->task));
+
        /* If we're switching ourselves, reset the pcb addresses etc. */
        if (thread == current_thread()) {
-#if defined(__i386__)
-         if (current_cpu_datap()->cpu_active_cr3 != kernel_pmap->pm_cr3)
-               pmap_load_kernel_cr3();
-#endif /* defined(__i386) */
-         act_machine_switch_pcb(thread);
+               boolean_t istate = ml_set_interrupts_enabled(FALSE);
+               act_machine_switch_pcb(NULL, thread);
+               ml_set_interrupts_enabled(istate);
        }
        enable_preemption();
 }
@@ -2216,51 +1686,6 @@ machine_set_current_thread(thread_t thread)
        current_cpu_datap()->cpu_active_thread = thread;
 }
 
-/*
- * This is called when a task is terminated, and also on exec().
- * Clear machine-dependent state that is stored on the task.
- */
-void
-machine_thread_terminate_self(void)
-{
-       task_t self_task = current_task();
-       if (self_task) {
-           user_ldt_t user_ldt = self_task->i386_ldt;
-           if (user_ldt != 0) {
-               self_task->i386_ldt = 0;
-               user_ldt_free(user_ldt);
-           }
-
-           if (self_task->task_debug != NULL) {
-               zfree(ids_zone, self_task->task_debug);
-               self_task->task_debug = NULL;
-           }    
-       }
-}
-
-void
-act_machine_return(
-               int code
-               )
-{
-       /*
-        * This code is called with nothing locked.
-        * It also returns with nothing locked, if it returns.
-        *
-        * This routine terminates the current thread activation.
-        * If this is the only activation associated with its
-        * thread shuttle, then the entire thread (shuttle plus
-        * activation) is terminated.
-        */
-       assert( code == KERN_TERMINATED );
-
-       thread_terminate_self();
-
-       /*NOTREACHED*/
-
-       panic("act_machine_return(%d): TALKING ZOMBIE! (1)", code);
-}
-
 
 /*
  * Perform machine-dependent per-thread initializations
@@ -2268,133 +1693,26 @@ act_machine_return(
 void
 machine_thread_init(void)
 {
-       if (cpu_mode_is64bit()) {
-               assert(sizeof(x86_sframe_compat32_t) % 16 == 0);
-               iss_zone = zinit(sizeof(x86_sframe64_t),
-                               thread_max * sizeof(x86_sframe64_t),
-                               THREAD_CHUNK * sizeof(x86_sframe64_t),
-                               "x86_64 saved state");
-
-               ids_zone = zinit(sizeof(x86_debug_state64_t),
-                                thread_max * sizeof(x86_debug_state64_t),
-                                THREAD_CHUNK * sizeof(x86_debug_state64_t),
-                                "x86_64 debug state");
-
-       } else {
-               iss_zone = zinit(sizeof(x86_sframe32_t),
-                               thread_max * sizeof(x86_sframe32_t),
-                               THREAD_CHUNK * sizeof(x86_sframe32_t),
-                               "x86 saved state");
-               ids_zone = zinit(sizeof(x86_debug_state32_t),
-                               thread_max * (sizeof(x86_debug_state32_t)),
-                               THREAD_CHUNK * (sizeof(x86_debug_state32_t)),
-                               "x86 debug state");
-       }
-       fpu_module_init();
-}
+       iss_zone = zinit(sizeof(x86_saved_state_t),
+                       thread_max * sizeof(x86_saved_state_t),
+                       THREAD_CHUNK * sizeof(x86_saved_state_t),
+                       "x86_64 saved state");
 
+        ids_zone = zinit(sizeof(x86_debug_state64_t),
+                        thread_max * sizeof(x86_debug_state64_t),
+                        THREAD_CHUNK * sizeof(x86_debug_state64_t),
+                        "x86_64 debug state");
 
-#if defined(__i386__)
-/*
- * Some routines for debugging activation code
- */
-static void    dump_handlers(thread_t);
-void           dump_regs(thread_t);
-int            dump_act(thread_t thr_act);
-
-static void
-dump_handlers(thread_t thr_act)
-{
-       ReturnHandler *rhp = thr_act->handlers;
-       int     counter = 0;
-
-       printf("\t");
-       while (rhp) {
-               if (rhp == &thr_act->special_handler){
-                       if (rhp->next)
-                               printf("[NON-Zero next ptr(%p)]", rhp->next);
-                       printf("special_handler()->");
-                       break;
-               }
-               printf("hdlr_%d(%p)->", counter, rhp->handler);
-               rhp = rhp->next;
-               if (++counter > 32) {
-                       printf("Aborting: HUGE handler chain\n");
-                       break;
-               }
-       }
-       printf("HLDR_NULL\n");
-}
-
-void
-dump_regs(thread_t thr_act)
-{
-       if (thr_act->machine.pcb == NULL)
-               return;
-
-       if (thread_is_64bit(thr_act)) {
-               x86_saved_state64_t     *ssp;
-
-               ssp = USER_REGS64(thr_act);
-
-               panic("dump_regs: 64bit tasks not yet supported");
-
-       } else {
-               x86_saved_state32_t     *ssp;
-
-               ssp = USER_REGS32(thr_act);
-
-               /*
-                * Print out user register state
-                */
-               printf("\tRegs:\tedi=%x esi=%x ebp=%x ebx=%x edx=%x\n",
-                       ssp->edi, ssp->esi, ssp->ebp, ssp->ebx, ssp->edx);
-
-               printf("\t\tecx=%x eax=%x eip=%x efl=%x uesp=%x\n",
-                       ssp->ecx, ssp->eax, ssp->eip, ssp->efl, ssp->uesp);
-
-               printf("\t\tcs=%x ss=%x\n", ssp->cs, ssp->ss);
-       }
+       fpu_module_init();
 }
 
-int
-dump_act(thread_t thr_act)
-{
-       if (!thr_act)
-               return(0);
-
-       printf("thread(%p)(%d): task=%p(%d)\n",
-                       thr_act, thr_act->ref_count,
-                       thr_act->task,
-                       thr_act->task   ? thr_act->task->ref_count : 0);
-
-       printf("\tsusp=%d user_stop=%d active=%x ast=%x\n",
-                       thr_act->suspend_count, thr_act->user_stop_count,
-                       thr_act->active, thr_act->ast);
-       printf("\tpcb=%p\n", thr_act->machine.pcb);
-
-       if (thr_act->kernel_stack) {
-               vm_offset_t stack = thr_act->kernel_stack;
-
-               printf("\tk_stk %lx  eip %x ebx %x esp %x iss %p\n",
-                       (long)stack, STACK_IKS(stack)->k_eip, STACK_IKS(stack)->k_ebx,
-                       STACK_IKS(stack)->k_esp, STACK_IEL(stack)->saved_state);
-       }
 
-       dump_handlers(thr_act);
-       dump_regs(thr_act);
-       return((int)thr_act);
-}
-#endif
 
 user_addr_t
 get_useraddr(void)
 {
         thread_t thr_act = current_thread();
  
-       if (thr_act->machine.pcb == NULL) 
-               return(0);
-
         if (thread_is_64bit(thr_act)) {
                x86_saved_state64_t     *iss64;
                
@@ -2447,16 +1765,17 @@ machine_stack_attach(
 
        assert(stack);
        thread->kernel_stack = stack;
+       thread_initialize_kernel_state(thread);
 
        statep = STACK_IKS(stack);
 #if defined(__x86_64__)
        statep->k_rip = (unsigned long) Thread_continue;
        statep->k_rbx = (unsigned long) thread_continue;
-       statep->k_rsp = (unsigned long) STACK_IEL(stack);
+       statep->k_rsp = (unsigned long) STACK_IKS(stack);
 #else
        statep->k_eip = (unsigned long) Thread_continue;
        statep->k_ebx = (unsigned long) thread_continue;
-       statep->k_esp = (unsigned long) STACK_IEL(stack);
+       statep->k_esp = (unsigned long) STACK_IKS(stack);
 #endif
 
        return;
@@ -2475,9 +1794,7 @@ machine_stack_handoff(thread_t old,
        assert(new);
        assert(old);
 
-#if CONFIG_COUNTERS
-       machine_pmc_cswitch(old, new);
-#endif
+       kpc_off_cpu(old);
 
        stack = old->kernel_stack;
        if (stack == old->reserved_stack) {
@@ -2492,16 +1809,20 @@ machine_stack_handoff(thread_t old,
         */
        new->kernel_stack = stack;
 
-       fpu_save_context(old);
+       fpu_switch_context(old, new);
        
-
        old->machine.specFlags &= ~OnProc;
        new->machine.specFlags |= OnProc;
 
-       PMAP_SWITCH_CONTEXT(old, new, cpu_number());
-       act_machine_switch_pcb(new);
+       pmap_switch_context(old, new, cpu_number());
+       act_machine_switch_pcb(old, new);
+
+#if HYPERVISOR
+       ml_hv_cswitch(old, new);
+#endif
 
        machine_set_current_thread(new);
+       thread_initialize_kernel_state(new);
 
        return;
 }
@@ -2642,16 +1963,6 @@ void act_thread_cfree(__unused void *ctx)
 {
        /* XXX - Unused */
 }
-void x86_toggle_sysenter_arg_store(thread_t thread, boolean_t valid);
-void x86_toggle_sysenter_arg_store(thread_t thread, boolean_t valid) {
-       thread->machine.pcb->arg_store_valid = valid;
-}
-
-boolean_t x86_sysenter_arg_store_isvalid(thread_t thread);
-
-boolean_t x86_sysenter_arg_store_isvalid(thread_t thread) {
-       return (thread->machine.pcb->arg_store_valid);
-}
 
 /*
  * Duplicate one x86_debug_state32_t to another.  "all" parameter
@@ -2702,4 +2013,3 @@ copy_debug_state64(
        target->dr6 = src->dr6;
        target->dr7 = src->dr7;
 }
-