git.saurik.com Git - apple/xnu.git/blobdiff - osfmk/i386/trap.c
xnu-792.13.8.tar.gz
[apple/xnu.git] / osfmk / i386 / trap.c
index afb29fa3825b97000515737f2b6429df3afebcee..97ded92fa81e57df5255888ba7ea41178a07c170 100644 (file)
@@ -71,6 +71,7 @@
 #include <i386/trap.h>
 #include <i386/pmap.h>
 #include <i386/fpu.h>
+#include <architecture/i386/pio.h> /* inb() */
 
 #include <mach/exception.h>
 #include <mach/kern_return.h>
 #include <kern/spl.h>
 #include <kern/misc_protos.h>
 
+#include <sys/kdebug.h>
+
 #if    MACH_KGDB
 #include <kgdb/kgdb_defs.h>
 #endif /* MACH_KGDB */
 
-#include <i386/intel_read_fault.h>
-
-#if     MACH_KGDB
-#include <kgdb/kgdb_defs.h>
-#endif  /* MACH_KGDB */
-
 #if    MACH_KDB
+#include <debug.h>
 #include <ddb/db_watch.h>
 #include <ddb/db_run.h>
 #include <ddb/db_break.h>
 #include <string.h>
 
 #include <i386/io_emulate.h>
+#include <i386/postcode.h>
+#include <i386/mp_desc.h>
+#include <i386/proc_reg.h>
+#include <mach/i386/syscall_sw.h>
 
 /*
  * Forward declarations
  */
-extern void            user_page_fault_continue(
-                               kern_return_t           kr);
+static void user_page_fault_continue(kern_return_t kret);
+static void panic_trap(x86_saved_state32_t *saved_state);
+static void set_recovery_ip(x86_saved_state32_t *saved_state, vm_offset_t ip);
 
-extern boolean_t       v86_assist(
-                               thread_t                thread,
-                               struct i386_saved_state *regs);
-
-extern boolean_t       check_io_fault(
-                               struct i386_saved_state *regs);
-
-extern int             inst_fetch(
-                               int                     eip,
-                               int                     cs);
+perfCallback perfTrapHook = NULL; /* Pointer to CHUD trap hook routine */
+perfCallback perfASTHook  = NULL; /* Pointer to CHUD AST hook routine */
 
 void
 thread_syscall_return(
         kern_return_t ret)
 {
-        register thread_t   thr_act = current_thread();
-        register struct i386_saved_state *regs = USER_REGS(thr_act);
-        regs->eax = ret;
+        thread_t       thr_act = current_thread();
+
+        if (thread_is_64bit(thr_act)) {
+               x86_saved_state64_t     *regs;
+               
+               regs = USER_REGS64(thr_act);
+
+               if (kdebug_enable && ((regs->rax & SYSCALL_CLASS_MASK) == (SYSCALL_CLASS_MACH << SYSCALL_CLASS_SHIFT))) {
+                       /* Mach trap */
+                       KERNEL_DEBUG_CONSTANT(
+                                             MACHDBG_CODE(DBG_MACH_EXCP_SC, ((int) (regs->rax & SYSCALL_NUMBER_MASK)))
+                                             | DBG_FUNC_END,
+                                             ret, 0, 0, 0, 0);
+               }
+               regs->rax = ret;
+
+       } else {
+               x86_saved_state32_t     *regs;
+               
+               regs = USER_REGS32(thr_act);
+
+               if (kdebug_enable && ((int) regs->eax < 0)) {
+                       /* Mach trap */
+                       KERNEL_DEBUG_CONSTANT(
+                                             MACHDBG_CODE(DBG_MACH_EXCP_SC, -((int) regs->eax))
+                                             | DBG_FUNC_END,
+                                             ret, 0, 0, 0, 0);
+               }
+               regs->eax = ret;
+       }
         thread_exception_return();
         /*NOTREACHED*/
 }
@@ -149,27 +171,71 @@ extern boolean_t db_breakpoints_inserted;
 void
 thread_kdb_return(void)
 {
-       register thread_t       thread = current_thread();
-       register struct i386_saved_state *regs = USER_REGS(thread);
+       thread_t                thr_act = current_thread();
+       x86_saved_state_t       *iss = USER_STATE(thr_act);
 
-       if (kdb_trap(regs->trapno, regs->err, regs)) {
-#if            MACH_LDEBUG
-               assert(thread->mutex_count == 0); 
-#endif         /* MACH_LDEBUG */
-               thread_exception_return();
-               /*NOTREACHED*/
+        if (is_saved_state64(iss)) {
+               x86_saved_state64_t     *regs;
+               
+               regs = saved_state64(iss);
+
+               if (kdb_trap(regs->isf.trapno, (int)regs->isf.err, (void *)regs)) {
+                       thread_exception_return();
+                       /*NOTREACHED*/
+               }
+
+       } else {
+               x86_saved_state32_t     *regs;
+               
+               regs = saved_state32(iss);
+
+               if (kdb_trap(regs->trapno, regs->err, (void *)regs)) {
+                       thread_exception_return();
+                       /*NOTREACHED*/
+               }
        }
 }
-boolean_t let_ddb_vm_fault = FALSE;
 
 #endif /* MACH_KDB */
 
 void
 user_page_fault_continue(
-       kern_return_t   kr)
+                        kern_return_t  kr)
 {
-       register thread_t       thread = current_thread();
-       register struct i386_saved_state *regs = USER_REGS(thread);
+       thread_t        thread = current_thread();
+       x86_saved_state_t *regs = USER_STATE(thread);
+       ast_t           *myast;
+       boolean_t       intr;
+       user_addr_t     vaddr;
+#if    MACH_KDB
+       int             err;
+       int             trapno;
+#endif
+
+       assert((is_saved_state32(regs) && !thread_is_64bit(thread)) ||
+              (is_saved_state64(regs) &&  thread_is_64bit(thread)));
+
+        if (thread_is_64bit(thread)) {
+               x86_saved_state64_t     *uregs;
+
+               uregs = USER_REGS64(thread);
+
+#if    MACH_KDB
+               trapno = uregs->isf.trapno;
+               err = uregs->isf.err;
+#endif
+               vaddr = (user_addr_t)uregs->cr2;
+       } else {
+               x86_saved_state32_t     *uregs;
+
+               uregs = USER_REGS32(thread);
+
+#if    MACH_KDB
+               trapno = uregs->trapno;
+               err = uregs->err;
+#endif
+               vaddr = uregs->cr2;
+       }
 
        if ((kr == KERN_SUCCESS) || (kr == KERN_ABORTED)) {
 #if    MACH_KDB
@@ -178,28 +244,34 @@ user_page_fault_continue(
                }
                if (db_watchpoint_list &&
                    db_watchpoints_inserted &&
-                   (regs->err & T_PF_WRITE) &&
+                   (err & T_PF_WRITE) &&
                    db_find_watchpoint(thread->map,
-                                      (vm_offset_t)regs->cr2,
+                                      (vm_offset_t)vaddr,
                                       regs))
                        kdb_trap(T_WATCHPOINT, 0, regs);
 #endif /* MACH_KDB */
+               intr = ml_set_interrupts_enabled(FALSE);
+               myast = ast_pending();
+               while (*myast & AST_ALL) {
+                       ast_taken(AST_ALL, intr);
+                       ml_set_interrupts_enabled(FALSE);
+                       myast = ast_pending();
+               }
+               ml_set_interrupts_enabled(intr);
+
                thread_exception_return();
                /*NOTREACHED*/
        }
 
 #if    MACH_KDB
        if (debug_all_traps_with_kdb &&
-           kdb_trap(regs->trapno, regs->err, regs)) {
-#if            MACH_LDEBUG
-               assert(thread->mutex_count == 0);
-#endif         /* MACH_LDEBUG */
+           kdb_trap(trapno, err, regs)) {
                thread_exception_return();
                /*NOTREACHED*/
        }
 #endif /* MACH_KDB */
 
-       i386_exception(EXC_BAD_ACCESS, kr, regs->cr2);
+       i386_exception(EXC_BAD_ACCESS, kr, vaddr);
        /*NOTREACHED*/
 }
 
@@ -214,56 +286,165 @@ struct recovery {
 extern struct recovery recover_table[];
 extern struct recovery recover_table_end[];
 
-/*
- * Recovery from Successful fault in copyout does not
- * return directly - it retries the pte check, since
- * the 386 ignores write protection in kernel mode.
- */
-extern struct recovery retry_table[];
-extern struct recovery retry_table_end[];
-
-const char *           trap_type[] = {TRAP_NAMES};
-int    TRAP_TYPES = sizeof(trap_type)/sizeof(trap_type[0]);
+const char *   trap_type[] = {TRAP_NAMES};
+unsigned       TRAP_TYPES = sizeof(trap_type)/sizeof(trap_type[0]);
 
+extern unsigned panic_io_port;
 
+static inline void
+reset_dr7(void)
+{
+       uint32_t dr7 = 0x400; /* magic dr7 reset value */
+       __asm__ volatile("movl %0,%%dr7" : : "r" (dr7));
+}
+#if MACH_KDP
+unsigned kdp_has_active_watchpoints = 0;
+#endif
 /*
  * Trap from kernel mode.  Only page-fault errors are recoverable,
  * and then only in special circumstances.  All other errors are
  * fatal.  Return value indicates if trap was handled.
  */
-boolean_t
+void
 kernel_trap(
-       register struct i386_saved_state        *regs)
+       x86_saved_state_t       *state)
 {
+       x86_saved_state32_t     *saved_state;
        int                     code;
-       unsigned int            subcode;
-       int                     interruptible = THREAD_UNINT;
-       register int            type;
+       user_addr_t             vaddr;
+       int                     type;
        vm_map_t                map;
        kern_return_t           result = KERN_FAILURE;
-       register thread_t       thread;
+       thread_t                thread;
+       ast_t                   *myast;
+       boolean_t               intr;
+       vm_prot_t               prot;
+        struct recovery                *rp;
+       vm_offset_t             kern_ip;
+       int                     fault_in_copy_window = -1;
+       int                     is_user = 0;
+#if MACH_KDB   
+       pt_entry_t              *pte;
+#endif /* MACH_KDB */
 
-       type = regs->trapno;
-       code = regs->err;
        thread = current_thread();
 
+       if (is_saved_state64(state))
+               panic("kernel_trap(%p) with 64-bit state", state);
+       saved_state = saved_state32(state);
+
+       vaddr = (user_addr_t)saved_state->cr2;
+       type  = saved_state->trapno;
+       code  = saved_state->err & 0xffff;
+       intr  = (saved_state->efl & EFL_IF) != 0;       /* state of ints at trap */
+
+       kern_ip = (vm_offset_t)saved_state->eip;
+
+       myast = ast_pending();
+
+       if (perfASTHook) {
+               if (*myast & AST_CHUD_ALL)
+                       perfASTHook(type, NULL, 0, 0);
+       } else
+               *myast &= ~AST_CHUD_ALL;
+
+       /*
+        * Is there a hook?
+        */
+       if (perfTrapHook) {
+               if (perfTrapHook(type, NULL, 0, 0) == KERN_SUCCESS) {
+                       /*
+                        * If it succeeds, we are done...
+                        */
+                       return;
+               }
+       }
+       /*
+        * we come here with interrupts off as we don't want to recurse
+        * on preemption below.  but we do want to re-enable interrupts
+        * as soon as we possibly can to hold latency down
+        */
+       if (T_PREEMPT == type) {
+
+               KERNEL_DEBUG_CONSTANT((MACHDBG_CODE(DBG_MACH_EXCP_KTRAP_x86, type)) | DBG_FUNC_NONE,
+                                     0, 0, 0, kern_ip, 0);
+
+               ast_taken(AST_PREEMPTION, FALSE);
+               return;
+       }
+       
+       if (T_PAGE_FAULT == type) {
+               /*
+                * assume we're faulting in the kernel map
+                */
+               map = kernel_map;
+
+               if (thread != THREAD_NULL && thread->map != kernel_map) {
+                       vm_offset_t     copy_window_base;
+                       vm_offset_t     kvaddr;
+                       int             window_index;
+
+                       kvaddr = (vm_offset_t)vaddr;
+                       /*
+                        * must determine if fault occurred in
+                        * the copy window while pre-emption is
+                        * disabled for this processor so that
+                        * we only need to look at the window
+                        * associated with this processor
+                        */
+                       copy_window_base = current_cpu_datap()->cpu_copywindow_base;
+
+                       if (kvaddr >= copy_window_base && kvaddr < (copy_window_base + (NBPDE * NCOPY_WINDOWS)) ) {
+
+                               window_index = (kvaddr - copy_window_base) / NBPDE;
+
+                               if (thread->machine.copy_window[window_index].user_base != (user_addr_t)-1) {
+
+                                       kvaddr -= (copy_window_base + (NBPDE * window_index));
+                                       vaddr = thread->machine.copy_window[window_index].user_base + kvaddr;
+
+                                       map = thread->map;
+                                       fault_in_copy_window = window_index;
+                               }
+                               is_user = -1;
+                       }
+               }
+       }
+       KERNEL_DEBUG_CONSTANT((MACHDBG_CODE(DBG_MACH_EXCP_KTRAP_x86, type)) | DBG_FUNC_NONE,
+                             (int)(vaddr >> 32), (int)vaddr, is_user, kern_ip, 0);
+
+
+       (void) ml_set_interrupts_enabled(intr);
+
        switch (type) {
-           case T_PREEMPT:
-               ast_taken(AST_PREEMPTION, FALSE);
-               return (TRUE);
 
            case T_NO_FPU:
                fpnoextflt();
-               return (TRUE);
+               return;
 
            case T_FPU_FAULT:
                fpextovrflt();
-               return (TRUE);
+               return;
 
            case T_FLOATING_POINT_ERROR:
                fpexterrflt();
-               return (TRUE);
+               return;
 
+           case T_SSE_FLOAT_ERROR:
+               fpSSEexterrflt();
+               return;
+           case T_DEBUG:
+                   if ((saved_state->efl & EFL_TF) == 0
+                   && !kdp_has_active_watchpoints) {
+                           /* We've somehow encountered a debug
+                            * register match that does not belong
+                            * to the kernel debugger.
+                            * This isn't supposed to happen.
+                            */
+                           reset_dr7();
+                           return;
+                           }
+                   goto debugger_entry;
            case T_PAGE_FAULT:
                /*
                 * If the current map is a submap of the kernel map,
@@ -272,144 +453,116 @@ kernel_trap(
                 * (vm_map_lookup), we may deadlock on the kernel map
                 * lock.
                 */
-#if    MACH_KDB
-               mp_disable_preemption();
-               if (db_active
-                   && kdb_active[cpu_number()]
-                   && !let_ddb_vm_fault) {
-                       /*
-                        * Force kdb to handle this one.
-                        */
-                       mp_enable_preemption();
-                       return (FALSE);
-               }
-               mp_enable_preemption();
-#endif /* MACH_KDB */
-               subcode = regs->cr2;    /* get faulting address */
 
-               if (subcode > LINEAR_KERNEL_ADDRESS) {
-                   map = kernel_map;
-               } else if (thread == THREAD_NULL)
-                   map = kernel_map;
-               else {
-                   map = thread->map;
-               }
+               prot = VM_PROT_READ;
+
+               if (code & T_PF_WRITE)
+                       prot |= VM_PROT_WRITE;
+#if     PAE
+               if (code & T_PF_EXECUTE)
+                       prot |= VM_PROT_EXECUTE;
+#endif
+
 #if    MACH_KDB
                /*
                 * Check for watchpoint on kernel static data.
                 * vm_fault would fail in this case 
                 */
-               if (map == kernel_map && 
-                   db_watchpoint_list &&
-                   db_watchpoints_inserted &&
-                   (code & T_PF_WRITE) &&
-                   (vm_offset_t)subcode < vm_last_phys &&
-                   ((*(pte = pmap_pte(kernel_pmap, (vm_offset_t)subcode))) &
-                    INTEL_PTE_WRITE) == 0) {
-                 *pte = *pte | INTEL_PTE_VALID | INTEL_PTE_WRITE; /* XXX need invltlb here? */
+               if (map == kernel_map && db_watchpoint_list && db_watchpoints_inserted &&
+                   (code & T_PF_WRITE) && vaddr < vm_map_max(map) &&
+                   ((*(pte = pmap_pte(kernel_pmap, (vm_map_offset_t)vaddr))) & INTEL_PTE_WRITE) == 0) {
+                       pmap_store_pte(
+                               pte,
+                               *pte | INTEL_PTE_VALID | INTEL_PTE_WRITE);
+                       /* XXX need invltlb here? */
+
                        result = KERN_SUCCESS;
-               } else
-#endif /* MACH_KDB */
-               {
-                       /*
-                        * Since the 386 ignores write protection in
-                        * kernel mode, always try for write permission
-                        * first.  If that fails and the fault was a
-                        * read fault, retry with read permission.
-                        */
-                       if (map == kernel_map) {
-                               register struct recovery *rp;
-
-                               interruptible = THREAD_UNINT;
-                               for (rp = recover_table; rp < recover_table_end; rp++) {
-                                       if (regs->eip == rp->fault_addr) {
-                                               interruptible = THREAD_ABORTSAFE;
-                                               break;
-                                       }
-                               }
-                       }
-                       result = vm_fault(map,
-                                         trunc_page((vm_offset_t)subcode),
-                                         VM_PROT_READ|VM_PROT_WRITE,
-                                         FALSE, 
-                                         (map == kernel_map) ? interruptible : THREAD_ABORTSAFE, NULL, 0);
+                       goto look_for_watchpoints;
                }
+#endif /* MACH_KDB */
+
+               result = vm_fault(map,
+                                 vm_map_trunc_page(vaddr),
+                                 prot,
+                                 FALSE, 
+                                 THREAD_UNINT, NULL, 0);
+
 #if    MACH_KDB
                if (result == KERN_SUCCESS) {
-                   /* Look for watchpoints */
-                   if (db_watchpoint_list &&
-                       db_watchpoints_inserted &&
-                       (code & T_PF_WRITE) &&
-                       db_find_watchpoint(map,
-                               (vm_offset_t)subcode, regs))
-                       kdb_trap(T_WATCHPOINT, 0, regs);
+                       /*
+                        * Look for watchpoints
+                        */
+look_for_watchpoints:
+                       if (map == kernel_map && db_watchpoint_list && db_watchpoints_inserted && (code & T_PF_WRITE) &&
+                           db_find_watchpoint(map, vaddr, saved_state))
+                               kdb_trap(T_WATCHPOINT, 0, saved_state);
                }
-               else
 #endif /* MACH_KDB */
-               if ((code & T_PF_WRITE) == 0 &&
-                   result == KERN_PROTECTION_FAILURE)
-               {
-                   /*
-                    *  Must expand vm_fault by hand,
-                    *  so that we can ask for read-only access
-                    *  but enter a (kernel)writable mapping.
-                    */
-                   result = intel_read_fault(map,
-                                         trunc_page((vm_offset_t)subcode));
-               }
 
                if (result == KERN_SUCCESS) {
-                   /*
-                    * Certain faults require that we back up
-                    * the EIP.
-                    */
-                   register struct recovery *rp;
-
-                   for (rp = retry_table; rp < retry_table_end; rp++) {
-                       if (regs->eip == rp->fault_addr) {
-                           regs->eip = rp->recover_addr;
-                           break;
+
+                       if (fault_in_copy_window != -1) {
+                               pt_entry_t      *updp;
+                               pt_entry_t      *kpdp;
+
+                               /*
+                                * in case there was no page table assigned
+                                * for the user base address and the pmap
+                                * got 'expanded' due to this fault, we'll
+                                * copy in the descriptor 
+                                *
+                                * we're either setting the page table descriptor
+                                * to the same value or it was 0... no need
+                                * for a TLB flush in either case
+                                */
+
+                               ml_set_interrupts_enabled(FALSE);
+                               updp = pmap_pde(map->pmap, thread->machine.copy_window[fault_in_copy_window].user_base);
+                               assert(updp);
+                               if (0 == updp) panic("trap: updp 0"); /* XXX DEBUG */
+                               kpdp = current_cpu_datap()->cpu_copywindow_pdp;
+                               kpdp += fault_in_copy_window;
+
+#if JOE_DEBUG
+                               if (*kpdp && (*kpdp & PG_FRAME) != (*updp & PG_FRAME))
+                                       panic("kernel_fault: user pdp doesn't match - updp = 0x%x, kpdp = 0x%x\n", updp, kpdp);
+#endif
+                               pmap_store_pte(kpdp, *updp);
+
+                               (void) ml_set_interrupts_enabled(intr);
                        }
-                   }
-                   return (TRUE);
+                       return;
                }
-
-               /* fall through */
+               /*
+                * fall through
+                */
 
            case T_GENERAL_PROTECTION:
-
                /*
                 * If there is a failure recovery address
                 * for this fault, go there.
                 */
-               {
-                   register struct recovery *rp;
-
-                   for (rp = recover_table;
-                        rp < recover_table_end;
-                        rp++) {
-                       if (regs->eip == rp->fault_addr) {
-                           regs->eip = rp->recover_addr;
-                           return (TRUE);
+               for (rp = recover_table; rp < recover_table_end; rp++) {
+                       if (kern_ip == rp->fault_addr) {
+                               set_recovery_ip(saved_state, rp->recover_addr);
+                               return;
                        }
-                   }
                }
 
                /*
-                * Check thread recovery address also -
-                * v86 assist uses it.
+                * Check thread recovery address also.
                 */
                if (thread->recover) {
-                   regs->eip = thread->recover;
-                   thread->recover = 0;
-                   return (TRUE);
+                       set_recovery_ip(saved_state, thread->recover);
+                       thread->recover = 0;
+                       return;
                }
-
                /*
                 * Unanticipated page-fault errors in kernel
                 * should not happen.
+                *
+                * fall through...
                 */
-               /* fall through... */
 
            default:
                /*
@@ -418,71 +571,337 @@ kernel_trap(
                 */
                if (type == 15) {
                        kprintf("kernel_trap() ignoring spurious trap 15\n"); 
-                       return (TRUE);
+                       return;
                }
-
-               /*
-                * ...and return failure, so that locore can call into
-                * debugger.
+debugger_entry:
+               /* Ensure that the i386_kernel_state at the base of the
+                * current thread's stack (if any) is synchronized with the
+                * context at the moment of the trap, to facilitate
+                * access through the debugger.
                 */
+               sync_iss_to_iks(saved_state);
+#if MACH_KDB
+restart_debugger:
+#endif /* MACH_KDB */          
 #if  MACH_KDP
-               kdp_i386_trap(type, regs, result, regs->cr2);
-#endif
-               return (FALSE);
+                if (current_debugger != KDB_CUR_DB) {
+                       if (kdp_i386_trap(type, saved_state, result, vaddr))
+                               return;
+               }
+#endif /* MACH_KDP */
+#if MACH_KDB
+               else
+                       if (kdb_trap(type, code, saved_state)) {
+                               if (switch_debugger) {
+                                       current_debugger = KDP_CUR_DB;
+                                       switch_debugger = 0;
+                                       goto restart_debugger;
+                               }
+                               return;
+                       }
+#endif /* MACH_KDB */
        }
-       return (TRUE);
+
+       panic_trap(saved_state);
+       /*
+        * NO RETURN
+        */
+}
+
+
+static void
+set_recovery_ip(x86_saved_state32_t  *saved_state, vm_offset_t ip)
+{
+        saved_state->eip = ip;
+}
+
+
+static void
+panic_trap(x86_saved_state32_t *regs)
+{
+       const char *trapname = "Unknown";
+       uint32_t        cr0 = get_cr0();
+       uint32_t        cr2 = get_cr2();
+       uint32_t        cr3 = get_cr3();
+       uint32_t        cr4 = get_cr4();
+
+       if (panic_io_port)
+         (void)inb(panic_io_port);
+
+       kprintf("panic trap number 0x%x, eip 0x%x\n", regs->trapno, regs->eip);
+       kprintf("cr0 0x%08x cr2 0x%08x cr3 0x%08x cr4 0x%08x\n",
+               cr0, cr2, cr3, cr4);
+
+       if (regs->trapno < TRAP_TYPES)
+               trapname = trap_type[regs->trapno];
+
+       panic("Unresolved kernel trap (CPU %d, Type %d=%s), registers:\n"
+             "CR0: 0x%08x, CR2: 0x%08x, CR3: 0x%08x, CR4: 0x%08x\n"
+             "EAX: 0x%08x, EBX: 0x%08x, ECX: 0x%08x, EDX: 0x%08x\n"
+             "CR2: 0x%08x, EBP: 0x%08x, ESI: 0x%08x, EDI: 0x%08x\n"
+             "EFL: 0x%08x, EIP: 0x%08x, CS:  0x%08x, DS:  0x%08x\n",
+             cpu_number(), regs->trapno, trapname, cr0, cr2, cr3, cr4,
+             regs->eax,regs->ebx,regs->ecx,regs->edx,
+             regs->cr2,regs->ebp,regs->esi,regs->edi,
+             regs->efl,regs->eip,regs->cs, regs->ds);
+       /*
+        * This next statement is not executed,
+        * but it's needed to stop the compiler using tail call optimization
+        * for the panic call - which confuses the subsequent backtrace.
+        */
+       cr0 = 0;
 }
 
+extern void     kprintf_break_lock(void);
+
+
 /*
- * Called if both kernel_trap() and kdb_trap() fail.
+ * Called from locore on a special reserved stack after a double-fault
+ * is taken in kernel space.
+ * Kernel stack overflow is one route here.
  */
 void
-panic_trap(
-       register struct i386_saved_state        *regs)
+panic_double_fault(int code)
 {
-       int             code;
-       register int    type;
+       struct i386_tss *my_ktss = current_ktss();
+
+       /* Set postcode (DEBUG only) */
+       postcode(PANIC_DOUBLE_FAULT);
+
+/* Issue an I/O port read if one has been requested - this is an event logic
+ * analyzers can use as a trigger point.
+ */
+       if (panic_io_port)
+               (void)inb(panic_io_port);
+
+       /*
+        * Break kprintf lock in case of recursion,
+        * and record originally faulted instruction address.
+        */
+       kprintf_break_lock();
+
+#if MACH_KDP
+       /*
+        * Print backtrace leading to first fault:
+        */
+       panic_i386_backtrace((void *) my_ktss->ebp, 10);
+#endif
+
+       panic("Double fault (CPU:%d, thread:%p, code:0x%x),"
+             "registers:\n"
+             "CR0: 0x%08x, CR2: 0x%08x, CR3: 0x%08x, CR4: 0x%08x\n"
+             "EAX: 0x%08x, EBX: 0x%08x, ECX: 0x%08x, EDX: 0x%08x\n"
+             "ESP: 0x%08x, EBP: 0x%08x, ESI: 0x%08x, EDI: 0x%08x\n"
+             "EFL: 0x%08x, EIP: 0x%08x\n",
+             cpu_number(), current_thread(), code,
+             get_cr0(), get_cr2(), get_cr3(), get_cr4(),
+             my_ktss->eax, my_ktss->ebx, my_ktss->ecx, my_ktss->edx,
+             my_ktss->esp, my_ktss->ebp, my_ktss->esi, my_ktss->edi,
+             my_ktss->eflags, my_ktss->eip);
+}
+
 
-       type = regs->trapno;
-       code = regs->err;
+/*
+ * Called from locore on a special reserved stack after a machine-check
+ */
+void
+panic_machine_check(int code)
+{
+       struct i386_tss *my_ktss = current_ktss();
+
+       /* Set postcode (DEBUG only) */
+       postcode(PANIC_MACHINE_CHECK);
 
-       printf("trap type %d, code = %x, pc = %x\n",
-               type, code, regs->eip);
-       panic("trap");
+       /*
+        * Break kprintf lock in case of recursion,
+        * and record originally faulted instruction address.
+        */
+       kprintf_break_lock();
+       panic("Machine-check (CPU:%d, thread:%p, code:0x%x),"
+             "registers:\n"
+             "CR0: 0x%08x, CR2: 0x%08x, CR3: 0x%08x, CR4: 0x%08x\n"
+             "EAX: 0x%08x, EBX: 0x%08x, ECX: 0x%08x, EDX: 0x%08x\n"
+             "ESP: 0x%08x, EBP: 0x%08x, ESI: 0x%08x, EDI: 0x%08x\n"
+             "EFL: 0x%08x, EIP: 0x%08x\n",
+             cpu_number(), current_thread(), code,
+             get_cr0(), get_cr2(), get_cr3(), get_cr4(),
+             my_ktss->eax, my_ktss->ebx, my_ktss->ecx, my_ktss->edx,
+             my_ktss->esp, my_ktss->ebp, my_ktss->esi, my_ktss->edi,
+             my_ktss->eflags, my_ktss->eip);
 }
 
+void
+panic_double_fault64(x86_saved_state_t *esp)
+{
+       /* Set postcode (DEBUG only) */
+       postcode(PANIC_DOUBLE_FAULT);
+
+       /*
+        * Break kprintf lock in case of recursion,
+        * and record originally faulted instruction address.
+        */
+       kprintf_break_lock();
+
+       /*
+        * Dump the interrupt stack frame at last kernel entry.
+        */
+       if (is_saved_state64(esp)) {
+               x86_saved_state64_t     *ss64p = saved_state64(esp);
+               panic("Double fault (CPU:%d, thread:%p, trapno:0x%x, err:0x%qx),"
+                     "registers:\n"
+                     "CR0: 0x%08x, CR2: 0x%08x, CR3: 0x%08x, CR4: 0x%08x\n"
+                     "RAX: 0x%016qx, RBX: 0x%016qx, RCX: 0x%016qx, RDX: 0x%016qx\n"
+                     "RSP: 0x%016qx, RBP: 0x%016qx, RSI: 0x%016qx, RDI: 0x%016qx\n"
+                     "R8:  0x%016qx, R9:  0x%016qx, R10: 0x%016qx, R11: 0x%016qx\n"
+                     "R12: 0x%016qx, R13: 0x%016qx, R14: 0x%016qx, R15: 0x%016qx\n"
+                     "RFL: 0x%016qx, RIP: 0x%016qx\n",
+                     cpu_number(), current_thread(), ss64p->isf.trapno, ss64p->isf.err,
+                     get_cr0(), get_cr2(), get_cr3(), get_cr4(),
+                     ss64p->rax, ss64p->rbx, ss64p->rcx, ss64p->rdx,
+                     ss64p->isf.rsp, ss64p->rbp, ss64p->rsi, ss64p->rdi,
+                     ss64p->r8, ss64p->r9, ss64p->r10, ss64p->r11,
+                     ss64p->r12, ss64p->r13, ss64p->r14, ss64p->r15,
+                     ss64p->isf.rflags, ss64p->isf.rip);
+       } else {
+               x86_saved_state32_t     *ss32p = saved_state32(esp);
+               panic("Double fault (CPU:%d, thread:%p, trapno:0x%x, err:0x%x),"
+                     "registers:\n"
+                     "CR0: 0x%08x, CR2: 0x%08x, CR3: 0x%08x, CR4: 0x%08x\n"
+                     "EAX: 0x%08x, EBX: 0x%08x, ECX: 0x%08x, EDX: 0x%08x\n"
+                     "ESP: 0x%08x, EBP: 0x%08x, ESI: 0x%08x, EDI: 0x%08x\n"
+                     "EFL: 0x%08x, EIP: 0x%08x\n",
+                     cpu_number(), current_thread(), ss32p->trapno, ss32p->err,
+                     get_cr0(), get_cr2(), get_cr3(), get_cr4(),
+                     ss32p->eax, ss32p->ebx, ss32p->ecx, ss32p->edx,
+                     ss32p->uesp, ss32p->ebp, ss32p->esi, ss32p->edi,
+                     ss32p->efl, ss32p->eip);
+       }
+}
+
+/*
+ * Simplistic machine check handler: sets the PANIC_MACHINE_CHECK postcode and
+ * panics with CR0/CR2/CR3/CR4 plus the registers saved in 'esp' (64- or 32-bit
+ * layout), as is done for the double fault exception.  No MSR is read here.
+ * Note: the machine check registers are non-volatile across warm boot - so
+ * they'll be around when we return.
+ */
+void
+panic_machine_check64(x86_saved_state_t *esp)
+{
+       /* Set postcode (DEBUG only) */
+       postcode(PANIC_MACHINE_CHECK);
+
+       /*
+        * Break kprintf lock in case of recursion.
+        * (No fault address is recorded here; see the register dump below.)
+        */
+       kprintf_break_lock();
+
+       /*
+        * Dump control registers and the saved state (64- or 32-bit layout).
+        */
+       if (is_saved_state64(esp)) {
+               x86_saved_state64_t     *ss64p = saved_state64(esp);
+               panic("Machine Check (CPU:%d, thread:%p, trapno:0x%x, err:0x%qx),"
+                     "registers:\n"
+                     "CR0: 0x%08x, CR2: 0x%08x, CR3: 0x%08x, CR4: 0x%08x\n"
+                     "RAX: 0x%016qx, RBX: 0x%016qx, RCX: 0x%016qx, RDX: 0x%016qx\n"
+                     "RSP: 0x%016qx, RBP: 0x%016qx, RSI: 0x%016qx, RDI: 0x%016qx\n"
+                     "R8:  0x%016qx, R9:  0x%016qx, R10: 0x%016qx, R11: 0x%016qx\n"
+                     "R12: 0x%016qx, R13: 0x%016qx, R14: 0x%016qx, R15: 0x%016qx\n"
+                     "RFL: 0x%016qx, RIP: 0x%016qx\n",
+                     cpu_number(), current_thread(), ss64p->isf.trapno, ss64p->isf.err,
+                     get_cr0(), get_cr2(), get_cr3(), get_cr4(),
+                     ss64p->rax, ss64p->rbx, ss64p->rcx, ss64p->rdx,
+                     ss64p->isf.rsp, ss64p->rbp, ss64p->rsi, ss64p->rdi,
+                     ss64p->r8, ss64p->r9, ss64p->r10, ss64p->r11,
+                     ss64p->r12, ss64p->r13, ss64p->r14, ss64p->r15,
+                     ss64p->isf.rflags, ss64p->isf.rip);
+       } else {
+               x86_saved_state32_t     *ss32p = saved_state32(esp);
+               panic("Machine Check (CPU:%d, thread:%p, trapno:0x%x, err:0x%x),"
+                     "registers:\n"
+                     "CR0: 0x%08x, CR2: 0x%08x, CR3: 0x%08x, CR4: 0x%08x\n"
+                     "EAX: 0x%08x, EBX: 0x%08x, ECX: 0x%08x, EDX: 0x%08x\n"
+                     "ESP: 0x%08x, EBP: 0x%08x, ESI: 0x%08x, EDI: 0x%08x\n"
+                     "EFL: 0x%08x, EIP: 0x%08x\n",
+                     cpu_number(), current_thread(), ss32p->trapno, ss32p->err,
+                     get_cr0(), get_cr2(), get_cr3(), get_cr4(),
+                     ss32p->eax, ss32p->ebx, ss32p->ecx, ss32p->edx,
+                     ss32p->uesp, ss32p->ebp, ss32p->esi, ss32p->edi,
+                     ss32p->efl, ss32p->eip);
+       }
+}
 
 /*
  *     Trap from user mode.
  */
 void
 user_trap(
-       register struct i386_saved_state        *regs)
+       x86_saved_state_t *saved_state)
 {
        int             exc;
        int             code;
+       int             err;
        unsigned int    subcode;
-       register int    type;
-       vm_map_t        map;
+       int             type;
+       user_addr_t     vaddr;
        vm_prot_t       prot;
-       kern_return_t   result;
        thread_t        thread = current_thread();
-       boolean_t       kernel_act = FALSE;
-
-       if (regs->efl & EFL_VM) {
-           /*
-            * If hardware assist can handle exception,
-            * continue execution.
-            */
-           if (v86_assist(thread, regs))
-               return;
+       ast_t           *myast;
+       boolean_t       intr;
+       kern_return_t   kret;
+       user_addr_t     rip;
+
+       assert((is_saved_state32(saved_state) && !thread_is_64bit(thread)) ||
+              (is_saved_state64(saved_state) &&  thread_is_64bit(thread)));
+
+       if (is_saved_state64(saved_state)) {
+               x86_saved_state64_t     *regs;
+
+               regs = saved_state64(saved_state);
+
+               type = regs->isf.trapno;
+               err  = regs->isf.err & 0xffff;
+               vaddr = (user_addr_t)regs->cr2;
+               rip   = (user_addr_t)regs->isf.rip;
+       } else {
+               x86_saved_state32_t     *regs;
+
+               regs = saved_state32(saved_state);
+
+               type  = regs->trapno;
+               err   = regs->err & 0xffff;
+               vaddr = (user_addr_t)regs->cr2;
+               rip   = (user_addr_t)regs->eip;
        }
 
-       type = regs->trapno;
+       KERNEL_DEBUG_CONSTANT((MACHDBG_CODE(DBG_MACH_EXCP_UTRAP_x86, type)) | DBG_FUNC_NONE,
+                             (int)(vaddr>>32), (int)vaddr, (int)(rip>>32), (int)rip, 0);
+
        code = 0;
        subcode = 0;
        exc = 0;
 
+#if DEBUG_TRACE
+       kprintf("user_trap(0x%08x) type=%d vaddr=0x%016llx\n",
+               saved_state, type, vaddr);
+#endif
+       myast = ast_pending();
+       if (perfASTHook) {
+               if (*myast & AST_CHUD_ALL) {
+                       perfASTHook(type, saved_state, 0, 0);
+               }
+       } else {
+               *myast &= ~AST_CHUD_ALL;
+       }
+
+       /* Is there a hook? */
+       if (perfTrapHook) {
+               if (perfTrapHook(type, saved_state, 0, 0) == KERN_SUCCESS)
+                       return; /* If it succeeds, we are done... */
+       }
+
        switch (type) {
 
            case T_DIVIDE_ERROR:
@@ -491,10 +910,37 @@ user_trap(
                break;
 
            case T_DEBUG:
-               exc = EXC_BREAKPOINT;
-               code = EXC_I386_SGL;
-               break;
-
+               {
+                       pcb_t   pcb;
+                       unsigned int clear = 0;
+                       /*
+                        * get dr6 and set it in the thread's pcb before
+                        * returning to userland
+                        */
+                       pcb = thread->machine.pcb;
+                       if (pcb->ids) {
+                               /*
+                                * We can get and set the status register
+                                * in 32-bit mode even on a 64-bit thread
+                                * because the high order bits are not
+                                * used on x86_64
+                                */
+                               if (thread_is_64bit(thread)) {
+                                       uint32_t dr6;
+                                       x86_debug_state64_t *ids = pcb->ids;
+                                       dr6 = (uint32_t)ids->dr6;
+                                       __asm__ volatile ("movl %%db6, %0" : "=r" (dr6));
+                                       ids->dr6 = dr6;
+                               } else { /* 32 bit thread */
+                                       x86_debug_state32_t *ids = pcb->ids;
+                                       __asm__ volatile ("movl %%db6, %0" : "=r" (ids->dr6));
+                               }
+                               __asm__ volatile ("movl %0, %%db6" : : "r" (clear));
+                       }
+                       exc = EXC_BREAKPOINT;
+                       code = EXC_I386_SGL;
+                       break;
+               }
            case T_INT3:
                exc = EXC_BREAKPOINT;
                code = EXC_I386_BPT;
@@ -527,67 +973,50 @@ user_trap(
            case 10:            /* invalid TSS == iret with NT flag set */
                exc = EXC_BAD_INSTRUCTION;
                code = EXC_I386_INVTSSFLT;
-               subcode = regs->err & 0xffff;
+               subcode = err;
                break;
 
            case T_SEGMENT_NOT_PRESENT:
                exc = EXC_BAD_INSTRUCTION;
                code = EXC_I386_SEGNPFLT;
-               subcode = regs->err & 0xffff;
+               subcode = err;
                break;
 
            case T_STACK_FAULT:
                exc = EXC_BAD_INSTRUCTION;
                code = EXC_I386_STKFLT;
-               subcode = regs->err & 0xffff;
+               subcode = err;
                break;
 
            case T_GENERAL_PROTECTION:
-               if (!(regs->efl & EFL_VM)) {
-                   if (check_io_fault(regs))
-                       return;
-               }
                exc = EXC_BAD_INSTRUCTION;
                code = EXC_I386_GPFLT;
-               subcode = regs->err & 0xffff;
+               subcode = err;
                break;
 
            case T_PAGE_FAULT:
-               subcode = regs->cr2;
-               prot = VM_PROT_READ|VM_PROT_WRITE;
-               if (kernel_act == FALSE) {
-                       if (!(regs->err & T_PF_WRITE))
-                               prot = VM_PROT_READ;
-                       (void) user_page_fault_continue(vm_fault(thread->map,
-                               trunc_page((vm_offset_t)subcode),
-                               prot,
-                               FALSE,
-                               THREAD_ABORTSAFE, NULL, 0));
-                       /* NOTREACHED */
-               }
-               else {
-                       if (subcode > LINEAR_KERNEL_ADDRESS) {
-                               map = kernel_map;
-                       }
-                       result = vm_fault(thread->map,
-                               trunc_page((vm_offset_t)subcode),
-                               prot,
-                               FALSE,
-                               (map == kernel_map) ? THREAD_UNINT : THREAD_ABORTSAFE, NULL, 0);
-                       if ((result != KERN_SUCCESS) && (result != KERN_ABORTED)) {
-                               /*
-                                * Must expand vm_fault by hand,
-                                * so that we can ask for read-only access
-                                * but enter a (kernel) writable mapping.
-                                */
-                               result = intel_read_fault(thread->map,
-                                       trunc_page((vm_offset_t)subcode));
-                       }
-                       user_page_fault_continue(result);
-                       /*NOTREACHED*/
-               }
+               prot = VM_PROT_READ;
+
+               if (err & T_PF_WRITE)
+                       prot |= VM_PROT_WRITE;
+#if     PAE
+               if (err & T_PF_EXECUTE)
+                       prot |= VM_PROT_EXECUTE;
+#endif
+               kret = vm_fault(thread->map, vm_map_trunc_page(vaddr),
+                                prot, FALSE,
+                                THREAD_ABORTSAFE, NULL, 0);
+
+               user_page_fault_continue(kret);
+       
+               /* NOTREACHED */
                break;
 
+           case T_SSE_FLOAT_ERROR:
+               fpSSEexterrflt();
+               return;
+
+
            case T_FLOATING_POINT_ERROR:
                fpexterrflt();
                return;
@@ -598,427 +1027,25 @@ user_trap(
                return;
 #endif  /* MACH_KGDB */
 #if    MACH_KDB
-               if (kdb_trap(type, regs->err, regs))
+               if (kdb_trap(type, err, saved_state))
                    return;
 #endif /* MACH_KDB */
-               printf("user trap type %d, code = %x, pc = %x\n",
-                      type, regs->err, regs->eip);
                panic("user trap");
                return;
        }
-
-#if    MACH_KDB
-       if (debug_all_traps_with_kdb &&
-           kdb_trap(type, regs->err, regs))
-               return;
-#endif /* MACH_KDB */
+       intr = ml_set_interrupts_enabled(FALSE);
+       myast = ast_pending();
+       while (*myast & AST_ALL) {
+               ast_taken(AST_ALL, intr);
+               ml_set_interrupts_enabled(FALSE);
+               myast = ast_pending();
+       }
+       ml_set_interrupts_enabled(intr);
 
        i386_exception(exc, code, subcode);
        /*NOTREACHED*/
 }
 
-/*
- *     V86 mode assist for interrupt handling.
- */
-boolean_t v86_assist_on = TRUE;
-boolean_t v86_unsafe_ok = FALSE;
-boolean_t v86_do_sti_cli = TRUE;
-boolean_t v86_do_sti_immediate = FALSE;
-
-#define        V86_IRET_PENDING 0x4000
-
-int cli_count = 0;
-int sti_count = 0;
-
-boolean_t
-v86_assist(
-       thread_t                                thread,
-       register struct i386_saved_state        *regs)
-{
-       register struct v86_assist_state *v86 = &thread->machine.pcb->ims.v86s;
-
-/*
- * Build an 8086 address.  Use only when off is known to be 16 bits.
- */
-#define        Addr8086(seg,off)       ((((seg) & 0xffff) << 4) + (off))
-
-#define        EFL_V86_SAFE            (  EFL_OF | EFL_DF | EFL_TF \
-                                | EFL_SF | EFL_ZF | EFL_AF \
-                                | EFL_PF | EFL_CF )
-       struct iret_32 {
-               int             eip;
-               int             cs;
-               int             eflags;
-       };
-       struct iret_16 {
-               unsigned short  ip;
-               unsigned short  cs;
-               unsigned short  flags;
-       };
-       union iret_struct {
-               struct iret_32  iret_32;
-               struct iret_16  iret_16;
-       };
-
-       struct int_vec {
-               unsigned short  ip;
-               unsigned short  cs;
-       };
-
-       if (!v86_assist_on)
-           return FALSE;
-
-       /*
-        * If delayed STI pending, enable interrupts.
-        * Turn off tracing if on only to delay STI.
-        */
-       if (v86->flags & V86_IF_PENDING) {
-           v86->flags &= ~V86_IF_PENDING;
-           v86->flags |=  EFL_IF;
-           if ((v86->flags & EFL_TF) == 0)
-               regs->efl &= ~EFL_TF;
-       }
-
-       if (regs->trapno == T_DEBUG) {
-
-           if (v86->flags & EFL_TF) {
-               /*
-                * Trace flag was also set - it has priority
-                */
-               return FALSE;                   /* handle as single-step */
-           }
-           /*
-            * Fall through to check for interrupts.
-            */
-       }
-       else if (regs->trapno == T_GENERAL_PROTECTION) {
-           /*
-            * General protection error - must be an 8086 instruction
-            * to emulate.
-            */
-           register int        eip;
-           boolean_t   addr_32 = FALSE;
-           boolean_t   data_32 = FALSE;
-           int         io_port;
-
-           /*
-            * Set up error handler for bad instruction/data
-            * fetches.
-            */
-           __asm__("movl $(addr_error), %0" : : "m" (thread->recover));
-
-           eip = regs->eip;
-           while (TRUE) {
-               unsigned char   opcode;
-
-               if (eip > 0xFFFF) {
-                   thread->recover = 0;
-                   return FALSE;       /* GP fault: IP out of range */
-               }
-
-               opcode = *(unsigned char *)Addr8086(regs->cs,eip);
-               eip++;
-               switch (opcode) {
-                   case 0xf0:          /* lock */
-                   case 0xf2:          /* repne */
-                   case 0xf3:          /* repe */
-                   case 0x2e:          /* cs */
-                   case 0x36:          /* ss */
-                   case 0x3e:          /* ds */
-                   case 0x26:          /* es */
-                   case 0x64:          /* fs */
-                   case 0x65:          /* gs */
-                       /* ignore prefix */
-                       continue;
-
-                   case 0x66:          /* data size */
-                       data_32 = TRUE;
-                       continue;
-
-                   case 0x67:          /* address size */
-                       addr_32 = TRUE;
-                       continue;
-
-                   case 0xe4:          /* inb imm */
-                   case 0xe5:          /* inw imm */
-                   case 0xe6:          /* outb imm */
-                   case 0xe7:          /* outw imm */
-                       io_port = *(unsigned char *)Addr8086(regs->cs, eip);
-                       eip++;
-                       goto do_in_out;
-
-                   case 0xec:          /* inb dx */
-                   case 0xed:          /* inw dx */
-                   case 0xee:          /* outb dx */
-                   case 0xef:          /* outw dx */
-                   case 0x6c:          /* insb */
-                   case 0x6d:          /* insw */
-                   case 0x6e:          /* outsb */
-                   case 0x6f:          /* outsw */
-                       io_port = regs->edx & 0xffff;
-
-                   do_in_out:
-                       if (!data_32)
-                           opcode |= 0x6600;   /* word IO */
-
-                       switch (emulate_io(regs, opcode, io_port)) {
-                           case EM_IO_DONE:
-                               /* instruction executed */
-                               break;
-                           case EM_IO_RETRY:
-                               /* port mapped, retry instruction */
-                               thread->recover = 0;
-                               return TRUE;
-                           case EM_IO_ERROR:
-                               /* port not mapped */
-                               thread->recover = 0;
-                               return FALSE;
-                       }
-                       break;
-
-                   case 0xfa:          /* cli */
-                       if (!v86_do_sti_cli) {
-                           thread->recover = 0;
-                           return (FALSE);
-                       }
-
-                       v86->flags &= ~EFL_IF;
-                                       /* disable simulated interrupts */
-                       cli_count++;
-                       break;
-
-                   case 0xfb:          /* sti */
-                       if (!v86_do_sti_cli) {
-                           thread->recover = 0;
-                           return (FALSE);
-                       }
-
-                       if ((v86->flags & EFL_IF) == 0) {
-                           if (v86_do_sti_immediate) {
-                                   v86->flags |= EFL_IF;
-                           } else {
-                                   v86->flags |= V86_IF_PENDING;
-                                   regs->efl |= EFL_TF;
-                           }
-                                       /* single step to set IF next inst. */
-                       }
-                       sti_count++;
-                       break;
-
-                   case 0x9c:          /* pushf */
-                   {
-                       int             flags;
-                       vm_offset_t     sp;
-                       unsigned int    size;
-
-                       flags = regs->efl;
-                       if ((v86->flags & EFL_IF) == 0)
-                           flags &= ~EFL_IF;
-
-                       if ((v86->flags & EFL_TF) == 0)
-                           flags &= ~EFL_TF;
-                       else flags |= EFL_TF;
-
-                       sp = regs->uesp;
-                       if (!addr_32)
-                           sp &= 0xffff;
-                       else if (sp > 0xffff)
-                           goto stack_error;
-                       size = (data_32) ? 4 : 2;
-                       if (sp < size)
-                           goto stack_error;
-                       sp -= size;
-                       if (copyout((char *)&flags,
-                                   (user_addr_t)Addr8086(regs->ss,sp),
-                                   size))
-                           goto addr_error;
-                       if (addr_32)
-                           regs->uesp = sp;
-                       else
-                           regs->uesp = (regs->uesp & 0xffff0000) | sp;
-                       break;
-                   }
-
-                   case 0x9d:          /* popf */
-                   {
-                       vm_offset_t sp;
-                       int     nflags;
-
-                       sp = regs->uesp;
-                       if (!addr_32)
-                           sp &= 0xffff;
-                       else if (sp > 0xffff)
-                           goto stack_error;
-
-                       if (data_32) {
-                           if (sp > 0xffff - sizeof(int))
-                               goto stack_error;
-                           nflags = *(int *)Addr8086(regs->ss,sp);
-                           sp += sizeof(int);
-                       }
-                       else {
-                           if (sp > 0xffff - sizeof(short))
-                               goto stack_error;
-                           nflags = *(unsigned short *)
-                                       Addr8086(regs->ss,sp);
-                           sp += sizeof(short);
-                       }
-                       if (addr_32)
-                           regs->uesp = sp;
-                       else
-                           regs->uesp = (regs->uesp & 0xffff0000) | sp;
-
-                       if (v86->flags & V86_IRET_PENDING) {
-                               v86->flags = nflags & (EFL_TF | EFL_IF);
-                               v86->flags |= V86_IRET_PENDING;
-                       } else {
-                               v86->flags = nflags & (EFL_TF | EFL_IF);
-                       }
-                       regs->efl = (regs->efl & ~EFL_V86_SAFE)
-                                    | (nflags & EFL_V86_SAFE);
-                       break;
-                   }
-                   case 0xcf:          /* iret */
-                   {
-                       vm_offset_t sp;
-                       int     nflags;
-                       union iret_struct iret_struct;
-
-                       v86->flags &= ~V86_IRET_PENDING;
-                       sp = regs->uesp;
-                       if (!addr_32)
-                           sp &= 0xffff;
-                       else if (sp > 0xffff)
-                           goto stack_error;
-
-                       if (data_32) {
-                           if (sp > 0xffff - sizeof(struct iret_32))
-                               goto stack_error;
-                           iret_struct.iret_32 =
-                               *(struct iret_32 *) Addr8086(regs->ss,sp);
-                           sp += sizeof(struct iret_32);
-                       }
-                       else {
-                           if (sp > 0xffff - sizeof(struct iret_16))
-                               goto stack_error;
-                           iret_struct.iret_16 =
-                               *(struct iret_16 *) Addr8086(regs->ss,sp);
-                           sp += sizeof(struct iret_16);
-                       }
-                       if (addr_32)
-                           regs->uesp = sp;
-                       else
-                           regs->uesp = (regs->uesp & 0xffff0000) | sp;
-
-                       if (data_32) {
-                           eip       = iret_struct.iret_32.eip;
-                           regs->cs  = iret_struct.iret_32.cs & 0xffff;
-                           nflags    = iret_struct.iret_32.eflags;
-                       }
-                       else {
-                           eip       = iret_struct.iret_16.ip;
-                           regs->cs  = iret_struct.iret_16.cs;
-                           nflags    = iret_struct.iret_16.flags;
-                       }
-
-                       v86->flags = nflags & (EFL_TF | EFL_IF);
-                       regs->efl = (regs->efl & ~EFL_V86_SAFE)
-                                    | (nflags & EFL_V86_SAFE);
-                       break;
-                   }
-                   default:
-                       /*
-                        * Instruction not emulated here.
-                        */
-                       thread->recover = 0;
-                       return FALSE;
-               }
-               break;  /* exit from 'while TRUE' */
-           }
-           regs->eip = (regs->eip & 0xffff0000) | eip;
-       }
-       else {
-           /*
-            * Not a trap we handle.
-            */
-           thread->recover = 0;
-           return FALSE;
-       }
-
-       if ((v86->flags & EFL_IF) && ((v86->flags & V86_IRET_PENDING)==0)) {
-
-           struct v86_interrupt_table *int_table;
-           int int_count;
-           int vec;
-           int i;
-
-           int_table = (struct v86_interrupt_table *) v86->int_table;
-           int_count = v86->int_count;
-
-           vec = 0;
-           for (i = 0; i < int_count; int_table++, i++) {
-               if (!int_table->mask && int_table->count > 0) {
-                   int_table->count--;
-                   vec = int_table->vec;
-                   break;
-               }
-           }
-           if (vec != 0) {
-               /*
-                * Take this interrupt
-                */
-               vm_offset_t     sp;
-               struct iret_16 iret_16;
-               struct int_vec int_vec;
-
-               sp = regs->uesp & 0xffff;
-               if (sp < sizeof(struct iret_16))
-                   goto stack_error;
-               sp -= sizeof(struct iret_16);
-               iret_16.ip = regs->eip;
-               iret_16.cs = regs->cs;
-               iret_16.flags = regs->efl & 0xFFFF;
-               if ((v86->flags & EFL_TF) == 0)
-                   iret_16.flags &= ~EFL_TF;
-               else iret_16.flags |= EFL_TF;
-
-               (void) memcpy((char *) &int_vec, 
-                             (char *) (sizeof(struct int_vec) * vec),
-                             sizeof (struct int_vec));
-               if (copyout((char *)&iret_16,
-                           (user_addr_t)Addr8086(regs->ss,sp),
-                           sizeof(struct iret_16)))
-                   goto addr_error;
-               regs->uesp = (regs->uesp & 0xFFFF0000) | (sp & 0xffff);
-               regs->eip = int_vec.ip;
-               regs->cs  = int_vec.cs;
-               regs->efl  &= ~EFL_TF;
-               v86->flags &= ~(EFL_IF | EFL_TF);
-               v86->flags |= V86_IRET_PENDING;
-           }
-       }
-
-       thread->recover = 0;
-       return TRUE;
-
-       /*
-        *      On address error, report a page fault.
-        *      XXX report GP fault - we don`t save
-        *      the faulting address.
-        */
-    addr_error:
-       __asm__("addr_error:;");
-       thread->recover = 0;
-       return FALSE;
-
-       /*
-        *      On stack address error, return stack fault (12).
-        */
-    stack_error:
-       thread->recover = 0;
-       regs->trapno = T_STACK_FAULT;
-       return FALSE;
-}
 
 /*
  * Handle AST traps for i386.
@@ -1031,45 +1058,17 @@ extern void     log_thread_action (thread_t, char *);
 void
 i386_astintr(int preemption)
 {
-       ast_t           *my_ast, mask = AST_ALL;
+       ast_t           mask = AST_ALL;
        spl_t           s;
 
-       s = splsched();         /* block interrupts to check reasons */
-       mp_disable_preemption();
-       my_ast = ast_pending();
-       if (*my_ast & AST_I386_FP) {
-           /*
-            * AST was for delayed floating-point exception -
-            * FP interrupt occurred while in kernel.
-            * Turn off this AST reason and handle the FPU error.
-            */
-
-           ast_off(AST_I386_FP);
-           mp_enable_preemption();
-           splx(s);
-
-           fpexterrflt();
-       }
-       else {
-           /*
-            * Not an FPU trap.  Handle the AST.
-            * Interrupts are still blocked.
-            */
-
-#if 1
-           if (preemption) {
-               mask = AST_PREEMPTION;
-               mp_enable_preemption();
-           } else {
-               mp_enable_preemption();
-           }
-#else
-       mp_enable_preemption();
-#endif
+       if (preemption)
+               mask = AST_PREEMPTION;
+
+       s = splsched();
 
        ast_taken(mask, s);
 
-       }
+       splx(s);
 }
 
 /*
@@ -1088,133 +1087,68 @@ i386_exception(
        int     code,
        int     subcode)
 {
-       spl_t                   s;
        exception_data_type_t   codes[EXCEPTION_CODE_MAX];
 
-       /*
-        * Turn off delayed FPU error handling.
-        */
-       s = splsched();
-       mp_disable_preemption();
-       ast_off(AST_I386_FP);
-       mp_enable_preemption();
-       splx(s);
-
        codes[0] = code;                /* new exception interface */
        codes[1] = subcode;
        exception_triage(exc, codes, 2);
        /*NOTREACHED*/
 }
 
-boolean_t
-check_io_fault(
-       struct i386_saved_state         *regs)
+
+void
+kernel_preempt_check(void)
 {
-       int             eip, opcode, io_port;
-       boolean_t       data_16 = FALSE;
+       ast_t           *myast;
+       boolean_t       intr;
 
        /*
-        * Get the instruction.
+        * disable interrupts to both prevent pre-emption
+        * and to keep the ast state from changing via
+        * an interrupt handler making something runnable
         */
-       eip = regs->eip;
-
-       for (;;) {
-           opcode = inst_fetch(eip, regs->cs);
-           eip++;
-           switch (opcode) {
-               case 0x66:      /* data-size prefix */
-                   data_16 = TRUE;
-                   continue;
-
-               case 0xf3:      /* rep prefix */
-               case 0x26:      /* es */
-               case 0x2e:      /* cs */
-               case 0x36:      /* ss */
-               case 0x3e:      /* ds */
-               case 0x64:      /* fs */
-               case 0x65:      /* gs */
-                   continue;
-
-               case 0xE4:      /* inb imm */
-               case 0xE5:      /* inl imm */
-               case 0xE6:      /* outb imm */
-               case 0xE7:      /* outl imm */
-                   /* port is immediate byte */
-                   io_port = inst_fetch(eip, regs->cs);
-                   eip++;
-                   break;
-
-               case 0xEC:      /* inb dx */
-               case 0xED:      /* inl dx */
-               case 0xEE:      /* outb dx */
-               case 0xEF:      /* outl dx */
-               case 0x6C:      /* insb */
-               case 0x6D:      /* insl */
-               case 0x6E:      /* outsb */
-               case 0x6F:      /* outsl */
-                   /* port is in DX register */
-                   io_port = regs->edx & 0xFFFF;
-                   break;
-
-               default:
-                   return FALSE;
-           }
-           break;
-       }
-
-       if (data_16)
-           opcode |= 0x6600;           /* word IO */
-
-       switch (emulate_io(regs, opcode, io_port)) {
-           case EM_IO_DONE:
-               /* instruction executed */
-               regs->eip = eip;
-               return TRUE;
+       intr = ml_set_interrupts_enabled(FALSE);
 
-           case EM_IO_RETRY:
-               /* port mapped, retry instruction */
-               return TRUE;
-
-           case EM_IO_ERROR:
-               /* port not mapped */
-               return FALSE;
-       }
-       return FALSE;
-}
+       myast = ast_pending();
 
-void
-kernel_preempt_check (void)
-{
-       ast_t           *myast;
+       if ((*myast & AST_URGENT) && intr == TRUE && get_interrupt_level() == 0) {
+               /*
+                * can handle interrupts and preemptions 
+                * at this point
+                */
+               ml_set_interrupts_enabled(intr);
 
-       mp_disable_preemption();
-       myast = ast_pending();
-        if ((*myast & AST_URGENT) &&
-           get_interrupt_level() == 1
-           ) {
-               mp_enable_preemption_no_check();
-                __asm__ volatile ("     int     $0xff");
-        } else {
-               mp_enable_preemption_no_check();
+               /*
+                * now cause the PRE-EMPTION trap
+                */
+               __asm__ volatile ("     int     $0xff");
+       } else {
+               /*
+                * if interrupts were already disabled or
+                * we're in an interrupt context, we can't
+                * preempt...  of course if AST_URGENT
+                * isn't set we also don't want to
+                */
+               ml_set_interrupts_enabled(intr);
        }
 }
 
 #if    MACH_KDB
 
-extern void    db_i386_state(struct i386_saved_state *regs);
+extern void    db_i386_state(x86_saved_state32_t *regs);
 
 #include <ddb/db_output.h>
 
 void 
 db_i386_state(
-       struct i386_saved_state *regs)
+       x86_saved_state32_t *regs)
 {
        db_printf("eip  %8x\n", regs->eip);
        db_printf("trap %8x\n", regs->trapno);
        db_printf("err  %8x\n", regs->err);
        db_printf("efl  %8x\n", regs->efl);
        db_printf("ebp  %8x\n", regs->ebp);
-       db_printf("esp  %8x\n", regs->esp);
+       db_printf("esp  %8x\n", regs->cr2);
        db_printf("uesp %8x\n", regs->uesp);
        db_printf("cs   %8x\n", regs->cs & 0xff);
        db_printf("ds   %8x\n", regs->ds & 0xff);
@@ -1231,3 +1165,85 @@ db_i386_state(
 }
 
 #endif /* MACH_KDB */
+
+/* Synchronize the current thread's x86_kernel_state32 (if any) with the
+ * given x86_saved_state32_t obtained from the trap/IPI handler; called in
+ * kernel_trap() prior to entering the debugger, and when receiving
+ * an "MP_KDP" IPI.
+ */
+  
+void
+sync_iss_to_iks(x86_saved_state32_t *saved_state)
+{
+       struct x86_kernel_state32 *iks;
+       vm_offset_t kstack;
+       boolean_t record_active_regs = FALSE;
+
+       if ((kstack = current_thread()->kernel_stack) != 0) { /* no kernel stack: nothing to update */
+               x86_saved_state32_t     *regs;
+
+               regs = saved_state;
+
+               iks = STACK_IKS(kstack);
+
+                /* If saved_state is the thread's USER_REGS32 area, the live
+                 * registers are recorded further down (presumably this means the
+                 * trap came from user mode -- TODO confirm); else copy from regs. */
+               if (regs == USER_REGS32(current_thread()))
+                       record_active_regs = TRUE;
+               else {
+                       iks->k_ebx = regs->ebx;
+                       iks->k_esp = (int)regs; /* k_esp gets the address of the saved state itself */
+                       iks->k_ebp = regs->ebp;
+                       iks->k_edi = regs->edi;
+                       iks->k_esi = regs->esi;
+                       iks->k_eip = regs->eip;
+               }
+       }
+
+       if (record_active_regs == TRUE) { /* only set above, after iks was assigned */
+               /*
+                * Show the trap handler path: store the live register values
+                */
+               __asm__ volatile("movl %%ebx, %0" : "=m" (iks->k_ebx));
+               __asm__ volatile("movl %%esp, %0" : "=m" (iks->k_esp));
+               __asm__ volatile("movl %%ebp, %0" : "=m" (iks->k_ebp));
+               __asm__ volatile("movl %%edi, %0" : "=m" (iks->k_edi));
+               __asm__ volatile("movl %%esi, %0" : "=m" (iks->k_esi));
+               /*
+                * "Current" instruction pointer: address of the local label 1
+                */
+               __asm__ volatile("movl $1f, %0\n1:" : "=m" (iks->k_eip));
+       }
+}
+
+/*
+ * This is used by the NMI interrupt handler (from mp.c) to
+ * unconditionally sync the trap handler context to the IKS
+ * irrespective of whether the NMI was fielded in kernel
+ * or user space.
+ */
+void
+sync_iss_to_iks_unconditionally(__unused x86_saved_state32_t *saved_state)
+{
+       struct x86_kernel_state32 *iks;
+       vm_offset_t kstack;
+
+       /* Nothing is recorded when the current thread has no kernel stack. */
+       if ((kstack = current_thread()->kernel_stack) != 0) {
+               iks = STACK_IKS(kstack);
+               /*
+                * Show the trap handler path: store the live register
+                * values at this point; saved_state is not consulted.
+                */
+               __asm__ volatile("movl %%ebx, %0" : "=m" (iks->k_ebx));
+               __asm__ volatile("movl %%esp, %0" : "=m" (iks->k_esp));
+               __asm__ volatile("movl %%ebp, %0" : "=m" (iks->k_ebp));
+               __asm__ volatile("movl %%edi, %0" : "=m" (iks->k_edi));
+               __asm__ volatile("movl %%esi, %0" : "=m" (iks->k_esi));
+               /*
+                * "Current" instruction pointer: address of the local label 1
+                */
+               __asm__ volatile("movl $1f, %0\n1:" : "=m" (iks->k_eip));
+       }
+}