X-Git-Url: https://git.saurik.com/apple/xnu.git/blobdiff_plain/c0fea4742e91338fffdcf79f86a7c1d5e2b97eb1..8f6c56a50524aa785f7e596d52dddfb331e18961:/osfmk/i386/trap.c diff --git a/osfmk/i386/trap.c b/osfmk/i386/trap.c index bb3952249..75071bb3a 100644 --- a/osfmk/i386/trap.c +++ b/osfmk/i386/trap.c @@ -1,23 +1,29 @@ /* * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved. * - * @APPLE_LICENSE_HEADER_START@ + * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * - * The contents of this file constitute Original Code as defined in and - * are subject to the Apple Public Source License Version 1.1 (the - * "License"). You may not use this file except in compliance with the - * License. Please obtain a copy of the License at - * http://www.apple.com/publicsource and read it before using this file. + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. The rights granted to you under the License + * may not be used to create, or enable the creation or redistribution of, + * unlawful or unlicensed copies of an Apple operating system, or to + * circumvent, violate, or enable the circumvention or violation of, any + * terms of an Apple operating system software license agreement. * - * This Original Code and all software distributed under the License are - * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this file. + * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the - * License for the specific language governing rights and limitations - * under the License. + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. 
* - * @APPLE_LICENSE_HEADER_END@ + * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ */ /* * @OSF_COPYRIGHT@ @@ -63,7 +69,6 @@ #include #include #include -#include /* inb() */ #include #include @@ -83,14 +88,17 @@ #include #include -#include - #if MACH_KGDB #include #endif /* MACH_KGDB */ +#include + +#if MACH_KGDB +#include +#endif /* MACH_KGDB */ + #if MACH_KDB -#include #include #include #include @@ -100,55 +108,31 @@ #include #include -#include -#include -#include -#include /* * Forward declarations */ -static void user_page_fault_continue(kern_return_t kret); -static void panic_trap(x86_saved_state32_t *saved_state); -static void set_recovery_ip(x86_saved_state32_t *saved_state, vm_offset_t ip); +extern void user_page_fault_continue( + kern_return_t kr); + +extern boolean_t v86_assist( + thread_t thread, + struct i386_saved_state *regs); + +extern boolean_t check_io_fault( + struct i386_saved_state *regs); -perfCallback perfTrapHook = NULL; /* Pointer to CHUD trap hook routine */ -perfCallback perfASTHook = NULL; /* Pointer to CHUD AST hook routine */ +extern int inst_fetch( + int eip, + int cs); void thread_syscall_return( kern_return_t ret) { - thread_t thr_act = current_thread(); - - if (thread_is_64bit(thr_act)) { - x86_saved_state64_t *regs; - - regs = USER_REGS64(thr_act); - - if (kdebug_enable && ((regs->rax & SYSCALL_CLASS_MASK) == (SYSCALL_CLASS_MACH << SYSCALL_CLASS_SHIFT))) { - /* Mach trap */ - KERNEL_DEBUG_CONSTANT( - MACHDBG_CODE(DBG_MACH_EXCP_SC, ((int) (regs->rax & SYSCALL_NUMBER_MASK))) - | DBG_FUNC_END, - ret, 0, 0, 0, 0); - } - regs->rax = ret; - - } else { - x86_saved_state32_t *regs; - - regs = USER_REGS32(thr_act); - - if (kdebug_enable && ((int) regs->eax < 0)) { - /* Mach trap */ - KERNEL_DEBUG_CONSTANT( - MACHDBG_CODE(DBG_MACH_EXCP_SC, -((int) regs->eax)) - | DBG_FUNC_END, - ret, 0, 0, 0, 0); - } - regs->eax = ret; - } + register thread_t thr_act = current_thread(); + register struct i386_saved_state *regs = USER_REGS(thr_act); + regs->eax = ret; thread_exception_return(); /*NOTREACHED*/ } @@ -163,71 +147,27 @@ extern boolean_t db_breakpoints_inserted; void thread_kdb_return(void) { - thread_t thr_act = current_thread(); - x86_saved_state_t *iss = USER_STATE(thr_act); + register thread_t thread = current_thread(); + register struct i386_saved_state *regs = USER_REGS(thread); - if (is_saved_state64(iss)) { - x86_saved_state64_t *regs; - - regs = saved_state64(iss); - - if (kdb_trap(regs->isf.trapno, (int)regs->isf.err, (void *)regs)) { - thread_exception_return(); - /*NOTREACHED*/ - } - - } else { - x86_saved_state32_t *regs; - - regs = saved_state32(iss); - - if (kdb_trap(regs->trapno, regs->err, (void *)regs)) { - thread_exception_return(); - /*NOTREACHED*/ - } + if (kdb_trap(regs->trapno, regs->err, regs)) { +#if MACH_LDEBUG + assert(thread->mutex_count == 0); +#endif /* MACH_LDEBUG */ + thread_exception_return(); + /*NOTREACHED*/ } } +boolean_t let_ddb_vm_fault = FALSE; #endif /* MACH_KDB */ void user_page_fault_continue( - kern_return_t kr) + kern_return_t kr) { - thread_t thread = current_thread(); - x86_saved_state_t *regs = USER_STATE(thread); - ast_t *myast; - boolean_t intr; - user_addr_t vaddr; -#if MACH_KDB - int err; - int trapno; -#endif - - assert((is_saved_state32(regs) && !thread_is_64bit(thread)) || - (is_saved_state64(regs) && thread_is_64bit(thread))); - - if (thread_is_64bit(thread)) { - x86_saved_state64_t *uregs; - - uregs = USER_REGS64(thread); - -#if MACH_KDB - trapno = uregs->isf.trapno; - err = uregs->isf.err; -#endif - vaddr = 
(user_addr_t)uregs->cr2; - } else { - x86_saved_state32_t *uregs; - - uregs = USER_REGS32(thread); - -#if MACH_KDB - trapno = uregs->trapno; - err = uregs->err; -#endif - vaddr = uregs->cr2; - } + register thread_t thread = current_thread(); + register struct i386_saved_state *regs = USER_REGS(thread); if ((kr == KERN_SUCCESS) || (kr == KERN_ABORTED)) { #if MACH_KDB @@ -236,34 +176,28 @@ user_page_fault_continue( } if (db_watchpoint_list && db_watchpoints_inserted && - (err & T_PF_WRITE) && + (regs->err & T_PF_WRITE) && db_find_watchpoint(thread->map, - (vm_offset_t)vaddr, + (vm_offset_t)regs->cr2, regs)) kdb_trap(T_WATCHPOINT, 0, regs); #endif /* MACH_KDB */ - intr = ml_set_interrupts_enabled(FALSE); - myast = ast_pending(); - while (*myast & AST_ALL) { - ast_taken(AST_ALL, intr); - ml_set_interrupts_enabled(FALSE); - myast = ast_pending(); - } - ml_set_interrupts_enabled(intr); - thread_exception_return(); /*NOTREACHED*/ } #if MACH_KDB if (debug_all_traps_with_kdb && - kdb_trap(trapno, err, regs)) { + kdb_trap(regs->trapno, regs->err, regs)) { +#if MACH_LDEBUG + assert(thread->mutex_count == 0); +#endif /* MACH_LDEBUG */ thread_exception_return(); /*NOTREACHED*/ } #endif /* MACH_KDB */ - i386_exception(EXC_BAD_ACCESS, kr, vaddr); + i386_exception(EXC_BAD_ACCESS, kr, regs->cr2); /*NOTREACHED*/ } @@ -278,165 +212,56 @@ struct recovery { extern struct recovery recover_table[]; extern struct recovery recover_table_end[]; -const char * trap_type[] = {TRAP_NAMES}; -unsigned TRAP_TYPES = sizeof(trap_type)/sizeof(trap_type[0]); +/* + * Recovery from Successful fault in copyout does not + * return directly - it retries the pte check, since + * the 386 ignores write protection in kernel mode. + */ +extern struct recovery retry_table[]; +extern struct recovery retry_table_end[]; + +const char * trap_type[] = {TRAP_NAMES}; +int TRAP_TYPES = sizeof(trap_type)/sizeof(trap_type[0]); -extern unsigned panic_io_port; -static inline void -reset_dr7(void) -{ - uint32_t dr7 = 0x400; /* magic dr7 reset value */ - __asm__ volatile("movl %0,%%dr7" : : "r" (dr7)); -} -#if MACH_KDP -unsigned kdp_has_active_watchpoints = 0; -#endif /* * Trap from kernel mode. Only page-fault errors are recoverable, * and then only in special circumstances. All other errors are * fatal. Return value indicates if trap was handled. */ -void +boolean_t kernel_trap( - x86_saved_state_t *state) + register struct i386_saved_state *regs) { - x86_saved_state32_t *saved_state; int code; - user_addr_t vaddr; - int type; + unsigned int subcode; + int interruptible = THREAD_UNINT; + register int type; vm_map_t map; kern_return_t result = KERN_FAILURE; - thread_t thread; - ast_t *myast; - boolean_t intr; - vm_prot_t prot; - struct recovery *rp; - vm_offset_t kern_ip; - int fault_in_copy_window = -1; - int is_user = 0; -#if MACH_KDB - pt_entry_t *pte; -#endif /* MACH_KDB */ + register thread_t thread; + type = regs->trapno; + code = regs->err; thread = current_thread(); - if (is_saved_state64(state)) - panic("kernel_trap(%p) with 64-bit state", state); - saved_state = saved_state32(state); - - vaddr = (user_addr_t)saved_state->cr2; - type = saved_state->trapno; - code = saved_state->err & 0xffff; - intr = (saved_state->efl & EFL_IF) != 0; /* state of ints at trap */ - - kern_ip = (vm_offset_t)saved_state->eip; - - myast = ast_pending(); - - if (perfASTHook) { - if (*myast & AST_CHUD_ALL) - perfASTHook(type, NULL, 0, 0); - } else - *myast &= ~AST_CHUD_ALL; - - /* - * Is there a hook? 
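/*
 * A minimal, self-contained sketch of the 32/64-bit saved-state dispatch
 * performed above before the fault is handled: the trap number, error code
 * and faulting address are pulled out of whichever frame the thread pushed.
 * The struct layouts and names below are illustrative stand-ins for
 * x86_saved_state32_t / x86_saved_state64_t, not the kernel definitions.
 */
#include <stdbool.h>
#include <stdint.h>

struct ss32 { uint32_t trapno, err, cr2, eip; };
struct ss64 { uint64_t rip, cr2; uint32_t trapno, err; };

struct saved_state {
	bool is64;                               /* discriminates the union */
	union { struct ss32 s32; struct ss64 s64; } u;
};

/* Extract (trapno, err, vaddr) regardless of which frame was pushed. */
static void
decode_fault(const struct saved_state *ss, int *trapno, int *err,
	     uint64_t *vaddr)
{
	if (ss->is64) {
		*trapno = ss->u.s64.trapno;
		*err    = ss->u.s64.err & 0xffff;
		*vaddr  = ss->u.s64.cr2;
	} else {
		*trapno = ss->u.s32.trapno;
		*err    = ss->u.s32.err & 0xffff;
		*vaddr  = ss->u.s32.cr2;
	}
}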
- */ - if (perfTrapHook) { - if (perfTrapHook(type, NULL, 0, 0) == KERN_SUCCESS) { - /* - * If it succeeds, we are done... - */ - return; - } - } - /* - * we come here with interrupts off as we don't want to recurse - * on preemption below. but we do want to re-enable interrupts - * as soon we possibly can to hold latency down - */ - if (T_PREEMPT == type) { - - KERNEL_DEBUG_CONSTANT((MACHDBG_CODE(DBG_MACH_EXCP_KTRAP_x86, type)) | DBG_FUNC_NONE, - 0, 0, 0, kern_ip, 0); - - ast_taken(AST_PREEMPTION, FALSE); - return; - } - - if (T_PAGE_FAULT == type) { - /* - * assume we're faulting in the kernel map - */ - map = kernel_map; - - if (thread != THREAD_NULL && thread->map != kernel_map) { - vm_offset_t copy_window_base; - vm_offset_t kvaddr; - int window_index; - - kvaddr = (vm_offset_t)vaddr; - /* - * must determine if fault occurred in - * the copy window while pre-emption is - * disabled for this processor so that - * we only need to look at the window - * associated with this processor - */ - copy_window_base = current_cpu_datap()->cpu_copywindow_base; - - if (kvaddr >= copy_window_base && kvaddr < (copy_window_base + (NBPDE * NCOPY_WINDOWS)) ) { - - window_index = (kvaddr - copy_window_base) / NBPDE; - - if (thread->machine.copy_window[window_index].user_base != (user_addr_t)-1) { - - kvaddr -= (copy_window_base + (NBPDE * window_index)); - vaddr = thread->machine.copy_window[window_index].user_base + kvaddr; - - map = thread->map; - fault_in_copy_window = window_index; - } - is_user = -1; - } - } - } - KERNEL_DEBUG_CONSTANT((MACHDBG_CODE(DBG_MACH_EXCP_KTRAP_x86, type)) | DBG_FUNC_NONE, - (int)(vaddr >> 32), (int)vaddr, is_user, kern_ip, 0); - - - (void) ml_set_interrupts_enabled(intr); - switch (type) { + case T_PREEMPT: + ast_taken(AST_PREEMPTION, FALSE); + return (TRUE); case T_NO_FPU: fpnoextflt(); - return; + return (TRUE); case T_FPU_FAULT: fpextovrflt(); - return; + return (TRUE); case T_FLOATING_POINT_ERROR: fpexterrflt(); - return; + return (TRUE); - case T_SSE_FLOAT_ERROR: - fpSSEexterrflt(); - return; - case T_DEBUG: - if ((saved_state->efl & EFL_TF) == 0 - && !kdp_has_active_watchpoints) { - /* We've somehow encountered a debug - * register match that does not belong - * to the kernel debugger. - * This isn't supposed to happen. - */ - reset_dr7(); - return; - } - goto debugger_entry; case T_PAGE_FAULT: /* * If the current map is a submap of the kernel map, @@ -445,116 +270,144 @@ kernel_trap( * (vm_map_lookup), we may deadlock on the kernel map * lock. */ +#if MACH_KDB + mp_disable_preemption(); + if (db_active + && kdb_active[cpu_number()] + && !let_ddb_vm_fault) { + /* + * Force kdb to handle this one. + */ + mp_enable_preemption(); + return (FALSE); + } + mp_enable_preemption(); +#endif /* MACH_KDB */ + subcode = regs->cr2; /* get faulting address */ - prot = VM_PROT_READ; - - if (code & T_PF_WRITE) - prot |= VM_PROT_WRITE; -#if PAE - if (code & T_PF_EXECUTE) - prot |= VM_PROT_EXECUTE; -#endif - + if (subcode > LINEAR_KERNEL_ADDRESS) { + map = kernel_map; + } else if (thread == THREAD_NULL) + map = kernel_map; + else { + map = thread->map; + } #if MACH_KDB /* * Check for watchpoint on kernel static data. * vm_fault would fail in this case */ - if (map == kernel_map && db_watchpoint_list && db_watchpoints_inserted && - (code & T_PF_WRITE) && vaddr < vm_map_max(map) && - ((*(pte = pmap_pte(kernel_pmap, (vm_map_offset_t)vaddr))) & INTEL_PTE_WRITE) == 0) { - pmap_store_pte( - pte, - *pte | INTEL_PTE_VALID | INTEL_PTE_WRITE); - /* XXX need invltlb here? 
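/*
 * A small sketch of the copy-window translation done above for kernel page
 * faults taken while copying user data: if the faulting kernel address lies
 * in this CPU's copy-window region, it is converted back into the user
 * address that window currently maps. The constants below are illustrative
 * (SK_NBPDE assumes a 2 MB PAE page-directory span); the real values come
 * from the pmap layer.
 */
#include <stdint.h>

#define SK_NBPDE          (2ULL * 1024 * 1024)
#define SK_NCOPY_WINDOWS  4

struct sk_copy_window { uint64_t user_base; };	/* (uint64_t)-1 if unused */

/*
 * Return the user address a copy-window fault refers to, or 0 if the
 * address is not inside an in-use window.
 */
static uint64_t
copy_window_to_user(uint64_t kvaddr, uint64_t copy_window_base,
		    const struct sk_copy_window win[SK_NCOPY_WINDOWS])
{
	if (kvaddr < copy_window_base ||
	    kvaddr >= copy_window_base + SK_NBPDE * SK_NCOPY_WINDOWS)
		return 0;

	uint64_t index  = (kvaddr - copy_window_base) / SK_NBPDE;
	uint64_t offset = kvaddr - (copy_window_base + SK_NBPDE * index);

	if (win[index].user_base == (uint64_t)-1)
		return 0;
	return win[index].user_base + offset;
}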
*/ - + if (map == kernel_map && + db_watchpoint_list && + db_watchpoints_inserted && + (code & T_PF_WRITE) && + (vm_offset_t)subcode < vm_last_phys && + ((*(pte = pmap_pte(kernel_pmap, (vm_offset_t)subcode))) & + INTEL_PTE_WRITE) == 0) { + *pte = *pte | INTEL_PTE_VALID | INTEL_PTE_WRITE; /* XXX need invltlb here? */ result = KERN_SUCCESS; - goto look_for_watchpoints; - } + } else #endif /* MACH_KDB */ - - result = vm_fault(map, - vm_map_trunc_page(vaddr), - prot, - FALSE, - THREAD_UNINT, NULL, 0); - + { + /* + * Since the 386 ignores write protection in + * kernel mode, always try for write permission + * first. If that fails and the fault was a + * read fault, retry with read permission. + */ + if (map == kernel_map) { + register struct recovery *rp; + + interruptible = THREAD_UNINT; + for (rp = recover_table; rp < recover_table_end; rp++) { + if (regs->eip == rp->fault_addr) { + interruptible = THREAD_ABORTSAFE; + break; + } + } + } + result = vm_fault(map, + trunc_page((vm_offset_t)subcode), + VM_PROT_READ|VM_PROT_WRITE, + FALSE, + (map == kernel_map) ? interruptible : THREAD_ABORTSAFE, NULL, 0); + } #if MACH_KDB if (result == KERN_SUCCESS) { - /* - * Look for watchpoints - */ -look_for_watchpoints: - if (map == kernel_map && db_watchpoint_list && db_watchpoints_inserted && (code & T_PF_WRITE) && - db_find_watchpoint(map, vaddr, saved_state)) - kdb_trap(T_WATCHPOINT, 0, saved_state); + /* Look for watchpoints */ + if (db_watchpoint_list && + db_watchpoints_inserted && + (code & T_PF_WRITE) && + db_find_watchpoint(map, + (vm_offset_t)subcode, regs)) + kdb_trap(T_WATCHPOINT, 0, regs); } + else #endif /* MACH_KDB */ + if ((code & T_PF_WRITE) == 0 && + result == KERN_PROTECTION_FAILURE) + { + /* + * Must expand vm_fault by hand, + * so that we can ask for read-only access + * but enter a (kernel)writable mapping. + */ + result = intel_read_fault(map, + trunc_page((vm_offset_t)subcode)); + } if (result == KERN_SUCCESS) { - - if (fault_in_copy_window != -1) { - pt_entry_t *updp; - pt_entry_t *kpdp; - - /* - * in case there was no page table assigned - * for the user base address and the pmap - * got 'expanded' due to this fault, we'll - * copy in the descriptor - * - * we're either setting the page table descriptor - * to the same value or it was 0... no need - * for a TLB flush in either case - */ - - ml_set_interrupts_enabled(FALSE); - updp = pmap_pde(map->pmap, thread->machine.copy_window[fault_in_copy_window].user_base); - assert(updp); - if (0 == updp) panic("trap: updp 0"); /* XXX DEBUG */ - kpdp = current_cpu_datap()->cpu_copywindow_pdp; - kpdp += fault_in_copy_window; - -#if JOE_DEBUG - if (*kpdp && (*kpdp & PG_FRAME) != (*updp & PG_FRAME)) - panic("kernel_fault: user pdp doesn't match - updp = 0x%x, kpdp = 0x%x\n", updp, kpdp); -#endif - pmap_store_pte(kpdp, *updp); - - (void) ml_set_interrupts_enabled(intr); + /* + * Certain faults require that we back up + * the EIP. + */ + register struct recovery *rp; + + for (rp = retry_table; rp < retry_table_end; rp++) { + if (regs->eip == rp->fault_addr) { + regs->eip = rp->recover_addr; + break; } - return; + } + return (TRUE); } - /* - * fall through - */ + + /* fall through */ case T_GENERAL_PROTECTION: + /* * If there is a failure recovery address * for this fault, go there. 
*/ - for (rp = recover_table; rp < recover_table_end; rp++) { - if (kern_ip == rp->fault_addr) { - set_recovery_ip(saved_state, rp->recover_addr); - return; + { + register struct recovery *rp; + + for (rp = recover_table; + rp < recover_table_end; + rp++) { + if (regs->eip == rp->fault_addr) { + regs->eip = rp->recover_addr; + return (TRUE); } + } } /* - * Check thread recovery address also. + * Check thread recovery address also - + * v86 assist uses it. */ if (thread->recover) { - set_recovery_ip(saved_state, thread->recover); - thread->recover = 0; - return; + regs->eip = thread->recover; + thread->recover = 0; + return (TRUE); } + /* * Unanticipated page-fault errors in kernel * should not happen. - * - * fall through... */ + /* fall through... */ default: /* @@ -563,337 +416,71 @@ look_for_watchpoints: */ if (type == 15) { kprintf("kernel_trap() ignoring spurious trap 15\n"); - return; + return (TRUE); } -debugger_entry: - /* Ensure that the i386_kernel_state at the base of the - * current thread's stack (if any) is synchronized with the - * context at the moment of the trap, to facilitate - * access through the debugger. + + /* + * ...and return failure, so that locore can call into + * debugger. */ - sync_iss_to_iks(saved_state); -#if MACH_KDB -restart_debugger: -#endif /* MACH_KDB */ #if MACH_KDP - if (current_debugger != KDB_CUR_DB) { - if (kdp_i386_trap(type, saved_state, result, vaddr)) - return; - } -#endif /* MACH_KDP */ -#if MACH_KDB - else - if (kdb_trap(type, code, saved_state)) { - if (switch_debugger) { - current_debugger = KDP_CUR_DB; - switch_debugger = 0; - goto restart_debugger; - } - return; - } -#endif /* MACH_KDB */ - } - - panic_trap(saved_state); - /* - * NO RETURN - */ -} - - -static void -set_recovery_ip(x86_saved_state32_t *saved_state, vm_offset_t ip) -{ - saved_state->eip = ip; -} - - -static void -panic_trap(x86_saved_state32_t *regs) -{ - const char *trapname = "Unknown"; - uint32_t cr0 = get_cr0(); - uint32_t cr2 = get_cr2(); - uint32_t cr3 = get_cr3(); - uint32_t cr4 = get_cr4(); - - if (panic_io_port) - (void)inb(panic_io_port); - - kprintf("panic trap number 0x%x, eip 0x%x\n", regs->trapno, regs->eip); - kprintf("cr0 0x%08x cr2 0x%08x cr3 0x%08x cr4 0x%08x\n", - cr0, cr2, cr3, cr4); - - if (regs->trapno < TRAP_TYPES) - trapname = trap_type[regs->trapno]; - - panic("Unresolved kernel trap (CPU %d, Type %d=%s), registers:\n" - "CR0: 0x%08x, CR2: 0x%08x, CR3: 0x%08x, CR4: 0x%08x\n" - "EAX: 0x%08x, EBX: 0x%08x, ECX: 0x%08x, EDX: 0x%08x\n" - "CR2: 0x%08x, EBP: 0x%08x, ESI: 0x%08x, EDI: 0x%08x\n" - "EFL: 0x%08x, EIP: 0x%08x, CS: 0x%08x, DS: 0x%08x\n", - cpu_number(), regs->trapno, trapname, cr0, cr2, cr3, cr4, - regs->eax,regs->ebx,regs->ecx,regs->edx, - regs->cr2,regs->ebp,regs->esi,regs->edi, - regs->efl,regs->eip,regs->cs, regs->ds); - /* - * This next statement is not executed, - * but it's needed to stop the compiler using tail call optimization - * for the panic call - which confuses the subsequent backtrace. - */ - cr0 = 0; -} - -extern void kprintf_break_lock(void); - - -/* - * Called from locore on a special reserved stack after a double-fault - * is taken in kernel space. - * Kernel stack overflow is one route here. - */ -void -panic_double_fault(int code) -{ - struct i386_tss *my_ktss = current_ktss(); - - /* Set postcode (DEBUG only) */ - postcode(PANIC_DOUBLE_FAULT); - -/* Issue an I/O port read if one has been requested - this is an event logic - * analyzers can use as a trigger point. 
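/*
 * panic_trap() above only indexes trap_type[] after checking the trap
 * number against TRAP_TYPES, so an unexpected vector falls back to
 * "Unknown" instead of reading past the table. A sketch of that
 * bounds-checked lookup; the table below is a truncated, illustrative
 * subset of TRAP_NAMES.
 */
static const char *sk_trap_type[] = {
	"divide error", "debug trap", "NMI", "breakpoint",
	"overflow", "bounds check", "invalid opcode", "no coprocessor",
};
#define SK_TRAP_TYPES (sizeof(sk_trap_type) / sizeof(sk_trap_type[0]))

static const char *
sk_trap_name(unsigned int trapno)
{
	return (trapno < SK_TRAP_TYPES) ? sk_trap_type[trapno] : "Unknown";
}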
- */ - if (panic_io_port) - (void)inb(panic_io_port); - - /* - * Break kprintf lock in case of recursion, - * and record originally faulted instruction address. - */ - kprintf_break_lock(); - -#if MACH_KDP - /* - * Print backtrace leading to first fault: - */ - panic_i386_backtrace((void *) my_ktss->ebp, 10); + kdp_i386_trap(type, regs, result, regs->cr2); #endif - - panic("Double fault (CPU:%d, thread:%p, code:0x%x)," - "registers:\n" - "CR0: 0x%08x, CR2: 0x%08x, CR3: 0x%08x, CR4: 0x%08x\n" - "EAX: 0x%08x, EBX: 0x%08x, ECX: 0x%08x, EDX: 0x%08x\n" - "ESP: 0x%08x, EBP: 0x%08x, ESI: 0x%08x, EDI: 0x%08x\n" - "EFL: 0x%08x, EIP: 0x%08x\n", - cpu_number(), current_thread(), code, - get_cr0(), get_cr2(), get_cr3(), get_cr4(), - my_ktss->eax, my_ktss->ebx, my_ktss->ecx, my_ktss->edx, - my_ktss->esp, my_ktss->ebp, my_ktss->esi, my_ktss->edi, - my_ktss->eflags, my_ktss->eip); -} - - -/* - * Called from locore on a special reserved stack after a machine-check - */ -void -panic_machine_check(int code) -{ - struct i386_tss *my_ktss = current_ktss(); - - /* Set postcode (DEBUG only) */ - postcode(PANIC_MACHINE_CHECK); - - /* - * Break kprintf lock in case of recursion, - * and record originally faulted instruction address. - */ - kprintf_break_lock(); - panic("Machine-check (CPU:%d, thread:%p, code:0x%x)," - "registers:\n" - "CR0: 0x%08x, CR2: 0x%08x, CR3: 0x%08x, CR4: 0x%08x\n" - "EAX: 0x%08x, EBX: 0x%08x, ECX: 0x%08x, EDX: 0x%08x\n" - "ESP: 0x%08x, EBP: 0x%08x, ESI: 0x%08x, EDI: 0x%08x\n" - "EFL: 0x%08x, EIP: 0x%08x\n", - cpu_number(), current_thread(), code, - get_cr0(), get_cr2(), get_cr3(), get_cr4(), - my_ktss->eax, my_ktss->ebx, my_ktss->ecx, my_ktss->edx, - my_ktss->esp, my_ktss->ebp, my_ktss->esi, my_ktss->edi, - my_ktss->eflags, my_ktss->eip); -} - -void -panic_double_fault64(x86_saved_state_t *esp) -{ - /* Set postcode (DEBUG only) */ - postcode(PANIC_DOUBLE_FAULT); - - /* - * Break kprintf lock in case of recursion, - * and record originally faulted instruction address. - */ - kprintf_break_lock(); - - /* - * Dump the interrupt stack frame at last kernel entry. 
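/*
 * panic_i386_backtrace() above walks the chain of saved frame pointers
 * starting from the EBP recorded in the TSS: on i386 each frame holds the
 * caller's EBP followed by the return address. A minimal sketch of such a
 * walker, assuming frame pointers are present; the kernel version also
 * validates each frame address before dereferencing it.
 */
#include <stdio.h>

struct sk_frame {
	struct sk_frame *prev;	/* saved frame pointer of the caller */
	void            *ret;	/* return address pushed by the call */
};

static void
sk_backtrace_from(struct sk_frame *fp, int max_frames)
{
	for (int i = 0; i < max_frames && fp != NULL; i++, fp = fp->prev)
		printf("frame %2d: return address %p\n", i, fp->ret);
}

/* Usage (GCC/Clang): sk_backtrace_from(__builtin_frame_address(0), 10); */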
- */ - if (is_saved_state64(esp)) { - x86_saved_state64_t *ss64p = saved_state64(esp); - panic("Double fault (CPU:%d, thread:%p, trapno:0x%x, err:0x%qx)," - "registers:\n" - "CR0: 0x%08x, CR2: 0x%08x, CR3: 0x%08x, CR4: 0x%08x\n" - "RAX: 0x%016qx, RBX: 0x%016qx, RCX: 0x%016qx, RDX: 0x%016qx\n" - "RSP: 0x%016qx, RBP: 0x%016qx, RSI: 0x%016qx, RDI: 0x%016qx\n" - "R8: 0x%016qx, R9: 0x%016qx, R10: 0x%016qx, R11: 0x%016qx\n" - "R12: 0x%016qx, R13: 0x%016qx, R14: 0x%016qx, R15: 0x%016qx\n" - "RFL: 0x%016qx, RIP: 0x%016qx\n", - cpu_number(), current_thread(), ss64p->isf.trapno, ss64p->isf.err, - get_cr0(), get_cr2(), get_cr3(), get_cr4(), - ss64p->rax, ss64p->rbx, ss64p->rcx, ss64p->rdx, - ss64p->isf.rsp, ss64p->rbp, ss64p->rsi, ss64p->rdi, - ss64p->r8, ss64p->r9, ss64p->r10, ss64p->r11, - ss64p->r12, ss64p->r13, ss64p->r14, ss64p->r15, - ss64p->isf.rflags, ss64p->isf.rip); - } else { - x86_saved_state32_t *ss32p = saved_state32(esp); - panic("Double fault (CPU:%d, thread:%p, trapno:0x%x, err:0x%x)," - "registers:\n" - "CR0: 0x%08x, CR2: 0x%08x, CR3: 0x%08x, CR4: 0x%08x\n" - "EAX: 0x%08x, EBX: 0x%08x, ECX: 0x%08x, EDX: 0x%08x\n" - "ESP: 0x%08x, EBP: 0x%08x, ESI: 0x%08x, EDI: 0x%08x\n" - "EFL: 0x%08x, EIP: 0x%08x\n", - cpu_number(), current_thread(), ss32p->trapno, ss32p->err, - get_cr0(), get_cr2(), get_cr3(), get_cr4(), - ss32p->eax, ss32p->ebx, ss32p->ecx, ss32p->edx, - ss32p->uesp, ss32p->ebp, ss32p->esi, ss32p->edi, - ss32p->efl, ss32p->eip); + return (FALSE); } + return (TRUE); } /* - * Simplistic machine check handler. - * We could peruse all those MSRs but we only dump register state as we do for - * the double fault exception. - * Note: the machine check registers are non-volatile across warm boot - so - * they'll be around when we return. + * Called if both kernel_trap() and kdb_trap() fail. */ void -panic_machine_check64(x86_saved_state_t *esp) +panic_trap( + register struct i386_saved_state *regs) { - /* Set postcode (DEBUG only) */ - postcode(PANIC_MACHINE_CHECK); + int code; + register int type; - /* - * Break kprintf lock in case of recursion, - * and record originally faulted instruction address. - */ - kprintf_break_lock(); + type = regs->trapno; + code = regs->err; - /* - * Dump the interrupt stack frame at last kernel entry. 
- */ - if (is_saved_state64(esp)) { - x86_saved_state64_t *ss64p = saved_state64(esp); - panic("Machine Check (CPU:%d, thread:%p, trapno:0x%x, err:0x%qx)," - "registers:\n" - "CR0: 0x%08x, CR2: 0x%08x, CR3: 0x%08x, CR4: 0x%08x\n" - "RAX: 0x%016qx, RBX: 0x%016qx, RCX: 0x%016qx, RDX: 0x%016qx\n" - "RSP: 0x%016qx, RBP: 0x%016qx, RSI: 0x%016qx, RDI: 0x%016qx\n" - "R8: 0x%016qx, R9: 0x%016qx, R10: 0x%016qx, R11: 0x%016qx\n" - "R12: 0x%016qx, R13: 0x%016qx, R14: 0x%016qx, R15: 0x%016qx\n" - "RFL: 0x%016qx, RIP: 0x%016qx\n", - cpu_number(), current_thread(), ss64p->isf.trapno, ss64p->isf.err, - get_cr0(), get_cr2(), get_cr3(), get_cr4(), - ss64p->rax, ss64p->rbx, ss64p->rcx, ss64p->rdx, - ss64p->isf.rsp, ss64p->rbp, ss64p->rsi, ss64p->rdi, - ss64p->r8, ss64p->r9, ss64p->r10, ss64p->r11, - ss64p->r12, ss64p->r13, ss64p->r14, ss64p->r15, - ss64p->isf.rflags, ss64p->isf.rip); - } else { - x86_saved_state32_t *ss32p = saved_state32(esp); - panic("Machine Check (CPU:%d, thread:%p, trapno:0x%x, err:0x%x)," - "registers:\n" - "CR0: 0x%08x, CR2: 0x%08x, CR3: 0x%08x, CR4: 0x%08x\n" - "EAX: 0x%08x, EBX: 0x%08x, ECX: 0x%08x, EDX: 0x%08x\n" - "ESP: 0x%08x, EBP: 0x%08x, ESI: 0x%08x, EDI: 0x%08x\n" - "EFL: 0x%08x, EIP: 0x%08x\n", - cpu_number(), current_thread(), ss32p->trapno, ss32p->err, - get_cr0(), get_cr2(), get_cr3(), get_cr4(), - ss32p->eax, ss32p->ebx, ss32p->ecx, ss32p->edx, - ss32p->uesp, ss32p->ebp, ss32p->esi, ss32p->edi, - ss32p->efl, ss32p->eip); - } + printf("trap type %d, code = %x, pc = %x\n", + type, code, regs->eip); + panic("trap"); } + /* * Trap from user mode. */ void user_trap( - x86_saved_state_t *saved_state) + register struct i386_saved_state *regs) { int exc; int code; - int err; unsigned int subcode; - int type; - user_addr_t vaddr; + register int type; + vm_map_t map; vm_prot_t prot; + kern_return_t result; thread_t thread = current_thread(); - ast_t *myast; - boolean_t intr; - kern_return_t kret; - user_addr_t rip; - - assert((is_saved_state32(saved_state) && !thread_is_64bit(thread)) || - (is_saved_state64(saved_state) && thread_is_64bit(thread))); - - if (is_saved_state64(saved_state)) { - x86_saved_state64_t *regs; - - regs = saved_state64(saved_state); - - type = regs->isf.trapno; - err = regs->isf.err & 0xffff; - vaddr = (user_addr_t)regs->cr2; - rip = (user_addr_t)regs->isf.rip; - } else { - x86_saved_state32_t *regs; - - regs = saved_state32(saved_state); - - type = regs->trapno; - err = regs->err & 0xffff; - vaddr = (user_addr_t)regs->cr2; - rip = (user_addr_t)regs->eip; + boolean_t kernel_act = FALSE; + + if (regs->efl & EFL_VM) { + /* + * If hardware assist can handle exception, + * continue execution. + */ + if (v86_assist(thread, regs)) + return; } - KERNEL_DEBUG_CONSTANT((MACHDBG_CODE(DBG_MACH_EXCP_UTRAP_x86, type)) | DBG_FUNC_NONE, - (int)(vaddr>>32), (int)vaddr, (int)(rip>>32), (int)rip, 0); - + type = regs->trapno; code = 0; subcode = 0; exc = 0; -#if DEBUG_TRACE - kprintf("user_trap(0x%08x) type=%d vaddr=0x%016llx\n", - saved_state, type, vaddr); -#endif - myast = ast_pending(); - if (perfASTHook) { - if (*myast & AST_CHUD_ALL) { - perfASTHook(type, saved_state, 0, 0); - } - } else { - *myast &= ~AST_CHUD_ALL; - } - - /* Is there a hook? */ - if (perfTrapHook) { - if (perfTrapHook(type, saved_state, 0, 0) == KERN_SUCCESS) - return; /* If it succeeds, we are done... 
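/*
 * The KERNEL_DEBUG_CONSTANT calls above pack a class/subclass/code triple
 * into one 32-bit kdebug id and, because the trace arguments here are
 * 32 bits wide, split the 64-bit fault address into high and low halves.
 * A sketch of that encoding; the 8/8/14/2-bit field layout follows the
 * usual kdebug convention, and the subclass value below is hypothetical.
 */
#include <stdint.h>

#define SK_DBG_FUNC_NONE	0u
#define SK_DBG_FUNC_START	1u
#define SK_DBG_FUNC_END		2u

static inline uint32_t
sk_kdbg_code(uint32_t cls, uint32_t subclass, uint32_t code)
{
	return ((cls & 0xff) << 24) | ((subclass & 0xff) << 16) |
	       ((code & 0x3fff) << 2);
}

static inline void
sk_trace_fault(uint64_t vaddr, uint32_t type)
{
	uint32_t debugid = sk_kdbg_code(1 /* DBG_MACH */,
					0x0a /* hypothetical subclass */,
					type) | SK_DBG_FUNC_NONE;
	uint32_t hi = (uint32_t)(vaddr >> 32);	/* (int)(vaddr >> 32) above */
	uint32_t lo = (uint32_t)vaddr;		/* (int)vaddr above         */
	/* the kernel would hand (debugid, hi, lo, ...) to the trace buffer */
	(void)debugid; (void)hi; (void)lo;
}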
*/ - } - switch (type) { case T_DIVIDE_ERROR: @@ -902,37 +489,10 @@ user_trap( break; case T_DEBUG: - { - pcb_t pcb; - unsigned int clear = 0; - /* - * get dr6 and set it in the thread's pcb before - * returning to userland - */ - pcb = thread->machine.pcb; - if (pcb->ids) { - /* - * We can get and set the status register - * in 32-bit mode even on a 64-bit thread - * because the high order bits are not - * used on x86_64 - */ - if (thread_is_64bit(thread)) { - uint32_t dr6; - x86_debug_state64_t *ids = pcb->ids; - dr6 = (uint32_t)ids->dr6; - __asm__ volatile ("movl %%db6, %0" : "=r" (dr6)); - ids->dr6 = dr6; - } else { /* 32 bit thread */ - x86_debug_state32_t *ids = pcb->ids; - __asm__ volatile ("movl %%db6, %0" : "=r" (ids->dr6)); - } - __asm__ volatile ("movl %0, %%db6" : : "r" (clear)); - } - exc = EXC_BREAKPOINT; - code = EXC_I386_SGL; - break; - } + exc = EXC_BREAKPOINT; + code = EXC_I386_SGL; + break; + case T_INT3: exc = EXC_BREAKPOINT; code = EXC_I386_BPT; @@ -965,50 +525,67 @@ user_trap( case 10: /* invalid TSS == iret with NT flag set */ exc = EXC_BAD_INSTRUCTION; code = EXC_I386_INVTSSFLT; - subcode = err; + subcode = regs->err & 0xffff; break; case T_SEGMENT_NOT_PRESENT: exc = EXC_BAD_INSTRUCTION; code = EXC_I386_SEGNPFLT; - subcode = err; + subcode = regs->err & 0xffff; break; case T_STACK_FAULT: exc = EXC_BAD_INSTRUCTION; code = EXC_I386_STKFLT; - subcode = err; + subcode = regs->err & 0xffff; break; case T_GENERAL_PROTECTION: + if (!(regs->efl & EFL_VM)) { + if (check_io_fault(regs)) + return; + } exc = EXC_BAD_INSTRUCTION; code = EXC_I386_GPFLT; - subcode = err; + subcode = regs->err & 0xffff; break; case T_PAGE_FAULT: - prot = VM_PROT_READ; - - if (err & T_PF_WRITE) - prot |= VM_PROT_WRITE; -#if PAE - if (err & T_PF_EXECUTE) - prot |= VM_PROT_EXECUTE; -#endif - kret = vm_fault(thread->map, vm_map_trunc_page(vaddr), - prot, FALSE, - THREAD_ABORTSAFE, NULL, 0); - - user_page_fault_continue(kret); - - /* NOTREACHED */ + subcode = regs->cr2; + prot = VM_PROT_READ|VM_PROT_WRITE; + if (kernel_act == FALSE) { + if (!(regs->err & T_PF_WRITE)) + prot = VM_PROT_READ; + (void) user_page_fault_continue(vm_fault(thread->map, + trunc_page((vm_offset_t)subcode), + prot, + FALSE, + THREAD_ABORTSAFE, NULL, 0)); + /* NOTREACHED */ + } + else { + if (subcode > LINEAR_KERNEL_ADDRESS) { + map = kernel_map; + } + result = vm_fault(thread->map, + trunc_page((vm_offset_t)subcode), + prot, + FALSE, + (map == kernel_map) ? THREAD_UNINT : THREAD_ABORTSAFE, NULL, 0); + if ((result != KERN_SUCCESS) && (result != KERN_ABORTED)) { + /* + * Must expand vm_fault by hand, + * so that we can ask for read-only access + * but enter a (kernel) writable mapping. 
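/*
 * The user page-fault case above converts the hardware error code into the
 * protection requested from vm_fault(). A sketch using the architectural
 * page-fault error-code bits (bit 0 = protection violation, bit 1 = write,
 * bit 2 = user mode, bit 4 = instruction fetch when NX/PAE is in use); the
 * SK_* names stand in for T_PF_* and VM_PROT_*.
 */
#include <stdint.h>

#define SK_PF_PROT	0x01
#define SK_PF_WRITE	0x02
#define SK_PF_USER	0x04
#define SK_PF_EXECUTE	0x10

#define SK_PROT_READ	0x1
#define SK_PROT_WRITE	0x2
#define SK_PROT_EXECUTE	0x4

static int
sk_fault_prot(uint32_t err)
{
	int prot = SK_PROT_READ;

	if (err & SK_PF_WRITE)
		prot |= SK_PROT_WRITE;
	if (err & SK_PF_EXECUTE)
		prot |= SK_PROT_EXECUTE;
	return prot;
}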
+ */ + result = intel_read_fault(thread->map, + trunc_page((vm_offset_t)subcode)); + } + user_page_fault_continue(result); + /*NOTREACHED*/ + } break; - case T_SSE_FLOAT_ERROR: - fpSSEexterrflt(); - return; - - case T_FLOATING_POINT_ERROR: fpexterrflt(); return; @@ -1019,25 +596,427 @@ user_trap( return; #endif /* MACH_KGDB */ #if MACH_KDB - if (kdb_trap(type, err, saved_state)) + if (kdb_trap(type, regs->err, regs)) return; #endif /* MACH_KDB */ + printf("user trap type %d, code = %x, pc = %x\n", + type, regs->err, regs->eip); panic("user trap"); return; } - intr = ml_set_interrupts_enabled(FALSE); - myast = ast_pending(); - while (*myast & AST_ALL) { - ast_taken(AST_ALL, intr); - ml_set_interrupts_enabled(FALSE); - myast = ast_pending(); - } - ml_set_interrupts_enabled(intr); + +#if MACH_KDB + if (debug_all_traps_with_kdb && + kdb_trap(type, regs->err, regs)) + return; +#endif /* MACH_KDB */ i386_exception(exc, code, subcode); /*NOTREACHED*/ } +/* + * V86 mode assist for interrupt handling. + */ +boolean_t v86_assist_on = TRUE; +boolean_t v86_unsafe_ok = FALSE; +boolean_t v86_do_sti_cli = TRUE; +boolean_t v86_do_sti_immediate = FALSE; + +#define V86_IRET_PENDING 0x4000 + +int cli_count = 0; +int sti_count = 0; + +boolean_t +v86_assist( + thread_t thread, + register struct i386_saved_state *regs) +{ + register struct v86_assist_state *v86 = &thread->machine.pcb->ims.v86s; + +/* + * Build an 8086 address. Use only when off is known to be 16 bits. + */ +#define Addr8086(seg,off) ((((seg) & 0xffff) << 4) + (off)) + +#define EFL_V86_SAFE ( EFL_OF | EFL_DF | EFL_TF \ + | EFL_SF | EFL_ZF | EFL_AF \ + | EFL_PF | EFL_CF ) + struct iret_32 { + int eip; + int cs; + int eflags; + }; + struct iret_16 { + unsigned short ip; + unsigned short cs; + unsigned short flags; + }; + union iret_struct { + struct iret_32 iret_32; + struct iret_16 iret_16; + }; + + struct int_vec { + unsigned short ip; + unsigned short cs; + }; + + if (!v86_assist_on) + return FALSE; + + /* + * If delayed STI pending, enable interrupts. + * Turn off tracing if on only to delay STI. + */ + if (v86->flags & V86_IF_PENDING) { + v86->flags &= ~V86_IF_PENDING; + v86->flags |= EFL_IF; + if ((v86->flags & EFL_TF) == 0) + regs->efl &= ~EFL_TF; + } + + if (regs->trapno == T_DEBUG) { + + if (v86->flags & EFL_TF) { + /* + * Trace flag was also set - it has priority + */ + return FALSE; /* handle as single-step */ + } + /* + * Fall through to check for interrupts. + */ + } + else if (regs->trapno == T_GENERAL_PROTECTION) { + /* + * General protection error - must be an 8086 instruction + * to emulate. + */ + register int eip; + boolean_t addr_32 = FALSE; + boolean_t data_32 = FALSE; + int io_port; + + /* + * Set up error handler for bad instruction/data + * fetches. 
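/*
 * The Addr8086() macro defined above builds a real-mode linear address from
 * a 16-bit segment and offset: the segment is shifted left four bits and the
 * offset added. A sketch with a quick self-check.
 */
#include <assert.h>
#include <stdint.h>

static inline uint32_t
sk_addr8086(uint32_t seg, uint32_t off)
{
	return ((seg & 0xffff) << 4) + off;
}

/* Example: segment 0xF000, offset 0xFFF0 is the reset vector at 0xFFFF0. */
static void
sk_addr8086_selfcheck(void)
{
	assert(sk_addr8086(0xF000, 0xFFF0) == 0xFFFF0u);
}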
+ */ + __asm__("movl $(addr_error), %0" : : "m" (thread->recover)); + + eip = regs->eip; + while (TRUE) { + unsigned char opcode; + + if (eip > 0xFFFF) { + thread->recover = 0; + return FALSE; /* GP fault: IP out of range */ + } + + opcode = *(unsigned char *)Addr8086(regs->cs,eip); + eip++; + switch (opcode) { + case 0xf0: /* lock */ + case 0xf2: /* repne */ + case 0xf3: /* repe */ + case 0x2e: /* cs */ + case 0x36: /* ss */ + case 0x3e: /* ds */ + case 0x26: /* es */ + case 0x64: /* fs */ + case 0x65: /* gs */ + /* ignore prefix */ + continue; + + case 0x66: /* data size */ + data_32 = TRUE; + continue; + + case 0x67: /* address size */ + addr_32 = TRUE; + continue; + + case 0xe4: /* inb imm */ + case 0xe5: /* inw imm */ + case 0xe6: /* outb imm */ + case 0xe7: /* outw imm */ + io_port = *(unsigned char *)Addr8086(regs->cs, eip); + eip++; + goto do_in_out; + + case 0xec: /* inb dx */ + case 0xed: /* inw dx */ + case 0xee: /* outb dx */ + case 0xef: /* outw dx */ + case 0x6c: /* insb */ + case 0x6d: /* insw */ + case 0x6e: /* outsb */ + case 0x6f: /* outsw */ + io_port = regs->edx & 0xffff; + + do_in_out: + if (!data_32) + opcode |= 0x6600; /* word IO */ + + switch (emulate_io(regs, opcode, io_port)) { + case EM_IO_DONE: + /* instruction executed */ + break; + case EM_IO_RETRY: + /* port mapped, retry instruction */ + thread->recover = 0; + return TRUE; + case EM_IO_ERROR: + /* port not mapped */ + thread->recover = 0; + return FALSE; + } + break; + + case 0xfa: /* cli */ + if (!v86_do_sti_cli) { + thread->recover = 0; + return (FALSE); + } + + v86->flags &= ~EFL_IF; + /* disable simulated interrupts */ + cli_count++; + break; + + case 0xfb: /* sti */ + if (!v86_do_sti_cli) { + thread->recover = 0; + return (FALSE); + } + + if ((v86->flags & EFL_IF) == 0) { + if (v86_do_sti_immediate) { + v86->flags |= EFL_IF; + } else { + v86->flags |= V86_IF_PENDING; + regs->efl |= EFL_TF; + } + /* single step to set IF next inst. */ + } + sti_count++; + break; + + case 0x9c: /* pushf */ + { + int flags; + vm_offset_t sp; + unsigned int size; + + flags = regs->efl; + if ((v86->flags & EFL_IF) == 0) + flags &= ~EFL_IF; + + if ((v86->flags & EFL_TF) == 0) + flags &= ~EFL_TF; + else flags |= EFL_TF; + + sp = regs->uesp; + if (!addr_32) + sp &= 0xffff; + else if (sp > 0xffff) + goto stack_error; + size = (data_32) ? 
4 : 2; + if (sp < size) + goto stack_error; + sp -= size; + if (copyout((char *)&flags, + (user_addr_t)Addr8086(regs->ss,sp), + size)) + goto addr_error; + if (addr_32) + regs->uesp = sp; + else + regs->uesp = (regs->uesp & 0xffff0000) | sp; + break; + } + + case 0x9d: /* popf */ + { + vm_offset_t sp; + int nflags; + + sp = regs->uesp; + if (!addr_32) + sp &= 0xffff; + else if (sp > 0xffff) + goto stack_error; + + if (data_32) { + if (sp > 0xffff - sizeof(int)) + goto stack_error; + nflags = *(int *)Addr8086(regs->ss,sp); + sp += sizeof(int); + } + else { + if (sp > 0xffff - sizeof(short)) + goto stack_error; + nflags = *(unsigned short *) + Addr8086(regs->ss,sp); + sp += sizeof(short); + } + if (addr_32) + regs->uesp = sp; + else + regs->uesp = (regs->uesp & 0xffff0000) | sp; + + if (v86->flags & V86_IRET_PENDING) { + v86->flags = nflags & (EFL_TF | EFL_IF); + v86->flags |= V86_IRET_PENDING; + } else { + v86->flags = nflags & (EFL_TF | EFL_IF); + } + regs->efl = (regs->efl & ~EFL_V86_SAFE) + | (nflags & EFL_V86_SAFE); + break; + } + case 0xcf: /* iret */ + { + vm_offset_t sp; + int nflags; + union iret_struct iret_struct; + + v86->flags &= ~V86_IRET_PENDING; + sp = regs->uesp; + if (!addr_32) + sp &= 0xffff; + else if (sp > 0xffff) + goto stack_error; + + if (data_32) { + if (sp > 0xffff - sizeof(struct iret_32)) + goto stack_error; + iret_struct.iret_32 = + *(struct iret_32 *) Addr8086(regs->ss,sp); + sp += sizeof(struct iret_32); + } + else { + if (sp > 0xffff - sizeof(struct iret_16)) + goto stack_error; + iret_struct.iret_16 = + *(struct iret_16 *) Addr8086(regs->ss,sp); + sp += sizeof(struct iret_16); + } + if (addr_32) + regs->uesp = sp; + else + regs->uesp = (regs->uesp & 0xffff0000) | sp; + + if (data_32) { + eip = iret_struct.iret_32.eip; + regs->cs = iret_struct.iret_32.cs & 0xffff; + nflags = iret_struct.iret_32.eflags; + } + else { + eip = iret_struct.iret_16.ip; + regs->cs = iret_struct.iret_16.cs; + nflags = iret_struct.iret_16.flags; + } + + v86->flags = nflags & (EFL_TF | EFL_IF); + regs->efl = (regs->efl & ~EFL_V86_SAFE) + | (nflags & EFL_V86_SAFE); + break; + } + default: + /* + * Instruction not emulated here. + */ + thread->recover = 0; + return FALSE; + } + break; /* exit from 'while TRUE' */ + } + regs->eip = (regs->eip & 0xffff0000) | eip; + } + else { + /* + * Not a trap we handle. 
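/*
 * The pushf/popf/iret emulation above never lets the v86 task set
 * privileged EFLAGS bits directly: only the arithmetic and trace bits in
 * EFL_V86_SAFE are taken from the popped image, the rest keep the
 * monitor's values, and the virtual IF/TF state is tracked separately in
 * the per-thread v86 state. A sketch of that merge using the architectural
 * EFLAGS bit values.
 */
#include <stdint.h>

#define SK_EFL_CF	0x00000001
#define SK_EFL_PF	0x00000004
#define SK_EFL_AF	0x00000010
#define SK_EFL_ZF	0x00000040
#define SK_EFL_SF	0x00000080
#define SK_EFL_TF	0x00000100
#define SK_EFL_IF	0x00000200
#define SK_EFL_DF	0x00000400
#define SK_EFL_OF	0x00000800

#define SK_EFL_V86_SAFE \
	(SK_EFL_OF | SK_EFL_DF | SK_EFL_TF | SK_EFL_SF | SK_EFL_ZF | \
	 SK_EFL_AF | SK_EFL_PF | SK_EFL_CF)

static uint32_t
sk_merge_guest_flags(uint32_t real_efl, uint32_t popped, uint32_t *v86_flags)
{
	*v86_flags = popped & (SK_EFL_TF | SK_EFL_IF);	/* virtual IF/TF */
	return (real_efl & ~SK_EFL_V86_SAFE) | (popped & SK_EFL_V86_SAFE);
}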
+ */ + thread->recover = 0; + return FALSE; + } + + if ((v86->flags & EFL_IF) && ((v86->flags & V86_IRET_PENDING)==0)) { + + struct v86_interrupt_table *int_table; + int int_count; + int vec; + int i; + + int_table = (struct v86_interrupt_table *) v86->int_table; + int_count = v86->int_count; + + vec = 0; + for (i = 0; i < int_count; int_table++, i++) { + if (!int_table->mask && int_table->count > 0) { + int_table->count--; + vec = int_table->vec; + break; + } + } + if (vec != 0) { + /* + * Take this interrupt + */ + vm_offset_t sp; + struct iret_16 iret_16; + struct int_vec int_vec; + + sp = regs->uesp & 0xffff; + if (sp < sizeof(struct iret_16)) + goto stack_error; + sp -= sizeof(struct iret_16); + iret_16.ip = regs->eip; + iret_16.cs = regs->cs; + iret_16.flags = regs->efl & 0xFFFF; + if ((v86->flags & EFL_TF) == 0) + iret_16.flags &= ~EFL_TF; + else iret_16.flags |= EFL_TF; + + (void) memcpy((char *) &int_vec, + (char *) (sizeof(struct int_vec) * vec), + sizeof (struct int_vec)); + if (copyout((char *)&iret_16, + (user_addr_t)Addr8086(regs->ss,sp), + sizeof(struct iret_16))) + goto addr_error; + regs->uesp = (regs->uesp & 0xFFFF0000) | (sp & 0xffff); + regs->eip = int_vec.ip; + regs->cs = int_vec.cs; + regs->efl &= ~EFL_TF; + v86->flags &= ~(EFL_IF | EFL_TF); + v86->flags |= V86_IRET_PENDING; + } + } + + thread->recover = 0; + return TRUE; + + /* + * On address error, report a page fault. + * XXX report GP fault - we don`t save + * the faulting address. + */ + addr_error: + __asm__("addr_error:;"); + thread->recover = 0; + return FALSE; + + /* + * On stack address error, return stack fault (12). + */ + stack_error: + thread->recover = 0; + regs->trapno = T_STACK_FAULT; + return FALSE; +} /* * Handle AST traps for i386. @@ -1050,17 +1029,45 @@ extern void log_thread_action (thread_t, char *); void i386_astintr(int preemption) { - ast_t mask = AST_ALL; + ast_t *my_ast, mask = AST_ALL; spl_t s; - if (preemption) - mask = AST_PREEMPTION; - - s = splsched(); + s = splsched(); /* block interrupts to check reasons */ + mp_disable_preemption(); + my_ast = ast_pending(); + if (*my_ast & AST_I386_FP) { + /* + * AST was for delayed floating-point exception - + * FP interrupt occurred while in kernel. + * Turn off this AST reason and handle the FPU error. + */ + + ast_off(AST_I386_FP); + mp_enable_preemption(); + splx(s); + + fpexterrflt(); + } + else { + /* + * Not an FPU trap. Handle the AST. + * Interrupts are still blocked. + */ + +#if 1 + if (preemption) { + mask = AST_PREEMPTION; + mp_enable_preemption(); + } else { + mp_enable_preemption(); + } +#else + mp_enable_preemption(); +#endif ast_taken(mask, s); - splx(s); + } } /* @@ -1079,68 +1086,133 @@ i386_exception( int code, int subcode) { + spl_t s; exception_data_type_t codes[EXCEPTION_CODE_MAX]; + /* + * Turn off delayed FPU error handling. + */ + s = splsched(); + mp_disable_preemption(); + ast_off(AST_I386_FP); + mp_enable_preemption(); + splx(s); + codes[0] = code; /* new exception interface */ codes[1] = subcode; exception_triage(exc, codes, 2); /*NOTREACHED*/ } - -void -kernel_preempt_check(void) +boolean_t +check_io_fault( + struct i386_saved_state *regs) { - ast_t *myast; - boolean_t intr; + int eip, opcode, io_port; + boolean_t data_16 = FALSE; /* - * disable interrupts to both prevent pre-emption - * and to keep the ast state from changing via - * an interrupt handler making something runnable + * Get the instruction. 
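/*
 * Both the v86 emulation above and check_io_fault() decode the faulting
 * instruction by hand: prefixes are skipped, a 0x66 prefix toggles the
 * operand size (what counts as "word I/O" depends on the code segment's
 * default), 0xE4-0xE7 take the port from an immediate byte, and 0xEC-0xEF
 * plus the string forms 0x6C-0x6F take it from DX. A compact sketch of that
 * decode, fed the instruction bytes directly instead of fetching them from
 * the faulting context; the 32-bit-default convention of check_io_fault()
 * is assumed.
 */
#include <stddef.h>
#include <stdint.h>

static int
sk_decode_io(const uint8_t *insn, size_t len, uint16_t dx,
	     uint16_t *port, int *is_16bit)
{
	int data_16 = 0;

	for (size_t i = 0; i < len; i++) {
		uint8_t op = insn[i];

		switch (op) {
		case 0x66:				/* operand-size prefix */
			data_16 = 1;
			continue;
		case 0xf0: case 0xf2: case 0xf3:	/* lock/rep prefixes   */
		case 0x26: case 0x2e: case 0x36:	/* segment overrides   */
		case 0x3e: case 0x64: case 0x65:
			continue;
		case 0xe4: case 0xe5: case 0xe6: case 0xe7:	/* in/out imm8 */
			if (i + 1 >= len)
				return -1;
			*port = insn[i + 1];
			*is_16bit = data_16;
			return 0;
		case 0xec: case 0xed: case 0xee: case 0xef:	/* in/out dx   */
		case 0x6c: case 0x6d: case 0x6e: case 0x6f:	/* ins/outs    */
			*port = dx;
			*is_16bit = data_16;
			return 0;
		default:
			return -1;		/* not an I/O instruction */
		}
	}
	return -1;
}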
*/ - intr = ml_set_interrupts_enabled(FALSE); + eip = regs->eip; + + for (;;) { + opcode = inst_fetch(eip, regs->cs); + eip++; + switch (opcode) { + case 0x66: /* data-size prefix */ + data_16 = TRUE; + continue; + + case 0xf3: /* rep prefix */ + case 0x26: /* es */ + case 0x2e: /* cs */ + case 0x36: /* ss */ + case 0x3e: /* ds */ + case 0x64: /* fs */ + case 0x65: /* gs */ + continue; + + case 0xE4: /* inb imm */ + case 0xE5: /* inl imm */ + case 0xE6: /* outb imm */ + case 0xE7: /* outl imm */ + /* port is immediate byte */ + io_port = inst_fetch(eip, regs->cs); + eip++; + break; + + case 0xEC: /* inb dx */ + case 0xED: /* inl dx */ + case 0xEE: /* outb dx */ + case 0xEF: /* outl dx */ + case 0x6C: /* insb */ + case 0x6D: /* insl */ + case 0x6E: /* outsb */ + case 0x6F: /* outsl */ + /* port is in DX register */ + io_port = regs->edx & 0xFFFF; + break; + + default: + return FALSE; + } + break; + } - myast = ast_pending(); + if (data_16) + opcode |= 0x6600; /* word IO */ - if ((*myast & AST_URGENT) && intr == TRUE && get_interrupt_level() == 0) { - /* - * can handle interrupts and preemptions - * at this point - */ - ml_set_interrupts_enabled(intr); + switch (emulate_io(regs, opcode, io_port)) { + case EM_IO_DONE: + /* instruction executed */ + regs->eip = eip; + return TRUE; - /* - * now cause the PRE-EMPTION trap - */ - __asm__ volatile (" int $0xff"); - } else { - /* - * if interrupts were already disabled or - * we're in an interrupt context, we can't - * preempt... of course if AST_URGENT - * isn't set we also don't want to - */ - ml_set_interrupts_enabled(intr); + case EM_IO_RETRY: + /* port mapped, retry instruction */ + return TRUE; + + case EM_IO_ERROR: + /* port not mapped */ + return FALSE; + } + return FALSE; +} + +void +kernel_preempt_check (void) +{ + ast_t *myast; + + mp_disable_preemption(); + myast = ast_pending(); + if ((*myast & AST_URGENT) && + get_interrupt_level() == 1 + ) { + mp_enable_preemption_no_check(); + __asm__ volatile (" int $0xff"); + } else { + mp_enable_preemption_no_check(); } } #if MACH_KDB -extern void db_i386_state(x86_saved_state32_t *regs); +extern void db_i386_state(struct i386_saved_state *regs); #include void db_i386_state( - x86_saved_state32_t *regs) + struct i386_saved_state *regs) { db_printf("eip %8x\n", regs->eip); db_printf("trap %8x\n", regs->trapno); db_printf("err %8x\n", regs->err); db_printf("efl %8x\n", regs->efl); db_printf("ebp %8x\n", regs->ebp); - db_printf("esp %8x\n", regs->cr2); + db_printf("esp %8x\n", regs->esp); db_printf("uesp %8x\n", regs->uesp); db_printf("cs %8x\n", regs->cs & 0xff); db_printf("ds %8x\n", regs->ds & 0xff); @@ -1157,85 +1229,3 @@ db_i386_state( } #endif /* MACH_KDB */ - -/* Synchronize a thread's i386_kernel_state (if any) with the given - * i386_saved_state_t obtained from the trap/IPI handler; called in - * kernel_trap() prior to entering the debugger, and when receiving - * an "MP_KDP" IPI. - */ - -void -sync_iss_to_iks(x86_saved_state32_t *saved_state) -{ - struct x86_kernel_state32 *iks; - vm_offset_t kstack; - boolean_t record_active_regs = FALSE; - - if ((kstack = current_thread()->kernel_stack) != 0) { - x86_saved_state32_t *regs; - - regs = saved_state; - - iks = STACK_IKS(kstack); - - /* - * Did we take the trap/interrupt in kernel mode? 
- */ - if (regs == USER_REGS32(current_thread())) - record_active_regs = TRUE; - else { - iks->k_ebx = regs->ebx; - iks->k_esp = (int)regs; - iks->k_ebp = regs->ebp; - iks->k_edi = regs->edi; - iks->k_esi = regs->esi; - iks->k_eip = regs->eip; - } - } - - if (record_active_regs == TRUE) { - /* - * Show the trap handler path - */ - __asm__ volatile("movl %%ebx, %0" : "=m" (iks->k_ebx)); - __asm__ volatile("movl %%esp, %0" : "=m" (iks->k_esp)); - __asm__ volatile("movl %%ebp, %0" : "=m" (iks->k_ebp)); - __asm__ volatile("movl %%edi, %0" : "=m" (iks->k_edi)); - __asm__ volatile("movl %%esi, %0" : "=m" (iks->k_esi)); - /* - * "Current" instruction pointer - */ - __asm__ volatile("movl $1f, %0\n1:" : "=m" (iks->k_eip)); - } -} - -/* - * This is used by the NMI interrupt handler (from mp.c) to - * uncondtionally sync the trap handler context to the IKS - * irrespective of whether the NMI was fielded in kernel - * or user space. - */ -void -sync_iss_to_iks_unconditionally(__unused x86_saved_state32_t *saved_state) { - struct x86_kernel_state32 *iks; - vm_offset_t kstack; - boolean_t record_active_regs = FALSE; - - if ((kstack = current_thread()->kernel_stack) != 0) { - - iks = STACK_IKS(kstack); - /* - * Show the trap handler path - */ - __asm__ volatile("movl %%ebx, %0" : "=m" (iks->k_ebx)); - __asm__ volatile("movl %%esp, %0" : "=m" (iks->k_esp)); - __asm__ volatile("movl %%ebp, %0" : "=m" (iks->k_ebp)); - __asm__ volatile("movl %%edi, %0" : "=m" (iks->k_edi)); - __asm__ volatile("movl %%esi, %0" : "=m" (iks->k_esi)); - /* - * "Current" instruction pointer - */ - __asm__ volatile("movl $1f, %0\n1:" : "=m" (iks->k_eip)); - - } -}