X-Git-Url: https://git.saurik.com/apple/xnu.git/blobdiff_plain/378393581903b274cb7a4d18e0d978071a6b592d..4452a7af2eac33dbad800bcc91f2399d62c18f53:/osfmk/i386/fpu.c?ds=sidebyside diff --git a/osfmk/i386/fpu.c b/osfmk/i386/fpu.c index 30de466be..ed19ecb12 100644 --- a/osfmk/i386/fpu.c +++ b/osfmk/i386/fpu.c @@ -1,23 +1,29 @@ /* * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved. * - * @APPLE_LICENSE_HEADER_START@ + * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * - * The contents of this file constitute Original Code as defined in and - * are subject to the Apple Public Source License Version 1.1 (the - * "License"). You may not use this file except in compliance with the - * License. Please obtain a copy of the License at - * http://www.apple.com/publicsource and read it before using this file. + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. The rights granted to you under the License + * may not be used to create, or enable the creation or redistribution of, + * unlawful or unlicensed copies of an Apple operating system, or to + * circumvent, violate, or enable the circumvention or violation of, any + * terms of an Apple operating system software license agreement. * - * This Original Code and all software distributed under the License are - * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this file. + * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the - * License for the specific language governing rights and limitations - * under the License. + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. * - * @APPLE_LICENSE_HEADER_END@ + * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ */ /* * @OSF_COPYRIGHT@ @@ -67,32 +73,14 @@ #include #include #include -#include +#include #include #include +#include -#if 0 -#include -extern int curr_ipl; -#define ASSERT_IPL(L) \ -{ \ - if (curr_ipl != L) { \ - printf("IPL is %d, expected %d\n", curr_ipl, L); \ - panic("fpu: wrong ipl"); \ - } \ -} -#else -#define ASSERT_IPL(L) -#endif - -int fp_kind = FP_387; /* 80387 present */ +int fp_kind = FP_NO; /* not inited */ zone_t ifps_zone; /* zone for FPU save area */ -#define clear_fpu() \ - { \ - set_ts(); \ - } - #define ALIGNED(addr,size) (((unsigned)(addr)&((size)-1))==0) /* Forward */ @@ -103,6 +91,63 @@ extern void fp_save( extern void fp_load( thread_t thr_act); +static void configure_mxcsr_capability_mask(struct x86_fpsave_state *ifps); + +struct x86_fpsave_state starting_fp_state; + + +/* Global MXCSR capability bitmask */ +static unsigned int mxcsr_capability_mask; + +/* + * Determine the MXCSR capability mask, which allows us to mask off any + * potentially unsafe "reserved" bits before restoring the FPU context. + * *Not* per-cpu, assumes symmetry. + */ +static void +configure_mxcsr_capability_mask(struct x86_fpsave_state *ifps) +{ + /* FXSAVE requires a 16 byte aligned store */ + assert(ALIGNED(ifps,16)); + /* Clear, to prepare for the diagnostic FXSAVE */ + bzero(ifps, sizeof(*ifps)); + /* Disable FPU/SSE Device Not Available exceptions */ + clear_ts(); + + __asm__ volatile("fxsave %0" : "=m" (ifps->fx_save_state)); + mxcsr_capability_mask = ifps->fx_save_state.fx_MXCSR_MASK; + + /* Set default mask value if necessary */ + if (mxcsr_capability_mask == 0) + mxcsr_capability_mask = 0xffbf; + + /* Re-enable FPU/SSE DNA exceptions */ + set_ts(); +} + +/* + * Allocate and initialize FP state for current thread. + * Don't load state. + */ +static struct x86_fpsave_state * +fp_state_alloc(void) +{ + struct x86_fpsave_state *ifps; + + ifps = (struct x86_fpsave_state *)zalloc(ifps_zone); + assert(ALIGNED(ifps,16)); + bzero((char *)ifps, sizeof *ifps); + + return ifps; +} + +static inline void +fp_state_free(struct x86_fpsave_state *ifps) +{ + zfree(ifps_zone, ifps); +} + + /* * Look for FPU and initialize it. * Called on each CPU. @@ -126,32 +171,41 @@ init_fpu(void) if ((status & 0xff) == 0 && (control & 0x103f) == 0x3f) { - fp_kind = FP_387; /* assume we have a 387 compatible instruction set */ /* Use FPU save/restore instructions if available */ - if (cpuid_features() & CPUID_FEATURE_FXSR) { - fp_kind = FP_FXSR; - set_cr4(get_cr4() | CR4_FXS); - printf("Enabling XMM register save/restore"); - /* And allow SIMD instructions if present */ - if (cpuid_features() & CPUID_FEATURE_SSE) { - printf(" and SSE/SSE2"); - set_cr4(get_cr4() | CR4_XMM); - } - printf(" opcodes\n"); - } + if (cpuid_features() & CPUID_FEATURE_FXSR) { + fp_kind = FP_FXSR; + set_cr4(get_cr4() | CR4_FXS); + printf("Enabling XMM register save/restore"); + /* And allow SIMD instructions if present */ + if (cpuid_features() & CPUID_FEATURE_SSE) { + printf(" and SSE/SSE2"); + set_cr4(get_cr4() | CR4_XMM); + } + printf(" opcodes\n"); + } else + panic("fpu is not FP_FXSR"); /* - * Trap wait instructions. Turn off FPU for now. + * initialze FPU to normal starting + * position so that we can take a snapshot + * of that state and store it for future use + * when we're asked for the FPU state of a + * thread, and it hasn't initiated any yet */ - set_cr0(get_cr0() | CR0_TS | CR0_MP); + fpinit(); + fxsave(&starting_fp_state.fx_save_state); + + /* + * Trap wait instructions. Turn off FPU for now. + */ + set_cr0(get_cr0() | CR0_TS | CR0_MP); } else { /* * NO FPU. */ - fp_kind = FP_NO; - set_cr0(get_cr0() | CR0_EM); + panic("fpu is not FP_FXSR"); } } @@ -161,10 +215,16 @@ init_fpu(void) void fpu_module_init(void) { - ifps_zone = zinit(sizeof(struct i386_fpsave_state), - THREAD_MAX * sizeof(struct i386_fpsave_state), - THREAD_CHUNK * sizeof(struct i386_fpsave_state), - "i386 fpsave state"); + struct x86_fpsave_state *new_ifps; + + ifps_zone = zinit(sizeof(struct x86_fpsave_state), + THREAD_MAX * sizeof(struct x86_fpsave_state), + THREAD_CHUNK * sizeof(struct x86_fpsave_state), + "x86 fpsave state"); + new_ifps = fp_state_alloc(); + /* Determine MXCSR reserved bits */ + configure_mxcsr_capability_mask(new_ifps); + fp_state_free(new_ifps); } /* @@ -173,10 +233,9 @@ fpu_module_init(void) */ void fpu_free(fps) - struct i386_fpsave_state *fps; + struct x86_fpsave_state *fps; { -ASSERT_IPL(SPL0); - zfree(ifps_zone, fps); + fp_state_free(fps); } /* @@ -190,71 +249,76 @@ ASSERT_IPL(SPL0); */ kern_return_t fpu_set_fxstate( - thread_t thr_act, - struct i386_float_state *state) + thread_t thr_act, + thread_state_t tstate) { - register pcb_t pcb; - register struct i386_fpsave_state *ifps; - register struct i386_fpsave_state *new_ifps; + struct x86_fpsave_state *ifps; + struct x86_fpsave_state *new_ifps; + x86_float_state64_t *state; + pcb_t pcb; -ASSERT_IPL(SPL0); if (fp_kind == FP_NO) - return KERN_FAILURE; + return KERN_FAILURE; + + state = (x86_float_state64_t *)tstate; - if (state->fpkind != FP_FXSR) { - /* strange if this happens, but in case someone builds one of these manually... */ - return fpu_set_state(thr_act, state); - } - assert(thr_act != THREAD_NULL); pcb = thr_act->machine.pcb; - if (state->initialized == 0) { - /* - * new FPU state is 'invalid'. - * Deallocate the fp state if it exists. - */ - simple_lock(&pcb->lock); - ifps = pcb->ims.ifps; - pcb->ims.ifps = 0; - simple_unlock(&pcb->lock); - - if (ifps != 0) { - zfree(ifps_zone, ifps); - } - } - else { - /* - * Valid state. Allocate the fp state if there is none. - */ + if (state == NULL) { + /* + * new FPU state is 'invalid'. + * Deallocate the fp state if it exists. + */ + simple_lock(&pcb->lock); - new_ifps = 0; + ifps = pcb->ifps; + pcb->ifps = 0; + + simple_unlock(&pcb->lock); + + if (ifps != 0) + fp_state_free(ifps); + } else { + /* + * Valid state. Allocate the fp state if there is none. + */ + new_ifps = 0; Retry: - simple_lock(&pcb->lock); - ifps = pcb->ims.ifps; - if (ifps == 0) { - if (new_ifps == 0) { - simple_unlock(&pcb->lock); - new_ifps = (struct i386_fpsave_state *) zalloc(ifps_zone); - assert(ALIGNED(new_ifps,16)); - goto Retry; + simple_lock(&pcb->lock); + + ifps = pcb->ifps; + if (ifps == 0) { + if (new_ifps == 0) { + simple_unlock(&pcb->lock); + new_ifps = fp_state_alloc(); + goto Retry; + } + ifps = new_ifps; + new_ifps = 0; + pcb->ifps = ifps; } - ifps = new_ifps; - new_ifps = 0; - bzero((char *)ifps, sizeof *ifps); - pcb->ims.ifps = ifps; - } + /* + * now copy over the new data. + */ + bcopy((char *)&state->fpu_fcw, + (char *)&ifps->fx_save_state, sizeof(struct x86_fx_save)); - /* - * now copy over the new data. - */ - bcopy((char *)&state->hw_state[0], (char *)&ifps->fx_save_state, sizeof(struct i386_fx_save)); - ifps->fp_save_flavor = FP_FXSR; - simple_unlock(&pcb->lock); - if (new_ifps != 0) - zfree(ifps_zone, ifps); - } + /* XXX The layout of the state set from user-space may need to be + * validated for consistency. + */ + ifps->fp_save_layout = thread_is_64bit(thr_act) ? FXSAVE64 : FXSAVE32; + /* + * Clear any reserved bits in the MXCSR to prevent a GPF + * when issuing an FXRSTOR. + */ + ifps->fx_save_state.fx_MXCSR &= mxcsr_capability_mask; + + simple_unlock(&pcb->lock); + if (new_ifps != 0) + fp_state_free(new_ifps); + } return KERN_SUCCESS; } @@ -266,234 +330,139 @@ ASSERT_IPL(SPL0); */ kern_return_t fpu_get_fxstate( - thread_t thr_act, - register struct i386_float_state *state) + thread_t thr_act, + thread_state_t tstate) { - register pcb_t pcb; - register struct i386_fpsave_state *ifps; + struct x86_fpsave_state *ifps; + x86_float_state64_t *state; + kern_return_t ret = KERN_FAILURE; + pcb_t pcb; -ASSERT_IPL(SPL0); - if (fp_kind == FP_NO) { + if (fp_kind == FP_NO) return KERN_FAILURE; - } else if (fp_kind == FP_387) { - return fpu_get_state(thr_act, state); - } + + state = (x86_float_state64_t *)tstate; assert(thr_act != THREAD_NULL); pcb = thr_act->machine.pcb; simple_lock(&pcb->lock); - ifps = pcb->ims.ifps; + + ifps = pcb->ifps; if (ifps == 0) { - /* - * No valid floating-point state. - */ - simple_unlock(&pcb->lock); - bzero((char *)state, sizeof(struct i386_float_state)); - return KERN_SUCCESS; - } + /* + * No valid floating-point state. + */ + bcopy((char *)&starting_fp_state.fx_save_state, + (char *)&state->fpu_fcw, sizeof(struct x86_fx_save)); - /* Make sure we`ve got the latest fp state info */ - /* If the live fpu state belongs to our target */ + simple_unlock(&pcb->lock); + + return KERN_SUCCESS; + } + /* + * Make sure we`ve got the latest fp state info + * If the live fpu state belongs to our target + */ if (thr_act == current_thread()) { - clear_ts(); - fp_save(thr_act); - clear_fpu(); - } - - state->fpkind = fp_kind; - state->exc_status = 0; - state->initialized = ifps->fp_valid; - bcopy( (char *)&ifps->fx_save_state, (char *)&state->hw_state[0], sizeof(struct i386_fx_save)); - - simple_unlock(&pcb->lock); - - return KERN_SUCCESS; -} - -/* - * Set the floating-point state for a thread. - * If the thread is not the current thread, it is - * not running (held). Locking needed against - * concurrent fpu_set_state or fpu_get_state. - */ -kern_return_t -fpu_set_state( - thread_t thr_act, - struct i386_float_state *state) -{ - register pcb_t pcb; - register struct i386_fpsave_state *ifps; - register struct i386_fpsave_state *new_ifps; + boolean_t intr; -ASSERT_IPL(SPL0); - if (fp_kind == FP_NO) - return KERN_FAILURE; + intr = ml_set_interrupts_enabled(FALSE); - assert(thr_act != THREAD_NULL); - pcb = thr_act->machine.pcb; + clear_ts(); + fp_save(thr_act); + clear_fpu(); - if (state->initialized == 0) { - /* - * new FPU state is 'invalid'. - * Deallocate the fp state if it exists. - */ - simple_lock(&pcb->lock); - ifps = pcb->ims.ifps; - pcb->ims.ifps = 0; - simple_unlock(&pcb->lock); - - if (ifps != 0) { - zfree(ifps_zone, ifps); - } + (void)ml_set_interrupts_enabled(intr); } - else { - /* - * Valid state. Allocate the fp state if there is none. - */ - register struct i386_fp_save *user_fp_state; - register struct i386_fp_regs *user_fp_regs; - - user_fp_state = (struct i386_fp_save *) &state->hw_state[0]; - user_fp_regs = (struct i386_fp_regs *) - &state->hw_state[sizeof(struct i386_fp_save)]; - - new_ifps = 0; - Retry: - simple_lock(&pcb->lock); - ifps = pcb->ims.ifps; - if (ifps == 0) { - if (new_ifps == 0) { - simple_unlock(&pcb->lock); - new_ifps = (struct i386_fpsave_state *) zalloc(ifps_zone); - assert(ALIGNED(new_ifps,16)); - goto Retry; - } - ifps = new_ifps; - new_ifps = 0; - bzero((char *)ifps, sizeof *ifps); // zero ALL fields first - pcb->ims.ifps = ifps; - } - - /* - * Ensure that reserved parts of the environment are 0. - */ - bzero((char *)&ifps->fp_save_state, sizeof(struct i386_fp_save)); - - ifps->fp_save_state.fp_control = user_fp_state->fp_control; - ifps->fp_save_state.fp_status = user_fp_state->fp_status; - ifps->fp_save_state.fp_tag = user_fp_state->fp_tag; - ifps->fp_save_state.fp_eip = user_fp_state->fp_eip; - ifps->fp_save_state.fp_cs = user_fp_state->fp_cs; - ifps->fp_save_state.fp_opcode = user_fp_state->fp_opcode; - ifps->fp_save_state.fp_dp = user_fp_state->fp_dp; - ifps->fp_save_state.fp_ds = user_fp_state->fp_ds; - ifps->fp_regs = *user_fp_regs; - ifps->fp_save_flavor = FP_387; - simple_unlock(&pcb->lock); - if (new_ifps != 0) - zfree(ifps_zone, ifps); + if (ifps->fp_valid) { + bcopy((char *)&ifps->fx_save_state, + (char *)&state->fpu_fcw, sizeof(struct x86_fx_save)); + ret = KERN_SUCCESS; } + simple_unlock(&pcb->lock); - return KERN_SUCCESS; + return ret; } + /* - * Get the floating-point state for a thread. - * If the thread is not the current thread, it is - * not running (held). Locking needed against - * concurrent fpu_set_state or fpu_get_state. + * the child thread is 'stopped' with the thread + * mutex held and is currently not known by anyone + * so no way for fpu state to get manipulated by an + * outside agency -> no need for pcb lock */ -kern_return_t -fpu_get_state( - thread_t thr_act, - register struct i386_float_state *state) + +void +fpu_dup_fxstate( + thread_t parent, + thread_t child) { - register pcb_t pcb; - register struct i386_fpsave_state *ifps; + struct x86_fpsave_state *new_ifps = NULL; + boolean_t intr; + pcb_t ppcb; -ASSERT_IPL(SPL0); - if (fp_kind == FP_NO) - return KERN_FAILURE; + ppcb = parent->machine.pcb; - assert(thr_act != THREAD_NULL); - pcb = thr_act->machine.pcb; + if (ppcb->ifps == NULL) + return; - simple_lock(&pcb->lock); - ifps = pcb->ims.ifps; - if (ifps == 0) { - /* - * No valid floating-point state. - */ - simple_unlock(&pcb->lock); - bzero((char *)state, sizeof(struct i386_float_state)); - return KERN_SUCCESS; - } + if (child->machine.pcb->ifps) + panic("fpu_dup_fxstate: child's ifps non-null"); - /* Make sure we`ve got the latest fp state info */ - /* If the live fpu state belongs to our target */ - if (thr_act == current_thread()) - { - clear_ts(); - fp_save(thr_act); - clear_fpu(); - } + new_ifps = fp_state_alloc(); - state->fpkind = fp_kind; - state->exc_status = 0; + simple_lock(&ppcb->lock); - { - register struct i386_fp_save *user_fp_state; - register struct i386_fp_regs *user_fp_regs; + if (ppcb->ifps != NULL) { + /* + * Make sure we`ve got the latest fp state info + */ + intr = ml_set_interrupts_enabled(FALSE); - state->initialized = ifps->fp_valid; + clear_ts(); + fp_save(parent); + clear_fpu(); - user_fp_state = (struct i386_fp_save *) &state->hw_state[0]; - user_fp_regs = (struct i386_fp_regs *) - &state->hw_state[sizeof(struct i386_fp_save)]; + (void)ml_set_interrupts_enabled(intr); - /* - * Ensure that reserved parts of the environment are 0. - */ - bzero((char *)user_fp_state, sizeof(struct i386_fp_save)); - - user_fp_state->fp_control = ifps->fp_save_state.fp_control; - user_fp_state->fp_status = ifps->fp_save_state.fp_status; - user_fp_state->fp_tag = ifps->fp_save_state.fp_tag; - user_fp_state->fp_eip = ifps->fp_save_state.fp_eip; - user_fp_state->fp_cs = ifps->fp_save_state.fp_cs; - user_fp_state->fp_opcode = ifps->fp_save_state.fp_opcode; - user_fp_state->fp_dp = ifps->fp_save_state.fp_dp; - user_fp_state->fp_ds = ifps->fp_save_state.fp_ds; - *user_fp_regs = ifps->fp_regs; + if (ppcb->ifps->fp_valid) { + child->machine.pcb->ifps = new_ifps; + + bcopy((char *)&(ppcb->ifps->fx_save_state), + (char *)&(child->machine.pcb->ifps->fx_save_state), sizeof(struct x86_fx_save)); + + new_ifps->fp_save_layout = ppcb->ifps->fp_save_layout; + /* + * Clear any reserved bits in the MXCSR to prevent a GPF + * when issuing an FXRSTOR. + */ + new_ifps->fx_save_state.fx_MXCSR &= mxcsr_capability_mask; + new_ifps = NULL; + } } - simple_unlock(&pcb->lock); + simple_unlock(&ppcb->lock); - return KERN_SUCCESS; + if (new_ifps != NULL) + fp_state_free(new_ifps); } + /* * Initialize FPU. * - * Raise exceptions for: - * invalid operation - * divide by zero - * overflow - * - * Use 53-bit precision. */ void fpinit(void) { unsigned short control; -ASSERT_IPL(SPL0); clear_ts(); fninit(); fnstcw(&control); control &= ~(FPC_PC|FPC_RC); /* Clear precision & rounding control */ - control |= (FPC_PC_53 | /* Set precision */ + control |= (FPC_PC_64 | /* Set precision */ FPC_RC_RN | /* round-to-nearest */ FPC_ZE | /* Suppress zero-divide */ FPC_OE | /* and overflow */ @@ -502,6 +471,9 @@ ASSERT_IPL(SPL0); FPC_DE | /* Allow denorms as operands */ FPC_PE); /* No trap for precision loss */ fldcw(control); + + /* Initialize SSE/SSE2 */ + __builtin_ia32_ldmxcsr(0x1f80); } /* @@ -511,16 +483,27 @@ ASSERT_IPL(SPL0); void fpnoextflt(void) { - /* - * Enable FPU use. - */ -ASSERT_IPL(SPL0); - clear_ts(); + boolean_t intr; - /* - * Load this thread`s state into the FPU. - */ - fp_load(current_thread()); + intr = ml_set_interrupts_enabled(FALSE); + + clear_ts(); /* Enable FPU use */ + + if (get_interrupt_level()) { + /* + * Save current coprocessor context if valid + * Initialize coprocessor live context + */ + fp_save(current_thread()); + fpinit(); + } else { + /* + * Load this thread`s state into coprocessor live context. + */ + fp_load(current_thread()); + } + + (void)ml_set_interrupts_enabled(intr); } /* @@ -531,9 +514,17 @@ ASSERT_IPL(SPL0); void fpextovrflt(void) { - register thread_t thr_act = current_thread(); - register pcb_t pcb; - register struct i386_fpsave_state *ifps; + thread_t thr_act = current_thread(); + pcb_t pcb; + struct x86_fpsave_state *ifps; + boolean_t intr; + + intr = ml_set_interrupts_enabled(FALSE); + + if (get_interrupt_level()) + panic("FPU segment overrun exception at interrupt context\n"); + if (current_task() == kernel_task) + panic("FPU segment overrun exception in kernel thread context\n"); /* * This is a non-recoverable error. @@ -541,8 +532,8 @@ fpextovrflt(void) */ pcb = thr_act->machine.pcb; simple_lock(&pcb->lock); - ifps = pcb->ims.ifps; - pcb->ims.ifps = 0; + ifps = pcb->ifps; + pcb->ifps = 0; simple_unlock(&pcb->lock); /* @@ -556,6 +547,8 @@ fpextovrflt(void) */ clear_fpu(); + (void)ml_set_interrupts_enabled(intr); + if (ifps) zfree(ifps_zone, ifps); @@ -573,22 +566,33 @@ fpextovrflt(void) void fpexterrflt(void) { - register thread_t thr_act = current_thread(); + thread_t thr_act = current_thread(); + struct x86_fpsave_state *ifps = thr_act->machine.pcb->ifps; + boolean_t intr; + + intr = ml_set_interrupts_enabled(FALSE); + + if (get_interrupt_level()) + panic("FPU error exception at interrupt context\n"); + if (current_task() == kernel_task) + panic("FPU error exception in kernel thread context\n"); -ASSERT_IPL(SPL0); /* * Save the FPU state and turn off the FPU. */ fp_save(thr_act); + (void)ml_set_interrupts_enabled(intr); + /* * Raise FPU exception. - * Locking not needed on pcb->ims.ifps, + * Locking not needed on pcb->ifps, * since thread is running. */ i386_exception(EXC_ARITHMETIC, EXC_I386_EXTERR, - thr_act->machine.pcb->ims.ifps->fp_save_state.fp_status); + ifps->fx_save_state.fx_status); + /*NOTREACHED*/ } @@ -599,22 +603,30 @@ ASSERT_IPL(SPL0); * . if called from fpu_get_state, pcb already locked. * . if called from fpnoextflt or fp_intr, we are single-cpu * . otherwise, thread is running. + * N.B.: Must be called with interrupts disabled */ + void fp_save( thread_t thr_act) { - register pcb_t pcb = thr_act->machine.pcb; - register struct i386_fpsave_state *ifps = pcb->ims.ifps; + pcb_t pcb = thr_act->machine.pcb; + struct x86_fpsave_state *ifps = pcb->ifps; + if (ifps != 0 && !ifps->fp_valid) { - /* registers are in FPU */ - ifps->fp_valid = TRUE; - ifps->fp_save_flavor = FP_387; - if (FXSAFE()) { - fxsave(&ifps->fx_save_state); // save the SSE2/Fp state in addition is enabled - ifps->fp_save_flavor = FP_FXSR; - } - fnsave(&ifps->fp_save_state); // also update the old save area for now... + assert((get_cr0() & CR0_TS) == 0); + /* registers are in FPU */ + ifps->fp_valid = TRUE; + + if (!thread_is_64bit(thr_act)) { + /* save the compatibility/legacy mode XMM+x87 state */ + fxsave(&ifps->fx_save_state); + ifps->fp_save_layout = FXSAVE32; + } + else { + fxsave64(&ifps->fx_save_state); + ifps->fp_save_layout = FXSAVE64; + } } } @@ -628,76 +640,33 @@ void fp_load( thread_t thr_act) { - register pcb_t pcb = thr_act->machine.pcb; - register struct i386_fpsave_state *ifps; - -ASSERT_IPL(SPL0); - ifps = pcb->ims.ifps; - if (ifps == 0) { - ifps = (struct i386_fpsave_state *) zalloc(ifps_zone); - assert(ALIGNED(ifps,16)); - bzero((char *)ifps, sizeof *ifps); - pcb->ims.ifps = ifps; - fpinit(); -#if 1 -/* - * I'm not sure this is needed. Does the fpu regenerate the interrupt in - * frstor or not? Without this code we may miss some exceptions, with it - * we might send too many exceptions. - */ - } else if (ifps->fp_valid == 2) { - /* delayed exception pending */ - - ifps->fp_valid = TRUE; - clear_fpu(); - /* - * Raise FPU exception. - * Locking not needed on pcb->ims.ifps, - * since thread is running. - */ - i386_exception(EXC_ARITHMETIC, - EXC_I386_EXTERR, - thr_act->machine.pcb->ims.ifps->fp_save_state.fp_status); - /*NOTREACHED*/ -#endif + pcb_t pcb = thr_act->machine.pcb; + struct x86_fpsave_state *ifps; + + ifps = pcb->ifps; + if (ifps == 0 || ifps->fp_valid == FALSE) { + if (ifps == 0) { + /* FIXME: This allocation mechanism should be revised + * for scenarios where interrupts are disabled. + */ + ifps = fp_state_alloc(); + pcb->ifps = ifps; + } + fpinit(); } else { - if (ifps->fp_save_flavor == FP_FXSR) fxrstor(&ifps->fx_save_state); - else frstor(ifps->fp_save_state); + assert(ifps->fp_save_layout == FXSAVE32 || ifps->fp_save_layout == FXSAVE64); + if (ifps->fp_save_layout == FXSAVE32) { + /* Restore the compatibility/legacy mode XMM+x87 state */ + fxrstor(&ifps->fx_save_state); + } + else if (ifps->fp_save_layout == FXSAVE64) { + fxrstor64(&ifps->fx_save_state); + } } ifps->fp_valid = FALSE; /* in FPU */ } -/* - * Allocate and initialize FP state for current thread. - * Don't load state. - * - * Locking not needed; always called on the current thread. - */ -void -fp_state_alloc(void) -{ - pcb_t pcb = current_thread()->machine.pcb; - struct i386_fpsave_state *ifps; - - ifps = (struct i386_fpsave_state *)zalloc(ifps_zone); - assert(ALIGNED(ifps,16)); - bzero((char *)ifps, sizeof *ifps); - pcb->ims.ifps = ifps; - - ifps->fp_valid = TRUE; - ifps->fp_save_state.fp_control = (0x037f - & ~(FPC_IM|FPC_ZM|FPC_OM|FPC_PC)) - | (FPC_PC_53|FPC_IC_AFF); - ifps->fp_save_state.fp_status = 0; - ifps->fp_save_state.fp_tag = 0xffff; /* all empty */ - ifps->fx_save_state.fx_control = ifps->fp_save_state.fp_control; - ifps->fx_save_state.fx_status = ifps->fp_save_state.fp_status; - ifps->fx_save_state.fx_tag = 0x00; - ifps->fx_save_state.fx_MXCSR = 0x1f80; - -} - /* * fpflush(thread_t) @@ -712,44 +681,53 @@ fpflush(__unused thread_t thr_act) /* not needed on MP x86s; fp not lazily evaluated */ } - /* - * Handle a coprocessor error interrupt on the AT386. - * This comes in on line 5 of the slave PIC at SPL1. + * SSE arithmetic exception handling code. + * Basically the same as the x87 exception handler with a different subtype */ void -fpintr(void) +fpSSEexterrflt(void) { - spl_t s; - thread_t thr_act = current_thread(); + thread_t thr_act = current_thread(); + struct x86_fpsave_state *ifps = thr_act->machine.pcb->ifps; + boolean_t intr; -ASSERT_IPL(SPL1); - /* - * Turn off the extended 'busy' line. - */ - outb(0xf0, 0); + intr = ml_set_interrupts_enabled(FALSE); + + if (get_interrupt_level()) + panic("SSE exception at interrupt context\n"); + if (current_task() == kernel_task) + panic("SSE exception in kernel thread context\n"); /* - * Save the FPU context to the thread using it. + * Save the FPU state and turn off the FPU. */ - clear_ts(); fp_save(thr_act); - fninit(); - clear_fpu(); + (void)ml_set_interrupts_enabled(intr); /* - * Since we are running on the interrupt stack, we must - * signal the thread to take the exception when we return - * to user mode. Use an AST to do this. - * - * Don`t set the thread`s AST field. If the thread is - * descheduled before it takes the AST, it will notice - * the FPU error when it reloads its FPU state. + * Raise FPU exception. + * Locking not needed on pcb->ifps, + * since thread is running. */ - s = splsched(); - mp_disable_preemption(); - ast_on(AST_I386_FP); - mp_enable_preemption(); - splx(s); + assert(ifps->fp_save_layout == FXSAVE32 || ifps->fp_save_layout == FXSAVE64); + i386_exception(EXC_ARITHMETIC, + EXC_I386_SSEEXTERR, + ifps->fx_save_state.fx_status); + /*NOTREACHED*/ +} + + +void +fp_setvalid(boolean_t value) { + thread_t thr_act = current_thread(); + struct x86_fpsave_state *ifps = thr_act->machine.pcb->ifps; + + if (ifps) { + ifps->fp_valid = value; + + if (value == TRUE) + clear_fpu(); + } }