/*
- * Copyright (c) 2000 Apple Computer, Inc. All rights reserved.
+ * Copyright (c) 2000-2010 Apple Inc. All rights reserved.
*
- * @APPLE_LICENSE_HEADER_START@
- *
- * Copyright (c) 1999-2003 Apple Computer, Inc. All Rights Reserved.
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
*
* This file contains Original Code and/or Modifications of Original Code
* as defined in and that are subject to the Apple Public Source License
* Version 2.0 (the 'License'). You may not use this file except in
- * compliance with the License. Please obtain a copy of the License at
- * http://www.opensource.apple.com/apsl/ and read it before using this
- * file.
+ * compliance with the License. The rights granted to you under the License
+ * may not be used to create, or enable the creation or redistribution of,
+ * unlawful or unlicensed copies of an Apple operating system, or to
+ * circumvent, violate, or enable the circumvention or violation of, any
+ * terms of an Apple operating system software license agreement.
+ *
+ * Please obtain a copy of the License at
+ * http://www.opensource.apple.com/apsl/ and read it before using this file.
*
* The Original Code and all software distributed under the License are
* distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
* Please see the License for the specific language governing rights and
* limitations under the License.
*
- * @APPLE_LICENSE_HEADER_END@
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
*/
/*
* @OSF_COPYRIGHT@
/*
*/
-#include <cpus.h>
#include <platforms.h>
#include <mach/exception_types.h>
#include <mach/i386/thread_status.h>
#include <mach/i386/fp_reg.h>
+#include <mach/branch_predicates.h>
#include <kern/mach_param.h>
+#include <kern/processor.h>
#include <kern/thread.h>
#include <kern/zalloc.h>
#include <kern/misc_protos.h>
#include <kern/spl.h>
#include <kern/assert.h>
-#include <i386/thread.h>
+#include <libkern/OSAtomic.h>
+
+#include <architecture/i386/pio.h>
+#include <i386/cpuid.h>
#include <i386/fpu.h>
-#include <i386/trap.h>
-#include <i386/pio.h>
+#include <i386/proc_reg.h>
#include <i386/misc_protos.h>
+#include <i386/thread.h>
+#include <i386/trap.h>
+
+int fp_kind = FP_NO; /* not inited */
+zone_t ifps_zone; /* zone for FPU save area */
+
+#define ALIGNED(addr,size) (((uintptr_t)(addr)&((size)-1))==0)
+
+/* Forward */
+
+extern void fpinit(void);
+extern void fp_save(
+ thread_t thr_act);
+extern void fp_load(
+ thread_t thr_act);
+
+static void configure_mxcsr_capability_mask(struct x86_avx_thread_state *fps);
+
+struct x86_avx_thread_state initial_fp_state __attribute((aligned(64)));
+
+
+/* Global MXCSR capability bitmask */
+static unsigned int mxcsr_capability_mask;
-#if 0
-#include <i386/ipl.h>
-extern int curr_ipl;
-#define ASSERT_IPL(L) \
-{ \
- if (curr_ipl != L) { \
- printf("IPL is %d, expected %d\n", curr_ipl, L); \
- panic("fpu: wrong ipl"); \
- } \
+#define fninit() \
+ __asm__ volatile("fninit")
+
+#define fnstcw(control) \
+ __asm__("fnstcw %0" : "=m" (*(unsigned short *)(control)))
+
+#define fldcw(control) \
+ __asm__ volatile("fldcw %0" : : "m" (*(unsigned short *) &(control)) )
+
+#define fnclex() \
+ __asm__ volatile("fnclex")
+
+#define fnsave(state) \
+ __asm__ volatile("fnsave %0" : "=m" (*state))
+
+#define frstor(state) \
+ __asm__ volatile("frstor %0" : : "m" (state))
+
+#define fwait() \
+ __asm__("fwait");
+
+#define fxrstor(addr) __asm__ __volatile__("fxrstor %0" : : "m" (*(addr)))
+#define fxsave(addr) __asm__ __volatile__("fxsave %0" : "=m" (*(addr)))
+
+static uint32_t fp_register_state_size = 0;
+static uint32_t fpu_YMM_present = FALSE;
+static uint32_t cpuid_reevaluated = 0;
+
+static void fpu_store_registers(void *, boolean_t);
+static void fpu_load_registers(void *);
+
+extern void xsave64o(void);
+extern void xrstor64o(void);
+
+#define XMASK ((uint32_t) (XFEM_X87 | XFEM_SSE | XFEM_YMM))
+
+/* DRK: TODO replace opcodes with mnemonics when assembler support available */
+
+static inline void xsetbv(uint32_t mask_hi, uint32_t mask_lo) {
+ __asm__ __volatile__(".short 0x010F\n\t.byte 0xD1" :: "a"(mask_lo), "d"(mask_hi), "c" (XCR0));
}
-#else
-#define ASSERT_IPL(L)
-#endif
-int fp_kind = FP_387; /* 80387 present */
-zone_t ifps_zone; /* zone for FPU save area */
+static inline void xsave(void *a) {
+ /* MOD 0x4, operand ECX 0x1 */
+ __asm__ __volatile__(".short 0xAE0F\n\t.byte 0x21" :: "a"(XMASK), "d"(0), "c" (a));
+}
-#if NCPUS == 1
-volatile thread_act_t fp_act = THR_ACT_NULL;
- /* thread whose state is in FPU */
- /* always THR_ACT_NULL if emulating FPU */
-volatile thread_act_t fp_intr_act = THR_ACT_NULL;
+static inline void xrstor(void *a) {
+ /* MOD 0x5, operand ECX 0x1 */
+ __asm__ __volatile__(".short 0xAE0F\n\t.byte 0x29" :: "a"(XMASK), "d"(0), "c" (a));
+}
+static inline void xsave64(void *a) {
+ /* Out of line call that executes in 64-bit mode on K32 */
+ __asm__ __volatile__("call _xsave64o" :: "a"(XMASK), "d"(0), "c" (a));
+}
-#define clear_fpu() \
- { \
- set_ts(); \
- fp_act = THR_ACT_NULL; \
- }
+static inline void xrstor64(void *a) {
+ /* Out of line call that executes in 64-bit mode on K32 */
+ __asm__ __volatile__("call _xrstor64o" :: "a"(XMASK), "d"(0), "c" (a));
+}
-#else /* NCPUS > 1 */
-#define clear_fpu() \
- { \
- set_ts(); \
- }
+static inline unsigned short
+fnstsw(void)
+{
+ unsigned short status;
+ __asm__ volatile("fnstsw %0" : "=ma" (status));
+ return(status);
+}
-#endif
+/*
+ * Configure the initial FPU state presented to new threads.
+ * Determine the MXCSR capability mask, which allows us to mask off any
+ * potentially unsafe "reserved" bits before restoring the FPU context.
+ * *Not* per-cpu, assumes symmetry.
+ */
-/* Forward */
+static void
+configure_mxcsr_capability_mask(struct x86_avx_thread_state *fps)
+{
+ /* XSAVE requires a 64 byte aligned store */
+ assert(ALIGNED(fps, 64));
+ /* Clear, to prepare for the diagnostic FXSAVE */
+ bzero(fps, sizeof(*fps));
+
+ fpinit();
+ fpu_store_registers(fps, FALSE);
+
+ mxcsr_capability_mask = fps->fx_MXCSR_MASK;
+
+ /* Set default mask value if necessary */
+ if (mxcsr_capability_mask == 0)
+ mxcsr_capability_mask = 0xffbf;
+
+ /* Clear vector register store */
+ bzero(&fps->fx_XMM_reg[0][0], sizeof(fps->fx_XMM_reg));
+ bzero(&fps->x_YMMH_reg[0][0], sizeof(fps->x_YMMH_reg));
+
+ fps->fp_valid = TRUE;
+ fps->fp_save_layout = fpu_YMM_present ? XSAVE32: FXSAVE32;
+ fpu_load_registers(fps);
+
+ /* Poison values to trap unsafe usage */
+ fps->fp_valid = 0xFFFFFFFF;
+ fps->fp_save_layout = FP_UNUSED;
+
+ /* Re-enable FPU/SSE DNA exceptions */
+ set_ts();
+}
-extern void fpinit(void);
-extern void fp_save(
- thread_act_t thr_act);
-extern void fp_load(
- thread_act_t thr_act);
/*
* Look for FPU and initialize it.
void
init_fpu(void)
{
- unsigned short status, control;
-
+#if DEBUG
+ unsigned short status;
+ unsigned short control;
+#endif
/*
* Check for FPU by initializing it,
* then trying to read the correct bit patterns from
* the control and status registers.
*/
- set_cr0(get_cr0() & ~(CR0_EM|CR0_TS)); /* allow use of FPU */
-
+ set_cr0((get_cr0() & ~(CR0_EM|CR0_TS)) | CR0_NE); /* allow use of FPU */
fninit();
+#if DEBUG
status = fnstsw();
fnstcw(&control);
+
+ assert(((status & 0xff) == 0) && ((control & 0x103f) == 0x3f));
+#endif
+ /* Advertise SSE support */
+ if (cpuid_features() & CPUID_FEATURE_FXSR) {
+ fp_kind = FP_FXSR;
+ set_cr4(get_cr4() | CR4_OSFXS);
+ /* And allow SIMD exceptions if present */
+ if (cpuid_features() & CPUID_FEATURE_SSE) {
+ set_cr4(get_cr4() | CR4_OSXMM);
+ }
+ fp_register_state_size = sizeof(struct x86_fx_thread_state);
- if ((status & 0xff) == 0 &&
- (control & 0x103f) == 0x3f)
- {
-#if 0
- /*
- * We have a FPU of some sort.
- * Compare -infinity against +infinity
- * to check whether we have a 287 or a 387.
- */
- volatile double fp_infinity, fp_one, fp_zero;
- fp_one = 1.0;
- fp_zero = 0.0;
- fp_infinity = fp_one / fp_zero;
- if (fp_infinity == -fp_infinity) {
- /*
- * We have an 80287.
- */
- fp_kind = FP_287;
- __asm__ volatile(".byte 0xdb; .byte 0xe4"); /* fnsetpm */
- }
- else
+ } else
+ panic("fpu is not FP_FXSR");
+
+ /* Configure the XSAVE context mechanism if the processor supports
+ * AVX/YMM registers
+ */
+ if (cpuid_features() & CPUID_FEATURE_XSAVE) {
+ cpuid_xsave_leaf_t *xsp = &cpuid_info()->cpuid_xsave_leaf;
+ if (xsp->extended_state[0] & (uint32_t)XFEM_YMM) {
+ assert(xsp->extended_state[0] & (uint32_t) XFEM_SSE);
+ /* XSAVE container size for all features */
+ assert(xsp->extended_state[2] == sizeof(struct x86_avx_thread_state));
+ fp_register_state_size = sizeof(struct x86_avx_thread_state);
+ fpu_YMM_present = TRUE;
+ set_cr4(get_cr4() | CR4_OSXSAVE);
+ xsetbv(0, XMASK);
+ /* Re-evaluate CPUID, once, to reflect OSXSAVE */
+ if (OSCompareAndSwap(0, 1, &cpuid_reevaluated))
+ cpuid_set_info();
+ /* DRK: consider verifying AVX offset with cpuid(d, ECX:2) */
+ }
+ }
+ else
+ fpu_YMM_present = FALSE;
+
+ fpinit();
+
+ /*
+ * Trap wait instructions. Turn off FPU for now.
+ */
+ set_cr0(get_cr0() | CR0_TS | CR0_MP);
+}
+
+/*
+ * Allocate and initialize FP state for current thread.
+ * Don't load state.
+ */
+static void *
+fp_state_alloc(void)
+{
+ void *ifps = zalloc(ifps_zone);
+
+#if DEBUG
+ if (!(ALIGNED(ifps,64))) {
+ panic("fp_state_alloc: %p, %u, %p, %u", ifps, (unsigned) ifps_zone->elem_size, (void *) ifps_zone->free_elements, (unsigned) ifps_zone->alloc_size);
+ }
#endif
- {
- /*
- * We have a 387.
- */
- fp_kind = FP_387;
- }
- /*
- * Trap wait instructions. Turn off FPU for now.
- */
- set_cr0(get_cr0() | CR0_TS | CR0_MP);
+ return ifps;
+}
+
+static inline void
+fp_state_free(void *ifps)
+{
+ zfree(ifps_zone, ifps);
+}
+
+void clear_fpu(void)
+{
+ set_ts();
+}
+
+
+static void fpu_load_registers(void *fstate) {
+ struct x86_fx_thread_state *ifps = fstate;
+ fp_save_layout_t layout = ifps->fp_save_layout;
+
+ assert(layout == FXSAVE32 || layout == FXSAVE64 || layout == XSAVE32 || layout == XSAVE64);
+ assert(ALIGNED(ifps, 64));
+ assert(ml_get_interrupts_enabled() == FALSE);
+
+#if DEBUG
+ if (layout == XSAVE32 || layout == XSAVE64) {
+ struct x86_avx_thread_state *iavx = fstate;
+ unsigned i;
+ /* Verify reserved bits in the XSAVE header*/
+ if (iavx->_xh.xsbv & ~7)
+ panic("iavx->_xh.xsbv: 0x%llx", iavx->_xh.xsbv);
+ for (i = 0; i < sizeof(iavx->_xh.xhrsvd); i++)
+ if (iavx->_xh.xhrsvd[i])
+ panic("Reserved bit set");
+ }
+ if (fpu_YMM_present) {
+ if (layout != XSAVE32 && layout != XSAVE64)
+ panic("Inappropriate layout: %u\n", layout);
+ }
+#endif /* DEBUG */
+
+#if defined(__i386__)
+ if (layout == FXSAVE32) {
+ /* Restore the compatibility/legacy mode XMM+x87 state */
+ fxrstor(ifps);
+ }
+ else if (layout == FXSAVE64) {
+ fxrstor64(ifps);
+ }
+ else if (layout == XSAVE32) {
+ xrstor(ifps);
}
+ else if (layout == XSAVE64) {
+ xrstor64(ifps);
+ }
+#elif defined(__x86_64__)
+ if ((layout == XSAVE64) || (layout == XSAVE32))
+ xrstor(ifps);
else
- {
- /*
- * NO FPU.
- */
- fp_kind = FP_NO;
- set_cr0(get_cr0() | CR0_EM);
+ fxrstor(ifps);
+#endif
+}
+
+static void fpu_store_registers(void *fstate, boolean_t is64) {
+ struct x86_fx_thread_state *ifps = fstate;
+ assert(ALIGNED(ifps, 64));
+#if defined(__i386__)
+ if (!is64) {
+ if (fpu_YMM_present) {
+ xsave(ifps);
+ ifps->fp_save_layout = XSAVE32;
+ }
+ else {
+ /* save the compatibility/legacy mode XMM+x87 state */
+ fxsave(ifps);
+ ifps->fp_save_layout = FXSAVE32;
+ }
+ }
+ else {
+ if (fpu_YMM_present) {
+ xsave64(ifps);
+ ifps->fp_save_layout = XSAVE64;
+ }
+ else {
+ fxsave64(ifps);
+ ifps->fp_save_layout = FXSAVE64;
+ }
}
+#elif defined(__x86_64__)
+ if (fpu_YMM_present) {
+ xsave(ifps);
+ ifps->fp_save_layout = is64 ? XSAVE64 : XSAVE32;
+ }
+ else {
+ fxsave(ifps);
+ ifps->fp_save_layout = is64 ? FXSAVE64 : FXSAVE32;
+ }
+#endif
}
/*
* Initialize FP handling.
*/
+
void
fpu_module_init(void)
{
- ifps_zone = zinit(sizeof(struct i386_fpsave_state),
- THREAD_MAX * sizeof(struct i386_fpsave_state),
- THREAD_CHUNK * sizeof(struct i386_fpsave_state),
- "i386 fpsave state");
+ if ((fp_register_state_size != sizeof(struct x86_fx_thread_state)) &&
+ (fp_register_state_size != sizeof(struct x86_avx_thread_state)))
+ panic("fpu_module_init: incorrect savearea size %u\n", fp_register_state_size);
+
+ assert(fpu_YMM_present != 0xFFFFFFFF);
+
+ /* We explicitly choose an allocation size of 64
+ * to eliminate waste for the 832 byte sized
+ * AVX XSAVE register save area.
+ */
+ ifps_zone = zinit(fp_register_state_size,
+ thread_max * fp_register_state_size,
+ 64 * fp_register_state_size,
+ "x86 fpsave state");
+
+#if ZONE_DEBUG
+ /* To maintain the required alignment, disable
+ * zone debugging for this zone as that appends
+ * 16 bytes to each element.
+ */
+ zone_debug_disable(ifps_zone);
+#endif
+ /* Determine MXCSR reserved bits and configure initial FPU state*/
+ configure_mxcsr_capability_mask(&initial_fp_state);
}
/*
- * Free a FPU save area.
- * Called only when thread terminating - no locking necessary.
+ * Save thread`s FPU context.
*/
void
-fp_free(fps)
- struct i386_fpsave_state *fps;
+fpu_save_context(thread_t thread)
{
-ASSERT_IPL(SPL0);
-#if NCPUS == 1
- if ((fp_act != THR_ACT_NULL) && (fp_act->mact.pcb->ims.ifps == fps)) {
- /*
- * Make sure we don't get FPU interrupts later for
- * this thread
- */
- fwait();
+ struct x86_fx_thread_state *ifps;
- /* Mark it free and disable access */
- clear_fpu();
+ assert(ml_get_interrupts_enabled() == FALSE);
+ ifps = (thread)->machine.ifps;
+#if DEBUG
+ if (ifps && ((ifps->fp_valid != FALSE) && (ifps->fp_valid != TRUE))) {
+ panic("ifps->fp_valid: %u\n", ifps->fp_valid);
}
-#endif /* NCPUS == 1 */
- zfree(ifps_zone, (vm_offset_t) fps);
+#endif
+ if (ifps != 0 && (ifps->fp_valid == FALSE)) {
+ /* Clear CR0.TS in preparation for the FP context save. In
+ * theory, this shouldn't be necessary since a live FPU should
+ * indicate that TS is clear. However, various routines
+ * (such as sendsig & sigreturn) manipulate TS directly.
+ */
+ clear_ts();
+ /* registers are in FPU - save to memory */
+ fpu_store_registers(ifps, (thread_is_64bit(thread) && is_saved_state64(thread->machine.iss)));
+ ifps->fp_valid = TRUE;
+ }
+ set_ts();
+}
+
+
+/*
+ * Free a FPU save area.
+ * Called only when thread terminating - no locking necessary.
+ */
+void
+fpu_free(void *fps)
+{
+ fp_state_free(fps);
}
/*
- * Set the floating-point state for a thread.
+ * Set the floating-point state for a thread based
+ * on the FXSave formatted data. This is basically
+ * the same as fpu_set_state except it uses the
+ * expanded data structure.
* If the thread is not the current thread, it is
* not running (held). Locking needed against
* concurrent fpu_set_state or fpu_get_state.
*/
kern_return_t
-fpu_set_state(
- thread_act_t thr_act,
- struct i386_float_state *state)
+fpu_set_fxstate(
+ thread_t thr_act,
+ thread_state_t tstate,
+ thread_flavor_t f)
{
- register pcb_t pcb;
- register struct i386_fpsave_state *ifps;
- register struct i386_fpsave_state *new_ifps;
-
-ASSERT_IPL(SPL0);
+ struct x86_fx_thread_state *ifps;
+ struct x86_fx_thread_state *new_ifps;
+ x86_float_state64_t *state;
+ pcb_t pcb;
+ size_t state_size = sizeof(struct x86_fx_thread_state);
+ boolean_t old_valid;
if (fp_kind == FP_NO)
return KERN_FAILURE;
- assert(thr_act != THR_ACT_NULL);
- pcb = thr_act->mact.pcb;
-
-#if NCPUS == 1
+ state = (x86_float_state64_t *)tstate;
- /*
- * If this thread`s state is in the FPU,
- * discard it; we are replacing the entire
- * FPU state.
- */
- if (fp_act == thr_act) {
- fwait(); /* wait for possible interrupt */
- clear_fpu(); /* no state in FPU */
- }
-#endif
+ assert(thr_act != THREAD_NULL);
+ pcb = THREAD_TO_PCB(thr_act);
- if (state->initialized == 0) {
+ if (state == NULL) {
/*
* new FPU state is 'invalid'.
* Deallocate the fp state if it exists.
*/
simple_lock(&pcb->lock);
- ifps = pcb->ims.ifps;
- pcb->ims.ifps = 0;
+
+ ifps = pcb->ifps;
+ pcb->ifps = 0;
+
simple_unlock(&pcb->lock);
- if (ifps != 0) {
- zfree(ifps_zone, (vm_offset_t) ifps);
- }
- }
- else {
+ if (ifps != 0)
+ fp_state_free(ifps);
+ } else {
/*
* Valid state. Allocate the fp state if there is none.
*/
- register struct i386_fp_save *user_fp_state;
- register struct i386_fp_regs *user_fp_regs;
-
- user_fp_state = (struct i386_fp_save *) &state->hw_state[0];
- user_fp_regs = (struct i386_fp_regs *)
- &state->hw_state[sizeof(struct i386_fp_save)];
-
new_ifps = 0;
Retry:
simple_lock(&pcb->lock);
- ifps = pcb->ims.ifps;
+
+ ifps = pcb->ifps;
if (ifps == 0) {
if (new_ifps == 0) {
simple_unlock(&pcb->lock);
- new_ifps = (struct i386_fpsave_state *) zalloc(ifps_zone);
+ new_ifps = fp_state_alloc();
goto Retry;
}
ifps = new_ifps;
new_ifps = 0;
- pcb->ims.ifps = ifps;
+ pcb->ifps = ifps;
}
+ /*
+ * now copy over the new data.
+ */
+ old_valid = ifps->fp_valid;
+#if DEBUG
+ if ((old_valid == FALSE) && (thr_act != current_thread())) {
+ panic("fpu_set_fxstate inconsistency, thread: %p not stopped", thr_act);
+ }
+#endif
/*
- * Ensure that reserved parts of the environment are 0.
+ * Clear any reserved bits in the MXCSR to prevent a GPF
+ * when issuing an FXRSTOR.
*/
- bzero((char *)&ifps->fp_save_state, sizeof(struct i386_fp_save));
-
- ifps->fp_save_state.fp_control = user_fp_state->fp_control;
- ifps->fp_save_state.fp_status = user_fp_state->fp_status;
- ifps->fp_save_state.fp_tag = user_fp_state->fp_tag;
- ifps->fp_save_state.fp_eip = user_fp_state->fp_eip;
- ifps->fp_save_state.fp_cs = user_fp_state->fp_cs;
- ifps->fp_save_state.fp_opcode = user_fp_state->fp_opcode;
- ifps->fp_save_state.fp_dp = user_fp_state->fp_dp;
- ifps->fp_save_state.fp_ds = user_fp_state->fp_ds;
- ifps->fp_regs = *user_fp_regs;
+
+ state->fpu_mxcsr &= mxcsr_capability_mask;
+
+ bcopy((char *)&state->fpu_fcw, (char *)ifps, state_size);
+
+ if (fpu_YMM_present) {
+ struct x86_avx_thread_state *iavx = (void *) ifps;
+ uint32_t fpu_nyreg = 0;
+
+ if (f == x86_AVX_STATE32)
+ fpu_nyreg = 8;
+ else if (f == x86_AVX_STATE64)
+ fpu_nyreg = 16;
+
+ if (fpu_nyreg) {
+ x86_avx_state64_t *ystate = (x86_avx_state64_t *) state;
+ bcopy(&ystate->__fpu_ymmh0, &iavx->x_YMMH_reg[0][0], fpu_nyreg * sizeof(_STRUCT_XMM_REG));
+ }
+
+ iavx->fp_save_layout = thread_is_64bit(thr_act) ? XSAVE64 : XSAVE32;
+ /* Sanitize XSAVE header */
+ bzero(&iavx->_xh.xhrsvd[0], sizeof(iavx->_xh.xhrsvd));
+ if (state_size == sizeof(struct x86_avx_thread_state))
+ iavx->_xh.xsbv = (XFEM_YMM | XFEM_SSE | XFEM_X87);
+ else
+ iavx->_xh.xsbv = (XFEM_SSE | XFEM_X87);
+ }
+ else
+ ifps->fp_save_layout = thread_is_64bit(thr_act) ? FXSAVE64 : FXSAVE32;
+ ifps->fp_valid = old_valid;
+
+ if (old_valid == FALSE) {
+ boolean_t istate = ml_set_interrupts_enabled(FALSE);
+ ifps->fp_valid = TRUE;
+ set_ts();
+ ml_set_interrupts_enabled(istate);
+ }
simple_unlock(&pcb->lock);
+
if (new_ifps != 0)
- zfree(ifps_zone, (vm_offset_t) ifps);
+ fp_state_free(new_ifps);
}
-
return KERN_SUCCESS;
}
* concurrent fpu_set_state or fpu_get_state.
*/
kern_return_t
-fpu_get_state(
- thread_act_t thr_act,
- register struct i386_float_state *state)
+fpu_get_fxstate(
+ thread_t thr_act,
+ thread_state_t tstate,
+ thread_flavor_t f)
{
- register pcb_t pcb;
- register struct i386_fpsave_state *ifps;
+ struct x86_fx_thread_state *ifps;
+ x86_float_state64_t *state;
+ kern_return_t ret = KERN_FAILURE;
+ pcb_t pcb;
+ size_t state_size = sizeof(struct x86_fx_thread_state);
-ASSERT_IPL(SPL0);
if (fp_kind == FP_NO)
- return KERN_FAILURE;
+ return KERN_FAILURE;
+
+ state = (x86_float_state64_t *)tstate;
- assert(thr_act != THR_ACT_NULL);
- pcb = thr_act->mact.pcb;
+ assert(thr_act != THREAD_NULL);
+ pcb = THREAD_TO_PCB(thr_act);
simple_lock(&pcb->lock);
- ifps = pcb->ims.ifps;
+
+ ifps = pcb->ifps;
if (ifps == 0) {
- /*
- * No valid floating-point state.
- */
- simple_unlock(&pcb->lock);
- bzero((char *)state, sizeof(struct i386_float_state));
- return KERN_SUCCESS;
+ /*
+ * No valid floating-point state.
+ */
+
+ bcopy((char *)&initial_fp_state, (char *)&state->fpu_fcw,
+ state_size);
+
+ simple_unlock(&pcb->lock);
+
+ return KERN_SUCCESS;
}
+ /*
+ * Make sure we`ve got the latest fp state info
+ * If the live fpu state belongs to our target
+ */
+ if (thr_act == current_thread()) {
+ boolean_t intr;
- /* Make sure we`ve got the latest fp state info */
- /* If the live fpu state belongs to our target */
-#if NCPUS == 1
- if (thr_act == fp_act)
-#else
- if (thr_act == current_act())
-#endif
- {
- clear_ts();
- fp_save(thr_act);
- clear_fpu();
+ intr = ml_set_interrupts_enabled(FALSE);
+
+ clear_ts();
+ fp_save(thr_act);
+ clear_fpu();
+
+ (void)ml_set_interrupts_enabled(intr);
}
+ if (ifps->fp_valid) {
+ bcopy((char *)ifps, (char *)&state->fpu_fcw, state_size);
+ if (fpu_YMM_present) {
+ struct x86_avx_thread_state *iavx = (void *) ifps;
+ uint32_t fpu_nyreg = 0;
+
+ if (f == x86_AVX_STATE32)
+ fpu_nyreg = 8;
+ else if (f == x86_AVX_STATE64)
+ fpu_nyreg = 16;
+
+ if (fpu_nyreg) {
+ x86_avx_state64_t *ystate = (x86_avx_state64_t *) state;
+ bcopy(&iavx->x_YMMH_reg[0][0], &ystate->__fpu_ymmh0, fpu_nyreg * sizeof(_STRUCT_XMM_REG));
+ }
+ }
- state->fpkind = fp_kind;
- state->exc_status = 0;
+ ret = KERN_SUCCESS;
+ }
+ simple_unlock(&pcb->lock);
- {
- register struct i386_fp_save *user_fp_state;
- register struct i386_fp_regs *user_fp_regs;
+ return ret;
+}
- state->initialized = ifps->fp_valid;
- user_fp_state = (struct i386_fp_save *) &state->hw_state[0];
- user_fp_regs = (struct i386_fp_regs *)
- &state->hw_state[sizeof(struct i386_fp_save)];
- /*
- * Ensure that reserved parts of the environment are 0.
- */
- bzero((char *)user_fp_state, sizeof(struct i386_fp_save));
-
- user_fp_state->fp_control = ifps->fp_save_state.fp_control;
- user_fp_state->fp_status = ifps->fp_save_state.fp_status;
- user_fp_state->fp_tag = ifps->fp_save_state.fp_tag;
- user_fp_state->fp_eip = ifps->fp_save_state.fp_eip;
- user_fp_state->fp_cs = ifps->fp_save_state.fp_cs;
- user_fp_state->fp_opcode = ifps->fp_save_state.fp_opcode;
- user_fp_state->fp_dp = ifps->fp_save_state.fp_dp;
- user_fp_state->fp_ds = ifps->fp_save_state.fp_ds;
- *user_fp_regs = ifps->fp_regs;
+/*
+ * the child thread is 'stopped' with the thread
+ * mutex held and is currently not known by anyone
+ * so no way for fpu state to get manipulated by an
+ * outside agency -> no need for pcb lock
+ */
+
+void
+fpu_dup_fxstate(
+ thread_t parent,
+ thread_t child)
+{
+ struct x86_fx_thread_state *new_ifps = NULL;
+ boolean_t intr;
+ pcb_t ppcb;
+
+ ppcb = THREAD_TO_PCB(parent);
+
+ if (ppcb->ifps == NULL)
+ return;
+
+ if (child->machine.ifps)
+ panic("fpu_dup_fxstate: child's ifps non-null");
+
+ new_ifps = fp_state_alloc();
+
+ simple_lock(&ppcb->lock);
+
+ if (ppcb->ifps != NULL) {
+ struct x86_fx_thread_state *ifps = ppcb->ifps;
+ /*
+ * Make sure we`ve got the latest fp state info
+ */
+ intr = ml_set_interrupts_enabled(FALSE);
+ assert(current_thread() == parent);
+ clear_ts();
+ fp_save(parent);
+ clear_fpu();
+
+ (void)ml_set_interrupts_enabled(intr);
+
+ if (ifps->fp_valid) {
+ child->machine.ifps = new_ifps;
+ assert((fp_register_state_size == sizeof(struct x86_fx_thread_state)) ||
+ (fp_register_state_size == sizeof(struct x86_avx_thread_state)));
+ bcopy((char *)(ppcb->ifps),
+ (char *)(child->machine.ifps), fp_register_state_size);
+
+ /* Mark the new fp saved state as non-live. */
+ /* Temporarily disabled: radar 4647827
+ * new_ifps->fp_valid = TRUE;
+ */
+
+ /*
+ * Clear any reserved bits in the MXCSR to prevent a GPF
+ * when issuing an FXRSTOR.
+ */
+ new_ifps->fx_MXCSR &= mxcsr_capability_mask;
+ new_ifps = NULL;
+ }
}
- simple_unlock(&pcb->lock);
+ simple_unlock(&ppcb->lock);
- return KERN_SUCCESS;
+ if (new_ifps != NULL)
+ fp_state_free(new_ifps);
}
+
/*
* Initialize FPU.
*
- * Raise exceptions for:
- * invalid operation
- * divide by zero
- * overflow
- *
- * Use 53-bit precision.
*/
+
void
fpinit(void)
{
unsigned short control;
-ASSERT_IPL(SPL0);
clear_ts();
fninit();
fnstcw(&control);
control &= ~(FPC_PC|FPC_RC); /* Clear precision & rounding control */
- control |= (FPC_PC_53 | /* Set precision */
+ control |= (FPC_PC_64 | /* Set precision */
FPC_RC_RN | /* round-to-nearest */
FPC_ZE | /* Suppress zero-divide */
FPC_OE | /* and overflow */
FPC_DE | /* Allow denorms as operands */
FPC_PE); /* No trap for precision loss */
fldcw(control);
+
+ /* Initialize SSE/SSE2 */
+ __builtin_ia32_ldmxcsr(0x1f80);
}
/*
void
fpnoextflt(void)
{
- /*
- * Enable FPU use.
- */
-ASSERT_IPL(SPL0);
- clear_ts();
-#if NCPUS == 1
-
- /*
- * If this thread`s state is in the FPU, we are done.
- */
- if (fp_act == current_act())
- return;
-
- /* Make sure we don't do fpsave() in fp_intr while doing fpsave()
- * here if the current fpu instruction generates an error.
- */
- fwait();
- /*
- * If another thread`s state is in the FPU, save it.
- */
- if (fp_act != THR_ACT_NULL) {
- fp_save(fp_act);
+ boolean_t intr;
+ thread_t thr_act;
+ pcb_t pcb;
+ struct x86_fx_thread_state *ifps = 0;
+
+ thr_act = current_thread();
+ pcb = THREAD_TO_PCB(thr_act);
+
+ assert(fp_register_state_size != 0);
+
+ if (pcb->ifps == 0 && !get_interrupt_level()) {
+ ifps = fp_state_alloc();
+ bcopy((char *)&initial_fp_state, (char *)ifps,
+ fp_register_state_size);
+ if (!thread_is_64bit(thr_act)) {
+ ifps->fp_save_layout = fpu_YMM_present ? XSAVE32 : FXSAVE32;
+ }
+ else
+ ifps->fp_save_layout = fpu_YMM_present ? XSAVE64 : FXSAVE64;
+ ifps->fp_valid = TRUE;
}
+ intr = ml_set_interrupts_enabled(FALSE);
- /*
- * Give this thread the FPU.
- */
- fp_act = current_act();
+ clear_ts(); /* Enable FPU use */
-#endif /* NCPUS == 1 */
+ if (__improbable(get_interrupt_level())) {
+ /*
+ * Save current coprocessor context if valid
+ * Initialize coprocessor live context
+ */
+ fp_save(thr_act);
+ fpinit();
+ } else {
+ if (pcb->ifps == 0) {
+ pcb->ifps = ifps;
+ ifps = 0;
+ }
+ /*
+ * Load this thread`s state into coprocessor live context.
+ */
+ fp_load(thr_act);
+ }
+ (void)ml_set_interrupts_enabled(intr);
- /*
- * Load this thread`s state into the FPU.
- */
- fp_load(current_act());
+ if (ifps)
+ fp_state_free(ifps);
}
/*
void
fpextovrflt(void)
{
- register thread_act_t thr_act = current_act();
- register pcb_t pcb;
- register struct i386_fpsave_state *ifps;
+ thread_t thr_act = current_thread();
+ pcb_t pcb;
+ struct x86_fx_thread_state *ifps;
+ boolean_t intr;
-#if NCPUS == 1
+ intr = ml_set_interrupts_enabled(FALSE);
- /*
- * Is exception for the currently running thread?
- */
- if (fp_act != thr_act) {
- /* Uh oh... */
- panic("fpextovrflt");
- }
-#endif
+ if (get_interrupt_level())
+ panic("FPU segment overrun exception at interrupt context\n");
+ if (current_task() == kernel_task)
+ panic("FPU segment overrun exception in kernel thread context\n");
/*
* This is a non-recoverable error.
* Invalidate the thread`s FPU state.
*/
- pcb = thr_act->mact.pcb;
+ pcb = THREAD_TO_PCB(thr_act);
simple_lock(&pcb->lock);
- ifps = pcb->ims.ifps;
- pcb->ims.ifps = 0;
+ ifps = pcb->ifps;
+ pcb->ifps = 0;
simple_unlock(&pcb->lock);
/*
*/
clear_fpu();
+ (void)ml_set_interrupts_enabled(intr);
+
if (ifps)
- zfree(ifps_zone, (vm_offset_t) ifps);
+ zfree(ifps_zone, ifps);
/*
* Raise exception.
void
fpexterrflt(void)
{
- register thread_act_t thr_act = current_act();
+ thread_t thr_act = current_thread();
+ struct x86_fx_thread_state *ifps = thr_act->machine.ifps;
+ boolean_t intr;
-ASSERT_IPL(SPL0);
-#if NCPUS == 1
- /*
- * Since FPU errors only occur on ESC or WAIT instructions,
- * the current thread should own the FPU. If it didn`t,
- * we should have gotten the task-switched interrupt first.
- */
- if (fp_act != THR_ACT_NULL) {
- panic("fpexterrflt");
- return;
- }
+ intr = ml_set_interrupts_enabled(FALSE);
+
+ if (get_interrupt_level())
+ panic("FPU error exception at interrupt context\n");
+ if (current_task() == kernel_task)
+ panic("FPU error exception in kernel thread context\n");
- /*
- * Check if we got a context switch between the interrupt and the AST
- * This can happen if the interrupt arrived after the FPU AST was
- * checked. In this case, raise the exception in fp_load when this
- * thread next time uses the FPU. Remember exception condition in
- * fp_valid (extended boolean 2).
- */
- if (fp_intr_act != thr_act) {
- if (fp_intr_act == THR_ACT_NULL) {
- panic("fpexterrflt: fp_intr_act == THR_ACT_NULL");
- return;
- }
- fp_intr_act->mact.pcb->ims.ifps->fp_valid = 2;
- fp_intr_act = THR_ACT_NULL;
- return;
- }
- fp_intr_act = THR_ACT_NULL;
-#else /* NCPUS == 1 */
/*
* Save the FPU state and turn off the FPU.
*/
fp_save(thr_act);
-#endif /* NCPUS == 1 */
+
+ (void)ml_set_interrupts_enabled(intr);
/*
* Raise FPU exception.
- * Locking not needed on pcb->ims.ifps,
+ * Locking not needed on pcb->ifps,
* since thread is running.
*/
i386_exception(EXC_ARITHMETIC,
EXC_I386_EXTERR,
- thr_act->mact.pcb->ims.ifps->fp_save_state.fp_status);
+ ifps->fx_status);
+
/*NOTREACHED*/
}
* . if called from fpu_get_state, pcb already locked.
* . if called from fpnoextflt or fp_intr, we are single-cpu
* . otherwise, thread is running.
+ * N.B.: Must be called with interrupts disabled
*/
void
fp_save(
- thread_act_t thr_act)
+ thread_t thr_act)
{
- register pcb_t pcb = thr_act->mact.pcb;
- register struct i386_fpsave_state *ifps = pcb->ims.ifps;
+ pcb_t pcb = THREAD_TO_PCB(thr_act);
+ struct x86_fx_thread_state *ifps = pcb->ifps;
+ assert(ifps != 0);
if (ifps != 0 && !ifps->fp_valid) {
- /* registers are in FPU */
- ifps->fp_valid = TRUE;
- fnsave(&ifps->fp_save_state);
+ assert((get_cr0() & CR0_TS) == 0);
+ /* registers are in FPU */
+ ifps->fp_valid = TRUE;
+ fpu_store_registers(ifps, thread_is_64bit(thr_act));
}
}
void
fp_load(
- thread_act_t thr_act)
+ thread_t thr_act)
{
- register pcb_t pcb = thr_act->mact.pcb;
- register struct i386_fpsave_state *ifps;
+ pcb_t pcb = THREAD_TO_PCB(thr_act);
+ struct x86_fx_thread_state *ifps = pcb->ifps;
-ASSERT_IPL(SPL0);
- ifps = pcb->ims.ifps;
- if (ifps == 0) {
- ifps = (struct i386_fpsave_state *) zalloc(ifps_zone);
- bzero((char *)ifps, sizeof *ifps);
- pcb->ims.ifps = ifps;
- fpinit();
-#if 1
-/*
- * I'm not sure this is needed. Does the fpu regenerate the interrupt in
- * frstor or not? Without this code we may miss some exceptions, with it
- * we might send too many exceptions.
- */
- } else if (ifps->fp_valid == 2) {
- /* delayed exception pending */
+ assert(ifps);
+ assert(ifps->fp_valid == FALSE || ifps->fp_valid == TRUE);
- ifps->fp_valid = TRUE;
- clear_fpu();
- /*
- * Raise FPU exception.
- * Locking not needed on pcb->ims.ifps,
- * since thread is running.
- */
- i386_exception(EXC_ARITHMETIC,
- EXC_I386_EXTERR,
- thr_act->mact.pcb->ims.ifps->fp_save_state.fp_status);
- /*NOTREACHED*/
-#endif
+ if (ifps->fp_valid == FALSE) {
+ fpinit();
} else {
- frstor(ifps->fp_save_state);
+ fpu_load_registers(ifps);
}
ifps->fp_valid = FALSE; /* in FPU */
}
/*
- * Allocate and initialize FP state for current thread.
- * Don't load state.
- *
- * Locking not needed; always called on the current thread.
+ * SSE arithmetic exception handling code.
+ * Basically the same as the x87 exception handler with a different subtype
*/
-void
-fp_state_alloc(void)
-{
- pcb_t pcb = current_act()->mact.pcb;
- struct i386_fpsave_state *ifps;
-
- ifps = (struct i386_fpsave_state *)zalloc(ifps_zone);
- bzero((char *)ifps, sizeof *ifps);
- pcb->ims.ifps = ifps;
-
- ifps->fp_valid = TRUE;
- ifps->fp_save_state.fp_control = (0x037f
- & ~(FPC_IM|FPC_ZM|FPC_OM|FPC_PC))
- | (FPC_PC_53|FPC_IC_AFF);
- ifps->fp_save_state.fp_status = 0;
- ifps->fp_save_state.fp_tag = 0xffff; /* all empty */
-}
-
-/*
- * fpflush(thread_act_t)
- * Flush the current act's state, if needed
- * (used by thread_terminate_self to ensure fp faults
- * aren't satisfied by overly general trap code in the
- * context of the reaper thread)
- */
void
-fpflush(thread_act_t thr_act)
+fpSSEexterrflt(void)
{
-#if NCPUS == 1
- if (fp_act && thr_act == fp_act) {
- clear_ts();
- fwait();
- clear_fpu();
- }
-#else
- /* not needed on MP x86s; fp not lazily evaluated */
-#endif
-}
+ thread_t thr_act = current_thread();
+ struct x86_fx_thread_state *ifps = thr_act->machine.ifps;
+ boolean_t intr;
+ intr = ml_set_interrupts_enabled(FALSE);
-/*
- * Handle a coprocessor error interrupt on the AT386.
- * This comes in on line 5 of the slave PIC at SPL1.
- */
+ if (get_interrupt_level())
+ panic("SSE exception at interrupt context\n");
+ if (current_task() == kernel_task)
+ panic("SSE exception in kernel thread context\n");
-void
-fpintr(void)
-{
- spl_t s;
- thread_act_t thr_act = current_act();
-
-ASSERT_IPL(SPL1);
/*
- * Turn off the extended 'busy' line.
+ * Save the FPU state and turn off the FPU.
*/
- outb(0xf0, 0);
+ fp_save(thr_act);
+ (void)ml_set_interrupts_enabled(intr);
/*
- * Save the FPU context to the thread using it.
+ * Raise FPU exception.
+ * Locking not needed on pcb->ifps,
+ * since thread is running.
*/
-#if NCPUS == 1
- if (fp_act == THR_ACT_NULL) {
- printf("fpintr: FPU not belonging to anyone!\n");
- clear_ts();
- fninit();
- clear_fpu();
- return;
- }
+ assert(ifps->fp_save_layout == FXSAVE32 || ifps->fp_save_layout == FXSAVE64);
+ i386_exception(EXC_ARITHMETIC,
+ EXC_I386_SSEEXTERR,
+ ifps->fx_MXCSR);
+ /*NOTREACHED*/
+}
- if (fp_act != thr_act) {
- /*
- * FPU exception is for a different thread.
- * When that thread again uses the FPU an exception will be
- * raised in fp_load. Remember the condition in fp_valid (== 2).
- */
- clear_ts();
- fp_save(fp_act);
- fp_act->mact.pcb->ims.ifps->fp_valid = 2;
- fninit();
- clear_fpu();
- /* leave fp_intr_act THR_ACT_NULL */
- return;
- }
- if (fp_intr_act != THR_ACT_NULL)
- panic("fp_intr: already caught intr");
- fp_intr_act = thr_act;
-#endif /* NCPUS == 1 */
+void
+fp_setvalid(boolean_t value) {
+ thread_t thr_act = current_thread();
+ struct x86_fx_thread_state *ifps = thr_act->machine.ifps;
- clear_ts();
- fp_save(thr_act);
- fninit();
- clear_fpu();
+ if (ifps) {
+ ifps->fp_valid = value;
- /*
- * Since we are running on the interrupt stack, we must
- * signal the thread to take the exception when we return
- * to user mode. Use an AST to do this.
- *
- * Don`t set the thread`s AST field. If the thread is
- * descheduled before it takes the AST, it will notice
- * the FPU error when it reloads its FPU state.
- */
- s = splsched();
- mp_disable_preemption();
- ast_on(AST_I386_FP);
- mp_enable_preemption();
- splx(s);
+ if (value == TRUE) {
+ boolean_t istate = ml_set_interrupts_enabled(FALSE);
+ clear_fpu();
+ ml_set_interrupts_enabled(istate);
+ }
+ }
+}
+
+__private_extern__ boolean_t
+ml_fpu_avx_enabled(void) {
+ return (fpu_YMM_present == TRUE);
}