/*
- * Copyright (c) 2000 Apple Computer, Inc. All rights reserved.
+ * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved.
*
* @APPLE_LICENSE_HEADER_START@
*
- * Copyright (c) 1999-2003 Apple Computer, Inc. All Rights Reserved.
- *
* This file contains Original Code and/or Modifications of Original Code
* as defined in and that are subject to the Apple Public Source License
* Version 2.0 (the 'License'). You may not use this file except in
/*
*/
-#include <cpus.h>
#include <platforms.h>
#include <mach/exception_types.h>
#include <mach/i386/fp_reg.h>
#include <kern/mach_param.h>
+#include <kern/processor.h>
#include <kern/thread.h>
#include <kern/zalloc.h>
#include <kern/misc_protos.h>
#include <i386/fpu.h>
#include <i386/trap.h>
#include <i386/pio.h>
+#include <i386/cpuid.h>
#include <i386/misc_protos.h>
#if 0
int fp_kind = FP_387; /* 80387 present */
zone_t ifps_zone; /* zone for FPU save area */
-#if NCPUS == 1
-volatile thread_act_t fp_act = THR_ACT_NULL;
- /* thread whose state is in FPU */
- /* always THR_ACT_NULL if emulating FPU */
-volatile thread_act_t fp_intr_act = THR_ACT_NULL;
-
-
#define clear_fpu() \
{ \
set_ts(); \
- fp_act = THR_ACT_NULL; \
}
-#else /* NCPUS > 1 */
-#define clear_fpu() \
- { \
- set_ts(); \
- }
-
-#endif
+#define ALIGNED(addr,size) (((unsigned)(addr)&((size)-1))==0)
/* Forward */
extern void fpinit(void);
extern void fp_save(
- thread_act_t thr_act);
+ thread_t thr_act);
extern void fp_load(
- thread_act_t thr_act);
+ thread_t thr_act);
/*
* Look for FPU and initialize it.
* then trying to read the correct bit patterns from
* the control and status registers.
*/
- set_cr0(get_cr0() & ~(CR0_EM|CR0_TS)); /* allow use of FPU */
+ set_cr0((get_cr0() & ~(CR0_EM|CR0_TS)) | CR0_NE); /* allow use of FPU */
fninit();
status = fnstsw();
fnstcw(&control);
if ((status & 0xff) == 0 &&
- (control & 0x103f) == 0x3f)
- {
-#if 0
- /*
- * We have a FPU of some sort.
- * Compare -infinity against +infinity
- * to check whether we have a 287 or a 387.
- */
- volatile double fp_infinity, fp_one, fp_zero;
- fp_one = 1.0;
- fp_zero = 0.0;
- fp_infinity = fp_one / fp_zero;
- if (fp_infinity == -fp_infinity) {
- /*
- * We have an 80287.
- */
- fp_kind = FP_287;
- __asm__ volatile(".byte 0xdb; .byte 0xe4"); /* fnsetpm */
- }
- else
-#endif
- {
- /*
- * We have a 387.
- */
- fp_kind = FP_387;
+ (control & 0x103f) == 0x3f)
+ {
+ fp_kind = FP_387; /* assume we have a 387 compatible instruction set */
+ /* Use FPU save/restore instructions if available */
+ if (cpuid_features() & CPUID_FEATURE_FXSR) {
+ fp_kind = FP_FXSR;
+ set_cr4(get_cr4() | CR4_FXS);
+ printf("Enabling XMM register save/restore");
+ /* And allow SIMD instructions if present */
+ if (cpuid_features() & CPUID_FEATURE_SSE) {
+ printf(" and SSE/SSE2");
+ set_cr4(get_cr4() | CR4_XMM);
+ }
+ printf(" opcodes\n");
}
+
/*
* Trap wait instructions. Turn off FPU for now.
*/
* Called only when thread terminating - no locking necessary.
*/
void
-fp_free(fps)
+fpu_free(fps)
struct i386_fpsave_state *fps;
{
ASSERT_IPL(SPL0);
-#if NCPUS == 1
- if ((fp_act != THR_ACT_NULL) && (fp_act->mact.pcb->ims.ifps == fps)) {
- /*
- * Make sure we don't get FPU interrupts later for
- * this thread
- */
- fwait();
+ zfree(ifps_zone, fps);
+}
+
+/*
+ * Set the floating-point state for a thread based
+ * on the FXSave formatted data. This is basically
+ * the same as fpu_set_state except it uses the
+ * expanded data structure.
+ * If the thread is not the current thread, it is
+ * not running (held). Locking needed against
+ * concurrent fpu_set_state or fpu_get_state.
+ */
+kern_return_t
+fpu_set_fxstate(
+ thread_t thr_act,
+ struct i386_float_state *state)
+{
+ register pcb_t pcb;
+ register struct i386_fpsave_state *ifps;
+ register struct i386_fpsave_state *new_ifps;
+
+ASSERT_IPL(SPL0);
+ if (fp_kind == FP_NO)
+ return KERN_FAILURE;
+
+ if (state->fpkind != FP_FXSR) {
+ /* strange if this happens, but in case someone builds one of these manually... */
+ return fpu_set_state(thr_act, state);
+ }
+
+ assert(thr_act != THREAD_NULL);
+ pcb = thr_act->machine.pcb;
+
+ if (state->initialized == 0) {
+ /*
+ * new FPU state is 'invalid'.
+ * Deallocate the fp state if it exists.
+ */
+ simple_lock(&pcb->lock);
+ ifps = pcb->ims.ifps;
+ pcb->ims.ifps = 0;
+ simple_unlock(&pcb->lock);
+
+ if (ifps != 0) {
+ zfree(ifps_zone, ifps);
+ }
+ }
+ else {
+ /*
+ * Valid state. Allocate the fp state if there is none.
+ */
+
+ new_ifps = 0;
+ Retry:
+ simple_lock(&pcb->lock);
+ ifps = pcb->ims.ifps;
+ if (ifps == 0) {
+ if (new_ifps == 0) {
+ simple_unlock(&pcb->lock);
+ new_ifps = (struct i386_fpsave_state *) zalloc(ifps_zone);
+ assert(ALIGNED(new_ifps,16));
+ goto Retry;
+ }
+ ifps = new_ifps;
+ new_ifps = 0;
+ bzero((char *)ifps, sizeof *ifps);
+ pcb->ims.ifps = ifps;
+ }
+
+ /*
+ * now copy over the new data.
+ */
+ bcopy((char *)&state->hw_state[0], (char *)&ifps->fx_save_state, sizeof(struct i386_fx_save));
+ ifps->fp_save_flavor = FP_FXSR;
+ simple_unlock(&pcb->lock);
+ if (new_ifps != 0)
+ zfree(ifps_zone, ifps);
+ }
+
+ return KERN_SUCCESS;
+}
+
+/*
+ * Get the floating-point state for a thread.
+ * If the thread is not the current thread, it is
+ * not running (held). Locking needed against
+ * concurrent fpu_set_state or fpu_get_state.
+ */
+kern_return_t
+fpu_get_fxstate(
+ thread_t thr_act,
+ register struct i386_float_state *state)
+{
+ register pcb_t pcb;
+ register struct i386_fpsave_state *ifps;
+
+ASSERT_IPL(SPL0);
+ if (fp_kind == FP_NO) {
+ return KERN_FAILURE;
+ } else if (fp_kind == FP_387) {
+ return fpu_get_state(thr_act, state);
+ }
+
+ assert(thr_act != THREAD_NULL);
+ pcb = thr_act->machine.pcb;
+
+ simple_lock(&pcb->lock);
+ ifps = pcb->ims.ifps;
+ if (ifps == 0) {
+ /*
+ * No valid floating-point state.
+ */
+ simple_unlock(&pcb->lock);
+ bzero((char *)state, sizeof(struct i386_float_state));
+ return KERN_SUCCESS;
+ }
- /* Mark it free and disable access */
+ /* Make sure we`ve got the latest fp state info */
+ /* If the live fpu state belongs to our target */
+ if (thr_act == current_thread())
+ {
+ clear_ts();
+ fp_save(thr_act);
clear_fpu();
}
-#endif /* NCPUS == 1 */
- zfree(ifps_zone, (vm_offset_t) fps);
+
+ state->fpkind = fp_kind;
+ state->exc_status = 0;
+ state->initialized = ifps->fp_valid;
+ bcopy( (char *)&ifps->fx_save_state, (char *)&state->hw_state[0], sizeof(struct i386_fx_save));
+
+ simple_unlock(&pcb->lock);
+
+ return KERN_SUCCESS;
}
/*
*/
kern_return_t
fpu_set_state(
- thread_act_t thr_act,
+ thread_t thr_act,
struct i386_float_state *state)
{
register pcb_t pcb;
if (fp_kind == FP_NO)
return KERN_FAILURE;
- assert(thr_act != THR_ACT_NULL);
- pcb = thr_act->mact.pcb;
-
-#if NCPUS == 1
-
- /*
- * If this thread`s state is in the FPU,
- * discard it; we are replacing the entire
- * FPU state.
- */
- if (fp_act == thr_act) {
- fwait(); /* wait for possible interrupt */
- clear_fpu(); /* no state in FPU */
- }
-#endif
+ assert(thr_act != THREAD_NULL);
+ pcb = thr_act->machine.pcb;
if (state->initialized == 0) {
/*
simple_unlock(&pcb->lock);
if (ifps != 0) {
- zfree(ifps_zone, (vm_offset_t) ifps);
+ zfree(ifps_zone, ifps);
}
}
else {
if (new_ifps == 0) {
simple_unlock(&pcb->lock);
new_ifps = (struct i386_fpsave_state *) zalloc(ifps_zone);
+ assert(ALIGNED(new_ifps,16));
goto Retry;
}
ifps = new_ifps;
new_ifps = 0;
+ bzero((char *)ifps, sizeof *ifps); // zero ALL fields first
pcb->ims.ifps = ifps;
}
ifps->fp_save_state.fp_dp = user_fp_state->fp_dp;
ifps->fp_save_state.fp_ds = user_fp_state->fp_ds;
ifps->fp_regs = *user_fp_regs;
-
+ ifps->fp_save_flavor = FP_387;
simple_unlock(&pcb->lock);
if (new_ifps != 0)
- zfree(ifps_zone, (vm_offset_t) ifps);
+ zfree(ifps_zone, ifps);
}
return KERN_SUCCESS;
*/
kern_return_t
fpu_get_state(
- thread_act_t thr_act,
+ thread_t thr_act,
register struct i386_float_state *state)
{
register pcb_t pcb;
if (fp_kind == FP_NO)
return KERN_FAILURE;
- assert(thr_act != THR_ACT_NULL);
- pcb = thr_act->mact.pcb;
+ assert(thr_act != THREAD_NULL);
+ pcb = thr_act->machine.pcb;
simple_lock(&pcb->lock);
ifps = pcb->ims.ifps;
/* Make sure we`ve got the latest fp state info */
/* If the live fpu state belongs to our target */
-#if NCPUS == 1
- if (thr_act == fp_act)
-#else
- if (thr_act == current_act())
-#endif
+ if (thr_act == current_thread())
{
clear_ts();
fp_save(thr_act);
*/
ASSERT_IPL(SPL0);
clear_ts();
-#if NCPUS == 1
-
- /*
- * If this thread`s state is in the FPU, we are done.
- */
- if (fp_act == current_act())
- return;
-
- /* Make sure we don't do fpsave() in fp_intr while doing fpsave()
- * here if the current fpu instruction generates an error.
- */
- fwait();
- /*
- * If another thread`s state is in the FPU, save it.
- */
- if (fp_act != THR_ACT_NULL) {
- fp_save(fp_act);
- }
-
- /*
- * Give this thread the FPU.
- */
- fp_act = current_act();
-
-#endif /* NCPUS == 1 */
/*
* Load this thread`s state into the FPU.
*/
- fp_load(current_act());
+ fp_load(current_thread());
}
/*
void
fpextovrflt(void)
{
- register thread_act_t thr_act = current_act();
+ register thread_t thr_act = current_thread();
register pcb_t pcb;
register struct i386_fpsave_state *ifps;
-#if NCPUS == 1
-
- /*
- * Is exception for the currently running thread?
- */
- if (fp_act != thr_act) {
- /* Uh oh... */
- panic("fpextovrflt");
- }
-#endif
-
/*
* This is a non-recoverable error.
* Invalidate the thread`s FPU state.
*/
- pcb = thr_act->mact.pcb;
+ pcb = thr_act->machine.pcb;
simple_lock(&pcb->lock);
ifps = pcb->ims.ifps;
pcb->ims.ifps = 0;
clear_fpu();
if (ifps)
- zfree(ifps_zone, (vm_offset_t) ifps);
+ zfree(ifps_zone, ifps);
/*
* Raise exception.
void
fpexterrflt(void)
{
- register thread_act_t thr_act = current_act();
+ register thread_t thr_act = current_thread();
ASSERT_IPL(SPL0);
-#if NCPUS == 1
- /*
- * Since FPU errors only occur on ESC or WAIT instructions,
- * the current thread should own the FPU. If it didn`t,
- * we should have gotten the task-switched interrupt first.
- */
- if (fp_act != THR_ACT_NULL) {
- panic("fpexterrflt");
- return;
- }
-
- /*
- * Check if we got a context switch between the interrupt and the AST
- * This can happen if the interrupt arrived after the FPU AST was
- * checked. In this case, raise the exception in fp_load when this
- * thread next time uses the FPU. Remember exception condition in
- * fp_valid (extended boolean 2).
- */
- if (fp_intr_act != thr_act) {
- if (fp_intr_act == THR_ACT_NULL) {
- panic("fpexterrflt: fp_intr_act == THR_ACT_NULL");
- return;
- }
- fp_intr_act->mact.pcb->ims.ifps->fp_valid = 2;
- fp_intr_act = THR_ACT_NULL;
- return;
- }
- fp_intr_act = THR_ACT_NULL;
-#else /* NCPUS == 1 */
/*
* Save the FPU state and turn off the FPU.
*/
fp_save(thr_act);
-#endif /* NCPUS == 1 */
/*
* Raise FPU exception.
*/
i386_exception(EXC_ARITHMETIC,
EXC_I386_EXTERR,
- thr_act->mact.pcb->ims.ifps->fp_save_state.fp_status);
+ thr_act->machine.pcb->ims.ifps->fp_save_state.fp_status);
/*NOTREACHED*/
}
* . if called from fpnoextflt or fp_intr, we are single-cpu
* . otherwise, thread is running.
*/
-
void
fp_save(
- thread_act_t thr_act)
+ thread_t thr_act)
{
- register pcb_t pcb = thr_act->mact.pcb;
+ register pcb_t pcb = thr_act->machine.pcb;
register struct i386_fpsave_state *ifps = pcb->ims.ifps;
-
if (ifps != 0 && !ifps->fp_valid) {
/* registers are in FPU */
ifps->fp_valid = TRUE;
- fnsave(&ifps->fp_save_state);
+ ifps->fp_save_flavor = FP_387;
+ if (FXSAFE()) {
+ fxsave(&ifps->fx_save_state); // save the SSE2/Fp state in addition is enabled
+ ifps->fp_save_flavor = FP_FXSR;
+ }
+ fnsave(&ifps->fp_save_state); // also update the old save area for now...
}
}
void
fp_load(
- thread_act_t thr_act)
+ thread_t thr_act)
{
- register pcb_t pcb = thr_act->mact.pcb;
+ register pcb_t pcb = thr_act->machine.pcb;
register struct i386_fpsave_state *ifps;
ASSERT_IPL(SPL0);
ifps = pcb->ims.ifps;
if (ifps == 0) {
ifps = (struct i386_fpsave_state *) zalloc(ifps_zone);
+ assert(ALIGNED(ifps,16));
bzero((char *)ifps, sizeof *ifps);
pcb->ims.ifps = ifps;
fpinit();
*/
i386_exception(EXC_ARITHMETIC,
EXC_I386_EXTERR,
- thr_act->mact.pcb->ims.ifps->fp_save_state.fp_status);
+ thr_act->machine.pcb->ims.ifps->fp_save_state.fp_status);
/*NOTREACHED*/
#endif
} else {
- frstor(ifps->fp_save_state);
+ if (ifps->fp_save_flavor == FP_FXSR) fxrstor(&ifps->fx_save_state);
+ else frstor(ifps->fp_save_state);
}
ifps->fp_valid = FALSE; /* in FPU */
}
+
/*
* Allocate and initialize FP state for current thread.
* Don't load state.
void
fp_state_alloc(void)
{
- pcb_t pcb = current_act()->mact.pcb;
+ pcb_t pcb = current_thread()->machine.pcb;
struct i386_fpsave_state *ifps;
ifps = (struct i386_fpsave_state *)zalloc(ifps_zone);
+ assert(ALIGNED(ifps,16));
bzero((char *)ifps, sizeof *ifps);
pcb->ims.ifps = ifps;
| (FPC_PC_53|FPC_IC_AFF);
ifps->fp_save_state.fp_status = 0;
ifps->fp_save_state.fp_tag = 0xffff; /* all empty */
+ ifps->fx_save_state.fx_control = ifps->fp_save_state.fp_control;
+ ifps->fx_save_state.fx_status = ifps->fp_save_state.fp_status;
+ ifps->fx_save_state.fx_tag = 0x00;
+ ifps->fx_save_state.fx_MXCSR = 0x1f80;
+
}
/*
- * fpflush(thread_act_t)
+ * fpflush(thread_t)
* Flush the current act's state, if needed
* (used by thread_terminate_self to ensure fp faults
* aren't satisfied by overly general trap code in the
* context of the reaper thread)
*/
void
-fpflush(thread_act_t thr_act)
+fpflush(__unused thread_t thr_act)
{
-#if NCPUS == 1
- if (fp_act && thr_act == fp_act) {
- clear_ts();
- fwait();
- clear_fpu();
- }
-#else
/* not needed on MP x86s; fp not lazily evaluated */
-#endif
}
fpintr(void)
{
spl_t s;
- thread_act_t thr_act = current_act();
+ thread_t thr_act = current_thread();
ASSERT_IPL(SPL1);
/*
/*
* Save the FPU context to the thread using it.
*/
-#if NCPUS == 1
- if (fp_act == THR_ACT_NULL) {
- printf("fpintr: FPU not belonging to anyone!\n");
- clear_ts();
- fninit();
- clear_fpu();
- return;
- }
-
- if (fp_act != thr_act) {
- /*
- * FPU exception is for a different thread.
- * When that thread again uses the FPU an exception will be
- * raised in fp_load. Remember the condition in fp_valid (== 2).
- */
- clear_ts();
- fp_save(fp_act);
- fp_act->mact.pcb->ims.ifps->fp_valid = 2;
- fninit();
- clear_fpu();
- /* leave fp_intr_act THR_ACT_NULL */
- return;
- }
- if (fp_intr_act != THR_ACT_NULL)
- panic("fp_intr: already caught intr");
- fp_intr_act = thr_act;
-#endif /* NCPUS == 1 */
-
clear_ts();
fp_save(thr_act);
fninit();