]> git.saurik.com Git - apple/xnu.git/blobdiff - osfmk/i386/fpu.c
xnu-792.6.56.tar.gz
[apple/xnu.git] / osfmk / i386 / fpu.c
index 34bf4752a6536eec49eb5b323af7ef6c4cee7346..2358b9234a95d5976a127b404facc95b10f9aadc 100644 (file)
@@ -1,10 +1,8 @@
 /*
- * Copyright (c) 2000 Apple Computer, Inc. All rights reserved.
+ * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved.
  *
  * @APPLE_LICENSE_HEADER_START@
  * 
- * Copyright (c) 1999-2003 Apple Computer, Inc.  All Rights Reserved.
- * 
  * This file contains Original Code and/or Modifications of Original Code
  * as defined in and that are subject to the Apple Public Source License
  * Version 2.0 (the 'License'). You may not use this file except in
@@ -53,7 +51,6 @@
 /*
  */
 
-#include <cpus.h>
 #include <platforms.h>
 
 #include <mach/exception_types.h>
@@ -61,6 +58,7 @@
 #include <mach/i386/fp_reg.h>
 
 #include <kern/mach_param.h>
+#include <kern/processor.h>
 #include <kern/thread.h>
 #include <kern/zalloc.h>
 #include <kern/misc_protos.h>
@@ -71,6 +69,7 @@
 #include <i386/fpu.h>
 #include <i386/trap.h>
 #include <i386/pio.h>
+#include <i386/cpuid.h>
 #include <i386/misc_protos.h>
 
 #if 0
@@ -90,34 +89,20 @@ extern int curr_ipl;
 int            fp_kind = FP_387;       /* 80387 present */
 zone_t         ifps_zone;              /* zone for FPU save area */
 
-#if    NCPUS == 1
-volatile thread_act_t  fp_act = THR_ACT_NULL;
-                                   /* thread whose state is in FPU */
-                                   /* always THR_ACT_NULL if emulating FPU */
-volatile thread_act_t  fp_intr_act = THR_ACT_NULL;
-
-
 #define        clear_fpu() \
     { \
        set_ts(); \
-       fp_act = THR_ACT_NULL; \
     }
 
-#else  /* NCPUS > 1 */
-#define        clear_fpu() \
-    { \
-       set_ts(); \
-    }
-
-#endif
+#define ALIGNED(addr,size)     (((unsigned)(addr)&((size)-1))==0)      /* true when the addr bits selected by size-1 are all clear; NOTE(review): a valid alignment test only for power-of-two size */
 
 /* Forward */
 
 extern void            fpinit(void);
 extern void            fp_save(
-                               thread_act_t    thr_act);
+                               thread_t        thr_act);
 extern void            fp_load(
-                               thread_act_t    thr_act);
+                               thread_t        thr_act);
 
 /*
  * Look for FPU and initialize it.
@@ -133,40 +118,29 @@ init_fpu(void)
         * then trying to read the correct bit patterns from
         * the control and status registers.
         */
-       set_cr0(get_cr0() & ~(CR0_EM|CR0_TS));  /* allow use of FPU */
+       set_cr0((get_cr0() & ~(CR0_EM|CR0_TS)) | CR0_NE);       /* allow use of FPU */
 
        fninit();
        status = fnstsw();
        fnstcw(&control);
 
        if ((status & 0xff) == 0 &&
-           (control & 0x103f) == 0x3f)
-       {
-#if 0
-           /*
-            * We have a FPU of some sort.
-            * Compare -infinity against +infinity
-            * to check whether we have a 287 or a 387.
-            */
-           volatile double fp_infinity, fp_one, fp_zero;
-           fp_one = 1.0;
-           fp_zero = 0.0;
-           fp_infinity = fp_one / fp_zero;
-           if (fp_infinity == -fp_infinity) {
-               /*
-                * We have an 80287.
-                */
-               fp_kind = FP_287;
-               __asm__ volatile(".byte 0xdb; .byte 0xe4");     /* fnsetpm */
-           }
-           else
-#endif
-                {
-               /*
-                * We have a 387.
-                */
-               fp_kind = FP_387;
+           (control & 0x103f) == 0x3f) 
+        {
+            fp_kind = FP_387;  /* assume we have a 387 compatible instruction set */
+           /* Use FPU save/restore instructions if available */
+            if (cpuid_features() & CPUID_FEATURE_FXSR) {
+               fp_kind = FP_FXSR;
+               set_cr4(get_cr4() | CR4_FXS);
+               printf("Enabling XMM register save/restore");
+               /* And allow SIMD instructions if present */
+               if (cpuid_features() & CPUID_FEATURE_SSE) {
+                   printf(" and SSE/SSE2");
+                   set_cr4(get_cr4() | CR4_XMM);
+               }
+               printf(" opcodes\n");
            }
+
            /*
             * Trap wait instructions.  Turn off FPU for now.
             */
@@ -199,23 +173,144 @@ fpu_module_init(void)
  * Called only when thread terminating - no locking necessary.
  */
 void
-fp_free(fps)
+fpu_free(fps)
        struct i386_fpsave_state *fps;
 {
 ASSERT_IPL(SPL0);
-#if    NCPUS == 1
-       if ((fp_act != THR_ACT_NULL) && (fp_act->mact.pcb->ims.ifps == fps)) {
-               /* 
-                * Make sure we don't get FPU interrupts later for
-                * this thread
-                */
-               fwait();
+       zfree(ifps_zone, fps);  /* hand the save area back to ifps_zone; nothing else is done here */
+}
+
+/*
+ * Set the floating-point state for a thread based
+ * on the FXSave formatted data. This is basically
+ * the same as fpu_set_state except it uses the
+ * expanded data structure.  Returns KERN_FAILURE when
+ * fp_kind is FP_NO; non-FXSR input goes to fpu_set_state.
+ * If the thread is not the current thread, it is not running
+ * (held).  Locking needed against concurrent fpu_set_state or fpu_get_state.
+ */
+kern_return_t
+fpu_set_fxstate(
+       thread_t                thr_act,
+       struct i386_float_state *state)
+{
+       register pcb_t  pcb;
+       register struct i386_fpsave_state *ifps;
+       register struct i386_fpsave_state *new_ifps;
+
+ASSERT_IPL(SPL0);
+       if (fp_kind == FP_NO)
+           return KERN_FAILURE;
+
+       if (state->fpkind != FP_FXSR) {
+           /* strange if this happens, but in case someone builds one of these manually... */
+           return fpu_set_state(thr_act, state);
+       }
+
+       assert(thr_act != THREAD_NULL);
+       pcb = thr_act->machine.pcb;
+
+       if (state->initialized == 0) {
+           /*
+            * new FPU state is 'invalid'.
+            * Detach the save area under the lock, free it afterwards.
+            */
+           simple_lock(&pcb->lock);
+           ifps = pcb->ims.ifps;
+           pcb->ims.ifps = 0;
+           simple_unlock(&pcb->lock);
+
+           if (ifps != 0) {
+               zfree(ifps_zone, ifps);
+           }
+       }
+       else {
+           /*
+            * Valid state.  Allocate the fp state if there is none.
+            * zalloc is called with the lock dropped, then the check is redone.
+            */
+           new_ifps = 0;
+       Retry:
+           simple_lock(&pcb->lock);
+           ifps = pcb->ims.ifps;
+           if (ifps == 0) {
+               if (new_ifps == 0) {
+                   simple_unlock(&pcb->lock);
+                   new_ifps = (struct i386_fpsave_state *) zalloc(ifps_zone);
+                   assert(ALIGNED(new_ifps,16));
+                   goto Retry;
+               }
+               ifps = new_ifps;
+               new_ifps = 0;
+               bzero((char *)ifps, sizeof *ifps);
+               pcb->ims.ifps = ifps;
+           }
+
+           /*
+            * now copy over the new data.
+            */
+           bcopy((char *)&state->hw_state[0], (char *)&ifps->fx_save_state, sizeof(struct i386_fx_save));
+           ifps->fp_save_flavor = FP_FXSR;
+           simple_unlock(&pcb->lock);
+           if (new_ifps != 0)                  /* spare allocation was not installed: release it, */
+               zfree(ifps_zone, new_ifps);     /* never ifps, which the pcb still references */
+       }
+
+       return KERN_SUCCESS;
+}
+
+/*
+ * Get a thread's floating-point state in FXSAVE format (fx_save_state).
+ * Fails if fp_kind is FP_NO; defers to fpu_get_state if it is FP_387.
+ * If the thread is not the current thread, it is not running (held).
+ * Locking needed against concurrent fpu_set_state or fpu_get_state.
+ */
+kern_return_t
+fpu_get_fxstate(
+       thread_t                                thr_act,
+       register struct i386_float_state        *state)
+{
+       register pcb_t  pcb;
+       register struct i386_fpsave_state *ifps;
+
+ASSERT_IPL(SPL0);
+       if (fp_kind == FP_NO) {
+           return KERN_FAILURE;
+       } else if (fp_kind == FP_387) {
+           return fpu_get_state(thr_act, state);
+       }
+
+       assert(thr_act != THREAD_NULL);
+       pcb = thr_act->machine.pcb;
+
+       simple_lock(&pcb->lock);
+       ifps = pcb->ims.ifps;
+       if (ifps == 0) {
+           /*
+            * No valid floating-point state: report an all-zero *state.
+            */
+           simple_unlock(&pcb->lock);
+           bzero((char *)state, sizeof(struct i386_float_state));
+           return KERN_SUCCESS;
+       }
 
-               /* Mark it free and disable access */
+       /* Make sure we've got the latest fp state info: */
+       /* if the target is the current thread, save the live FPU registers first */
+       if (thr_act == current_thread())
+       {
+           clear_ts();
+           fp_save(thr_act);
            clear_fpu();
        }
-#endif /* NCPUS == 1 */
-       zfree(ifps_zone, (vm_offset_t) fps);
+
+       state->fpkind = fp_kind;
+       state->exc_status = 0;
+        state->initialized = ifps->fp_valid;
+        bcopy( (char *)&ifps->fx_save_state, (char *)&state->hw_state[0], sizeof(struct i386_fx_save));
+
+       simple_unlock(&pcb->lock);
+
+       return KERN_SUCCESS;
 }
 
 /*
@@ -226,7 +321,7 @@ ASSERT_IPL(SPL0);
  */
 kern_return_t
 fpu_set_state(
-       thread_act_t            thr_act,
+       thread_t                thr_act,
        struct i386_float_state *state)
 {
        register pcb_t  pcb;
@@ -237,21 +332,8 @@ ASSERT_IPL(SPL0);
        if (fp_kind == FP_NO)
            return KERN_FAILURE;
 
-       assert(thr_act != THR_ACT_NULL);
-       pcb = thr_act->mact.pcb;
-
-#if    NCPUS == 1
-
-       /*
-        * If this thread`s state is in the FPU,
-        * discard it; we are replacing the entire
-        * FPU state.
-        */
-       if (fp_act == thr_act) {
-           fwait();                    /* wait for possible interrupt */
-           clear_fpu();                /* no state in FPU */
-       }
-#endif
+       assert(thr_act != THREAD_NULL);
+       pcb = thr_act->machine.pcb;
 
        if (state->initialized == 0) {
            /*
@@ -264,7 +346,7 @@ ASSERT_IPL(SPL0);
            simple_unlock(&pcb->lock);
 
            if (ifps != 0) {
-               zfree(ifps_zone, (vm_offset_t) ifps);
+               zfree(ifps_zone, ifps);
            }
        }
        else {
@@ -286,10 +368,12 @@ ASSERT_IPL(SPL0);
                if (new_ifps == 0) {
                    simple_unlock(&pcb->lock);
                    new_ifps = (struct i386_fpsave_state *) zalloc(ifps_zone);
+                   assert(ALIGNED(new_ifps,16));
                    goto Retry;
                }
                ifps = new_ifps;
                new_ifps = 0;
+                bzero((char *)ifps, sizeof *ifps); // zero ALL fields first
                pcb->ims.ifps = ifps;
            }
 
@@ -307,10 +391,10 @@ ASSERT_IPL(SPL0);
            ifps->fp_save_state.fp_dp      = user_fp_state->fp_dp;
            ifps->fp_save_state.fp_ds      = user_fp_state->fp_ds;
            ifps->fp_regs = *user_fp_regs;
-
+            ifps->fp_save_flavor = FP_387;
            simple_unlock(&pcb->lock);
            if (new_ifps != 0)
-               zfree(ifps_zone, (vm_offset_t) ifps);
+               zfree(ifps_zone, ifps);
        }
 
        return KERN_SUCCESS;
@@ -324,7 +408,7 @@ ASSERT_IPL(SPL0);
  */
 kern_return_t
 fpu_get_state(
-       thread_act_t                            thr_act,
+       thread_t                                thr_act,
        register struct i386_float_state        *state)
 {
        register pcb_t  pcb;
@@ -334,8 +418,8 @@ ASSERT_IPL(SPL0);
        if (fp_kind == FP_NO)
            return KERN_FAILURE;
 
-       assert(thr_act != THR_ACT_NULL);
-       pcb = thr_act->mact.pcb;
+       assert(thr_act != THREAD_NULL);
+       pcb = thr_act->machine.pcb;
 
        simple_lock(&pcb->lock);
        ifps = pcb->ims.ifps;
@@ -350,11 +434,7 @@ ASSERT_IPL(SPL0);
 
        /* Make sure we`ve got the latest fp state info */
        /* If the live fpu state belongs to our target */
-#if    NCPUS == 1
-       if (thr_act == fp_act)
-#else
-       if (thr_act == current_act())
-#endif
+       if (thr_act == current_thread())
        {
            clear_ts();
            fp_save(thr_act);
@@ -437,36 +517,11 @@ fpnoextflt(void)
         */
 ASSERT_IPL(SPL0);
        clear_ts();
-#if    NCPUS == 1
-
-       /*
-        * If this thread`s state is in the FPU, we are done.
-        */
-       if (fp_act == current_act())
-           return;
-
-       /* Make sure we don't do fpsave() in fp_intr while doing fpsave()
-        * here if the current fpu instruction generates an error.
-        */
-       fwait();
-       /*
-        * If another thread`s state is in the FPU, save it.
-        */
-       if (fp_act != THR_ACT_NULL) {
-           fp_save(fp_act);
-       }
-
-       /*
-        * Give this thread the FPU.
-        */
-       fp_act = current_act();
-
-#endif /* NCPUS == 1 */
 
        /*
         * Load this thread`s state into the FPU.
         */
-       fp_load(current_act());
+       fp_load(current_thread());
 }
 
 /*
@@ -477,26 +532,15 @@ ASSERT_IPL(SPL0);
 void
 fpextovrflt(void)
 {
-       register thread_act_t   thr_act = current_act();
+       register thread_t       thr_act = current_thread();
        register pcb_t          pcb;
        register struct i386_fpsave_state *ifps;
 
-#if    NCPUS == 1
-
-       /*
-        * Is exception for the currently running thread?
-        */
-       if (fp_act != thr_act) {
-           /* Uh oh... */
-           panic("fpextovrflt");
-       }
-#endif
-
        /*
         * This is a non-recoverable error.
         * Invalidate the thread`s FPU state.
         */
-       pcb = thr_act->mact.pcb;
+       pcb = thr_act->machine.pcb;
        simple_lock(&pcb->lock);
        ifps = pcb->ims.ifps;
        pcb->ims.ifps = 0;
@@ -514,7 +558,7 @@ fpextovrflt(void)
        clear_fpu();
 
        if (ifps)
-           zfree(ifps_zone, (vm_offset_t) ifps);
+           zfree(ifps_zone, ifps);
 
        /*
         * Raise exception.
@@ -530,43 +574,13 @@ fpextovrflt(void)
 void
 fpexterrflt(void)
 {
-       register thread_act_t   thr_act = current_act();
+       register thread_t       thr_act = current_thread();
 
 ASSERT_IPL(SPL0);
-#if    NCPUS == 1
-       /*
-        * Since FPU errors only occur on ESC or WAIT instructions,
-        * the current thread should own the FPU.  If it didn`t,
-        * we should have gotten the task-switched interrupt first.
-        */
-       if (fp_act != THR_ACT_NULL) {
-           panic("fpexterrflt");
-               return;
-       }
-
-       /*
-        * Check if we got a context switch between the interrupt and the AST
-        * This can happen if the interrupt arrived after the FPU AST was
-        * checked. In this case, raise the exception in fp_load when this
-        * thread next time uses the FPU. Remember exception condition in
-        * fp_valid (extended boolean 2).
-        */
-       if (fp_intr_act != thr_act) {
-               if (fp_intr_act == THR_ACT_NULL) {
-                       panic("fpexterrflt: fp_intr_act == THR_ACT_NULL");
-                       return;
-               }
-               fp_intr_act->mact.pcb->ims.ifps->fp_valid = 2;
-               fp_intr_act = THR_ACT_NULL;
-               return;
-       }
-       fp_intr_act = THR_ACT_NULL;
-#else  /* NCPUS == 1 */
        /*
         * Save the FPU state and turn off the FPU.
         */
        fp_save(thr_act);
-#endif /* NCPUS == 1 */
 
        /*
         * Raise FPU exception.
@@ -575,7 +589,7 @@ ASSERT_IPL(SPL0);
         */
        i386_exception(EXC_ARITHMETIC,
                       EXC_I386_EXTERR,
-                      thr_act->mact.pcb->ims.ifps->fp_save_state.fp_status);
+                      thr_act->machine.pcb->ims.ifps->fp_save_state.fp_status);
        /*NOTREACHED*/
 }
 
@@ -587,18 +601,21 @@ ASSERT_IPL(SPL0);
  * .   if called from fpnoextflt or fp_intr, we are single-cpu
  * .   otherwise, thread is running.
  */
-
 void
 fp_save(
-       thread_act_t    thr_act)
+       thread_t        thr_act)
 {
-       register pcb_t pcb = thr_act->mact.pcb;
+       register pcb_t pcb = thr_act->machine.pcb;
        register struct i386_fpsave_state *ifps = pcb->ims.ifps;
-
        if (ifps != 0 && !ifps->fp_valid) {
            /* registers are in FPU */
            ifps->fp_valid = TRUE;
-           fnsave(&ifps->fp_save_state);
+            ifps->fp_save_flavor = FP_387;  // default flavor; replaced below when FXSAFE() holds
+            if (FXSAFE()) {
+                fxsave(&ifps->fx_save_state);  // when FXSAFE(), also save the SSE2/FP state via fxsave
+                ifps->fp_save_flavor = FP_FXSR;
+            }
+           fnsave(&ifps->fp_save_state);  // the old fnsave area is refreshed in every case, for now...
        }
 }
 
@@ -610,15 +627,16 @@ fp_save(
 
 void
 fp_load(
-       thread_act_t    thr_act)
+       thread_t        thr_act)
 {
-       register pcb_t pcb = thr_act->mact.pcb;
+       register pcb_t pcb = thr_act->machine.pcb;
        register struct i386_fpsave_state *ifps;
 
 ASSERT_IPL(SPL0);
        ifps = pcb->ims.ifps;
        if (ifps == 0) {
            ifps = (struct i386_fpsave_state *) zalloc(ifps_zone);
+           assert(ALIGNED(ifps,16));
            bzero((char *)ifps, sizeof *ifps);
            pcb->ims.ifps = ifps;
            fpinit();
@@ -640,15 +658,17 @@ ASSERT_IPL(SPL0);
                 */
                i386_exception(EXC_ARITHMETIC,
                       EXC_I386_EXTERR,
-                      thr_act->mact.pcb->ims.ifps->fp_save_state.fp_status);
+                      thr_act->machine.pcb->ims.ifps->fp_save_state.fp_status);
                /*NOTREACHED*/
 #endif
        } else {
-           frstor(ifps->fp_save_state);
+            if (ifps->fp_save_flavor == FP_FXSR) fxrstor(&ifps->fx_save_state);
+           else frstor(ifps->fp_save_state);
        }
        ifps->fp_valid = FALSE;         /* in FPU */
 }
 
+
 /*
  * Allocate and initialize FP state for current thread.
  * Don't load state.
@@ -658,10 +678,11 @@ ASSERT_IPL(SPL0);
 void
 fp_state_alloc(void)
 {
-       pcb_t   pcb = current_act()->mact.pcb;
+       pcb_t   pcb = current_thread()->machine.pcb;
        struct i386_fpsave_state *ifps;
 
        ifps = (struct i386_fpsave_state *)zalloc(ifps_zone);
+       assert(ALIGNED(ifps,16));
        bzero((char *)ifps, sizeof *ifps);
        pcb->ims.ifps = ifps;
 
@@ -671,28 +692,25 @@ fp_state_alloc(void)
                        | (FPC_PC_53|FPC_IC_AFF);
        ifps->fp_save_state.fp_status = 0;
        ifps->fp_save_state.fp_tag = 0xffff;    /* all empty */
+        ifps->fx_save_state.fx_control = ifps->fp_save_state.fp_control;
+        ifps->fx_save_state.fx_status = ifps->fp_save_state.fp_status;
+        ifps->fx_save_state.fx_tag = 0x00;
+        ifps->fx_save_state.fx_MXCSR = 0x1f80;
+        
 }
 
 
 /*
- * fpflush(thread_act_t)
+ * fpflush(thread_t) -- now an empty stub; thr_act is unused
  *     Flush the current act's state, if needed
  *     (used by thread_terminate_self to ensure fp faults
  *     aren't satisfied by overly general trap code in the
  *     context of the reaper thread)
  */
 void
-fpflush(thread_act_t thr_act)
+fpflush(__unused thread_t thr_act)
 {
-#if    NCPUS == 1
-       if (fp_act && thr_act == fp_act) {
-           clear_ts();
-           fwait();
-           clear_fpu();
-       }
-#else
        /* not needed on MP x86s; fp not lazily evaluated */
-#endif
 }
 
 
@@ -705,7 +723,7 @@ void
 fpintr(void)
 {
        spl_t   s;
-       thread_act_t thr_act = current_act();
+       thread_t thr_act = current_thread();
 
 ASSERT_IPL(SPL1);
        /*
@@ -716,34 +734,6 @@ ASSERT_IPL(SPL1);
        /*
         * Save the FPU context to the thread using it.
         */
-#if    NCPUS == 1
-       if (fp_act == THR_ACT_NULL) {
-               printf("fpintr: FPU not belonging to anyone!\n");
-               clear_ts();
-               fninit();
-               clear_fpu();
-               return;
-       }
-
-       if (fp_act != thr_act) {
-           /*
-            * FPU exception is for a different thread.
-            * When that thread again uses the FPU an exception will be
-            * raised in fp_load. Remember the condition in fp_valid (== 2).
-            */
-           clear_ts();
-           fp_save(fp_act);
-           fp_act->mact.pcb->ims.ifps->fp_valid = 2;
-           fninit();
-           clear_fpu();
-           /* leave fp_intr_act THR_ACT_NULL */
-           return;
-       }
-       if (fp_intr_act != THR_ACT_NULL)
-           panic("fp_intr: already caught intr");
-       fp_intr_act = thr_act;
-#endif /* NCPUS == 1 */
-
        clear_ts();
        fp_save(thr_act);
        fninit();