X-Git-Url: https://git.saurik.com/apple/xnu.git/blobdiff_plain/1c79356b52d46aa6b508fb032f5ae709b1f2897b..4a2492630c73add3c3aa8a805ba4ff343d4a58ea:/osfmk/i386/fpu.c diff --git a/osfmk/i386/fpu.c b/osfmk/i386/fpu.c index 0bce98884..3098f7a56 100644 --- a/osfmk/i386/fpu.c +++ b/osfmk/i386/fpu.c @@ -3,19 +3,22 @@ * * @APPLE_LICENSE_HEADER_START@ * - * The contents of this file constitute Original Code as defined in and - * are subject to the Apple Public Source License Version 1.1 (the - * "License"). You may not use this file except in compliance with the - * License. Please obtain a copy of the License at - * http://www.apple.com/publicsource and read it before using this file. + * Copyright (c) 1999-2003 Apple Computer, Inc. All Rights Reserved. * - * This Original Code and all software distributed under the License are - * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER + * This file contains Original Code and/or Modifications of Original Code + * as defined in and that are subject to the Apple Public Source License + * Version 2.0 (the 'License'). You may not use this file except in + * compliance with the License. Please obtain a copy of the License at + * http://www.opensource.apple.com/apsl/ and read it before using this + * file. + * + * The Original Code and all software distributed under the License are + * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the - * License for the specific language governing rights and limitations - * under the License. + * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. + * Please see the License for the specific language governing rights and + * limitations under the License. * * @APPLE_LICENSE_HEADER_END@ */ @@ -68,6 +71,7 @@ #include #include #include +#include #include #if 0 @@ -107,6 +111,8 @@ volatile thread_act_t fp_intr_act = THR_ACT_NULL; } #endif + +#define ALIGNED(addr,size) (((unsigned)(addr)&((size)-1))==0) /* Forward */ @@ -137,33 +143,22 @@ init_fpu(void) fnstcw(&control); if ((status & 0xff) == 0 && - (control & 0x103f) == 0x3f) - { -#if 0 - /* - * We have a FPU of some sort. - * Compare -infinity against +infinity - * to check whether we have a 287 or a 387. - */ - volatile double fp_infinity, fp_one, fp_zero; - fp_one = 1.0; - fp_zero = 0.0; - fp_infinity = fp_one / fp_zero; - if (fp_infinity == -fp_infinity) { - /* - * We have an 80287. - */ - fp_kind = FP_287; - __asm__ volatile(".byte 0xdb; .byte 0xe4"); /* fnsetpm */ - } - else -#endif - { - /* - * We have a 387. - */ - fp_kind = FP_387; + (control & 0x103f) == 0x3f) + { + fp_kind = FP_387; /* assume we have a 387 compatible instruction set */ + /* Use FPU save/restore instructions if available */ + if (cpuid_features() & CPUID_FEATURE_FXSR) { + fp_kind = FP_FXSR; + set_cr4(get_cr4() | CR4_FXS); + printf("Enabling XMM register save/restore"); + /* And allow SIMD instructions if present */ + if (cpuid_features() & CPUID_FEATURE_SSE) { + printf(" and SSE/SSE2"); + set_cr4(get_cr4() | CR4_XMM); + } + printf(" opcodes\n"); } + /* * Trap wait instructions. Turn off FPU for now. */ @@ -215,6 +210,153 @@ ASSERT_IPL(SPL0); zfree(ifps_zone, (vm_offset_t) fps); } +/* + * Set the floating-point state for a thread based + * on the FXSave formatted data. This is basically + * the same as fpu_set_state except it uses the + * expanded data structure. + * If the thread is not the current thread, it is + * not running (held). Locking needed against + * concurrent fpu_set_state or fpu_get_state. + */ +kern_return_t +fpu_set_fxstate( + thread_act_t thr_act, + struct i386_float_state *state) +{ + register pcb_t pcb; + register struct i386_fpsave_state *ifps; + register struct i386_fpsave_state *new_ifps; + +ASSERT_IPL(SPL0); + if (fp_kind == FP_NO) + return KERN_FAILURE; + + if (state->fpkind != FP_FXSR) { + /* strange if this happens, but in case someone builds one of these manually... */ + return fpu_set_state(thr_act, state); + } + + assert(thr_act != THR_ACT_NULL); + pcb = thr_act->mact.pcb; + +#if NCPUS == 1 + + /* + * If this thread`s state is in the FPU, + * discard it; we are replacing the entire + * FPU state. + */ + if (fp_act == thr_act) { + fwait(); /* wait for possible interrupt */ + clear_fpu(); /* no state in FPU */ + } +#endif + + if (state->initialized == 0) { + /* + * new FPU state is 'invalid'. + * Deallocate the fp state if it exists. + */ + simple_lock(&pcb->lock); + ifps = pcb->ims.ifps; + pcb->ims.ifps = 0; + simple_unlock(&pcb->lock); + + if (ifps != 0) { + zfree(ifps_zone, (vm_offset_t) ifps); + } + } + else { + /* + * Valid state. Allocate the fp state if there is none. + */ + + new_ifps = 0; + Retry: + simple_lock(&pcb->lock); + ifps = pcb->ims.ifps; + if (ifps == 0) { + if (new_ifps == 0) { + simple_unlock(&pcb->lock); + new_ifps = (struct i386_fpsave_state *) zalloc(ifps_zone); + assert(ALIGNED(new_ifps,16)); + goto Retry; + } + ifps = new_ifps; + new_ifps = 0; + bzero((char *)ifps, sizeof *ifps); + pcb->ims.ifps = ifps; + } + + /* + * now copy over the new data. + */ + bcopy((char *)&state->hw_state[0], (char *)&ifps->fx_save_state, sizeof(struct i386_fx_save)); + ifps->fp_save_flavor = FP_FXSR; + simple_unlock(&pcb->lock); + if (new_ifps != 0) + zfree(ifps_zone, (vm_offset_t) ifps); + } + + return KERN_SUCCESS; +} + +/* + * Get the floating-point state for a thread. + * If the thread is not the current thread, it is + * not running (held). Locking needed against + * concurrent fpu_set_state or fpu_get_state. + */ +kern_return_t +fpu_get_fxstate( + thread_act_t thr_act, + register struct i386_float_state *state) +{ + register pcb_t pcb; + register struct i386_fpsave_state *ifps; + +ASSERT_IPL(SPL0); + if (fp_kind == FP_NO) + return KERN_FAILURE; + + assert(thr_act != THR_ACT_NULL); + pcb = thr_act->mact.pcb; + + simple_lock(&pcb->lock); + ifps = pcb->ims.ifps; + if (ifps == 0) { + /* + * No valid floating-point state. + */ + simple_unlock(&pcb->lock); + bzero((char *)state, sizeof(struct i386_float_state)); + return KERN_SUCCESS; + } + + /* Make sure we`ve got the latest fp state info */ + /* If the live fpu state belongs to our target */ +#if NCPUS == 1 + if (thr_act == fp_act) +#else + if (thr_act == current_act()) +#endif + { + clear_ts(); + fp_save(thr_act); + clear_fpu(); + } + + state->fpkind = fp_kind; + state->exc_status = 0; + state->initialized = ifps->fp_valid; + bcopy( (char *)&ifps->fx_save_state, (char *)&state->hw_state[0], sizeof(struct i386_fx_save)); + + simple_unlock(&pcb->lock); + + return KERN_SUCCESS; +} + /* * Set the floating-point state for a thread. * If the thread is not the current thread, it is @@ -283,10 +425,12 @@ ASSERT_IPL(SPL0); if (new_ifps == 0) { simple_unlock(&pcb->lock); new_ifps = (struct i386_fpsave_state *) zalloc(ifps_zone); + assert(ALIGNED(new_ifps,16)); goto Retry; } ifps = new_ifps; new_ifps = 0; + bzero((char *)ifps, sizeof *ifps); // zero ALL fields first pcb->ims.ifps = ifps; } @@ -304,7 +448,7 @@ ASSERT_IPL(SPL0); ifps->fp_save_state.fp_dp = user_fp_state->fp_dp; ifps->fp_save_state.fp_ds = user_fp_state->fp_ds; ifps->fp_regs = *user_fp_regs; - + ifps->fp_save_flavor = FP_387; simple_unlock(&pcb->lock); if (new_ifps != 0) zfree(ifps_zone, (vm_offset_t) ifps); @@ -584,18 +728,21 @@ ASSERT_IPL(SPL0); * . if called from fpnoextflt or fp_intr, we are single-cpu * . otherwise, thread is running. */ - void fp_save( thread_act_t thr_act) { register pcb_t pcb = thr_act->mact.pcb; register struct i386_fpsave_state *ifps = pcb->ims.ifps; - if (ifps != 0 && !ifps->fp_valid) { /* registers are in FPU */ ifps->fp_valid = TRUE; - fnsave(&ifps->fp_save_state); + ifps->fp_save_flavor = FP_387; + if (FXSAFE()) { + fxsave(&ifps->fx_save_state); // save the SSE2/Fp state in addition is enabled + ifps->fp_save_flavor = FP_FXSR; + } + fnsave(&ifps->fp_save_state); // also update the old save area for now... } } @@ -616,6 +763,7 @@ ASSERT_IPL(SPL0); ifps = pcb->ims.ifps; if (ifps == 0) { ifps = (struct i386_fpsave_state *) zalloc(ifps_zone); + assert(ALIGNED(ifps,16)); bzero((char *)ifps, sizeof *ifps); pcb->ims.ifps = ifps; fpinit(); @@ -641,11 +789,13 @@ ASSERT_IPL(SPL0); /*NOTREACHED*/ #endif } else { - frstor(ifps->fp_save_state); + if (ifps->fp_save_flavor == FP_FXSR) fxrstor(&ifps->fx_save_state); + else frstor(ifps->fp_save_state); } ifps->fp_valid = FALSE; /* in FPU */ } + /* * Allocate and initialize FP state for current thread. * Don't load state. @@ -659,6 +809,7 @@ fp_state_alloc(void) struct i386_fpsave_state *ifps; ifps = (struct i386_fpsave_state *)zalloc(ifps_zone); + assert(ALIGNED(ifps,16)); bzero((char *)ifps, sizeof *ifps); pcb->ims.ifps = ifps; @@ -668,6 +819,11 @@ fp_state_alloc(void) | (FPC_PC_53|FPC_IC_AFF); ifps->fp_save_state.fp_status = 0; ifps->fp_save_state.fp_tag = 0xffff; /* all empty */ + ifps->fx_save_state.fx_control = ifps->fp_save_state.fp_control; + ifps->fx_save_state.fx_status = ifps->fp_save_state.fp_status; + ifps->fx_save_state.fx_tag = 0x00; + ifps->fx_save_state.fx_MXCSR = 0x1f80; + }