/*
 * Copyright (c) 2000-2006 Apple Computer, Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */
/*
 * Mach Operating System
 * Copyright (c) 1991,1990 Carnegie Mellon University
 * All Rights Reserved.
 *
 * Permission to use, copy, modify and distribute this software and its
 * documentation is hereby granted, provided that both the copyright
 * notice and this permission notice appear in all copies of the
 * software, derivative works or modified versions, and any portions
 * thereof, and that both notices appear in supporting documentation.
 *
 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
 * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
 *
 * Carnegie Mellon requests users of this software to return to
 *
 *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
 *  School of Computer Science
 *  Carnegie Mellon University
 *  Pittsburgh PA 15213-3890
 *
 * any improvements or extensions that they make and grant Carnegie Mellon
 * the rights to redistribute these changes.
 */
#include <platforms.h>

#include <mach/exception_types.h>
#include <mach/i386/thread_status.h>
#include <mach/i386/fp_reg.h>

#include <kern/mach_param.h>
#include <kern/processor.h>
#include <kern/thread.h>
#include <kern/zalloc.h>
#include <kern/misc_protos.h>
#include <kern/assert.h>

#include <architecture/i386/pio.h>
#include <i386/cpuid.h>
#include <i386/proc_reg.h>
#include <i386/misc_protos.h>
#include <i386/thread.h>
#include <i386/trap.h>
81 int fp_kind
= FP_NO
; /* not inited */
82 zone_t ifps_zone
; /* zone for FPU save area */
84 #define ALIGNED(addr,size) (((uintptr_t)(addr)&((size)-1))==0)
88 extern void fpinit(void);
94 static void configure_mxcsr_capability_mask(struct x86_fpsave_state
*ifps
);
96 struct x86_fpsave_state starting_fp_state
;
99 /* Global MXCSR capability bitmask */
100 static unsigned int mxcsr_capability_mask
;
103 * Determine the MXCSR capability mask, which allows us to mask off any
104 * potentially unsafe "reserved" bits before restoring the FPU context.
105 * *Not* per-cpu, assumes symmetry.
108 configure_mxcsr_capability_mask(struct x86_fpsave_state
*ifps
)
110 /* FXSAVE requires a 16 byte aligned store */
111 assert(ALIGNED(ifps
,16));
112 /* Clear, to prepare for the diagnostic FXSAVE */
113 bzero(ifps
, sizeof(*ifps
));
114 /* Disable FPU/SSE Device Not Available exceptions */
116 __asm__
volatile("fxsave %0" : "=m" (ifps
->fx_save_state
));
117 mxcsr_capability_mask
= ifps
->fx_save_state
.fx_MXCSR_MASK
;
119 /* Set default mask value if necessary */
120 if (mxcsr_capability_mask
== 0)
121 mxcsr_capability_mask
= 0xffbf;
123 /* Re-enable FPU/SSE DNA exceptions */
128 * Allocate and initialize FP state for current thread.
131 static struct x86_fpsave_state
*
134 struct x86_fpsave_state
*ifps
;
136 ifps
= (struct x86_fpsave_state
*)zalloc(ifps_zone
);
137 assert(ALIGNED(ifps
,16));
138 bzero((char *)ifps
, sizeof *ifps
);
144 fp_state_free(struct x86_fpsave_state
*ifps
)
146 zfree(ifps_zone
, ifps
);
151 * Look for FPU and initialize it.
152 * Called on each CPU.
157 unsigned short status
, control
;
160 * Check for FPU by initializing it,
161 * then trying to read the correct bit patterns from
162 * the control and status registers.
164 set_cr0((get_cr0() & ~(CR0_EM
|CR0_TS
)) | CR0_NE
); /* allow use of FPU */
170 if ((status
& 0xff) == 0 &&
171 (control
& 0x103f) == 0x3f)
173 /* Use FPU save/restore instructions if available */
174 if (cpuid_features() & CPUID_FEATURE_FXSR
) {
176 set_cr4(get_cr4() | CR4_FXS
);
177 /* And allow SIMD instructions if present */
178 if (cpuid_features() & CPUID_FEATURE_SSE
) {
179 set_cr4(get_cr4() | CR4_XMM
);
182 panic("fpu is not FP_FXSR");
185 * initialze FPU to normal starting
186 * position so that we can take a snapshot
187 * of that state and store it for future use
188 * when we're asked for the FPU state of a
189 * thread, and it hasn't initiated any yet
192 fxsave(&starting_fp_state
.fx_save_state
);
195 * Trap wait instructions. Turn off FPU for now.
197 set_cr0(get_cr0() | CR0_TS
| CR0_MP
);
204 panic("fpu is not FP_FXSR");
209 * Initialize FP handling.
212 fpu_module_init(void)
214 struct x86_fpsave_state
*new_ifps
;
216 ifps_zone
= zinit(sizeof(struct x86_fpsave_state
),
217 thread_max
* sizeof(struct x86_fpsave_state
),
218 THREAD_CHUNK
* sizeof(struct x86_fpsave_state
),
220 new_ifps
= fp_state_alloc();
221 /* Determine MXCSR reserved bits */
222 configure_mxcsr_capability_mask(new_ifps
);
223 fp_state_free(new_ifps
);
/*
 * Free a FPU save area.
 * Called only when thread terminating - no locking necessary.
 */
void
fpu_free(struct x86_fpsave_state *fps)
{
	fp_state_free(fps);
}
237 * Set the floating-point state for a thread based
238 * on the FXSave formatted data. This is basically
239 * the same as fpu_set_state except it uses the
240 * expanded data structure.
241 * If the thread is not the current thread, it is
242 * not running (held). Locking needed against
243 * concurrent fpu_set_state or fpu_get_state.
248 thread_state_t tstate
)
250 struct x86_fpsave_state
*ifps
;
251 struct x86_fpsave_state
*new_ifps
;
252 x86_float_state64_t
*state
;
255 if (fp_kind
== FP_NO
)
258 state
= (x86_float_state64_t
*)tstate
;
260 assert(thr_act
!= THREAD_NULL
);
261 pcb
= thr_act
->machine
.pcb
;
265 * new FPU state is 'invalid'.
266 * Deallocate the fp state if it exists.
268 simple_lock(&pcb
->lock
);
273 simple_unlock(&pcb
->lock
);
279 * Valid state. Allocate the fp state if there is none.
283 simple_lock(&pcb
->lock
);
288 simple_unlock(&pcb
->lock
);
289 new_ifps
= fp_state_alloc();
297 * now copy over the new data.
299 bcopy((char *)&state
->fpu_fcw
,
300 (char *)&ifps
->fx_save_state
, sizeof(struct x86_fx_save
));
302 /* XXX The layout of the state set from user-space may need to be
303 * validated for consistency.
305 ifps
->fp_save_layout
= thread_is_64bit(thr_act
) ? FXSAVE64
: FXSAVE32
;
306 /* Mark the thread's floating point status as non-live. */
307 /* Temporarily disabled: radar 4647827
308 * ifps->fp_valid = TRUE;
312 * Clear any reserved bits in the MXCSR to prevent a GPF
313 * when issuing an FXRSTOR.
315 ifps
->fx_save_state
.fx_MXCSR
&= mxcsr_capability_mask
;
317 simple_unlock(&pcb
->lock
);
320 fp_state_free(new_ifps
);
326 * Get the floating-point state for a thread.
327 * If the thread is not the current thread, it is
328 * not running (held). Locking needed against
329 * concurrent fpu_set_state or fpu_get_state.
334 thread_state_t tstate
)
336 struct x86_fpsave_state
*ifps
;
337 x86_float_state64_t
*state
;
338 kern_return_t ret
= KERN_FAILURE
;
341 if (fp_kind
== FP_NO
)
344 state
= (x86_float_state64_t
*)tstate
;
346 assert(thr_act
!= THREAD_NULL
);
347 pcb
= thr_act
->machine
.pcb
;
349 simple_lock(&pcb
->lock
);
354 * No valid floating-point state.
356 bcopy((char *)&starting_fp_state
.fx_save_state
,
357 (char *)&state
->fpu_fcw
, sizeof(struct x86_fx_save
));
359 simple_unlock(&pcb
->lock
);
364 * Make sure we`ve got the latest fp state info
365 * If the live fpu state belongs to our target
367 if (thr_act
== current_thread()) {
370 intr
= ml_set_interrupts_enabled(FALSE
);
376 (void)ml_set_interrupts_enabled(intr
);
378 if (ifps
->fp_valid
) {
379 bcopy((char *)&ifps
->fx_save_state
,
380 (char *)&state
->fpu_fcw
, sizeof(struct x86_fx_save
));
383 simple_unlock(&pcb
->lock
);
391 * the child thread is 'stopped' with the thread
392 * mutex held and is currently not known by anyone
393 * so no way for fpu state to get manipulated by an
394 * outside agency -> no need for pcb lock
402 struct x86_fpsave_state
*new_ifps
= NULL
;
406 ppcb
= parent
->machine
.pcb
;
408 if (ppcb
->ifps
== NULL
)
411 if (child
->machine
.pcb
->ifps
)
412 panic("fpu_dup_fxstate: child's ifps non-null");
414 new_ifps
= fp_state_alloc();
416 simple_lock(&ppcb
->lock
);
418 if (ppcb
->ifps
!= NULL
) {
420 * Make sure we`ve got the latest fp state info
422 intr
= ml_set_interrupts_enabled(FALSE
);
428 (void)ml_set_interrupts_enabled(intr
);
430 if (ppcb
->ifps
->fp_valid
) {
431 child
->machine
.pcb
->ifps
= new_ifps
;
433 bcopy((char *)&(ppcb
->ifps
->fx_save_state
),
434 (char *)&(child
->machine
.pcb
->ifps
->fx_save_state
), sizeof(struct x86_fx_save
));
436 new_ifps
->fp_save_layout
= ppcb
->ifps
->fp_save_layout
;
437 /* Mark the new fp saved state as non-live. */
438 /* Temporarily disabled: radar 4647827
439 * new_ifps->fp_valid = TRUE;
442 * Clear any reserved bits in the MXCSR to prevent a GPF
443 * when issuing an FXRSTOR.
445 new_ifps
->fx_save_state
.fx_MXCSR
&= mxcsr_capability_mask
;
449 simple_unlock(&ppcb
->lock
);
451 if (new_ifps
!= NULL
)
452 fp_state_free(new_ifps
);
463 unsigned short control
;
468 control
&= ~(FPC_PC
|FPC_RC
); /* Clear precision & rounding control */
469 control
|= (FPC_PC_64
| /* Set precision */
470 FPC_RC_RN
| /* round-to-nearest */
471 FPC_ZE
| /* Suppress zero-divide */
472 FPC_OE
| /* and overflow */
473 FPC_UE
| /* underflow */
474 FPC_IE
| /* Allow NaNQs and +-INF */
475 FPC_DE
| /* Allow denorms as operands */
476 FPC_PE
); /* No trap for precision loss */
479 /* Initialize SSE/SSE2 */
480 __builtin_ia32_ldmxcsr(0x1f80);
484 * Coprocessor not present.
493 struct x86_fpsave_state
*ifps
= 0;
495 thr_act
= current_thread();
496 pcb
= thr_act
->machine
.pcb
;
498 if (pcb
->ifps
== 0 && !get_interrupt_level())
499 ifps
= fp_state_alloc();
501 intr
= ml_set_interrupts_enabled(FALSE
);
503 clear_ts(); /* Enable FPU use */
505 if (get_interrupt_level()) {
507 * Save current coprocessor context if valid
508 * Initialize coprocessor live context
513 if (pcb
->ifps
== 0) {
518 * Load this thread`s state into coprocessor live context.
522 (void)ml_set_interrupts_enabled(intr
);
529 * FPU overran end of segment.
530 * Re-initialize FPU. Floating point state is not valid.
536 thread_t thr_act
= current_thread();
538 struct x86_fpsave_state
*ifps
;
541 intr
= ml_set_interrupts_enabled(FALSE
);
543 if (get_interrupt_level())
544 panic("FPU segment overrun exception at interrupt context\n");
545 if (current_task() == kernel_task
)
546 panic("FPU segment overrun exception in kernel thread context\n");
549 * This is a non-recoverable error.
550 * Invalidate the thread`s FPU state.
552 pcb
= thr_act
->machine
.pcb
;
553 simple_lock(&pcb
->lock
);
556 simple_unlock(&pcb
->lock
);
559 * Re-initialize the FPU.
565 * And disable access.
569 (void)ml_set_interrupts_enabled(intr
);
572 zfree(ifps_zone
, ifps
);
577 i386_exception(EXC_BAD_ACCESS
, VM_PROT_READ
|VM_PROT_EXECUTE
, 0);
582 * FPU error. Called by AST.
588 thread_t thr_act
= current_thread();
589 struct x86_fpsave_state
*ifps
= thr_act
->machine
.pcb
->ifps
;
592 intr
= ml_set_interrupts_enabled(FALSE
);
594 if (get_interrupt_level())
595 panic("FPU error exception at interrupt context\n");
596 if (current_task() == kernel_task
)
597 panic("FPU error exception in kernel thread context\n");
600 * Save the FPU state and turn off the FPU.
604 (void)ml_set_interrupts_enabled(intr
);
607 * Raise FPU exception.
608 * Locking not needed on pcb->ifps,
609 * since thread is running.
611 i386_exception(EXC_ARITHMETIC
,
613 ifps
->fx_save_state
.fx_status
);
621 * Locking not needed:
622 * . if called from fpu_get_state, pcb already locked.
623 * . if called from fpnoextflt or fp_intr, we are single-cpu
624 * . otherwise, thread is running.
625 * N.B.: Must be called with interrupts disabled
632 pcb_t pcb
= thr_act
->machine
.pcb
;
633 struct x86_fpsave_state
*ifps
= pcb
->ifps
;
635 if (ifps
!= 0 && !ifps
->fp_valid
) {
636 assert((get_cr0() & CR0_TS
) == 0);
637 /* registers are in FPU */
638 ifps
->fp_valid
= TRUE
;
640 #if defined(__i386__)
641 if (!thread_is_64bit(thr_act
)) {
642 /* save the compatibility/legacy mode XMM+x87 state */
643 fxsave(&ifps
->fx_save_state
);
644 ifps
->fp_save_layout
= FXSAVE32
;
647 fxsave64(&ifps
->fx_save_state
);
648 ifps
->fp_save_layout
= FXSAVE64
;
650 #elif defined(__x86_64__)
651 fxsave(&ifps
->fx_save_state
);
652 ifps
->fp_save_layout
= thread_is_64bit(thr_act
) ? FXSAVE64
: FXSAVE32
;
658 * Restore FPU state from PCB.
660 * Locking not needed; always called on the current thread.
667 pcb_t pcb
= thr_act
->machine
.pcb
;
668 struct x86_fpsave_state
*ifps
;
671 if (ifps
== 0 || ifps
->fp_valid
== FALSE
) {
673 /* FIXME: This allocation mechanism should be revised
674 * for scenarios where interrupts are disabled.
676 ifps
= fp_state_alloc();
681 assert(ifps
->fp_save_layout
== FXSAVE32
|| ifps
->fp_save_layout
== FXSAVE64
);
682 #if defined(__i386__)
683 if (ifps
->fp_save_layout
== FXSAVE32
) {
684 /* Restore the compatibility/legacy mode XMM+x87 state */
685 fxrstor(&ifps
->fx_save_state
);
687 else if (ifps
->fp_save_layout
== FXSAVE64
) {
688 fxrstor64(&ifps
->fx_save_state
);
690 #elif defined(__x86_64__)
691 fxrstor(&ifps
->fx_save_state
);
694 ifps
->fp_valid
= FALSE
; /* in FPU */
701 * Flush the current act's state, if needed
702 * (used by thread_terminate_self to ensure fp faults
703 * aren't satisfied by overly general trap code in the
704 * context of the reaper thread)
707 fpflush(__unused thread_t thr_act
)
709 /* not needed on MP x86s; fp not lazily evaluated */
713 * SSE arithmetic exception handling code.
714 * Basically the same as the x87 exception handler with a different subtype
720 thread_t thr_act
= current_thread();
721 struct x86_fpsave_state
*ifps
= thr_act
->machine
.pcb
->ifps
;
724 intr
= ml_set_interrupts_enabled(FALSE
);
726 if (get_interrupt_level())
727 panic("SSE exception at interrupt context\n");
728 if (current_task() == kernel_task
)
729 panic("SSE exception in kernel thread context\n");
732 * Save the FPU state and turn off the FPU.
736 (void)ml_set_interrupts_enabled(intr
);
738 * Raise FPU exception.
739 * Locking not needed on pcb->ifps,
740 * since thread is running.
742 assert(ifps
->fp_save_layout
== FXSAVE32
|| ifps
->fp_save_layout
== FXSAVE64
);
743 i386_exception(EXC_ARITHMETIC
,
745 ifps
->fx_save_state
.fx_MXCSR
);
751 fp_setvalid(boolean_t value
) {
752 thread_t thr_act
= current_thread();
753 struct x86_fpsave_state
*ifps
= thr_act
->machine
.pcb
->ifps
;
756 ifps
->fp_valid
= value
;