2 * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved.
4 * @APPLE_LICENSE_HEADER_START@
6 * The contents of this file constitute Original Code as defined in and
7 * are subject to the Apple Public Source License Version 1.1 (the
8 * "License"). You may not use this file except in compliance with the
9 * License. Please obtain a copy of the License at
10 * http://www.apple.com/publicsource and read it before using this file.
12 * This Original Code and all software distributed under the License are
13 * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER
14 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
15 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the
17 * License for the specific language governing rights and limitations
20 * @APPLE_LICENSE_HEADER_END@
26 * Mach Operating System
27 * Copyright (c) 1992-1990 Carnegie Mellon University
28 * All Rights Reserved.
30 * Permission to use, copy, modify and distribute this software and its
31 * documentation is hereby granted, provided that both the copyright
32 * notice and this permission notice appear in all copies of the
33 * software, derivative works or modified versions, and any portions
34 * thereof, and that both notices appear in supporting documentation.
36 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
37 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
38 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
40 * Carnegie Mellon requests users of this software to return to
42 * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
43 * School of Computer Science
44 * Carnegie Mellon University
45 * Pittsburgh PA 15213-3890
47 * any improvements or extensions that they make and grant Carnegie Mellon
48 * the rights to redistribute these changes.
53 #include <platforms.h>
55 #include <mach/exception_types.h>
56 #include <mach/i386/thread_status.h>
57 #include <mach/i386/fp_reg.h>
59 #include <kern/mach_param.h>
60 #include <kern/processor.h>
61 #include <kern/thread.h>
62 #include <kern/zalloc.h>
63 #include <kern/misc_protos.h>
65 #include <kern/assert.h>
67 #include <i386/thread.h>
69 #include <i386/trap.h>
70 #include <architecture/i386/pio.h>
71 #include <i386/cpuid.h>
72 #include <i386/misc_protos.h>
73 #include <i386/proc_reg.h>
75 int fp_kind
= FP_NO
; /* not inited */
76 zone_t ifps_zone
; /* zone for FPU save area */
/*
 * ALIGNED(addr, size): true when addr is a multiple of size.
 *
 * size must be a power of two, because the test masks addr with (size - 1).
 * The address is converted to unsigned long instead of unsigned so that the
 * conversion is pointer-sized on both the ILP32 and LP64 ABIs: no
 * pointer-truncation diagnostic on 64-bit builds, and no lost high bits
 * when size itself is wider than 32 bits.
 */
#define ALIGNED(addr, size)	((((unsigned long)(addr)) & ((size) - 1)) == 0)
82 extern void fpinit(void);
88 static void configure_mxcsr_capability_mask(struct x86_fpsave_state
*ifps
);
90 struct x86_fpsave_state starting_fp_state
;
93 /* Global MXCSR capability bitmask */
94 static unsigned int mxcsr_capability_mask
;
97 * Determine the MXCSR capability mask, which allows us to mask off any
98 * potentially unsafe "reserved" bits before restoring the FPU context.
99 * *Not* per-cpu, assumes symmetry.
102 configure_mxcsr_capability_mask(struct x86_fpsave_state
*ifps
)
104 /* FXSAVE requires a 16 byte aligned store */
105 assert(ALIGNED(ifps
,16));
106 /* Clear, to prepare for the diagnostic FXSAVE */
107 bzero(ifps
, sizeof(*ifps
));
108 /* Disable FPU/SSE Device Not Available exceptions */
111 __asm__
volatile("fxsave %0" : "=m" (ifps
->fx_save_state
));
112 mxcsr_capability_mask
= ifps
->fx_save_state
.fx_MXCSR_MASK
;
114 /* Set default mask value if necessary */
115 if (mxcsr_capability_mask
== 0)
116 mxcsr_capability_mask
= 0xffbf;
118 /* Re-enable FPU/SSE DNA exceptions */
123 * Allocate and initialize FP state for current thread.
126 static struct x86_fpsave_state
*
129 struct x86_fpsave_state
*ifps
;
131 ifps
= (struct x86_fpsave_state
*)zalloc(ifps_zone
);
132 assert(ALIGNED(ifps
,16));
133 bzero((char *)ifps
, sizeof *ifps
);
139 fp_state_free(struct x86_fpsave_state
*ifps
)
141 zfree(ifps_zone
, ifps
);
146 * Look for FPU and initialize it.
147 * Called on each CPU.
152 unsigned short status
, control
;
155 * Check for FPU by initializing it,
156 * then trying to read the correct bit patterns from
157 * the control and status registers.
159 set_cr0((get_cr0() & ~(CR0_EM
|CR0_TS
)) | CR0_NE
); /* allow use of FPU */
165 if ((status
& 0xff) == 0 &&
166 (control
& 0x103f) == 0x3f)
168 /* Use FPU save/restore instructions if available */
169 if (cpuid_features() & CPUID_FEATURE_FXSR
) {
171 set_cr4(get_cr4() | CR4_FXS
);
172 printf("Enabling XMM register save/restore");
173 /* And allow SIMD instructions if present */
174 if (cpuid_features() & CPUID_FEATURE_SSE
) {
175 printf(" and SSE/SSE2");
176 set_cr4(get_cr4() | CR4_XMM
);
178 printf(" opcodes\n");
180 panic("fpu is not FP_FXSR");
183 * initialize FPU to normal starting
184 * position so that we can take a snapshot
185 * of that state and store it for future use
186 * when we're asked for the FPU state of a
187 * thread, and it hasn't initiated any yet
190 fxsave(&starting_fp_state
.fx_save_state
);
193 * Trap wait instructions. Turn off FPU for now.
195 set_cr0(get_cr0() | CR0_TS
| CR0_MP
);
202 panic("fpu is not FP_FXSR");
207 * Initialize FP handling.
210 fpu_module_init(void)
212 struct x86_fpsave_state
*new_ifps
;
214 ifps_zone
= zinit(sizeof(struct x86_fpsave_state
),
215 THREAD_MAX
* sizeof(struct x86_fpsave_state
),
216 THREAD_CHUNK
* sizeof(struct x86_fpsave_state
),
218 new_ifps
= fp_state_alloc();
219 /* Determine MXCSR reserved bits */
220 configure_mxcsr_capability_mask(new_ifps
);
221 fp_state_free(new_ifps
);
225 * Free a FPU save area.
226 * Called only when thread terminating - no locking necessary.
230 struct x86_fpsave_state
*fps
;
236 * Set the floating-point state for a thread based
237 * on the FXSave formatted data. This is basically
238 * the same as fpu_set_state except it uses the
239 * expanded data structure.
240 * If the thread is not the current thread, it is
241 * not running (held). Locking needed against
242 * concurrent fpu_set_state or fpu_get_state.
247 thread_state_t tstate
)
249 struct x86_fpsave_state
*ifps
;
250 struct x86_fpsave_state
*new_ifps
;
251 x86_float_state64_t
*state
;
254 if (fp_kind
== FP_NO
)
257 state
= (x86_float_state64_t
*)tstate
;
259 assert(thr_act
!= THREAD_NULL
);
260 pcb
= thr_act
->machine
.pcb
;
264 * new FPU state is 'invalid'.
265 * Deallocate the fp state if it exists.
267 simple_lock(&pcb
->lock
);
272 simple_unlock(&pcb
->lock
);
278 * Valid state. Allocate the fp state if there is none.
282 simple_lock(&pcb
->lock
);
287 simple_unlock(&pcb
->lock
);
288 new_ifps
= fp_state_alloc();
296 * now copy over the new data.
298 bcopy((char *)&state
->fpu_fcw
,
299 (char *)&ifps
->fx_save_state
, sizeof(struct x86_fx_save
));
301 /* XXX The layout of the state set from user-space may need to be
302 * validated for consistency.
304 ifps
->fp_save_layout
= thread_is_64bit(thr_act
) ? FXSAVE64
: FXSAVE32
;
305 /* Mark the thread's floating point status as non-live. */
306 ifps
->fp_valid
= TRUE
;
308 * Clear any reserved bits in the MXCSR to prevent a GPF
309 * when issuing an FXRSTOR.
311 ifps
->fx_save_state
.fx_MXCSR
&= mxcsr_capability_mask
;
313 simple_unlock(&pcb
->lock
);
316 fp_state_free(new_ifps
);
322 * Get the floating-point state for a thread.
323 * If the thread is not the current thread, it is
324 * not running (held). Locking needed against
325 * concurrent fpu_set_state or fpu_get_state.
330 thread_state_t tstate
)
332 struct x86_fpsave_state
*ifps
;
333 x86_float_state64_t
*state
;
334 kern_return_t ret
= KERN_FAILURE
;
337 if (fp_kind
== FP_NO
)
340 state
= (x86_float_state64_t
*)tstate
;
342 assert(thr_act
!= THREAD_NULL
);
343 pcb
= thr_act
->machine
.pcb
;
345 simple_lock(&pcb
->lock
);
350 * No valid floating-point state.
352 bcopy((char *)&starting_fp_state
.fx_save_state
,
353 (char *)&state
->fpu_fcw
, sizeof(struct x86_fx_save
));
355 simple_unlock(&pcb
->lock
);
360 * Make sure we`ve got the latest fp state info
361 * If the live fpu state belongs to our target
363 if (thr_act
== current_thread())
367 intr
= ml_set_interrupts_enabled(FALSE
);
373 (void)ml_set_interrupts_enabled(intr
);
375 if (ifps
->fp_valid
) {
376 bcopy((char *)&ifps
->fx_save_state
,
377 (char *)&state
->fpu_fcw
, sizeof(struct x86_fx_save
));
380 simple_unlock(&pcb
->lock
);
387 * the child thread is 'stopped' with the thread
388 * mutex held and is currently not known by anyone
389 * so no way for fpu state to get manipulated by an
390 * outside agency -> no need for pcb lock
398 struct x86_fpsave_state
*new_ifps
= NULL
;
402 ppcb
= parent
->machine
.pcb
;
404 if (ppcb
->ifps
== NULL
)
407 if (child
->machine
.pcb
->ifps
)
408 panic("fpu_dup_fxstate: child's ifps non-null");
410 new_ifps
= fp_state_alloc();
412 simple_lock(&ppcb
->lock
);
414 if (ppcb
->ifps
!= NULL
) {
416 * Make sure we`ve got the latest fp state info
418 intr
= ml_set_interrupts_enabled(FALSE
);
424 (void)ml_set_interrupts_enabled(intr
);
426 if (ppcb
->ifps
->fp_valid
) {
427 child
->machine
.pcb
->ifps
= new_ifps
;
429 bcopy((char *)&(ppcb
->ifps
->fx_save_state
),
430 (char *)&(child
->machine
.pcb
->ifps
->fx_save_state
), sizeof(struct x86_fx_save
));
432 new_ifps
->fp_save_layout
= ppcb
->ifps
->fp_save_layout
;
433 /* Mark the new fp saved state as non-live. */
434 new_ifps
->fp_valid
= TRUE
;
436 * Clear any reserved bits in the MXCSR to prevent a GPF
437 * when issuing an FXRSTOR.
439 new_ifps
->fx_save_state
.fx_MXCSR
&= mxcsr_capability_mask
;
443 simple_unlock(&ppcb
->lock
);
445 if (new_ifps
!= NULL
)
446 fp_state_free(new_ifps
);
457 unsigned short control
;
462 control
&= ~(FPC_PC
|FPC_RC
); /* Clear precision & rounding control */
463 control
|= (FPC_PC_64
| /* Set precision */
464 FPC_RC_RN
| /* round-to-nearest */
465 FPC_ZE
| /* Suppress zero-divide */
466 FPC_OE
| /* and overflow */
467 FPC_UE
| /* underflow */
468 FPC_IE
| /* Allow NaNQs and +-INF */
469 FPC_DE
| /* Allow denorms as operands */
470 FPC_PE
); /* No trap for precision loss */
473 /* Initialize SSE/SSE2 */
474 __builtin_ia32_ldmxcsr(0x1f80);
478 * Coprocessor not present.
486 intr
= ml_set_interrupts_enabled(FALSE
);
488 clear_ts(); /* Enable FPU use */
490 if (get_interrupt_level()) {
492 * Save current coprocessor context if valid
493 * Initialize coprocessor live context
495 fp_save(current_thread());
499 * Load this thread`s state into coprocessor live context.
501 fp_load(current_thread());
504 (void)ml_set_interrupts_enabled(intr
);
508 * FPU overran end of segment.
509 * Re-initialize FPU. Floating point state is not valid.
515 thread_t thr_act
= current_thread();
517 struct x86_fpsave_state
*ifps
;
520 intr
= ml_set_interrupts_enabled(FALSE
);
522 if (get_interrupt_level())
523 panic("FPU segment overrun exception at interrupt context\n");
524 if (current_task() == kernel_task
)
525 panic("FPU segment overrun exception in kernel thread context\n");
528 * This is a non-recoverable error.
529 * Invalidate the thread`s FPU state.
531 pcb
= thr_act
->machine
.pcb
;
532 simple_lock(&pcb
->lock
);
535 simple_unlock(&pcb
->lock
);
538 * Re-initialize the FPU.
544 * And disable access.
548 (void)ml_set_interrupts_enabled(intr
);
551 zfree(ifps_zone
, ifps
);
556 i386_exception(EXC_BAD_ACCESS
, VM_PROT_READ
|VM_PROT_EXECUTE
, 0);
561 * FPU error. Called by AST.
567 thread_t thr_act
= current_thread();
568 struct x86_fpsave_state
*ifps
= thr_act
->machine
.pcb
->ifps
;
571 intr
= ml_set_interrupts_enabled(FALSE
);
573 if (get_interrupt_level())
574 panic("FPU error exception at interrupt context\n");
575 if (current_task() == kernel_task
)
576 panic("FPU error exception in kernel thread context\n");
579 * Save the FPU state and turn off the FPU.
583 (void)ml_set_interrupts_enabled(intr
);
586 * Raise FPU exception.
587 * Locking not needed on pcb->ifps,
588 * since thread is running.
590 i386_exception(EXC_ARITHMETIC
,
592 ifps
->fx_save_state
.fx_status
);
600 * Locking not needed:
601 * . if called from fpu_get_state, pcb already locked.
602 * . if called from fpnoextflt or fp_intr, we are single-cpu
603 * . otherwise, thread is running.
604 * N.B.: Must be called with interrupts disabled
611 pcb_t pcb
= thr_act
->machine
.pcb
;
612 struct x86_fpsave_state
*ifps
= pcb
->ifps
;
614 if (ifps
!= 0 && !ifps
->fp_valid
) {
615 assert((get_cr0() & CR0_TS
) == 0);
616 /* registers are in FPU */
617 ifps
->fp_valid
= TRUE
;
619 if (!thread_is_64bit(thr_act
)) {
620 /* save the compatibility/legacy mode XMM+x87 state */
621 fxsave(&ifps
->fx_save_state
);
622 ifps
->fp_save_layout
= FXSAVE32
;
625 fxsave64(&ifps
->fx_save_state
);
626 ifps
->fp_save_layout
= FXSAVE64
;
632 * Restore FPU state from PCB.
634 * Locking not needed; always called on the current thread.
641 pcb_t pcb
= thr_act
->machine
.pcb
;
642 struct x86_fpsave_state
*ifps
;
645 if (ifps
== 0 || ifps
->fp_valid
== FALSE
) {
647 /* FIXME: This allocation mechanism should be revised
648 * for scenarios where interrupts are disabled.
650 ifps
= fp_state_alloc();
655 assert(ifps
->fp_save_layout
== FXSAVE32
|| ifps
->fp_save_layout
== FXSAVE64
);
656 if (ifps
->fp_save_layout
== FXSAVE32
) {
657 /* Restore the compatibility/legacy mode XMM+x87 state */
658 fxrstor(&ifps
->fx_save_state
);
660 else if (ifps
->fp_save_layout
== FXSAVE64
) {
661 fxrstor64(&ifps
->fx_save_state
);
664 ifps
->fp_valid
= FALSE
; /* in FPU */
671 * Flush the current act's state, if needed
672 * (used by thread_terminate_self to ensure fp faults
673 * aren't satisfied by overly general trap code in the
674 * context of the reaper thread)
677 fpflush(__unused thread_t thr_act
)
679 /* not needed on MP x86s; fp not lazily evaluated */
683 * SSE arithmetic exception handling code.
684 * Basically the same as the x87 exception handler with a different subtype
690 thread_t thr_act
= current_thread();
691 struct x86_fpsave_state
*ifps
= thr_act
->machine
.pcb
->ifps
;
694 intr
= ml_set_interrupts_enabled(FALSE
);
696 if (get_interrupt_level())
697 panic("SSE exception at interrupt context\n");
698 if (current_task() == kernel_task
)
699 panic("SSE exception in kernel thread context\n");
702 * Save the FPU state and turn off the FPU.
706 (void)ml_set_interrupts_enabled(intr
);
708 * Raise FPU exception.
709 * Locking not needed on pcb->ifps,
710 * since thread is running.
712 assert(ifps
->fp_save_layout
== FXSAVE32
|| ifps
->fp_save_layout
== FXSAVE64
);
713 i386_exception(EXC_ARITHMETIC
,
715 ifps
->fx_save_state
.fx_status
);
721 fp_setvalid(boolean_t value
) {
722 thread_t thr_act
= current_thread();
723 struct x86_fpsave_state
*ifps
= thr_act
->machine
.pcb
->ifps
;
726 ifps
->fp_valid
= value
;