]> git.saurik.com Git - apple/xnu.git/blob - osfmk/i386/fpu.c
ed19ecb12df681e3072729ee867916ed881e9b62
[apple/xnu.git] / osfmk / i386 / fpu.c
1 /*
2 * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28 /*
29 * @OSF_COPYRIGHT@
30 */
31 /*
32 * Mach Operating System
33 * Copyright (c) 1992-1990 Carnegie Mellon University
34 * All Rights Reserved.
35 *
36 * Permission to use, copy, modify and distribute this software and its
37 * documentation is hereby granted, provided that both the copyright
38 * notice and this permission notice appear in all copies of the
39 * software, derivative works or modified versions, and any portions
40 * thereof, and that both notices appear in supporting documentation.
41 *
42 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
43 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
44 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
45 *
46 * Carnegie Mellon requests users of this software to return to
47 *
48 * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
49 * School of Computer Science
50 * Carnegie Mellon University
51 * Pittsburgh PA 15213-3890
52 *
53 * any improvements or extensions that they make and grant Carnegie Mellon
54 * the rights to redistribute these changes.
55 */
56 /*
57 */
58
59 #include <platforms.h>
60
61 #include <mach/exception_types.h>
62 #include <mach/i386/thread_status.h>
63 #include <mach/i386/fp_reg.h>
64
65 #include <kern/mach_param.h>
66 #include <kern/processor.h>
67 #include <kern/thread.h>
68 #include <kern/zalloc.h>
69 #include <kern/misc_protos.h>
70 #include <kern/spl.h>
71 #include <kern/assert.h>
72
73 #include <i386/thread.h>
74 #include <i386/fpu.h>
75 #include <i386/trap.h>
76 #include <architecture/i386/pio.h>
77 #include <i386/cpuid.h>
78 #include <i386/misc_protos.h>
79 #include <i386/proc_reg.h>
80
81 int fp_kind = FP_NO; /* not inited */
82 zone_t ifps_zone; /* zone for FPU save area */
83
/*
 * ALIGNED(addr, size): non-zero when addr is a multiple of size.
 * size must be a power of two.  The address is converted through
 * unsigned long (pointer-width on the ILP32 and LP64 ABIs) rather than
 * unsigned int, so the pointer-to-integer cast does not truncate on
 * 64-bit builds.
 */
#define ALIGNED(addr,size)	((((unsigned long)(addr)) & ((size) - 1)) == 0)
85
86 /* Forward */
87
88 extern void fpinit(void);
89 extern void fp_save(
90 thread_t thr_act);
91 extern void fp_load(
92 thread_t thr_act);
93
94 static void configure_mxcsr_capability_mask(struct x86_fpsave_state *ifps);
95
96 struct x86_fpsave_state starting_fp_state;
97
98
99 /* Global MXCSR capability bitmask */
100 static unsigned int mxcsr_capability_mask;
101
102 /*
103 * Determine the MXCSR capability mask, which allows us to mask off any
104 * potentially unsafe "reserved" bits before restoring the FPU context.
105 * *Not* per-cpu, assumes symmetry.
106 */
107 static void
108 configure_mxcsr_capability_mask(struct x86_fpsave_state *ifps)
109 {
110 /* FXSAVE requires a 16 byte aligned store */
111 assert(ALIGNED(ifps,16));
112 /* Clear, to prepare for the diagnostic FXSAVE */
113 bzero(ifps, sizeof(*ifps));
114 /* Disable FPU/SSE Device Not Available exceptions */
115 clear_ts();
116
117 __asm__ volatile("fxsave %0" : "=m" (ifps->fx_save_state));
118 mxcsr_capability_mask = ifps->fx_save_state.fx_MXCSR_MASK;
119
120 /* Set default mask value if necessary */
121 if (mxcsr_capability_mask == 0)
122 mxcsr_capability_mask = 0xffbf;
123
124 /* Re-enable FPU/SSE DNA exceptions */
125 set_ts();
126 }
127
128 /*
129 * Allocate and initialize FP state for current thread.
130 * Don't load state.
131 */
132 static struct x86_fpsave_state *
133 fp_state_alloc(void)
134 {
135 struct x86_fpsave_state *ifps;
136
137 ifps = (struct x86_fpsave_state *)zalloc(ifps_zone);
138 assert(ALIGNED(ifps,16));
139 bzero((char *)ifps, sizeof *ifps);
140
141 return ifps;
142 }
143
144 static inline void
145 fp_state_free(struct x86_fpsave_state *ifps)
146 {
147 zfree(ifps_zone, ifps);
148 }
149
150
151 /*
152 * Look for FPU and initialize it.
153 * Called on each CPU.
154 */
155 void
156 init_fpu(void)
157 {
158 unsigned short status, control;
159
160 /*
161 * Check for FPU by initializing it,
162 * then trying to read the correct bit patterns from
163 * the control and status registers.
164 */
165 set_cr0((get_cr0() & ~(CR0_EM|CR0_TS)) | CR0_NE); /* allow use of FPU */
166
167 fninit();
168 status = fnstsw();
169 fnstcw(&control);
170
171 if ((status & 0xff) == 0 &&
172 (control & 0x103f) == 0x3f)
173 {
174 /* Use FPU save/restore instructions if available */
175 if (cpuid_features() & CPUID_FEATURE_FXSR) {
176 fp_kind = FP_FXSR;
177 set_cr4(get_cr4() | CR4_FXS);
178 printf("Enabling XMM register save/restore");
179 /* And allow SIMD instructions if present */
180 if (cpuid_features() & CPUID_FEATURE_SSE) {
181 printf(" and SSE/SSE2");
182 set_cr4(get_cr4() | CR4_XMM);
183 }
184 printf(" opcodes\n");
185 } else
186 panic("fpu is not FP_FXSR");
187
188 /*
189 * initialze FPU to normal starting
190 * position so that we can take a snapshot
191 * of that state and store it for future use
192 * when we're asked for the FPU state of a
193 * thread, and it hasn't initiated any yet
194 */
195 fpinit();
196 fxsave(&starting_fp_state.fx_save_state);
197
198 /*
199 * Trap wait instructions. Turn off FPU for now.
200 */
201 set_cr0(get_cr0() | CR0_TS | CR0_MP);
202 }
203 else
204 {
205 /*
206 * NO FPU.
207 */
208 panic("fpu is not FP_FXSR");
209 }
210 }
211
212 /*
213 * Initialize FP handling.
214 */
215 void
216 fpu_module_init(void)
217 {
218 struct x86_fpsave_state *new_ifps;
219
220 ifps_zone = zinit(sizeof(struct x86_fpsave_state),
221 THREAD_MAX * sizeof(struct x86_fpsave_state),
222 THREAD_CHUNK * sizeof(struct x86_fpsave_state),
223 "x86 fpsave state");
224 new_ifps = fp_state_alloc();
225 /* Determine MXCSR reserved bits */
226 configure_mxcsr_capability_mask(new_ifps);
227 fp_state_free(new_ifps);
228 }
229
/*
 * Free a FPU save area.
 * Called only when thread terminating - no locking necessary.
 *
 * fps: save area to release back to ifps_zone.
 *
 * Written as a prototype-style definition; the old identifier-list
 * (K&R) form gives the compiler no parameter types to check calls
 * against and is an obsolescent language feature.
 */
void
fpu_free(struct x86_fpsave_state *fps)
{
	fp_state_free(fps);
}
240
241 /*
242 * Set the floating-point state for a thread based
243 * on the FXSave formatted data. This is basically
244 * the same as fpu_set_state except it uses the
245 * expanded data structure.
246 * If the thread is not the current thread, it is
247 * not running (held). Locking needed against
248 * concurrent fpu_set_state or fpu_get_state.
249 */
250 kern_return_t
251 fpu_set_fxstate(
252 thread_t thr_act,
253 thread_state_t tstate)
254 {
255 struct x86_fpsave_state *ifps;
256 struct x86_fpsave_state *new_ifps;
257 x86_float_state64_t *state;
258 pcb_t pcb;
259
260 if (fp_kind == FP_NO)
261 return KERN_FAILURE;
262
263 state = (x86_float_state64_t *)tstate;
264
265 assert(thr_act != THREAD_NULL);
266 pcb = thr_act->machine.pcb;
267
268 if (state == NULL) {
269 /*
270 * new FPU state is 'invalid'.
271 * Deallocate the fp state if it exists.
272 */
273 simple_lock(&pcb->lock);
274
275 ifps = pcb->ifps;
276 pcb->ifps = 0;
277
278 simple_unlock(&pcb->lock);
279
280 if (ifps != 0)
281 fp_state_free(ifps);
282 } else {
283 /*
284 * Valid state. Allocate the fp state if there is none.
285 */
286 new_ifps = 0;
287 Retry:
288 simple_lock(&pcb->lock);
289
290 ifps = pcb->ifps;
291 if (ifps == 0) {
292 if (new_ifps == 0) {
293 simple_unlock(&pcb->lock);
294 new_ifps = fp_state_alloc();
295 goto Retry;
296 }
297 ifps = new_ifps;
298 new_ifps = 0;
299 pcb->ifps = ifps;
300 }
301 /*
302 * now copy over the new data.
303 */
304 bcopy((char *)&state->fpu_fcw,
305 (char *)&ifps->fx_save_state, sizeof(struct x86_fx_save));
306
307 /* XXX The layout of the state set from user-space may need to be
308 * validated for consistency.
309 */
310 ifps->fp_save_layout = thread_is_64bit(thr_act) ? FXSAVE64 : FXSAVE32;
311 /*
312 * Clear any reserved bits in the MXCSR to prevent a GPF
313 * when issuing an FXRSTOR.
314 */
315 ifps->fx_save_state.fx_MXCSR &= mxcsr_capability_mask;
316
317 simple_unlock(&pcb->lock);
318
319 if (new_ifps != 0)
320 fp_state_free(new_ifps);
321 }
322 return KERN_SUCCESS;
323 }
324
325 /*
326 * Get the floating-point state for a thread.
327 * If the thread is not the current thread, it is
328 * not running (held). Locking needed against
329 * concurrent fpu_set_state or fpu_get_state.
330 */
331 kern_return_t
332 fpu_get_fxstate(
333 thread_t thr_act,
334 thread_state_t tstate)
335 {
336 struct x86_fpsave_state *ifps;
337 x86_float_state64_t *state;
338 kern_return_t ret = KERN_FAILURE;
339 pcb_t pcb;
340
341 if (fp_kind == FP_NO)
342 return KERN_FAILURE;
343
344 state = (x86_float_state64_t *)tstate;
345
346 assert(thr_act != THREAD_NULL);
347 pcb = thr_act->machine.pcb;
348
349 simple_lock(&pcb->lock);
350
351 ifps = pcb->ifps;
352 if (ifps == 0) {
353 /*
354 * No valid floating-point state.
355 */
356 bcopy((char *)&starting_fp_state.fx_save_state,
357 (char *)&state->fpu_fcw, sizeof(struct x86_fx_save));
358
359 simple_unlock(&pcb->lock);
360
361 return KERN_SUCCESS;
362 }
363 /*
364 * Make sure we`ve got the latest fp state info
365 * If the live fpu state belongs to our target
366 */
367 if (thr_act == current_thread())
368 {
369 boolean_t intr;
370
371 intr = ml_set_interrupts_enabled(FALSE);
372
373 clear_ts();
374 fp_save(thr_act);
375 clear_fpu();
376
377 (void)ml_set_interrupts_enabled(intr);
378 }
379 if (ifps->fp_valid) {
380 bcopy((char *)&ifps->fx_save_state,
381 (char *)&state->fpu_fcw, sizeof(struct x86_fx_save));
382 ret = KERN_SUCCESS;
383 }
384 simple_unlock(&pcb->lock);
385
386 return ret;
387 }
388
389
390 /*
391 * the child thread is 'stopped' with the thread
392 * mutex held and is currently not known by anyone
393 * so no way for fpu state to get manipulated by an
394 * outside agency -> no need for pcb lock
395 */
396
397 void
398 fpu_dup_fxstate(
399 thread_t parent,
400 thread_t child)
401 {
402 struct x86_fpsave_state *new_ifps = NULL;
403 boolean_t intr;
404 pcb_t ppcb;
405
406 ppcb = parent->machine.pcb;
407
408 if (ppcb->ifps == NULL)
409 return;
410
411 if (child->machine.pcb->ifps)
412 panic("fpu_dup_fxstate: child's ifps non-null");
413
414 new_ifps = fp_state_alloc();
415
416 simple_lock(&ppcb->lock);
417
418 if (ppcb->ifps != NULL) {
419 /*
420 * Make sure we`ve got the latest fp state info
421 */
422 intr = ml_set_interrupts_enabled(FALSE);
423
424 clear_ts();
425 fp_save(parent);
426 clear_fpu();
427
428 (void)ml_set_interrupts_enabled(intr);
429
430 if (ppcb->ifps->fp_valid) {
431 child->machine.pcb->ifps = new_ifps;
432
433 bcopy((char *)&(ppcb->ifps->fx_save_state),
434 (char *)&(child->machine.pcb->ifps->fx_save_state), sizeof(struct x86_fx_save));
435
436 new_ifps->fp_save_layout = ppcb->ifps->fp_save_layout;
437 /*
438 * Clear any reserved bits in the MXCSR to prevent a GPF
439 * when issuing an FXRSTOR.
440 */
441 new_ifps->fx_save_state.fx_MXCSR &= mxcsr_capability_mask;
442 new_ifps = NULL;
443 }
444 }
445 simple_unlock(&ppcb->lock);
446
447 if (new_ifps != NULL)
448 fp_state_free(new_ifps);
449 }
450
451
452 /*
453 * Initialize FPU.
454 *
455 */
456 void
457 fpinit(void)
458 {
459 unsigned short control;
460
461 clear_ts();
462 fninit();
463 fnstcw(&control);
464 control &= ~(FPC_PC|FPC_RC); /* Clear precision & rounding control */
465 control |= (FPC_PC_64 | /* Set precision */
466 FPC_RC_RN | /* round-to-nearest */
467 FPC_ZE | /* Suppress zero-divide */
468 FPC_OE | /* and overflow */
469 FPC_UE | /* underflow */
470 FPC_IE | /* Allow NaNQs and +-INF */
471 FPC_DE | /* Allow denorms as operands */
472 FPC_PE); /* No trap for precision loss */
473 fldcw(control);
474
475 /* Initialize SSE/SSE2 */
476 __builtin_ia32_ldmxcsr(0x1f80);
477 }
478
479 /*
480 * Coprocessor not present.
481 */
482
483 void
484 fpnoextflt(void)
485 {
486 boolean_t intr;
487
488 intr = ml_set_interrupts_enabled(FALSE);
489
490 clear_ts(); /* Enable FPU use */
491
492 if (get_interrupt_level()) {
493 /*
494 * Save current coprocessor context if valid
495 * Initialize coprocessor live context
496 */
497 fp_save(current_thread());
498 fpinit();
499 } else {
500 /*
501 * Load this thread`s state into coprocessor live context.
502 */
503 fp_load(current_thread());
504 }
505
506 (void)ml_set_interrupts_enabled(intr);
507 }
508
509 /*
510 * FPU overran end of segment.
511 * Re-initialize FPU. Floating point state is not valid.
512 */
513
514 void
515 fpextovrflt(void)
516 {
517 thread_t thr_act = current_thread();
518 pcb_t pcb;
519 struct x86_fpsave_state *ifps;
520 boolean_t intr;
521
522 intr = ml_set_interrupts_enabled(FALSE);
523
524 if (get_interrupt_level())
525 panic("FPU segment overrun exception at interrupt context\n");
526 if (current_task() == kernel_task)
527 panic("FPU segment overrun exception in kernel thread context\n");
528
529 /*
530 * This is a non-recoverable error.
531 * Invalidate the thread`s FPU state.
532 */
533 pcb = thr_act->machine.pcb;
534 simple_lock(&pcb->lock);
535 ifps = pcb->ifps;
536 pcb->ifps = 0;
537 simple_unlock(&pcb->lock);
538
539 /*
540 * Re-initialize the FPU.
541 */
542 clear_ts();
543 fninit();
544
545 /*
546 * And disable access.
547 */
548 clear_fpu();
549
550 (void)ml_set_interrupts_enabled(intr);
551
552 if (ifps)
553 zfree(ifps_zone, ifps);
554
555 /*
556 * Raise exception.
557 */
558 i386_exception(EXC_BAD_ACCESS, VM_PROT_READ|VM_PROT_EXECUTE, 0);
559 /*NOTREACHED*/
560 }
561
562 /*
563 * FPU error. Called by AST.
564 */
565
566 void
567 fpexterrflt(void)
568 {
569 thread_t thr_act = current_thread();
570 struct x86_fpsave_state *ifps = thr_act->machine.pcb->ifps;
571 boolean_t intr;
572
573 intr = ml_set_interrupts_enabled(FALSE);
574
575 if (get_interrupt_level())
576 panic("FPU error exception at interrupt context\n");
577 if (current_task() == kernel_task)
578 panic("FPU error exception in kernel thread context\n");
579
580 /*
581 * Save the FPU state and turn off the FPU.
582 */
583 fp_save(thr_act);
584
585 (void)ml_set_interrupts_enabled(intr);
586
587 /*
588 * Raise FPU exception.
589 * Locking not needed on pcb->ifps,
590 * since thread is running.
591 */
592 i386_exception(EXC_ARITHMETIC,
593 EXC_I386_EXTERR,
594 ifps->fx_save_state.fx_status);
595
596 /*NOTREACHED*/
597 }
598
599 /*
600 * Save FPU state.
601 *
602 * Locking not needed:
603 * . if called from fpu_get_state, pcb already locked.
604 * . if called from fpnoextflt or fp_intr, we are single-cpu
605 * . otherwise, thread is running.
606 * N.B.: Must be called with interrupts disabled
607 */
608
609 void
610 fp_save(
611 thread_t thr_act)
612 {
613 pcb_t pcb = thr_act->machine.pcb;
614 struct x86_fpsave_state *ifps = pcb->ifps;
615
616 if (ifps != 0 && !ifps->fp_valid) {
617 assert((get_cr0() & CR0_TS) == 0);
618 /* registers are in FPU */
619 ifps->fp_valid = TRUE;
620
621 if (!thread_is_64bit(thr_act)) {
622 /* save the compatibility/legacy mode XMM+x87 state */
623 fxsave(&ifps->fx_save_state);
624 ifps->fp_save_layout = FXSAVE32;
625 }
626 else {
627 fxsave64(&ifps->fx_save_state);
628 ifps->fp_save_layout = FXSAVE64;
629 }
630 }
631 }
632
633 /*
634 * Restore FPU state from PCB.
635 *
636 * Locking not needed; always called on the current thread.
637 */
638
639 void
640 fp_load(
641 thread_t thr_act)
642 {
643 pcb_t pcb = thr_act->machine.pcb;
644 struct x86_fpsave_state *ifps;
645
646 ifps = pcb->ifps;
647 if (ifps == 0 || ifps->fp_valid == FALSE) {
648 if (ifps == 0) {
649 /* FIXME: This allocation mechanism should be revised
650 * for scenarios where interrupts are disabled.
651 */
652 ifps = fp_state_alloc();
653 pcb->ifps = ifps;
654 }
655 fpinit();
656 } else {
657 assert(ifps->fp_save_layout == FXSAVE32 || ifps->fp_save_layout == FXSAVE64);
658 if (ifps->fp_save_layout == FXSAVE32) {
659 /* Restore the compatibility/legacy mode XMM+x87 state */
660 fxrstor(&ifps->fx_save_state);
661 }
662 else if (ifps->fp_save_layout == FXSAVE64) {
663 fxrstor64(&ifps->fx_save_state);
664 }
665 }
666 ifps->fp_valid = FALSE; /* in FPU */
667 }
668
669
670
671 /*
672 * fpflush(thread_t)
673 * Flush the current act's state, if needed
674 * (used by thread_terminate_self to ensure fp faults
675 * aren't satisfied by overly general trap code in the
676 * context of the reaper thread)
677 */
678 void
679 fpflush(__unused thread_t thr_act)
680 {
681 /* not needed on MP x86s; fp not lazily evaluated */
682 }
683
684 /*
685 * SSE arithmetic exception handling code.
686 * Basically the same as the x87 exception handler with a different subtype
687 */
688
689 void
690 fpSSEexterrflt(void)
691 {
692 thread_t thr_act = current_thread();
693 struct x86_fpsave_state *ifps = thr_act->machine.pcb->ifps;
694 boolean_t intr;
695
696 intr = ml_set_interrupts_enabled(FALSE);
697
698 if (get_interrupt_level())
699 panic("SSE exception at interrupt context\n");
700 if (current_task() == kernel_task)
701 panic("SSE exception in kernel thread context\n");
702
703 /*
704 * Save the FPU state and turn off the FPU.
705 */
706 fp_save(thr_act);
707
708 (void)ml_set_interrupts_enabled(intr);
709 /*
710 * Raise FPU exception.
711 * Locking not needed on pcb->ifps,
712 * since thread is running.
713 */
714 assert(ifps->fp_save_layout == FXSAVE32 || ifps->fp_save_layout == FXSAVE64);
715 i386_exception(EXC_ARITHMETIC,
716 EXC_I386_SSEEXTERR,
717 ifps->fx_save_state.fx_status);
718 /*NOTREACHED*/
719 }
720
721
722 void
723 fp_setvalid(boolean_t value) {
724 thread_t thr_act = current_thread();
725 struct x86_fpsave_state *ifps = thr_act->machine.pcb->ifps;
726
727 if (ifps) {
728 ifps->fp_valid = value;
729
730 if (value == TRUE)
731 clear_fpu();
732 }
733 }