]> git.saurik.com Git - apple/xnu.git/blob - osfmk/i386/fpu.c
xnu-1228.tar.gz
[apple/xnu.git] / osfmk / i386 / fpu.c
1 /*
2 * Copyright (c) 2000-2006 Apple Computer, Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28 /*
29 * @OSF_COPYRIGHT@
30 */
31 /*
32 * Mach Operating System
33 * Copyright (c) 1992-1990 Carnegie Mellon University
34 * All Rights Reserved.
35 *
36 * Permission to use, copy, modify and distribute this software and its
37 * documentation is hereby granted, provided that both the copyright
38 * notice and this permission notice appear in all copies of the
39 * software, derivative works or modified versions, and any portions
40 * thereof, and that both notices appear in supporting documentation.
41 *
42 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
43 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
44 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
45 *
46 * Carnegie Mellon requests users of this software to return to
47 *
48 * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
49 * School of Computer Science
50 * Carnegie Mellon University
51 * Pittsburgh PA 15213-3890
52 *
53 * any improvements or extensions that they make and grant Carnegie Mellon
54 * the rights to redistribute these changes.
55 */
56 /*
57 */
58
59 #include <platforms.h>
60
61 #include <mach/exception_types.h>
62 #include <mach/i386/thread_status.h>
63 #include <mach/i386/fp_reg.h>
64
65 #include <kern/mach_param.h>
66 #include <kern/processor.h>
67 #include <kern/thread.h>
68 #include <kern/zalloc.h>
69 #include <kern/misc_protos.h>
70 #include <kern/spl.h>
71 #include <kern/assert.h>
72
73 #include <i386/thread.h>
74 #include <i386/fpu.h>
75 #include <i386/trap.h>
76 #include <architecture/i386/pio.h>
77 #include <i386/cpuid.h>
78 #include <i386/misc_protos.h>
79 #include <i386/proc_reg.h>
80
81 int fp_kind = FP_NO; /* not inited */
82 zone_t ifps_zone; /* zone for FPU save area */
83
/*
 * True when addr is a multiple of size; size must be a power of two.
 * The address is converted through unsigned long rather than unsigned
 * int, so no address bits are discarded (and no pointer-size cast
 * warning is raised) on targets whose pointers are wider than int.
 */
#define ALIGNED(addr,size)	((((unsigned long)(addr)) & ((size) - 1)) == 0)
85
86 /* Forward */
87
88 extern void fpinit(void);
89 extern void fp_save(
90 thread_t thr_act);
91 extern void fp_load(
92 thread_t thr_act);
93
94 static void configure_mxcsr_capability_mask(struct x86_fpsave_state *ifps);
95
96 struct x86_fpsave_state starting_fp_state;
97
98
99 /* Global MXCSR capability bitmask */
100 static unsigned int mxcsr_capability_mask;
101
102 /*
103 * Determine the MXCSR capability mask, which allows us to mask off any
104 * potentially unsafe "reserved" bits before restoring the FPU context.
105 * *Not* per-cpu, assumes symmetry.
106 */
107 static void
108 configure_mxcsr_capability_mask(struct x86_fpsave_state *ifps)
109 {
110 /* FXSAVE requires a 16 byte aligned store */
111 assert(ALIGNED(ifps,16));
112 /* Clear, to prepare for the diagnostic FXSAVE */
113 bzero(ifps, sizeof(*ifps));
114 /* Disable FPU/SSE Device Not Available exceptions */
115 clear_ts();
116
117 __asm__ volatile("fxsave %0" : "=m" (ifps->fx_save_state));
118 mxcsr_capability_mask = ifps->fx_save_state.fx_MXCSR_MASK;
119
120 /* Set default mask value if necessary */
121 if (mxcsr_capability_mask == 0)
122 mxcsr_capability_mask = 0xffbf;
123
124 /* Re-enable FPU/SSE DNA exceptions */
125 set_ts();
126 }
127
128 /*
129 * Allocate and initialize FP state for current thread.
130 * Don't load state.
131 */
132 static struct x86_fpsave_state *
133 fp_state_alloc(void)
134 {
135 struct x86_fpsave_state *ifps;
136
137 ifps = (struct x86_fpsave_state *)zalloc(ifps_zone);
138 assert(ALIGNED(ifps,16));
139 bzero((char *)ifps, sizeof *ifps);
140
141 return ifps;
142 }
143
144 static inline void
145 fp_state_free(struct x86_fpsave_state *ifps)
146 {
147 zfree(ifps_zone, ifps);
148 }
149
150
151 /*
152 * Look for FPU and initialize it.
153 * Called on each CPU.
154 */
155 void
156 init_fpu(void)
157 {
158 unsigned short status, control;
159
160 /*
161 * Check for FPU by initializing it,
162 * then trying to read the correct bit patterns from
163 * the control and status registers.
164 */
165 set_cr0((get_cr0() & ~(CR0_EM|CR0_TS)) | CR0_NE); /* allow use of FPU */
166
167 fninit();
168 status = fnstsw();
169 fnstcw(&control);
170
171 if ((status & 0xff) == 0 &&
172 (control & 0x103f) == 0x3f)
173 {
174 /* Use FPU save/restore instructions if available */
175 if (cpuid_features() & CPUID_FEATURE_FXSR) {
176 fp_kind = FP_FXSR;
177 set_cr4(get_cr4() | CR4_FXS);
178 /* And allow SIMD instructions if present */
179 if (cpuid_features() & CPUID_FEATURE_SSE) {
180 set_cr4(get_cr4() | CR4_XMM);
181 }
182 } else
183 panic("fpu is not FP_FXSR");
184
185 /*
186 * initialze FPU to normal starting
187 * position so that we can take a snapshot
188 * of that state and store it for future use
189 * when we're asked for the FPU state of a
190 * thread, and it hasn't initiated any yet
191 */
192 fpinit();
193 fxsave(&starting_fp_state.fx_save_state);
194
195 /*
196 * Trap wait instructions. Turn off FPU for now.
197 */
198 set_cr0(get_cr0() | CR0_TS | CR0_MP);
199 }
200 else
201 {
202 /*
203 * NO FPU.
204 */
205 panic("fpu is not FP_FXSR");
206 }
207 }
208
209 /*
210 * Initialize FP handling.
211 */
212 void
213 fpu_module_init(void)
214 {
215 struct x86_fpsave_state *new_ifps;
216
217 ifps_zone = zinit(sizeof(struct x86_fpsave_state),
218 THREAD_MAX * sizeof(struct x86_fpsave_state),
219 THREAD_CHUNK * sizeof(struct x86_fpsave_state),
220 "x86 fpsave state");
221 new_ifps = fp_state_alloc();
222 /* Determine MXCSR reserved bits */
223 configure_mxcsr_capability_mask(new_ifps);
224 fp_state_free(new_ifps);
225 }
226
/*
 * Release an FPU save area back to its zone.
 * Only called while a thread is terminating, so no locking is needed.
 */
void
fpu_free(struct x86_fpsave_state *fps)
{
	fp_state_free(fps);
}
236
237 /*
238 * Set the floating-point state for a thread based
239 * on the FXSave formatted data. This is basically
240 * the same as fpu_set_state except it uses the
241 * expanded data structure.
242 * If the thread is not the current thread, it is
243 * not running (held). Locking needed against
244 * concurrent fpu_set_state or fpu_get_state.
245 */
246 kern_return_t
247 fpu_set_fxstate(
248 thread_t thr_act,
249 thread_state_t tstate)
250 {
251 struct x86_fpsave_state *ifps;
252 struct x86_fpsave_state *new_ifps;
253 x86_float_state64_t *state;
254 pcb_t pcb;
255
256 if (fp_kind == FP_NO)
257 return KERN_FAILURE;
258
259 state = (x86_float_state64_t *)tstate;
260
261 assert(thr_act != THREAD_NULL);
262 pcb = thr_act->machine.pcb;
263
264 if (state == NULL) {
265 /*
266 * new FPU state is 'invalid'.
267 * Deallocate the fp state if it exists.
268 */
269 simple_lock(&pcb->lock);
270
271 ifps = pcb->ifps;
272 pcb->ifps = 0;
273
274 simple_unlock(&pcb->lock);
275
276 if (ifps != 0)
277 fp_state_free(ifps);
278 } else {
279 /*
280 * Valid state. Allocate the fp state if there is none.
281 */
282 new_ifps = 0;
283 Retry:
284 simple_lock(&pcb->lock);
285
286 ifps = pcb->ifps;
287 if (ifps == 0) {
288 if (new_ifps == 0) {
289 simple_unlock(&pcb->lock);
290 new_ifps = fp_state_alloc();
291 goto Retry;
292 }
293 ifps = new_ifps;
294 new_ifps = 0;
295 pcb->ifps = ifps;
296 }
297 /*
298 * now copy over the new data.
299 */
300 bcopy((char *)&state->fpu_fcw,
301 (char *)&ifps->fx_save_state, sizeof(struct x86_fx_save));
302
303 /* XXX The layout of the state set from user-space may need to be
304 * validated for consistency.
305 */
306 ifps->fp_save_layout = thread_is_64bit(thr_act) ? FXSAVE64 : FXSAVE32;
307 /* Mark the thread's floating point status as non-live. */
308 /* Temporarily disabled: radar 4647827
309 * ifps->fp_valid = TRUE;
310 */
311
312 /*
313 * Clear any reserved bits in the MXCSR to prevent a GPF
314 * when issuing an FXRSTOR.
315 */
316 ifps->fx_save_state.fx_MXCSR &= mxcsr_capability_mask;
317
318 simple_unlock(&pcb->lock);
319
320 if (new_ifps != 0)
321 fp_state_free(new_ifps);
322 }
323 return KERN_SUCCESS;
324 }
325
326 /*
327 * Get the floating-point state for a thread.
328 * If the thread is not the current thread, it is
329 * not running (held). Locking needed against
330 * concurrent fpu_set_state or fpu_get_state.
331 */
332 kern_return_t
333 fpu_get_fxstate(
334 thread_t thr_act,
335 thread_state_t tstate)
336 {
337 struct x86_fpsave_state *ifps;
338 x86_float_state64_t *state;
339 kern_return_t ret = KERN_FAILURE;
340 pcb_t pcb;
341
342 if (fp_kind == FP_NO)
343 return KERN_FAILURE;
344
345 state = (x86_float_state64_t *)tstate;
346
347 assert(thr_act != THREAD_NULL);
348 pcb = thr_act->machine.pcb;
349
350 simple_lock(&pcb->lock);
351
352 ifps = pcb->ifps;
353 if (ifps == 0) {
354 /*
355 * No valid floating-point state.
356 */
357 bcopy((char *)&starting_fp_state.fx_save_state,
358 (char *)&state->fpu_fcw, sizeof(struct x86_fx_save));
359
360 simple_unlock(&pcb->lock);
361
362 return KERN_SUCCESS;
363 }
364 /*
365 * Make sure we`ve got the latest fp state info
366 * If the live fpu state belongs to our target
367 */
368 if (thr_act == current_thread()) {
369 boolean_t intr;
370
371 intr = ml_set_interrupts_enabled(FALSE);
372
373 clear_ts();
374 fp_save(thr_act);
375 clear_fpu();
376
377 (void)ml_set_interrupts_enabled(intr);
378 }
379 if (ifps->fp_valid) {
380 bcopy((char *)&ifps->fx_save_state,
381 (char *)&state->fpu_fcw, sizeof(struct x86_fx_save));
382 ret = KERN_SUCCESS;
383 }
384 simple_unlock(&pcb->lock);
385
386 return ret;
387 }
388
389
390
391 /*
392 * the child thread is 'stopped' with the thread
393 * mutex held and is currently not known by anyone
394 * so no way for fpu state to get manipulated by an
395 * outside agency -> no need for pcb lock
396 */
397
398 void
399 fpu_dup_fxstate(
400 thread_t parent,
401 thread_t child)
402 {
403 struct x86_fpsave_state *new_ifps = NULL;
404 boolean_t intr;
405 pcb_t ppcb;
406
407 ppcb = parent->machine.pcb;
408
409 if (ppcb->ifps == NULL)
410 return;
411
412 if (child->machine.pcb->ifps)
413 panic("fpu_dup_fxstate: child's ifps non-null");
414
415 new_ifps = fp_state_alloc();
416
417 simple_lock(&ppcb->lock);
418
419 if (ppcb->ifps != NULL) {
420 /*
421 * Make sure we`ve got the latest fp state info
422 */
423 intr = ml_set_interrupts_enabled(FALSE);
424
425 clear_ts();
426 fp_save(parent);
427 clear_fpu();
428
429 (void)ml_set_interrupts_enabled(intr);
430
431 if (ppcb->ifps->fp_valid) {
432 child->machine.pcb->ifps = new_ifps;
433
434 bcopy((char *)&(ppcb->ifps->fx_save_state),
435 (char *)&(child->machine.pcb->ifps->fx_save_state), sizeof(struct x86_fx_save));
436
437 new_ifps->fp_save_layout = ppcb->ifps->fp_save_layout;
438 /* Mark the new fp saved state as non-live. */
439 /* Temporarily disabled: radar 4647827
440 * new_ifps->fp_valid = TRUE;
441 */
442 /*
443 * Clear any reserved bits in the MXCSR to prevent a GPF
444 * when issuing an FXRSTOR.
445 */
446 new_ifps->fx_save_state.fx_MXCSR &= mxcsr_capability_mask;
447 new_ifps = NULL;
448 }
449 }
450 simple_unlock(&ppcb->lock);
451
452 if (new_ifps != NULL)
453 fp_state_free(new_ifps);
454 }
455
456
457 /*
458 * Initialize FPU.
459 *
460 */
461 void
462 fpinit(void)
463 {
464 unsigned short control;
465
466 clear_ts();
467 fninit();
468 fnstcw(&control);
469 control &= ~(FPC_PC|FPC_RC); /* Clear precision & rounding control */
470 control |= (FPC_PC_64 | /* Set precision */
471 FPC_RC_RN | /* round-to-nearest */
472 FPC_ZE | /* Suppress zero-divide */
473 FPC_OE | /* and overflow */
474 FPC_UE | /* underflow */
475 FPC_IE | /* Allow NaNQs and +-INF */
476 FPC_DE | /* Allow denorms as operands */
477 FPC_PE); /* No trap for precision loss */
478 fldcw(control);
479
480 /* Initialize SSE/SSE2 */
481 __builtin_ia32_ldmxcsr(0x1f80);
482 }
483
484 /*
485 * Coprocessor not present.
486 */
487
488 void
489 fpnoextflt(void)
490 {
491 boolean_t intr;
492 thread_t thr_act;
493 pcb_t pcb;
494 struct x86_fpsave_state *ifps = 0;
495
496 thr_act = current_thread();
497 pcb = thr_act->machine.pcb;
498
499 if (pcb->ifps == 0 && !get_interrupt_level())
500 ifps = fp_state_alloc();
501
502 intr = ml_set_interrupts_enabled(FALSE);
503
504 clear_ts(); /* Enable FPU use */
505
506 if (get_interrupt_level()) {
507 /*
508 * Save current coprocessor context if valid
509 * Initialize coprocessor live context
510 */
511 fp_save(thr_act);
512 fpinit();
513 } else {
514 if (pcb->ifps == 0) {
515 pcb->ifps = ifps;
516 ifps = 0;
517 }
518 /*
519 * Load this thread`s state into coprocessor live context.
520 */
521 fp_load(thr_act);
522 }
523 (void)ml_set_interrupts_enabled(intr);
524
525 if (ifps)
526 fp_state_free(ifps);
527 }
528
529 /*
530 * FPU overran end of segment.
531 * Re-initialize FPU. Floating point state is not valid.
532 */
533
534 void
535 fpextovrflt(void)
536 {
537 thread_t thr_act = current_thread();
538 pcb_t pcb;
539 struct x86_fpsave_state *ifps;
540 boolean_t intr;
541
542 intr = ml_set_interrupts_enabled(FALSE);
543
544 if (get_interrupt_level())
545 panic("FPU segment overrun exception at interrupt context\n");
546 if (current_task() == kernel_task)
547 panic("FPU segment overrun exception in kernel thread context\n");
548
549 /*
550 * This is a non-recoverable error.
551 * Invalidate the thread`s FPU state.
552 */
553 pcb = thr_act->machine.pcb;
554 simple_lock(&pcb->lock);
555 ifps = pcb->ifps;
556 pcb->ifps = 0;
557 simple_unlock(&pcb->lock);
558
559 /*
560 * Re-initialize the FPU.
561 */
562 clear_ts();
563 fninit();
564
565 /*
566 * And disable access.
567 */
568 clear_fpu();
569
570 (void)ml_set_interrupts_enabled(intr);
571
572 if (ifps)
573 zfree(ifps_zone, ifps);
574
575 /*
576 * Raise exception.
577 */
578 i386_exception(EXC_BAD_ACCESS, VM_PROT_READ|VM_PROT_EXECUTE, 0);
579 /*NOTREACHED*/
580 }
581
582 /*
583 * FPU error. Called by AST.
584 */
585
586 void
587 fpexterrflt(void)
588 {
589 thread_t thr_act = current_thread();
590 struct x86_fpsave_state *ifps = thr_act->machine.pcb->ifps;
591 boolean_t intr;
592
593 intr = ml_set_interrupts_enabled(FALSE);
594
595 if (get_interrupt_level())
596 panic("FPU error exception at interrupt context\n");
597 if (current_task() == kernel_task)
598 panic("FPU error exception in kernel thread context\n");
599
600 /*
601 * Save the FPU state and turn off the FPU.
602 */
603 fp_save(thr_act);
604
605 (void)ml_set_interrupts_enabled(intr);
606
607 /*
608 * Raise FPU exception.
609 * Locking not needed on pcb->ifps,
610 * since thread is running.
611 */
612 i386_exception(EXC_ARITHMETIC,
613 EXC_I386_EXTERR,
614 ifps->fx_save_state.fx_status);
615
616 /*NOTREACHED*/
617 }
618
619 /*
620 * Save FPU state.
621 *
622 * Locking not needed:
623 * . if called from fpu_get_state, pcb already locked.
624 * . if called from fpnoextflt or fp_intr, we are single-cpu
625 * . otherwise, thread is running.
626 * N.B.: Must be called with interrupts disabled
627 */
628
629 void
630 fp_save(
631 thread_t thr_act)
632 {
633 pcb_t pcb = thr_act->machine.pcb;
634 struct x86_fpsave_state *ifps = pcb->ifps;
635
636 if (ifps != 0 && !ifps->fp_valid) {
637 assert((get_cr0() & CR0_TS) == 0);
638 /* registers are in FPU */
639 ifps->fp_valid = TRUE;
640
641 if (!thread_is_64bit(thr_act)) {
642 /* save the compatibility/legacy mode XMM+x87 state */
643 fxsave(&ifps->fx_save_state);
644 ifps->fp_save_layout = FXSAVE32;
645 }
646 else {
647 fxsave64(&ifps->fx_save_state);
648 ifps->fp_save_layout = FXSAVE64;
649 }
650 }
651 }
652
653 /*
654 * Restore FPU state from PCB.
655 *
656 * Locking not needed; always called on the current thread.
657 */
658
659 void
660 fp_load(
661 thread_t thr_act)
662 {
663 pcb_t pcb = thr_act->machine.pcb;
664 struct x86_fpsave_state *ifps;
665
666 ifps = pcb->ifps;
667 if (ifps == 0 || ifps->fp_valid == FALSE) {
668 if (ifps == 0) {
669 /* FIXME: This allocation mechanism should be revised
670 * for scenarios where interrupts are disabled.
671 */
672 ifps = fp_state_alloc();
673 pcb->ifps = ifps;
674 }
675 fpinit();
676 } else {
677 assert(ifps->fp_save_layout == FXSAVE32 || ifps->fp_save_layout == FXSAVE64);
678 if (ifps->fp_save_layout == FXSAVE32) {
679 /* Restore the compatibility/legacy mode XMM+x87 state */
680 fxrstor(&ifps->fx_save_state);
681 }
682 else if (ifps->fp_save_layout == FXSAVE64) {
683 fxrstor64(&ifps->fx_save_state);
684 }
685 }
686 ifps->fp_valid = FALSE; /* in FPU */
687 }
688
689
690
691 /*
692 * fpflush(thread_t)
693 * Flush the current act's state, if needed
694 * (used by thread_terminate_self to ensure fp faults
695 * aren't satisfied by overly general trap code in the
696 * context of the reaper thread)
697 */
698 void
699 fpflush(__unused thread_t thr_act)
700 {
701 /* not needed on MP x86s; fp not lazily evaluated */
702 }
703
704 /*
705 * SSE arithmetic exception handling code.
706 * Basically the same as the x87 exception handler with a different subtype
707 */
708
709 void
710 fpSSEexterrflt(void)
711 {
712 thread_t thr_act = current_thread();
713 struct x86_fpsave_state *ifps = thr_act->machine.pcb->ifps;
714 boolean_t intr;
715
716 intr = ml_set_interrupts_enabled(FALSE);
717
718 if (get_interrupt_level())
719 panic("SSE exception at interrupt context\n");
720 if (current_task() == kernel_task)
721 panic("SSE exception in kernel thread context\n");
722
723 /*
724 * Save the FPU state and turn off the FPU.
725 */
726 fp_save(thr_act);
727
728 (void)ml_set_interrupts_enabled(intr);
729 /*
730 * Raise FPU exception.
731 * Locking not needed on pcb->ifps,
732 * since thread is running.
733 */
734 assert(ifps->fp_save_layout == FXSAVE32 || ifps->fp_save_layout == FXSAVE64);
735 i386_exception(EXC_ARITHMETIC,
736 EXC_I386_SSEEXTERR,
737 ifps->fx_save_state.fx_status);
738 /*NOTREACHED*/
739 }
740
741
742 void
743 fp_setvalid(boolean_t value) {
744 thread_t thr_act = current_thread();
745 struct x86_fpsave_state *ifps = thr_act->machine.pcb->ifps;
746
747 if (ifps) {
748 ifps->fp_valid = value;
749
750 if (value == TRUE)
751 clear_fpu();
752 }
753 }