]> git.saurik.com Git - apple/xnu.git/blob - osfmk/i386/fpu.c
df88486cba0474f12149e22951b74f88628c4317
[apple/xnu.git] / osfmk / i386 / fpu.c
1 /*
2 * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved.
3 *
4 * @APPLE_LICENSE_HEADER_START@
5 *
6 * The contents of this file constitute Original Code as defined in and
7 * are subject to the Apple Public Source License Version 1.1 (the
8 * "License"). You may not use this file except in compliance with the
9 * License. Please obtain a copy of the License at
10 * http://www.apple.com/publicsource and read it before using this file.
11 *
12 * This Original Code and all software distributed under the License are
13 * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER
14 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
15 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the
17 * License for the specific language governing rights and limitations
18 * under the License.
19 *
20 * @APPLE_LICENSE_HEADER_END@
21 */
22 /*
23 * @OSF_COPYRIGHT@
24 */
25 /*
26 * Mach Operating System
27 * Copyright (c) 1992-1990 Carnegie Mellon University
28 * All Rights Reserved.
29 *
30 * Permission to use, copy, modify and distribute this software and its
31 * documentation is hereby granted, provided that both the copyright
32 * notice and this permission notice appear in all copies of the
33 * software, derivative works or modified versions, and any portions
34 * thereof, and that both notices appear in supporting documentation.
35 *
36 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
37 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
38 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
39 *
40 * Carnegie Mellon requests users of this software to return to
41 *
42 * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
43 * School of Computer Science
44 * Carnegie Mellon University
45 * Pittsburgh PA 15213-3890
46 *
47 * any improvements or extensions that they make and grant Carnegie Mellon
48 * the rights to redistribute these changes.
49 */
50 /*
51 */
52
53 #include <platforms.h>
54
55 #include <mach/exception_types.h>
56 #include <mach/i386/thread_status.h>
57 #include <mach/i386/fp_reg.h>
58
59 #include <kern/mach_param.h>
60 #include <kern/processor.h>
61 #include <kern/thread.h>
62 #include <kern/zalloc.h>
63 #include <kern/misc_protos.h>
64 #include <kern/spl.h>
65 #include <kern/assert.h>
66
67 #include <i386/thread.h>
68 #include <i386/fpu.h>
69 #include <i386/trap.h>
70 #include <architecture/i386/pio.h>
71 #include <i386/cpuid.h>
72 #include <i386/misc_protos.h>
73 #include <i386/proc_reg.h>
74
75 int fp_kind = FP_NO; /* not inited */
76 zone_t ifps_zone; /* zone for FPU save area */
77
78 #define ALIGNED(addr,size) (((unsigned)(addr)&((size)-1))==0)
79
80 /* Forward */
81
82 extern void fpinit(void);
83 extern void fp_save(
84 thread_t thr_act);
85 extern void fp_load(
86 thread_t thr_act);
87
88 static void configure_mxcsr_capability_mask(struct x86_fpsave_state *ifps);
89
90 struct x86_fpsave_state starting_fp_state;
91
92
93 /* Global MXCSR capability bitmask */
94 static unsigned int mxcsr_capability_mask;
95
96 /*
97 * Determine the MXCSR capability mask, which allows us to mask off any
98 * potentially unsafe "reserved" bits before restoring the FPU context.
99 * *Not* per-cpu, assumes symmetry.
100 */
101 static void
102 configure_mxcsr_capability_mask(struct x86_fpsave_state *ifps)
103 {
104 /* FXSAVE requires a 16 byte aligned store */
105 assert(ALIGNED(ifps,16));
106 /* Clear, to prepare for the diagnostic FXSAVE */
107 bzero(ifps, sizeof(*ifps));
108 /* Disable FPU/SSE Device Not Available exceptions */
109 clear_ts();
110
111 __asm__ volatile("fxsave %0" : "=m" (ifps->fx_save_state));
112 mxcsr_capability_mask = ifps->fx_save_state.fx_MXCSR_MASK;
113
114 /* Set default mask value if necessary */
115 if (mxcsr_capability_mask == 0)
116 mxcsr_capability_mask = 0xffbf;
117
118 /* Re-enable FPU/SSE DNA exceptions */
119 set_ts();
120 }
121
122 /*
123 * Allocate and initialize FP state for current thread.
124 * Don't load state.
125 */
126 static struct x86_fpsave_state *
127 fp_state_alloc(void)
128 {
129 struct x86_fpsave_state *ifps;
130
131 ifps = (struct x86_fpsave_state *)zalloc(ifps_zone);
132 assert(ALIGNED(ifps,16));
133 bzero((char *)ifps, sizeof *ifps);
134
135 return ifps;
136 }
137
138 static inline void
139 fp_state_free(struct x86_fpsave_state *ifps)
140 {
141 zfree(ifps_zone, ifps);
142 }
143
144
145 /*
146 * Look for FPU and initialize it.
147 * Called on each CPU.
148 */
149 void
150 init_fpu(void)
151 {
152 unsigned short status, control;
153
154 /*
155 * Check for FPU by initializing it,
156 * then trying to read the correct bit patterns from
157 * the control and status registers.
158 */
159 set_cr0((get_cr0() & ~(CR0_EM|CR0_TS)) | CR0_NE); /* allow use of FPU */
160
161 fninit();
162 status = fnstsw();
163 fnstcw(&control);
164
165 if ((status & 0xff) == 0 &&
166 (control & 0x103f) == 0x3f)
167 {
168 /* Use FPU save/restore instructions if available */
169 if (cpuid_features() & CPUID_FEATURE_FXSR) {
170 fp_kind = FP_FXSR;
171 set_cr4(get_cr4() | CR4_FXS);
172 printf("Enabling XMM register save/restore");
173 /* And allow SIMD instructions if present */
174 if (cpuid_features() & CPUID_FEATURE_SSE) {
175 printf(" and SSE/SSE2");
176 set_cr4(get_cr4() | CR4_XMM);
177 }
178 printf(" opcodes\n");
179 } else
180 panic("fpu is not FP_FXSR");
181
182 /*
183 * initialze FPU to normal starting
184 * position so that we can take a snapshot
185 * of that state and store it for future use
186 * when we're asked for the FPU state of a
187 * thread, and it hasn't initiated any yet
188 */
189 fpinit();
190 fxsave(&starting_fp_state.fx_save_state);
191
192 /*
193 * Trap wait instructions. Turn off FPU for now.
194 */
195 set_cr0(get_cr0() | CR0_TS | CR0_MP);
196 }
197 else
198 {
199 /*
200 * NO FPU.
201 */
202 panic("fpu is not FP_FXSR");
203 }
204 }
205
206 /*
207 * Initialize FP handling.
208 */
209 void
210 fpu_module_init(void)
211 {
212 struct x86_fpsave_state *new_ifps;
213
214 ifps_zone = zinit(sizeof(struct x86_fpsave_state),
215 THREAD_MAX * sizeof(struct x86_fpsave_state),
216 THREAD_CHUNK * sizeof(struct x86_fpsave_state),
217 "x86 fpsave state");
218 new_ifps = fp_state_alloc();
219 /* Determine MXCSR reserved bits */
220 configure_mxcsr_capability_mask(new_ifps);
221 fp_state_free(new_ifps);
222 }
223
/*
 * Free a FPU save area.
 * Called only when thread terminating - no locking necessary.
 *
 * fps is returned to ifps_zone via fp_state_free().
 */
void
fpu_free(struct x86_fpsave_state *fps)
{
	fp_state_free(fps);
}
234
235 /*
236 * Set the floating-point state for a thread based
237 * on the FXSave formatted data. This is basically
238 * the same as fpu_set_state except it uses the
239 * expanded data structure.
240 * If the thread is not the current thread, it is
241 * not running (held). Locking needed against
242 * concurrent fpu_set_state or fpu_get_state.
243 */
244 kern_return_t
245 fpu_set_fxstate(
246 thread_t thr_act,
247 thread_state_t tstate)
248 {
249 struct x86_fpsave_state *ifps;
250 struct x86_fpsave_state *new_ifps;
251 x86_float_state64_t *state;
252 pcb_t pcb;
253
254 if (fp_kind == FP_NO)
255 return KERN_FAILURE;
256
257 state = (x86_float_state64_t *)tstate;
258
259 assert(thr_act != THREAD_NULL);
260 pcb = thr_act->machine.pcb;
261
262 if (state == NULL) {
263 /*
264 * new FPU state is 'invalid'.
265 * Deallocate the fp state if it exists.
266 */
267 simple_lock(&pcb->lock);
268
269 ifps = pcb->ifps;
270 pcb->ifps = 0;
271
272 simple_unlock(&pcb->lock);
273
274 if (ifps != 0)
275 fp_state_free(ifps);
276 } else {
277 /*
278 * Valid state. Allocate the fp state if there is none.
279 */
280 new_ifps = 0;
281 Retry:
282 simple_lock(&pcb->lock);
283
284 ifps = pcb->ifps;
285 if (ifps == 0) {
286 if (new_ifps == 0) {
287 simple_unlock(&pcb->lock);
288 new_ifps = fp_state_alloc();
289 goto Retry;
290 }
291 ifps = new_ifps;
292 new_ifps = 0;
293 pcb->ifps = ifps;
294 }
295 /*
296 * now copy over the new data.
297 */
298 bcopy((char *)&state->fpu_fcw,
299 (char *)&ifps->fx_save_state, sizeof(struct x86_fx_save));
300
301 /* XXX The layout of the state set from user-space may need to be
302 * validated for consistency.
303 */
304 ifps->fp_save_layout = thread_is_64bit(thr_act) ? FXSAVE64 : FXSAVE32;
305 /* Mark the thread's floating point status as non-live. */
306 ifps->fp_valid = TRUE;
307 /*
308 * Clear any reserved bits in the MXCSR to prevent a GPF
309 * when issuing an FXRSTOR.
310 */
311 ifps->fx_save_state.fx_MXCSR &= mxcsr_capability_mask;
312
313 simple_unlock(&pcb->lock);
314
315 if (new_ifps != 0)
316 fp_state_free(new_ifps);
317 }
318 return KERN_SUCCESS;
319 }
320
321 /*
322 * Get the floating-point state for a thread.
323 * If the thread is not the current thread, it is
324 * not running (held). Locking needed against
325 * concurrent fpu_set_state or fpu_get_state.
326 */
327 kern_return_t
328 fpu_get_fxstate(
329 thread_t thr_act,
330 thread_state_t tstate)
331 {
332 struct x86_fpsave_state *ifps;
333 x86_float_state64_t *state;
334 kern_return_t ret = KERN_FAILURE;
335 pcb_t pcb;
336
337 if (fp_kind == FP_NO)
338 return KERN_FAILURE;
339
340 state = (x86_float_state64_t *)tstate;
341
342 assert(thr_act != THREAD_NULL);
343 pcb = thr_act->machine.pcb;
344
345 simple_lock(&pcb->lock);
346
347 ifps = pcb->ifps;
348 if (ifps == 0) {
349 /*
350 * No valid floating-point state.
351 */
352 bcopy((char *)&starting_fp_state.fx_save_state,
353 (char *)&state->fpu_fcw, sizeof(struct x86_fx_save));
354
355 simple_unlock(&pcb->lock);
356
357 return KERN_SUCCESS;
358 }
359 /*
360 * Make sure we`ve got the latest fp state info
361 * If the live fpu state belongs to our target
362 */
363 if (thr_act == current_thread())
364 {
365 boolean_t intr;
366
367 intr = ml_set_interrupts_enabled(FALSE);
368
369 clear_ts();
370 fp_save(thr_act);
371 clear_fpu();
372
373 (void)ml_set_interrupts_enabled(intr);
374 }
375 if (ifps->fp_valid) {
376 bcopy((char *)&ifps->fx_save_state,
377 (char *)&state->fpu_fcw, sizeof(struct x86_fx_save));
378 ret = KERN_SUCCESS;
379 }
380 simple_unlock(&pcb->lock);
381
382 return ret;
383 }
384
385
386 /*
387 * the child thread is 'stopped' with the thread
388 * mutex held and is currently not known by anyone
389 * so no way for fpu state to get manipulated by an
390 * outside agency -> no need for pcb lock
391 */
392
393 void
394 fpu_dup_fxstate(
395 thread_t parent,
396 thread_t child)
397 {
398 struct x86_fpsave_state *new_ifps = NULL;
399 boolean_t intr;
400 pcb_t ppcb;
401
402 ppcb = parent->machine.pcb;
403
404 if (ppcb->ifps == NULL)
405 return;
406
407 if (child->machine.pcb->ifps)
408 panic("fpu_dup_fxstate: child's ifps non-null");
409
410 new_ifps = fp_state_alloc();
411
412 simple_lock(&ppcb->lock);
413
414 if (ppcb->ifps != NULL) {
415 /*
416 * Make sure we`ve got the latest fp state info
417 */
418 intr = ml_set_interrupts_enabled(FALSE);
419
420 clear_ts();
421 fp_save(parent);
422 clear_fpu();
423
424 (void)ml_set_interrupts_enabled(intr);
425
426 if (ppcb->ifps->fp_valid) {
427 child->machine.pcb->ifps = new_ifps;
428
429 bcopy((char *)&(ppcb->ifps->fx_save_state),
430 (char *)&(child->machine.pcb->ifps->fx_save_state), sizeof(struct x86_fx_save));
431
432 new_ifps->fp_save_layout = ppcb->ifps->fp_save_layout;
433 /* Mark the new fp saved state as non-live. */
434 new_ifps->fp_valid = TRUE;
435 /*
436 * Clear any reserved bits in the MXCSR to prevent a GPF
437 * when issuing an FXRSTOR.
438 */
439 new_ifps->fx_save_state.fx_MXCSR &= mxcsr_capability_mask;
440 new_ifps = NULL;
441 }
442 }
443 simple_unlock(&ppcb->lock);
444
445 if (new_ifps != NULL)
446 fp_state_free(new_ifps);
447 }
448
449
450 /*
451 * Initialize FPU.
452 *
453 */
454 void
455 fpinit(void)
456 {
457 unsigned short control;
458
459 clear_ts();
460 fninit();
461 fnstcw(&control);
462 control &= ~(FPC_PC|FPC_RC); /* Clear precision & rounding control */
463 control |= (FPC_PC_64 | /* Set precision */
464 FPC_RC_RN | /* round-to-nearest */
465 FPC_ZE | /* Suppress zero-divide */
466 FPC_OE | /* and overflow */
467 FPC_UE | /* underflow */
468 FPC_IE | /* Allow NaNQs and +-INF */
469 FPC_DE | /* Allow denorms as operands */
470 FPC_PE); /* No trap for precision loss */
471 fldcw(control);
472
473 /* Initialize SSE/SSE2 */
474 __builtin_ia32_ldmxcsr(0x1f80);
475 }
476
477 /*
478 * Coprocessor not present.
479 */
480
481 void
482 fpnoextflt(void)
483 {
484 boolean_t intr;
485
486 intr = ml_set_interrupts_enabled(FALSE);
487
488 clear_ts(); /* Enable FPU use */
489
490 if (get_interrupt_level()) {
491 /*
492 * Save current coprocessor context if valid
493 * Initialize coprocessor live context
494 */
495 fp_save(current_thread());
496 fpinit();
497 } else {
498 /*
499 * Load this thread`s state into coprocessor live context.
500 */
501 fp_load(current_thread());
502 }
503
504 (void)ml_set_interrupts_enabled(intr);
505 }
506
507 /*
508 * FPU overran end of segment.
509 * Re-initialize FPU. Floating point state is not valid.
510 */
511
512 void
513 fpextovrflt(void)
514 {
515 thread_t thr_act = current_thread();
516 pcb_t pcb;
517 struct x86_fpsave_state *ifps;
518 boolean_t intr;
519
520 intr = ml_set_interrupts_enabled(FALSE);
521
522 if (get_interrupt_level())
523 panic("FPU segment overrun exception at interrupt context\n");
524 if (current_task() == kernel_task)
525 panic("FPU segment overrun exception in kernel thread context\n");
526
527 /*
528 * This is a non-recoverable error.
529 * Invalidate the thread`s FPU state.
530 */
531 pcb = thr_act->machine.pcb;
532 simple_lock(&pcb->lock);
533 ifps = pcb->ifps;
534 pcb->ifps = 0;
535 simple_unlock(&pcb->lock);
536
537 /*
538 * Re-initialize the FPU.
539 */
540 clear_ts();
541 fninit();
542
543 /*
544 * And disable access.
545 */
546 clear_fpu();
547
548 (void)ml_set_interrupts_enabled(intr);
549
550 if (ifps)
551 zfree(ifps_zone, ifps);
552
553 /*
554 * Raise exception.
555 */
556 i386_exception(EXC_BAD_ACCESS, VM_PROT_READ|VM_PROT_EXECUTE, 0);
557 /*NOTREACHED*/
558 }
559
560 /*
561 * FPU error. Called by AST.
562 */
563
564 void
565 fpexterrflt(void)
566 {
567 thread_t thr_act = current_thread();
568 struct x86_fpsave_state *ifps = thr_act->machine.pcb->ifps;
569 boolean_t intr;
570
571 intr = ml_set_interrupts_enabled(FALSE);
572
573 if (get_interrupt_level())
574 panic("FPU error exception at interrupt context\n");
575 if (current_task() == kernel_task)
576 panic("FPU error exception in kernel thread context\n");
577
578 /*
579 * Save the FPU state and turn off the FPU.
580 */
581 fp_save(thr_act);
582
583 (void)ml_set_interrupts_enabled(intr);
584
585 /*
586 * Raise FPU exception.
587 * Locking not needed on pcb->ifps,
588 * since thread is running.
589 */
590 i386_exception(EXC_ARITHMETIC,
591 EXC_I386_EXTERR,
592 ifps->fx_save_state.fx_status);
593
594 /*NOTREACHED*/
595 }
596
597 /*
598 * Save FPU state.
599 *
600 * Locking not needed:
601 * . if called from fpu_get_state, pcb already locked.
602 * . if called from fpnoextflt or fp_intr, we are single-cpu
603 * . otherwise, thread is running.
604 * N.B.: Must be called with interrupts disabled
605 */
606
607 void
608 fp_save(
609 thread_t thr_act)
610 {
611 pcb_t pcb = thr_act->machine.pcb;
612 struct x86_fpsave_state *ifps = pcb->ifps;
613
614 if (ifps != 0 && !ifps->fp_valid) {
615 assert((get_cr0() & CR0_TS) == 0);
616 /* registers are in FPU */
617 ifps->fp_valid = TRUE;
618
619 if (!thread_is_64bit(thr_act)) {
620 /* save the compatibility/legacy mode XMM+x87 state */
621 fxsave(&ifps->fx_save_state);
622 ifps->fp_save_layout = FXSAVE32;
623 }
624 else {
625 fxsave64(&ifps->fx_save_state);
626 ifps->fp_save_layout = FXSAVE64;
627 }
628 }
629 }
630
631 /*
632 * Restore FPU state from PCB.
633 *
634 * Locking not needed; always called on the current thread.
635 */
636
637 void
638 fp_load(
639 thread_t thr_act)
640 {
641 pcb_t pcb = thr_act->machine.pcb;
642 struct x86_fpsave_state *ifps;
643
644 ifps = pcb->ifps;
645 if (ifps == 0 || ifps->fp_valid == FALSE) {
646 if (ifps == 0) {
647 /* FIXME: This allocation mechanism should be revised
648 * for scenarios where interrupts are disabled.
649 */
650 ifps = fp_state_alloc();
651 pcb->ifps = ifps;
652 }
653 fpinit();
654 } else {
655 assert(ifps->fp_save_layout == FXSAVE32 || ifps->fp_save_layout == FXSAVE64);
656 if (ifps->fp_save_layout == FXSAVE32) {
657 /* Restore the compatibility/legacy mode XMM+x87 state */
658 fxrstor(&ifps->fx_save_state);
659 }
660 else if (ifps->fp_save_layout == FXSAVE64) {
661 fxrstor64(&ifps->fx_save_state);
662 }
663 }
664 ifps->fp_valid = FALSE; /* in FPU */
665 }
666
667
668
669 /*
670 * fpflush(thread_t)
671 * Flush the current act's state, if needed
672 * (used by thread_terminate_self to ensure fp faults
673 * aren't satisfied by overly general trap code in the
674 * context of the reaper thread)
675 */
676 void
677 fpflush(__unused thread_t thr_act)
678 {
679 /* not needed on MP x86s; fp not lazily evaluated */
680 }
681
682 /*
683 * SSE arithmetic exception handling code.
684 * Basically the same as the x87 exception handler with a different subtype
685 */
686
687 void
688 fpSSEexterrflt(void)
689 {
690 thread_t thr_act = current_thread();
691 struct x86_fpsave_state *ifps = thr_act->machine.pcb->ifps;
692 boolean_t intr;
693
694 intr = ml_set_interrupts_enabled(FALSE);
695
696 if (get_interrupt_level())
697 panic("SSE exception at interrupt context\n");
698 if (current_task() == kernel_task)
699 panic("SSE exception in kernel thread context\n");
700
701 /*
702 * Save the FPU state and turn off the FPU.
703 */
704 fp_save(thr_act);
705
706 (void)ml_set_interrupts_enabled(intr);
707 /*
708 * Raise FPU exception.
709 * Locking not needed on pcb->ifps,
710 * since thread is running.
711 */
712 assert(ifps->fp_save_layout == FXSAVE32 || ifps->fp_save_layout == FXSAVE64);
713 i386_exception(EXC_ARITHMETIC,
714 EXC_I386_SSEEXTERR,
715 ifps->fx_save_state.fx_status);
716 /*NOTREACHED*/
717 }
718
719
720 void
721 fp_setvalid(boolean_t value) {
722 thread_t thr_act = current_thread();
723 struct x86_fpsave_state *ifps = thr_act->machine.pcb->ifps;
724
725 if (ifps) {
726 ifps->fp_valid = value;
727
728 if (value == TRUE)
729 clear_fpu();
730 }
731 }